src/mem/ruby/profiler/Profiler.cc

   1 /*
   2  * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions are
   7  * met: redistributions of source code must retain the above copyright
   8  * notice, this list of conditions and the following disclaimer;
   9  * redistributions in binary form must reproduce the above copyright
  10  * notice, this list of conditions and the following disclaimer in the
  11  * documentation and/or other materials provided with the distribution;
  12  * neither the name of the copyright holders nor the names of its
  13  * contributors may be used to endorse or promote products derived from
  14  * this software without specific prior written permission.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  */
  28
  29 /*
  30    This file has been modified by Kevin Moore and Dan Nussbaum of the
  31    Scalable Systems Research Group at Sun Microsystems Laboratories
  32    (http://research.sun.com/scalable/) to support the Adaptive
  33    Transactional Memory Test Platform (ATMTP).
  34
  35    Please send email to atmtp-interest@sun.com with feedback, questions, or
  36    to request future announcements about ATMTP.
  37
  38    ----------------------------------------------------------------------
  39
  40    File modification date: 2008-02-23
  41
  42    ----------------------------------------------------------------------
  43 */
  44
  45 /*
  46  * Profiler.cc
  47  *
  48  * Description: See Profiler.hh
  49  *
  50  * $Id$
  51  *
  52  */
  53
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/profiler/AddressProfiler.hh"
#include "mem/ruby/system/System.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/gems_common/PrioHeap.hh"
#include "mem/protocol/CacheMsg.hh"
#include "mem/protocol/Protocol.hh"
#include "mem/gems_common/util.hh"
#include "mem/gems_common/Map.hh"
#include "mem/ruby/common/Debug.hh"
#include "mem/protocol/MachineType.hh"

#include "mem/ruby/system/System.hh"

// Allows use of times() library call, which determines virtual runtime
#include <sys/times.h>
// Provides sysconf(_SC_CLK_TCK), used to convert times() ticks to seconds
#include <unistd.h>
  70
  71 extern std::ostream * debug_cout_ptr;
  72
  73 static double process_memory_total();
  74 static double process_memory_resident();
  75
  76 Profiler::Profiler(const Params *p)
  77     : SimObject(p)
  78 {
  79   m_requestProfileMap_ptr = new Map<string, int>;
  80
  81   m_inst_profiler_ptr = NULL;
  82   m_address_profiler_ptr = NULL;
  83
  84   m_real_time_start_time = time(NULL); // Not reset in clearStats()
  85   m_stats_period = 1000000; // Default
  86   m_periodic_output_file_ptr = &cerr;
  87
  88   m_hot_lines = p->hot_lines;
  89   m_all_instructions = p->all_instructions;
  90
  91   m_num_of_sequencers = p->num_of_sequencers;
  92
  93   //
  94   // Initialize the memory controller profiler structs
  95   //
  96   m_mc_profilers.setSize(p->mem_cntrl_count);
  97   for (int mem_cntrl = 0; mem_cntrl < p->mem_cntrl_count; mem_cntrl++) {
  98     m_mc_profilers[mem_cntrl] = new memory_control_profiler;
  99     m_mc_profilers[mem_cntrl]->m_memReq = 0;
 100     m_mc_profilers[mem_cntrl]->m_memBankBusy = 0;
 101     m_mc_profilers[mem_cntrl]->m_memBusBusy = 0;
 102     m_mc_profilers[mem_cntrl]->m_memReadWriteBusy = 0;
 103     m_mc_profilers[mem_cntrl]->m_memDataBusBusy = 0;
 104     m_mc_profilers[mem_cntrl]->m_memTfawBusy = 0;
 105     m_mc_profilers[mem_cntrl]->m_memRefresh = 0;
 106     m_mc_profilers[mem_cntrl]->m_memRead = 0;
 107     m_mc_profilers[mem_cntrl]->m_memWrite = 0;
 108     m_mc_profilers[mem_cntrl]->m_memWaitCycles = 0;
 109     m_mc_profilers[mem_cntrl]->m_memInputQ = 0;
 110     m_mc_profilers[mem_cntrl]->m_memBankQ = 0;
 111     m_mc_profilers[mem_cntrl]->m_memArbWait = 0;
 112     m_mc_profilers[mem_cntrl]->m_memRandBusy = 0;
 113     m_mc_profilers[mem_cntrl]->m_memNotOld = 0;
 114
 115     m_mc_profilers[mem_cntrl]->m_banks_per_rank = p->banks_per_rank;
 116     m_mc_profilers[mem_cntrl]->m_ranks_per_dimm = p->ranks_per_dimm;
 117     m_mc_profilers[mem_cntrl]->m_dimms_per_channel =
 118       p->dimms_per_channel;
 119
 120     int totalBanks = p->banks_per_rank *
 121                      p->ranks_per_dimm *
 122                      p->dimms_per_channel;
 123
 124     m_mc_profilers[mem_cntrl]->m_memBankCount.setSize(totalBanks);
 125   }
 126
 127   m_hot_lines = false;
 128   m_all_instructions = false;
 129
 130   m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
 131   m_address_profiler_ptr -> setHotLines(m_hot_lines);
 132   m_address_profiler_ptr -> setAllInstructions(m_all_instructions);
 133
 134   if (m_all_instructions) {
 135     m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
 136     m_inst_profiler_ptr -> setHotLines(m_hot_lines);
 137     m_inst_profiler_ptr -> setAllInstructions(m_all_instructions);
 138   }
 139 }
 140
 141 Profiler::~Profiler()
 142 {
 143   if (m_periodic_output_file_ptr != &cerr) {
 144     delete m_periodic_output_file_ptr;
 145   }
 146
 147   for (int mem_cntrl = 0;
 148        mem_cntrl < m_mc_profilers.size();
 149        mem_cntrl++) {
 150     delete m_mc_profilers[mem_cntrl];
 151   }
 152
 153   delete m_requestProfileMap_ptr;
 154 }
 155
 156 void Profiler::wakeup()
 157 {
 158   // FIXME - avoid the repeated code
 159
 160   Vector<integer_t> perProcCycleCount;
 161   perProcCycleCount.setSize(m_num_of_sequencers);
 162
 163   for(int i=0; i < m_num_of_sequencers; i++) {
 164     perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
 165     // The +1 allows us to avoid division by zero
 166   }
 167
 168   integer_t total_misses = m_perProcTotalMisses.sum();
 169   integer_t simics_cycles_executed = perProcCycleCount.sum();
 170   integer_t transactions_started = m_perProcStartTransaction.sum();
 171   integer_t transactions_ended = m_perProcEndTransaction.sum();
 172
 173   (*m_periodic_output_file_ptr) << "ruby_cycles: "
 174                                 << g_eventQueue_ptr->getTime()-m_ruby_start
 175                                 << endl;
 176
 177   (*m_periodic_output_file_ptr) << "total_misses: "
 178                                 << total_misses
 179                                 << " "
 180                                 << m_perProcTotalMisses
 181                                 << endl;
 182
 183   (*m_periodic_output_file_ptr) << "simics_cycles_executed: "
 184                                 << simics_cycles_executed
 185                                 << " "
 186                                 << perProcCycleCount
 187                                 << endl;
 188
 189   (*m_periodic_output_file_ptr) << "transactions_started: "
 190                                 << transactions_started
 191                                 << " "
 192                                 << m_perProcStartTransaction
 193                                 << endl;
 194
 195   (*m_periodic_output_file_ptr) << "transactions_ended: "
 196                                 << transactions_ended
 197                                 << " "
 198                                 << m_perProcEndTransaction
 199                                 << endl;
 200
 201   (*m_periodic_output_file_ptr) << "mbytes_resident: "
 202                                 << process_memory_resident()
 203                                 << endl;
 204
 205   (*m_periodic_output_file_ptr) << "mbytes_total: "
 206                                 << process_memory_total()
 207                                 << endl;
 208
 209   if (process_memory_total() > 0) {
 210     (*m_periodic_output_file_ptr) << "resident_ratio: "
 211                           << process_memory_resident()/process_memory_total()
 212                           << endl;
 213   }
 214
 215   (*m_periodic_output_file_ptr) << "miss_latency: "
 216                                 << m_allMissLatencyHistogram
 217                                 << endl;
 218
 219   *m_periodic_output_file_ptr << endl;
 220
 221   if (m_all_instructions) {
 222     m_inst_profiler_ptr->printStats(*m_periodic_output_file_ptr);
 223   }
 224
 225   //g_system_ptr->getNetwork()->printStats(*m_periodic_output_file_ptr);
 226   g_eventQueue_ptr->scheduleEvent(this, m_stats_period);
 227 }
 228
 229 void Profiler::setPeriodicStatsFile(const string& filename)
 230 {
 231   cout << "Recording periodic statistics to file '" << filename << "' every "
 232        << m_stats_period << " Ruby cycles" << endl;
 233
 234   if (m_periodic_output_file_ptr != &cerr) {
 235     delete m_periodic_output_file_ptr;
 236   }
 237
 238   m_periodic_output_file_ptr = new ofstream(filename.c_str());
 239   g_eventQueue_ptr->scheduleEvent(this, 1);
 240 }
 241
 242 void Profiler::setPeriodicStatsInterval(integer_t period)
 243 {
 244   cout << "Recording periodic statistics every " << m_stats_period
 245        << " Ruby cycles" << endl;
 246
 247   m_stats_period = period;
 248   g_eventQueue_ptr->scheduleEvent(this, 1);
 249 }
 250
 251 void Profiler::printConfig(ostream& out) const
 252 {
 253   out << endl;
 254   out << "Profiler Configuration" << endl;
 255   out << "----------------------" << endl;
 256   out << "periodic_stats_period: " << m_stats_period << endl;
 257 }
 258
 259 void Profiler::print(ostream& out) const
 260 {
 261   out << "[Profiler]";
 262 }
 263
 264 void Profiler::printStats(ostream& out, bool short_stats)
 265 {
 266   out << endl;
 267   if (short_stats) {
 268     out << "SHORT ";
 269   }
 270   out << "Profiler Stats" << endl;
 271   out << "--------------" << endl;
 272
 273   time_t real_time_current = time(NULL);
 274   double seconds = difftime(real_time_current, m_real_time_start_time);
 275   double minutes = seconds/60.0;
 276   double hours = minutes/60.0;
 277   double days = hours/24.0;
 278   Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start;
 279
 280   if (!short_stats) {
 281     out << "Elapsed_time_in_seconds: " << seconds << endl;
 282     out << "Elapsed_time_in_minutes: " << minutes << endl;
 283     out << "Elapsed_time_in_hours: " << hours << endl;
 284     out << "Elapsed_time_in_days: " << days << endl;
 285     out << endl;
 286   }
 287
 288   // print the virtual runtimes as well
 289   struct tms vtime;
 290   times(&vtime);
 291   seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0;
 292   minutes = seconds / 60.0;
 293   hours = minutes / 60.0;
 294   days = hours / 24.0;
 295   out << "Virtual_time_in_seconds: " << seconds << endl;
 296   out << "Virtual_time_in_minutes: " << minutes << endl;
 297   out << "Virtual_time_in_hours:   " << hours << endl;
 298   out << "Virtual_time_in_days:    " << days << endl;
 299   out << endl;
 300
 301   out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl;
 302   out << "Ruby_start_time: " << m_ruby_start << endl;
 303   out << "Ruby_cycles: " << ruby_cycles << endl;
 304   out << endl;
 305
 306   if (!short_stats) {
 307     out << "mbytes_resident: " << process_memory_resident() << endl;
 308     out << "mbytes_total: " << process_memory_total() << endl;
 309     if (process_memory_total() > 0) {
 310       out << "resident_ratio: "
 311           << process_memory_resident()/process_memory_total() << endl;
 312     }
 313     out << endl;
 314
 315   }
 316
 317   Vector<integer_t> perProcCycleCount;
 318   Vector<double> perProcCyclesPerTrans;
 319   Vector<double> perProcMissesPerTrans;
 320
 321
 322   perProcCycleCount.setSize(m_num_of_sequencers);
 323   perProcCyclesPerTrans.setSize(m_num_of_sequencers);
 324   perProcMissesPerTrans.setSize(m_num_of_sequencers);
 325
 326   for(int i=0; i < m_num_of_sequencers; i++) {
 327     perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
 328     // The +1 allows us to avoid division by zero
 329
 330     int trans = m_perProcEndTransaction[i];
 331     if (trans == 0) {
 332       perProcCyclesPerTrans[i] = 0;
 333       perProcMissesPerTrans[i] = 0;
 334     } else {
 335       perProcCyclesPerTrans[i] = ruby_cycles / double(trans);
 336       perProcMissesPerTrans[i] = m_perProcTotalMisses[i] / double(trans);
 337     }
 338   }
 339
 340   integer_t total_misses = m_perProcTotalMisses.sum();
 341   integer_t user_misses = m_perProcUserMisses.sum();
 342   integer_t supervisor_misses = m_perProcSupervisorMisses.sum();
 343   integer_t simics_cycles_executed = perProcCycleCount.sum();
 344   integer_t transactions_started = m_perProcStartTransaction.sum();
 345   integer_t transactions_ended = m_perProcEndTransaction.sum();
 346
 347   double cycles_per_transaction = (transactions_ended != 0) ? (m_num_of_sequencers * double(ruby_cycles)) / double(transactions_ended) : 0;
 348   double misses_per_transaction = (transactions_ended != 0) ? double(total_misses) / double(transactions_ended) : 0;
 349
 350   out << "Total_misses: " << total_misses << endl;
 351   out << "total_misses: " << total_misses << " " << m_perProcTotalMisses << endl;
 352   out << "user_misses: " << user_misses << " " << m_perProcUserMisses << endl;
 353   out << "supervisor_misses: " << supervisor_misses << " " << m_perProcSupervisorMisses << endl;
 354   out << endl;
 355   out << "ruby_cycles_executed: " << simics_cycles_executed << " " << perProcCycleCount << endl;
 356   out << endl;
 357   out << "transactions_started: " << transactions_started << " " << m_perProcStartTransaction << endl;
 358   out << "transactions_ended: " << transactions_ended << " " << m_perProcEndTransaction << endl;
 359   out << "cycles_per_transaction: " << cycles_per_transaction  << " " << perProcCyclesPerTrans << endl;
 360   out << "misses_per_transaction: " << misses_per_transaction << " " << perProcMissesPerTrans << endl;
 361
 362   out << endl;
 363
 364   out << endl;
 365
 366   for (int mem_cntrl = 0;
 367        mem_cntrl < m_mc_profilers.size();
 368        mem_cntrl++) {
 369     uint64 m_memReq = m_mc_profilers[mem_cntrl]->m_memReq;
 370     uint64 m_memRefresh = m_mc_profilers[mem_cntrl]->m_memRefresh;
 371     uint64 m_memInputQ = m_mc_profilers[mem_cntrl]->m_memInputQ;
 372     uint64 m_memBankQ = m_mc_profilers[mem_cntrl]->m_memBankQ;
 373     uint64 m_memWaitCycles = m_mc_profilers[mem_cntrl]->m_memWaitCycles;
 374     uint64 m_memRead = m_mc_profilers[mem_cntrl]->m_memRead;
 375     uint64 m_memWrite = m_mc_profilers[mem_cntrl]->m_memWrite;
 376     uint64 m_memBankBusy = m_mc_profilers[mem_cntrl]->m_memBankBusy;
 377     uint64 m_memRandBusy = m_mc_profilers[mem_cntrl]->m_memRandBusy;
 378     uint64 m_memNotOld = m_mc_profilers[mem_cntrl]->m_memNotOld;
 379     uint64 m_memArbWait = m_mc_profilers[mem_cntrl]->m_memArbWait;
 380     uint64 m_memBusBusy = m_mc_profilers[mem_cntrl]->m_memBusBusy;
 381     uint64 m_memTfawBusy = m_mc_profilers[mem_cntrl]->m_memTfawBusy;
 382     uint64 m_memReadWriteBusy = m_mc_profilers[mem_cntrl]->m_memReadWriteBusy;
 383     uint64 m_memDataBusBusy = m_mc_profilers[mem_cntrl]->m_memDataBusBusy;
 384     Vector<uint64> m_memBankCount = m_mc_profilers[mem_cntrl]->m_memBankCount;
 385
 386     if (m_memReq || m_memRefresh) {    // if there's a memory controller at all
 387       uint64 total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles;
 388       double stallsPerReq = total_stalls * 1.0 / m_memReq;
 389       out << "Memory control " << mem_cntrl << ":" << endl;
 390       out << "  memory_total_requests: " << m_memReq << endl;  // does not include refreshes
 391       out << "  memory_reads: " << m_memRead << endl;
 392       out << "  memory_writes: " << m_memWrite << endl;
 393       out << "  memory_refreshes: " << m_memRefresh << endl;
 394       out << "  memory_total_request_delays: " << total_stalls << endl;
 395       out << "  memory_delays_per_request: " << stallsPerReq << endl;
 396       out << "  memory_delays_in_input_queue: " << m_memInputQ << endl;
 397       out << "  memory_delays_behind_head_of_bank_queue: " << m_memBankQ << endl;
 398       out << "  memory_delays_stalled_at_head_of_bank_queue: " << m_memWaitCycles << endl;
 399       // Note:  The following "memory stalls" entries are a breakdown of the
 400       // cycles which already showed up in m_memWaitCycles.  The order is
 401       // significant; it is the priority of attributing the cycles.
 402       // For example, bank_busy is before arbitration because if the bank was
 403       // busy, we didn't even check arbitration.
 404       // Note:  "not old enough" means that since we grouped waiting heads-of-queues
 405       // into batches to avoid starvation, a request in a newer batch
 406       // didn't try to arbitrate yet because there are older requests waiting.
 407       out << "  memory_stalls_for_bank_busy: " << m_memBankBusy << endl;
 408       out << "  memory_stalls_for_random_busy: " << m_memRandBusy << endl;
 409       out << "  memory_stalls_for_anti_starvation: " << m_memNotOld << endl;
 410       out << "  memory_stalls_for_arbitration: " << m_memArbWait << endl;
 411       out << "  memory_stalls_for_bus: " << m_memBusBusy << endl;
 412       out << "  memory_stalls_for_tfaw: " << m_memTfawBusy << endl;
 413       out << "  memory_stalls_for_read_write_turnaround: " << m_memReadWriteBusy << endl;
 414       out << "  memory_stalls_for_read_read_turnaround: " << m_memDataBusBusy << endl;
 415       out << "  accesses_per_bank: ";
 416       for (int bank=0; bank < m_memBankCount.size(); bank++) {
 417         out << m_memBankCount[bank] << "  ";
 418         //if ((bank % 8) == 7) out << "                     " << endl;
 419       }
 420       out << endl;
 421       out << endl;
 422     }
 423   }
 424   if (!short_stats) {
 425     out << "Busy Controller Counts:" << endl;
 426     for(int i=0; i < MachineType_NUM; i++) {
 427       for(int j=0; j < MachineType_base_count((MachineType)i); j++) {
 428         MachineID machID;
 429         machID.type = (MachineType)i;
 430         machID.num = j;
 431         out << machID << ":" << m_busyControllerCount[i][j] << "  ";
 432         if ((j+1)%8 == 0) {
 433           out << endl;
 434         }
 435       }
 436       out << endl;
 437     }
 438     out << endl;
 439
 440     out << "Busy Bank Count:" << m_busyBankCount << endl;
 441     out << endl;
 442
 443     out << "sequencer_requests_outstanding: " << m_sequencer_requests << endl;
 444     out << endl;
 445   }
 446
 447   if (!short_stats) {
 448     out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
 449     out << "----------------------------------------" << endl;
 450     out << "miss_latency: " << m_allMissLatencyHistogram << endl;
 451     for(int i=0; i<m_missLatencyHistograms.size(); i++) {
 452       if (m_missLatencyHistograms[i].size() > 0) {
 453         out << "miss_latency_" << RubyRequestType(i) << ": " << m_missLatencyHistograms[i] << endl;
 454       }
 455     }
 456     for(int i=0; i<m_machLatencyHistograms.size(); i++) {
 457       if (m_machLatencyHistograms[i].size() > 0) {
 458         out << "miss_latency_" << GenericMachineType(i) << ": " << m_machLatencyHistograms[i] << endl;
 459       }
 460     }
 461
 462     out << endl;
 463
 464     out << "All Non-Zero Cycle SW Prefetch Requests" << endl;
 465     out << "------------------------------------" << endl;
 466     out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl;
 467     for(int i=0; i<m_SWPrefetchLatencyHistograms.size(); i++) {
 468       if (m_SWPrefetchLatencyHistograms[i].size() > 0) {
 469         out << "prefetch_latency_" << CacheRequestType(i) << ": " << m_SWPrefetchLatencyHistograms[i] << endl;
 470       }
 471     }
 472     for(int i=0; i<m_SWPrefetchMachLatencyHistograms.size(); i++) {
 473       if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) {
 474         out << "prefetch_latency_" << GenericMachineType(i) << ": " << m_SWPrefetchMachLatencyHistograms[i] << endl;
 475       }
 476     }
 477     out << "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram << endl;
 478
 479     if (m_all_sharing_histogram.size() > 0) {
 480       out << "all_sharing: " << m_all_sharing_histogram << endl;
 481       out << "read_sharing: " << m_read_sharing_histogram << endl;
 482       out << "write_sharing: " << m_write_sharing_histogram << endl;
 483
 484       out << "all_sharing_percent: "; m_all_sharing_histogram.printPercent(out); out << endl;
 485       out << "read_sharing_percent: "; m_read_sharing_histogram.printPercent(out); out << endl;
 486       out << "write_sharing_percent: "; m_write_sharing_histogram.printPercent(out); out << endl;
 487
 488       int64 total_miss = m_cache_to_cache +  m_memory_to_cache;
 489       out << "all_misses: " << total_miss << endl;
 490       out << "cache_to_cache_misses: " << m_cache_to_cache << endl;
 491       out << "memory_to_cache_misses: " << m_memory_to_cache << endl;
 492       out << "cache_to_cache_percent: " << 100.0 * (double(m_cache_to_cache) / double(total_miss)) << endl;
 493       out << "memory_to_cache_percent: " << 100.0 * (double(m_memory_to_cache) / double(total_miss)) << endl;
 494       out << endl;
 495     }
 496
 497     if (m_outstanding_requests.size() > 0) {
 498       out << "outstanding_requests: "; m_outstanding_requests.printPercent(out); out << endl;
 499       out << endl;
 500     }
 501   }
 502
 503   if (!short_stats) {
 504     out << "Request vs. RubySystem State Profile" << endl;
 505     out << "--------------------------------" << endl;
 506     out << endl;
 507
 508     Vector<string> requestProfileKeys = m_requestProfileMap_ptr->keys();
 509     requestProfileKeys.sortVector();
 510
 511     for(int i=0; i<requestProfileKeys.size(); i++) {
 512       int temp_int = m_requestProfileMap_ptr->lookup(requestProfileKeys[i]);
 513       double percent = (100.0*double(temp_int))/double(m_requests);
 514       while (requestProfileKeys[i] != "") {
 515         out << setw(10) << string_split(requestProfileKeys[i], ':');
 516       }
 517       out << setw(11) << temp_int;
 518       out << setw(14) << percent << endl;
 519     }
 520     out << endl;
 521
 522     out << "filter_action: " << m_filter_action_histogram << endl;
 523
 524     if (!m_all_instructions) {
 525       m_address_profiler_ptr->printStats(out);
 526     }
 527
 528     if (m_all_instructions) {
 529       m_inst_profiler_ptr->printStats(out);
 530     }
 531
 532     out << endl;
 533     out << "Message Delayed Cycles" << endl;
 534     out << "----------------------" << endl;
 535     out << "Total_delay_cycles: " <<   m_delayedCyclesHistogram << endl;
 536     out << "Total_nonPF_delay_cycles: " << m_delayedCyclesNonPFHistogram << endl;
 537     for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) {
 538       out << "  virtual_network_" << i << "_delay_cycles: " << m_delayedCyclesVCHistograms[i] << endl;
 539     }
 540
 541     printResourceUsage(out);
 542   }
 543
 544 }
 545
 546 void Profiler::printResourceUsage(ostream& out) const
 547 {
 548   out << endl;
 549   out << "Resource Usage" << endl;
 550   out << "--------------" << endl;
 551
 552   integer_t pagesize = getpagesize(); // page size in bytes
 553   out << "page_size: " << pagesize << endl;
 554
 555   rusage usage;
 556   getrusage (RUSAGE_SELF, &usage);
 557
 558   out << "user_time: " << usage.ru_utime.tv_sec << endl;
 559   out << "system_time: " << usage.ru_stime.tv_sec << endl;
 560   out << "page_reclaims: " << usage.ru_minflt << endl;
 561   out << "page_faults: " << usage.ru_majflt << endl;
 562   out << "swaps: " << usage.ru_nswap << endl;
 563   out << "block_inputs: " << usage.ru_inblock << endl;
 564   out << "block_outputs: " << usage.ru_oublock << endl;
 565 }
 566
 567 void Profiler::clearStats()
 568 {
 569   m_ruby_start = g_eventQueue_ptr->getTime();
 570
 571   m_cycles_executed_at_start.setSize(m_num_of_sequencers);
 572   for (int i=0; i < m_num_of_sequencers; i++) {
 573     if (g_system_ptr == NULL) {
 574       m_cycles_executed_at_start[i] = 0;
 575     } else {
 576       m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i);
 577     }
 578   }
 579
 580   m_perProcTotalMisses.setSize(m_num_of_sequencers);
 581   m_perProcUserMisses.setSize(m_num_of_sequencers);
 582   m_perProcSupervisorMisses.setSize(m_num_of_sequencers);
 583   m_perProcStartTransaction.setSize(m_num_of_sequencers);
 584   m_perProcEndTransaction.setSize(m_num_of_sequencers);
 585
 586   for(int i=0; i < m_num_of_sequencers; i++) {
 587     m_perProcTotalMisses[i] = 0;
 588     m_perProcUserMisses[i] = 0;
 589     m_perProcSupervisorMisses[i] = 0;
 590     m_perProcStartTransaction[i] = 0;
 591     m_perProcEndTransaction[i] = 0;
 592   }
 593
 594   m_busyControllerCount.setSize(MachineType_NUM); // all machines
 595   for(int i=0; i < MachineType_NUM; i++) {
 596     m_busyControllerCount[i].setSize(MachineType_base_count((MachineType)i));
 597     for(int j=0; j < MachineType_base_count((MachineType)i); j++) {
 598       m_busyControllerCount[i][j] = 0;
 599     }
 600   }
 601   m_busyBankCount = 0;
 602
 603   m_delayedCyclesHistogram.clear();
 604   m_delayedCyclesNonPFHistogram.clear();
 605   m_delayedCyclesVCHistograms.setSize(RubySystem::getNetwork()->getNumberOfVirtualNetworks());
 606   for (int i = 0; i < RubySystem::getNetwork()->getNumberOfVirtualNetworks(); i++) {
 607     m_delayedCyclesVCHistograms[i].clear();
 608   }
 609
 610   m_missLatencyHistograms.setSize(RubyRequestType_NUM);
 611   for(int i=0; i<m_missLatencyHistograms.size(); i++) {
 612     m_missLatencyHistograms[i].clear(200);
 613   }
 614   m_machLatencyHistograms.setSize(GenericMachineType_NUM+1);
 615   for(int i=0; i<m_machLatencyHistograms.size(); i++) {
 616     m_machLatencyHistograms[i].clear(200);
 617   }
 618   m_allMissLatencyHistogram.clear(200);
 619
 620   m_SWPrefetchLatencyHistograms.setSize(CacheRequestType_NUM);
 621   for(int i=0; i<m_SWPrefetchLatencyHistograms.size(); i++) {
 622     m_SWPrefetchLatencyHistograms[i].clear(200);
 623   }
 624   m_SWPrefetchMachLatencyHistograms.setSize(GenericMachineType_NUM+1);
 625   for(int i=0; i<m_SWPrefetchMachLatencyHistograms.size(); i++) {
 626     m_SWPrefetchMachLatencyHistograms[i].clear(200);
 627   }
 628   m_allSWPrefetchLatencyHistogram.clear(200);
 629
 630   m_sequencer_requests.clear();
 631   m_read_sharing_histogram.clear();
 632   m_write_sharing_histogram.clear();
 633   m_all_sharing_histogram.clear();
 634   m_cache_to_cache = 0;
 635   m_memory_to_cache = 0;
 636
 637   // clear HashMaps
 638   m_requestProfileMap_ptr->clear();
 639
 640   // count requests profiled
 641   m_requests = 0;
 642
 643   m_outstanding_requests.clear();
 644   m_outstanding_persistent_requests.clear();
 645
 646 //added by SS
 647   vector<string>::iterator it;
 648
 649   for (int mem_cntrl = 0;
 650        mem_cntrl < m_mc_profilers.size();
 651        mem_cntrl++) {
 652     m_mc_profilers[mem_cntrl]->m_memReq = 0;
 653     m_mc_profilers[mem_cntrl]->m_memBankBusy = 0;
 654     m_mc_profilers[mem_cntrl]->m_memBusBusy = 0;
 655     m_mc_profilers[mem_cntrl]->m_memTfawBusy = 0;
 656     m_mc_profilers[mem_cntrl]->m_memReadWriteBusy = 0;
 657     m_mc_profilers[mem_cntrl]->m_memDataBusBusy = 0;
 658     m_mc_profilers[mem_cntrl]->m_memRefresh = 0;
 659     m_mc_profilers[mem_cntrl]->m_memRead = 0;
 660     m_mc_profilers[mem_cntrl]->m_memWrite = 0;
 661     m_mc_profilers[mem_cntrl]->m_memWaitCycles = 0;
 662     m_mc_profilers[mem_cntrl]->m_memInputQ = 0;
 663     m_mc_profilers[mem_cntrl]->m_memBankQ = 0;
 664     m_mc_profilers[mem_cntrl]->m_memArbWait = 0;
 665     m_mc_profilers[mem_cntrl]->m_memRandBusy = 0;
 666     m_mc_profilers[mem_cntrl]->m_memNotOld = 0;
 667
 668     for (int bank=0;
 669          bank < m_mc_profilers[mem_cntrl]->m_memBankCount.size();
 670          bank++) {
 671         m_mc_profilers[mem_cntrl]->m_memBankCount[bank] = 0;
 672     }
 673   }
 674   // Flush the prefetches through the system - used so that there are no outstanding requests after stats are cleared
 675   //g_eventQueue_ptr->triggerAllEvents();
 676
 677   // update the start time
 678   m_ruby_start = g_eventQueue_ptr->getTime();
 679 }
 680
 681 void Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id)
 682 {
 683   if (msg.getType() != CacheRequestType_IFETCH) {
 684
 685     // Note: The following line should be commented out if you want to
 686     // use the special profiling that is part of the GS320 protocol
 687
 688     // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be profiled by the AddressProfiler
 689     m_address_profiler_ptr->addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), msg.getType(), msg.getAccessMode(), id, false);
 690   }
 691 }
 692
 693 void Profiler::profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner)
 694 {
 695   Set set_contacted(owner);
 696   if (type == AccessType_Write) {
 697     set_contacted.addSet(sharers);
 698   }
 699   set_contacted.remove(requestor);
 700   int number_contacted = set_contacted.count();
 701
 702   if (type == AccessType_Write) {
 703     m_write_sharing_histogram.add(number_contacted);
 704   } else {
 705     m_read_sharing_histogram.add(number_contacted);
 706   }
 707   m_all_sharing_histogram.add(number_contacted);
 708
 709   if (number_contacted == 0) {
 710     m_memory_to_cache++;
 711   } else {
 712     m_cache_to_cache++;
 713   }
 714
 715 }
 716
 717 void Profiler::profileMsgDelay(int virtualNetwork, int delayCycles) {
 718   assert(virtualNetwork < m_delayedCyclesVCHistograms.size());
 719   m_delayedCyclesHistogram.add(delayCycles);
 720   m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles);
 721   if (virtualNetwork != 0) {
 722     m_delayedCyclesNonPFHistogram.add(delayCycles);
 723   }
 724 }
 725
 726 // profiles original cache requests including PUTs
 727 void Profiler::profileRequest(const string& requestStr)
 728 {
 729   m_requests++;
 730
 731   if (m_requestProfileMap_ptr->exist(requestStr)) {
 732     (m_requestProfileMap_ptr->lookup(requestStr))++;
 733   } else {
 734     m_requestProfileMap_ptr->add(requestStr, 1);
 735   }
 736 }
 737
 738 void Profiler::startTransaction(int cpu)
 739 {
 740   m_perProcStartTransaction[cpu]++;
 741 }
 742
 743 void Profiler::endTransaction(int cpu)
 744 {
 745   m_perProcEndTransaction[cpu]++;
 746 }
 747
 748 void Profiler::controllerBusy(MachineID machID)
 749 {
 750   m_busyControllerCount[(int)machID.type][(int)machID.num]++;
 751 }
 752
 753 void Profiler::profilePFWait(Time waitTime)
 754 {
 755   m_prefetchWaitHistogram.add(waitTime);
 756 }
 757
 758 void Profiler::bankBusy()
 759 {
 760   m_busyBankCount++;
 761 }
 762
 763 // non-zero cycle demand request
 764 void Profiler::missLatency(Time t, RubyRequestType type)
 765 {
 766   m_allMissLatencyHistogram.add(t);
 767   m_missLatencyHistograms[type].add(t);
 768 }
 769
 770 // non-zero cycle prefetch request
 771 void Profiler::swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach)
 772 {
 773   m_allSWPrefetchLatencyHistogram.add(t);
 774   m_SWPrefetchLatencyHistograms[type].add(t);
 775   m_SWPrefetchMachLatencyHistograms[respondingMach].add(t);
 776   if(respondingMach == GenericMachineType_Directory || respondingMach == GenericMachineType_NUM) {
 777     m_SWPrefetchL2MissLatencyHistogram.add(t);
 778   }
 779 }
 780
 781 void Profiler::profileTransition(const string& component, NodeID version, Address addr,
 782                                  const string& state, const string& event,
 783                                  const string& next_state, const string& note)
 784 {
 785   const int EVENT_SPACES = 20;
 786   const int ID_SPACES = 3;
 787   const int TIME_SPACES = 7;
 788   const int COMP_SPACES = 10;
 789   const int STATE_SPACES = 6;
 790
 791   if ((g_debug_ptr->getDebugTime() > 0) &&
 792       (g_eventQueue_ptr->getTime() >= g_debug_ptr->getDebugTime())) {
 793     (* debug_cout_ptr).flags(ios::right);
 794     (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " ";
 795     (* debug_cout_ptr) << setw(ID_SPACES) << version << " ";
 796     (* debug_cout_ptr) << setw(COMP_SPACES) << component;
 797     (* debug_cout_ptr) << setw(EVENT_SPACES) << event << " ";
 798
 799     (* debug_cout_ptr).flags(ios::right);
 800     (* debug_cout_ptr) << setw(STATE_SPACES) << state;
 801     (* debug_cout_ptr) << ">";
 802     (* debug_cout_ptr).flags(ios::left);
 803     (* debug_cout_ptr) << setw(STATE_SPACES) << next_state;
 804
 805     (* debug_cout_ptr) << " " << addr << " " << note;
 806
 807     (* debug_cout_ptr) << endl;
 808   }
 809 }
 810
 811 // Helper function
 812 static double process_memory_total()
 813 {
 814   const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB,
 815   ifstream proc_file;
 816   proc_file.open("/proc/self/statm");
 817   int total_size_in_pages = 0;
 818   int res_size_in_pages = 0;
 819   proc_file >> total_size_in_pages;
 820   proc_file >> res_size_in_pages;
 821   return double(total_size_in_pages)*MULTIPLIER; // size in megabytes
 822 }
 823
 824 static double process_memory_resident()
 825 {
 826   const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB,
 827   ifstream proc_file;
 828   proc_file.open("/proc/self/statm");
 829   int total_size_in_pages = 0;
 830   int res_size_in_pages = 0;
 831   proc_file >> total_size_in_pages;
 832   proc_file >> res_size_in_pages;
 833   return double(res_size_in_pages)*MULTIPLIER; // size in megabytes
 834 }
 835
 836 void Profiler::rubyWatch(int id){
 837     //int rn_g1 = 0;//SIMICS_get_register_number(id, "g1");
 838   uint64 tr = 0;//SIMICS_read_register(id, rn_g1);
 839     Address watch_address = Address(tr);
 840     const int ID_SPACES = 3;
 841     const int TIME_SPACES = 7;
 842
 843     (* debug_cout_ptr).flags(ios::right);
 844     (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " ";
 845     (* debug_cout_ptr) << setw(ID_SPACES) << id << " "
 846                        << "RUBY WATCH "
 847                        << watch_address
 848                        << endl;
 849
 850     if(!m_watch_address_list_ptr->exist(watch_address)){
 851       m_watch_address_list_ptr->add(watch_address, 1);
 852     }
 853 }
 854
 855 bool Profiler::watchAddress(Address addr){
 856     if (m_watch_address_list_ptr->exist(addr))
 857       return true;
 858     else
 859       return false;
 860 }
 861
 862 int64 Profiler::getTotalTransactionsExecuted() const {
 863   return m_perProcEndTransaction.sum();
 864 }
 865
 866 // For MemoryControl:
 867 void Profiler::profileMemReq(int mem_cntrl, int bank) {
 868   m_mc_profilers[mem_cntrl]->m_memReq++;
 869   m_mc_profilers[mem_cntrl]->m_memBankCount[bank]++;
 870 }
 871
 872 void Profiler::profileMemBankBusy(int mem_cntrl) {
 873   m_mc_profilers[mem_cntrl]->m_memBankBusy++;
 874 }
 875
 876 void Profiler::profileMemBusBusy(int mem_cntrl) {
 877   m_mc_profilers[mem_cntrl]->m_memBusBusy++;
 878 }
 879
 880 void Profiler::profileMemReadWriteBusy(int mem_cntrl) {
 881   m_mc_profilers[mem_cntrl]->m_memReadWriteBusy++;
 882 }
 883
 884 void Profiler::profileMemDataBusBusy(int mem_cntrl) {
 885   m_mc_profilers[mem_cntrl]->m_memDataBusBusy++;
 886 }
 887
 888 void Profiler::profileMemTfawBusy(int mem_cntrl) {
 889   m_mc_profilers[mem_cntrl]->m_memTfawBusy++;
 890 }
 891
 892 void Profiler::profileMemRefresh(int mem_cntrl) {
 893   m_mc_profilers[mem_cntrl]->m_memRefresh++;
 894 }
 895
 896 void Profiler::profileMemRead(int mem_cntrl) {
 897   m_mc_profilers[mem_cntrl]->m_memRead++;
 898 }
 899
 900 void Profiler::profileMemWrite(int mem_cntrl) {
 901   m_mc_profilers[mem_cntrl]->m_memWrite++;
 902 }
 903
 904 void Profiler::profileMemWaitCycles(int mem_cntrl, int cycles) {
 905   m_mc_profilers[mem_cntrl]->m_memWaitCycles += cycles;
 906 }
 907
 908 void Profiler::profileMemInputQ(int mem_cntrl, int cycles) {
 909   m_mc_profilers[mem_cntrl]->m_memInputQ += cycles;
 910 }
 911
 912 void Profiler::profileMemBankQ(int mem_cntrl, int cycles) {
 913   m_mc_profilers[mem_cntrl]->m_memBankQ += cycles;
 914 }
 915
 916 void Profiler::profileMemArbWait(int mem_cntrl, int cycles) {
 917   m_mc_profilers[mem_cntrl]->m_memArbWait += cycles;
 918 }
 919
 920 void Profiler::profileMemRandBusy(int mem_cntrl) {
 921   m_mc_profilers[mem_cntrl]->m_memRandBusy++;
 922 }
 923
 924 void Profiler::profileMemNotOld(int mem_cntrl) {
 925   m_mc_profilers[mem_cntrl]->m_memNotOld++;
 926 }
 927
 928
 929 Profiler *
 930 RubyProfilerParams::create()
 931 {
 932     return new Profiler(this);
 933 }