/*
 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
   This file has been modified by Kevin Moore and Dan Nussbaum of the
   Scalable Systems Research Group at Sun Microsystems Laboratories
   (http://research.sun.com/scalable/) to support the Adaptive
   Transactional Memory Test Platform (ATMTP).

   Please send email to atmtp-interest@sun.com with feedback, questions, or
   to request future announcements about ATMTP.

   ----------------------------------------------------------------------

   File modification date: 2008-02-23

   ----------------------------------------------------------------------
*/

/*
 * Description: See Profiler.hh
 */
54 #include "mem/ruby/profiler/Profiler.hh"
55 #include "mem/ruby/profiler/AddressProfiler.hh"
56 #include "mem/ruby/system/System.hh"
57 #include "mem/ruby/network/Network.hh"
58 #include "mem/gems_common/PrioHeap.hh"
59 #include "mem/protocol/CacheMsg.hh"
60 #include "mem/protocol/Protocol.hh"
61 #include "mem/gems_common/util.hh"
62 #include "mem/gems_common/Map.hh"
63 #include "mem/ruby/common/Debug.hh"
64 #include "mem/protocol/MachineType.hh"
66 #include "mem/ruby/system/System.hh"
68 // Allows use of times() library call, which determines virtual runtime
69 #include <sys/times.h>
// Stream used for debug/trace output; defined elsewhere in the debug
// subsystem.
extern std::ostream * debug_cout_ptr;

// Helpers (defined at the bottom of this file) that read
// /proc/self/statm to report this process's memory footprint in MB.
static double process_memory_total();
static double process_memory_resident();
76 Profiler::Profiler(const Params
*p
)
79 m_requestProfileMap_ptr
= new Map
<string
, int>;
81 m_inst_profiler_ptr
= NULL
;
82 m_address_profiler_ptr
= NULL
;
84 m_real_time_start_time
= time(NULL
); // Not reset in clearStats()
85 m_stats_period
= 1000000; // Default
86 m_periodic_output_file_ptr
= &cerr
;
88 m_hot_lines
= p
->hot_lines
;
89 m_all_instructions
= p
->all_instructions
;
92 // Initialize the memory controller profiler structs
94 m_mc_profilers
.setSize(p
->mem_cntrl_count
);
95 for (int mem_cntrl
= 0; mem_cntrl
< p
->mem_cntrl_count
; mem_cntrl
++) {
96 m_mc_profilers
[mem_cntrl
] = new memory_control_profiler
;
97 m_mc_profilers
[mem_cntrl
]->m_memReq
= 0;
98 m_mc_profilers
[mem_cntrl
]->m_memBankBusy
= 0;
99 m_mc_profilers
[mem_cntrl
]->m_memBusBusy
= 0;
100 m_mc_profilers
[mem_cntrl
]->m_memReadWriteBusy
= 0;
101 m_mc_profilers
[mem_cntrl
]->m_memDataBusBusy
= 0;
102 m_mc_profilers
[mem_cntrl
]->m_memTfawBusy
= 0;
103 m_mc_profilers
[mem_cntrl
]->m_memRefresh
= 0;
104 m_mc_profilers
[mem_cntrl
]->m_memRead
= 0;
105 m_mc_profilers
[mem_cntrl
]->m_memWrite
= 0;
106 m_mc_profilers
[mem_cntrl
]->m_memWaitCycles
= 0;
107 m_mc_profilers
[mem_cntrl
]->m_memInputQ
= 0;
108 m_mc_profilers
[mem_cntrl
]->m_memBankQ
= 0;
109 m_mc_profilers
[mem_cntrl
]->m_memArbWait
= 0;
110 m_mc_profilers
[mem_cntrl
]->m_memRandBusy
= 0;
111 m_mc_profilers
[mem_cntrl
]->m_memNotOld
= 0;
113 m_mc_profilers
[mem_cntrl
]->m_banks_per_rank
= p
->banks_per_rank
;
114 m_mc_profilers
[mem_cntrl
]->m_ranks_per_dimm
= p
->ranks_per_dimm
;
115 m_mc_profilers
[mem_cntrl
]->m_dimms_per_channel
=
116 p
->dimms_per_channel
;
118 int totalBanks
= p
->banks_per_rank
*
120 p
->dimms_per_channel
;
122 m_mc_profilers
[mem_cntrl
]->m_memBankCount
.setSize(totalBanks
);
126 m_all_instructions
= false;
128 m_address_profiler_ptr
= new AddressProfiler
;
129 m_address_profiler_ptr
-> setHotLines(m_hot_lines
);
130 m_address_profiler_ptr
-> setAllInstructions(m_all_instructions
);
132 if (m_all_instructions
) {
133 m_inst_profiler_ptr
= new AddressProfiler
;
134 m_inst_profiler_ptr
-> setHotLines(m_hot_lines
);
135 m_inst_profiler_ptr
-> setAllInstructions(m_all_instructions
);
139 Profiler::~Profiler()
141 if (m_periodic_output_file_ptr
!= &cerr
) {
142 delete m_periodic_output_file_ptr
;
145 for (int mem_cntrl
= 0;
146 mem_cntrl
< m_mc_profilers
.size();
148 delete m_mc_profilers
[mem_cntrl
];
151 delete m_requestProfileMap_ptr
;
154 void Profiler::wakeup()
156 // FIXME - avoid the repeated code
158 Vector
<integer_t
> perProcCycleCount
;
159 perProcCycleCount
.setSize(RubySystem::getNumberOfSequencers());
161 for(int i
=0; i
< RubySystem::getNumberOfSequencers(); i
++) {
162 perProcCycleCount
[i
] = g_system_ptr
->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
163 // The +1 allows us to avoid division by zero
166 integer_t total_misses
= m_perProcTotalMisses
.sum();
167 integer_t simics_cycles_executed
= perProcCycleCount
.sum();
168 integer_t transactions_started
= m_perProcStartTransaction
.sum();
169 integer_t transactions_ended
= m_perProcEndTransaction
.sum();
171 (*m_periodic_output_file_ptr
) << "ruby_cycles: "
172 << g_eventQueue_ptr
->getTime()-m_ruby_start
175 (*m_periodic_output_file_ptr
) << "total_misses: "
178 << m_perProcTotalMisses
181 (*m_periodic_output_file_ptr
) << "simics_cycles_executed: "
182 << simics_cycles_executed
187 (*m_periodic_output_file_ptr
) << "transactions_started: "
188 << transactions_started
190 << m_perProcStartTransaction
193 (*m_periodic_output_file_ptr
) << "transactions_ended: "
194 << transactions_ended
196 << m_perProcEndTransaction
199 (*m_periodic_output_file_ptr
) << "mbytes_resident: "
200 << process_memory_resident()
203 (*m_periodic_output_file_ptr
) << "mbytes_total: "
204 << process_memory_total()
207 if (process_memory_total() > 0) {
208 (*m_periodic_output_file_ptr
) << "resident_ratio: "
209 << process_memory_resident()/process_memory_total()
213 (*m_periodic_output_file_ptr
) << "miss_latency: "
214 << m_allMissLatencyHistogram
217 *m_periodic_output_file_ptr
<< endl
;
219 if (m_all_instructions
) {
220 m_inst_profiler_ptr
->printStats(*m_periodic_output_file_ptr
);
223 //g_system_ptr->getNetwork()->printStats(*m_periodic_output_file_ptr);
224 g_eventQueue_ptr
->scheduleEvent(this, m_stats_period
);
227 void Profiler::setPeriodicStatsFile(const string
& filename
)
229 cout
<< "Recording periodic statistics to file '" << filename
<< "' every "
230 << m_stats_period
<< " Ruby cycles" << endl
;
232 if (m_periodic_output_file_ptr
!= &cerr
) {
233 delete m_periodic_output_file_ptr
;
236 m_periodic_output_file_ptr
= new ofstream(filename
.c_str());
237 g_eventQueue_ptr
->scheduleEvent(this, 1);
240 void Profiler::setPeriodicStatsInterval(integer_t period
)
242 cout
<< "Recording periodic statistics every " << m_stats_period
243 << " Ruby cycles" << endl
;
245 m_stats_period
= period
;
246 g_eventQueue_ptr
->scheduleEvent(this, 1);
249 void Profiler::printConfig(ostream
& out
) const
252 out
<< "Profiler Configuration" << endl
;
253 out
<< "----------------------" << endl
;
254 out
<< "periodic_stats_period: " << m_stats_period
<< endl
;
257 void Profiler::print(ostream
& out
) const
262 void Profiler::printStats(ostream
& out
, bool short_stats
)
268 out
<< "Profiler Stats" << endl
;
269 out
<< "--------------" << endl
;
271 time_t real_time_current
= time(NULL
);
272 double seconds
= difftime(real_time_current
, m_real_time_start_time
);
273 double minutes
= seconds
/60.0;
274 double hours
= minutes
/60.0;
275 double days
= hours
/24.0;
276 Time ruby_cycles
= g_eventQueue_ptr
->getTime()-m_ruby_start
;
279 out
<< "Elapsed_time_in_seconds: " << seconds
<< endl
;
280 out
<< "Elapsed_time_in_minutes: " << minutes
<< endl
;
281 out
<< "Elapsed_time_in_hours: " << hours
<< endl
;
282 out
<< "Elapsed_time_in_days: " << days
<< endl
;
286 // print the virtual runtimes as well
289 seconds
= (vtime
.tms_utime
+ vtime
.tms_stime
) / 100.0;
290 minutes
= seconds
/ 60.0;
291 hours
= minutes
/ 60.0;
293 out
<< "Virtual_time_in_seconds: " << seconds
<< endl
;
294 out
<< "Virtual_time_in_minutes: " << minutes
<< endl
;
295 out
<< "Virtual_time_in_hours: " << hours
<< endl
;
296 out
<< "Virtual_time_in_days: " << days
<< endl
;
299 out
<< "Ruby_current_time: " << g_eventQueue_ptr
->getTime() << endl
;
300 out
<< "Ruby_start_time: " << m_ruby_start
<< endl
;
301 out
<< "Ruby_cycles: " << ruby_cycles
<< endl
;
305 out
<< "mbytes_resident: " << process_memory_resident() << endl
;
306 out
<< "mbytes_total: " << process_memory_total() << endl
;
307 if (process_memory_total() > 0) {
308 out
<< "resident_ratio: "
309 << process_memory_resident()/process_memory_total() << endl
;
315 Vector
<integer_t
> perProcCycleCount
;
316 Vector
<double> perProcCyclesPerTrans
;
317 Vector
<double> perProcMissesPerTrans
;
320 perProcCycleCount
.setSize(RubySystem::getNumberOfSequencers());
321 perProcCyclesPerTrans
.setSize(RubySystem::getNumberOfSequencers());
322 perProcMissesPerTrans
.setSize(RubySystem::getNumberOfSequencers());
324 for(int i
=0; i
< RubySystem::getNumberOfSequencers(); i
++) {
325 perProcCycleCount
[i
] = g_system_ptr
->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
326 // The +1 allows us to avoid division by zero
328 int trans
= m_perProcEndTransaction
[i
];
330 perProcCyclesPerTrans
[i
] = 0;
331 perProcMissesPerTrans
[i
] = 0;
333 perProcCyclesPerTrans
[i
] = ruby_cycles
/ double(trans
);
334 perProcMissesPerTrans
[i
] = m_perProcTotalMisses
[i
] / double(trans
);
338 integer_t total_misses
= m_perProcTotalMisses
.sum();
339 integer_t user_misses
= m_perProcUserMisses
.sum();
340 integer_t supervisor_misses
= m_perProcSupervisorMisses
.sum();
341 integer_t simics_cycles_executed
= perProcCycleCount
.sum();
342 integer_t transactions_started
= m_perProcStartTransaction
.sum();
343 integer_t transactions_ended
= m_perProcEndTransaction
.sum();
345 double cycles_per_transaction
= (transactions_ended
!= 0) ? (RubySystem::getNumberOfSequencers() * double(ruby_cycles
)) / double(transactions_ended
) : 0;
346 double misses_per_transaction
= (transactions_ended
!= 0) ? double(total_misses
) / double(transactions_ended
) : 0;
348 out
<< "Total_misses: " << total_misses
<< endl
;
349 out
<< "total_misses: " << total_misses
<< " " << m_perProcTotalMisses
<< endl
;
350 out
<< "user_misses: " << user_misses
<< " " << m_perProcUserMisses
<< endl
;
351 out
<< "supervisor_misses: " << supervisor_misses
<< " " << m_perProcSupervisorMisses
<< endl
;
353 out
<< "ruby_cycles_executed: " << simics_cycles_executed
<< " " << perProcCycleCount
<< endl
;
355 out
<< "transactions_started: " << transactions_started
<< " " << m_perProcStartTransaction
<< endl
;
356 out
<< "transactions_ended: " << transactions_ended
<< " " << m_perProcEndTransaction
<< endl
;
357 out
<< "cycles_per_transaction: " << cycles_per_transaction
<< " " << perProcCyclesPerTrans
<< endl
;
358 out
<< "misses_per_transaction: " << misses_per_transaction
<< " " << perProcMissesPerTrans
<< endl
;
364 for (int mem_cntrl
= 0;
365 mem_cntrl
< m_mc_profilers
.size();
367 uint64 m_memReq
= m_mc_profilers
[mem_cntrl
]->m_memReq
;
368 uint64 m_memRefresh
= m_mc_profilers
[mem_cntrl
]->m_memRefresh
;
369 uint64 m_memInputQ
= m_mc_profilers
[mem_cntrl
]->m_memInputQ
;
370 uint64 m_memBankQ
= m_mc_profilers
[mem_cntrl
]->m_memBankQ
;
371 uint64 m_memWaitCycles
= m_mc_profilers
[mem_cntrl
]->m_memWaitCycles
;
372 uint64 m_memRead
= m_mc_profilers
[mem_cntrl
]->m_memRead
;
373 uint64 m_memWrite
= m_mc_profilers
[mem_cntrl
]->m_memWrite
;
374 uint64 m_memBankBusy
= m_mc_profilers
[mem_cntrl
]->m_memBankBusy
;
375 uint64 m_memRandBusy
= m_mc_profilers
[mem_cntrl
]->m_memRandBusy
;
376 uint64 m_memNotOld
= m_mc_profilers
[mem_cntrl
]->m_memNotOld
;
377 uint64 m_memArbWait
= m_mc_profilers
[mem_cntrl
]->m_memArbWait
;
378 uint64 m_memBusBusy
= m_mc_profilers
[mem_cntrl
]->m_memBusBusy
;
379 uint64 m_memTfawBusy
= m_mc_profilers
[mem_cntrl
]->m_memTfawBusy
;
380 uint64 m_memReadWriteBusy
= m_mc_profilers
[mem_cntrl
]->m_memReadWriteBusy
;
381 uint64 m_memDataBusBusy
= m_mc_profilers
[mem_cntrl
]->m_memDataBusBusy
;
382 Vector
<uint64
> m_memBankCount
= m_mc_profilers
[mem_cntrl
]->m_memBankCount
;
384 if (m_memReq
|| m_memRefresh
) { // if there's a memory controller at all
385 uint64 total_stalls
= m_memInputQ
+ m_memBankQ
+ m_memWaitCycles
;
386 double stallsPerReq
= total_stalls
* 1.0 / m_memReq
;
387 out
<< "Memory control " << mem_cntrl
<< ":" << endl
;
388 out
<< " memory_total_requests: " << m_memReq
<< endl
; // does not include refreshes
389 out
<< " memory_reads: " << m_memRead
<< endl
;
390 out
<< " memory_writes: " << m_memWrite
<< endl
;
391 out
<< " memory_refreshes: " << m_memRefresh
<< endl
;
392 out
<< " memory_total_request_delays: " << total_stalls
<< endl
;
393 out
<< " memory_delays_per_request: " << stallsPerReq
<< endl
;
394 out
<< " memory_delays_in_input_queue: " << m_memInputQ
<< endl
;
395 out
<< " memory_delays_behind_head_of_bank_queue: " << m_memBankQ
<< endl
;
396 out
<< " memory_delays_stalled_at_head_of_bank_queue: " << m_memWaitCycles
<< endl
;
397 // Note: The following "memory stalls" entries are a breakdown of the
398 // cycles which already showed up in m_memWaitCycles. The order is
399 // significant; it is the priority of attributing the cycles.
400 // For example, bank_busy is before arbitration because if the bank was
401 // busy, we didn't even check arbitration.
402 // Note: "not old enough" means that since we grouped waiting heads-of-queues
403 // into batches to avoid starvation, a request in a newer batch
404 // didn't try to arbitrate yet because there are older requests waiting.
405 out
<< " memory_stalls_for_bank_busy: " << m_memBankBusy
<< endl
;
406 out
<< " memory_stalls_for_random_busy: " << m_memRandBusy
<< endl
;
407 out
<< " memory_stalls_for_anti_starvation: " << m_memNotOld
<< endl
;
408 out
<< " memory_stalls_for_arbitration: " << m_memArbWait
<< endl
;
409 out
<< " memory_stalls_for_bus: " << m_memBusBusy
<< endl
;
410 out
<< " memory_stalls_for_tfaw: " << m_memTfawBusy
<< endl
;
411 out
<< " memory_stalls_for_read_write_turnaround: " << m_memReadWriteBusy
<< endl
;
412 out
<< " memory_stalls_for_read_read_turnaround: " << m_memDataBusBusy
<< endl
;
413 out
<< " accesses_per_bank: ";
414 for (int bank
=0; bank
< m_memBankCount
.size(); bank
++) {
415 out
<< m_memBankCount
[bank
] << " ";
416 //if ((bank % 8) == 7) out << " " << endl;
423 out
<< "Busy Controller Counts:" << endl
;
424 for(int i
=0; i
< MachineType_NUM
; i
++) {
425 for(int j
=0; j
< MachineType_base_count((MachineType
)i
); j
++) {
427 machID
.type
= (MachineType
)i
;
429 out
<< machID
<< ":" << m_busyControllerCount
[i
][j
] << " ";
438 out
<< "Busy Bank Count:" << m_busyBankCount
<< endl
;
441 out
<< "sequencer_requests_outstanding: " << m_sequencer_requests
<< endl
;
446 out
<< "All Non-Zero Cycle Demand Cache Accesses" << endl
;
447 out
<< "----------------------------------------" << endl
;
448 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
449 for(int i
=0; i
<m_missLatencyHistograms
.size(); i
++) {
450 if (m_missLatencyHistograms
[i
].size() > 0) {
451 out
<< "miss_latency_" << RubyRequestType(i
) << ": " << m_missLatencyHistograms
[i
] << endl
;
454 for(int i
=0; i
<m_machLatencyHistograms
.size(); i
++) {
455 if (m_machLatencyHistograms
[i
].size() > 0) {
456 out
<< "miss_latency_" << GenericMachineType(i
) << ": " << m_machLatencyHistograms
[i
] << endl
;
462 out
<< "All Non-Zero Cycle SW Prefetch Requests" << endl
;
463 out
<< "------------------------------------" << endl
;
464 out
<< "prefetch_latency: " << m_allSWPrefetchLatencyHistogram
<< endl
;
465 for(int i
=0; i
<m_SWPrefetchLatencyHistograms
.size(); i
++) {
466 if (m_SWPrefetchLatencyHistograms
[i
].size() > 0) {
467 out
<< "prefetch_latency_" << CacheRequestType(i
) << ": " << m_SWPrefetchLatencyHistograms
[i
] << endl
;
470 for(int i
=0; i
<m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
471 if (m_SWPrefetchMachLatencyHistograms
[i
].size() > 0) {
472 out
<< "prefetch_latency_" << GenericMachineType(i
) << ": " << m_SWPrefetchMachLatencyHistograms
[i
] << endl
;
475 out
<< "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram
<< endl
;
477 if (m_all_sharing_histogram
.size() > 0) {
478 out
<< "all_sharing: " << m_all_sharing_histogram
<< endl
;
479 out
<< "read_sharing: " << m_read_sharing_histogram
<< endl
;
480 out
<< "write_sharing: " << m_write_sharing_histogram
<< endl
;
482 out
<< "all_sharing_percent: "; m_all_sharing_histogram
.printPercent(out
); out
<< endl
;
483 out
<< "read_sharing_percent: "; m_read_sharing_histogram
.printPercent(out
); out
<< endl
;
484 out
<< "write_sharing_percent: "; m_write_sharing_histogram
.printPercent(out
); out
<< endl
;
486 int64 total_miss
= m_cache_to_cache
+ m_memory_to_cache
;
487 out
<< "all_misses: " << total_miss
<< endl
;
488 out
<< "cache_to_cache_misses: " << m_cache_to_cache
<< endl
;
489 out
<< "memory_to_cache_misses: " << m_memory_to_cache
<< endl
;
490 out
<< "cache_to_cache_percent: " << 100.0 * (double(m_cache_to_cache
) / double(total_miss
)) << endl
;
491 out
<< "memory_to_cache_percent: " << 100.0 * (double(m_memory_to_cache
) / double(total_miss
)) << endl
;
495 if (m_outstanding_requests
.size() > 0) {
496 out
<< "outstanding_requests: "; m_outstanding_requests
.printPercent(out
); out
<< endl
;
502 out
<< "Request vs. RubySystem State Profile" << endl
;
503 out
<< "--------------------------------" << endl
;
506 Vector
<string
> requestProfileKeys
= m_requestProfileMap_ptr
->keys();
507 requestProfileKeys
.sortVector();
509 for(int i
=0; i
<requestProfileKeys
.size(); i
++) {
510 int temp_int
= m_requestProfileMap_ptr
->lookup(requestProfileKeys
[i
]);
511 double percent
= (100.0*double(temp_int
))/double(m_requests
);
512 while (requestProfileKeys
[i
] != "") {
513 out
<< setw(10) << string_split(requestProfileKeys
[i
], ':');
515 out
<< setw(11) << temp_int
;
516 out
<< setw(14) << percent
<< endl
;
520 out
<< "filter_action: " << m_filter_action_histogram
<< endl
;
522 if (!m_all_instructions
) {
523 m_address_profiler_ptr
->printStats(out
);
526 if (m_all_instructions
) {
527 m_inst_profiler_ptr
->printStats(out
);
531 out
<< "Message Delayed Cycles" << endl
;
532 out
<< "----------------------" << endl
;
533 out
<< "Total_delay_cycles: " << m_delayedCyclesHistogram
<< endl
;
534 out
<< "Total_nonPF_delay_cycles: " << m_delayedCyclesNonPFHistogram
<< endl
;
535 for (int i
= 0; i
< m_delayedCyclesVCHistograms
.size(); i
++) {
536 out
<< " virtual_network_" << i
<< "_delay_cycles: " << m_delayedCyclesVCHistograms
[i
] << endl
;
539 printResourceUsage(out
);
544 void Profiler::printResourceUsage(ostream
& out
) const
547 out
<< "Resource Usage" << endl
;
548 out
<< "--------------" << endl
;
550 integer_t pagesize
= getpagesize(); // page size in bytes
551 out
<< "page_size: " << pagesize
<< endl
;
554 getrusage (RUSAGE_SELF
, &usage
);
556 out
<< "user_time: " << usage
.ru_utime
.tv_sec
<< endl
;
557 out
<< "system_time: " << usage
.ru_stime
.tv_sec
<< endl
;
558 out
<< "page_reclaims: " << usage
.ru_minflt
<< endl
;
559 out
<< "page_faults: " << usage
.ru_majflt
<< endl
;
560 out
<< "swaps: " << usage
.ru_nswap
<< endl
;
561 out
<< "block_inputs: " << usage
.ru_inblock
<< endl
;
562 out
<< "block_outputs: " << usage
.ru_oublock
<< endl
;
565 void Profiler::clearStats()
567 m_ruby_start
= g_eventQueue_ptr
->getTime();
569 m_cycles_executed_at_start
.setSize(RubySystem::getNumberOfSequencers());
570 for (int i
=0; i
< RubySystem::getNumberOfSequencers(); i
++) {
571 if (g_system_ptr
== NULL
) {
572 m_cycles_executed_at_start
[i
] = 0;
574 m_cycles_executed_at_start
[i
] = g_system_ptr
->getCycleCount(i
);
578 m_perProcTotalMisses
.setSize(RubySystem::getNumberOfSequencers());
579 m_perProcUserMisses
.setSize(RubySystem::getNumberOfSequencers());
580 m_perProcSupervisorMisses
.setSize(RubySystem::getNumberOfSequencers());
581 m_perProcStartTransaction
.setSize(RubySystem::getNumberOfSequencers());
582 m_perProcEndTransaction
.setSize(RubySystem::getNumberOfSequencers());
584 for(int i
=0; i
< RubySystem::getNumberOfSequencers(); i
++) {
585 m_perProcTotalMisses
[i
] = 0;
586 m_perProcUserMisses
[i
] = 0;
587 m_perProcSupervisorMisses
[i
] = 0;
588 m_perProcStartTransaction
[i
] = 0;
589 m_perProcEndTransaction
[i
] = 0;
592 m_busyControllerCount
.setSize(MachineType_NUM
); // all machines
593 for(int i
=0; i
< MachineType_NUM
; i
++) {
594 m_busyControllerCount
[i
].setSize(MachineType_base_count((MachineType
)i
));
595 for(int j
=0; j
< MachineType_base_count((MachineType
)i
); j
++) {
596 m_busyControllerCount
[i
][j
] = 0;
601 m_delayedCyclesHistogram
.clear();
602 m_delayedCyclesNonPFHistogram
.clear();
603 m_delayedCyclesVCHistograms
.setSize(RubySystem::getNetwork()->getNumberOfVirtualNetworks());
604 for (int i
= 0; i
< RubySystem::getNetwork()->getNumberOfVirtualNetworks(); i
++) {
605 m_delayedCyclesVCHistograms
[i
].clear();
608 m_missLatencyHistograms
.setSize(RubyRequestType_NUM
);
609 for(int i
=0; i
<m_missLatencyHistograms
.size(); i
++) {
610 m_missLatencyHistograms
[i
].clear(200);
612 m_machLatencyHistograms
.setSize(GenericMachineType_NUM
+1);
613 for(int i
=0; i
<m_machLatencyHistograms
.size(); i
++) {
614 m_machLatencyHistograms
[i
].clear(200);
616 m_allMissLatencyHistogram
.clear(200);
618 m_SWPrefetchLatencyHistograms
.setSize(CacheRequestType_NUM
);
619 for(int i
=0; i
<m_SWPrefetchLatencyHistograms
.size(); i
++) {
620 m_SWPrefetchLatencyHistograms
[i
].clear(200);
622 m_SWPrefetchMachLatencyHistograms
.setSize(GenericMachineType_NUM
+1);
623 for(int i
=0; i
<m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
624 m_SWPrefetchMachLatencyHistograms
[i
].clear(200);
626 m_allSWPrefetchLatencyHistogram
.clear(200);
628 m_sequencer_requests
.clear();
629 m_read_sharing_histogram
.clear();
630 m_write_sharing_histogram
.clear();
631 m_all_sharing_histogram
.clear();
632 m_cache_to_cache
= 0;
633 m_memory_to_cache
= 0;
636 m_requestProfileMap_ptr
->clear();
638 // count requests profiled
641 m_outstanding_requests
.clear();
642 m_outstanding_persistent_requests
.clear();
645 vector
<string
>::iterator it
;
647 for (int mem_cntrl
= 0;
648 mem_cntrl
< m_mc_profilers
.size();
650 m_mc_profilers
[mem_cntrl
]->m_memReq
= 0;
651 m_mc_profilers
[mem_cntrl
]->m_memBankBusy
= 0;
652 m_mc_profilers
[mem_cntrl
]->m_memBusBusy
= 0;
653 m_mc_profilers
[mem_cntrl
]->m_memTfawBusy
= 0;
654 m_mc_profilers
[mem_cntrl
]->m_memReadWriteBusy
= 0;
655 m_mc_profilers
[mem_cntrl
]->m_memDataBusBusy
= 0;
656 m_mc_profilers
[mem_cntrl
]->m_memRefresh
= 0;
657 m_mc_profilers
[mem_cntrl
]->m_memRead
= 0;
658 m_mc_profilers
[mem_cntrl
]->m_memWrite
= 0;
659 m_mc_profilers
[mem_cntrl
]->m_memWaitCycles
= 0;
660 m_mc_profilers
[mem_cntrl
]->m_memInputQ
= 0;
661 m_mc_profilers
[mem_cntrl
]->m_memBankQ
= 0;
662 m_mc_profilers
[mem_cntrl
]->m_memArbWait
= 0;
663 m_mc_profilers
[mem_cntrl
]->m_memRandBusy
= 0;
664 m_mc_profilers
[mem_cntrl
]->m_memNotOld
= 0;
667 bank
< m_mc_profilers
[mem_cntrl
]->m_memBankCount
.size();
669 m_mc_profilers
[mem_cntrl
]->m_memBankCount
[bank
] = 0;
672 // Flush the prefetches through the system - used so that there are no outstanding requests after stats are cleared
673 //g_eventQueue_ptr->triggerAllEvents();
675 // update the start time
676 m_ruby_start
= g_eventQueue_ptr
->getTime();
679 void Profiler::addAddressTraceSample(const CacheMsg
& msg
, NodeID id
)
681 if (msg
.getType() != CacheRequestType_IFETCH
) {
683 // Note: The following line should be commented out if you want to
684 // use the special profiling that is part of the GS320 protocol
686 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be profiled by the AddressProfiler
687 m_address_profiler_ptr
->addTraceSample(msg
.getLineAddress(), msg
.getProgramCounter(), msg
.getType(), msg
.getAccessMode(), id
, false);
691 void Profiler::profileSharing(const Address
& addr
, AccessType type
, NodeID requestor
, const Set
& sharers
, const Set
& owner
)
693 Set
set_contacted(owner
);
694 if (type
== AccessType_Write
) {
695 set_contacted
.addSet(sharers
);
697 set_contacted
.remove(requestor
);
698 int number_contacted
= set_contacted
.count();
700 if (type
== AccessType_Write
) {
701 m_write_sharing_histogram
.add(number_contacted
);
703 m_read_sharing_histogram
.add(number_contacted
);
705 m_all_sharing_histogram
.add(number_contacted
);
707 if (number_contacted
== 0) {
715 void Profiler::profileMsgDelay(int virtualNetwork
, int delayCycles
) {
716 assert(virtualNetwork
< m_delayedCyclesVCHistograms
.size());
717 m_delayedCyclesHistogram
.add(delayCycles
);
718 m_delayedCyclesVCHistograms
[virtualNetwork
].add(delayCycles
);
719 if (virtualNetwork
!= 0) {
720 m_delayedCyclesNonPFHistogram
.add(delayCycles
);
724 // profiles original cache requests including PUTs
725 void Profiler::profileRequest(const string
& requestStr
)
729 if (m_requestProfileMap_ptr
->exist(requestStr
)) {
730 (m_requestProfileMap_ptr
->lookup(requestStr
))++;
732 m_requestProfileMap_ptr
->add(requestStr
, 1);
736 void Profiler::startTransaction(int cpu
)
738 m_perProcStartTransaction
[cpu
]++;
741 void Profiler::endTransaction(int cpu
)
743 m_perProcEndTransaction
[cpu
]++;
746 void Profiler::controllerBusy(MachineID machID
)
748 m_busyControllerCount
[(int)machID
.type
][(int)machID
.num
]++;
751 void Profiler::profilePFWait(Time waitTime
)
753 m_prefetchWaitHistogram
.add(waitTime
);
756 void Profiler::bankBusy()
761 // non-zero cycle demand request
762 void Profiler::missLatency(Time t
, RubyRequestType type
)
764 m_allMissLatencyHistogram
.add(t
);
765 m_missLatencyHistograms
[type
].add(t
);
768 // non-zero cycle prefetch request
769 void Profiler::swPrefetchLatency(Time t
, CacheRequestType type
, GenericMachineType respondingMach
)
771 m_allSWPrefetchLatencyHistogram
.add(t
);
772 m_SWPrefetchLatencyHistograms
[type
].add(t
);
773 m_SWPrefetchMachLatencyHistograms
[respondingMach
].add(t
);
774 if(respondingMach
== GenericMachineType_Directory
|| respondingMach
== GenericMachineType_NUM
) {
775 m_SWPrefetchL2MissLatencyHistogram
.add(t
);
779 void Profiler::profileTransition(const string
& component
, NodeID version
, Address addr
,
780 const string
& state
, const string
& event
,
781 const string
& next_state
, const string
& note
)
783 const int EVENT_SPACES
= 20;
784 const int ID_SPACES
= 3;
785 const int TIME_SPACES
= 7;
786 const int COMP_SPACES
= 10;
787 const int STATE_SPACES
= 6;
789 if ((g_debug_ptr
->getDebugTime() > 0) &&
790 (g_eventQueue_ptr
->getTime() >= g_debug_ptr
->getDebugTime())) {
791 (* debug_cout_ptr
).flags(ios::right
);
792 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
793 (* debug_cout_ptr
) << setw(ID_SPACES
) << version
<< " ";
794 (* debug_cout_ptr
) << setw(COMP_SPACES
) << component
;
795 (* debug_cout_ptr
) << setw(EVENT_SPACES
) << event
<< " ";
797 (* debug_cout_ptr
).flags(ios::right
);
798 (* debug_cout_ptr
) << setw(STATE_SPACES
) << state
;
799 (* debug_cout_ptr
) << ">";
800 (* debug_cout_ptr
).flags(ios::left
);
801 (* debug_cout_ptr
) << setw(STATE_SPACES
) << next_state
;
803 (* debug_cout_ptr
) << " " << addr
<< " " << note
;
805 (* debug_cout_ptr
) << endl
;
// Return this process's total virtual memory size in megabytes, read from
// the first field of /proc/self/statm (which reports sizes in pages).
// Returns 0 if the file cannot be read (non-Linux hosts).
static double process_memory_total()
{
    // 4kB page size, 1024*1024 bytes per MB,
    const double MULTIPLIER = 4096.0/(1024.0*1024.0);
    std::ifstream proc_file;
    proc_file.open("/proc/self/statm");
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;
    return double(total_size_in_pages)*MULTIPLIER; // size in megabytes
}
// Return this process's resident set size in megabytes, read from the
// second field of /proc/self/statm (which reports sizes in pages).
// Returns 0 if the file cannot be read (non-Linux hosts).
static double process_memory_resident()
{
    // 4kB page size, 1024*1024 bytes per MB,
    const double MULTIPLIER = 4096.0/(1024.0*1024.0);
    std::ifstream proc_file;
    proc_file.open("/proc/self/statm");
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;
    return double(res_size_in_pages)*MULTIPLIER; // size in megabytes
}
834 void Profiler::rubyWatch(int id
){
835 //int rn_g1 = 0;//SIMICS_get_register_number(id, "g1");
836 uint64 tr
= 0;//SIMICS_read_register(id, rn_g1);
837 Address watch_address
= Address(tr
);
838 const int ID_SPACES
= 3;
839 const int TIME_SPACES
= 7;
841 (* debug_cout_ptr
).flags(ios::right
);
842 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
843 (* debug_cout_ptr
) << setw(ID_SPACES
) << id
<< " "
848 if(!m_watch_address_list_ptr
->exist(watch_address
)){
849 m_watch_address_list_ptr
->add(watch_address
, 1);
853 bool Profiler::watchAddress(Address addr
){
854 if (m_watch_address_list_ptr
->exist(addr
))
860 int64
Profiler::getTotalTransactionsExecuted() const {
861 return m_perProcEndTransaction
.sum();
864 // For MemoryControl:
865 void Profiler::profileMemReq(int mem_cntrl
, int bank
) {
866 m_mc_profilers
[mem_cntrl
]->m_memReq
++;
867 m_mc_profilers
[mem_cntrl
]->m_memBankCount
[bank
]++;
870 void Profiler::profileMemBankBusy(int mem_cntrl
) {
871 m_mc_profilers
[mem_cntrl
]->m_memBankBusy
++;
874 void Profiler::profileMemBusBusy(int mem_cntrl
) {
875 m_mc_profilers
[mem_cntrl
]->m_memBusBusy
++;
878 void Profiler::profileMemReadWriteBusy(int mem_cntrl
) {
879 m_mc_profilers
[mem_cntrl
]->m_memReadWriteBusy
++;
882 void Profiler::profileMemDataBusBusy(int mem_cntrl
) {
883 m_mc_profilers
[mem_cntrl
]->m_memDataBusBusy
++;
886 void Profiler::profileMemTfawBusy(int mem_cntrl
) {
887 m_mc_profilers
[mem_cntrl
]->m_memTfawBusy
++;
890 void Profiler::profileMemRefresh(int mem_cntrl
) {
891 m_mc_profilers
[mem_cntrl
]->m_memRefresh
++;
894 void Profiler::profileMemRead(int mem_cntrl
) {
895 m_mc_profilers
[mem_cntrl
]->m_memRead
++;
898 void Profiler::profileMemWrite(int mem_cntrl
) {
899 m_mc_profilers
[mem_cntrl
]->m_memWrite
++;
902 void Profiler::profileMemWaitCycles(int mem_cntrl
, int cycles
) {
903 m_mc_profilers
[mem_cntrl
]->m_memWaitCycles
+= cycles
;
906 void Profiler::profileMemInputQ(int mem_cntrl
, int cycles
) {
907 m_mc_profilers
[mem_cntrl
]->m_memInputQ
+= cycles
;
910 void Profiler::profileMemBankQ(int mem_cntrl
, int cycles
) {
911 m_mc_profilers
[mem_cntrl
]->m_memBankQ
+= cycles
;
914 void Profiler::profileMemArbWait(int mem_cntrl
, int cycles
) {
915 m_mc_profilers
[mem_cntrl
]->m_memArbWait
+= cycles
;
918 void Profiler::profileMemRandBusy(int mem_cntrl
) {
919 m_mc_profilers
[mem_cntrl
]->m_memRandBusy
++;
922 void Profiler::profileMemNotOld(int mem_cntrl
) {
923 m_mc_profilers
[mem_cntrl
]->m_memNotOld
++;
928 RubyProfilerParams::create()
930 return new Profiler(this);