src/mem/ruby/profiler/Profiler.cc

   1 /*
   2  * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions are
   7  * met: redistributions of source code must retain the above copyright
   8  * notice, this list of conditions and the following disclaimer;
   9  * redistributions in binary form must reproduce the above copyright
  10  * notice, this list of conditions and the following disclaimer in the
  11  * documentation and/or other materials provided with the distribution;
  12  * neither the name of the copyright holders nor the names of its
  13  * contributors may be used to endorse or promote products derived from
  14  * this software without specific prior written permission.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  */
  28
  29 /*
  30    This file has been modified by Kevin Moore and Dan Nussbaum of the
  31    Scalable Systems Research Group at Sun Microsystems Laboratories
  32    (http://research.sun.com/scalable/) to support the Adaptive
  33    Transactional Memory Test Platform (ATMTP).
  34
  35    Please send email to atmtp-interest@sun.com with feedback, questions, or
  36    to request future announcements about ATMTP.
  37
  38    ----------------------------------------------------------------------
  39
  40    File modification date: 2008-02-23
  41
  42    ----------------------------------------------------------------------
  43 */
  44
  45 // Allows use of times() library call, which determines virtual runtime
#include <sys/resource.h>
#include <sys/times.h>
#include <unistd.h>

#include <algorithm>
#include <fstream>

#include "base/stl_helpers.hh"
#include "base/str.hh"
#include "mem/protocol/MachineType.hh"
#include "mem/protocol/Protocol.hh"
#include "mem/protocol/RubyRequest.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/AddressProfiler.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"
  61
  62 using namespace std;
  63 using m5::stl_helpers::operator<<;
  64
  65 static double process_memory_total();
  66 static double process_memory_resident();
  67
  68 Profiler::Profiler(const Params *p)
  69     : SimObject(p)
  70 {
  71     m_inst_profiler_ptr = NULL;
  72     m_address_profiler_ptr = NULL;
  73
  74     m_real_time_start_time = time(NULL); // Not reset in clearStats()
  75     m_stats_period = 1000000; // Default
  76     m_periodic_output_file_ptr = &cerr;
  77
  78     m_hot_lines = p->hot_lines;
  79     m_all_instructions = p->all_instructions;
  80
  81     m_num_of_sequencers = p->num_of_sequencers;
  82
  83     m_hot_lines = false;
  84     m_all_instructions = false;
  85
  86     m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
  87     m_address_profiler_ptr->setHotLines(m_hot_lines);
  88     m_address_profiler_ptr->setAllInstructions(m_all_instructions);
  89
  90     if (m_all_instructions) {
  91         m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
  92         m_inst_profiler_ptr->setHotLines(m_hot_lines);
  93         m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
  94     }
  95
  96     p->ruby_system->registerProfiler(this);
  97 }
  98
  99 Profiler::~Profiler()
 100 {
 101     if (m_periodic_output_file_ptr != &cerr) {
 102         delete m_periodic_output_file_ptr;
 103     }
 104 }
 105
 106 void
 107 Profiler::wakeup()
 108 {
 109     // FIXME - avoid the repeated code
 110
 111     vector<integer_t> perProcCycleCount(m_num_of_sequencers);
 112
 113     for (int i = 0; i < m_num_of_sequencers; i++) {
 114         perProcCycleCount[i] =
 115             g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
 116         // The +1 allows us to avoid division by zero
 117     }
 118
 119     ostream &out = *m_periodic_output_file_ptr;
 120
 121     out << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl
 122         << "mbytes_resident: " << process_memory_resident() << endl
 123         << "mbytes_total: " << process_memory_total() << endl;
 124
 125     if (process_memory_total() > 0) {
 126         out << "resident_ratio: "
 127             << process_memory_resident() / process_memory_total() << endl;
 128     }
 129
 130     out << "miss_latency: " << m_allMissLatencyHistogram << endl;
 131
 132     out << endl;
 133
 134     if (m_all_instructions) {
 135         m_inst_profiler_ptr->printStats(out);
 136     }
 137
 138     //g_system_ptr->getNetwork()->printStats(out);
 139     g_eventQueue_ptr->scheduleEvent(this, m_stats_period);
 140 }
 141
 142 void
 143 Profiler::setPeriodicStatsFile(const string& filename)
 144 {
 145     cout << "Recording periodic statistics to file '" << filename << "' every "
 146          << m_stats_period << " Ruby cycles" << endl;
 147
 148     if (m_periodic_output_file_ptr != &cerr) {
 149         delete m_periodic_output_file_ptr;
 150     }
 151
 152     m_periodic_output_file_ptr = new ofstream(filename.c_str());
 153     g_eventQueue_ptr->scheduleEvent(this, 1);
 154 }
 155
 156 void
 157 Profiler::setPeriodicStatsInterval(integer_t period)
 158 {
 159     cout << "Recording periodic statistics every " << m_stats_period
 160          << " Ruby cycles" << endl;
 161
 162     m_stats_period = period;
 163     g_eventQueue_ptr->scheduleEvent(this, 1);
 164 }
 165
 166 void
 167 Profiler::printConfig(ostream& out) const
 168 {
 169     out << endl;
 170     out << "Profiler Configuration" << endl;
 171     out << "----------------------" << endl;
 172     out << "periodic_stats_period: " << m_stats_period << endl;
 173 }
 174
 175 void
 176 Profiler::print(ostream& out) const
 177 {
 178     out << "[Profiler]";
 179 }
 180
 181 void
 182 Profiler::printStats(ostream& out, bool short_stats)
 183 {
 184     out << endl;
 185     if (short_stats) {
 186         out << "SHORT ";
 187     }
 188     out << "Profiler Stats" << endl;
 189     out << "--------------" << endl;
 190
 191     time_t real_time_current = time(NULL);
 192     double seconds = difftime(real_time_current, m_real_time_start_time);
 193     double minutes = seconds / 60.0;
 194     double hours = minutes / 60.0;
 195     double days = hours / 24.0;
 196     Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start;
 197
 198     if (!short_stats) {
 199         out << "Elapsed_time_in_seconds: " << seconds << endl;
 200         out << "Elapsed_time_in_minutes: " << minutes << endl;
 201         out << "Elapsed_time_in_hours: " << hours << endl;
 202         out << "Elapsed_time_in_days: " << days << endl;
 203         out << endl;
 204     }
 205
 206     // print the virtual runtimes as well
 207     struct tms vtime;
 208     times(&vtime);
 209     seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0;
 210     minutes = seconds / 60.0;
 211     hours = minutes / 60.0;
 212     days = hours / 24.0;
 213     out << "Virtual_time_in_seconds: " << seconds << endl;
 214     out << "Virtual_time_in_minutes: " << minutes << endl;
 215     out << "Virtual_time_in_hours:   " << hours << endl;
 216     out << "Virtual_time_in_days:    " << days << endl;
 217     out << endl;
 218
 219     out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl;
 220     out << "Ruby_start_time: " << m_ruby_start << endl;
 221     out << "Ruby_cycles: " << ruby_cycles << endl;
 222     out << endl;
 223
 224     if (!short_stats) {
 225         out << "mbytes_resident: " << process_memory_resident() << endl;
 226         out << "mbytes_total: " << process_memory_total() << endl;
 227         if (process_memory_total() > 0) {
 228             out << "resident_ratio: "
 229                 << process_memory_resident()/process_memory_total() << endl;
 230         }
 231         out << endl;
 232     }
 233
 234     vector<integer_t> perProcCycleCount(m_num_of_sequencers);
 235
 236     for (int i = 0; i < m_num_of_sequencers; i++) {
 237         perProcCycleCount[i] =
 238             g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
 239         // The +1 allows us to avoid division by zero
 240     }
 241
 242     out << "ruby_cycles_executed: " << perProcCycleCount << endl;
 243
 244     out << endl;
 245
 246     if (!short_stats) {
 247         out << "Busy Controller Counts:" << endl;
 248         for (int i = 0; i < MachineType_NUM; i++) {
 249             int size = MachineType_base_count((MachineType)i);
 250             for (int j = 0; j < size; j++) {
 251                 MachineID machID;
 252                 machID.type = (MachineType)i;
 253                 machID.num = j;
 254                 out << machID << ":" << m_busyControllerCount[i][j] << "  ";
 255                 if ((j + 1) % 8 == 0) {
 256                     out << endl;
 257                 }
 258             }
 259             out << endl;
 260         }
 261         out << endl;
 262
 263         out << "Busy Bank Count:" << m_busyBankCount << endl;
 264         out << endl;
 265
 266         out << "sequencer_requests_outstanding: "
 267             << m_sequencer_requests << endl;
 268         out << endl;
 269     }
 270
 271     if (!short_stats) {
 272         out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
 273         out << "----------------------------------------" << endl;
 274         out << "miss_latency: " << m_allMissLatencyHistogram << endl;
 275         for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
 276             if (m_missLatencyHistograms[i].size() > 0) {
 277                 out << "miss_latency_" << RubyRequestType(i) << ": "
 278                     << m_missLatencyHistograms[i] << endl;
 279             }
 280         }
 281         for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
 282             if (m_machLatencyHistograms[i].size() > 0) {
 283                 out << "miss_latency_" << GenericMachineType(i) << ": "
 284                     << m_machLatencyHistograms[i] << endl;
 285             }
 286         }
 287
 288         out << "miss_latency_wCC_issue_to_initial_request: "
 289             << m_wCCIssueToInitialRequestHistogram << endl;
 290         out << "miss_latency_wCC_initial_forward_request: "
 291             << m_wCCInitialRequestToForwardRequestHistogram << endl;
 292         out << "miss_latency_wCC_forward_to_first_response: "
 293             << m_wCCForwardRequestToFirstResponseHistogram << endl;
 294         out << "miss_latency_wCC_first_response_to_completion: "
 295             << m_wCCFirstResponseToCompleteHistogram << endl;
 296         out << "imcomplete_wCC_Times: " << m_wCCIncompleteTimes << endl;
 297         out << "miss_latency_dir_issue_to_initial_request: "
 298             << m_dirIssueToInitialRequestHistogram << endl;
 299         out << "miss_latency_dir_initial_forward_request: "
 300             << m_dirInitialRequestToForwardRequestHistogram << endl;
 301         out << "miss_latency_dir_forward_to_first_response: "
 302             << m_dirForwardRequestToFirstResponseHistogram << endl;
 303         out << "miss_latency_dir_first_response_to_completion: "
 304             << m_dirFirstResponseToCompleteHistogram << endl;
 305         out << "imcomplete_dir_Times: " << m_dirIncompleteTimes << endl;
 306
 307         for (int i = 0; i < m_missMachLatencyHistograms.size(); i++) {
 308             for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
 309                 if (m_missMachLatencyHistograms[i][j].size() > 0) {
 310                     out << "miss_latency_" << RubyRequestType(i)
 311                         << "_" << GenericMachineType(j) << ": "
 312                         << m_missMachLatencyHistograms[i][j] << endl;
 313                 }
 314             }
 315         }
 316
 317         out << endl;
 318
 319         out << "All Non-Zero Cycle SW Prefetch Requests" << endl;
 320         out << "------------------------------------" << endl;
 321         out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl;
 322         for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
 323             if (m_SWPrefetchLatencyHistograms[i].size() > 0) {
 324                 out << "prefetch_latency_" << RubyRequestType(i) << ": "
 325                     << m_SWPrefetchLatencyHistograms[i] << endl;
 326             }
 327         }
 328         for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
 329             if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) {
 330                 out << "prefetch_latency_" << GenericMachineType(i) << ": "
 331                     << m_SWPrefetchMachLatencyHistograms[i] << endl;
 332             }
 333         }
 334         out << "prefetch_latency_L2Miss:"
 335             << m_SWPrefetchL2MissLatencyHistogram << endl;
 336
 337         if (m_all_sharing_histogram.size() > 0) {
 338             out << "all_sharing: " << m_all_sharing_histogram << endl;
 339             out << "read_sharing: " << m_read_sharing_histogram << endl;
 340             out << "write_sharing: " << m_write_sharing_histogram << endl;
 341
 342             out << "all_sharing_percent: ";
 343             m_all_sharing_histogram.printPercent(out);
 344             out << endl;
 345
 346             out << "read_sharing_percent: ";
 347             m_read_sharing_histogram.printPercent(out);
 348             out << endl;
 349
 350             out << "write_sharing_percent: ";
 351             m_write_sharing_histogram.printPercent(out);
 352             out << endl;
 353
 354             int64 total_miss = m_cache_to_cache +  m_memory_to_cache;
 355             out << "all_misses: " << total_miss << endl;
 356             out << "cache_to_cache_misses: " << m_cache_to_cache << endl;
 357             out << "memory_to_cache_misses: " << m_memory_to_cache << endl;
 358             out << "cache_to_cache_percent: "
 359                 << 100.0 * (double(m_cache_to_cache) / double(total_miss))
 360                 << endl;
 361             out << "memory_to_cache_percent: "
 362                 << 100.0 * (double(m_memory_to_cache) / double(total_miss))
 363                 << endl;
 364             out << endl;
 365         }
 366
 367         if (m_outstanding_requests.size() > 0) {
 368             out << "outstanding_requests: ";
 369             m_outstanding_requests.printPercent(out);
 370             out << endl;
 371             out << endl;
 372         }
 373     }
 374
 375     if (!short_stats) {
 376         out << "Request vs. RubySystem State Profile" << endl;
 377         out << "--------------------------------" << endl;
 378         out << endl;
 379
 380         map<string, int>::const_iterator i = m_requestProfileMap.begin();
 381         map<string, int>::const_iterator end = m_requestProfileMap.end();
 382         for (; i != end; ++i) {
 383             const string &key = i->first;
 384             int count = i->second;
 385
 386             double percent = (100.0 * double(count)) / double(m_requests);
 387             vector<string> items;
 388             tokenize(items, key, ':');
 389             vector<string>::iterator j = items.begin();
 390             vector<string>::iterator end = items.end();
 391             for (; j != end; ++i)
 392                 out << setw(10) << *j;
 393             out << setw(11) << count;
 394             out << setw(14) << percent << endl;
 395         }
 396         out << endl;
 397
 398         out << "filter_action: " << m_filter_action_histogram << endl;
 399
 400         if (!m_all_instructions) {
 401             m_address_profiler_ptr->printStats(out);
 402         }
 403
 404         if (m_all_instructions) {
 405             m_inst_profiler_ptr->printStats(out);
 406         }
 407
 408         out << endl;
 409         out << "Message Delayed Cycles" << endl;
 410         out << "----------------------" << endl;
 411         out << "Total_delay_cycles: " <<   m_delayedCyclesHistogram << endl;
 412         out << "Total_nonPF_delay_cycles: "
 413             << m_delayedCyclesNonPFHistogram << endl;
 414         for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) {
 415             out << "  virtual_network_" << i << "_delay_cycles: "
 416                 << m_delayedCyclesVCHistograms[i] << endl;
 417         }
 418
 419         printResourceUsage(out);
 420     }
 421 }
 422
 423 void
 424 Profiler::printResourceUsage(ostream& out) const
 425 {
 426     out << endl;
 427     out << "Resource Usage" << endl;
 428     out << "--------------" << endl;
 429
 430     integer_t pagesize = getpagesize(); // page size in bytes
 431     out << "page_size: " << pagesize << endl;
 432
 433     rusage usage;
 434     getrusage (RUSAGE_SELF, &usage);
 435
 436     out << "user_time: " << usage.ru_utime.tv_sec << endl;
 437     out << "system_time: " << usage.ru_stime.tv_sec << endl;
 438     out << "page_reclaims: " << usage.ru_minflt << endl;
 439     out << "page_faults: " << usage.ru_majflt << endl;
 440     out << "swaps: " << usage.ru_nswap << endl;
 441     out << "block_inputs: " << usage.ru_inblock << endl;
 442     out << "block_outputs: " << usage.ru_oublock << endl;
 443 }
 444
 445 void
 446 Profiler::clearStats()
 447 {
 448     m_ruby_start = g_eventQueue_ptr->getTime();
 449
 450     m_cycles_executed_at_start.resize(m_num_of_sequencers);
 451     for (int i = 0; i < m_num_of_sequencers; i++) {
 452         if (g_system_ptr == NULL) {
 453             m_cycles_executed_at_start[i] = 0;
 454         } else {
 455             m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i);
 456         }
 457     }
 458
 459     m_busyControllerCount.resize(MachineType_NUM); // all machines
 460     for (int i = 0; i < MachineType_NUM; i++) {
 461         int size = MachineType_base_count((MachineType)i);
 462         m_busyControllerCount[i].resize(size);
 463         for (int j = 0; j < size; j++) {
 464             m_busyControllerCount[i][j] = 0;
 465         }
 466     }
 467     m_busyBankCount = 0;
 468
 469     m_delayedCyclesHistogram.clear();
 470     m_delayedCyclesNonPFHistogram.clear();
 471     int size = RubySystem::getNetwork()->getNumberOfVirtualNetworks();
 472     m_delayedCyclesVCHistograms.resize(size);
 473     for (int i = 0; i < size; i++) {
 474         m_delayedCyclesVCHistograms[i].clear();
 475     }
 476
 477     m_missLatencyHistograms.resize(RubyRequestType_NUM);
 478     for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
 479         m_missLatencyHistograms[i].clear(200);
 480     }
 481     m_machLatencyHistograms.resize(GenericMachineType_NUM+1);
 482     for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
 483         m_machLatencyHistograms[i].clear(200);
 484     }
 485     m_missMachLatencyHistograms.resize(RubyRequestType_NUM);
 486     for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
 487         m_missMachLatencyHistograms[i].resize(GenericMachineType_NUM+1);
 488         for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
 489             m_missMachLatencyHistograms[i][j].clear(200);
 490         }
 491     }
 492     m_allMissLatencyHistogram.clear(200);
 493     m_wCCIssueToInitialRequestHistogram.clear(200);
 494     m_wCCInitialRequestToForwardRequestHistogram.clear(200);
 495     m_wCCForwardRequestToFirstResponseHistogram.clear(200);
 496     m_wCCFirstResponseToCompleteHistogram.clear(200);
 497     m_wCCIncompleteTimes = 0;
 498     m_dirIssueToInitialRequestHistogram.clear(200);
 499     m_dirInitialRequestToForwardRequestHistogram.clear(200);
 500     m_dirForwardRequestToFirstResponseHistogram.clear(200);
 501     m_dirFirstResponseToCompleteHistogram.clear(200);
 502     m_dirIncompleteTimes = 0;
 503
 504     m_SWPrefetchLatencyHistograms.resize(RubyRequestType_NUM);
 505     for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
 506         m_SWPrefetchLatencyHistograms[i].clear(200);
 507     }
 508     m_SWPrefetchMachLatencyHistograms.resize(GenericMachineType_NUM+1);
 509     for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
 510         m_SWPrefetchMachLatencyHistograms[i].clear(200);
 511     }
 512     m_allSWPrefetchLatencyHistogram.clear(200);
 513
 514     m_sequencer_requests.clear();
 515     m_read_sharing_histogram.clear();
 516     m_write_sharing_histogram.clear();
 517     m_all_sharing_histogram.clear();
 518     m_cache_to_cache = 0;
 519     m_memory_to_cache = 0;
 520
 521     // clear HashMaps
 522     m_requestProfileMap.clear();
 523
 524     // count requests profiled
 525     m_requests = 0;
 526
 527     m_outstanding_requests.clear();
 528     m_outstanding_persistent_requests.clear();
 529
 530     // Flush the prefetches through the system - used so that there
 531     // are no outstanding requests after stats are cleared
 532     //g_eventQueue_ptr->triggerAllEvents();
 533
 534     // update the start time
 535     m_ruby_start = g_eventQueue_ptr->getTime();
 536 }
 537
 538 void
 539 Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
 540 {
 541     if (msg.getType() != RubyRequestType_IFETCH) {
 542         // Note: The following line should be commented out if you
 543         // want to use the special profiling that is part of the GS320
 544         // protocol
 545
 546         // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
 547         // profiled by the AddressProfiler
 548         m_address_profiler_ptr->
 549             addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
 550                            msg.getType(), msg.getAccessMode(), id, false);
 551     }
 552 }
 553
 554 void
 555 Profiler::profileSharing(const Address& addr, AccessType type,
 556                          NodeID requestor, const Set& sharers,
 557                          const Set& owner)
 558 {
 559     Set set_contacted(owner);
 560     if (type == AccessType_Write) {
 561         set_contacted.addSet(sharers);
 562     }
 563     set_contacted.remove(requestor);
 564     int number_contacted = set_contacted.count();
 565
 566     if (type == AccessType_Write) {
 567         m_write_sharing_histogram.add(number_contacted);
 568     } else {
 569         m_read_sharing_histogram.add(number_contacted);
 570     }
 571     m_all_sharing_histogram.add(number_contacted);
 572
 573     if (number_contacted == 0) {
 574         m_memory_to_cache++;
 575     } else {
 576         m_cache_to_cache++;
 577     }
 578 }
 579
 580 void
 581 Profiler::profileMsgDelay(int virtualNetwork, int delayCycles)
 582 {
 583     assert(virtualNetwork < m_delayedCyclesVCHistograms.size());
 584     m_delayedCyclesHistogram.add(delayCycles);
 585     m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles);
 586     if (virtualNetwork != 0) {
 587         m_delayedCyclesNonPFHistogram.add(delayCycles);
 588     }
 589 }
 590
 591 // profiles original cache requests including PUTs
 592 void
 593 Profiler::profileRequest(const string& requestStr)
 594 {
 595     m_requests++;
 596
 597     // if it doesn't exist, conveniently, it will be created with the
 598     // default value which is 0
 599     m_requestProfileMap[requestStr]++;
 600 }
 601
 602 void
 603 Profiler::controllerBusy(MachineID machID)
 604 {
 605     m_busyControllerCount[(int)machID.type][(int)machID.num]++;
 606 }
 607
 608 void
 609 Profiler::profilePFWait(Time waitTime)
 610 {
 611     m_prefetchWaitHistogram.add(waitTime);
 612 }
 613
 614 void
 615 Profiler::bankBusy()
 616 {
 617     m_busyBankCount++;
 618 }
 619
 620 // non-zero cycle demand request
 621 void
 622 Profiler::missLatency(Time cycles,
 623                       RubyRequestType type,
 624                       const GenericMachineType respondingMach)
 625 {
 626     m_allMissLatencyHistogram.add(cycles);
 627     m_missLatencyHistograms[type].add(cycles);
 628     m_machLatencyHistograms[respondingMach].add(cycles);
 629     m_missMachLatencyHistograms[type][respondingMach].add(cycles);
 630 }
 631
 632 void
 633 Profiler::missLatencyWcc(Time issuedTime,
 634                          Time initialRequestTime,
 635                          Time forwardRequestTime,
 636                          Time firstResponseTime,
 637                          Time completionTime)
 638 {
 639     if ((issuedTime <= initialRequestTime) &&
 640         (initialRequestTime <= forwardRequestTime) &&
 641         (forwardRequestTime <= firstResponseTime) &&
 642         (firstResponseTime <= completionTime)) {
 643         m_wCCIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
 644
 645         m_wCCInitialRequestToForwardRequestHistogram.add(forwardRequestTime -
 646                                                          initialRequestTime);
 647
 648         m_wCCForwardRequestToFirstResponseHistogram.add(firstResponseTime -
 649                                                         forwardRequestTime);
 650
 651         m_wCCFirstResponseToCompleteHistogram.add(completionTime -
 652                                                   firstResponseTime);
 653     } else {
 654         m_wCCIncompleteTimes++;
 655     }
 656 }
 657
 658 void
 659 Profiler::missLatencyDir(Time issuedTime,
 660                          Time initialRequestTime,
 661                          Time forwardRequestTime,
 662                          Time firstResponseTime,
 663                          Time completionTime)
 664 {
 665     if ((issuedTime <= initialRequestTime) &&
 666         (initialRequestTime <= forwardRequestTime) &&
 667         (forwardRequestTime <= firstResponseTime) &&
 668         (firstResponseTime <= completionTime)) {
 669         m_dirIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
 670
 671         m_dirInitialRequestToForwardRequestHistogram.add(forwardRequestTime -
 672                                                          initialRequestTime);
 673
 674         m_dirForwardRequestToFirstResponseHistogram.add(firstResponseTime -
 675                                                         forwardRequestTime);
 676
 677         m_dirFirstResponseToCompleteHistogram.add(completionTime -
 678                                                   firstResponseTime);
 679     } else {
 680         m_dirIncompleteTimes++;
 681     }
 682 }
 683
 684 // non-zero cycle prefetch request
 685 void
 686 Profiler::swPrefetchLatency(Time cycles,
 687                             RubyRequestType type,
 688                             const GenericMachineType respondingMach)
 689 {
 690     m_allSWPrefetchLatencyHistogram.add(cycles);
 691     m_SWPrefetchLatencyHistograms[type].add(cycles);
 692     m_SWPrefetchMachLatencyHistograms[respondingMach].add(cycles);
 693     if (respondingMach == GenericMachineType_Directory ||
 694         respondingMach == GenericMachineType_NUM) {
 695         m_SWPrefetchL2MissLatencyHistogram.add(cycles);
 696     }
 697 }
 698
 699 // Helper function
 700 static double
 701 process_memory_total()
 702 {
 703     // 4kB page size, 1024*1024 bytes per MB,
 704     const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
 705     ifstream proc_file;
 706     proc_file.open("/proc/self/statm");
 707     int total_size_in_pages = 0;
 708     int res_size_in_pages = 0;
 709     proc_file >> total_size_in_pages;
 710     proc_file >> res_size_in_pages;
 711     return double(total_size_in_pages) * MULTIPLIER; // size in megabytes
 712 }
 713
 714 static double
 715 process_memory_resident()
 716 {
 717     // 4kB page size, 1024*1024 bytes per MB,
 718     const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
 719     ifstream proc_file;
 720     proc_file.open("/proc/self/statm");
 721     int total_size_in_pages = 0;
 722     int res_size_in_pages = 0;
 723     proc_file >> total_size_in_pages;
 724     proc_file >> res_size_in_pages;
 725     return double(res_size_in_pages) * MULTIPLIER; // size in megabytes
 726 }
 727
 728 void
 729 Profiler::rubyWatch(int id)
 730 {
 731     uint64 tr = 0;
 732     Address watch_address = Address(tr);
 733
 734     DPRINTFN("%7s %3s RUBY WATCH %d\n", g_eventQueue_ptr->getTime(), id,
 735         watch_address);
 736
 737     // don't care about success or failure
 738     m_watch_address_set.insert(watch_address);
 739 }
 740
 741 bool
 742 Profiler::watchAddress(Address addr)
 743 {
 744     return m_watch_address_set.count(addr) > 0;
 745 }
 746
 747 Profiler *
 748 RubyProfilerParams::create()
 749 {
 750     return new Profiler(this);
 751 }