ruby: profiler: lots of inter-related changes
author Nilay Vaish <nilay@cs.wisc.edu>
Tue, 25 Jun 2013 05:32:03 +0000 (00:32 -0500)
committer Nilay Vaish <nilay@cs.wisc.edu>
Tue, 25 Jun 2013 05:32:03 +0000 (00:32 -0500)
The patch started off by removing the global variables from the profiler for
profiling the miss latency of requests made to the cache. The corresponding
histograms have been moved to the Sequencer. These are combined together when
the histograms are printed. Separate histograms are now maintained for
tracking latency of all requests together, of hits only and of misses only.

A particular set of histograms used the type GenericMachineType, defined
in one of the protocol files. This patch removes this type. Now, everything
that relied on this type uses MachineType instead. To do this, SLICC has
been changed so that a controller can declare multiple machine types
in its preamble.

14 files changed:
src/mem/protocol/MI_example-cache.sm
src/mem/protocol/MOESI_CMP_token-L1cache.sm
src/mem/protocol/MOESI_hammer-cache.sm
src/mem/protocol/RubySlicc_ComponentMapping.sm
src/mem/protocol/RubySlicc_Exports.sm
src/mem/protocol/RubySlicc_Types.sm
src/mem/ruby/common/Histogram.cc
src/mem/ruby/profiler/Profiler.cc
src/mem/ruby/profiler/Profiler.hh
src/mem/ruby/system/Sequencer.cc
src/mem/ruby/system/Sequencer.hh
src/mem/slicc/ast/MachineAST.py
src/mem/slicc/parser.py
src/mem/slicc/symbols/Type.py

index 2b505f047c41b3af922db906d38b07a4a9bf6e76..8aed261fafcaa43c10742cb9a10ee4895903f16b 100644 (file)
@@ -177,18 +177,6 @@ machine(L1Cache, "MI Example L1 Cache")
     return getCacheEntry(addr).DataBlk;
   }
 
-  GenericMachineType getNondirectHitMachType(MachineID sender) {
-    if (machineIDToMachineType(sender) == MachineType:L1Cache) {
-      //
-      // NOTE direct local hits should not call this
-      //
-      return GenericMachineType:L1Cache_wCC; 
-    } else {
-      return ConvertMachToGenericMach(machineIDToMachineType(sender));
-    }
-  }
-
-
   // NETWORK PORTS
 
   out_port(requestNetwork_out, RequestMsg, requestFromCache);
@@ -347,36 +335,30 @@ machine(L1Cache, "MI Example L1 Cache")
   action(r_load_hit, "r", desc="Notify sequencer the load completed.") {
     assert(is_valid(cache_entry));
     DPRINTF(RubySlicc,"%s\n", cache_entry.DataBlk);
-    sequencer.readCallback(address, 
-                           GenericMachineType:L1Cache,
-                           cache_entry.DataBlk);
+    sequencer.readCallback(address, cache_entry.DataBlk, false);
   }
 
   action(rx_load_hit, "rx", desc="External load completed.") {
     peek(responseNetwork_in, ResponseMsg) {
       assert(is_valid(cache_entry));
       DPRINTF(RubySlicc,"%s\n", cache_entry.DataBlk);
-      sequencer.readCallback(address, 
-                             getNondirectHitMachType(in_msg.Sender),
-                             cache_entry.DataBlk);
+      sequencer.readCallback(address, cache_entry.DataBlk, true,
+                             machineIDToMachineType(in_msg.Sender));
     }
   }
 
   action(s_store_hit, "s", desc="Notify sequencer that store completed.") {
     assert(is_valid(cache_entry));
     DPRINTF(RubySlicc,"%s\n", cache_entry.DataBlk);
-    sequencer.writeCallback(address, 
-                            GenericMachineType:L1Cache,
-                            cache_entry.DataBlk);
+    sequencer.writeCallback(address, cache_entry.DataBlk, false);
   }
 
   action(sx_store_hit, "sx", desc="External store completed.") {
     peek(responseNetwork_in, ResponseMsg) {
       assert(is_valid(cache_entry));
       DPRINTF(RubySlicc,"%s\n", cache_entry.DataBlk);
-      sequencer.writeCallback(address, 
-                              getNondirectHitMachType(in_msg.Sender),
-                              cache_entry.DataBlk);
+      sequencer.writeCallback(address, cache_entry.DataBlk, true,
+                              machineIDToMachineType(in_msg.Sender));
     }
   }
 
index 91e4f355e43d8c4dfd947318a7572799641cd5f2..e472d04370712f4eaca194e1af8931b1e9dc9783 100644 (file)
@@ -399,26 +399,21 @@ machine(L1Cache, "Token protocol")
     }
   }
 
-  GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) {
+  // NOTE: direct local hits should not call this function
+  bool isExternalHit(Address addr, MachineID sender) {
     if (machineIDToMachineType(sender) == MachineType:L1Cache) {
-      //
-      // NOTE direct local hits should not call this
-      //
-      return GenericMachineType:L1Cache_wCC;
+      return true;
     } else if (machineIDToMachineType(sender) == MachineType:L2Cache) {
 
-      if (sender == (mapAddressToRange(addr,
-                                       MachineType:L2Cache,
-                                       l2_select_low_bit,
-                                       l2_select_num_bits))) {
-
-        return GenericMachineType:L2Cache;
+      if (sender == mapAddressToRange(addr, MachineType:L2Cache,
+                      l2_select_low_bit, l2_select_num_bits)) {
+        return false;
       } else {
-        return GenericMachineType:L2Cache_wCC;
+        return  true;
       }
-    } else {
-      return ConvertMachToGenericMach(machineIDToMachineType(sender));
     }
+
+    return true;
   }
 
   bool okToIssueStarving(Address addr, MachineID machineID) {
@@ -1289,10 +1284,8 @@ machine(L1Cache, "Token protocol")
     DPRINTF(RubySlicc, "Address: %s, Data Block: %s\n",
             address, cache_entry.DataBlk);
 
-    sequencer.readCallback(address,
-                           GenericMachineType:L1Cache,
-                           cache_entry.DataBlk);
-
+    sequencer.readCallback(address, cache_entry.DataBlk, false,
+                           MachineType:L1Cache);
   }
 
   action(x_external_load_hit, "x", desc="Notify sequencer the load completed.") {
@@ -1300,11 +1293,9 @@ machine(L1Cache, "Token protocol")
     DPRINTF(RubySlicc, "Address: %s, Data Block: %s\n",
             address, cache_entry.DataBlk);
     peek(responseNetwork_in, ResponseMsg) {
-
-      sequencer.readCallback(address,
-                             getNondirectHitMachType(address, in_msg.Sender),
-                             cache_entry.DataBlk);
-
+      sequencer.readCallback(address, cache_entry.DataBlk,
+                             isExternalHit(address, in_msg.Sender),
+                             machineIDToMachineType(in_msg.Sender));
     }
   }
 
@@ -1313,10 +1304,8 @@ machine(L1Cache, "Token protocol")
     DPRINTF(RubySlicc, "Address: %s, Data Block: %s\n",
             address, cache_entry.DataBlk);
 
-    sequencer.writeCallback(address,
-                            GenericMachineType:L1Cache,
-                            cache_entry.DataBlk);
-
+    sequencer.writeCallback(address, cache_entry.DataBlk, false,
+                            MachineType:L1Cache);
     cache_entry.Dirty := true;
     DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
   }
@@ -1326,11 +1315,9 @@ machine(L1Cache, "Token protocol")
     DPRINTF(RubySlicc, "Address: %s, Data Block: %s\n",
             address, cache_entry.DataBlk);
     peek(responseNetwork_in, ResponseMsg) {
-
-      sequencer.writeCallback(address,
-                              getNondirectHitMachType(address, in_msg.Sender),
-                              cache_entry.DataBlk);
-
+      sequencer.writeCallback(address, cache_entry.DataBlk,
+                              isExternalHit(address, in_msg.Sender),
+                              machineIDToMachineType(in_msg.Sender));
     }
     cache_entry.Dirty := true;
     DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
index 8ffa2c2ac0a26644a4548d3e93c80f498f3ab457..b99a03098f2d10a2e3a83bdef6aa7845bdfc3b47 100644 (file)
@@ -33,7 +33,7 @@
  *          Brad Beckmann
  */
 
-machine(L1Cache, "AMD Hammer-like protocol")
+machine({L1Cache, L2Cache}, "AMD Hammer-like protocol")
 : Sequencer * sequencer,
   CacheMemory * L1Icache,
   CacheMemory * L1Dcache,
@@ -288,24 +288,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
     }
   }
 
-  GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) {
-    if (machineIDToMachineType(sender) == MachineType:L1Cache) {
-      //
-      // NOTE direct local hits should not call this
-      //
-      return GenericMachineType:L1Cache_wCC;
-    } else {
-      return ConvertMachToGenericMach(machineIDToMachineType(sender));
-    }
-  }
-
-  GenericMachineType testAndClearLocalHit(Entry cache_entry) {
+  MachineType testAndClearLocalHit(Entry cache_entry) {
     if (is_valid(cache_entry) && cache_entry.FromL2) {
       cache_entry.FromL2 := false;
-      return GenericMachineType:L2Cache;
-    } else {
-      return GenericMachineType:L1Cache;
+      return MachineType:L2Cache;
     }
+    return MachineType:L1Cache;
   }
 
   bool IsAtomicAccessed(Entry cache_entry) {
@@ -853,8 +841,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
   action(h_load_hit, "h", desc="Notify sequencer the load completed.") {
     assert(is_valid(cache_entry));
     DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
-    sequencer.readCallback(address, testAndClearLocalHit(cache_entry),
-                           cache_entry.DataBlk);
+    sequencer.readCallback(address, cache_entry.DataBlk, false,
+                           testAndClearLocalHit(cache_entry));
   }
 
   action(hx_external_load_hit, "hx", desc="load required external msgs") {
@@ -863,12 +851,9 @@ machine(L1Cache, "AMD Hammer-like protocol")
     DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
     peek(responseToCache_in, ResponseMsg) {
 
-      sequencer.readCallback(address,
-                             getNondirectHitMachType(in_msg.Addr, in_msg.Sender),
-                             cache_entry.DataBlk,
-                             tbe.InitialRequestTime,
-                             tbe.ForwardRequestTime,
-                             tbe.FirstResponseTime);
+      sequencer.readCallback(address, cache_entry.DataBlk, true,
+                 machineIDToMachineType(in_msg.Sender), tbe.InitialRequestTime,
+                 tbe.ForwardRequestTime, tbe.FirstResponseTime);
     }
   }
 
@@ -876,8 +861,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
     assert(is_valid(cache_entry));
     DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
     peek(mandatoryQueue_in, RubyRequest) {
-      sequencer.writeCallback(address, testAndClearLocalHit(cache_entry),
-                              cache_entry.DataBlk);
+      sequencer.writeCallback(address, cache_entry.DataBlk, false,
+                              testAndClearLocalHit(cache_entry));
 
       cache_entry.Dirty := true;
       if (in_msg.Type == RubyRequestType:ATOMIC) {
@@ -889,7 +874,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
   action(hh_flush_hit, "\hf", desc="Notify sequencer that flush completed.") {
     assert(is_valid(tbe));
     DPRINTF(RubySlicc, "%s\n", tbe.DataBlk);
-    sequencer.writeCallback(address, GenericMachineType:L1Cache,tbe.DataBlk);
+    sequencer.writeCallback(address, tbe.DataBlk, false, MachineType:L1Cache);
   }
 
   action(sx_external_store_hit, "sx", desc="store required external msgs.") {
@@ -898,12 +883,9 @@ machine(L1Cache, "AMD Hammer-like protocol")
     DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
     peek(responseToCache_in, ResponseMsg) {
 
-      sequencer.writeCallback(address,
-                              getNondirectHitMachType(address, in_msg.Sender),
-                              cache_entry.DataBlk,
-                              tbe.InitialRequestTime,
-                              tbe.ForwardRequestTime,
-                              tbe.FirstResponseTime);
+      sequencer.writeCallback(address, cache_entry.DataBlk, true,
+              machineIDToMachineType(in_msg.Sender), tbe.InitialRequestTime,
+              tbe.ForwardRequestTime, tbe.FirstResponseTime);
     }
     DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
     cache_entry.Dirty := true;
@@ -914,12 +896,9 @@ machine(L1Cache, "AMD Hammer-like protocol")
     assert(is_valid(tbe));
     DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
 
-    sequencer.writeCallback(address,
-                            getNondirectHitMachType(address, tbe.LastResponder),
-                            cache_entry.DataBlk,
-                            tbe.InitialRequestTime,
-                            tbe.ForwardRequestTime,
-                            tbe.FirstResponseTime);
+    sequencer.writeCallback(address, cache_entry.DataBlk, true,
+            machineIDToMachineType(tbe.LastResponder), tbe.InitialRequestTime,
+            tbe.ForwardRequestTime, tbe.FirstResponseTime);
 
     cache_entry.Dirty := true;
   }
index 4f6f0e3d1313e1f6b29fdb277c12a31f15aa2ea7..7c40901d7fa2aa884cffaceb7da02d874ac50cbd 100644 (file)
@@ -38,4 +38,3 @@ NodeID map_Address_to_DirectoryNode(Address addr);
 NodeID machineIDToNodeID(MachineID machID);
 NodeID machineIDToVersion(MachineID machID);
 MachineType machineIDToMachineType(MachineID machID);
-GenericMachineType ConvertMachToGenericMach(MachineType machType);
index 015ae8cb36ef20df2a633cb08ddf4327fc1f7f75..e0371f8969726d7d3eda29d4c85e4c7223d864b0 100644 (file)
@@ -168,18 +168,6 @@ enumeration(MemoryControlRequestType, desc="...", default="MemoryControlRequestT
   Default,    desc="Replace this with access_types passed to the DMA Ruby object";
 }
 
-enumeration(GenericMachineType, desc="...", default="GenericMachineType_NULL") {
-  L1Cache,     desc="L1 Cache Mach";
-  L2Cache,     desc="L2 Cache Mach";
-  L3Cache,     desc="L3 Cache Mach";
-  Directory,   desc="Directory Mach";
-  DMA,         desc="DMA Mach";
-  Collector,   desc="Collector Mach";
-  L1Cache_wCC, desc="L1 Cache Mach with Cache Coherence (used for miss latency profile)";
-  L2Cache_wCC, desc="L1 Cache Mach with Cache Coherence (used for miss latency profile)";
-  NULL,        desc="null mach type";
-}
-
 // MessageSizeType
 enumeration(MessageSizeType, default="MessageSizeType_Undefined", desc="...") {
   Undefined,  desc="Undefined";
index acd86a8fe61db62422f8946b6eb2e333d3cca7ff..a601b2cfc1376ab335ee669c7db647f4a383db4f 100644 (file)
@@ -97,17 +97,18 @@ structure (NetDest, external = "yes", non_obj="yes") {
 
 structure (Sequencer, external = "yes") {
   void readCallback(Address, DataBlock);
-  void readCallback(Address, GenericMachineType, DataBlock);
-  void readCallback(Address, GenericMachineType, DataBlock,
+  void readCallback(Address, DataBlock, bool);
+  void readCallback(Address, DataBlock, bool, MachineType);
+  void readCallback(Address, DataBlock, bool, MachineType,
                     Cycles, Cycles, Cycles);
 
   void writeCallback(Address, DataBlock);
-  void writeCallback(Address, GenericMachineType, DataBlock);
-  void writeCallback(Address, GenericMachineType, DataBlock,
+  void writeCallback(Address, DataBlock, bool);
+  void writeCallback(Address, DataBlock, bool, MachineType);
+  void writeCallback(Address, DataBlock, bool, MachineType,
                      Cycles, Cycles, Cycles);
 
   void checkCoherence(Address);
-  void profileNack(Address, int, int, uint64);
   void evictionCallback(Address);
   void recordRequestType(SequencerRequestType);
   bool checkResourceAvailable(CacheResourceType, Address);
index 0558e5198c0b38e84b0ec155de7bce43f0ce46d5..e377bc253511caa69b379eae6042084728e96b0c 100644 (file)
@@ -124,7 +124,12 @@ Histogram::add(Histogram& hist)
     uint32_t t_bins = m_data.size();
 
     if (hist.getBins() != t_bins) {
-        fatal("Histograms with different number of bins cannot be combined!");
+        if (m_count == 0) {
+            m_data.resize(hist.getBins());
+        } else {
+            fatal("Histograms with different number of bins "
+                  "cannot be combined!");
+        }
     }
 
     m_max = max(m_max, hist.getMax());
index 76c4dba74bf9389dde8c5234d47e61105fcd234b..9a963684fe6df78959f0b2f6e531ea9e6b9577a3 100644 (file)
@@ -212,6 +212,169 @@ Profiler::printOutstandingReqProfile(ostream &out) const
         << sequencerRequests << endl;
 }
 
+void
+Profiler::printMissLatencyProfile(ostream &out) const
+{
+    // Collate the miss latencies histograms from all the sequencers
+    Histogram latency_hist;
+    std::vector<Histogram> type_latency_hist(RubyRequestType_NUM);
+
+    Histogram hit_latency_hist;
+    std::vector<Histogram> hit_type_latency_hist(RubyRequestType_NUM);
+
+    std::vector<Histogram> hit_mach_latency_hist(MachineType_NUM);
+    std::vector<std::vector<Histogram> >
+        hit_type_mach_latency_hist(RubyRequestType_NUM,
+                               std::vector<Histogram>(MachineType_NUM));
+
+    Histogram miss_latency_hist;
+    std::vector<Histogram> miss_type_latency_hist(RubyRequestType_NUM);
+
+    std::vector<Histogram> miss_mach_latency_hist(MachineType_NUM);
+    std::vector<std::vector<Histogram> >
+        miss_type_mach_latency_hist(RubyRequestType_NUM,
+                               std::vector<Histogram>(MachineType_NUM));
+
+    std::vector<Histogram> issue_to_initial_delay_hist(MachineType_NUM);
+    std::vector<Histogram> initial_to_forward_delay_hist(MachineType_NUM);
+    std::vector<Histogram>
+        forward_to_first_response_delay_hist(MachineType_NUM);
+    std::vector<Histogram>
+        first_response_to_completion_delay_hist(MachineType_NUM);
+    std::vector<uint64_t> incomplete_times(MachineType_NUM);
+
+    for (uint32_t i = 0; i < MachineType_NUM; i++) {
+        for (map<uint32_t, AbstractController*>::iterator it =
+                  g_abs_controls[i].begin();
+             it != g_abs_controls[i].end(); ++it) {
+
+            AbstractController *ctr = (*it).second;
+            Sequencer *seq = ctr->getSequencer();
+            if (seq != NULL) {
+                // add all the latencies
+                latency_hist.add(seq->getLatencyHist());
+                hit_latency_hist.add(seq->getHitLatencyHist());
+                miss_latency_hist.add(seq->getMissLatencyHist());
+
+                // add the per request type latencies
+                for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
+                    type_latency_hist[j]
+                        .add(seq->getTypeLatencyHist(j));
+                    hit_type_latency_hist[j]
+                        .add(seq->getHitTypeLatencyHist(j));
+                    miss_type_latency_hist[j]
+                        .add(seq->getMissTypeLatencyHist(j));
+                }
+
+                // add the per machine type miss latencies
+                for (uint32_t j = 0; j < MachineType_NUM; ++j) {
+                    hit_mach_latency_hist[j]
+                        .add(seq->getHitMachLatencyHist(j));
+                    miss_mach_latency_hist[j]
+                        .add(seq->getMissMachLatencyHist(j));
+
+                    issue_to_initial_delay_hist[j].add(
+                        seq->getIssueToInitialDelayHist(MachineType(j)));
+
+                    initial_to_forward_delay_hist[j].add(
+                        seq->getInitialToForwardDelayHist(MachineType(j)));
+                    forward_to_first_response_delay_hist[j].add(seq->
+                        getForwardRequestToFirstResponseHist(MachineType(j)));
+
+                    first_response_to_completion_delay_hist[j].add(seq->
+                        getFirstResponseToCompletionDelayHist(MachineType(j)));
+                    incomplete_times[j] +=
+                        seq->getIncompleteTimes(MachineType(j));
+                }
+
+                // add the per (request, machine) type miss latencies
+                for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
+                    for (uint32_t k = 0; k < MachineType_NUM; k++) {
+                        hit_type_mach_latency_hist[j][k].add(
+                            seq->getHitTypeMachLatencyHist(j,k));
+                        miss_type_mach_latency_hist[j][k].add(
+                            seq->getMissTypeMachLatencyHist(j,k));
+                    }
+                }
+            }
+        }
+    }
+
+    out << "latency: " << latency_hist << endl;
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        if (type_latency_hist[i].size() > 0) {
+            out << "latency: " << RubyRequestType(i) << ": "
+                << type_latency_hist[i] << endl;
+        }
+    }
+
+    out << "hit latency: " << hit_latency_hist << endl;
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        if (hit_type_latency_hist[i].size() > 0) {
+            out << "hit latency: " << RubyRequestType(i) << ": "
+                << hit_type_latency_hist[i] << endl;
+        }
+    }
+
+    for (int i = 0; i < MachineType_NUM; i++) {
+        if (hit_mach_latency_hist[i].size() > 0) {
+            out << "hit latency: " << MachineType(i) << ": "
+                << hit_mach_latency_hist[i] << endl;
+        }
+    }
+
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        for (int j = 0; j < MachineType_NUM; j++) {
+            if (hit_type_mach_latency_hist[i][j].size() > 0) {
+                out << "hit latency: " << RubyRequestType(i)
+                    << ": " << MachineType(j) << ": "
+                    << hit_type_mach_latency_hist[i][j] << endl;
+            }
+        }
+    }
+
+    out << "miss latency: " << miss_latency_hist << endl;
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        if (miss_type_latency_hist[i].size() > 0) {
+            out << "miss latency: " << RubyRequestType(i) << ": "
+                << miss_type_latency_hist[i] << endl;
+        }
+    }
+
+    for (int i = 0; i < MachineType_NUM; i++) {
+        if (miss_mach_latency_hist[i].size() > 0) {
+            out << "miss latency: " << MachineType(i) << ": "
+                << miss_mach_latency_hist[i] << endl;
+
+            out << "miss latency: " << MachineType(i)
+                << "::issue_to_initial_request: "
+                << issue_to_initial_delay_hist[i] << endl;
+            out << "miss latency: " << MachineType(i)
+                << "::initial_to_forward_request: "
+                << initial_to_forward_delay_hist[i] << endl;
+            out << "miss latency: " << MachineType(i)
+                << "::forward_to_first_response: "
+                << forward_to_first_response_delay_hist[i] << endl;
+            out << "miss latency: " << MachineType(i)
+                << "::first_response_to_completion: "
+                << first_response_to_completion_delay_hist[i] << endl;
+            out << "incomplete times: " << incomplete_times[i] << endl;
+        }
+    }
+
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        for (int j = 0; j < MachineType_NUM; j++) {
+            if (miss_type_mach_latency_hist[i][j].size() > 0) {
+                out << "miss latency: " << RubyRequestType(i)
+                    << ": " << MachineType(j) << ": "
+                    << miss_type_mach_latency_hist[i][j] << endl;
+            }
+        }
+    }
+
+    out << endl;
+}
+
 void
 Profiler::printStats(ostream& out, bool short_stats)
 {
@@ -296,68 +459,7 @@ Profiler::printStats(ostream& out, bool short_stats)
     if (!short_stats) {
         out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
         out << "----------------------------------------" << endl;
-        out << "miss_latency: " << m_allMissLatencyHistogram << endl;
-        for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
-            if (m_missLatencyHistograms[i].size() > 0) {
-                out << "miss_latency_" << RubyRequestType(i) << ": "
-                    << m_missLatencyHistograms[i] << endl;
-            }
-        }
-        for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
-            if (m_machLatencyHistograms[i].size() > 0) {
-                out << "miss_latency_" << GenericMachineType(i) << ": "
-                    << m_machLatencyHistograms[i] << endl;
-            }
-        }
-
-        out << "miss_latency_wCC_issue_to_initial_request: " 
-            << m_wCCIssueToInitialRequestHistogram << endl;
-        out << "miss_latency_wCC_initial_forward_request: " 
-            << m_wCCInitialRequestToForwardRequestHistogram << endl;
-        out << "miss_latency_wCC_forward_to_first_response: " 
-            << m_wCCForwardRequestToFirstResponseHistogram << endl;
-        out << "miss_latency_wCC_first_response_to_completion: " 
-            << m_wCCFirstResponseToCompleteHistogram << endl;
-        out << "imcomplete_wCC_Times: " << m_wCCIncompleteTimes << endl;
-        out << "miss_latency_dir_issue_to_initial_request: " 
-            << m_dirIssueToInitialRequestHistogram << endl;
-        out << "miss_latency_dir_initial_forward_request: " 
-            << m_dirInitialRequestToForwardRequestHistogram << endl;
-        out << "miss_latency_dir_forward_to_first_response: " 
-            << m_dirForwardRequestToFirstResponseHistogram << endl;
-        out << "miss_latency_dir_first_response_to_completion: " 
-            << m_dirFirstResponseToCompleteHistogram << endl;
-        out << "imcomplete_dir_Times: " << m_dirIncompleteTimes << endl;
-
-        for (int i = 0; i < m_missMachLatencyHistograms.size(); i++) {
-            for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
-                if (m_missMachLatencyHistograms[i][j].size() > 0) {
-                    out << "miss_latency_" << RubyRequestType(i) 
-                        << "_" << GenericMachineType(j) << ": "
-                        << m_missMachLatencyHistograms[i][j] << endl;
-                }
-            }
-        }
-
-        out << endl;
-
-        out << "All Non-Zero Cycle SW Prefetch Requests" << endl;
-        out << "------------------------------------" << endl;
-        out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl;
-        for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
-            if (m_SWPrefetchLatencyHistograms[i].size() > 0) {
-                out << "prefetch_latency_" << RubyRequestType(i) << ": "
-                    << m_SWPrefetchLatencyHistograms[i] << endl;
-            }
-        }
-        for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
-            if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) {
-                out << "prefetch_latency_" << GenericMachineType(i) << ": "
-                    << m_SWPrefetchMachLatencyHistograms[i] << endl;
-            }
-        }
-        out << "prefetch_latency_L2Miss:"
-            << m_SWPrefetchL2MissLatencyHistogram << endl;
+        printMissLatencyProfile(out);
 
         if (m_all_sharing_histogram.size() > 0) {
             out << "all_sharing: " << m_all_sharing_histogram << endl;
@@ -434,44 +536,6 @@ Profiler::clearStats()
     m_real_time_start_time = time(NULL);
 
     m_busyBankCount = 0;
-
-    m_missLatencyHistograms.resize(RubyRequestType_NUM);
-    for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
-        m_missLatencyHistograms[i].clear(200);
-    }
-    m_machLatencyHistograms.resize(GenericMachineType_NUM+1);
-    for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
-        m_machLatencyHistograms[i].clear(200);
-    }
-    m_missMachLatencyHistograms.resize(RubyRequestType_NUM);
-    for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
-        m_missMachLatencyHistograms[i].resize(GenericMachineType_NUM+1);
-        for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
-            m_missMachLatencyHistograms[i][j].clear(200);
-        }
-    }
-    m_allMissLatencyHistogram.clear(200);
-    m_wCCIssueToInitialRequestHistogram.clear(200);
-    m_wCCInitialRequestToForwardRequestHistogram.clear(200);
-    m_wCCForwardRequestToFirstResponseHistogram.clear(200);
-    m_wCCFirstResponseToCompleteHistogram.clear(200);
-    m_wCCIncompleteTimes = 0;
-    m_dirIssueToInitialRequestHistogram.clear(200);
-    m_dirInitialRequestToForwardRequestHistogram.clear(200);
-    m_dirForwardRequestToFirstResponseHistogram.clear(200);
-    m_dirFirstResponseToCompleteHistogram.clear(200);
-    m_dirIncompleteTimes = 0;
-
-    m_SWPrefetchLatencyHistograms.resize(RubyRequestType_NUM);
-    for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
-        m_SWPrefetchLatencyHistograms[i].clear(200);
-    }
-    m_SWPrefetchMachLatencyHistograms.resize(GenericMachineType_NUM+1);
-    for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
-        m_SWPrefetchMachLatencyHistograms[i].clear(200);
-    }
-    m_allSWPrefetchLatencyHistogram.clear(200);
-
     m_read_sharing_histogram.clear();
     m_write_sharing_histogram.clear();
     m_all_sharing_histogram.clear();
@@ -530,85 +594,6 @@ Profiler::bankBusy()
     m_busyBankCount++;
 }
 
-// non-zero cycle demand request
-void
-Profiler::missLatency(Cycles cycles,
-                      RubyRequestType type,
-                      const GenericMachineType respondingMach)
-{
-    m_allMissLatencyHistogram.add(cycles);
-    m_missLatencyHistograms[type].add(cycles);
-    m_machLatencyHistograms[respondingMach].add(cycles);
-    m_missMachLatencyHistograms[type][respondingMach].add(cycles);
-}
-
-void
-Profiler::missLatencyWcc(Cycles issuedTime,
-                         Cycles initialRequestTime,
-                         Cycles forwardRequestTime,
-                         Cycles firstResponseTime,
-                         Cycles completionTime)
-{
-    if ((issuedTime <= initialRequestTime) &&
-        (initialRequestTime <= forwardRequestTime) &&
-        (forwardRequestTime <= firstResponseTime) &&
-        (firstResponseTime <= completionTime)) {
-        m_wCCIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
-        
-        m_wCCInitialRequestToForwardRequestHistogram.add(forwardRequestTime - 
-                                                         initialRequestTime);
-        
-        m_wCCForwardRequestToFirstResponseHistogram.add(firstResponseTime - 
-                                                        forwardRequestTime);
-        
-        m_wCCFirstResponseToCompleteHistogram.add(completionTime - 
-                                                  firstResponseTime);
-    } else {
-        m_wCCIncompleteTimes++;
-    }
-}
-
-void
-Profiler::missLatencyDir(Cycles issuedTime,
-                         Cycles initialRequestTime,
-                         Cycles forwardRequestTime,
-                         Cycles firstResponseTime,
-                         Cycles completionTime)
-{
-    if ((issuedTime <= initialRequestTime) &&
-        (initialRequestTime <= forwardRequestTime) &&
-        (forwardRequestTime <= firstResponseTime) &&
-        (firstResponseTime <= completionTime)) {
-        m_dirIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
-        
-        m_dirInitialRequestToForwardRequestHistogram.add(forwardRequestTime - 
-                                                         initialRequestTime);
-        
-        m_dirForwardRequestToFirstResponseHistogram.add(firstResponseTime - 
-                                                        forwardRequestTime);
-        
-        m_dirFirstResponseToCompleteHistogram.add(completionTime - 
-                                                  firstResponseTime);
-    } else {
-        m_dirIncompleteTimes++;
-    }
-}
-
-// non-zero cycle prefetch request
-void
-Profiler::swPrefetchLatency(Cycles cycles, RubyRequestType type,
-                            const GenericMachineType respondingMach)
-{
-    m_allSWPrefetchLatencyHistogram.add(cycles);
-    m_SWPrefetchLatencyHistograms[type].add(cycles);
-    m_SWPrefetchMachLatencyHistograms[respondingMach].add(cycles);
-
-    if (respondingMach == GenericMachineType_Directory ||
-        respondingMach == GenericMachineType_NUM) {
-        m_SWPrefetchL2MissLatencyHistogram.add(cycles);
-    }
-}
-
 // Helper function
 static double
 process_memory_total()
index 23efed67a843b5d30d767b98ed42251d5481a2ee..e7b3c5f8d472b8531f2a0830eaf3fcf87b76a1a6 100644 (file)
@@ -52,7 +52,6 @@
 
 #include "base/hashmap.hh"
 #include "mem/protocol/AccessType.hh"
-#include "mem/protocol/GenericMachineType.hh"
 #include "mem/protocol/PrefetchBit.hh"
 #include "mem/protocol/RubyAccessMode.hh"
 #include "mem/protocol/RubyRequestType.hh"
@@ -110,21 +109,7 @@ class Profiler : public SimObject
 
     void controllerBusy(MachineID machID);
     void bankBusy();
-
-    void missLatency(Cycles t, RubyRequestType type,
-                     const GenericMachineType respondingMach);
-
-    void missLatencyWcc(Cycles issuedTime, Cycles initialRequestTime,
-                        Cycles forwardRequestTime, Cycles firstResponseTime,
-                        Cycles completionTime);
-    
-    void missLatencyDir(Cycles issuedTime, Cycles initialRequestTime,
-                        Cycles forwardRequestTime, Cycles firstResponseTime,
-                        Cycles completionTime);
     
-    void swPrefetchLatency(Cycles t, RubyRequestType type,
-                           const GenericMachineType respondingMach);
-
     void print(std::ostream& out) const;
 
     void rubyWatch(int proc);
@@ -141,6 +126,7 @@ class Profiler : public SimObject
     void printRequestProfile(std::ostream &out) const;
     void printDelayProfile(std::ostream &out) const;
     void printOutstandingReqProfile(std::ostream &out) const;
+    void printMissLatencyProfile(std::ostream &out) const;
 
   private:
     // Private copy constructor and assignment operator
@@ -161,27 +147,6 @@ class Profiler : public SimObject
     int64 m_cache_to_cache;
     int64 m_memory_to_cache;
 
-    std::vector<Histogram> m_missLatencyHistograms;
-    std::vector<Histogram> m_machLatencyHistograms;
-    std::vector< std::vector<Histogram> > m_missMachLatencyHistograms;
-    Histogram m_wCCIssueToInitialRequestHistogram;
-    Histogram m_wCCInitialRequestToForwardRequestHistogram;
-    Histogram m_wCCForwardRequestToFirstResponseHistogram;
-    Histogram m_wCCFirstResponseToCompleteHistogram;
-    int64 m_wCCIncompleteTimes;
-    Histogram m_dirIssueToInitialRequestHistogram;
-    Histogram m_dirInitialRequestToForwardRequestHistogram;
-    Histogram m_dirForwardRequestToFirstResponseHistogram;
-    Histogram m_dirFirstResponseToCompleteHistogram;
-    int64 m_dirIncompleteTimes;
-
-    Histogram m_allMissLatencyHistogram;
-
-    Histogram m_allSWPrefetchLatencyHistogram;
-    Histogram m_SWPrefetchL2MissLatencyHistogram;
-    std::vector<Histogram> m_SWPrefetchLatencyHistograms;
-    std::vector<Histogram> m_SWPrefetchMachLatencyHistograms;
-
     Histogram m_average_latency_estimate;
     m5::hash_set<Address> m_watch_address_set;
 
index 606dff0ab987fe0478130ec760585220aaf88cc3..8e61766b8515429cf04df57fff987fd9e15dfef5 100644 (file)
@@ -136,6 +136,61 @@ Sequencer::wakeup()
 void Sequencer::clearStats()
 {
     m_outstandReqHist.clear();
+
+    // Initialize the histograms that track latency of all requests
+    m_latencyHist.clear(20);
+    m_typeLatencyHist.resize(RubyRequestType_NUM);
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        m_typeLatencyHist[i].clear(20);
+    }
+
+    // Initialize the histograms that track latency of requests that
+    // hit in the cache attached to the sequencer.
+    m_hitLatencyHist.clear(20);
+    m_hitTypeLatencyHist.resize(RubyRequestType_NUM);
+    m_hitTypeMachLatencyHist.resize(RubyRequestType_NUM);
+
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        m_hitTypeLatencyHist[i].clear(20);
+        m_hitTypeMachLatencyHist[i].resize(MachineType_NUM);
+        for (int j = 0; j < MachineType_NUM; j++) {
+            m_hitTypeMachLatencyHist[i][j].clear(20);
+        }
+    }
+
+    // Initialize the histograms that track the latency of requests that
+    // missed in the cache attached to the sequencer.
+    m_missLatencyHist.clear(20);
+    m_missTypeLatencyHist.resize(RubyRequestType_NUM);
+    m_missTypeMachLatencyHist.resize(RubyRequestType_NUM);
+
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        m_missTypeLatencyHist[i].clear(20);
+        m_missTypeMachLatencyHist[i].resize(MachineType_NUM);
+        for (int j = 0; j < MachineType_NUM; j++) {
+            m_missTypeMachLatencyHist[i][j].clear(20);
+        }
+    }
+
+    m_hitMachLatencyHist.resize(MachineType_NUM);
+    m_missMachLatencyHist.resize(MachineType_NUM);
+    m_IssueToInitialDelayHist.resize(MachineType_NUM);
+    m_InitialToForwardDelayHist.resize(MachineType_NUM);
+    m_ForwardToFirstResponseDelayHist.resize(MachineType_NUM);
+    m_FirstResponseToCompletionDelayHist.resize(MachineType_NUM);
+    m_IncompleteTimes.resize(MachineType_NUM);
+
+    for (int i = 0; i < MachineType_NUM; i++) {
+        m_missMachLatencyHist[i].clear(20);
+        m_hitMachLatencyHist[i].clear(20);
+
+        m_IssueToInitialDelayHist[i].clear(20);
+        m_InitialToForwardDelayHist[i].clear(20);
+        m_ForwardToFirstResponseDelayHist[i].clear(20);
+        m_FirstResponseToCompletionDelayHist[i].clear(20);
+
+        m_IncompleteTimes[i] = 0;
+    }
 }
 
 void
@@ -370,26 +425,58 @@ Sequencer::handleLlsc(const Address& address, SequencerRequest* request)
 }
 
 void
-Sequencer::writeCallback(const Address& address, DataBlock& data)
+Sequencer::recordMissLatency(const Cycles cycles, const RubyRequestType type,
+                             const MachineType respondingMach,
+                             bool isExternalHit, Cycles issuedTime,
+                             Cycles initialRequestTime,
+                             Cycles forwardRequestTime,
+                             Cycles firstResponseTime, Cycles completionTime)
 {
-    writeCallback(address, GenericMachineType_NULL, data);
-}
+    m_latencyHist.add(cycles);
+    m_typeLatencyHist[type].add(cycles);
+
+    if (isExternalHit) {
+        m_missLatencyHist.add(cycles);
+        m_missTypeLatencyHist[type].add(cycles);
+
+        if (respondingMach != MachineType_NUM) {
+            m_missMachLatencyHist[respondingMach].add(cycles);
+            m_missTypeMachLatencyHist[type][respondingMach].add(cycles);
+
+            if ((issuedTime <= initialRequestTime) &&
+                (initialRequestTime <= forwardRequestTime) &&
+                (forwardRequestTime <= firstResponseTime) &&
+                (firstResponseTime <= completionTime)) {
+
+                m_IssueToInitialDelayHist[respondingMach].add(
+                    initialRequestTime - issuedTime);
+                m_InitialToForwardDelayHist[respondingMach].add(
+                    forwardRequestTime - initialRequestTime);
+                m_ForwardToFirstResponseDelayHist[respondingMach].add(
+                    firstResponseTime - forwardRequestTime);
+                m_FirstResponseToCompletionDelayHist[respondingMach].add(
+                    completionTime - firstResponseTime);
+            } else {
+                m_IncompleteTimes[respondingMach]++;
+            }
+        }
+    } else {
+        m_hitLatencyHist.add(cycles);
+        m_hitTypeLatencyHist[type].add(cycles);
 
-void
-Sequencer::writeCallback(const Address& address,
-                         GenericMachineType mach,
-                         DataBlock& data)
-{
-    writeCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
+        if (respondingMach != MachineType_NUM) {
+            m_hitMachLatencyHist[respondingMach].add(cycles);
+            m_hitTypeMachLatencyHist[type][respondingMach].add(cycles);
+        }
+    }
 }
 
 void
-Sequencer::writeCallback(const Address& address,
-                         GenericMachineType mach,
-                         DataBlock& data,
-                         Cycles initialRequestTime,
-                         Cycles forwardRequestTime,
-                         Cycles firstResponseTime)
+Sequencer::writeCallback(const Address& address, DataBlock& data,
+                         const bool externalHit, const MachineType mach,
+                         const Cycles initialRequestTime,
+                         const Cycles forwardRequestTime,
+                         const Cycles firstResponseTime)
 {
     assert(address == line_address(address));
     assert(m_writeRequestTable.count(line_address(address)));
@@ -427,28 +514,13 @@ Sequencer::writeCallback(const Address& address,
         m_controller->unblock(address);
     }
 
-    hitCallback(request, mach, data, success,
+    hitCallback(request, data, success, mach, externalHit,
                 initialRequestTime, forwardRequestTime, firstResponseTime);
 }
 
 void
-Sequencer::readCallback(const Address& address, DataBlock& data)
-{
-    readCallback(address, GenericMachineType_NULL, data);
-}
-
-void
-Sequencer::readCallback(const Address& address,
-                        GenericMachineType mach,
-                        DataBlock& data)
-{
-    readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
-}
-
-void
-Sequencer::readCallback(const Address& address,
-                        GenericMachineType mach,
-                        DataBlock& data,
+Sequencer::readCallback(const Address& address, DataBlock& data,
+                        bool externalHit, const MachineType mach,
                         Cycles initialRequestTime,
                         Cycles forwardRequestTime,
                         Cycles firstResponseTime)
@@ -466,18 +538,17 @@ Sequencer::readCallback(const Address& address,
     assert((request->m_type == RubyRequestType_LD) ||
            (request->m_type == RubyRequestType_IFETCH));
 
-    hitCallback(request, mach, data, true,
+    hitCallback(request, data, true, mach, externalHit,
                 initialRequestTime, forwardRequestTime, firstResponseTime);
 }
 
 void
-Sequencer::hitCallback(SequencerRequest* srequest,
-                       GenericMachineType mach,
-                       DataBlock& data,
-                       bool success,
-                       Cycles initialRequestTime,
-                       Cycles forwardRequestTime,
-                       Cycles firstResponseTime)
+Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
+                       bool llscSuccess,
+                       const MachineType mach, const bool externalHit,
+                       const Cycles initialRequestTime,
+                       const Cycles forwardRequestTime,
+                       const Cycles firstResponseTime)
 {
     PacketPtr pkt = srequest->pkt;
     Address request_address(pkt->getAddr());
@@ -494,29 +565,17 @@ Sequencer::hitCallback(SequencerRequest* srequest,
     }
 
     assert(curCycle() >= issued_time);
-    Cycles miss_latency = curCycle() - issued_time;
-
-    // Profile the miss latency for all non-zero demand misses
-    if (miss_latency != 0) {
-        g_system_ptr->getProfiler()->missLatency(miss_latency, type, mach);
+    Cycles total_latency = curCycle() - issued_time;
 
-        if (mach == GenericMachineType_L1Cache_wCC) {
-            g_system_ptr->getProfiler()->missLatencyWcc(issued_time,
-                initialRequestTime, forwardRequestTime,
-                firstResponseTime, curCycle());
-        }
-
-        if (mach == GenericMachineType_Directory) {
-            g_system_ptr->getProfiler()->missLatencyDir(issued_time,
-                initialRequestTime, forwardRequestTime,
-                firstResponseTime, curCycle());
-        }
+    // Profile the latency for all demand accesses.
+    recordMissLatency(total_latency, type, mach, externalHit, issued_time,
+                      initialRequestTime, forwardRequestTime,
+                      firstResponseTime, curCycle());
 
-        DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
-                 curTick(), m_version, "Seq",
-                 success ? "Done" : "SC_Failed", "", "",
-                 request_address, miss_latency);
-    }
+    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
+             curTick(), m_version, "Seq",
+             llscSuccess ? "Done" : "SC_Failed", "", "",
+             request_address, total_latency);
 
     // update the data
     if (g_system_ptr->m_warmup_enabled) {
index 058edb9ce251320e84e79b7d2ab8f88ab737d0b1..86e6aa2a92ef7683c803c32465336305fada408d 100644 (file)
@@ -32,7 +32,7 @@
 #include <iostream>
 
 #include "base/hashmap.hh"
-#include "mem/protocol/GenericMachineType.hh"
+#include "mem/protocol/MachineType.hh"
 #include "mem/protocol/RubyRequestType.hh"
 #include "mem/protocol/SequencerRequestType.hh"
 #include "mem/ruby/common/Address.hh"
@@ -65,36 +65,24 @@ class Sequencer : public RubyPort
 
     // Public Methods
     void wakeup(); // Used only for deadlock detection
-
     void printProgress(std::ostream& out) const;
-
     void clearStats();
 
-    void writeCallback(const Address& address, DataBlock& data);
-
     void writeCallback(const Address& address,
-                       GenericMachineType mach,
-                       DataBlock& data);
-
-    void writeCallback(const Address& address,
-                       GenericMachineType mach,
                        DataBlock& data,
-                       Cycles initialRequestTime,
-                       Cycles forwardRequestTime,
-                       Cycles firstResponseTime);
-
-    void readCallback(const Address& address, DataBlock& data);
+                       const bool externalHit = false,
+                       const MachineType mach = MachineType_NUM,
+                       const Cycles initialRequestTime = Cycles(0),
+                       const Cycles forwardRequestTime = Cycles(0),
+                       const Cycles firstResponseTime = Cycles(0));
 
     void readCallback(const Address& address,
-                      GenericMachineType mach,
-                      DataBlock& data);
-
-    void readCallback(const Address& address,
-                      GenericMachineType mach,
                       DataBlock& data,
-                      Cycles initialRequestTime,
-                      Cycles forwardRequestTime,
-                      Cycles firstResponseTime);
+                      const bool externalHit = false,
+                      const MachineType mach = MachineType_NUM,
+                      const Cycles initialRequestTime = Cycles(0),
+                      const Cycles forwardRequestTime = Cycles(0),
+                      const Cycles firstResponseTime = Cycles(0));
 
     RequestStatus makeRequest(PacketPtr pkt);
     bool empty() const;
@@ -118,19 +106,63 @@ class Sequencer : public RubyPort
     void recordRequestType(SequencerRequestType requestType);
     Histogram& getOutstandReqHist() { return m_outstandReqHist; }
 
+    Histogram& getLatencyHist() { return m_latencyHist; }
+    Histogram& getTypeLatencyHist(uint32_t t)
+    { return m_typeLatencyHist[t]; }
+
+    Histogram& getHitLatencyHist() { return m_hitLatencyHist; }
+    Histogram& getHitTypeLatencyHist(uint32_t t)
+    { return m_hitTypeLatencyHist[t]; }
+
+    Histogram& getHitMachLatencyHist(uint32_t t)
+    { return m_hitMachLatencyHist[t]; }
+
+    Histogram& getHitTypeMachLatencyHist(uint32_t r, uint32_t t)
+    { return m_hitTypeMachLatencyHist[r][t]; }
+
+    Histogram& getMissLatencyHist() { return m_missLatencyHist; }
+    Histogram& getMissTypeLatencyHist(uint32_t t)
+    { return m_missTypeLatencyHist[t]; }
+
+    Histogram& getMissMachLatencyHist(uint32_t t)
+    { return m_missMachLatencyHist[t]; }
+
+    Histogram& getMissTypeMachLatencyHist(uint32_t r, uint32_t t)
+    { return m_missTypeMachLatencyHist[r][t]; }
+
+    Histogram& getIssueToInitialDelayHist(uint32_t t)
+    { return m_IssueToInitialDelayHist[t]; }
+
+    Histogram& getInitialToForwardDelayHist(const MachineType t)
+    { return m_InitialToForwardDelayHist[t]; }
+
+    Histogram& getForwardRequestToFirstResponseHist(const MachineType t)
+    { return m_ForwardToFirstResponseDelayHist[t]; }
+
+    Histogram& getFirstResponseToCompletionDelayHist(const MachineType t)
+    { return m_FirstResponseToCompletionDelayHist[t]; }
+
+    const uint64_t getIncompleteTimes(const MachineType t) const
+    { return m_IncompleteTimes[t]; }
+
   private:
     void issueRequest(PacketPtr pkt, RubyRequestType type);
 
-    void hitCallback(SequencerRequest* request,
-                     GenericMachineType mach,
-                     DataBlock& data,
-                     bool success,
-                     Cycles initialRequestTime,
-                     Cycles forwardRequestTime,
-                     Cycles firstResponseTime);
+    void hitCallback(SequencerRequest* request, DataBlock& data,
+                     bool llscSuccess,
+                     const MachineType mach, const bool externalHit,
+                     const Cycles initialRequestTime,
+                     const Cycles forwardRequestTime,
+                     const Cycles firstResponseTime);
 
-    RequestStatus insertRequest(PacketPtr pkt, RubyRequestType request_type);
+    void recordMissLatency(const Cycles t, const RubyRequestType type,
+                           const MachineType respondingMach,
+                           bool isExternalHit, Cycles issuedTime,
+                           Cycles initialRequestTime,
+                           Cycles forwardRequestTime, Cycles firstResponseTime,
+                           Cycles completionTime);
 
+    RequestStatus insertRequest(PacketPtr pkt, RubyRequestType request_type);
     bool handleLlsc(const Address& address, SequencerRequest* request);
 
     // Private copy constructor and assignment operator
@@ -161,6 +193,38 @@ class Sequencer : public RubyPort
     //! Histogram for number of outstanding requests per cycle.
     Histogram m_outstandReqHist;
 
+    //! Histogram for holding latency profile of all requests.
+    Histogram m_latencyHist;
+    std::vector<Histogram> m_typeLatencyHist;
+
+    //! Histogram for holding latency profile of all requests that
+    //! hit in the controller connected to this sequencer.
+    Histogram m_hitLatencyHist;
+    std::vector<Histogram> m_hitTypeLatencyHist;
+
+    //! Histograms for profiling the latencies for requests that
+    //! did not require external messages.
+    std::vector<Histogram> m_hitMachLatencyHist;
+    std::vector< std::vector<Histogram> > m_hitTypeMachLatencyHist;
+
+    //! Histogram for holding latency profile of all requests that
+    //! miss in the controller connected to this sequencer.
+    Histogram m_missLatencyHist;
+    std::vector<Histogram> m_missTypeLatencyHist;
+
+    //! Histograms for profiling the latencies for requests that
+    //! required external messages.
+    std::vector<Histogram> m_missMachLatencyHist;
+    std::vector< std::vector<Histogram> > m_missTypeMachLatencyHist;
+
+    //! Histograms for recording the breakdown of miss latency
+    std::vector<Histogram> m_IssueToInitialDelayHist;
+    std::vector<Histogram> m_InitialToForwardDelayHist;
+    std::vector<Histogram> m_ForwardToFirstResponseDelayHist;
+    std::vector<Histogram> m_FirstResponseToCompletionDelayHist;
+    std::vector<uint64_t> m_IncompleteTimes;
+
+
     class SequencerWakeupEvent : public Event
     {
       private:
index d494cb7cec3e799c147200f4179995c93be939c6..5d14f768868af0bfe8d773c3ff7208a7ce3d00fe 100644 (file)
@@ -29,10 +29,11 @@ from slicc.ast.DeclAST import DeclAST
 from slicc.symbols import StateMachine, Type
 
 class MachineAST(DeclAST):
-    def __init__(self, slicc, ident, pairs_ast, config_parameters, decls):
+    def __init__(self, slicc, idents, pairs_ast, config_parameters, decls):
         super(MachineAST, self).__init__(slicc, pairs_ast)
 
-        self.ident = ident
+        self.ident = idents[0]
+        self.machine_types = idents
         self.pairs_ast = pairs_ast
         self.config_parameters = config_parameters
         self.decls = decls
@@ -71,10 +72,18 @@ class MachineAST(DeclAST):
 
     def findMachines(self):
         # Add to MachineType enumeration
-        machine_type = self.symtab.find("MachineType", Type)
-        if not machine_type.addEnum(self.ident, self.pairs_ast.pairs):
-            self.error("Duplicate machine name: %s:%s" % (machine_type,
-                                                          self.ident))
+        for mtype in self.machine_types:
+            machine_type = self.symtab.find("MachineType", Type)
+            pairs = self.pairs_ast.pairs
+
+            if mtype == self.ident:
+                pairs["Primary"] = True
+            else:
+                pairs["Primary"] = False
+
+            if not machine_type.addEnum(mtype, pairs):
+                self.error("Duplicate machine name: %s:%s" % (
+                            machine_type, mtype))
 
         # Generate code for all the internal decls
         self.decls.findMachines()
index 96e029ecf9f1060ab48b62113628701889292b3d..aa96ceef12dc8a5a954b7fb2c550becb89659b7b 100644 (file)
@@ -258,8 +258,12 @@ class SLICC(Grammar):
             filename = os.path.join(self.base_dir, p[2])
         p[0] = self.parse_file(filename)
 
-    def p_decl__machine(self, p):
-        "decl : MACHINE '(' ident pairs ')' ':' params '{' decls '}'"
+    def p_decl__machine0(self, p):
+        "decl : MACHINE '(' idents ')' ':' params '{' decls '}'"
+        p[0] = ast.MachineAST(self, p[3], [], p[7], p[9])
+
+    def p_decl__machine1(self, p):
+        "decl : MACHINE '(' idents pairs ')' ':' params '{' decls '}'"
         p[0] = ast.MachineAST(self, p[3], p[4], p[7], p[9])
 
     def p_decl__action(self, p):
index 1c2177ce1a22de76b6127f9d11af4a546ec95d00..29b68f2c520416acf1ac0cb528dd45bd67eccca8 100644 (file)
@@ -477,7 +477,6 @@ ${{self.c_ident}}::print(ostream& out) const
 
         if self.isMachineType:
             code('#include "base/misc.hh"')
-            code('#include "mem/protocol/GenericMachineType.hh"')
             code('#include "mem/ruby/common/Address.hh"')
             code('struct MachineID;')
 
@@ -532,23 +531,6 @@ MachineID map_Address_to_DMA(const Address &addr);
                 code('''
 
 MachineID get${{enum.ident}}MachineID(NodeID RubyNode);
-''')
-
-            code('''
-inline GenericMachineType
-ConvertMachToGenericMach(MachineType machType)
-{
-''')
-            for enum in self.enums.itervalues():
-                genericType = self.enums[enum.ident].get('genericType',
-                                                         enum.ident)
-                code('''
-      if (machType == MachineType_${{enum.ident}})
-          return GenericMachineType_${{genericType}};
-''')
-            code('''
-      panic("cannot convert to a GenericMachineType");
-}
 ''')
 
         if self.isStateDecl:
@@ -610,7 +592,8 @@ AccessPermission ${{self.c_ident}}_to_permission(const ${{self.c_ident}}& obj)
 
         if self.isMachineType:
             for enum in self.enums.itervalues():
-                code('#include "mem/protocol/${{enum.ident}}_Controller.hh"')
+                if enum.get("Primary"):
+                    code('#include "mem/protocol/${{enum.ident}}_Controller.hh"')
             code('#include "mem/ruby/system/MachineID.hh"')
 
         code('''
@@ -747,7 +730,11 @@ ${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj)
             code.indent()
             code('  case ${{self.c_ident}}_NUM:')
             for enum in reversed(self.enums.values()):
-                code('    base += ${{enum.ident}}_Controller::getNumControllers();')
+                # Check if there is a defined machine with this type
+                if enum.get("Primary"):
+                    code('    base += ${{enum.ident}}_Controller::getNumControllers();')
+                else:
+                    code('    base += 0;')
                 code('  case ${{self.c_ident}}_${{enum.ident}}:')
             code('    break;')
             code.dedent()
@@ -771,10 +758,11 @@ ${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj)
 
             # For each field
             for enum in self.enums.itervalues():
-                code('''
-      case ${{self.c_ident}}_${{enum.ident}}:
-        return ${{enum.ident}}_Controller::getNumControllers();
-''')
+                code('case ${{self.c_ident}}_${{enum.ident}}:')
+                if enum.get("Primary"):
+                    code('return ${{enum.ident}}_Controller::getNumControllers();')
+                else:
+                    code('return 0;')
 
             # total num
             code('''