ruby: split CPU and GPU latency stats
[gem5.git] / src / mem / ruby / profiler / Profiler.hh
index bf4bf8a503cc10726744f2bda8fa286db24c04cc..6ad65f9625e74c8905a3d526e6532a8880f1f3cc 100644 (file)
 #ifndef __MEM_RUBY_PROFILER_PROFILER_HH__
 #define __MEM_RUBY_PROFILER_PROFILER_HH__
 
-#include "mem/protocol/AccessModeType.hh"
+#include <map>
+#include <string>
+#include <vector>
+
+#include "base/callback.hh"
+#include "base/statistics.hh"
 #include "mem/protocol/AccessType.hh"
-#include "mem/protocol/CacheRequestType.hh"
-#include "mem/protocol/GenericMachineType.hh"
-#include "mem/protocol/GenericRequestType.hh"
 #include "mem/protocol/PrefetchBit.hh"
-#include "mem/ruby/common/Address.hh"
-#include "mem/ruby/common/Consumer.hh"
-#include "mem/ruby/common/Global.hh"
-#include "mem/ruby/common/Histogram.hh"
-#include "mem/ruby/common/Set.hh"
-#include "mem/ruby/libruby.hh"
-#include "mem/ruby/system/MachineID.hh"
-#include "mem/ruby/system/MemoryControl.hh"
-#include "mem/ruby/system/NodeID.hh"
-#include "params/RubyProfiler.hh"
-#include "sim/sim_object.hh"
-
-class CacheMsg;
-class AddressProfiler;
+#include "mem/protocol/RubyAccessMode.hh"
+#include "mem/protocol/RubyRequestType.hh"
+#include "mem/ruby/common/MachineID.hh"
+#include "params/RubySystem.hh"
 
-template <class KEY_TYPE, class VALUE_TYPE> class Map;
+class RubyRequest;
+class AddressProfiler;
 
-class Profiler : public SimObject, public Consumer
+class Profiler
 {
   public:
-    typedef RubyProfilerParams Params;
-    Profiler(const Params *);
+    Profiler(const RubySystemParams *params, RubySystem *rs);
     ~Profiler();
 
-    void wakeup();
-
-    void setPeriodicStatsFile(const string& filename);
-    void setPeriodicStatsInterval(integer_t period);
+    RubySystem *m_ruby_system;
 
-    void printStats(ostream& out, bool short_stats=false);
-    void printShortStats(ostream& out) { printStats(out, true); }
-    void printTraceStats(ostream& out) const;
-    void clearStats();
-    void printConfig(ostream& out) const;
-    void printResourceUsage(ostream& out) const;
+    void wakeup();
+    void regStats(const std::string &name);
+    void collateStats();
 
     AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; }
     AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; }
 
-    void addAddressTraceSample(const CacheMsg& msg, NodeID id);
-
-    void profileRequest(const string& requestStr);
-    void profileSharing(const Address& addr, AccessType type,
-                        NodeID requestor, const Set& sharers,
-                        const Set& owner);
-
-    void profileMulticastRetry(const Address& addr, int count);
-
-    void profileFilterAction(int action);
-
-    void profileConflictingRequests(const Address& addr);
-
-    void
-    profileOutstandingRequest(int outstanding)
-    {
-        m_outstanding_requests.add(outstanding);
-    }
-
-    void
-    profileOutstandingPersistentRequest(int outstanding)
-    {
-        m_outstanding_persistent_requests.add(outstanding);
-    }
-
-    void
-    profileAverageLatencyEstimate(int latency)
-    {
-        m_average_latency_estimate.add(latency);
-    }
-
-    void recordPrediction(bool wasGood, bool wasPredicted);
-
-    void startTransaction(int cpu);
-    void endTransaction(int cpu);
-    void profilePFWait(Time waitTime);
-
-    void controllerBusy(MachineID machID);
-    void bankBusy();
-    void missLatency(Time t, RubyRequestType type);
-    void swPrefetchLatency(Time t, CacheRequestType type,
-                           GenericMachineType respondingMach);
-    void sequencerRequests(int num) { m_sequencer_requests.add(num); }
-
-    void profileTransition(const string& component, NodeID version,
-                           Address addr, const string& state,
-                           const string& event, const string& next_state,
-                           const string& note);
-    void profileMsgDelay(int virtualNetwork, int delayCycles);
-
-    void print(ostream& out) const;
-
-    void rubyWatch(int proc);
-    bool watchAddress(Address addr);
-
-    // return Ruby's start time
-    Time
-    getRubyStartTime()
-    {
-        return m_ruby_start;
-    }
+    void addAddressTraceSample(const RubyRequest& msg, NodeID id);
 
     // added by SS
-    bool getHotLines() { return m_hot_lines; }
-    bool getAllInstructions() { return m_all_instructions; }
+    bool getHotLines() const { return m_hot_lines; }
+    bool getAllInstructions() const { return m_all_instructions; }
 
   private:
     // Private copy constructor and assignment operator
@@ -164,69 +90,58 @@ class Profiler : public SimObject, public Consumer
     AddressProfiler* m_address_profiler_ptr;
     AddressProfiler* m_inst_profiler_ptr;
 
-    Vector<int64> m_instructions_executed_at_start;
-    Vector<int64> m_cycles_executed_at_start;
-
-    ostream* m_periodic_output_file_ptr;
-    integer_t m_stats_period;
-
-    Time m_ruby_start;
-    time_t m_real_time_start_time;
-
-    Vector <Vector<integer_t> > m_busyControllerCount;
-    integer_t m_busyBankCount;
-    Histogram m_multicast_retry_histogram;
-
-    Histogram m_filter_action_histogram;
-    Histogram m_tbeProfile;
-
-    Histogram m_sequencer_requests;
-    Histogram m_read_sharing_histogram;
-    Histogram m_write_sharing_histogram;
-    Histogram m_all_sharing_histogram;
-    int64 m_cache_to_cache;
-    int64 m_memory_to_cache;
-
-    Histogram m_prefetchWaitHistogram;
-
-    Vector<Histogram> m_missLatencyHistograms;
-    Vector<Histogram> m_machLatencyHistograms;
-    Histogram m_allMissLatencyHistogram;
-
-    Histogram m_allSWPrefetchLatencyHistogram;
-    Histogram m_SWPrefetchL2MissLatencyHistogram;
-    Vector<Histogram> m_SWPrefetchLatencyHistograms;
-    Vector<Histogram> m_SWPrefetchMachLatencyHistograms;
-
-    Histogram m_delayedCyclesHistogram;
-    Histogram m_delayedCyclesNonPFHistogram;
-    Vector<Histogram> m_delayedCyclesVCHistograms;
-
-    Histogram m_outstanding_requests;
-    Histogram m_outstanding_persistent_requests;
-
-    Histogram m_average_latency_estimate;
-
-    Map<Address, int>* m_watch_address_list_ptr;
-    // counts all initiated cache request including PUTs
-    int m_requests;
-    Map <string, int>* m_requestProfileMap_ptr;
+    Stats::Histogram delayHistogram;
+    std::vector<Stats::Histogram *> delayVCHistogram;
+
+    //! Histogram for number of outstanding requests per cycle.
+    Stats::Histogram m_outstandReqHistSeqr;
+    Stats::Histogram m_outstandReqHistCoalsr;
+
+    //! Histogram for holding latency profile of all requests.
+    Stats::Histogram m_latencyHistSeqr;
+    Stats::Histogram m_latencyHistCoalsr;
+    std::vector<Stats::Histogram *> m_typeLatencyHistSeqr;
+    std::vector<Stats::Histogram *> m_typeLatencyHistCoalsr;
+
+    //! Histogram for holding latency profile of all requests that
+    //! hit in the controller connected to this sequencer.
+    Stats::Histogram m_hitLatencyHistSeqr;
+    std::vector<Stats::Histogram *> m_hitTypeLatencyHistSeqr;
+
+    //! Histograms for profiling the latencies for requests that
+    //! did not require external messages.
+    std::vector<Stats::Histogram *> m_hitMachLatencyHistSeqr;
+    std::vector< std::vector<Stats::Histogram *> > m_hitTypeMachLatencyHistSeqr;
+
+    //! Histogram for holding latency profile of all requests that
+    //! miss in the controller connected to this sequencer.
+    Stats::Histogram m_missLatencyHistSeqr;
+    Stats::Histogram m_missLatencyHistCoalsr;
+    std::vector<Stats::Histogram *> m_missTypeLatencyHistSeqr;
+    std::vector<Stats::Histogram *> m_missTypeLatencyHistCoalsr;
+
+    //! Histograms for profiling the latencies for requests that
+    //! required external messages.
+    std::vector<Stats::Histogram *> m_missMachLatencyHistSeqr;
+    std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHistSeqr;
+    std::vector<Stats::Histogram *> m_missMachLatencyHistCoalsr;
+    std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHistCoalsr;
+
+    //! Histograms for recording the breakdown of miss latency
+    std::vector<Stats::Histogram *> m_IssueToInitialDelayHistSeqr;
+    std::vector<Stats::Histogram *> m_InitialToForwardDelayHistSeqr;
+    std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHistSeqr;
+    std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHistSeqr;
+    Stats::Scalar m_IncompleteTimesSeqr[MachineType_NUM];
+    std::vector<Stats::Histogram *> m_IssueToInitialDelayHistCoalsr;
+    std::vector<Stats::Histogram *> m_InitialToForwardDelayHistCoalsr;
+    std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHistCoalsr;
+    std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHistCoalsr;
 
     //added by SS
-    bool m_hot_lines;
-    bool m_all_instructions;
-
-    int m_num_of_sequencers;
+    const bool m_hot_lines;
+    const bool m_all_instructions;
+    const uint32_t m_num_vnets;
 };
 
-inline ostream&
-operator<<(ostream& out, const Profiler& obj)
-{
-    obj.print(out);
-    out << flush;
-    return out;
-}
-
 #endif // __MEM_RUBY_PROFILER_PROFILER_HH__
-
-