Support profiling.
author Kevin Lim <ktlim@umich.edu>
Thu, 24 Aug 2006 21:43:08 +0000 (17:43 -0400)
committer Kevin Lim <ktlim@umich.edu>
Thu, 24 Aug 2006 21:43:08 +0000 (17:43 -0400)
--HG--
extra : convert_revision : eab02dea68442bd3f8c5d1d16b7f93f43cbda2a5

cpu/o3/alpha_cpu_impl.hh
cpu/o3/commit_impl.hh
cpu/o3/thread_state.hh
cpu/ozone/thread_state.hh
cpu/thread_state.hh
python/m5/objects/AlphaFullCPU.py
python/m5/objects/OzoneCPU.py

index 1bf0652cd34e962d2c6c56789f12ab273c835bf1..071a870effec09c68ffb1f36f9ecbf50263dc37e 100644 (file)
@@ -153,15 +153,6 @@ AlphaFullCPU<Impl>::regStats()
     this->commit.regStats();
 }
 
-#if FULL_SYSTEM
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaXC::dumpFuncProfile()
-{
-    // Currently not supported
-}
-#endif
-
 template <class Impl>
 void
 AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
@@ -334,15 +325,26 @@ AlphaFullCPU<Impl>::AlphaXC::readLastSuspend()
     return thread->lastSuspend;
 }
 
+template <class Impl>
+void
+AlphaFullCPU<Impl>::AlphaXC::dumpFuncProfile()
+{
+    thread->dumpFuncProfile();
+}
+
 template <class Impl>
 void
 AlphaFullCPU<Impl>::AlphaXC::profileClear()
-{}
+{
+    thread->profileClear();
+}
 
 template <class Impl>
 void
 AlphaFullCPU<Impl>::AlphaXC::profileSample()
-{}
+{
+    thread->profileSample();
+}
 #endif
 
 template <class Impl>
index 364e685c278243b5c00a35f98b5f12d1a39e471a..cd10ec6b2788b6f86604027494c791091f41bab6 100644 (file)
@@ -1035,6 +1035,20 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
 
     updateComInstStats(head_inst);
 
+#if FULL_SYSTEM
+    if (thread[tid]->profile) {
+//        bool usermode =
+//            (cpu->readMiscReg(AlphaISA::IPR_DTB_CM, tid) & 0x18) != 0;
+//        thread[tid]->profilePC = usermode ? 1 : head_inst->readPC();
+        thread[tid]->profilePC = head_inst->readPC();
+        ProfileNode *node = thread[tid]->profile->consume(thread[tid]->getXCProxy(),
+                                                          head_inst->staticInst);
+
+        if (node)
+            thread[tid]->profileNode = node;
+    }
+#endif
+
     if (head_inst->traceData) {
         head_inst->traceData->setFetchSeq(head_inst->seqNum);
         head_inst->traceData->setCPSeq(thread[tid]->numInst);
index 3f1208ea0c0a834c076d329f20f1e01f40a7a934..28f4881430b996eef0f2c8aeee41a2bb4b259c05 100644 (file)
 
 #include "arch/faults.hh"
 #include "arch/isa_traits.hh"
+#include "base/callback.hh"
+#include "base/output.hh"
 #include "cpu/exec_context.hh"
 #include "cpu/thread_state.hh"
+#include "sim/sim_exit.hh"
 
 class Event;
 class Process;
@@ -83,8 +86,22 @@ struct O3ThreadState : public ThreadState {
 #if FULL_SYSTEM
     O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
         : ThreadState(-1, _thread_num, _mem),
-          inSyscall(0), trapPending(0)
-    { }
+          cpu(_cpu), inSyscall(0), trapPending(0)
+    {
+        if (cpu->params->profile) {
+            profile = new FunctionProfile(cpu->params->system->kernelSymtab);
+            Callback *cb =
+                new MakeCallback<O3ThreadState,
+                &O3ThreadState::dumpFuncProfile>(this);
+            registerExitCallback(cb);
+        }
+
+        // let's fill with a dummy node for now so we don't get a segfault
+        // on the first cycle when there's no node available.
+        static ProfileNode dummyNode;
+        profileNode = &dummyNode;
+        profilePC = 3;
+    }
 #else
     O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
         : ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid),
@@ -138,6 +155,14 @@ struct O3ThreadState : public ThreadState {
     /** Handles the syscall. */
     void syscall() { process->syscall(xcProxy); }
 #endif
+
+#if FULL_SYSTEM
+    void dumpFuncProfile()
+    {
+        std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
+        profile->dump(xcProxy, *os);
+    }
+#endif
 };
 
 #endif // __CPU_O3_THREAD_STATE_HH__
index f104dff2393490708d6de0887714a91f2218446c..93a56da1b2c462edf9f23d5554b0aba0f6b79910 100644 (file)
 
 #include "arch/faults.hh"
 #include "arch/isa_traits.hh"
+#include "base/callback.hh"
+#include "base/output.hh"
 #include "cpu/exec_context.hh"
 #include "cpu/thread_state.hh"
 #include "sim/process.hh"
+#include "sim/sim_exit.hh"
 
 class Event;
 //class Process;
@@ -62,9 +65,22 @@ struct OzoneThreadState : public ThreadState {
 #if FULL_SYSTEM
     OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
         : ThreadState(-1, _thread_num, _mem),
-          inSyscall(0), trapPending(0)
+          cpu(_cpu), inSyscall(0), trapPending(0)
     {
         memset(&regs, 0, sizeof(TheISA::RegFile));
+        if (cpu->params->profile) {
+            profile = new FunctionProfile(cpu->params->system->kernelSymtab);
+            Callback *cb =
+                new MakeCallback<OzoneThreadState,
+                &OzoneThreadState::dumpFuncProfile>(this);
+            registerExitCallback(cb);
+        }
+
+        // let's fill with a dummy node for now so we don't get a segfault
+        // on the first cycle when there's no node available.
+        static ProfileNode dummyNode;
+        profileNode = &dummyNode;
+        profilePC = 3;
     }
 #else
     OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
@@ -187,6 +203,14 @@ struct OzoneThreadState : public ThreadState {
     Counter readFuncExeInst() { return funcExeInst; }
 
     void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
+
+#if FULL_SYSTEM
+    void dumpFuncProfile()
+    {
+        std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
+        profile->dump(xcProxy, *os);
+    }
+#endif
 };
 
 #endif // __CPU_OZONE_THREAD_STATE_HH__
index 12146bd11cb8caa698bc247c9a3e8e68a5647108..7a19963c8d7d3afebe034f7f9fbe4956461bfb6f 100644 (file)
@@ -30,6 +30,7 @@
 #define __CPU_THREAD_STATE_HH__
 
 #include "cpu/exec_context.hh"
+#include "cpu/profile.hh"
 
 #if FULL_SYSTEM
 class EndQuiesceEvent;
@@ -103,6 +104,20 @@ struct ThreadState {
 
 #endif
 
+#if FULL_SYSTEM
+    void profileClear()
+    {
+        if (profile)
+            profile->clear();
+    }
+
+    void profileSample()
+    {
+        if (profile)
+            profile->sample(profileNode, profilePC);
+    }
+#endif
+
     /**
      * Temporary storage to pass the source address from copy_load to
      * copy_store.
index 015e9d87298dd4c788d350ad84751c25c92231d7..5b6fa106347735cf4dba108be5e498b2de5fed5b 100644 (file)
@@ -10,6 +10,8 @@ class DerivAlphaFullCPU(BaseCPU):
         mem = Param.FunctionalMemory(NULL, "memory")
 
     checker = Param.BaseCPU(NULL, "checker")
+    if build_env['FULL_SYSTEM']:
+        profile = Param.Latency('0ns', "trace the kernel stack")
 
     cachePorts = Param.Unsigned("Cache Ports")
 
index ea8b6b537f235120851e266a37c9044ed126434c..dadca799059dc3dc886ff4d964e070af0ebe5860 100644 (file)
@@ -10,9 +10,12 @@ class DerivOzoneCPU(BaseCPU):
         mem = Param.FunctionalMemory(NULL, "memory")
 
     checker = Param.BaseCPU("Checker CPU")
+    if build_env['FULL_SYSTEM']:
+        profile = Param.Latency('0ns', "trace the kernel stack")
 
     width = Param.Unsigned("Width")
     frontEndWidth = Param.Unsigned("Front end width")
+    frontEndLatency = Param.Unsigned("Front end latency")
     backEndWidth = Param.Unsigned("Back end width")
     backEndSquashLatency = Param.Unsigned("Back end squash latency")
     backEndLatency = Param.Unsigned("Back end latency")
@@ -75,6 +78,7 @@ class DerivOzoneCPU(BaseCPU):
 
     LQEntries = Param.Unsigned("Number of load queue entries")
     SQEntries = Param.Unsigned("Number of store queue entries")
+    lsqLimits = Param.Bool(True, "LSQ size limits dispatch")
     LFSTSize = Param.Unsigned("Last fetched store table size")
     SSITSize = Param.Unsigned("Store set ID table size")