cpu: add more instruction mix statistics
[gem5.git] / src / cpu / simple / base.cc
index 8d7a1b11951a322d15b1ffc80b0b6d35fcf2b104..f022d05e0c8964470f81bdaee4b71b27f413d058 100644 (file)
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010-2012 ARM Limited
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
  * Authors: Steve Reinhardt
  */
 
-#include "arch/faults.hh"
+#include "arch/kernel_stats.hh"
+#include "arch/stacktrace.hh"
+#include "arch/tlb.hh"
 #include "arch/utility.hh"
+#include "arch/vtophys.hh"
+#include "base/loader/symtab.hh"
 #include "base/cp_annotate.hh"
 #include "base/cprintf.hh"
 #include "base/inifile.hh"
-#include "base/loader/symtab.hh"
 #include "base/misc.hh"
 #include "base/pollevent.hh"
-#include "base/range.hh"
 #include "base/trace.hh"
 #include "base/types.hh"
 #include "config/the_isa.hh"
+#include "cpu/simple/base.hh"
 #include "cpu/base.hh"
+#include "cpu/checker/cpu.hh"
+#include "cpu/checker/thread_context.hh"
 #include "cpu/exetrace.hh"
+#include "cpu/pred/bpred_unit.hh"
 #include "cpu/profile.hh"
-#include "cpu/simple/base.hh"
 #include "cpu/simple_thread.hh"
 #include "cpu/smt.hh"
 #include "cpu/static_inst.hh"
 #include "cpu/thread_context.hh"
+#include "debug/Decode.hh"
+#include "debug/Fetch.hh"
+#include "debug/Quiesce.hh"
+#include "mem/mem_object.hh"
 #include "mem/packet.hh"
 #include "mem/request.hh"
 #include "params/BaseSimpleCPU.hh"
 #include "sim/byteswap.hh"
 #include "sim/debug.hh"
+#include "sim/faults.hh"
+#include "sim/full_system.hh"
 #include "sim/sim_events.hh"
 #include "sim/sim_object.hh"
 #include "sim/stats.hh"
 #include "sim/system.hh"
 
-#if FULL_SYSTEM
-#include "arch/kernel_stats.hh"
-#include "arch/stacktrace.hh"
-#include "arch/tlb.hh"
-#include "arch/vtophys.hh"
-#else // !FULL_SYSTEM
-#include "mem/mem_object.hh"
-#endif // FULL_SYSTEM
-
 using namespace std;
 using namespace TheISA;
 
 BaseSimpleCPU::BaseSimpleCPU(BaseSimpleCPUParams *p)
-    : BaseCPU(p), traceData(NULL), thread(NULL), predecoder(NULL)
+    : BaseCPU(p),
+      branchPred(p->branchPred),
+      traceData(NULL), thread(NULL)
 {
-#if FULL_SYSTEM
-    thread = new SimpleThread(this, 0, p->system, p->itb, p->dtb);
-#else
-    thread = new SimpleThread(this, /* thread_num */ 0, p->workload[0],
-            p->itb, p->dtb);
-#endif // !FULL_SYSTEM
+    if (FullSystem)
+        thread = new SimpleThread(this, 0, p->system, p->itb, p->dtb,
+                                  p->isa[0]);
+    else
+        thread = new SimpleThread(this, /* thread_num */ 0, p->system,
+                                  p->workload[0], p->itb, p->dtb, p->isa[0]);
 
     thread->setStatus(ThreadContext::Halted);
 
     tc = thread->getTC();
 
+    if (p->checker) {
+        BaseCPU *temp_checker = p->checker;
+        checker = dynamic_cast<CheckerCPU *>(temp_checker);
+        checker->setSystem(p->system);
+        // Manipulate thread context
+        ThreadContext *cpu_tc = tc;
+        tc = new CheckerThreadContext<ThreadContext>(cpu_tc, this->checker);
+    } else {
+        checker = NULL;
+    }
+
     numInst = 0;
     startNumInst = 0;
+    numOp = 0;
+    startNumOp = 0;
     numLoad = 0;
     startNumLoad = 0;
     lastIcacheStall = 0;
@@ -115,7 +133,7 @@ BaseSimpleCPU::~BaseSimpleCPU()
 }
 
 void
-BaseSimpleCPU::deallocateContext(int thread_num)
+BaseSimpleCPU::deallocateContext(ThreadID thread_num)
 {
     // for now, these are equivalent
     suspendContext(thread_num);
@@ -123,7 +141,7 @@ BaseSimpleCPU::deallocateContext(int thread_num)
 
 
 void
-BaseSimpleCPU::haltContext(int thread_num)
+BaseSimpleCPU::haltContext(ThreadID thread_num)
 {
     // for now, these are equivalent
     suspendContext(thread_num);
@@ -138,8 +156,13 @@ BaseSimpleCPU::regStats()
     BaseCPU::regStats();
 
     numInsts
-        .name(name() + ".num_insts")
-        .desc("Number of instructions executed")
+        .name(name() + ".committedInsts")
+        .desc("Number of instructions committed")
+        ;
+
+    numOps
+        .name(name() + ".committedOps")
+        .desc("Number of ops (including micro ops) committed")
         ;
 
     numIntAluAccesses
@@ -192,6 +215,18 @@ BaseSimpleCPU::regStats()
         .desc("number of times the floating registers were written")
         ;
 
+    numCCRegReads
+        .name(name() + ".num_cc_register_reads")
+        .desc("number of times the CC registers were read")
+        .flags(nozero)
+        ;
+
+    numCCRegWrites
+        .name(name() + ".num_cc_register_writes")
+        .desc("number of times the CC registers were written")
+        .flags(nozero)
+        ;
+
     numMemRefs
         .name(name()+".num_mem_refs")
         .desc("number of memory refs")
@@ -251,9 +286,34 @@ BaseSimpleCPU::regStats()
         .prereq(dcacheRetryCycles)
         ;
 
+    statExecutedInstType
+        .init(Enums::Num_OpClass)
+        .name(name() + ".op_class")
+        .desc("Class of executed instruction")
+        .flags(total | pdf | dist)
+        ;
+    for (unsigned i = 0; i < Num_OpClasses; ++i) {
+        statExecutedInstType.subname(i, Enums::OpClassStrings[i]);
+    }
+
     idleFraction = constant(1.0) - notIdleFraction;
     numIdleCycles = idleFraction * numCycles;
     numBusyCycles = (notIdleFraction)*numCycles;
+
+    numBranches
+        .name(name() + ".Branches")
+        .desc("Number of branches fetched")
+        .prereq(numBranches);
+
+    numPredictedBranches
+        .name(name() + ".predictedBranches")
+        .desc("Number of branches predicted as taken")
+        .prereq(numPredictedBranches);
+
+    numBranchMispred
+        .name(name() + ".BranchMispred")
+        .desc("Number of branch mispredictions")
+        .prereq(numBranchMispred);
 }
 
 void
@@ -264,22 +324,21 @@ BaseSimpleCPU::resetStats()
 }
 
 void
-BaseSimpleCPU::serialize(ostream &os)
+BaseSimpleCPU::serializeThread(ostream &os, ThreadID tid)
 {
-    SERIALIZE_ENUM(_status);
-    BaseCPU::serialize(os);
-//    SERIALIZE_SCALAR(inst);
-    nameOut(os, csprintf("%s.xc.0", name()));
+    assert(_status == Idle || _status == Running);
+    assert(tid == 0);
+
     thread->serialize(os);
 }
 
 void
-BaseSimpleCPU::unserialize(Checkpoint *cp, const string &section)
+BaseSimpleCPU::unserializeThread(Checkpoint *cp, const string &section,
+                                 ThreadID tid)
 {
-    UNSERIALIZE_ENUM(_status);
-    BaseCPU::unserialize(cp, section);
-//    UNSERIALIZE_SCALAR(inst);
-    thread->unserialize(cp, csprintf("%s.xc.0", section));
+    if (tid != 0)
+        fatal("Trying to load more than one thread into a SimpleCPU\n");
+    thread->unserialize(cp, section);
 }
 
 void
@@ -287,15 +346,12 @@ change_thread_state(ThreadID tid, int activate, int priority)
 {
 }
 
-#if FULL_SYSTEM
 Addr
 BaseSimpleCPU::dbg_vtophys(Addr addr)
 {
     return vtophys(tc, addr);
 }
-#endif // FULL_SYSTEM
 
-#if FULL_SYSTEM
 void
 BaseSimpleCPU::wakeup()
 {
@@ -305,12 +361,10 @@ BaseSimpleCPU::wakeup()
     DPRINTF(Quiesce,"Suspended Processor awoke\n");
     thread->activate();
 }
-#endif // FULL_SYSTEM
 
 void
 BaseSimpleCPU::checkForInterrupts()
 {
-#if FULL_SYSTEM
     if (checkInterrupts(tc)) {
         Fault interrupt = interrupts->getInterrupt(tc);
 
@@ -318,10 +372,9 @@ BaseSimpleCPU::checkForInterrupts()
             fetchOffset = 0;
             interrupts->updateIntrInfo(tc);
             interrupt->invoke(tc);
-            predecoder.reset();
+            thread->decoder.reset();
         }
     }
-#endif
 }
 
 
@@ -334,7 +387,8 @@ BaseSimpleCPU::setupFetchRequest(Request *req)
     DPRINTF(Fetch, "Fetch: PC:%08p\n", instAddr);
 
     Addr fetchPC = (instAddr & PCMask) + fetchOffset;
-    req->setVirt(0, fetchPC, sizeof(MachInst), Request::INST_FETCH, instAddr);
+    req->setVirt(0, fetchPC, sizeof(MachInst), Request::INST_FETCH, instMasterId(),
+            instAddr);
 }
 
 
@@ -364,23 +418,22 @@ BaseSimpleCPU::preExecute()
         //We're not in the middle of a macro instruction
         StaticInstPtr instPtr = NULL;
 
+        TheISA::Decoder *decoder = &(thread->decoder);
+
         //Predecode, ie bundle up an ExtMachInst
-        //This should go away once the constructor can be set up properly
-        predecoder.setTC(thread->getTC());
         //If more fetch data is needed, pass it in.
         Addr fetchPC = (pcState.instAddr() & PCMask) + fetchOffset;
-        //if(predecoder.needMoreBytes())
-            predecoder.moreBytes(pcState, fetchPC, inst);
+        //if(decoder->needMoreBytes())
+            decoder->moreBytes(pcState, fetchPC, inst);
         //else
-        //    predecoder.process();
+        //    decoder->process();
 
-        //If an instruction is ready, decode it. Otherwise, we'll have to
+        //Decode an instruction if one is ready. Otherwise, we'll have to
         //fetch beyond the MachInst at the current pc.
-        if (predecoder.extMachInstReady()) {
+        instPtr = decoder->decode(pcState);
+        if (instPtr) {
             stayAtPC = false;
-            ExtMachInst machInst = predecoder.getExtMachInst(pcState);
             thread->pcState(pcState);
-            instPtr = StaticInst::decode(machInst, pcState.instAddr());
         } else {
             stayAtPC = true;
             fetchOffset += sizeof(MachInst);
@@ -400,16 +453,28 @@ BaseSimpleCPU::preExecute()
     }
 
     //If we decoded an instruction this "tick", record information about it.
-    if(curStaticInst)
-    {
+    if (curStaticInst) {
 #if TRACING_ON
         traceData = tracer->getInstRecord(curTick(), tc,
                 curStaticInst, thread->pcState(), curMacroStaticInst);
 
-        DPRINTF(Decode,"Decode: Decoded %s instruction: 0x%x\n",
+        DPRINTF(Decode,"Decode: Decoded %s instruction: %#x\n",
                 curStaticInst->getName(), curStaticInst->machInst);
 #endif // TRACING_ON
     }
+
+    if (branchPred && curStaticInst && curStaticInst->isControl()) {
+        // Use a fake sequence number since we only have one
+        // instruction in flight at the same time.
+        const InstSeqNum cur_sn(0);
+        const ThreadID tid(0);
+        pred_pc = thread->pcState();
+        const bool predict_taken(
+            branchPred->predict(curStaticInst, cur_sn, pred_pc, tid));
+
+        if (predict_taken)
+            ++numPredictedBranches;
+    }
 }
 
 void
@@ -419,15 +484,13 @@ BaseSimpleCPU::postExecute()
 
     TheISA::PCState pc = tc->pcState();
     Addr instAddr = pc.instAddr();
-#if FULL_SYSTEM
-    if (thread->profile) {
+    if (FullSystem && thread->profile) {
         bool usermode = TheISA::inUserMode(tc);
         thread->profilePC = usermode ? 1 : instAddr;
         ProfileNode *node = thread->profile->consume(tc, curStaticInst);
         if (node)
             thread->profileNode = node;
     }
-#endif
 
     if (curStaticInst->isMemRef()) {
         numMemRefs++;
@@ -442,6 +505,10 @@ BaseSimpleCPU::postExecute()
         CPA::cpa()->swAutoBegin(tc, pc.nextInstAddr());
     }
 
+    if (curStaticInst->isControl()) {
+        ++numBranches;
+    }
+
     /* Power model statistics */
     //integer alu accesses
     if (curStaticInst->isInteger()){
@@ -475,7 +542,10 @@ BaseSimpleCPU::postExecute()
     }
     /* End power model statistics */
 
-    traceFunctions(instAddr);
+    statExecutedInstType[curStaticInst->opClass()]++;
+
+    if (FullSystem)
+        traceFunctions(instAddr);
 
     if (traceData) {
         traceData->dump();
@@ -484,16 +554,17 @@ BaseSimpleCPU::postExecute()
     }
 }
 
-
 void
 BaseSimpleCPU::advancePC(Fault fault)
 {
+    const bool branching(thread->pcState().branching());
+
     //Since we're moving to a new pc, zero out the offset
     fetchOffset = 0;
     if (fault != NoFault) {
         curMacroStaticInst = StaticInst::nullStaticInstPtr;
         fault->invoke(tc, curStaticInst);
-        predecoder.reset();
+        thread->decoder.reset();
     } else {
         if (curStaticInst) {
             if (curStaticInst->isLastMicroop())
@@ -503,39 +574,28 @@ BaseSimpleCPU::advancePC(Fault fault)
             thread->pcState(pcState);
         }
     }
+
+    if (branchPred && curStaticInst && curStaticInst->isControl()) {
+        // Use a fake sequence number since we only have one
+        // instruction in flight at the same time.
+        const InstSeqNum cur_sn(0);
+        const ThreadID tid(0);
+
+        if (pred_pc == thread->pcState()) {
+            // Correctly predicted branch
+            branchPred->update(cur_sn, tid);
+        } else {
+            // Mis-predicted branch
+            branchPred->squash(cur_sn, pcState(),
+                               branching, tid);
+            ++numBranchMispred;
+        }
+    }
 }
 
-/*Fault
-BaseSimpleCPU::CacheOp(uint8_t Op, Addr EffAddr)
+void
+BaseSimpleCPU::startup()
 {
-    // translate to physical address
-    Fault fault = NoFault;
-    int CacheID = Op & 0x3; // Lower 3 bits identify Cache
-    int CacheOP = Op >> 2; // Upper 3 bits identify Cache Operation
-    if(CacheID > 1)
-      {
-        warn("CacheOps not implemented for secondary/tertiary caches\n");
-      }
-    else
-      {
-        switch(CacheOP)
-          { // Fill Packet Type
-          case 0: warn("Invalidate Cache Op\n");
-            break;
-          case 1: warn("Index Load Tag Cache Op\n");
-            break;
-          case 2: warn("Index Store Tag Cache Op\n");
-            break;
-          case 4: warn("Hit Invalidate Cache Op\n");
-            break;
-          case 5: warn("Fill/Hit Writeback Invalidate Cache Op\n");
-            break;
-          case 6: warn("Hit Writeback\n");
-            break;
-          case 7: warn("Fetch & Lock Cache Op\n");
-            break;
-          default: warn("Unimplemented Cache Op\n");
-          }
-      }
-    return fault;
-}*/
+    BaseCPU::startup();
+    thread->startup();
+}