x86,misc: add additional info on faulting X86 instruction, fetched PC
[gem5.git] / src / cpu / simple / base.cc
index 9035ce973a5844b72b9ea787466fb4f8e0cbbbd9..1f12afbf06a52b866d6335ebf657664701981891 100644 (file)
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 2010-2011 ARM Limited
+ * Copyright (c) 2010-2012, 2015, 2017 ARM Limited
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
  * Authors: Steve Reinhardt
  */
 
+#include "cpu/simple/base.hh"
+
 #include "arch/kernel_stats.hh"
 #include "arch/stacktrace.hh"
 #include "arch/tlb.hh"
 #include "arch/utility.hh"
 #include "arch/vtophys.hh"
-#include "base/loader/symtab.hh"
 #include "base/cp_annotate.hh"
 #include "base/cprintf.hh"
 #include "base/inifile.hh"
-#include "base/misc.hh"
+#include "base/loader/symtab.hh"
+#include "base/logging.hh"
 #include "base/pollevent.hh"
-#include "base/range.hh"
 #include "base/trace.hh"
 #include "base/types.hh"
 #include "config/the_isa.hh"
-#include "config/use_checker.hh"
-#include "cpu/simple/base.hh"
 #include "cpu/base.hh"
+#include "cpu/checker/cpu.hh"
+#include "cpu/checker/thread_context.hh"
 #include "cpu/exetrace.hh"
+#include "cpu/pred/bpred_unit.hh"
 #include "cpu/profile.hh"
+#include "cpu/simple/exec_context.hh"
 #include "cpu/simple_thread.hh"
 #include "cpu/smt.hh"
 #include "cpu/static_inst.hh"
 #include "sim/stats.hh"
 #include "sim/system.hh"
 
-#if USE_CHECKER
-#include "cpu/checker/cpu.hh"
-#include "cpu/checker/thread_context.hh"
-#endif
-
 using namespace std;
 using namespace TheISA;
 
 BaseSimpleCPU::BaseSimpleCPU(BaseSimpleCPUParams *p)  // Creates one SimpleThread + SimpleExecContext per thread and, if a checker is configured, wraps thread 0's context for it.
-    : BaseCPU(p), traceData(NULL), thread(NULL), predecoder(NULL)
+    : BaseCPU(p),
+      curThread(0),
+      branchPred(p->branchPred),
+      traceData(NULL),
+      inst(),
+      _status(Idle)
 {
-    if (FullSystem)
-        thread = new SimpleThread(this, 0, p->system, p->itb, p->dtb);
-    else
-        thread = new SimpleThread(this, /* thread_num */ 0, p->system,
-                p->workload[0], p->itb, p->dtb);
+    SimpleThread *thread;  // reassigned for each thread built in the loop below
 
-    thread->setStatus(ThreadContext::Halted);
-
-    tc = thread->getTC();
+    for (unsigned i = 0; i < numThreads; i++) {  // one SimpleThread/SimpleExecContext pair per thread
+        if (FullSystem) {  // full-system threads are constructed without a workload argument
+            thread = new SimpleThread(this, i, p->system,
+                                      p->itb, p->dtb, p->isa[i]);
+        } else {
+            thread = new SimpleThread(this, i, p->system, p->workload[i],
+                                      p->itb, p->dtb, p->isa[i]);
+        }
+        threadInfo.push_back(new SimpleExecContext(this, thread));  // per-thread state, appended in thread order
+        ThreadContext *tc = thread->getTC();
+        threadContexts.push_back(tc);  // appended in the same order as threadInfo
+    }
 
-#if USE_CHECKER
     if (p->checker) {
+        if (numThreads != 1)  // only threadContexts[0] is wrapped for the checker below
+            fatal("Checker currently does not support SMT");
+
         BaseCPU *temp_checker = p->checker;
         checker = dynamic_cast<CheckerCPU *>(temp_checker);  // NOTE(review): dereferenced on the next line without a null check - confirm p->checker is always a CheckerCPU
         checker->setSystem(p->system);
         // Manipulate thread context
-        ThreadContext *cpu_tc = tc;
-        tc = new CheckerThreadContext<ThreadContext>(cpu_tc, this->checker);
+        ThreadContext *cpu_tc = threadContexts[0];
+        threadContexts[0] = new CheckerThreadContext<ThreadContext>(cpu_tc, this->checker);  // thread 0's context is replaced by a wrapper built from the original context and the checker
     } else {
         checker = NULL;
     }
-#endif
+}
 
-    numInst = 0;
-    startNumInst = 0;
-    numLoad = 0;
-    startNumLoad = 0;
-    lastIcacheStall = 0;
-    lastDcacheStall = 0;
+void
+BaseSimpleCPU::init()  // Runs BaseCPU::init(), then initialises memory proxies (and, in full-system mode, ISA CPU state) for every thread context.
+{
+    BaseCPU::init();
 
-    threadContexts.push_back(tc);
+    for (auto tc : threadContexts) {
+        // Initialise the ThreadContext's memory proxies
+        tc->initMemProxies(tc);
 
+        if (FullSystem && !params()->switched_out) {  // skipped outside full-system mode or when the params mark the CPU as switched out
+            // initialize CPU, including PC
+            TheISA::initCPU(tc, tc->contextId());
+        }
+    }
+}
 
-    fetchOffset = 0;
-    stayAtPC = false;
+void
+BaseSimpleCPU::checkPcEventQueue()  // Services the system PC event queue for the current thread, repeating until a pass leaves the thread's PC unchanged.
+{
+    Addr oldpc, pc = threadInfo[curThread]->thread->instAddr();
+    do {
+        oldpc = pc;
+        system->pcEventQueue.service(threadContexts[curThread]);
+        pc = threadInfo[curThread]->thread->instAddr();  // re-read the PC after servicing so a change can be detected
+    } while (oldpc != pc);  // loop again if the PC differs from its value before servicing
 }
 
-BaseSimpleCPU::~BaseSimpleCPU()
+void
+BaseSimpleCPU::swapActiveThread()  // Selects the thread at the front of activeThreads as curThread and rotates it to the back of the list.
 {
+    if (numThreads > 1) {  // no-op for a single-threaded CPU
+        if ((!curStaticInst || !curStaticInst->isDelayedCommit()) &&
+             !threadInfo[curThread]->stayAtPC) {  // no switch while the current inst is delayed-commit or the current thread has stayAtPC set
+            // Swap active threads
+            if (!activeThreads.empty()) {
+                curThread = activeThreads.front();
+                activeThreads.pop_front();
+                activeThreads.push_back(curThread);  // the chosen thread goes to the back of the list
+            }
+        }
+    }
 }
 
 void
-BaseSimpleCPU::deallocateContext(ThreadID thread_num)
+BaseSimpleCPU::countInst()  // Bumps the current thread's instruction and op counters plus the system-wide instruction total.
 {
-    // for now, these are equivalent
-    suspendContext(thread_num);
+    SimpleExecContext& t_info = *threadInfo[curThread];
+
+    if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {  // instruction counters advance only for non-microops or the last microop
+        t_info.numInst++;  // the counter summed by totalInsts()
+        t_info.numInsts++;  // the stat registered as "committedInsts" in regStats()
+    }
+    t_info.numOp++;  // op counters advance on every call; numOp is the counter summed by totalOps()
+    t_info.numOps++;  // the stat registered as "committedOps" in regStats()
+
+    system->totalNumInsts++;  // incremented on every call, outside the branch above
+    t_info.thread->funcExeInst++;
+}
+
+Counter
+BaseSimpleCPU::totalInsts() const  // Returns the sum of numInst over every entry in threadInfo.
+{
+    Counter total_inst = 0;
+    for (auto& t_info : threadInfo) {
+        total_inst += t_info->numInst;
+    }
+
+    return total_inst;
+}
+
+Counter
+BaseSimpleCPU::totalOps() const  // Returns the sum of numOp over every entry in threadInfo.
+{
+    Counter total_op = 0;
+    for (auto& t_info : threadInfo) {
+        total_op += t_info->numOp;
+    }
+
+    return total_op;
 }
 
+BaseSimpleCPU::~BaseSimpleCPU()  // Empty destructor. NOTE(review): the objects the constructor allocates with new are not deleted here - confirm who owns them.
+{
+}
 
 void
 BaseSimpleCPU::haltContext(ThreadID thread_num)  // Halts a thread by suspending it, then updates the cycle counters with CPU_STATE_SLEEP.
 {
     // for now, these are equivalent
     suspendContext(thread_num);
+    updateCycleCounters(BaseCPU::CPU_STATE_SLEEP);  // called after the suspend, with the sleep state
 }
 
 
@@ -155,149 +226,204 @@ BaseSimpleCPU::regStats()
 
     BaseCPU::regStats();
 
-    numInsts
-        .name(name() + ".num_insts")
-        .desc("Number of instructions executed")
-        ;
-
-    numIntAluAccesses
-        .name(name() + ".num_int_alu_accesses")
-        .desc("Number of integer alu accesses")
-        ;
-
-    numFpAluAccesses
-        .name(name() + ".num_fp_alu_accesses")
-        .desc("Number of float alu accesses")
-        ;
-
-    numCallsReturns
-        .name(name() + ".num_func_calls")
-        .desc("number of times a function call or return occured")
-        ;
-
-    numCondCtrlInsts
-        .name(name() + ".num_conditional_control_insts")
-        .desc("number of instructions that are conditional controls")
-        ;
-
-    numIntInsts
-        .name(name() + ".num_int_insts")
-        .desc("number of integer instructions")
-        ;
-
-    numFpInsts
-        .name(name() + ".num_fp_insts")
-        .desc("number of float instructions")
-        ;
-
-    numIntRegReads
-        .name(name() + ".num_int_register_reads")
-        .desc("number of times the integer registers were read")
-        ;
-
-    numIntRegWrites
-        .name(name() + ".num_int_register_writes")
-        .desc("number of times the integer registers were written")
-        ;
-
-    numFpRegReads
-        .name(name() + ".num_fp_register_reads")
-        .desc("number of times the floating registers were read")
-        ;
-
-    numFpRegWrites
-        .name(name() + ".num_fp_register_writes")
-        .desc("number of times the floating registers were written")
-        ;
-
-    numMemRefs
-        .name(name()+".num_mem_refs")
-        .desc("number of memory refs")
-        ;
-
-    numStoreInsts
-        .name(name() + ".num_store_insts")
-        .desc("Number of store instructions")
-        ;
-
-    numLoadInsts
-        .name(name() + ".num_load_insts")
-        .desc("Number of load instructions")
-        ;
-
-    notIdleFraction
-        .name(name() + ".not_idle_fraction")
-        .desc("Percentage of non-idle cycles")
-        ;
-
-    idleFraction
-        .name(name() + ".idle_fraction")
-        .desc("Percentage of idle cycles")
-        ;
-
-    numBusyCycles
-        .name(name() + ".num_busy_cycles")
-        .desc("Number of busy cycles")
-        ;
-
-    numIdleCycles
-        .name(name()+".num_idle_cycles")
-        .desc("Number of idle cycles")
-        ;
-
-    icacheStallCycles
-        .name(name() + ".icache_stall_cycles")
-        .desc("ICache total stall cycles")
-        .prereq(icacheStallCycles)
-        ;
-
-    dcacheStallCycles
-        .name(name() + ".dcache_stall_cycles")
-        .desc("DCache total stall cycles")
-        .prereq(dcacheStallCycles)
-        ;
-
-    icacheRetryCycles
-        .name(name() + ".icache_retry_cycles")
-        .desc("ICache total retry cycles")
-        .prereq(icacheRetryCycles)
-        ;
-
-    dcacheRetryCycles
-        .name(name() + ".dcache_retry_cycles")
-        .desc("DCache total retry cycles")
-        .prereq(dcacheRetryCycles)
-        ;
-
-    idleFraction = constant(1.0) - notIdleFraction;
-    numIdleCycles = idleFraction * numCycles;
-    numBusyCycles = (notIdleFraction)*numCycles;
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        SimpleExecContext& t_info = *threadInfo[tid];
+
+        std::string thread_str = name();
+        if (numThreads > 1)
+            thread_str += ".thread" + std::to_string(tid);
+
+        t_info.numInsts
+            .name(thread_str + ".committedInsts")
+            .desc("Number of instructions committed")
+            ;
+
+        t_info.numOps
+            .name(thread_str + ".committedOps")
+            .desc("Number of ops (including micro ops) committed")
+            ;
+
+        t_info.numIntAluAccesses
+            .name(thread_str + ".num_int_alu_accesses")
+            .desc("Number of integer alu accesses")
+            ;
+
+        t_info.numFpAluAccesses
+            .name(thread_str + ".num_fp_alu_accesses")
+            .desc("Number of float alu accesses")
+            ;
+
+        t_info.numVecAluAccesses
+            .name(thread_str + ".num_vec_alu_accesses")
+            .desc("Number of vector alu accesses")
+            ;
+
+        t_info.numCallsReturns
+            .name(thread_str + ".num_func_calls")
+            .desc("number of times a function call or return occured")
+            ;
+
+        t_info.numCondCtrlInsts
+            .name(thread_str + ".num_conditional_control_insts")
+            .desc("number of instructions that are conditional controls")
+            ;
+
+        t_info.numIntInsts
+            .name(thread_str + ".num_int_insts")
+            .desc("number of integer instructions")
+            ;
+
+        t_info.numFpInsts
+            .name(thread_str + ".num_fp_insts")
+            .desc("number of float instructions")
+            ;
+
+        t_info.numVecInsts
+            .name(thread_str + ".num_vec_insts")
+            .desc("number of vector instructions")
+            ;
+
+        t_info.numIntRegReads
+            .name(thread_str + ".num_int_register_reads")
+            .desc("number of times the integer registers were read")
+            ;
+
+        t_info.numIntRegWrites
+            .name(thread_str + ".num_int_register_writes")
+            .desc("number of times the integer registers were written")
+            ;
+
+        t_info.numFpRegReads
+            .name(thread_str + ".num_fp_register_reads")
+            .desc("number of times the floating registers were read")
+            ;
+
+        t_info.numFpRegWrites
+            .name(thread_str + ".num_fp_register_writes")
+            .desc("number of times the floating registers were written")
+            ;
+
+        t_info.numVecRegReads
+            .name(thread_str + ".num_vec_register_reads")
+            .desc("number of times the vector registers were read")
+            ;
+
+        t_info.numVecRegWrites
+            .name(thread_str + ".num_vec_register_writes")
+            .desc("number of times the vector registers were written")
+            ;
+
+        t_info.numCCRegReads
+            .name(thread_str + ".num_cc_register_reads")
+            .desc("number of times the CC registers were read")
+            .flags(nozero)
+            ;
+
+        t_info.numCCRegWrites
+            .name(thread_str + ".num_cc_register_writes")
+            .desc("number of times the CC registers were written")
+            .flags(nozero)
+            ;
+
+        t_info.numMemRefs
+            .name(thread_str + ".num_mem_refs")
+            .desc("number of memory refs")
+            ;
+
+        t_info.numStoreInsts
+            .name(thread_str + ".num_store_insts")
+            .desc("Number of store instructions")
+            ;
+
+        t_info.numLoadInsts
+            .name(thread_str + ".num_load_insts")
+            .desc("Number of load instructions")
+            ;
+
+        t_info.notIdleFraction
+            .name(thread_str + ".not_idle_fraction")
+            .desc("Percentage of non-idle cycles")
+            ;
+
+        t_info.idleFraction
+            .name(thread_str + ".idle_fraction")
+            .desc("Percentage of idle cycles")
+            ;
+
+        t_info.numBusyCycles
+            .name(thread_str + ".num_busy_cycles")
+            .desc("Number of busy cycles")
+            ;
+
+        t_info.numIdleCycles
+            .name(thread_str + ".num_idle_cycles")
+            .desc("Number of idle cycles")
+            ;
+
+        t_info.icacheStallCycles
+            .name(thread_str + ".icache_stall_cycles")
+            .desc("ICache total stall cycles")
+            .prereq(t_info.icacheStallCycles)
+            ;
+
+        t_info.dcacheStallCycles
+            .name(thread_str + ".dcache_stall_cycles")
+            .desc("DCache total stall cycles")
+            .prereq(t_info.dcacheStallCycles)
+            ;
+
+        t_info.statExecutedInstType
+            .init(Enums::Num_OpClass)
+            .name(thread_str + ".op_class")
+            .desc("Class of executed instruction")
+            .flags(total | pdf | dist)
+            ;
+
+        for (unsigned i = 0; i < Num_OpClasses; ++i) {
+            t_info.statExecutedInstType.subname(i, Enums::OpClassStrings[i]);
+        }
+
+        t_info.idleFraction = constant(1.0) - t_info.notIdleFraction;
+        t_info.numIdleCycles = t_info.idleFraction * numCycles;
+        t_info.numBusyCycles = t_info.notIdleFraction * numCycles;
+
+        t_info.numBranches
+            .name(thread_str + ".Branches")
+            .desc("Number of branches fetched")
+            .prereq(t_info.numBranches);
+
+        t_info.numPredictedBranches
+            .name(thread_str + ".predictedBranches")
+            .desc("Number of branches predicted as taken")
+            .prereq(t_info.numPredictedBranches);
+
+        t_info.numBranchMispred
+            .name(thread_str + ".BranchMispred")
+            .desc("Number of branch mispredictions")
+            .prereq(t_info.numBranchMispred);
+    }
 }
 
 void
 BaseSimpleCPU::resetStats()  // Re-seeds every thread's notIdleFraction from the CPU's current _status.
 {
-//    startNumInst = numInst;
-     notIdleFraction = (_status != Idle);
+    for (auto &thread_info : threadInfo) {
+        thread_info->notIdleFraction = (_status != Idle);  // assigned true unless the CPU status is Idle
+    }
 }
 
 void
-BaseSimpleCPU::serialize(ostream &os)
+BaseSimpleCPU::serializeThread(CheckpointOut &cp, ThreadID tid) const  // Serializes thread tid's SimpleThread into the checkpoint cp.
 {
-    SERIALIZE_ENUM(_status);
-    BaseCPU::serialize(os);
-//    SERIALIZE_SCALAR(inst);
-    nameOut(os, csprintf("%s.xc.0", name()));
-    thread->serialize(os);
+    assert(_status == Idle || _status == Running);  // serialization is only expected while the CPU is Idle or Running
+
+    threadInfo[tid]->thread->serialize(cp);
 }
 
 void
-BaseSimpleCPU::unserialize(Checkpoint *cp, const string &section)
+BaseSimpleCPU::unserializeThread(CheckpointIn &cp, ThreadID tid)  // Restores thread tid's SimpleThread from the checkpoint cp.
 {
-    UNSERIALIZE_ENUM(_status);
-    BaseCPU::unserialize(cp, section);
-//    UNSERIALIZE_SCALAR(inst);
-    thread->unserialize(cp, csprintf("%s.xc.0", section));
+    threadInfo[tid]->thread->unserialize(cp);  // _status is no longer read back here
 }
 
 void
@@ -308,30 +434,35 @@ change_thread_state(ThreadID tid, int activate, int priority)
 Addr
 BaseSimpleCPU::dbg_vtophys(Addr addr)  // Returns vtophys() of addr evaluated against the current thread's context (presumably a virtual-to-physical lookup - see vtophys).
 {
-    return vtophys(tc, addr);
+    return vtophys(threadContexts[curThread], addr);  // uses whichever thread is current at call time
 }
 
 void
-BaseSimpleCPU::wakeup()
+BaseSimpleCPU::wakeup(ThreadID tid)  // Flags a wakeup on thread tid's address monitor and activates the thread if it is currently Suspended.
 {
-    if (thread->status() != ThreadContext::Suspended)
-        return;
+    getCpuAddrMonitor(tid)->gotWakeup = true;  // set unconditionally, before the Suspended check
 
-    DPRINTF(Quiesce,"Suspended Processor awoke\n");
-    thread->activate();
+    if (threadInfo[tid]->thread->status() == ThreadContext::Suspended) {  // threads in any other state are left as they are
+        DPRINTF(Quiesce,"[tid:%d] Suspended Processor awoke\n", tid);
+        threadInfo[tid]->thread->activate();
+    }
 }
 
 void
 BaseSimpleCPU::checkForInterrupts()  // If the current thread has an interrupt to take, invokes it and resets that thread's fetch offset and decoder.
 {
+    SimpleExecContext&t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+    ThreadContext* tc = thread->getTC();
+
     if (checkInterrupts(tc)) {
-        Fault interrupt = interrupts->getInterrupt(tc);
+        Fault interrupt = interrupts[curThread]->getInterrupt(tc);  // interrupts is indexed by the current thread
 
         if (interrupt != NoFault) {
-            fetchOffset = 0;
-            interrupts->updateIntrInfo(tc);
+            t_info.fetchOffset = 0;  // fetchOffset is added to the masked PC in setupFetchRequest(); clear it before invoking the interrupt
+            interrupts[curThread]->updateIntrInfo(tc);
             interrupt->invoke(tc);
-            predecoder.reset();
+            thread->decoder.reset();  // the per-thread decoder replaces the old CPU-level predecoder
         }
     }
 }
@@ -340,20 +471,26 @@ BaseSimpleCPU::checkForInterrupts()
 void
 BaseSimpleCPU::setupFetchRequest(Request *req)  // Fills req with a virtual instruction-fetch request for the current thread.
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     Addr instAddr = thread->instAddr();
+    Addr fetchPC = (instAddr & PCMask) + t_info.fetchOffset;  // masked instruction address plus the thread's current fetch offset
 
     // set up memory request for instruction fetch
-    DPRINTF(Fetch, "Fetch: PC:%08p\n", instAddr);
+    DPRINTF(Fetch, "Fetch: Inst PC:%08p, Fetch PC:%08p\n", instAddr, fetchPC);
 
-    Addr fetchPC = (instAddr & PCMask) + fetchOffset;
-    req->setVirt(0, fetchPC, sizeof(MachInst), Request::INST_FETCH, instMasterId(),
-            instAddr);
+    req->setVirt(0, fetchPC, sizeof(MachInst), Request::INST_FETCH,
+                 instMasterId(), instAddr);  // sizeof(MachInst) bytes at fetchPC, flagged INST_FETCH; instAddr is passed as the last argument
 }
 
 
 void
 BaseSimpleCPU::preExecute()
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     // maintain $r0 semantics
     thread->setIntReg(ZeroReg, 0);
 #if THE_ISA == ALPHA_ISA
@@ -361,7 +498,7 @@ BaseSimpleCPU::preExecute()
 #endif // ALPHA_ISA
 
     // check for instruction-count-based events
-    comInstEventQueue[0]->serviceEvents(numInst);
+    comInstEventQueue[curThread]->serviceEvents(t_info.numInst);
     system->instEventQueue.serviceEvents(system->totalNumInsts);
 
     // decode the instruction
@@ -370,40 +507,40 @@ BaseSimpleCPU::preExecute()
     TheISA::PCState pcState = thread->pcState();
 
     if (isRomMicroPC(pcState.microPC())) {
-        stayAtPC = false;
+        t_info.stayAtPC = false;
         curStaticInst = microcodeRom.fetchMicroop(pcState.microPC(),
                                                   curMacroStaticInst);
     } else if (!curMacroStaticInst) {
         //We're not in the middle of a macro instruction
         StaticInstPtr instPtr = NULL;
 
+        TheISA::Decoder *decoder = &(thread->decoder);
+
         //Predecode, ie bundle up an ExtMachInst
-        //This should go away once the constructor can be set up properly
-        predecoder.setTC(thread->getTC());
         //If more fetch data is needed, pass it in.
-        Addr fetchPC = (pcState.instAddr() & PCMask) + fetchOffset;
-        //if(predecoder.needMoreBytes())
-            predecoder.moreBytes(pcState, fetchPC, inst);
+        Addr fetchPC = (pcState.instAddr() & PCMask) + t_info.fetchOffset;
+        //if (decoder->needMoreBytes())
+            decoder->moreBytes(pcState, fetchPC, inst);
         //else
-        //    predecoder.process();
+        //    decoder->process();
 
-        //If an instruction is ready, decode it. Otherwise, we'll have to
+        //Decode an instruction if one is ready. Otherwise, we'll have to
         //fetch beyond the MachInst at the current pc.
-        if (predecoder.extMachInstReady()) {
-            stayAtPC = false;
-            ExtMachInst machInst = predecoder.getExtMachInst(pcState);
+        instPtr = decoder->decode(pcState);
+        if (instPtr) {
+            t_info.stayAtPC = false;
             thread->pcState(pcState);
-            instPtr = thread->decoder.decode(machInst, pcState.instAddr());
         } else {
-            stayAtPC = true;
-            fetchOffset += sizeof(MachInst);
+            t_info.stayAtPC = true;
+            t_info.fetchOffset += sizeof(MachInst);
         }
 
         //If we decoded an instruction and it's microcoded, start pulling
         //out micro ops
         if (instPtr && instPtr->isMacroop()) {
             curMacroStaticInst = instPtr;
-            curStaticInst = curMacroStaticInst->fetchMicroop(pcState.microPC());
+            curStaticInst =
+                curMacroStaticInst->fetchMicroop(pcState.microPC());
         } else {
             curStaticInst = instPtr;
         }
@@ -413,79 +550,108 @@ BaseSimpleCPU::preExecute()
     }
 
     //If we decoded an instruction this "tick", record information about it.
-    if(curStaticInst)
-    {
+    if (curStaticInst) {
 #if TRACING_ON
-        traceData = tracer->getInstRecord(curTick(), tc,
+        traceData = tracer->getInstRecord(curTick(), thread->getTC(),
                 curStaticInst, thread->pcState(), curMacroStaticInst);
 
-        DPRINTF(Decode,"Decode: Decoded %s instruction: 0x%x\n",
+        DPRINTF(Decode,"Decode: Decoded %s instruction: %#x\n",
                 curStaticInst->getName(), curStaticInst->machInst);
 #endif // TRACING_ON
     }
+
+    if (branchPred && curStaticInst &&
+        curStaticInst->isControl()) {
+        // Use a fake sequence number since we only have one
+        // instruction in flight at the same time.
+        const InstSeqNum cur_sn(0);
+        t_info.predPC = thread->pcState();
+        const bool predict_taken(
+            branchPred->predict(curStaticInst, cur_sn, t_info.predPC,
+                                curThread));
+
+        if (predict_taken)
+            ++t_info.numPredictedBranches;
+    }
 }
 
 void
 BaseSimpleCPU::postExecute()
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     assert(curStaticInst);
 
-    TheISA::PCState pc = tc->pcState();
+    TheISA::PCState pc = threadContexts[curThread]->pcState();
     Addr instAddr = pc.instAddr();
     if (FullSystem && thread->profile) {
-        bool usermode = TheISA::inUserMode(tc);
+        bool usermode = TheISA::inUserMode(threadContexts[curThread]);
         thread->profilePC = usermode ? 1 : instAddr;
-        ProfileNode *node = thread->profile->consume(tc, curStaticInst);
+        ProfileNode *node = thread->profile->consume(threadContexts[curThread],
+                                                     curStaticInst);
         if (node)
             thread->profileNode = node;
     }
 
     if (curStaticInst->isMemRef()) {
-        numMemRefs++;
+        t_info.numMemRefs++;
     }
 
     if (curStaticInst->isLoad()) {
-        ++numLoad;
-        comLoadEventQueue[0]->serviceEvents(numLoad);
+        ++t_info.numLoad;
+        comLoadEventQueue[curThread]->serviceEvents(t_info.numLoad);
     }
 
     if (CPA::available()) {
-        CPA::cpa()->swAutoBegin(tc, pc.nextInstAddr());
+        CPA::cpa()->swAutoBegin(threadContexts[curThread], pc.nextInstAddr());
+    }
+
+    if (curStaticInst->isControl()) {
+        ++t_info.numBranches;
     }
 
     /* Power model statistics */
     //integer alu accesses
     if (curStaticInst->isInteger()){
-        numIntAluAccesses++;
-        numIntInsts++;
+        t_info.numIntAluAccesses++;
+        t_info.numIntInsts++;
     }
 
     //float alu accesses
     if (curStaticInst->isFloating()){
-        numFpAluAccesses++;
-        numFpInsts++;
+        t_info.numFpAluAccesses++;
+        t_info.numFpInsts++;
     }
-    
+
+    //vector alu accesses
+    if (curStaticInst->isVector()){
+        t_info.numVecAluAccesses++;
+        t_info.numVecInsts++;
+    }
+
     //number of function calls/returns to get window accesses
     if (curStaticInst->isCall() || curStaticInst->isReturn()){
-        numCallsReturns++;
+        t_info.numCallsReturns++;
     }
-    
+
     //the number of branch predictions that will be made
     if (curStaticInst->isCondCtrl()){
-        numCondCtrlInsts++;
+        t_info.numCondCtrlInsts++;
     }
-    
+
     //result bus acceses
     if (curStaticInst->isLoad()){
-        numLoadInsts++;
+        t_info.numLoadInsts++;
     }
-    
+
     if (curStaticInst->isStore()){
-        numStoreInsts++;
+        t_info.numStoreInsts++;
     }
     /* End power model statistics */
 
+    t_info.statExecutedInstType[curStaticInst->opClass()]++;
+
     if (FullSystem)
         traceFunctions(instAddr);
 
@@ -494,18 +660,25 @@ BaseSimpleCPU::postExecute()
         delete traceData;
         traceData = NULL;
     }
-}
 
+    // Call CPU instruction commit probes
+    probeInstCommit(curStaticInst);
+}
 
 void
-BaseSimpleCPU::advancePC(Fault fault)
+BaseSimpleCPU::advancePC(const Fault &fault)
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
+    const bool branching(thread->pcState().branching());
+
     //Since we're moving to a new pc, zero out the offset
-    fetchOffset = 0;
+    t_info.fetchOffset = 0;
     if (fault != NoFault) {
         curMacroStaticInst = StaticInst::nullStaticInstPtr;
-        fault->invoke(tc, curStaticInst);
-        predecoder.reset();
+        fault->invoke(threadContexts[curThread], curStaticInst);
+        thread->decoder.reset();
     } else {
         if (curStaticInst) {
             if (curStaticInst->isLastMicroop())
@@ -515,39 +688,27 @@ BaseSimpleCPU::advancePC(Fault fault)
             thread->pcState(pcState);
         }
     }
+
+    if (branchPred && curStaticInst && curStaticInst->isControl()) {
+        // Use a fake sequence number since we only have one
+        // instruction in flight at the same time.
+        const InstSeqNum cur_sn(0);
+
+        if (t_info.predPC == thread->pcState()) {
+            // Correctly predicted branch
+            branchPred->update(cur_sn, curThread);
+        } else {
+            // Mis-predicted branch
+            branchPred->squash(cur_sn, thread->pcState(), branching, curThread);
+            ++t_info.numBranchMispred;
+        }
+    }
 }
 
-/*Fault
-BaseSimpleCPU::CacheOp(uint8_t Op, Addr EffAddr)
+void
+BaseSimpleCPU::startup()  // Runs BaseCPU::startup(), then calls startup() on every thread's SimpleThread.
 {
-    // translate to physical address
-    Fault fault = NoFault;
-    int CacheID = Op & 0x3; // Lower 3 bits identify Cache
-    int CacheOP = Op >> 2; // Upper 3 bits identify Cache Operation
-    if(CacheID > 1)
-      {
-        warn("CacheOps not implemented for secondary/tertiary caches\n");
-      }
-    else
-      {
-        switch(CacheOP)
-          { // Fill Packet Type
-          case 0: warn("Invalidate Cache Op\n");
-            break;
-          case 1: warn("Index Load Tag Cache Op\n");
-            break;
-          case 2: warn("Index Store Tag Cache Op\n");
-            break;
-          case 4: warn("Hit Invalidate Cache Op\n");
-            break;
-          case 5: warn("Fill/Hit Writeback Invalidate Cache Op\n");
-            break;
-          case 6: warn("Hit Writeback\n");
-            break;
-          case 7: warn("Fetch & Lock Cache Op\n");
-            break;
-          default: warn("Unimplemented Cache Op\n");
-          }
-      }
-    return fault;
-}*/
+    BaseCPU::startup();  // base-class startup runs before the per-thread calls
+    for (auto& t_info : threadInfo)
+        t_info->thread->startup();
+}