add in an init() callback for CPUs so that no stats are accessed prior to the end...
[gem5.git] / cpu / simple_cpu / simple_cpu.cc
index f4fc1b8237f76baa072710f7027e3610c32c7c88..721861dd5e881b6d2b09a5546613a85acca3e9c2 100644 (file)
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
 #include <iostream>
 #include <iomanip>
 #include <list>
 #include <sstream>
 #include <string>
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-
-#include "sim/host.hh"
 #include "base/cprintf.hh"
+#include "base/inifile.hh"
+#include "base/loader/symtab.hh"
 #include "base/misc.hh"
-#include "cpu/full_cpu/smt.hh"
-
-#include "sim/annotation.hh"
-#include "cpu/exec_context.hh"
+#include "base/pollevent.hh"
+#include "base/range.hh"
+#include "base/trace.hh"
 #include "cpu/base_cpu.hh"
-#include "sim/debug.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/full_cpu/smt.hh"
 #include "cpu/simple_cpu/simple_cpu.hh"
-#include "base/inifile.hh"
-#include "mem/mem_interface.hh"
-#include "mem/base_mem.hh"
 #include "cpu/static_inst.hh"
+#include "mem/base_mem.hh"
+#include "mem/mem_interface.hh"
+#include "sim/annotation.hh"
+#include "sim/builder.hh"
+#include "sim/debug.hh"
+#include "sim/host.hh"
+#include "sim/sim_events.hh"
+#include "sim/sim_object.hh"
+#include "sim/sim_stats.hh"
 
 #ifdef FULL_SYSTEM
+#include "base/remote_gdb.hh"
+#include "dev/alpha_access.h"
+#include "dev/pciareg.h"
 #include "mem/functional_mem/memory_control.hh"
 #include "mem/functional_mem/physical_memory.hh"
-#include "targetarch/alpha_memory.hh"
 #include "sim/system.hh"
+#include "targetarch/alpha_memory.hh"
+#include "targetarch/vtophys.hh"
 #else // !FULL_SYSTEM
-#include "mem/functional_mem/functional_memory.hh"
-#include "sim/prog.hh"
 #include "eio/eio.hh"
+#include "mem/functional_mem/functional_memory.hh"
 #endif // FULL_SYSTEM
 
-#include "cpu/exetrace.hh"
-#include "base/trace.hh"
-#include "sim/sim_events.hh"
-#include "base/pollevent.hh"
-#include "sim/sim_object.hh"
-#include "sim/sim_stats.hh"
+using namespace std;
 
-#include "base/range.hh"
-#include "base/loader/symtab.hh"
+SimpleCPU::TickEvent::TickEvent(SimpleCPU *c)
+    : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c)
+{
+}
 
-#ifdef FULL_SYSTEM
-#include "targetarch/vtophys.hh"
-#include "dev/pciareg.h"
-#include "base/remote_gdb.hh"
-#include "dev/alpha_access.h"
-#endif
+void
+SimpleCPU::TickEvent::process()
+{
+    cpu->tick();
+}
 
+const char *
+SimpleCPU::TickEvent::description()
+{
+    return "SimpleCPU tick event";
+}
 
-using namespace std;
 
 SimpleCPU::CacheCompletionEvent::CacheCompletionEvent(SimpleCPU *_cpu)
     : Event(&mainEventQueue),
@@ -96,7 +106,7 @@ void SimpleCPU::CacheCompletionEvent::process()
 const char *
 SimpleCPU::CacheCompletionEvent::description()
 {
-    return "cache completion event";
+    return "SimpleCPU cache completion event";
 }
 
 #ifdef FULL_SYSTEM
@@ -104,104 +114,149 @@ SimpleCPU::SimpleCPU(const string &_name,
                      System *_system,
                      Counter max_insts_any_thread,
                      Counter max_insts_all_threads,
+                     Counter max_loads_any_thread,
+                     Counter max_loads_all_threads,
                      AlphaItb *itb, AlphaDtb *dtb,
                      FunctionalMemory *mem,
                      MemInterface *icache_interface,
                      MemInterface *dcache_interface,
-                     int cpu_id, Tick freq)
+                     bool _def_reg, Tick freq)
     : BaseCPU(_name, /* number_of_threads */ 1,
               max_insts_any_thread, max_insts_all_threads,
-              _system, cpu_id, freq),
+              max_loads_any_thread, max_loads_all_threads,
+              _system, freq),
 #else
 SimpleCPU::SimpleCPU(const string &_name, Process *_process,
                      Counter max_insts_any_thread,
                      Counter max_insts_all_threads,
+                     Counter max_loads_any_thread,
+                     Counter max_loads_all_threads,
                      MemInterface *icache_interface,
-                     MemInterface *dcache_interface)
+                     MemInterface *dcache_interface,
+                     bool _def_reg)
     : BaseCPU(_name, /* number_of_threads */ 1,
-              max_insts_any_thread, max_insts_all_threads),
+              max_insts_any_thread, max_insts_all_threads,
+              max_loads_any_thread, max_loads_all_threads),
 #endif
-      tickEvent(this), xc(NULL), cacheCompletionEvent(this)
+      tickEvent(this), xc(NULL), defer_registration(_def_reg),
+      cacheCompletionEvent(this)
 {
+    _status = Idle;
 #ifdef FULL_SYSTEM
-    xc = new ExecContext(this, 0, system, itb, dtb, mem, cpu_id);
+    xc = new ExecContext(this, 0, system, itb, dtb, mem);
 
-    _status = Running;
-    if (cpu_id != 0) {
+    // initialize CPU, including PC
+    TheISA::initCPU(&xc->regs);
+#else
+    xc = new ExecContext(this, /* thread_num */ 0, _process, /* asid */ 0);
+#endif // !FULL_SYSTEM
 
-       xc->setStatus(ExecContext::Unallocated);
+    icacheInterface = icache_interface;
+    dcacheInterface = dcache_interface;
 
-       //Open a GDB debug session on port (7000 + the cpu_id)
-       (new GDBListener(new RemoteGDB(system, xc), 7000 + cpu_id))->listen();
+    memReq = new MemReq();
+    memReq->xc = xc;
+    memReq->asid = 0;
+    memReq->data = new uint8_t[64];
 
-       AlphaISA::init(system->physmem, &xc->regs);
+    numInst = 0;
+    startNumInst = 0;
+    numLoad = 0;
+    startNumLoad = 0;
+    lastIcacheStall = 0;
+    lastDcacheStall = 0;
 
-       fault = Reset_Fault;
+    execContexts.push_back(xc);
+}
 
-       IntReg *ipr = xc->regs.ipr;
-       ipr[TheISA::IPR_MCSR] = 0x6;
+SimpleCPU::~SimpleCPU()
+{
+}
 
-       AlphaISA::swap_palshadow(&xc->regs, true);
+void SimpleCPU::init()
+{
+    if (!defer_registration) {
+        this->registerExecContexts();
+    }
+}
 
-       xc->regs.pc =
-           ipr[TheISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault];
-       xc->regs.npc = xc->regs.pc + sizeof(MachInst);
+void
+SimpleCPU::switchOut()
+{
+    _status = SwitchedOut;
+    if (tickEvent.scheduled())
+        tickEvent.squash();
+}
 
-       _status = Idle;
-    }
-    else {
-      system->init(xc);
 
-      // Reset the system
-      //
-      AlphaISA::init(system->physmem, &xc->regs);
+void
+SimpleCPU::takeOverFrom(BaseCPU *oldCPU)
+{
+    BaseCPU::takeOverFrom(oldCPU);
 
-      fault = Reset_Fault;
+    assert(!tickEvent.scheduled());
 
-      IntReg *ipr = xc->regs.ipr;
-      ipr[TheISA::IPR_MCSR] = 0x6;
+    // if any of this CPU's ExecContexts are active, mark the CPU as
+    // running and schedule its tick event.
+    for (int i = 0; i < execContexts.size(); ++i) {
+        ExecContext *xc = execContexts[i];
+        if (xc->status() == ExecContext::Active && _status != Running) {
+            _status = Running;
+            tickEvent.schedule(curTick);
+        }
+    }
 
-      AlphaISA::swap_palshadow(&xc->regs, true);
+    oldCPU->switchOut();
+}
 
-      xc->regs.pc = ipr[TheISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault];
-      xc->regs.npc = xc->regs.pc + sizeof(MachInst);
 
-       _status = Running;
-       tickEvent.schedule(0);
-    }
+void
+SimpleCPU::activateContext(int thread_num, int delay)
+{
+    assert(thread_num == 0);
+    assert(xc);
 
-#else
-    xc = new ExecContext(this, /* thread_num */ 0, _process, /* asid */ 0);
-    fault = No_Fault;
-    if (xc->status() == ExecContext::Active) {
-        _status = Running;
-       tickEvent.schedule(0);
-    } else
-        _status = Idle;
-#endif // !FULL_SYSTEM
+    assert(_status == Idle);
+    notIdleFraction++;
+    scheduleTickEvent(delay);
+    _status = Running;
+}
 
-    icacheInterface = icache_interface;
-    dcacheInterface = dcache_interface;
 
-    memReq = new MemReq();
-    memReq->xc = xc;
-    memReq->asid = 0;
+void
+SimpleCPU::suspendContext(int thread_num)
+{
+    assert(thread_num == 0);
+    assert(xc);
 
-    numInst = 0;
-    last_idle = 0;
-    lastIcacheStall = 0;
-    lastDcacheStall = 0;
+    assert(_status == Running);
+    notIdleFraction--;
+    unscheduleTickEvent();
+    _status = Idle;
+}
 
-    contexts.push_back(xc);
+
+void
+SimpleCPU::deallocateContext(int thread_num)
+{
+    // for now, these are equivalent
+    suspendContext(thread_num);
 }
 
-SimpleCPU::~SimpleCPU()
+
+void
+SimpleCPU::haltContext(int thread_num)
 {
+    // for now, these are equivalent
+    suspendContext(thread_num);
 }
 
+
 void
 SimpleCPU::regStats()
 {
+    using namespace Statistics;
+
     BaseCPU::regStats();
 
     numInsts
@@ -214,11 +269,6 @@ SimpleCPU::regStats()
         .desc("Number of memory references")
         ;
 
-    idleCycles
-        .name(name() + ".idle_cycles")
-        .desc("Number of idle cycles")
-        ;
-
     idleFraction
         .name(name() + ".idle_fraction")
         .desc("Percentage of idle cycles")
@@ -236,60 +286,40 @@ SimpleCPU::regStats()
         .prereq(dcacheStallCycles)
         ;
 
-    idleFraction = idleCycles / simTicks;
-
-    numInsts = Statistics::scalar(numInst);
+    idleFraction = constant(1.0) - notIdleFraction;
+    numInsts = Statistics::scalar(numInst) - Statistics::scalar(startNumInst);
     simInsts += numInsts;
 }
 
 void
-SimpleCPU::serialize()
+SimpleCPU::resetStats()
 {
-    nameOut();
-
-#ifdef FULL_SYSTEM
-#if 0
-    // do we need this anymore?? egh
-    childOut("itb", xc->itb);
-    childOut("dtb", xc->dtb);
-    childOut("physmem", physmem);
-#endif
-#endif
-
-    for (int i = 0; i < NumIntRegs; i++) {
-        stringstream buf;
-        ccprintf(buf, "R%02d", i);
-        paramOut(buf.str(), xc->regs.intRegFile[i]);
-    }
-    for (int i = 0; i < NumFloatRegs; i++) {
-        stringstream buf;
-        ccprintf(buf, "F%02d", i);
-        paramOut(buf.str(), xc->regs.floatRegFile.d[i]);
-    }
-    // CPUTraitsType::serializeSpecialRegs(getProxy(), xc->regs);
+    startNumInst = numInst;
+    notIdleFraction = (_status != Idle);
 }
 
 void
-SimpleCPU::unserialize(IniFile &db, const string &category, ConfigNode *node)
+SimpleCPU::serialize(ostream &os)
 {
-    string data;
-
-    for (int i = 0; i < NumIntRegs; i++) {
-        stringstream buf;
-        ccprintf(buf, "R%02d", i);
-        db.findDefault(category, buf.str(), data);
-        to_number(data,xc->regs.intRegFile[i]);
-    }
-    for (int i = 0; i < NumFloatRegs; i++) {
-        stringstream buf;
-        ccprintf(buf, "F%02d", i);
-        db.findDefault(category, buf.str(), data);
-        xc->regs.floatRegFile.d[i] = strtod(data.c_str(),NULL);
-    }
-
-    // Read in Special registers
+    SERIALIZE_ENUM(_status);
+    SERIALIZE_SCALAR(inst);
+    nameOut(os, csprintf("%s.xc", name()));
+    xc->serialize(os);
+    nameOut(os, csprintf("%s.tickEvent", name()));
+    tickEvent.serialize(os);
+    nameOut(os, csprintf("%s.cacheCompletionEvent", name()));
+    cacheCompletionEvent.serialize(os);
+}
 
-    // CPUTraitsType::unserializeSpecialRegs(db,category,node,xc->regs);
+void
+SimpleCPU::unserialize(Checkpoint *cp, const string &section)
+{
+    UNSERIALIZE_ENUM(_status);
+    UNSERIALIZE_SCALAR(inst);
+    xc->unserialize(cp, csprintf("%s.xc", section));
+    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+    cacheCompletionEvent
+        .unserialize(cp, csprintf("%s.cacheCompletionEvent", section));
 }
 
 void
@@ -300,7 +330,7 @@ change_thread_state(int thread_number, int activate, int priority)
 // precise architected memory state accessor macros
 template <class T>
 Fault
-SimpleCPU::read(Addr addr, T& data, unsigned flags)
+SimpleCPU::read(Addr addr, T &data, unsigned flags)
 {
     memReq->reset(addr, sizeof(T), flags);
 
@@ -322,15 +352,16 @@ SimpleCPU::read(Addr addr, T& data, unsigned flags)
         memReq->cmd = Read;
         memReq->completionEvent = NULL;
         memReq->time = curTick;
-        memReq->flags &= ~UNCACHEABLE;
         MemAccessResult result = dcacheInterface->access(memReq);
 
         // Ugly hack to get an event scheduled *only* if the access is
         // a miss.  We really should add first-class support for this
         // at some point.
-        if (result != MA_HIT && dcacheInterface->doEvents) {
+        if (result != MA_HIT && dcacheInterface->doEvents()) {
             memReq->completionEvent = &cacheCompletionEvent;
-            setStatus(DcacheMissStall);
+            lastDcacheStall = curTick;
+            unscheduleTickEvent();
+            _status = DcacheMissStall;
         }
     }
 
@@ -341,32 +372,32 @@ SimpleCPU::read(Addr addr, T& data, unsigned flags)
 
 template
 Fault
-SimpleCPU::read(Addr addr, uint64_t& data, unsigned flags);
+SimpleCPU::read(Addr addr, uint64_t &data, unsigned flags);
 
 template
 Fault
-SimpleCPU::read(Addr addr, uint32_t& data, unsigned flags);
+SimpleCPU::read(Addr addr, uint32_t &data, unsigned flags);
 
 template
 Fault
-SimpleCPU::read(Addr addr, uint16_t& data, unsigned flags);
+SimpleCPU::read(Addr addr, uint16_t &data, unsigned flags);
 
 template
 Fault
-SimpleCPU::read(Addr addr, uint8_t& data, unsigned flags);
+SimpleCPU::read(Addr addr, uint8_t &data, unsigned flags);
 
 #endif //DOXYGEN_SHOULD_SKIP_THIS
 
 template<>
 Fault
-SimpleCPU::read(Addr addr, double& data, unsigned flags)
+SimpleCPU::read(Addr addr, double &data, unsigned flags)
 {
     return read(addr, *(uint64_t*)&data, flags);
 }
 
 template<>
 Fault
-SimpleCPU::read(Addr addr, float& data, unsigned flags)
+SimpleCPU::read(Addr addr, float &data, unsigned flags)
 {
     return read(addr, *(uint32_t*)&data, flags);
 }
@@ -374,7 +405,7 @@ SimpleCPU::read(Addr addr, float& data, unsigned flags)
 
 template<>
 Fault
-SimpleCPU::read(Addr addr, int32_t& data, unsigned flags)
+SimpleCPU::read(Addr addr, int32_t &data, unsigned flags)
 {
     return read(addr, (uint32_t&)data, flags);
 }
@@ -400,18 +431,19 @@ SimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 
     if (fault == No_Fault && dcacheInterface) {
         memReq->cmd = Write;
-        memReq->data = (uint8_t *)&data;
+        memcpy(memReq->data,(uint8_t *)&data,memReq->size);
         memReq->completionEvent = NULL;
         memReq->time = curTick;
-        memReq->flags &= ~UNCACHEABLE;
         MemAccessResult result = dcacheInterface->access(memReq);
 
         // Ugly hack to get an event scheduled *only* if the access is
         // a miss.  We really should add first-class support for this
         // at some point.
-        if (result != MA_HIT && dcacheInterface->doEvents) {
+        if (result != MA_HIT && dcacheInterface->doEvents()) {
             memReq->completionEvent = &cacheCompletionEvent;
-            setStatus(DcacheMissStall);
+            lastDcacheStall = curTick;
+            unscheduleTickEvent();
+            _status = DcacheMissStall;
         }
     }
 
@@ -481,12 +513,19 @@ SimpleCPU::processCacheCompletion()
     switch (status()) {
       case IcacheMissStall:
         icacheStallCycles += curTick - lastIcacheStall;
-        setStatus(IcacheMissComplete);
+        _status = IcacheMissComplete;
+        scheduleTickEvent(1);
         break;
       case DcacheMissStall:
         dcacheStallCycles += curTick - lastDcacheStall;
-        setStatus(Running);
+        _status = Running;
+        scheduleTickEvent(1);
         break;
+      case SwitchedOut:
+        // If this CPU has been switched out due to sampling/warm-up,
+        // ignore any further status changes (e.g., due to cache
+        // misses outstanding at the time of the switch).
+        return;
       default:
         panic("SimpleCPU::processCacheCompletion: bad state");
         break;
@@ -501,7 +540,7 @@ SimpleCPU::post_interrupt(int int_num, int index)
 
     if (xc->status() == ExecContext::Suspended) {
                 DPRINTF(IPI,"Suspended Processor awoke\n");
-        xc->setStatus(ExecContext::Active);
+        xc->activate();
         Annotate::Resume(xc);
     }
 }
@@ -513,8 +552,10 @@ SimpleCPU::tick()
 {
     traceData = NULL;
 
+    Fault fault = No_Fault;
+
 #ifdef FULL_SYSTEM
-    if (fault == No_Fault && AlphaISA::check_interrupts &&
+    if (AlphaISA::check_interrupts &&
         xc->cpu->check_interrupts() &&
         !PC_PAL(xc->regs.pc) &&
         status() != IcacheMissComplete) {
@@ -568,7 +609,9 @@ SimpleCPU::tick()
         // We've already fetched an instruction and were stalled on an
         // I-cache miss.  No need to fetch it again.
 
-        setStatus(Running);
+        // Set status to running; tick event will get rescheduled if
+        // necessary at end of tick() function.
+        _status = Running;
     }
     else {
         // Try to fetch an instruction
@@ -593,15 +636,16 @@ SimpleCPU::tick()
             memReq->completionEvent = NULL;
 
             memReq->time = curTick;
-            memReq->flags &= ~UNCACHEABLE;
             MemAccessResult result = icacheInterface->access(memReq);
 
             // Ugly hack to get an event scheduled *only* if the access is
             // a miss.  We really should add first-class support for this
             // at some point.
-            if (result != MA_HIT && icacheInterface->doEvents) {
+            if (result != MA_HIT && icacheInterface->doEvents()) {
                 memReq->completionEvent = &cacheCompletionEvent;
-                setStatus(IcacheMissStall);
+                lastIcacheStall = curTick;
+                unscheduleTickEvent();
+                _status = IcacheMissStall;
                 return;
             }
         }
@@ -615,7 +659,7 @@ SimpleCPU::tick()
         numInst++;
 
         // check for instruction-count-based events
-        comInsnEventQueue[0]->serviceEvents(numInst);
+        comInstEventQueue[0]->serviceEvents(numInst);
 
         // decode the instruction
         StaticInstPtr<TheISA> si(inst);
@@ -628,14 +672,44 @@ SimpleCPU::tick()
         xc->regs.ra = (inst >> 21) & 0x1f;
 #endif // FULL_SYSTEM
 
-        xc->func_exe_insn++;
+        xc->func_exe_inst++;
 
         fault = si->execute(this, xc, traceData);
-
+#ifdef FS_MEASURE
+        if (!(xc->misspeculating()) && (xc->system->bin)) {
+            SWContext *ctx = xc->swCtx;
+            if (ctx && !ctx->callStack.empty()) {
+                if (si->isCall()) {
+                    ctx->calls++;
+                }
+                if (si->isReturn()) {
+                     if (ctx->calls == 0) {
+                        fnCall *top = ctx->callStack.top();
+                        DPRINTF(TCPIP, "Removing %s from callstack.\n", top->name);
+                        delete top;
+                        ctx->callStack.pop();
+                        if (ctx->callStack.empty())
+                            xc->system->nonPath->activate();
+                        else
+                            ctx->callStack.top()->myBin->activate();
+
+                        xc->system->dumpState(xc);
+                    } else {
+                        ctx->calls--;
+                    }
+                }
+            }
+        }
+#endif
         if (si->isMemRef()) {
             numMemRefs++;
         }
 
+        if (si->isLoad()) {
+            ++numLoad;
+            comLoadEventQueue[0]->serviceEvents(numLoad);
+        }
+
         if (traceData)
             traceData->finalize();
 
@@ -679,13 +753,14 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
 
     Param<Counter> max_insts_any_thread;
     Param<Counter> max_insts_all_threads;
+    Param<Counter> max_loads_any_thread;
+    Param<Counter> max_loads_all_threads;
 
 #ifdef FULL_SYSTEM
     SimObjectParam<AlphaItb *> itb;
     SimObjectParam<AlphaDtb *> dtb;
     SimObjectParam<FunctionalMemory *> mem;
     SimObjectParam<System *> system;
-    Param<int> cpu_id;
     Param<int> mult;
 #else
     SimObjectParam<Process *> workload;
@@ -694,15 +769,23 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
     SimObjectParam<BaseMem *> icache;
     SimObjectParam<BaseMem *> dcache;
 
+    Param<bool> defer_registration;
+
 END_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
 
 BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
 
     INIT_PARAM_DFLT(max_insts_any_thread,
-                    "terminate when any thread reaches this insn count",
+                    "terminate when any thread reaches this inst count",
                     0),
     INIT_PARAM_DFLT(max_insts_all_threads,
-                    "terminate when all threads have reached this insn count",
+                    "terminate when all threads have reached this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_any_thread,
+                    "terminate when any thread reaches this load count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_all_threads,
+                    "terminate when all threads have reached this load count",
                     0),
 
 #ifdef FULL_SYSTEM
@@ -710,37 +793,51 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
     INIT_PARAM(dtb, "Data TLB"),
     INIT_PARAM(mem, "memory"),
     INIT_PARAM(system, "system object"),
-    INIT_PARAM_DFLT(cpu_id, "CPU identification number", 0),
     INIT_PARAM_DFLT(mult, "system clock multiplier", 1),
 #else
     INIT_PARAM(workload, "processes to run"),
 #endif // FULL_SYSTEM
 
     INIT_PARAM_DFLT(icache, "L1 instruction cache object", NULL),
-    INIT_PARAM_DFLT(dcache, "L1 data cache object", NULL)
+    INIT_PARAM_DFLT(dcache, "L1 data cache object", NULL),
+    INIT_PARAM_DFLT(defer_registration, "defer registration with system "
+                    "(for sampling)", false)
 
 END_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
 
 
 CREATE_SIM_OBJECT(SimpleCPU)
 {
+    SimpleCPU *cpu;
 #ifdef FULL_SYSTEM
     if (mult != 1)
         panic("processor clock multiplier must be 1\n");
 
-    return new SimpleCPU(getInstanceName(), system,
-                         max_insts_any_thread, max_insts_all_threads,
-                         itb, dtb, mem,
-                         (icache) ? icache->getInterface() : NULL,
-                         (dcache) ? dcache->getInterface() : NULL,
-                         cpu_id, ticksPerSecond * mult);
+    cpu = new SimpleCPU(getInstanceName(), system,
+                        max_insts_any_thread, max_insts_all_threads,
+                        max_loads_any_thread, max_loads_all_threads,
+                        itb, dtb, mem,
+                        (icache) ? icache->getInterface() : NULL,
+                        (dcache) ? dcache->getInterface() : NULL,
+                        defer_registration,
+                        ticksPerSecond * mult);
 #else
 
-    return new SimpleCPU(getInstanceName(), workload,
-                         max_insts_any_thread, max_insts_all_threads,
-                         icache->getInterface(), dcache->getInterface());
+    cpu = new SimpleCPU(getInstanceName(), workload,
+                        max_insts_any_thread, max_insts_all_threads,
+                        max_loads_any_thread, max_loads_all_threads,
+                        (icache) ? icache->getInterface() : NULL,
+                        (dcache) ? dcache->getInterface() : NULL,
+                        defer_registration);
 
 #endif // FULL_SYSTEM
+#if 0
+    if (!defer_registration) {
+        cpu->registerExecContexts();
+    }
+#endif
+    return cpu;
 }
 
 REGISTER_SIM_OBJECT("SimpleCPU", SimpleCPU)
+