add in an init() callback for CPUs so that no stats are accessed prior to the end...

[gem5.git] / cpu / simple_cpu / simple_cpu.cc
diff --git a/cpu/simple_cpu/simple_cpu.cc b/cpu/simple_cpu/simple_cpu.cc

index f4fc1b8237f76baa072710f7027e3610c32c7c88..721861dd5e881b6d2b09a5546613a85acca3e9c2 100644 (file)
--- a/cpu/simple_cpu/simple_cpu.cc
+++ b/cpu/simple_cpu/simple_cpu.cc
@@ -26,61 +26,71 @@
   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   */
  
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
  #include <iostream>
  #include <iomanip>
  #include <list>
  #include <sstream>
  #include <string>
  
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-
-#include "sim/host.hh"
  #include "base/cprintf.hh"
+#include "base/inifile.hh"
+#include "base/loader/symtab.hh"
  #include "base/misc.hh"
-#include "cpu/full_cpu/smt.hh"
-
-#include "sim/annotation.hh"
-#include "cpu/exec_context.hh"
+#include "base/pollevent.hh"
+#include "base/range.hh"
+#include "base/trace.hh"
  #include "cpu/base_cpu.hh"
-#include "sim/debug.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/full_cpu/smt.hh"
  #include "cpu/simple_cpu/simple_cpu.hh"
-#include "base/inifile.hh"
-#include "mem/mem_interface.hh"
-#include "mem/base_mem.hh"
  #include "cpu/static_inst.hh"
+#include "mem/base_mem.hh"
+#include "mem/mem_interface.hh"
+#include "sim/annotation.hh"
+#include "sim/builder.hh"
+#include "sim/debug.hh"
+#include "sim/host.hh"
+#include "sim/sim_events.hh"
+#include "sim/sim_object.hh"
+#include "sim/sim_stats.hh"
  
  #ifdef FULL_SYSTEM
+#include "base/remote_gdb.hh"
+#include "dev/alpha_access.h"
+#include "dev/pciareg.h"
  #include "mem/functional_mem/memory_control.hh"
  #include "mem/functional_mem/physical_memory.hh"
-#include "targetarch/alpha_memory.hh"
  #include "sim/system.hh"
+#include "targetarch/alpha_memory.hh"
+#include "targetarch/vtophys.hh"
  #else // !FULL_SYSTEM
-#include "mem/functional_mem/functional_memory.hh"
-#include "sim/prog.hh"
  #include "eio/eio.hh"
+#include "mem/functional_mem/functional_memory.hh"
  #endif // FULL_SYSTEM
  
-#include "cpu/exetrace.hh"
-#include "base/trace.hh"
-#include "sim/sim_events.hh"
-#include "base/pollevent.hh"
-#include "sim/sim_object.hh"
-#include "sim/sim_stats.hh"
+using namespace std;
  
-#include "base/range.hh"
-#include "base/loader/symtab.hh"
+SimpleCPU::TickEvent::TickEvent(SimpleCPU *c)
+    : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c)
+{
+}
  
-#ifdef FULL_SYSTEM
-#include "targetarch/vtophys.hh"
-#include "dev/pciareg.h"
-#include "base/remote_gdb.hh"
-#include "dev/alpha_access.h"
-#endif
+void
+SimpleCPU::TickEvent::process()
+{
+    cpu->tick();
+}
  
+const char *
+SimpleCPU::TickEvent::description()
+{
+    return "SimpleCPU tick event";
+}
  
-using namespace std;
  
  SimpleCPU::CacheCompletionEvent::CacheCompletionEvent(SimpleCPU *_cpu)
      : Event(&mainEventQueue),
@@ -96,7 +106,7 @@ void SimpleCPU::CacheCompletionEvent::process()
  const char *
  SimpleCPU::CacheCompletionEvent::description()
  {
-    return "cache completion event";
+    return "SimpleCPU cache completion event";
  }
  
  #ifdef FULL_SYSTEM
@@ -104,104 +114,149 @@ SimpleCPU::SimpleCPU(const string &_name,
                       System *_system,
                       Counter max_insts_any_thread,
                       Counter max_insts_all_threads,
+                     Counter max_loads_any_thread,
+                     Counter max_loads_all_threads,
                       AlphaItb *itb, AlphaDtb *dtb,
                       FunctionalMemory *mem,
                       MemInterface *icache_interface,
                       MemInterface *dcache_interface,
-                     int cpu_id, Tick freq)
+                     bool _def_reg, Tick freq)
      : BaseCPU(_name, /* number_of_threads */ 1,
                max_insts_any_thread, max_insts_all_threads,
-              _system, cpu_id, freq),
+              max_loads_any_thread, max_loads_all_threads,
+              _system, freq),
  #else
  SimpleCPU::SimpleCPU(const string &_name, Process *_process,
                       Counter max_insts_any_thread,
                       Counter max_insts_all_threads,
+                     Counter max_loads_any_thread,
+                     Counter max_loads_all_threads,
                       MemInterface *icache_interface,
-                     MemInterface *dcache_interface)
+                     MemInterface *dcache_interface,
+                     bool _def_reg)
      : BaseCPU(_name, /* number_of_threads */ 1,
-              max_insts_any_thread, max_insts_all_threads),
+              max_insts_any_thread, max_insts_all_threads,
+              max_loads_any_thread, max_loads_all_threads),
  #endif
-      tickEvent(this), xc(NULL), cacheCompletionEvent(this)
+      tickEvent(this), xc(NULL), defer_registration(_def_reg),
+      cacheCompletionEvent(this)
  {
+    _status = Idle;
  #ifdef FULL_SYSTEM
-    xc = new ExecContext(this, 0, system, itb, dtb, mem, cpu_id);
+    xc = new ExecContext(this, 0, system, itb, dtb, mem);
  
-    _status = Running;
-    if (cpu_id != 0) {
+    // initialize CPU, including PC
+    TheISA::initCPU(&xc->regs);
+#else
+    xc = new ExecContext(this, /* thread_num */ 0, _process, /* asid */ 0);
+#endif // !FULL_SYSTEM
  
-       xc->setStatus(ExecContext::Unallocated);
+    icacheInterface = icache_interface;
+    dcacheInterface = dcache_interface;
  
-       //Open a GDB debug session on port (7000 + the cpu_id)
-       (new GDBListener(new RemoteGDB(system, xc), 7000 + cpu_id))->listen();
+    memReq = new MemReq();
+    memReq->xc = xc;
+    memReq->asid = 0;
+    memReq->data = new uint8_t[64];
  
-       AlphaISA::init(system->physmem, &xc->regs);
+    numInst = 0;
+    startNumInst = 0;
+    numLoad = 0;
+    startNumLoad = 0;
+    lastIcacheStall = 0;
+    lastDcacheStall = 0;
  
-       fault = Reset_Fault;
+    execContexts.push_back(xc);
+}
  
-       IntReg *ipr = xc->regs.ipr;
-       ipr[TheISA::IPR_MCSR] = 0x6;
+SimpleCPU::~SimpleCPU()
+{
+}
  
-       AlphaISA::swap_palshadow(&xc->regs, true);
+void SimpleCPU::init()
+{
+    if (!defer_registration) {
+        this->registerExecContexts();
+    }
+}
  
-       xc->regs.pc =
-           ipr[TheISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault];
-       xc->regs.npc = xc->regs.pc + sizeof(MachInst);
+void
+SimpleCPU::switchOut()
+{
+    _status = SwitchedOut;
+    if (tickEvent.scheduled())
+        tickEvent.squash();
+}
  
-       _status = Idle;
-    }
-    else {
-      system->init(xc);
  
-      // Reset the system
-      //
-      AlphaISA::init(system->physmem, &xc->regs);
+void
+SimpleCPU::takeOverFrom(BaseCPU *oldCPU)
+{
+    BaseCPU::takeOverFrom(oldCPU);
  
-      fault = Reset_Fault;
+    assert(!tickEvent.scheduled());
  
-      IntReg *ipr = xc->regs.ipr;
-      ipr[TheISA::IPR_MCSR] = 0x6;
+    // if any of this CPU's ExecContexts are active, mark the CPU as
+    // running and schedule its tick event.
+    for (int i = 0; i < execContexts.size(); ++i) {
+        ExecContext *xc = execContexts[i];
+        if (xc->status() == ExecContext::Active && _status != Running) {
+            _status = Running;
+            tickEvent.schedule(curTick);
+        }
+    }
  
-      AlphaISA::swap_palshadow(&xc->regs, true);
+    oldCPU->switchOut();
+}
  
-      xc->regs.pc = ipr[TheISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault];
-      xc->regs.npc = xc->regs.pc + sizeof(MachInst);
  
-       _status = Running;
-       tickEvent.schedule(0);
-    }
+void
+SimpleCPU::activateContext(int thread_num, int delay)
+{
+    assert(thread_num == 0);
+    assert(xc);
  
-#else
-    xc = new ExecContext(this, /* thread_num */ 0, _process, /* asid */ 0);
-    fault = No_Fault;
-    if (xc->status() == ExecContext::Active) {
-        _status = Running;
-       tickEvent.schedule(0);
-    } else
-        _status = Idle;
-#endif // !FULL_SYSTEM
+    assert(_status == Idle);
+    notIdleFraction++;
+    scheduleTickEvent(delay);
+    _status = Running;
+}
  
-    icacheInterface = icache_interface;
-    dcacheInterface = dcache_interface;
  
-    memReq = new MemReq();
-    memReq->xc = xc;
-    memReq->asid = 0;
+void
+SimpleCPU::suspendContext(int thread_num)
+{
+    assert(thread_num == 0);
+    assert(xc);
  
-    numInst = 0;
-    last_idle = 0;
-    lastIcacheStall = 0;
-    lastDcacheStall = 0;
+    assert(_status == Running);
+    notIdleFraction--;
+    unscheduleTickEvent();
+    _status = Idle;
+}
  
-    contexts.push_back(xc);
+
+void
+SimpleCPU::deallocateContext(int thread_num)
+{
+    // for now, these are equivalent
+    suspendContext(thread_num);
  }
  
-SimpleCPU::~SimpleCPU()
+
+void
+SimpleCPU::haltContext(int thread_num)
  {
+    // for now, these are equivalent
+    suspendContext(thread_num);
  }
  
+
  void
  SimpleCPU::regStats()
  {
+    using namespace Statistics;
+
      BaseCPU::regStats();
  
      numInsts
@@ -214,11 +269,6 @@ SimpleCPU::regStats()
          .desc("Number of memory references")
          ;
  
-    idleCycles
-        .name(name() + ".idle_cycles")
-        .desc("Number of idle cycles")
-        ;
-
      idleFraction
          .name(name() + ".idle_fraction")
          .desc("Percentage of idle cycles")
@@ -236,60 +286,40 @@ SimpleCPU::regStats()
          .prereq(dcacheStallCycles)
          ;
  
-    idleFraction = idleCycles / simTicks;
-
-    numInsts = Statistics::scalar(numInst);
+    idleFraction = constant(1.0) - notIdleFraction;
+    numInsts = Statistics::scalar(numInst) - Statistics::scalar(startNumInst);
      simInsts += numInsts;
  }
  
  void
-SimpleCPU::serialize()
+SimpleCPU::resetStats()
  {
-    nameOut();
-
-#ifdef FULL_SYSTEM
-#if 0
-    // do we need this anymore?? egh
-    childOut("itb", xc->itb);
-    childOut("dtb", xc->dtb);
-    childOut("physmem", physmem);
-#endif
-#endif
-
-    for (int i = 0; i < NumIntRegs; i++) {
-        stringstream buf;
-        ccprintf(buf, "R%02d", i);
-        paramOut(buf.str(), xc->regs.intRegFile[i]);
-    }
-    for (int i = 0; i < NumFloatRegs; i++) {
-        stringstream buf;
-        ccprintf(buf, "F%02d", i);
-        paramOut(buf.str(), xc->regs.floatRegFile.d[i]);
-    }
-    // CPUTraitsType::serializeSpecialRegs(getProxy(), xc->regs);
+    startNumInst = numInst;
+    notIdleFraction = (_status != Idle);
  }
  
  void
-SimpleCPU::unserialize(IniFile &db, const string &category, ConfigNode *node)
+SimpleCPU::serialize(ostream &os)
  {
-    string data;
-
-    for (int i = 0; i < NumIntRegs; i++) {
-        stringstream buf;
-        ccprintf(buf, "R%02d", i);
-        db.findDefault(category, buf.str(), data);
-        to_number(data,xc->regs.intRegFile[i]);
-    }
-    for (int i = 0; i < NumFloatRegs; i++) {
-        stringstream buf;
-        ccprintf(buf, "F%02d", i);
-        db.findDefault(category, buf.str(), data);
-        xc->regs.floatRegFile.d[i] = strtod(data.c_str(),NULL);
-    }
-
-    // Read in Special registers
+    SERIALIZE_ENUM(_status);
+    SERIALIZE_SCALAR(inst);
+    nameOut(os, csprintf("%s.xc", name()));
+    xc->serialize(os);
+    nameOut(os, csprintf("%s.tickEvent", name()));
+    tickEvent.serialize(os);
+    nameOut(os, csprintf("%s.cacheCompletionEvent", name()));
+    cacheCompletionEvent.serialize(os);
+}
  
-    // CPUTraitsType::unserializeSpecialRegs(db,category,node,xc->regs);
+void
+SimpleCPU::unserialize(Checkpoint *cp, const string &section)
+{
+    UNSERIALIZE_ENUM(_status);
+    UNSERIALIZE_SCALAR(inst);
+    xc->unserialize(cp, csprintf("%s.xc", section));
+    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+    cacheCompletionEvent
+        .unserialize(cp, csprintf("%s.cacheCompletionEvent", section));
  }
  
  void
@@ -300,7 +330,7 @@ change_thread_state(int thread_number, int activate, int priority)
  // precise architected memory state accessor macros
  template <class T>
  Fault
-SimpleCPU::read(Addr addr, T& data, unsigned flags)
+SimpleCPU::read(Addr addr, T &data, unsigned flags)
  {
      memReq->reset(addr, sizeof(T), flags);
  
@@ -322,15 +352,16 @@ SimpleCPU::read(Addr addr, T& data, unsigned flags)
          memReq->cmd = Read;
          memReq->completionEvent = NULL;
          memReq->time = curTick;
-        memReq->flags &= ~UNCACHEABLE;
          MemAccessResult result = dcacheInterface->access(memReq);
  
          // Ugly hack to get an event scheduled *only* if the access is
          // a miss.  We really should add first-class support for this
          // at some point.
-        if (result != MA_HIT && dcacheInterface->doEvents) {
+        if (result != MA_HIT && dcacheInterface->doEvents()) {
              memReq->completionEvent = &cacheCompletionEvent;
-            setStatus(DcacheMissStall);
+            lastDcacheStall = curTick;
+            unscheduleTickEvent();
+            _status = DcacheMissStall;
          }
      }
  
@@ -341,32 +372,32 @@ SimpleCPU::read(Addr addr, T& data, unsigned flags)
  
  template
  Fault
-SimpleCPU::read(Addr addr, uint64_t& data, unsigned flags);
+SimpleCPU::read(Addr addr, uint64_t &data, unsigned flags);
  
  template
  Fault
-SimpleCPU::read(Addr addr, uint32_t& data, unsigned flags);
+SimpleCPU::read(Addr addr, uint32_t &data, unsigned flags);
  
  template
  Fault
-SimpleCPU::read(Addr addr, uint16_t& data, unsigned flags);
+SimpleCPU::read(Addr addr, uint16_t &data, unsigned flags);
  
  template
  Fault
-SimpleCPU::read(Addr addr, uint8_t& data, unsigned flags);
+SimpleCPU::read(Addr addr, uint8_t &data, unsigned flags);
  
  #endif //DOXYGEN_SHOULD_SKIP_THIS
  
  template<>
  Fault
-SimpleCPU::read(Addr addr, double& data, unsigned flags)
+SimpleCPU::read(Addr addr, double &data, unsigned flags)
  {
      return read(addr, *(uint64_t*)&data, flags);
  }
  
  template<>
  Fault
-SimpleCPU::read(Addr addr, float& data, unsigned flags)
+SimpleCPU::read(Addr addr, float &data, unsigned flags)
  {
      return read(addr, *(uint32_t*)&data, flags);
  }
@@ -374,7 +405,7 @@ SimpleCPU::read(Addr addr, float& data, unsigned flags)
  
  template<>
  Fault
-SimpleCPU::read(Addr addr, int32_t& data, unsigned flags)
+SimpleCPU::read(Addr addr, int32_t &data, unsigned flags)
  {
      return read(addr, (uint32_t&)data, flags);
  }
@@ -400,18 +431,19 @@ SimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
  
      if (fault == No_Fault && dcacheInterface) {
          memReq->cmd = Write;
-        memReq->data = (uint8_t *)&data;
+        memcpy(memReq->data,(uint8_t *)&data,memReq->size);
          memReq->completionEvent = NULL;
          memReq->time = curTick;
-        memReq->flags &= ~UNCACHEABLE;
          MemAccessResult result = dcacheInterface->access(memReq);
  
          // Ugly hack to get an event scheduled *only* if the access is
          // a miss.  We really should add first-class support for this
          // at some point.
-        if (result != MA_HIT && dcacheInterface->doEvents) {
+        if (result != MA_HIT && dcacheInterface->doEvents()) {
              memReq->completionEvent = &cacheCompletionEvent;
-            setStatus(DcacheMissStall);
+            lastDcacheStall = curTick;
+            unscheduleTickEvent();
+            _status = DcacheMissStall;
          }
      }
  
@@ -481,12 +513,19 @@ SimpleCPU::processCacheCompletion()
      switch (status()) {
        case IcacheMissStall:
          icacheStallCycles += curTick - lastIcacheStall;
-        setStatus(IcacheMissComplete);
+        _status = IcacheMissComplete;
+        scheduleTickEvent(1);
          break;
        case DcacheMissStall:
          dcacheStallCycles += curTick - lastDcacheStall;
-        setStatus(Running);
+        _status = Running;
+        scheduleTickEvent(1);
          break;
+      case SwitchedOut:
+        // If this CPU has been switched out due to sampling/warm-up,
+        // ignore any further status changes (e.g., due to cache
+        // misses outstanding at the time of the switch).
+        return;
        default:
          panic("SimpleCPU::processCacheCompletion: bad state");
          break;
@@ -501,7 +540,7 @@ SimpleCPU::post_interrupt(int int_num, int index)
  
      if (xc->status() == ExecContext::Suspended) {
                  DPRINTF(IPI,"Suspended Processor awoke\n");
-        xc->setStatus(ExecContext::Active);
+        xc->activate();
          Annotate::Resume(xc);
      }
  }
@@ -513,8 +552,10 @@ SimpleCPU::tick()
  {
      traceData = NULL;
  
+    Fault fault = No_Fault;
+
  #ifdef FULL_SYSTEM
-    if (fault == No_Fault && AlphaISA::check_interrupts &&
+    if (AlphaISA::check_interrupts &&
          xc->cpu->check_interrupts() &&
          !PC_PAL(xc->regs.pc) &&
          status() != IcacheMissComplete) {
@@ -568,7 +609,9 @@ SimpleCPU::tick()
          // We've already fetched an instruction and were stalled on an
          // I-cache miss.  No need to fetch it again.
  
-        setStatus(Running);
+        // Set status to running; tick event will get rescheduled if
+        // necessary at end of tick() function.
+        _status = Running;
      }
      else {
          // Try to fetch an instruction
@@ -593,15 +636,16 @@ SimpleCPU::tick()
              memReq->completionEvent = NULL;
  
              memReq->time = curTick;
-            memReq->flags &= ~UNCACHEABLE;
              MemAccessResult result = icacheInterface->access(memReq);
  
              // Ugly hack to get an event scheduled *only* if the access is
              // a miss.  We really should add first-class support for this
              // at some point.
-            if (result != MA_HIT && icacheInterface->doEvents) {
+            if (result != MA_HIT && icacheInterface->doEvents()) {
                  memReq->completionEvent = &cacheCompletionEvent;
-                setStatus(IcacheMissStall);
+                lastIcacheStall = curTick;
+                unscheduleTickEvent();
+                _status = IcacheMissStall;
                  return;
              }
          }
@@ -615,7 +659,7 @@ SimpleCPU::tick()
          numInst++;
  
          // check for instruction-count-based events
-        comInsnEventQueue[0]->serviceEvents(numInst);
+        comInstEventQueue[0]->serviceEvents(numInst);
  
          // decode the instruction
          StaticInstPtr<TheISA> si(inst);
@@ -628,14 +672,44 @@ SimpleCPU::tick()
          xc->regs.ra = (inst >> 21) & 0x1f;
  #endif // FULL_SYSTEM
  
-        xc->func_exe_insn++;
+        xc->func_exe_inst++;
  
          fault = si->execute(this, xc, traceData);
-
+#ifdef FS_MEASURE
+        if (!(xc->misspeculating()) && (xc->system->bin)) {
+            SWContext *ctx = xc->swCtx;
+            if (ctx && !ctx->callStack.empty()) {
+                if (si->isCall()) {
+                    ctx->calls++;
+                }
+                if (si->isReturn()) {
+                     if (ctx->calls == 0) {
+                        fnCall *top = ctx->callStack.top();
+                        DPRINTF(TCPIP, "Removing %s from callstack.\n", top->name);
+                        delete top;
+                        ctx->callStack.pop();
+                        if (ctx->callStack.empty())
+                            xc->system->nonPath->activate();
+                        else
+                            ctx->callStack.top()->myBin->activate();
+
+                        xc->system->dumpState(xc);
+                    } else {
+                        ctx->calls--;
+                    }
+                }
+            }
+        }
+#endif
          if (si->isMemRef()) {
              numMemRefs++;
          }
  
+        if (si->isLoad()) {
+            ++numLoad;
+            comLoadEventQueue[0]->serviceEvents(numLoad);
+        }
+
          if (traceData)
              traceData->finalize();
  
@@ -679,13 +753,14 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
  
      Param<Counter> max_insts_any_thread;
      Param<Counter> max_insts_all_threads;
+    Param<Counter> max_loads_any_thread;
+    Param<Counter> max_loads_all_threads;
  
  #ifdef FULL_SYSTEM
      SimObjectParam<AlphaItb *> itb;
      SimObjectParam<AlphaDtb *> dtb;
      SimObjectParam<FunctionalMemory *> mem;
      SimObjectParam<System *> system;
-    Param<int> cpu_id;
      Param<int> mult;
  #else
      SimObjectParam<Process *> workload;
@@ -694,15 +769,23 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
      SimObjectParam<BaseMem *> icache;
      SimObjectParam<BaseMem *> dcache;
  
+    Param<bool> defer_registration;
+
  END_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
  
  BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
  
      INIT_PARAM_DFLT(max_insts_any_thread,
-                    "terminate when any thread reaches this insn count",
+                    "terminate when any thread reaches this inst count",
                      0),
      INIT_PARAM_DFLT(max_insts_all_threads,
-                    "terminate when all threads have reached this insn count",
+                    "terminate when all threads have reached this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_any_thread,
+                    "terminate when any thread reaches this load count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_all_threads,
+                    "terminate when all threads have reached this load count",
                      0),
  
  #ifdef FULL_SYSTEM
@@ -710,37 +793,51 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
      INIT_PARAM(dtb, "Data TLB"),
      INIT_PARAM(mem, "memory"),
      INIT_PARAM(system, "system object"),
-    INIT_PARAM_DFLT(cpu_id, "CPU identification number", 0),
      INIT_PARAM_DFLT(mult, "system clock multiplier", 1),
  #else
      INIT_PARAM(workload, "processes to run"),
  #endif // FULL_SYSTEM
  
      INIT_PARAM_DFLT(icache, "L1 instruction cache object", NULL),
-    INIT_PARAM_DFLT(dcache, "L1 data cache object", NULL)
+    INIT_PARAM_DFLT(dcache, "L1 data cache object", NULL),
+    INIT_PARAM_DFLT(defer_registration, "defer registration with system "
+                    "(for sampling)", false)
  
  END_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
  
  
  CREATE_SIM_OBJECT(SimpleCPU)
  {
+    SimpleCPU *cpu;
  #ifdef FULL_SYSTEM
      if (mult != 1)
          panic("processor clock multiplier must be 1\n");
  
-    return new SimpleCPU(getInstanceName(), system,
-                         max_insts_any_thread, max_insts_all_threads,
-                         itb, dtb, mem,
-                         (icache) ? icache->getInterface() : NULL,
-                         (dcache) ? dcache->getInterface() : NULL,
-                         cpu_id, ticksPerSecond * mult);
+    cpu = new SimpleCPU(getInstanceName(), system,
+                        max_insts_any_thread, max_insts_all_threads,
+                        max_loads_any_thread, max_loads_all_threads,
+                        itb, dtb, mem,
+                        (icache) ? icache->getInterface() : NULL,
+                        (dcache) ? dcache->getInterface() : NULL,
+                        defer_registration,
+                        ticksPerSecond * mult);
  #else
  
-    return new SimpleCPU(getInstanceName(), workload,
-                         max_insts_any_thread, max_insts_all_threads,
-                         icache->getInterface(), dcache->getInterface());
+    cpu = new SimpleCPU(getInstanceName(), workload,
+                        max_insts_any_thread, max_insts_all_threads,
+                        max_loads_any_thread, max_loads_all_threads,
+                        (icache) ? icache->getInterface() : NULL,
+                        (dcache) ? dcache->getInterface() : NULL,
+                        defer_registration);
  
  #endif // FULL_SYSTEM
+#if 0
+    if (!defer_registration) {
+        cpu->registerExecContexts();
+    }
+#endif
+    return cpu;
  }
  
  REGISTER_SIM_OBJECT("SimpleCPU", SimpleCPU)
+