From 309e1d81939c44f6b31795be84868605e05b09ec Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 16 May 2006 17:36:50 -0400
Subject: [PATCH] Split SimpleCPU into two different models, AtomicSimpleCPU
 and TimingSimpleCPU, which use atomic and timing memory accesses
 respectively.  Common code is factored into the BaseSimpleCPU class.
 AtomicSimpleCPU includes an option (simulate_stalls) to add delays based on
 the estimated latency reported by the atomic accesses. Plain old "SimpleCPU"
 is gone; I have not updated all the config files (just test/test.py). Also
 fixes to get timing accesses working in new memory model and to get
 split-phase memory instruction definitions working with new memory model as
 well.

arch/alpha/isa/main.isa:
    Need to include packet_impl.hh for functions that use Packet objects.
arch/alpha/isa/mem.isa:
    Change completeAcc() methods to take Packet object pointers.
    Also split out StoreCond template for completeAcc(), since
    that's the only one that needs write_result and we get an
    unused variable warning if we always have it in there.
build/SConstruct:
    Update list of recognized CPU model names.
configs/test/test.py:
    Change SimpleCPU to AtomicSimpleCPU.
cpu/SConscript:
    Define sources for new CPU models.
    Add split memory access methods to CPU model signatures.
cpu/cpu_models.py:
cpu/static_inst.hh:
    Define new CPU models.
cpu/simple/base.cc:
cpu/simple/base.hh:
    Factor out pieces specific to Atomic or Timing models.
mem/bus.cc:
    Bus needs to be able to route timing packets based on explicit dest
    so responses can get back to requester.  Set dest to Packet::Broadcast
    to indicate that dest should be derived from address.
    Also set packet src field based on port from which packet is sent.
mem/bus.hh:
    Set packet src field based on port from which packet is sent.
mem/packet.hh:
    Define Broadcast destination address to indicate that
    packet should be routed based on address.
mem/physical.cc:
    Set packet dest on response so packet is routed
    back to requester properly.
mem/port.cc:
    Flag blob packets as Broadcast.
python/m5/objects/PhysicalMemory.py:
    Change default latency to be 1 cycle.

--HG--
rename : cpu/simple/cpu.cc => cpu/simple/base.cc
rename : cpu/simple/cpu.hh => cpu/simple/base.hh
extra : convert_revision : e9646af6406a20c8c605087936dc4683375c2132
---
 arch/alpha/isa/main.isa             |    1 +
 arch/alpha/isa/mem.isa              |   53 +-
 build/SConstruct                    |    3 +-
 configs/test/test.py                |    2 +-
 cpu/SConscript                      |   20 +-
 cpu/cpu_models.py                   |    9 +-
 cpu/simple/atomic.cc                |  555 ++++++++++++
 cpu/simple/atomic.hh                |  139 +++
 cpu/simple/base.cc                  |  479 +++++++++++
 cpu/simple/{cpu.hh => base.hh}      |  143 +---
 cpu/simple/cpu.cc                   | 1218 ---------------------------
 cpu/simple/timing.cc                |  559 ++++++++++++
 cpu/simple/timing.hh                |  150 ++++
 cpu/static_inst.hh                  |    4 +-
 mem/bus.cc                          |   21 +-
 mem/bus.hh                          |   18 +-
 mem/packet.hh                       |    2 +
 mem/physical.cc                     |    1 +
 mem/port.cc                         |    2 +
 python/m5/objects/PhysicalMemory.py |    2 +-
 20 files changed, 1997 insertions(+), 1384 deletions(-)
 create mode 100644 cpu/simple/atomic.cc
 create mode 100644 cpu/simple/atomic.hh
 create mode 100644 cpu/simple/base.cc
 rename cpu/simple/{cpu.hh => base.hh} (73%)
 delete mode 100644 cpu/simple/cpu.cc
 create mode 100644 cpu/simple/timing.cc
 create mode 100644 cpu/simple/timing.hh

diff --git a/arch/alpha/isa/main.isa b/arch/alpha/isa/main.isa
index 746fe776d..03a8e1ff5 100644
--- a/arch/alpha/isa/main.isa
+++ b/arch/alpha/isa/main.isa
@@ -60,6 +60,7 @@ output exec {{
 #include "cpu/base.hh"
 #include "cpu/exetrace.hh"
 #include "sim/sim_exit.hh"
+#include "mem/packet_impl.hh"
 
 using namespace AlphaISA;
 }};
diff --git a/arch/alpha/isa/mem.isa b/arch/alpha/isa/mem.isa
index 8742d308f..98c7ba979 100644
--- a/arch/alpha/isa/mem.isa
+++ b/arch/alpha/isa/mem.isa
@@ -178,7 +178,8 @@ def template InitiateAccDeclare {{
 
 
 def template CompleteAccDeclare {{
-    Fault completeAcc(uint8_t *, %(CPU_exec_context)s *, Trace::InstRecord *) const;
+    Fault completeAcc(Packet *, %(CPU_exec_context)s *,
+                      Trace::InstRecord *) const;
 }};
 
 
@@ -304,7 +305,7 @@ def template LoadInitiateAcc {{
 
 
 def template LoadCompleteAcc {{
-    Fault %(class_name)s::completeAcc(uint8_t *data,
+    Fault %(class_name)s::completeAcc(Packet *pkt,
                                       %(CPU_exec_context)s *xc,
                                       Trace::InstRecord *traceData) const
     {
@@ -313,7 +314,7 @@ def template LoadCompleteAcc {{
         %(fp_enable_check)s;
         %(op_decl)s;
 
-        memcpy(&Mem, data, sizeof(Mem));
+        Mem = pkt->get<typeof(Mem)>();
 
         if (fault == NoFault) {
             %(memacc_code)s;
@@ -406,7 +407,6 @@ def template StoreInitiateAcc {{
     {
         Addr EA;
         Fault fault = NoFault;
-        uint64_t write_result = 0;
 
         %(fp_enable_check)s;
         %(op_decl)s;
@@ -419,7 +419,7 @@ def template StoreInitiateAcc {{
 
         if (fault == NoFault) {
             fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
-                              memAccessFlags, &write_result);
+                              memAccessFlags, NULL);
             if (traceData) { traceData->setData(Mem); }
         }
 
@@ -429,17 +429,39 @@ def template StoreInitiateAcc {{
 
 
 def template StoreCompleteAcc {{
-    Fault %(class_name)s::completeAcc(uint8_t *data,
+    Fault %(class_name)s::completeAcc(Packet *pkt,
                                       %(CPU_exec_context)s *xc,
                                       Trace::InstRecord *traceData) const
     {
         Fault fault = NoFault;
-        uint64_t write_result = 0;
 
         %(fp_enable_check)s;
         %(op_dest_decl)s;
 
-        memcpy(&write_result, data, sizeof(write_result));
+        if (fault == NoFault) {
+            %(postacc_code)s;
+        }
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+
+def template StoreCondCompleteAcc {{
+    Fault %(class_name)s::completeAcc(Packet *pkt,
+                                      %(CPU_exec_context)s *xc,
+                                      Trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+
+        %(fp_enable_check)s;
+        %(op_dest_decl)s;
+
+        uint64_t write_result = pkt->req->getScResult();
 
         if (fault == NoFault) {
             %(postacc_code)s;
@@ -505,7 +527,7 @@ def template MiscInitiateAcc {{
 
 
 def template MiscCompleteAcc {{
-    Fault %(class_name)s::completeAcc(uint8_t *data,
+    Fault %(class_name)s::completeAcc(Packet *pkt,
                                       %(CPU_exec_context)s *xc,
                                       Trace::InstRecord *traceData) const
     {
@@ -577,7 +599,7 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
     if (exec_template_base == 'Load'):
         initiateacc_cblk = CodeBlock(ea_code + memacc_code)
         completeacc_cblk = CodeBlock(memacc_code + postacc_code)
-    elif (exec_template_base == 'Store'):
+    elif (exec_template_base.startswith('Store')):
         initiateacc_cblk = CodeBlock(ea_code + memacc_code)
         completeacc_cblk = CodeBlock(postacc_code)
     else:
@@ -595,7 +617,7 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
         initiateacc_iop.memacc_code = memacc_cblk.code
         completeacc_iop.memacc_code = memacc_cblk.code
         completeacc_iop.postacc_code = postacc_cblk.code
-    elif (exec_template_base == 'Store'):
+    elif (exec_template_base.startswith('Store')):
         initiateacc_iop.ea_code = ea_cblk.code
         initiateacc_iop.memacc_code = memacc_cblk.code
         completeacc_iop.postacc_code = postacc_cblk.code
@@ -616,6 +638,13 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
         memacc_iop.constructor += s
 
     # select templates
+
+    # define aliases... most StoreCond templates are the same as the
+    # corresponding Store templates (only CompleteAcc is different).
+    StoreCondMemAccExecute = StoreMemAccExecute
+    StoreCondExecute = StoreExecute
+    StoreCondInitiateAcc = StoreInitiateAcc
+
     memAccExecTemplate = eval(exec_template_base + 'MemAccExecute')
     fullExecTemplate = eval(exec_template_base + 'Execute')
     initiateAccTemplate = eval(exec_template_base + 'InitiateAcc')
@@ -685,7 +714,7 @@ def format StoreCond(memacc_code, postacc_code,
                      mem_flags = [], inst_flags = []) {{
     (header_output, decoder_output, decode_block, exec_output) = \
         LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
-                      postacc_code, exec_template_base = 'Store')
+                      postacc_code, exec_template_base = 'StoreCond')
 }};
 
 
diff --git a/build/SConstruct b/build/SConstruct
index 28940fc5a..54307e945 100644
--- a/build/SConstruct
+++ b/build/SConstruct
@@ -222,7 +222,8 @@ env = conf.Finish()
 env['ALL_ISA_LIST'] = ['alpha', 'sparc', 'mips']
 
 # Define the universe of supported CPU models
-env['ALL_CPU_LIST'] = ['SimpleCPU', 'FastCPU', 'FullCPU', 'AlphaFullCPU']
+env['ALL_CPU_LIST'] = ['AtomicSimpleCPU', 'TimingSimpleCPU',
+                       'FastCPU', 'FullCPU', 'AlphaFullCPU']
 
 # Sticky options get saved in the options file so they persist from
 # one invocation to the next (unless overridden, in which case the new
diff --git a/configs/test/test.py b/configs/test/test.py
index 695d84b73..75e832f5e 100644
--- a/configs/test/test.py
+++ b/configs/test/test.py
@@ -6,7 +6,7 @@ class HelloWorld(AlphaLiveProcess):
 
 magicbus = Bus()
 mem = PhysicalMemory()
-cpu = SimpleCPU(workload=HelloWorld(), mem=magicbus)
+cpu = AtomicSimpleCPU(workload=HelloWorld(), mem=magicbus)
 system = System(physmem=mem, cpu=cpu)
 system.c1 =  Connector(side_a=mem, side_b=magicbus)
 root = Root(system=system)
diff --git a/cpu/SConscript b/cpu/SConscript
index af6bab4eb..34fb6df78 100644
--- a/cpu/SConscript
+++ b/cpu/SConscript
@@ -51,6 +51,11 @@ execfile(models_db.srcnode().abspath)
 # Template for execute() signature.
 exec_sig_template = '''
 virtual Fault execute(%s *xc, Trace::InstRecord *traceData) const = 0;
+virtual Fault initiateAcc(%s *xc, Trace::InstRecord *traceData) const
+{ panic("initiateAcc not defined!"); };
+virtual Fault completeAcc(Packet *pkt, %s *xc,
+                          Trace::InstRecord *traceData) const
+{ panic("completeAcc not defined!"); };
 '''
 
 # Generate header.  
@@ -62,7 +67,7 @@ def gen_cpu_exec_signatures(target, source, env):
 '''
     for cpu in env['CPU_MODELS']:
         xc_type = CpuModel.dict[cpu].strings['CPU_exec_context']
-        print >> f, exec_sig_template % xc_type
+        print >> f, exec_sig_template % (xc_type, xc_type, xc_type)
     print >> f, '''
 #endif  // __CPU_STATIC_INST_EXEC_SIGS_HH__
 '''
@@ -86,8 +91,17 @@ env.Command('static_inst_exec_sigs.hh', models_db,
 
 sources = []
 
-if 'SimpleCPU' in env['CPU_MODELS']:
-    sources += Split('simple/cpu.cc')
+need_simple_base = False
+if 'AtomicSimpleCPU' in env['CPU_MODELS']:
+    need_simple_base = True
+    sources += Split('simple/atomic.cc')
+
+if 'TimingSimpleCPU' in env['CPU_MODELS']:
+    need_simple_base = True
+    sources += Split('simple/timing.cc')
+
+if need_simple_base:
+    sources += Split('simple/base.cc')
 
 if 'FastCPU' in env['CPU_MODELS']:
     sources += Split('fast/cpu.cc')
diff --git a/cpu/cpu_models.py b/cpu/cpu_models.py
index 675204e5b..30cbabde1 100644
--- a/cpu/cpu_models.py
+++ b/cpu/cpu_models.py
@@ -56,9 +56,12 @@ class CpuModel:
 #   - substitution strings for ISA description templates
 #
 
-CpuModel('SimpleCPU', 'simple_cpu_exec.cc',
-         '#include "cpu/simple/cpu.hh"',
-         { 'CPU_exec_context': 'SimpleCPU' })
+CpuModel('AtomicSimpleCPU', 'atomic_simple_cpu_exec.cc',
+         '#include "cpu/simple/atomic.hh"',
+         { 'CPU_exec_context': 'AtomicSimpleCPU' })
+CpuModel('TimingSimpleCPU', 'timing_simple_cpu_exec.cc',
+         '#include "cpu/simple/timing.hh"',
+         { 'CPU_exec_context': 'TimingSimpleCPU' })
 CpuModel('FastCPU', 'fast_cpu_exec.cc',
          '#include "cpu/fast/cpu.hh"',
          { 'CPU_exec_context': 'FastCPU' })
diff --git a/cpu/simple/atomic.cc b/cpu/simple/atomic.cc
new file mode 100644
index 000000000..8c38fe0d4
--- /dev/null
+++ b/cpu/simple/atomic.cc
@@ -0,0 +1,555 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/utility.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/simple/atomic.hh"
+#include "mem/packet_impl.hh"
+#include "sim/builder.hh"
+
+using namespace std;
+using namespace TheISA;
+
+AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
+    : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c)
+{
+}
+
+
+void
+AtomicSimpleCPU::TickEvent::process()
+{
+    cpu->tick();
+}
+
+const char *
+AtomicSimpleCPU::TickEvent::description()
+{
+    return "AtomicSimpleCPU tick event";
+}
+
+
+void
+AtomicSimpleCPU::init()
+{
+    // Create memory ports on 'mem' and peer them with the dcache/icache ports
+    Port *mem_dport = mem->getPort("");
+    dcachePort.setPeer(mem_dport);
+    mem_dport->setPeer(&dcachePort);
+
+    Port *mem_iport = mem->getPort("");
+    icachePort.setPeer(mem_iport);
+    mem_iport->setPeer(&icachePort);
+
+    BaseCPU::init();
+#if FULL_SYSTEM
+    for (int i = 0; i < execContexts.size(); ++i) {
+        ExecContext *xc = execContexts[i];
+
+        // initialize CPU, including PC
+        TheISA::initCPU(xc, xc->readCpuId());
+    }
+#endif
+}
+
+bool
+AtomicSimpleCPU::CpuPort::recvTiming(Packet &pkt)
+{
+    panic("AtomicSimpleCPU doesn't expect recvTiming callback!");
+    return true;  // satisfies the bool return type after the panic() call
+}
+
+Tick
+AtomicSimpleCPU::CpuPort::recvAtomic(Packet &pkt)
+{
+    panic("AtomicSimpleCPU doesn't expect recvAtomic callback!");
+    return curTick;
+}
+
+void
+AtomicSimpleCPU::CpuPort::recvFunctional(Packet &pkt)
+{
+    panic("AtomicSimpleCPU doesn't expect recvFunctional callback!");
+}
+
+void
+AtomicSimpleCPU::CpuPort::recvStatusChange(Status status)
+{
+    panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!");
+}
+
+Packet *
+AtomicSimpleCPU::CpuPort::recvRetry()
+{
+    panic("AtomicSimpleCPU doesn't expect recvRetry callback!");
+    return NULL;
+}
+
+
+AtomicSimpleCPU::AtomicSimpleCPU(Params *p)
+    : BaseSimpleCPU(p), tickEvent(this),
+      width(p->width), simulate_stalls(p->simulate_stalls),
+      icachePort(this), dcachePort(this)
+{
+    _status = Idle;
+    // Allocate the request/packet pairs that tick()/read()/write() reuse.
+    ifetch_req = new Request(true);
+    ifetch_req->setAsid(0);
+    // @todo fix me and get the real CPU ID!!!
+    ifetch_req->setCpuNum(0);
+    ifetch_req->setSize(sizeof(MachInst));
+    ifetch_pkt = new Packet;
+    ifetch_pkt->cmd = Read;
+    ifetch_pkt->dataStatic(&inst);
+    ifetch_pkt->req = ifetch_req;
+    ifetch_pkt->size = sizeof(MachInst);
+    ifetch_pkt->dest = Packet::Broadcast;
+
+    data_read_req = new Request(true);
+    // @todo fix me and get the real CPU ID!!!
+    data_read_req->setCpuNum(0);
+    data_read_req->setAsid(0);
+    data_read_pkt = new Packet;
+    data_read_pkt->cmd = Read;
+    data_read_pkt->dataStatic(&dataReg);
+    data_read_pkt->req = data_read_req;
+    data_read_pkt->dest = Packet::Broadcast;
+
+    data_write_req = new Request(true);
+    // @todo fix me and get the real CPU ID!!!
+    data_write_req->setCpuNum(0);
+    data_write_req->setAsid(0);
+    data_write_pkt = new Packet;
+    data_write_pkt->cmd = Write;
+    data_write_pkt->req = data_write_req;
+    data_write_pkt->dest = Packet::Broadcast;
+}
+
+
+AtomicSimpleCPU::~AtomicSimpleCPU()
+{
+}
+
+void
+AtomicSimpleCPU::serialize(ostream &os)
+{
+    BaseSimpleCPU::serialize(os);
+    SERIALIZE_ENUM(_status);
+    nameOut(os, csprintf("%s.tickEvent", name()));
+    tickEvent.serialize(os);
+}
+
+void
+AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
+{
+    BaseSimpleCPU::unserialize(cp, section);
+    UNSERIALIZE_ENUM(_status);
+    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+}
+
+void
+AtomicSimpleCPU::switchOut(Sampler *s)
+{
+    sampler = s;
+    if (status() == Running) {
+        _status = SwitchedOut;
+
+        tickEvent.squash();
+    }
+    sampler->signalSwitched();
+}
+
+
+void
+AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
+{
+    BaseCPU::takeOverFrom(oldCPU);
+
+    assert(!tickEvent.scheduled());
+
+    // if any of this CPU's ExecContexts are active, mark the CPU as
+    // running and schedule its tick event.
+    for (int i = 0; i < execContexts.size(); ++i) {
+        ExecContext *xc = execContexts[i];
+        if (xc->status() == ExecContext::Active && _status != Running) {
+            _status = Running;
+            tickEvent.schedule(curTick);
+            break;
+        }
+    }
+}
+
+
+void
+AtomicSimpleCPU::activateContext(int thread_num, int delay)
+{
+    assert(thread_num == 0);
+    assert(cpuXC);
+
+    assert(_status == Idle);
+    assert(!tickEvent.scheduled());
+
+    notIdleFraction++;
+    tickEvent.schedule(curTick + cycles(delay));
+    _status = Running;
+}
+
+
+void
+AtomicSimpleCPU::suspendContext(int thread_num)
+{
+    assert(thread_num == 0);
+    assert(cpuXC);
+
+    assert(_status == Running);
+    assert(tickEvent.scheduled());
+
+    notIdleFraction--;
+    tickEvent.deschedule();
+    _status = Idle;
+}
+
+
+template <class T>
+Fault
+AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
+{
+    data_read_req->setVaddr(addr);
+    data_read_req->setSize(sizeof(T));
+    data_read_req->setFlags(flags);
+    data_read_req->setTime(curTick);
+
+    if (traceData) {
+        traceData->setAddr(addr);
+    }
+
+    // translate to physical address
+    Fault fault = cpuXC->translateDataReadReq(data_read_req);
+
+    // Now do the access.
+    if (fault == NoFault) {
+        data_read_pkt->reset();
+        data_read_pkt->addr = data_read_req->getPaddr();
+        data_read_pkt->size = sizeof(T);
+
+        dcache_complete = dcachePort.sendAtomic(*data_read_pkt);
+        dcache_access = true;
+
+        assert(data_read_pkt->result == Success);
+        data = data_read_pkt->get<T>();
+
+    }
+
+    // This will need a new way to tell if it has a dcache attached.
+    if (data_read_req->getFlags() & UNCACHEABLE)
+        recordEvent("Uncached Read");
+
+    return fault;
+}
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+template
+Fault
+AtomicSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags);
+
+template
+Fault
+AtomicSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags);
+
+template
+Fault
+AtomicSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags);
+
+template
+Fault
+AtomicSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags);
+
+#endif //DOXYGEN_SHOULD_SKIP_THIS
+
+template<>
+Fault
+AtomicSimpleCPU::read(Addr addr, double &data, unsigned flags)
+{
+    return read(addr, *(uint64_t*)&data, flags);
+}
+
+template<>
+Fault
+AtomicSimpleCPU::read(Addr addr, float &data, unsigned flags)
+{
+    return read(addr, *(uint32_t*)&data, flags);
+}
+
+
+template<>
+Fault
+AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags)
+{
+    return read(addr, (uint32_t&)data, flags);
+}
+
+
+template <class T>
+Fault
+AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
+{
+    data_write_req->setVaddr(addr);
+    data_write_req->setTime(curTick);
+    data_write_req->setSize(sizeof(T));
+    data_write_req->setFlags(flags);
+
+    if (traceData) {
+        traceData->setAddr(addr);
+    }
+
+    // translate to physical address
+    Fault fault = cpuXC->translateDataWriteReq(data_write_req);
+
+    // Now do the access (only if translation succeeded).
+    if (fault == NoFault) {
+        data_write_pkt->reset();
+        data = htog(data);
+        data_write_pkt->dataStatic(&data);  // packet points at local 'data'
+        data_write_pkt->addr = data_write_req->getPaddr();
+        data_write_pkt->size = sizeof(T);
+
+        dcache_complete = dcachePort.sendAtomic(*data_write_pkt);
+        dcache_access = true;  // tick() reads this to account for stalls
+
+        assert(data_write_pkt->result == Success);
+    }
+
+    if (res && (fault == NoFault))
+        *res = data_write_pkt->result;
+
+    // This will need a new way to tell if it's hooked up to a cache or not.
+    if (data_write_req->getFlags() & UNCACHEABLE)
+        recordEvent("Uncached Write");
+
+    // @todo this is a hack and only works on uniprocessor systems
+    // someone else can implement LL/SC.  res may be NULL, so guard the store.
+    if (res && (data_write_req->getFlags() & LOCKED))
+        *res = 1;
+
+    // If the write needs to have a fault on the access, consider calling
+    // changeStatus() and changing it to "bad addr write" or something.
+    return fault;
+}
+
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+template
+Fault
+AtomicSimpleCPU::write(uint64_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+AtomicSimpleCPU::write(uint32_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+AtomicSimpleCPU::write(uint16_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+AtomicSimpleCPU::write(uint8_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+#endif //DOXYGEN_SHOULD_SKIP_THIS
+
+template<>
+Fault
+AtomicSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res)
+{
+    return write(*(uint64_t*)&data, addr, flags, res);
+}
+
+template<>
+Fault
+AtomicSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res)
+{
+    return write(*(uint32_t*)&data, addr, flags, res);
+}
+
+
+template<>
+Fault
+AtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res)
+{
+    return write((uint32_t)data, addr, flags, res);
+}
+
+
+void
+AtomicSimpleCPU::tick()
+{
+    Tick latency = cycles(1); // instruction takes one cycle by default
+    // Make 'width' fetch/execute attempts within this single tick event.
+    for (int i = 0; i < width; ++i) {
+        numCycles++;
+
+        ifetch_req->resetMin();
+        ifetch_pkt->reset();
+        Fault fault = setupFetchPacket(ifetch_pkt);
+
+        if (fault == NoFault) {
+            Tick icache_complete = icachePort.sendAtomic(*ifetch_pkt);
+            // ifetch_pkt was set up in the constructor (dataStatic(&inst))
+            // to read the instruction directly into the CPU's inst field.
+
+            dcache_access = false; // assume no dcache access
+            preExecute();
+            fault = curStaticInst->execute(this, traceData);
+            postExecute();
+
+            if (traceData) {
+                traceData->finalize();
+            }
+
+            if (simulate_stalls) {
+                // This calculation assumes that the icache and dcache
+                // access latencies are always a multiple of the CPU's
+                // cycle time.  If not, the next tick event may get
+                // scheduled at a non-integer multiple of the CPU
+                // cycle time.
+                Tick icache_stall = icache_complete - curTick - cycles(1);
+                Tick dcache_stall =
+                    dcache_access ? dcache_complete - curTick - cycles(1) : 0;
+                latency += icache_stall + dcache_stall;
+            }
+
+        }
+
+        advancePC(fault);
+    }
+    // Re-arm the tick event; latency includes any stall time added above.
+    tickEvent.schedule(curTick + latency);
+}
+
+
+////////////////////////////////////////////////////////////////////////
+//
+//  AtomicSimpleCPU Simulation Object
+//
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
+
+    Param<Counter> max_insts_any_thread;
+    Param<Counter> max_insts_all_threads;
+    Param<Counter> max_loads_any_thread;
+    Param<Counter> max_loads_all_threads;
+    SimObjectParam<MemObject *> mem;
+
+#if FULL_SYSTEM
+    SimObjectParam<AlphaITB *> itb;
+    SimObjectParam<AlphaDTB *> dtb;
+    SimObjectParam<System *> system;
+    Param<int> cpu_id;
+    Param<Tick> profile;
+#else
+    SimObjectParam<Process *> workload;
+#endif // FULL_SYSTEM
+
+    Param<int> clock;
+
+    Param<bool> defer_registration;
+    Param<int> width;
+    Param<bool> function_trace;
+    Param<Tick> function_trace_start;
+    Param<bool> simulate_stalls;
+
+END_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
+
+    INIT_PARAM(max_insts_any_thread,
+               "terminate when any thread reaches this inst count"),
+    INIT_PARAM(max_insts_all_threads,
+               "terminate when all threads have reached this inst count"),
+    INIT_PARAM(max_loads_any_thread,
+               "terminate when any thread reaches this load count"),
+    INIT_PARAM(max_loads_all_threads,
+               "terminate when all threads have reached this load count"),
+    INIT_PARAM(mem, "memory"),
+
+#if FULL_SYSTEM
+    INIT_PARAM(itb, "Instruction TLB"),
+    INIT_PARAM(dtb, "Data TLB"),
+    INIT_PARAM(system, "system object"),
+    INIT_PARAM(cpu_id, "processor ID"),
+    INIT_PARAM(profile, ""),
+#else
+    INIT_PARAM(workload, "processes to run"),
+#endif // FULL_SYSTEM
+
+    INIT_PARAM(clock, "clock speed"),
+    INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+    INIT_PARAM(width, "cpu width"),
+    INIT_PARAM(function_trace, "Enable function trace"),
+    INIT_PARAM(function_trace_start, "Cycle to start function trace"),
+    INIT_PARAM(simulate_stalls, "Simulate cache stall cycles")
+
+END_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
+
+
+CREATE_SIM_OBJECT(AtomicSimpleCPU)
+{
+    AtomicSimpleCPU::Params *params = new AtomicSimpleCPU::Params();
+    params->name = getInstanceName();
+    params->numberOfThreads = 1;
+    params->max_insts_any_thread = max_insts_any_thread;
+    params->max_insts_all_threads = max_insts_all_threads;
+    params->max_loads_any_thread = max_loads_any_thread;
+    params->max_loads_all_threads = max_loads_all_threads;
+    params->deferRegistration = defer_registration;
+    params->clock = clock;
+    params->functionTrace = function_trace;
+    params->functionTraceStart = function_trace_start;
+    params->width = width;
+    params->simulate_stalls = simulate_stalls;
+    params->mem = mem;
+
+#if FULL_SYSTEM
+    params->itb = itb;
+    params->dtb = dtb;
+    params->system = system;
+    params->cpu_id = cpu_id;
+    params->profile = profile;
+#else
+    params->process = workload;
+#endif
+
+    AtomicSimpleCPU *cpu = new AtomicSimpleCPU(params);
+    return cpu;
+}
+
+REGISTER_SIM_OBJECT("AtomicSimpleCPU", AtomicSimpleCPU)
+
diff --git a/cpu/simple/atomic.hh b/cpu/simple/atomic.hh
new file mode 100644
index 000000000..348308c46
--- /dev/null
+++ b/cpu/simple/atomic.hh
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_SIMPLE_ATOMIC_HH__
+#define __CPU_SIMPLE_ATOMIC_HH__
+
+#include "cpu/simple/base.hh"
+
+class AtomicSimpleCPU : public BaseSimpleCPU
+{
+  public:
+
+    struct Params : public BaseSimpleCPU::Params {
+        int width;
+        bool simulate_stalls;
+    };
+
+    AtomicSimpleCPU(Params *params);
+    virtual ~AtomicSimpleCPU();
+
+    virtual void init();
+
+  public:
+    // Run state of this CPU model, stored in _status.
+    enum Status {
+        Running,
+        Idle,
+        SwitchedOut
+    };
+
+  protected:
+    Status _status;
+
+    Status status() const { return _status; }
+
+  private:
+
+    struct TickEvent : public Event
+    {
+        AtomicSimpleCPU *cpu;
+
+        TickEvent(AtomicSimpleCPU *c);
+        void process();
+        const char *description();
+    };
+
+    TickEvent tickEvent;
+
+    const int width;            // fetch/execute iterations per tick()
+    const bool simulate_stalls; // add cache access latency to tick interval
+
+    // main simulation loop (one cycle)
+    void tick();
+
+    class CpuPort : public Port
+    {
+        // Every recv* callback below panics in atomic.cc.
+        AtomicSimpleCPU *cpu;
+
+      public:
+
+        CpuPort(AtomicSimpleCPU *_cpu)
+            : cpu(_cpu)
+        { }
+
+      protected:
+
+        virtual bool recvTiming(Packet &pkt);
+
+        virtual Tick recvAtomic(Packet &pkt);
+
+        virtual void recvFunctional(Packet &pkt);
+
+        virtual void recvStatusChange(Status status);
+
+        virtual Packet *recvRetry();
+
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+            AddrRangeList &snoop)
+        { resp.clear(); snoop.clear(); }
+    };
+
+    CpuPort icachePort;
+    CpuPort dcachePort;
+    // Request/packet pairs allocated once in the constructor and reused.
+    Request *ifetch_req;
+    Packet  *ifetch_pkt;
+    Request *data_read_req;
+    Packet  *data_read_pkt;
+    Request *data_write_req;
+    Packet  *data_write_pkt;
+
+    bool dcache_access;   // set by read()/write(), cleared in tick()
+    Tick dcache_complete; // result of the last dcachePort.sendAtomic()
+
+  public:
+
+    virtual void serialize(std::ostream &os);
+    virtual void unserialize(Checkpoint *cp, const std::string &section);
+
+    void switchOut(Sampler *s);
+    void takeOverFrom(BaseCPU *oldCPU);
+
+    virtual void activateContext(int thread_num, int delay);
+    virtual void suspendContext(int thread_num);
+
+    template <class T>
+    Fault read(Addr addr, T &data, unsigned flags);
+
+    template <class T>
+    Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
+};
+
+#endif // __CPU_SIMPLE_ATOMIC_HH__
diff --git a/cpu/simple/base.cc b/cpu/simple/base.cc
new file mode 100644
index 000000000..40868e74d
--- /dev/null
+++ b/cpu/simple/base.cc
@@ -0,0 +1,479 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/utility.hh"
+#include "base/cprintf.hh"
+#include "base/inifile.hh"
+#include "base/loader/symtab.hh"
+#include "base/misc.hh"
+#include "base/pollevent.hh"
+#include "base/range.hh"
+#include "base/stats/events.hh"
+#include "base/trace.hh"
+#include "cpu/base.hh"
+#include "cpu/cpu_exec_context.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/profile.hh"
+#include "cpu/sampler/sampler.hh"
+#include "cpu/simple/base.hh"
+#include "cpu/smt.hh"
+#include "cpu/static_inst.hh"
+#include "kern/kernel_stats.hh"
+#include "mem/packet_impl.hh"
+#include "sim/byteswap.hh"
+#include "sim/builder.hh"
+#include "sim/debug.hh"
+#include "sim/host.hh"
+#include "sim/sim_events.hh"
+#include "sim/sim_object.hh"
+#include "sim/stats.hh"
+
+#if FULL_SYSTEM
+#include "base/remote_gdb.hh"
+#include "sim/system.hh"
+#include "arch/tlb.hh"
+#include "arch/stacktrace.hh"
+#include "arch/vtophys.hh"
+#else // !FULL_SYSTEM
+#include "mem/mem_object.hh"
+#endif // FULL_SYSTEM
+
+using namespace std;
+using namespace TheISA;
+
+BaseSimpleCPU::BaseSimpleCPU(Params *p)
+    : BaseCPU(p), mem(p->mem), cpuXC(NULL)
+{
+    // Build the single execution context owned by this CPU.
+#if FULL_SYSTEM
+    cpuXC = new CPUExecContext(this, 0, p->system, p->itb, p->dtb);
+#else
+    cpuXC = new CPUExecContext(this, /* thread_num */ 0, p->process,
+            /* asid */ 0, mem);
+#endif // !FULL_SYSTEM
+
+    // Fetch the context's proxy and add that proxy to the
+    // execContexts list.
+    xcProxy = cpuXC->getProxy();
+    execContexts.push_back(xcProxy);
+
+    // Start the instruction/load counters and stall timestamps at zero.
+    numInst = startNumInst = 0;
+    numLoad = startNumLoad = 0;
+    lastIcacheStall = lastDcacheStall = 0;
+}
+
+BaseSimpleCPU::~BaseSimpleCPU()
+{   // empty: the cpuXC allocated by the constructor is not deleted here
+}
+
+void
+BaseSimpleCPU::deallocateContext(int thread_num)
+{
+    // Deallocation is currently handled exactly like suspension.
+    suspendContext(thread_num);
+}
+
+
+void
+BaseSimpleCPU::haltContext(int thread_num)
+{
+    // Halting is currently handled exactly like suspension.
+    suspendContext(thread_num);
+}
+
+
+void
+BaseSimpleCPU::regStats()
+{
+    using namespace Stats;
+
+    // Stats owned by the parent class are registered first.
+    BaseCPU::regStats();
+
+    // Every stat registered below is named "<name()>.<stat>".
+    const std::string prefix = name() + ".";
+
+    numInsts
+        .name(prefix + "num_insts")
+        .desc("Number of instructions executed");
+
+    numMemRefs
+        .name(prefix + "num_refs")
+        .desc("Number of memory references");
+
+    // idleFraction is only named and described here; the expression
+    // it evaluates is assigned at the very end of this function.
+    notIdleFraction
+        .name(prefix + "not_idle_fraction")
+        .desc("Percentage of non-idle cycles");
+
+    idleFraction
+        .name(prefix + "idle_fraction")
+        .desc("Percentage of idle cycles");
+
+    // Each cache stall/retry counter lists itself as its own prereq.
+    icacheStallCycles
+        .name(prefix + "icache_stall_cycles")
+        .desc("ICache total stall cycles")
+        .prereq(icacheStallCycles);
+
+    dcacheStallCycles
+        .name(prefix + "dcache_stall_cycles")
+        .desc("DCache total stall cycles")
+        .prereq(dcacheStallCycles);
+
+    icacheRetryCycles
+        .name(prefix + "icache_retry_cycles")
+        .desc("ICache total retry cycles")
+        .prereq(icacheRetryCycles);
+
+    dcacheRetryCycles
+        .name(prefix + "dcache_retry_cycles")
+        .desc("DCache total retry cycles")
+        .prereq(dcacheRetryCycles);
+
+    // The idle fraction is derived: one minus the not-idle fraction.
+    idleFraction = constant(1.0) - notIdleFraction;
+}
+
+void
+BaseSimpleCPU::resetStats()
+{
+    startNumInst = numInst;  // the start count becomes the current count
+    // disabled (refers to _status): notIdleFraction = (_status != Idle);
+}
+
+void
+BaseSimpleCPU::serialize(ostream &os)
+{
+    BaseCPU::serialize(os);  // base-class state goes first
+    SERIALIZE_SCALAR(inst);  // then the inst member
+    nameOut(os, csprintf("%s.xc", name()));  // presumably a "<name>.xc" label
+    cpuXC->serialize(os);  // finally the execution context itself
+}
+
+void
+BaseSimpleCPU::unserialize(Checkpoint *cp, const string &section)
+{
+    BaseCPU::unserialize(cp, section);  // same order as serialize()
+    UNSERIALIZE_SCALAR(inst);
+    cpuXC->unserialize(cp, csprintf("%s.xc", section));  // "<section>.xc"
+}
+
+void
+change_thread_state(int thread_number, int activate, int priority)
+{   // free function (not a BaseSimpleCPU member) with an empty body
+}
+
+Fault
+BaseSimpleCPU::copySrcTranslate(Addr src)
+{
+#if 0 // this branch is compiled out
+    static bool no_warn = true;
+    int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
+    // Only a block size of 64 is supported at the moment.
+    assert(blk_size == 64);
+    int offset = src & (blk_size - 1);
+
+    // Warn (once) if the source block appears to span a page boundary.
+    if (no_warn &&
+        (src & PageMask) != ((src + blk_size) & PageMask) &&
+        (src >> 40) != 0xfffffc) {
+        warn("Copied block source spans pages %x.", src);
+        no_warn = false;
+    }
+
+    memReq->reset(src & ~(blk_size - 1), blk_size);
+
+    // Translate to a physical address.
+    Fault fault = cpuXC->translateDataReadReq(req);
+
+    if (fault == NoFault) {
+        cpuXC->copySrcAddr = src;
+        cpuXC->copySrcPhysAddr = memReq->paddr + offset;
+    } else {
+        assert(!fault->isAlignmentFault());
+
+        cpuXC->copySrcAddr = 0;
+        cpuXC->copySrcPhysAddr = 0;
+    }
+    return fault;
+#else // live branch: no translation is performed
+    return NoFault;  // always reports success
+#endif
+}
+
+Fault
+BaseSimpleCPU::copy(Addr dest)
+{
+#if 0 // this branch is compiled out
+    static bool no_warn = true;
+    int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
+    // Only a block size of 64 is supported at the moment.
+    assert(blk_size == 64);
+    uint8_t data[blk_size];
+    //assert(cpuXC->copySrcAddr);
+    int offset = dest & (blk_size - 1);
+
+    // Warn (once) if the destination block appears to span a page boundary.
+    if (no_warn &&
+        (dest & PageMask) != ((dest + blk_size) & PageMask) &&
+        (dest >> 40) != 0xfffffc) {
+        no_warn = false;
+        warn("Copied block destination spans pages %x. ", dest);
+    }
+
+    memReq->reset(dest & ~(blk_size -1), blk_size);
+    // Translate to a physical address.
+    Fault fault = cpuXC->translateDataWriteReq(req);
+
+    if (fault == NoFault) {
+        Addr dest_addr = memReq->paddr + offset;
+        // Read straight from memory because the block exceeds 8 bytes.
+        memReq->paddr = cpuXC->copySrcPhysAddr;
+        cpuXC->mem->read(memReq, data);
+        memReq->paddr = dest_addr;
+        cpuXC->mem->write(memReq, data);
+        if (dcacheInterface) {
+            memReq->cmd = Copy;
+            memReq->completionEvent = NULL;
+            memReq->paddr = cpuXC->copySrcPhysAddr;
+            memReq->dest = dest_addr;
+            memReq->size = 64;
+            memReq->time = curTick;
+            memReq->flags &= ~INST_READ;
+            dcacheInterface->access(memReq);
+        }
+    }
+    else
+        assert(!fault->isAlignmentFault());
+
+    return fault;
+#else // live branch: the operation is not implemented
+    panic("copy not implemented");
+    return NoFault;  // presumably unreachable if panic() does not return
+#endif
+}
+
+#if FULL_SYSTEM
+Addr
+BaseSimpleCPU::dbg_vtophys(Addr addr)
+{
+    return vtophys(xcProxy, addr);  // delegates, passing this CPU's xcProxy
+}
+#endif // FULL_SYSTEM
+
+#if FULL_SYSTEM
+void
+BaseSimpleCPU::post_interrupt(int int_num, int index)
+{
+    // Hand the interrupt to the base class first.
+    BaseCPU::post_interrupt(int_num, index);
+    if (cpuXC->status() != ExecContext::Suspended)
+        return;
+    DPRINTF(IPI,"Suspended Processor awoke\n");
+    cpuXC->activate();  // a suspended context is reactivated
+}
+#endif // FULL_SYSTEM
+
+void
+BaseSimpleCPU::checkForInterrupts()
+{
+#if FULL_SYSTEM
+    // Take a pending interrupt, if any, unless the context is in PAL mode.
+    if (checkInterrupts && check_interrupts() && !cpuXC->inPalMode()) {
+        int ipl = 0;
+        int summary = 0;
+        checkInterrupts = false;
+        // Software interrupts: each set SIRR bit in range overwrites ipl.
+        if (cpuXC->readMiscReg(IPR_SIRR)) {
+            for (int i = INTLEVEL_SOFTWARE_MIN;
+                 i < INTLEVEL_SOFTWARE_MAX; i++) {
+                if (cpuXC->readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
+                    // See table 4-19 of 21164 hardware reference
+                    ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
+                    summary |= (ULL(1) << i);
+                }
+            }
+        }
+        // External interrupts: the same scan over the CPU's intr_status().
+        uint64_t interrupts = cpuXC->cpu->intr_status();
+        for (int i = INTLEVEL_EXTERNAL_MIN;
+            i < INTLEVEL_EXTERNAL_MAX; i++) {
+            if (interrupts & (ULL(1) << i)) {
+                // See table 4-19 of 21164 hardware reference
+                ipl = i;
+                summary |= (ULL(1) << i);
+            }
+        }
+
+        if (cpuXC->readMiscReg(IPR_ASTRR))
+            panic("asynchronous traps not implemented\n");
+        // Deliver only when the level found exceeds the current IPLR.
+        if (ipl && ipl > cpuXC->readMiscReg(IPR_IPLR)) {
+            cpuXC->setMiscReg(IPR_ISR, summary);
+            cpuXC->setMiscReg(IPR_INTID, ipl);
+
+            Fault(new InterruptFault)->invoke(xcProxy);
+
+            DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
+                    cpuXC->readMiscReg(IPR_IPLR), ipl, summary);
+        }
+    }
+#endif
+}
+
+
+Fault
+BaseSimpleCPU::setupFetchPacket(Packet *ifetch_pkt)
+{
+    DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p NNPC:%08p\n",cpuXC->readPC(),
+            cpuXC->readNextPC(),cpuXC->readNextNPC());
+
+    // Aim the packet's request at the PC with its two low bits cleared.
+    Request *req = ifetch_pkt->req;
+    req->setVaddr(cpuXC->readPC() & ~3);
+    req->setTime(curTick);
+
+    // In full-system builds bit 0 of the PC selects the PHYSICAL flag;
+    // otherwise the fetch request carries no flags at all.
+#if FULL_SYSTEM
+    req->setFlags((cpuXC->readPC() & 1) ? PHYSICAL : 0);
+#else
+    req->setFlags(0);
+#endif
+
+    // Translate; the packet gets a physical address only on success.
+    Fault xlate_fault = cpuXC->translateInstReq(req);
+    if (xlate_fault != NoFault)
+        return xlate_fault;
+
+    ifetch_pkt->addr = req->getPaddr();
+    return xlate_fault;
+}
+
+
+void
+BaseSimpleCPU::preExecute()
+{
+    // Maintain $r0 semantics: write zero back into the zero register(s).
+    cpuXC->setIntReg(ZeroReg, 0);
+#if THE_ISA == ALPHA_ISA
+    cpuXC->setFloatReg(ZeroReg, 0.0);
+#endif // ALPHA_ISA
+
+    // Keep an instruction count (plain counter and statistic alike).
+    numInst++;
+    numInsts++;
+
+    cpuXC->func_exe_inst++;
+
+    // Service any events keyed on the instruction count.
+    comInstEventQueue[0]->serviceEvents(numInst);
+
+    // Decode the instruction; gtoh() first rewrites inst in place.
+    inst = gtoh(inst);
+    curStaticInst = StaticInst::decode(makeExtMI(inst, cpuXC->readPC()));
+    // Obtain the trace record for this instruction (kept in traceData).
+    traceData = Trace::getInstRecord(curTick, xcProxy, this, curStaticInst,
+                                     cpuXC->readPC());
+
+    DPRINTF(Decode,"Decode: Decoded %s instruction (opcode: 0x%x): 0x%x\n",
+            curStaticInst->getName(), curStaticInst->getOpcode(),
+            curStaticInst->machInst);
+
+#if FULL_SYSTEM
+    cpuXC->setInst(inst);
+#endif // FULL_SYSTEM
+}
+
+void
+BaseSimpleCPU::postExecute()
+{
+#if FULL_SYSTEM
+    // With kernelBinning->fnbin set, the binning code sees the instruction.
+    if (system->kernelBinning->fnbin) {
+        assert(kernelStats);
+        system->kernelBinning->execute(xcProxy, inst);
+    }
+    if (cpuXC->profile) {
+        // Any DTB_CM bit under mask 0x18 counts as user mode, which is
+        // profiled under the single pseudo-PC 1 instead of the real PC.
+        if (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18)
+            cpuXC->profilePC = 1;
+        else
+            cpuXC->profilePC = cpuXC->readPC();
+        if (ProfileNode *hit = cpuXC->profile->consume(xcProxy, inst))
+            cpuXC->profileNode = hit;
+    }
+#endif
+
+    if (curStaticInst->isMemRef())
+        numMemRefs++;
+
+    // Loads bump their own counter, which drives load-count events.
+    if (curStaticInst->isLoad())
+        comLoadEventQueue[0]->serviceEvents(++numLoad);
+
+    traceFunctions(cpuXC->readPC());
+}
+
+
+void
+BaseSimpleCPU::advancePC(Fault fault)
+{
+    if (fault == NoFault) {
+        // Fault-free case: step PC (and its successors) by one instruction.
+        cpuXC->setPC(cpuXC->readNextPC());
+#if THE_ISA == ALPHA_ISA
+        cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst));
+#else
+        cpuXC->setNextPC(cpuXC->readNextNPC());
+        cpuXC->setNextNPC(cpuXC->readNextNPC() + sizeof(MachInst));
+#endif
+    } else {
+#if FULL_SYSTEM
+        fault->invoke(xcProxy);
+#else // !FULL_SYSTEM
+        fatal("fault (%s) detected @ PC %08p", fault->name(), cpuXC->readPC());
+#endif // FULL_SYSTEM
+    }
+
+#if FULL_SYSTEM
+    // Service PC events until a pass leaves the PC where it was.
+    for (;;) {
+        const Addr pc_before = cpuXC->readPC();
+        system->pcEventQueue.service(xcProxy);
+        if (pc_before == cpuXC->readPC())
+            break;
+    }
+#endif
+}
+
diff --git a/cpu/simple/cpu.hh b/cpu/simple/base.hh
similarity index 73%
rename from cpu/simple/cpu.hh
rename to cpu/simple/base.hh
index 945de20af..4c0e6f3c7 100644
--- a/cpu/simple/cpu.hh
+++ b/cpu/simple/base.hh
@@ -26,8 +26,8 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef __CPU_SIMPLE_CPU_SIMPLE_CPU_HH__
-#define __CPU_SIMPLE_CPU_SIMPLE_CPU_HH__
+#ifndef __CPU_SIMPLE_BASE_HH__
+#define __CPU_SIMPLE_BASE_HH__
 
 #include "base/statistics.hh"
 #include "config/full_system.hh"
@@ -65,108 +65,19 @@ namespace Trace {
 }
 
 
-// Set exactly one of these symbols to 1 to set the memory access
-// model.  Probably should make these template parameters, or even
-// just fork the CPU models.
-//
-#define SIMPLE_CPU_MEM_TIMING    0
-#define SIMPLE_CPU_MEM_ATOMIC    0
-#define SIMPLE_CPU_MEM_IMMEDIATE 1
-
-
-class SimpleCPU : public BaseCPU
+class BaseSimpleCPU : public BaseCPU
 {
   protected:
     typedef TheISA::MachInst MachInst;
     typedef TheISA::MiscReg MiscReg;
     typedef TheISA::FloatReg FloatReg;
     typedef TheISA::FloatRegBits FloatRegBits;
-    class CpuPort : public Port
-    {
-
-        SimpleCPU *cpu;
-
-      public:
-
-        CpuPort(SimpleCPU *_cpu)
-            : cpu(_cpu)
-        { }
-
-      protected:
-
-        virtual bool recvTiming(Packet &pkt);
-
-        virtual Tick recvAtomic(Packet &pkt);
-
-        virtual void recvFunctional(Packet &pkt);
-
-        virtual void recvStatusChange(Status status);
-
-        virtual Packet *recvRetry();
-
-        virtual void getDeviceAddressRanges(AddrRangeList &resp,
-            AddrRangeList &snoop)
-        { resp.clear(); snoop.clear(); }
-    };
 
     MemObject *mem;
-    CpuPort icachePort;
-    CpuPort dcachePort;
-
-  public:
-    // main simulation loop (one cycle)
-    void tick();
-    virtual void init();
-
-  private:
-    struct TickEvent : public Event
-    {
-        SimpleCPU *cpu;
-        int width;
-
-        TickEvent(SimpleCPU *c, int w);
-        void process();
-        const char *description();
-    };
 
-    TickEvent tickEvent;
-
-    /// Schedule tick event, regardless of its current state.
-    void scheduleTickEvent(int numCycles)
-    {
-        if (tickEvent.squashed())
-            tickEvent.reschedule(curTick + cycles(numCycles));
-        else if (!tickEvent.scheduled())
-            tickEvent.schedule(curTick + cycles(numCycles));
-    }
-
-    /// Unschedule tick event, regardless of its current state.
-    void unscheduleTickEvent()
-    {
-        if (tickEvent.scheduled())
-            tickEvent.squash();
-    }
-
-  private:
+  protected:
     Trace::InstRecord *traceData;
 
-  public:
-    //
-    enum Status {
-        Running,
-        Idle,
-        IcacheRetry,
-        IcacheWaitResponse,
-        IcacheAccessComplete,
-        DcacheRetry,
-        DcacheWaitResponse,
-        DcacheWaitSwitch,
-        SwitchedOut
-    };
-
-  private:
-    Status _status;
-
   public:
     void post_interrupt(int int_num, int index);
 
@@ -181,7 +92,6 @@ class SimpleCPU : public BaseCPU
   public:
     struct Params : public BaseCPU::Params
     {
-        int width;
         MemObject *mem;
 #if FULL_SYSTEM
         AlphaITB *itb;
@@ -190,8 +100,8 @@ class SimpleCPU : public BaseCPU
         Process *process;
 #endif
     };
-    SimpleCPU(Params *params);
-    virtual ~SimpleCPU();
+    BaseSimpleCPU(Params *params);
+    virtual ~BaseSimpleCPU();
 
   public:
     // execution context
@@ -199,9 +109,6 @@ class SimpleCPU : public BaseCPU
 
     ExecContext *xcProxy;
 
-    void switchOut(Sampler *s);
-    void takeOverFrom(BaseCPU *oldCPU);
-
 #if FULL_SYSTEM
     Addr dbg_vtophys(Addr addr);
 
@@ -214,17 +121,6 @@ class SimpleCPU : public BaseCPU
     // Static data storage
     TheISA::IntReg dataReg;
 
-#if SIMPLE_CPU_MEM_TIMING
-    Packet *retry_pkt;
-#elif SIMPLE_CPU_MEM_ATOMIC || SIMPLE_CPU_MEM_IMMEDIATE
-    Request *ifetch_req;
-    Packet  *ifetch_pkt;
-    Request *data_read_req;
-    Packet  *data_read_pkt;
-    Request *data_write_req;
-    Packet  *data_write_pkt;
-#endif
-
     // Pointer to the sampler that is telling us to switchover.
     // Used to signal the completion of the pipe drain and schedule
     // the next switchover
@@ -232,10 +128,12 @@ class SimpleCPU : public BaseCPU
 
     StaticInstPtr curStaticInst;
 
-    Status status() const { return _status; }
+    void checkForInterrupts();
+    Fault setupFetchPacket(Packet *ifetch_pkt);
+    void preExecute();
+    void postExecute();
+    void advancePC(Fault fault);
 
-    virtual void activateContext(int thread_num, int delay);
-    virtual void suspendContext(int thread_num);
     virtual void deallocateContext(int thread_num);
     virtual void haltContext(int thread_num);
 
@@ -280,26 +178,13 @@ class SimpleCPU : public BaseCPU
     Stats::Scalar<> dcacheRetryCycles;
     Counter lastDcacheRetry;
 
-    void sendIcacheRequest(Packet *pkt);
-    void sendDcacheRequest(Packet *pkt);
-    void processResponse(Packet &response);
-
-    Packet * processRetry();
-    void recvStatusChange(Port::Status status) {}
-
     virtual void serialize(std::ostream &os);
     virtual void unserialize(Checkpoint *cp, const std::string &section);
 
-    template <class T>
-    Fault read(Addr addr, T &data, unsigned flags);
-
-    template <class T>
-    Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
-
     // These functions are only used in CPU models that split
     // effective address computation from the actual memory access.
-    void setEA(Addr EA) { panic("SimpleCPU::setEA() not implemented\n"); }
-    Addr getEA() 	{ panic("SimpleCPU::getEA() not implemented\n"); }
+    void setEA(Addr EA) { panic("BaseSimpleCPU::setEA() not implemented\n"); }
+    Addr getEA() 	{ panic("BaseSimpleCPU::getEA() not implemented\n"); }
 
     void prefetch(Addr addr, unsigned flags)
     {
@@ -428,4 +313,4 @@ class SimpleCPU : public BaseCPU
     ExecContext *xcBase() { return xcProxy; }
 };
 
-#endif // __CPU_SIMPLE_CPU_SIMPLE_CPU_HH__
+#endif // __CPU_SIMPLE_BASE_HH__
diff --git a/cpu/simple/cpu.cc b/cpu/simple/cpu.cc
deleted file mode 100644
index 328036918..000000000
--- a/cpu/simple/cpu.cc
+++ /dev/null
@@ -1,1218 +0,0 @@
-/*
- * Copyright (c) 2002-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "arch/utility.hh"
-#include "base/cprintf.hh"
-#include "base/inifile.hh"
-#include "base/loader/symtab.hh"
-#include "base/misc.hh"
-#include "base/pollevent.hh"
-#include "base/range.hh"
-#include "base/stats/events.hh"
-#include "base/trace.hh"
-#include "cpu/base.hh"
-#include "cpu/cpu_exec_context.hh"
-#include "cpu/exec_context.hh"
-#include "cpu/exetrace.hh"
-#include "cpu/profile.hh"
-#include "cpu/sampler/sampler.hh"
-#include "cpu/simple/cpu.hh"
-#include "cpu/smt.hh"
-#include "cpu/static_inst.hh"
-#include "kern/kernel_stats.hh"
-#include "mem/packet_impl.hh"
-#include "sim/byteswap.hh"
-#include "sim/builder.hh"
-#include "sim/debug.hh"
-#include "sim/host.hh"
-#include "sim/sim_events.hh"
-#include "sim/sim_object.hh"
-#include "sim/stats.hh"
-
-#if FULL_SYSTEM
-#include "base/remote_gdb.hh"
-//#include "mem/functional/memory_control.hh"
-//#include "mem/functional/physical.hh"
-#include "sim/system.hh"
-#include "arch/tlb.hh"
-#include "arch/stacktrace.hh"
-#include "arch/vtophys.hh"
-#else // !FULL_SYSTEM
-#include "mem/mem_object.hh"
-#endif // FULL_SYSTEM
-
-using namespace std;
-using namespace TheISA;
-
-SimpleCPU::TickEvent::TickEvent(SimpleCPU *c, int w)
-    : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c), width(w)
-{
-}
-
-
-void
-SimpleCPU::init()
-{
-    //Create Memory Ports (conect them up)
-    Port *mem_dport = mem->getPort("");
-    dcachePort.setPeer(mem_dport);
-    mem_dport->setPeer(&dcachePort);
-
-    Port *mem_iport = mem->getPort("");
-    icachePort.setPeer(mem_iport);
-    mem_iport->setPeer(&icachePort);
-
-    BaseCPU::init();
-#if FULL_SYSTEM
-    for (int i = 0; i < execContexts.size(); ++i) {
-        ExecContext *xc = execContexts[i];
-
-        // initialize CPU, including PC
-        TheISA::initCPU(xc, xc->readCpuId());
-    }
-#endif
-}
-
-void
-SimpleCPU::TickEvent::process()
-{
-    int count = width;
-    do {
-        cpu->tick();
-    } while (--count > 0 && cpu->status() == Running);
-}
-
-const char *
-SimpleCPU::TickEvent::description()
-{
-    return "SimpleCPU tick event";
-}
-
-
-bool
-SimpleCPU::CpuPort::recvTiming(Packet &pkt)
-{
-    cpu->processResponse(pkt);
-    return true;
-}
-
-Tick
-SimpleCPU::CpuPort::recvAtomic(Packet &pkt)
-{
-    panic("CPU doesn't expect callback!");
-    return curTick;
-}
-
-void
-SimpleCPU::CpuPort::recvFunctional(Packet &pkt)
-{
-    panic("CPU doesn't expect callback!");
-}
-
-void
-SimpleCPU::CpuPort::recvStatusChange(Status status)
-{
-    cpu->recvStatusChange(status);
-}
-
-Packet *
-SimpleCPU::CpuPort::recvRetry()
-{
-    return cpu->processRetry();
-}
-
-SimpleCPU::SimpleCPU(Params *p)
-    : BaseCPU(p), mem(p->mem), icachePort(this),
-      dcachePort(this), tickEvent(this, p->width), cpuXC(NULL)
-{
-    _status = Idle;
-
-#if FULL_SYSTEM
-    cpuXC = new CPUExecContext(this, 0, p->system, p->itb, p->dtb);
-#else
-    cpuXC = new CPUExecContext(this, /* thread_num */ 0, p->process,
-            /* asid */ 0, mem);
-#endif // !FULL_SYSTEM
-
-    xcProxy = cpuXC->getProxy();
-
-#if SIMPLE_CPU_MEM_ATOMIC || SIMPLE_CPU_MEM_IMMEDIATE
-    ifetch_req = new Request(true);
-    ifetch_req->setAsid(0);
-    // @todo fix me and get the real cpu iD!!!
-    ifetch_req->setCpuNum(0);
-    ifetch_req->setSize(sizeof(MachInst));
-    ifetch_pkt = new Packet;
-    ifetch_pkt->cmd = Read;
-    ifetch_pkt->dataStatic(&inst);
-    ifetch_pkt->req = ifetch_req;
-    ifetch_pkt->size = sizeof(MachInst);
-
-    data_read_req = new Request(true);
-    // @todo fix me and get the real cpu iD!!!
-    data_read_req->setCpuNum(0);
-    data_read_req->setAsid(0);
-    data_read_pkt = new Packet;
-    data_read_pkt->cmd = Read;
-    data_read_pkt->dataStatic(&dataReg);
-    data_read_pkt->req = data_read_req;
-
-    data_write_req = new Request(true);
-    // @todo fix me and get the real cpu iD!!!
-    data_write_req->setCpuNum(0);
-    data_write_req->setAsid(0);
-    data_write_pkt = new Packet;
-    data_write_pkt->cmd = Write;
-    data_write_pkt->req = data_write_req;
-#endif
-
-    numInst = 0;
-    startNumInst = 0;
-    numLoad = 0;
-    startNumLoad = 0;
-    lastIcacheStall = 0;
-    lastDcacheStall = 0;
-
-    execContexts.push_back(xcProxy);
-}
-
-SimpleCPU::~SimpleCPU()
-{
-}
-
-void
-SimpleCPU::switchOut(Sampler *s)
-{
-    sampler = s;
-    if (status() == DcacheWaitResponse) {
-        DPRINTF(Sampler,"Outstanding dcache access, waiting for completion\n");
-        _status = DcacheWaitSwitch;
-    }
-    else {
-        _status = SwitchedOut;
-
-        if (tickEvent.scheduled())
-            tickEvent.squash();
-
-        sampler->signalSwitched();
-    }
-}
-
-
-void
-SimpleCPU::takeOverFrom(BaseCPU *oldCPU)
-{
-    BaseCPU::takeOverFrom(oldCPU);
-
-    assert(!tickEvent.scheduled());
-
-    // if any of this CPU's ExecContexts are active, mark the CPU as
-    // running and schedule its tick event.
-    for (int i = 0; i < execContexts.size(); ++i) {
-        ExecContext *xc = execContexts[i];
-        if (xc->status() == ExecContext::Active && _status != Running) {
-            _status = Running;
-            tickEvent.schedule(curTick);
-        }
-    }
-}
-
-
-void
-SimpleCPU::activateContext(int thread_num, int delay)
-{
-    assert(thread_num == 0);
-    assert(cpuXC);
-
-    assert(_status == Idle);
-    notIdleFraction++;
-    scheduleTickEvent(delay);
-    _status = Running;
-}
-
-
-void
-SimpleCPU::suspendContext(int thread_num)
-{
-    assert(thread_num == 0);
-    assert(cpuXC);
-
-    assert(_status == Running);
-    notIdleFraction--;
-    unscheduleTickEvent();
-    _status = Idle;
-}
-
-
-void
-SimpleCPU::deallocateContext(int thread_num)
-{
-    // for now, these are equivalent
-    suspendContext(thread_num);
-}
-
-
-void
-SimpleCPU::haltContext(int thread_num)
-{
-    // for now, these are equivalent
-    suspendContext(thread_num);
-}
-
-
-void
-SimpleCPU::regStats()
-{
-    using namespace Stats;
-
-    BaseCPU::regStats();
-
-    numInsts
-        .name(name() + ".num_insts")
-        .desc("Number of instructions executed")
-        ;
-
-    numMemRefs
-        .name(name() + ".num_refs")
-        .desc("Number of memory references")
-        ;
-
-    notIdleFraction
-        .name(name() + ".not_idle_fraction")
-        .desc("Percentage of non-idle cycles")
-        ;
-
-    idleFraction
-        .name(name() + ".idle_fraction")
-        .desc("Percentage of idle cycles")
-        ;
-
-    icacheStallCycles
-        .name(name() + ".icache_stall_cycles")
-        .desc("ICache total stall cycles")
-        .prereq(icacheStallCycles)
-        ;
-
-    dcacheStallCycles
-        .name(name() + ".dcache_stall_cycles")
-        .desc("DCache total stall cycles")
-        .prereq(dcacheStallCycles)
-        ;
-
-    icacheRetryCycles
-        .name(name() + ".icache_retry_cycles")
-        .desc("ICache total retry cycles")
-        .prereq(icacheRetryCycles)
-        ;
-
-    dcacheRetryCycles
-        .name(name() + ".dcache_retry_cycles")
-        .desc("DCache total retry cycles")
-        .prereq(dcacheRetryCycles)
-        ;
-
-    idleFraction = constant(1.0) - notIdleFraction;
-}
-
-void
-SimpleCPU::resetStats()
-{
-    startNumInst = numInst;
-    notIdleFraction = (_status != Idle);
-}
-
-void
-SimpleCPU::serialize(ostream &os)
-{
-    BaseCPU::serialize(os);
-    SERIALIZE_ENUM(_status);
-    SERIALIZE_SCALAR(inst);
-    nameOut(os, csprintf("%s.xc", name()));
-    cpuXC->serialize(os);
-    nameOut(os, csprintf("%s.tickEvent", name()));
-    tickEvent.serialize(os);
-    nameOut(os, csprintf("%s.cacheCompletionEvent", name()));
-}
-
-void
-SimpleCPU::unserialize(Checkpoint *cp, const string &section)
-{
-    BaseCPU::unserialize(cp, section);
-    UNSERIALIZE_ENUM(_status);
-    UNSERIALIZE_SCALAR(inst);
-    cpuXC->unserialize(cp, csprintf("%s.xc", section));
-    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
-}
-
-void
-change_thread_state(int thread_number, int activate, int priority)
-{
-}
-
-Fault
-SimpleCPU::copySrcTranslate(Addr src)
-{
-#if 0
-    static bool no_warn = true;
-    int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
-    // Only support block sizes of 64 atm.
-    assert(blk_size == 64);
-    int offset = src & (blk_size - 1);
-
-    // Make sure block doesn't span page
-    if (no_warn &&
-        (src & PageMask) != ((src + blk_size) & PageMask) &&
-        (src >> 40) != 0xfffffc) {
-        warn("Copied block source spans pages %x.", src);
-        no_warn = false;
-    }
-
-    memReq->reset(src & ~(blk_size - 1), blk_size);
-
-    // translate to physical address    Fault fault = cpuXC->translateDataReadReq(req);
-
-    if (fault == NoFault) {
-        cpuXC->copySrcAddr = src;
-        cpuXC->copySrcPhysAddr = memReq->paddr + offset;
-    } else {
-        assert(!fault->isAlignmentFault());
-
-        cpuXC->copySrcAddr = 0;
-        cpuXC->copySrcPhysAddr = 0;
-    }
-    return fault;
-#else
-    return NoFault;
-#endif
-}
-
-Fault
-SimpleCPU::copy(Addr dest)
-{
-#if 0
-    static bool no_warn = true;
-    int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
-    // Only support block sizes of 64 atm.
-    assert(blk_size == 64);
-    uint8_t data[blk_size];
-    //assert(cpuXC->copySrcAddr);
-    int offset = dest & (blk_size - 1);
-
-    // Make sure block doesn't span page
-    if (no_warn &&
-        (dest & PageMask) != ((dest + blk_size) & PageMask) &&
-        (dest >> 40) != 0xfffffc) {
-        no_warn = false;
-        warn("Copied block destination spans pages %x. ", dest);
-    }
-
-    memReq->reset(dest & ~(blk_size -1), blk_size);
-    // translate to physical address
-    Fault fault = cpuXC->translateDataWriteReq(req);
-
-    if (fault == NoFault) {
-        Addr dest_addr = memReq->paddr + offset;
-        // Need to read straight from memory since we have more than 8 bytes.
-        memReq->paddr = cpuXC->copySrcPhysAddr;
-        cpuXC->mem->read(memReq, data);
-        memReq->paddr = dest_addr;
-        cpuXC->mem->write(memReq, data);
-        if (dcacheInterface) {
-            memReq->cmd = Copy;
-            memReq->completionEvent = NULL;
-            memReq->paddr = cpuXC->copySrcPhysAddr;
-            memReq->dest = dest_addr;
-            memReq->size = 64;
-            memReq->time = curTick;
-            memReq->flags &= ~INST_READ;
-            dcacheInterface->access(memReq);
-        }
-    }
-    else
-        assert(!fault->isAlignmentFault());
-
-    return fault;
-#else
-    panic("copy not implemented");
-    return NoFault;
-#endif
-}
-
-// precise architected memory state accessor macros
-template <class T>
-Fault
-SimpleCPU::read(Addr addr, T &data, unsigned flags)
-{
-    if (status() == DcacheWaitResponse || status() == DcacheWaitSwitch) {
-//	Fault fault = xc->read(memReq,data);
-        // Not sure what to check for no fault...
-        if (data_read_pkt->result == Success) {
-            data = data_read_pkt->get<T>();
-        }
-
-        if (traceData) {
-            traceData->setAddr(data_read_req->getVaddr());
-        }
-
-        // @todo: Figure out a way to create a Fault from the packet result.
-        return NoFault;
-    }
-
-//    memReq->reset(addr, sizeof(T), flags);
-
-#if SIMPLE_CPU_MEM_TIMING
-    CpuRequest *data_read_req = new Request(true);
-#endif
-
-    data_read_req->setVaddr(addr);
-    data_read_req->setSize(sizeof(T));
-    data_read_req->setFlags(flags);
-    data_read_req->setTime(curTick);
-
-    // translate to physical address
-    Fault fault = cpuXC->translateDataReadReq(data_read_req);
-
-    // Now do the access.
-    if (fault == NoFault) {
-#if SIMPLE_CPU_MEM_TIMING
-        data_read_pkt = new Packet;
-        data_read_pkt->cmd = Read;
-        data_read_pkt->req = data_read_req;
-        data_read_pkt->data = new uint8_t[8];
-#endif
-        data_read_pkt->reset();
-        data_read_pkt->addr = data_read_req->getPaddr();
-        data_read_pkt->size = sizeof(T);
-
-        sendDcacheRequest(data_read_pkt);
-
-#if SIMPLE_CPU_MEM_IMMEDIATE
-        // Need to find a way to not duplicate code above.
-
-        if (data_read_pkt->result == Success) {
-            data = data_read_pkt->get<T>();
-        }
-
-        if (traceData) {
-            traceData->setAddr(addr);
-        }
-
-        // @todo: Figure out a way to create a Fault from the packet result.
-        return NoFault;
-#endif
-    }
-/*
-        memReq->cmd = Read;
-        memReq->completionEvent = NULL;
-        memReq->time = curTick;
-        memReq->flags &= ~INST_READ;
-        MemAccessResult result = dcacheInterface->access(memReq);
-
-        // Ugly hack to get an event scheduled *only* if the access is
-        // a miss.  We really should add first-class support for this
-        // at some point.
-        if (result != MA_HIT && dcacheInterface->doEvents()) {
-            memReq->completionEvent = &cacheCompletionEvent;
-            lastDcacheStall = curTick;
-            unscheduleTickEvent();
-            _status = DcacheMissStall;
-        } else {
-            // do functional access
-            fault = cpuXC->read(memReq, data);
-
-        }
-    } else if(fault == NoFault) {
-        // do functional access
-        fault = cpuXC->read(memReq, data);
-
-    }
-*/
-    // This will need a new way to tell if it has a dcache attached.
-    if (data_read_req->getFlags() & UNCACHEABLE)
-        recordEvent("Uncached Read");
-
-    return fault;
-}
-
-#ifndef DOXYGEN_SHOULD_SKIP_THIS
-
-template
-Fault
-SimpleCPU::read(Addr addr, uint64_t &data, unsigned flags);
-
-template
-Fault
-SimpleCPU::read(Addr addr, uint32_t &data, unsigned flags);
-
-template
-Fault
-SimpleCPU::read(Addr addr, uint16_t &data, unsigned flags);
-
-template
-Fault
-SimpleCPU::read(Addr addr, uint8_t &data, unsigned flags);
-
-#endif //DOXYGEN_SHOULD_SKIP_THIS
-
-template<>
-Fault
-SimpleCPU::read(Addr addr, double &data, unsigned flags)
-{
-    return read(addr, *(uint64_t*)&data, flags);
-}
-
-template<>
-Fault
-SimpleCPU::read(Addr addr, float &data, unsigned flags)
-{
-    return read(addr, *(uint32_t*)&data, flags);
-}
-
-
-template<>
-Fault
-SimpleCPU::read(Addr addr, int32_t &data, unsigned flags)
-{
-    return read(addr, (uint32_t&)data, flags);
-}
-
-
-template <class T>
-Fault
-SimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
-{
-    data_write_req->setVaddr(addr);
-    data_write_req->setTime(curTick);
-    data_write_req->setSize(sizeof(T));
-    data_write_req->setFlags(flags);
-
-    // translate to physical address
-    Fault fault = cpuXC->translateDataWriteReq(data_write_req);
-    // Now do the access.
-    if (fault == NoFault) {
-#if SIMPLE_CPU_MEM_TIMING
-        data_write_pkt = new Packet;
-        data_write_pkt->cmd = Write;
-        data_write_pkt->req = data_write_req;
-        data_write_pkt->allocate();
-        data_write_pkt->set(data);
-#else
-        data_write_pkt->reset();
-        data = htog(data);
-        data_write_pkt->dataStatic(&data);
-#endif
-        data_write_pkt->addr = data_write_req->getPaddr();
-        data_write_pkt->size = sizeof(T);
-
-        sendDcacheRequest(data_write_pkt);
-    }
-
-/*
-    // do functional access
-    if (fault == NoFault)
-        fault = cpuXC->write(memReq, data);
-
-    if (fault == NoFault && dcacheInterface) {
-        memReq->cmd = Write;
-        memcpy(memReq->data,(uint8_t *)&data,memReq->size);
-        memReq->completionEvent = NULL;
-        memReq->time = curTick;
-        memReq->flags &= ~INST_READ;
-        MemAccessResult result = dcacheInterface->access(memReq);
-
-        // Ugly hack to get an event scheduled *only* if the access is
-        // a miss.  We really should add first-class support for this
-        // at some point.
-        if (result != MA_HIT && dcacheInterface->doEvents()) {
-            memReq->completionEvent = &cacheCompletionEvent;
-            lastDcacheStall = curTick;
-            unscheduleTickEvent();
-            _status = DcacheMissStall;
-        }
-    }
-*/
-    if (res && (fault == NoFault))
-        *res = data_write_pkt->result;
-
-    // This will need a new way to tell if it's hooked up to a cache or not.
-    if (data_write_req->getFlags() & UNCACHEABLE)
-        recordEvent("Uncached Write");
-
-    // @todo this is a hack and only works on uniprocessor systems some one else
-    // can implement LL/SC.
-    if (data_write_req->getFlags() & LOCKED)
-        *res = 1;
-
-    // If the write needs to have a fault on the access, consider calling
-    // changeStatus() and changing it to "bad addr write" or something.
-    return fault;
-}
-
-
-#ifndef DOXYGEN_SHOULD_SKIP_THIS
-template
-Fault
-SimpleCPU::write(uint64_t data, Addr addr, unsigned flags, uint64_t *res);
-
-template
-Fault
-SimpleCPU::write(uint32_t data, Addr addr, unsigned flags, uint64_t *res);
-
-template
-Fault
-SimpleCPU::write(uint16_t data, Addr addr, unsigned flags, uint64_t *res);
-
-template
-Fault
-SimpleCPU::write(uint8_t data, Addr addr, unsigned flags, uint64_t *res);
-
-#endif //DOXYGEN_SHOULD_SKIP_THIS
-
-template<>
-Fault
-SimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res)
-{
-    return write(*(uint64_t*)&data, addr, flags, res);
-}
-
-template<>
-Fault
-SimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res)
-{
-    return write(*(uint32_t*)&data, addr, flags, res);
-}
-
-
-template<>
-Fault
-SimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res)
-{
-    return write((uint32_t)data, addr, flags, res);
-}
-
-
-#if FULL_SYSTEM
-Addr
-SimpleCPU::dbg_vtophys(Addr addr)
-{
-    return vtophys(xcProxy, addr);
-}
-#endif // FULL_SYSTEM
-
-void
-SimpleCPU::sendIcacheRequest(Packet *pkt)
-{
-    assert(!tickEvent.scheduled());
-#if SIMPLE_CPU_MEM_TIMING
-    retry_pkt = pkt;
-    bool success = icachePort.sendTiming(*pkt);
-
-    unscheduleTickEvent();
-
-    lastIcacheStall = curTick;
-
-    if (!success) {
-        // Need to wait for retry
-        _status = IcacheRetry;
-    } else {
-        // Need to wait for cache to respond
-        _status = IcacheWaitResponse;
-    }
-#elif SIMPLE_CPU_MEM_ATOMIC
-    Tick latency = icachePort.sendAtomic(*pkt);
-
-    unscheduleTickEvent();
-    scheduleTickEvent(latency);
-
-    // Note that Icache miss cycles will be incorrect.  Unless
-    // we check the status of the packet sent (is this valid?),
-    // we won't know if the latency is a hit or a miss.
-    icacheStallCycles += latency;
-
-    _status = IcacheAccessComplete;
-#elif SIMPLE_CPU_MEM_IMMEDIATE
-    icachePort.sendAtomic(*pkt);
-#else
-#error "SimpleCPU has no mem model set"
-#endif
-}
-
-void
-SimpleCPU::sendDcacheRequest(Packet *pkt)
-{
-    assert(!tickEvent.scheduled());
-#if SIMPLE_CPU_MEM_TIMING
-    unscheduleTickEvent();
-
-    retry_pkt = pkt;
-    bool success = dcachePort.sendTiming(*pkt);
-
-    lastDcacheStall = curTick;
-
-    if (!success) {
-        _status = DcacheRetry;
-    } else {
-        _status = DcacheWaitResponse;
-    }
-#elif SIMPLE_CPU_MEM_ATOMIC
-    unscheduleTickEvent();
-
-    Tick latency = dcachePort.sendAtomic(*pkt);
-
-    scheduleTickEvent(latency);
-
-    // Note that Dcache miss cycles will be incorrect.  Unless
-    // we check the status of the packet sent (is this valid?),
-    // we won't know if the latency is a hit or a miss.
-    dcacheStallCycles += latency;
-#elif SIMPLE_CPU_MEM_IMMEDIATE
-    dcachePort.sendAtomic(*pkt);
-#else
-#error "SimpleCPU has no mem model set"
-#endif
-}
-
-void
-SimpleCPU::processResponse(Packet &response)
-{
-    assert(SIMPLE_CPU_MEM_TIMING);
-
-    // For what things is the CPU the consumer of the packet it sent
-    // out?  This may create a memory leak if that's the case and it's
-    // expected of the SimpleCPU to delete its own packet.
-    Packet *pkt = &response;
-
-    switch (status()) {
-      case IcacheWaitResponse:
-        icacheStallCycles += curTick - lastIcacheStall;
-
-        _status = IcacheAccessComplete;
-        scheduleTickEvent(1);
-
-        // Copy the icache data into the instruction itself.
-        inst = pkt->get<MachInst>();
-
-        delete pkt;
-        break;
-      case DcacheWaitResponse:
-        if (pkt->cmd == Read) {
-            curStaticInst->execute(this,traceData);
-            if (traceData)
-                traceData->finalize();
-        }
-
-        delete pkt;
-
-        dcacheStallCycles += curTick - lastDcacheStall;
-        _status = Running;
-        scheduleTickEvent(1);
-        break;
-      case DcacheWaitSwitch:
-        if (pkt->cmd == Read) {
-            curStaticInst->execute(this,traceData);
-            if (traceData)
-                traceData->finalize();
-        }
-
-        delete pkt;
-
-        _status = SwitchedOut;
-        sampler->signalSwitched();
-      case SwitchedOut:
-        // If this CPU has been switched out due to sampling/warm-up,
-        // ignore any further status changes (e.g., due to cache
-        // misses outstanding at the time of the switch).
-        delete pkt;
-
-        return;
-      default:
-        panic("SimpleCPU::processCacheCompletion: bad state");
-        break;
-    }
-}
-
-Packet *
-SimpleCPU::processRetry()
-{
-#if SIMPLE_CPU_MEM_TIMING
-    switch(status()) {
-      case IcacheRetry:
-        icacheRetryCycles += curTick - lastIcacheStall;
-        return retry_pkt;
-        break;
-      case DcacheRetry:
-        dcacheRetryCycles += curTick - lastDcacheStall;
-        return retry_pkt;
-        break;
-      default:
-        panic("SimpleCPU::processRetry: bad state");
-        break;
-    }
-#else
-    panic("shouldn't be here");
-#endif
-}
-
-#if FULL_SYSTEM
-void
-SimpleCPU::post_interrupt(int int_num, int index)
-{
-    BaseCPU::post_interrupt(int_num, index);
-
-    if (cpuXC->status() == ExecContext::Suspended) {
-                DPRINTF(IPI,"Suspended Processor awoke\n");
-        cpuXC->activate();
-    }
-}
-#endif // FULL_SYSTEM
-
-/* start simulation, program loaded, processor precise state initialized */
-void
-SimpleCPU::tick()
-{
-    DPRINTF(SimpleCPU,"\n\n");
-
-    numCycles++;
-
-    traceData = NULL;
-
-    Fault fault = NoFault;
-
-#if FULL_SYSTEM
-    if (checkInterrupts && check_interrupts() && !cpuXC->inPalMode() &&
-        status() != IcacheAccessComplete) {
-        int ipl = 0;
-        int summary = 0;
-        checkInterrupts = false;
-
-        if (cpuXC->readMiscReg(IPR_SIRR)) {
-            for (int i = INTLEVEL_SOFTWARE_MIN;
-                 i < INTLEVEL_SOFTWARE_MAX; i++) {
-                if (cpuXC->readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
-                    // See table 4-19 of 21164 hardware reference
-                    ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
-                    summary |= (ULL(1) << i);
-                }
-            }
-        }
-
-        uint64_t interrupts = cpuXC->cpu->intr_status();
-        for (int i = INTLEVEL_EXTERNAL_MIN;
-            i < INTLEVEL_EXTERNAL_MAX; i++) {
-            if (interrupts & (ULL(1) << i)) {
-                // See table 4-19 of 21164 hardware reference
-                ipl = i;
-                summary |= (ULL(1) << i);
-            }
-        }
-
-        if (cpuXC->readMiscReg(IPR_ASTRR))
-            panic("asynchronous traps not implemented\n");
-
-        if (ipl && ipl > cpuXC->readMiscReg(IPR_IPLR)) {
-            cpuXC->setMiscReg(IPR_ISR, summary);
-            cpuXC->setMiscReg(IPR_INTID, ipl);
-
-            Fault(new InterruptFault)->invoke(xcProxy);
-
-            DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
-                    cpuXC->readMiscReg(IPR_IPLR), ipl, summary);
-        }
-    }
-#endif
-
-    // maintain $r0 semantics
-    cpuXC->setIntReg(ZeroReg, 0);
-#if THE_ISA == ALPHA_ISA
-    cpuXC->setFloatReg(ZeroReg, 0.0);
-#endif // ALPHA_ISA
-
-    if (status() == IcacheAccessComplete) {
-        // We've already fetched an instruction and were stalled on an
-        // I-cache miss.  No need to fetch it again.
-
-        // Set status to running; tick event will get rescheduled if
-        // necessary at end of tick() function.
-        _status = Running;
-    } else {
-        // Try to fetch an instruction
-
-        // set up memory request for instruction fetch
-
-        DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p NNPC:%08p\n",cpuXC->readPC(),
-                cpuXC->readNextPC(),cpuXC->readNextNPC());
-
-#if SIMPLE_CPU_MEM_TIMING
-        CpuRequest *ifetch_req = new CpuRequest();
-        ifetch_req->setSize(sizeof(MachInst));
-#endif
-
-        ifetch_req->resetMin();
-        ifetch_req->setVaddr(cpuXC->readPC() & ~3);
-        ifetch_req->setTime(curTick);
-#if FULL_SYSTEM
-        ifetch_req->setFlags((cpuXC->readPC() & 1) ? PHYSICAL : 0);
-#else
-        ifetch_req->setFlags(0);
-#endif
-
-        fault = cpuXC->translateInstReq(ifetch_req);
-
-        if (fault == NoFault) {
-#if SIMPLE_CPU_MEM_TIMING
-            Packet *ifetch_pkt = new Packet;
-            ifetch_pkt->cmd = Read;
-            ifetch_pkt->data = (uint8_t *)&inst;
-            ifetch_pkt->req = ifetch_req;
-            ifetch_pkt->size = sizeof(MachInst);
-#endif
-            ifetch_pkt->reset();
-            ifetch_pkt->addr = ifetch_req->getPaddr();
-
-            sendIcacheRequest(ifetch_pkt);
-#if SIMPLE_CPU_MEM_TIMING || SIMPLE_CPU_MEM_ATOMIC
-            return;
-#endif
-/*
-        if (icacheInterface && fault == NoFault) {
-            memReq->completionEvent = NULL;
-
-            memReq->time = curTick;
-            memReq->flags |= INST_READ;
-            MemAccessResult result = icacheInterface->access(memReq);
-
-            // Ugly hack to get an event scheduled *only* if the access is
-            // a miss.  We really should add first-class support for this
-            // at some point.
-                if (result != MA_HIT && icacheInterface->doEvents()) {
-                memReq->completionEvent = &cacheCompletionEvent;
-                lastIcacheStall = curTick;
-                unscheduleTickEvent();
-                _status = IcacheMissStall;
-                return;
-            }
-        }
-*/
-        }
-    }
-
-    // If we've got a valid instruction (i.e., no fault on instruction
-    // fetch), then execute it.
-    if (fault == NoFault) {
-
-        // keep an instruction count
-        numInst++;
-        numInsts++;
-
-        // check for instruction-count-based events
-        comInstEventQueue[0]->serviceEvents(numInst);
-
-        // decode the instruction
-        inst = gtoh(inst);
-        curStaticInst = StaticInst::decode(makeExtMI(inst, cpuXC->readPC()));
-
-        traceData = Trace::getInstRecord(curTick, xcProxy, this, curStaticInst,
-                                         cpuXC->readPC());
-
-        DPRINTF(Decode,"Decode: Decoded %s instruction (opcode: 0x%x): 0x%x\n",
-                curStaticInst->getName(),curStaticInst->getOpcode(), curStaticInst->machInst);
-
-#if FULL_SYSTEM
-        cpuXC->setInst(inst);
-#endif // FULL_SYSTEM
-
-        cpuXC->func_exe_inst++;
-
-        fault = curStaticInst->execute(this, traceData);
-
-#if FULL_SYSTEM
-        if (system->kernelBinning->fnbin) {
-            assert(kernelStats);
-            system->kernelBinning->execute(xcProxy, inst);
-        }
-
-        if (cpuXC->profile) {
-            bool usermode =
-                (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
-            cpuXC->profilePC = usermode ? 1 : cpuXC->readPC();
-            ProfileNode *node = cpuXC->profile->consume(xcProxy, inst);
-            if (node)
-                cpuXC->profileNode = node;
-        }
-#endif
-
-        if (curStaticInst->isMemRef()) {
-            numMemRefs++;
-        }
-
-        if (curStaticInst->isLoad()) {
-            ++numLoad;
-            comLoadEventQueue[0]->serviceEvents(numLoad);
-        }
-
-        // If we have a dcache miss, then we can't finialize the instruction
-        // trace yet because we want to populate it with the data later
-        if (traceData && (status() != DcacheWaitResponse)) {
-            traceData->finalize();
-        }
-
-        traceFunctions(cpuXC->readPC());
-
-    }	// if (fault == NoFault)
-
-    if (fault != NoFault) {
-#if FULL_SYSTEM
-        fault->invoke(xcProxy);
-#else // !FULL_SYSTEM
-        fatal("fault (%s) detected @ PC %08p", fault->name(), cpuXC->readPC());
-#endif // FULL_SYSTEM
-    }
-    else {
-#if THE_ISA == ALPHA_ISA
-        // go to the next instruction
-        cpuXC->setPC(cpuXC->readNextPC());
-        cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst));
-#else
-        // go to the next instruction
-        cpuXC->setPC(cpuXC->readNextPC());
-        cpuXC->setNextPC(cpuXC->readNextNPC());
-        cpuXC->setNextNPC(cpuXC->readNextNPC() + sizeof(MachInst));
-#endif
-
-    }
-
-#if FULL_SYSTEM
-    Addr oldpc;
-    do {
-        oldpc = cpuXC->readPC();
-        system->pcEventQueue.service(xcProxy);
-    } while (oldpc != cpuXC->readPC());
-#endif
-
-    assert(status() == Running ||
-           status() == Idle ||
-           status() == DcacheWaitResponse);
-
-    if (status() == Running && !tickEvent.scheduled())
-        tickEvent.schedule(curTick + cycles(1));
-}
-
-////////////////////////////////////////////////////////////////////////
-//
-//  SimpleCPU Simulation Object
-//
-BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
-
-    Param<Counter> max_insts_any_thread;
-    Param<Counter> max_insts_all_threads;
-    Param<Counter> max_loads_any_thread;
-    Param<Counter> max_loads_all_threads;
-    SimObjectParam<MemObject *> mem;
-
-#if FULL_SYSTEM
-    SimObjectParam<AlphaITB *> itb;
-    SimObjectParam<AlphaDTB *> dtb;
-    SimObjectParam<System *> system;
-    Param<int> cpu_id;
-    Param<Tick> profile;
-#else
-    SimObjectParam<Process *> workload;
-#endif // FULL_SYSTEM
-
-    Param<int> clock;
-
-    Param<bool> defer_registration;
-    Param<int> width;
-    Param<bool> function_trace;
-    Param<Tick> function_trace_start;
-
-END_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
-
-BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
-
-    INIT_PARAM(max_insts_any_thread,
-               "terminate when any thread reaches this inst count"),
-    INIT_PARAM(max_insts_all_threads,
-               "terminate when all threads have reached this inst count"),
-    INIT_PARAM(max_loads_any_thread,
-               "terminate when any thread reaches this load count"),
-    INIT_PARAM(max_loads_all_threads,
-               "terminate when all threads have reached this load count"),
-    INIT_PARAM(mem, "memory"),
-
-#if FULL_SYSTEM
-    INIT_PARAM(itb, "Instruction TLB"),
-    INIT_PARAM(dtb, "Data TLB"),
-    INIT_PARAM(system, "system object"),
-    INIT_PARAM(cpu_id, "processor ID"),
-    INIT_PARAM(profile, ""),
-#else
-    INIT_PARAM(workload, "processes to run"),
-#endif // FULL_SYSTEM
-
-    INIT_PARAM(clock, "clock speed"),
-    INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
-    INIT_PARAM(width, "cpu width"),
-    INIT_PARAM(function_trace, "Enable function trace"),
-    INIT_PARAM(function_trace_start, "Cycle to start function trace")
-
-END_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
-
-
-CREATE_SIM_OBJECT(SimpleCPU)
-{
-    SimpleCPU::Params *params = new SimpleCPU::Params();
-    params->name = getInstanceName();
-    params->numberOfThreads = 1;
-    params->max_insts_any_thread = max_insts_any_thread;
-    params->max_insts_all_threads = max_insts_all_threads;
-    params->max_loads_any_thread = max_loads_any_thread;
-    params->max_loads_all_threads = max_loads_all_threads;
-    params->deferRegistration = defer_registration;
-    params->clock = clock;
-    params->functionTrace = function_trace;
-    params->functionTraceStart = function_trace_start;
-    params->width = width;
-    params->mem = mem;
-
-#if FULL_SYSTEM
-    params->itb = itb;
-    params->dtb = dtb;
-    params->system = system;
-    params->cpu_id = cpu_id;
-    params->profile = profile;
-#else
-    params->process = workload;
-#endif
-
-    SimpleCPU *cpu = new SimpleCPU(params);
-    return cpu;
-}
-
-REGISTER_SIM_OBJECT("SimpleCPU", SimpleCPU)
-
diff --git a/cpu/simple/timing.cc b/cpu/simple/timing.cc
new file mode 100644
index 000000000..a511c3dbb
--- /dev/null
+++ b/cpu/simple/timing.cc
@@ -0,0 +1,559 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/utility.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/simple/timing.hh"
+#include "mem/packet_impl.hh"
+#include "sim/builder.hh"
+
+using namespace std;
+using namespace TheISA;
+
+
+void
+TimingSimpleCPU::init()
+{
+    // Create the memory ports and connect each one to its peer on `mem`
+    Port *mem_dport = mem->getPort("");  // data side: peer of dcachePort
+    dcachePort.setPeer(mem_dport);
+    mem_dport->setPeer(&dcachePort);
+
+    Port *mem_iport = mem->getPort("");  // fetch side: peer of icachePort
+    icachePort.setPeer(mem_iport);
+    mem_iport->setPeer(&icachePort);
+
+    BaseCPU::init();
+#if FULL_SYSTEM
+    for (int i = 0; i < execContexts.size(); ++i) {
+        ExecContext *xc = execContexts[i];
+
+        // initialize each execution context's CPU state, including PC
+        TheISA::initCPU(xc, xc->readCpuId());
+    }
+#endif
+}
+
+Tick
+TimingSimpleCPU::CpuPort::recvAtomic(Packet &pkt)
+{   // reaching this callback is reported via panic(); pkt is not used
+    panic("TimingSimpleCPU doesn't expect recvAtomic callback!");
+    return curTick;  // presumably never reached; supplies the Tick result
+}
+
+void
+TimingSimpleCPU::CpuPort::recvFunctional(Packet &pkt)
+{   // reaching this callback is reported via panic(); pkt is not used
+    panic("TimingSimpleCPU doesn't expect recvFunctional callback!");
+}
+
+void
+TimingSimpleCPU::CpuPort::recvStatusChange(Status status)
+{   // reaching this callback is reported via panic(); status is not used
+    panic("TimingSimpleCPU doesn't expect recvStatusChange callback!");
+}
+
+TimingSimpleCPU::TimingSimpleCPU(Params *p)
+    : BaseSimpleCPU(p), icachePort(this), dcachePort(this)
+{
+    _status = Idle;                  // stays Idle until activateContext()
+    ifetch_pkt = dcache_pkt = NULL;  // no packet is waiting for a retry
+}
+
+
+TimingSimpleCPU::~TimingSimpleCPU()
+{   // empty: no explicit cleanup is performed here
+}
+
+void
+TimingSimpleCPU::serialize(ostream &os)
+{
+    BaseSimpleCPU::serialize(os);  // base-class state is written first
+    SERIALIZE_ENUM(_status);       // followed by this CPU's status enum
+}
+
+void
+TimingSimpleCPU::unserialize(Checkpoint *cp, const string &section)
+{
+    BaseSimpleCPU::unserialize(cp, section);  // base-class state first
+    UNSERIALIZE_ENUM(_status);                // then this CPU's status enum
+}
+
+void
+TimingSimpleCPU::switchOut(Sampler *s)
+{
+    // Remember the sampler, then mark the CPU as switched out -- but only
+    // when it is plainly Running; any other status is left untouched.
+    sampler = s;
+    if (status() == Running) _status = SwitchedOut;
+    sampler->signalSwitched();  // signalled whatever the status was
+}
+
+
+void
+TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
+{
+    BaseCPU::takeOverFrom(oldCPU);
+
+    // If any of this CPU's ExecContexts is active, mark the CPU as Running.
+    // NOTE(review): nothing is scheduled here to restart fetch() -- confirm.
+    for (int i = 0; i < execContexts.size(); ++i) {
+        ExecContext *xc = execContexts[i];
+        if (xc->status() == ExecContext::Active && _status != Running) {
+            _status = Running;
+            break;
+        }
+    }
+}
+
+
+void
+TimingSimpleCPU::activateContext(int thread_num, int delay)
+{
+    assert(thread_num == 0);  // only thread 0 is accepted
+    assert(cpuXC);
+
+    assert(_status == Idle);  // activation is only allowed from Idle
+
+    notIdleFraction++;
+    _status = Running;
+    // kick things off: schedule fetch() to run cycles(delay) ticks from now
+    Event *e =  // NOTE(review): `true` presumably means auto-delete; confirm
+        new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, true);
+    e->schedule(curTick + cycles(delay));
+}
+
+
+void
+TimingSimpleCPU::suspendContext(int thread_num)
+{
+    assert(thread_num == 0);  // only thread 0 is accepted
+    assert(cpuXC);
+
+    panic("TimingSimpleCPU::suspendContext not implemented");
+    // NOTE(review): everything below follows the panic(); presumably dead
+    assert(_status == Running);
+
+    notIdleFraction--;
+    _status = Idle;
+}
+
+
+template <class T>
+Fault
+TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
+{
+    Request *data_read_req = new Request(true);
+
+    data_read_req->setVaddr(addr);
+    data_read_req->setSize(sizeof(T));
+    data_read_req->setFlags(flags);
+    data_read_req->setTime(curTick);
+
+    if (traceData) {
+        traceData->setAddr(data_read_req->getVaddr());
+    }
+
+    // translate to physical address (NOTE(review): req not freed on fault)
+    Fault fault = cpuXC->translateDataReadReq(data_read_req);
+
+    // Now start the access; `data` is not filled in by this function.
+    if (fault == NoFault) {
+        Packet *data_read_pkt = new Packet;
+        data_read_pkt->cmd = Read;
+        data_read_pkt->req = data_read_req;
+        data_read_pkt->dataDynamic<T>(new T);
+        data_read_pkt->addr = data_read_req->getPaddr();
+        data_read_pkt->size = sizeof(T);
+        data_read_pkt->dest = Packet::Broadcast;
+
+        if (!dcachePort.sendTiming(*data_read_pkt)) {
+            _status = DcacheRetry;
+            dcache_pkt = data_read_pkt;  // parked for DcachePort::recvRetry()
+        } else {
+            _status = DcacheWaitResponse;
+            dcache_pkt = NULL;
+        }
+    }
+
+    // This will need a new way to tell if it has a dcache attached.
+    if (data_read_req->getFlags() & UNCACHEABLE)
+        recordEvent("Uncached Read");
+
+    return fault;
+}
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+template
+Fault
+TimingSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags);
+
+template
+Fault
+TimingSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags);
+
+template
+Fault
+TimingSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags);
+
+template
+Fault
+TimingSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags);
+
+#endif // DOXYGEN_SHOULD_SKIP_THIS (explicit read<T> instantiations above)
+
+template<>
+Fault
+TimingSimpleCPU::read(Addr addr, double &data, unsigned flags)
+{   // a double is read through the uint64_t version, viewed as raw bits
+    return read(addr, reinterpret_cast<uint64_t&>(data), flags);
+}
+
+template<>
+Fault
+TimingSimpleCPU::read(Addr addr, float &data, unsigned flags)
+{   // a float is read through the uint32_t version, viewed as raw bits
+    return read(addr, reinterpret_cast<uint32_t&>(data), flags);
+}
+
+
+template<>
+Fault
+TimingSimpleCPU::read(Addr addr, int32_t &data, unsigned flags)
+{   // a signed 32-bit value is read through the uint32_t version
+    return read(addr, reinterpret_cast<uint32_t&>(data), flags);
+}
+
+
+template <class T>
+Fault
+TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
+{
+    Request *data_write_req = new Request(true);
+    data_write_req->setVaddr(addr);
+    data_write_req->setTime(curTick);
+    data_write_req->setSize(sizeof(T));
+    data_write_req->setFlags(flags);
+
+    // translate to physical address (NOTE(review): req not freed on fault)
+    Fault fault = cpuXC->translateDataWriteReq(data_write_req);
+    // Now start the access.  `res` is not written by this function.
+    if (fault == NoFault) {
+        Packet *data_write_pkt = new Packet;
+        data_write_pkt->cmd = Write;
+        data_write_pkt->req = data_write_req;
+        data_write_pkt->size = sizeof(T);  // set first: allocate() may use it
+        data_write_pkt->allocate();
+        data_write_pkt->set(data);
+        data_write_pkt->addr = data_write_req->getPaddr();
+        data_write_pkt->dest = Packet::Broadcast;
+
+        if (!dcachePort.sendTiming(*data_write_pkt)) {
+            _status = DcacheRetry;
+            dcache_pkt = data_write_pkt;  // parked for DcachePort::recvRetry()
+        } else {
+            _status = DcacheWaitResponse;
+            dcache_pkt = NULL;
+        }
+    }
+
+    // This will need a new way to tell if it's hooked up to a cache or not.
+    if (data_write_req->getFlags() & UNCACHEABLE)
+        recordEvent("Uncached Write");
+
+    // If the write needs to have a fault on the access, consider calling
+    // changeStatus() and changing it to "bad addr write" or something.
+    return fault;
+}
+
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+template
+Fault
+TimingSimpleCPU::write(uint64_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+TimingSimpleCPU::write(uint32_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+TimingSimpleCPU::write(uint16_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+TimingSimpleCPU::write(uint8_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+#endif // DOXYGEN_SHOULD_SKIP_THIS (explicit write<T> instantiations above)
+
+template<>
+Fault
+TimingSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res)
+{   // a double is written through the uint64_t version, viewed as raw bits
+    return write(reinterpret_cast<uint64_t&>(data), addr, flags, res);
+}
+
+template<>
+Fault
+TimingSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res)
+{   // a float is written through the uint32_t version, viewed as raw bits
+    return write(reinterpret_cast<uint32_t&>(data), addr, flags, res);
+}
+
+
+template<>
+Fault
+TimingSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res)
+{   // a signed 32-bit value is converted and sent via the uint32_t version
+    return write(static_cast<uint32_t>(data), addr, flags, res);
+}
+
+
+void
+TimingSimpleCPU::fetch()
+{
+    // Build a fresh request/packet pair for one MachInst-sized read.
+    // dataStatic(&inst) presumably makes the response land in `inst`
+    // (TODO confirm against Packet).
+    Request *req = new Request(true);
+    req->setSize(sizeof(MachInst));
+
+    ifetch_pkt = new Packet;
+    ifetch_pkt->cmd = Read;
+    ifetch_pkt->dataStatic(&inst);
+    ifetch_pkt->req = req;
+    ifetch_pkt->size = sizeof(MachInst);
+    ifetch_pkt->dest = Packet::Broadcast;
+
+    if (setupFetchPacket(ifetch_pkt) != NoFault) {
+        panic("TimingSimpleCPU fetch fault handling not implemented");
+    } else if (icachePort.sendTiming(*ifetch_pkt)) {
+        // accepted: wait for the icache to respond; the packet now belongs
+        // to the memory system, so the CPU drops its pointer to it
+        _status = IcacheWaitResponse;
+        ifetch_pkt = NULL;
+    } else {
+        // refused: keep ifetch_pkt so IcachePort::recvRetry() can resend it
+        _status = IcacheRetry;
+    }
+}
+
+
+void
+TimingSimpleCPU::completeInst(Fault fault)
+{
+    // Wrap up the instruction that just finished, then start the next fetch.
+    postExecute();
+
+    if (traceData)
+        traceData->finalize();
+
+    advancePC(fault);
+
+    fetch();
+}
+
+
+void
+TimingSimpleCPU::completeIfetch()
+{
+    // received a response from the icache: execute the received
+    // instruction
+    assert(_status == IcacheWaitResponse);
+    _status = Running;
+    preExecute();
+    if (curStaticInst->isMemRef()) {
+        // load or store: send to dcache, which may refuse and ask for a retry
+        Fault fault = curStaticInst->initiateAcc(this, traceData);
+        assert(fault == NoFault);
+        assert(_status == DcacheWaitResponse || _status == DcacheRetry);
+    } else {
+        // non-memory instruction: execute completely now
+        Fault fault = curStaticInst->execute(this, traceData);
+        completeInst(fault);
+    }
+}
+
+
+bool
+TimingSimpleCPU::IcachePort::recvTiming(Packet &pkt)
+{   // Icache response arrived; pkt is not inspected, the CPU just proceeds.
+    cpu->completeIfetch();
+    return true;  // NOTE(review): who frees the Packet/Request from fetch()?
+}
+
+Packet *
+TimingSimpleCPU::IcachePort::recvRetry()
+{
+    // A retry is only expected while a fetch packet is waiting to be sent.
+    assert(cpu->ifetch_pkt != NULL);
+    assert(cpu->_status == IcacheRetry);
+    // Return the waiting packet to the caller and clear our pointer to it.
+    Packet *pending = cpu->ifetch_pkt;
+    cpu->ifetch_pkt = NULL;
+    cpu->_status = IcacheWaitResponse;
+    return pending;
+}
+
+void
+TimingSimpleCPU::completeDataAccess(Packet *pkt)
+{
+    // The dcache has answered: finish the load or store that was
+    // started in completeIfetch().
+    assert(pkt->result == Success);
+    assert(_status == DcacheWaitResponse);
+    _status = Running;
+
+    // completeAcc() yields the fault that instruction completion needs.
+    Fault accFault = curStaticInst->completeAcc(pkt, this, traceData);
+    completeInst(accFault);
+}
+
+
+
+bool
+TimingSimpleCPU::DcachePort::recvTiming(Packet &pkt)
+{   // Dcache response arrived: let the CPU finish the pending load/store.
+    cpu->completeDataAccess(&pkt);
+    return true;
+}
+
+Packet *
+TimingSimpleCPU::DcachePort::recvRetry()
+{
+    // A retry is only expected while a data packet is waiting to be sent.
+    assert(cpu->dcache_pkt != NULL);
+    assert(cpu->_status == DcacheRetry);
+    // Return the waiting packet to the caller and clear our pointer to it.
+    Packet *pending = cpu->dcache_pkt;
+    cpu->dcache_pkt = NULL;
+    cpu->_status = DcacheWaitResponse;
+    return pending;
+}
+
+
+////////////////////////////////////////////////////////////////////////
+//
+//  TimingSimpleCPU Simulation Object
+//
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU)
+    // Configuration parameters accepted for a TimingSimpleCPU object.
+    Param<Counter> max_insts_any_thread;
+    Param<Counter> max_insts_all_threads;
+    Param<Counter> max_loads_any_thread;
+    Param<Counter> max_loads_all_threads;
+    SimObjectParam<MemObject *> mem;
+
+#if FULL_SYSTEM
+    SimObjectParam<AlphaITB *> itb;
+    SimObjectParam<AlphaDTB *> dtb;
+    SimObjectParam<System *> system;
+    Param<int> cpu_id;
+    Param<Tick> profile;
+#else
+    SimObjectParam<Process *> workload;
+#endif // FULL_SYSTEM
+
+    Param<int> clock;
+
+    Param<bool> defer_registration;
+    Param<int> width;  // NOTE(review): never read by CREATE_SIM_OBJECT
+    Param<bool> function_trace;
+    Param<Tick> function_trace_start;
+    Param<bool> simulate_stalls;  // NOTE(review): never read by CREATE_SIM_OBJECT
+
+END_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU)
+    // One INIT_PARAM(name, help string) entry for each declared parameter.
+    INIT_PARAM(max_insts_any_thread,
+               "terminate when any thread reaches this inst count"),
+    INIT_PARAM(max_insts_all_threads,
+               "terminate when all threads have reached this inst count"),
+    INIT_PARAM(max_loads_any_thread,
+               "terminate when any thread reaches this load count"),
+    INIT_PARAM(max_loads_all_threads,
+               "terminate when all threads have reached this load count"),
+    INIT_PARAM(mem, "memory"),
+
+#if FULL_SYSTEM
+    INIT_PARAM(itb, "Instruction TLB"),
+    INIT_PARAM(dtb, "Data TLB"),
+    INIT_PARAM(system, "system object"),
+    INIT_PARAM(cpu_id, "processor ID"),
+    INIT_PARAM(profile, ""),  // NOTE(review): help string is empty
+#else
+    INIT_PARAM(workload, "processes to run"),
+#endif // FULL_SYSTEM
+
+    INIT_PARAM(clock, "clock speed"),
+    INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+    INIT_PARAM(width, "cpu width"),
+    INIT_PARAM(function_trace, "Enable function trace"),
+    INIT_PARAM(function_trace_start, "Cycle to start function trace"),
+    INIT_PARAM(simulate_stalls, "Simulate cache stall cycles")
+
+END_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU)
+
+
+CREATE_SIM_OBJECT(TimingSimpleCPU)
+{
+    // Copy the configured values into a Params object and build the CPU.
+    TimingSimpleCPU::Params *cpuParams = new TimingSimpleCPU::Params();
+    cpuParams->name = getInstanceName();
+    cpuParams->numberOfThreads = 1;
+    cpuParams->mem = mem;
+    cpuParams->clock = clock;
+    cpuParams->deferRegistration = defer_registration;
+    cpuParams->functionTrace = function_trace;
+    cpuParams->functionTraceStart = function_trace_start;
+    cpuParams->max_insts_any_thread = max_insts_any_thread;
+    cpuParams->max_insts_all_threads = max_insts_all_threads;
+    cpuParams->max_loads_any_thread = max_loads_any_thread;
+    cpuParams->max_loads_all_threads = max_loads_all_threads;
+
+#if FULL_SYSTEM
+    cpuParams->itb = itb;
+    cpuParams->dtb = dtb;
+    cpuParams->system = system;
+    cpuParams->cpu_id = cpu_id;
+    cpuParams->profile = profile;
+#else
+    cpuParams->process = workload;
+#endif
+
+    return new TimingSimpleCPU(cpuParams);
+}
+
+REGISTER_SIM_OBJECT("TimingSimpleCPU", TimingSimpleCPU)
+
diff --git a/cpu/simple/timing.hh b/cpu/simple/timing.hh
new file mode 100644
index 000000000..e1b564c69
--- /dev/null
+++ b/cpu/simple/timing.hh
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_SIMPLE_TIMING_HH__
+#define __CPU_SIMPLE_TIMING_HH__
+
+#include "cpu/simple/base.hh"
+
+class TimingSimpleCPU : public BaseSimpleCPU
+{   // Simple CPU model that fetches and accesses data via timing-mode ports.
+  public:
+
+    struct Params : public BaseSimpleCPU::Params {
+    };  // adds nothing to the base parameters
+
+    TimingSimpleCPU(Params *params);
+    virtual ~TimingSimpleCPU();
+
+    virtual void init();
+
+  public:
+    // What the CPU is currently doing or waiting for.
+    enum Status {
+        Idle,
+        Running,             // set when a cache response is being processed
+        IcacheRetry,         // fetch packet not accepted; waiting for a retry
+        IcacheWaitResponse,  // fetch packet sent; waiting for the icache
+        IcacheWaitSwitch,
+        DcacheRetry,         // data packet waiting to be resent on a retry
+        DcacheWaitResponse,  // data access sent; waiting for the dcache
+        DcacheWaitSwitch,
+        SwitchedOut
+    };
+
+  protected:
+    Status _status;  // current state; updated by fetch/response/retry code
+
+    Status status() const { return _status; }
+
+  private:
+
+    class CpuPort : public Port  // behaviour shared by both cache ports
+    {
+      protected:
+        TimingSimpleCPU *cpu;  // CPU this port belongs to
+
+      public:
+
+        CpuPort(TimingSimpleCPU *_cpu)
+            : cpu(_cpu)
+        { }
+
+      protected:
+
+        virtual Tick recvAtomic(Packet &pkt);
+
+        virtual void recvFunctional(Packet &pkt);
+
+        virtual void recvStatusChange(Status status);
+
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+            AddrRangeList &snoop)
+        { resp.clear(); snoop.clear(); }  // reports no ranges at all
+    };
+
+    class IcachePort : public CpuPort  // instruction-fetch side
+    {
+      public:
+
+        IcachePort(TimingSimpleCPU *_cpu)
+            : CpuPort(_cpu)
+        { }
+
+      protected:
+
+        virtual bool recvTiming(Packet &pkt);  // calls cpu->completeIfetch()
+
+        virtual Packet *recvRetry();  // returns the waiting fetch packet
+    };
+
+    class DcachePort : public CpuPort  // load/store side
+    {
+      public:
+
+        DcachePort(TimingSimpleCPU *_cpu)
+            : CpuPort(_cpu)
+        { }
+
+      protected:
+
+        virtual bool recvTiming(Packet &pkt);  // calls completeDataAccess()
+
+        virtual Packet *recvRetry();  // returns the waiting data packet
+    };
+
+    IcachePort icachePort;
+    DcachePort dcachePort;
+
+    Packet *ifetch_pkt;  // fetch packet kept for a retry; NULL once sent
+    Packet *dcache_pkt;  // data packet kept for a retry
+
+  public:
+
+    virtual void serialize(std::ostream &os);
+    virtual void unserialize(Checkpoint *cp, const std::string &section);
+
+    void switchOut(Sampler *s);
+    void takeOverFrom(BaseCPU *oldCPU);
+
+    virtual void activateContext(int thread_num, int delay);
+    virtual void suspendContext(int thread_num);
+
+    template <class T>
+    Fault read(Addr addr, T &data, unsigned flags);
+
+    template <class T>
+    Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
+
+    void fetch();                       // build and send an instruction fetch
+    void completeInst(Fault fault);     // post-execute, advance PC, fetch again
+    void completeIfetch();              // icache response: execute/start access
+    void completeDataAccess(Packet *);  // dcache response: finish load/store
+};
+
+#endif // __CPU_SIMPLE_TIMING_HH__
diff --git a/cpu/static_inst.hh b/cpu/static_inst.hh
index f0b75c10e..33c9144fb 100644
--- a/cpu/static_inst.hh
+++ b/cpu/static_inst.hh
@@ -43,12 +43,14 @@
 struct AlphaSimpleImpl;
 class ExecContext;
 class DynInst;
+class Packet;
 
 template <class Impl>
 class AlphaDynInst;
 
 class FastCPU;
-class SimpleCPU;
+class AtomicSimpleCPU;
+class TimingSimpleCPU;
 class InorderCPU;
 class SymbolTable;
 
diff --git a/mem/bus.cc b/mem/bus.cc
index acc941434..f84e38301 100644
--- a/mem/bus.cc
+++ b/mem/bus.cc
@@ -48,9 +48,16 @@ Bus::init()
 /** Function called by the port when the bus is recieving a Timing
  * transaction.*/
 bool
-Bus::recvTiming(Packet &pkt, int id)
+Bus::recvTiming(Packet &pkt)
 {
-    return findPort(pkt.addr, id)->sendTiming(pkt);
+    Port *port;
+    if (pkt.dest == Packet::Broadcast) {  // no explicit target: use address
+        port = findPort(pkt.addr, pkt.src);
+    } else {  // explicit target, e.g. a response routed back to its sender
+        assert(pkt.dest >= 0 && pkt.dest < interfaces.size());
+        port = interfaces[pkt.dest];
+    }
+    return port->sendTiming(pkt);
 }
 
 Port *
@@ -82,17 +89,19 @@ Bus::findPort(Addr addr, int id)
 /** Function called by the port when the bus is recieving a Atomic
  * transaction.*/
 Tick
-Bus::recvAtomic(Packet &pkt, int id)
+Bus::recvAtomic(Packet &pkt)
 {
-    return findPort(pkt.addr, id)->sendAtomic(pkt);
+    assert(pkt.dest == Packet::Broadcast);
+    return findPort(pkt.addr, pkt.src)->sendAtomic(pkt);
 }
 
 /** Function called by the port when the bus is recieving a Functional
  * transaction.*/
 void
-Bus::recvFunctional(Packet &pkt, int id)
+Bus::recvFunctional(Packet &pkt)
 {
-    findPort(pkt.addr, id)->sendFunctional(pkt);
+    assert(pkt.dest == Packet::Broadcast);
+    findPort(pkt.addr, pkt.src)->sendFunctional(pkt);
 }
 
 /** Function called by the port when the bus is recieving a status change.*/
diff --git a/mem/bus.hh b/mem/bus.hh
index de9259a90..40d274037 100644
--- a/mem/bus.hh
+++ b/mem/bus.hh
@@ -57,15 +57,15 @@ class Bus : public MemObject
 
     /** Function called by the port when the bus is recieving a Timing
         transaction.*/
-    bool recvTiming(Packet &pkt, int id);
+    bool recvTiming(Packet &pkt);
 
     /** Function called by the port when the bus is recieving a Atomic
         transaction.*/
-    Tick recvAtomic(Packet &pkt, int id);
+    Tick recvAtomic(Packet &pkt);
 
     /** Function called by the port when the bus is recieving a Functional
         transaction.*/
-    void recvFunctional(Packet &pkt, int id);
+    void recvFunctional(Packet &pkt);
 
     /** Function called by the port when the bus is recieving a status change.*/
     void recvStatusChange(Port::Status status, int id);
@@ -77,8 +77,7 @@ class Bus : public MemObject
      *             loops)
      * @return pointer to port that the packet should be sent out of.
      */
-    Port *
-    Bus::findPort(Addr addr, int id);
+    Port *findPort(Addr addr, int id);
 
     /** Process address range request.
      * @param resp addresses that we can respond to
@@ -110,17 +109,17 @@ class Bus : public MemObject
         /** When reciving a timing request from the peer port (at id),
             pass it to the bus. */
         virtual bool recvTiming(Packet &pkt)
-        { return bus->recvTiming(pkt, id); }
+        { pkt.src = id; return bus->recvTiming(pkt); }
 
         /** When reciving a Atomic requestfrom the peer port (at id),
             pass it to the bus. */
         virtual Tick recvAtomic(Packet &pkt)
-        { return bus->recvAtomic(pkt, id); }
+        { pkt.src = id; return bus->recvAtomic(pkt); }
 
         /** When reciving a Functional requestfrom the peer port (at id),
             pass it to the bus. */
         virtual void recvFunctional(Packet &pkt)
-        { bus->recvFunctional(pkt, id); }
+        { pkt.src = id; bus->recvFunctional(pkt); }
 
         /** When reciving a status changefrom the peer port (at id),
             pass it to the bus. */
@@ -131,7 +130,8 @@ class Bus : public MemObject
         // downstream from this bus, yes?  That is, the union of all
         // the 'owned' address ranges of all the other interfaces on
         // this bus...
-        virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop)
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+                                            AddrRangeList &snoop)
         { bus->addressRanges(resp, snoop, id); }
 
         // Hack to make translating port work without changes
diff --git a/mem/packet.hh b/mem/packet.hh
index 69d00675d..ab961e304 100644
--- a/mem/packet.hh
+++ b/mem/packet.hh
@@ -121,6 +121,8 @@ struct Packet
     /** A index of the source of the transaction. */
     short src;
 
+    static const short Broadcast = -1;
+
     /** A index to the destination of the transaction. */
     short dest;
 
diff --git a/mem/physical.cc b/mem/physical.cc
index beb97307a..d0409995b 100644
--- a/mem/physical.cc
+++ b/mem/physical.cc
@@ -127,6 +127,7 @@ PhysicalMemory::doTimingAccess (Packet &pkt, MemoryPort* memoryPort)
 {
     doFunctionalAccess(pkt);
 
+    pkt.dest = pkt.src;
     MemResponseEvent* response = new MemResponseEvent(pkt, memoryPort);
     response->schedule(curTick + lat);
 
diff --git a/mem/port.cc b/mem/port.cc
index 32031d96c..5b1f634d6 100644
--- a/mem/port.cc
+++ b/mem/port.cc
@@ -31,6 +31,7 @@
  */
 
 #include "base/chunk_generator.hh"
+#include "mem/packet_impl.hh"
 #include "mem/port.hh"
 
 void
@@ -40,6 +41,7 @@ Port::blobHelper(Addr addr, uint8_t *p, int size, Command cmd)
     Packet pkt;
     pkt.req = &req;
     pkt.cmd = cmd;
+    pkt.dest = Packet::Broadcast;
 
     for (ChunkGenerator gen(addr, size, peerBlockSize());
          !gen.done(); gen.next()) {
diff --git a/python/m5/objects/PhysicalMemory.py b/python/m5/objects/PhysicalMemory.py
index e59e94e9b..c59910093 100644
--- a/python/m5/objects/PhysicalMemory.py
+++ b/python/m5/objects/PhysicalMemory.py
@@ -5,4 +5,4 @@ class PhysicalMemory(MemObject):
     type = 'PhysicalMemory'
     range = Param.AddrRange("Device Address")
     file = Param.String('', "memory mapped file")
-    latency = Param.Latency('10ns', "latency of an access")
+    latency = Param.Latency(Parent.clock, "latency of an access")
-- 
2.30.2