config,cpu: Add SMT support to Atomic and Timing CPUs
author Mitch Hayenga <mitch.hayenga@arm.com>
Wed, 30 Sep 2015 16:14:19 +0000 (11:14 -0500)
committer Mitch Hayenga <mitch.hayenga@arm.com>
Wed, 30 Sep 2015 16:14:19 +0000 (11:14 -0500)
Adds SMT support to the "simple" CPU models so that they can be
used alongside other SMT-capable CPUs. Example usage: this enables
the TimingSimpleCPU to be used to warm up caches before switching to
detailed mode with the in-order or out-of-order CPU models.

configs/example/se.py
src/cpu/simple/atomic.cc
src/cpu/simple/atomic.hh
src/cpu/simple/base.cc
src/cpu/simple/base.hh
src/cpu/simple/exec_context.hh [new file with mode: 0644]
src/cpu/simple/timing.cc
src/cpu/simple/timing.hh
tests/quick/se/01.hello-2T-smt/test.py

index f0e84e69dc9cf5f05f4d501c89eefa96a90efe46..afd916cdb4cc56986fce1defdd8421ee22d967fb 100644 (file)
@@ -178,6 +178,9 @@ system = System(cpu = [CPUClass(cpu_id=i) for i in xrange(np)],
                 mem_ranges = [AddrRange(options.mem_size)],
                 cache_line_size = options.cacheline_size)
 
+if numThreads > 1:
+    system.multi_thread = True
+
 # Create a top-level voltage domain
 system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
 
index 3777ddee90281b49aa0d6db973a87cc5c65181ca..6690c1da62316ef69e27a9815bb000ec0ef7aea6 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright 2014 Google, Inc.
- * Copyright (c) 2012-2013 ARM Limited
+ * Copyright (c) 2012-2013,2015 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -84,24 +84,11 @@ AtomicSimpleCPU::TickEvent::description() const
 void
 AtomicSimpleCPU::init()
 {
-    BaseCPU::init();
+    BaseSimpleCPU::init();
 
-    // Initialise the ThreadContext's memory proxies
-    tcBase()->initMemProxies(tcBase());
-
-    if (FullSystem && !params()->switched_out) {
-        ThreadID size = threadContexts.size();
-        for (ThreadID i = 0; i < size; ++i) {
-            ThreadContext *tc = threadContexts[i];
-            // initialize CPU, including PC
-            TheISA::initCPU(tc, tc->contextId());
-        }
-    }
-
-    // Atomic doesn't do MT right now, so contextId == threadId
-    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
-    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
-    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
+    ifetch_req.setThreadContext(_cpuId, 0);
+    data_read_req.setThreadContext(_cpuId, 0);
+    data_write_req.setThreadContext(_cpuId, 0);
 }
 
 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
@@ -131,12 +118,13 @@ AtomicSimpleCPU::drain()
         return DrainState::Drained;
 
     if (!isDrained()) {
-        DPRINTF(Drain, "Requesting drain: %s\n", pcState());
+        DPRINTF(Drain, "Requesting drain.\n");
         return DrainState::Draining;
     } else {
         if (tickEvent.scheduled())
             deschedule(tickEvent);
 
+        activeThreads.clear();
         DPRINTF(Drain, "Not executing microcode, no need to drain.\n");
         return DrainState::Drained;
     }
@@ -153,16 +141,22 @@ AtomicSimpleCPU::drainResume()
     verifyMemoryMode();
 
     assert(!threadContexts.empty());
-    if (threadContexts.size() > 1)
-        fatal("The atomic CPU only supports one thread.\n");
 
-    if (thread->status() == ThreadContext::Active) {
-        schedule(tickEvent, nextCycle());
-        _status = BaseSimpleCPU::Running;
-        notIdleFraction = 1;
-    } else {
-        _status = BaseSimpleCPU::Idle;
-        notIdleFraction = 0;
+    _status = BaseSimpleCPU::Idle;
+
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        if (threadInfo[tid]->thread->status() == ThreadContext::Active) {
+            threadInfo[tid]->notIdleFraction = 1;
+            activeThreads.push_back(tid);
+            _status = BaseSimpleCPU::Running;
+
+            // Tick if any threads active
+            if (!tickEvent.scheduled()) {
+                schedule(tickEvent, nextCycle());
+            }
+        } else {
+            threadInfo[tid]->notIdleFraction = 0;
+        }
     }
 }
 
@@ -172,7 +166,7 @@ AtomicSimpleCPU::tryCompleteDrain()
     if (drainState() != DrainState::Draining)
         return false;
 
-    DPRINTF(Drain, "tryCompleteDrain: %s\n", pcState());
+    DPRINTF(Drain, "tryCompleteDrain.\n");
     if (!isDrained())
         return false;
 
@@ -201,10 +195,6 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
 
     // The tick event should have been descheduled by drain()
     assert(!tickEvent.scheduled());
-
-    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
-    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
-    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
 }
 
 void
@@ -221,20 +211,23 @@ AtomicSimpleCPU::activateContext(ThreadID thread_num)
 {
     DPRINTF(SimpleCPU, "ActivateContext %d\n", thread_num);
 
-    assert(thread_num == 0);
-    assert(thread);
-
-    assert(_status == Idle);
-    assert(!tickEvent.scheduled());
+    assert(thread_num < numThreads);
 
-    notIdleFraction = 1;
-    Cycles delta = ticksToCycles(thread->lastActivate - thread->lastSuspend);
+    threadInfo[thread_num]->notIdleFraction = 1;
+    Cycles delta = ticksToCycles(threadInfo[thread_num]->thread->lastActivate -
+                                 threadInfo[thread_num]->thread->lastSuspend);
     numCycles += delta;
     ppCycles->notify(delta);
 
-    //Make sure ticks are still on multiples of cycles
-    schedule(tickEvent, clockEdge(Cycles(0)));
+    if (!tickEvent.scheduled()) {
+        //Make sure ticks are still on multiples of cycles
+        schedule(tickEvent, clockEdge(Cycles(0)));
+    }
     _status = BaseSimpleCPU::Running;
+    if (std::find(activeThreads.begin(), activeThreads.end(), thread_num)
+        == activeThreads.end()) {
+        activeThreads.push_back(thread_num);
+    }
 }
 
 
@@ -243,21 +236,24 @@ AtomicSimpleCPU::suspendContext(ThreadID thread_num)
 {
     DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);
 
-    assert(thread_num == 0);
-    assert(thread);
+    assert(thread_num < numThreads);
+    activeThreads.remove(thread_num);
 
     if (_status == Idle)
         return;
 
     assert(_status == BaseSimpleCPU::Running);
 
-    // tick event may not be scheduled if this gets called from inside
-    // an instruction's execution, e.g. "quiesce"
-    if (tickEvent.scheduled())
-        deschedule(tickEvent);
+    threadInfo[thread_num]->notIdleFraction = 0;
+
+    if (activeThreads.empty()) {
+        _status = Idle;
+
+        if (tickEvent.scheduled()) {
+            deschedule(tickEvent);
+        }
+    }
 
-    notIdleFraction = 0;
-    _status = Idle;
 }
 
 
@@ -269,7 +265,7 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
 
     // X86 ISA: Snooping an invalidation for monitor/mwait
     AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
-    if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+    if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
         cpu->wakeup();
     }
 
@@ -277,7 +273,9 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
     if (pkt->isInvalidate()) {
         DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
                 pkt->getAddr());
-        TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
+        for (auto &t_info : cpu->threadInfo) {
+            TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
+        }
     }
 
     return 0;
@@ -291,7 +289,7 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
 
     // X86 ISA: Snooping an invalidation for monitor/mwait
     AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
-    if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+    if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
         cpu->wakeup();
     }
 
@@ -299,7 +297,9 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
     if (pkt->isInvalidate()) {
         DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
                 pkt->getAddr());
-        TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
+        for (auto &t_info : cpu->threadInfo) {
+            TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
+        }
     }
 }
 
@@ -307,6 +307,9 @@ Fault
 AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
                          unsigned size, unsigned flags)
 {
+    SimpleExecContext& t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     // use the CPU's statically allocated read request and packet objects
     Request *req = &data_read_req;
 
@@ -330,7 +333,8 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
         req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
 
         // translate to physical address
-        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);
+        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
+                                                          BaseTLB::Read);
 
         // Now do the access.
         if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
@@ -370,6 +374,7 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
                 assert(!locked);
                 locked = true;
             }
+
             return fault;
         }
 
@@ -391,7 +396,8 @@ Fault
 AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                           Addr addr, unsigned flags, uint64_t *res)
 {
-
+    SimpleExecContext& t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
     static uint8_t zero_array[64] = {};
 
     if (data == NULL) {
@@ -424,7 +430,7 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
         req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
 
         // translate to physical address
-        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);
+        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(), BaseTLB::Write);
 
         // Now do the access.
         if (fault == NoFault) {
@@ -477,6 +483,8 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                 assert(locked);
                 locked = false;
             }
+
+
             if (fault != NoFault && req->isPrefetch()) {
                 return NoFault;
             } else {
@@ -503,6 +511,19 @@ AtomicSimpleCPU::tick()
 {
     DPRINTF(SimpleCPU, "Tick\n");
 
+    // Change thread if multi-threaded
+    swapActiveThread();
+
+    // Set memory request ids to current thread
+    if (numThreads > 1) {
+        ifetch_req.setThreadContext(_cpuId, curThread);
+        data_read_req.setThreadContext(_cpuId, curThread);
+        data_write_req.setThreadContext(_cpuId, curThread);
+    }
+
+    SimpleExecContext& t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     Tick latency = 0;
 
     for (int i = 0; i < width || locked; ++i) {
@@ -529,7 +550,7 @@ AtomicSimpleCPU::tick()
         if (needToFetch) {
             ifetch_req.taskId(taskId());
             setupFetchRequest(&ifetch_req);
-            fault = thread->itb->translateAtomic(&ifetch_req, tc,
+            fault = thread->itb->translateAtomic(&ifetch_req, thread->getTC(),
                                                  BaseTLB::Execute);
         }
 
@@ -565,7 +586,7 @@ AtomicSimpleCPU::tick()
             preExecute();
 
             if (curStaticInst) {
-                fault = curStaticInst->execute(this, traceData);
+                fault = curStaticInst->execute(&t_info, traceData);
 
                 // keep an instruction count
                 if (fault == NoFault) {
@@ -601,7 +622,7 @@ AtomicSimpleCPU::tick()
             }
 
         }
-        if(fault != NoFault || !stayAtPC)
+        if(fault != NoFault || !t_info.stayAtPC)
             advancePC(fault);
     }
 
@@ -613,7 +634,7 @@ AtomicSimpleCPU::tick()
         latency = clockPeriod();
 
     if (_status != Idle)
-        schedule(tickEvent, curTick() + latency);
+        reschedule(tickEvent, curTick() + latency, true);
 }
 
 void
@@ -638,8 +659,5 @@ AtomicSimpleCPU::printAddr(Addr a)
 AtomicSimpleCPU *
 AtomicSimpleCPUParams::create()
 {
-    numThreads = 1;
-    if (!FullSystem && workload.size() != 1)
-        panic("only one workload allowed");
     return new AtomicSimpleCPU(this);
 }
index 3f587e6716541dc95c9ef4aa16d831635550d9e7..76ee9f897463305394837d02640286b33661f86c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2013 ARM Limited
+ * Copyright (c) 2012-2013,2015 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -44,6 +44,7 @@
 #define __CPU_SIMPLE_ATOMIC_HH__
 
 #include "cpu/simple/base.hh"
+#include "cpu/simple/exec_context.hh"
 #include "params/AtomicSimpleCPU.hh"
 #include "sim/probe/probe.hh"
 
@@ -96,9 +97,11 @@ class AtomicSimpleCPU : public BaseSimpleCPU
      * </ul>
      */
     bool isDrained() {
-        return microPC() == 0 &&
+        SimpleExecContext &t_info = *threadInfo[curThread];
+
+        return t_info.thread->microPC() == 0 &&
             !locked &&
-            !stayAtPC;
+            !t_info.stayAtPC;
     }
 
     /**
index 2751a346cb4c4a18452109249afcb3f8645f7d9e..673cadd77bf6c34d87fbdae00d2cbcbfa39c30dc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2012 ARM Limited
+ * Copyright (c) 2010-2012,2015 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -62,6 +62,7 @@
 #include "cpu/exetrace.hh"
 #include "cpu/pred/bpred_unit.hh"
 #include "cpu/profile.hh"
+#include "cpu/simple/exec_context.hh"
 #include "cpu/simple_thread.hh"
 #include "cpu/smt.hh"
 #include "cpu/static_inst.hh"
@@ -87,46 +88,121 @@ using namespace TheISA;
 
 BaseSimpleCPU::BaseSimpleCPU(BaseSimpleCPUParams *p)
     : BaseCPU(p),
+      curThread(0),
       branchPred(p->branchPred),
-      traceData(NULL), thread(NULL), _status(Idle), interval_stats(false),
-      inst()
+      traceData(NULL),
+      inst(),
+      _status(Idle)
 {
-    if (FullSystem)
-        thread = new SimpleThread(this, 0, p->system, p->itb, p->dtb,
-                                  p->isa[0]);
-    else
-        thread = new SimpleThread(this, /* thread_num */ 0, p->system,
-                                  p->workload[0], p->itb, p->dtb, p->isa[0]);
-
-    thread->setStatus(ThreadContext::Halted);
+    SimpleThread *thread;
 
-    tc = thread->getTC();
+    for (unsigned i = 0; i < numThreads; i++) {
+        if (FullSystem) {
+            thread = new SimpleThread(this, i, p->system,
+                                      p->itb, p->dtb, p->isa[i]);
+        } else {
+            thread = new SimpleThread(this, i, p->system, p->workload[i],
+                                      p->itb, p->dtb, p->isa[i]);
+        }
+        threadInfo.push_back(new SimpleExecContext(this, thread));
+        ThreadContext *tc = thread->getTC();
+        threadContexts.push_back(tc);
+    }
 
     if (p->checker) {
+        if (numThreads != 1)
+            fatal("Checker currently does not support SMT");
+
         BaseCPU *temp_checker = p->checker;
         checker = dynamic_cast<CheckerCPU *>(temp_checker);
         checker->setSystem(p->system);
         // Manipulate thread context
-        ThreadContext *cpu_tc = tc;
-        tc = new CheckerThreadContext<ThreadContext>(cpu_tc, this->checker);
+        ThreadContext *cpu_tc = threadContexts[0];
+        threadContexts[0] = new CheckerThreadContext<ThreadContext>(cpu_tc, this->checker);
     } else {
         checker = NULL;
     }
+}
+
+void
+BaseSimpleCPU::init()
+{
+    BaseCPU::init();
+
+    for (auto tc : threadContexts) {
+        // Initialise the ThreadContext's memory proxies
+        tc->initMemProxies(tc);
+
+        if (FullSystem && !params()->switched_out) {
+            // initialize CPU, including PC
+            TheISA::initCPU(tc, tc->contextId());
+        }
+    }
+}
+
+void
+BaseSimpleCPU::checkPcEventQueue()
+{
+    Addr oldpc, pc = threadInfo[curThread]->thread->instAddr();
+    do {
+        oldpc = pc;
+        system->pcEventQueue.service(threadContexts[curThread]);
+        pc = threadInfo[curThread]->thread->instAddr();
+    } while (oldpc != pc);
+}
+
+void
+BaseSimpleCPU::swapActiveThread()
+{
+    if (numThreads > 1) {
+        if ((!curStaticInst || !curStaticInst->isDelayedCommit()) &&
+             !threadInfo[curThread]->stayAtPC) {
+            // Swap active threads
+            if (!activeThreads.empty()) {
+                curThread = activeThreads.front();
+                activeThreads.pop_front();
+                activeThreads.push_back(curThread);
+            }
+        }
+    }
+}
+
+void
+BaseSimpleCPU::countInst()
+{
+    SimpleExecContext& t_info = *threadInfo[curThread];
+
+    if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
+        t_info.numInst++;
+        t_info.numInsts++;
+    }
+    t_info.numOp++;
+    t_info.numOps++;
 
-    numInst = 0;
-    startNumInst = 0;
-    numOp = 0;
-    startNumOp = 0;
-    numLoad = 0;
-    startNumLoad = 0;
-    lastIcacheStall = 0;
-    lastDcacheStall = 0;
+    system->totalNumInsts++;
+    t_info.thread->funcExeInst++;
+}
+
+Counter
+BaseSimpleCPU::totalInsts() const
+{
+    Counter total_inst = 0;
+    for (auto& t_info : threadInfo) {
+        total_inst += t_info->numInst;
+    }
 
-    threadContexts.push_back(tc);
+    return total_inst;
+}
 
+Counter
+BaseSimpleCPU::totalOps() const
+{
+    Counter total_op = 0;
+    for (auto& t_info : threadInfo) {
+        total_op += t_info->numOp;
+    }
 
-    fetchOffset = 0;
-    stayAtPC = false;
+    return total_op;
 }
 
 BaseSimpleCPU::~BaseSimpleCPU()
@@ -148,177 +224,184 @@ BaseSimpleCPU::regStats()
 
     BaseCPU::regStats();
 
-    numInsts
-        .name(name() + ".committedInsts")
-        .desc("Number of instructions committed")
-        ;
-
-    numOps
-        .name(name() + ".committedOps")
-        .desc("Number of ops (including micro ops) committed")
-        ;
-
-    numIntAluAccesses
-        .name(name() + ".num_int_alu_accesses")
-        .desc("Number of integer alu accesses")
-        ;
-
-    numFpAluAccesses
-        .name(name() + ".num_fp_alu_accesses")
-        .desc("Number of float alu accesses")
-        ;
-
-    numCallsReturns
-        .name(name() + ".num_func_calls")
-        .desc("number of times a function call or return occured")
-        ;
-
-    numCondCtrlInsts
-        .name(name() + ".num_conditional_control_insts")
-        .desc("number of instructions that are conditional controls")
-        ;
-
-    numIntInsts
-        .name(name() + ".num_int_insts")
-        .desc("number of integer instructions")
-        ;
-
-    numFpInsts
-        .name(name() + ".num_fp_insts")
-        .desc("number of float instructions")
-        ;
-
-    numIntRegReads
-        .name(name() + ".num_int_register_reads")
-        .desc("number of times the integer registers were read")
-        ;
-
-    numIntRegWrites
-        .name(name() + ".num_int_register_writes")
-        .desc("number of times the integer registers were written")
-        ;
-
-    numFpRegReads
-        .name(name() + ".num_fp_register_reads")
-        .desc("number of times the floating registers were read")
-        ;
-
-    numFpRegWrites
-        .name(name() + ".num_fp_register_writes")
-        .desc("number of times the floating registers were written")
-        ;
-
-    numCCRegReads
-        .name(name() + ".num_cc_register_reads")
-        .desc("number of times the CC registers were read")
-        .flags(nozero)
-        ;
-
-    numCCRegWrites
-        .name(name() + ".num_cc_register_writes")
-        .desc("number of times the CC registers were written")
-        .flags(nozero)
-        ;
-
-    numMemRefs
-        .name(name()+".num_mem_refs")
-        .desc("number of memory refs")
-        ;
-
-    numStoreInsts
-        .name(name() + ".num_store_insts")
-        .desc("Number of store instructions")
-        ;
-
-    numLoadInsts
-        .name(name() + ".num_load_insts")
-        .desc("Number of load instructions")
-        ;
-
-    notIdleFraction
-        .name(name() + ".not_idle_fraction")
-        .desc("Percentage of non-idle cycles")
-        ;
-
-    idleFraction
-        .name(name() + ".idle_fraction")
-        .desc("Percentage of idle cycles")
-        ;
-
-    numBusyCycles
-        .name(name() + ".num_busy_cycles")
-        .desc("Number of busy cycles")
-        ;
-
-    numIdleCycles
-        .name(name()+".num_idle_cycles")
-        .desc("Number of idle cycles")
-        ;
-
-    icacheStallCycles
-        .name(name() + ".icache_stall_cycles")
-        .desc("ICache total stall cycles")
-        .prereq(icacheStallCycles)
-        ;
-
-    dcacheStallCycles
-        .name(name() + ".dcache_stall_cycles")
-        .desc("DCache total stall cycles")
-        .prereq(dcacheStallCycles)
-        ;
-
-    statExecutedInstType
-        .init(Enums::Num_OpClass)
-        .name(name() + ".op_class")
-        .desc("Class of executed instruction")
-        .flags(total | pdf | dist)
-        ;
-    for (unsigned i = 0; i < Num_OpClasses; ++i) {
-        statExecutedInstType.subname(i, Enums::OpClassStrings[i]);
-    }
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        SimpleExecContext& t_info = *threadInfo[tid];
+
+        std::string thread_str = name();
+        if (numThreads > 1)
+            thread_str += ".thread" + std::to_string(tid);
+
+        t_info.numInsts
+            .name(thread_str + ".committedInsts")
+            .desc("Number of instructions committed")
+            ;
+
+        t_info.numOps
+            .name(thread_str + ".committedOps")
+            .desc("Number of ops (including micro ops) committed")
+            ;
+
+        t_info.numIntAluAccesses
+            .name(thread_str + ".num_int_alu_accesses")
+            .desc("Number of integer alu accesses")
+            ;
+
+        t_info.numFpAluAccesses
+            .name(thread_str + ".num_fp_alu_accesses")
+            .desc("Number of float alu accesses")
+            ;
+
+        t_info.numCallsReturns
+            .name(thread_str + ".num_func_calls")
+            .desc("number of times a function call or return occured")
+            ;
+
+        t_info.numCondCtrlInsts
+            .name(thread_str + ".num_conditional_control_insts")
+            .desc("number of instructions that are conditional controls")
+            ;
+
+        t_info.numIntInsts
+            .name(thread_str + ".num_int_insts")
+            .desc("number of integer instructions")
+            ;
+
+        t_info.numFpInsts
+            .name(thread_str + ".num_fp_insts")
+            .desc("number of float instructions")
+            ;
+
+        t_info.numIntRegReads
+            .name(thread_str + ".num_int_register_reads")
+            .desc("number of times the integer registers were read")
+            ;
+
+        t_info.numIntRegWrites
+            .name(thread_str + ".num_int_register_writes")
+            .desc("number of times the integer registers were written")
+            ;
+
+        t_info.numFpRegReads
+            .name(thread_str + ".num_fp_register_reads")
+            .desc("number of times the floating registers were read")
+            ;
+
+        t_info.numFpRegWrites
+            .name(thread_str + ".num_fp_register_writes")
+            .desc("number of times the floating registers were written")
+            ;
+
+        t_info.numCCRegReads
+            .name(thread_str + ".num_cc_register_reads")
+            .desc("number of times the CC registers were read")
+            .flags(nozero)
+            ;
+
+        t_info.numCCRegWrites
+            .name(thread_str + ".num_cc_register_writes")
+            .desc("number of times the CC registers were written")
+            .flags(nozero)
+            ;
+
+        t_info.numMemRefs
+            .name(thread_str + ".num_mem_refs")
+            .desc("number of memory refs")
+            ;
+
+        t_info.numStoreInsts
+            .name(thread_str + ".num_store_insts")
+            .desc("Number of store instructions")
+            ;
+
+        t_info.numLoadInsts
+            .name(thread_str + ".num_load_insts")
+            .desc("Number of load instructions")
+            ;
+
+        t_info.notIdleFraction
+            .name(thread_str + ".not_idle_fraction")
+            .desc("Percentage of non-idle cycles")
+            ;
+
+        t_info.idleFraction
+            .name(thread_str + ".idle_fraction")
+            .desc("Percentage of idle cycles")
+            ;
+
+        t_info.numBusyCycles
+            .name(thread_str + ".num_busy_cycles")
+            .desc("Number of busy cycles")
+            ;
+
+        t_info.numIdleCycles
+            .name(thread_str + ".num_idle_cycles")
+            .desc("Number of idle cycles")
+            ;
+
+        t_info.icacheStallCycles
+            .name(thread_str + ".icache_stall_cycles")
+            .desc("ICache total stall cycles")
+            .prereq(t_info.icacheStallCycles)
+            ;
+
+        t_info.dcacheStallCycles
+            .name(thread_str + ".dcache_stall_cycles")
+            .desc("DCache total stall cycles")
+            .prereq(t_info.dcacheStallCycles)
+            ;
+
+        t_info.statExecutedInstType
+            .init(Enums::Num_OpClass)
+            .name(thread_str + ".op_class")
+            .desc("Class of executed instruction")
+            .flags(total | pdf | dist)
+            ;
+
+        for (unsigned i = 0; i < Num_OpClasses; ++i) {
+            t_info.statExecutedInstType.subname(i, Enums::OpClassStrings[i]);
+        }
 
-    idleFraction = constant(1.0) - notIdleFraction;
-    numIdleCycles = idleFraction * numCycles;
-    numBusyCycles = (notIdleFraction)*numCycles;
+        t_info.idleFraction = constant(1.0) - t_info.notIdleFraction;
+        t_info.numIdleCycles = t_info.idleFraction * numCycles;
+        t_info.numBusyCycles = t_info.notIdleFraction * numCycles;
 
-    numBranches
-        .name(name() + ".Branches")
-        .desc("Number of branches fetched")
-        .prereq(numBranches);
+        t_info.numBranches
+            .name(thread_str + ".Branches")
+            .desc("Number of branches fetched")
+            .prereq(t_info.numBranches);
 
-    numPredictedBranches
-        .name(name() + ".predictedBranches")
-        .desc("Number of branches predicted as taken")
-        .prereq(numPredictedBranches);
+        t_info.numPredictedBranches
+            .name(thread_str + ".predictedBranches")
+            .desc("Number of branches predicted as taken")
+            .prereq(t_info.numPredictedBranches);
 
-    numBranchMispred
-        .name(name() + ".BranchMispred")
-        .desc("Number of branch mispredictions")
-        .prereq(numBranchMispred);
+        t_info.numBranchMispred
+            .name(thread_str + ".BranchMispred")
+            .desc("Number of branch mispredictions")
+            .prereq(t_info.numBranchMispred);
+    }
 }
 
 void
 BaseSimpleCPU::resetStats()
 {
-//    startNumInst = numInst;
-     notIdleFraction = (_status != Idle);
+    for (auto &thread_info : threadInfo) {
+        thread_info->notIdleFraction = (_status != Idle);
+    }
 }
 
 void
 BaseSimpleCPU::serializeThread(CheckpointOut &cp, ThreadID tid) const
 {
     assert(_status == Idle || _status == Running);
-    assert(tid == 0);
 
-    thread->serialize(cp);
+    threadInfo[tid]->thread->serialize(cp);
 }
 
 void
 BaseSimpleCPU::unserializeThread(CheckpointIn &cp, ThreadID tid)
 {
-    if (tid != 0)
-        fatal("Trying to load more than one thread into a SimpleCPU\n");
-    thread->unserialize(cp);
+    threadInfo[tid]->thread->unserialize(cp);
 }
 
 void
@@ -329,29 +412,34 @@ change_thread_state(ThreadID tid, int activate, int priority)
 Addr
 BaseSimpleCPU::dbg_vtophys(Addr addr)
 {
-    return vtophys(tc, addr);
+    return vtophys(threadContexts[curThread], addr);
 }
 
 void
 BaseSimpleCPU::wakeup()
 {
-    getAddrMonitor()->gotWakeup = true;
+    getCpuAddrMonitor()->gotWakeup = true;
 
-    if (thread->status() != ThreadContext::Suspended)
-        return;
-
-    DPRINTF(Quiesce,"Suspended Processor awoke\n");
-    thread->activate();
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        if (threadInfo[tid]->thread->status() == ThreadContext::Suspended) {
+            DPRINTF(Quiesce,"Suspended Processor awoke\n");
+            threadInfo[tid]->thread->activate();
+        }
+    }
 }
 
 void
 BaseSimpleCPU::checkForInterrupts()
 {
+    SimpleExecContext&t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+    ThreadContext* tc = thread->getTC();
+
     if (checkInterrupts(tc)) {
         Fault interrupt = interrupts->getInterrupt(tc);
 
         if (interrupt != NoFault) {
-            fetchOffset = 0;
+            t_info.fetchOffset = 0;
             interrupts->updateIntrInfo(tc);
             interrupt->invoke(tc);
             thread->decoder.reset();
@@ -363,12 +451,15 @@ BaseSimpleCPU::checkForInterrupts()
 void
 BaseSimpleCPU::setupFetchRequest(Request *req)
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     Addr instAddr = thread->instAddr();
 
     // set up memory request for instruction fetch
     DPRINTF(Fetch, "Fetch: PC:%08p\n", instAddr);
 
-    Addr fetchPC = (instAddr & PCMask) + fetchOffset;
+    Addr fetchPC = (instAddr & PCMask) + t_info.fetchOffset;
     req->setVirt(0, fetchPC, sizeof(MachInst), Request::INST_FETCH, instMasterId(),
             instAddr);
 }
@@ -377,6 +468,9 @@ BaseSimpleCPU::setupFetchRequest(Request *req)
 void
 BaseSimpleCPU::preExecute()
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     // maintain $r0 semantics
     thread->setIntReg(ZeroReg, 0);
 #if THE_ISA == ALPHA_ISA
@@ -384,7 +478,7 @@ BaseSimpleCPU::preExecute()
 #endif // ALPHA_ISA
 
     // check for instruction-count-based events
-    comInstEventQueue[0]->serviceEvents(numInst);
+    comInstEventQueue[curThread]->serviceEvents(t_info.numInst);
     system->instEventQueue.serviceEvents(system->totalNumInsts);
 
     // decode the instruction
@@ -393,7 +487,7 @@ BaseSimpleCPU::preExecute()
     TheISA::PCState pcState = thread->pcState();
 
     if (isRomMicroPC(pcState.microPC())) {
-        stayAtPC = false;
+        t_info.stayAtPC = false;
         curStaticInst = microcodeRom.fetchMicroop(pcState.microPC(),
                                                   curMacroStaticInst);
     } else if (!curMacroStaticInst) {
@@ -404,7 +498,7 @@ BaseSimpleCPU::preExecute()
 
         //Predecode, ie bundle up an ExtMachInst
         //If more fetch data is needed, pass it in.
-        Addr fetchPC = (pcState.instAddr() & PCMask) + fetchOffset;
+        Addr fetchPC = (pcState.instAddr() & PCMask) + t_info.fetchOffset;
         //if(decoder->needMoreBytes())
             decoder->moreBytes(pcState, fetchPC, inst);
         //else
@@ -414,18 +508,19 @@ BaseSimpleCPU::preExecute()
         //fetch beyond the MachInst at the current pc.
         instPtr = decoder->decode(pcState);
         if (instPtr) {
-            stayAtPC = false;
+            t_info.stayAtPC = false;
             thread->pcState(pcState);
         } else {
-            stayAtPC = true;
-            fetchOffset += sizeof(MachInst);
+            t_info.stayAtPC = true;
+            t_info.fetchOffset += sizeof(MachInst);
         }
 
         //If we decoded an instruction and it's microcoded, start pulling
         //out micro ops
         if (instPtr && instPtr->isMacroop()) {
             curMacroStaticInst = instPtr;
-            curStaticInst = curMacroStaticInst->fetchMicroop(pcState.microPC());
+            curStaticInst =
+                curMacroStaticInst->fetchMicroop(pcState.microPC());
         } else {
             curStaticInst = instPtr;
         }
@@ -437,7 +532,7 @@ BaseSimpleCPU::preExecute()
     //If we decoded an instruction this "tick", record information about it.
     if (curStaticInst) {
 #if TRACING_ON
-        traceData = tracer->getInstRecord(curTick(), tc,
+        traceData = tracer->getInstRecord(curTick(), thread->getTC(),
                 curStaticInst, thread->pcState(), curMacroStaticInst);
 
         DPRINTF(Decode,"Decode: Decoded %s instruction: %#x\n",
@@ -445,86 +540,91 @@ BaseSimpleCPU::preExecute()
 #endif // TRACING_ON
     }
 
-    if (branchPred && curStaticInst && curStaticInst->isControl()) {
+    if (branchPred && curStaticInst &&
+        curStaticInst->isControl()) {
         // Use a fake sequence number since we only have one
         // instruction in flight at the same time.
         const InstSeqNum cur_sn(0);
-        const ThreadID tid(0);
-        pred_pc = thread->pcState();
+        t_info.predPC = thread->pcState();
         const bool predict_taken(
-            branchPred->predict(curStaticInst, cur_sn, pred_pc, tid));
+            branchPred->predict(curStaticInst, cur_sn, t_info.predPC,
+                                curThread));
 
         if (predict_taken)
-            ++numPredictedBranches;
+            ++t_info.numPredictedBranches;
     }
 }
 
 void
 BaseSimpleCPU::postExecute()
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     assert(curStaticInst);
 
-    TheISA::PCState pc = tc->pcState();
+    TheISA::PCState pc = threadContexts[curThread]->pcState();
     Addr instAddr = pc.instAddr();
     if (FullSystem && thread->profile) {
-        bool usermode = TheISA::inUserMode(tc);
+        bool usermode = TheISA::inUserMode(threadContexts[curThread]);
         thread->profilePC = usermode ? 1 : instAddr;
-        ProfileNode *node = thread->profile->consume(tc, curStaticInst);
+        ProfileNode *node = thread->profile->consume(threadContexts[curThread],
+                                                     curStaticInst);
         if (node)
             thread->profileNode = node;
     }
 
     if (curStaticInst->isMemRef()) {
-        numMemRefs++;
+        t_info.numMemRefs++;
     }
 
     if (curStaticInst->isLoad()) {
-        ++numLoad;
-        comLoadEventQueue[0]->serviceEvents(numLoad);
+        ++t_info.numLoad;
+        comLoadEventQueue[curThread]->serviceEvents(t_info.numLoad);
     }
 
     if (CPA::available()) {
-        CPA::cpa()->swAutoBegin(tc, pc.nextInstAddr());
+        CPA::cpa()->swAutoBegin(threadContexts[curThread], pc.nextInstAddr());
     }
 
     if (curStaticInst->isControl()) {
-        ++numBranches;
+        ++t_info.numBranches;
     }
 
     /* Power model statistics */
     //integer alu accesses
     if (curStaticInst->isInteger()){
-        numIntAluAccesses++;
-        numIntInsts++;
+        t_info.numIntAluAccesses++;
+        t_info.numIntInsts++;
     }
 
     //float alu accesses
     if (curStaticInst->isFloating()){
-        numFpAluAccesses++;
-        numFpInsts++;
+        t_info.numFpAluAccesses++;
+        t_info.numFpInsts++;
     }
-    
+
     //number of function calls/returns to get window accesses
     if (curStaticInst->isCall() || curStaticInst->isReturn()){
-        numCallsReturns++;
+        t_info.numCallsReturns++;
     }
-    
+
     //the number of branch predictions that will be made
     if (curStaticInst->isCondCtrl()){
-        numCondCtrlInsts++;
+        t_info.numCondCtrlInsts++;
     }
-    
+
     //result bus acceses
     if (curStaticInst->isLoad()){
-        numLoadInsts++;
+        t_info.numLoadInsts++;
     }
-    
+
     if (curStaticInst->isStore()){
-        numStoreInsts++;
+        t_info.numStoreInsts++;
     }
     /* End power model statistics */
 
-    statExecutedInstType[curStaticInst->opClass()]++;
+    t_info.statExecutedInstType[curStaticInst->opClass()]++;
 
     if (FullSystem)
         traceFunctions(instAddr);
@@ -542,13 +642,16 @@ BaseSimpleCPU::postExecute()
 void
 BaseSimpleCPU::advancePC(const Fault &fault)
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     const bool branching(thread->pcState().branching());
 
     //Since we're moving to a new pc, zero out the offset
-    fetchOffset = 0;
+    t_info.fetchOffset = 0;
     if (fault != NoFault) {
         curMacroStaticInst = StaticInst::nullStaticInstPtr;
-        fault->invoke(tc, curStaticInst);
+        fault->invoke(threadContexts[curThread], curStaticInst);
         thread->decoder.reset();
     } else {
         if (curStaticInst) {
@@ -564,16 +667,14 @@ BaseSimpleCPU::advancePC(const Fault &fault)
         // Use a fake sequence number since we only have one
         // instruction in flight at the same time.
         const InstSeqNum cur_sn(0);
-        const ThreadID tid(0);
 
-        if (pred_pc == thread->pcState()) {
+        if (t_info.predPC == thread->pcState()) {
             // Correctly predicted branch
-            branchPred->update(cur_sn, tid);
+            branchPred->update(cur_sn, curThread);
         } else {
             // Mis-predicted branch
-            branchPred->squash(cur_sn, pcState(),
-                               branching, tid);
-            ++numBranchMispred;
+            branchPred->squash(cur_sn, thread->pcState(), branching, curThread);
+            ++t_info.numBranchMispred;
         }
     }
 }
@@ -582,5 +683,6 @@ void
 BaseSimpleCPU::startup()
 {
     BaseCPU::startup();
-    thread->startup();
+    for (auto& t_info : threadInfo)
+        t_info->thread->startup();
 }
index 2f72470109e07a4b723dbb2fe118f365431b4fbf..c108cb986d44ecfdf998c20236e52d637126c380 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2011-2012,2015 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -79,57 +79,35 @@ namespace Trace {
 
 struct BaseSimpleCPUParams;
 class BPredUnit;
+class SimpleExecContext;
 
-class BaseSimpleCPU : public BaseCPU, public ExecContext
+class BaseSimpleCPU : public BaseCPU
 {
   protected:
-    typedef TheISA::MiscReg MiscReg;
-    typedef TheISA::FloatReg FloatReg;
-    typedef TheISA::FloatRegBits FloatRegBits;
-    typedef TheISA::CCReg CCReg;
-
+    ThreadID curThread;
     BPredUnit *branchPred;
 
-  protected:
-    Trace::InstRecord *traceData;
-
-    inline void checkPcEventQueue() {
-        Addr oldpc, pc = thread->instAddr();
-        do {
-            oldpc = pc;
-            system->pcEventQueue.service(tc);
-            pc = thread->instAddr();
-        } while (oldpc != pc);
-    }
-
-  public:
-    void wakeup();
-
-    void zero_fill_64(Addr addr) {
-      static int warned = 0;
-      if (!warned) {
-        warn ("WH64 is not implemented");
-        warned = 1;
-      }
-    };
+    void checkPcEventQueue();
+    void swapActiveThread();
 
   public:
     BaseSimpleCPU(BaseSimpleCPUParams *params);
     virtual ~BaseSimpleCPU();
-
+    void wakeup();
+    virtual void init();
   public:
-    /** SimpleThread object, provides all the architectural state. */
-    SimpleThread *thread;
+    Trace::InstRecord *traceData;
+    CheckerCPU *checker;
 
-    /** ThreadContext object, provides an interface for external
-     * objects to modify this thread's state.
-     */
-    ThreadContext *tc;
+    std::vector<SimpleExecContext*> threadInfo;
+    std::list<ThreadID> activeThreads;
 
-    CheckerCPU *checker;
+    /** Current instruction */
+    TheISA::MachInst inst;
+    StaticInstPtr curStaticInst;
+    StaticInstPtr curMacroStaticInst;
 
   protected:
-
     enum Status {
         Idle,
         Running,
@@ -147,22 +125,8 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext
     Status _status;
 
   public:
-
     Addr dbg_vtophys(Addr addr);
 
-    bool interval_stats;
-
-    // current instruction
-    TheISA::MachInst inst;
-
-    StaticInstPtr curStaticInst;
-    StaticInstPtr curMacroStaticInst;
-
-    //This is the offset from the current pc that fetch should be performed at
-    Addr fetchOffset;
-    //This flag says to stay at the current pc. This is useful for
-    //instructions which go beyond MachInst boundaries.
-    bool stayAtPC;
 
     void checkForInterrupts();
     void setupFetchRequest(Request *req);
@@ -178,289 +142,20 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext
 
     virtual void startup();
 
-    // number of simulated instructions
-    Counter numInst;
-    Counter startNumInst;
-    Stats::Scalar numInsts;
-    Counter numOp;
-    Counter startNumOp;
-    Stats::Scalar numOps;
-
-    void countInst()
-    {
-        if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
-            numInst++;
-            numInsts++;
-        }
-        numOp++;
-        numOps++;
-
-        system->totalNumInsts++;
-        thread->funcExeInst++;
-    }
-
-    virtual Counter totalInsts() const
-    {
-        return numInst - startNumInst;
-    }
-
-    virtual Counter totalOps() const
-    {
-        return numOp - startNumOp;
-    }
-
-    //number of integer alu accesses
-    Stats::Scalar numIntAluAccesses;
-
-    //number of float alu accesses
-    Stats::Scalar numFpAluAccesses;
-
-    //number of function calls/returns
-    Stats::Scalar numCallsReturns;
-
-    //conditional control instructions;
-    Stats::Scalar numCondCtrlInsts;
-
-    //number of int instructions
-    Stats::Scalar numIntInsts;
-
-    //number of float instructions
-    Stats::Scalar numFpInsts;
-
-    //number of integer register file accesses
-    Stats::Scalar numIntRegReads;
-    Stats::Scalar numIntRegWrites;
-
-    //number of float register file accesses
-    Stats::Scalar numFpRegReads;
-    Stats::Scalar numFpRegWrites;
-
-    //number of condition code register file accesses
-    Stats::Scalar numCCRegReads;
-    Stats::Scalar numCCRegWrites;
+    virtual Fault readMem(Addr addr, uint8_t* data, unsigned size,
+                          unsigned flags) = 0;
 
-    // number of simulated memory references
-    Stats::Scalar numMemRefs;
-    Stats::Scalar numLoadInsts;
-    Stats::Scalar numStoreInsts;
+    virtual Fault writeMem(uint8_t* data, unsigned size, Addr addr,
+                           unsigned flags, uint64_t* res) = 0;
 
-    // number of idle cycles
-    Stats::Formula numIdleCycles;
-
-    // number of busy cycles
-    Stats::Formula numBusyCycles;
-
-    // number of simulated loads
-    Counter numLoad;
-    Counter startNumLoad;
-
-    // number of idle cycles
-    Stats::Average notIdleFraction;
-    Stats::Formula idleFraction;
-
-    // number of cycles stalled for I-cache responses
-    Stats::Scalar icacheStallCycles;
-    Counter lastIcacheStall;
-
-    // number of cycles stalled for D-cache responses
-    Stats::Scalar dcacheStallCycles;
-    Counter lastDcacheStall;
-
-    /// @{
-    /// Total number of branches fetched
-    Stats::Scalar numBranches;
-    /// Number of branches predicted as taken
-    Stats::Scalar numPredictedBranches;
-    /// Number of misprediced branches
-    Stats::Scalar numBranchMispred;
-    /// @}
-
-    // instruction mix histogram by OpClass
-    Stats::Vector statExecutedInstType;
+    void countInst();
+    virtual Counter totalInsts() const;
+    virtual Counter totalOps() const;
 
     void serializeThread(CheckpointOut &cp,
                          ThreadID tid) const M5_ATTR_OVERRIDE;
     void unserializeThread(CheckpointIn &cp, ThreadID tid) M5_ATTR_OVERRIDE;
 
-    // These functions are only used in CPU models that split
-    // effective address computation from the actual memory access.
-    void setEA(Addr EA) { panic("BaseSimpleCPU::setEA() not implemented\n"); }
-    Addr getEA() const  { panic("BaseSimpleCPU::getEA() not implemented\n"); }
-
-    // The register accessor methods provide the index of the
-    // instruction's operand (e.g., 0 or 1), not the architectural
-    // register index, to simplify the implementation of register
-    // renaming.  We find the architectural register index by indexing
-    // into the instruction's own operand index table.  Note that a
-    // raw pointer to the StaticInst is provided instead of a
-    // ref-counted StaticInstPtr to redice overhead.  This is fine as
-    // long as these methods don't copy the pointer into any long-term
-    // storage (which is pretty hard to imagine they would have reason
-    // to do).
-
-    IntReg readIntRegOperand(const StaticInst *si, int idx)
-    {
-        numIntRegReads++;
-        return thread->readIntReg(si->srcRegIdx(idx));
-    }
-
-    FloatReg readFloatRegOperand(const StaticInst *si, int idx)
-    {
-        numFpRegReads++;
-        int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Reg_Base;
-        return thread->readFloatReg(reg_idx);
-    }
-
-    FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx)
-    {
-        numFpRegReads++;
-        int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Reg_Base;
-        return thread->readFloatRegBits(reg_idx);
-    }
-
-    CCReg readCCRegOperand(const StaticInst *si, int idx)
-    {
-        numCCRegReads++;
-        int reg_idx = si->srcRegIdx(idx) - TheISA::CC_Reg_Base;
-        return thread->readCCReg(reg_idx);
-    }
-
-    void setIntRegOperand(const StaticInst *si, int idx, IntReg val)
-    {
-        numIntRegWrites++;
-        thread->setIntReg(si->destRegIdx(idx), val);
-    }
-
-    void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val)
-    {
-        numFpRegWrites++;
-        int reg_idx = si->destRegIdx(idx) - TheISA::FP_Reg_Base;
-        thread->setFloatReg(reg_idx, val);
-    }
-
-    void setFloatRegOperandBits(const StaticInst *si, int idx,
-                                FloatRegBits val)
-    {
-        numFpRegWrites++;
-        int reg_idx = si->destRegIdx(idx) - TheISA::FP_Reg_Base;
-        thread->setFloatRegBits(reg_idx, val);
-    }
-
-    void setCCRegOperand(const StaticInst *si, int idx, CCReg val)
-    {
-        numCCRegWrites++;
-        int reg_idx = si->destRegIdx(idx) - TheISA::CC_Reg_Base;
-        thread->setCCReg(reg_idx, val);
-    }
-
-    bool readPredicate() { return thread->readPredicate(); }
-    void setPredicate(bool val)
-    {
-        thread->setPredicate(val);
-        if (traceData) {
-            traceData->setPredicate(val);
-        }
-    }
-    TheISA::PCState pcState() const { return thread->pcState(); }
-    void pcState(const TheISA::PCState &val) { thread->pcState(val); }
-    Addr instAddr() { return thread->instAddr(); }
-    Addr nextInstAddr() { return thread->nextInstAddr(); }
-    MicroPC microPC() { return thread->microPC(); }
-
-    MiscReg readMiscRegNoEffect(int misc_reg) const
-    {
-        return thread->readMiscRegNoEffect(misc_reg);
-    }
-
-    MiscReg readMiscReg(int misc_reg)
-    {
-        numIntRegReads++;
-        return thread->readMiscReg(misc_reg);
-    }
-
-    void setMiscReg(int misc_reg, const MiscReg &val)
-    {
-        numIntRegWrites++;
-        return thread->setMiscReg(misc_reg, val);
-    }
-
-    MiscReg readMiscRegOperand(const StaticInst *si, int idx)
-    {
-        numIntRegReads++;
-        int reg_idx = si->srcRegIdx(idx) - TheISA::Misc_Reg_Base;
-        return thread->readMiscReg(reg_idx);
-    }
-
-    void setMiscRegOperand(
-            const StaticInst *si, int idx, const MiscReg &val)
-    {
-        numIntRegWrites++;
-        int reg_idx = si->destRegIdx(idx) - TheISA::Misc_Reg_Base;
-        return thread->setMiscReg(reg_idx, val);
-    }
-
-    void demapPage(Addr vaddr, uint64_t asn)
-    {
-        thread->demapPage(vaddr, asn);
-    }
-
-    void demapInstPage(Addr vaddr, uint64_t asn)
-    {
-        thread->demapInstPage(vaddr, asn);
-    }
-
-    void demapDataPage(Addr vaddr, uint64_t asn)
-    {
-        thread->demapDataPage(vaddr, asn);
-    }
-
-    unsigned int readStCondFailures() const {
-        return thread->readStCondFailures();
-    }
-
-    void setStCondFailures(unsigned int sc_failures) {
-        thread->setStCondFailures(sc_failures);
-    }
-
-    MiscReg readRegOtherThread(int regIdx, ThreadID tid = InvalidThreadID)
-    {
-        panic("Simple CPU models do not support multithreaded "
-              "register access.\n");
-    }
-
-    void setRegOtherThread(int regIdx, MiscReg val,
-                           ThreadID tid = InvalidThreadID)
-    {
-        panic("Simple CPU models do not support multithreaded "
-              "register access.\n");
-    }
-
-    //Fault CacheOp(uint8_t Op, Addr EA);
-
-    Fault hwrei() { return thread->hwrei(); }
-    bool simPalCheck(int palFunc) { return thread->simPalCheck(palFunc); }
-
-    void
-    syscall(int64_t callnum)
-    {
-        if (FullSystem)
-            panic("Syscall emulation isn't available in FS mode.\n");
-
-        thread->syscall(callnum);
-    }
-
-    ThreadContext *tcBase() { return tc; }
-
-  private:
-    TheISA::PCState pred_pc;
-
-  public:
-    // monitor/mwait funtions
-    void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
-    bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
-    void mwaitAtomic(ThreadContext *tc)
-    { return BaseCPU::mwaitAtomic(tc, thread->dtb); }
-    AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
 };
 
 #endif // __CPU_SIMPLE_BASE_HH__
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
new file mode 100644 (file)
index 0000000..f474cc3
--- /dev/null
@@ -0,0 +1,416 @@
+/*
+ * Copyright (c) 2014-2015 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ *          Andreas Sandberg
+ *          Mitch Hayenga
+ */
+
+#ifndef __CPU_SIMPLE_EXEC_CONTEXT_HH__
+#define __CPU_SIMPLE_EXEC_CONTEXT_HH__
+
+#include "arch/registers.hh"
+#include "base/types.hh"
+#include "config/the_isa.hh"
+#include "cpu/base.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/simple/base.hh"
+#include "cpu/static_inst_fwd.hh"
+#include "cpu/translation.hh"
+
+class BaseSimpleCPU;
+
+class SimpleExecContext : public ExecContext {
+  protected:
+    typedef TheISA::MiscReg MiscReg;
+    typedef TheISA::FloatReg FloatReg;
+    typedef TheISA::FloatRegBits FloatRegBits;
+    typedef TheISA::CCReg CCReg;
+
+  public:
+    BaseSimpleCPU *cpu;
+    SimpleThread* thread;
+
+    // This is the offset from the current pc that fetch should be performed at
+    Addr fetchOffset;
+    // This flag says to stay at the current pc. This is useful for
+    // instructions which go beyond MachInst boundaries.
+    bool stayAtPC;
+
+    // Branch prediction
+    TheISA::PCState predPC;
+
+    /** PER-THREAD STATS */
+
+    // Number of simulated instructions
+    Counter numInst;
+    Stats::Scalar numInsts;
+    Counter numOp;
+    Stats::Scalar numOps;
+
+    // Number of integer alu accesses
+    Stats::Scalar numIntAluAccesses;
+
+    // Number of float alu accesses
+    Stats::Scalar numFpAluAccesses;
+
+    // Number of function calls/returns
+    Stats::Scalar numCallsReturns;
+
+    // Number of conditional control instructions
+    Stats::Scalar numCondCtrlInsts;
+
+    // Number of int instructions
+    Stats::Scalar numIntInsts;
+
+    // Number of float instructions
+    Stats::Scalar numFpInsts;
+
+    // Number of integer register file accesses
+    Stats::Scalar numIntRegReads;
+    Stats::Scalar numIntRegWrites;
+
+    // Number of float register file accesses
+    Stats::Scalar numFpRegReads;
+    Stats::Scalar numFpRegWrites;
+
+    // Number of condition code register file accesses
+    Stats::Scalar numCCRegReads;
+    Stats::Scalar numCCRegWrites;
+
+    // Number of simulated memory references
+    Stats::Scalar numMemRefs;
+    Stats::Scalar numLoadInsts;
+    Stats::Scalar numStoreInsts;
+
+    // Number of idle cycles
+    Stats::Formula numIdleCycles;
+
+    // Number of busy cycles
+    Stats::Formula numBusyCycles;
+
+    // Number of simulated loads
+    Counter numLoad;
+
+    // Fraction of time not idle / idle
+    Stats::Average notIdleFraction;
+    Stats::Formula idleFraction;
+
+    // Number of cycles stalled for I-cache responses
+    Stats::Scalar icacheStallCycles;
+    Counter lastIcacheStall;
+
+    // Number of cycles stalled for D-cache responses
+    Stats::Scalar dcacheStallCycles;
+    Counter lastDcacheStall;
+
+    /// @{
+    /// Total number of branches fetched
+    Stats::Scalar numBranches;
+    /// Number of branches predicted as taken
+    Stats::Scalar numPredictedBranches;
+    /// Number of mispredicted branches
+    Stats::Scalar numBranchMispred;
+    /// @}
+
+   // Instruction mix histogram by OpClass
+   Stats::Vector statExecutedInstType;
+
+  public:
+    /** Constructor */
+    SimpleExecContext(BaseSimpleCPU* _cpu, SimpleThread* _thread)
+        : cpu(_cpu), thread(_thread), fetchOffset(0), stayAtPC(false),
+        numInst(0), numOp(0), numLoad(0), lastIcacheStall(0), lastDcacheStall(0)
+    { }
+
+    /** Reads an integer register. */
+    IntReg readIntRegOperand(const StaticInst *si, int idx) M5_ATTR_OVERRIDE
+    {
+        numIntRegReads++;
+        return thread->readIntReg(si->srcRegIdx(idx));
+    }
+
+    /** Sets an integer register to a value. */
+    void setIntRegOperand(const StaticInst *si, int idx, IntReg val)
+        M5_ATTR_OVERRIDE
+    {
+        numIntRegWrites++;
+        thread->setIntReg(si->destRegIdx(idx), val);
+    }
+
+    /** Reads a floating point register of single register width. */
+    FloatReg readFloatRegOperand(const StaticInst *si, int idx)
+        M5_ATTR_OVERRIDE
+    {
+        numFpRegReads++;
+        int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Reg_Base;
+        return thread->readFloatReg(reg_idx);
+    }
+
+    /** Reads a floating point register in its binary format, instead
+     * of by value. */
+    FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx)
+        M5_ATTR_OVERRIDE
+    {
+        numFpRegReads++;
+        int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Reg_Base;
+        return thread->readFloatRegBits(reg_idx);
+    }
+
+    /** Sets a floating point register of single width to a value. */
+    void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val)
+        M5_ATTR_OVERRIDE
+    {
+        numFpRegWrites++;
+        int reg_idx = si->destRegIdx(idx) - TheISA::FP_Reg_Base;
+        thread->setFloatReg(reg_idx, val);
+    }
+
+    /** Sets the bits of a floating point register of single width
+     * to a binary value. */
+    void setFloatRegOperandBits(const StaticInst *si, int idx,
+                                FloatRegBits val) M5_ATTR_OVERRIDE
+    {
+        numFpRegWrites++;
+        int reg_idx = si->destRegIdx(idx) - TheISA::FP_Reg_Base;
+        thread->setFloatRegBits(reg_idx, val);
+    }
+
+    CCReg readCCRegOperand(const StaticInst *si, int idx) M5_ATTR_OVERRIDE
+    {
+        numCCRegReads++;
+        int reg_idx = si->srcRegIdx(idx) - TheISA::CC_Reg_Base;
+        return thread->readCCReg(reg_idx);
+    }
+
+    void setCCRegOperand(const StaticInst *si, int idx, CCReg val)
+        M5_ATTR_OVERRIDE
+    {
+        numCCRegWrites++;
+        int reg_idx = si->destRegIdx(idx) - TheISA::CC_Reg_Base;
+        thread->setCCReg(reg_idx, val);
+    }
+
+    MiscReg readMiscRegOperand(const StaticInst *si, int idx) M5_ATTR_OVERRIDE
+    {
+        numIntRegReads++;
+        int reg_idx = si->srcRegIdx(idx) - TheISA::Misc_Reg_Base;
+        return thread->readMiscReg(reg_idx);
+    }
+
+    void setMiscRegOperand(const StaticInst *si, int idx, const MiscReg &val)
+        M5_ATTR_OVERRIDE
+    {
+        numIntRegWrites++;
+        int reg_idx = si->destRegIdx(idx) - TheISA::Misc_Reg_Base;
+        thread->setMiscReg(reg_idx, val);
+    }
+
+    /**
+     * Reads a miscellaneous register, handling any architectural
+     * side effects due to reading that register.
+     */
+    MiscReg readMiscReg(int misc_reg) M5_ATTR_OVERRIDE
+    {
+        numIntRegReads++;
+        return thread->readMiscReg(misc_reg);
+    }
+
+    /**
+     * Sets a miscellaneous register, handling any architectural
+     * side effects due to writing that register.
+     */
+    void setMiscReg(int misc_reg, const MiscReg &val) M5_ATTR_OVERRIDE
+    {
+        numIntRegWrites++;
+        thread->setMiscReg(misc_reg, val);
+    }
+
+    PCState pcState() const M5_ATTR_OVERRIDE
+    {
+        return thread->pcState();
+    }
+
+    void pcState(const PCState &val) M5_ATTR_OVERRIDE
+    {
+        thread->pcState(val);
+    }
+
+
+    /**
+     * Record the effective address of the instruction.
+     *
+     * @note Only valid for memory ops.
+     */
+    void setEA(Addr EA) M5_ATTR_OVERRIDE
+    { panic("BaseSimpleCPU::setEA() not implemented\n"); }
+
+    /**
+     * Get the effective address of the instruction.
+     *
+     * @note Only valid for memory ops.
+     */
+    Addr getEA() const M5_ATTR_OVERRIDE
+    { panic("BaseSimpleCPU::getEA() not implemented\n"); }
+
+    Fault readMem(Addr addr, uint8_t *data, unsigned int size,
+                  unsigned int flags) M5_ATTR_OVERRIDE
+    {
+        return cpu->readMem(addr, data, size, flags);
+    }
+
+    Fault writeMem(uint8_t *data, unsigned int size, Addr addr,
+                   unsigned int flags, uint64_t *res) M5_ATTR_OVERRIDE
+    {
+        return cpu->writeMem(data, size, addr, flags, res);
+    }
+
+    /**
+     * Sets the number of consecutive store conditional failures.
+     */
+    void setStCondFailures(unsigned int sc_failures) M5_ATTR_OVERRIDE
+    {
+        thread->setStCondFailures(sc_failures);
+    }
+
+    /**
+     * Returns the number of consecutive store conditional failures.
+     */
+    unsigned int readStCondFailures() const M5_ATTR_OVERRIDE
+    {
+        return thread->readStCondFailures();
+    }
+
+    /**
+     * Executes a syscall specified by the callnum.
+     */
+    void syscall(int64_t callnum) M5_ATTR_OVERRIDE
+    {
+        if (FullSystem)
+            panic("Syscall emulation isn't available in FS mode.");
+
+        thread->syscall(callnum);
+    }
+
+    /** Returns a pointer to the ThreadContext. */
+    ThreadContext *tcBase() M5_ATTR_OVERRIDE
+    {
+        return thread->getTC();
+    }
+
+    /**
+     * Somewhat Alpha-specific function that handles returning from an
+     * error or interrupt.
+     */
+    Fault hwrei() M5_ATTR_OVERRIDE
+    {
+        return thread->hwrei();
+    }
+
+    /**
+     * Check for special simulator handling of specific PAL calls.  If
+     * return value is false, actual PAL call will be suppressed.
+     */
+    bool simPalCheck(int palFunc) M5_ATTR_OVERRIDE
+    {
+        return thread->simPalCheck(palFunc);
+    }
+
+    bool readPredicate() M5_ATTR_OVERRIDE
+    {
+        return thread->readPredicate();
+    }
+
+    void setPredicate(bool val) M5_ATTR_OVERRIDE
+    {
+        thread->setPredicate(val);
+
+        if (cpu->traceData) {
+            cpu->traceData->setPredicate(val);
+        }
+    }
+
+    /**
+     * Invalidate a page in the DTLB <i>and</i> ITLB.
+     */
+    void demapPage(Addr vaddr, uint64_t asn) M5_ATTR_OVERRIDE
+    {
+        thread->demapPage(vaddr, asn);
+    }
+
+    void armMonitor(Addr address) M5_ATTR_OVERRIDE
+    {
+        cpu->armMonitor(address);
+    }
+
+    bool mwait(PacketPtr pkt) M5_ATTR_OVERRIDE
+    {
+        return cpu->mwait(pkt);
+    }
+
+    void mwaitAtomic(ThreadContext *tc) M5_ATTR_OVERRIDE
+    {
+        cpu->mwaitAtomic(tc, thread->dtb);
+    }
+
+    AddressMonitor *getAddrMonitor() M5_ATTR_OVERRIDE
+    {
+        return cpu->getCpuAddrMonitor();
+    }
+
+#if THE_ISA == MIPS_ISA
+    MiscReg readRegOtherThread(int regIdx, ThreadID tid = InvalidThreadID)
+        M5_ATTR_OVERRIDE
+    {
+        panic("Simple CPU models do not support multithreaded "
+              "register access.");
+    }
+
+    void setRegOtherThread(int regIdx, MiscReg val,
+                           ThreadID tid = InvalidThreadID) M5_ATTR_OVERRIDE
+    {
+        panic("Simple CPU models do not support multithreaded "
+              "register access.");
+    }
+
+#endif
+
+};
+
+#endif // __CPU_SIMPLE_EXEC_CONTEXT_HH__
index 5dc042f1e93f9e64820b37f620692966ec1c60d7..487da36eaa84c99cfc2e435a523fe49c715fdca1 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright 2014 Google, Inc.
- * Copyright (c) 2010-2013 ARM Limited
+ * Copyright (c) 2010-2013,2015 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -67,18 +67,7 @@ using namespace TheISA;
 void
 TimingSimpleCPU::init()
 {
-    BaseCPU::init();
-
-    // Initialise the ThreadContext's memory proxies
-    tcBase()->initMemProxies(tcBase());
-
-    if (FullSystem && !params()->switched_out) {
-        for (int i = 0; i < threadContexts.size(); ++i) {
-            ThreadContext *tc = threadContexts[i];
-            // initialize CPU, including PC
-            TheISA::initCPU(tc, _cpuId);
-        }
-    }
+    BaseSimpleCPU::init();
 }
 
 void
@@ -111,9 +100,10 @@ TimingSimpleCPU::drain()
     if (_status == Idle ||
         (_status == BaseSimpleCPU::Running && isDrained())) {
         DPRINTF(Drain, "No need to drain.\n");
+        activeThreads.clear();
         return DrainState::Drained;
     } else {
-        DPRINTF(Drain, "Requesting drain: %s\n", pcState());
+        DPRINTF(Drain, "Requesting drain.\n");
 
         // The fetch event can become descheduled if a drain didn't
         // succeed on the first attempt. We need to reschedule it if
@@ -136,17 +126,27 @@ TimingSimpleCPU::drainResume()
     verifyMemoryMode();
 
     assert(!threadContexts.empty());
-    if (threadContexts.size() > 1)
-        fatal("The timing CPU only supports one thread.\n");
 
-    if (thread->status() == ThreadContext::Active) {
-        schedule(fetchEvent, nextCycle());
-        _status = BaseSimpleCPU::Running;
-        notIdleFraction = 1;
-    } else {
-        _status = BaseSimpleCPU::Idle;
-        notIdleFraction = 0;
+    _status = BaseSimpleCPU::Idle;
+
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        if (threadInfo[tid]->thread->status() == ThreadContext::Active) {
+            threadInfo[tid]->notIdleFraction = 1;
+
+            activeThreads.push_back(tid);
+
+            _status = BaseSimpleCPU::Running;
+
+            // Fetch if any threads active
+            if (!fetchEvent.scheduled()) {
+                schedule(fetchEvent, nextCycle());
+            }
+        } else {
+            threadInfo[tid]->notIdleFraction = 0;
+        }
     }
+
+    system->totalNumInsts = 0;
 }
 
 bool
@@ -155,7 +155,7 @@ TimingSimpleCPU::tryCompleteDrain()
     if (drainState() != DrainState::Draining)
         return false;
 
-    DPRINTF(Drain, "tryCompleteDrain: %s\n", pcState());
+    DPRINTF(Drain, "tryCompleteDrain.\n");
     if (!isDrained())
         return false;
 
@@ -168,12 +168,15 @@ TimingSimpleCPU::tryCompleteDrain()
 void
 TimingSimpleCPU::switchOut()
 {
+    SimpleExecContext& t_info = *threadInfo[curThread];
+    M5_VAR_USED SimpleThread* thread = t_info.thread;
+
     BaseSimpleCPU::switchOut();
 
     assert(!fetchEvent.scheduled());
     assert(_status == BaseSimpleCPU::Running || _status == Idle);
-    assert(!stayAtPC);
-    assert(microPC() == 0);
+    assert(!t_info.stayAtPC);
+    assert(thread->microPC() == 0);
 
     updateCycleCounts();
 }
@@ -201,16 +204,20 @@ TimingSimpleCPU::activateContext(ThreadID thread_num)
 {
     DPRINTF(SimpleCPU, "ActivateContext %d\n", thread_num);
 
-    assert(thread_num == 0);
-    assert(thread);
-
-    assert(_status == Idle);
+    assert(thread_num < numThreads);
 
-    notIdleFraction = 1;
-    _status = BaseSimpleCPU::Running;
+    threadInfo[thread_num]->notIdleFraction = 1;
+    if (_status == BaseSimpleCPU::Idle)
+        _status = BaseSimpleCPU::Running;
 
     // kick things off by initiating the fetch of the next instruction
-    schedule(fetchEvent, clockEdge(Cycles(0)));
+    if (!fetchEvent.scheduled())
+        schedule(fetchEvent, clockEdge(Cycles(0)));
+
+    if (std::find(activeThreads.begin(), activeThreads.end(), thread_num)
+         == activeThreads.end()) {
+        activeThreads.push_back(thread_num);
+    }
 }
 
 
@@ -219,24 +226,31 @@ TimingSimpleCPU::suspendContext(ThreadID thread_num)
 {
     DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);
 
-    assert(thread_num == 0);
-    assert(thread);
+    assert(thread_num < numThreads);
+    activeThreads.remove(thread_num);
 
     if (_status == Idle)
         return;
 
     assert(_status == BaseSimpleCPU::Running);
 
-    // just change status to Idle... if status != Running,
-    // completeInst() will not initiate fetch of next instruction.
+    threadInfo[thread_num]->notIdleFraction = 0;
 
-    notIdleFraction = 0;
-    _status = Idle;
+    if (activeThreads.empty()) {
+        _status = Idle;
+
+        if (fetchEvent.scheduled()) {
+            deschedule(fetchEvent);
+        }
+    }
 }
 
 bool
 TimingSimpleCPU::handleReadPacket(PacketPtr pkt)
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     RequestPtr req = pkt->req;
 
     // We're about to issue a locked load, so tell the monitor
@@ -264,6 +278,9 @@ void
 TimingSimpleCPU::sendData(RequestPtr req, uint8_t *data, uint64_t *res,
                           bool read)
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     PacketPtr pkt = buildPacket(req, read);
     pkt->dataDynamic<uint8_t>(data);
     if (req->getFlags().isSet(Request::NO_ACCESS)) {
@@ -389,9 +406,12 @@ Fault
 TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
                          unsigned size, unsigned flags)
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     Fault fault;
     const int asid = 0;
-    const ThreadID tid = 0;
+    const ThreadID tid = curThread;
     const Addr pc = thread->instAddr();
     unsigned block_size = cacheLineSize();
     BaseTLB::Mode mode = BaseTLB::Read;
@@ -400,7 +420,8 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
         traceData->setMem(addr, size, flags);
 
     RequestPtr req  = new Request(asid, addr, size,
-                                  flags, dataMasterId(), pc, _cpuId, tid);
+                                  flags, dataMasterId(), pc,
+                                  thread->contextId(), tid);
 
     req->taskId(taskId());
 
@@ -421,14 +442,14 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
         DataTranslation<TimingSimpleCPU *> *trans2 =
             new DataTranslation<TimingSimpleCPU *>(this, state, 1);
 
-        thread->dtb->translateTiming(req1, tc, trans1, mode);
-        thread->dtb->translateTiming(req2, tc, trans2, mode);
+        thread->dtb->translateTiming(req1, thread->getTC(), trans1, mode);
+        thread->dtb->translateTiming(req2, thread->getTC(), trans2, mode);
     } else {
         WholeTranslationState *state =
             new WholeTranslationState(req, new uint8_t[size], NULL, mode);
         DataTranslation<TimingSimpleCPU *> *translation
             = new DataTranslation<TimingSimpleCPU *>(this, state);
-        thread->dtb->translateTiming(req, tc, translation, mode);
+        thread->dtb->translateTiming(req, thread->getTC(), translation, mode);
     }
 
     return NoFault;
@@ -437,6 +458,9 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
 bool
 TimingSimpleCPU::handleWritePacket()
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     RequestPtr req = dcache_pkt->req;
     if (req->isMmappedIpr()) {
         Cycles delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt);
@@ -457,9 +481,12 @@ Fault
 TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
                           Addr addr, unsigned flags, uint64_t *res)
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     uint8_t *newData = new uint8_t[size];
     const int asid = 0;
-    const ThreadID tid = 0;
+    const ThreadID tid = curThread;
     const Addr pc = thread->instAddr();
     unsigned block_size = cacheLineSize();
     BaseTLB::Mode mode = BaseTLB::Write;
@@ -476,7 +503,8 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
         traceData->setMem(addr, size, flags);
 
     RequestPtr req = new Request(asid, addr, size,
-                                 flags, dataMasterId(), pc, _cpuId, tid);
+                                 flags, dataMasterId(), pc,
+                                 thread->contextId(), tid);
 
     req->taskId(taskId());
 
@@ -496,14 +524,14 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
         DataTranslation<TimingSimpleCPU *> *trans2 =
             new DataTranslation<TimingSimpleCPU *>(this, state, 1);
 
-        thread->dtb->translateTiming(req1, tc, trans1, mode);
-        thread->dtb->translateTiming(req2, tc, trans2, mode);
+        thread->dtb->translateTiming(req1, thread->getTC(), trans1, mode);
+        thread->dtb->translateTiming(req2, thread->getTC(), trans2, mode);
     } else {
         WholeTranslationState *state =
             new WholeTranslationState(req, newData, res, mode);
         DataTranslation<TimingSimpleCPU *> *translation =
             new DataTranslation<TimingSimpleCPU *>(this, state);
-        thread->dtb->translateTiming(req, tc, translation, mode);
+        thread->dtb->translateTiming(req, thread->getTC(), translation, mode);
     }
 
     // Translation faults will be returned via finishTranslation()
@@ -540,6 +568,12 @@ TimingSimpleCPU::finishTranslation(WholeTranslationState *state)
 void
 TimingSimpleCPU::fetch()
 {
+    // Change thread if multi-threaded
+    swapActiveThread();
+
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     DPRINTF(SimpleCPU, "Fetch\n");
 
     if (!curStaticInst || !curStaticInst->isDelayedCommit()) {
@@ -552,17 +586,18 @@ TimingSimpleCPU::fetch()
         return;
 
     TheISA::PCState pcState = thread->pcState();
-    bool needToFetch = !isRomMicroPC(pcState.microPC()) && !curMacroStaticInst;
+    bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
+                       !curMacroStaticInst;
 
     if (needToFetch) {
         _status = BaseSimpleCPU::Running;
         Request *ifetch_req = new Request();
         ifetch_req->taskId(taskId());
-        ifetch_req->setThreadContext(_cpuId, /* thread ID */ 0);
+        ifetch_req->setThreadContext(thread->contextId(), curThread);
         setupFetchRequest(ifetch_req);
         DPRINTF(SimpleCPU, "Translating address %#x\n", ifetch_req->getVaddr());
-        thread->itb->translateTiming(ifetch_req, tc, &fetchTranslation,
-                BaseTLB::Execute);
+        thread->itb->translateTiming(ifetch_req, thread->getTC(),
+                &fetchTranslation, BaseTLB::Execute);
     } else {
         _status = IcacheWaitResponse;
         completeIfetch(NULL);
@@ -607,6 +642,8 @@ TimingSimpleCPU::sendFetch(const Fault &fault, RequestPtr req,
 void
 TimingSimpleCPU::advanceInst(const Fault &fault)
 {
+    SimpleExecContext &t_info = *threadInfo[curThread];
+
     if (_status == Faulting)
         return;
 
@@ -619,7 +656,7 @@ TimingSimpleCPU::advanceInst(const Fault &fault)
     }
 
 
-    if (!stayAtPC)
+    if (!t_info.stayAtPC)
         advancePC(fault);
 
     if (tryCompleteDrain())
@@ -637,6 +674,8 @@ TimingSimpleCPU::advanceInst(const Fault &fault)
 void
 TimingSimpleCPU::completeIfetch(PacketPtr pkt)
 {
+    SimpleExecContext& t_info = *threadInfo[curThread];
+
     DPRINTF(SimpleCPU, "Complete ICache Fetch for addr %#x\n", pkt ?
             pkt->getAddr() : 0);
 
@@ -656,7 +695,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
     preExecute();
     if (curStaticInst && curStaticInst->isMemRef()) {
         // load or store: just send to dcache
-        Fault fault = curStaticInst->initiateAcc(this, traceData);
+        Fault fault = curStaticInst->initiateAcc(&t_info, traceData);
 
         // If we're not running now the instruction will complete in a dcache
         // response callback or the instruction faulted and has started an
@@ -677,7 +716,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
         }
     } else if (curStaticInst) {
         // non-memory instruction: execute completely now
-        Fault fault = curStaticInst->execute(this, traceData);
+        Fault fault = curStaticInst->execute(&t_info, traceData);
 
         // keep an instruction count
         if (fault == NoFault)
@@ -690,7 +729,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
         postExecute();
         // @todo remove me after debugging with legion done
         if (curStaticInst && (!curStaticInst->isMicroop() ||
-                    curStaticInst->isFirstMicroop()))
+                curStaticInst->isFirstMicroop()))
             instCnt++;
         advanceInst(fault);
     } else {
@@ -776,7 +815,8 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
 
     _status = BaseSimpleCPU::Running;
 
-    Fault fault = curStaticInst->completeAcc(pkt, this, traceData);
+    Fault fault = curStaticInst->completeAcc(pkt, threadInfo[curThread],
+                                             traceData);
 
     // keep an instruction count
     if (fault == NoFault)
@@ -810,17 +850,20 @@ void
 TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
 {
     // X86 ISA: Snooping an invalidation for monitor/mwait
-    if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+    if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
         cpu->wakeup();
     }
-    TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
+
+    for (auto &t_info : cpu->threadInfo) {
+        TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
+    }
 }
 
 void
 TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt)
 {
     // X86 ISA: Snooping an invalidation for monitor/mwait
-    if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+    if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
         cpu->wakeup();
     }
 }
@@ -930,8 +973,5 @@ TimingSimpleCPU::printAddr(Addr a)
 TimingSimpleCPU *
 TimingSimpleCPUParams::create()
 {
-    numThreads = 1;
-    if (!FullSystem && workload.size() != 1)
-        panic("only one workload allowed");
     return new TimingSimpleCPU(this);
 }
index b6a1da4e2dc0ff1f6a336b369b99c608124d4dd2..d409ac5d2191cd78af4cd87b3f9a8f1f5ae88799 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2013 ARM Limited
+ * Copyright (c) 2012-2013,2015 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -44,6 +44,7 @@
 #define __CPU_SIMPLE_TIMING_HH__
 
 #include "cpu/simple/base.hh"
+#include "cpu/simple/exec_context.hh"
 #include "cpu/translation.hh"
 #include "params/TimingSimpleCPU.hh"
 
@@ -342,7 +343,11 @@ class TimingSimpleCPU : public BaseSimpleCPU
      * </ul>
      */
     bool isDrained() {
-        return microPC() == 0 && !stayAtPC && !fetchEvent.scheduled();
+        SimpleExecContext& t_info = *threadInfo[curThread];
+        SimpleThread* thread = t_info.thread;
+
+        return thread->microPC() == 0 && !t_info.stayAtPC &&
+               !fetchEvent.scheduled();
     }
 
     /**
index 885de10f3a2e30130d8c39f0de3d44d516c5eee8..7a8fbe0bdd21cfabc2112c9e19a3a56c78e2d580 100644 (file)
@@ -29,5 +29,6 @@
 process1 = LiveProcess(cmd = 'hello', executable = binpath('hello'))
 process2 = LiveProcess(cmd = 'hello', executable = binpath('hello'))
 
+root.system.multi_thread = True
 root.system.cpu[0].workload = [process1, process2]
 root.system.cpu[0].numThreads = 2