Switch out fixups for the CPUs.
author Kevin Lim <ktlim@umich.edu>
Thu, 24 Aug 2006 21:29:34 +0000 (17:29 -0400)
committer Kevin Lim <ktlim@umich.edu>
Thu, 24 Aug 2006 21:29:34 +0000 (17:29 -0400)
cpu/cpu_exec_context.cc:
    Be sure to switch over the kernel stats so things don't get messed up.  This may lead to weird stats files for sampling runs (detailed stats should be correct, regardless of which kernel stats this is defined on).
cpu/o3/cpu.cc:
    Updates for switching out.  Also include a bunch of debug info if needed.
cpu/o3/fetch_impl.hh:
    Switch out properly.
cpu/o3/inst_queue.hh:
cpu/o3/inst_queue_impl.hh:
    Comment out unused stats (they made the stats file huge).
cpu/o3/lsq_unit.hh:
cpu/o3/lsq_unit_impl.hh:
    Add in new stat.
cpu/o3/rename.hh:
    Fix up for switching out.
cpu/o3/rename_impl.hh:
    Fix up for switching out.  Be sure to mark any Misc regs as ready if their renamed inst got squashed from being switched out.
cpu/ozone/cpu_impl.hh:
cpu/simple/cpu.cc:
    Switch out fixup.
sim/eventq.hh:
    Make CPU switching more immediate.
    Also comment out the assertion, as it doesn't apply if we're putting it on an inst-based queue.

--HG--
extra : convert_revision : f40ed40604738993f061e0c628810ff37a920562

12 files changed:
cpu/cpu_exec_context.cc
cpu/o3/cpu.cc
cpu/o3/fetch_impl.hh
cpu/o3/inst_queue.hh
cpu/o3/inst_queue_impl.hh
cpu/o3/lsq_unit.hh
cpu/o3/lsq_unit_impl.hh
cpu/o3/rename.hh
cpu/o3/rename_impl.hh
cpu/ozone/cpu_impl.hh
cpu/simple/cpu.cc
sim/eventq.hh

index 0dcf149fd7ccda3ca8757a1412ccad1020d91d16..9f151dd6a0e373ab22b862541d15d15e81ca9d39 100644 (file)
@@ -183,6 +183,11 @@ CPUExecContext::takeOverFrom(ExecContext *oldContext)
     if (quiesceEvent) {
         quiesceEvent->xc = proxy;
     }
+
+    Kernel::Statistics *stats = oldContext->getKernelStats();
+    if (stats) {
+        kernelStats = stats;
+    }
 #endif
 
     storeCondFailures = 0;
index 0025d4144552346c298d39de1e98e7683f71a5fa..88de6c7465679c79042e0426d887d2d6d43ab676 100644 (file)
@@ -599,8 +599,11 @@ FullO3CPU<Impl>::activateContext(int tid, int delay)
     // Be sure to signal that there's some activity so the CPU doesn't
     // deschedule itself.
     activityRec.activity();
+
+#if FULL_SYSTEM
     if (thread[tid]->quiesceEvent && thread[tid]->quiesceEvent->scheduled())
         thread[tid]->quiesceEvent->deschedule();
+#endif
 
     fetch.wakeFromQuiesce();
 
@@ -671,6 +674,8 @@ template <class Impl>
 void
 FullO3CPU<Impl>::switchOut(Sampler *_sampler)
 {
+    DPRINTF(FullCPU, "Switching out\n");
+    BaseCPU::switchOut(_sampler);
     sampler = _sampler;
     switchCount = 0;
     fetch.switchOut();
@@ -694,6 +699,41 @@ FullO3CPU<Impl>::signalSwitched()
         rename.doSwitchOut();
         commit.doSwitchOut();
         instList.clear();
+
+#ifndef NDEBUG
+        PhysRegIndex renamed_reg;
+        // First loop through the integer registers.
+        for (int i = 0; i < AlphaISA::NumIntRegs; ++i) {
+            renamed_reg = renameMap[0].lookup(i);
+            assert(renamed_reg == commitRenameMap[0].lookup(i));
+
+            DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
+                    renamed_reg);
+
+            assert(scoreboard.getReg(renamed_reg));
+        }
+
+        // Then loop through the floating point registers.
+        for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) {
+            renamed_reg = renameMap[0].lookup(i + AlphaISA::FP_Base_DepTag);
+            assert(renamed_reg == commitRenameMap[0].lookup(i + AlphaISA::FP_Base_DepTag));
+
+            DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
+                    renamed_reg);
+
+            assert(scoreboard.getReg(renamed_reg));
+        }
+
+        for (int i = 0; i < AlphaISA::NumMiscRegs; ++i) {
+            renamed_reg = i + ((Params *)params)->numPhysFloatRegs + ((Params *)params)->numPhysIntRegs;
+
+            DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
+                    renamed_reg);
+
+            assert(scoreboard.getReg(renamed_reg));
+        }
+#endif
+
         while (!removeList.empty()) {
             removeList.pop();
         }
index cc09c4a418ed20e6fc2e391312b8c725aa33d6e7..7a3292dbe18a632c831ab164cb49b2875b281eb3 100644 (file)
@@ -391,6 +391,7 @@ DefaultFetch<Impl>::takeOverFrom()
     wroteToTimeBuffer = false;
     _status = Inactive;
     switchedOut = false;
+    interruptPending = false;
     branchPred.takeOverFrom();
 }
 
@@ -469,7 +470,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
     unsigned flags = 0;
 #endif // FULL_SYSTEM
 
-    if (interruptPending && flags == 0) {
+    if (isSwitchedOut() || (interruptPending && flags == 0)) {
         // Hold off fetch from getting new instructions while an interrupt
         // is pending.
         return false;
index 80cd71f0d05c40ce2a5b5f9ff46ed53309ae75bd..e96fbc667b7f16a7967f87c819f24b7587ef9673 100644 (file)
@@ -474,11 +474,11 @@ class InstructionQueue
     Stats::Scalar<> iqSquashedNonSpecRemoved;
 
     /** Distribution of number of instructions in the queue. */
-    Stats::VectorDistribution<> queueResDist;
+//    Stats::VectorDistribution<> queueResDist;
     /** Distribution of the number of instructions issued. */
     Stats::Distribution<> numIssuedDist;
     /** Distribution of the cycles it takes to issue an instruction. */
-    Stats::VectorDistribution<> issueDelayDist;
+//    Stats::VectorDistribution<> issueDelayDist;
 
     /** Number of times an instruction could not be issued because a
      * FU was busy.
index 72cb0d70841fbc4ba90a930ddb424072dd55adc7..b6b06ca77fbb3994fa2785d74830362cbcf3a497 100644 (file)
@@ -230,7 +230,7 @@ InstructionQueue<Impl>::regStats()
         .name(name() + ".iqSquashedNonSpecRemoved")
         .desc("Number of squashed non-spec instructions that were removed")
         .prereq(iqSquashedNonSpecRemoved);
-
+/*
     queueResDist
         .init(Num_OpClasses, 0, 99, 2)
         .name(name() + ".IQ:residence:")
@@ -240,6 +240,7 @@ InstructionQueue<Impl>::regStats()
     for (int i = 0; i < Num_OpClasses; ++i) {
         queueResDist.subname(i, opClassStrings[i]);
     }
+*/
     numIssuedDist
         .init(0,totalWidth,1)
         .name(name() + ".ISSUE:issued_per_cycle")
@@ -268,7 +269,7 @@ InstructionQueue<Impl>::regStats()
     //
     //  How long did instructions for a particular FU type wait prior to issue
     //
-
+/*
     issueDelayDist
         .init(Num_OpClasses,0,99,2)
         .name(name() + ".ISSUE:")
@@ -281,7 +282,7 @@ InstructionQueue<Impl>::regStats()
         subname << opClassStrings[i] << "_delay";
         issueDelayDist.subname(i, subname.str());
     }
-
+*/
     issueRate
         .name(name() + ".ISSUE:rate")
         .desc("Inst issue rate")
index fe174a97d5b947745a4e418644c771009f1791a1..1db6dc02db101cf3f6a320c89c7321655e4c349f 100644 (file)
@@ -382,6 +382,9 @@ class LSQUnit {
      * ignored due to the instruction already being squashed. */
     Stats::Scalar<> lsqIgnoredResponses;
 
+    /** Total number of memory ordering violations. */
+    Stats::Scalar<> lsqMemOrderViolation;
+
     /** Total number of squashed stores. */
     Stats::Scalar<> lsqSquashedStores;
 
index 5cc3078f8cb9ff49e5039e54db47732ee1535c61..7086c381eb144a7142a4aaf7ea31c2b29c95de99 100644 (file)
@@ -144,6 +144,10 @@ LSQUnit<Impl>::regStats()
         .name(name() + ".ignoredResponses")
         .desc("Number of memory responses ignored because the instruction is squashed");
 
+    lsqMemOrderViolation
+        .name(name() + ".memOrderViolation")
+        .desc("Number of memory ordering violations");
+
     lsqSquashedStores
         .name(name() + ".squashedStores")
         .desc("Number of stores squashed");
@@ -495,6 +499,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
                 // A load incorrectly passed this store.  Squash and refetch.
                 // For now return a fault to show that it was unsuccessful.
                 memDepViolator = loadQueue[load_idx];
+                ++lsqMemOrderViolation;
 
                 return genMachineCheckFault();
             }
index 4912431add74c18c772bed8ef326ec30959ea0b8..5769dbd374cc17a179a5203cb381b2bb341cc259 100644 (file)
@@ -411,6 +411,8 @@ class DefaultRename
     /** The maximum skid buffer size. */
     unsigned skidBufferMax;
 
+    PhysRegIndex maxPhysicalRegs;
+
     /** Enum to record the source of a structure full stall.  Can come from
      * either ROB, IQ, LSQ, and it is priortized in that order.
      */
index 93f5b3504f7b2e0c0d7f4aae62e08b7833f3c24a..49627e3d4c17e463aadf80c709981c75907101b0 100644 (file)
@@ -40,7 +40,8 @@ DefaultRename<Impl>::DefaultRename(Params *params)
       commitToRenameDelay(params->commitToRenameDelay),
       renameWidth(params->renameWidth),
       commitWidth(params->commitWidth),
-      numThreads(params->numberOfThreads)
+      numThreads(params->numberOfThreads),
+      maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
 {
     _status = Inactive;
 
@@ -283,6 +284,11 @@ DefaultRename<Impl>::doSwitchOut()
             // Put the renamed physical register back on the free list.
             freeList->addReg(hb_it->newPhysReg);
 
+            // Be sure to mark its register as ready if it's a misc register.
+            if (hb_it->newPhysReg >= maxPhysicalRegs) {
+                scoreboard->setReg(hb_it->newPhysReg);
+            }
+
             historyBuffer[i].erase(hb_it++);
         }
         insts[i].clear();
index 050bdb9a385a1d1d048ba5cc322f0ecd7bebe0b0..1a0de29f5cb8af1e469718f603170a7e3389890e 100644 (file)
@@ -184,7 +184,9 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
 
     globalSeqNum = 1;
 
+#if FULL_SYSTEM
     checkInterrupts = false;
+#endif
 
     lockFlag = 0;
 
@@ -213,6 +215,7 @@ template <class Impl>
 void
 OzoneCPU<Impl>::switchOut(Sampler *_sampler)
 {
+    BaseCPU::switchOut(_sampler);
     sampler = _sampler;
     switchCount = 0;
     // Front end needs state from back end, so switch out the back end first.
@@ -234,6 +237,16 @@ OzoneCPU<Impl>::signalSwitched()
             checker->switchOut(sampler);
 
         _status = SwitchedOut;
+#ifndef NDEBUG
+        // Loop through all registers
+        for (int i = 0; i < AlphaISA::TotalNumRegs; ++i) {
+            assert(thread.renameTable[i] == frontEnd->renameTable[i]);
+
+            assert(thread.renameTable[i] == backEnd->renameTable[i]);
+
+            DPRINTF(OzoneCPU, "Checking if register %i matches.\n", i);
+        }
+#endif
 
         if (tickEvent.scheduled())
             tickEvent.squash();
@@ -256,9 +269,16 @@ OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
     frontEnd->takeOverFrom();
     assert(!tickEvent.scheduled());
 
+#ifndef NDEBUG
+    // Check rename table.
+    for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
+        assert(thread.renameTable[i]->isResultReady());
+    }
+#endif
+
     // @todo: Fix hardcoded number
     // Clear out any old information in time buffer.
-    for (int i = 0; i < 6; ++i) {
+    for (int i = 0; i < 15; ++i) {
         comm.advance();
     }
 
@@ -291,8 +311,10 @@ OzoneCPU<Impl>::activateContext(int thread_num, int delay)
     scheduleTickEvent(delay);
     _status = Running;
     thread._status = ExecContext::Active;
+#if FULL_SYSTEM
     if (thread.quiesceEvent && thread.quiesceEvent->scheduled())
         thread.quiesceEvent->deschedule();
+#endif
     frontEnd->wakeFromQuiesce();
 }
 
@@ -369,7 +391,7 @@ template <class Impl>
 void
 OzoneCPU<Impl>::resetStats()
 {
-    startNumInst = numInst;
+//    startNumInst = numInst;
     notIdleFraction = (_status != Idle);
 }
 
@@ -777,7 +799,9 @@ OzoneCPU<Impl>::OzoneXC::halt()
 template <class Impl>
 void
 OzoneCPU<Impl>::OzoneXC::dumpFuncProfile()
-{ }
+{
+    thread->dumpFuncProfile();
+}
 #endif
 
 template <class Impl>
@@ -797,6 +821,7 @@ OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
     copyArchRegs(old_context);
     setCpuId(old_context->readCpuId());
 
+    thread->inst = old_context->getInst();
 #if !FULL_SYSTEM
     setFuncExeInst(old_context->readFuncExeInst());
 #else
@@ -869,16 +894,14 @@ template <class Impl>
 void
 OzoneCPU<Impl>::OzoneXC::profileClear()
 {
-    if (thread->profile)
-        thread->profile->clear();
+    thread->profileClear();
 }
 
 template <class Impl>
 void
 OzoneCPU<Impl>::OzoneXC::profileSample()
 {
-    if (thread->profile)
-        thread->profile->sample(thread->profileNode, thread->profilePC);
+    thread->profileSample();
 }
 #endif
 
@@ -906,14 +929,20 @@ OzoneCPU<Impl>::OzoneXC::copyArchRegs(ExecContext *xc)
     cpu->frontEnd->setPC(thread->PC);
     cpu->frontEnd->setNextPC(thread->nextPC);
 
-    for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
-        if (i < TheISA::FP_Base_DepTag) {
-            thread->renameTable[i]->setIntResult(xc->readIntReg(i));
-        } else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) {
-            int fp_idx = i - TheISA::FP_Base_DepTag;
-            thread->renameTable[i]->setDoubleResult(
-                xc->readFloatRegDouble(fp_idx));
-        }
+    // First loop through the integer registers.
+    for (int i = 0; i < TheISA::NumIntRegs; ++i) {
+/*        DPRINTF(OzoneCPU, "Copying over register %i, had data %lli, "
+                "now has data %lli.\n",
+                i, thread->renameTable[i]->readIntResult(),
+                xc->readIntReg(i));
+*/
+        thread->renameTable[i]->setIntResult(xc->readIntReg(i));
+    }
+
+    // Then loop through the floating point registers.
+    for (int i = 0; i < TheISA::NumFloatRegs; ++i) {
+        int fp_idx = i + TheISA::FP_Base_DepTag;
+        thread->renameTable[fp_idx]->setIntResult(xc->readFloatRegInt(i));
     }
 
 #if !FULL_SYSTEM
index 0a4b3c3e4cbaa9b01571ee133e9338980419bc28..eb19115b2cb22e32088fe3c5c2046913de504ac2 100644 (file)
@@ -181,7 +181,9 @@ SimpleCPU::switchOut(Sampler *s)
         _status = SwitchedOut;
 
         if (tickEvent.scheduled())
-            tickEvent.squash();
+            tickEvent.deschedule();
+
+        assert(!tickEvent.scheduled());
 
         sampler->signalSwitched();
     }
@@ -294,7 +296,7 @@ SimpleCPU::regStats()
 void
 SimpleCPU::resetStats()
 {
-    startNumInst = numInst;
+//    startNumInst = numInst;
     notIdleFraction = (_status != Idle);
 }
 
@@ -352,6 +354,7 @@ SimpleCPU::copySrcTranslate(Addr src)
     Fault fault = cpuXC->translateDataReadReq(memReq);
 
     if (fault == NoFault) {
+        panic("We can't copy!");
         cpuXC->copySrcAddr = src;
         cpuXC->copySrcPhysAddr = memReq->paddr + offset;
     } else {
@@ -600,6 +603,8 @@ SimpleCPU::dbg_vtophys(Addr addr)
 void
 SimpleCPU::processCacheCompletion()
 {
+    Fault fault;
+
     switch (status()) {
       case IcacheMissStall:
         icacheStallCycles += curTick - lastIcacheStall;
@@ -618,12 +623,17 @@ SimpleCPU::processCacheCompletion()
         break;
       case DcacheMissSwitch:
         if (memReq->cmd.isRead()) {
-            curStaticInst->execute(this,traceData);
+            fault = curStaticInst->execute(this,traceData);
             if (traceData)
                 traceData->finalize();
+        } else {
+            fault = NoFault;
         }
+        assert(fault == NoFault);
+        assert(!tickEvent.scheduled());
         _status = SwitchedOut;
         sampler->signalSwitched();
+        return;
       case SwitchedOut:
         // If this CPU has been switched out due to sampling/warm-up,
         // ignore any further status changes (e.g., due to cache
@@ -787,9 +797,10 @@ SimpleCPU::tick()
         }
 
         if (cpuXC->profile) {
-            bool usermode =
-                (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
-            cpuXC->profilePC = usermode ? 1 : cpuXC->readPC();
+//            bool usermode =
+//                (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
+//            cpuXC->profilePC = usermode ? 1 : cpuXC->readPC();
+            cpuXC->profilePC = cpuXC->readPC();
             ProfileNode *node = cpuXC->profile->consume(xcProxy, inst);
             if (node)
                 cpuXC->profileNode = node;
@@ -849,8 +860,10 @@ SimpleCPU::tick()
            status() == Idle ||
            status() == DcacheMissStall);
 
-    if (status() == Running && !tickEvent.scheduled())
+    if (status() == Running && !tickEvent.scheduled()) {
+        assert(_status != SwitchedOut);
         tickEvent.schedule(curTick + cycles(1));
+    }
 }
 
 ////////////////////////////////////////////////////////////////////////
@@ -863,6 +876,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
     Param<Counter> max_insts_all_threads;
     Param<Counter> max_loads_any_thread;
     Param<Counter> max_loads_all_threads;
+    Param<Counter> stats_reset_inst;
     Param<Tick> progress_interval;
 
 #if FULL_SYSTEM
@@ -897,6 +911,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
                "terminate when any thread reaches this load count"),
     INIT_PARAM(max_loads_all_threads,
                "terminate when all threads have reached this load count"),
+    INIT_PARAM(stats_reset_inst,
+               "instruction to reset stats on"),
     INIT_PARAM_DFLT(progress_interval, "CPU Progress interval", 0),
 
 #if FULL_SYSTEM
@@ -930,6 +946,7 @@ CREATE_SIM_OBJECT(SimpleCPU)
     params->max_insts_all_threads = max_insts_all_threads;
     params->max_loads_any_thread = max_loads_any_thread;
     params->max_loads_all_threads = max_loads_all_threads;
+    params->stats_reset_inst = stats_reset_inst;
     params->deferRegistration = defer_registration;
     params->clock = clock;
     params->functionTrace = function_trace;
index 5fc73bb53d5b43d81e5c5c359d4610b620eabb92..b9a0abc1252718f9d2e01c070856bfc7ad1f35a8 100644 (file)
@@ -43,6 +43,7 @@
 #include "sim/host.hh" // for Tick
 
 #include "base/fast_alloc.hh"
+#include "base/misc.hh"
 #include "base/trace.hh"
 #include "sim/serialize.hh"
 
@@ -131,7 +132,7 @@ class Event : public Serializable, public FastAlloc
         /// same cycle (after unscheduling the old CPU's tick event).
         /// The switch needs to come before any tick events to make
         /// sure we don't tick both CPUs in the same cycle.
-        CPU_Switch_Pri         =   31,
+        CPU_Switch_Pri         =   -31,
 
         /// Serailization needs to occur before tick events also, so
         /// that a serialize/unserialize is identical to an on-line
@@ -344,7 +345,8 @@ inline void
 Event::schedule(Tick t)
 {
     assert(!scheduled());
-    assert(t >= curTick);
+//    if (t < curTick)
+//        warn("t is less than curTick, ensure you don't want cycles");
 
     setFlags(Scheduled);
 #if TRACING_ON