inorder: fix squash bug in branch predictor

[gem5.git] / src / cpu / simple / atomic.cc
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc

index 9f574e8beeaf362b901e16e5ced7ee5d02d423d2..05b4ca3e210bd3118364c498cef8add2ad08bc02 100644 (file)
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -32,6 +32,7 @@
  #include "arch/mmaped_ipr.hh"
  #include "arch/utility.hh"
  #include "base/bigint.hh"
+#include "config/the_isa.hh"
  #include "cpu/exetrace.hh"
  #include "cpu/simple/atomic.hh"
  #include "mem/packet.hh"
@@ -43,7 +44,7 @@ using namespace std;
  using namespace TheISA;
  
  AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
-    : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c)
+    : Event(CPU_Tick_Pri), cpu(c)  // queue pointer no longer passed to Event
  {
  }
  
@@ -55,13 +56,13 @@ AtomicSimpleCPU::TickEvent::process()
  }
  
  const char *
-AtomicSimpleCPU::TickEvent::description()
+AtomicSimpleCPU::TickEvent::description() const
  {
      return "AtomicSimpleCPU tick";
  }
  
  Port *
-AtomicSimpleCPU::getPort(const std::string &if_name, int idx)
+AtomicSimpleCPU::getPort(const string &if_name, int idx)
  {
      if (if_name == "dcache_port")
          return &dcachePort;
@@ -80,11 +81,12 @@ AtomicSimpleCPU::init()
  {
      BaseCPU::init();
  #if FULL_SYSTEM
-    for (int i = 0; i < threadContexts.size(); ++i) {
+    ThreadID size = threadContexts.size();
+    for (ThreadID i = 0; i < size; ++i) {
          ThreadContext *tc = threadContexts[i];
  
          // initialize CPU, including PC
-        TheISA::initCPU(tc, tc->readCpuId());
+        TheISA::initCPU(tc, tc->contextId());
      }
  #endif
      if (hasPhysMemPort) {
@@ -93,6 +95,10 @@ AtomicSimpleCPU::init()
          physmemPort.getPeerAddressRanges(pmAddrList, snoop);
          physMemAddr = *pmAddrList.begin();
      }
+    // Atomic doesn't do MT right now, so contextId == threadId
+    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
+    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
+    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
  }
  
  bool
@@ -144,13 +150,14 @@ AtomicSimpleCPU::DcachePort::setPeer(Port *port)
  #if FULL_SYSTEM
      // Update the ThreadContext's memory ports (Functional/Virtual
      // Ports)
-    cpu->tcBase()->connectMemPorts();
+    cpu->tcBase()->connectMemPorts(cpu->tcBase());
  #endif
  }
  
-AtomicSimpleCPU::AtomicSimpleCPU(Params *p)
-    : BaseSimpleCPU(p), tickEvent(this),
-      width(p->width), simulate_stalls(p->simulate_stalls),
+AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
+    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
+      simulate_data_stalls(p->simulate_data_stalls),
+      simulate_inst_stalls(p->simulate_inst_stalls),
        icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this),
        physmemPort(name() + "-iport", this), hasPhysMemPort(false)
  {
@@ -159,14 +166,14 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p)
      icachePort.snoopRangeSent = false;
      dcachePort.snoopRangeSent = false;
  
-    ifetch_req.setThreadContext(cpuId, 0); // Add thread ID if we add MT
-    data_read_req.setThreadContext(cpuId, 0); // Add thread ID here too
-    data_write_req.setThreadContext(cpuId, 0); // Add thread ID here too
  }
  
  
  AtomicSimpleCPU::~AtomicSimpleCPU()
  {
+    if (tickEvent.scheduled()) {  // a tick event is still pending
+        deschedule(tickEvent);  // remove it before the CPU is destroyed
+    }
  }
  
  void
@@ -174,8 +181,7 @@ AtomicSimpleCPU::serialize(ostream &os)
  {
      SimObject::State so_state = SimObject::getState();
      SERIALIZE_ENUM(so_state);
-    Status _status = status();
-    SERIALIZE_ENUM(_status);
+    SERIALIZE_SCALAR(locked);
      BaseSimpleCPU::serialize(os);
      nameOut(os, csprintf("%s.tickEvent", name()));
      tickEvent.serialize(os);
@@ -186,7 +192,7 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
  {
      SimObject::State so_state;
      UNSERIALIZE_ENUM(so_state);
-    UNSERIALIZE_ENUM(_status);
+    UNSERIALIZE_SCALAR(locked);
      BaseSimpleCPU::unserialize(cp, section);
      tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
  }
@@ -194,23 +200,23 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
  void
  AtomicSimpleCPU::resume()
  {
+    if (_status == Idle || _status == SwitchedOut)
+        return;  // idle or switched-out CPU: nothing to resume
+
      DPRINTF(SimpleCPU, "Resume\n");
-    if (_status != SwitchedOut && _status != Idle) {
-        assert(system->getMemoryMode() == Enums::atomic);
+    assert(system->getMemoryMode() == Enums::atomic);
  
-        changeState(SimObject::Running);
-        if (thread->status() == ThreadContext::Active) {
-            if (!tickEvent.scheduled()) {
-                tickEvent.schedule(nextCycle());
-            }
-        }
+    changeState(SimObject::Running);
+    if (thread->status() == ThreadContext::Active) {
+        if (!tickEvent.scheduled())  // avoid scheduling the event twice
+            schedule(tickEvent, nextCycle());
      }
  }
  
  void
  AtomicSimpleCPU::switchOut()
  {
-    assert(status() == Running || status() == Idle);
+    assert(_status == Running || _status == Idle);
      _status = SwitchedOut;
  
      tickEvent.squash();
@@ -226,11 +232,12 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
  
      // if any of this CPU's ThreadContexts are active, mark the CPU as
      // running and schedule its tick event.
-    for (int i = 0; i < threadContexts.size(); ++i) {
+    ThreadID size = threadContexts.size();
+    for (ThreadID i = 0; i < size; ++i) {
          ThreadContext *tc = threadContexts[i];
          if (tc->status() == ThreadContext::Active && _status != Running) {
              _status = Running;
-            tickEvent.schedule(nextCycle());
+            schedule(tickEvent, nextCycle());
              break;
          }
      }
@@ -238,7 +245,9 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
          _status = Idle;
      }
      assert(threadContexts.size() == 1);
-    cpuId = tc->readCpuId();
+    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
+    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
+    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
  }
  
  
@@ -257,7 +266,7 @@ AtomicSimpleCPU::activateContext(int thread_num, int delay)
      numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend);
  
      //Make sure ticks are still on multiples of cycles
-    tickEvent.schedule(nextCycle(curTick + ticks(delay)));
+    schedule(tickEvent, nextCycle(curTick + ticks(delay)));
      _status = Running;
  }
  
@@ -270,12 +279,15 @@ AtomicSimpleCPU::suspendContext(int thread_num)
      assert(thread_num == 0);
      assert(thread);
  
+    if (_status == Idle)
+        return;
+
      assert(_status == Running);
  
      // tick event may not be scheduled if this gets called from inside
      // an instruction's execution, e.g. "quiesce"
      if (tickEvent.scheduled())
-        tickEvent.deschedule();
+        deschedule(tickEvent);
  
      notIdleFraction--;
      _status = Idle;
@@ -294,7 +306,7 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
      }
  
      //The block size of our peer.
-    int blockSize = dcachePort.peerBlockSize();
+    unsigned blockSize = dcachePort.peerBlockSize();
      //The size of the data we're trying to read.
      int dataSize = sizeof(T);
  
@@ -313,12 +325,12 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
          req->setVirt(0, addr, dataSize, flags, thread->readPC());
  
          // translate to physical address
-        Fault fault = thread->translateDataReadReq(req);
+        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);
  
          // Now do the access.
-        if (fault == NoFault) {
+        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
              Packet pkt = Packet(req,
-                    req->isLocked() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
+                    req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
                      Packet::Broadcast);
              pkt.dataStatic(dataPtr);
  
@@ -334,7 +346,7 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
  
              assert(!pkt.isError());
  
-            if (req->isLocked()) {
+            if (req->isLLSC()) {
                  TheISA::handleLockedRead(thread, req);
              }
          }
@@ -344,12 +356,25 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
              recordEvent("Uncached Read");
  
          //If there's a fault, return it
-        if (fault != NoFault)
-            return fault;
+        if (fault != NoFault) {
+            if (req->isPrefetch()) {
+                return NoFault;
+            } else {
+                return fault;
+            }
+        }
+
          //If we don't need to access a second cache line, stop now.
          if (secondAddr <= addr)
          {
              data = gtoh(data);
+            if (traceData) {
+                traceData->setData(data);
+            }
+            if (req->isLocked() && fault == NoFault) {
+                assert(!locked);
+                locked = true;
+            }
              return fault;
          }
  
@@ -429,7 +454,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
      }
  
      //The block size of our peer.
-    int blockSize = dcachePort.peerBlockSize();
+    unsigned blockSize = dcachePort.peerBlockSize();
      //The size of the data we're trying to read.
      int dataSize = sizeof(T);
  
@@ -448,14 +473,14 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
          req->setVirt(0, addr, dataSize, flags, thread->readPC());
  
          // translate to physical address
-        Fault fault = thread->translateDataWriteReq(req);
+        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);
  
          // Now do the access.
          if (fault == NoFault) {
              MemCmd cmd = MemCmd::WriteReq; // default
              bool do_access = true;  // flag to suppress cache access
  
-            if (req->isLocked()) {
+            if (req->isLLSC()) {
                  cmd = MemCmd::StoreCondReq;
                  do_access = TheISA::handleLockedWrite(thread, req);
              } else if (req->isSwap()) {
@@ -466,7 +491,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
                  }
              }
  
-            if (do_access) {
+            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                  Packet pkt = Packet(req, cmd, Packet::Broadcast);
                  pkt.dataStatic(dataPtr);
  
@@ -508,7 +533,18 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
              // If the write needs to have a fault on the access, consider
              // calling changeStatus() and changing it to "bad addr write"
              // or something.
-            return fault;
+            if (traceData) {
+                traceData->setData(gtoh(data));
+            }
+            if (req->isLocked() && fault == NoFault) {
+                assert(locked);
+                locked = false;
+            }
+            if (fault != NoFault && req->isPrefetch()) {
+                return NoFault;
+            } else {
+                return fault;
+            }
          }
  
          /*
@@ -587,39 +623,54 @@ AtomicSimpleCPU::tick()
  {
      DPRINTF(SimpleCPU, "Tick\n");
  
-    Tick latency = ticks(1); // instruction takes one cycle by default
+    Tick latency = 0;
  
-    for (int i = 0; i < width; ++i) {
+    for (int i = 0; i < width || locked; ++i) {
          numCycles++;
  
          if (!curStaticInst || !curStaticInst->isDelayedCommit())
              checkForInterrupts();
  
-        Fault fault = setupFetchRequest(&ifetch_req);
+        checkPcEventQueue();
+
+        Fault fault = NoFault;
+
+        bool fromRom = isRomMicroPC(thread->readMicroPC());
+        if (!fromRom && !curMacroStaticInst) {
+            setupFetchRequest(&ifetch_req);
+            fault = thread->itb->translateAtomic(&ifetch_req, tc,
+                                                 BaseTLB::Execute);
+        }
  
          if (fault == NoFault) {
              Tick icache_latency = 0;
              bool icache_access = false;
              dcache_access = false; // assume no dcache access
  
-            //Fetch more instruction memory if necessary
-            //if(predecoder.needMoreBytes())
-            //{
-                icache_access = true;
-                Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
-                                           Packet::Broadcast);
-                ifetch_pkt.dataStatic(&inst);
-
-                if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr)
-                    icache_latency = physmemPort.sendAtomic(&ifetch_pkt);
-                else
-                    icache_latency = icachePort.sendAtomic(&ifetch_pkt);
+            if (!fromRom && !curMacroStaticInst) {
+                // This is commented out because the predecoder would act like
+                // a tiny cache otherwise. It wouldn't be flushed when needed
+                // like the I cache. It should be flushed, and when that works
+                // this code should be uncommented.
+                //Fetch more instruction memory if necessary
+                //if(predecoder.needMoreBytes())
+                //{
+                    icache_access = true;
+                    Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
+                                               Packet::Broadcast);
+                    ifetch_pkt.dataStatic(&inst);
+
+                    if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr)
+                        icache_latency = physmemPort.sendAtomic(&ifetch_pkt);
+                    else
+                        icache_latency = icachePort.sendAtomic(&ifetch_pkt);
  
-                assert(!ifetch_pkt.isError());
+                    assert(!ifetch_pkt.isError());
  
-                // ifetch_req is initialized to read the instruction directly
-                // into the CPU object's inst field.
-            //}
+                    // ifetch_req is initialized to read the instruction directly
+                    // into the CPU object's inst field.
+                //}
+            }
  
              preExecute();
  
@@ -643,16 +694,21 @@ AtomicSimpleCPU::tick()
                          curStaticInst->isFirstMicroop()))
                  instCnt++;
  
-            if (simulate_stalls) {
-                Tick icache_stall =
-                    icache_access ? icache_latency - ticks(1) : 0;
-                Tick dcache_stall =
-                    dcache_access ? dcache_latency - ticks(1) : 0;
-                Tick stall_cycles = (icache_stall + dcache_stall) / ticks(1);
-                if (ticks(stall_cycles) < (icache_stall + dcache_stall))
-                    latency += ticks(stall_cycles+1);
-                else
-                    latency += ticks(stall_cycles);
+            Tick stall_ticks = 0;  // memory stall charged to this instruction
+            if (simulate_inst_stalls && icache_access)
+                stall_ticks += icache_latency;
+
+            if (simulate_data_stalls && dcache_access)
+                stall_ticks += dcache_latency;
+
+            if (stall_ticks) {
+                Tick stall_cycles = stall_ticks / ticks(1);  // rounds down
+                Tick aligned_stall_ticks = ticks(stall_cycles);
+
+                if (aligned_stall_ticks < stall_ticks)
+                    aligned_stall_ticks += ticks(1);  // round up a full cycle
+
+                latency += aligned_stall_ticks;
              }
  
          }
@@ -660,8 +716,19 @@ AtomicSimpleCPU::tick()
              advancePC(fault);
      }
  
+    // instruction takes at least one cycle
+    if (latency < ticks(1))
+        latency = ticks(1);
+
      if (_status != Idle)
-        tickEvent.schedule(curTick + latency);
+        schedule(tickEvent, curTick + latency);
+}
+
+
+void
+AtomicSimpleCPU::printAddr(Addr a)
+{
+    dcachePort.printAddr(a);  // forwarded to the data-cache port
+}
  
  
@@ -672,38 +739,10 @@ AtomicSimpleCPU::tick()
  AtomicSimpleCPU *
  AtomicSimpleCPUParams::create()
  {
-    AtomicSimpleCPU::Params *params = new AtomicSimpleCPU::Params();
-    params->name = name;
-    params->numberOfThreads = 1;
-    params->max_insts_any_thread = max_insts_any_thread;
-    params->max_insts_all_threads = max_insts_all_threads;
-    params->max_loads_any_thread = max_loads_any_thread;
-    params->max_loads_all_threads = max_loads_all_threads;
-    params->progress_interval = progress_interval;
-    params->deferRegistration = defer_registration;
-    params->phase = phase;
-    params->clock = clock;
-    params->functionTrace = function_trace;
-    params->functionTraceStart = function_trace_start;
-    params->width = width;
-    params->simulate_stalls = simulate_stalls;
-    params->system = system;
-    params->cpu_id = cpu_id;
-    params->tracer = tracer;
-
-    params->itb = itb;
-    params->dtb = dtb;
-#if FULL_SYSTEM
-    params->profile = profile;
-    params->do_quiesce = do_quiesce;
-    params->do_checkpoint_insts = do_checkpoint_insts;
-    params->do_statistics_insts = do_statistics_insts;
-#else
+    numThreads = 1;  // replaces the old params->numberOfThreads = 1
+#if !FULL_SYSTEM
      if (workload.size() != 1)
          panic("only one workload allowed");
-    params->process = workload[0];
  #endif
-
-    AtomicSimpleCPU *cpu = new AtomicSimpleCPU(params);
-    return cpu;
+    return new AtomicSimpleCPU(this);  // this params object goes to the CPU
  }