inorder: fix squash bug in branch predictor

[gem5.git] / src / cpu / simple / atomic.cc
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc

index 30f4a5c5ed3a72bd9526f96a4902ea82e6b317c7..05b4ca3e210bd3118364c498cef8add2ad08bc02 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -32,6 +32,7 @@
  #include "arch/mmaped_ipr.hh"
  #include "arch/utility.hh"
  #include "base/bigint.hh"
+#include "config/the_isa.hh"
  #include "cpu/exetrace.hh"
  #include "cpu/simple/atomic.hh"
  #include "mem/packet.hh"
@@ -61,7 +62,7 @@ AtomicSimpleCPU::TickEvent::description() const
  }
  
  Port *
-AtomicSimpleCPU::getPort(const std::string &if_name, int idx)
+AtomicSimpleCPU::getPort(const string &if_name, int idx)
  {
      if (if_name == "dcache_port")
          return &dcachePort;
@@ -79,13 +80,13 @@ void
  AtomicSimpleCPU::init()
  {
      BaseCPU::init();
-    cpuId = tc->readCpuId();
  #if FULL_SYSTEM
-    for (int i = 0; i < threadContexts.size(); ++i) {
+    ThreadID size = threadContexts.size();
+    for (ThreadID i = 0; i < size; ++i) {
          ThreadContext *tc = threadContexts[i];
  
          // initialize CPU, including PC
-        TheISA::initCPU(tc, cpuId);
+        TheISA::initCPU(tc, tc->contextId());
      }
  #endif
      if (hasPhysMemPort) {
@@ -94,9 +95,10 @@ AtomicSimpleCPU::init()
          physmemPort.getPeerAddressRanges(pmAddrList, snoop);
          physMemAddr = *pmAddrList.begin();
      }
-    ifetch_req.setThreadContext(cpuId, 0); // Add thread ID if we add MT
-    data_read_req.setThreadContext(cpuId, 0); // Add thread ID here too
-    data_write_req.setThreadContext(cpuId, 0); // Add thread ID here too
+    // Atomic doesn't do MT right now, so contextId == threadId
+    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
+    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
+    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
  }
  
  bool
@@ -153,7 +155,7 @@ AtomicSimpleCPU::DcachePort::setPeer(Port *port)
  }
  
  AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
-    : BaseSimpleCPU(p), tickEvent(this), width(p->width),
+    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
        simulate_data_stalls(p->simulate_data_stalls),
        simulate_inst_stalls(p->simulate_inst_stalls),
        icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this),
@@ -169,6 +171,9 @@ AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
  
  AtomicSimpleCPU::~AtomicSimpleCPU()
  {
+    if (tickEvent.scheduled()) {
+        deschedule(tickEvent);
+    }
  }
  
  void
@@ -176,6 +181,7 @@ AtomicSimpleCPU::serialize(ostream &os)
  {
      SimObject::State so_state = SimObject::getState();
      SERIALIZE_ENUM(so_state);
+    SERIALIZE_SCALAR(locked);
      BaseSimpleCPU::serialize(os);
      nameOut(os, csprintf("%s.tickEvent", name()));
      tickEvent.serialize(os);
@@ -186,6 +192,7 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
  {
      SimObject::State so_state;
      UNSERIALIZE_ENUM(so_state);
+    UNSERIALIZE_SCALAR(locked);
      BaseSimpleCPU::unserialize(cp, section);
      tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
  }
@@ -225,7 +232,8 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
  
      // if any of this CPU's ThreadContexts are active, mark the CPU as
      // running and schedule its tick event.
-    for (int i = 0; i < threadContexts.size(); ++i) {
+    ThreadID size = threadContexts.size();
+    for (ThreadID i = 0; i < size; ++i) {
          ThreadContext *tc = threadContexts[i];
          if (tc->status() == ThreadContext::Active && _status != Running) {
              _status = Running;
@@ -237,10 +245,9 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
          _status = Idle;
      }
      assert(threadContexts.size() == 1);
-    cpuId = tc->readCpuId();
-    ifetch_req.setThreadContext(cpuId, 0); // Add thread ID if we add MT
-    data_read_req.setThreadContext(cpuId, 0); // Add thread ID here too
-    data_write_req.setThreadContext(cpuId, 0); // Add thread ID here too
+    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
+    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
+    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
  }
  
  
@@ -272,6 +279,9 @@ AtomicSimpleCPU::suspendContext(int thread_num)
      assert(thread_num == 0);
      assert(thread);
  
+    if (_status == Idle)
+        return;
+
      assert(_status == Running);
  
      // tick event may not be scheduled if this gets called from inside
@@ -296,7 +306,7 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
      }
  
      //The block size of our peer.
-    int blockSize = dcachePort.peerBlockSize();
+    unsigned blockSize = dcachePort.peerBlockSize();
      //The size of the data we're trying to read.
      int dataSize = sizeof(T);
  
@@ -315,12 +325,12 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
          req->setVirt(0, addr, dataSize, flags, thread->readPC());
  
          // translate to physical address
-        Fault fault = thread->translateDataReadReq(req);
+        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);
  
          // Now do the access.
-        if (fault == NoFault) {
+        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
              Packet pkt = Packet(req,
-                    req->isLocked() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
+                    req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
                      Packet::Broadcast);
              pkt.dataStatic(dataPtr);
  
@@ -336,7 +346,7 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
  
              assert(!pkt.isError());
  
-            if (req->isLocked()) {
+            if (req->isLLSC()) {
                  TheISA::handleLockedRead(thread, req);
              }
          }
@@ -346,8 +356,14 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
              recordEvent("Uncached Read");
  
          //If there's a fault, return it
-        if (fault != NoFault)
-            return fault;
+        if (fault != NoFault) {
+            if (req->isPrefetch()) {
+                return NoFault;
+            } else {
+                return fault;
+            }
+        }
+
          //If we don't need to access a second cache line, stop now.
          if (secondAddr <= addr)
          {
@@ -355,6 +371,10 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
              if (traceData) {
                  traceData->setData(data);
              }
+            if (req->isLocked() && fault == NoFault) {
+                assert(!locked);
+                locked = true;
+            }
              return fault;
          }
  
@@ -371,61 +391,6 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
      }
  }
  
-Fault
-AtomicSimpleCPU::translateDataReadAddr(Addr vaddr, Addr & paddr,
-        int size, unsigned flags)
-{
-    // use the CPU's statically allocated read request and packet objects
-    Request *req = &data_read_req;
-
-    if (traceData) {
-        traceData->setAddr(vaddr);
-    }
-
-    //The block size of our peer.
-    int blockSize = dcachePort.peerBlockSize();
-    //The size of the data we're trying to read.
-    int dataSize = size;
-
-    bool firstTimeThrough = true;
-
-    //The address of the second part of this access if it needs to be split
-    //across a cache line boundary.
-    Addr secondAddr = roundDown(vaddr + dataSize - 1, blockSize);
-
-    if(secondAddr > vaddr)
-        dataSize = secondAddr - vaddr;
-
-    while(1) {
-        req->setVirt(0, vaddr, dataSize, flags, thread->readPC());
-
-        // translate to physical address
-        Fault fault = thread->translateDataReadReq(req);
-
-        //If there's a fault, return it
-        if (fault != NoFault)
-            return fault;
-
-        if (firstTimeThrough) {
-            paddr = req->getPaddr();
-            firstTimeThrough = false;
-        }
-
-        //If we don't need to access a second cache line, stop now.
-        if (secondAddr <= vaddr)
-            return fault;
-
-        /*
-         * Set up for accessing the second cache line.
-         */
-
-        //Adjust the size to get the remaining bytes.
-        dataSize = vaddr + size - secondAddr;
-        //And access the right address.
-        vaddr = secondAddr;
-    }
-}
-
  #ifndef DOXYGEN_SHOULD_SKIP_THIS
  
  template
@@ -489,7 +454,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
      }
  
      //The block size of our peer.
-    int blockSize = dcachePort.peerBlockSize();
+    unsigned blockSize = dcachePort.peerBlockSize();
      //The size of the data we're trying to read.
      int dataSize = sizeof(T);
  
@@ -508,14 +473,14 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
          req->setVirt(0, addr, dataSize, flags, thread->readPC());
  
          // translate to physical address
-        Fault fault = thread->translateDataWriteReq(req);
+        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);
  
          // Now do the access.
          if (fault == NoFault) {
              MemCmd cmd = MemCmd::WriteReq; // default
              bool do_access = true;  // flag to suppress cache access
  
-            if (req->isLocked()) {
+            if (req->isLLSC()) {
                  cmd = MemCmd::StoreCondReq;
                  do_access = TheISA::handleLockedWrite(thread, req);
              } else if (req->isSwap()) {
@@ -526,7 +491,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
                  }
              }
  
-            if (do_access) {
+            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                  Packet pkt = Packet(req, cmd, Packet::Broadcast);
                  pkt.dataStatic(dataPtr);
  
@@ -569,9 +534,17 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
              // calling changeStatus() and changing it to "bad addr write"
              // or something.
              if (traceData) {
-                traceData->setData(data);
+                traceData->setData(gtoh(data));
+            }
+            if (req->isLocked() && fault == NoFault) {
+                assert(locked);
+                locked = false;
+            }
+            if (fault != NoFault && req->isPrefetch()) {
+                return NoFault;
+            } else {
+                return fault;
              }
-            return fault;
          }
  
          /*
@@ -587,64 +560,6 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
      }
  }
  
-Fault
-AtomicSimpleCPU::translateDataWriteAddr(Addr vaddr, Addr &paddr,
-        int size, unsigned flags)
-{
-    // use the CPU's statically allocated write request and packet objects
-    Request *req = &data_write_req;
-
-    if (traceData) {
-        traceData->setAddr(vaddr);
-    }
-
-    //The block size of our peer.
-    int blockSize = dcachePort.peerBlockSize();
-
-    //The address of the second part of this access if it needs to be split
-    //across a cache line boundary.
-    Addr secondAddr = roundDown(vaddr + size - 1, blockSize);
-
-    //The size of the data we're trying to read.
-    int dataSize = size;
-
-    bool firstTimeThrough = true;
-
-    if(secondAddr > vaddr)
-        dataSize = secondAddr - vaddr;
-
-    dcache_latency = 0;
-
-    while(1) {
-        req->setVirt(0, vaddr, dataSize, flags, thread->readPC());
-
-        // translate to physical address
-        Fault fault = thread->translateDataWriteReq(req);
-
-        //If there's a fault or we don't need to access a second cache line,
-        //stop now.
-        if (fault != NoFault)
-            return fault;
-
-        if (firstTimeThrough) {
-            paddr = req->getPaddr();
-            firstTimeThrough = false;
-        }
-
-        if (secondAddr <= vaddr)
-            return fault;
-
-        /*
-         * Set up for accessing the second cache line.
-         */
-
-        //Adjust the size to get the remaining bytes.
-        dataSize = vaddr + size - secondAddr;
-        //And access the right address.
-        vaddr = secondAddr;
-    }
-}
-
  
  #ifndef DOXYGEN_SHOULD_SKIP_THIS
  
@@ -710,7 +625,7 @@ AtomicSimpleCPU::tick()
  
      Tick latency = 0;
  
-    for (int i = 0; i < width; ++i) {
+    for (int i = 0; i < width || locked; ++i) {
          numCycles++;
  
          if (!curStaticInst || !curStaticInst->isDelayedCommit())
@@ -721,15 +636,18 @@ AtomicSimpleCPU::tick()
          Fault fault = NoFault;
  
          bool fromRom = isRomMicroPC(thread->readMicroPC());
-        if (!fromRom)
-            fault = setupFetchRequest(&ifetch_req);
+        if (!fromRom && !curMacroStaticInst) {
+            setupFetchRequest(&ifetch_req);
+            fault = thread->itb->translateAtomic(&ifetch_req, tc,
+                                                 BaseTLB::Execute);
+        }
  
          if (fault == NoFault) {
              Tick icache_latency = 0;
              bool icache_access = false;
              dcache_access = false; // assume no dcache access
  
-            if (!fromRom) {
+            if (!fromRom && !curMacroStaticInst) {
                  // This is commented out because the predecoder would act like
                  // a tiny cache otherwise. It wouldn't be flushed when needed
                  // like the I cache. It should be flushed, and when that works