style: fix missing spaces in control statements
[gem5.git] / src / cpu / simple / atomic.cc
index e63d998a7549c491da5c637605b4bed0b64c85c2..1eb219483f80f54078e82a99a6d486714737f7b4 100644 (file)
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 2012 ARM Limited
+ * Copyright 2014 Google, Inc.
+ * Copyright (c) 2012-2013,2015 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
 #include "arch/mmapped_ipr.hh"
 #include "arch/utility.hh"
 #include "base/bigint.hh"
+#include "base/output.hh"
 #include "config/the_isa.hh"
 #include "cpu/simple/atomic.hh"
 #include "cpu/exetrace.hh"
+#include "debug/Drain.hh"
 #include "debug/ExecFaulting.hh"
 #include "debug/SimpleCPU.hh"
 #include "mem/packet.hh"
@@ -81,24 +84,12 @@ AtomicSimpleCPU::TickEvent::description() const
 void
 AtomicSimpleCPU::init()
 {
-    BaseCPU::init();
+    BaseSimpleCPU::init();
 
-    // Initialise the ThreadContext's memory proxies
-    tcBase()->initMemProxies(tcBase());
-
-    if (FullSystem && !params()->defer_registration) {
-        ThreadID size = threadContexts.size();
-        for (ThreadID i = 0; i < size; ++i) {
-            ThreadContext *tc = threadContexts[i];
-            // initialize CPU, including PC
-            TheISA::initCPU(tc, tc->contextId());
-        }
-    }
-
-    // Atomic doesn't do MT right now, so contextId == threadId
-    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
-    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
-    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
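+    // Bind the statically allocated memory requests to thread 0's
+    // context; tick() re-targets them per thread when numThreads > 1.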
+    int cid = threadContexts[0]->contextId();
+    ifetch_req.setThreadContext(cid, 0);
+    data_read_req.setThreadContext(cid, 0);
+    data_write_req.setThreadContext(cid, 0);
 }
 
 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
@@ -107,7 +98,8 @@ AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
       simulate_inst_stalls(p->simulate_inst_stalls),
       icachePort(name() + ".icache_port", this),
       dcachePort(name() + ".dcache_port", this),
-      fastmem(p->fastmem)
+      fastmem(p->fastmem), dcache_access(false), dcache_latency(0),
+      ppCommit(nullptr)
 {
     _status = Idle;
 }
@@ -120,107 +112,141 @@ AtomicSimpleCPU::~AtomicSimpleCPU()
     }
 }
 
-void
-AtomicSimpleCPU::serialize(ostream &os)
+DrainState
+AtomicSimpleCPU::drain()
 {
-    Drainable::State so_state(getDrainState());
-    SERIALIZE_ENUM(so_state);
-    SERIALIZE_SCALAR(locked);
-    BaseSimpleCPU::serialize(os);
-    nameOut(os, csprintf("%s.tickEvent", name()));
-    tickEvent.serialize(os);
+    if (switchedOut())
+        return DrainState::Drained;
+
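+    // Draining can only complete between instructions; a thread that
+    // is, e.g., mid-way through a microcode sequence keeps ticking.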
+    if (!isDrained()) {
+        DPRINTF(Drain, "Requesting drain.\n");
+        return DrainState::Draining;
+    } else {
+        if (tickEvent.scheduled())
+            deschedule(tickEvent);
+
+        activeThreads.clear();
+        DPRINTF(Drain, "Not executing microcode, no need to drain.\n");
+        return DrainState::Drained;
+    }
 }
 
 void
-AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
+AtomicSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
 {
-    Drainable::State so_state;
-    UNSERIALIZE_ENUM(so_state);
-    UNSERIALIZE_SCALAR(locked);
-    BaseSimpleCPU::unserialize(cp, section);
-    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
-}
+    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
+            pkt->cmdString());
 
-unsigned int
-AtomicSimpleCPU::drain(DrainManager *drain_manager)
-{
-    setDrainState(Drainable::Drained);
-    return 0;
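+    // A write by one thread is made visible to its siblings here: wake
+    // any thread whose monitored (mwait) address matches and update its
+    // LL/SC reservation state.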
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        if (tid != sender) {
+            if (getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+                wakeup(tid);
+            }
+
+            TheISA::handleLockedSnoop(threadInfo[tid]->thread,
+                                      pkt, dcachePort.cacheBlockMask);
+        }
+    }
 }
 
 void
 AtomicSimpleCPU::drainResume()
 {
-    if (_status == Idle || _status == SwitchedOut)
+    assert(!tickEvent.scheduled());
+    if (switchedOut())
         return;
 
     DPRINTF(SimpleCPU, "Resume\n");
-    assert(system->getMemoryMode() == Enums::atomic);
+    verifyMemoryMode();
+
+    assert(!threadContexts.empty());
+
+    _status = BaseSimpleCPU::Idle;
 
-    setDrainState(Drainable::Running);
-    if (thread->status() == ThreadContext::Active) {
-        if (!tickEvent.scheduled())
-            schedule(tickEvent, nextCycle());
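+    // Restart only the threads that were active when the CPU drained;
+    // the first active thread also reschedules the tick event.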
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        if (threadInfo[tid]->thread->status() == ThreadContext::Active) {
+            threadInfo[tid]->notIdleFraction = 1;
+            activeThreads.push_back(tid);
+            _status = BaseSimpleCPU::Running;
+
+            // Tick if any threads active
+            if (!tickEvent.scheduled()) {
+                schedule(tickEvent, nextCycle());
+            }
+        } else {
+            threadInfo[tid]->notIdleFraction = 0;
+        }
     }
-    system->totalNumInsts = 0;
 }
 
+bool
+AtomicSimpleCPU::tryCompleteDrain()
+{
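+    // Called whenever the CPU may have quiesced; completes a pending
+    // drain request once isDrained() reports no in-flight work.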
+    if (drainState() != DrainState::Draining)
+        return false;
+
+    DPRINTF(Drain, "tryCompleteDrain.\n");
+    if (!isDrained())
+        return false;
+
+    DPRINTF(Drain, "CPU done draining, processing drain event\n");
+    signalDrainDone();
+
+    return true;
+}
+
+
 void
 AtomicSimpleCPU::switchOut()
 {
-    assert(_status == BaseSimpleCPU::Running || _status == Idle);
-    _status = SwitchedOut;
+    BaseSimpleCPU::switchOut();
 
-    tickEvent.squash();
+    assert(!tickEvent.scheduled());
+    assert(_status == BaseSimpleCPU::Running || _status == Idle);
+    assert(isDrained());
 }
 
 
 void
 AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
 {
-    BaseCPU::takeOverFrom(oldCPU);
+    BaseSimpleCPU::takeOverFrom(oldCPU);
 
+    // The tick event should have been descheduled by drain()
     assert(!tickEvent.scheduled());
+}
 
-    // if any of this CPU's ThreadContexts are active, mark the CPU as
-    // running and schedule its tick event.
-    ThreadID size = threadContexts.size();
-    for (ThreadID i = 0; i < size; ++i) {
-        ThreadContext *tc = threadContexts[i];
-        if (tc->status() == ThreadContext::Active &&
-            _status != BaseSimpleCPU::Running) {
-            _status = BaseSimpleCPU::Running;
-            schedule(tickEvent, nextCycle());
-            break;
-        }
-    }
-    if (_status != BaseSimpleCPU::Running) {
-        _status = Idle;
+void
+AtomicSimpleCPU::verifyMemoryMode() const
+{
+    if (!system->isAtomicMode()) {
+        fatal("The atomic CPU requires the memory system to be in "
+              "'atomic' mode.\n");
     }
-    assert(threadContexts.size() == 1);
-    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
-    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
-    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
 }
 
-
 void
-AtomicSimpleCPU::activateContext(ThreadID thread_num, Cycles delay)
+AtomicSimpleCPU::activateContext(ThreadID thread_num)
 {
-    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);
+    DPRINTF(SimpleCPU, "ActivateContext %d\n", thread_num);
 
-    assert(thread_num == 0);
-    assert(thread);
+    assert(thread_num < numThreads);
 
-    assert(_status == Idle);
-    assert(!tickEvent.scheduled());
-
-    notIdleFraction++;
-    numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend);
+    threadInfo[thread_num]->notIdleFraction = 1;
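+    // Credit the cycles that elapsed while the thread was suspended so
+    // the cycle count and its probe point stay consistent.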
+    Cycles delta = ticksToCycles(threadInfo[thread_num]->thread->lastActivate -
+                                 threadInfo[thread_num]->thread->lastSuspend);
+    numCycles += delta;
+    ppCycles->notify(delta);
 
-    //Make sure ticks are still on multiples of cycles
-    schedule(tickEvent, clockEdge(delay));
+    if (!tickEvent.scheduled()) {
+        //Make sure ticks are still on multiples of cycles
+        schedule(tickEvent, clockEdge(Cycles(0)));
+    }
     _status = BaseSimpleCPU::Running;
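+    // Record the thread in the active list, avoiding duplicates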
+    if (std::find(activeThreads.begin(), activeThreads.end(), thread_num)
+        == activeThreads.end()) {
+        activeThreads.push_back(thread_num);
+    }
 }
 
 
@@ -229,60 +255,114 @@ AtomicSimpleCPU::suspendContext(ThreadID thread_num)
 {
     DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);
 
-    assert(thread_num == 0);
-    assert(thread);
+    assert(thread_num < numThreads);
+    activeThreads.remove(thread_num);
 
     if (_status == Idle)
         return;
 
     assert(_status == BaseSimpleCPU::Running);
 
-    // tick event may not be scheduled if this gets called from inside
-    // an instruction's execution, e.g. "quiesce"
-    if (tickEvent.scheduled())
-        deschedule(tickEvent);
+    threadInfo[thread_num]->notIdleFraction = 0;
+
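+    // Only stop ticking once every thread on this CPU is suspended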
+    if (activeThreads.empty()) {
+        _status = Idle;
+
+        if (tickEvent.scheduled()) {
+            deschedule(tickEvent);
+        }
+    }
 
-    notIdleFraction--;
-    _status = Idle;
 }
 
 
+Tick
+AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
+{
+    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
+            pkt->cmdString());
+
+    // X86 ISA: Snooping an invalidation for monitor/mwait
+    AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
+
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu->wakeup(tid);
+        }
+    }
+
+    // if snoop invalidates, release any associated locks
+    if (pkt->isInvalidate()) {
+        DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
+                pkt->getAddr());
+        for (auto &t_info : cpu->threadInfo) {
+            TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
+        }
+    }
+
+    return 0;
+}
+
+void
+AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
+{
+    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
+            pkt->cmdString());
+
+    // X86 ISA: Snooping an invalidation for monitor/mwait
+    AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu->wakeup(tid);
+        }
+    }
+
+    // if snoop invalidates, release any associated locks
+    if (pkt->isInvalidate()) {
+        DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
+                pkt->getAddr());
+        for (auto &t_info : cpu->threadInfo) {
+            TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
+        }
+    }
+}
+
 Fault
 AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
                          unsigned size, unsigned flags)
 {
+    SimpleExecContext& t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     // use the CPU's statically allocated read request and packet objects
     Request *req = &data_read_req;
 
-    if (traceData) {
-        traceData->setAddr(addr);
-    }
+    if (traceData)
+        traceData->setMem(addr, size, flags);
 
-    //The block size of our peer.
-    unsigned blockSize = dcachePort.peerBlockSize();
     //The size of the data we're trying to read.
     int fullSize = size;
 
     //The address of the second part of this access if it needs to be split
     //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + size - 1, blockSize);
+    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
 
     if (secondAddr > addr)
         size = secondAddr - addr;
 
     dcache_latency = 0;
 
+    req->taskId(taskId());
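+    // Accesses that cross a cache-line boundary are split in two; the
+    // loop below issues each part, advancing addr and shrinking size.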
     while (1) {
         req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
 
         // translate to physical address
-        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);
+        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
+                                                          BaseTLB::Read);
 
         // Now do the access.
         if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
-            Packet pkt = Packet(req,
-                                req->isLLSC() ? MemCmd::LoadLockedReq :
-                                MemCmd::ReadReq);
+            Packet pkt(req, Packet::makeReadCmd(req));
             pkt.dataStatic(data);
 
             if (req->isMmappedIpr())
@@ -314,10 +394,11 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
         //If we don't need to access a second cache line, stop now.
         if (secondAddr <= addr)
         {
-            if (req->isLocked() && fault == NoFault) {
+            if (req->isLockedRMW() && fault == NoFault) {
                 assert(!locked);
                 locked = true;
             }
+
             return fault;
         }
 
@@ -334,37 +415,52 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
     }
 }
 
+Fault
+AtomicSimpleCPU::initiateMemRead(Addr addr, unsigned size, unsigned flags)
+{
+    panic("initiateMemRead() is for timing accesses, and should "
+          "never be called on AtomicSimpleCPU.\n");
+}
 
 Fault
 AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                           Addr addr, unsigned flags, uint64_t *res)
 {
+    SimpleExecContext& t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+    static uint8_t zero_array[64] = {};
+
+    if (data == NULL) {
+        assert(size <= 64);
+        assert(flags & Request::CACHE_BLOCK_ZERO);
+        // This must be a cache block cleaning request
+        data = zero_array;
+    }
+
     // use the CPU's statically allocated write request and packet objects
     Request *req = &data_write_req;
 
-    if (traceData) {
-        traceData->setAddr(addr);
-    }
+    if (traceData)
+        traceData->setMem(addr, size, flags);
 
-    //The block size of our peer.
-    unsigned blockSize = dcachePort.peerBlockSize();
     //The size of the data we're trying to write.
     int fullSize = size;
 
     //The address of the second part of this access if it needs to be split
     //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + size - 1, blockSize);
+    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
 
-    if(secondAddr > addr)
+    if (secondAddr > addr)
         size = secondAddr - addr;
 
     dcache_latency = 0;
 
-    while(1) {
+    req->taskId(taskId());
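+    // As in readMem(), accesses that cross a cache-line boundary are
+    // split and issued piecewise by the loop below.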
+    while (1) {
         req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
 
         // translate to physical address
-        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);
+        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(), BaseTLB::Write);
 
         // Now do the access.
         if (fault == NoFault) {
@@ -373,7 +469,7 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
 
             if (req->isLLSC()) {
                 cmd = MemCmd::StoreCondReq;
-                do_access = TheISA::handleLockedWrite(thread, req);
+                do_access = TheISA::handleLockedWrite(thread, req, dcachePort.cacheBlockMask);
             } else if (req->isSwap()) {
                 cmd = MemCmd::SwapReq;
                 if (req->isCondSwap()) {
@@ -394,13 +490,16 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                         system->getPhysMem().access(&pkt);
                     else
                         dcache_latency += dcachePort.sendAtomic(&pkt);
+
+                    // Notify other threads on this CPU of write
+                    threadSnoop(&pkt, curThread);
                 }
                 dcache_access = true;
                 assert(!pkt.isError());
 
                 if (req->isSwap()) {
                     assert(res);
-                    memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
+                    memcpy(res, pkt.getConstPtr<uint8_t>(), fullSize);
                 }
             }
 
@@ -413,10 +512,12 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
         //stop now.
         if (fault != NoFault || secondAddr <= addr)
         {
-            if (req->isLocked() && fault == NoFault) {
+            if (req->isLockedRMW() && fault == NoFault) {
                 assert(locked);
                 locked = false;
             }
+
             if (fault != NoFault && req->isPrefetch()) {
                 return NoFault;
             } else {
@@ -443,18 +544,37 @@ AtomicSimpleCPU::tick()
 {
     DPRINTF(SimpleCPU, "Tick\n");
 
+    // Change thread if multi-threaded
+    swapActiveThread();
+
+    // Set memory request IDs to the current thread
+    if (numThreads > 1) {
+        ContextID cid = threadContexts[curThread]->contextId();
+
+        ifetch_req.setThreadContext(cid, curThread);
+        data_read_req.setThreadContext(cid, curThread);
+        data_write_req.setThreadContext(cid, curThread);
+    }
+
+    SimpleExecContext& t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
     Tick latency = 0;
 
     for (int i = 0; i < width || locked; ++i) {
         numCycles++;
+        ppCycles->notify(1);
 
-        if (!curStaticInst || !curStaticInst->isDelayedCommit())
+        if (!curStaticInst || !curStaticInst->isDelayedCommit()) {
             checkForInterrupts();
+            checkPcEventQueue();
+        }
 
-        checkPcEventQueue();
         // We must have just got suspended by a PC event
-        if (_status == Idle)
+        if (_status == Idle) {
+            tryCompleteDrain();
             return;
+        }
 
         Fault fault = NoFault;
 
@@ -463,8 +583,9 @@ AtomicSimpleCPU::tick()
         bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                            !curMacroStaticInst;
         if (needToFetch) {
+            ifetch_req.taskId(taskId());
             setupFetchRequest(&ifetch_req);
-            fault = thread->itb->translateAtomic(&ifetch_req, tc,
+            fault = thread->itb->translateAtomic(&ifetch_req, thread->getTC(),
                                                  BaseTLB::Execute);
         }
 
@@ -479,7 +600,7 @@ AtomicSimpleCPU::tick()
                 // like the I cache. It should be flushed, and when that works
                 // this code should be uncommented.
                 //Fetch more instruction memory if necessary
-                //if(decoder.needMoreBytes())
+                //if (decoder.needMoreBytes())
                 //{
                     icache_access = true;
                     Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq);
@@ -500,11 +621,13 @@ AtomicSimpleCPU::tick()
             preExecute();
 
             if (curStaticInst) {
-                fault = curStaticInst->execute(this, traceData);
+                fault = curStaticInst->execute(&t_info, traceData);
 
                 // keep an instruction count
-                if (fault == NoFault)
+                if (fault == NoFault) {
                     countInst();
+                    ppCommit->notify(std::make_pair(thread, curStaticInst));
+                }
                 else if (traceData && !DTRACE(ExecFaulting)) {
                     delete traceData;
                     traceData = NULL;
@@ -534,18 +657,29 @@ AtomicSimpleCPU::tick()
             }
 
         }
-        if(fault != NoFault || !stayAtPC)
+        if (fault != NoFault || !t_info.stayAtPC)
             advancePC(fault);
     }
 
+    if (tryCompleteDrain())
+        return;
+
     // instruction takes at least one cycle
     if (latency < clockPeriod())
         latency = clockPeriod();
 
     if (_status != Idle)
-        schedule(tickEvent, curTick() + latency);
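+        // reschedule() rather than schedule(): the event may already
+        // be pending if a thread was activated during this tick.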
+        reschedule(tickEvent, curTick() + latency, true);
 }
 
+void
+AtomicSimpleCPU::regProbePoints()
+{
+    BaseCPU::regProbePoints();
+
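+    // The Commit probe notifies (thread, static inst) for every
+    // instruction that commits without faulting.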
+    ppCommit = new ProbePointArg<pair<SimpleThread*, const StaticInstPtr>>
+                                (getProbeManager(), "Commit");
+}
 
 void
 AtomicSimpleCPU::printAddr(Addr a)
@@ -553,7 +687,6 @@ AtomicSimpleCPU::printAddr(Addr a)
     dcachePort.printAddr(a);
 }
 
-
 ////////////////////////////////////////////////////////////////////////
 //
 //  AtomicSimpleCPU Simulation Object
@@ -561,8 +694,5 @@ AtomicSimpleCPU::printAddr(Addr a)
 AtomicSimpleCPU *
 AtomicSimpleCPUParams::create()
 {
-    numThreads = 1;
-    if (!FullSystem && workload.size() != 1)
-        panic("only one workload allowed");
     return new AtomicSimpleCPU(this);
 }