cpu: support atomic memory request type with AtomicOpFunctor

author Tuan Ta <qtt2@cornell.edu>

Mon, 22 Jan 2018 18:12:50 +0000 (13:12 -0500)

committer Tuan Ta <qtt2@cornell.edu>

Fri, 8 Feb 2019 15:27:04 +0000 (15:27 +0000)
author Tuan Ta <qtt2@cornell.edu>
Mon, 22 Jan 2018 18:12:50 +0000 (13:12 -0500)
committer Tuan Ta <qtt2@cornell.edu>
Fri, 8 Feb 2019 15:27:04 +0000 (15:27 +0000)
diff --git a/src/cpu/base.cc b/src/cpu/base.cc

index 878e655512842107d1a6cb2cbe3dcc29f716edd7..30f6baf20cc51d101e1ad01858fd93e1cbc81e02 100644 (file)
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -409,7 +409,7 @@ BaseCPU::probeInstCommit(const StaticInstPtr &inst)
      if (inst->isLoad())
          ppRetiredLoads->notify(1);
  
-    if (inst->isStore())
+    if (inst->isStore() || inst->isAtomic())
          ppRetiredStores->notify(1);
  
      if (inst->isControl())
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh

index c24517937d4f5e00d775d9343b00a711bc47346d..9a1ab062c6fc0c1a7476afe8a8a5f68810f85e49 100644 (file)
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -303,6 +303,9 @@ class BaseDynInst : public ExecContext, public RefCounted
      Fault writeMem(uint8_t *data, unsigned size, Addr addr,
                     Request::Flags flags, uint64_t *res);
  
+    Fault initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
+                         AtomicOpFunctor *amo_op);
+
      /** True if the DTB address translation has started. */
      bool translationStarted() const { return instFlags[TranslationStarted]; }
      void translationStarted(bool f) { instFlags[TranslationStarted] = f; }
@@ -920,4 +923,20 @@ BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size, Addr addr,
              /* st */ false, data, size, addr, flags, res);
  }
  
+template<class Impl>
+Fault
+BaseDynInst<Impl>::initiateMemAMO(Addr addr, unsigned size,
+                                  Request::Flags flags,
+                                  AtomicOpFunctor *amo_op)
+{
+    // Atomic memory instructions carry no store data: the operation is
+    // executed directly in the cache/memory, so `data` and `res` are both
+    // passed as nullptr. The request is pushed as a non-load (the second
+    // argument, false, is pushRequest()'s isLoad flag) and carries
+    // `amo_op` down to the cache/memory.
+    return cpu->pushRequest(
+            dynamic_cast<typename DynInstPtr::PtrType>(this),
+            /* atomic */ false, nullptr, size, addr, flags, nullptr, amo_op);
+}
+
  #endif // __CPU_BASE_DYN_INST_HH__
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh

index e32c015bfe84e10f62731271265f5d9435ce3c81..5f830d7a9aec8c0906a4204dafbe0dc013d76111 100644 (file)
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -536,9 +536,16 @@ class CheckerCPU : public BaseCPU, public ExecContext
  
      Fault readMem(Addr addr, uint8_t *data, unsigned size,
                    Request::Flags flags) override;
+
      Fault writeMem(uint8_t *data, unsigned size, Addr addr,
                     Request::Flags flags, uint64_t *res) override;
  
+    Fault amoMem(Addr addr, uint8_t* data, unsigned size,
+                 Request::Flags flags, AtomicOpFunctor *amo_op) override
+    {
+        panic("AMO is not supported yet in CPU checker\n");
+    }
+
      unsigned int
      readStCondFailures() const override {
          return thread->readStCondFailures();
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh

index 1c1c8956a2f40b60bbbf4fc1e206028208672e5d..d46cc1315f86f07c586503d75bb714f4311a4f0a 100644 (file)
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -260,6 +260,28 @@ class ExecContext {
      virtual Fault writeMem(uint8_t *data, unsigned int size, Addr addr,
                             Request::Flags flags, uint64_t *res) = 0;
  
+    /**
+     * For atomic-mode contexts, perform an AMO (atomic read-modify-write
+     * memory operation). The default panics and should be overridden.
+     */
+    virtual Fault amoMem(Addr addr, uint8_t *data, unsigned int size,
+                         Request::Flags flags,
+                         AtomicOpFunctor *amo_op)
+    {
+        panic("ExecContext::amoMem() should be overridden\n");
+    }
+
+    /**
+     * For timing-mode contexts, initiate an AMO (atomic read-modify-write
+     * memory operation). The default panics and should be overridden.
+     */
+    virtual Fault initiateMemAMO(Addr addr, unsigned int size,
+                                 Request::Flags flags,
+                                 AtomicOpFunctor *amo_op)
+    {
+        panic("ExecContext::initiateMemAMO() should be overridden\n");
+    }
+
      /**
       * Sets the number of consecutive store conditional failures.
       */
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh

index 179883ecc9d38e8a95f1168e27668221b0e19f31..02b3dae1c1f0dd99e5ad4db464b420097dfd0c49 100644 (file)
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -108,7 +108,7 @@ class ExecContext : public ::ExecContext
                      Request::Flags flags) override
      {
          execute.getLSQ().pushRequest(inst, true /* load */, nullptr,
-            size, addr, flags, NULL);
+            size, addr, flags, NULL, nullptr);
          return NoFault;
      }
  
@@ -117,7 +117,17 @@ class ExecContext : public ::ExecContext
               Request::Flags flags, uint64_t *res) override
      {
          execute.getLSQ().pushRequest(inst, false /* store */, data,
-            size, addr, flags, res);
+            size, addr, flags, res, nullptr);
+        return NoFault;
+    }
+
+    Fault
+    initiateMemAMO(Addr addr, unsigned int size, Request::Flags flags,
+                   AtomicOpFunctor *amo_op) override
+    {
+        // AMOs go down the store path (isLoad == false) with no data/res
+        execute.getLSQ().pushRequest(inst, false /* amo */, nullptr,
+            size, addr, flags, nullptr, amo_op);
          return NoFault;
      }
  
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc

index 234a233c2ec6b73952a105be6c099feac0d37898..6a418202f31bd0092ffde4229340189ee7fc5eea 100644 (file)
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -337,6 +337,7 @@ Execute::handleMemResponse(MinorDynInstPtr inst,
  
      bool is_load = inst->staticInst->isLoad();
      bool is_store = inst->staticInst->isStore();
+    bool is_atomic = inst->staticInst->isAtomic();
      bool is_prefetch = inst->staticInst->isDataPrefetch();
  
      /* If true, the trace's predicate value will be taken from the exec
@@ -368,7 +369,7 @@ Execute::handleMemResponse(MinorDynInstPtr inst,
              *inst);
  
          fatal("Received error response packet for inst: %s\n", *inst);
-    } else if (is_store || is_load || is_prefetch) {
+    } else if (is_store || is_load || is_prefetch || is_atomic) {
          assert(packet);
  
          DPRINTF(MinorMem, "Memory response inst: %s addr: 0x%x size: %d\n",
diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc

index 180890147d2a385737e825ac7fc1561b32d5447c..9347e4ccbe4ed3d0c99e8eb51dfc100cf5b35f36 100644 (file)
--- a/src/cpu/minor/fetch2.cc
+++ b/src/cpu/minor/fetch2.cc
@@ -421,6 +421,8 @@ Fetch2::evaluate()
                          loadInstructions++;
                      else if (decoded_inst->isStore())
                          storeInstructions++;
+                    else if (decoded_inst->isAtomic())
+                        amoInstructions++;
                      else if (decoded_inst->isVector())
                          vecInstructions++;
                      else if (decoded_inst->isFloating())
@@ -636,6 +638,11 @@ Fetch2::regStats()
          .name(name() + ".store_instructions")
          .desc("Number of memory store instructions successfully decoded")
          .flags(total);
+
+    amoInstructions
+        .name(name() + ".amo_instructions")
+        .desc("Number of memory atomic instructions successfully decoded")
+        .flags(total);
  }
  
  void
diff --git a/src/cpu/minor/fetch2.hh b/src/cpu/minor/fetch2.hh

index 2230560f164335bfe1218ff635403e92a645bb64..114dec0f553b4d0c2a030d048a1d5edb4e91ac2b 100644 (file)
--- a/src/cpu/minor/fetch2.hh
+++ b/src/cpu/minor/fetch2.hh
@@ -171,6 +171,7 @@ class Fetch2 : public Named
      Stats::Scalar vecInstructions;
      Stats::Scalar loadInstructions;
      Stats::Scalar storeInstructions;
+    Stats::Scalar amoInstructions;
  
    public:
      /** Dump the whole contents of the input buffer.  Useful after a
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc

index b836ed22d916b2ffa7b26b2f6d8c42249f9ae317..6fe6c3738f93e65e71b83978e532b86fee3ce6a9 100644 (file)
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -676,9 +676,9 @@ LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
      while (ret == NoAddrRangeCoverage && i != slots.rend()) {
          LSQRequestPtr slot = *i;
  
-        /* Cache maintenance instructions go down via the store path *
-         * but they carry no data and they shouldn't be considered for
-         * forwarding */
+        /* Cache maintenance instructions go down via the store path but
+         * they carry no data and they shouldn't be considered
+         * for forwarding */
          if (slot->packet &&
              slot->inst->id.threadId == request->inst->id.threadId &&
              !slot->packet->req->isCacheMaintenance()) {
@@ -931,8 +931,9 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request)
      bool is_load = request->isLoad;
      bool is_llsc = request->request->isLLSC();
      bool is_swap = request->request->isSwap();
+    bool is_atomic = request->request->isAtomic();
      bool bufferable = !(request->request->isStrictlyOrdered() ||
-        is_llsc || is_swap);
+                        is_llsc || is_swap || is_atomic);
  
      if (is_load) {
          if (numStoresInTransfers != 0) {
@@ -965,9 +966,16 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request)
          if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) !=
              NoAddrRangeCoverage)
          {
+            // There's at least another request that targets the same
+            // address and is staying in the storeBuffer. Since our
+            // request is non-bufferable (e.g., strictly ordered or atomic),
+            // we must wait for the other request in the storeBuffer to
+            // complete before we can issue this non-bufferable request.
+            // This is to make sure that the order they access the cache is
+            // correct.
              DPRINTF(MinorMem, "Memory access can receive forwarded data"
-                " from the store buffer, need to wait for store buffer to"
-                " drain\n");
+                " from the store buffer, but need to wait for store buffer"
+                " to drain\n");
              return;
          }
      }
@@ -1469,9 +1477,21 @@ LSQ::needsToTick()
  void
  LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
                   unsigned int size, Addr addr, Request::Flags flags,
-                 uint64_t *res)
+                 uint64_t *res, AtomicOpFunctor *amo_op)
  {
      bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
+
+    if (needs_burst && inst->staticInst->isAtomic()) {
+        // AMO requests that cross a cache line boundary are not allowed
+        // since the cache does not guarantee that an AMO spanning two
+        // cache lines is executed atomically.
+        // For ISAs such as x86 that require AMO operations to work on
+        // accesses that cross cache-line boundaries, the cache needs to be
+        // modified to support locking both cache lines to guarantee
+        // atomicity.
+        panic("Do not expect cross-cache-line atomic memory request\n");
+    }
+
      LSQRequestPtr request;
  
      /* Copy given data into the request.  The request will pass this to the
@@ -1480,15 +1500,16 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
  
      DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
          " 0x%x%s lineWidth : 0x%x\n",
-        (isLoad ? "load" : "store"), addr, size, flags,
+        (isLoad ? "load" : "store/atomic"), addr, size, flags,
              (needs_burst ? " (needs burst)" : ""), lineWidth);
  
      if (!isLoad) {
-        /* request_data becomes the property of a ...DataRequest (see below)
+        /* Request_data becomes the property of a ...DataRequest (see below)
           *  and destroyed by its destructor */
          request_data = new uint8_t[size];
-        if (flags & Request::STORE_NO_DATA) {
-            /* For cache zeroing, just use zeroed data */
+        if (inst->staticInst->isAtomic() ||
+            (flags & Request::STORE_NO_DATA)) {
+            /* For atomic or store-no-data, just use zeroed data */
              std::memset(request_data, 0, size);
          } else {
              std::memcpy(request_data, data, size);
@@ -1511,7 +1532,7 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
      request->request->setVirt(0 /* asid */,
          addr, size, flags, cpu.dataMasterId(),
          /* I've no idea why we need the PC, but give it */
-        inst->pc.instAddr());
+        inst->pc.instAddr(), amo_op);
  
      requests.push(request);
      request->startAddrTranslation();
diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh

index da873b4acaf83bc23698c7151ffb56cb70f37978..11fa8774f2e4545e623a422e753f4dfe25d78989 100644 (file)
--- a/src/cpu/minor/lsq.hh
+++ b/src/cpu/minor/lsq.hh
@@ -696,11 +696,11 @@ class LSQ : public Named
      void completeMemBarrierInst(MinorDynInstPtr inst,
          bool committed);
  
-    /** Single interface for readMem/writeMem to issue requests into
+    /** Single interface for readMem/writeMem/amoMem to issue requests into
       *  the LSQ */
      void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,
-                     uint64_t *res);
+                     uint64_t *res, AtomicOpFunctor *amo_op);
  
      /** Push a predicate failed-representing request into the queues just
       *  to maintain commit order */
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh

index 4e32f865d959462e631bcf2fb287a04ed616dd10..e624557c8663a643d7bd04d0c7b12911d143f598 100644 (file)
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -498,6 +498,8 @@ class DefaultCommit
      Stats::Vector statComRefs;
      /** Stat for the total number of committed loads. */
      Stats::Vector statComLoads;
+    /** Stat for the total number of committed atomics. */
+    Stats::Vector statComAmos;
      /** Total number of committed memory barriers. */
      Stats::Vector statComMembars;
      /** Total number of committed branches. */
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh

index 2891ce331429623882bf19013f9a6d55e5d4d41c..ec3d61050563320e3e91a67b3969de35185b9f32 100644 (file)
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -208,6 +208,13 @@ DefaultCommit<Impl>::regStats()
          .flags(total)
          ;
  
+    statComAmos
+        .init(cpu->numThreads)
+        .name(name() +  ".amos")
+        .desc("Number of atomic instructions committed")
+        .flags(total)
+        ;
+
      statComMembars
          .init(cpu->numThreads)
          .name(name() +  ".membars")
@@ -1158,8 +1165,9 @@ DefaultCommit<Impl>::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
          // Make sure we are only trying to commit un-executed instructions we
          // think are possible.
          assert(head_inst->isNonSpeculative() || head_inst->isStoreConditional()
-               || head_inst->isMemBarrier() || head_inst->isWriteBarrier() ||
-               (head_inst->isLoad() && head_inst->strictlyOrdered()));
+               || head_inst->isMemBarrier() || head_inst->isWriteBarrier()
+               || head_inst->isAtomic()
+               || (head_inst->isLoad() && head_inst->strictlyOrdered()));
  
          DPRINTF(Commit, "Encountered a barrier or non-speculative "
                  "instruction [sn:%lli] at the head of the ROB, PC %s.\n",
@@ -1306,7 +1314,7 @@ DefaultCommit<Impl>::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
  #endif
  
      // If this was a store, record it for this cycle.
-    if (head_inst->isStore())
+    if (head_inst->isStore() || head_inst->isAtomic())
          committedStores[tid] = true;
  
      // Return true to indicate that we have committed an instruction.
@@ -1399,6 +1407,10 @@ DefaultCommit<Impl>::updateComInstStats(const DynInstPtr &inst)
          if (inst->isLoad()) {
              statComLoads[tid]++;
          }
+
+        if (inst->isAtomic()) {
+            statComAmos[tid]++;
+        }
      }
  
      if (inst->isMemBarrier()) {
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh

index ec6be657afe71d89dc4814bea44628034e9c06d0..21cae444b141e54455826f4af3941dbd10f7b473 100644 (file)
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -793,10 +793,10 @@ class FullO3CPU : public BaseO3CPU
      /** CPU pushRequest function, forwards request to LSQ. */
      Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                        unsigned int size, Addr addr, Request::Flags flags,
-                      uint64_t *res)
+                      uint64_t *res, AtomicOpFunctor *amo_op = nullptr)
      {
          return iew.ldstQueue.pushRequest(inst, isLoad, data, size, addr,
-                flags, res);
+                flags, res, amo_op);
      }
  
      /** CPU read function, forwards read to LSQ. */
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh

index 2513896315ee68f1c698984504cc0bb1a220e371..6434ec8c36c2b3e4035fffec243eabd9e9d2efbb 100644 (file)
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -479,7 +479,8 @@ DefaultIEW<Impl>::squash(ThreadID tid)
          if (skidBuffer[tid].front()->isLoad()) {
              toRename->iewInfo[tid].dispatchedToLQ++;
          }
-        if (skidBuffer[tid].front()->isStore()) {
+        if (skidBuffer[tid].front()->isStore() ||
+            skidBuffer[tid].front()->isAtomic()) {
              toRename->iewInfo[tid].dispatchedToSQ++;
          }
  
@@ -862,7 +863,8 @@ DefaultIEW<Impl>::emptyRenameInsts(ThreadID tid)
          if (insts[tid].front()->isLoad()) {
              toRename->iewInfo[tid].dispatchedToLQ++;
          }
-        if (insts[tid].front()->isStore()) {
+        if (insts[tid].front()->isStore() ||
+            insts[tid].front()->isAtomic()) {
              toRename->iewInfo[tid].dispatchedToSQ++;
          }
  
@@ -1004,7 +1006,7 @@ DefaultIEW<Impl>::dispatchInsts(ThreadID tid)
              if (inst->isLoad()) {
                  toRename->iewInfo[tid].dispatchedToLQ++;
              }
-            if (inst->isStore()) {
+            if (inst->isStore() || inst->isAtomic()) {
                  toRename->iewInfo[tid].dispatchedToSQ++;
              }
  
@@ -1030,7 +1032,8 @@ DefaultIEW<Impl>::dispatchInsts(ThreadID tid)
          }
  
          // Check LSQ if inst is LD/ST
-        if ((inst->isLoad() && ldstQueue.lqFull(tid)) ||
+        if ((inst->isAtomic() && ldstQueue.sqFull(tid)) ||
+            (inst->isLoad() && ldstQueue.lqFull(tid)) ||
              (inst->isStore() && ldstQueue.sqFull(tid))) {
              DPRINTF(IEW, "[tid:%i]: Issue: %s has become full.\n",tid,
                      inst->isLoad() ? "LQ" : "SQ");
@@ -1048,7 +1051,25 @@ DefaultIEW<Impl>::dispatchInsts(ThreadID tid)
          }
  
          // Otherwise issue the instruction just fine.
-        if (inst->isLoad()) {
+        if (inst->isAtomic()) {
+            DPRINTF(IEW, "[tid:%i]: Issue: Memory instruction "
+                    "encountered, adding to LSQ.\n", tid);
+
+            ldstQueue.insertStore(inst);
+
+            ++iewDispStoreInsts;
+
+            // AMOs need to be set as "canCommit()"
+            // so that commit can process them when they reach the
+            // head of commit.
+            inst->setCanCommit();
+            instQueue.insertNonSpec(inst);
+            add_to_iq = false;
+
+            ++iewDispNonSpecInsts;
+
+            toRename->iewInfo[tid].dispatchedToSQ++;
+        } else if (inst->isLoad()) {
              DPRINTF(IEW, "[tid:%i]: Issue: Memory instruction "
                      "encountered, adding to LSQ.\n", tid);
  
@@ -1243,7 +1264,20 @@ DefaultIEW<Impl>::executeInsts()
                      "reference.\n");
  
              // Tell the LDSTQ to execute this instruction (if it is a load).
-            if (inst->isLoad()) {
+            if (inst->isAtomic()) {
+                // AMOs are treated like store requests
+                fault = ldstQueue.executeStore(inst);
+
+                if (inst->isTranslationDelayed() &&
+                    fault == NoFault) {
+                    // A hw page table walk is currently going on; the
+                    // instruction must be deferred.
+                    DPRINTF(IEW, "Execute: Delayed translation, deferring "
+                            "store.\n");
+                    instQueue.deferMemInst(inst);
+                    continue;
+                }
+            } else if (inst->isLoad()) {
                  // Loads will mark themselves as executed, and their writeback
                  // event adds the instruction to the queue to commit
                  fault = ldstQueue.executeLoad(inst);
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh

index ddd7b6d5fd5b63514117402ad52690be32fe51b0..aa12297d634c99ed0b2c0c26afc82ed789da9491 100644 (file)
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -1251,13 +1251,15 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
  
              bool is_acq_rel = squashed_inst->isMemBarrier() &&
                           (squashed_inst->isLoad() ||
-                           (squashed_inst->isStore() &&
+                          squashed_inst->isAtomic() ||
+                          (squashed_inst->isStore() &&
                               !squashed_inst->isStoreConditional()));
  
              // Remove the instruction from the dependency list.
              if (is_acq_rel ||
                  (!squashed_inst->isNonSpeculative() &&
                   !squashed_inst->isStoreConditional() &&
+                 !squashed_inst->isAtomic() &&
                   !squashed_inst->isMemBarrier() &&
                   !squashed_inst->isWriteBarrier())) {
  
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh

index 81b7c04a5f5c4e89a0bda02d88569a2b4461b9e1..f576dd3f4e9294f940d01b394bc2a035bf582130 100644 (file)
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -191,7 +191,7 @@ class LSQ
          enum Flag : FlagsStorage
          {
              IsLoad              = 0x00000001,
-            /** True if this is a store that writes registers (SC). */
+            /** True if this is a store that writes registers (SC or AMO). */
              WbStore             = 0x00000002,
              Delayed             = 0x00000004,
              IsSplit             = 0x00000008,
@@ -211,7 +211,9 @@ class LSQ
              LSQEntryFreed       = 0x00000800,
              /** Store written back. */
              WritebackScheduled  = 0x00001000,
-            WritebackDone       = 0x00002000
+            WritebackDone       = 0x00002000,
+            /** True if this is an atomic request */
+            IsAtomic            = 0x00004000
          };
          FlagsType flags;
  
@@ -250,32 +252,39 @@ class LSQ
          const uint32_t _size;
          const Request::Flags _flags;
          uint32_t _numOutstandingPackets;
+        AtomicOpFunctor *_amo_op;
        protected:
          LSQUnit* lsqUnit() { return &_port; }
          LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad) :
              _state(State::NotIssued), _senderState(nullptr),
              _port(*port), _inst(inst), _data(nullptr),
              _res(nullptr), _addr(0), _size(0), _flags(0),
-            _numOutstandingPackets(0)
+            _numOutstandingPackets(0), _amo_op(nullptr)
          {
              flags.set(Flag::IsLoad, isLoad);
-            flags.set(Flag::WbStore, _inst->isStoreConditional());
+            flags.set(Flag::WbStore,
+                      _inst->isStoreConditional() || _inst->isAtomic());
+            flags.set(Flag::IsAtomic, _inst->isAtomic());
              install();
          }
          LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
                     const Addr& addr, const uint32_t& size,
                     const Request::Flags& flags_,
-                   PacketDataPtr data = nullptr, uint64_t* res = nullptr)
+                   PacketDataPtr data = nullptr, uint64_t* res = nullptr,
+                   AtomicOpFunctor* amo_op = nullptr)
              : _state(State::NotIssued), _senderState(nullptr),
              numTranslatedFragments(0),
              numInTranslationFragments(0),
              _port(*port), _inst(inst), _data(data),
              _res(res), _addr(addr), _size(size),
              _flags(flags_),
-            _numOutstandingPackets(0)
+            _numOutstandingPackets(0),
+            _amo_op(amo_op)
          {
              flags.set(Flag::IsLoad, isLoad);
-            flags.set(Flag::WbStore, _inst->isStoreConditional());
+            flags.set(Flag::WbStore,
+                      _inst->isStoreConditional() || _inst->isAtomic());
+            flags.set(Flag::IsAtomic, _inst->isAtomic());
              install();
          }
  
@@ -285,12 +294,20 @@ class LSQ
              return flags.isSet(Flag::IsLoad);
          }
  
+        bool
+        isAtomic() const
+        {
+            return flags.isSet(Flag::IsAtomic);
+        }
+
          /** Install the request in the LQ/SQ. */
          void install()
          {
              if (isLoad()) {
                  _port.loadQueue[_inst->lqIdx].setRequest(this);
              } else {
+                // Store, StoreConditional, and Atomic requests are pushed
+                // to this storeQueue
                  _port.storeQueue[_inst->sqIdx].setRequest(this);
              }
          }
@@ -609,17 +626,21 @@ class LSQ
          using LSQRequest::numInTranslationFragments;
          using LSQRequest::numTranslatedFragments;
          using LSQRequest::_numOutstandingPackets;
+        using LSQRequest::_amo_op;
        public:
          SingleDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
                            const Addr& addr, const uint32_t& size,
                            const Request::Flags& flags_,
                            PacketDataPtr data = nullptr,
-                          uint64_t* res = nullptr) :
-            LSQRequest(port, inst, isLoad, addr, size, flags_, data, res)
+                          uint64_t* res = nullptr,
+                          AtomicOpFunctor* amo_op = nullptr) :
+            LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
+                       amo_op)
          {
              LSQRequest::_requests.push_back(
-                std::make_shared<Request>(inst->getASID(), addr, size, flags_,
-                    inst->masterId(), inst->instAddr(), inst->contextId()));
+                    std::make_shared<Request>(inst->getASID(), addr, size,
+                    flags_, inst->masterId(), inst->instAddr(),
+                    inst->contextId(), amo_op));
              LSQRequest::_requests.back()->setReqInstSeqNum(inst->seqNum);
          }
          inline virtual ~SingleDataRequest() {}
@@ -928,7 +949,7 @@ class LSQ
  
      Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                        unsigned int size, Addr addr, Request::Flags flags,
-                      uint64_t *res);
+                      uint64_t *res, AtomicOpFunctor *amo_op);
  
      /** The CPU pointer. */
      O3CPU *cpu;
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh

index 8a221a8d5e7ed1469b31666d0d6fb92c0fd64dea..abe751c88ac062936a295a92165639f54fb7dfc2 100644 (file)
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -680,13 +680,26 @@ template<class Impl>
  Fault
  LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                         unsigned int size, Addr addr, Request::Flags flags,
-                       uint64_t *res)
+                       uint64_t *res, AtomicOpFunctor *amo_op)
  {
+    // The incoming request can be a load, a store, or an atomic.
+    // An atomic request carries a pointer to its atomic memory
+    // operation (amo_op).
+    bool isAtomic = !isLoad && amo_op;
+
      ThreadID tid = cpu->contextToThread(inst->contextId());
      auto cacheLineSize = cpu->cacheLineSize();
      bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
      LSQRequest* req = nullptr;
  
+    // Atomic requests that access data across a cache line boundary are
+    // currently not allowed since the cache does not guarantee that the
+    // corresponding atomic memory operation executes atomically across lines.
+    // For ISAs such as x86 that support cross-cache-line atomic instructions,
+    // the cache needs to be modified to perform an atomic update to both cache
+    // lines. For now, such cross-line updates are not supported.
+    assert(!isAtomic || (isAtomic && !needs_burst));
+
      if (inst->translationStarted()) {
          req = inst->savedReq;
          assert(req);
@@ -696,7 +709,7 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                      size, flags, data, res);
          } else {
              req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
-                    size, flags, data, res);
+                    size, flags, data, res, amo_op);
          }
          assert(req);
          inst->setRequest();
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh

index 5b90da4f509346f6f5464adf3e79bbf93a9570e8..3be67bec4c255682dd2a6a3bc8ce4fdc0542650f 100644 (file)
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -702,10 +702,12 @@ LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
              bool lower_load_has_store_part = req_s < st_e;
              bool upper_load_has_store_part = req_e > st_s;
  
-            // If the store's data has all of the data needed and the load
-            // isn't LLSC then
-            // we can forward.
-            if (store_has_lower_limit && store_has_upper_limit &&
+            // If the store entry is not atomic (atomic does not have valid
+            // data), the store has all of the data needed, and
+            // the load is not LLSC, then
+            // we can forward data from the store to the load
+            if (!store_it->instruction()->isAtomic() &&
+                store_has_lower_limit && store_has_upper_limit &&
                  !req->mainRequest()->isLLSC()) {
  
                  // Get shift amount for offset into the store's data.
@@ -755,17 +757,22 @@ LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
  
                  return NoFault;
              } else if (
+                // This is the partial store-load forwarding case where a store
+                // has only part of the load's data and the load isn't LLSC
                  (!req->mainRequest()->isLLSC() &&
                   ((store_has_lower_limit && lower_load_has_store_part) ||
                    (store_has_upper_limit && upper_load_has_store_part) ||
                    (lower_load_has_store_part && upper_load_has_store_part))) ||
+                // The load is LLSC, and the store has all or part of the
+                // load's data
                  (req->mainRequest()->isLLSC() &&
                   ((store_has_lower_limit || upper_load_has_store_part) &&
-                  (store_has_upper_limit || lower_load_has_store_part)))) {
-                // This is the partial store-load forwarding case where a store
-                // has only part of the load's data and the load isn't LLSC or
-                // the load is LLSC and the store has all or part of the load's
+                  (store_has_upper_limit || lower_load_has_store_part))) ||
+                // The store entry is atomic and has all or part of the load's
                  // data
+                (store_it->instruction()->isAtomic() &&
+                 ((store_has_lower_limit || upper_load_has_store_part) &&
+                  (store_has_upper_limit || lower_load_has_store_part)))) {
  
                  // If it's already been written back, then don't worry about
                  // stalling on it.
@@ -857,8 +864,10 @@ LSQUnit<Impl>::write(LSQRequest *req, uint8_t *data, int store_idx)
      storeQueue[store_idx].isAllZeros() = store_no_data;
      assert(size <= SQEntry::DataSize || store_no_data);
  
+    // copy data into the storeQueue only if the store request has valid data
      if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
-        !req->request()->isCacheMaintenance())
+        !req->request()->isCacheMaintenance() &&
+        !req->request()->isAtomic())
          memcpy(storeQueue[store_idx].data(), data, size);
  
      // This function only writes the data to the store queue, so no fault
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh

index 9756a9ef144e8219f868c8e19cf7cc4c4e2331aa..48179ceb82119c5e25e42a09a55b912e1f139215 100644 (file)
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -124,16 +124,19 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
      assert(!cpu->switchedOut());
      if (!inst->isSquashed()) {
          if (state->needWB) {
-            // Only loads and store conditionals perform the writeback
+            // Only loads, store conditionals and atomics perform the writeback
              // after receving the response from the memory
-            assert(inst->isLoad() || inst->isStoreConditional());
+            assert(inst->isLoad() || inst->isStoreConditional() ||
+                   inst->isAtomic());
              writeback(inst, state->request()->mainPacket());
-            if (inst->isStore()) {
+            if (inst->isStore() || inst->isAtomic()) {
                  auto ss = dynamic_cast<SQSenderState*>(state);
                  ss->writebackDone();
                  completeStore(ss->idx);
              }
          } else if (inst->isStore()) {
+            // This is a regular store (i.e., not a store conditional or an
+            // atomic), so it can complete without writing back
              completeStore(dynamic_cast<SQSenderState*>(state)->idx);
          }
      }
@@ -274,7 +277,7 @@ LSQUnit<Impl>::insert(const DynInstPtr &inst)
  {
      assert(inst->isMemRef());
  
-    assert(inst->isLoad() || inst->isStore());
+    assert(inst->isLoad() || inst->isStore() || inst->isAtomic());
  
      if (inst->isLoad()) {
          insertLoad(inst);
@@ -614,8 +617,8 @@ LSQUnit<Impl>::executeStore(const DynInstPtr &store_inst)
  
      assert(store_fault == NoFault);
  
-    if (store_inst->isStoreConditional()) {
-        // Store conditionals need to set themselves as able to
+    if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
+        // Store conditionals and Atomics need to set themselves as able to
          // writeback if we haven't had a fault by here.
          storeQueue[store_idx].canWB() = true;
  
@@ -751,8 +754,8 @@ LSQUnit<Impl>::writebackStores()
              state->inst = inst;
  
              req->senderState(state);
-            if (inst->isStoreConditional()) {
-                /* Only store conditionals need a writeback. */
+            if (inst->isStoreConditional() || inst->isAtomic()) {
+                /* Only store conditionals and atomics need a writeback. */
                  state->needWB = true;
              }
          }
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh

index 26c4b4d6e7e23c8f9c34ac42687a412fba347549..f1d0e2313d75e13eb4e6275077826d6aa073dad7 100644 (file)
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -191,11 +191,11 @@ MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
      // Check any barriers and the dependence predictor for any
      // producing memrefs/stores.
      InstSeqNum producing_store;
-    if (inst->isLoad() && loadBarrier) {
+    if ((inst->isLoad() || inst->isAtomic()) && loadBarrier) {
          DPRINTF(MemDepUnit, "Load barrier [sn:%lli] in flight\n",
                  loadBarrierSN);
          producing_store = loadBarrierSN;
-    } else if (inst->isStore() && storeBarrier) {
+    } else if ((inst->isStore() || inst->isAtomic()) && storeBarrier) {
          DPRINTF(MemDepUnit, "Store barrier [sn:%lli] in flight\n",
                  storeBarrierSN);
          producing_store = storeBarrierSN;
@@ -252,8 +252,8 @@ MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
          }
      }
  
-    if (inst->isStore()) {
-        DPRINTF(MemDepUnit, "Inserting store PC %s [sn:%lli].\n",
+    if (inst->isStore() || inst->isAtomic()) {
+        DPRINTF(MemDepUnit, "Inserting store/atomic PC %s [sn:%lli].\n",
                  inst->pcState(), inst->seqNum);
  
          depPred.insertStore(inst->instAddr(), inst->seqNum, inst->threadNumber);
@@ -288,8 +288,8 @@ MemDepUnit<MemDepPred, Impl>::insertNonSpec(const DynInstPtr &inst)
  
      // Might want to turn this part into an inline function or something.
      // It's shared between both insert functions.
-    if (inst->isStore()) {
-        DPRINTF(MemDepUnit, "Inserting store PC %s [sn:%lli].\n",
+    if (inst->isStore() || inst->isAtomic()) {
+        DPRINTF(MemDepUnit, "Inserting store/atomic PC %s [sn:%lli].\n",
                  inst->pcState(), inst->seqNum);
  
          depPred.insertStore(inst->instAddr(), inst->seqNum, inst->threadNumber);
@@ -451,8 +451,9 @@ template <class MemDepPred, class Impl>
  void
  MemDepUnit<MemDepPred, Impl>::wakeDependents(const DynInstPtr &inst)
  {
-    // Only stores and barriers have dependents.
-    if (!inst->isStore() && !inst->isMemBarrier() && !inst->isWriteBarrier()) {
+    // Only stores, atomics and barriers have dependents.
+    if (!inst->isStore() && !inst->isAtomic() && !inst->isMemBarrier() &&
+        !inst->isWriteBarrier()) {
          return;
      }
  
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh

index fd9b09e20c7ba143f7e273fb3f15174b048e7233..c24a097118817b7e51f2248aa002c976e83a1c47 100644 (file)
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -647,7 +647,7 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
              }
          }
  
-        if (inst->isStore()) {
+        if (inst->isStore() || inst->isAtomic()) {
              if (calcFreeSQEntries(tid) <= 0) {
                  DPRINTF(Rename, "[tid:%u]: Cannot rename due to no free SQ\n");
                  source = SQ;
@@ -741,12 +741,12 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
  
          renameDestRegs(inst, inst->threadNumber);
  
-        if (inst->isLoad()) {
-                loadsInProgress[tid]++;
-        }
-        if (inst->isStore()) {
-                storesInProgress[tid]++;
+        if (inst->isAtomic() || inst->isStore()) {
+            storesInProgress[tid]++;
+        } else if (inst->isLoad()) {
+            loadsInProgress[tid]++;
          }
+
          ++renamed_insts;
          // Notify potential listeners that source and destination registers for
          // this instruction have been renamed.
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc

index e91fafbcce1d5b610acae0229465bc36e3841e34..caf2427efef57c50b2e1c739de7fbcec87457d47 100644 (file)
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -72,6 +72,7 @@ AtomicSimpleCPU::init()
      ifetch_req->setContext(cid);
      data_read_req->setContext(cid);
      data_write_req->setContext(cid);
+    data_amo_req->setContext(cid);
  }
  
  AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
@@ -90,6 +91,7 @@ AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
      ifetch_req = std::make_shared<Request>();
      data_read_req = std::make_shared<Request>();
      data_write_req = std::make_shared<Request>();
+    data_amo_req = std::make_shared<Request>();
  }
  
  
@@ -416,14 +418,6 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size,
      }
  }
  
-Fault
-AtomicSimpleCPU::initiateMemRead(Addr addr, unsigned size,
-                                 Request::Flags flags)
-{
-    panic("initiateMemRead() is for timing accesses, and should "
-          "never be called on AtomicSimpleCPU.\n");
-}
-
  Fault
  AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
                            Request::Flags flags, uint64_t *res)
@@ -534,6 +528,70 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
      }
  }
  
+Fault
+AtomicSimpleCPU::amoMem(Addr addr, uint8_t* data, unsigned size,
+                        Request::Flags flags, AtomicOpFunctor *amo_op)
+{
+    SimpleExecContext& t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
+    // use the CPU's preallocated AMO request object (data_amo_req)
+    const RequestPtr &req = data_amo_req;
+
+    if (traceData)
+        traceData->setMem(addr, size, flags);
+
+    //The address of the second part of this access if it needs to be split
+    //across a cache line boundary.
+    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
+
+    // AMO requests that cross a cache line boundary are not allowed,
+    // since the cache does not guarantee that an AMO op spanning two
+    // cache lines is executed atomically.
+    // For ISAs such as x86 that require AMO operations to work on
+    // accesses that cross cache-line boundaries, the cache needs to be
+    // modified to support locking both cache lines to guarantee
+    // atomicity.
+    if (secondAddr > addr) {
+        panic("AMO request should not access across a cache line boundary\n");
+    }
+
+    dcache_latency = 0;
+
+    req->taskId(taskId());
+    req->setVirt(0, addr, size, flags, dataMasterId(),
+                 thread->pcState().instAddr(), amo_op);
+
+    // translate to physical address
+    Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
+                                                      BaseTLB::Write);
+
+    // Now do the access.
+    if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
+        // We treat AMO accesses as Write accesses with the SwapReq
+        // command; data will hold the return data of the AMO access.
+        Packet pkt(req, Packet::makeWriteCmd(req));
+        pkt.dataStatic(data);
+
+        if (req->isMmappedIpr())
+            dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
+        else {
+            dcache_latency += sendPacket(dcachePort, &pkt);
+        }
+
+        dcache_access = true;
+
+        assert(!pkt.isError());
+        assert(!req->isLLSC());
+    }
+
+    if (fault != NoFault && req->isPrefetch()) {
+        return NoFault;
+    }
+
+    //If there's a fault and we're not doing prefetch, return it
+    return fault;
+}
  
  void
  AtomicSimpleCPU::tick()
@@ -550,6 +608,7 @@ AtomicSimpleCPU::tick()
          ifetch_req->setContext(cid);
          data_read_req->setContext(cid);
          data_write_req->setContext(cid);
+        data_amo_req->setContext(cid);
      }
  
      SimpleExecContext& t_info = *threadInfo[curThread];
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh

index a5151aa1862876cdf1c95145faf587844c27b047..84f3791212cf9ba999e23e58adb2c1a0a6d663a8 100644 (file)
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -163,6 +163,7 @@ class AtomicSimpleCPU : public BaseSimpleCPU
      RequestPtr ifetch_req;
      RequestPtr data_read_req;
      RequestPtr data_write_req;
+    RequestPtr data_amo_req;
  
      bool dcache_access;
      Tick dcache_latency;
@@ -197,12 +198,12 @@ class AtomicSimpleCPU : public BaseSimpleCPU
      Fault readMem(Addr addr, uint8_t *data, unsigned size,
                    Request::Flags flags) override;
  
-    Fault initiateMemRead(Addr addr, unsigned size,
-                          Request::Flags flags) override;
-
      Fault writeMem(uint8_t *data, unsigned size,
                     Addr addr, Request::Flags flags, uint64_t *res) override;
  
+    Fault amoMem(Addr addr, uint8_t* data, unsigned size,
+                 Request::Flags flags, AtomicOpFunctor *amo_op) override;
+
      void regProbePoints() override;
  
      /**
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc

index f71277d1cb25856639e20f9cbce7b10242a2df9e..422c732985bb250d443cb416bba56c4fdbd1e505 100644 (file)
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -644,7 +644,7 @@ BaseSimpleCPU::postExecute()
          t_info.numLoadInsts++;
      }
  
-    if (curStaticInst->isStore()){
+    if (curStaticInst->isStore() || curStaticInst->isAtomic()){
          t_info.numStoreInsts++;
      }
      /* End power model statistics */
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh

index e62fcf4d1a00b40df01dbd523cbaf106a80db0da..8060b07ad32e265dd7d9cbfd4618e50aa52d0874 100644 (file)
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -143,13 +143,26 @@ class BaseSimpleCPU : public BaseCPU
      void startup() override;
  
      virtual Fault readMem(Addr addr, uint8_t* data, unsigned size,
-                          Request::Flags flags) = 0;
+                          Request::Flags flags)
+    { panic("readMem() is not implemented\n"); }
  
      virtual Fault initiateMemRead(Addr addr, unsigned size,
-                                  Request::Flags flags) = 0;
+                                  Request::Flags flags)
+    { panic("initiateMemRead() is not implemented\n"); }
  
      virtual Fault writeMem(uint8_t* data, unsigned size, Addr addr,
-                           Request::Flags flags, uint64_t* res) = 0;
+                           Request::Flags flags, uint64_t* res)
+    { panic("writeMem() is not implemented\n"); }
+
+    virtual Fault amoMem(Addr addr, uint8_t* data, unsigned size,
+                         Request::Flags flags,
+                         AtomicOpFunctor *amo_op)
+    { panic("amoMem() is not implemented\n"); }
+
+    virtual Fault initiateMemAMO(Addr addr, unsigned size,
+                                 Request::Flags flags,
+                                 AtomicOpFunctor *amo_op)
+    { panic("initiateMemAMO() is not implemented\n"); }
  
      void countInst();
      Counter totalInsts() const override;
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh

index 0552dc0c63fb0f61b08fb97d54992152418ce38f..de5cc7fd7462eeddcac275a1f1acb8341bc20cfd 100644 (file)
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -456,6 +456,19 @@ class SimpleExecContext : public ExecContext {
          return cpu->writeMem(data, size, addr, flags, res);
      }
  
+    Fault amoMem(Addr addr, uint8_t *data, unsigned int size,
+                 Request::Flags flags, AtomicOpFunctor *amo_op) override
+    {
+        return cpu->amoMem(addr, data, size, flags, amo_op);
+    }
+
+    Fault initiateMemAMO(Addr addr, unsigned int size,
+                         Request::Flags flags,
+                         AtomicOpFunctor *amo_op) override
+    {
+        return cpu->initiateMemAMO(addr, size, flags, amo_op);
+    }
+
      /**
       * Sets the number of consecutive store conditional failures.
       */
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc

index b5450cf5fe76c98e5c0f79ba6555c77d778f1cdf..637308a96454f38664ae13a1ccd0bffcb59517b4 100644 (file)
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -293,6 +293,7 @@ TimingSimpleCPU::sendData(const RequestPtr &req, uint8_t *data, uint64_t *res,
  
      PacketPtr pkt = buildPacket(req, read);
      pkt->dataDynamic<uint8_t>(data);
+
      if (req->getFlags().isSet(Request::NO_ACCESS)) {
          assert(!dcache_pkt);
          pkt->makeResponse();
@@ -414,14 +415,6 @@ TimingSimpleCPU::buildSplitPacket(PacketPtr &pkt1, PacketPtr &pkt2,
      pkt2->senderState = new SplitFragmentSenderState(pkt, 1);
  }
  
-Fault
-TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
-                         unsigned size, Request::Flags flags)
-{
-    panic("readMem() is for atomic accesses, and should "
-          "never be called on TimingSimpleCPU.\n");
-}
-
  Fault
  TimingSimpleCPU::initiateMemRead(Addr addr, unsigned size,
                                   Request::Flags flags)
@@ -556,6 +549,54 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
      return NoFault;
  }
  
+Fault
+TimingSimpleCPU::initiateMemAMO(Addr addr, unsigned size,
+                                Request::Flags flags,
+                                AtomicOpFunctor *amo_op)
+{
+    SimpleExecContext &t_info = *threadInfo[curThread];
+    SimpleThread* thread = t_info.thread;
+
+    Fault fault;
+    const int asid = 0;
+    const Addr pc = thread->instAddr();
+    unsigned block_size = cacheLineSize();
+    BaseTLB::Mode mode = BaseTLB::Write;
+
+    if (traceData)
+        traceData->setMem(addr, size, flags);
+
+    RequestPtr req = make_shared<Request>(asid, addr, size, flags,
+                            dataMasterId(), pc, thread->contextId(), amo_op);
+
+    assert(req->hasAtomicOpFunctor());
+
+    req->taskId(taskId());
+
+    Addr split_addr = roundDown(addr + size - 1, block_size);
+
+    // AMO requests that cross a cache line boundary are not allowed,
+    // since the cache does not guarantee that an AMO op spanning two
+    // cache lines is executed atomically.
+    // For ISAs such as x86 that require AMO operations to work on
+    // accesses that cross cache-line boundaries, the cache needs to be
+    // modified to support locking both cache lines to guarantee
+    // atomicity.
+    if (split_addr > addr) {
+        panic("AMO requests should not access across a cache line boundary\n");
+    }
+
+    _status = DTBWaitResponse;
+
+    WholeTranslationState *state =
+        new WholeTranslationState(req, new uint8_t[size], NULL, mode);
+    DataTranslation<TimingSimpleCPU *> *translation
+        = new DataTranslation<TimingSimpleCPU *>(this, state);
+    thread->dtb->translateTiming(req, thread->getTC(), translation, mode);
+
+    return NoFault;
+}
+
  void
  TimingSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
  {
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh

index 0300d38eb1596d10885f484836996f10b1ff7429..ce0a4dbfc1d6436d4566b7a058a6fca934d1a0fe 100644 (file)
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -282,15 +282,15 @@ class TimingSimpleCPU : public BaseSimpleCPU
      void activateContext(ThreadID thread_num) override;
      void suspendContext(ThreadID thread_num) override;
  
-    Fault readMem(Addr addr, uint8_t *data, unsigned size,
-                  Request::Flags flags) override;
-
      Fault initiateMemRead(Addr addr, unsigned size,
                            Request::Flags flags) override;
  
      Fault writeMem(uint8_t *data, unsigned size,
                     Addr addr, Request::Flags flags, uint64_t *res) override;
  
+    Fault initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
+                         AtomicOpFunctor *amo_op) override;
+
      void fetch();
      void sendFetch(const Fault &fault,
                     const RequestPtr &req, ThreadContext *tc);
author	Tuan Ta <qtt2@cornell.edu>
	Mon, 22 Jan 2018 18:12:50 +0000 (13:12 -0500)
committer	Tuan Ta <qtt2@cornell.edu>
	Fri, 8 Feb 2019 15:27:04 +0000 (15:27 +0000)
src/cpu/base.cc		patch \| blob \| history
src/cpu/base_dyn_inst.hh		patch \| blob \| history
src/cpu/checker/cpu.hh		patch \| blob \| history
src/cpu/exec_context.hh		patch \| blob \| history
src/cpu/minor/exec_context.hh		patch \| blob \| history
src/cpu/minor/execute.cc		patch \| blob \| history
src/cpu/minor/fetch2.cc		patch \| blob \| history
src/cpu/minor/fetch2.hh		patch \| blob \| history
src/cpu/minor/lsq.cc		patch \| blob \| history
src/cpu/minor/lsq.hh		patch \| blob \| history
src/cpu/o3/commit.hh		patch \| blob \| history
src/cpu/o3/commit_impl.hh		patch \| blob \| history
src/cpu/o3/cpu.hh		patch \| blob \| history
src/cpu/o3/iew_impl.hh		patch \| blob \| history
src/cpu/o3/inst_queue_impl.hh		patch \| blob \| history
src/cpu/o3/lsq.hh		patch \| blob \| history
src/cpu/o3/lsq_impl.hh		patch \| blob \| history
src/cpu/o3/lsq_unit.hh		patch \| blob \| history
src/cpu/o3/lsq_unit_impl.hh		patch \| blob \| history
src/cpu/o3/mem_dep_unit_impl.hh		patch \| blob \| history
src/cpu/o3/rename_impl.hh		patch \| blob \| history
src/cpu/simple/atomic.cc		patch \| blob \| history
src/cpu/simple/atomic.hh		patch \| blob \| history
src/cpu/simple/base.cc		patch \| blob \| history
src/cpu/simple/base.hh		patch \| blob \| history
src/cpu/simple/exec_context.hh		patch \| blob \| history
src/cpu/simple/timing.cc		patch \| blob \| history
src/cpu/simple/timing.hh		patch \| blob \| history