cpu,mem: Add support for partial loads/stores and wide mem. accesses

author Giacomo Gabrielli <giacomo.gabrielli@arm.com>

Fri, 7 Jul 2017 13:13:11 +0000 (14:13 +0100)

committer Giacomo Gabrielli <giacomo.gabrielli@arm.com>

Sat, 11 May 2019 12:48:58 +0000 (12:48 +0000)
author Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Fri, 7 Jul 2017 13:13:11 +0000 (14:13 +0100)
committer Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Sat, 11 May 2019 12:48:58 +0000 (12:48 +0000)
diff --git a/src/cpu/base.hh b/src/cpu/base.hh

index f013a3e02b6fe98c2eeeb4dc7ebb17126e217106..3d679f172d961ee4143a570097160ca8e52164b3 100644 (file)
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -175,9 +175,9 @@ class BaseCPU : public ClockedObject
      uint32_t socketId() const { return _socketId; }
  
      /** Reads this CPU's unique data requestor ID */
-    MasterID dataMasterId() { return _dataMasterId; }
+    MasterID dataMasterId() const { return _dataMasterId; }
      /** Reads this CPU's unique instruction requestor ID */
-    MasterID instMasterId() { return _instMasterId; }
+    MasterID instMasterId() const { return _instMasterId; }
  
      /**
       * Get a port on this CPU. All CPUs have a data and
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh

index 4084241bd3edc16c252979b2b5aa835c1b16a76c..22a32ec1024fe31699fe69a57cd37d1ecfd296ae 100644 (file)
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -298,10 +298,12 @@ class BaseDynInst : public ExecContext, public RefCounted
          cpu->demapPage(vaddr, asn);
      }
  
-    Fault initiateMemRead(Addr addr, unsigned size, Request::Flags flags);
+    Fault initiateMemRead(Addr addr, unsigned size, Request::Flags flags,
+            const std::vector<bool>& byteEnable = std::vector<bool>());
  
      Fault writeMem(uint8_t *data, unsigned size, Addr addr,
-                   Request::Flags flags, uint64_t *res);
+                   Request::Flags flags, uint64_t *res,
+                   const std::vector<bool>& byteEnable = std::vector<bool>());
  
      Fault initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
                           AtomicOpFunctor *amo_op);
@@ -918,21 +920,24 @@ class BaseDynInst : public ExecContext, public RefCounted
  template<class Impl>
  Fault
  BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
-                                   Request::Flags flags)
+                                   Request::Flags flags,
+                                   const std::vector<bool>& byteEnable)
  {
      return cpu->pushRequest(
              dynamic_cast<typename DynInstPtr::PtrType>(this),
-            /* ld */ true, nullptr, size, addr, flags, nullptr);
+            /* ld */ true, nullptr, size, addr, flags, nullptr, nullptr,
+            byteEnable);
  }
  
  template<class Impl>
  Fault
  BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size, Addr addr,
-                            Request::Flags flags, uint64_t *res)
+                            Request::Flags flags, uint64_t *res,
+                            const std::vector<bool>& byteEnable)
  {
      return cpu->pushRequest(
              dynamic_cast<typename DynInstPtr::PtrType>(this),
-            /* st */ false, data, size, addr, flags, res);
+            /* st */ false, data, size, addr, flags, res, nullptr, byteEnable);
  }
  
  template<class Impl>
diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc

index 7f8eada4c4d5330c8be614d93c15422a1b6a0cae..cca6d6b12480b0cb5b1139ebbb0b1233c14f45c2 100644 (file)
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2011,2013,2017 ARM Limited
+ * Copyright (c) 2011,2013,2017-2018 ARM Limited
   * All rights reserved
   *
   * The license below extends only to copyright in the software and shall
@@ -52,6 +52,7 @@
  #include "cpu/simple_thread.hh"
  #include "cpu/static_inst.hh"
  #include "cpu/thread_context.hh"
+#include "cpu/utils.hh"
  #include "params/CheckerCPU.hh"
  #include "sim/full_system.hh"
  
@@ -139,31 +140,68 @@ CheckerCPU::unserialize(CheckpointIn &cp)
  {
  }
  
+RequestPtr
+CheckerCPU::genMemFragmentRequest(Addr frag_addr, int size,
+                                  Request::Flags flags,
+                                  const std::vector<bool>& byte_enable,
+                                  int& frag_size, int& size_left) const
+{
+    frag_size = std::min(
+        cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize()),
+        (Addr) size_left);
+    size_left -= frag_size;
+
+    RequestPtr mem_req;
+
+    if (!byte_enable.empty()) {
+        // Set up byte-enable mask for the current fragment
+        auto it_start = byte_enable.cbegin() + (size - (frag_size +
+                                                        size_left));
+        auto it_end = byte_enable.cbegin() + (size - size_left);
+        if (isAnyActiveElement(it_start, it_end)) {
+            mem_req = std::make_shared<Request>(0, frag_addr, frag_size,
+                    flags, masterId, thread->pcState().instAddr(),
+                    tc->contextId());
+            mem_req->setByteEnable(std::vector<bool>(it_start, it_end));
+        }
+    } else {
+        mem_req = std::make_shared<Request>(0, frag_addr, frag_size,
+                    flags, masterId, thread->pcState().instAddr(),
+                    tc->contextId());
+    }
+
+    return mem_req;
+}
+
  Fault
  CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
-                    Request::Flags flags)
+                    Request::Flags flags,
+                    const std::vector<bool>& byteEnable)
  {
      Fault fault = NoFault;
-    int fullSize = size;
-    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
      bool checked_flags = false;
      bool flags_match = true;
      Addr pAddr = 0x0;
  
-
-    if (secondAddr > addr)
-       size = secondAddr - addr;
+    Addr frag_addr = addr;
+    int frag_size = 0;
+    int size_left = size;
+    bool predicate;
  
      // Need to account for multiple accesses like the Atomic and TimingSimple
      while (1) {
-        auto mem_req = std::make_shared<Request>(
-            0, addr, size, flags, masterId,
-            thread->pcState().instAddr(), tc->contextId());
+        RequestPtr mem_req = genMemFragmentRequest(frag_addr, size, flags,
+                                                   byteEnable, frag_size,
+                                                   size_left);
+
+        predicate = (mem_req != nullptr);
  
          // translate to physical address
-        fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Read);
+        if (predicate) {
+            fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Read);
+        }
  
-        if (!checked_flags && fault == NoFault && unverifiedReq) {
+        if (predicate && !checked_flags && fault == NoFault && unverifiedReq) {
              flags_match = checkFlags(unverifiedReq, mem_req->getVaddr(),
                                       mem_req->getPaddr(), mem_req->getFlags());
              pAddr = mem_req->getPaddr();
@@ -171,7 +209,7 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
          }
  
          // Now do the access
-        if (fault == NoFault &&
+        if (predicate && fault == NoFault &&
              !mem_req->getFlags().isSet(Request::NO_ACCESS)) {
              PacketPtr pkt = Packet::createRead(mem_req);
  
@@ -182,7 +220,7 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
                  dcachePort->sendFunctional(pkt);
              } else {
                  // Assume the data is correct if it's an uncached access
-                memcpy(data, unverifiedMemData, size);
+                memcpy(data, unverifiedMemData, frag_size);
              }
  
              delete pkt;
@@ -196,22 +234,21 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
          }
  
          //If we don't need to access a second cache line, stop now.
-        if (secondAddr <= addr)
+        if (size_left == 0)
          {
              break;
          }
  
          // Setup for accessing next cache line
-        data += size;
-        unverifiedMemData += size;
-        size = addr + fullSize - secondAddr;
-        addr = secondAddr;
+        frag_addr += frag_size;
+        data += frag_size;
+        unverifiedMemData += frag_size;
      }
  
      if (!flags_match) {
          warn("%lli: Flags do not match CPU:%#x %#x %#x Checker:%#x %#x %#x\n",
               curTick(), unverifiedReq->getVaddr(), unverifiedReq->getPaddr(),
-             unverifiedReq->getFlags(), addr, pAddr, flags);
+             unverifiedReq->getFlags(), frag_addr, pAddr, flags);
          handleError();
      }
  
@@ -220,31 +257,35 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
  
  Fault
  CheckerCPU::writeMem(uint8_t *data, unsigned size,
-                     Addr addr, Request::Flags flags, uint64_t *res)
+                     Addr addr, Request::Flags flags, uint64_t *res,
+                     const std::vector<bool>& byteEnable)
  {
+    assert(byteEnable.empty() || byteEnable.size() == size);
+
      Fault fault = NoFault;
      bool checked_flags = false;
      bool flags_match = true;
      Addr pAddr = 0x0;
      static uint8_t zero_data[64] = {};
  
-    int fullSize = size;
-
-    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
-
-    if (secondAddr > addr)
-        size = secondAddr - addr;
+    Addr frag_addr = addr;
+    int frag_size = 0;
+    int size_left = size;
+    bool predicate;
  
      // Need to account for a multiple access like Atomic and Timing CPUs
      while (1) {
-        auto mem_req = std::make_shared<Request>(
-            0, addr, size, flags, masterId,
-            thread->pcState().instAddr(), tc->contextId());
+        RequestPtr mem_req = genMemFragmentRequest(frag_addr, size, flags,
+                                                   byteEnable, frag_size,
+                                                   size_left);
  
-        // translate to physical address
-        fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Write);
+        predicate = (mem_req != nullptr);
+
+        if (predicate) {
+            fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Write);
+        }
  
-        if (!checked_flags && fault == NoFault && unverifiedReq) {
+        if (predicate && !checked_flags && fault == NoFault && unverifiedReq) {
             flags_match = checkFlags(unverifiedReq, mem_req->getVaddr(),
                                      mem_req->getPaddr(), mem_req->getFlags());
             pAddr = mem_req->getPaddr();
@@ -261,7 +302,7 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size,
          bool was_prefetch = mem_req->isPrefetch();
  
          //If we don't need to access a second cache line, stop now.
-        if (fault != NoFault || secondAddr <= addr)
+        if (fault != NoFault || size_left == 0)
          {
              if (fault != NoFault && was_prefetch) {
                fault = NoFault;
@@ -269,16 +310,13 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size,
              break;
          }
  
-        //Update size and access address
-        size = addr + fullSize - secondAddr;
-        //And access the right address.
-        addr = secondAddr;
+        frag_addr += frag_size;
     }
  
     if (!flags_match) {
         warn("%lli: Flags do not match CPU:%#x %#x Checker:%#x %#x %#x\n",
              curTick(), unverifiedReq->getVaddr(), unverifiedReq->getPaddr(),
-            unverifiedReq->getFlags(), addr, pAddr, flags);
+            unverifiedReq->getFlags(), frag_addr, pAddr, flags);
         handleError();
     }
  
@@ -304,12 +342,12 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size,
     // const set of zeros.
     if (flags & Request::STORE_NO_DATA) {
         assert(!data);
-       assert(sizeof(zero_data) <= fullSize);
+       assert(sizeof(zero_data) <= size);
         data = zero_data;
     }
  
     if (unverifiedReq && unverifiedMemData &&
-       memcmp(data, unverifiedMemData, fullSize) && extraData) {
+       memcmp(data, unverifiedMemData, size) && extraData) {
             warn("%lli: Store value does not match value sent to memory! "
                    "data: %#x inst_data: %#x", curTick(), data,
                    unverifiedMemData);
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh

index 8c3000005cb96464e1564e684b48e24cb0ae7951..66632b7201a6c107c76f1f5d41eb41e5b23a89eb 100644 (file)
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -531,11 +531,36 @@ class CheckerCPU : public BaseCPU, public ExecContext
          this->dtb->demapPage(vaddr, asn);
      }
  
+    /**
+     * Helper function used to generate the request for a single fragment of a
+     * memory access.
+     *
+     * Takes care of setting up the appropriate byte-enable mask for the
+     * fragment, given the mask for the entire memory access.
+     *
+     * @param frag_addr Start address of the fragment.
+     * @param size Total size of the memory access in bytes.
+     * @param flags Request flags.
+     * @param byte_enable Byte-enable mask for the entire memory access.
+     * @param[out] frag_size Fragment size.
+     * @param[in,out] size_left Size left to be processed in the memory access.
+     * @return Pointer to the allocated Request, nullptr if the byte-enable
+     * mask is all-false for the fragment.
+     */
+    RequestPtr genMemFragmentRequest(Addr frag_addr, int size,
+                                     Request::Flags flags,
+                                     const std::vector<bool>& byte_enable,
+                                     int& frag_size, int& size_left) const;
+
      Fault readMem(Addr addr, uint8_t *data, unsigned size,
-                  Request::Flags flags) override;
+                  Request::Flags flags,
+                  const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;
  
      Fault writeMem(uint8_t *data, unsigned size, Addr addr,
-                   Request::Flags flags, uint64_t *res) override;
+                   Request::Flags flags, uint64_t *res,
+                   const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;
  
      Fault amoMem(Addr addr, uint8_t* data, unsigned size,
                   Request::Flags flags, AtomicOpFunctor *amo_op) override
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh

index 4cad9e3e1434b611437fbf62692dd16e5d4332e3..b294387e2f8e485071a239a6f3da88b36ab4b7f2 100644 (file)
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -235,7 +235,8 @@ class ExecContext {
       * should never be called).
       */
      virtual Fault readMem(Addr addr, uint8_t *data, unsigned int size,
-                          Request::Flags flags)
+            Request::Flags flags,
+            const std::vector<bool>& byteEnable = std::vector<bool>())
      {
          panic("ExecContext::readMem() should be overridden\n");
      }
@@ -248,7 +249,8 @@ class ExecContext {
       * should never be called).
       */
      virtual Fault initiateMemRead(Addr addr, unsigned int size,
-                                  Request::Flags flags)
+            Request::Flags flags,
+            const std::vector<bool>& byteEnable = std::vector<bool>())
      {
          panic("ExecContext::initiateMemRead() should be overridden\n");
      }
@@ -258,7 +260,9 @@ class ExecContext {
       * For timing-mode contexts, initiate a timing memory write operation.
       */
      virtual Fault writeMem(uint8_t *data, unsigned int size, Addr addr,
-                           Request::Flags flags, uint64_t *res) = 0;
+                           Request::Flags flags, uint64_t *res,
+                           const std::vector<bool>& byteEnable =
+                               std::vector<bool>()) = 0;
  
      /**
       * For atomic-mode contexts, perform an atomic AMO (a.k.a., Atomic
diff --git a/src/cpu/minor/dyn_inst.hh b/src/cpu/minor/dyn_inst.hh

index b2decb39b97b5a1a3b0141109f69e97fe36bc372..0a8ff8acf080e0b34fa90fd419eae51a0cf4c04f 100644 (file)
--- a/src/cpu/minor/dyn_inst.hh
+++ b/src/cpu/minor/dyn_inst.hh
@@ -202,6 +202,13 @@ class MinorDynInst : public RefCounted
       *  to allow other instructions to fill the fetch delay */
      bool canEarlyIssue;
  
+    /** Flag controlling conditional execution of the instruction */
+    bool predicate;
+
+    /** Flag controlling conditional execution of the memory access associated
+     *  with the instruction (only meaningful for loads/stores) */
+    bool memAccPredicate;
+
      /** execSeqNum of the latest inst on which this inst depends.
       *  This can be used as a sanity check for dependency ordering
       *  where slightly out of order execution is required (notably
@@ -227,7 +234,7 @@ class MinorDynInst : public RefCounted
          pc(TheISA::PCState(0)), fault(fault_),
          triedToPredict(false), predictedTaken(false),
          fuIndex(0), inLSQ(false), inStoreBuffer(false),
-        canEarlyIssue(false),
+        canEarlyIssue(false), predicate(true), memAccPredicate(true),
          instToWaitFor(0), extraCommitDelay(Cycles(0)),
          extraCommitDelayExpr(NULL), minimumCommitCycle(Cycles(0))
      { }
@@ -266,6 +273,14 @@ class MinorDynInst : public RefCounted
      /** ReportIF interface */
      void reportData(std::ostream &os) const;
  
+    bool readPredicate() const { return predicate; }
+
+    void setPredicate(bool val) { predicate = val; }
+
+    bool readMemAccPredicate() const { return memAccPredicate; }
+
+    void setMemAccPredicate(bool val) { memAccPredicate = val; }
+
      ~MinorDynInst();
  };
  
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh

index b39bbac3fa2f2de1b45fa566c9ab38331c817932..9f6fce4cd9846a53e954dfb168152608a81aaf76 100644 (file)
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -96,28 +96,40 @@ class ExecContext : public ::ExecContext
      {
          DPRINTF(MinorExecute, "ExecContext setting PC: %s\n", inst->pc);
          pcState(inst->pc);
-        setPredicate(true);
+        setPredicate(inst->readPredicate());
+        setMemAccPredicate(inst->readMemAccPredicate());
          thread.setIntReg(TheISA::ZeroReg, 0);
  #if THE_ISA == ALPHA_ISA
          thread.setFloatReg(TheISA::ZeroReg, 0);
  #endif
      }
  
+    ~ExecContext()
+    {
+        inst->setPredicate(readPredicate());
+        inst->setMemAccPredicate(readMemAccPredicate());
+    }
+
      Fault
      initiateMemRead(Addr addr, unsigned int size,
-                    Request::Flags flags) override
+                    Request::Flags flags,
+                    const std::vector<bool>& byteEnable = std::vector<bool>())
+        override
      {
          execute.getLSQ().pushRequest(inst, true /* load */, nullptr,
-            size, addr, flags, NULL, nullptr);
+            size, addr, flags, nullptr, nullptr, byteEnable);
          return NoFault;
      }
  
      Fault
      writeMem(uint8_t *data, unsigned int size, Addr addr,
-             Request::Flags flags, uint64_t *res) override
+             Request::Flags flags, uint64_t *res,
+             const std::vector<bool>& byteEnable = std::vector<bool>())
+        override
      {
+        assert(byteEnable.empty() || byteEnable.size() == size);
          execute.getLSQ().pushRequest(inst, false /* store */, data,
-            size, addr, flags, res, nullptr);
+            size, addr, flags, res, nullptr, byteEnable);
          return NoFault;
      }
  
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc

index 47f3cbc684cc8a6cc369abaf3b90253ceae22c9b..527eb2bc036fe79e0b5c26f5ccef2c4a7874fde6 100644 (file)
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014,2018 ARM Limited
   * All rights reserved
   *
   * The license below extends only to copyright in the software and shall
@@ -364,6 +364,8 @@ Execute::handleMemResponse(MinorDynInstPtr inst,
          DPRINTF(MinorMem, "Completing failed request inst: %s\n",
              *inst);
          use_context_predicate = false;
+        if (!context.readMemAccPredicate())
+            inst->staticInst->completeAcc(nullptr, &context, inst->traceData);
      } else if (packet->isError()) {
          DPRINTF(MinorMem, "Trying to commit error response: %s\n",
              *inst);
@@ -481,6 +483,10 @@ Execute::executeMemRefInst(MinorDynInstPtr inst, BranchData &branch,
          } else {
              /* Only set this if the instruction passed its
               * predicate */
+            if (!context.readMemAccPredicate()) {
+                DPRINTF(MinorMem, "No memory access for inst: %s\n", *inst);
+                assert(context.readPredicate());
+            }
              passed_predicate = context.readPredicate();
  
              /* Set predicate in tracing */
@@ -928,7 +934,7 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
                   *  until it gets to the head of inFlightInsts */
                  inst->canEarlyIssue = false;
                  /* Not completed as we'll come here again to pick up
-                *  the fault when we get to the end of the FU */
+                 * the fault when we get to the end of the FU */
                  completed_inst = false;
              } else {
                  DPRINTF(MinorExecute, "Fault in execute: %s\n",
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc

index 6fe6c3738f93e65e71b83978e532b86fee3ce6a9..1d9f17e8d9a5e1c2c65e177d3bfa1f18c7dc4f7f 100644 (file)
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2013-2014,2017 ARM Limited
+ * Copyright (c) 2013-2014,2017-2018 ARM Limited
   * All rights reserved
   *
   * The license below extends only to copyright in the software and shall
@@ -49,27 +49,13 @@
  #include "cpu/minor/exec_context.hh"
  #include "cpu/minor/execute.hh"
  #include "cpu/minor/pipeline.hh"
+#include "cpu/utils.hh"
  #include "debug/Activity.hh"
  #include "debug/MinorMem.hh"
  
  namespace Minor
  {
  
-/** Returns the offset of addr into an aligned a block of size block_size */
-static Addr
-addrBlockOffset(Addr addr, unsigned int block_size)
-{
-    return addr & (block_size - 1);
-}
-
-/** Returns true if the given [addr .. addr+size-1] transfer needs to be
- *  fragmented across a block size of block_size */
-static bool
-transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
-{
-    return (addrBlockOffset(addr, block_size) + size) > block_size;
-}
-
  LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
      PacketDataPtr data_, uint64_t *res_) :
      SenderState(),
@@ -88,6 +74,13 @@ LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
      request = std::make_shared<Request>();
  }
  
+void
+LSQ::LSQRequest::disableMemAccess()
+{
+    port.cpu.threads[inst->id.threadId]->setMemAccPredicate(false);
+    DPRINTFS(MinorMem, (&port), "Disable mem access for inst:%s\n", *inst);
+}
+
  LSQ::AddrRangeCoverage
  LSQ::LSQRequest::containsAddrRangeOf(
      Addr req1_addr, unsigned int req1_size,
@@ -256,16 +249,23 @@ LSQ::SingleDataRequest::startAddrTranslation()
      ThreadContext *thread = port.cpu.getContext(
          inst->id.threadId);
  
-    port.numAccessesInDTLB++;
+    const auto &byteEnable = request->getByteEnable();
+    if (byteEnable.size() == 0 ||
+        isAnyActiveElement(byteEnable.cbegin(), byteEnable.cend())) {
+        port.numAccessesInDTLB++;
  
-    setState(LSQ::LSQRequest::InTranslation);
+        setState(LSQ::LSQRequest::InTranslation);
  
-    DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
-    /* Submit the translation request.  The response will come through
-     *  finish/markDelayed on the LSQRequest as it bears the Translation
-     *  interface */
-    thread->getDTBPtr()->translateTiming(
-        request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
+        DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
+        /* Submit the translation request.  The response will come through
+         *  finish/markDelayed on the LSQRequest as it bears the Translation
+         *  interface */
+        thread->getDTBPtr()->translateTiming(
+            request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
+    } else {
+        disableMemAccess();
+        setState(LSQ::LSQRequest::Complete);
+    }
  }
  
  void
@@ -357,6 +357,8 @@ LSQ::SplitDataRequest::makeFragmentRequests()
      unsigned int fragment_size;
      Addr fragment_addr;
  
+    std::vector<bool> fragment_write_byte_en;
+
      /* Assume that this transfer is across potentially many block snap
       * boundaries:
       *
@@ -401,6 +403,9 @@ LSQ::SplitDataRequest::makeFragmentRequests()
      /* Just past the last address in the request */
      Addr end_addr = base_addr + whole_size;
  
+    auto& byte_enable = request->getByteEnable();
+    unsigned int num_disabled_fragments = 0;
+
      for (unsigned int fragment_index = 0; fragment_index < numFragments;
           fragment_index++)
      {
@@ -421,32 +426,58 @@ LSQ::SplitDataRequest::makeFragmentRequests()
          }
  
          RequestPtr fragment = std::make_shared<Request>();
+        bool disabled_fragment = false;
  
          fragment->setContext(request->contextId());
-        fragment->setVirt(0 /* asid */,
-            fragment_addr, fragment_size, request->getFlags(),
-            request->masterId(),
-            request->getPC());
+        if (byte_enable.empty()) {
+            fragment->setVirt(0 /* asid */,
+                fragment_addr, fragment_size, request->getFlags(),
+                request->masterId(),
+                request->getPC());
+        } else {
+            // Set up byte-enable mask for the current fragment
+            auto it_start = byte_enable.begin() +
+                (fragment_addr - base_addr);
+            auto it_end = byte_enable.begin() +
+                (fragment_addr - base_addr) + fragment_size;
+            if (isAnyActiveElement(it_start, it_end)) {
+                fragment->setVirt(0 /* asid */,
+                    fragment_addr, fragment_size, request->getFlags(),
+                    request->masterId(),
+                    request->getPC());
+                fragment->setByteEnable(std::vector<bool>(it_start, it_end));
+            } else {
+                disabled_fragment = true;
+            }
+        }
  
-        DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x size: %d"
-            " (whole request addr: 0x%x size: %d) %s\n",
-            fragment_addr, fragment_size, base_addr, whole_size,
-            (is_last_fragment ? "last fragment" : ""));
+        if (!disabled_fragment) {
+            DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x"
+                " size: %d (whole request addr: 0x%x size: %d) %s\n",
+                fragment_addr, fragment_size, base_addr, whole_size,
+                (is_last_fragment ? "last fragment" : ""));
  
-        fragment_addr += fragment_size;
+            fragmentRequests.push_back(fragment);
+        } else {
+            num_disabled_fragments++;
+        }
  
-        fragmentRequests.push_back(fragment);
+        fragment_addr += fragment_size;
      }
+    assert(numFragments >= num_disabled_fragments);
+    numFragments -= num_disabled_fragments;
  }
  
  void
  LSQ::SplitDataRequest::makeFragmentPackets()
  {
+    assert(numTranslatedFragments > 0);
      Addr base_addr = request->getVaddr();
  
      DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst);
  
-    for (unsigned int fragment_index = 0; fragment_index < numFragments;
+    for (unsigned int fragment_index = 0;
+         fragment_index < numTranslatedFragments;
           fragment_index++)
      {
          RequestPtr fragment = fragmentRequests[fragment_index];
@@ -490,28 +521,32 @@ LSQ::SplitDataRequest::makeFragmentPackets()
  void
  LSQ::SplitDataRequest::startAddrTranslation()
  {
-    setState(LSQ::LSQRequest::InTranslation);
-
      makeFragmentRequests();
  
-    numInTranslationFragments = 0;
-    numTranslatedFragments = 0;
+    if (numFragments > 0) {
+        setState(LSQ::LSQRequest::InTranslation);
+        numInTranslationFragments = 0;
+        numTranslatedFragments = 0;
  
-    /* @todo, just do these in sequence for now with
-     * a loop of:
-     * do {
-     *  sendNextFragmentToTranslation ; translateTiming ; finish
-     * } while (numTranslatedFragments != numFragments);
-     */
+        /* @todo, just do these in sequence for now with
+         * a loop of:
+         * do {
+         *  sendNextFragmentToTranslation ; translateTiming ; finish
+         * } while (numTranslatedFragments != numFragments);
+         */
  
-    /* Do first translation */
-    sendNextFragmentToTranslation();
+        /* Do first translation */
+        sendNextFragmentToTranslation();
+    } else {
+        disableMemAccess();
+        setState(LSQ::LSQRequest::Complete);
+    }
  }
  
  PacketPtr
  LSQ::SplitDataRequest::getHeadPacket()
  {
-    assert(numIssuedFragments < numFragments);
+    assert(numIssuedFragments < numTranslatedFragments);
  
      return fragmentPackets[numIssuedFragments];
  }
@@ -519,7 +554,7 @@ LSQ::SplitDataRequest::getHeadPacket()
  void
  LSQ::SplitDataRequest::stepToNextPacket()
  {
-    assert(numIssuedFragments < numFragments);
+    assert(numIssuedFragments < numTranslatedFragments);
  
      numIssuedFragments++;
  }
@@ -527,14 +562,13 @@ LSQ::SplitDataRequest::stepToNextPacket()
  void
  LSQ::SplitDataRequest::retireResponse(PacketPtr response)
  {
-    assert(numRetiredFragments < numFragments);
+    assert(numRetiredFragments < numTranslatedFragments);
  
      DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
-        " offset: 0x%x (retired fragment num: %d) %s\n",
+        " offset: 0x%x (retired fragment num: %d)\n",
          response->req->getVaddr(), response->req->getSize(),
          request->getVaddr() - response->req->getVaddr(),
-        numRetiredFragments,
-        (fault == NoFault ? "" : fault->name()));
+        numRetiredFragments);
  
      numRetiredFragments++;
  
@@ -573,7 +607,7 @@ LSQ::SplitDataRequest::retireResponse(PacketPtr response)
              packet->makeResponse();
      }
  
-    if (numRetiredFragments == numFragments)
+    if (numRetiredFragments == numTranslatedFragments)
          setState(Complete);
  
      if (!skipped && isComplete()) {
@@ -1477,7 +1511,8 @@ LSQ::needsToTick()
  void
  LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
                   unsigned int size, Addr addr, Request::Flags flags,
-                 uint64_t *res, AtomicOpFunctor *amo_op)
+                 uint64_t *res, AtomicOpFunctor *amo_op,
+                 const std::vector<bool>& byteEnable)
  {
      bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
  
@@ -1533,6 +1568,9 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
          addr, size, flags, cpu.dataMasterId(),
          /* I've no idea why we need the PC, but give it */
          inst->pc.instAddr(), amo_op);
+    if (!byteEnable.empty()) {
+        request->request->setByteEnable(byteEnable);
+    }
  
      requests.push(request);
      request->startAddrTranslation();
diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh

index 11fa8774f2e4545e623a422e753f4dfe25d78989..23b47c53c3995058306ea0c64ae12bdd86607357 100644 (file)
--- a/src/cpu/minor/lsq.hh
+++ b/src/cpu/minor/lsq.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014, 2018 ARM Limited
   * All rights reserved
   *
   * The license below extends only to copyright in the software and shall
@@ -188,6 +188,8 @@ class LSQ : public Named
          /** BaseTLB::Translation interface */
          void markDelayed() { }
  
+        void disableMemAccess();
+
        public:
          LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
              PacketDataPtr data_ = NULL, uint64_t *res_ = NULL);
@@ -441,7 +443,8 @@ class LSQ : public Named
          { return numIssuedFragments != numRetiredFragments; }
  
          /** Have we stepped past the end of fragmentPackets? */
-        bool sentAllPackets() { return numIssuedFragments == numFragments; }
+        bool sentAllPackets()
+        { return numIssuedFragments == numTranslatedFragments; }
  
          /** For loads, paste the response data into the main
           *  response packet */
@@ -700,7 +703,9 @@ class LSQ : public Named
       *  the LSQ */
      void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,
-                     uint64_t *res, AtomicOpFunctor *amo_op);
+                     uint64_t *res, AtomicOpFunctor *amo_op,
+                     const std::vector<bool>& byteEnable =
+                         std::vector<bool>());
  
      /** Push a predicate failed-representing request into the queues just
       *  to maintain commit order */
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh

index c754fe8cf4ce7a14beaa8c186621ebb29507b6cc..db8fca20adb380aa0d92c581dcbd9fece4532745 100644 (file)
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -787,10 +787,13 @@ class FullO3CPU : public BaseO3CPU
      /** CPU pushRequest function, forwards request to LSQ. */
      Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                        unsigned int size, Addr addr, Request::Flags flags,
-                      uint64_t *res, AtomicOpFunctor *amo_op = nullptr)
+                      uint64_t *res, AtomicOpFunctor *amo_op = nullptr,
+                      const std::vector<bool>& byteEnable =
+                          std::vector<bool>())
+
      {
          return iew.ldstQueue.pushRequest(inst, isLoad, data, size, addr,
-                flags, res, amo_op);
+                flags, res, amo_op, byteEnable);
      }
  
      /** CPU read function, forwards read to LSQ. */
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh

index a6037b7f4b734f748bef67bbbbd2f1da4c2d7aac..84f1411a576594f69c9c9798a4d8e1153f1452ac 100644 (file)
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -50,6 +50,7 @@
  #include "arch/generic/tlb.hh"
  #include "cpu/inst_seq.hh"
  #include "cpu/o3/lsq_unit.hh"
+#include "cpu/utils.hh"
  #include "enums/SMTQueuePolicy.hh"
  #include "mem/port.hh"
  #include "sim/sim_object.hh"
@@ -251,6 +252,7 @@ class LSQ
          const Addr _addr;
          const uint32_t _size;
          const Request::Flags _flags;
+        std::vector<bool> _byteEnable;
          uint32_t _numOutstandingPackets;
          AtomicOpFunctor *_amo_op;
        protected:
@@ -351,6 +353,28 @@ class LSQ
              }
          }
  
+        /** Helper function used to add a (sub)request, given its address
+         * `addr`, size `size` and byte-enable mask `byteEnable`.
+         *
+         * The request is only added if the mask is empty or if there is at
+         * least an active element in it.
+         */
+        void
+        addRequest(Addr addr, unsigned size,
+                   const std::vector<bool>& byteEnable)
+        {
+            if (byteEnable.empty() ||
+                isAnyActiveElement(byteEnable.begin(), byteEnable.end())) {
+                auto request = std::make_shared<Request>(_inst->getASID(),
+                        addr, size, _flags, _inst->masterId(),
+                        _inst->instAddr(), _inst->contextId());
+                if (!byteEnable.empty()) {
+                    request->setByteEnable(byteEnable);
+                }
+                _requests.push_back(request);
+            }
+        }
+
          /** Destructor.
           * The LSQRequest owns the request. If the packet has already been
           * sent, the sender state will be deleted upon receiving the reply.
@@ -609,11 +633,17 @@ class LSQ
           * declaration of the names in the parent class. */
          using Flag = typename LSQRequest::Flag;
          using State = typename LSQRequest::State;
+        using LSQRequest::_addr;
          using LSQRequest::_fault;
+        using LSQRequest::_flags;
+        using LSQRequest::_size;
+        using LSQRequest::_byteEnable;
+        using LSQRequest::_requests;
          using LSQRequest::_inst;
          using LSQRequest::_packets;
          using LSQRequest::_port;
          using LSQRequest::_res;
+        using LSQRequest::_taskId;
          using LSQRequest::_senderState;
          using LSQRequest::_state;
          using LSQRequest::flags;
@@ -635,14 +665,8 @@ class LSQ
                            uint64_t* res = nullptr,
                            AtomicOpFunctor* amo_op = nullptr) :
              LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
-                       amo_op)
-        {
-            LSQRequest::_requests.push_back(
-                    std::make_shared<Request>(inst->getASID(), addr, size,
-                    flags_, inst->masterId(), inst->instAddr(),
-                    inst->contextId(), amo_op));
-            LSQRequest::_requests.back()->setReqInstSeqNum(inst->seqNum);
-        }
+                       amo_op) {}
+
          inline virtual ~SingleDataRequest() {}
          virtual void initiateTranslation();
          virtual void finish(const Fault &fault, const RequestPtr &req,
@@ -671,6 +695,7 @@ class LSQ
          using LSQRequest::_port;
          using LSQRequest::_requests;
          using LSQRequest::_res;
+        using LSQRequest::_byteEnable;
          using LSQRequest::_senderState;
          using LSQRequest::_size;
          using LSQRequest::_state;
@@ -691,14 +716,14 @@ class LSQ
          RequestPtr mainReq;
          PacketPtr _mainPacket;
  
-
        public:
          SplitDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
                           const Addr& addr, const uint32_t& size,
                           const Request::Flags & flags_,
                           PacketDataPtr data = nullptr,
                           uint64_t* res = nullptr) :
-            LSQRequest(port, inst, isLoad, addr, size, flags_, data, res),
+            LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
+                       nullptr),
              numFragments(0),
              numReceivedPackets(0),
              mainReq(nullptr),
@@ -949,7 +974,8 @@ class LSQ
  
      Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                        unsigned int size, Addr addr, Request::Flags flags,
-                      uint64_t *res, AtomicOpFunctor *amo_op);
+                      uint64_t *res, AtomicOpFunctor *amo_op,
+                      const std::vector<bool>& byteEnable);
  
      /** The CPU pointer. */
      O3CPU *cpu;
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh

index 7327120297c960a6eb95b46288d30421c50a691b..70621a5237562af4e012c5b7f8dc41926c12fe58 100644 (file)
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -681,29 +681,12 @@ LSQ<Impl>::dumpInsts() const
      }
  }
  
-static Addr
-addrBlockOffset(Addr addr, unsigned int block_size)
-{
-    return addr & (block_size - 1);
-}
-
-static Addr
-addrBlockAlign(Addr addr, uint64_t block_size)
-{
-    return addr & ~(block_size - 1);
-}
-
-static bool
-transferNeedsBurst(Addr addr, uint64_t size, uint64_t block_size)
-{
-    return (addrBlockOffset(addr, block_size) + size) > block_size;
-}
-
  template<class Impl>
  Fault
  LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                         unsigned int size, Addr addr, Request::Flags flags,
-                       uint64_t *res, AtomicOpFunctor *amo_op)
+                       uint64_t *res, AtomicOpFunctor *amo_op,
+                       const std::vector<bool>& byteEnable)
  {
      // This comming request can be either load, store or atomic.
      // Atomic request has a corresponding pointer to its atomic memory
@@ -735,6 +718,9 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                      size, flags, data, res, amo_op);
          }
          assert(req);
+        if (!byteEnable.empty()) {
+            req->_byteEnable = byteEnable;
+        }
          inst->setRequest();
          req->taskId(cpu->taskId());
  
@@ -756,6 +742,7 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
              else
                  inst->getFault() = cpu->write(req, data, inst->sqIdx);
          } else if (isLoad) {
+            inst->setMemAccPredicate(false);
              // Commit will have to clean up whatever happened.  Set this
              // instruction as executed.
              inst->setExecuted();
@@ -848,14 +835,21 @@ template<class Impl>
  void
  LSQ<Impl>::SingleDataRequest::initiateTranslation()
  {
-    _inst->translationStarted(true);
-    setState(State::Translation);
-    flags.set(Flag::TranslationStarted);
+    assert(_requests.size() == 0);
  
-    _inst->savedReq = this;
-    sendFragmentToTranslation(0);
+    this->addRequest(_addr, _size, _byteEnable);
  
-    if (isTranslationComplete()) {
+    if (_requests.size() > 0) {
+        _requests.back()->setReqInstSeqNum(_inst->seqNum);
+        _requests.back()->taskId(_taskId);
+        _inst->translationStarted(true);
+        setState(State::Translation);
+        flags.set(Flag::TranslationStarted);
+
+        _inst->savedReq = this;
+        sendFragmentToTranslation(0);
+    } else {
+        _inst->setMemAccPredicate(false);
      }
  }
  
@@ -877,11 +871,7 @@ template<class Impl>
  void
  LSQ<Impl>::SplitDataRequest::initiateTranslation()
  {
-    _inst->translationStarted(true);
-    setState(State::Translation);
-    flags.set(Flag::TranslationStarted);
-
-    unsigned int cacheLineSize = _port.cacheLineSize();
+    auto cacheLineSize = _port.cacheLineSize();
      Addr base_addr = _addr;
      Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
      Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
@@ -890,6 +880,9 @@ LSQ<Impl>::SplitDataRequest::initiateTranslation()
      mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
                  _size, _flags, _inst->masterId(),
                  _inst->instAddr(), _inst->contextId());
+    if (!_byteEnable.empty()) {
+        mainReq->setByteEnable(_byteEnable);
+    }
  
      // Paddr is not used in mainReq. However, we will accumulate the flags
      // from the sub requests into mainReq by calling setFlags() in finish().
@@ -898,39 +891,63 @@ LSQ<Impl>::SplitDataRequest::initiateTranslation()
      mainReq->setPaddr(0);
  
      /* Get the pre-fix, possibly unaligned. */
-    _requests.push_back(std::make_shared<Request>(_inst->getASID(), base_addr,
-                next_addr - base_addr, _flags, _inst->masterId(),
-                _inst->instAddr(), _inst->contextId()));
+    if (_byteEnable.empty()) {
+        this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
+    } else {
+        auto it_start = _byteEnable.begin();
+        auto it_end = _byteEnable.begin() + (next_addr - base_addr);
+        this->addRequest(base_addr, next_addr - base_addr,
+                         std::vector<bool>(it_start, it_end));
+    }
      size_so_far = next_addr - base_addr;
  
      /* We are block aligned now, reading whole blocks. */
      base_addr = next_addr;
      while (base_addr != final_addr) {
-        _requests.push_back(std::make_shared<Request>(_inst->getASID(),
-                    base_addr, cacheLineSize, _flags, _inst->masterId(),
-                    _inst->instAddr(), _inst->contextId()));
+        if (_byteEnable.empty()) {
+            this->addRequest(base_addr, cacheLineSize, _byteEnable);
+        } else {
+            auto it_start = _byteEnable.begin() + size_so_far;
+            auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
+            this->addRequest(base_addr, cacheLineSize,
+                             std::vector<bool>(it_start, it_end));
+        }
          size_so_far += cacheLineSize;
          base_addr += cacheLineSize;
      }
  
      /* Deal with the tail. */
      if (size_so_far < _size) {
-        _requests.push_back(std::make_shared<Request>(_inst->getASID(),
-                    base_addr, _size - size_so_far, _flags, _inst->masterId(),
-                    _inst->instAddr(), _inst->contextId()));
+        if (_byteEnable.empty()) {
+            this->addRequest(base_addr, _size - size_so_far, _byteEnable);
+        } else {
+            auto it_start = _byteEnable.begin() + size_so_far;
+            auto it_end = _byteEnable.end();
+            this->addRequest(base_addr, _size - size_so_far,
+                             std::vector<bool>(it_start, it_end));
+        }
      }
  
-    /* Setup the requests and send them to translation. */
-    for (auto& r: _requests) {
-        r->setReqInstSeqNum(_inst->seqNum);
-        r->taskId(_taskId);
-    }
-    this->_inst->savedReq = this;
-    numInTranslationFragments = 0;
-    numTranslatedFragments = 0;
+    if (_requests.size() > 0) {
+        /* Setup the requests and send them to translation. */
+        for (auto& r: _requests) {
+            r->setReqInstSeqNum(_inst->seqNum);
+            r->taskId(_taskId);
+        }
  
-    for (uint32_t i = 0; i < _requests.size(); i++) {
-        sendFragmentToTranslation(i);
+        _inst->translationStarted(true);
+        setState(State::Translation);
+        flags.set(Flag::TranslationStarted);
+        this->_inst->savedReq = this;
+        numInTranslationFragments = 0;
+        numTranslatedFragments = 0;
+        _fault.resize(_requests.size());
+
+        for (uint32_t i = 0; i < _requests.size(); i++) {
+            sendFragmentToTranslation(i);
+        }
+    } else {
+        _inst->setMemAccPredicate(false);
      }
  }
  
@@ -968,8 +985,6 @@ LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
      while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
          pktIdx++;
      assert(pktIdx < _packets.size());
-    assert(pkt->req == _requests[pktIdx]);
-    assert(pkt == _packets[pktIdx]);
      numReceivedPackets++;
      state->outstanding--;
      if (numReceivedPackets == _packets.size()) {
@@ -1012,16 +1027,19 @@ void
  LSQ<Impl>::SplitDataRequest::buildPackets()
  {
      /* Extra data?? */
-    ptrdiff_t offset = 0;
+    Addr base_address = _addr;
+
      if (_packets.size() == 0) {
          /* New stuff */
          if (isLoad()) {
              _mainPacket = Packet::createRead(mainReq);
              _mainPacket->dataStatic(_inst->memData);
          }
-        for (auto& r: _requests) {
+        for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
+            RequestPtr r = _requests[i];
              PacketPtr pkt = isLoad() ? Packet::createRead(r)
-                                    : Packet::createWrite(r);
+                                     : Packet::createWrite(r);
+            ptrdiff_t offset = r->getVaddr() - base_address;
              if (isLoad()) {
                  pkt->dataStatic(_inst->memData + offset);
              } else {
@@ -1031,12 +1049,11 @@ LSQ<Impl>::SplitDataRequest::buildPackets()
                          r->getSize());
                  pkt->dataDynamic(req_data);
              }
-            offset += r->getSize();
              pkt->senderState = _senderState;
              _packets.push_back(pkt);
          }
      }
-    assert(_packets.size() == _requests.size());
+    assert(_packets.size() > 0);
  }
  
  template<class Impl>
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh

index 9323e863466f9ff36040bb3b887a5b5ea83a4c0c..21bed99fa25f7e19073f63222d86cfac6c04844e 100644 (file)
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -542,8 +542,7 @@ LSQUnit<Impl>::executeLoad(const DynInstPtr &inst)
  
      load_fault = inst->initiateAcc();
  
-    if (!inst->readMemAccPredicate()) {
-        assert(load_fault == NoFault);
+    if (load_fault == NoFault && !inst->readMemAccPredicate()) {
          assert(inst->readPredicate());
          inst->setExecuted();
          inst->completeAcc(nullptr);
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc

index caf2427efef57c50b2e1c739de7fbcec87457d47..c5b024532cd2b7b021b52fd594b4e5d265e1e019 100644 (file)
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -49,6 +49,7 @@
  #include "base/output.hh"
  #include "config/the_isa.hh"
  #include "cpu/exetrace.hh"
+#include "cpu/utils.hh"
  #include "debug/Drain.hh"
  #include "debug/ExecFaulting.hh"
  #include "debug/SimpleCPU.hh"
@@ -333,9 +334,43 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
      }
  }
  
+bool
+AtomicSimpleCPU::genMemFragmentRequest(const RequestPtr& req, Addr frag_addr,
+                                       int size, Request::Flags flags,
+                                       const std::vector<bool>& byte_enable,
+                                       int& frag_size, int& size_left) const
+{
+    bool predicate = true;
+    Addr inst_addr = threadInfo[curThread]->thread->pcState().instAddr();
+
+    frag_size = std::min(
+        cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize()),
+        (Addr) size_left);
+    size_left -= frag_size;
+
+    if (!byte_enable.empty()) {
+        // Set up byte-enable mask for the current fragment
+        auto it_start = byte_enable.begin() + (size - (frag_size + size_left));
+        auto it_end = byte_enable.begin() + (size - size_left);
+        if (isAnyActiveElement(it_start, it_end)) {
+            req->setVirt(0, frag_addr, frag_size, flags, dataMasterId(),
+                         inst_addr);
+            req->setByteEnable(std::vector<bool>(it_start, it_end));
+        } else {
+            predicate = false;
+        }
+    } else {
+        req->setVirt(0, frag_addr, frag_size, flags, dataMasterId(),
+                     inst_addr);
+    }
+
+    return predicate;
+}
+
  Fault
  AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size,
-                         Request::Flags flags)
+                         Request::Flags flags,
+                         const std::vector<bool>& byteEnable)
  {
      SimpleExecContext& t_info = *threadInfo[curThread];
      SimpleThread* thread = t_info.thread;
@@ -346,28 +381,29 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size,
      if (traceData)
          traceData->setMem(addr, size, flags);
  
-    //The size of the data we're trying to read.
-    int fullSize = size;
-
-    //The address of the second part of this access if it needs to be split
-    //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
-
-    if (secondAddr > addr)
-        size = secondAddr - addr;
-
      dcache_latency = 0;
  
      req->taskId(taskId());
+
+    Addr frag_addr = addr;
+    int frag_size = 0;
+    int size_left = size;
+    bool predicate;
+    Fault fault = NoFault;
+
      while (1) {
-        req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
+        predicate = genMemFragmentRequest(req, frag_addr, size, flags,
+                                          byteEnable, frag_size, size_left);
  
          // translate to physical address
-        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
-                                                          BaseTLB::Read);
+        if (predicate) {
+            fault = thread->dtb->translateAtomic(req, thread->getTC(),
+                                                 BaseTLB::Read);
+        }
  
          // Now do the access.
-        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
+        if (predicate && fault == NoFault &&
+            !req->getFlags().isSet(Request::NO_ACCESS)) {
              Packet pkt(req, Packet::makeReadCmd(req));
              pkt.dataStatic(data);
  
@@ -394,33 +430,29 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size,
              }
          }
  
-        //If we don't need to access a second cache line, stop now.
-        if (secondAddr <= addr)
-        {
+        // If we don't need to access further cache lines, stop now.
+        if (size_left == 0) {
              if (req->isLockedRMW() && fault == NoFault) {
                  assert(!locked);
                  locked = true;
              }
-
              return fault;
          }
  
          /*
-         * Set up for accessing the second cache line.
+         * Set up for accessing the next cache line.
           */
+        frag_addr += frag_size;
  
          //Move the pointer we're reading into to the correct location.
-        data += size;
-        //Adjust the size to get the remaining bytes.
-        size = addr + fullSize - secondAddr;
-        //And access the right address.
-        addr = secondAddr;
+        data += frag_size;
      }
  }
  
  Fault
  AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
-                          Request::Flags flags, uint64_t *res)
+                          Request::Flags flags, uint64_t *res,
+                          const std::vector<bool>& byteEnable)
  {
      SimpleExecContext& t_info = *threadInfo[curThread];
      SimpleThread* thread = t_info.thread;
@@ -439,32 +471,37 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
      if (traceData)
          traceData->setMem(addr, size, flags);
  
-    //The size of the data we're trying to read.
-    int fullSize = size;
-
-    //The address of the second part of this access if it needs to be split
-    //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
-
-    if (secondAddr > addr)
-        size = secondAddr - addr;
-
      dcache_latency = 0;
  
      req->taskId(taskId());
+
+    Addr frag_addr = addr;
+    int frag_size = 0;
+    int size_left = size;
+    int curr_frag_id = 0;
+    bool predicate;
+    Fault fault = NoFault;
+
      while (1) {
-        req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
+        predicate = genMemFragmentRequest(req, frag_addr, size, flags,
+                                          byteEnable, frag_size, size_left);
  
          // translate to physical address
-        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(), BaseTLB::Write);
+        if (predicate)
+            fault = thread->dtb->translateAtomic(req, thread->getTC(),
+                                                 BaseTLB::Write);
  
          // Now do the access.
-        if (fault == NoFault) {
+        if (predicate && fault == NoFault) {
              bool do_access = true;  // flag to suppress cache access
  
              if (req->isLLSC()) {
-                do_access = TheISA::handleLockedWrite(thread, req, dcachePort.cacheBlockMask);
+                assert(curr_frag_id == 0);
+                do_access =
+                    TheISA::handleLockedWrite(thread, req,
+                                              dcachePort.cacheBlockMask);
              } else if (req->isSwap()) {
+                assert(curr_frag_id == 0);
                  if (req->isCondSwap()) {
                      assert(res);
                      req->setExtraData(*res);
@@ -488,8 +525,8 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
                  assert(!pkt.isError());
  
                  if (req->isSwap()) {
-                    assert(res);
-                    memcpy(res, pkt.getConstPtr<uint8_t>(), fullSize);
+                    assert(res && curr_frag_id == 0);
+                    memcpy(res, pkt.getConstPtr<uint8_t>(), size);
                  }
              }
  
@@ -500,14 +537,14 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
  
          //If there's a fault or we don't need to access a second cache line,
          //stop now.
-        if (fault != NoFault || secondAddr <= addr)
+        if (fault != NoFault || size_left == 0)
          {
              if (req->isLockedRMW() && fault == NoFault) {
-                assert(locked);
+                assert(byteEnable.empty());
+                assert(locked && curr_frag_id == 0);
                  locked = false;
              }
  
-
              if (fault != NoFault && req->isPrefetch()) {
                  return NoFault;
              } else {
@@ -516,15 +553,14 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
          }
  
          /*
-         * Set up for accessing the second cache line.
+         * Set up for accessing the next cache line.
           */
+        frag_addr += frag_size;
  
          //Move the pointer we're reading into to the correct location.
-        data += size;
-        //Adjust the size to get the remaining bytes.
-        size = addr + fullSize - secondAddr;
-        //And access the right address.
-        addr = secondAddr;
+        data += frag_size;
+
+        curr_frag_id++;
      }
  }
  
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh

index 84f3791212cf9ba999e23e58adb2c1a0a6d663a8..10030698177ea986cb25b76456df44e72661b8e9 100644 (file)
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -195,11 +195,36 @@ class AtomicSimpleCPU : public BaseSimpleCPU
      void activateContext(ThreadID thread_num) override;
      void suspendContext(ThreadID thread_num) override;
  
+    /**
+     * Helper function used to set up the request for a single fragment of a
+     * memory access.
+     *
+     * Takes care of setting up the appropriate byte-enable mask for the
+     * fragment, given the mask for the entire memory access.
+     *
+     * @param req Pointer to the Request object to populate.
+     * @param frag_addr Start address of the fragment.
+     * @param size Total size of the memory access in bytes.
+     * @param flags Request flags.
+     * @param byte_enable Byte-enable mask for the entire memory access.
+     * @param[out] frag_size Fragment size.
+     * @param[in,out] size_left Size left to be processed in the memory access.
+     * @return True if the byte-enable mask for the fragment is not all-false.
+     */
+    bool genMemFragmentRequest(const RequestPtr& req, Addr frag_addr,
+                               int size, Request::Flags flags,
+                               const std::vector<bool>& byte_enable,
+                               int& frag_size, int& size_left) const;
+
      Fault readMem(Addr addr, uint8_t *data, unsigned size,
-                  Request::Flags flags) override;
+                  Request::Flags flags,
+                  const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;
  
      Fault writeMem(uint8_t *data, unsigned size,
-                   Addr addr, Request::Flags flags, uint64_t *res) override;
+                   Addr addr, Request::Flags flags, uint64_t *res,
+                   const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;
  
      Fault amoMem(Addr addr, uint8_t* data, unsigned size,
                   Request::Flags flags, AtomicOpFunctor *amo_op) override;
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc

index 298ba9f9e9cf4e76399fe737d240ef0fd41da55c..816add707bef31eb977e66f4cb7c2ed86d3f4f8d 100644 (file)
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2010-2012, 2015, 2017 ARM Limited
+ * Copyright (c) 2010-2012, 2015, 2017, 2018 ARM Limited
   * Copyright (c) 2013 Advanced Micro Devices, Inc.
   * All rights reserved
   *
@@ -494,6 +494,10 @@ BaseSimpleCPU::preExecute()
      thread->setFloatReg(ZeroReg, 0);
  #endif // ALPHA_ISA
  
+    // resets predicates
+    t_info.setPredicate(true);
+    t_info.setMemAccPredicate(true);
+
      // check for instruction-count-based events
      comInstEventQueue[curThread]->serviceEvents(t_info.numInst);
      system->instEventQueue.serviceEvents(system->totalNumInsts);
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh

index 8060b07ad32e265dd7d9cbfd4618e50aa52d0874..5404e5df8d33a4835b9a07617a9c7eb65688c6c2 100644 (file)
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2011-2012,2015 ARM Limited
+ * Copyright (c) 2011-2012,2015,2018 ARM Limited
   * Copyright (c) 2013 Advanced Micro Devices, Inc.
   * All rights reserved
   *
@@ -143,15 +143,21 @@ class BaseSimpleCPU : public BaseCPU
      void startup() override;
  
      virtual Fault readMem(Addr addr, uint8_t* data, unsigned size,
-                          Request::Flags flags)
+                          Request::Flags flags,
+                          const std::vector<bool>& byteEnable =
+                              std::vector<bool>())
      { panic("readMem() is not implemented\n"); }
  
      virtual Fault initiateMemRead(Addr addr, unsigned size,
-                                  Request::Flags flags)
+                                  Request::Flags flags,
+                                  const std::vector<bool>& byteEnable =
+                                      std::vector<bool>())
      { panic("initiateMemRead() is not implemented\n"); }
  
      virtual Fault writeMem(uint8_t* data, unsigned size, Addr addr,
-                           Request::Flags flags, uint64_t* res)
+                           Request::Flags flags, uint64_t* res,
+                           const std::vector<bool>& byteEnable =
+                               std::vector<bool>())
      { panic("writeMem() is not implemented\n"); }
  
      virtual Fault amoMem(Addr addr, uint8_t* data, unsigned size,
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh

index be7a863c5fcfeee09a967aea91326f8466a33b6d..de98d6efd934c3ed34a1b4dcae43cb3f743c6c31 100644 (file)
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -434,26 +434,32 @@ class SimpleExecContext : public ExecContext {
          thread->pcState(val);
      }
  
-
      Fault
      readMem(Addr addr, uint8_t *data, unsigned int size,
-            Request::Flags flags) override
+            Request::Flags flags,
+            const std::vector<bool>& byteEnable = std::vector<bool>())
+        override
      {
-        return cpu->readMem(addr, data, size, flags);
+        return cpu->readMem(addr, data, size, flags, byteEnable);
      }
  
      Fault
      initiateMemRead(Addr addr, unsigned int size,
-                    Request::Flags flags) override
+                    Request::Flags flags,
+                    const std::vector<bool>& byteEnable = std::vector<bool>())
+        override
      {
-        return cpu->initiateMemRead(addr, size, flags);
+        return cpu->initiateMemRead(addr, size, flags, byteEnable);
      }
  
      Fault
      writeMem(uint8_t *data, unsigned int size, Addr addr,
-             Request::Flags flags, uint64_t *res) override
+             Request::Flags flags, uint64_t *res,
+             const std::vector<bool>& byteEnable = std::vector<bool>())
+        override
      {
-        return cpu->writeMem(data, size, addr, flags, res);
+        assert(byteEnable.empty() || byteEnable.size() == size);
+        return cpu->writeMem(data, size, addr, flags, res, byteEnable);
      }
  
      Fault amoMem(Addr addr, uint8_t *data, unsigned int size,
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc

index 637308a96454f38664ae13a1ccd0bffcb59517b4..454259099fd378e44d66d86f39ecbd1a806c806f 100644 (file)
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -1,6 +1,6 @@
  /*
   * Copyright 2014 Google, Inc.
- * Copyright (c) 2010-2013,2015,2017 ARM Limited
+ * Copyright (c) 2010-2013,2015,2017-2018 ARM Limited
   * All rights reserved
   *
   * The license below extends only to copyright in the software and shall
@@ -417,7 +417,8 @@ TimingSimpleCPU::buildSplitPacket(PacketPtr &pkt1, PacketPtr &pkt2,
  
  Fault
  TimingSimpleCPU::initiateMemRead(Addr addr, unsigned size,
-                                 Request::Flags flags)
+                                 Request::Flags flags,
+                                 const std::vector<bool>& byteEnable)
  {
      SimpleExecContext &t_info = *threadInfo[curThread];
      SimpleThread* thread = t_info.thread;
@@ -434,6 +435,9 @@ TimingSimpleCPU::initiateMemRead(Addr addr, unsigned size,
      RequestPtr req = std::make_shared<Request>(
          asid, addr, size, flags, dataMasterId(), pc,
          thread->contextId());
+    if (!byteEnable.empty()) {
+        req->setByteEnable(byteEnable);
+    }
  
      req->taskId(taskId());
  
@@ -491,7 +495,8 @@ TimingSimpleCPU::handleWritePacket()
  
  Fault
  TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
-                          Addr addr, Request::Flags flags, uint64_t *res)
+                          Addr addr, Request::Flags flags, uint64_t *res,
+                          const std::vector<bool>& byteEnable)
  {
      SimpleExecContext &t_info = *threadInfo[curThread];
      SimpleThread* thread = t_info.thread;
@@ -516,6 +521,9 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
      RequestPtr req = std::make_shared<Request>(
          asid, addr, size, flags, dataMasterId(), pc,
          thread->contextId());
+    if (!byteEnable.empty()) {
+        req->setByteEnable(byteEnable);
+    }
  
      req->taskId(taskId());
  
@@ -523,6 +531,10 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
      assert(split_addr <= addr || split_addr - addr < block_size);
  
      _status = DTBWaitResponse;
+
+    // TODO: TimingSimpleCPU doesn't support arbitrarily long multi-line mem.
+    // accesses yet
+
      if (split_addr > addr) {
          RequestPtr req1, req2;
          assert(!req->isLLSC() && !req->isSwap());
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh

index ce0a4dbfc1d6436d4566b7a058a6fca934d1a0fe..a49822fc1a236172e4438a9e796cdc3115ae9bce 100644 (file)
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2012-2013,2015 ARM Limited
+ * Copyright (c) 2012-2013,2015,2018 ARM Limited
   * All rights reserved
   *
   * The license below extends only to copyright in the software and shall
@@ -283,10 +283,14 @@ class TimingSimpleCPU : public BaseSimpleCPU
      void suspendContext(ThreadID thread_num) override;
  
      Fault initiateMemRead(Addr addr, unsigned size,
-                          Request::Flags flags) override;
+            Request::Flags flags,
+            const std::vector<bool>& byteEnable =std::vector<bool>())
+        override;
  
      Fault writeMem(uint8_t *data, unsigned size,
-                   Addr addr, Request::Flags flags, uint64_t *res) override;
+                   Addr addr, Request::Flags flags, uint64_t *res,
+                   const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;
  
      Fault initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
                           AtomicOpFunctor *amo_op) override;
diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc

index 9067e877b3fe3766a57ac15100085c1f89713587..0936e41ad1d48f293b3884c7cc0335c4b6820b52 100644 (file)
--- a/src/cpu/simple_thread.cc
+++ b/src/cpu/simple_thread.cc
@@ -77,7 +77,7 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys,
                             Process *_process, BaseTLB *_itb,
                             BaseTLB *_dtb, TheISA::ISA *_isa)
      : ThreadState(_cpu, _thread_num, _process), isa(_isa),
-      predicate(false), system(_sys),
+      predicate(true), memAccPredicate(true), system(_sys),
        itb(_itb), dtb(_dtb), decoder(TheISA::Decoder(_isa))
  {
      clearArchRegs();
@@ -87,8 +87,9 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys,
  SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys,
                             BaseTLB *_itb, BaseTLB *_dtb,
                             TheISA::ISA *_isa, bool use_kernel_stats)
-    : ThreadState(_cpu, _thread_num, NULL), isa(_isa), system(_sys), itb(_itb),
-      dtb(_dtb), decoder(TheISA::Decoder(_isa))
+    : ThreadState(_cpu, _thread_num, NULL), isa(_isa),
+      predicate(true), memAccPredicate(true), system(_sys),
+      itb(_itb), dtb(_dtb), decoder(TheISA::Decoder(_isa))
  {
      quiesceEvent = new EndQuiesceEvent(this);
  
diff --git a/src/cpu/utils.hh b/src/cpu/utils.hh

new file mode 100644 (file)

index 0000000..4c13181
--- /dev/null
+++ b/src/cpu/utils.hh
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Bardsley
+ */
+
+#ifndef __CPU_UTILS_HH__
+#define __CPU_UTILS_HH__
+
+#include "base/types.hh"
+
+/**
+ * Calculates the offset of a given address wrt aligned fixed-size blocks.
+ * @param addr Input address.
+ * @param block_size Block size in bytes.
+ * @return Offset of the given address in bytes.
+ */
+inline Addr
+addrBlockOffset(Addr addr, Addr block_size)
+{
+    return addr & (block_size - 1);
+}
+
+/**
+ * Returns the address of the closest aligned fixed-size block to the given
+ * address.
+ * @param addr Input address.
+ * @param block_size Block size in bytes.
+ * @return Address of the closest aligned block.
+ */
+inline Addr
+addrBlockAlign(Addr addr, Addr block_size)
+{
+    return addr & ~(block_size - 1);
+}
+
+/**
+ * Returns true if the given memory access (address, size) needs to be
+ * fragmented across aligned fixed-size blocks.
+ * @param addr Address of the memory access.
+ * @param size Size of the memory access.
+ * @param block_size Block size in bytes.
+ * @return True if the memory access needs to be fragmented.
+ */
+inline bool
+transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
+{
+    return (addrBlockOffset(addr, block_size) + size) > block_size;
+}
+
+/**
+ * Test if there is any active element in an enablement range.
+ */
+inline bool
+isAnyActiveElement(const std::vector<bool>::const_iterator& it_start,
+                   const std::vector<bool>::const_iterator& it_end)
+{
+    auto it_tmp = it_start;
+    for (;it_tmp != it_end && !(*it_tmp); ++it_tmp);
+    return (it_tmp != it_end);
+}
+
+#endif // __CPU_UTILS_HH__
diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc

index f7b02ce17675dd633525dbdce73159c191455705..a998530fdfe7c7c0b05b59c9308521e96b5b0ecb 100644 (file)
--- a/src/mem/abstract_mem.cc
+++ b/src/mem/abstract_mem.cc
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2010-2012,2017 ARM Limited
+ * Copyright (c) 2010-2012,2017-2018 ARM Limited
   * All rights reserved
   *
   * The license below extends only to copyright in the software and shall
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc

index 494a998a5185cc219b11259e9773462783f3b3f7..b72ff426185df11ef9b10c00ce3c8d9c8ca48d1d 100644 (file)
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -297,7 +297,8 @@ Cache::promoteWholeLineWrites(PacketPtr pkt)
  {
      // Cache line clearing instructions
      if (doFastWrites && (pkt->cmd == MemCmd::WriteReq) &&
-        (pkt->getSize() == blkSize) && (pkt->getOffset(blkSize) == 0)) {
+        (pkt->getSize() == blkSize) && (pkt->getOffset(blkSize) == 0) &&
+        !pkt->isMaskedWrite()) {
          pkt->cmd = MemCmd::WriteLineReq;
          DPRINTF(Cache, "packet promoted from Write to WriteLineReq\n");
      }
diff --git a/src/mem/packet.hh b/src/mem/packet.hh

index 93b3ad5deb08347c2b03c0b6acc7182af8a06fbf..130cc41adc5df6ee23de8bfdbae8d22ed2a32312 100644 (file)
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -1092,6 +1092,7 @@ class Packet : public Printable
      getPtr()
      {
          assert(flags.isSet(STATIC_DATA|DYNAMIC_DATA));
+        assert(!isMaskedWrite());
          return (T*)data;
      }
  
@@ -1180,10 +1181,11 @@ class Packet : public Printable
          // same pointer from source to destination and back
          assert(p != getPtr<uint8_t>() || flags.isSet(STATIC_DATA));
  
-        if (p != getPtr<uint8_t>())
+        if (p != getPtr<uint8_t>()) {
              // for packet with allocated dynamic data, we copy data from
              // one to the other, e.g. a forwarded response to a response
              std::memcpy(getPtr<uint8_t>(), p, getSize());
+        }
      }
  
      /**
@@ -1203,7 +1205,19 @@ class Packet : public Printable
      void
      writeData(uint8_t *p) const
      {
-        std::memcpy(p, getConstPtr<uint8_t>(), getSize());
+        if (!isMaskedWrite()) {
+            std::memcpy(p, getConstPtr<uint8_t>(), getSize());
+        } else {
+            assert(req->getByteEnable().size() == getSize());
+            // Write only the enabled bytes
+            const uint8_t *base = getConstPtr<uint8_t>();
+            for (int i = 0; i < getSize(); i++) {
+                if (req->getByteEnable()[i]) {
+                    p[i] = *(base + i);
+                }
+                // Disabled bytes stay untouched
+            }
+        }
      }
  
      /**
@@ -1268,6 +1282,17 @@ class Packet : public Printable
      bool
      trySatisfyFunctional(PacketPtr other)
      {
+        if (other->isMaskedWrite()) {
+            // Do not forward data if overlapping with a masked write
+            if (_isSecure == other->isSecure() &&
+                getAddr() <= (other->getAddr() + other->getSize() - 1) &&
+                other->getAddr() <= (getAddr() + getSize() - 1)) {
+                warn("Trying to check against a masked write, skipping."
+                     " (addr: 0x%x, other addr: 0x%x)", getAddr(),
+                     other->getAddr());
+            }
+            return false;
+        }
          // all packets that are carrying a payload should have a valid
          // data pointer
          return trySatisfyFunctional(other, other->getAddr(), other->isSecure(),
@@ -1296,6 +1321,12 @@ class Packet : public Printable
          return cmd == MemCmd::CleanEvict || cmd == MemCmd::WritebackClean;
      }
  
+    bool
+    isMaskedWrite() const
+    {
+        return (cmd == MemCmd::WriteReq && !req->getByteEnable().empty());
+    }
+
      /**
       * Check a functional request against a memory value represented
       * by a base/size pair and an associated data array. If the
diff --git a/src/mem/request.hh b/src/mem/request.hh

index 2a53c21a4e8ff9ee2f13d2802f0f479dc844ae62..324ae382e712d439ac6a66572e5b5ffb739fdc37 100644 (file)
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -320,6 +320,9 @@ class Request
       */
      unsigned _size;
  
+    /** Byte-enable mask for writes. */
+    std::vector<bool> _byteEnable;
+
      /** The requestor ID which is unique in the system for all ports
       * that are capable of issuing a transaction
       */
@@ -567,6 +570,9 @@ class Request
       * Generate two requests as if this request had been split into two
       * pieces. The original request can't have been translated already.
       */
+    // TODO: this function is still required by TimingSimpleCPU - should be
+    // removed once TimingSimpleCPU will support arbitrarily long multi-line
+    // mem. accesses
      void splitOnVaddr(Addr split_addr, RequestPtr &req1, RequestPtr &req2)
      {
          assert(privateFlags.isSet(VALID_VADDR));
@@ -577,6 +583,14 @@ class Request
          req1->_size = split_addr - _vaddr;
          req2->_vaddr = split_addr;
          req2->_size = _size - req1->_size;
+        if (!_byteEnable.empty()) {
+            req1->_byteEnable = std::vector<bool>(
+                _byteEnable.begin(),
+                _byteEnable.begin() + req1->_size);
+            req2->_byteEnable = std::vector<bool>(
+                _byteEnable.begin() + req1->_size,
+                _byteEnable.end());
+        }
      }
  
      /**
@@ -628,6 +642,19 @@ class Request
          return _size;
      }
  
+    const std::vector<bool>&
+    getByteEnable() const
+    {
+        return _byteEnable;
+    }
+
+    void
+    setByteEnable(const std::vector<bool>& be)
+    {
+        assert(be.empty() || be.size() == _size);
+        _byteEnable = be;
+    }
+
      /** Accessor for time. */
      Tick
      time() const
author	Giacomo Gabrielli <giacomo.gabrielli@arm.com>
	Fri, 7 Jul 2017 13:13:11 +0000 (14:13 +0100)
committer	Giacomo Gabrielli <giacomo.gabrielli@arm.com>
	Sat, 11 May 2019 12:48:58 +0000 (12:48 +0000)
src/cpu/base.hh		patch \| blob \| history
src/cpu/base_dyn_inst.hh		patch \| blob \| history
src/cpu/checker/cpu.cc		patch \| blob \| history
src/cpu/checker/cpu.hh		patch \| blob \| history
src/cpu/exec_context.hh		patch \| blob \| history
src/cpu/minor/dyn_inst.hh		patch \| blob \| history
src/cpu/minor/exec_context.hh		patch \| blob \| history
src/cpu/minor/execute.cc		patch \| blob \| history
src/cpu/minor/lsq.cc		patch \| blob \| history
src/cpu/minor/lsq.hh		patch \| blob \| history
src/cpu/o3/cpu.hh		patch \| blob \| history
src/cpu/o3/lsq.hh		patch \| blob \| history
src/cpu/o3/lsq_impl.hh		patch \| blob \| history
src/cpu/o3/lsq_unit_impl.hh		patch \| blob \| history
src/cpu/simple/atomic.cc		patch \| blob \| history
src/cpu/simple/atomic.hh		patch \| blob \| history
src/cpu/simple/base.cc		patch \| blob \| history
src/cpu/simple/base.hh		patch \| blob \| history
src/cpu/simple/exec_context.hh		patch \| blob \| history
src/cpu/simple/timing.cc		patch \| blob \| history
src/cpu/simple/timing.hh		patch \| blob \| history
src/cpu/simple_thread.cc		patch \| blob \| history
src/cpu/utils.hh	[new file with mode: 0644]	patch \| blob
src/mem/abstract_mem.cc		patch \| blob \| history
src/mem/cache/cache.cc		patch \| blob \| history
src/mem/packet.hh		patch \| blob \| history
src/mem/request.hh		patch \| blob \| history