From c417b76bad37f9a7669e3703c3bf7dca1ac296af Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Wed, 27 Nov 2019 16:29:52 +0000
Subject: [PATCH] cpu: Never use a empty byteEnable

The byteEnable variable is used for masking bytes in a memory request.
The default behaviour is to provide from the ExecContext to the CPU
(and then to the LSQ) an empty vector, which is the same as providing
a vector where every element is true.
Such vectors basically mean: do not mask any byte in the memory request.

This behaviour adds more complexity to the downstream LSQs, which now
have to distinguish between an empty and non-empty byteEnable.

This patch is simplifying things by transforming an empty vector into
a all true one, making sure the CPUs are always receiving a non empty
byteEnable.

JIRA: https://gem5.atlassian.net/browse/GEM5-196

Change-Id: I1d1cecd86ed64c53a314ed700f28810d76c195c3
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/23285
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/base_dyn_inst.hh       | 23 +++++++++---------
 src/cpu/checker/cpu.cc         | 28 +++++++++-------------
 src/cpu/checker/cpu.hh         |  4 ++--
 src/cpu/exec_context.hh        |  7 +++---
 src/cpu/minor/exec_context.hh  | 12 +++++-----
 src/cpu/minor/lsq.cc           | 28 +++++++++-------------
 src/cpu/o3/lsq.hh              |  7 ++----
 src/cpu/o3/lsq_impl.hh         | 44 +++++++++++-----------------------
 src/cpu/simple/atomic.cc       | 20 ++++++----------
 src/cpu/simple/exec_context.hh | 15 ++++++------
 src/cpu/simple/timing.cc       |  8 ++-----
 src/mem/request.hh             | 18 +++++++-------
 12 files changed, 86 insertions(+), 128 deletions(-)
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index fd216b173..194d77b85 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -308,14 +308,13 @@ class BaseDynInst : public ExecContext, public RefCounted
     }
 
     Fault initiateMemRead(Addr addr, unsigned size, Request::Flags flags,
-            const std::vector<bool> &byte_enable=std::vector<bool>()) override;
+            const std::vector<bool> &byte_enable) override;
 
     Fault initiateHtmCmd(Request::Flags flags) override;
 
     Fault writeMem(uint8_t *data, unsigned size, Addr addr,
                    Request::Flags flags, uint64_t *res,
-                   const std::vector<bool> &byte_enable=std::vector<bool>())
-                   override;
+                   const std::vector<bool> &byte_enable) override;
 
     Fault initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
                          AtomicOpFunctorPtr amo_op) override;
@@ -1069,11 +1068,11 @@ BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
                                    Request::Flags flags,
                                    const std::vector<bool> &byte_enable)
 {
-    assert(byte_enable.empty() || byte_enable.size() == size);
+    assert(byte_enable.size() == size);
     return cpu->pushRequest(
-            dynamic_cast<typename DynInstPtr::PtrType>(this),
-            /* ld */ true, nullptr, size, addr, flags, nullptr, nullptr,
-            byte_enable);
+        dynamic_cast<typename DynInstPtr::PtrType>(this),
+        /* ld */ true, nullptr, size, addr, flags, nullptr, nullptr,
+        byte_enable);
 }
 
 template<class Impl>
@@ -1091,11 +1090,11 @@ BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size, Addr addr,
                             Request::Flags flags, uint64_t *res,
                             const std::vector<bool> &byte_enable)
 {
-    assert(byte_enable.empty() || byte_enable.size() == size);
+    assert(byte_enable.size() == size);
     return cpu->pushRequest(
-            dynamic_cast<typename DynInstPtr::PtrType>(this),
-            /* st */ false, data, size, addr, flags, res, nullptr,
-            byte_enable);
+        dynamic_cast<typename DynInstPtr::PtrType>(this),
+        /* st */ false, data, size, addr, flags, res, nullptr,
+        byte_enable);
 }
 
 template<class Impl>
@@ -1112,7 +1111,7 @@ BaseDynInst<Impl>::initiateMemAMO(Addr addr, unsigned size,
     return cpu->pushRequest(
             dynamic_cast<typename DynInstPtr::PtrType>(this),
             /* atomic */ false, nullptr, size, addr, flags, nullptr,
-            std::move(amo_op));
+            std::move(amo_op), std::vector<bool>(size, true));
 }
 
 #endif // __CPU_BASE_DYN_INST_HH__
diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc
index fe0300e74..8f558703c 100644
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011,2013,2017-2018 ARM Limited
+ * Copyright (c) 2011,2013,2017-2018, 2020 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -147,21 +147,15 @@ CheckerCPU::genMemFragmentRequest(Addr frag_addr, int size,
 
     RequestPtr mem_req;
 
-    if (!byte_enable.empty()) {
-        // Set up byte-enable mask for the current fragment
-        auto it_start = byte_enable.cbegin() + (size - (frag_size +
-                                                        size_left));
-        auto it_end = byte_enable.cbegin() + (size - size_left);
-        if (isAnyActiveElement(it_start, it_end)) {
-            mem_req = std::make_shared<Request>(frag_addr, frag_size,
-                    flags, requestorId, thread->pcState().instAddr(),
-                    tc->contextId());
-            mem_req->setByteEnable(std::vector<bool>(it_start, it_end));
-        }
-    } else {
+    // Set up byte-enable mask for the current fragment
+    auto it_start = byte_enable.cbegin() + (size - (frag_size +
+                                                    size_left));
+    auto it_end = byte_enable.cbegin() + (size - size_left);
+    if (isAnyActiveElement(it_start, it_end)) {
         mem_req = std::make_shared<Request>(frag_addr, frag_size,
-                    flags, requestorId, thread->pcState().instAddr(),
-                    tc->contextId());
+                flags, requestorId, thread->pcState().instAddr(),
+                tc->contextId());
+        mem_req->setByteEnable(std::vector<bool>(it_start, it_end));
     }
 
     return mem_req;
@@ -172,7 +166,7 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
                     Request::Flags flags,
                     const std::vector<bool>& byte_enable)
 {
-    assert(byte_enable.empty() || byte_enable.size() == size);
+    assert(byte_enable.size() == size);
 
     Fault fault = NoFault;
     bool checked_flags = false;
@@ -256,7 +250,7 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size,
                      Addr addr, Request::Flags flags, uint64_t *res,
                      const std::vector<bool>& byte_enable)
 {
-    assert(byte_enable.empty() || byte_enable.size() == size);
+    assert(byte_enable.size() == size);
 
     Fault fault = NoFault;
     bool checked_flags = false;
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index f5d783492..97203c28c 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -587,12 +587,12 @@ class CheckerCPU : public BaseCPU, public ExecContext
 
     Fault readMem(Addr addr, uint8_t *data, unsigned size,
                   Request::Flags flags,
-                  const std::vector<bool>& byte_enable = std::vector<bool>())
+                  const std::vector<bool>& byte_enable)
         override;
 
     Fault writeMem(uint8_t *data, unsigned size, Addr addr,
                    Request::Flags flags, uint64_t *res,
-                   const std::vector<bool>& byte_enable = std::vector<bool>())
+                   const std::vector<bool>& byte_enable)
         override;
 
     Fault amoMem(Addr addr, uint8_t* data, unsigned size,
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh
index b1cdbd89c..7c433ad1e 100644
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -233,7 +233,7 @@ class ExecContext {
      */
     virtual Fault readMem(Addr addr, uint8_t *data, unsigned int size,
             Request::Flags flags,
-            const std::vector<bool>& byte_enable = std::vector<bool>())
+            const std::vector<bool>& byte_enable)
     {
         panic("ExecContext::readMem() should be overridden\n");
     }
@@ -247,7 +247,7 @@ class ExecContext {
      */
     virtual Fault initiateMemRead(Addr addr, unsigned int size,
             Request::Flags flags,
-            const std::vector<bool>& byte_enable = std::vector<bool>())
+            const std::vector<bool>& byte_enable)
     {
         panic("ExecContext::initiateMemRead() should be overridden\n");
     }
@@ -263,8 +263,7 @@ class ExecContext {
      */
     virtual Fault writeMem(uint8_t *data, unsigned int size, Addr addr,
                            Request::Flags flags, uint64_t *res,
-                           const std::vector<bool>& byte_enable =
-                               std::vector<bool>()) = 0;
+                           const std::vector<bool>& byte_enable) = 0;
 
     /**
      * For atomic-mode contexts, perform an atomic AMO (a.k.a., Atomic
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index 58301a070..4f7c763b9 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -105,10 +105,9 @@ class ExecContext : public ::ExecContext
     Fault
     initiateMemRead(Addr addr, unsigned int size,
                     Request::Flags flags,
-                    const std::vector<bool>& byte_enable =
-                        std::vector<bool>()) override
+                    const std::vector<bool>& byte_enable) override
     {
-        assert(byte_enable.empty() || byte_enable.size() == size);
+        assert(byte_enable.size() == size);
         return execute.getLSQ().pushRequest(inst, true /* load */, nullptr,
             size, addr, flags, nullptr, nullptr, byte_enable);
     }
@@ -123,10 +122,10 @@ class ExecContext : public ::ExecContext
     Fault
     writeMem(uint8_t *data, unsigned int size, Addr addr,
              Request::Flags flags, uint64_t *res,
-             const std::vector<bool>& byte_enable = std::vector<bool>())
+             const std::vector<bool>& byte_enable)
         override
     {
-        assert(byte_enable.empty() || byte_enable.size() == size);
+        assert(byte_enable.size() == size);
         return execute.getLSQ().pushRequest(inst, false /* store */, data,
             size, addr, flags, res, nullptr, byte_enable);
     }
@@ -137,7 +136,8 @@ class ExecContext : public ::ExecContext
     {
         // AMO requests are pushed through the store path
         return execute.getLSQ().pushRequest(inst, false /* amo */, nullptr,
-            size, addr, flags, nullptr, std::move(amo_op));
+            size, addr, flags, nullptr, std::move(amo_op),
+            std::vector<bool>(size, true));
     }
 
     RegVal
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index afdeb1e56..b4e5493ad 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -301,8 +301,7 @@ LSQ::SingleDataRequest::startAddrTranslation()
         inst->id.threadId);
 
     const auto &byte_enable = request->getByteEnable();
-    if (byte_enable.size() == 0 ||
-        isAnyActiveElement(byte_enable.cbegin(), byte_enable.cend())) {
+    if (isAnyActiveElement(byte_enable.cbegin(), byte_enable.cend())) {
         port.numAccessesInDTLB++;
 
         setState(LSQ::LSQRequest::InTranslation);
@@ -495,24 +494,19 @@ LSQ::SplitDataRequest::makeFragmentRequests()
         bool disabled_fragment = false;
 
         fragment->setContext(request->contextId());
-        if (byte_enable.empty()) {
+        // Set up byte-enable mask for the current fragment
+        auto it_start = byte_enable.begin() +
+            (fragment_addr - base_addr);
+        auto it_end = byte_enable.begin() +
+            (fragment_addr - base_addr) + fragment_size;
+        if (isAnyActiveElement(it_start, it_end)) {
             fragment->setVirt(
                 fragment_addr, fragment_size, request->getFlags(),
-                request->requestorId(), request->getPC());
+                request->requestorId(),
+                request->getPC());
+            fragment->setByteEnable(std::vector<bool>(it_start, it_end));
         } else {
-            // Set up byte-enable mask for the current fragment
-            auto it_start = byte_enable.begin() +
-                (fragment_addr - base_addr);
-            auto it_end = byte_enable.begin() +
-                (fragment_addr - base_addr) + fragment_size;
-            if (isAnyActiveElement(it_start, it_end)) {
-                fragment->setVirt(
-                    fragment_addr, fragment_size, request->getFlags(),
-                    request->requestorId(), request->getPC());
-                fragment->setByteEnable(std::vector<bool>(it_start, it_end));
-            } else {
-                disabled_fragment = true;
-            }
+            disabled_fragment = true;
         }
 
         if (!disabled_fragment) {
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 6e7d8d7e5..bec3ac21c 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -406,15 +406,12 @@ class LSQ
         addRequest(Addr addr, unsigned size,
                    const std::vector<bool>& byte_enable)
         {
-            if (byte_enable.empty() ||
-                isAnyActiveElement(byte_enable.begin(), byte_enable.end())) {
+            if (isAnyActiveElement(byte_enable.begin(), byte_enable.end())) {
                 auto request = std::make_shared<Request>(
                         addr, size, _flags, _inst->requestorId(),
                         _inst->instAddr(), _inst->contextId(),
                         std::move(_amo_op));
-                if (!byte_enable.empty()) {
-                    request->setByteEnable(byte_enable);
-                }
+                request->setByteEnable(byte_enable);
                 _requests.push_back(request);
             }
         }
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index ab8564085..e3922aece 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -715,9 +715,7 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                     size, flags, data, res, std::move(amo_op));
         }
         assert(req);
-        if (!byte_enable.empty()) {
-            req->_byteEnable = byte_enable;
-        }
+        req->_byteEnable = byte_enable;
         inst->setRequest();
         req->taskId(cpu->taskId());
 
@@ -894,9 +892,7 @@ LSQ<Impl>::SplitDataRequest::initiateTranslation()
     mainReq = std::make_shared<Request>(base_addr,
                 _size, _flags, _inst->requestorId(),
                 _inst->instAddr(), _inst->contextId());
-    if (!_byteEnable.empty()) {
-        mainReq->setByteEnable(_byteEnable);
-    }
+    mainReq->setByteEnable(_byteEnable);
 
     // Paddr is not used in mainReq. However, we will accumulate the flags
     // from the sub requests into mainReq by calling setFlags() in finish().
@@ -905,41 +901,29 @@ LSQ<Impl>::SplitDataRequest::initiateTranslation()
     mainReq->setPaddr(0);
 
     /* Get the pre-fix, possibly unaligned. */
-    if (_byteEnable.empty()) {
-        this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
-    } else {
-        auto it_start = _byteEnable.begin();
-        auto it_end = _byteEnable.begin() + (next_addr - base_addr);
-        this->addRequest(base_addr, next_addr - base_addr,
-                         std::vector<bool>(it_start, it_end));
-    }
+    auto it_start = _byteEnable.begin();
+    auto it_end = _byteEnable.begin() + (next_addr - base_addr);
+    this->addRequest(base_addr, next_addr - base_addr,
+                     std::vector<bool>(it_start, it_end));
     size_so_far = next_addr - base_addr;
 
     /* We are block aligned now, reading whole blocks. */
     base_addr = next_addr;
     while (base_addr != final_addr) {
-        if (_byteEnable.empty()) {
-            this->addRequest(base_addr, cacheLineSize, _byteEnable);
-        } else {
-            auto it_start = _byteEnable.begin() + size_so_far;
-            auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
-            this->addRequest(base_addr, cacheLineSize,
-                             std::vector<bool>(it_start, it_end));
-        }
+        auto it_start = _byteEnable.begin() + size_so_far;
+        auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
+        this->addRequest(base_addr, cacheLineSize,
+                         std::vector<bool>(it_start, it_end));
         size_so_far += cacheLineSize;
         base_addr += cacheLineSize;
     }
 
     /* Deal with the tail. */
     if (size_so_far < _size) {
-        if (_byteEnable.empty()) {
-            this->addRequest(base_addr, _size - size_so_far, _byteEnable);
-        } else {
-            auto it_start = _byteEnable.begin() + size_so_far;
-            auto it_end = _byteEnable.end();
-            this->addRequest(base_addr, _size - size_so_far,
-                             std::vector<bool>(it_start, it_end));
-        }
+        auto it_start = _byteEnable.begin() + size_so_far;
+        auto it_end = _byteEnable.end();
+        this->addRequest(base_addr, _size - size_so_far,
+                         std::vector<bool>(it_start, it_end));
     }
 
     if (_requests.size() > 0) {
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 20c6e1cb6..25248d9be 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -345,21 +345,15 @@ AtomicSimpleCPU::genMemFragmentRequest(const RequestPtr& req, Addr frag_addr,
         (Addr) size_left);
     size_left -= frag_size;
 
-    if (!byte_enable.empty()) {
-        // Set up byte-enable mask for the current fragment
-        auto it_start = byte_enable.begin() + (size - (frag_size + size_left));
-        auto it_end = byte_enable.begin() + (size - size_left);
-        if (isAnyActiveElement(it_start, it_end)) {
-            req->setVirt(frag_addr, frag_size, flags, dataRequestorId(),
-                         inst_addr);
-            req->setByteEnable(std::vector<bool>(it_start, it_end));
-        } else {
-            predicate = false;
-        }
-    } else {
+    // Set up byte-enable mask for the current fragment
+    auto it_start = byte_enable.begin() + (size - (frag_size + size_left));
+    auto it_end = byte_enable.begin() + (size - size_left);
+    if (isAnyActiveElement(it_start, it_end)) {
         req->setVirt(frag_addr, frag_size, flags, dataRequestorId(),
                      inst_addr);
-        req->setByteEnable(std::vector<bool>());
+        req->setByteEnable(std::vector<bool>(it_start, it_end));
+    } else {
+        predicate = false;
     }
 
     return predicate;
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index fbd2d96e1..09b3668cb 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -433,31 +433,32 @@ class SimpleExecContext : public ExecContext {
     Fault
     readMem(Addr addr, uint8_t *data, unsigned int size,
             Request::Flags flags,
-            const std::vector<bool>& byte_enable = std::vector<bool>())
+            const std::vector<bool>& byte_enable)
         override
     {
-        assert(byte_enable.empty() || byte_enable.size() == size);
+        assert(byte_enable.size() == size);
         return cpu->readMem(addr, data, size, flags, byte_enable);
     }
 
     Fault
     initiateMemRead(Addr addr, unsigned int size,
                     Request::Flags flags,
-                    const std::vector<bool>& byte_enable = std::vector<bool>())
+                    const std::vector<bool>& byte_enable)
         override
     {
-        assert(byte_enable.empty() || byte_enable.size() == size);
+        assert(byte_enable.size() == size);
         return cpu->initiateMemRead(addr, size, flags, byte_enable);
     }
 
     Fault
     writeMem(uint8_t *data, unsigned int size, Addr addr,
              Request::Flags flags, uint64_t *res,
-             const std::vector<bool>& byte_enable = std::vector<bool>())
+             const std::vector<bool>& byte_enable)
         override
     {
-        assert(byte_enable.empty() || byte_enable.size() == size);
-        return cpu->writeMem(data, size, addr, flags, res, byte_enable);
+        assert(byte_enable.size() == size);
+        return cpu->writeMem(data, size, addr, flags, res,
+            byte_enable);
     }
 
     Fault amoMem(Addr addr, uint8_t *data, unsigned int size,
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index d38aefe5d..19556950e 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -467,9 +467,7 @@ TimingSimpleCPU::initiateMemRead(Addr addr, unsigned size,
 
     RequestPtr req = std::make_shared<Request>(
         addr, size, flags, dataRequestorId(), pc, thread->contextId());
-    if (!byte_enable.empty()) {
-        req->setByteEnable(byte_enable);
-    }
+    req->setByteEnable(byte_enable);
 
     req->taskId(taskId());
 
@@ -551,9 +549,7 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
 
     RequestPtr req = std::make_shared<Request>(
         addr, size, flags, dataRequestorId(), pc, thread->contextId());
-    if (!byte_enable.empty()) {
-        req->setByteEnable(byte_enable);
-    }
+    req->setByteEnable(byte_enable);
 
     req->taskId(taskId());
 
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 43f54e64a..73c823bf2 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -432,6 +432,7 @@ class Request
     {
         _flags.set(flags);
         privateFlags.set(VALID_PADDR|VALID_SIZE);
+        _byteEnable = std::vector<bool>(size, true);
     }
 
     Request(Addr vaddr, unsigned size, Flags flags,
@@ -440,6 +441,7 @@ class Request
     {
         setVirt(vaddr, size, flags, id, pc, std::move(atomic_op));
         setContext(cid);
+        _byteEnable = std::vector<bool>(size, true);
     }
 
     Request(const Request& other)
@@ -541,14 +543,12 @@ class Request
         req1->_size = split_addr - _vaddr;
         req2->_vaddr = split_addr;
         req2->_size = _size - req1->_size;
-        if (!_byteEnable.empty()) {
-            req1->_byteEnable = std::vector<bool>(
-                _byteEnable.begin(),
-                _byteEnable.begin() + req1->_size);
-            req2->_byteEnable = std::vector<bool>(
-                _byteEnable.begin() + req1->_size,
-                _byteEnable.end());
-        }
+        req1->_byteEnable = std::vector<bool>(
+            _byteEnable.begin(),
+            _byteEnable.begin() + req1->_size);
+        req2->_byteEnable = std::vector<bool>(
+            _byteEnable.begin() + req1->_size,
+            _byteEnable.end());
     }
 
     /**
@@ -624,7 +624,7 @@ class Request
     void
     setByteEnable(const std::vector<bool>& be)
     {
-        assert(be.empty() || be.size() == _size);
+        assert(be.size() == _size);
         _byteEnable = be;
     }
 
-- 
2.30.2