From 099cb037e83d1e7bb47ec0e8eaf649a63f889d38 Mon Sep 17 00:00:00 2001
From: Nikos Nikoleris
Date: Tue, 7 Feb 2017 11:35:48 +0000
Subject: [PATCH] cpu: Add support for CMOs in the cpu models

Cache maintenance operations go through the write channel of the cpu.
This change makes sure that the cpu does not try to fill in the packet
with data.

Change-Id: Ic83205bb1cda7967636d88f15adcb475eb38d158
Reviewed-by: Stephan Diestelhorst
Reviewed-on: https://gem5-review.googlesource.com/5055
Maintainer: Andreas Sandberg
Reviewed-by: Jason Lowe-Power
---
 src/cpu/checker/cpu.cc   |  4 ++--
 src/cpu/minor/lsq.cc     | 17 ++++++++++++-----
 src/cpu/o3/lsq_unit.hh   | 21 +++++++++++++--------
 src/cpu/simple/atomic.cc |  7 ++-----
 src/cpu/simple/timing.cc |  2 +-
 src/mem/request.hh       |  2 ++
 6 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc
index b22fb2a45..48fcb202c 100644
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011,2013 ARM Limited
+ * Copyright (c) 2011,2013,2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -310,7 +310,7 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size,
     // If the request is to ZERO a cache block, there is no data to check
     // against, but it's all zero. We need something to compare to, so use a
     // const set of zeros.
-    if (flags & Request::CACHE_BLOCK_ZERO) {
+    if (flags & Request::STORE_NO_DATA) {
         assert(!data);
         assert(sizeof(zero_data) <= fullSize);
         data = zero_data;
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index b7d5360ac..cb0611be3 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014,2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -679,8 +679,12 @@ LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
     while (ret == NoAddrRangeCoverage && i != slots.rend()) {
         LSQRequestPtr slot = *i;
 
+        /* Cache maintenance instructions go down via the store path *
+         * but they carry no data and they shouldn't be considered for
+         * forwarding */
         if (slot->packet &&
-            slot->inst->id.threadId == request->inst->id.threadId) {
+            slot->inst->id.threadId == request->inst->id.threadId &&
+            !slot->packet->req->isCacheMaintenance()) {
             AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);
 
             if (coverage != NoAddrRangeCoverage) {
@@ -1492,7 +1496,7 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
         /* request_data becomes the property of a ...DataRequest (see below)
          * and destroyed by its destructor */
         request_data = new uint8_t[size];
-        if (flags & Request::CACHE_BLOCK_ZERO) {
+        if (flags & Request::STORE_NO_DATA) {
             /* For cache zeroing, just use zeroed data */
             std::memset(request_data, 0, size);
         } else {
@@ -1562,10 +1566,13 @@ makePacketForRequest(Request &request, bool isLoad,
     if (sender_state)
         ret->pushSenderState(sender_state);
 
-    if (isLoad)
+    if (isLoad) {
         ret->allocate();
-    else
+    } else if (!request.isCacheMaintenance()) {
+        // CMOs are treated as stores but they don't have data. All
+        // stores otherwise need to allocate for data.
ret->dataDynamic(data); + } return ret; } diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index b8e895571..a2813b3dc 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014 ARM Limited + * Copyright (c) 2012-2014,2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -650,10 +650,14 @@ LSQUnit::read(Request *req, Request *sreqLow, Request *sreqHigh, store_size = storeQueue[store_idx].size; - if (store_size == 0) - continue; - else if (storeQueue[store_idx].inst->strictlyOrdered()) + if (!store_size || storeQueue[store_idx].inst->strictlyOrdered() || + (storeQueue[store_idx].req && + storeQueue[store_idx].req->isCacheMaintenance())) { + // Cache maintenance instructions go down via the store + // path but they carry no data and they shouldn't be + // considered for forwarding continue; + } assert(storeQueue[store_idx].inst->effAddrValid()); @@ -894,9 +898,9 @@ LSQUnit::write(Request *req, Request *sreqLow, Request *sreqHigh, storeQueue[store_idx].sreqHigh = sreqHigh; unsigned size = req->getSize(); storeQueue[store_idx].size = size; - storeQueue[store_idx].isAllZeros = req->getFlags() & Request::CACHE_BLOCK_ZERO; - assert(size <= sizeof(storeQueue[store_idx].data) || - (req->getFlags() & Request::CACHE_BLOCK_ZERO)); + bool store_no_data = req->getFlags() & Request::STORE_NO_DATA; + storeQueue[store_idx].isAllZeros = store_no_data; + assert(size <= sizeof(storeQueue[store_idx].data) || store_no_data); // Split stores can only occur in ISAs with unaligned memory accesses. If // a store request has been split, sreqLow and sreqHigh will be non-null. @@ -904,7 +908,8 @@ LSQUnit::write(Request *req, Request *sreqLow, Request *sreqHigh, storeQueue[store_idx].isSplit = true; } - if (!(req->getFlags() & Request::CACHE_BLOCK_ZERO)) + if (!(req->getFlags() & Request::CACHE_BLOCK_ZERO) && \ + !req->isCacheMaintenance()) memcpy(storeQueue[store_idx].data, data, size); // This function only writes the data to the store queue, so no fault diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index eea7615c8..f3596b6a5 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -430,7 +430,7 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr, if (data == NULL) { assert(size <= 64); - assert(flags & Request::CACHE_BLOCK_ZERO); + assert(flags & Request::STORE_NO_DATA); // This must be a cache block cleaning request data = zero_array; } @@ -462,14 +462,11 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr, // Now do the access. 
         if (fault == NoFault) {
-            MemCmd cmd = MemCmd::WriteReq; // default
             bool do_access = true;  // flag to suppress cache access
 
             if (req->isLLSC()) {
-                cmd = MemCmd::StoreCondReq;
                 do_access = TheISA::handleLockedWrite(thread, req, dcachePort.cacheBlockMask);
             } else if (req->isSwap()) {
-                cmd = MemCmd::SwapReq;
                 if (req->isCondSwap()) {
                     assert(res);
                     req->setExtraData(*res);
@@ -477,7 +474,7 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
             }
 
             if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
-                Packet pkt = Packet(req, cmd);
+                Packet pkt(req, Packet::makeWriteCmd(req));
                 pkt.dataStatic(data);
 
                 if (req->isMmappedIpr()) {
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index c38f2107f..961e31935 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -510,7 +510,7 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
     BaseTLB::Mode mode = BaseTLB::Write;
 
     if (data == NULL) {
-        assert(flags & Request::CACHE_BLOCK_ZERO);
+        assert(flags & Request::STORE_NO_DATA);
         // This must be a cache block cleaning request
         memset(newData, 0, size);
     } else {
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 258693547..5cb08ca39 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -202,6 +202,8 @@ class Request
          */
        STICKY_FLAGS = INST_FETCH
    };
+    static const FlagsType STORE_NO_DATA = CACHE_BLOCK_ZERO |
+        CLEAN | INVALIDATE;
 
    /** Master Ids that are statically allocated
     * @{*/
-- 
2.30.2
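
The change applies one pattern across all the CPU models above: a request that goes down the store path but carries no payload (a cache block zero, a clean or an invalidate) must not have a data buffer attached, and a cache maintenance operation must additionally be ignored by store-to-load forwarding. The standalone C++ sketch below is not part of the patch; all names and flag values in it (FakeRequest, buildStorePayload, canForwardToLoad, the bit values) are invented stand-ins rather than gem5 APIs, and it only illustrates the decision logic that the patch adds to the real Request/Packet/LSQ code.

// Standalone sketch, not gem5 code: mirrors the decisions the patch adds to
// the CPU write paths, using invented stand-in types and flag values.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

namespace sketch {

using FlagsType = std::uint64_t;

// Stand-ins for Request::CACHE_BLOCK_ZERO, Request::CLEAN and
// Request::INVALIDATE; the grouping mirrors the new Request::STORE_NO_DATA.
constexpr FlagsType CACHE_BLOCK_ZERO = 0x1;
constexpr FlagsType CLEAN            = 0x2;
constexpr FlagsType INVALIDATE       = 0x4;
constexpr FlagsType STORE_NO_DATA    = CACHE_BLOCK_ZERO | CLEAN | INVALIDATE;

struct FakeRequest {            // simplified stand-in for gem5's Request
    FlagsType flags = 0;
    unsigned size = 0;

    bool isCacheMaintenance() const { return flags & (CLEAN | INVALIDATE); }
};

// Decide what payload, if any, a store-path request gets: CMOs carry none,
// cache-block-zero requests get a synthesized all-zero buffer, and ordinary
// stores copy the caller's data.
std::vector<std::uint8_t>
buildStorePayload(const FakeRequest &req, const std::uint8_t *data)
{
    if (req.isCacheMaintenance())
        return {};                                        // no data at all
    if (req.flags & STORE_NO_DATA) {
        assert(!data);
        return std::vector<std::uint8_t>(req.size, 0);    // zeroed block
    }
    assert(data);
    return std::vector<std::uint8_t>(data, data + req.size);
}

// Mirrors the forwarding checks added to the minor and O3 LSQs: a store
// queue entry that is a CMO can never forward data to a younger load.
bool
canForwardToLoad(const FakeRequest &store)
{
    return store.size != 0 && !store.isCacheMaintenance();
}

} // namespace sketch

int
main()
{
    using namespace sketch;

    std::uint8_t bytes[4] = {1, 2, 3, 4};
    FakeRequest zero_req{CACHE_BLOCK_ZERO, 64};
    FakeRequest cmo_req{CLEAN | INVALIDATE, 64};
    FakeRequest store_req{0, sizeof(bytes)};

    std::cout << buildStorePayload(zero_req, nullptr).size() << '\n';  // 64
    std::cout << buildStorePayload(cmo_req, nullptr).size() << '\n';   // 0
    std::cout << buildStorePayload(store_req, bytes).size() << '\n';   // 4
    std::cout << canForwardToLoad(cmo_req) << '\n';                    // 0
    return 0;
}

Built with any C++14 (or later) compiler, the sketch prints 64, 0, 4 and 0: the zeroing store gets a synthesized block of zeros, the CMO gets no payload and is excluded from forwarding, and the plain store copies its four bytes.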