From aa8c6e9c959eab4d516bc07593bea20ade9ad80c Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Fri, 13 Aug 2010 06:16:02 -0700 Subject: [PATCH] CPU: Add readBytes and writeBytes functions to the exec contexts. --- src/cpu/base_dyn_inst.hh | 57 ++++++-- src/cpu/exec_context.hh | 5 + src/cpu/inorder/cpu.cc | 122 +---------------- src/cpu/inorder/cpu.hh | 9 +- src/cpu/inorder/inorder_dyn_inst.cc | 35 +++-- src/cpu/inorder/inorder_dyn_inst.hh | 7 +- src/cpu/inorder/resources/cache_unit.cc | 170 ++++-------------------- src/cpu/inorder/resources/cache_unit.hh | 9 +- src/cpu/o3/cpu.hh | 6 +- src/cpu/o3/lsq.hh | 12 +- src/cpu/o3/lsq_unit.hh | 27 ++-- src/cpu/simple/atomic.cc | 86 +++++++----- src/cpu/simple/atomic.hh | 5 + src/cpu/simple/timing.cc | 59 +++++--- src/cpu/simple/timing.hh | 10 ++ 15 files changed, 253 insertions(+), 366 deletions(-) diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index a9ba12958..3ecec0f0c 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -120,6 +120,8 @@ class BaseDynInst : public FastAlloc, public RefCounted template Fault read(Addr addr, T &data, unsigned flags); + Fault readBytes(Addr addr, uint8_t *data, unsigned size, unsigned flags); + /** * Does a write to a given address. * @param data The data to be written. @@ -131,6 +133,9 @@ class BaseDynInst : public FastAlloc, public RefCounted template Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + Fault writeBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res); + /** Splits a request in two if it crosses a dcache block. */ void splitRequest(RequestPtr req, RequestPtr &sreqLow, RequestPtr &sreqHigh); @@ -867,12 +872,12 @@ class BaseDynInst : public FastAlloc, public RefCounted }; template -template -inline Fault -BaseDynInst::read(Addr addr, T &data, unsigned flags) +Fault +BaseDynInst::readBytes(Addr addr, uint8_t *data, + unsigned size, unsigned flags) { reqMade = true; - Request *req = new Request(asid, addr, sizeof(T), flags, this->PC, + Request *req = new Request(asid, addr, size, flags, this->PC, thread->contextId(), threadNumber); Request *sreqLow = NULL; @@ -889,11 +894,6 @@ BaseDynInst::read(Addr addr, T &data, unsigned flags) effAddrValid = true; fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx); } else { - - // Return a fixed value to keep simulation deterministic even - // along misspeculated paths. - data = (T)-1; - // Commit will have to clean up whatever happened. Set this // instruction as executed. this->setExecuted(); @@ -901,7 +901,6 @@ BaseDynInst::read(Addr addr, T &data, unsigned flags) if (traceData) { traceData->setAddr(addr); - traceData->setData(data); } return fault; @@ -910,15 +909,35 @@ BaseDynInst::read(Addr addr, T &data, unsigned flags) template template inline Fault -BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) +BaseDynInst::read(Addr addr, T &data, unsigned flags) { + Fault fault = readBytes(addr, (uint8_t *)&data, sizeof(T), flags); + + if (fault != NoFault) { + // Return a fixed value to keep simulation deterministic even + // along misspeculated paths. + data = (T)-1; + } + data = TheISA::gtoh(data); + if (traceData) { - traceData->setAddr(addr); traceData->setData(data); } + return fault; +} + +template +Fault +BaseDynInst::writeBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res) +{ + if (traceData) { + traceData->setAddr(addr); + } + reqMade = true; - Request *req = new Request(asid, addr, sizeof(T), flags, this->PC, + Request *req = new Request(asid, addr, size, flags, this->PC, thread->contextId(), threadNumber); Request *sreqLow = NULL; @@ -939,6 +958,18 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) return fault; } +template +template +inline Fault +BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + if (traceData) { + traceData->setData(data); + } + data = TheISA::htog(data); + return writeBytes((uint8_t *)&data, sizeof(T), addr, flags, res); +} + template inline void BaseDynInst::splitRequest(RequestPtr req, RequestPtr &sreqLow, diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh index c930b5cce..22dffea1c 100644 --- a/src/cpu/exec_context.hh +++ b/src/cpu/exec_context.hh @@ -111,12 +111,17 @@ class ExecContext { template Fault read(Addr addr, T &data, unsigned flags); + Fault readBytes(Addr addr, uint8_t *data, unsigned size, unsigned flags); + /** Writes to an address, creating a memory request with the given * flags. Writes data to memory. For store conditionals, returns * the result of the store in res. */ template Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + Fault writeBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res); + /** Prefetches an address, creating a memory request with the * given flags. */ void prefetch(Addr addr, unsigned flags); diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 75873d97d..059996b07 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -1518,135 +1518,25 @@ InOrderCPU::getDTBPtr() return dtb_res->tlb(); } -template Fault -InOrderCPU::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) +InOrderCPU::read(DynInstPtr inst, Addr addr, + uint8_t *data, unsigned size, unsigned flags) { //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case // you want to run w/out caches? CacheUnit *cache_res = dynamic_cast(resPool->getResource(dataPortIdx)); - return cache_res->read(inst, addr, data, flags); + return cache_res->read(inst, addr, data, size, flags); } -#ifndef DOXYGEN_SHOULD_SKIP_THIS - -template -Fault -InOrderCPU::read(DynInstPtr inst, Addr addr, Twin32_t &data, unsigned flags); - -template -Fault -InOrderCPU::read(DynInstPtr inst, Addr addr, Twin64_t &data, unsigned flags); - -template -Fault -InOrderCPU::read(DynInstPtr inst, Addr addr, uint64_t &data, unsigned flags); - -template -Fault -InOrderCPU::read(DynInstPtr inst, Addr addr, uint32_t &data, unsigned flags); - -template -Fault -InOrderCPU::read(DynInstPtr inst, Addr addr, uint16_t &data, unsigned flags); - -template -Fault -InOrderCPU::read(DynInstPtr inst, Addr addr, uint8_t &data, unsigned flags); - -#endif //DOXYGEN_SHOULD_SKIP_THIS - -template<> -Fault -InOrderCPU::read(DynInstPtr inst, Addr addr, double &data, unsigned flags) -{ - return read(inst, addr, *(uint64_t*)&data, flags); -} - -template<> -Fault -InOrderCPU::read(DynInstPtr inst, Addr addr, float &data, unsigned flags) -{ - return read(inst, addr, *(uint32_t*)&data, flags); -} - - -template<> -Fault -InOrderCPU::read(DynInstPtr inst, Addr addr, int32_t &data, unsigned flags) -{ - return read(inst, addr, (uint32_t&)data, flags); -} - -template Fault -InOrderCPU::write(DynInstPtr inst, T data, Addr addr, unsigned flags, - uint64_t *write_res) +InOrderCPU::write(DynInstPtr inst, uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *write_res) { //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case // you want to run w/out caches? CacheUnit *cache_res = dynamic_cast(resPool->getResource(dataPortIdx)); - return cache_res->write(inst, data, addr, flags, write_res); -} - -#ifndef DOXYGEN_SHOULD_SKIP_THIS - -template -Fault -InOrderCPU::write(DynInstPtr inst, Twin32_t data, Addr addr, - unsigned flags, uint64_t *res); - -template -Fault -InOrderCPU::write(DynInstPtr inst, Twin64_t data, Addr addr, - unsigned flags, uint64_t *res); - -template -Fault -InOrderCPU::write(DynInstPtr inst, uint64_t data, Addr addr, - unsigned flags, uint64_t *res); - -template -Fault -InOrderCPU::write(DynInstPtr inst, uint32_t data, Addr addr, - unsigned flags, uint64_t *res); - -template -Fault -InOrderCPU::write(DynInstPtr inst, uint16_t data, Addr addr, - unsigned flags, uint64_t *res); - -template -Fault -InOrderCPU::write(DynInstPtr inst, uint8_t data, Addr addr, - unsigned flags, uint64_t *res); - -#endif //DOXYGEN_SHOULD_SKIP_THIS - -template<> -Fault -InOrderCPU::write(DynInstPtr inst, double data, Addr addr, unsigned flags, - uint64_t *res) -{ - return write(inst, *(uint64_t*)&data, addr, flags, res); -} - -template<> -Fault -InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, - uint64_t *res) -{ - return write(inst, *(uint32_t*)&data, addr, flags, res); -} - - -template<> -Fault -InOrderCPU::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, - uint64_t *res) -{ - return write(inst, (uint32_t)data, addr, flags, res); + return cache_res->write(inst, data, size, addr, flags, write_res); } diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 6676d78cf..450829e64 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -523,15 +523,14 @@ class InOrderCPU : public BaseCPU /** Forwards an instruction read to the appropriate data * resource (indexes into Resource Pool thru "dataPortIdx") */ - template - Fault read(DynInstPtr inst, Addr addr, T &data, unsigned flags); + Fault read(DynInstPtr inst, Addr addr, + uint8_t *data, unsigned size, unsigned flags); /** Forwards an instruction write. to the appropriate data * resource (indexes into Resource Pool thru "dataPortIdx") */ - template - Fault write(DynInstPtr inst, T data, Addr addr, unsigned flags, - uint64_t *write_res = NULL); + Fault write(DynInstPtr inst, uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *write_res = NULL); /** Forwards an instruction prefetch to the appropriate data * resource (indexes into Resource Pool thru "dataPortIdx") diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index 13ec7a3ff..5486dedee 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -610,6 +610,13 @@ InOrderDynInst::deallocateContext(int thread_num) this->cpu->deallocateContext(thread_num); } +Fault +InOrderDynInst::readBytes(Addr addr, uint8_t *data, + unsigned size, unsigned flags) +{ + return cpu->read(this, addr, data, size, flags); +} + template inline Fault InOrderDynInst::read(Addr addr, T &data, unsigned flags) @@ -618,8 +625,11 @@ InOrderDynInst::read(Addr addr, T &data, unsigned flags) traceData->setAddr(addr); traceData->setData(data); } - - return cpu->read(this, addr, data, flags); + Fault fault = readBytes(addr, (uint8_t *)&data, sizeof(T), flags); + data = TheISA::gtoh(data); + if (traceData) + traceData->setData(data); + return fault; } #ifndef DOXYGEN_SHOULD_SKIP_THIS @@ -663,20 +673,29 @@ InOrderDynInst::read(Addr addr, int32_t &data, unsigned flags) return read(addr, (uint32_t&)data, flags); } +Fault +InOrderDynInst::writeBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res) +{ + assert(sizeof(storeData) >= size); + memcpy(&storeData, data, size); + return cpu->write(this, (uint8_t *)&storeData, size, addr, flags, res); +} + template inline Fault InOrderDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) { - if (traceData) { - traceData->setAddr(addr); - traceData->setData(data); - } - storeData = data; DPRINTF(InOrderDynInst, "[tid:%i]: [sn:%i] Setting store data to %#x.\n", threadNumber, seqNum, storeData); - return cpu->write(this, data, addr, flags, res); + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + storeData = TheISA::htog(data); + return writeBytes((uint8_t*)&data, sizeof(T), addr, flags, res); } #ifndef DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index ffb795e1e..0d42f4696 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -334,7 +334,7 @@ class InOrderDynInst : public FastAlloc, public RefCounted PacketDataPtr splitMemData; RequestPtr splitMemReq; - int splitTotalSize; + int totalSize; int split2ndSize; Addr split2ndAddr; bool split2ndAccess; @@ -637,6 +637,8 @@ class InOrderDynInst : public FastAlloc, public RefCounted template Fault read(Addr addr, T &data, unsigned flags); + Fault readBytes(Addr addr, uint8_t *data, unsigned size, unsigned flags); + /** * Does a write to a given address. * @param data The data to be written. @@ -649,6 +651,9 @@ class InOrderDynInst : public FastAlloc, public RefCounted Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + Fault writeBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res); + /** Initiates a memory access - Calculate Eff. Addr & Initiate Memory * Access Only valid for memory operations. */ diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 2ab9e889e..4d21f527e 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -443,9 +443,9 @@ CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size, return cache_req->fault; } -template Fault -CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) +CacheUnit::read(DynInstPtr inst, Addr addr, + uint8_t *data, unsigned size, unsigned flags) { CacheReqPtr cache_req = dynamic_cast(findRequest(inst)); assert(cache_req && "Can't Find Instruction for Read!"); @@ -454,14 +454,15 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) unsigned blockSize = this->cachePort->peerBlockSize(); //The size of the data we're trying to read. - int dataSize = sizeof(T); + int fullSize = size; + inst->totalSize = size; if (inst->traceData) { inst->traceData->setAddr(addr); } if (inst->split2ndAccess) { - dataSize = inst->split2ndSize; + size = inst->split2ndSize; cache_req->splitAccess = true; cache_req->split2ndAccess = true; @@ -473,7 +474,7 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) //The address of the second part of this access if it needs to be split //across a cache line boundary. - Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + Addr secondAddr = roundDown(addr + size - 1, blockSize); if (secondAddr > addr && !inst->split2ndAccess) { @@ -483,8 +484,7 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) // Save All "Total" Split Information // ============================== inst->splitInst = true; - inst->splitMemData = new uint8_t[dataSize]; - inst->splitTotalSize = dataSize; + inst->splitMemData = new uint8_t[size]; if (!inst->splitInstSked) { // Schedule Split Read/Complete for Instruction @@ -517,22 +517,22 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) // Split Information for First Access // ============================== - dataSize = secondAddr - addr; + size = secondAddr - addr; cache_req->splitAccess = true; // Split Information for Second Access // ============================== - inst->split2ndSize = addr + sizeof(T) - secondAddr; + inst->split2ndSize = addr + fullSize - secondAddr; inst->split2ndAddr = secondAddr; - inst->split2ndDataPtr = inst->splitMemData + dataSize; + inst->split2ndDataPtr = inst->splitMemData + size; inst->split2ndFlags = flags; } - doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Read); + doTLBAccess(inst, cache_req, size, flags, TheISA::TLB::Read); if (cache_req->fault == NoFault) { if (!cache_req->splitAccess) { - cache_req->reqData = new uint8_t[dataSize]; + cache_req->reqData = new uint8_t[size]; doCacheAccess(inst, NULL); } else { if (!inst->split2ndAccess) { @@ -548,10 +548,9 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) return cache_req->fault; } -template Fault -CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, - uint64_t *write_res) +CacheUnit::write(DynInstPtr inst, uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *write_res) { CacheReqPtr cache_req = dynamic_cast(findRequest(inst)); assert(cache_req && "Can't Find Instruction for Write!"); @@ -559,16 +558,16 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, // The block size of our peer unsigned blockSize = this->cachePort->peerBlockSize(); - //The size of the data we're trying to read. - int dataSize = sizeof(T); + //The size of the data we're trying to write. + int fullSize = size; + inst->totalSize = size; if (inst->traceData) { inst->traceData->setAddr(addr); - inst->traceData->setData(data); } if (inst->split2ndAccess) { - dataSize = inst->split2ndSize; + size = inst->split2ndSize; cache_req->splitAccess = true; cache_req->split2ndAccess = true; @@ -579,7 +578,7 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, //The address of the second part of this access if it needs to be split //across a cache line boundary. - Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + Addr secondAddr = roundDown(addr + size - 1, blockSize); if (secondAddr > addr && !inst->split2ndAccess) { @@ -589,7 +588,6 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, // Save All "Total" Split Information // ============================== inst->splitInst = true; - inst->splitTotalSize = dataSize; if (!inst->splitInstSked) { // Schedule Split Read/Complete for Instruction @@ -624,25 +622,25 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, // Split Information for First Access // ============================== - dataSize = secondAddr - addr; + size = secondAddr - addr; cache_req->splitAccess = true; // Split Information for Second Access // ============================== - inst->split2ndSize = addr + sizeof(T) - secondAddr; + inst->split2ndSize = addr + fullSize - secondAddr; inst->split2ndAddr = secondAddr; inst->split2ndStoreDataPtr = &cache_req->inst->storeData; - inst->split2ndStoreDataPtr += dataSize; + inst->split2ndStoreDataPtr += size; inst->split2ndFlags = flags; inst->splitInstSked = true; } - doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Write); + doTLBAccess(inst, cache_req, size, flags, TheISA::TLB::Write); if (cache_req->fault == NoFault) { if (!cache_req->splitAccess) { // Remove this line since storeData is saved in INST? - cache_req->reqData = new uint8_t[dataSize]; + cache_req->reqData = new uint8_t[size]; doCacheAccess(inst, write_res); } else { doCacheAccess(inst, write_res, cache_req); @@ -729,8 +727,8 @@ CacheUnit::execute(int slot_num) cache_req->inst->split2ndAddr); inst->split2ndAccess = true; assert(inst->split2ndAddr != 0); - read(inst, inst->split2ndAddr, inst->split2ndData, - inst->split2ndFlags); + read(inst, inst->split2ndAddr, &inst->split2ndData, + inst->totalSize, inst->split2ndFlags); break; case InitSecondSplitWrite: @@ -741,8 +739,8 @@ CacheUnit::execute(int slot_num) inst->split2ndAccess = true; assert(inst->split2ndAddr != 0); - write(inst, inst->split2ndAddr, inst->split2ndData, - inst->split2ndFlags, NULL); + write(inst, &inst->split2ndData, inst->totalSize, + inst->split2ndAddr, inst->split2ndFlags, NULL); break; @@ -1075,7 +1073,7 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) if (inst->splitFinishCnt == 2) { cache_req->memReq->setVirt(0/*inst->tid*/, inst->getMemAddr(), - inst->splitTotalSize, + inst->totalSize, 0, 0); @@ -1301,113 +1299,3 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, freeSlot(slot_remove_list[i]); } -// Extra Template Definitions -#ifndef DOXYGEN_SHOULD_SKIP_THIS - -template -Fault -CacheUnit::read(DynInstPtr inst, Addr addr, Twin32_t &data, unsigned flags); - -template -Fault -CacheUnit::read(DynInstPtr inst, Addr addr, Twin64_t &data, unsigned flags); - -template -Fault -CacheUnit::read(DynInstPtr inst, Addr addr, uint64_t &data, unsigned flags); - -template -Fault -CacheUnit::read(DynInstPtr inst, Addr addr, uint32_t &data, unsigned flags); - -template -Fault -CacheUnit::read(DynInstPtr inst, Addr addr, uint16_t &data, unsigned flags); - -template -Fault -CacheUnit::read(DynInstPtr inst, Addr addr, uint8_t &data, unsigned flags); - -#endif //DOXYGEN_SHOULD_SKIP_THIS - -template<> -Fault -CacheUnit::read(DynInstPtr inst, Addr addr, double &data, unsigned flags) -{ - return read(inst, addr, *(uint64_t*)&data, flags); -} - -template<> -Fault -CacheUnit::read(DynInstPtr inst, Addr addr, float &data, unsigned flags) -{ - return read(inst, addr, *(uint32_t*)&data, flags); -} - - -template<> -Fault -CacheUnit::read(DynInstPtr inst, Addr addr, int32_t &data, unsigned flags) -{ - return read(inst, addr, (uint32_t&)data, flags); -} - -#ifndef DOXYGEN_SHOULD_SKIP_THIS - -template -Fault -CacheUnit::write(DynInstPtr inst, Twin32_t data, Addr addr, - unsigned flags, uint64_t *res); - -template -Fault -CacheUnit::write(DynInstPtr inst, Twin64_t data, Addr addr, - unsigned flags, uint64_t *res); - -template -Fault -CacheUnit::write(DynInstPtr inst, uint64_t data, Addr addr, - unsigned flags, uint64_t *res); - -template -Fault -CacheUnit::write(DynInstPtr inst, uint32_t data, Addr addr, - unsigned flags, uint64_t *res); - -template -Fault -CacheUnit::write(DynInstPtr inst, uint16_t data, Addr addr, - unsigned flags, uint64_t *res); - -template -Fault -CacheUnit::write(DynInstPtr inst, uint8_t data, Addr addr, - unsigned flags, uint64_t *res); - -#endif //DOXYGEN_SHOULD_SKIP_THIS - -template<> -Fault -CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, - uint64_t *res) -{ - return write(inst, *(uint64_t*)&data, addr, flags, res); -} - -template<> -Fault -CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, - uint64_t *res) -{ - return write(inst, *(uint32_t*)&data, addr, flags, res); -} - - -template<> -Fault -CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, - uint64_t *res) -{ - return write(inst, (uint32_t)data, addr, flags, res); -} - diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index 177f81559..2f369db7c 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -161,12 +161,11 @@ class CacheUnit : public Resource /** Returns a specific port. */ Port *getPort(const std::string &if_name, int idx); - template - Fault read(DynInstPtr inst, Addr addr, T &data, unsigned flags); + Fault read(DynInstPtr inst, Addr addr, + uint8_t *data, unsigned size, unsigned flags); - template - Fault write(DynInstPtr inst, T data, Addr addr, unsigned flags, - uint64_t *res); + Fault write(DynInstPtr inst, uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res); Fault doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size, int flags, TheISA::TLB::Mode tlb_mode); diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 82d4ca25b..a102a21f5 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -702,18 +702,16 @@ class FullO3CPU : public BaseO3CPU std::vector tids; /** CPU read function, forwards read to LSQ. */ - template Fault read(RequestPtr &req, RequestPtr &sreqLow, RequestPtr &sreqHigh, - T &data, int load_idx) + uint8_t *data, int load_idx) { return this->iew.ldstQueue.read(req, sreqLow, sreqHigh, data, load_idx); } /** CPU write function, forwards write to LSQ. */ - template Fault write(RequestPtr &req, RequestPtr &sreqLow, RequestPtr &sreqHigh, - T &data, int store_idx) + uint8_t *data, int store_idx) { return this->iew.ldstQueue.write(req, sreqLow, sreqHigh, data, store_idx); diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 7a7ea917f..0ad5d51c2 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -273,16 +273,14 @@ class LSQ { /** Executes a read operation, using the load specified at the load * index. */ - template Fault read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, - T &data, int load_idx); + uint8_t *data, int load_idx); /** Executes a store operation, using the store specified at the store * index. */ - template Fault write(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, - T &data, int store_idx); + uint8_t *data, int store_idx); /** The CPU pointer. */ O3CPU *cpu; @@ -371,10 +369,9 @@ class LSQ { }; template -template Fault LSQ::read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, - T &data, int load_idx) + uint8_t *data, int load_idx) { ThreadID tid = req->threadId(); @@ -382,10 +379,9 @@ LSQ::read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, } template -template Fault LSQ::write(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, - T &data, int store_idx) + uint8_t *data, int store_idx) { ThreadID tid = req->threadId(); diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index c19a368d1..10b1ed11a 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -474,14 +474,12 @@ class LSQUnit { public: /** Executes the load at the given index. */ - template - Fault read(Request *req, Request *sreqLow, Request *sreqHigh, T &data, - int load_idx); + Fault read(Request *req, Request *sreqLow, Request *sreqHigh, + uint8_t *data, int load_idx); /** Executes the store at the given index. */ - template - Fault write(Request *req, Request *sreqLow, Request *sreqHigh, T &data, - int store_idx); + Fault write(Request *req, Request *sreqLow, Request *sreqHigh, + uint8_t *data, int store_idx); /** Returns the index of the head load instruction. */ int getLoadHead() { return loadHead; } @@ -514,10 +512,9 @@ class LSQUnit { }; template -template Fault LSQUnit::read(Request *req, Request *sreqLow, Request *sreqHigh, - T &data, int load_idx) + uint8_t *data, int load_idx) { DynInstPtr load_inst = loadQueue[load_idx]; @@ -605,7 +602,8 @@ LSQUnit::read(Request *req, Request *sreqLow, Request *sreqHigh, // Get shift amount for offset into the store's data. int shift_amt = req->getVaddr() & (store_size - 1); - memcpy(&data, storeQueue[store_idx].data + shift_amt, sizeof(T)); + memcpy(data, storeQueue[store_idx].data + shift_amt, + req->getSize()); assert(!load_inst->memData); load_inst->memData = new uint8_t[64]; @@ -809,10 +807,9 @@ LSQUnit::read(Request *req, Request *sreqLow, Request *sreqHigh, } template -template Fault LSQUnit::write(Request *req, Request *sreqLow, Request *sreqHigh, - T &data, int store_idx) + uint8_t *data, int store_idx) { assert(storeQueue[store_idx].inst); @@ -824,17 +821,17 @@ LSQUnit::write(Request *req, Request *sreqLow, Request *sreqHigh, storeQueue[store_idx].req = req; storeQueue[store_idx].sreqLow = sreqLow; storeQueue[store_idx].sreqHigh = sreqHigh; - storeQueue[store_idx].size = sizeof(T); + unsigned size = req->getSize(); + storeQueue[store_idx].size = size; + assert(size <= sizeof(storeQueue[store_idx].data)); // Split stores can only occur in ISAs with unaligned memory accesses. If // a store request has been split, sreqLow and sreqHigh will be non-null. if (TheISA::HasUnalignedMemAcc && sreqLow) { storeQueue[store_idx].isSplit = true; } - assert(sizeof(T) <= sizeof(storeQueue[store_idx].data)); - T gData = htog(data); - memcpy(storeQueue[store_idx].data, &gData, sizeof(T)); + memcpy(storeQueue[store_idx].data, data, size); // This function only writes the data to the store queue, so no fault // can happen here. diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index f8819c734..8ee91758f 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -294,9 +294,9 @@ AtomicSimpleCPU::suspendContext(int thread_num) } -template Fault -AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) +AtomicSimpleCPU::readBytes(Addr addr, uint8_t * data, + unsigned size, unsigned flags) { // use the CPU's statically allocated read request and packet objects Request *req = &data_read_req; @@ -308,21 +308,19 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) //The block size of our peer. unsigned blockSize = dcachePort.peerBlockSize(); //The size of the data we're trying to read. - int dataSize = sizeof(T); - - uint8_t * dataPtr = (uint8_t *)&data; + int fullSize = size; //The address of the second part of this access if it needs to be split //across a cache line boundary. - Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + Addr secondAddr = roundDown(addr + size - 1, blockSize); - if(secondAddr > addr) - dataSize = secondAddr - addr; + if (secondAddr > addr) + size = secondAddr - addr; dcache_latency = 0; - while(1) { - req->setVirt(0, addr, dataSize, flags, thread->readPC()); + while (1) { + req->setVirt(0, addr, size, flags, thread->readPC()); // translate to physical address Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read); @@ -332,7 +330,7 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) Packet pkt = Packet(req, req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq, Packet::Broadcast); - pkt.dataStatic(dataPtr); + pkt.dataStatic(data); if (req->isMmapedIpr()) dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt); @@ -363,10 +361,6 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) //If we don't need to access a second cache line, stop now. if (secondAddr <= addr) { - data = gtoh(data); - if (traceData) { - traceData->setData(data); - } if (req->isLocked() && fault == NoFault) { assert(!locked); locked = true; @@ -379,14 +373,30 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) */ //Move the pointer we're reading into to the correct location. - dataPtr += dataSize; + data += size; //Adjust the size to get the remaining bytes. - dataSize = addr + sizeof(T) - secondAddr; + size = addr + fullSize - secondAddr; //And access the right address. addr = secondAddr; } } + +template +Fault +AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) +{ + uint8_t *dataPtr = (uint8_t *)&data; + memset(dataPtr, 0, sizeof(data)); + Fault fault = readBytes(addr, dataPtr, sizeof(data), flags); + if (fault == NoFault) { + data = gtoh(data); + if (traceData) + traceData->setData(data); + } + return fault; +} + #ifndef DOXYGEN_SHOULD_SKIP_THIS template @@ -438,38 +448,33 @@ AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags) } -template Fault -AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +AtomicSimpleCPU::writeBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res) { // use the CPU's statically allocated write request and packet objects Request *req = &data_write_req; if (traceData) { traceData->setAddr(addr); - traceData->setData(data); } - data = htog(data); - //The block size of our peer. unsigned blockSize = dcachePort.peerBlockSize(); //The size of the data we're trying to read. - int dataSize = sizeof(T); - - uint8_t * dataPtr = (uint8_t *)&data; + int fullSize = size; //The address of the second part of this access if it needs to be split //across a cache line boundary. - Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + Addr secondAddr = roundDown(addr + size - 1, blockSize); if(secondAddr > addr) - dataSize = secondAddr - addr; + size = secondAddr - addr; dcache_latency = 0; while(1) { - req->setVirt(0, addr, dataSize, flags, thread->readPC()); + req->setVirt(0, addr, size, flags, thread->readPC()); // translate to physical address Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write); @@ -492,7 +497,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) { Packet pkt = Packet(req, cmd, Packet::Broadcast); - pkt.dataStatic(dataPtr); + pkt.dataStatic(data); if (req->isMmapedIpr()) { dcache_latency += @@ -508,7 +513,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) if (req->isSwap()) { assert(res); - *res = pkt.get(); + memcpy(res, pkt.getPtr(), fullSize); } } @@ -537,15 +542,32 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) */ //Move the pointer we're reading into to the correct location. - dataPtr += dataSize; + data += size; //Adjust the size to get the remaining bytes. - dataSize = addr + sizeof(T) - secondAddr; + size = addr + fullSize - secondAddr; //And access the right address. addr = secondAddr; } } +template +Fault +AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + uint8_t *dataPtr = (uint8_t *)&data; + if (traceData) + traceData->setData(data); + data = htog(data); + + Fault fault = writeBytes(dataPtr, sizeof(data), addr, flags, res); + if (fault == NoFault && data_write_req.isSwap()) { + *res = gtoh((T)*res); + } + return fault; +} + + #ifndef DOXYGEN_SHOULD_SKIP_THIS template diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 2a66e9341..5ec1970e7 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -134,9 +134,14 @@ class AtomicSimpleCPU : public BaseSimpleCPU template Fault read(Addr addr, T &data, unsigned flags); + Fault readBytes(Addr addr, uint8_t *data, unsigned size, unsigned flags); + template Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + Fault writeBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res); + /** * Print state of address in memory system via PrintReq (for * debugging). diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index b8fc5ab84..1670cb066 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -414,26 +414,25 @@ TimingSimpleCPU::buildSplitPacket(PacketPtr &pkt1, PacketPtr &pkt2, pkt2->senderState = new SplitFragmentSenderState(pkt, 1); } -template Fault -TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) +TimingSimpleCPU::readBytes(Addr addr, uint8_t *data, + unsigned size, unsigned flags) { Fault fault; const int asid = 0; const ThreadID tid = 0; const Addr pc = thread->readPC(); unsigned block_size = dcachePort.peerBlockSize(); - int data_size = sizeof(T); BaseTLB::Mode mode = BaseTLB::Read; if (traceData) { traceData->setAddr(addr); } - RequestPtr req = new Request(asid, addr, data_size, + RequestPtr req = new Request(asid, addr, size, flags, pc, _cpuId, tid); - Addr split_addr = roundDown(addr + data_size - 1, block_size); + Addr split_addr = roundDown(addr + size - 1, block_size); assert(split_addr <= addr || split_addr - addr < block_size); _status = DTBWaitResponse; @@ -443,7 +442,7 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) req->splitOnVaddr(split_addr, req1, req2); WholeTranslationState *state = - new WholeTranslationState(req, req1, req2, (uint8_t *)(new T), + new WholeTranslationState(req, req1, req2, new uint8_t[size], NULL, mode); DataTranslation *trans1 = new DataTranslation(this, state, 0); @@ -454,7 +453,7 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) thread->dtb->translateTiming(req2, tc, trans2, mode); } else { WholeTranslationState *state = - new WholeTranslationState(req, (uint8_t *)(new T), NULL, mode); + new WholeTranslationState(req, new uint8_t[size], NULL, mode); DataTranslation *translation = new DataTranslation(this, state); thread->dtb->translateTiming(req, tc, translation, mode); @@ -463,6 +462,13 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) return NoFault; } +template +Fault +TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) +{ + return readBytes(addr, (uint8_t *)&data, sizeof(T), flags); +} + #ifndef DOXYGEN_SHOULD_SKIP_THIS template @@ -532,30 +538,26 @@ TimingSimpleCPU::handleWritePacket() return dcache_pkt == NULL; } -template Fault -TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +TimingSimpleCPU::writeTheseBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res) { const int asid = 0; const ThreadID tid = 0; const Addr pc = thread->readPC(); unsigned block_size = dcachePort.peerBlockSize(); - int data_size = sizeof(T); BaseTLB::Mode mode = BaseTLB::Write; if (traceData) { traceData->setAddr(addr); - traceData->setData(data); } - RequestPtr req = new Request(asid, addr, data_size, + RequestPtr req = new Request(asid, addr, size, flags, pc, _cpuId, tid); - Addr split_addr = roundDown(addr + data_size - 1, block_size); + Addr split_addr = roundDown(addr + size - 1, block_size); assert(split_addr <= addr || split_addr - addr < block_size); - T *dataP = new T; - *dataP = TheISA::htog(data); _status = DTBWaitResponse; if (split_addr > addr) { RequestPtr req1, req2; @@ -563,8 +565,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) req->splitOnVaddr(split_addr, req1, req2); WholeTranslationState *state = - new WholeTranslationState(req, req1, req2, (uint8_t *)dataP, - res, mode); + new WholeTranslationState(req, req1, req2, data, res, mode); DataTranslation *trans1 = new DataTranslation(this, state, 0); DataTranslation *trans2 = @@ -574,7 +575,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) thread->dtb->translateTiming(req2, tc, trans2, mode); } else { WholeTranslationState *state = - new WholeTranslationState(req, (uint8_t *)dataP, res, mode); + new WholeTranslationState(req, data, res, mode); DataTranslation *translation = new DataTranslation(this, state); thread->dtb->translateTiming(req, tc, translation, mode); @@ -584,6 +585,28 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) return NoFault; } +Fault +TimingSimpleCPU::writeBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res) +{ + uint8_t *newData = new uint8_t[size]; + memcpy(newData, data, size); + return writeTheseBytes(newData, size, addr, flags, res); +} + +template +Fault +TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + if (traceData) { + traceData->setData(data); + } + T *dataP = new T; + *dataP = TheISA::htog(data); + + return writeTheseBytes((uint8_t *)dataP, sizeof(T), addr, flags, res); +} + #ifndef DOXYGEN_SHOULD_SKIP_THIS template diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index 62c105418..65cbe3098 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -251,9 +251,14 @@ class TimingSimpleCPU : public BaseSimpleCPU template Fault read(Addr addr, T &data, unsigned flags); + Fault readBytes(Addr addr, uint8_t *data, unsigned size, unsigned flags); + template Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + Fault writeBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res); + void fetch(); void sendFetch(Fault fault, RequestPtr req, ThreadContext *tc); void completeIfetch(PacketPtr ); @@ -274,6 +279,11 @@ class TimingSimpleCPU : public BaseSimpleCPU private: + // The backend for writeBytes and write. It's the same as writeBytes, but + // doesn't make a copy of data. + Fault writeTheseBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res); + typedef EventWrapper FetchEvent; FetchEvent fetchEvent; -- 2.30.2