X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcpu%2Fbase_dyn_inst.hh;h=a6c08cc1f115e86a72c9a8677df872fa56a549d8;hb=392c1ced53827198652f5eda58e1874246b024f4;hp=a5357a7b05de0ae8dad2a1d06a1771590299acc2;hpb=7dde557fdc51140988092962137e1006d1609bea;p=gem5.git diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index a5357a7b0..a6c08cc1f 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -1,5 +1,6 @@ /* - * Copyright (c) 2011 ARM Limited + * Copyright (c) 2011, 2013, 2016-2020 ARM Limited + * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -37,159 +38,66 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Kevin Lim - * Timothy M. Jones */ #ifndef __CPU_BASE_DYN_INST_HH__ #define __CPU_BASE_DYN_INST_HH__ +#include #include +#include #include #include -#include "arch/faults.hh" +#include "arch/generic/tlb.hh" #include "arch/utility.hh" -#include "base/fast_alloc.hh" #include "base/trace.hh" -#include "config/full_system.hh" #include "config/the_isa.hh" -#include "cpu/o3/comm.hh" +#include "cpu/checker/cpu.hh" +#include "cpu/exec_context.hh" #include "cpu/exetrace.hh" +#include "cpu/inst_res.hh" #include "cpu/inst_seq.hh" #include "cpu/op_class.hh" #include "cpu/static_inst.hh" #include "cpu/translation.hh" +#include "debug/HtmCpu.hh" #include "mem/packet.hh" +#include "mem/request.hh" #include "sim/byteswap.hh" #include "sim/system.hh" -#include "sim/tlb.hh" /** * @file * Defines a dynamic instruction context. */ -// Forward declaration. -class StaticInstPtr; - template -class BaseDynInst : public FastAlloc, public RefCounted +class BaseDynInst : public ExecContext, public RefCounted { public: // Typedef for the CPU. typedef typename Impl::CPUType ImplCPU; typedef typename ImplCPU::ImplState ImplState; + using VecRegContainer = TheISA::VecRegContainer; - // Logical register index type. - typedef TheISA::RegIndex RegIndex; - // Integer register type. - typedef TheISA::IntReg IntReg; - // Floating point register type. - typedef TheISA::FloatReg FloatReg; + using LSQRequestPtr = typename Impl::CPUPol::LSQ::LSQRequest*; + using LQIterator = typename Impl::CPUPol::LSQUnit::LQIterator; + using SQIterator = typename Impl::CPUPol::LSQUnit::SQIterator; // The DynInstPtr type. typedef typename Impl::DynInstPtr DynInstPtr; + typedef RefCountingPtr > BaseDynInstPtr; // The list of instructions iterator type. typedef typename std::list::iterator ListIt; enum { MaxInstSrcRegs = TheISA::MaxInstSrcRegs, /// Max source regs - MaxInstDestRegs = TheISA::MaxInstDestRegs, /// Max dest regs + MaxInstDestRegs = TheISA::MaxInstDestRegs /// Max dest regs }; - /** The StaticInst used by this BaseDynInst. */ - StaticInstPtr staticInst; - - //////////////////////////////////////////// - // - // INSTRUCTION EXECUTION - // - //////////////////////////////////////////// - /** InstRecord that tracks this instructions. */ - Trace::InstRecord *traceData; - - void demapPage(Addr vaddr, uint64_t asn) - { - cpu->demapPage(vaddr, asn); - } - void demapInstPage(Addr vaddr, uint64_t asn) - { - cpu->demapPage(vaddr, asn); - } - void demapDataPage(Addr vaddr, uint64_t asn) - { - cpu->demapPage(vaddr, asn); - } - - /** - * Does a read to a given address. - * @param addr The address to read. - * @param data The read's data is written into this parameter. - * @param flags The request's flags. - * @return Returns any fault due to the read. - */ - template - Fault read(Addr addr, T &data, unsigned flags); - - Fault readBytes(Addr addr, uint8_t *data, unsigned size, unsigned flags); - - /** - * Does a write to a given address. - * @param data The data to be written. - * @param addr The address to write to. - * @param flags The request's flags. - * @param res The result of the write (for load locked/store conditionals). - * @return Returns any fault due to the write. - */ - template - Fault write(T data, Addr addr, unsigned flags, uint64_t *res); - - Fault writeBytes(uint8_t *data, unsigned size, - Addr addr, unsigned flags, uint64_t *res); - - /** Splits a request in two if it crosses a dcache block. */ - void splitRequest(RequestPtr req, RequestPtr &sreqLow, - RequestPtr &sreqHigh); - - /** Initiate a DTB address translation. */ - void initiateTranslation(RequestPtr req, RequestPtr sreqLow, - RequestPtr sreqHigh, uint64_t *res, - BaseTLB::Mode mode); - - /** Finish a DTB address translation. */ - void finishTranslation(WholeTranslationState *state); - - /** True if the DTB address translation has started. */ - bool translationStarted; - - /** True if the DTB address translation has completed. */ - bool translationCompleted; - - /** - * Returns true if the DTB address translation is being delayed due to a hw - * page table walk. - */ - bool isTranslationDelayed() const - { - return (translationStarted && !translationCompleted); - } - - /** - * Saved memory requests (needed when the DTB address translation is - * delayed due to a hw page table walk). - */ - RequestPtr savedReq; - RequestPtr savedSreqLow; - RequestPtr savedSreqHigh; - - /** @todo: Consider making this private. */ - public: - /** The sequence number of the instruction. */ - InstSeqNum seqNum; - + protected: enum Status { IqEntry, /// Instruction is in the IQ RobEntry, /// Instruction is in the ROB @@ -206,6 +114,9 @@ class BaseDynInst : public FastAlloc, public RefCounted SquashedInIQ, /// Instruction is squashed in the IQ SquashedInLSQ, /// Instruction is squashed in the LSQ SquashedInROB, /// Instruction is squashed in the ROB + PinnedRegsRenamed, /// Pinned registers are renamed + PinnedRegsWritten, /// Pinned registers are written back + PinnedRegsSquashDone, /// Regs pinning status updated after squash RecoverInst, /// Is a recover instruction BlockingInst, /// Is a blocking instruction ThreadsyncWait, /// Is a thread synchronization instruction @@ -216,149 +127,276 @@ class BaseDynInst : public FastAlloc, public RefCounted NumStatus }; - /** The status of this BaseDynInst. Several bits can be set. */ - std::bitset status; - - /** The thread this instruction is from. */ - ThreadID threadNumber; + enum Flags { + NotAnInst, + TranslationStarted, + TranslationCompleted, + PossibleLoadViolation, + HitExternalSnoop, + EffAddrValid, + RecordResult, + Predicate, + MemAccPredicate, + PredTaken, + IsStrictlyOrdered, + ReqMade, + MemOpDone, + HtmFromTransaction, + MaxFlags + }; - /** data address space ID, for loads & stores. */ - short asid; + public: + /** The sequence number of the instruction. */ + InstSeqNum seqNum; - /** How many source registers are ready. */ - unsigned readyRegs; + /** The StaticInst used by this BaseDynInst. */ + const StaticInstPtr staticInst; /** Pointer to the Impl's CPU object. */ ImplCPU *cpu; + BaseCPU *getCpuPtr() { return cpu; } + /** Pointer to the thread state. */ ImplState *thread; /** The kind of fault this instruction has generated. */ Fault fault; - /** Pointer to the data for the memory access. */ - uint8_t *memData; + /** InstRecord that tracks this instructions. */ + Trace::InstRecord *traceData; - /** The effective virtual address (lds & stores only). */ - Addr effAddr; + protected: + /** The result of the instruction; assumes an instruction can have many + * destination registers. + */ + std::queue instResult; - /** The size of the request */ - Addr effSize; + /** PC state for this instruction. */ + TheISA::PCState pc; - /** Is the effective virtual address valid. */ - bool effAddrValid; + private: + /* An amalgamation of a lot of boolean values into one */ + std::bitset instFlags; - /** The effective physical address. */ - Addr physEffAddr; + /** The status of this BaseDynInst. Several bits can be set. */ + std::bitset status; + + protected: + /** Whether or not the source register is ready. + * @todo: Not sure this should be here vs the derived class. + */ + std::bitset _readySrcRegIdx; + + public: + /** The thread this instruction is from. */ + ThreadID threadNumber; - /** Effective virtual address for a copy source. */ - Addr copySrcEffAddr; + /** Iterator pointing to this BaseDynInst in the list of all insts. */ + ListIt instListIt; - /** Effective physical address for a copy source. */ - Addr copySrcPhysEffAddr; + ////////////////////// Branch Data /////////////// + /** Predicted PC state after this instruction. */ + TheISA::PCState predPC; - /** The memory request flags (from translation). */ - unsigned memReqFlags; + /** The Macroop if one exists */ + const StaticInstPtr macroop; - union Result { - uint64_t integer; -// float fp; - double dbl; - }; + /** How many source registers are ready. */ + uint8_t readyRegs; - /** The result of the instruction; assumes for now that there's only one - * destination register. - */ - Result instResult; + public: + /////////////////////// Load Store Data ////////////////////// + /** The effective virtual address (lds & stores only). */ + Addr effAddr; - /** Records changes to result? */ - bool recordResult; + /** The effective physical address. */ + Addr physEffAddr; - /** Did this instruction execute, or is it predicated false */ - bool predicate; + /** The memory request flags (from translation). */ + unsigned memReqFlags; - protected: - /** PC state for this instruction. */ - TheISA::PCState pc; + /** The size of the request */ + unsigned effSize; - /** Predicted PC state after this instruction. */ - TheISA::PCState predPC; + /** Pointer to the data for the memory access. */ + uint8_t *memData; - /** If this is a branch that was predicted taken */ - bool predTaken; + /** Load queue index. */ + int16_t lqIdx; + LQIterator lqIt; - public: + /** Store queue index. */ + int16_t sqIdx; + SQIterator sqIt; -#ifdef DEBUG - void dumpSNList(); -#endif - /** Whether or not the source register is ready. - * @todo: Not sure this should be here vs the derived class. + /////////////////////// TLB Miss ////////////////////// + /** + * Saved memory request (needed when the DTB address translation is + * delayed due to a hw page table walk). */ - bool _readySrcRegIdx[MaxInstSrcRegs]; + LSQRequestPtr savedReq; + + /////////////////////// Checker ////////////////////// + // Need a copy of main request pointer to verify on writes. + RequestPtr reqToVerify; + + private: + // hardware transactional memory + uint64_t htmUid; + uint64_t htmDepth; protected: /** Flattened register index of the destination registers of this * instruction. */ - TheISA::RegIndex _flatDestRegIdx[TheISA::MaxInstDestRegs]; - - /** Flattened register index of the source registers of this - * instruction. - */ - TheISA::RegIndex _flatSrcRegIdx[TheISA::MaxInstSrcRegs]; + std::array _flatDestRegIdx; /** Physical register index of the destination registers of this * instruction. */ - PhysRegIndex _destRegIdx[TheISA::MaxInstDestRegs]; + std::array _destRegIdx; /** Physical register index of the source registers of this * instruction. */ - PhysRegIndex _srcRegIdx[TheISA::MaxInstSrcRegs]; + std::array _srcRegIdx; /** Physical register index of the previous producers of the * architected destinations. */ - PhysRegIndex _prevDestRegIdx[TheISA::MaxInstDestRegs]; + std::array _prevDestRegIdx; + public: + /** Records changes to result? */ + void recordResult(bool f) { instFlags[RecordResult] = f; } + + /** Is the effective virtual address valid. */ + bool effAddrValid() const { return instFlags[EffAddrValid]; } + void effAddrValid(bool b) { instFlags[EffAddrValid] = b; } + + /** Whether or not the memory operation is done. */ + bool memOpDone() const { return instFlags[MemOpDone]; } + void memOpDone(bool f) { instFlags[MemOpDone] = f; } + + bool notAnInst() const { return instFlags[NotAnInst]; } + void setNotAnInst() { instFlags[NotAnInst] = true; } + + + //////////////////////////////////////////// + // + // INSTRUCTION EXECUTION + // + //////////////////////////////////////////// + + void + demapPage(Addr vaddr, uint64_t asn) override + { + cpu->demapPage(vaddr, asn); + } + void + demapInstPage(Addr vaddr, uint64_t asn) + { + cpu->demapPage(vaddr, asn); + } + void + demapDataPage(Addr vaddr, uint64_t asn) + { + cpu->demapPage(vaddr, asn); + } + + Fault initiateMemRead(Addr addr, unsigned size, Request::Flags flags, + const std::vector &byte_enable=std::vector()) override; + + Fault initiateHtmCmd(Request::Flags flags) override; + + Fault writeMem(uint8_t *data, unsigned size, Addr addr, + Request::Flags flags, uint64_t *res, + const std::vector &byte_enable=std::vector()) + override; + + Fault initiateMemAMO(Addr addr, unsigned size, Request::Flags flags, + AtomicOpFunctorPtr amo_op) override; + + /** True if the DTB address translation has started. */ + bool translationStarted() const { return instFlags[TranslationStarted]; } + void translationStarted(bool f) { instFlags[TranslationStarted] = f; } + + /** True if the DTB address translation has completed. */ + bool translationCompleted() const { return instFlags[TranslationCompleted]; } + void translationCompleted(bool f) { instFlags[TranslationCompleted] = f; } + + /** True if this address was found to match a previous load and they issued + * out of order. If that happend, then it's only a problem if an incoming + * snoop invalidate modifies the line, in which case we need to squash. + * If nothing modified the line the order doesn't matter. + */ + bool + possibleLoadViolation() const + { + return instFlags[PossibleLoadViolation]; + } + void + possibleLoadViolation(bool f) + { + instFlags[PossibleLoadViolation] = f; + } + + /** True if the address hit a external snoop while sitting in the LSQ. + * If this is true and a older instruction sees it, this instruction must + * reexecute + */ + bool hitExternalSnoop() const { return instFlags[HitExternalSnoop]; } + void hitExternalSnoop(bool f) { instFlags[HitExternalSnoop] = f; } + + /** + * Returns true if the DTB address translation is being delayed due to a hw + * page table walk. + */ + bool + isTranslationDelayed() const + { + return (translationStarted() && !translationCompleted()); + } + + public: +#ifdef DEBUG + void dumpSNList(); +#endif /** Returns the physical register index of the i'th destination * register. */ - PhysRegIndex renamedDestRegIdx(int idx) const + PhysRegIdPtr + renamedDestRegIdx(int idx) const { return _destRegIdx[idx]; } /** Returns the physical register index of the i'th source register. */ - PhysRegIndex renamedSrcRegIdx(int idx) const + PhysRegIdPtr + renamedSrcRegIdx(int idx) const { + assert(TheISA::MaxInstSrcRegs > idx); return _srcRegIdx[idx]; } /** Returns the flattened register index of the i'th destination * register. */ - TheISA::RegIndex flattenedDestRegIdx(int idx) const + const RegId & + flattenedDestRegIdx(int idx) const { return _flatDestRegIdx[idx]; } - /** Returns the flattened register index of the i'th source register */ - TheISA::RegIndex flattenedSrcRegIdx(int idx) const - { - return _flatSrcRegIdx[idx]; - } - /** Returns the physical register index of the previous physical register * that remapped to the same logical register index. */ - PhysRegIndex prevDestRegIdx(int idx) const + PhysRegIdPtr + prevDestRegIdx(int idx) const { return _prevDestRegIdx[idx]; } @@ -366,34 +404,31 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Renames a destination register to a physical register. Also records * the previous physical register that the logical register mapped to. */ - void renameDestReg(int idx, - PhysRegIndex renamed_dest, - PhysRegIndex previous_rename) + void + renameDestReg(int idx, PhysRegIdPtr renamed_dest, + PhysRegIdPtr previous_rename) { _destRegIdx[idx] = renamed_dest; _prevDestRegIdx[idx] = previous_rename; + if (renamed_dest->isPinned()) + setPinnedRegsRenamed(); } /** Renames a source logical register to the physical register which * has/will produce that logical register's result. * @todo: add in whether or not the source register is ready. */ - void renameSrcReg(int idx, PhysRegIndex renamed_src) + void + renameSrcReg(int idx, PhysRegIdPtr renamed_src) { _srcRegIdx[idx] = renamed_src; } - /** Flattens a source architectural register index into a logical index. - */ - void flattenSrcReg(int idx, TheISA::RegIndex flattened_src) - { - _flatSrcRegIdx[idx] = flattened_src; - } - /** Flattens a destination architectural register index into a logical * index. */ - void flattenDestReg(int idx, TheISA::RegIndex flattened_dest) + void + flattenDestReg(int idx, const RegId &flattened_dest) { _flatDestRegIdx[idx] = flattened_dest; } @@ -404,23 +439,14 @@ class BaseDynInst : public FastAlloc, public RefCounted * @param seq_num The sequence number of the instruction. * @param cpu Pointer to the instruction's CPU. */ - BaseDynInst(StaticInstPtr staticInst, TheISA::PCState pc, - TheISA::PCState predPC, InstSeqNum seq_num, ImplCPU *cpu); - - /** BaseDynInst constructor given a binary instruction. - * @param inst The binary instruction. - * @param _pc The PC state for the instruction. - * @param _predPC The predicted next PC state for the instruction. - * @param seq_num The sequence number of the instruction. - * @param cpu Pointer to the instruction's CPU. - */ - BaseDynInst(TheISA::ExtMachInst inst, TheISA::PCState pc, - TheISA::PCState predPC, InstSeqNum seq_num, ImplCPU *cpu); + BaseDynInst(const StaticInstPtr &staticInst, const StaticInstPtr ¯oop, + TheISA::PCState pc, TheISA::PCState predPC, + InstSeqNum seq_num, ImplCPU *cpu); /** BaseDynInst constructor given a StaticInst pointer. * @param _staticInst The StaticInst for this BaseDynInst. */ - BaseDynInst(StaticInstPtr &_staticInst); + BaseDynInst(const StaticInstPtr &staticInst, const StaticInstPtr ¯oop); /** BaseDynInst destructor. */ ~BaseDynInst(); @@ -437,13 +463,22 @@ class BaseDynInst : public FastAlloc, public RefCounted void dump(std::string &outstring); /** Read this CPU's ID. */ - int cpuId() { return cpu->cpuId(); } + int cpuId() const { return cpu->cpuId(); } + + /** Read this CPU's Socket ID. */ + uint32_t socketId() const { return cpu->socketId(); } + + /** Read this CPU's data requestor ID */ + RequestorID requestorId() const { return cpu->dataRequestorId(); } /** Read this context's system-wide ID **/ - int contextId() { return thread->contextId(); } + ContextID contextId() const { return thread->contextId(); } /** Returns the fault type. */ - Fault getFault() { return fault; } + Fault getFault() const { return fault; } + /** TODO: This I added for the LSQRequest side to be able to modify the + * fault. There should be a better mechanism in place. */ + Fault& getFault() { return fault; } /** Checks whether or not this instruction has had its branch target * calculated yet. For now it is not utilized and is hacked to be @@ -453,10 +488,7 @@ class BaseDynInst : public FastAlloc, public RefCounted bool doneTargCalc() { return false; } /** Set the predicted target of this current instruction. */ - void setPredTarg(const TheISA::PCState &_predPC) - { - predPC = _predPC; - } + void setPredTarg(const TheISA::PCState &_predPC) { predPC = _predPC; } const TheISA::PCState &readPredTarg() { return predPC; } @@ -470,18 +502,17 @@ class BaseDynInst : public FastAlloc, public RefCounted Addr predMicroPC() { return predPC.microPC(); } /** Returns whether the instruction was predicted taken or not. */ - bool readPredTaken() - { - return predTaken; - } + bool readPredTaken() { return instFlags[PredTaken]; } - void setPredTaken(bool predicted_taken) + void + setPredTaken(bool predicted_taken) { - predTaken = predicted_taken; + instFlags[PredTaken] = predicted_taken; } /** Returns whether the instruction mispredicted. */ - bool mispredicted() + bool + mispredicted() { TheISA::PCState tempPC = pc; TheISA::advancePC(tempPC, staticInst); @@ -495,13 +526,14 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isMemRef() const { return staticInst->isMemRef(); } bool isLoad() const { return staticInst->isLoad(); } bool isStore() const { return staticInst->isStore(); } + bool isAtomic() const { return staticInst->isAtomic(); } bool isStoreConditional() const { return staticInst->isStoreConditional(); } bool isInstPrefetch() const { return staticInst->isInstPrefetch(); } bool isDataPrefetch() const { return staticInst->isDataPrefetch(); } - bool isCopy() const { return staticInst->isCopy(); } bool isInteger() const { return staticInst->isInteger(); } bool isFloating() const { return staticInst->isFloating(); } + bool isVector() const { return staticInst->isVector(); } bool isControl() const { return staticInst->isControl(); } bool isCall() const { return staticInst->isCall(); } bool isReturn() const { return staticInst->isReturn(); } @@ -512,10 +544,16 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isCondDelaySlot() const { return staticInst->isCondDelaySlot(); } bool isThreadSync() const { return staticInst->isThreadSync(); } bool isSerializing() const { return staticInst->isSerializing(); } - bool isSerializeBefore() const - { return staticInst->isSerializeBefore() || status[SerializeBefore]; } - bool isSerializeAfter() const - { return staticInst->isSerializeAfter() || status[SerializeAfter]; } + bool + isSerializeBefore() const + { + return staticInst->isSerializeBefore() || status[SerializeBefore]; + } + bool + isSerializeAfter() const + { + return staticInst->isSerializeAfter() || status[SerializeAfter]; + } bool isSquashAfter() const { return staticInst->isSquashAfter(); } bool isMemBarrier() const { return staticInst->isMemBarrier(); } bool isWriteBarrier() const { return staticInst->isWriteBarrier(); } @@ -530,6 +568,62 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isLastMicroop() const { return staticInst->isLastMicroop(); } bool isFirstMicroop() const { return staticInst->isFirstMicroop(); } bool isMicroBranch() const { return staticInst->isMicroBranch(); } + // hardware transactional memory + bool isHtmStart() const { return staticInst->isHtmStart(); } + bool isHtmStop() const { return staticInst->isHtmStop(); } + bool isHtmCancel() const { return staticInst->isHtmCancel(); } + bool isHtmCmd() const { return staticInst->isHtmCmd(); } + + uint64_t + getHtmTransactionUid() const override + { + assert(instFlags[HtmFromTransaction]); + return this->htmUid; + } + + uint64_t + newHtmTransactionUid() const override + { + panic("Not yet implemented\n"); + return 0; + } + + bool + inHtmTransactionalState() const override + { + return instFlags[HtmFromTransaction]; + } + + uint64_t + getHtmTransactionalDepth() const override + { + if (inHtmTransactionalState()) + return this->htmDepth; + else + return 0; + } + + void + setHtmTransactionalState(uint64_t htm_uid, uint64_t htm_depth) + { + instFlags.set(HtmFromTransaction); + htmUid = htm_uid; + htmDepth = htm_depth; + } + + void + clearHtmTransactionalState() + { + if (inHtmTransactionalState()) { + DPRINTF(HtmCpu, + "clearing instuction's transactional state htmUid=%u\n", + getHtmTransactionUid()); + + instFlags.reset(HtmFromTransaction); + htmUid = -1; + htmDepth = 0; + } + } /** Temporarily sets this instruction as a serialize before instruction. */ void setSerializeBefore() { status.set(SerializeBefore); } @@ -576,63 +670,132 @@ class BaseDynInst : public FastAlloc, public RefCounted // for machines with separate int & FP reg files int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); } int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); } + int8_t numCCDestRegs() const { return staticInst->numCCDestRegs(); } + int8_t numVecDestRegs() const { return staticInst->numVecDestRegs(); } + int8_t + numVecElemDestRegs() const + { + return staticInst->numVecElemDestRegs(); + } + int8_t + numVecPredDestRegs() const + { + return staticInst->numVecPredDestRegs(); + } /** Returns the logical register index of the i'th destination register. */ - RegIndex destRegIdx(int i) const { return staticInst->destRegIdx(i); } + const RegId& destRegIdx(int i) const { return staticInst->destRegIdx(i); } /** Returns the logical register index of the i'th source register. */ - RegIndex srcRegIdx(int i) const { return staticInst->srcRegIdx(i); } + const RegId& srcRegIdx(int i) const { return staticInst->srcRegIdx(i); } - /** Returns the result of an integer instruction. */ - uint64_t readIntResult() { return instResult.integer; } + /** Return the size of the instResult queue. */ + uint8_t resultSize() { return instResult.size(); } - /** Returns the result of a floating point instruction. */ - float readFloatResult() { return (float)instResult.dbl; } + /** Pops a result off the instResult queue. + * If the result stack is empty, return the default value. + * */ + InstResult + popResult(InstResult dflt=InstResult()) + { + if (!instResult.empty()) { + InstResult t = instResult.front(); + instResult.pop(); + return t; + } + return dflt; + } - /** Returns the result of a floating point (double) instruction. */ - double readDoubleResult() { return instResult.dbl; } + /** Pushes a result onto the instResult queue. */ + /** @{ */ + /** Scalar result. */ + template + void + setScalarResult(T &&t) + { + if (instFlags[RecordResult]) { + instResult.push(InstResult(std::forward(t), + InstResult::ResultType::Scalar)); + } + } - /** Records an integer register being set to a value. */ - void setIntRegOperand(const StaticInst *si, int idx, uint64_t val) + /** Full vector result. */ + template + void + setVecResult(T &&t) { - if (recordResult) - instResult.integer = val; + if (instFlags[RecordResult]) { + instResult.push(InstResult(std::forward(t), + InstResult::ResultType::VecReg)); + } } - /** Records an fp register being set to a value. */ - void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val, - int width) + /** Vector element result. */ + template + void + setVecElemResult(T &&t) { - if (recordResult) { - if (width == 32) - instResult.dbl = (double)val; - else if (width == 64) - instResult.dbl = val; - else - panic("Unsupported width!"); + if (instFlags[RecordResult]) { + instResult.push(InstResult(std::forward(t), + InstResult::ResultType::VecElem)); } } - /** Records an fp register being set to a value. */ - void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val) + /** Predicate result. */ + template + void + setVecPredResult(T &&t) { - if (recordResult) - instResult.dbl = (double)val; + if (instFlags[RecordResult]) { + instResult.push(InstResult(std::forward(t), + InstResult::ResultType::VecPredReg)); + } } + /** @} */ - /** Records an fp register being set to an integer value. */ - void setFloatRegOperandBits(const StaticInst *si, int idx, uint64_t val, - int width) + /** Records an integer register being set to a value. */ + void + setIntRegOperand(const StaticInst *si, int idx, RegVal val) override { - if (recordResult) - instResult.integer = val; + setScalarResult(val); + } + + /** Records a CC register being set to a value. */ + void + setCCRegOperand(const StaticInst *si, int idx, RegVal val) override + { + setScalarResult(val); + } + + /** Record a vector register being set to a value */ + void + setVecRegOperand(const StaticInst *si, int idx, + const VecRegContainer &val) override + { + setVecResult(val); } /** Records an fp register being set to an integer value. */ - void setFloatRegOperandBits(const StaticInst *si, int idx, uint64_t val) + void + setFloatRegOperandBits(const StaticInst *si, int idx, RegVal val) override + { + setScalarResult(val); + } + + /** Record a vector register being set to a value */ + void + setVecElemOperand(const StaticInst *si, int idx, + const VecElem val) override + { + setVecElemResult(val); + } + + /** Record a vector register being set to a value */ + void + setVecPredRegOperand(const StaticInst *si, int idx, + const VecPredRegContainer &val) override { - if (recordResult) - instResult.integer = val; + setVecPredResult(val); } /** Records that one of the source registers is ready. */ @@ -642,7 +805,8 @@ class BaseDynInst : public FastAlloc, public RefCounted void markSrcRegReady(RegIndex src_idx); /** Returns if a source register is ready. */ - bool isReadySrcRegIdx(int idx) const + bool + isReadySrcRegIdx(int idx) const { return this->_readySrcRegIdx[idx]; } @@ -703,7 +867,7 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isCommitted() const { return status[Committed]; } /** Sets this instruction as squashed. */ - void setSquashed() { status.set(Squashed); } + void setSquashed(); /** Returns whether or not this instruction is squashed. */ bool isSquashed() const { return status[Squashed]; } @@ -738,7 +902,7 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isInLSQ() const { return status[LsqEntry]; } /** Sets this instruction as squashed in the LSQ. */ - void setSquashedInLSQ() { status.set(SquashedInLSQ);} + void setSquashedInLSQ() { status.set(SquashedInLSQ); status.set(Squashed);} /** Returns whether or not this instruction is squashed in the LSQ. */ bool isSquashedInLSQ() const { return status[SquashedInLSQ]; } @@ -761,37 +925,79 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns whether or not this instruction is squashed in the ROB. */ bool isSquashedInROB() const { return status[SquashedInROB]; } + /** Returns whether pinned registers are renamed */ + bool isPinnedRegsRenamed() const { return status[PinnedRegsRenamed]; } + + /** Sets the destination registers as renamed */ + void + setPinnedRegsRenamed() + { + assert(!status[PinnedRegsSquashDone]); + assert(!status[PinnedRegsWritten]); + status.set(PinnedRegsRenamed); + } + + /** Returns whether destination registers are written */ + bool isPinnedRegsWritten() const { return status[PinnedRegsWritten]; } + + /** Sets destination registers as written */ + void + setPinnedRegsWritten() + { + assert(!status[PinnedRegsSquashDone]); + assert(status[PinnedRegsRenamed]); + status.set(PinnedRegsWritten); + } + + /** Return whether dest registers' pinning status updated after squash */ + bool + isPinnedRegsSquashDone() const { return status[PinnedRegsSquashDone]; } + + /** Sets dest registers' status updated after squash */ + void + setPinnedRegsSquashDone() { + assert(!status[PinnedRegsSquashDone]); + status.set(PinnedRegsSquashDone); + } + /** Read the PC state of this instruction. */ - const TheISA::PCState pcState() const { return pc; } + TheISA::PCState pcState() const override { return pc; } /** Set the PC state of this instruction. */ - const void pcState(const TheISA::PCState &val) { pc = val; } + void pcState(const TheISA::PCState &val) override { pc = val; } /** Read the PC of this instruction. */ - const Addr instAddr() const { return pc.instAddr(); } + Addr instAddr() const { return pc.instAddr(); } /** Read the PC of the next instruction. */ - const Addr nextInstAddr() const { return pc.nextInstAddr(); } + Addr nextInstAddr() const { return pc.nextInstAddr(); } /**Read the micro PC of this instruction. */ - const Addr microPC() const { return pc.microPC(); } + Addr microPC() const { return pc.microPC(); } - bool readPredicate() - { - return predicate; - } + bool readPredicate() const override { return instFlags[Predicate]; } - void setPredicate(bool val) + void + setPredicate(bool val) override { - predicate = val; + instFlags[Predicate] = val; if (traceData) { traceData->setPredicate(val); } } - /** Sets the ASID. */ - void setASID(short addr_space_id) { asid = addr_space_id; } + bool + readMemAccPredicate() const override + { + return instFlags[MemAccPredicate]; + } + + void + setMemAccPredicate(bool val) override + { + instFlags[MemAccPredicate] = val; + } /** Sets the thread id. */ void setTid(ThreadID tid) { threadNumber = tid; } @@ -800,56 +1006,20 @@ class BaseDynInst : public FastAlloc, public RefCounted void setThreadState(ImplState *state) { thread = state; } /** Returns the thread context. */ - ThreadContext *tcBase() { return thread->getTC(); } - - private: - /** Instruction effective address. - * @todo: Consider if this is necessary or not. - */ - Addr instEffAddr; - - /** Whether or not the effective address calculation is completed. - * @todo: Consider if this is necessary or not. - */ - bool eaCalcDone; - - /** Is this instruction's memory access uncacheable. */ - bool isUncacheable; - - /** Has this instruction generated a memory request. */ - bool reqMade; + ThreadContext *tcBase() const override { return thread->getTC(); } public: - /** Sets the effective address. */ - void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; } - - /** Returns the effective address. */ - const Addr &getEA() const { return instEffAddr; } - - /** Returns whether or not the eff. addr. calculation has been completed. */ - bool doneEACalc() { return eaCalcDone; } - /** Returns whether or not the eff. addr. source registers are ready. */ - bool eaSrcsReady(); + bool eaSrcsReady() const; - /** Whether or not the memory operation is done. */ - bool memOpDone; - - /** Is this instruction's memory access uncacheable. */ - bool uncacheable() { return isUncacheable; } + /** Is this instruction's memory access strictly ordered? */ + bool strictlyOrdered() const { return instFlags[IsStrictlyOrdered]; } + void strictlyOrdered(bool so) { instFlags[IsStrictlyOrdered] = so; } /** Has this instruction generated a memory request. */ - bool hasRequest() { return reqMade; } - - public: - /** Load queue index. */ - int16_t lqIdx; - - /** Store queue index. */ - int16_t sqIdx; - - /** Iterator pointing to this BaseDynInst in the list of all insts. */ - ListIt instListIt; + bool hasRequest() const { return instFlags[ReqMade]; } + /** Assert this instruction has generated a memory request. */ + void setRequest() { instFlags[ReqMade] = true; } /** Returns iterator to this instruction in the list of all insts. */ ListIt &getInstListIt() { return instListIt; } @@ -859,217 +1029,93 @@ class BaseDynInst : public FastAlloc, public RefCounted public: /** Returns the number of consecutive store conditional failures. */ - unsigned readStCondFailures() - { return thread->storeCondFailures; } - - /** Sets the number of consecutive store conditional failures. */ - void setStCondFailures(unsigned sc_failures) - { thread->storeCondFailures = sc_failures; } -}; - -template -Fault -BaseDynInst::readBytes(Addr addr, uint8_t *data, - unsigned size, unsigned flags) -{ - reqMade = true; - Request *req = NULL; - Request *sreqLow = NULL; - Request *sreqHigh = NULL; - - if (reqMade && translationStarted) { - req = savedReq; - sreqLow = savedSreqLow; - sreqHigh = savedSreqHigh; - } else { - req = new Request(asid, addr, size, flags, this->pc.instAddr(), - thread->contextId(), threadNumber); - - // Only split the request if the ISA supports unaligned accesses. - if (TheISA::HasUnalignedMemAcc) { - splitRequest(req, sreqLow, sreqHigh); - } - initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read); - } - - if (translationCompleted) { - if (fault == NoFault) { - effAddr = req->getVaddr(); - effSize = size; - effAddrValid = true; - fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx); - } else { - // Commit will have to clean up whatever happened. Set this - // instruction as executed. - this->setExecuted(); - } - - if (fault != NoFault) { - // Return a fixed value to keep simulation deterministic even - // along misspeculated paths. - if (data) - bzero(data, size); - } + unsigned int + readStCondFailures() const override + { + return thread->storeCondFailures; } - if (traceData) { - traceData->setAddr(addr); + /** Sets the number of consecutive store conditional failures. */ + void + setStCondFailures(unsigned int sc_failures) override + { + thread->storeCondFailures = sc_failures; } - return fault; -} - -template -template -inline Fault -BaseDynInst::read(Addr addr, T &data, unsigned flags) -{ - Fault fault = readBytes(addr, (uint8_t *)&data, sizeof(T), flags); - - data = TheISA::gtoh(data); - - if (traceData) { - traceData->setData(data); + public: + // monitor/mwait funtions + void + armMonitor(Addr address) override + { + cpu->armMonitor(threadNumber, address); } - - return fault; -} - -template -Fault -BaseDynInst::writeBytes(uint8_t *data, unsigned size, - Addr addr, unsigned flags, uint64_t *res) -{ - if (traceData) { - traceData->setAddr(addr); + bool + mwait(PacketPtr pkt) override + { + return cpu->mwait(threadNumber, pkt); } - - reqMade = true; - Request *req = NULL; - Request *sreqLow = NULL; - Request *sreqHigh = NULL; - - if (reqMade && translationStarted) { - req = savedReq; - sreqLow = savedSreqLow; - sreqHigh = savedSreqHigh; - } else { - req = new Request(asid, addr, size, flags, this->pc.instAddr(), - thread->contextId(), threadNumber); - - // Only split the request if the ISA supports unaligned accesses. - if (TheISA::HasUnalignedMemAcc) { - splitRequest(req, sreqLow, sreqHigh); - } - initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write); + void + mwaitAtomic(ThreadContext *tc) override + { + return cpu->mwaitAtomic(threadNumber, tc, cpu->dtb); } - - if (fault == NoFault && translationCompleted) { - effAddr = req->getVaddr(); - effSize = size; - effAddrValid = true; - fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx); + AddressMonitor * + getAddrMonitor() override + { + return cpu->getCpuAddrMonitor(threadNumber); } - - return fault; -} +}; template -template -inline Fault -BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) +Fault +BaseDynInst::initiateMemRead(Addr addr, unsigned size, + Request::Flags flags, + const std::vector &byte_enable) { - if (traceData) { - traceData->setData(data); - } - data = TheISA::htog(data); - return writeBytes((uint8_t *)&data, sizeof(T), addr, flags, res); + assert(byte_enable.empty() || byte_enable.size() == size); + return cpu->pushRequest( + dynamic_cast(this), + /* ld */ true, nullptr, size, addr, flags, nullptr, nullptr, + byte_enable); } template -inline void -BaseDynInst::splitRequest(RequestPtr req, RequestPtr &sreqLow, - RequestPtr &sreqHigh) +Fault +BaseDynInst::initiateHtmCmd(Request::Flags flags) { - // Check to see if the request crosses the next level block boundary. - unsigned block_size = cpu->getDcachePort()->peerBlockSize(); - Addr addr = req->getVaddr(); - Addr split_addr = roundDown(addr + req->getSize() - 1, block_size); - assert(split_addr <= addr || split_addr - addr < block_size); - - // Spans two blocks. - if (split_addr > addr) { - req->splitOnVaddr(split_addr, sreqLow, sreqHigh); - } + return cpu->pushRequest( + dynamic_cast(this), + /* ld */ true, nullptr, 8, 0x0ul, flags, nullptr, nullptr); } template -inline void -BaseDynInst::initiateTranslation(RequestPtr req, RequestPtr sreqLow, - RequestPtr sreqHigh, uint64_t *res, - BaseTLB::Mode mode) +Fault +BaseDynInst::writeMem(uint8_t *data, unsigned size, Addr addr, + Request::Flags flags, uint64_t *res, + const std::vector &byte_enable) { - translationStarted = true; - - if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) { - WholeTranslationState *state = - new WholeTranslationState(req, NULL, res, mode); - - // One translation if the request isn't split. - DataTranslation > *trans = - new DataTranslation >(this, state); - cpu->dtb->translateTiming(req, thread->getTC(), trans, mode); - if (!translationCompleted) { - // Save memory requests. - savedReq = state->mainReq; - savedSreqLow = state->sreqLow; - savedSreqHigh = state->sreqHigh; - } - } else { - WholeTranslationState *state = - new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res, mode); - - // Two translations when the request is split. - DataTranslation > *stransLow = - new DataTranslation >(this, state, 0); - DataTranslation > *stransHigh = - new DataTranslation >(this, state, 1); - - cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode); - cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode); - if (!translationCompleted) { - // Save memory requests. - savedReq = state->mainReq; - savedSreqLow = state->sreqLow; - savedSreqHigh = state->sreqHigh; - } - } + assert(byte_enable.empty() || byte_enable.size() == size); + return cpu->pushRequest( + dynamic_cast(this), + /* st */ false, data, size, addr, flags, res, nullptr, + byte_enable); } template -inline void -BaseDynInst::finishTranslation(WholeTranslationState *state) +Fault +BaseDynInst::initiateMemAMO(Addr addr, unsigned size, + Request::Flags flags, + AtomicOpFunctorPtr amo_op) { - fault = state->getFault(); - - if (state->isUncacheable()) - isUncacheable = true; - - if (fault == NoFault) { - physEffAddr = state->getPaddr(); - memReqFlags = state->getFlags(); - - if (state->mainReq->isCondSwap()) { - assert(state->res); - state->mainReq->setExtraData(*state->res); - } - - } else { - state->deleteReqs(); - } - delete state; - - translationCompleted = true; + // atomic memory instructions do not have data to be written to memory yet + // since the atomic operations will be executed directly in cache/memory. + // Therefore, its `data` field is nullptr. + // Atomic memory requests need to carry their `amo_op` fields to cache/ + // memory + return cpu->pushRequest( + dynamic_cast(this), + /* atomic */ false, nullptr, size, addr, flags, nullptr, + std::move(amo_op)); } #endif // __CPU_BASE_DYN_INST_HH__