From 6df196b71e058b2c827e1027416155ac8ec8cf9f Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Tue, 5 Jun 2012 01:23:09 -0400 Subject: [PATCH] O3: Clean up the O3 structures and try to pack them a bit better. DynInst is extremely large the hope is that this re-organization will put the most used members close to each other. --- src/arch/alpha/registers.hh | 3 + src/arch/arm/registers.hh | 1 + src/arch/isa_parser.py | 8 + src/arch/mips/registers.hh | 1 + src/arch/power/registers.hh | 1 + src/arch/sparc/registers.hh | 1 + src/arch/x86/registers.hh | 1 + src/cpu/base_dyn_inst.hh | 409 +++++++++++++++++----------------- src/cpu/base_dyn_inst_impl.hh | 22 +- src/cpu/o3/bpred_unit.hh | 17 +- src/cpu/o3/comm.hh | 23 +- src/cpu/o3/commit_impl.hh | 10 +- src/cpu/o3/decode_impl.hh | 2 +- src/cpu/o3/dyn_inst.hh | 23 +- src/cpu/o3/dyn_inst_impl.hh | 3 +- src/cpu/o3/iew_impl.hh | 4 +- src/cpu/o3/inst_queue_impl.hh | 20 +- src/cpu/o3/lsq_unit.hh | 35 ++- src/cpu/o3/lsq_unit_impl.hh | 18 +- src/cpu/o3/rename_impl.hh | 4 +- 20 files changed, 304 insertions(+), 302 deletions(-) diff --git a/src/arch/alpha/registers.hh b/src/arch/alpha/registers.hh index 2eefead16..e1f50ddc2 100644 --- a/src/arch/alpha/registers.hh +++ b/src/arch/alpha/registers.hh @@ -40,6 +40,9 @@ namespace AlphaISA { using AlphaISAInst::MaxInstSrcRegs; using AlphaISAInst::MaxInstDestRegs; +// Locked read/write flags are can't be detected by the ISA parser +const int MaxMiscDestRegs = AlphaISAInst::MaxMiscDestRegs + 1; + typedef uint8_t RegIndex; typedef uint64_t IntReg; diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh index 1a688af56..cd2f1f9b8 100644 --- a/src/arch/arm/registers.hh +++ b/src/arch/arm/registers.hh @@ -55,6 +55,7 @@ namespace ArmISA { const int MaxInstSrcRegs = ArmISAInst::MaxInstDestRegs + ArmISAInst::MaxInstSrcRegs; using ArmISAInst::MaxInstDestRegs; +using ArmISAInst::MaxMiscDestRegs; typedef uint16_t RegIndex; diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py index 1b0d46410..60e7e226b 100755 --- a/src/arch/isa_parser.py +++ b/src/arch/isa_parser.py @@ -726,6 +726,7 @@ class OperandList(object): self.numDestRegs = 0 self.numFPDestRegs = 0 self.numIntDestRegs = 0 + self.numMiscDestRegs = 0 self.memOperand = None for op_desc in self.items: if op_desc.isReg(): @@ -739,6 +740,8 @@ class OperandList(object): self.numFPDestRegs += 1 elif op_desc.isIntReg(): self.numIntDestRegs += 1 + elif op_desc.isControlReg(): + self.numMiscDestRegs += 1 elif op_desc.isMem(): if self.memOperand: error("Code block has more than one memory operand.") @@ -747,6 +750,8 @@ class OperandList(object): parser.maxInstSrcRegs = self.numSrcRegs if parser.maxInstDestRegs < self.numDestRegs: parser.maxInstDestRegs = self.numDestRegs + if parser.maxMiscDestRegs < self.numMiscDestRegs: + parser.maxMiscDestRegs = self.numMiscDestRegs # now make a final pass to finalize op_desc fields that may depend # on the register enumeration for op_desc in self.items: @@ -1001,6 +1006,7 @@ namespace %(namespace)s { const int MaxInstSrcRegs = %(MaxInstSrcRegs)d; const int MaxInstDestRegs = %(MaxInstDestRegs)d; + const int MaxMiscDestRegs = %(MaxMiscDestRegs)d; } // namespace %(namespace)s @@ -1036,6 +1042,7 @@ class ISAParser(Grammar): self.maxInstSrcRegs = 0 self.maxInstDestRegs = 0 + self.maxMiscDestRegs = 0 ##################################################################### # @@ -1990,6 +1997,7 @@ StaticInstPtr # value of the globals. MaxInstSrcRegs = self.maxInstSrcRegs MaxInstDestRegs = self.maxInstDestRegs + MaxMiscDestRegs = self.maxMiscDestRegs # max_inst_regs.hh self.update_if_needed('max_inst_regs.hh', max_inst_regs_template % vars()) diff --git a/src/arch/mips/registers.hh b/src/arch/mips/registers.hh index 0e5cbfc91..807fd825f 100644 --- a/src/arch/mips/registers.hh +++ b/src/arch/mips/registers.hh @@ -43,6 +43,7 @@ namespace MipsISA using MipsISAInst::MaxInstSrcRegs; using MipsISAInst::MaxInstDestRegs; +using MipsISAInst::MaxMiscDestRegs; // Constants Related to the number of registers const int NumIntArchRegs = 32; diff --git a/src/arch/power/registers.hh b/src/arch/power/registers.hh index 0d32201c2..2d09677b2 100644 --- a/src/arch/power/registers.hh +++ b/src/arch/power/registers.hh @@ -38,6 +38,7 @@ namespace PowerISA { using PowerISAInst::MaxInstSrcRegs; using PowerISAInst::MaxInstDestRegs; +using PowerISAInst::MaxMiscDestRegs; typedef uint8_t RegIndex; diff --git a/src/arch/sparc/registers.hh b/src/arch/sparc/registers.hh index 91904f42b..8c61a070c 100644 --- a/src/arch/sparc/registers.hh +++ b/src/arch/sparc/registers.hh @@ -42,6 +42,7 @@ namespace SparcISA using SparcISAInst::MaxInstSrcRegs; using SparcISAInst::MaxInstDestRegs; +using SparcISAInst::MaxMiscDestRegs; typedef uint64_t IntReg; typedef uint64_t MiscReg; diff --git a/src/arch/x86/registers.hh b/src/arch/x86/registers.hh index 66f5dab80..20385a960 100644 --- a/src/arch/x86/registers.hh +++ b/src/arch/x86/registers.hh @@ -49,6 +49,7 @@ namespace X86ISA { using X86ISAInst::MaxInstSrcRegs; using X86ISAInst::MaxInstDestRegs; +using X86ISAInst::MaxMiscDestRegs; const int NumMiscArchRegs = NUM_MISCREGS; const int NumMiscRegs = NUM_MISCREGS; diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index a9cb60070..20278bd30 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -98,92 +98,16 @@ class BaseDynInst : public RefCounted MaxInstDestRegs = TheISA::MaxInstDestRegs /// Max dest regs }; - /** The StaticInst used by this BaseDynInst. */ - StaticInstPtr staticInst; - StaticInstPtr macroop; - - //////////////////////////////////////////// - // - // INSTRUCTION EXECUTION - // - //////////////////////////////////////////// - /** InstRecord that tracks this instructions. */ - Trace::InstRecord *traceData; - - void demapPage(Addr vaddr, uint64_t asn) - { - cpu->demapPage(vaddr, asn); - } - void demapInstPage(Addr vaddr, uint64_t asn) - { - cpu->demapPage(vaddr, asn); - } - void demapDataPage(Addr vaddr, uint64_t asn) - { - cpu->demapPage(vaddr, asn); - } - - Fault readMem(Addr addr, uint8_t *data, unsigned size, unsigned flags); - - Fault writeMem(uint8_t *data, unsigned size, - Addr addr, unsigned flags, uint64_t *res); - - /** Splits a request in two if it crosses a dcache block. */ - void splitRequest(RequestPtr req, RequestPtr &sreqLow, - RequestPtr &sreqHigh); - - /** Initiate a DTB address translation. */ - void initiateTranslation(RequestPtr req, RequestPtr sreqLow, - RequestPtr sreqHigh, uint64_t *res, - BaseTLB::Mode mode); - - /** Finish a DTB address translation. */ - void finishTranslation(WholeTranslationState *state); - - /** True if the DTB address translation has started. */ - bool translationStarted; - - /** True if the DTB address translation has completed. */ - bool translationCompleted; - - /** True if this address was found to match a previous load and they issued - * out of order. If that happend, then it's only a problem if an incoming - * snoop invalidate modifies the line, in which case we need to squash. - * If nothing modified the line the order doesn't matter. - */ - bool possibleLoadViolation; - - /** True if the address hit a external snoop while sitting in the LSQ. - * If this is true and a older instruction sees it, this instruction must - * reexecute - */ - bool hitExternalSnoop; - - /** - * Returns true if the DTB address translation is being delayed due to a hw - * page table walk. - */ - bool isTranslationDelayed() const - { - return (translationStarted && !translationCompleted); - } - - /** - * Saved memory requests (needed when the DTB address translation is - * delayed due to a hw page table walk). - */ - RequestPtr savedReq; - RequestPtr savedSreqLow; - RequestPtr savedSreqHigh; - - // Need a copy of main request pointer to verify on writes. - RequestPtr reqToVerify; - - /** @todo: Consider making this private. */ - public: - /** The sequence number of the instruction. */ - InstSeqNum seqNum; + union Result { + uint64_t integer; + double dbl; + void set(uint64_t i) { integer = i; } + void set(double d) { dbl = d; } + void get(uint64_t& i) { i = integer; } + void get(double& d) { d = dbl; } + }; + protected: enum Status { IqEntry, /// Instruction is in the IQ RobEntry, /// Instruction is in the ROB @@ -210,17 +134,31 @@ class BaseDynInst : public RefCounted NumStatus }; - /** The status of this BaseDynInst. Several bits can be set. */ - std::bitset status; - - /** The thread this instruction is from. */ - ThreadID threadNumber; + enum Flags { + TranslationStarted, + TranslationCompleted, + PossibleLoadViolation, + HitExternalSnoop, + EffAddrValid, + RecordResult, + Predicate, + PredTaken, + /** Whether or not the effective address calculation is completed. + * @todo: Consider if this is necessary or not. + */ + EACalcDone, + IsUncacheable, + ReqMade, + MemOpDone, + MaxFlags + }; - /** data address space ID, for loads & stores. */ - short asid; + public: + /** The sequence number of the instruction. */ + InstSeqNum seqNum; - /** How many source registers are ready. */ - unsigned readyRegs; + /** The StaticInst used by this BaseDynInst. */ + StaticInstPtr staticInst; /** Pointer to the Impl's CPU object. */ ImplCPU *cpu; @@ -231,17 +169,50 @@ class BaseDynInst : public RefCounted /** The kind of fault this instruction has generated. */ Fault fault; - /** Pointer to the data for the memory access. */ - uint8_t *memData; + /** InstRecord that tracks this instructions. */ + Trace::InstRecord *traceData; - /** The effective virtual address (lds & stores only). */ - Addr effAddr; + protected: + /** The result of the instruction; assumes an instruction can have many + * destination registers. + */ + std::queue instResult; - /** The size of the request */ - Addr effSize; + /** PC state for this instruction. */ + TheISA::PCState pc; - /** Is the effective virtual address valid. */ - bool effAddrValid; + /* An amalgamation of a lot of boolean values into one */ + std::bitset instFlags; + + /** The status of this BaseDynInst. Several bits can be set. */ + std::bitset status; + + /** Whether or not the source register is ready. + * @todo: Not sure this should be here vs the derived class. + */ + std::bitset _readySrcRegIdx; + + public: + /** The thread this instruction is from. */ + ThreadID threadNumber; + + /** Iterator pointing to this BaseDynInst in the list of all insts. */ + ListIt instListIt; + + ////////////////////// Branch Data /////////////// + /** Predicted PC state after this instruction. */ + TheISA::PCState predPC; + + /** The Macroop if one exists */ + StaticInstPtr macroop; + + /** How many source registers are ready. */ + uint8_t readyRegs; + + public: + /////////////////////// Load Store Data ////////////////////// + /** The effective virtual address (lds & stores only). */ + Addr effAddr; /** The effective physical address. */ Addr physEffAddr; @@ -249,46 +220,40 @@ class BaseDynInst : public RefCounted /** The memory request flags (from translation). */ unsigned memReqFlags; - union Result { - uint64_t integer; - double dbl; - void set(uint64_t i) { integer = i; } - void set(double d) { dbl = d; } - void get(uint64_t& i) { i = integer; } - void get(double& d) { d = dbl; } - }; - - /** The result of the instruction; assumes an instruction can have many - * destination registers. - */ - std::queue instResult; + /** data address space ID, for loads & stores. */ + short asid; - /** Records changes to result? */ - bool recordResult; + /** The size of the request */ + uint8_t effSize; - /** Did this instruction execute, or is it predicated false */ - bool predicate; + /** Pointer to the data for the memory access. */ + uint8_t *memData; - protected: - /** PC state for this instruction. */ - TheISA::PCState pc; + /** Load queue index. */ + int16_t lqIdx; - /** Predicted PC state after this instruction. */ - TheISA::PCState predPC; + /** Store queue index. */ + int16_t sqIdx; - /** If this is a branch that was predicted taken */ - bool predTaken; - public: + /////////////////////// TLB Miss ////////////////////// + /** + * Saved memory requests (needed when the DTB address translation is + * delayed due to a hw page table walk). + */ + RequestPtr savedReq; + RequestPtr savedSreqLow; + RequestPtr savedSreqHigh; -#ifdef DEBUG - void dumpSNList(); -#endif + /////////////////////// Checker ////////////////////// + // Need a copy of main request pointer to verify on writes. + RequestPtr reqToVerify; - /** Whether or not the source register is ready. - * @todo: Not sure this should be here vs the derived class. + private: + /** Instruction effective address. + * @todo: Consider if this is necessary or not. */ - bool _readySrcRegIdx[MaxInstSrcRegs]; + Addr instEffAddr; protected: /** Flattened register index of the destination registers of this @@ -296,11 +261,6 @@ class BaseDynInst : public RefCounted */ TheISA::RegIndex _flatDestRegIdx[TheISA::MaxInstDestRegs]; - /** Flattened register index of the source registers of this - * instruction. - */ - TheISA::RegIndex _flatSrcRegIdx[TheISA::MaxInstSrcRegs]; - /** Physical register index of the destination registers of this * instruction. */ @@ -316,7 +276,91 @@ class BaseDynInst : public RefCounted */ PhysRegIndex _prevDestRegIdx[TheISA::MaxInstDestRegs]; + + public: + /** Records changes to result? */ + void recordResult(bool f) { instFlags[RecordResult] = f; } + + /** Is the effective virtual address valid. */ + bool effAddrValid() const { return instFlags[EffAddrValid]; } + + /** Whether or not the memory operation is done. */ + bool memOpDone() const { return instFlags[MemOpDone]; } + void memOpDone(bool f) { instFlags[MemOpDone] = f; } + + + //////////////////////////////////////////// + // + // INSTRUCTION EXECUTION + // + //////////////////////////////////////////// + + void demapPage(Addr vaddr, uint64_t asn) + { + cpu->demapPage(vaddr, asn); + } + void demapInstPage(Addr vaddr, uint64_t asn) + { + cpu->demapPage(vaddr, asn); + } + void demapDataPage(Addr vaddr, uint64_t asn) + { + cpu->demapPage(vaddr, asn); + } + + Fault readMem(Addr addr, uint8_t *data, unsigned size, unsigned flags); + + Fault writeMem(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res); + + /** Splits a request in two if it crosses a dcache block. */ + void splitRequest(RequestPtr req, RequestPtr &sreqLow, + RequestPtr &sreqHigh); + + /** Initiate a DTB address translation. */ + void initiateTranslation(RequestPtr req, RequestPtr sreqLow, + RequestPtr sreqHigh, uint64_t *res, + BaseTLB::Mode mode); + + /** Finish a DTB address translation. */ + void finishTranslation(WholeTranslationState *state); + + /** True if the DTB address translation has started. */ + bool translationStarted() const { return instFlags[TranslationStarted]; } + void translationStarted(bool f) { instFlags[TranslationStarted] = f; } + + /** True if the DTB address translation has completed. */ + bool translationCompleted() const { return instFlags[TranslationCompleted]; } + void translationCompleted(bool f) { instFlags[TranslationCompleted] = f; } + + /** True if this address was found to match a previous load and they issued + * out of order. If that happend, then it's only a problem if an incoming + * snoop invalidate modifies the line, in which case we need to squash. + * If nothing modified the line the order doesn't matter. + */ + bool possibleLoadViolation() const { return instFlags[PossibleLoadViolation]; } + void possibleLoadViolation(bool f) { instFlags[PossibleLoadViolation] = f; } + + /** True if the address hit a external snoop while sitting in the LSQ. + * If this is true and a older instruction sees it, this instruction must + * reexecute + */ + bool hitExternalSnoop() const { return instFlags[HitExternalSnoop]; } + void hitExternalSnoop(bool f) { instFlags[HitExternalSnoop] = f; } + + /** + * Returns true if the DTB address translation is being delayed due to a hw + * page table walk. + */ + bool isTranslationDelayed() const + { + return (translationStarted() && !translationCompleted()); + } + public: +#ifdef DEBUG + void dumpSNList(); +#endif /** Returns the physical register index of the i'th destination * register. @@ -329,6 +373,7 @@ class BaseDynInst : public RefCounted /** Returns the physical register index of the i'th source register. */ PhysRegIndex renamedSrcRegIdx(int idx) const { + assert(TheISA::MaxInstSrcRegs > idx); return _srcRegIdx[idx]; } @@ -340,12 +385,6 @@ class BaseDynInst : public RefCounted return _flatDestRegIdx[idx]; } - /** Returns the flattened register index of the i'th source register */ - TheISA::RegIndex flattenedSrcRegIdx(int idx) const - { - return _flatSrcRegIdx[idx]; - } - /** Returns the physical register index of the previous physical register * that remapped to the same logical register index. */ @@ -374,13 +413,6 @@ class BaseDynInst : public RefCounted _srcRegIdx[idx] = renamed_src; } - /** Flattens a source architectural register index into a logical index. - */ - void flattenSrcReg(int idx, TheISA::RegIndex flattened_src) - { - _flatSrcRegIdx[idx] = flattened_src; - } - /** Flattens a destination architectural register index into a logical * index. */ @@ -457,12 +489,12 @@ class BaseDynInst : public RefCounted /** Returns whether the instruction was predicted taken or not. */ bool readPredTaken() { - return predTaken; + return instFlags[PredTaken]; } void setPredTaken(bool predicted_taken) { - predTaken = predicted_taken; + instFlags[PredTaken] = predicted_taken; } /** Returns whether the instruction mispredicted. */ @@ -588,7 +620,7 @@ class BaseDynInst : public RefCounted template void setResult(T t) { - if (recordResult) { + if (instFlags[RecordResult]) { Result instRes; instRes.set(t); instResult.push(instRes); @@ -774,12 +806,12 @@ class BaseDynInst : public RefCounted bool readPredicate() { - return predicate; + return instFlags[Predicate]; } void setPredicate(bool val) { - predicate = val; + instFlags[Predicate] = val; if (traceData) { traceData->setPredicate(val); @@ -798,54 +830,24 @@ class BaseDynInst : public RefCounted /** Returns the thread context. */ ThreadContext *tcBase() { return thread->getTC(); } - private: - /** Instruction effective address. - * @todo: Consider if this is necessary or not. - */ - Addr instEffAddr; - - /** Whether or not the effective address calculation is completed. - * @todo: Consider if this is necessary or not. - */ - bool eaCalcDone; - - /** Is this instruction's memory access uncacheable. */ - bool isUncacheable; - - /** Has this instruction generated a memory request. */ - bool reqMade; - public: /** Sets the effective address. */ - void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; } + void setEA(Addr &ea) { instEffAddr = ea; instFlags[EACalcDone] = true; } /** Returns the effective address. */ const Addr &getEA() const { return instEffAddr; } /** Returns whether or not the eff. addr. calculation has been completed. */ - bool doneEACalc() { return eaCalcDone; } + bool doneEACalc() { return instFlags[EACalcDone]; } /** Returns whether or not the eff. addr. source registers are ready. */ bool eaSrcsReady(); - /** Whether or not the memory operation is done. */ - bool memOpDone; - /** Is this instruction's memory access uncacheable. */ - bool uncacheable() { return isUncacheable; } + bool uncacheable() { return instFlags[IsUncacheable]; } /** Has this instruction generated a memory request. */ - bool hasRequest() { return reqMade; } - - public: - /** Load queue index. */ - int16_t lqIdx; - - /** Store queue index. */ - int16_t sqIdx; - - /** Iterator pointing to this BaseDynInst in the list of all insts. */ - ListIt instListIt; + bool hasRequest() { return instFlags[ReqMade]; } /** Returns iterator to this instruction in the list of all insts. */ ListIt &getInstListIt() { return instListIt; } @@ -868,12 +870,12 @@ Fault BaseDynInst::readMem(Addr addr, uint8_t *data, unsigned size, unsigned flags) { - reqMade = true; + instFlags[ReqMade] = true; Request *req = NULL; Request *sreqLow = NULL; Request *sreqHigh = NULL; - if (reqMade && translationStarted) { + if (instFlags[ReqMade] && translationStarted()) { req = savedReq; sreqLow = savedSreqLow; sreqHigh = savedSreqHigh; @@ -888,11 +890,11 @@ BaseDynInst::readMem(Addr addr, uint8_t *data, initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read); } - if (translationCompleted) { + if (translationCompleted()) { if (fault == NoFault) { effAddr = req->getVaddr(); effSize = size; - effAddrValid = true; + instFlags[EffAddrValid] = true; if (cpu->checker) { if (reqToVerify != NULL) { @@ -931,12 +933,12 @@ BaseDynInst::writeMem(uint8_t *data, unsigned size, traceData->setAddr(addr); } - reqMade = true; + instFlags[ReqMade] = true; Request *req = NULL; Request *sreqLow = NULL; Request *sreqHigh = NULL; - if (reqMade && translationStarted) { + if (instFlags[ReqMade] && translationStarted()) { req = savedReq; sreqLow = savedSreqLow; sreqHigh = savedSreqHigh; @@ -951,10 +953,10 @@ BaseDynInst::writeMem(uint8_t *data, unsigned size, initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write); } - if (fault == NoFault && translationCompleted) { + if (fault == NoFault && translationCompleted()) { effAddr = req->getVaddr(); effSize = size; - effAddrValid = true; + instFlags[EffAddrValid] = true; if (cpu->checker) { if (reqToVerify != NULL) { @@ -991,7 +993,7 @@ BaseDynInst::initiateTranslation(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, uint64_t *res, BaseTLB::Mode mode) { - translationStarted = true; + translationStarted(true); if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) { WholeTranslationState *state = @@ -1001,7 +1003,7 @@ BaseDynInst::initiateTranslation(RequestPtr req, RequestPtr sreqLow, DataTranslation *trans = new DataTranslation(this, state); cpu->dtb->translateTiming(req, thread->getTC(), trans, mode); - if (!translationCompleted) { + if (!translationCompleted()) { // Save memory requests. savedReq = state->mainReq; savedSreqLow = state->sreqLow; @@ -1019,7 +1021,7 @@ BaseDynInst::initiateTranslation(RequestPtr req, RequestPtr sreqLow, cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode); cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode); - if (!translationCompleted) { + if (!translationCompleted()) { // Save memory requests. savedReq = state->mainReq; savedSreqLow = state->sreqLow; @@ -1034,8 +1036,7 @@ BaseDynInst::finishTranslation(WholeTranslationState *state) { fault = state->getFault(); - if (state->isUncacheable()) - isUncacheable = true; + instFlags[IsUncacheable] = state->isUncacheable(); if (fault == NoFault) { physEffAddr = state->getPaddr(); @@ -1051,7 +1052,7 @@ BaseDynInst::finishTranslation(WholeTranslationState *state) } delete state; - translationCompleted = true; + translationCompleted(true); } #endif // __CPU_BASE_DYN_INST_HH__ diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index 05f9b7767..663159b94 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -60,13 +60,12 @@ BaseDynInst::BaseDynInst(StaticInstPtr _staticInst, StaticInstPtr _macroop, TheISA::PCState _pc, TheISA::PCState _predPC, InstSeqNum seq_num, ImplCPU *cpu) - : staticInst(_staticInst), macroop(_macroop), traceData(NULL), cpu(cpu) + : staticInst(_staticInst), cpu(cpu), traceData(NULL), macroop(_macroop) { seqNum = seq_num; pc = _pc; predPC = _predPC; - predTaken = false; initVars(); } @@ -74,7 +73,7 @@ BaseDynInst::BaseDynInst(StaticInstPtr _staticInst, template BaseDynInst::BaseDynInst(StaticInstPtr _staticInst, StaticInstPtr _macroop) - : staticInst(_staticInst), macroop(_macroop), traceData(NULL) + : staticInst(_staticInst), traceData(NULL), macroop(_macroop) { seqNum = 0; initVars(); @@ -86,25 +85,14 @@ BaseDynInst::initVars() { memData = NULL; effAddr = 0; - effAddrValid = false; physEffAddr = 0; - - translationStarted = false; - translationCompleted = false; - possibleLoadViolation = false; - hitExternalSnoop = false; - - isUncacheable = false; - reqMade = false; readyRegs = 0; - recordResult = true; - status.reset(); - eaCalcDone = false; - memOpDone = false; - predicate = true; + instFlags.reset(); + instFlags[RecordResult] = true; + instFlags[Predicate] = true; lqIdx = -1; sqIdx = -1; diff --git a/src/cpu/o3/bpred_unit.hh b/src/cpu/o3/bpred_unit.hh index 673472b69..8bfab11a9 100644 --- a/src/cpu/o3/bpred_unit.hh +++ b/src/cpu/o3/bpred_unit.hh @@ -206,9 +206,9 @@ class BPredUnit PredictorHistory(const InstSeqNum &seq_num, Addr instPC, bool pred_taken, void *bp_history, ThreadID _tid) - : seqNum(seq_num), pc(instPC), RASTarget(0), RASIndex(0), - tid(_tid), predTaken(pred_taken), usedRAS(0), - wasCall(0), wasReturn(0), validBTB(0), bpHistory(bp_history) + : seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0), + RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), + wasCall(0), wasReturn(0), validBTB(0) {} bool operator==(const PredictorHistory &entry) const { @@ -221,6 +221,12 @@ class BPredUnit /** The PC associated with the sequence number. */ Addr pc; + /** Pointer to the history object passed back from the branch + * predictor. It is used to update or restore state of the + * branch predictor. + */ + void *bpHistory; + /** The RAS target (only valid if a return). */ TheISA::PCState RASTarget; @@ -243,11 +249,6 @@ class BPredUnit bool wasReturn; /** Whether or not the instruction had a valid BTB entry. */ bool validBTB; - /** Pointer to the history object passed back from the branch - * predictor. It is used to update or restore state of the - * branch predictor. - */ - void *bpHistory; }; typedef std::list History; diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index 053d4f6be..31d252c73 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -96,15 +96,14 @@ struct DefaultIEWDefaultCommit { int size; DynInstPtr insts[Impl::MaxWidth]; - - bool squash[Impl::MaxThreads]; - bool branchMispredict[Impl::MaxThreads]; DynInstPtr mispredictInst[Impl::MaxThreads]; - bool branchTaken[Impl::MaxThreads]; Addr mispredPC[Impl::MaxThreads]; - TheISA::PCState pc[Impl::MaxThreads]; InstSeqNum squashedSeqNum[Impl::MaxThreads]; + TheISA::PCState pc[Impl::MaxThreads]; + bool squash[Impl::MaxThreads]; + bool branchMispredict[Impl::MaxThreads]; + bool branchTaken[Impl::MaxThreads]; bool includeSquashInst[Impl::MaxThreads]; }; @@ -122,21 +121,17 @@ template struct TimeBufStruct { typedef typename Impl::DynInstPtr DynInstPtr; struct decodeComm { - bool squash; - bool predIncorrect; uint64_t branchAddr; - InstSeqNum doneSeqNum; - - // @todo: Might want to package this kind of branch stuff into a single - // struct as it is used pretty frequently. - bool branchMispredict; DynInstPtr mispredictInst; - bool branchTaken; + DynInstPtr squashInst; Addr mispredPC; TheISA::PCState nextPC; - DynInstPtr squashInst; unsigned branchCount; + bool squash; + bool predIncorrect; + bool branchMispredict; + bool branchTaken; }; decodeComm decodeInfo[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 1bf493871..45f5bc02b 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -1244,11 +1244,11 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) head_inst->microPC(), head_inst->seqNum, head_inst->staticInst->disassemble(head_inst->instAddr())); - DPRINTFR(O3PipeView, "O3PipeView:decode:%llu\n", head_inst->decodeTick); - DPRINTFR(O3PipeView, "O3PipeView:rename:%llu\n", head_inst->renameTick); - DPRINTFR(O3PipeView, "O3PipeView:dispatch:%llu\n", head_inst->dispatchTick); - DPRINTFR(O3PipeView, "O3PipeView:issue:%llu\n", head_inst->issueTick); - DPRINTFR(O3PipeView, "O3PipeView:complete:%llu\n", head_inst->completeTick); + DPRINTFR(O3PipeView, "O3PipeView:decode:%llu\n", head_inst->fetchTick + head_inst->decodeTick); + DPRINTFR(O3PipeView, "O3PipeView:rename:%llu\n", head_inst->fetchTick + head_inst->renameTick); + DPRINTFR(O3PipeView, "O3PipeView:dispatch:%llu\n", head_inst->fetchTick + head_inst->dispatchTick); + DPRINTFR(O3PipeView, "O3PipeView:issue:%llu\n", head_inst->fetchTick + head_inst->issueTick); + DPRINTFR(O3PipeView, "O3PipeView:complete:%llu\n", head_inst->fetchTick + head_inst->completeTick); DPRINTFR(O3PipeView, "O3PipeView:retire:%llu\n", curTick()); #endif diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index fd8dc834b..315d53155 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -709,7 +709,7 @@ DefaultDecode::decodeInsts(ThreadID tid) --insts_available; #if TRACING_ON - inst->decodeTick = curTick(); + inst->decodeTick = curTick() - inst->fetchTick; #endif // Ensure that if it was predicted as a branch, it really is a diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index ed947d92f..8acbf3443 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -107,26 +107,28 @@ class BaseO3DynInst : public BaseDynInst void initVars(); protected: + /** Values to be written to the destination misc. registers. */ + MiscReg _destMiscRegVal[TheISA::MaxMiscDestRegs]; + /** Indexes of the destination misc. registers. They are needed to defer * the write accesses to the misc. registers until the commit stage, when * the instruction is out of its speculative state. */ - int _destMiscRegIdx[MaxInstDestRegs]; - /** Values to be written to the destination misc. registers. */ - MiscReg _destMiscRegVal[MaxInstDestRegs]; + short _destMiscRegIdx[TheISA::MaxMiscDestRegs]; + /** Number of destination misc. registers. */ - int _numDestMiscRegs; + uint8_t _numDestMiscRegs; - public: + public: #if TRACING_ON /** Tick records used for the pipeline activity viewer. */ Tick fetchTick; - Tick decodeTick; - Tick renameTick; - Tick dispatchTick; - Tick issueTick; - Tick completeTick; + uint32_t decodeTick; + uint32_t renameTick; + uint32_t dispatchTick; + uint32_t issueTick; + uint32_t completeTick; #endif /** Reads a misc. register, including any side-effects the read @@ -145,6 +147,7 @@ class BaseO3DynInst : public BaseDynInst /** Writes to misc. registers are recorded and deferred until the * commit stage, when updateMiscRegs() is called. */ + assert(_numDestMiscRegs < TheISA::MaxMiscDestRegs); _destMiscRegIdx[_numDestMiscRegs] = misc_reg; _destMiscRegVal[_numDestMiscRegs] = val; _numDestMiscRegs++; diff --git a/src/cpu/o3/dyn_inst_impl.hh b/src/cpu/o3/dyn_inst_impl.hh index 2870d40fe..85778aadc 100644 --- a/src/cpu/o3/dyn_inst_impl.hh +++ b/src/cpu/o3/dyn_inst_impl.hh @@ -75,9 +75,10 @@ BaseO3DynInst::initVars() for (int i = 0; i < this->staticInst->numSrcRegs(); i++) { this->_srcRegIdx[i] = this->staticInst->srcRegIdx(i); - this->_readySrcRegIdx[i] = 0; } + this->_readySrcRegIdx.reset(); + _numDestMiscRegs = 0; #if TRACING_ON diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index b306e6e58..60f4604a2 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1152,7 +1152,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) ++iewDispatchedInsts; #if TRACING_ON - inst->dispatchTick = curTick(); + inst->dispatchTick = curTick() - inst->fetchTick; #endif } @@ -1617,7 +1617,7 @@ DefaultIEW::updateExeInstStats(DynInstPtr &inst) iewExecutedInsts++; #if TRACING_ON - inst->completeTick = curTick(); + inst->completeTick = curTick() - inst->fetchTick; #endif // diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 2c0779a03..ae5f93c38 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -859,7 +859,7 @@ InstructionQueue::scheduleReadyInsts() ++total_issued; #if TRACING_ON - issuing_inst->issueTick = curTick(); + issuing_inst->issueTick = curTick() - issuing_inst->fetchTick; #endif if (!issuing_inst->isMemRef()) { @@ -1054,8 +1054,8 @@ InstructionQueue::rescheduleMemInst(DynInstPtr &resched_inst) DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum); // Reset DTB translation state - resched_inst->translationStarted = false; - resched_inst->translationCompleted = false; + resched_inst->translationStarted(false); + resched_inst->translationCompleted(false); resched_inst->clearCanIssue(); memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); @@ -1079,7 +1079,7 @@ InstructionQueue::completeMemInst(DynInstPtr &completed_inst) ++freeEntries; - completed_inst->memOpDone = true; + completed_inst->memOpDone(true); memDepUnit[tid].completed(completed_inst); count[tid]--; @@ -1098,7 +1098,7 @@ InstructionQueue::getDeferredMemInstToExecute() { for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end(); ++it) { - if ((*it)->translationCompleted || (*it)->isSquashed()) { + if ((*it)->translationCompleted() || (*it)->isSquashed()) { DynInstPtr ret = *it; deferredMemInsts.erase(it); return ret; @@ -1165,7 +1165,7 @@ InstructionQueue::doSquash(ThreadID tid) if (!squashed_inst->isIssued() || (squashed_inst->isMemRef() && - !squashed_inst->memOpDone)) { + !squashed_inst->memOpDone())) { DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %s squashed.\n", tid, squashed_inst->seqNum, squashed_inst->pcState()); @@ -1456,7 +1456,7 @@ InstructionQueue::dumpInsts() ++valid_num; cprintf("Count:%i\n", valid_num); } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone) { + !(*inst_list_it)->memOpDone()) { // Loads that have not been marked as executed // still count towards the total instructions. ++valid_num; @@ -1473,7 +1473,7 @@ InstructionQueue::dumpInsts() (*inst_list_it)->isSquashed()); if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone()); } cprintf("\n"); @@ -1498,7 +1498,7 @@ InstructionQueue::dumpInsts() ++valid_num; cprintf("Count:%i\n", valid_num); } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone) { + !(*inst_list_it)->memOpDone()) { // Loads that have not been marked as executed // still count towards the total instructions. ++valid_num; @@ -1515,7 +1515,7 @@ InstructionQueue::dumpInsts() (*inst_list_it)->isSquashed()); if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone()); } cprintf("\n"); diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index c3bb8f7cd..7093b5fee 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -275,28 +275,28 @@ class LSQUnit { public: /** Default constructor. */ LSQSenderState() - : noWB(false), isSplit(false), pktToSend(false), outstanding(1), - mainPkt(NULL), pendingPacket(NULL) - { } + : mainPkt(NULL), pendingPacket(NULL), outstanding(1), + noWB(false), isSplit(false), pktToSend(false) + { } /** Instruction who initiated the access to memory. */ DynInstPtr inst; + /** The main packet from a split load, used during writeback. */ + PacketPtr mainPkt; + /** A second packet from a split store that needs sending. */ + PacketPtr pendingPacket; + /** The LQ/SQ index of the instruction. */ + uint8_t idx; + /** Number of outstanding packets to complete. */ + uint8_t outstanding; /** Whether or not it is a load. */ bool isLoad; - /** The LQ/SQ index of the instruction. */ - int idx; /** Whether or not the instruction will need to writeback. */ bool noWB; /** Whether or not this access is split in two. */ bool isSplit; /** Whether or not there is a packet that needs sending. */ bool pktToSend; - /** Number of outstanding packets to complete. */ - int outstanding; - /** The main packet from a split load, used during writeback. */ - PacketPtr mainPkt; - /** A second packet from a split store that needs sending. */ - PacketPtr pendingPacket; /** Completes a packet and returns whether the access is finished. */ inline bool complete() { return --outstanding == 0; } @@ -342,7 +342,8 @@ class LSQUnit { { std::memset(data, 0, sizeof(data)); } - + /** The store data. */ + char data[16]; /** The store instruction. */ DynInstPtr inst; /** The request for the store. */ @@ -351,9 +352,7 @@ class LSQUnit { RequestPtr sreqLow; RequestPtr sreqHigh; /** The size of the store. */ - int size; - /** The store data. */ - char data[16]; + uint8_t size; /** Whether or not the store is split into two requests. */ bool isSplit; /** Whether or not the store can writeback. */ @@ -593,9 +592,9 @@ LSQUnit::read(Request *req, Request *sreqLow, Request *sreqHigh, // Disable recording the result temporarily. Writing to misc // regs normally updates the result, but this is not the // desired behavior when handling store conditionals. - load_inst->recordResult = false; + load_inst->recordResult(false); TheISA::handleLockedRead(load_inst.get(), req); - load_inst->recordResult = true; + load_inst->recordResult(true); } if (req->isMmappedIpr()) { @@ -651,7 +650,7 @@ LSQUnit::read(Request *req, Request *sreqLow, Request *sreqHigh, else if (storeQueue[store_idx].inst->uncacheable()) continue; - assert(storeQueue[store_idx].inst->effAddrValid); + assert(storeQueue[store_idx].inst->effAddrValid()); // Check if the store data is within the lower and upper bounds of // addresses that the request needs. diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 4f82ad9e3..a878b1540 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -465,7 +465,7 @@ LSQUnit::checkSnoop(PacketPtr pkt) while (load_idx != loadTail) { DynInstPtr ld_inst = loadQueue[load_idx]; - if (!ld_inst->effAddrValid || ld_inst->uncacheable()) { + if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) { incrLdIdx(load_idx); continue; } @@ -475,7 +475,7 @@ LSQUnit::checkSnoop(PacketPtr pkt) ld_inst->seqNum, load_addr, invalidate_addr); if (load_addr == invalidate_addr) { - if (ld_inst->possibleLoadViolation) { + if (ld_inst->possibleLoadViolation()) { DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n", ld_inst->physEffAddr, pkt->getAddr(), ld_inst->seqNum); @@ -485,7 +485,7 @@ LSQUnit::checkSnoop(PacketPtr pkt) // If a older load checks this and it's true // then we might have missed the snoop // in which case we need to invalidate to be sure - ld_inst->hitExternalSnoop = true; + ld_inst->hitExternalSnoop(true); } } incrLdIdx(load_idx); @@ -507,7 +507,7 @@ LSQUnit::checkViolations(int load_idx, DynInstPtr &inst) */ while (load_idx != loadTail) { DynInstPtr ld_inst = loadQueue[load_idx]; - if (!ld_inst->effAddrValid || ld_inst->uncacheable()) { + if (!ld_inst->effAddrValid() || ld_inst->uncacheable()) { incrLdIdx(load_idx); continue; } @@ -521,7 +521,7 @@ LSQUnit::checkViolations(int load_idx, DynInstPtr &inst) // If this load is to the same block as an external snoop // invalidate that we've observed then the load needs to be // squashed as it could have newer data - if (ld_inst->hitExternalSnoop) { + if (ld_inst->hitExternalSnoop()) { if (!memDepViolator || ld_inst->seqNum < memDepViolator->seqNum) { DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] " @@ -540,7 +540,7 @@ LSQUnit::checkViolations(int load_idx, DynInstPtr &inst) // Otherwise, mark the load has a possible load violation // and if we see a snoop before it's commited, we need to squash - ld_inst->possibleLoadViolation = true; + ld_inst->possibleLoadViolation(true); DPRINTF(LSQUnit, "Found possible load violaiton at addr: %#x" " between instructions [sn:%lli] and [sn:%lli]\n", inst_eff_addr1, inst->seqNum, ld_inst->seqNum); @@ -610,7 +610,7 @@ LSQUnit::executeLoad(DynInstPtr &inst) iewStage->instToCommit(inst); iewStage->activityThisCycle(); } else if (!loadBlocked()) { - assert(inst->effAddrValid); + assert(inst->effAddrValid()); int load_idx = inst->lqIdx; incrLdIdx(load_idx); @@ -857,9 +857,9 @@ LSQUnit::writebackStores() // Disable recording the result temporarily. Writing to // misc regs normally updates the result, but this is not // the desired behavior when handling store conditionals. - inst->recordResult = false; + inst->recordResult(false); bool success = TheISA::handleLockedWrite(inst.get(), req); - inst->recordResult = true; + inst->recordResult(true); if (!success) { // Instantly complete this store. diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 678927813..592bc059f 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -692,7 +692,7 @@ DefaultRename::renameInsts(ThreadID tid) ++renamed_insts; #if TRACING_ON - inst->renameTick = curTick(); + inst->renameTick = curTick() - inst->fetchTick; #endif // Put instruction in rename queue. @@ -997,8 +997,6 @@ DefaultRename::renameSrcRegs(DynInstPtr &inst, ThreadID tid) panic("Reg index is out of bound: %d.", src_reg); } - inst->flattenSrcReg(src_idx, flat_src_reg); - // Look up the source registers to get the phys. register they've // been renamed to, and set the sources to those registers. PhysRegIndex renamed_reg = renameMap[tid]->lookup(flat_src_reg); -- 2.30.2