From c3081d9c1c36e1a08c173048783d191fa19463de Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sat, 14 Apr 2007 17:13:18 +0000 Subject: [PATCH] Add support for microcode and pull out the special branch delay slot handling. Branch delay slots need to be squash on a mispredict as well because the nnpc they saw was incorrect. --HG-- extra : convert_revision : 8b9c603616bcad254417a7a3fa3edfb4c8728719 --- src/cpu/base_dyn_inst.hh | 57 +++++++++++++++++-- src/cpu/base_dyn_inst_impl.hh | 55 +++++++++++++++++-- src/cpu/o3/comm.hh | 21 ++++--- src/cpu/o3/commit.hh | 34 +++++++++--- src/cpu/o3/commit_impl.hh | 11 ++-- src/cpu/o3/cpu.cc | 30 +++++++++- src/cpu/o3/cpu.hh | 22 ++++++-- src/cpu/o3/decode_impl.hh | 4 +- src/cpu/o3/fetch.hh | 17 +++--- src/cpu/o3/fetch_impl.hh | 91 ++++++++++++++++++++----------- src/cpu/o3/iew_impl.hh | 1 + src/cpu/o3/rename_impl.hh | 6 +- src/cpu/o3/sparc/dyn_inst.hh | 10 +++- src/cpu/o3/sparc/dyn_inst_impl.hh | 17 +++++- 14 files changed, 291 insertions(+), 85 deletions(-) diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index b02038b3e..1311e5cf2 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -209,6 +209,9 @@ class BaseDynInst : public FastAlloc, public RefCounted /** PC of this instruction. */ Addr PC; + /** Micro PC of this instruction. */ + Addr microPC; + protected: /** Next non-speculative PC. It is not filled in at fetch, but rather * once the target of the branch is truly known (either decode or @@ -219,12 +222,18 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Next non-speculative NPC. Target PC for Mips or Sparc. */ Addr nextNPC; + /** Next non-speculative micro PC. */ + Addr nextMicroPC; + /** Predicted next PC. */ Addr predPC; /** Predicted next NPC. */ Addr predNPC; + /** Predicted next microPC */ + Addr predMicroPC; + /** If this is a branch that was predicted taken */ bool predTaken; @@ -340,6 +349,17 @@ class BaseDynInst : public FastAlloc, public RefCounted { _flatDestRegIdx[idx] = flattened_dest; } + /** BaseDynInst constructor given a binary instruction. + * @param staticInst A StaticInstPtr to the underlying instruction. + * @param PC The PC of the instruction. + * @param pred_PC The predicted next PC. + * @param pred_NPC The predicted next NPC. + * @param seq_num The sequence number of the instruction. + * @param cpu Pointer to the instruction's CPU. + */ + BaseDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC, + Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC, + InstSeqNum seq_num, ImplCPU *cpu); /** BaseDynInst constructor given a binary instruction. * @param inst The binary instruction. @@ -349,8 +369,8 @@ class BaseDynInst : public FastAlloc, public RefCounted * @param seq_num The sequence number of the instruction. * @param cpu Pointer to the instruction's CPU. */ - BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, - Addr pred_PC, Addr pred_NPC, + BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC, + Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC, InstSeqNum seq_num, ImplCPU *cpu); /** BaseDynInst constructor given a StaticInst pointer. @@ -402,11 +422,18 @@ class BaseDynInst : public FastAlloc, public RefCounted #endif } + Addr readNextMicroPC() + { + return nextMicroPC; + } + /** Set the predicted target of this current instruction. */ - void setPredTarg(Addr predicted_PC, Addr predicted_NPC) + void setPredTarg(Addr predicted_PC, Addr predicted_NPC, + Addr predicted_MicroPC) { predPC = predicted_PC; predNPC = predicted_NPC; + predMicroPC = predicted_MicroPC; } /** Returns the predicted PC immediately after the branch. */ @@ -415,6 +442,9 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns the predicted PC two instructions after the branch */ Addr readPredNPC() { return predNPC; } + /** Returns the predicted micro PC after the branch */ + Addr readPredMicroPC() { return predMicroPC; } + /** Returns whether the instruction was predicted taken or not. */ bool readPredTaken() { @@ -430,7 +460,8 @@ class BaseDynInst : public FastAlloc, public RefCounted bool mispredicted() { return readPredPC() != readNextPC() || - readPredNPC() != readNextNPC(); + readPredNPC() != readNextNPC() || + readPredMicroPC() != readNextMicroPC(); } // @@ -467,6 +498,12 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isQuiesce() const { return staticInst->isQuiesce(); } bool isIprAccess() const { return staticInst->isIprAccess(); } bool isUnverifiable() const { return staticInst->isUnverifiable(); } + bool isMacroOp() const { return staticInst->isMacroOp(); } + bool isMicroOp() const { return staticInst->isMicroOp(); } + bool isDelayedCommit() const { return staticInst->isDelayedCommit(); } + bool isLastMicroOp() const { return staticInst->isLastMicroOp(); } + bool isFirstMicroOp() const { return staticInst->isFirstMicroOp(); } + bool isMicroBranch() const { return staticInst->isMicroBranch(); } /** Temporarily sets this instruction as a serialize before instruction. */ void setSerializeBefore() { status.set(SerializeBefore); } @@ -700,20 +737,28 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Read the PC of this instruction. */ const Addr readPC() const { return PC; } + /**Read the micro PC of this instruction. */ + const Addr readMicroPC() const { return microPC; } + /** Set the next PC of this instruction (its actual target). */ - void setNextPC(uint64_t val) + void setNextPC(Addr val) { nextPC = val; } /** Set the next NPC of this instruction (the target in Mips or Sparc).*/ - void setNextNPC(uint64_t val) + void setNextNPC(Addr val) { #if ISA_HAS_DELAY_SLOT nextNPC = val; #endif } + void setNextMicroPC(Addr val) + { + nextMicroPC = val; + } + /** Sets the ASID. */ void setASID(short addr_space_id) { asid = addr_space_id; } diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index a1c866336..acf8af9cf 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -62,19 +62,66 @@ my_hash_t thishash; #endif template -BaseDynInst::BaseDynInst(TheISA::ExtMachInst machInst, +BaseDynInst::BaseDynInst(StaticInstPtr _staticInst, Addr inst_PC, Addr inst_NPC, + Addr inst_MicroPC, Addr pred_PC, Addr pred_NPC, + Addr pred_MicroPC, InstSeqNum seq_num, ImplCPU *cpu) - : staticInst(machInst), traceData(NULL), cpu(cpu) + : staticInst(_staticInst), traceData(NULL), cpu(cpu) { seqNum = seq_num; + bool nextIsMicro = + staticInst->isMicroOp() && !staticInst->isLastMicroOp(); + PC = inst_PC; - nextPC = inst_NPC; - nextNPC = nextPC + sizeof(TheISA::MachInst); + microPC = inst_MicroPC; + if (nextIsMicro) { + nextPC = inst_PC; + nextNPC = inst_NPC; + nextMicroPC = microPC + 1; + } else { + nextPC = inst_NPC; + nextNPC = nextPC + sizeof(TheISA::MachInst); + nextMicroPC = 0; + } + predPC = pred_PC; + predNPC = pred_NPC; + predMicroPC = pred_MicroPC; + predTaken = false; + + initVars(); +} + +template +BaseDynInst::BaseDynInst(TheISA::ExtMachInst inst, + Addr inst_PC, Addr inst_NPC, + Addr inst_MicroPC, + Addr pred_PC, Addr pred_NPC, + Addr pred_MicroPC, + InstSeqNum seq_num, ImplCPU *cpu) + : staticInst(inst), traceData(NULL), cpu(cpu) +{ + seqNum = seq_num; + + bool nextIsMicro = + staticInst->isMicroOp() && !staticInst->isLastMicroOp(); + + PC = inst_PC; + microPC = inst_MicroPC; + if (nextIsMicro) { + nextPC = inst_PC; + nextNPC = inst_NPC; + nextMicroPC = microPC + 1; + } else { + nextPC = inst_NPC; + nextNPC = nextPC + sizeof(TheISA::MachInst); + nextMicroPC = 0; + } predPC = pred_PC; predNPC = pred_NPC; + predMicroPC = pred_MicroPC; predTaken = false; initVars(); diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index 8d7bb95f4..fb772060b 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -87,9 +87,10 @@ struct DefaultIEWDefaultCommit { bool squash[Impl::MaxThreads]; bool branchMispredict[Impl::MaxThreads]; bool branchTaken[Impl::MaxThreads]; - uint64_t mispredPC[Impl::MaxThreads]; - uint64_t nextPC[Impl::MaxThreads]; - uint64_t nextNPC[Impl::MaxThreads]; + Addr mispredPC[Impl::MaxThreads]; + Addr nextPC[Impl::MaxThreads]; + Addr nextNPC[Impl::MaxThreads]; + Addr nextMicroPC[Impl::MaxThreads]; InstSeqNum squashedSeqNum[Impl::MaxThreads]; bool includeSquashInst[Impl::MaxThreads]; @@ -118,9 +119,10 @@ struct TimeBufStruct { // struct as it is used pretty frequently. bool branchMispredict; bool branchTaken; - uint64_t mispredPC; - uint64_t nextPC; - uint64_t nextNPC; + Addr mispredPC; + Addr nextPC; + Addr nextNPC; + Addr nextMicroPC; unsigned branchCount; }; @@ -158,9 +160,10 @@ struct TimeBufStruct { bool branchMispredict; bool branchTaken; - uint64_t mispredPC; - uint64_t nextPC; - uint64_t nextNPC; + Addr mispredPC; + Addr nextPC; + Addr nextNPC; + Addr nextMicroPC; // Represents the instruction that has either been retired or // squashed. Similar to having a single bus that broadcasts the diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index fba618c14..27bdd20c5 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -279,25 +279,37 @@ class DefaultCommit /** Returns the PC of the head instruction of the ROB. * @todo: Probably remove this function as it returns only thread 0. */ - uint64_t readPC() { return PC[0]; } + Addr readPC() { return PC[0]; } /** Returns the PC of a specific thread. */ - uint64_t readPC(unsigned tid) { return PC[tid]; } + Addr readPC(unsigned tid) { return PC[tid]; } /** Sets the PC of a specific thread. */ - void setPC(uint64_t val, unsigned tid) { PC[tid] = val; } + void setPC(Addr val, unsigned tid) { PC[tid] = val; } + + /** Reads the micro PC of a specific thread. */ + Addr readMicroPC(unsigned tid) { return microPC[tid]; } + + /** Sets the micro PC of a specific thread */ + void setMicroPC(Addr val, unsigned tid) { microPC[tid] = val; } /** Reads the next PC of a specific thread. */ - uint64_t readNextPC(unsigned tid) { return nextPC[tid]; } + Addr readNextPC(unsigned tid) { return nextPC[tid]; } /** Sets the next PC of a specific thread. */ - void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } + void setNextPC(Addr val, unsigned tid) { nextPC[tid] = val; } /** Reads the next NPC of a specific thread. */ - uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; } + Addr readNextNPC(unsigned tid) { return nextNPC[tid]; } /** Sets the next NPC of a specific thread. */ - void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } + void setNextNPC(Addr val, unsigned tid) { nextNPC[tid] = val; } + + /** Reads the micro PC of a specific thread. */ + Addr readNextMicroPC(unsigned tid) { return nextMicroPC[tid]; } + + /** Sets the micro PC of a specific thread */ + void setNextMicroPC(Addr val, unsigned tid) { nextMicroPC[tid] = val; } private: /** Time buffer interface. */ @@ -402,12 +414,20 @@ class DefaultCommit */ Addr PC[Impl::MaxThreads]; + /** The commit micro PC of each thread. Refers to the instruction that + * is currently being processed/committed. + */ + Addr microPC[Impl::MaxThreads]; + /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; /** The next NPC of each thread. */ Addr nextNPC[Impl::MaxThreads]; + /** The next micro PC of each thread. */ + Addr nextMicroPC[Impl::MaxThreads]; + /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 9dd5ed291..fc24d7edc 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -124,7 +124,7 @@ DefaultCommit::DefaultCommit(O3CPU *_cpu, Params *params) committedStores[i] = false; trapSquash[i] = false; tcSquash[i] = false; - PC[i] = nextPC[i] = nextNPC[i] = 0; + microPC[i] = nextMicroPC[i] = PC[i] = nextPC[i] = nextNPC[i] = 0; } #if FULL_SYSTEM interrupt = NoFault; @@ -508,6 +508,7 @@ DefaultCommit::squashAll(unsigned tid) toIEW->commitInfo[tid].nextPC = PC[tid]; toIEW->commitInfo[tid].nextNPC = nextPC[tid]; + toIEW->commitInfo[tid].nextMicroPC = nextMicroPC[tid]; } template @@ -768,6 +769,7 @@ DefaultCommit::commit() toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid]; toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid]; + toIEW->commitInfo[tid].nextMicroPC = fromIEW->nextMicroPC[tid]; toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid]; @@ -877,6 +879,7 @@ DefaultCommit::commitInsts() PC[tid] = head_inst->readPC(); nextPC[tid] = head_inst->readNextPC(); nextNPC[tid] = head_inst->readNextNPC(); + nextMicroPC[tid] = head_inst->readNextMicroPC(); // Increment the total number of non-speculative instructions // executed. @@ -905,12 +908,10 @@ DefaultCommit::commitInsts() } PC[tid] = nextPC[tid]; -#if ISA_HAS_DELAY_SLOT nextPC[tid] = nextNPC[tid]; nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst); -#else - nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst); -#endif + microPC[tid] = nextMicroPC[tid]; + nextMicroPC[tid] = microPC[tid] + 1; #if FULL_SYSTEM int count = 0; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index b2b4645d2..59978a065 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -696,7 +696,7 @@ FullO3CPU::removeThread(unsigned tid) // Squash Throughout Pipeline InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum; - fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, tid); + fetch.squash(0, sizeof(TheISA::MachInst), 0, squash_seq_num, tid); decode.squash(tid); rename.squash(squash_seq_num, tid); iew.squash(tid); @@ -1150,6 +1150,20 @@ FullO3CPU::setPC(Addr new_PC,unsigned tid) commit.setPC(new_PC, tid); } +template +uint64_t +FullO3CPU::readMicroPC(unsigned tid) +{ + return commit.readMicroPC(tid); +} + +template +void +FullO3CPU::setMicroPC(Addr new_PC,unsigned tid) +{ + commit.setMicroPC(new_PC, tid); +} + template uint64_t FullO3CPU::readNextPC(unsigned tid) @@ -1178,6 +1192,20 @@ FullO3CPU::setNextNPC(uint64_t val,unsigned tid) commit.setNextNPC(val, tid); } +template +uint64_t +FullO3CPU::readNextMicroPC(unsigned tid) +{ + return commit.readNextMicroPC(tid); +} + +template +void +FullO3CPU::setNextMicroPC(Addr new_PC,unsigned tid) +{ + commit.setNextMicroPC(new_PC, tid); +} + template typename FullO3CPU::ListIt FullO3CPU::addInst(DynInstPtr &inst) diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 4b247e6e3..bff78bf9e 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -433,22 +433,34 @@ class FullO3CPU : public BaseO3CPU void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid); /** Reads the commit PC of a specific thread. */ - uint64_t readPC(unsigned tid); + Addr readPC(unsigned tid); /** Sets the commit PC of a specific thread. */ void setPC(Addr new_PC, unsigned tid); + /** Reads the commit micro PC of a specific thread. */ + Addr readMicroPC(unsigned tid); + + /** Sets the commmit micro PC of a specific thread. */ + void setMicroPC(Addr new_microPC, unsigned tid); + /** Reads the next PC of a specific thread. */ - uint64_t readNextPC(unsigned tid); + Addr readNextPC(unsigned tid); /** Sets the next PC of a specific thread. */ - void setNextPC(uint64_t val, unsigned tid); + void setNextPC(Addr val, unsigned tid); /** Reads the next NPC of a specific thread. */ - uint64_t readNextNPC(unsigned tid); + Addr readNextNPC(unsigned tid); /** Sets the next NPC of a specific thread. */ - void setNextNPC(uint64_t val, unsigned tid); + void setNextNPC(Addr val, unsigned tid); + + /** Reads the commit next micro PC of a specific thread. */ + Addr readNextMicroPC(unsigned tid); + + /** Sets the commit next micro PC of a specific thread. */ + void setNextMicroPC(Addr val, unsigned tid); /** Function to add instruction onto the head of the list of the * instructions. Used when new instructions are fetched. diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index c9d0a1885..ce6738456 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -273,6 +273,7 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) ///explicitly for ISAs with delay slots. toFetch->decodeInfo[tid].nextNPC = inst->branchTarget() + sizeof(TheISA::MachInst); + toFetch->decodeInfo[tid].nextMicroPC = inst->readMicroPC(); #if ISA_HAS_DELAY_SLOT toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() != (inst->readNextPC() + sizeof(TheISA::MachInst)); @@ -735,7 +736,8 @@ DefaultDecode::decodeInsts(unsigned tid) // a check at the end squash(inst, inst->threadNumber); Addr target = inst->branchTarget(); - inst->setPredTarg(target, target + sizeof(TheISA::MachInst)); + //The micro pc after an instruction level branch should be 0 + inst->setPredTarg(target, target + sizeof(TheISA::MachInst), 0); break; } } diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index bb0057e7c..7645a226c 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -227,7 +227,7 @@ class DefaultFetch * @param next_NPC Used for ISAs which use delay slots. * @return Whether or not a branch was predicted as taken. */ - bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC); + bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC, Addr &next_MicroPC); /** * Fetches the cache line that contains fetch_PC. Returns any @@ -242,12 +242,14 @@ class DefaultFetch bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid); /** Squashes a specific thread and resets the PC. */ - inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid); + inline void doSquash(const Addr &new_PC, const Addr &new_NPC, + const Addr &new_MicroPC, unsigned tid); /** Squashes a specific thread and resets the PC. Also tells the CPU to * remove any instructions between fetch and decode that should be sqaushed. */ void squashFromDecode(const Addr &new_PC, const Addr &new_NPC, + const Addr &new_MicroPC, const InstSeqNum &seq_num, unsigned tid); /** Checks if a thread is stalled. */ @@ -263,6 +265,7 @@ class DefaultFetch * squash should be the commit stage. */ void squash(const Addr &new_PC, const Addr &new_NPC, + const Addr &new_MicroPC, const InstSeqNum &seq_num, unsigned tid); /** Ticks the fetch stage, processing all inputs signals and fetching @@ -346,16 +349,12 @@ class DefaultFetch /** Per-thread fetch PC. */ Addr PC[Impl::MaxThreads]; + /** Per-thread fetch micro PC. */ + Addr microPC[Impl::MaxThreads]; + /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; - /** Per-thread next Next PC. - * This is not a real register but is used for - * architectures that use a branch-delay slot. - * (such as MIPS or Sparc) - */ - Addr nextNPC[Impl::MaxThreads]; - /** Memory request used to access cache. */ RequestPtr memReq[Impl::MaxThreads]; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 25498c7f3..d1f38e38b 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -312,7 +312,7 @@ DefaultFetch::initStage() for (int tid = 0; tid < numThreads; tid++) { PC[tid] = cpu->readPC(tid); nextPC[tid] = cpu->readNextPC(tid); - nextNPC[tid] = cpu->readNextNPC(tid); + microPC[tid] = cpu->readMicroPC(tid); } for (int tid=0; tid < numThreads; tid++) { @@ -439,11 +439,7 @@ DefaultFetch::takeOverFrom() stalls[i].commit = 0; PC[i] = cpu->readPC(i); nextPC[i] = cpu->readNextPC(i); -#if ISA_HAS_DELAY_SLOT - nextNPC[i] = cpu->readNextNPC(i); -#else - nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst); -#endif + microPC[i] = cpu->readMicroPC(i); fetchStatus[i] = Running; } numInst = 0; @@ -493,7 +489,7 @@ DefaultFetch::switchToInactive() template bool DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, - Addr &next_NPC) + Addr &next_NPC, Addr &next_MicroPC) { // Do branch prediction check here. // A bit of a misnomer...next_PC is actually the current PC until @@ -501,13 +497,22 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, bool predict_taken; if (!inst->isControl()) { - next_PC = next_NPC; - next_NPC = next_NPC + instSize; - inst->setPredTarg(next_PC, next_NPC); + if (inst->isMicroOp() && !inst->isLastMicroOp()) { + next_MicroPC++; + } else { + next_PC = next_NPC; + next_NPC = next_NPC + instSize; + next_MicroPC = 0; + } + inst->setPredTarg(next_PC, next_NPC, next_MicroPC); inst->setPredTaken(false); return false; } + //Assume for now that all control flow is to a different macroop which + //would reset the micro pc to 0. + next_MicroPC = 0; + int tid = inst->threadNumber; Addr pred_PC = next_PC; predict_taken = branchPred.predict(inst, pred_PC, tid); @@ -534,7 +539,7 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, #endif /* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n", tid, next_PC, next_NPC);*/ - inst->setPredTarg(next_PC, next_NPC); + inst->setPredTarg(next_PC, next_NPC, next_MicroPC); inst->setPredTaken(predict_taken); ++fetchedBranches; @@ -658,14 +663,14 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid template inline void DefaultFetch::doSquash(const Addr &new_PC, - const Addr &new_NPC, unsigned tid) + const Addr &new_NPC, const Addr &new_microPC, unsigned tid) { DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n", tid, new_PC, new_NPC); PC[tid] = new_PC; nextPC[tid] = new_NPC; - nextNPC[tid] = new_NPC + instSize; + microPC[tid] = new_microPC; // Clear the icache miss if it's outstanding. if (fetchStatus[tid] == IcacheWaitResponse) { @@ -693,12 +698,12 @@ DefaultFetch::doSquash(const Addr &new_PC, template void DefaultFetch::squashFromDecode(const Addr &new_PC, const Addr &new_NPC, - const InstSeqNum &seq_num, - unsigned tid) + const Addr &new_MicroPC, + const InstSeqNum &seq_num, unsigned tid) { DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid); - doSquash(new_PC, new_NPC, tid); + doSquash(new_PC, new_NPC, new_MicroPC, tid); // Tell the CPU to remove any instructions that are in flight between // fetch and decode. @@ -774,11 +779,12 @@ DefaultFetch::updateFetchStatus() template void DefaultFetch::squash(const Addr &new_PC, const Addr &new_NPC, + const Addr &new_MicroPC, const InstSeqNum &seq_num, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); - doSquash(new_PC, new_NPC, tid); + doSquash(new_PC, new_NPC, new_MicroPC, tid); // Tell the CPU to remove any instructions that are not in the ROB. cpu->removeInstsNotInROB(tid); @@ -893,6 +899,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) // In any case, squash. squash(fromCommit->commitInfo[tid].nextPC, fromCommit->commitInfo[tid].nextNPC, + fromCommit->commitInfo[tid].nextMicroPC, fromCommit->commitInfo[tid].doneSeqNum, tid); @@ -948,6 +955,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, fromDecode->decodeInfo[tid].nextNPC, + fromDecode->decodeInfo[tid].nextMicroPC, fromDecode->decodeInfo[tid].doneSeqNum, tid); @@ -1002,9 +1010,9 @@ DefaultFetch::fetch(bool &status_change) DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); // The current PC. - Addr &fetch_PC = PC[tid]; - - Addr &fetch_NPC = nextPC[tid]; + Addr fetch_PC = PC[tid]; + Addr fetch_NPC = nextPC[tid]; + Addr fetch_MicroPC = microPC[tid]; // Fault code for memory access. Fault fault = NoFault; @@ -1063,6 +1071,7 @@ DefaultFetch::fetch(bool &status_change) Addr next_PC = fetch_PC; Addr next_NPC = fetch_NPC; + Addr next_MicroPC = fetch_MicroPC; InstSeqNum inst_seq; MachInst inst; @@ -1070,6 +1079,9 @@ DefaultFetch::fetch(bool &status_change) // @todo: Fix this hack. unsigned offset = (fetch_PC & cacheBlkMask) & ~3; + StaticInstPtr staticInst = NULL; + StaticInstPtr macroop = NULL; + if (fault == NoFault) { // If the read of the first instruction was successful, then grab the // instructions from the rest of the cache line and put them into the @@ -1104,19 +1116,29 @@ DefaultFetch::fetch(bool &status_change) // Make sure this is a valid index. assert(offset <= cacheBlkSize - instSize); - // Get the instruction from the array of the cache line. - inst = TheISA::gtoh(*reinterpret_cast - (&cacheData[tid][offset])); + if (!macroop) { + // Get the instruction from the array of the cache line. + inst = TheISA::gtoh(*reinterpret_cast + (&cacheData[tid][offset])); - predecoder.setTC(cpu->thread[tid]->getTC()); - predecoder.moreBytes(fetch_PC, 0, inst); + predecoder.setTC(cpu->thread[tid]->getTC()); + predecoder.moreBytes(fetch_PC, 0, inst); - ext_inst = predecoder.getExtMachInst(); + ext_inst = predecoder.getExtMachInst(); + staticInst = StaticInstPtr(ext_inst); + if (staticInst->isMacroOp()) + macroop = staticInst; + } + if (macroop) { + staticInst = macroop->fetchMicroOp(fetch_MicroPC); + if (staticInst->isLastMicroOp()) + macroop = NULL; + } // Create a new DynInst from the instruction fetched. - DynInstPtr instruction = new DynInst(ext_inst, - fetch_PC, fetch_NPC, - next_PC, next_NPC, + DynInstPtr instruction = new DynInst(staticInst, + fetch_PC, fetch_NPC, fetch_MicroPC, + next_PC, next_NPC, next_MicroPC, inst_seq, cpu); instruction->setTid(tid); @@ -1139,7 +1161,7 @@ DefaultFetch::fetch(bool &status_change) instruction->readPC()); ///FIXME This needs to be more robust in dealing with delay slots - lookupAndUpdateNextPC(instruction, next_PC, next_NPC); + lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC); predicted_branch |= (next_PC != fetch_NPC); // Add instruction to the CPU's list of instructions. @@ -1157,6 +1179,7 @@ DefaultFetch::fetch(bool &status_change) // Move to the next instruction, unless we have a branch. fetch_PC = next_PC; fetch_NPC = next_NPC; + fetch_MicroPC = next_MicroPC; if (instruction->isQuiesce()) { DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!", @@ -1167,7 +1190,8 @@ DefaultFetch::fetch(bool &status_change) break; } - offset += instSize; + if (!macroop) + offset += instSize; } if (offset >= cacheBlkSize) { @@ -1191,7 +1215,7 @@ DefaultFetch::fetch(bool &status_change) if (fault == NoFault) { PC[tid] = next_PC; nextPC[tid] = next_NPC; - nextNPC[tid] = next_NPC + instSize; + microPC[tid] = next_MicroPC; DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC); } else { // We shouldn't be in an icache miss and also have a fault (an ITB @@ -1210,8 +1234,9 @@ DefaultFetch::fetch(bool &status_change) // We will use a nop in order to carry the fault. ext_inst = TheISA::NoopMachInst; + StaticInstPtr staticInst = new StaticInst(ext_inst); // Create a new DynInst from the dummy nop. - DynInstPtr instruction = new DynInst(ext_inst, + DynInstPtr instruction = new DynInst(staticInst, fetch_PC, fetch_NPC, next_PC, next_NPC, inst_seq, cpu); diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 050785818..399c44909 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -454,6 +454,7 @@ DefaultIEW::squashDueToBranch(DynInstPtr &inst, unsigned tid) #endif toCommit->nextPC[tid] = inst->readNextPC(); toCommit->nextNPC[tid] = inst->readNextNPC(); + toCommit->nextMicroPC[tid] = inst->readNextMicroPC(); toCommit->includeSquashInst[tid] = false; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 6e7180b1e..d78de2c87 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -963,6 +963,7 @@ DefaultRename::renameSrcRegs(DynInstPtr &inst,unsigned tid) // Floating point and Miscellaneous registers need their indexes // adjusted to account for the expanded number of flattened int regs. flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs; + DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", src_reg, flat_src_reg); } inst->flattenSrcReg(src_idx, flat_src_reg); @@ -979,9 +980,11 @@ DefaultRename::renameSrcRegs(DynInstPtr &inst,unsigned tid) // See if the register is ready or not. if (scoreboard->getReg(renamed_reg) == true) { - DPRINTF(Rename, "[tid:%u]: Register is ready.\n", tid); + DPRINTF(Rename, "[tid:%u]: Register %d is ready.\n", tid, renamed_reg); inst->markSrcRegReady(src_idx); + } else { + DPRINTF(Rename, "[tid:%u]: Register %d is not ready.\n", tid, renamed_reg); } ++renameRenameLookups; @@ -1008,6 +1011,7 @@ DefaultRename::renameDestRegs(DynInstPtr &inst,unsigned tid) // Floating point and Miscellaneous registers need their indexes // adjusted to account for the expanded number of flattened int regs. flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs; + DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", dest_reg, flat_dest_reg); } inst->flattenDestReg(dest_idx, flat_dest_reg); diff --git a/src/cpu/o3/sparc/dyn_inst.hh b/src/cpu/o3/sparc/dyn_inst.hh index 72242b161..a7ab6cd79 100644 --- a/src/cpu/o3/sparc/dyn_inst.hh +++ b/src/cpu/o3/sparc/dyn_inst.hh @@ -56,8 +56,14 @@ class SparcDynInst : public BaseDynInst public: /** BaseDynInst constructor given a binary instruction. */ - SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, - Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu); + SparcDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu); + + /** BaseDynInst constructor given a binary instruction. */ + SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu); /** BaseDynInst constructor given a static inst pointer. */ SparcDynInst(StaticInstPtr &_staticInst); diff --git a/src/cpu/o3/sparc/dyn_inst_impl.hh b/src/cpu/o3/sparc/dyn_inst_impl.hh index c4d30b6f4..6bfe97717 100644 --- a/src/cpu/o3/sparc/dyn_inst_impl.hh +++ b/src/cpu/o3/sparc/dyn_inst_impl.hh @@ -30,11 +30,24 @@ #include "cpu/o3/sparc/dyn_inst.hh" +template +SparcDynInst::SparcDynInst(StaticInstPtr staticInst, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, + InstSeqNum seq_num, O3CPU *cpu) + : BaseDynInst(staticInst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) +{ + initVars(); +} + template SparcDynInst::SparcDynInst(TheISA::ExtMachInst inst, - Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC, + Addr PC, Addr NPC, Addr microPC, + Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC, InstSeqNum seq_num, O3CPU *cpu) - : BaseDynInst(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu) + : BaseDynInst(inst, PC, NPC, microPC, + Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu) { initVars(); } -- 2.30.2