From: Korey Sewell Date: Sun, 23 Jul 2006 17:39:42 +0000 (-0400) Subject: This changeset gets the MIPS ISA pretty much working in the O3CPU. It builds, runs... X-Git-Tag: m5_2.0_beta1~56^2^2 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=19ca97af79f3a40111991b4f8375592c7ede65fa;p=gem5.git This changeset gets the MIPS ISA pretty much working in the O3CPU. It builds, runs, and gets very very close to completing the hello world succesfully but there are some minor quirks to iron out. Who would've known a DELAY SLOT introduces that much complexity?! arrgh! Anyways, a lot of this stuff had to do with my project at MIPS and me needing to know how I was going to get this working for the MIPS ISA. So I figured I would try to touch it up and throw it in here (I hate to introduce non-completely working components... ) src/arch/alpha/isa/mem.isa: spacing src/arch/mips/faults.cc: src/arch/mips/faults.hh: Gabe really authored this src/arch/mips/isa/decoder.isa: add StoreConditional Flag to instruction src/arch/mips/isa/formats/basic.isa: Steven really did this file src/arch/mips/isa/formats/branch.isa: fix bug for uncond/cond control src/arch/mips/isa/formats/mem.isa: Adjust O3CPU memory access to use new memory model interface. src/arch/mips/isa/formats/util.isa: update LoadStoreBase template src/arch/mips/isa_traits.cc: update SERIALIZE partially src/arch/mips/process.cc: src/arch/mips/process.hh: no need for this for NOW. ASID/Virtual addressing handles it src/arch/mips/regfile/misc_regfile.hh: add in clear() function and comments for future usage of special misc. regs src/cpu/base_dyn_inst.hh: add in nextNPC variable and supporting functions. add isCondDelaySlot function Update predTaken and mispredicted functions src/cpu/base_dyn_inst_impl.hh: init nextNPC src/cpu/o3/SConscript: add MIPS files to compile src/cpu/o3/alpha/thread_context.hh: no need for my name on this file src/cpu/o3/bpred_unit_impl.hh: Update RAS appropriately for MIPS src/cpu/o3/comm.hh: add some extra communication variables to aid in handling the delay slots src/cpu/o3/commit.hh: minor name fix for nextNPC functions. src/cpu/o3/commit_impl.hh: src/cpu/o3/decode_impl.hh: src/cpu/o3/fetch_impl.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/inst_queue_impl.hh: src/cpu/o3/rename_impl.hh: Fix necessary variables and functions for squashes with delay slots src/cpu/o3/cpu.cc: Update function interface ... adjust removeInstsNotInROB function to recognize delay slots insts src/cpu/o3/cpu.hh: update removeInstsNotInROB src/cpu/o3/decode.hh: declare necessary variables for handling delay slot src/cpu/o3/dyn_inst.hh: Add in MipsDynInst src/cpu/o3/fetch.hh: src/cpu/o3/iew.hh: src/cpu/o3/rename.hh: declare necessary variables and adjust functions for handling delay slot src/cpu/o3/inst_queue.hh: src/cpu/simple/base.cc: no need for my name here src/cpu/o3/isa_specific.hh: add in MIPS files src/cpu/o3/scoreboard.hh: dont include alpha specific isa traits! src/cpu/o3/thread_context.hh: no need for my name here, i just rearranged where the file goes src/cpu/static_inst.hh: add isCondDelaySlot function src/cpu/o3/mips/cpu.cc: src/cpu/o3/mips/cpu.hh: src/cpu/o3/mips/cpu_builder.cc: src/cpu/o3/mips/cpu_impl.hh: src/cpu/o3/mips/dyn_inst.cc: src/cpu/o3/mips/dyn_inst.hh: src/cpu/o3/mips/dyn_inst_impl.hh: src/cpu/o3/mips/impl.hh: src/cpu/o3/mips/params.hh: src/cpu/o3/mips/thread_context.cc: src/cpu/o3/mips/thread_context.hh: MIPS file for O3CPU...mirrors ALPHA definition --HG-- extra : convert_revision : 9bb199b4085903e49ffd5a4c8ac44d11460d988c --- diff --git a/src/arch/alpha/isa/mem.isa b/src/arch/alpha/isa/mem.isa index 08a0a2343..a5dda7fc6 100644 --- a/src/arch/alpha/isa/mem.isa +++ b/src/arch/alpha/isa/mem.isa @@ -668,7 +668,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + completeAccTemplate.subst(completeacc_iop)) }}; - def format LoadOrNop(memacc_code, ea_code = {{ EA = Rb + disp; }}, mem_flags = [], inst_flags = []) {{ (header_output, decoder_output, decode_block, exec_output) = \ diff --git a/src/arch/mips/faults.cc b/src/arch/mips/faults.cc index cfeb045eb..2a8ab1df5 100644 --- a/src/arch/mips/faults.cc +++ b/src/arch/mips/faults.cc @@ -25,13 +25,15 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Korey Sewell + * Authors: Gabe Black + * Korey Sewell */ #include "arch/mips/faults.hh" #include "cpu/thread_context.hh" #include "cpu/base.hh" #include "base/trace.hh" + #if !FULL_SYSTEM #include "sim/process.hh" #include "mem/page_table.hh" @@ -110,35 +112,6 @@ FaultName IntegerOverflowFault::_name = "intover"; FaultVect IntegerOverflowFault::_vect = 0x0501; FaultStat IntegerOverflowFault::_count; -#if FULL_SYSTEM - -void MipsFault::invoke(ThreadContext * tc) -{ - FaultBase::invoke(tc); - countStat()++; - - // exception restart address - if (setRestartAddress() || !tc->inPalMode()) - tc->setMiscReg(MipsISA::IPR_EXC_ADDR, tc->readPC()); - - if (skipFaultingInstruction()) { - // traps... skip faulting instruction. - tc->setMiscReg(MipsISA::IPR_EXC_ADDR, - tc->readMiscReg(MipsISA::IPR_EXC_ADDR) + 4); - } - - tc->setPC(tc->readMiscReg(MipsISA::IPR_PAL_BASE) + vect()); - tc->setNextPC(tc->readPC() + sizeof(MachInst)); -} - -void ArithmeticFault::invoke(ThreadContext * tc) -{ - FaultBase::invoke(tc); - panic("Arithmetic traps are unimplemented!"); -} - -#else //!FULL_SYSTEM - void PageTableFault::invoke(ThreadContext *tc) { Process *p = tc->getProcessPtr(); @@ -159,6 +132,5 @@ void PageTableFault::invoke(ThreadContext *tc) } } -#endif } // namespace MipsISA diff --git a/src/arch/mips/faults.hh b/src/arch/mips/faults.hh index 95c61cfbc..9d2c5df32 100644 --- a/src/arch/mips/faults.hh +++ b/src/arch/mips/faults.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Korey Sewell + * Authors: Gabe Black + * Korey Sewell */ #ifndef __MIPS_FAULTS_HH__ diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa index 9ac982e34..13f6f9712 100644 --- a/src/arch/mips/isa/decoder.isa +++ b/src/arch/mips/isa/decoder.isa @@ -1089,7 +1089,7 @@ decode OPCODE_HI default Unknown::unknown() { 0x0: StoreCond::sc({{ Mem.uw = Rt.uw;}}, {{ uint64_t tmp = write_result; Rt.uw = (tmp == 0 || tmp == 1) ? tmp : Rt.uw; - }}, mem_flags=LOCKED); + }}, mem_flags=LOCKED, inst_flags = IsStoreConditional); format StoreMemory { 0x1: swc1({{ Mem.uw = Ft.uw; }}); diff --git a/src/arch/mips/isa/formats/basic.isa b/src/arch/mips/isa/formats/basic.isa index 29dafd541..29a445b2c 100644 --- a/src/arch/mips/isa/formats/basic.isa +++ b/src/arch/mips/isa/formats/basic.isa @@ -26,7 +26,8 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Authors: Korey Sewell +// Authors: Steve Reinhardt +// Korey Sewell // Declarations for execute() methods. def template BasicExecDeclare {{ @@ -85,7 +86,7 @@ def template BasicDecodeWithMnemonic {{ return new %(class_name)s("%(mnemonic)s", machInst); }}; -// The most basic instruction format... used only for a few misc. insts +// The most basic instruction format... def format BasicOp(code, *flags) {{ iop = InstObjParams(name, Name, 'MipsStaticInst', CodeBlock(code), flags) header_output = BasicDeclare.subst(iop) diff --git a/src/arch/mips/isa/formats/branch.isa b/src/arch/mips/isa/formats/branch.isa index 5230ce9cc..9db3108b4 100644 --- a/src/arch/mips/isa/formats/branch.isa +++ b/src/arch/mips/isa/formats/branch.isa @@ -235,10 +235,11 @@ def format Branch(code,*opt_flags) {{ else: inst_flags += (x, ) + #Take into account uncond. branch instruction if 'cond == 1' in code: - inst_flags += ('IsCondControl', ) + inst_flags += ('IsUnCondControl', ) else: - inst_flags += ('IsUncondControl', ) + inst_flags += ('IsCondControl', ) #Condition code code = 'bool cond;\n' + code diff --git a/src/arch/mips/isa/formats/mem.isa b/src/arch/mips/isa/formats/mem.isa index f03f7becd..d6b0c2938 100644 --- a/src/arch/mips/isa/formats/mem.isa +++ b/src/arch/mips/isa/formats/mem.isa @@ -26,7 +26,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Authors: Gabe Black +// Authors: Steve Reinhardt // Korey Sewell //////////////////////////////////////////////////////////////////// @@ -162,7 +162,7 @@ def template InitiateAccDeclare {{ def template CompleteAccDeclare {{ - Fault completeAcc(uint8_t *, %(CPU_exec_context)s *, Trace::InstRecord *) const; + Fault completeAcc(Packet *, %(CPU_exec_context)s *, Trace::InstRecord *) const; }}; @@ -288,7 +288,7 @@ def template LoadInitiateAcc {{ def template LoadCompleteAcc {{ - Fault %(class_name)s::completeAcc(uint8_t *data, + Fault %(class_name)s::completeAcc(Packet *pkt, %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { @@ -297,7 +297,7 @@ def template LoadCompleteAcc {{ %(fp_enable_check)s; %(op_decl)s; - memcpy(&Mem, data, sizeof(Mem)); + Mem = pkt->get(); if (fault == NoFault) { %(memacc_code)s; @@ -390,7 +390,6 @@ def template StoreInitiateAcc {{ { Addr EA; Fault fault = NoFault; - uint64_t write_result = 0; %(fp_enable_check)s; %(op_decl)s; @@ -403,7 +402,7 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, - memAccessFlags, &write_result); + memAccessFlags, NULL); if (traceData) { traceData->setData(Mem); } } @@ -413,17 +412,38 @@ def template StoreInitiateAcc {{ def template StoreCompleteAcc {{ - Fault %(class_name)s::completeAcc(uint8_t *data, + Fault %(class_name)s::completeAcc(Packet *pkt, + %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(fp_enable_check)s; + %(op_dest_decl)s; + + if (fault == NoFault) { + %(postacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template StoreCondCompleteAcc {{ + Fault %(class_name)s::completeAcc(Packet *pkt, %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { Fault fault = NoFault; - uint64_t write_result = 0; %(fp_enable_check)s; %(op_dest_decl)s; - memcpy(&write_result, data, sizeof(write_result)); + uint64_t write_result = pkt->req->getScResult(); if (fault == NoFault) { %(postacc_code)s; @@ -489,7 +509,7 @@ def template MiscInitiateAcc {{ def template MiscCompleteAcc {{ - Fault %(class_name)s::completeAcc(uint8_t *data, + Fault %(class_name)s::completeAcc(Packet *pkt, %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { @@ -580,5 +600,5 @@ def format StoreCond(memacc_code, postacc_code, mem_flags = [], inst_flags = []) {{ (header_output, decoder_output, decode_block, exec_output) = \ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, - postacc_code, exec_template_base = 'Store') + postacc_code, exec_template_base = 'StoreCond') }}; diff --git a/src/arch/mips/isa/formats/util.isa b/src/arch/mips/isa/formats/util.isa index 0cc375af3..9f25c7725 100644 --- a/src/arch/mips/isa/formats/util.isa +++ b/src/arch/mips/isa/formats/util.isa @@ -65,7 +65,7 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, if (exec_template_base == 'Load'): initiateacc_cblk = CodeBlock(ea_code + memacc_code) completeacc_cblk = CodeBlock(memacc_code + postacc_code) - elif (exec_template_base == 'Store'): + elif (exec_template_base.startswith('Store')): initiateacc_cblk = CodeBlock(ea_code + memacc_code) completeacc_cblk = CodeBlock(postacc_code) else: @@ -83,7 +83,7 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, initiateacc_iop.memacc_code = memacc_cblk.code completeacc_iop.memacc_code = memacc_cblk.code completeacc_iop.postacc_code = postacc_cblk.code - elif (exec_template_base == 'Store'): + elif (exec_template_base.startswith('Store')): initiateacc_iop.ea_code = ea_cblk.code initiateacc_iop.memacc_code = memacc_cblk.code completeacc_iop.postacc_code = postacc_cblk.code @@ -104,6 +104,13 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, memacc_iop.constructor += s # select templates + + # define aliases... most StoreCond templates are the same as the + # corresponding Store templates (only CompleteAcc is different). + StoreCondMemAccExecute = StoreMemAccExecute + StoreCondExecute = StoreExecute + StoreCondInitiateAcc = StoreInitiateAcc + memAccExecTemplate = eval(exec_template_base + 'MemAccExecute') fullExecTemplate = eval(exec_template_base + 'Execute') initiateAccTemplate = eval(exec_template_base + 'InitiateAcc') @@ -118,7 +125,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + initiateAccTemplate.subst(initiateacc_iop) + completeAccTemplate.subst(completeacc_iop)) }}; - output header {{ std::string inst2string(MachInst machInst); }}; diff --git a/src/arch/mips/isa_traits.cc b/src/arch/mips/isa_traits.cc index a8b41270e..1ec4bde3e 100644 --- a/src/arch/mips/isa_traits.cc +++ b/src/arch/mips/isa_traits.cc @@ -73,9 +73,9 @@ void RegFile::serialize(std::ostream &os) { intRegFile.serialize(os); - //SERIALIZE_ARRAY(floatRegFile.q, NumFloatRegs); + //SERIALIZE_ARRAY(floatRegFile, NumFloatRegs); + //SERIALZE_ARRAY(miscRegFile); //SERIALIZE_SCALAR(miscRegs.fpcr); - //SERIALIZE_SCALAR(miscRegs.uniq); //SERIALIZE_SCALAR(miscRegs.lock_flag); //SERIALIZE_SCALAR(miscRegs.lock_addr); SERIALIZE_SCALAR(pc); @@ -88,9 +88,9 @@ void RegFile::unserialize(Checkpoint *cp, const std::string §ion) { intRegFile.unserialize(cp, section); - //UNSERIALIZE_ARRAY(floatRegFile.q, NumFloatRegs); + //UNSERIALIZE_ARRAY(floatRegFile); + //UNSERIALZE_ARRAY(miscRegFile); //UNSERIALIZE_SCALAR(miscRegs.fpcr); - //UNSERIALIZE_SCALAR(miscRegs.uniq); //UNSERIALIZE_SCALAR(miscRegs.lock_flag); //UNSERIALIZE_SCALAR(miscRegs.lock_addr); UNSERIALIZE_SCALAR(pc); diff --git a/src/arch/mips/process.cc b/src/arch/mips/process.cc index cb847fe04..031c2030e 100644 --- a/src/arch/mips/process.cc +++ b/src/arch/mips/process.cc @@ -41,8 +41,6 @@ using namespace std; using namespace MipsISA; -Addr MipsLiveProcess::stack_start = 0x7FFFFFFF; - MipsLiveProcess::MipsLiveProcess(const std::string &nm, ObjectFile *objFile, System *_system, int stdin_fd, int stdout_fd, int stderr_fd, std::vector &argv, std::vector &envp) @@ -51,11 +49,10 @@ MipsLiveProcess::MipsLiveProcess(const std::string &nm, ObjectFile *objFile, { // Set up stack. On MIPS, stack starts at the top of kuseg // user address space. MIPS stack grows down from here - stack_base = stack_start; + stack_base = 0x7FFFFFFF; // Set pointer for next thread stack. Reserve 8M for main stack. next_thread_stack_base = stack_base - (8 * 1024 * 1024); - stack_start = next_thread_stack_base; // Set up break point (Top of Heap) brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize(); diff --git a/src/arch/mips/process.hh b/src/arch/mips/process.hh index 4baee134b..400591599 100644 --- a/src/arch/mips/process.hh +++ b/src/arch/mips/process.hh @@ -51,8 +51,6 @@ class MipsLiveProcess : public LiveProcess void startup(); - - static Addr stack_start; }; diff --git a/src/arch/mips/regfile/misc_regfile.hh b/src/arch/mips/regfile/misc_regfile.hh index 87961f97e..67aef9c63 100644 --- a/src/arch/mips/regfile/misc_regfile.hh +++ b/src/arch/mips/regfile/misc_regfile.hh @@ -45,12 +45,25 @@ namespace MipsISA protected: uint64_t fpcr; // floating point condition codes + // FPCR is not used in MIPS. Condition + // codes are kept as part of the FloatRegFile + bool lock_flag; // lock flag for LL/SC + // use LL reg. in the future + Addr lock_addr; // lock address for LL/SC + // use LLAddr reg. in the future MiscReg miscRegFile[NumMiscRegs]; public: + void clear() + { + fpcr = 0; + lock_flag = 0; + lock_addr = 0; + } + void copyMiscRegs(ThreadContext *tc); MiscReg readReg(int misc_reg) diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 9cc61f74c..40611abe6 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -215,6 +215,9 @@ class BaseDynInst : public FastAlloc, public RefCounted */ Addr nextPC; + /** Next non-speculative NPC. Target PC for Mips or Sparc. */ + Addr nextNPC; + /** Predicted next PC. */ Addr predPC; @@ -275,6 +278,11 @@ class BaseDynInst : public FastAlloc, public RefCounted */ Addr readNextPC() { return nextPC; } + /** Returns the next NPC. This could be the speculative next NPC if it is + * called prior to the actual branch target being calculated. + */ + Addr readNextNPC() { return nextNPC; } + /** Set the predicted target of this current instruction. */ void setPredTarg(Addr predicted_PC) { predPC = predicted_PC; } @@ -282,11 +290,20 @@ class BaseDynInst : public FastAlloc, public RefCounted Addr readPredTarg() { return predPC; } /** Returns whether the instruction was predicted taken or not. */ - bool predTaken() { return predPC != (PC + sizeof(MachInst)); } + bool predTaken() +#if THE_ISA == ALPHA_ISA + { return predPC != (PC + sizeof(MachInst)); } +#else + { return predPC != (nextPC + sizeof(MachInst)); } +#endif /** Returns whether the instruction mispredicted. */ - bool mispredicted() { return predPC != nextPC; } - + bool mispredicted() +#if THE_ISA == ALPHA_ISA + { return predPC != nextPC; } +#else + { return predPC != nextNPC; } +#endif // // Instruction types. Forward checks to StaticInst object. // @@ -308,6 +325,7 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isIndirectCtrl() const { return staticInst->isIndirectCtrl(); } bool isCondCtrl() const { return staticInst->isCondCtrl(); } bool isUncondCtrl() const { return staticInst->isUncondCtrl(); } + bool isCondDelaySlot() const { return staticInst->isCondDelaySlot(); } bool isThreadSync() const { return staticInst->isThreadSync(); } bool isSerializing() const { return staticInst->isSerializing(); } bool isSerializeBefore() const @@ -545,6 +563,12 @@ class BaseDynInst : public FastAlloc, public RefCounted nextPC = val; } + /** Set the next NPC of this instruction (the target in Mips or Sparc).*/ + void setNextNPC(uint64_t val) + { + nextNPC = val; + } + /** Sets the ASID. */ void setASID(short addr_space_id) { asid = addr_space_id; } diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index 91424faad..a344990b4 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -74,6 +74,7 @@ BaseDynInst::BaseDynInst(ExtMachInst machInst, Addr inst_PC, PC = inst_PC; nextPC = PC + sizeof(MachInst); + nextNPC = nextPC + sizeof(MachInst); predPC = pred_PC; initVars(); diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript index e65d41411..44882e5ec 100755 --- a/src/cpu/o3/SConscript +++ b/src/cpu/o3/SConscript @@ -52,15 +52,14 @@ if env['TARGET_ISA'] == 'alpha': alpha/cpu_builder.cc ''') elif env['TARGET_ISA'] == 'mips': - sys.exit('O3 CPU does not support MIPS') - #sources += Split(''' - # mips/dyn_inst.cc - # mips/cpu.cc - # mips/thread_context.cc - # mips/cpu_builder.cc - # ''') + sources += Split(''' + mips/dyn_inst.cc + mips/cpu.cc + mips/thread_context.cc + mips/cpu_builder.cc + ''') elif env['TARGET_ISA'] == 'sparc': - sys.exit('O3 CPU does not support MIPS') + sys.exit('O3 CPU does not support Sparc') #sources += Split(''' # sparc/dyn_inst.cc # sparc/cpu.cc diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh index ad52b0d2e..773f08ecc 100644 --- a/src/cpu/o3/alpha/thread_context.hh +++ b/src/cpu/o3/alpha/thread_context.hh @@ -26,7 +26,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim - * Korey Sewell */ #include "cpu/o3/thread_context.hh" diff --git a/src/cpu/o3/bpred_unit_impl.hh b/src/cpu/o3/bpred_unit_impl.hh index 0da02145b..8dad42011 100644 --- a/src/cpu/o3/bpred_unit_impl.hh +++ b/src/cpu/o3/bpred_unit_impl.hh @@ -159,7 +159,7 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) void *bp_history = NULL; if (inst->isUncondCtrl()) { - DPRINTF(Fetch, "BranchPred: [tid:%i] Unconditional control.\n", tid); + DPRINTF(Fetch, "BranchPred: [tid:%i]: Unconditional control.\n", tid); pred_taken = true; // Tell the BP there was an unconditional branch. BPUncond(bp_history); @@ -201,15 +201,20 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) ++BTBLookups; if (inst->isCall()) { - RAS[tid].push(PC + sizeof(MachInst)); +#if THE_ISA == ALPHA_ISA + Addr ras_pc = PC + sizeof(MachInst); // Next PC +#else + Addr ras_pc = PC + (2 * sizeof(MachInst)); // Next Next PC +#endif + RAS[tid].push(ras_pc); // Record that it was a call so that the top RAS entry can // be popped off if the speculation is incorrect. predict_record.wasCall = true; - DPRINTF(Fetch, "BranchPred: [tid:%i] Instruction %#x was a call" + DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x was a call" ", adding %#x to the RAS.\n", - tid, inst->readPC(), PC + sizeof(MachInst)); + tid, inst->readPC(), ras_pc); } if (BTB.valid(PC, tid)) { @@ -242,7 +247,7 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) predHist[tid].push_front(predict_record); - DPRINTF(Fetch, "[tid:%i] predHist.size(): %i\n", tid, predHist[tid].size()); + DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size()); return pred_taken; } @@ -251,8 +256,8 @@ template void BPredUnit::update(const InstSeqNum &done_sn, unsigned tid) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until sequence" - "number %lli.\n", tid, done_sn); + DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until " + "[sn:%lli].\n", tid, done_sn); while (!predHist[tid].empty() && predHist[tid].back().seqNum <= done_sn) { diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index bf1bd08e8..e311910cf 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -88,6 +88,7 @@ struct DefaultIEWDefaultCommit { bool squash[Impl::MaxThreads]; bool branchMispredict[Impl::MaxThreads]; bool branchTaken[Impl::MaxThreads]; + bool condDelaySlotBranch[Impl::MaxThreads]; uint64_t mispredPC[Impl::MaxThreads]; uint64_t nextPC[Impl::MaxThreads]; InstSeqNum squashedSeqNum[Impl::MaxThreads]; @@ -113,6 +114,7 @@ struct TimeBufStruct { uint64_t branchAddr; InstSeqNum doneSeqNum; + InstSeqNum bdelayDoneSeqNum; // @todo: Might want to package this kind of branch stuff into a single // struct as it is used pretty frequently. @@ -165,6 +167,9 @@ struct TimeBufStruct { // retired or squashed sequence number. InstSeqNum doneSeqNum; + InstSeqNum bdelayDoneSeqNum; + bool squashDelaySlot; + //Just in case we want to do a commit/squash on a cycle //(necessary for multiple ROBs?) bool commitInsts; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 956b6ec3e..cb83012f7 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -286,13 +286,11 @@ class DefaultCommit /** Sets the next PC of a specific thread. */ void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } -#if THE_ISA != ALPHA_ISA /** Reads the next NPC of a specific thread. */ - uint64_t readNextPC(unsigned tid) { return nextNPC[tid]; } + uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; } /** Sets the next NPC of a specific thread. */ - void setNextPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } -#endif + void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } private: /** Time buffer interface. */ @@ -397,10 +395,8 @@ class DefaultCommit /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; -#if THE_ISA != ALPHA_ISA /** The next NPC of each thread. */ Addr nextNPC[Impl::MaxThreads]; -#endif /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index c667d633a..8a8035e73 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -120,7 +120,7 @@ DefaultCommit::DefaultCommit(Params *params) changedROBNumEntries[i] = false; trapSquash[i] = false; tcSquash[i] = false; - PC[i] = nextPC[i] = 0; + PC[i] = nextPC[i] = nextNPC[i] = 0; } } @@ -723,14 +723,48 @@ DefaultCommit::commit() // then use one older sequence number. InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; - if (fromIEW->includeSquashInst[tid] == true) - squashed_inst--; +#if THE_ISA != ALPHA_ISA + InstSeqNum bdelay_done_seq_num; + bool squash_bdelay_slot; + + if (fromIEW->branchMispredict[tid]) { + if (fromIEW->branchTaken[tid] && + fromIEW->condDelaySlotBranch[tid]) { + DPRINTF(Commit, "[tid:%i]: Cond. delay slot branch" + "mispredicted as taken. Squashing after previous " + "inst, [sn:%i]\n", + tid, squashed_inst); + bdelay_done_seq_num = squashed_inst; + squash_bdelay_slot = true; + } else { + DPRINTF(Commit, "[tid:%i]: Branch Mispredict. Squashing " + "after delay slot [sn:%i]\n", tid, squashed_inst+1); + bdelay_done_seq_num = squashed_inst + 1; + squash_bdelay_slot = false; + } + } else { + bdelay_done_seq_num = squashed_inst; + } +#endif + if (fromIEW->includeSquashInst[tid] == true) { + squashed_inst--; +#if THE_ISA != ALPHA_ISA + bdelay_done_seq_num--; +#endif + } // All younger instructions will be squashed. Set the sequence // number as the youngest instruction in the ROB. youngestSeqNum[tid] = squashed_inst; +#if THE_ISA == ALPHA_ISA rob->squash(squashed_inst, tid); + toIEW->commitInfo[tid].squashDelaySlot = true; +#else + rob->squash(bdelay_done_seq_num, tid); + toIEW->commitInfo[tid].squashDelaySlot = squash_bdelay_slot; + toIEW->commitInfo[tid].bdelayDoneSeqNum = bdelay_done_seq_num; +#endif changedROBNumEntries[tid] = true; toIEW->commitInfo[tid].doneSeqNum = squashed_inst; @@ -840,6 +874,7 @@ DefaultCommit::commitInsts() } else { PC[tid] = head_inst->readPC(); nextPC[tid] = head_inst->readNextPC(); + nextNPC[tid] = head_inst->readNextNPC(); // Increment the total number of non-speculative instructions // executed. @@ -868,7 +903,13 @@ DefaultCommit::commitInsts() } PC[tid] = nextPC[tid]; +#if THE_ISA == ALPHA_ISA nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst); +#else + nextPC[tid] = nextNPC[tid]; + nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst); +#endif + #if FULL_SYSTEM int count = 0; Addr oldpc; @@ -1069,6 +1110,8 @@ template void DefaultCommit::getInsts() { + DPRINTF(Commit, "Getting instructions from Rename stage.\n"); + // Read any renamed instructions and place them into the ROB. int insts_to_process = min((int)renameWidth, fromRename->size); diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index c43cc2cf8..af032132e 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -667,11 +667,12 @@ FullO3CPU::removeThread(unsigned tid) } // Squash Throughout Pipeline - fetch.squash(0,tid); + InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum; + fetch.squash(0, squash_seq_num, true, tid); decode.squash(tid); - rename.squash(tid); + rename.squash(squash_seq_num, tid); iew.squash(tid); - commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid); + commit.rob->squash(squash_seq_num, tid); assert(iew.ldstQueue.getCount(tid) == 0); @@ -1101,7 +1102,6 @@ FullO3CPU::setNextPC(uint64_t val,unsigned tid) commit.setNextPC(val, tid); } -#if THE_ISA != ALPHA_ISA template uint64_t FullO3CPU::readNextNPC(unsigned tid) @@ -1111,11 +1111,10 @@ FullO3CPU::readNextNPC(unsigned tid) template void -FullO3CPU::setNextNNPC(uint64_t val,unsigned tid) +FullO3CPU::setNextNPC(uint64_t val,unsigned tid) { commit.setNextNPC(val, tid); } -#endif template typename FullO3CPU::ListIt @@ -1165,7 +1164,9 @@ FullO3CPU::removeFrontInst(DynInstPtr &inst) template void -FullO3CPU::removeInstsNotInROB(unsigned tid) +FullO3CPU::removeInstsNotInROB(unsigned tid, + bool squash_delay_slot, + const InstSeqNum &delay_slot_seq_num) { DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" " list.\n", tid); @@ -1196,6 +1197,12 @@ FullO3CPU::removeInstsNotInROB(unsigned tid) while (inst_it != end_it) { assert(!instList.empty()); +#if THE_ISA != ALPHA_ISA + if(!squash_delay_slot && + delay_slot_seq_num >= (*inst_it)->seqNum) { + break; + } +#endif squashInstIt(inst_it, tid); inst_it--; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 83cb966e3..8c4f663ef 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -449,8 +449,10 @@ class FullO3CPU : public BaseO3CPU */ void removeFrontInst(DynInstPtr &inst); - /** Remove all instructions that are not currently in the ROB. */ - void removeInstsNotInROB(unsigned tid); + /** Remove all instructions that are not currently in the ROB. + * There's also an option to not squash delay slot instructions.*/ + void removeInstsNotInROB(unsigned tid, bool squash_delay_slot, + const InstSeqNum &delay_slot_seq_num); /** Remove all instructions younger than the given sequence number. */ void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid); diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index 7f5ecbc26..4a845e670 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -276,6 +276,19 @@ class DefaultDecode /** Maximum size of the skid buffer. */ unsigned skidBufferMax; + /** SeqNum of Squashing Branch Delay Instruction (used for MIPS)*/ + Addr bdelayDoneSeqNum[Impl::MaxThreads]; + + /** Instruction used for squashing branch (used for MIPS)*/ + DynInstPtr squashInst[Impl::MaxThreads]; + + /** Tells when their is a pending delay slot inst. to send + * to rename. If there is, then wait squash after the next + * instruction (used for MIPS). + */ + bool squashAfterDelaySlot[Impl::MaxThreads]; + + /** Stat for total number of idle cycles. */ Stats::Scalar<> decodeIdleCycles; /** Stat for total number of blocked cycles. */ diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 8b851c032..0bc6a109f 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -50,6 +50,8 @@ DefaultDecode::DefaultDecode(Params *params) stalls[i].rename = false; stalls[i].iew = false; stalls[i].commit = false; + + squashAfterDelaySlot[i] = false; } // @todo: Make into a parameter @@ -278,13 +280,25 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) // Send back mispredict information. toFetch->decodeInfo[tid].branchMispredict = true; - toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].predIncorrect = true; + toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].squash = true; toFetch->decodeInfo[tid].nextPC = inst->branchTarget(); +#if THE_ISA == ALPHA_ISA toFetch->decodeInfo[tid].branchTaken = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); + InstSeqNum squash_seq_num = inst->seqNum; +#else + toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() != + (inst->readNextPC() + sizeof(TheISA::MachInst)); + + toFetch->decodeInfo[tid].bdelayDoneSeqNum = bdelayDoneSeqNum[tid]; + squashAfterDelaySlot[tid] = false; + + InstSeqNum squash_seq_num = bdelayDoneSeqNum[tid]; +#endif + // Might have to tell fetch to unblock. if (decodeStatus[tid] == Blocked || decodeStatus[tid] == Unblocking) { @@ -296,7 +310,7 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) for (int i=0; isize; i++) { if (fromFetch->insts[i]->threadNumber == tid && - fromFetch->insts[i]->seqNum > inst->seqNum) { + fromFetch->insts[i]->seqNum > squash_seq_num) { fromFetch->insts[i]->setSquashed(); } } @@ -304,15 +318,35 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) // Clear the instruction list and skid buffer in case they have any // insts in them. while (!insts[tid].empty()) { + +#if THE_ISA != ALPHA_ISA + if (insts[tid].front()->seqNum <= squash_seq_num) { + DPRINTF(Decode, "[tid:%i]: Cannot remove incoming decode " + "instructions before delay slot [sn:%i]. %i insts" + "left in decode.\n", tid, squash_seq_num, + insts[tid].size()); + break; + } +#endif insts[tid].pop(); } while (!skidBuffer[tid].empty()) { + +#if THE_ISA != ALPHA_ISA + if (skidBuffer[tid].front()->seqNum <= squash_seq_num) { + DPRINTF(Decode, "[tid:%i]: Cannot remove skidBuffer " + "instructions before delay slot [sn:%i]. %i insts" + "left in decode.\n", tid, squash_seq_num, + insts[tid].size()); + break; + } +#endif skidBuffer[tid].pop(); } // Squash instructions up until this one - cpu->removeInstsUntil(inst->seqNum, tid); + cpu->removeInstsUntil(squash_seq_num, tid); } template @@ -611,7 +645,7 @@ DefaultDecode::decode(bool &status_change, unsigned tid) // will allow, as long as it is not currently blocked. if (decodeStatus[tid] == Running || decodeStatus[tid] == Idle) { - DPRINTF(Decode, "[tid:%u] Not blocked, so attempting to run " + DPRINTF(Decode, "[tid:%u]: Not blocked, so attempting to run " "stage.\n",tid); decodeInsts(tid); @@ -710,6 +744,9 @@ DefaultDecode::decodeInsts(unsigned tid) // Ensure that if it was predicted as a branch, it really is a // branch. if (inst->predTaken() && !inst->isControl()) { + DPRINTF(Decode, "PredPC : %#x != NextPC: %#x\n",inst->predPC, + inst->nextPC + 4); + panic("Instruction predicted as a branch!"); ++decodeControlMispred; @@ -730,12 +767,43 @@ DefaultDecode::decodeInsts(unsigned tid) // Might want to set some sort of boolean and just do // a check at the end +#if THE_ISA == ALPHA_ISA squash(inst, inst->threadNumber); inst->setPredTarg(inst->branchTarget()); - break; +#else + // If mispredicted as taken, then ignore delay slot + // instruction... else keep delay slot and squash + // after it is sent to rename + if (inst->predTaken() && inst->isCondDelaySlot()) { + DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst." + "[sn:%i] PC %#x mispredicted as taken.\n", tid, + inst->seqNum, inst->PC); + bdelayDoneSeqNum[tid] = inst->seqNum; + squash(inst, inst->threadNumber); + inst->setPredTarg(inst->branchTarget()); + break; + } else { + DPRINTF(Decode, "[tid:%i]: Misprediction detected at " + "[sn:%i] PC %#x, will squash after delay slot " + "inst. is sent to Rename\n", + tid, inst->seqNum, inst->PC); + bdelayDoneSeqNum[tid] = inst->seqNum + 1; + squashAfterDelaySlot[tid] = true; + squashInst[tid] = inst; + continue; + } +#endif } } + + if (squashAfterDelaySlot[tid]) { + assert(!inst->isSquashed()); + squash(squashInst[tid], squashInst[tid]->threadNumber); + squashInst[tid]->setPredTarg(squashInst[tid]->branchTarget()); + assert(!inst->isSquashed()); + break; + } } // If we didn't process all instructions, then we will need to block diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index a2cdf2dba..5f7caf79f 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -34,12 +34,15 @@ #include "arch/isa_specific.hh" #if THE_ISA == ALPHA_ISA -template -class AlphaDynInst; - -struct AlphaSimpleImpl; - -typedef AlphaDynInst O3DynInst; + template class AlphaDynInst; + struct AlphaSimpleImpl; + typedef AlphaDynInst O3DynInst; +#elif THE_ISA == MIPS_ISA + template class MipsDynInst; + struct MipsSimpleImpl; + typedef MipsDynInst O3DynInst; +#else + #error "O3DynInst not defined for this ISA" #endif #endif // __CPU_O3_DYN_INST_HH__ diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 931919af8..1a2ca32a4 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -106,6 +106,7 @@ class DefaultFetch virtual void recvRetry(); }; + public: /** Overall fetch status. Used to determine if the CPU can * deschedule itsef due to a lack of activity. @@ -218,9 +219,10 @@ class DefaultFetch * @param next_PC Next PC variable passed in by reference. It is * expected to be set to the current PC; it will be updated with what * the next PC will be. + * @param next_NPC Used for ISAs which use delay slots. * @return Whether or not a branch was predicted as taken. */ - bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC); + bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC); /** * Fetches the cache line that contains fetch_PC. Returns any @@ -255,7 +257,8 @@ class DefaultFetch * remove any instructions that are not in the ROB. The source of this * squash should be the commit stage. */ - void squash(const Addr &new_PC, unsigned tid); + void squash(const Addr &new_PC, const InstSeqNum &seq_num, + bool squash_delay_slot, unsigned tid); /** Ticks the fetch stage, processing all inputs signals and fetching * as many instructions as possible. @@ -340,14 +343,12 @@ class DefaultFetch /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; -#if THE_ISA != ALPHA_ISA /** Per-thread next Next PC. * This is not a real register but is used for * architectures that use a branch-delay slot. * (such as MIPS or Sparc) */ Addr nextNPC[Impl::MaxThreads]; -#endif /** Memory request used to access cache. */ RequestPtr memReq[Impl::MaxThreads]; @@ -360,6 +361,19 @@ class DefaultFetch /** Tracks how many instructions has been fetched this cycle. */ int numInst; + /** Tracks delay slot information for threads in ISAs which use + * delay slots; + */ + struct DelaySlotInfo { + InstSeqNum delaySlotSeqNum; + InstSeqNum branchSeqNum; + int numInsts; + Addr targetAddr; + bool targetReady; + }; + + DelaySlotInfo delaySlotInfo[Impl::MaxThreads]; + /** Source of possible stalls. */ struct Stalls { bool decode; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 4184e1867..54e35433b 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -165,10 +165,15 @@ DefaultFetch::DefaultFetch(Params *params) cacheDataPC[tid] = 0; cacheDataValid[tid] = false; - stalls[tid].decode = 0; - stalls[tid].rename = 0; - stalls[tid].iew = 0; - stalls[tid].commit = 0; + delaySlotInfo[tid].branchSeqNum = -1; + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; + + stalls[tid].decode = false; + stalls[tid].rename = false; + stalls[tid].iew = false; + stalls[tid].commit = false; } // Get the size of an instruction. @@ -288,6 +293,9 @@ DefaultFetch::setCPU(O3CPU *cpu_ptr) } #endif + // Schedule fetch to get the correct PC from the CPU + // scheduleFetchStartupEvent(1); + // Fetch needs to start fetching instructions at the very beginning, // so it must start up in active state. switchToActive(); @@ -426,6 +434,10 @@ DefaultFetch::takeOverFrom() nextPC[i] = cpu->readNextPC(i); #if THE_ISA != ALPHA_ISA nextNPC[i] = cpu->readNextNPC(i); + delaySlotInfo[i].branchSeqNum = -1; + delaySlotInfo[i].numInsts = 0; + delaySlotInfo[i].targetAddr = 0; + delaySlotInfo[i].targetReady = false; #endif fetchStatus[i] = Running; } @@ -474,7 +486,8 @@ DefaultFetch::switchToInactive() template bool -DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) +DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, + Addr &next_NPC) { // Do branch prediction check here. // A bit of a misnomer...next_PC is actually the current PC until @@ -482,12 +495,54 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) bool predict_taken; if (!inst->isControl()) { +#if THE_ISA == ALPHA_ISA next_PC = next_PC + instSize; inst->setPredTarg(next_PC); +#else + Addr cur_PC = next_PC; + next_PC = cur_PC + instSize; //next_NPC; + next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize; + inst->setPredTarg(next_NPC); +#endif return false; } - predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber); + int tid = inst->threadNumber; +#if THE_ISA == ALPHA_ISA + predict_taken = branchPred.predict(inst, next_PC, tid); +#else + Addr pred_PC = next_PC; + predict_taken = branchPred.predict(inst, pred_PC, tid); + + if (predict_taken) { + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid); + } else { + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid); + } + + if (predict_taken) { + next_PC = next_NPC; + next_NPC = pred_PC; + + // Update delay slot info + ++delaySlotInfo[tid].numInsts; + delaySlotInfo[tid].targetAddr = pred_PC; + DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid, + delaySlotInfo[tid].numInsts); + } else { // !predict_taken + if (inst->isCondDelaySlot()) { + next_PC = pred_PC; + // The delay slot is skipped here if there is on + // prediction + } else { + next_PC = next_NPC; + // No need to declare a delay slot here since + // there is no for the pred. target to jump + } + + next_NPC = next_NPC + instSize; + } +#endif ++fetchedBranches; @@ -606,6 +661,7 @@ DefaultFetch::doSquash(const Addr &new_PC, unsigned tid) PC[tid] = new_PC; nextPC[tid] = new_PC + instSize; + nextNPC[tid] = new_PC + (2 * instSize); // Clear the icache miss if it's outstanding. if (fetchStatus[tid] == IcacheWaitResponse) { @@ -639,6 +695,14 @@ DefaultFetch::squashFromDecode(const Addr &new_PC, doSquash(new_PC, tid); +#if THE_ISA != ALPHA_ISA + if (seq_num <= delaySlotInfo[tid].branchSeqNum) { + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; + } +#endif + // Tell the CPU to remove any instructions that are in flight between // fetch and decode. cpu->removeInstsUntil(seq_num, tid); @@ -712,14 +776,26 @@ DefaultFetch::updateFetchStatus() template void -DefaultFetch::squash(const Addr &new_PC, unsigned tid) +DefaultFetch::squash(const Addr &new_PC, const InstSeqNum &seq_num, + bool squash_delay_slot, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); doSquash(new_PC, tid); +#if THE_ISA == ALPHA_ISA // Tell the CPU to remove any instructions that are not in the ROB. - cpu->removeInstsNotInROB(tid); + cpu->removeInstsNotInROB(tid, true, 0); +#else + if (seq_num <= delaySlotInfo[tid].branchSeqNum) { + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; + } + + // Tell the CPU to remove any instructions that are not in the ROB. + cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num); +#endif } template @@ -828,8 +904,16 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " "from commit.\n",tid); +#if THE_ISA == ALPHA_ISA + InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum; +#else + InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum; +#endif // In any case, squash. - squash(fromCommit->commitInfo[tid].nextPC,tid); + squash(fromCommit->commitInfo[tid].nextPC, + doneSeqNum, + fromCommit->commitInfo[tid].squashDelaySlot, + tid); // Also check if there's a mispredict that happened. if (fromCommit->commitInfo[tid].branchMispredict) { @@ -876,9 +960,15 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) } if (fetchStatus[tid] != Squashing) { + +#if THE_ISA == ALPHA_ISA + InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum; +#else + InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum; +#endif // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, - fromDecode->decodeInfo[tid].doneSeqNum, + doneSeqNum, tid); return true; @@ -984,6 +1074,7 @@ DefaultFetch::fetch(bool &status_change) } Addr next_PC = fetch_PC; + Addr next_NPC = next_PC + instSize; InstSeqNum inst_seq; MachInst inst; ExtMachInst ext_inst; @@ -1002,10 +1093,13 @@ DefaultFetch::fetch(bool &status_change) // ended this fetch block. bool predicted_branch = false; + // Need to keep track of whether or not a delay slot + // instruction has been fetched + for (; offset < cacheBlkSize && numInst < fetchWidth && - !predicted_branch; + (!predicted_branch || delaySlotInfo[tid].numInsts > 0); ++numInst) { // Get a sequence number. @@ -1042,7 +1136,8 @@ DefaultFetch::fetch(bool &status_change) instruction->staticInst, instruction->readPC(),tid); - predicted_branch = lookupAndUpdateNextPC(instruction, next_PC); + predicted_branch = lookupAndUpdateNextPC(instruction, next_PC, + next_NPC); // Add instruction to the CPU's list of instructions. instruction->setInstListIt(cpu->addInst(instruction)); @@ -1068,7 +1163,41 @@ DefaultFetch::fetch(bool &status_change) break; } - offset+= instSize; + offset += instSize; + +#if THE_ISA != ALPHA_ISA + if (predicted_branch) { + delaySlotInfo[tid].branchSeqNum = inst_seq; + + DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n", + tid, inst_seq); + continue; + } else if (delaySlotInfo[tid].numInsts > 0) { + --delaySlotInfo[tid].numInsts; + + // It's OK to set PC to target of branch + if (delaySlotInfo[tid].numInsts == 0) { + delaySlotInfo[tid].targetReady = true; + + // Break the looping condition + predicted_branch = true; + } + + DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to" + " process.\n", tid, delaySlotInfo[tid].numInsts); + } +#endif + } + + if (offset >= cacheBlkSize) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " + "block.\n", tid); + } else if (numInst >= fetchWidth) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " + "for this cycle.\n", tid); + } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " + "instruction encountered.\n", tid); } } @@ -1085,12 +1214,19 @@ DefaultFetch::fetch(bool &status_change) PC[tid] = next_PC; nextPC[tid] = next_PC + instSize; #else - PC[tid] = next_PC; - nextPC[tid] = next_PC + instSize; - nextPC[tid] = next_PC + instSize; - - thread->setNextPC(thread->readNextNPC()); - thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); + if (delaySlotInfo[tid].targetReady && + delaySlotInfo[tid].numInsts == 0) { + // Set PC to target + PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC + nextPC[tid] = next_PC + instSize; //next_NPC + nextNPC[tid] = next_PC + (2 * instSize); + + delaySlotInfo[tid].targetReady = false; + } else { + PC[tid] = next_PC; + nextPC[tid] = next_NPC; + nextNPC[tid] = next_NPC + instSize; + } #endif } else { // We shouldn't be in an icache miss and also have a fault (an ITB diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 6c6be4787..76fa008ee 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -405,6 +405,9 @@ class DefaultIEW /** Records if there is a fetch redirect on this cycle for each thread. */ bool fetchRedirect[Impl::MaxThreads]; + /** Keeps track of the last valid branch delay slot instss for threads */ + InstSeqNum bdelayDoneSeqNum[Impl::MaxThreads]; + /** Used to track if all instructions have been dispatched this cycle. * If they have not, then blocking must have occurred, and the instructions * would already be added to the skid buffer. diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 684ae2295..c4ec93b3e 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -73,6 +73,7 @@ DefaultIEW::DefaultIEW(Params *params) dispatchStatus[i] = Running; stalls[i].commit = false; fetchRedirect[i] = false; + bdelayDoneSeqNum[i] = 0; } wbMax = wbWidth * params->wbDepth; @@ -428,13 +429,31 @@ DefaultIEW::squash(unsigned tid) instQueue.squash(tid); // Tell the LDSTQ to start squashing. +#if THE_ISA == ALPHA_ISA ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid); - +#else + ldstQueue.squash(fromCommit->commitInfo[tid].bdelayDoneSeqNum, tid); +#endif updatedQueues = true; // Clear the skid buffer in case it has any data in it. - while (!skidBuffer[tid].empty()) { + DPRINTF(IEW, "[tid:%i]: Removing skidbuffer instructions until [sn:%i].\n", + tid, fromCommit->commitInfo[tid].bdelayDoneSeqNum); + while (!skidBuffer[tid].empty()) { +#if THE_ISA != ALPHA_ISA + if (skidBuffer[tid].front()->seqNum <= + fromCommit->commitInfo[tid].bdelayDoneSeqNum) { + DPRINTF(IEW, "[tid:%i]: Cannot remove skidbuffer instructions " + "that occur before delay slot [sn:%i].\n", + fromCommit->commitInfo[tid].bdelayDoneSeqNum, + tid); + break; + } else { + DPRINTF(IEW, "[tid:%i]: Removing instruction [sn:%i] from " + "skidBuffer.\n", tid, skidBuffer[tid].front()->seqNum); + } +#endif if (skidBuffer[tid].front()->isLoad() || skidBuffer[tid].front()->isStore() ) { toRename->iewInfo[tid].dispatchedToLSQ++; @@ -445,6 +464,8 @@ DefaultIEW::squash(unsigned tid) skidBuffer[tid].pop(); } + bdelayDoneSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum; + emptyRenameInsts(tid); } @@ -458,10 +479,26 @@ DefaultIEW::squashDueToBranch(DynInstPtr &inst, unsigned tid) toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->mispredPC[tid] = inst->readPC(); - toCommit->nextPC[tid] = inst->readNextPC(); toCommit->branchMispredict[tid] = true; + +#if THE_ISA == ALPHA_ISA toCommit->branchTaken[tid] = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); + toCommit->nextPC[tid] = inst->readNextPC(); +#else + bool branch_taken = inst->readNextNPC() != + (inst->readNextPC() + sizeof(TheISA::MachInst)); + + toCommit->branchTaken[tid] = branch_taken; + + toCommit->condDelaySlotBranch[tid] = inst->isCondDelaySlot(); + + if (inst->isCondDelaySlot() && branch_taken) { + toCommit->nextPC[tid] = inst->readNextPC(); + } else { + toCommit->nextPC[tid] = inst->readNextNPC(); + } +#endif toCommit->includeSquashInst[tid] = false; @@ -825,8 +862,10 @@ DefaultIEW::sortInsts() { int insts_from_rename = fromRename->size; #ifdef DEBUG +#if THE_ISA == ALPHA_ISA for (int i = 0; i < numThreads; i++) assert(insts[i].empty()); +#endif #endif for (int i = 0; i < insts_from_rename; ++i) { insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]); @@ -837,7 +876,23 @@ template void DefaultIEW::emptyRenameInsts(unsigned tid) { + DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions until " + "[sn:%i].\n", tid, bdelayDoneSeqNum[tid]); + while (!insts[tid].empty()) { + +#if THE_ISA != ALPHA_ISA + if (insts[tid].front()->seqNum <= bdelayDoneSeqNum[tid]) { + DPRINTF(IEW, "[tid:%i]: Done removing, cannot remove instruction" + " that occurs at or before delay slot [sn:%i].\n", + tid, bdelayDoneSeqNum[tid]); + break; + } else { + DPRINTF(IEW, "[tid:%i]: Removing incoming rename instruction " + "[sn:%i].\n", tid, insts[tid].front()->seqNum); + } +#endif + if (insts[tid].front()->isLoad() || insts[tid].front()->isStore() ) { toRename->iewInfo[tid].dispatchedToLSQ++; @@ -1120,7 +1175,7 @@ DefaultIEW::dispatchInsts(unsigned tid) } if (!insts_to_dispatch.empty()) { - DPRINTF(IEW,"[tid:%i]: Issue: Bandwidth Full. Blocking.\n"); + DPRINTF(IEW,"[tid:%i]: Issue: Bandwidth Full. Blocking.\n", tid); block(tid); toRename->iewUnblock[tid] = false; } @@ -1263,9 +1318,13 @@ DefaultIEW::executeInsts() fetchRedirect[tid] = true; DPRINTF(IEW, "Execute: Branch mispredict detected.\n"); +#if THE_ISA == ALPHA_ISA DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n", inst->nextPC); - +#else + DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n", + inst->nextNPC); +#endif // If incorrect, then signal the ROB that it must be squashed. squashDueToBranch(inst, tid); diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 4c69ca384..d745faf7b 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -26,7 +26,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim - * Korey Sewell */ #ifndef __CPU_O3_INST_QUEUE_HH__ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 36e0842be..8faae3ebf 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -993,7 +993,11 @@ InstructionQueue::squash(unsigned tid) // Read instruction sequence number of last instruction out of the // time buffer. +#if THE_ISA == ALPHA_ISA squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; +#else + squashedSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum; +#endif // Call doSquash if there are insts in the IQ if (count[tid] > 0) { diff --git a/src/cpu/o3/isa_specific.hh b/src/cpu/o3/isa_specific.hh index f8a9dd8cc..4937589e3 100755 --- a/src/cpu/o3/isa_specific.hh +++ b/src/cpu/o3/isa_specific.hh @@ -35,6 +35,11 @@ #include "cpu/o3/alpha/impl.hh" #include "cpu/o3/alpha/params.hh" #include "cpu/o3/alpha/dyn_inst.hh" +#elif THE_ISA == MIPS_ISA + #include "cpu/o3/mips/cpu.hh" + #include "cpu/o3/mips/impl.hh" + #include "cpu/o3/mips/params.hh" + #include "cpu/o3/mips/dyn_inst.hh" #else - #error "O3CPU doesnt support this ISA" + #error "ISA-specific header files O3CPU not defined ISA" #endif diff --git a/src/cpu/o3/mips/cpu.cc b/src/cpu/o3/mips/cpu.cc new file mode 100755 index 000000000..420f460b2 --- /dev/null +++ b/src/cpu/o3/mips/cpu.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/mips/impl.hh" +#include "cpu/o3/mips/cpu_impl.hh" +#include "cpu/o3/mips/dyn_inst.hh" + +// Force instantiation of MipsO3CPU for all the implemntations that are +// needed. Consider merging this and mips_dyn_inst.cc, and maybe all +// classes that depend on a certain impl, into one file (mips_impl.cc?). +template class MipsO3CPU; diff --git a/src/cpu/o3/mips/cpu.hh b/src/cpu/o3/mips/cpu.hh new file mode 100755 index 000000000..1813e8656 --- /dev/null +++ b/src/cpu/o3/mips/cpu.hh @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_CPU_HH__ +#define __CPU_O3_MIPS_CPU_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/thread_context.hh" +#include "cpu/o3/cpu.hh" +#include "sim/byteswap.hh" + +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + +/** + * MipsO3CPU class. Derives from the FullO3CPU class, and + * implements all ISA and implementation specific functions of the + * CPU. This is the CPU class that is used for the SimObjects, and is + * what is given to the DynInsts. Most of its state exists in the + * FullO3CPU; the state is has is mainly for ISA specific + * functionality. + */ +template +class MipsO3CPU : public FullO3CPU +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::RegFile RegFile; + typedef TheISA::MiscRegFile MiscRegFile; + + public: + typedef O3ThreadState ImplState; + typedef O3ThreadState Thread; + typedef typename Impl::Params Params; + + /** Constructs an MipsO3CPU with the given parameters. */ + MipsO3CPU(Params *params); + + /** Registers statistics. */ + void regStats(); + + /** Translates instruction requestion in syscall emulation mode. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data read request in syscall emulation mode. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data write request in syscall emulation mode. */ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg, unsigned tid); + + /** Reads a misc. register, including any side effects the read + * might have as defined by the architecture. + */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); + + /** Sets a miscellaneous register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); + + /** Sets a misc. register, including any side effects the write + * might have as defined by the architecture. + */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); + + /** Initiates a squash of all in-flight instructions for a given + * thread. The source of the squash is an external update of + * state through the TC. + */ + void squashFromTC(unsigned tid); + + /** Traps to handle given fault. */ + void trap(Fault fault, unsigned tid); + + /** Executes a syscall. + * @todo: Determine if this needs to be virtual. + */ + void syscall(int64_t callnum, int tid); + /** Gets a syscall argument. */ + IntReg getSyscallArg(int i, int tid); + + /** Used to shift args for indirect syscall. */ + void setSyscallArg(int i, IntReg val, int tid); + + /** Sets the return value of a syscall. */ + void setSyscallReturn(SyscallReturn return_value, int tid); + + /** CPU read function, forwards read to LSQ. */ + template + Fault read(RequestPtr &req, T &data, int load_idx) + { + return this->iew.ldstQueue.read(req, data, load_idx); + } + + /** CPU write function, forwards write to LSQ. */ + template + Fault write(RequestPtr &req, T &data, int store_idx) + { + return this->iew.ldstQueue.write(req, data, store_idx); + } + + Addr lockAddr; + + /** Temporary fix for the lock flag, works in the UP case. */ + bool lockFlag; +}; + +#endif // __CPU_O3_MIPS_CPU_HH__ diff --git a/src/cpu/o3/mips/cpu_builder.cc b/src/cpu/o3/mips/cpu_builder.cc new file mode 100644 index 000000000..f1c3b33a5 --- /dev/null +++ b/src/cpu/o3/mips/cpu_builder.cc @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include + +#include "cpu/base.hh" +#include "cpu/o3/mips/cpu.hh" +#include "cpu/o3/mips/impl.hh" +#include "cpu/o3/mips/params.hh" +#include "cpu/o3/fu_pool.hh" +#include "sim/builder.hh" + +class DerivO3CPU : public MipsO3CPU +{ + public: + DerivO3CPU(MipsSimpleParams *p) + : MipsO3CPU(p) + { } +}; + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) + +Param clock; +Param numThreads; +Param activity; + +SimObjectVectorParam workload; + +SimObjectParam mem; + +SimObjectParam checker; + +Param max_insts_any_thread; +Param max_insts_all_threads; +Param max_loads_any_thread; +Param max_loads_all_threads; + +Param cachePorts; + +Param decodeToFetchDelay; +Param renameToFetchDelay; +Param iewToFetchDelay; +Param commitToFetchDelay; +Param fetchWidth; + +Param renameToDecodeDelay; +Param iewToDecodeDelay; +Param commitToDecodeDelay; +Param fetchToDecodeDelay; +Param decodeWidth; + +Param iewToRenameDelay; +Param commitToRenameDelay; +Param decodeToRenameDelay; +Param renameWidth; + +Param commitToIEWDelay; +Param renameToIEWDelay; +Param issueToExecuteDelay; +Param dispatchWidth; +Param issueWidth; +Param wbWidth; +Param wbDepth; +SimObjectParam fuPool; + +Param iewToCommitDelay; +Param renameToROBDelay; +Param commitWidth; +Param squashWidth; +Param trapLatency; + +Param backComSize; +Param forwardComSize; + +Param predType; +Param localPredictorSize; +Param localCtrBits; +Param localHistoryTableSize; +Param localHistoryBits; +Param globalPredictorSize; +Param globalCtrBits; +Param globalHistoryBits; +Param choicePredictorSize; +Param choiceCtrBits; + +Param BTBEntries; +Param BTBTagSize; + +Param RASSize; + +Param LQEntries; +Param SQEntries; +Param LFSTSize; +Param SSITSize; + +Param numPhysIntRegs; +Param numPhysFloatRegs; +Param numIQEntries; +Param numROBEntries; + +Param smtNumFetchingThreads; +Param smtFetchPolicy; +Param smtLSQPolicy; +Param smtLSQThreshold; +Param smtIQPolicy; +Param smtIQThreshold; +Param smtROBPolicy; +Param smtROBThreshold; +Param smtCommitPolicy; + +Param instShiftAmt; + +Param defer_registration; + +Param function_trace; +Param function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), + INIT_PARAM_DFLT(activity, "Initial activity count", 0), + + INIT_PARAM(workload, "Processes to run"), + + INIT_PARAM(mem, "Memory"), + + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(dispatchWidth, "Dispatch width"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(wbWidth, "Writeback width"), + INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"), + INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), + + INIT_PARAM(backComSize, "Time buffer size for backwards communication"), + INIT_PARAM(forwardComSize, "Time buffer size for forward communication"), + + INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), + INIT_PARAM(localPredictorSize, "Size of local predictor"), + INIT_PARAM(localCtrBits, "Bits per counter"), + INIT_PARAM(localHistoryTableSize, "Size of local history table"), + INIT_PARAM(localHistoryBits, "Bits for the local history"), + INIT_PARAM(globalPredictorSize, "Size of global predictor"), + INIT_PARAM(globalCtrBits, "Bits per counter"), + INIT_PARAM(globalHistoryBits, "Bits of history"), + INIT_PARAM(choicePredictorSize, "Size of choice predictor"), + INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), + INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), + INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), + INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), + INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), + INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), + INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) + +CREATE_SIM_OBJECT(DerivO3CPU) +{ + DerivO3CPU *cpu; + + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + (numThreads.isValid() && numThreads >= workload.size()) ? + numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + + MipsSimpleParams *params = new MipsSimpleParams; + + params->clock = clock; + + params->name = getInstanceName(); + params->numberOfThreads = actual_num_threads; + params->activity = activity; + + params->workload = workload; + + params->mem = mem; + + params->checker = checker; + + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params->cachePorts = cachePorts; + + params->decodeToFetchDelay = decodeToFetchDelay; + params->renameToFetchDelay = renameToFetchDelay; + params->iewToFetchDelay = iewToFetchDelay; + params->commitToFetchDelay = commitToFetchDelay; + params->fetchWidth = fetchWidth; + + params->renameToDecodeDelay = renameToDecodeDelay; + params->iewToDecodeDelay = iewToDecodeDelay; + params->commitToDecodeDelay = commitToDecodeDelay; + params->fetchToDecodeDelay = fetchToDecodeDelay; + params->decodeWidth = decodeWidth; + + params->iewToRenameDelay = iewToRenameDelay; + params->commitToRenameDelay = commitToRenameDelay; + params->decodeToRenameDelay = decodeToRenameDelay; + params->renameWidth = renameWidth; + + params->commitToIEWDelay = commitToIEWDelay; + params->renameToIEWDelay = renameToIEWDelay; + params->issueToExecuteDelay = issueToExecuteDelay; + params->dispatchWidth = dispatchWidth; + params->issueWidth = issueWidth; + params->wbWidth = wbWidth; + params->wbDepth = wbDepth; + params->fuPool = fuPool; + + params->iewToCommitDelay = iewToCommitDelay; + params->renameToROBDelay = renameToROBDelay; + params->commitWidth = commitWidth; + params->squashWidth = squashWidth; + params->trapLatency = trapLatency; + + params->backComSize = backComSize; + params->forwardComSize = forwardComSize; + + params->predType = predType; + params->localPredictorSize = localPredictorSize; + params->localCtrBits = localCtrBits; + params->localHistoryTableSize = localHistoryTableSize; + params->localHistoryBits = localHistoryBits; + params->globalPredictorSize = globalPredictorSize; + params->globalCtrBits = globalCtrBits; + params->globalHistoryBits = globalHistoryBits; + params->choicePredictorSize = choicePredictorSize; + params->choiceCtrBits = choiceCtrBits; + + params->BTBEntries = BTBEntries; + params->BTBTagSize = BTBTagSize; + + params->RASSize = RASSize; + + params->LQEntries = LQEntries; + params->SQEntries = SQEntries; + + params->SSITSize = SSITSize; + params->LFSTSize = LFSTSize; + + params->numPhysIntRegs = numPhysIntRegs; + params->numPhysFloatRegs = numPhysFloatRegs; + params->numIQEntries = numIQEntries; + params->numROBEntries = numROBEntries; + + params->smtNumFetchingThreads = smtNumFetchingThreads; + + // Default smtFetchPolicy to "RoundRobin", if necessary. + std::string round_robin_policy = "RoundRobin"; + std::string single_thread = "SingleThread"; + + if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0) + params->smtFetchPolicy = round_robin_policy; + else + params->smtFetchPolicy = smtFetchPolicy; + + params->smtIQPolicy = smtIQPolicy; + params->smtLSQPolicy = smtLSQPolicy; + params->smtLSQThreshold = smtLSQThreshold; + params->smtROBPolicy = smtROBPolicy; + params->smtROBThreshold = smtROBThreshold; + params->smtCommitPolicy = smtCommitPolicy; + + params->instShiftAmt = 2; + + params->deferRegistration = defer_registration; + + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + + cpu = new DerivO3CPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("DerivO3CPU", DerivO3CPU) + diff --git a/src/cpu/o3/mips/cpu_impl.hh b/src/cpu/o3/mips/cpu_impl.hh new file mode 100644 index 000000000..6b9f3ae10 --- /dev/null +++ b/src/cpu/o3/mips/cpu_impl.hh @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "config/use_checker.hh" + +#include "arch/mips/faults.hh" +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/checker/thread_context.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/o3/mips/cpu.hh" +#include "cpu/o3/mips/params.hh" +#include "cpu/o3/mips/thread_context.hh" +#include "cpu/o3/comm.hh" +#include "cpu/o3/thread_state.hh" + +using namespace TheISA; + +template +MipsO3CPU::MipsO3CPU(Params *params) + : FullO3CPU(params) +{ + DPRINTF(O3CPU, "Creating MipsO3CPU object.\n"); + + // Setup any thread state. + this->thread.resize(this->numThreads); + + for (int i = 0; i < this->numThreads; ++i) { + if (i < params->workload.size()) { + DPRINTF(O3CPU, "Workload[%i] process is %#x", + i, this->thread[i]); + this->thread[i] = new Thread(this, i, params->workload[i], + i, params->mem); + + this->thread[i]->setStatus(ThreadContext::Suspended); + + + /* Use this port to for syscall emulation writes to memory. */ + Port *mem_port; + TranslatingPort *trans_port; + trans_port = new TranslatingPort(csprintf("%s-%d-funcport", + name(), i), + params->workload[i]->pTable, + false); + mem_port = params->mem->getPort("functional"); + mem_port->setPeer(trans_port); + trans_port->setPeer(mem_port); + this->thread[i]->setMemPort(trans_port); + + //usedTids[i] = true; + //threadMap[i] = i; + } else { + //Allocate Empty thread so M5 can use later + //when scheduling threads to CPU + Process* dummy_proc = NULL; + + this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem); + //usedTids[i] = false; + } + + ThreadContext *tc; + + // Setup the TC that will serve as the interface to the threads/CPU. + MipsTC *mips_tc = + new MipsTC; + + tc = mips_tc; + + // If we're using a checker, then the TC should be the + // CheckerThreadContext. +#if USE_CHECKER + if (params->checker) { + tc = new CheckerThreadContext >( + mips_tc, this->checker); + } +#endif + + mips_tc->cpu = this; + mips_tc->thread = this->thread[i]; + + // Give the thread the TC. + this->thread[i]->tc = tc; + + // Add the TC to the CPU's list of TC's. + this->threadContexts.push_back(tc); + } + + for (int i=0; i < this->numThreads; i++) { + this->thread[i]->setFuncExeInst(0); + } + + // Sets CPU pointers. These must be set at this level because the CPU + // pointers are defined to be the highest level of CPU class. + this->fetch.setCPU(this); + this->decode.setCPU(this); + this->rename.setCPU(this); + this->iew.setCPU(this); + this->commit.setCPU(this); + + this->rob.setCPU(this); + this->regFile.setCPU(this); + + lockAddr = 0; + lockFlag = false; +} + +template +void +MipsO3CPU::regStats() +{ + // Register stats for everything that has stats. + this->fullCPURegStats(); + this->fetch.regStats(); + this->decode.regStats(); + this->rename.regStats(); + this->iew.regStats(); + this->commit.regStats(); +} + + +template +MiscReg +MipsO3CPU::readMiscReg(int misc_reg, unsigned tid) +{ + return this->regFile.readMiscReg(misc_reg, tid); +} + +template +MiscReg +MipsO3CPU::readMiscRegWithEffect(int misc_reg, Fault &fault, + unsigned tid) +{ + return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid); +} + +template +Fault +MipsO3CPU::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) +{ + return this->regFile.setMiscReg(misc_reg, val, tid); +} + +template +Fault +MipsO3CPU::setMiscRegWithEffect(int misc_reg, const MiscReg &val, + unsigned tid) +{ + return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); +} + +template +void +MipsO3CPU::squashFromTC(unsigned tid) +{ + this->thread[tid]->inSyscall = true; + this->commit.generateTCEvent(tid); +} + +template +void +MipsO3CPU::trap(Fault fault, unsigned tid) +{ + // Pass the thread's TC into the invoke method. + fault->invoke(this->threadContexts[tid]); +} + +#if !FULL_SYSTEM + +template +void +MipsO3CPU::syscall(int64_t callnum, int tid) +{ + DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); + + DPRINTF(Activity,"Activity: syscall() called.\n"); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++(this->thread[tid]->funcExeInst); + + // Execute the actual syscall. + this->thread[tid]->syscall(callnum); + + // Decrease funcExeInst by one as the normal commit will handle + // incrementing it. + --(this->thread[tid]->funcExeInst); + + DPRINTF(O3CPU, "[tid:%i] Register 2 is %i ", tid, this->readIntReg(2)); +} + +template +TheISA::IntReg +MipsO3CPU::getSyscallArg(int i, int tid) +{ + return this->readArchIntReg(MipsISA::ArgumentReg0 + i, tid); +} + +template +void +MipsO3CPU::setSyscallArg(int i, IntReg val, int tid) +{ + this->setArchIntReg(MipsISA::ArgumentReg0 + i, val, tid); +} + +template +void +MipsO3CPU::setSyscallReturn(SyscallReturn return_value, int tid) +{ + // check for error condition. Mips syscall convention is to + // indicate success/failure in reg a3 (r19) and put the + // return value itself in the standard return value reg (v0). + if (return_value.successful()) { + // no error + this->setArchIntReg(SyscallSuccessReg, 0, tid); + this->setArchIntReg(ReturnValueReg, return_value.value(), tid); + } else { + // got an error, return details + this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid); + this->setArchIntReg(ReturnValueReg, -return_value.value(), tid); + } +} +#endif diff --git a/src/cpu/o3/mips/dyn_inst.cc b/src/cpu/o3/mips/dyn_inst.cc new file mode 100755 index 000000000..216aa7d2c --- /dev/null +++ b/src/cpu/o3/mips/dyn_inst.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/mips/dyn_inst_impl.hh" +#include "cpu/o3/mips/impl.hh" + +// Force instantiation of MipsDynInst for all the implementations that +// are needed. +template class MipsDynInst; diff --git a/src/cpu/o3/mips/dyn_inst.hh b/src/cpu/o3/mips/dyn_inst.hh new file mode 100755 index 000000000..06bdfcec4 --- /dev/null +++ b/src/cpu/o3/mips/dyn_inst.hh @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_DYN_INST_HH__ +#define __CPU_O3_MIPS_DYN_INST_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/base_dyn_inst.hh" +#include "cpu/inst_seq.hh" +#include "cpu/o3/mips/cpu.hh" +#include "cpu/o3/mips/impl.hh" + +class Packet; + +/** + * Mostly implementation & ISA specific MipsDynInst. As with most + * other classes in the new CPU model, it is templated on the Impl to + * allow for passing in of all types, such as the CPU type and the ISA + * type. The MipsDynInst serves as the primary interface to the CPU + * for instructions that are executing. + */ +template +class MipsDynInst : public BaseDynInst +{ + public: + /** Typedef for the CPU. */ + typedef typename Impl::O3CPU O3CPU; + + /** Binary machine instruction type. */ + typedef TheISA::MachInst MachInst; + /** Extended machine instruction type. */ + typedef TheISA::ExtMachInst ExtMachInst; + /** Logical register index type. */ + typedef TheISA::RegIndex RegIndex; + /** Integer register index type. */ + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + /** Misc register index type. */ + typedef TheISA::MiscReg MiscReg; + + enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs + }; + + public: + /** BaseDynInst constructor given a binary instruction. */ + MipsDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + O3CPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + MipsDynInst(StaticInstPtr &_staticInst); + + /** Executes the instruction.*/ + Fault execute(); + + /** Initiates the access. Only valid for memory operations. */ + Fault initiateAcc(); + + /** Completes the access. Only valid for memory operations. */ + Fault completeAcc(Packet *pkt); + + private: + /** Initializes variables. */ + void initVars(); + + public: + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg) + { + return this->cpu->readMiscReg(misc_reg, this->threadNumber); + } + + /** Reads a misc. register, including any side-effects the read + * might have as defined by the architecture. + */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { + return this->cpu->readMiscRegWithEffect(misc_reg, fault, + this->threadNumber); + } + + /** Sets a misc. register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + this->instResult.integer = val; + return this->cpu->setMiscReg(misc_reg, val, this->threadNumber); + } + + /** Sets a misc. register, including any side-effects the write + * might have as defined by the architecture. + */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + return this->cpu->setMiscRegWithEffect(misc_reg, val, + this->threadNumber); + } + + /** Calls a syscall. */ + void syscall(int64_t callnum); + + private: + /** Physical register index of the destination registers of this + * instruction. + */ + PhysRegIndex _destRegIdx[MaxInstDestRegs]; + + /** Physical register index of the source registers of this + * instruction. + */ + PhysRegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** Physical register index of the previous producers of the + * architected destinations. + */ + PhysRegIndex _prevDestRegIdx[MaxInstDestRegs]; + + public: + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + uint64_t readIntReg(const StaticInst *si, int idx) + { + return this->cpu->readIntReg(_srcRegIdx[idx]); + } + + FloatReg readFloatReg(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatReg(_srcRegIdx[idx], width); + } + + FloatReg readFloatReg(const StaticInst *si, int idx) + { + return this->cpu->readFloatReg(_srcRegIdx[idx]); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx], width); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx]); + } + + /** @todo: Make results into arrays so they can handle multiple dest + * registers. + */ + void setIntReg(const StaticInst *si, int idx, uint64_t val) + { + this->cpu->setIntReg(_destRegIdx[idx], val); + BaseDynInst::setIntReg(si, idx, val); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) + { + this->cpu->setFloatReg(_destRegIdx[idx], val, width); + BaseDynInst::setFloatReg(si, idx, val, width); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val) + { + this->cpu->setFloatReg(_destRegIdx[idx], val); + BaseDynInst::setFloatReg(si, idx, val); + } + + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val, width); + BaseDynInst::setFloatRegBits(si, idx, val); + } + + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val); + BaseDynInst::setFloatRegBits(si, idx, val); + } + + /** Returns the physical register index of the i'th destination + * register. + */ + PhysRegIndex renamedDestRegIdx(int idx) const + { + return _destRegIdx[idx]; + } + + /** Returns the physical register index of the i'th source register. */ + PhysRegIndex renamedSrcRegIdx(int idx) const + { + return _srcRegIdx[idx]; + } + + /** Returns the physical register index of the previous physical register + * that remapped to the same logical register index. + */ + PhysRegIndex prevDestRegIdx(int idx) const + { + return _prevDestRegIdx[idx]; + } + + /** Renames a destination register to a physical register. Also records + * the previous physical register that the logical register mapped to. + */ + void renameDestReg(int idx, + PhysRegIndex renamed_dest, + PhysRegIndex previous_rename) + { + _destRegIdx[idx] = renamed_dest; + _prevDestRegIdx[idx] = previous_rename; + } + + /** Renames a source logical register to the physical register which + * has/will produce that logical register's result. + * @todo: add in whether or not the source register is ready. + */ + void renameSrcReg(int idx, PhysRegIndex renamed_src) + { + _srcRegIdx[idx] = renamed_src; + } + + public: + /** Calculates EA part of a memory instruction. Currently unused, + * though it may be useful in the future if we want to split + * memory operations into EA calculation and memory access parts. + */ + Fault calcEA() + { + return this->staticInst->eaCompInst()->execute(this, this->traceData); + } + + /** Does the memory access part of a memory instruction. Currently unused, + * though it may be useful in the future if we want to split + * memory operations into EA calculation and memory access parts. + */ + Fault memAccess() + { + return this->staticInst->memAccInst()->execute(this, this->traceData); + } +}; + +#endif // __CPU_O3_MIPS_DYN_INST_HH__ + diff --git a/src/cpu/o3/mips/dyn_inst_impl.hh b/src/cpu/o3/mips/dyn_inst_impl.hh new file mode 100755 index 000000000..57dec1ccf --- /dev/null +++ b/src/cpu/o3/mips/dyn_inst_impl.hh @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/o3/mips/dyn_inst.hh" + +template +MipsDynInst::MipsDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, O3CPU *cpu) + : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) +{ + initVars(); +} + +template +MipsDynInst::MipsDynInst(StaticInstPtr &_staticInst) + : BaseDynInst(_staticInst) +{ + initVars(); +} + +template +void +MipsDynInst::initVars() +{ + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < this->staticInst->numDestRegs(); i++) { + _destRegIdx[i] = this->staticInst->destRegIdx(i); + } + + for (int i = 0; i < this->staticInst->numSrcRegs(); i++) { + _srcRegIdx[i] = this->staticInst->srcRegIdx(i); + this->_readySrcRegIdx[i] = 0; + } +} + +template +Fault +MipsDynInst::execute() +{ + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the TC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the TC). Fix this. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + this->fault = this->staticInst->execute(this, this->traceData); + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template +Fault +MipsDynInst::initiateAcc() +{ + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the TC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the TC). Fix this. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + this->fault = this->staticInst->initiateAcc(this, this->traceData); + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template +Fault +MipsDynInst::completeAcc(Packet *pkt) +{ + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); + + return this->fault; +} + +template +void +MipsDynInst::syscall(int64_t callnum) +{ + this->cpu->syscall(callnum, this->threadNumber); +} + diff --git a/src/cpu/o3/mips/impl.hh b/src/cpu/o3/mips/impl.hh new file mode 100644 index 000000000..ac7181a19 --- /dev/null +++ b/src/cpu/o3/mips/impl.hh @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_IMPL_HH__ +#define __CPU_O3_MIPS_IMPL_HH__ + +#include "arch/mips/isa_traits.hh" + +#include "cpu/o3/mips/params.hh" +#include "cpu/o3/cpu_policy.hh" + + +// Forward declarations. +template +class MipsDynInst; + +template +class MipsO3CPU; + +/** Implementation specific struct that defines several key types to the + * CPU, the stages within the CPU, the time buffers, and the DynInst. + * The struct defines the ISA, the CPU policy, the specific DynInst, the + * specific O3CPU, and all of the structs from the time buffers to do + * communication. + * This is one of the key things that must be defined for each hardware + * specific CPU implementation. + */ +struct MipsSimpleImpl +{ + /** The type of MachInst. */ + typedef TheISA::MachInst MachInst; + + /** The CPU policy to be used, which defines all of the CPU stages. */ + typedef SimpleCPUPolicy CPUPol; + + /** The DynInst type to be used. */ + typedef MipsDynInst DynInst; + + /** The refcounted DynInst pointer to be used. In most cases this is + * what should be used, and not DynInst *. + */ + typedef RefCountingPtr DynInstPtr; + + /** The O3CPU type to be used. */ + typedef MipsO3CPU O3CPU; + + /** Same typedef, but for CPUType. BaseDynInst may not always use + * an O3 CPU, so it's clearer to call it CPUType instead in that + * case. + */ + typedef O3CPU CPUType; + + /** The Params to be passed to each stage. */ + typedef MipsSimpleParams Params; + + enum { + MaxWidth = 8, + MaxThreads = 4 + }; +}; + +/** The O3Impl to be used. */ +typedef MipsSimpleImpl O3CPUImpl; + +#endif // __CPU_O3_MIPS_IMPL_HH__ diff --git a/src/cpu/o3/mips/params.hh b/src/cpu/o3/mips/params.hh new file mode 100644 index 000000000..d1ac62e21 --- /dev/null +++ b/src/cpu/o3/mips/params.hh @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_PARAMS_HH__ +#define __CPU_O3_MIPS_PARAMS_HH__ + +#include "cpu/o3/cpu.hh" +#include "cpu/o3/params.hh" + +//Forward declarations +//class MipsDTB; +//class MipsITB; +class MemObject; +class Process; +class System; + +/** + * This file defines the parameters that will be used for the MipsO3CPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. + */ + +class MipsSimpleParams : public O3Params +{ + public: + MipsSimpleParams() {} + +#if FULL_SYSTEM + //Full System Paramater Objects place here + MipsITB *itb; + MipsDTB *dtb; +#endif +}; + +#endif // __CPU_O3_MIPS_PARAMS_HH__ diff --git a/src/cpu/o3/mips/thread_context.cc b/src/cpu/o3/mips/thread_context.cc new file mode 100755 index 000000000..0061a2a63 --- /dev/null +++ b/src/cpu/o3/mips/thread_context.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" +#include "cpu/o3/thread_context_impl.hh" + +template class O3ThreadContext; + diff --git a/src/cpu/o3/mips/thread_context.hh b/src/cpu/o3/mips/thread_context.hh new file mode 100644 index 000000000..5b03426f1 --- /dev/null +++ b/src/cpu/o3/mips/thread_context.hh @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" + +template +class MipsTC : public O3ThreadContext +{ + public: + virtual uint64_t readNextNPC() + { + return this->cpu->readNextNPC(this->thread->readTid()); + } + + virtual void setNextNPC(uint64_t val) + { + this->cpu->setNextNPC(val, this->thread->readTid()); + } + + virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, + TheISA::RegFile::ContextVal val) + { panic("Not supported on Mips!"); } + + /** This function exits the thread context in the CPU and returns + * 1 if the CPU has no more active threads (meaning it's OK to exit); + * Used in syscall-emulation mode when a thread executes the 'exit' + * syscall. + */ + virtual int exit() + { + this->deallocate(); + + // If there are still threads executing in the system + if (this->cpu->numActiveThreads()) + return 0; // don't exit simulation + else + return 1; // exit simulation + } +}; diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 034087feb..ba26a01dd 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -76,6 +76,7 @@ class DefaultRename // using a list instead of a queue. (Most other stages use a // queue) typedef std::list InstQueue; + typedef typename std::list::iterator ListIt; public: /** Overall rename status. Used to determine if the CPU can @@ -170,7 +171,7 @@ class DefaultRename void takeOverFrom(); /** Squashes all instructions in a thread. */ - void squash(unsigned tid); + void squash(const InstSeqNum &squash_seq_num, unsigned tid); /** Ticks rename, which processes all input signals and attempts to rename * as many instructions as possible. @@ -222,7 +223,7 @@ class DefaultRename bool unblock(unsigned tid); /** Executes actual squash, removing squashed instructions. */ - void doSquash(unsigned tid); + void doSquash(const InstSeqNum &squash_seq_num, unsigned tid); /** Removes a committed instruction's rename history. */ void removeFromHistory(InstSeqNum inst_seq_num, unsigned tid); diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 805a72808..ee251de00 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include @@ -318,7 +319,7 @@ DefaultRename::takeOverFrom() template void -DefaultRename::squash(unsigned tid) +DefaultRename::squash(const InstSeqNum &squash_seq_num, unsigned tid) { DPRINTF(Rename, "[tid:%u]: Squashing instructions.\n",tid); @@ -341,19 +342,55 @@ DefaultRename::squash(unsigned tid) unsigned squashCount = 0; for (int i=0; isize; i++) { - if (fromDecode->insts[i]->threadNumber == tid) { + if (fromDecode->insts[i]->threadNumber == tid && + fromDecode->insts[i]->seqNum > squash_seq_num) { fromDecode->insts[i]->setSquashed(); wroteToTimeBuffer = true; squashCount++; } + } + // Clear the instruction list and skid buffer in case they have any + // insts in them. Since we support multiple ISAs, we cant just: + // "insts[tid].clear();" or "skidBuffer[tid].clear()" since there is + // a possible delay slot inst for different architectures + // insts[tid].clear(); +#if THE_ISA == ALPHA_ISA insts[tid].clear(); +#else + DPRINTF(Rename, "[tid:%i] Squashing incoming decode instructions until " + "[sn:%i].\n",tid, squash_seq_num); + ListIt ilist_it = insts[tid].begin(); + while (ilist_it != insts[tid].end()) { + if ((*ilist_it)->seqNum > squash_seq_num) { + (*ilist_it)->setSquashed(); + DPRINTF(Rename, "Squashing incoming decode instruction, " + "[tid:%i] [sn:%i] PC %08p.\n", tid, (*ilist_it)->seqNum, (*ilist_it)->PC); + } + ilist_it++; + } +#endif // Clear the skid buffer in case it has any data in it. + // See comments above. + // skidBuffer[tid].clear(); +#if THE_ISA == ALPHA_ISA skidBuffer[tid].clear(); - - doSquash(tid); +#else + DPRINTF(Rename, "[tid:%i] Squashing incoming skidbuffer instructions " + "until [sn:%i].\n", tid, squash_seq_num); + ListIt slist_it = skidBuffer[tid].begin(); + while (slist_it != skidBuffer[tid].end()) { + if ((*slist_it)->seqNum > squash_seq_num) { + (*slist_it)->setSquashed(); + DPRINTF(Rename, "Squashing skidbuffer instruction, [tid:%i] [sn:%i]" + "PC %08p.\n", tid, (*slist_it)->seqNum, (*slist_it)->PC); + } + slist_it++; + } +#endif + doSquash(squash_seq_num, tid); } template @@ -572,7 +609,7 @@ DefaultRename::renameInsts(unsigned tid) if (inst->isSquashed()) { DPRINTF(Rename, "[tid:%u]: instruction %i with PC %#x is " "squashed, skipping.\n", - tid, inst->seqNum, inst->threadNumber,inst->readPC()); + tid, inst->seqNum, inst->readPC()); ++renameSquashedInsts; @@ -707,8 +744,10 @@ DefaultRename::sortInsts() { int insts_from_decode = fromDecode->size; #ifdef DEBUG +#if THE_ISA == ALPHA_ISA for (int i=0; i < numThreads; i++) assert(insts[i].empty()); +#endif #endif for (int i = 0; i < insts_from_decode; ++i) { DynInstPtr inst = fromDecode->insts[i]; @@ -824,12 +863,10 @@ DefaultRename::unblock(unsigned tid) template void -DefaultRename::doSquash(unsigned tid) +DefaultRename::doSquash(const InstSeqNum &squashed_seq_num, unsigned tid) { typename list::iterator hb_it = historyBuffer[tid].begin(); - InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum; - // After a syscall squashes everything, the history buffer may be empty // but the ROB may still be squashing instructions. if (historyBuffer[tid].empty()) { @@ -963,8 +1000,9 @@ DefaultRename::renameDestRegs(DynInstPtr &inst,unsigned tid) historyBuffer[tid].push_front(hb_entry); - DPRINTF(Rename, "[tid:%u]: Adding instruction to history buffer, " - "[sn:%lli].\n",tid, + DPRINTF(Rename, "[tid:%u]: Adding instruction to history buffer " + "(size=%i), [sn:%lli].\n",tid, + historyBuffer[tid].size(), (*historyBuffer[tid].begin()).instSeqNum); // Tell the instruction to rename the appropriate destination @@ -1143,7 +1181,13 @@ DefaultRename::checkSignalsAndUpdate(unsigned tid) DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from " "commit.\n", tid); - squash(tid); +#if THE_ISA == ALPHA_ISA + InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum; +#else + InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].bdelayDoneSeqNum; +#endif + + squash(squashed_seq_num, tid); return true; } diff --git a/src/cpu/o3/scoreboard.hh b/src/cpu/o3/scoreboard.hh index f8e4df3b7..544084c40 100644 --- a/src/cpu/o3/scoreboard.hh +++ b/src/cpu/o3/scoreboard.hh @@ -35,7 +35,7 @@ #include #include #include -#include "arch/alpha/isa_traits.hh" +#include "arch/isa_traits.hh" #include "base/trace.hh" #include "base/traceflags.hh" #include "cpu/o3/comm.hh" diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index df8d1a6d8..9ca02b9f3 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -26,12 +26,12 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim - * Korey Sewell */ #ifndef __CPU_O3_THREAD_CONTEXT_HH__ #define __CPU_O3_THREAD_CONTEXT_HH__ +#include "cpu/thread_context.hh" #include "cpu/o3/isa_specific.hh" class EndQuiesceEvent; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 240696c2b..622455745 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -26,7 +26,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Steve Reinhardt - * Korey Sewell */ #include "arch/utility.hh" @@ -455,6 +454,7 @@ BaseSimpleCPU::advancePC(Fault fault) #else thread->setNextPC(thread->readNextNPC()); thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); + assert(thread->readNextPC() != thread->readNextNPC()); #endif } diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index ea1a65148..0d356bb4c 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -214,6 +214,7 @@ class StaticInstBase : public RefCounted bool isIndirectCtrl() const { return flags[IsIndirectControl]; } bool isCondCtrl() const { return flags[IsCondControl]; } bool isUncondCtrl() const { return flags[IsUncondControl]; } + bool isCondDelaySlot() const { return flags[IsCondDelaySlot]; } bool isThreadSync() const { return flags[IsThreadSync]; } bool isSerializing() const { return flags[IsSerializing] ||