From 5c4714c1a91680a0253f866958a9db80cd8decb2 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 25 Feb 2005 18:00:49 -0500 Subject: [PATCH] Initial light-weight OoO CPU checkin, along with gcc-3.4 fixes. SConscript: Include new files. arch/alpha/isa_desc: Make the eaCompPtr and memAccPtr non-const so that execute() can be called on them. arch/alpha/isa_traits.hh: Add enum for total number of data registers. arch/isa_parser.py: base/traceflags.py: Include new light-weight OoO CPU model. cpu/base_dyn_inst.cc: cpu/base_dyn_inst.hh: Changes to abstract more away from the base dyn inst class. cpu/beta_cpu/2bit_local_pred.cc: cpu/beta_cpu/2bit_local_pred.hh: cpu/beta_cpu/tournament_pred.cc: cpu/beta_cpu/tournament_pred.hh: Remove redundant SatCounter class. cpu/beta_cpu/alpha_dyn_inst.cc: cpu/beta_cpu/alpha_full_cpu.cc: cpu/beta_cpu/alpha_full_cpu.hh: cpu/beta_cpu/bpred_unit.cc: cpu/beta_cpu/inst_queue.cc: cpu/beta_cpu/mem_dep_unit.cc: cpu/beta_cpu/ras.cc: cpu/beta_cpu/rename_map.cc: cpu/beta_cpu/rename_map.hh: cpu/beta_cpu/rob.cc: Fix for gcc-3.4 cpu/beta_cpu/alpha_dyn_inst.hh: cpu/beta_cpu/alpha_dyn_inst_impl.hh: Fixes for gcc-3.4. Include more variables and functions that are specific to AlphaDynInst which were once in BaseDynInst. cpu/beta_cpu/alpha_full_cpu_builder.cc: Make params match the current params inherited from BaseCPU. cpu/beta_cpu/alpha_full_cpu_impl.hh: Fixes for gcc-3.4 cpu/beta_cpu/full_cpu.cc: Use new params pointer in BaseCPU. Fix for gcc-3.4. cpu/beta_cpu/full_cpu.hh: Use new params class from BaseCPU. cpu/beta_cpu/iew_impl.hh: Remove unused function. cpu/simple_cpu/simple_cpu.cc: Remove unused global variable. 
cpu/static_inst.hh: Include OoODynInst for new lightweight OoO CPU --HG-- extra : convert_revision : 34d9f2e64ca0313377391e0d059bf09c040286fa --- SConscript | 57 ++- arch/alpha/isa_desc | 8 +- arch/alpha/isa_traits.hh | 4 + arch/isa_parser.py | 3 + base/traceflags.py | 3 +- cpu/base_dyn_inst.cc | 121 +++-- cpu/base_dyn_inst.hh | 252 +++------- cpu/beta_cpu/2bit_local_pred.cc | 35 +- cpu/beta_cpu/2bit_local_pred.hh | 53 +-- cpu/beta_cpu/alpha_dyn_inst.cc | 2 +- cpu/beta_cpu/alpha_dyn_inst.hh | 135 +++++- cpu/beta_cpu/alpha_dyn_inst_impl.hh | 56 ++- cpu/beta_cpu/alpha_full_cpu.cc | 2 +- cpu/beta_cpu/alpha_full_cpu.hh | 26 +- cpu/beta_cpu/alpha_full_cpu_builder.cc | 8 +- cpu/beta_cpu/alpha_full_cpu_impl.hh | 118 ++--- cpu/beta_cpu/bpred_unit.cc | 2 +- cpu/beta_cpu/full_cpu.cc | 16 +- cpu/beta_cpu/full_cpu.hh | 22 +- cpu/beta_cpu/iew_impl.hh | 6 +- cpu/beta_cpu/inst_queue.cc | 3 +- cpu/beta_cpu/mem_dep_unit.cc | 2 +- cpu/beta_cpu/ras.cc | 5 +- cpu/beta_cpu/rename_map.cc | 4 + cpu/beta_cpu/rename_map.hh | 4 +- cpu/beta_cpu/rob.cc | 2 +- cpu/beta_cpu/sat_counter.cc | 43 ++ cpu/beta_cpu/sat_counter.hh | 62 +++ cpu/beta_cpu/tournament_pred.cc | 53 +-- cpu/beta_cpu/tournament_pred.hh | 53 +-- cpu/ooo_cpu/ea_list.cc | 50 ++ cpu/ooo_cpu/ea_list.hh | 44 ++ cpu/ooo_cpu/ooo_cpu.cc | 6 + cpu/ooo_cpu/ooo_cpu.hh | 613 +++++++++++++++++++++++++ cpu/ooo_cpu/ooo_impl.hh | 21 + cpu/simple_cpu/simple_cpu.cc | 3 - cpu/static_inst.hh | 15 +- 37 files changed, 1317 insertions(+), 595 deletions(-) create mode 100644 cpu/beta_cpu/sat_counter.cc create mode 100644 cpu/beta_cpu/sat_counter.hh create mode 100644 cpu/ooo_cpu/ea_list.cc create mode 100644 cpu/ooo_cpu/ea_list.hh create mode 100644 cpu/ooo_cpu/ooo_cpu.cc create mode 100644 cpu/ooo_cpu/ooo_cpu.hh create mode 100644 cpu/ooo_cpu/ooo_impl.hh diff --git a/SConscript b/SConscript index 4e4cb8727..94e788ce9 100644 --- a/SConscript +++ b/SConscript @@ -51,6 +51,7 @@ base_sources = Split(''' arch/alpha/full_cpu_exec.cc arch/alpha/faults.cc 
arch/alpha/isa_traits.cc + arch/alpha/ooo_cpu_exec.cc base/circlebuf.cc base/copyright.cc @@ -114,6 +115,7 @@ base_sources = Split(''' cpu/beta_cpu/rename.cc cpu/beta_cpu/rename_map.cc cpu/beta_cpu/rob.cc + cpu/beta_cpu/sat_counter.cc cpu/beta_cpu/store_set.cc cpu/beta_cpu/tournament_pred.cc cpu/fast_cpu/fast_cpu.cc @@ -136,30 +138,34 @@ base_sources = Split(''' cpu/full_cpu/ls_queue.cc cpu/full_cpu/machine_queue.cc cpu/full_cpu/pc_sample_profile.cc - cpu/full_cpu/pipetrace.cc - cpu/full_cpu/readyq.cc - cpu/full_cpu/reg_info.cc - cpu/full_cpu/rob_station.cc - cpu/full_cpu/spec_memory.cc - cpu/full_cpu/spec_state.cc - cpu/full_cpu/storebuffer.cc - cpu/full_cpu/writeback.cc - cpu/full_cpu/iq/iq_station.cc - cpu/full_cpu/iq/iqueue.cc - cpu/full_cpu/iq/segmented/chain_info.cc - cpu/full_cpu/iq/segmented/chain_wire.cc - cpu/full_cpu/iq/segmented/iq_seg.cc - cpu/full_cpu/iq/segmented/iq_segmented.cc - cpu/full_cpu/iq/segmented/seg_chain.cc - cpu/full_cpu/iq/seznec/iq_seznec.cc - cpu/full_cpu/iq/standard/iq_standard.cc - cpu/sampling_cpu/sampling_cpu.cc - cpu/simple_cpu/simple_cpu.cc - cpu/inorder_cpu/inorder_cpu.cc - cpu/trace/reader/mem_trace_reader.cc - cpu/trace/reader/ibm_reader.cc - cpu/trace/reader/itx_reader.cc - cpu/trace/reader/m5_reader.cc + cpu/full_cpu/pipetrace.cc + cpu/full_cpu/readyq.cc + cpu/full_cpu/reg_info.cc + cpu/full_cpu/rob_station.cc + cpu/full_cpu/spec_memory.cc + cpu/full_cpu/spec_state.cc + cpu/full_cpu/storebuffer.cc + cpu/full_cpu/writeback.cc + cpu/full_cpu/iq/iq_station.cc + cpu/full_cpu/iq/iqueue.cc + cpu/full_cpu/iq/segmented/chain_info.cc + cpu/full_cpu/iq/segmented/chain_wire.cc + cpu/full_cpu/iq/segmented/iq_seg.cc + cpu/full_cpu/iq/segmented/iq_segmented.cc + cpu/full_cpu/iq/segmented/seg_chain.cc + cpu/full_cpu/iq/seznec/iq_seznec.cc + cpu/full_cpu/iq/standard/iq_standard.cc + cpu/inorder_cpu/inorder_cpu.cc + cpu/ooo_cpu/ea_list.cc + cpu/ooo_cpu/ooo_cpu.cc + cpu/ooo_cpu/ooo_dyn_inst.cc + cpu/ooo_cpu/ooo_sim_obj.cc + 
cpu/sampling_cpu/sampling_cpu.cc + cpu/simple_cpu/simple_cpu.cc + cpu/trace/reader/mem_trace_reader.cc + cpu/trace/reader/ibm_reader.cc + cpu/trace/reader/itx_reader.cc + cpu/trace/reader/m5_reader.cc mem/base_hier.cc mem/base_mem.cc @@ -390,7 +396,8 @@ env.Command(Split('''arch/alpha/decoder.cc arch/alpha/fast_cpu_exec.cc arch/alpha/simple_cpu_exec.cc arch/alpha/inorder_cpu_exec.cc - arch/alpha/full_cpu_exec.cc'''), + arch/alpha/full_cpu_exec.cc + arch/alpha/ooo_cpu_exec.cc'''), Split('''arch/alpha/isa_desc arch/isa_parser.py'''), '$SRCDIR/arch/isa_parser.py $SOURCE $TARGET.dir arch/alpha') diff --git a/arch/alpha/isa_desc b/arch/alpha/isa_desc index e7d0d68ce..1e92033dc 100644 --- a/arch/alpha/isa_desc +++ b/arch/alpha/isa_desc @@ -738,9 +738,9 @@ output header {{ /// Memory request flags. See mem_req_base.hh. unsigned memAccessFlags; /// Pointer to EAComp object. - const StaticInstPtr eaCompPtr; + StaticInstPtr eaCompPtr; /// Pointer to MemAcc object. - const StaticInstPtr memAccPtr; + StaticInstPtr memAccPtr; /// Constructor Memory(const char *mnem, MachInst _machInst, OpClass __opClass, @@ -755,8 +755,8 @@ output header {{ public: - const StaticInstPtr &eaCompInst() const { return eaCompPtr; } - const StaticInstPtr &memAccInst() const { return memAccPtr; } + StaticInstPtr &eaCompInst() { return eaCompPtr; } + StaticInstPtr &memAccInst() { return memAccPtr; } }; /** diff --git a/arch/alpha/isa_traits.hh b/arch/alpha/isa_traits.hh index ff3da1502..bf184b875 100644 --- a/arch/alpha/isa_traits.hh +++ b/arch/alpha/isa_traits.hh @@ -148,6 +148,10 @@ static const Addr PageOffset = PageBytes - 1; NumIntRegs + NumFloatRegs + NumMiscRegs + NumInternalProcRegs }; + enum { + TotalDataRegs = NumIntRegs + NumFloatRegs + }; + typedef union { IntReg intreg; FloatReg fpreg; diff --git a/arch/isa_parser.py b/arch/isa_parser.py index 8187cf188..5e0267c9e 100755 --- a/arch/isa_parser.py +++ b/arch/isa_parser.py @@ -642,6 +642,9 @@ CpuModel('FullCPU', 'full_cpu_exec.cc', 
CpuModel('AlphaFullCPU', 'alpha_full_cpu_exec.cc', '#include "cpu/beta_cpu/alpha_dyn_inst.hh"', { 'CPU_exec_context': 'AlphaDynInst' }) +CpuModel('OoOCPU', 'ooo_cpu_exec.cc', + '#include "cpu/ooo_cpu/ooo_dyn_inst.hh"', + { 'CPU_exec_context': 'OoODynInst' }) # Expand template with CPU-specific references into a dictionary with # an entry for each CPU model name. The entry key is the model name diff --git a/base/traceflags.py b/base/traceflags.py index d1eb4478a..496647116 100644 --- a/base/traceflags.py +++ b/base/traceflags.py @@ -138,7 +138,8 @@ baseFlags = [ 'MemDepUnit', 'DynInst', 'FullCPU', - 'CommitRate' + 'CommitRate', + 'OoOCPU' ] # diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc index 74f6b8a6c..b8424f576 100644 --- a/cpu/base_dyn_inst.cc +++ b/cpu/base_dyn_inst.cc @@ -26,8 +26,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __BASE_DYN_INST_CC__ -#define __BASE_DYN_INST_CC__ +#ifndef __CPU_BASE_DYN_INST_CC__ +#define __CPU_BASE_DYN_INST_CC__ #include #include @@ -43,6 +43,8 @@ #include "cpu/base_dyn_inst.hh" #include "cpu/beta_cpu/alpha_impl.hh" #include "cpu/beta_cpu/alpha_full_cpu.hh" +#include "cpu/ooo_cpu/ooo_impl.hh" +#include "cpu/ooo_cpu/ooo_cpu.hh" using namespace std; @@ -74,92 +76,58 @@ BaseDynInst::BaseDynInst(MachInst machInst, Addr inst_PC, FullCPU *cpu) : staticInst(machInst), traceData(NULL), cpu(cpu), xc(cpu->xcBase()) { - DPRINTF(FullCPU, "DynInst: Creating new DynInst.\n"); + seqNum = seq_num; + + PC = inst_PC; + nextPC = PC + sizeof(MachInst); + predPC = pred_PC; + initVars(); +} + +template +BaseDynInst::BaseDynInst(StaticInstPtr &_staticInst) + : staticInst(_staticInst), traceData(NULL) +{ + initVars(); +} + +template +void +BaseDynInst::initVars() +{ effAddr = MemReq::inval_addr; physEffAddr = MemReq::inval_addr; readyRegs = 0; - seqNum = seq_num; - -// specMemWrite = false; - + completed = false; canIssue = false; issued = false; executed = false; canCommit = false; squashed = false; 
squashedInIQ = false; + eaCalcDone = false; blockingInst = false; recoverInst = false; - specMode = false; -// btbMissed = false; + // Eventually make this a parameter. threadNumber = 0; - // Also make this a parameter. - specMode = true; + // Also make this a parameter, or perhaps get it from xc or cpu. asid = 0; // Initialize the fault to be unimplemented opcode. fault = Unimplemented_Opcode_Fault; - PC = inst_PC; - nextPC = PC + sizeof(MachInst); - predPC = pred_PC; - - // Make sure to have the renamed register entries set to the same - // as the normal register entries. It will allow the IQ to work - // without any modifications. - for (int i = 0; i < staticInst->numDestRegs(); i++) - { - _destRegIdx[i] = staticInst->destRegIdx(i); - } - - for (int i = 0; i < staticInst->numSrcRegs(); i++) - { - _srcRegIdx[i] = staticInst->srcRegIdx(i); - _readySrcRegIdx[i] = 0; - } - ++instcount; -// assert(instcount < 50); - DPRINTF(FullCPU, "DynInst: Instruction created. Instcount=%i\n", instcount); } -template -BaseDynInst::BaseDynInst(StaticInstPtr &_staticInst) - : staticInst(_staticInst), traceData(NULL) -{ - effAddr = MemReq::inval_addr; - physEffAddr = MemReq::inval_addr; - -// specMemWrite = false; - - blockingInst = false; - recoverInst = false; - specMode = false; -// btbMissed = false; - - // Make sure to have the renamed register entries set to the same - // as the normal register entries. It will allow the IQ to work - // without any modifications. - for (int i = 0; i < staticInst->numDestRegs(); i++) - { - _destRegIdx[i] = staticInst->destRegIdx(i); - } - - for (int i = 0; i < staticInst->numSrcRegs(); i++) - { - _srcRegIdx[i] = staticInst->srcRegIdx(i); - } -} - template BaseDynInst::~BaseDynInst() { @@ -173,14 +141,14 @@ BaseDynInst::~BaseDynInst() DPRINTF(FullCPU, "DynInst: Instruction destroyed. 
Instcount=%i\n", instcount); } - +/* template FunctionalMemory * BaseDynInst::getMemory(void) { return xc->mem; } -/* + template IntReg * BaseDynInst::getIntegerRegs(void) @@ -395,10 +363,35 @@ BaseDynInst::mem_access(mem_cmd cmd, Addr addr, void *p, int nbytes) #endif +template +bool +BaseDynInst::eaSrcsReady() +{ + // For now I am assuming that src registers 1..n-1 are the ones that the + // EA calc depends on. (i.e. src reg 0 is the source of the data to be + // stored) + +// StaticInstPtr eaInst = staticInst->eaCompInst(); + + for (int i = 1; i < numSrcRegs(); ++i) + { + if (!_readySrcRegIdx[i]) + return false; + } + + return true; +} + +// Forward declaration... +template class BaseDynInst; +template class BaseDynInst; + +template <> int BaseDynInst::instcount = 0; -// Forward declaration... -template BaseDynInst; +template <> +int +BaseDynInst::instcount = 0; -#endif // __BASE_DYN_INST_CC__ +#endif // __CPU_BASE_DYN_INST_CC__ diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh index 171721e61..943293b25 100644 --- a/cpu/base_dyn_inst.hh +++ b/cpu/base_dyn_inst.hh @@ -26,35 +26,32 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __BASE_DYN_INST_HH__ -#define __BASE_DYN_INST_HH__ +#ifndef __CPU_BASE_DYN_INST_HH__ +#define __CPU_BASE_DYN_INST_HH__ -#include #include +#include #include "base/fast_alloc.hh" #include "base/trace.hh" -#include "cpu/static_inst.hh" #include "cpu/beta_cpu/comm.hh" +#include "cpu/exetrace.hh" #include "cpu/full_cpu/bpred_update.hh" -#include "mem/functional_mem/main_memory.hh" -#include "cpu/full_cpu/spec_memory.hh" -#include "cpu/inst_seq.hh" #include "cpu/full_cpu/op_class.hh" +#include "cpu/full_cpu/spec_memory.hh" #include "cpu/full_cpu/spec_state.hh" +#include "cpu/inst_seq.hh" +#include "cpu/static_inst.hh" +#include "mem/functional_mem/main_memory.hh" /** * @file * Defines a dynamic instruction context. */ -namespace Trace { - class InstRecord; -}; - // Forward declaration. 
-template +template class StaticInstPtr; template @@ -90,8 +87,6 @@ class BaseDynInst : public FastAlloc, public RefCounted //////////////////////////////////////////// Trace::InstRecord *traceData; -// void setCPSeq(InstSeqNum seq); - template Fault read(Addr addr, T &data, unsigned flags); @@ -99,15 +94,12 @@ class BaseDynInst : public FastAlloc, public RefCounted Fault write(T data, Addr addr, unsigned flags, uint64_t *res); - - IntReg *getIntegerRegs(void); - FunctionalMemory *getMemory(void); - void prefetch(Addr addr, unsigned flags); void writeHint(Addr addr, int size, unsigned flags); Fault copySrcTranslate(Addr src); Fault copy(Addr dest); + // Probably should be private... public: /** Is this instruction valid. */ bool valid; @@ -118,6 +110,9 @@ class BaseDynInst : public FastAlloc, public RefCounted /** How many source registers are ready. */ unsigned readyRegs; + /** Is the instruction completed. */ + bool completed; + /** Can this instruction issue. */ bool canIssue; @@ -145,18 +140,9 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Is this a thread syncrhonization instruction. */ bool threadsyncWait; - /** If the BTB missed. */ -// bool btbMissed; - - /** The global history of this instruction (branch). */ -// unsigned globalHistory; - /** The thread this instruction is from. */ short threadNumber; - /** If instruction is speculative. */ - short specMode; - /** data address space ID, for loads & stores. */ short asid; @@ -190,14 +176,16 @@ class BaseDynInst : public FastAlloc, public RefCounted /** The data to be stored. */ IntReg storeData; - /** Result of this instruction, if an integer. */ - uint64_t intResult; - - /** Result of this instruction, if a float. */ - float floatResult; + union Result { + uint64_t integer; + float fp; + double dbl; + }; - /** Result of this instruction, if a double. */ - double doubleResult; + /** The result of the instruction; assumes for now that there's only one + * destination register. 
+ */ + Result instResult; /** PC of this instruction. */ Addr PC; @@ -214,28 +202,11 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Count of total number of dynamic instructions. */ static int instcount; - /** Did this instruction do a spec write? */ -// bool specMemWrite; - - private: - /** Physical register index of the destination registers of this - * instruction. - */ - PhysRegIndex _destRegIdx[MaxInstDestRegs]; - - /** Physical register index of the source registers of this - * instruction. + /** Whether or not the source register is ready. Not sure this should be + * here vs. the derived class. */ - PhysRegIndex _srcRegIdx[MaxInstSrcRegs]; - - /** Whether or not the source register is ready. */ bool _readySrcRegIdx[MaxInstSrcRegs]; - /** Physical register index of the previous producers of the - * architected destinations. - */ - PhysRegIndex _prevDestRegIdx[MaxInstDestRegs]; - public: /** BaseDynInst constructor given a binary instruction. */ BaseDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, @@ -247,14 +218,10 @@ class BaseDynInst : public FastAlloc, public RefCounted /** BaseDynInst destructor. */ ~BaseDynInst(); -#if 0 - Fault - mem_access(MemCmd cmd, // Read or Write access cmd - Addr addr, // virtual address of access - void *p, // input/output buffer - int nbytes); // access size -#endif + private: + void initVars(); + public: void trace_mem(Fault fault, // last fault MemCmd cmd, // last command @@ -278,7 +245,7 @@ class BaseDynInst : public FastAlloc, public RefCounted bool doneTargCalc() { return false; } /** Returns the calculated target of the branch. */ - Addr readCalcTarg() { return nextPC; } +// Addr readCalcTarg() { return nextPC; } Addr readNextPC() { return nextPC; } @@ -296,16 +263,6 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns whether the instruction mispredicted. 
*/ bool mispredicted() { return (predPC != nextPC); } -/* - unsigned readGlobalHist() { - return globalHistory; - } - - void setGlobalHist(unsigned history) { - globalHistory = history; - } -*/ - // // Instruction types. Forward checks to StaticInst object. // @@ -331,6 +288,12 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isWriteBarrier() const { return staticInst->isWriteBarrier(); } bool isNonSpeculative() const { return staticInst->isNonSpeculative(); } + /** Returns the opclass of this instruction. */ + OpClass opClass() const { return staticInst->opClass(); } + + /** Returns the branch target address. */ + Addr branchTarget() const { return staticInst->branchTarget(PC); } + int8_t numSrcRegs() const { return staticInst->numSrcRegs(); } int8_t numDestRegs() const { return staticInst->numDestRegs(); } @@ -351,52 +314,9 @@ class BaseDynInst : public FastAlloc, public RefCounted return staticInst->srcRegIdx(i); } - /** Returns the physical register index of the i'th destination - * register. - */ - PhysRegIndex renamedDestRegIdx(int idx) const - { - return _destRegIdx[idx]; - } - - /** Returns the physical register index of the i'th source register. */ - PhysRegIndex renamedSrcRegIdx(int idx) const - { - return _srcRegIdx[idx]; - } - - bool isReadySrcRegIdx(int idx) const - { - return _readySrcRegIdx[idx]; - } - - /** Returns the physical register index of the previous physical register - * that remapped to the same logical register index. - */ - PhysRegIndex prevDestRegIdx(int idx) const - { - return _prevDestRegIdx[idx]; - } - - /** Renames a destination register to a physical register. Also records - * the previous physical register that the logical register mapped to. 
- */ - void renameDestReg(int idx, - PhysRegIndex renamed_dest, - PhysRegIndex previous_rename) - { - _destRegIdx[idx] = renamed_dest; - _prevDestRegIdx[idx] = previous_rename; - } - - /** Renames a source logical register to the physical register which - * has/will produce that logical register's result. - * @todo: add in whether or not the source register is ready. - */ - void renameSrcReg(int idx, PhysRegIndex renamed_src) - { - _srcRegIdx[idx] = renamed_src; - } + uint64_t readIntResult() { return instResult.integer; } + float readFloatResult() { return instResult.fp; } + double readDoubleResult() { return instResult.dbl; } //Push to .cc file. /** Records that one of the source registers is ready. */ @@ -419,6 +339,15 @@ class BaseDynInst : public FastAlloc, public RefCounted } } + bool isReadySrcRegIdx(int idx) const + { + return this->_readySrcRegIdx[idx]; + } + + void setCompleted() { completed = true; } + + bool isCompleted() const { return completed; } + /** Sets this instruction as ready to issue. */ void setCanIssue() { canIssue = true; } @@ -429,13 +358,13 @@ class BaseDynInst : public FastAlloc, public RefCounted void setIssued() { issued = true; } /** Returns whether or not this instruction has issued. */ - bool isIssued() { return issued; } + bool isIssued() const { return issued; } /** Sets this instruction as executed. */ void setExecuted() { executed = true; } /** Returns whether or not this instruction has executed. */ - bool isExecuted() { return executed; } + bool isExecuted() const { return executed; } /** Sets this instruction as ready to commit. */ void setCanCommit() { canCommit = true; } @@ -456,82 +385,25 @@ class BaseDynInst : public FastAlloc, public RefCounted void setSquashedInIQ() { squashedInIQ = true; } /** Returns whether or not this instruction is squashed in the IQ. */ - bool isSquashedInIQ() { return squashedInIQ; } - - /** Returns the opclass of this instruction. 
*/ - OpClass opClass() const { return staticInst->opClass(); } - - /** Returns whether or not the BTB missed. */ -// bool btbMiss() const { return btbMissed; } - - /** Returns the branch target address. */ - Addr branchTarget() const { return staticInst->branchTarget(PC); } - - // The register accessor methods provide the index of the - // instruction's operand (e.g., 0 or 1), not the architectural - // register index, to simplify the implementation of register - // renaming. We find the architectural register index by indexing - // into the instruction's own operand index table. Note that a - // raw pointer to the StaticInst is provided instead of a - // ref-counted StaticInstPtr to redice overhead. This is fine as - // long as these methods don't copy the pointer into any long-term - // storage (which is pretty hard to imagine they would have reason - // to do). - - uint64_t readIntReg(StaticInst *si, int idx) - { - return cpu->readIntReg(_srcRegIdx[idx]); - } - - float readFloatRegSingle(StaticInst *si, int idx) - { - return cpu->readFloatRegSingle(_srcRegIdx[idx]); - } - - double readFloatRegDouble(StaticInst *si, int idx) - { - return cpu->readFloatRegDouble(_srcRegIdx[idx]); - } - - uint64_t readFloatRegInt(StaticInst *si, int idx) - { - return cpu->readFloatRegInt(_srcRegIdx[idx]); - } - /** @todo: Make results into arrays so they can handle multiple dest - * registers. 
- */ - void setIntReg(StaticInst *si, int idx, uint64_t val) - { - cpu->setIntReg(_destRegIdx[idx], val); - intResult = val; - } - - void setFloatRegSingle(StaticInst *si, int idx, float val) - { - cpu->setFloatRegSingle(_destRegIdx[idx], val); - floatResult = val; - } - - void setFloatRegDouble(StaticInst *si, int idx, double val) - { - cpu->setFloatRegDouble(_destRegIdx[idx], val); - doubleResult = val; - } - - void setFloatRegInt(StaticInst *si, int idx, uint64_t val) - { - cpu->setFloatRegInt(_destRegIdx[idx], val); - intResult = val; - } + bool isSquashedInIQ() const { return squashedInIQ; } /** Read the PC of this instruction. */ - Addr readPC() { return PC; } + const Addr readPC() const { return PC; } /** Set the next PC of this instruction (its actual target). */ void setNextPC(uint64_t val) { nextPC = val; } -// bool misspeculating() { return cpu->misspeculating(); } ExecContext *xcBase() { return xc; } + + private: + Addr instEffAddr; + bool eaCalcDone; + + public: + void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; } + const Addr &getEA() const { return instEffAddr; } + bool doneEACalc() { return eaCalcDone; } + bool eaSrcsReady(); }; template @@ -589,8 +461,6 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) storeSize = sizeof(T); storeData = data; -// if (specMode) -// specMemWrite = true; MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags); @@ -627,4 +497,4 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) return fault; } -#endif // __DYN_INST_HH__ +#endif // __CPU_BASE_DYN_INST_HH__ diff --git a/cpu/beta_cpu/2bit_local_pred.cc b/cpu/beta_cpu/2bit_local_pred.cc index ef7f23d49..e5bf9647f 100644 --- a/cpu/beta_cpu/2bit_local_pred.cc +++ b/cpu/beta_cpu/2bit_local_pred.cc @@ -1,36 +1,6 @@ #include "base/trace.hh" #include "cpu/beta_cpu/2bit_local_pred.hh" -DefaultBP::SatCounter::SatCounter(unsigned bits) - : maxVal((1 << bits) - 1), counter(0) -{ -} - -DefaultBP::SatCounter::SatCounter(unsigned 
bits, unsigned initial_val) - : maxVal((1 << bits) - 1), counter(initial_val) -{ - // Check to make sure initial value doesn't exceed the max counter value. - if (initial_val > maxVal) { - panic("BP: Initial counter value exceeds max size."); - } -} - -void -DefaultBP::SatCounter::increment() -{ - if(counter < maxVal) { - ++counter; - } -} - -void -DefaultBP::SatCounter::decrement() -{ - if(counter > 0) { - --counter; - } -} - DefaultBP::DefaultBP(unsigned _localPredictorSize, unsigned _localCtrBits, unsigned _instShiftAmt) @@ -46,7 +16,10 @@ DefaultBP::DefaultBP(unsigned _localPredictorSize, DPRINTF(Fetch, "Branch predictor: index mask: %#x\n", indexMask); // Setup the array of counters for the local predictor. - localCtrs = new SatCounter[localPredictorSize](localCtrBits); + localCtrs = new SatCounter[localPredictorSize]; + + for (int i = 0; i < localPredictorSize; ++i) + localCtrs[i].setBits(_localCtrBits); DPRINTF(Fetch, "Branch predictor: local predictor size: %i\n", localPredictorSize); diff --git a/cpu/beta_cpu/2bit_local_pred.hh b/cpu/beta_cpu/2bit_local_pred.hh index 32a7972d0..cda7d3e65 100644 --- a/cpu/beta_cpu/2bit_local_pred.hh +++ b/cpu/beta_cpu/2bit_local_pred.hh @@ -1,8 +1,9 @@ -#ifndef __2BIT_LOCAL_PRED_HH__ -#define __2BIT_LOCAL_PRED_HH__ +#ifndef __CPU_BETA_CPU_2BIT_LOCAL_PRED_HH__ +#define __CPU_BETA_CPU_2BIT_LOCAL_PRED_HH__ // For Addr type. #include "arch/alpha/isa_traits.hh" +#include "cpu/beta_cpu/sat_counter.hh" class DefaultBP { @@ -34,52 +35,6 @@ class DefaultBP inline unsigned getLocalIndex(Addr &PC); - /** - * Private counter class for the internal saturating counters. - * Implements an n bit saturating counter and provides methods to - * increment, decrement, and read it. - * @todo Consider making this something that more closely mimics a - * built in class so you can use ++ or --. - */ - class SatCounter - { - public: - /** - * Constructor for the counter. - * @param bits How many bits the counter will have. 
- */ - SatCounter(unsigned bits); - - /** - * Constructor for the counter. - * @param bits How many bits the counter will have. - * @param initial_val Starting value for each counter. - */ - SatCounter(unsigned bits, unsigned initial_val); - - /** - * Increments the counter's current value. - */ - void increment(); - - /** - * Decrements the counter's current value. - */ - void decrement(); - - /** - * Read the counter's value. - */ - uint8_t read() - { - return counter; - } - - private: - uint8_t maxVal; - uint8_t counter; - }; - /** Array of counters that make up the local predictor. */ SatCounter *localCtrs; @@ -96,4 +51,4 @@ class DefaultBP unsigned indexMask; }; -#endif // __2BIT_LOCAL_PRED_HH__ +#endif // __CPU_BETA_CPU_2BIT_LOCAL_PRED_HH__ diff --git a/cpu/beta_cpu/alpha_dyn_inst.cc b/cpu/beta_cpu/alpha_dyn_inst.cc index 1bfcb8420..d929da1cf 100644 --- a/cpu/beta_cpu/alpha_dyn_inst.cc +++ b/cpu/beta_cpu/alpha_dyn_inst.cc @@ -4,4 +4,4 @@ // Force instantiation of AlphaDynInst for all the implementations that // are needed. -template AlphaDynInst; +template class AlphaDynInst; diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh index 584e027d7..b2f0d703e 100644 --- a/cpu/beta_cpu/alpha_dyn_inst.hh +++ b/cpu/beta_cpu/alpha_dyn_inst.hh @@ -47,11 +47,11 @@ class AlphaDynInst : public BaseDynInst /** BaseDynInst constructor given a static inst pointer. */ AlphaDynInst(StaticInstPtr &_staticInst); - /** Executes the instruction. */ + /** Executes the instruction. Why the hell did I put this here? */ Fault execute() { - fault = staticInst->execute(this, traceData); - return fault; + this->fault = this->staticInst->execute(this, this->traceData); + return this->fault; } public: @@ -74,6 +74,135 @@ class AlphaDynInst : public BaseDynInst void syscall(); #endif + + + private: + /** Physical register index of the destination registers of this + * instruction. 
+ */ + PhysRegIndex _destRegIdx[MaxInstDestRegs]; + + /** Physical register index of the source registers of this + * instruction. + */ + PhysRegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** Physical register index of the previous producers of the + * architected destinations. + */ + PhysRegIndex _prevDestRegIdx[MaxInstDestRegs]; + + public: + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to reduce overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + uint64_t readIntReg(StaticInst *si, int idx) + { + return this->cpu->readIntReg(_srcRegIdx[idx]); + } + + float readFloatRegSingle(StaticInst *si, int idx) + { + return this->cpu->readFloatRegSingle(_srcRegIdx[idx]); + } + + double readFloatRegDouble(StaticInst *si, int idx) + { + return this->cpu->readFloatRegDouble(_srcRegIdx[idx]); + } + + uint64_t readFloatRegInt(StaticInst *si, int idx) + { + return this->cpu->readFloatRegInt(_srcRegIdx[idx]); + } + /** @todo: Make results into arrays so they can handle multiple dest + * registers. 
+ */ + void setIntReg(StaticInst *si, int idx, uint64_t val) + { + this->cpu->setIntReg(_destRegIdx[idx], val); + this->instResult.integer = val; + } + + void setFloatRegSingle(StaticInst *si, int idx, float val) + { + this->cpu->setFloatRegSingle(_destRegIdx[idx], val); + this->instResult.fp = val; + } + + void setFloatRegDouble(StaticInst *si, int idx, double val) + { + this->cpu->setFloatRegDouble(_destRegIdx[idx], val); + this->instResult.dbl = val; + } + + void setFloatRegInt(StaticInst *si, int idx, uint64_t val) + { + this->cpu->setFloatRegInt(_destRegIdx[idx], val); + this->instResult.integer = val; + } + + /** Returns the physical register index of the i'th destination + * register. + */ + PhysRegIndex renamedDestRegIdx(int idx) const + { + return _destRegIdx[idx]; + } + + /** Returns the physical register index of the i'th source register. */ + PhysRegIndex renamedSrcRegIdx(int idx) const + { + return _srcRegIdx[idx]; + } + + /** Returns the physical register index of the previous physical register + * that remapped to the same logical register index. + */ + PhysRegIndex prevDestRegIdx(int idx) const + { + return _prevDestRegIdx[idx]; + } + + /** Renames a destination register to a physical register. Also records + * the previous physical register that the logical register mapped to. + */ + void renameDestReg(int idx, + PhysRegIndex renamed_dest, + PhysRegIndex previous_rename) + { + _destRegIdx[idx] = renamed_dest; + _prevDestRegIdx[idx] = previous_rename; + } + + /** Renames a source logical register to the physical register which + * has/will produce that logical register's result. + * @todo: add in whether or not the source register is ready. 
+ */ + void renameSrcReg(int idx, PhysRegIndex renamed_src) + { + _srcRegIdx[idx] = renamed_src; + } + + public: + Fault calcEA() + { + return this->staticInst->eaCompInst()->execute(this, this->traceData); + } + + Fault memAccess() + { + return this->staticInst->memAccInst()->execute(this, this->traceData); + } }; #endif // __CPU_BETA_CPU_ALPHA_DYN_INST_HH__ diff --git a/cpu/beta_cpu/alpha_dyn_inst_impl.hh b/cpu/beta_cpu/alpha_dyn_inst_impl.hh index 8311067db..4a3ae99d4 100644 --- a/cpu/beta_cpu/alpha_dyn_inst_impl.hh +++ b/cpu/beta_cpu/alpha_dyn_inst_impl.hh @@ -4,42 +4,68 @@ template AlphaDynInst::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, FullCPU *cpu) - : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) + : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) { + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < this->staticInst->numDestRegs(); i++) + { + _destRegIdx[i] = this->staticInst->destRegIdx(i); + } + + for (int i = 0; i < this->staticInst->numSrcRegs(); i++) + { + _srcRegIdx[i] = this->staticInst->srcRegIdx(i); + this->_readySrcRegIdx[i] = 0; + } + } template AlphaDynInst::AlphaDynInst(StaticInstPtr &_staticInst) - : BaseDynInst(_staticInst) + : BaseDynInst(_staticInst) { + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. 
+ for (int i = 0; i < _staticInst->numDestRegs(); i++) + { + _destRegIdx[i] = _staticInst->destRegIdx(i); + } + + for (int i = 0; i < _staticInst->numSrcRegs(); i++) + { + _srcRegIdx[i] = _staticInst->srcRegIdx(i); + } } template uint64_t AlphaDynInst::readUniq() { - return cpu->readUniq(); + return this->cpu->readUniq(); } template void AlphaDynInst::setUniq(uint64_t val) { - cpu->setUniq(val); + this->cpu->setUniq(val); } template uint64_t AlphaDynInst::readFpcr() { - return cpu->readFpcr(); + return this->cpu->readFpcr(); } template void AlphaDynInst::setFpcr(uint64_t val) { - cpu->setFpcr(val); + this->cpu->setFpcr(val); } #ifdef FULL_SYSTEM @@ -47,63 +73,63 @@ template uint64_t AlphaDynInst::readIpr(int idx, Fault &fault) { - return cpu->readIpr(idx, fault); + return this->cpu->readIpr(idx, fault); } template Fault AlphaDynInst::setIpr(int idx, uint64_t val) { - return cpu->setIpr(idx, val); + return this->cpu->setIpr(idx, val); } template Fault AlphaDynInst::hwrei() { - return cpu->hwrei(); + return this->cpu->hwrei(); } template int AlphaDynInst::readIntrFlag() { -return cpu->readIntrFlag(); +return this->cpu->readIntrFlag(); } template void AlphaDynInst::setIntrFlag(int val) { - cpu->setIntrFlag(val); + this->cpu->setIntrFlag(val); } template bool AlphaDynInst::inPalMode() { - return cpu->inPalMode(); + return this->cpu->inPalMode(); } template void AlphaDynInst::trap(Fault fault) { - cpu->trap(fault); + this->cpu->trap(fault); } template bool AlphaDynInst::simPalCheck(int palFunc) { - return cpu->simPalCheck(palFunc); + return this->cpu->simPalCheck(palFunc); } #else template void AlphaDynInst::syscall() { - cpu->syscall(); + this->cpu->syscall(); } #endif diff --git a/cpu/beta_cpu/alpha_full_cpu.cc b/cpu/beta_cpu/alpha_full_cpu.cc index 80c4bdec8..ee461eb13 100644 --- a/cpu/beta_cpu/alpha_full_cpu.cc +++ b/cpu/beta_cpu/alpha_full_cpu.cc @@ -6,4 +6,4 @@ // Force instantiation of AlphaFullCPU for all the implemntations that are // needed. 
Consider merging this and alpha_dyn_inst.cc, and maybe all // classes that depend on a certain impl, into one file (alpha_impl.cc?). -template AlphaFullCPU; +template class AlphaFullCPU; diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh index 92eebc82a..3c29dd277 100644 --- a/cpu/beta_cpu/alpha_full_cpu.hh +++ b/cpu/beta_cpu/alpha_full_cpu.hh @@ -87,22 +87,22 @@ class AlphaFullCPU : public FullBetaCPU // trying to rename source/destination registers... uint64_t readUniq() { - return regFile.readUniq(); + return this->regFile.readUniq(); } void setUniq(uint64_t val) { - regFile.setUniq(val); + this->regFile.setUniq(val); } uint64_t readFpcr() { - return regFile.readFpcr(); + return this->regFile.readFpcr(); } void setFpcr(uint64_t val) { - regFile.setFpcr(val); + this->regFile.setFpcr(val); } #ifdef FULL_SYSTEM @@ -127,13 +127,13 @@ class AlphaFullCPU : public FullBetaCPU // set the register. IntReg getSyscallArg(int i) { - return xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i]; + return this->xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i]; } // used to shift args for indirect syscall void setSyscallArg(int i, IntReg val) { - xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i] = val; + this->xc->regs.intRegFile[AlphaISA::ArgumentReg0 + i] = val; } void setSyscallReturn(int64_t return_value) @@ -144,12 +144,12 @@ class AlphaFullCPU : public FullBetaCPU const int RegA3 = 19; // only place this is used if (return_value >= 0) { // no error - xc->regs.intRegFile[RegA3] = 0; - xc->regs.intRegFile[AlphaISA::ReturnValueReg] = return_value; + this->xc->regs.intRegFile[RegA3] = 0; + this->xc->regs.intRegFile[AlphaISA::ReturnValueReg] = return_value; } else { // got an error, return details - xc->regs.intRegFile[RegA3] = (IntReg) -1; - xc->regs.intRegFile[AlphaISA::ReturnValueReg] = -return_value; + this->xc->regs.intRegFile[RegA3] = (IntReg) -1; + this->xc->regs.intRegFile[AlphaISA::ReturnValueReg] = -return_value; } } @@ -188,7 +188,7 @@ class 
AlphaFullCPU : public FullBetaCPU #endif Fault error; - error = mem->read(req, data); + error = this->mem->read(req, data); data = htoa(data); return error; } @@ -203,7 +203,7 @@ class AlphaFullCPU : public FullBetaCPU // If this is a store conditional, act appropriately if (req->flags & LOCKED) { - cregs = &xc->regs.miscRegs; + cregs = &this->xc->regs.miscRegs; if (req->flags & UNCACHEABLE) { // Don't update result register (see stq_c in isa_desc) @@ -241,7 +241,7 @@ class AlphaFullCPU : public FullBetaCPU #endif - return mem->write(req, (T)htoa(data)); + return this->mem->write(req, (T)htoa(data)); } }; diff --git a/cpu/beta_cpu/alpha_full_cpu_builder.cc b/cpu/beta_cpu/alpha_full_cpu_builder.cc index f37081232..cf9536cb8 100644 --- a/cpu/beta_cpu/alpha_full_cpu_builder.cc +++ b/cpu/beta_cpu/alpha_full_cpu_builder.cc @@ -283,10 +283,10 @@ CREATE_SIM_OBJECT(BaseFullCPU) params.mem = mem; - params.maxInstsAnyThread = max_insts_any_thread; - params.maxInstsAllThreads = max_insts_all_threads; - params.maxLoadsAnyThread = max_loads_any_thread; - params.maxLoadsAllThreads = max_loads_all_threads; + params.max_insts_any_thread = max_insts_any_thread; + params.max_insts_all_threads = max_insts_all_threads; + params.max_loads_any_thread = max_loads_any_thread; + params.max_loads_all_threads = max_loads_all_threads; // // Caches diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh index 611a0d80d..fccded193 100644 --- a/cpu/beta_cpu/alpha_full_cpu_impl.hh +++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh @@ -14,17 +14,17 @@ template AlphaFullCPU::AlphaFullCPU(Params ¶ms) - : FullBetaCPU(params) + : FullBetaCPU(params) { DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); - fetch.setCPU(this); - decode.setCPU(this); - rename.setCPU(this); - iew.setCPU(this); - commit.setCPU(this); + this->fetch.setCPU(this); + this->decode.setCPU(this); + this->rename.setCPU(this); + this->iew.setCPU(this); + this->commit.setCPU(this); - 
rob.setCPU(this); + this->rob.setCPU(this); } template @@ -32,12 +32,12 @@ void AlphaFullCPU::regStats() { // Register stats for everything that has stats. - fullCPURegStats(); - fetch.regStats(); - decode.regStats(); - rename.regStats(); - iew.regStats(); - commit.regStats(); + this->fullCPURegStats(); + this->fetch.regStats(); + this->decode.regStats(); + this->rename.regStats(); + this->iew.regStats(); + this->commit.regStats(); } #ifndef FULL_SYSTEM @@ -49,25 +49,25 @@ AlphaFullCPU::syscall() DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n"); // Commit stage needs to run as well. - commit.tick(); + this->commit.tick(); squashStages(); // Temporarily increase this by one to account for the syscall // instruction. - ++funcExeInst; + ++(this->funcExeInst); // Copy over all important state to xc once all the unrolling is done. copyToXC(); - process->syscall(xc); + this->process->syscall(this->xc); // Copy over all important state back to CPU. copyFromXC(); // Decrease funcExeInst by one as the normal commit will handle // incrememnting it. - --funcExeInst; + --(this->funcExeInst); } // This is not a pretty function, and should only be used if it is necessary @@ -77,40 +77,40 @@ template void AlphaFullCPU::squashStages() { - InstSeqNum rob_head = rob.readHeadSeqNum(); + InstSeqNum rob_head = this->rob.readHeadSeqNum(); // Now hack the time buffer to put this sequence number in the places // where the stages might read it. 
for (int i = 0; i < 5; ++i) { - timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; + this->timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; } - fetch.squash(rob.readHeadNextPC()); - fetchQueue.advance(); + this->fetch.squash(this->rob.readHeadNextPC()); + this->fetchQueue.advance(); - decode.squash(); - decodeQueue.advance(); + this->decode.squash(); + this->decodeQueue.advance(); - rename.squash(); - renameQueue.advance(); - renameQueue.advance(); + this->rename.squash(); + this->renameQueue.advance(); + this->renameQueue.advance(); // Be sure to advance the IEW queues so that the commit stage doesn't // try to set an instruction as completed at the same time that it // might be deleting it. - iew.squash(); - iewQueue.advance(); - iewQueue.advance(); + this->iew.squash(); + this->iewQueue.advance(); + this->iewQueue.advance(); - rob.squash(rob_head); - commit.setSquashing(); + this->rob.squash(rob_head); + this->commit.setSquashing(); // Now hack the time buffer to clear the sequence numbers in the places // where the stages might read it.? for (int i = 0; i < 5; ++i) { - timeBuffer.access(-i)->commitInfo.doneSeqNum = 0; + this->timeBuffer.access(-i)->commitInfo.doneSeqNum = 0; } } @@ -126,29 +126,31 @@ AlphaFullCPU::copyToXC() // First loop through the integer registers. for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { - renamed_reg = renameMap.lookup(i); - xc->regs.intRegFile[i] = regFile.readIntReg(renamed_reg); + renamed_reg = this->renameMap.lookup(i); + this->xc->regs.intRegFile[i] = this->regFile.readIntReg(renamed_reg); DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n", - renamed_reg, regFile.intRegFile[renamed_reg]); + renamed_reg, this->regFile.intRegFile[renamed_reg]); } // Then loop through the floating point registers. 
for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { - renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - xc->regs.floatRegFile.d[i] = regFile.readFloatRegDouble(renamed_reg); - xc->regs.floatRegFile.q[i] = regFile.readFloatRegInt(renamed_reg); + renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + this->xc->regs.floatRegFile.d[i] = + this->regFile.readFloatRegDouble(renamed_reg); + this->xc->regs.floatRegFile.q[i] = + this->regFile.readFloatRegInt(renamed_reg); } - xc->regs.miscRegs.fpcr = regFile.miscRegs.fpcr; - xc->regs.miscRegs.uniq = regFile.miscRegs.uniq; - xc->regs.miscRegs.lock_flag = regFile.miscRegs.lock_flag; - xc->regs.miscRegs.lock_addr = regFile.miscRegs.lock_addr; + this->xc->regs.miscRegs.fpcr = this->regFile.miscRegs.fpcr; + this->xc->regs.miscRegs.uniq = this->regFile.miscRegs.uniq; + this->xc->regs.miscRegs.lock_flag = this->regFile.miscRegs.lock_flag; + this->xc->regs.miscRegs.lock_addr = this->regFile.miscRegs.lock_addr; - xc->regs.pc = rob.readHeadPC(); - xc->regs.npc = xc->regs.pc+4; + this->xc->regs.pc = this->rob.readHeadPC(); + this->xc->regs.npc = this->xc->regs.pc+4; - xc->func_exe_inst = funcExeInst; + this->xc->func_exe_inst = this->funcExeInst; } // This function will probably mess things up unless the ROB is empty and @@ -162,35 +164,37 @@ AlphaFullCPU::copyFromXC() // First loop through the integer registers. for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { - renamed_reg = renameMap.lookup(i); + renamed_reg = this->renameMap.lookup(i); DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " "now has data %lli.\n", - renamed_reg, regFile.intRegFile[renamed_reg], - xc->regs.intRegFile[i]); + renamed_reg, this->regFile.intRegFile[renamed_reg], + this->xc->regs.intRegFile[i]); - regFile.setIntReg(renamed_reg, xc->regs.intRegFile[i]); + this->regFile.setIntReg(renamed_reg, this->xc->regs.intRegFile[i]); } // Then loop through the floating point registers. 
for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { - renamed_reg = renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - regFile.setFloatRegDouble(renamed_reg, xc->regs.floatRegFile.d[i]); - regFile.setFloatRegInt(renamed_reg, xc->regs.floatRegFile.q[i]); + renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + this->regFile.setFloatRegDouble(renamed_reg, + this->xc->regs.floatRegFile.d[i]); + this->regFile.setFloatRegInt(renamed_reg, + this->xc->regs.floatRegFile.q[i]); } // Then loop through the misc registers. - regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr; - regFile.miscRegs.uniq = xc->regs.miscRegs.uniq; - regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag; - regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr; + this->regFile.miscRegs.fpcr = this->xc->regs.miscRegs.fpcr; + this->regFile.miscRegs.uniq = this->xc->regs.miscRegs.uniq; + this->regFile.miscRegs.lock_flag = this->xc->regs.miscRegs.lock_flag; + this->regFile.miscRegs.lock_addr = this->xc->regs.miscRegs.lock_addr; // Then finally set the PC and the next PC. 
// regFile.pc = xc->regs.pc; // regFile.npc = xc->regs.npc; - funcExeInst = xc->func_exe_inst; + this->funcExeInst = this->xc->func_exe_inst; } #ifdef FULL_SYSTEM diff --git a/cpu/beta_cpu/bpred_unit.cc b/cpu/beta_cpu/bpred_unit.cc index c4a79fbbe..c1b0f54b2 100644 --- a/cpu/beta_cpu/bpred_unit.cc +++ b/cpu/beta_cpu/bpred_unit.cc @@ -3,4 +3,4 @@ #include "cpu/beta_cpu/alpha_impl.hh" #include "cpu/beta_cpu/alpha_dyn_inst.hh" -template TwobitBPredUnit; +template class TwobitBPredUnit; diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc index d5228601c..04c74393b 100644 --- a/cpu/beta_cpu/full_cpu.cc +++ b/cpu/beta_cpu/full_cpu.cc @@ -15,22 +15,10 @@ using namespace std; -#ifdef FULL_SYSTEM -BaseFullCPU::BaseFullCPU(Params ¶ms) - : BaseCPU(params.name, params.numberOfThreads, - params.maxInstsAnyThread, params.maxInstsAllThreads, - params.maxLoadsAnyThread, params.maxLoadsAllThreads, - params._system, params.freq) -{ -} -#else BaseFullCPU::BaseFullCPU(Params ¶ms) - : BaseCPU(params.name, params.numberOfThreads, - params.maxInstsAnyThread, params.maxInstsAllThreads, - params.maxLoadsAnyThread, params.maxLoadsAllThreads) + : BaseCPU(¶ms) { } -#endif // FULL_SYSTEM template FullBetaCPU::TickEvent::TickEvent(FullBetaCPU *c) @@ -515,6 +503,6 @@ FullBetaCPU::wakeDependents(DynInstPtr &inst) } // Forward declaration of FullBetaCPU. -template FullBetaCPU; +template class FullBetaCPU; #endif // __SIMPLE_FULL_CPU_HH__ diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh index 19eb972d9..8ce32b7c7 100644 --- a/cpu/beta_cpu/full_cpu.hh +++ b/cpu/beta_cpu/full_cpu.hh @@ -27,27 +27,7 @@ class BaseFullCPU : public BaseCPU { //Stuff that's pretty ISA independent will go here. 
public: - class Params - { - public: -#ifdef FULL_SYSTEM - std::string name; - int numberOfThreads; - Counter maxInstsAnyThread; - Counter maxInstsAllThreads; - Counter maxLoadsAnyThread; - Counter maxLoadsAllThreads; - System *_system; - Tick freq; -#else - std::string name; - int numberOfThreads; - Counter maxInstsAnyThread; - Counter maxInstsAllThreads; - Counter maxLoadsAnyThread; - Counter maxLoadsAllThreads; -#endif // FULL_SYSTEM - }; + typedef BaseCPU::Params Params; #ifdef FULL_SYSTEM BaseFullCPU(Params ¶ms); diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh index b718e6aa0..1d072ab33 100644 --- a/cpu/beta_cpu/iew_impl.hh +++ b/cpu/beta_cpu/iew_impl.hh @@ -244,10 +244,10 @@ SimpleIEW::squashDueToBranch(DynInstPtr &inst) // Also send PC update information back to prior stages. toCommit->squashedSeqNum = inst->seqNum; toCommit->mispredPC = inst->readPC(); - toCommit->nextPC = inst->readCalcTarg(); + toCommit->nextPC = inst->readNextPC(); toCommit->branchMispredict = true; // Prediction was incorrect, so send back inverse. - toCommit->branchTaken = inst->readCalcTarg() != + toCommit->branchTaken = inst->readNextPC() != (inst->readPC() + sizeof(MachInst)); } @@ -265,7 +265,7 @@ SimpleIEW::squashDueToMem(DynInstPtr &inst) toCommit->squash = true; // Also send PC update information back to prior stages. toCommit->squashedSeqNum = inst->seqNum; - toCommit->nextPC = inst->readCalcTarg(); + toCommit->nextPC = inst->readNextPC(); } template diff --git a/cpu/beta_cpu/inst_queue.cc b/cpu/beta_cpu/inst_queue.cc index c4fd077bc..cd660ac79 100644 --- a/cpu/beta_cpu/inst_queue.cc +++ b/cpu/beta_cpu/inst_queue.cc @@ -4,7 +4,8 @@ #include "cpu/beta_cpu/inst_queue_impl.hh" // Force instantiation of InstructionQueue. 
-template InstructionQueue; +template class InstructionQueue; +template<> unsigned InstructionQueue::DependencyEntry::mem_alloc_counter = 0; diff --git a/cpu/beta_cpu/mem_dep_unit.cc b/cpu/beta_cpu/mem_dep_unit.cc index 3175997f6..d8b5a80eb 100644 --- a/cpu/beta_cpu/mem_dep_unit.cc +++ b/cpu/beta_cpu/mem_dep_unit.cc @@ -6,4 +6,4 @@ // Force instantation of memory dependency unit using store sets and // AlphaSimpleImpl. -template MemDepUnit; +template class MemDepUnit; diff --git a/cpu/beta_cpu/ras.cc b/cpu/beta_cpu/ras.cc index ca05f5a0d..23ca45b3a 100644 --- a/cpu/beta_cpu/ras.cc +++ b/cpu/beta_cpu/ras.cc @@ -4,7 +4,10 @@ ReturnAddrStack::ReturnAddrStack(unsigned _numEntries) : numEntries(_numEntries), usedEntries(0), tos(0) { - addrStack = new Addr[numEntries](0); + addrStack = new Addr[numEntries]; + + for (int i = 0; i < numEntries; ++i) + addrStack[i] = 0; } void diff --git a/cpu/beta_cpu/rename_map.cc b/cpu/beta_cpu/rename_map.cc index 1301202f2..45b8084de 100644 --- a/cpu/beta_cpu/rename_map.cc +++ b/cpu/beta_cpu/rename_map.cc @@ -1,6 +1,10 @@ +#include + #include "cpu/beta_cpu/rename_map.hh" +using namespace std; + // Todo: Consider making functions inline. Avoid having things that are // using the zero register or misc registers from adding on the registers // to the free list. Possibly remove the direct communication between diff --git a/cpu/beta_cpu/rename_map.hh b/cpu/beta_cpu/rename_map.hh index 44a7eefb1..198cfc536 100644 --- a/cpu/beta_cpu/rename_map.hh +++ b/cpu/beta_cpu/rename_map.hh @@ -64,8 +64,8 @@ class SimpleRenameMap void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg); - void squash(vector freed_regs, - vector unmaps); + void squash(std::vector freed_regs, + std::vector unmaps); int numFreeEntries(); diff --git a/cpu/beta_cpu/rob.cc b/cpu/beta_cpu/rob.cc index 611cca0ba..ad45c022f 100644 --- a/cpu/beta_cpu/rob.cc +++ b/cpu/beta_cpu/rob.cc @@ -4,4 +4,4 @@ #include "cpu/beta_cpu/rob_impl.hh" // Force instantiation of InstructionQueue. 
-template ROB; +template class ROB; diff --git a/cpu/beta_cpu/sat_counter.cc b/cpu/beta_cpu/sat_counter.cc new file mode 100644 index 000000000..da095c3e1 --- /dev/null +++ b/cpu/beta_cpu/sat_counter.cc @@ -0,0 +1,43 @@ +#include "base/misc.hh" +#include "cpu/beta_cpu/sat_counter.hh" + +SatCounter::SatCounter() + : maxVal(0), counter(0) +{ +} + +SatCounter::SatCounter(unsigned bits) + : maxVal((1 << bits) - 1), counter(0) +{ +} + +SatCounter::SatCounter(unsigned bits, unsigned initial_val) + : maxVal((1 << bits) - 1), counter(initial_val) +{ + // Check to make sure initial value doesn't exceed the max counter value. + if (initial_val > maxVal) { + panic("BP: Initial counter value exceeds max size."); + } +} + +void +SatCounter::setBits(unsigned bits) +{ + maxVal = (1 << bits) - 1; +} + +void +SatCounter::increment() +{ + if(counter < maxVal) { + ++counter; + } +} + +void +SatCounter::decrement() +{ + if(counter > 0) { + --counter; + } +} diff --git a/cpu/beta_cpu/sat_counter.hh b/cpu/beta_cpu/sat_counter.hh new file mode 100644 index 000000000..e0f23e13e --- /dev/null +++ b/cpu/beta_cpu/sat_counter.hh @@ -0,0 +1,62 @@ +#ifndef __CPU_BETA_CPU_SAT_COUNTER_HH__ +#define __CPU_BETA_CPU_SAT_COUNTER_HH__ + +#include + +/** + * Private counter class for the internal saturating counters. + * Implements an n bit saturating counter and provides methods to + * increment, decrement, and read it. + * @todo Consider making this something that more closely mimics a + * built in class so you can use ++ or --. + */ +class SatCounter +{ + public: + /** + * Constructor for the counter. + */ + SatCounter(); + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + */ + SatCounter(unsigned bits); + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + * @param initial_val Starting value for each counter. + */ + SatCounter(unsigned bits, unsigned initial_val); + + /** + * Sets the number of bits. 
+ */ + void setBits(unsigned bits); + + /** + * Increments the counter's current value. + */ + void increment(); + + /** + * Decrements the counter's current value. + */ + void decrement(); + + /** + * Read the counter's value. + */ + const uint8_t read() const + { + return counter; + } + + private: + uint8_t maxVal; + uint8_t counter; +}; + +#endif // __CPU_BETA_CPU_SAT_COUNTER_HH__ diff --git a/cpu/beta_cpu/tournament_pred.cc b/cpu/beta_cpu/tournament_pred.cc index 53a11326a..5a22278eb 100644 --- a/cpu/beta_cpu/tournament_pred.cc +++ b/cpu/beta_cpu/tournament_pred.cc @@ -1,35 +1,5 @@ #include "cpu/beta_cpu/tournament_pred.hh" -TournamentBP::SatCounter::SatCounter(unsigned bits) - : maxVal((1 << bits) - 1), counter(0) -{ -} - -TournamentBP::SatCounter::SatCounter(unsigned bits, unsigned initial_val) - : maxVal((1 << bits) - 1), counter(initial_val) -{ - // Check to make sure initial value doesn't exceed the max counter value. - if (initial_val > maxVal) { - panic("BP: Initial counter value exceeds max size."); - } -} - -void -TournamentBP::SatCounter::increment() -{ - if (counter < maxVal) { - ++counter; - } -} - -void -TournamentBP::SatCounter::decrement() -{ - if (counter > 0) { - --counter; - } -} - TournamentBP::TournamentBP(unsigned _local_predictor_size, unsigned _local_ctr_bits, unsigned _local_history_table_size, @@ -54,21 +24,36 @@ TournamentBP::TournamentBP(unsigned _local_predictor_size, //Should do checks here to make sure sizes are correct (powers of 2) //Setup the array of counters for the local predictor - local_ctrs = new SatCounter[local_predictor_size](local_ctr_bits); + local_ctrs = new SatCounter[local_predictor_size]; + + for (int i = 0; i < local_predictor_size; ++i) + local_ctrs[i].setBits(local_ctr_bits); + //Setup the history table for the local table - local_history_table = new unsigned[local_history_table_size](0); + local_history_table = new unsigned[local_history_table_size]; + + for (int i = 0; i < local_history_table_size; ++i) + 
local_history_table[i] = 0; + // Setup the local history mask localHistoryMask = (1 << local_history_bits) - 1; //Setup the array of counters for the global predictor - global_ctrs = new SatCounter[global_predictor_size](global_ctr_bits); + global_ctrs = new SatCounter[global_predictor_size]; + + for (int i = 0; i < global_predictor_size; ++i) + global_ctrs[i].setBits(global_ctr_bits); + //Clear the global history global_history = 0; // Setup the global history mask globalHistoryMask = (1 << global_history_bits) - 1; //Setup the array of counters for the choice predictor - choice_ctrs = new SatCounter[choice_predictor_size](choice_ctr_bits); + choice_ctrs = new SatCounter[choice_predictor_size]; + + for (int i = 0; i < choice_predictor_size; ++i) + choice_ctrs[i].setBits(choice_ctr_bits); threshold = (1 << (local_ctr_bits - 1)) - 1; threshold = threshold / 2; diff --git a/cpu/beta_cpu/tournament_pred.hh b/cpu/beta_cpu/tournament_pred.hh index bf87d753b..1512abc78 100644 --- a/cpu/beta_cpu/tournament_pred.hh +++ b/cpu/beta_cpu/tournament_pred.hh @@ -1,8 +1,9 @@ -#ifndef __TOURNAMENT_PRED_HH__ -#define __TOURNAMENT_PRED_HH__ +#ifndef __CPU_BETA_CPU_TOURNAMENT_PRED_HH__ +#define __CPU_BETA_CPU_TOURNAMENT_PRED_HH__ // For Addr type. #include "arch/alpha/isa_traits.hh" +#include "cpu/beta_cpu/sat_counter.hh" class TournamentBP { @@ -48,52 +49,6 @@ class TournamentBP inline void updateHistoriesNotTaken(unsigned local_history_idx); - /** - * Private counter class for the internal saturating counters. - * Implements an n bit saturating counter and provides methods to - * increment, decrement, and read it. - * @todo Consider making this something that more closely mimics a - * built in class so you can use ++ or --. - */ - class SatCounter - { - public: - /** - * Constructor for the counter. - * @param bits How many bits the counter will have. - */ - SatCounter(unsigned bits); - - /** - * Constructor for the counter. - * @param bits How many bits the counter will have. 
- * @param initial_val Starting value for each counter. - */ - SatCounter(unsigned bits, unsigned initial_val); - - /** - * Increments the counter's current value. - */ - void increment(); - - /** - * Decrements the counter's current value. - */ - void decrement(); - - /** - * Read the counter's value. - */ - uint8_t read() - { - return counter; - } - - private: - uint8_t maxVal; - uint8_t counter; - }; - /** Local counters. */ SatCounter *local_ctrs; @@ -157,4 +112,4 @@ class TournamentBP unsigned threshold; }; -#endif // __TOURNAMENT_PRED_HH__ +#endif // __CPU_BETA_CPU_TOURNAMENT_PRED_HH__ diff --git a/cpu/ooo_cpu/ea_list.cc b/cpu/ooo_cpu/ea_list.cc new file mode 100644 index 000000000..4142e7f5e --- /dev/null +++ b/cpu/ooo_cpu/ea_list.cc @@ -0,0 +1,50 @@ + +#include "arch/alpha/isa_traits.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ooo_cpu/ea_list.hh" + +void +EAList::addAddr(const InstSeqNum &new_sn, const Addr &new_ea) +{ + instEA newEA(new_sn, new_ea); + + eaList.push_back(newEA); +} + +void +EAList::clearAddr(const InstSeqNum &sn_to_clear, const Addr &ea_to_clear) +{ + eaListIt list_it = eaList.begin(); + + while (list_it != eaList.end() && (*list_it).first != sn_to_clear) { + assert((*list_it).second == ea_to_clear); + } +} + +bool +EAList::checkConflict(const InstSeqNum &check_sn, const Addr &check_ea) const +{ + const constEAListIt list_it = eaList.begin(); + + while (list_it != eaList.end() && (*list_it).first < check_sn) { + if ((*list_it).second == check_ea) { + return true; + } + } + + return false; +} + +void +EAList::clear() +{ + eaList.clear(); +} + +void +EAList::commit(const InstSeqNum &commit_sn) +{ + while (!eaList.empty() && eaList.front().first <= commit_sn) { + eaList.pop_front(); + } +} diff --git a/cpu/ooo_cpu/ea_list.hh b/cpu/ooo_cpu/ea_list.hh new file mode 100644 index 000000000..bc099d7f3 --- /dev/null +++ b/cpu/ooo_cpu/ea_list.hh @@ -0,0 +1,44 @@ +#ifndef __CPU_EA_LIST_HH__ +#define __CPU_EA_LIST_HH__ + +#include +#include + 
+#include "arch/alpha/isa_traits.hh" +#include "cpu/inst_seq.hh" + +/** + * Simple class to hold onto a list of pairs, each pair having a memory + * instruction's sequence number and effective addr. This list can be used + * for memory disambiguation. However, if I ever want to forward results, I + * may have to use a list that holds DynInstPtrs. Hence this may change in + * the future. + */ +class EAList { + private: + typedef std::pair instEA; + typedef std::list::iterator eaListIt; + typedef std::list::const_iterator constEAListIt; + + std::list eaList; + + public: + EAList() { } + ~EAList() { } + + void addAddr(const InstSeqNum &new_sn, const Addr &new_ea); + + void clearAddr(const InstSeqNum &sn_to_clear, const Addr &ea_to_clear); + + /** Checks if any instructions older than check_sn have a conflicting + * address with check_ea. Note that this function does not handle the + * sequence number rolling over. + */ + bool checkConflict(const InstSeqNum &check_sn, const Addr &check_ea) const; + + void clear(); + + void commit(const InstSeqNum &commit_sn); +}; + +#endif // __CPU_EA_LIST_HH__ diff --git a/cpu/ooo_cpu/ooo_cpu.cc b/cpu/ooo_cpu/ooo_cpu.cc new file mode 100644 index 000000000..255070de4 --- /dev/null +++ b/cpu/ooo_cpu/ooo_cpu.cc @@ -0,0 +1,6 @@ + +#include "cpu/ooo_cpu/ooo_cpu_impl.hh" +#include "cpu/ooo_cpu/ooo_dyn_inst.hh" +#include "cpu/ooo_cpu/ooo_impl.hh" + +template class OoOCPU; diff --git a/cpu/ooo_cpu/ooo_cpu.hh b/cpu/ooo_cpu/ooo_cpu.hh new file mode 100644 index 000000000..25fdb39b6 --- /dev/null +++ b/cpu/ooo_cpu/ooo_cpu.hh @@ -0,0 +1,613 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_OOO_CPU_OOO_CPU_HH__ +#define __CPU_OOO_CPU_OOO_CPU_HH__ + +#include "base/statistics.hh" +#include "cpu/base_cpu.hh" +#include "cpu/exec_context.hh" +#include "cpu/full_cpu/fu_pool.hh" +#include "cpu/ooo_cpu/ea_list.hh" +#include "cpu/pc_event.hh" +#include "cpu/static_inst.hh" +#include "mem/mem_interface.hh" +#include "sim/eventq.hh" + +// forward declarations +#ifdef FULL_SYSTEM +class Processor; +class AlphaITB; +class AlphaDTB; +class PhysicalMemory; + +class RemoteGDB; +class GDBListener; + +#else + +class Process; + +#endif // FULL_SYSTEM + +class Checkpoint; +class MemInterface; + +namespace Trace { + class InstRecord; +} + +/** + * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with + * simple out-of-order capabilities added to it. It is still a 1 CPI machine + * (?), but is capable of handling cache misses. Basically it models having + * a ROB/IQ by only allowing a certain amount of instructions to execute while + * the cache miss is outstanding. + */ + +template +class OoOCPU : public BaseCPU +{ + private: + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::ISA ISA; + + public: + // main simulation loop (one cycle) + void tick(); + + private: + struct TickEvent : public Event + { + OoOCPU *cpu; + int width; + + TickEvent(OoOCPU *c, int w); + void process(); + const char *description(); + }; + + TickEvent tickEvent; + + /// Schedule tick event, regardless of its current state. + void scheduleTickEvent(int delay) + { + if (tickEvent.squashed()) + tickEvent.reschedule(curTick + delay); + else if (!tickEvent.scheduled()) + tickEvent.schedule(curTick + delay); + } + + /// Unschedule tick event, regardless of its current state. 
+ void unscheduleTickEvent() + { + if (tickEvent.scheduled()) + tickEvent.squash(); + } + + private: + Trace::InstRecord *traceData; + + template + void trace_data(T data); + + public: + // + enum Status { + Running, + Idle, + IcacheMissStall, + IcacheMissComplete, + DcacheMissStall, + SwitchedOut + }; + + private: + Status _status; + + public: + void post_interrupt(int int_num, int index); + + void zero_fill_64(Addr addr) { + static int warned = 0; + if (!warned) { + warn ("WH64 is not implemented"); + warned = 1; + } + }; + + struct Params : public BaseCPU::Params + { + MemInterface *icache_interface; + MemInterface *dcache_interface; + int width; +#ifdef FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; + FunctionalMemory *mem; +#else + Process *process; +#endif + int issueWidth; + }; + + OoOCPU(Params *params); + + virtual ~OoOCPU(); + + private: + void copyFromXC(); + + public: + // execution context + ExecContext *xc; + + void switchOut(); + void takeOverFrom(BaseCPU *oldCPU); + +#ifdef FULL_SYSTEM + Addr dbg_vtophys(Addr addr); + + bool interval_stats; +#endif + + // L1 instruction cache + MemInterface *icacheInterface; + + // L1 data cache + MemInterface *dcacheInterface; + + FuncUnitPool *fuPool; + + // Refcounted pointer to the one memory request. + MemReqPtr cacheMemReq; + + class ICacheCompletionEvent : public Event + { + private: + OoOCPU *cpu; + + public: + ICacheCompletionEvent(OoOCPU *_cpu); + + virtual void process(); + virtual const char *description(); + }; + + // Will need to create a cache completion event upon any memory miss. 
+ ICacheCompletionEvent iCacheCompletionEvent; + + class DCacheCompletionEvent : public Event + { + private: + OoOCPU *cpu; + DynInstPtr inst; + + public: + DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst); + + virtual void process(); + virtual const char *description(); + }; + + friend class DCacheCompletionEvent; + + Status status() const { return _status; } + + virtual void activateContext(int thread_num, int delay); + virtual void suspendContext(int thread_num); + virtual void deallocateContext(int thread_num); + virtual void haltContext(int thread_num); + + // statistics + virtual void regStats(); + virtual void resetStats(); + + // number of simulated instructions + Counter numInst; + Counter startNumInst; + Stats::Scalar<> numInsts; + + virtual Counter totalInstructions() const + { + return numInst - startNumInst; + } + + // number of simulated memory references + Stats::Scalar<> numMemRefs; + + // number of simulated loads + Counter numLoad; + Counter startNumLoad; + + // number of idle cycles + Stats::Average<> notIdleFraction; + Stats::Formula idleFraction; + + // number of cycles stalled for I-cache misses + Stats::Scalar<> icacheStallCycles; + Counter lastIcacheStall; + + // number of cycles stalled for D-cache misses + Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; + + void processICacheCompletion(); + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string &section); + +#ifdef FULL_SYSTEM + bool validInstAddr(Addr addr) { return true; } + bool validDataAddr(Addr addr) { return true; } + int getInstAsid() { return xc->regs.instAsid(); } + int getDataAsid() { return xc->regs.dataAsid(); } + + Fault translateInstReq(MemReqPtr &req) + { + return itb->translate(req); + } + + Fault translateDataReadReq(MemReqPtr &req) + { + return dtb->translate(req, false); + } + + Fault translateDataWriteReq(MemReqPtr &req) + { + return dtb->translate(req, true); + } + +#else + bool validInstAddr(Addr addr)
+ { return xc->validInstAddr(addr); } + + bool validDataAddr(Addr addr) + { return xc->validDataAddr(addr); } + + int getInstAsid() { return xc->asid; } + int getDataAsid() { return xc->asid; } + + Fault dummyTranslation(MemReqPtr &req) + { +#if 0 + assert((req->vaddr >> 48 & 0xffff) == 0); +#endif + + // put the asid in the upper 16 bits of the paddr + req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); + req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; + return No_Fault; + } + Fault translateInstReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + Fault translateDataReadReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + Fault translateDataWriteReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + +#endif + + template <class T> + Fault read(Addr addr, T &data, unsigned flags, DynInstPtr inst); + + template <class T> + Fault write(T data, Addr addr, unsigned flags, + uint64_t *res, DynInstPtr inst); + + void prefetch(Addr addr, unsigned flags) + { + // need to do this... + } + + void writeHint(Addr addr, int size, unsigned flags) + { + // need to do this... + } + + Fault copySrcTranslate(Addr src); + + Fault copy(Addr dest); + + private: + bool executeInst(DynInstPtr &inst); + + void renameInst(DynInstPtr &inst); + + void addInst(DynInstPtr &inst); + + void commitHeadInst(); + + bool grabInst(); + + Fault fetchCacheLine(); + + InstSeqNum getAndIncrementInstSeq(); + + bool ambigMemAddr; + + private: + InstSeqNum globalSeqNum; + + DynInstPtr renameTable[ISA::TotalNumRegs]; + DynInstPtr commitTable[ISA::TotalNumRegs]; + + // Might need a table of the shadow registers as well. +#ifdef FULL_SYSTEM + DynInstPtr palShadowTable[ISA::NumIntRegs]; +#endif + + public: + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming.
We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to reduce overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + // In the OoO case these shouldn't read from the XC but rather from the + // rename table of DynInsts. Also these likely shouldn't be called very + // often, other than when adding things into the xc during say a syscall. + + uint64_t readIntReg(StaticInst *si, int idx) + { + return xc->readIntReg(si->srcRegIdx(idx)); + } + + float readFloatRegSingle(StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatRegSingle(reg_idx); + } + + double readFloatRegDouble(StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatRegDouble(reg_idx); + } + + uint64_t readFloatRegInt(StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatRegInt(reg_idx); + } + + void setIntReg(StaticInst *si, int idx, uint64_t val) + { + xc->setIntReg(si->destRegIdx(idx), val); + } + + void setFloatRegSingle(StaticInst *si, int idx, float val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatRegSingle(reg_idx, val); + } + + void setFloatRegDouble(StaticInst *si, int idx, double val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatRegDouble(reg_idx, val); + } + + void setFloatRegInt(StaticInst *si, int idx, uint64_t val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatRegInt(reg_idx, val); + } + + uint64_t readPC() { return PC; } + void setNextPC(Addr val) { nextPC = val; } + + private: + Addr PC; + Addr nextPC; + + unsigned issueWidth; + + bool
fetchRedirExcp; + bool fetchRedirBranch; + + /** Mask to get a cache block's address. */ + Addr cacheBlkMask; + + unsigned cacheBlkSize; + + Addr cacheBlkPC; + + /** The cache line being fetched. */ + uint8_t *cacheData; + + protected: + bool cacheBlkValid; + + private: + + // Align an address (typically a PC) to the start of an I-cache block. + // We fold in the PISA 64- to 32-bit conversion here as well. + Addr icacheBlockAlignPC(Addr addr) + { + addr = ISA::realPCToFetchPC(addr); + return (addr & ~(cacheBlkMask)); + } + + unsigned instSize; + + // ROB tracking stuff. + DynInstPtr robHeadPtr; + DynInstPtr robTailPtr; + unsigned robInsts; + + // List of outstanding EA instructions. + protected: + EAList eaList; + + public: + void branchToTarget(Addr val) + { + if (!fetchRedirExcp) { + fetchRedirBranch = true; + PC = val; + } + } + + // ISA stuff: + uint64_t readUniq() { return xc->readUniq(); } + void setUniq(uint64_t val) { xc->setUniq(val); } + + uint64_t readFpcr() { return xc->readFpcr(); } + void setFpcr(uint64_t val) { xc->setFpcr(val); } + +#ifdef FULL_SYSTEM + uint64_t readIpr(int idx, Fault &fault) { return xc->readIpr(idx, fault); } + Fault setIpr(int idx, uint64_t val) { return xc->setIpr(idx, val); } + Fault hwrei() { return xc->hwrei(); } + int readIntrFlag() { return xc->readIntrFlag(); } + void setIntrFlag(int val) { xc->setIntrFlag(val); } + bool inPalMode() { return xc->inPalMode(); } + void ev5_trap(Fault fault) { xc->ev5_trap(fault); } + bool simPalCheck(int palFunc) { return xc->simPalCheck(palFunc); } +#else + void syscall() { xc->syscall(); } +#endif + + ExecContext *xcBase() { return xc; } +}; + + +// precise architected memory state accessor macros +template <class Impl> +template <class T> +Fault +OoOCPU<Impl>::read(Addr addr, T &data, unsigned flags, DynInstPtr inst) +{ + MemReqPtr readReq = new MemReq(); + readReq->xc = xc; + readReq->asid = 0; + readReq->data = new uint8_t[64]; + + readReq->reset(addr, sizeof(T), flags); + + // translate to physical address - This
might be an ISA impl call + Fault fault = translateDataReadReq(readReq); + + // do functional access + if (fault == No_Fault) + fault = xc->mem->read(readReq, data); +#if 0 + if (traceData) { + traceData->setAddr(addr); + if (fault == No_Fault) + traceData->setData(data); + } +#endif + + // if we have a cache, do cache access too + if (fault == No_Fault && dcacheInterface) { + readReq->cmd = Read; + readReq->completionEvent = NULL; + readReq->time = curTick; + /*MemAccessResult result = */dcacheInterface->access(readReq); + + if (dcacheInterface->doEvents()) { + readReq->completionEvent = new DCacheCompletionEvent(this, inst); + lastDcacheStall = curTick; + unscheduleTickEvent(); + _status = DcacheMissStall; + } + } + + if (!dcacheInterface && (readReq->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); + + return fault; +} + +template <class Impl> +template <class T> +Fault +OoOCPU<Impl>::write(T data, Addr addr, unsigned flags, + uint64_t *res, DynInstPtr inst) +{ + MemReqPtr writeReq = new MemReq(); + writeReq->xc = xc; + writeReq->asid = 0; + writeReq->data = new uint8_t[64]; + +#if 0 + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } +#endif + + writeReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = xc->translateDataWriteReq(writeReq); + + // do functional access + if (fault == No_Fault) + fault = xc->write(writeReq, data); + + if (fault == No_Fault && dcacheInterface) { + writeReq->cmd = Write; + memcpy(writeReq->data,(uint8_t *)&data,writeReq->size); + writeReq->completionEvent = NULL; + writeReq->time = curTick; + /*MemAccessResult result = */dcacheInterface->access(writeReq); + + if (dcacheInterface->doEvents()) { + writeReq->completionEvent = new DCacheCompletionEvent(this, inst); + lastDcacheStall = curTick; + unscheduleTickEvent(); + _status = DcacheMissStall; + } + } + + if (res && (fault == No_Fault)) + *res = writeReq->result; + + if (!dcacheInterface && (writeReq->flags & UNCACHEABLE)) + recordEvent("Uncached
Write"); + + return fault; +} + + +#endif // __CPU_OOO_CPU_OOO_CPU_HH__ diff --git a/cpu/ooo_cpu/ooo_impl.hh b/cpu/ooo_cpu/ooo_impl.hh new file mode 100644 index 000000000..9e6df9214 --- /dev/null +++ b/cpu/ooo_cpu/ooo_impl.hh @@ -0,0 +1,21 @@ + +#ifndef __CPU_OOO_CPU_OOO_IMPL_HH__ +#define __CPU_OOO_CPU_OOO_IMPL_HH__ + +#include "arch/alpha/isa_traits.hh" + +template +class OoOCPU; + +template +class OoODynInst; + +struct OoOImpl { + typedef AlphaISA ISA; + typedef OoOCPU OoOCPU; + typedef OoOCPU FullCPU; + typedef OoODynInst DynInst; + typedef RefCountingPtr DynInstPtr; +}; + +#endif // __CPU_OOO_CPU_OOO_IMPL_HH__ diff --git a/cpu/simple_cpu/simple_cpu.cc b/cpu/simple_cpu/simple_cpu.cc index 044ee9b9d..df78eb9a9 100644 --- a/cpu/simple_cpu/simple_cpu.cc +++ b/cpu/simple_cpu/simple_cpu.cc @@ -562,9 +562,6 @@ SimpleCPU::dbg_vtophys(Addr addr) } #endif // FULL_SYSTEM -Tick save_cycle = 0; - - void SimpleCPU::processCacheCompletion() { diff --git a/cpu/static_inst.hh b/cpu/static_inst.hh index b5022af5b..25c98b12a 100644 --- a/cpu/static_inst.hh +++ b/cpu/static_inst.hh @@ -26,8 +26,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __STATIC_INST_HH__ -#define __STATIC_INST_HH__ +#ifndef __CPU_STATIC_INST_HH__ +#define __CPU_STATIC_INST_HH__ #include #include @@ -41,11 +41,16 @@ // forward declarations struct AlphaSimpleImpl; +struct OoOImpl; class ExecContext; class DynInst; + template class AlphaDynInst; +template +class OoODynInst; + class FastCPU; class SimpleCPU; class InorderCPU; @@ -255,7 +260,7 @@ class StaticInst : public StaticInstBase * obtain the dependence info (numSrcRegs and srcRegIdx[]) for * just the EA computation. */ - virtual const + virtual StaticInstPtr &eaCompInst() const { return nullStaticInstPtr; } /** @@ -264,7 +269,7 @@ class StaticInst : public StaticInstBase * obtain the dependence info (numSrcRegs and srcRegIdx[]) for * just the memory access (not the EA computation). 
*/ - virtual const + virtual StaticInstPtr &memAccInst() const { return nullStaticInstPtr; } /// The binary machine instruction. @@ -445,4 +450,4 @@ class StaticInstPtr : public RefCountingPtr > } }; -#endif // __STATIC_INST_HH__ +#endif // __CPU_STATIC_INST_HH__ -- 2.30.2