From 608641e23c7f2288810c3f23a1a63790b664f2ab Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Sun, 26 Jul 2015 10:21:20 -0500 Subject: [PATCH] cpu: implements vector registers This adds a vector register type. The type is defined as a std::array of a fixed number of uint64_ts. The isa_parser.py has been modified to parse vector register operands and generate the required code. Different cpus have vector register files now. --- src/arch/SConscript | 4 +- src/arch/alpha/isa.hh | 7 ++ src/arch/alpha/registers.hh | 10 ++- src/arch/alpha/utility.cc | 1 + src/arch/arm/insts/static_inst.cc | 2 + src/arch/arm/isa.hh | 7 ++ src/arch/arm/registers.hh | 10 ++- src/arch/arm/utility.cc | 3 + src/arch/isa_parser.py | 115 +++++++++++++++++++++++++++- src/arch/mips/isa.hh | 7 ++ src/arch/mips/registers.hh | 10 ++- src/arch/mips/utility.cc | 3 + src/arch/null/registers.hh | 2 + src/arch/power/insts/static_inst.cc | 2 + src/arch/power/isa.hh | 7 ++ src/arch/power/registers.hh | 10 ++- src/arch/power/utility.cc | 3 + src/arch/sparc/isa.hh | 7 ++ src/arch/sparc/registers.hh | 9 ++- src/arch/sparc/utility.cc | 3 + src/arch/x86/insts/static_inst.cc | 7 ++ src/arch/x86/isa.hh | 6 ++ src/arch/x86/registers.hh | 11 ++- src/arch/x86/utility.cc | 4 + src/cpu/StaticInstFlags.py | 7 +- src/cpu/base_dyn_inst.hh | 26 +++++++ src/cpu/checker/cpu.hh | 24 +++++- src/cpu/checker/cpu_impl.hh | 74 ++++++++++++------ src/cpu/checker/thread_context.hh | 16 ++++ src/cpu/exec_context.hh | 17 ++++ src/cpu/minor/dyn_inst.cc | 2 + src/cpu/minor/exec_context.hh | 43 +++++++---- src/cpu/minor/scoreboard.cc | 8 ++ src/cpu/minor/scoreboard.hh | 12 +-- src/cpu/o3/O3CPU.py | 7 ++ src/cpu/o3/cpu.cc | 76 +++++++++++++++++- src/cpu/o3/cpu.hh | 12 +++ src/cpu/o3/dyn_inst.hh | 19 ++++- src/cpu/o3/free_list.hh | 21 ++++- src/cpu/o3/inst_queue_impl.hh | 2 +- src/cpu/o3/regfile.cc | 26 ++++++- src/cpu/o3/regfile.hh | 52 ++++++++++++- src/cpu/o3/rename_impl.hh | 20 ++++- src/cpu/o3/rename_map.cc | 12 +++ 
src/cpu/o3/rename_map.hh | 41 +++++++++- src/cpu/o3/thread_context.hh | 12 +++ src/cpu/o3/thread_context_impl.hh | 23 ++++++ src/cpu/reg_class.cc | 1 + src/cpu/reg_class.hh | 6 +- src/cpu/simple/base.hh | 20 +++++ src/cpu/simple_thread.hh | 50 ++++++++++++ src/cpu/static_inst.hh | 12 ++- src/cpu/thread_context.cc | 30 ++++++++ src/cpu/thread_context.hh | 24 ++++++ src/sim/insttracer.hh | 9 ++- 55 files changed, 876 insertions(+), 78 deletions(-) diff --git a/src/arch/SConscript b/src/arch/SConscript index e0d6845f5..89ecdfa73 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -196,5 +196,7 @@ env.Append(BUILDERS = {'ScanISA' : DebugFlag('IntRegs') DebugFlag('FloatRegs') DebugFlag('CCRegs') +DebugFlag('VectorRegs') DebugFlag('MiscRegs') -CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'CCRegs', 'MiscRegs' ]) +CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'CCRegs', 'VectorRegs', + 'MiscRegs' ]) diff --git a/src/arch/alpha/isa.hh b/src/arch/alpha/isa.hh index 6a88ee40b..b5964e622 100644 --- a/src/arch/alpha/isa.hh +++ b/src/arch/alpha/isa.hh @@ -114,6 +114,13 @@ namespace AlphaISA return reg; } + // dummy + int + flattenVectorIndex(int reg) const + { + return reg; + } + int flattenMiscIndex(int reg) const { diff --git a/src/arch/alpha/registers.hh b/src/arch/alpha/registers.hh index 3fd774cf7..665ea30c7 100644 --- a/src/arch/alpha/registers.hh +++ b/src/arch/alpha/registers.hh @@ -56,6 +56,12 @@ typedef uint64_t MiscReg; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; +// vector register file entry type +typedef uint64_t VectorRegElement; +const int NumVectorRegElements = 0; +const int VectorRegBytes = NumVectorRegElements * sizeof(VectorRegElement); +typedef std::array VectorReg; + union AnyReg { IntReg intreg; @@ -95,6 +101,7 @@ const int NumFloatArchRegs = 32; const int NumIntRegs = NumIntArchRegs + NumPALShadowRegs; const int NumFloatRegs = NumFloatArchRegs; const int NumCCRegs = 0; +const int NumVectorRegs = 0; const int 
NumMiscRegs = NUM_MISCREGS; const int TotalNumRegs = @@ -106,7 +113,8 @@ enum DependenceTags { // 32..63 are the FP regs 0..31, i.e. use (reg + FP_Reg_Base) FP_Reg_Base = NumIntRegs, CC_Reg_Base = FP_Reg_Base + NumFloatRegs, - Misc_Reg_Base = CC_Reg_Base + NumCCRegs, // NumCCRegs == 0 + Vector_Reg_Base = CC_Reg_Base + NumCCRegs, // NumCCRegs == 0 + Misc_Reg_Base = Vector_Reg_Base + NumVectorRegs, // NumVectorRegs == 0 Max_Reg_Index = Misc_Reg_Base + NumMiscRegs + NumInternalProcRegs }; diff --git a/src/arch/alpha/utility.cc b/src/arch/alpha/utility.cc index 2dfe00f96..b0a503828 100644 --- a/src/arch/alpha/utility.cc +++ b/src/arch/alpha/utility.cc @@ -73,6 +73,7 @@ copyRegs(ThreadContext *src, ThreadContext *dest) // Would need to add condition-code regs if implemented assert(NumCCRegs == 0); + assert(NumVectorRegs == 0); // Copy misc. registers copyMiscRegs(src, dest); diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc index 9f878ac4d..417496579 100644 --- a/src/arch/arm/insts/static_inst.cc +++ b/src/arch/arm/insts/static_inst.cc @@ -337,6 +337,8 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const case CCRegClass: ccprintf(os, "cc_%s", ArmISA::ccRegName[rel_reg]); break; + case VectorRegClass: + panic("ARM ISA does not have any vector registers yet!"); } } diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index a07017c17..1e7edd637 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -287,6 +287,13 @@ namespace ArmISA return reg; } + int + flattenVectorIndex(int reg) const + { + assert(reg >= 0); + return reg; + } + int flattenMiscIndex(int reg) const { diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh index 23fc20450..e57802e53 100644 --- a/src/arch/arm/registers.hh +++ b/src/arch/arm/registers.hh @@ -72,6 +72,12 @@ typedef uint64_t MiscReg; // condition code register; must be at least 32 bits for FpCondCodes typedef uint64_t CCReg; +// vector register file entry type +typedef uint64_t
VectorRegElement; +const int NumVectorRegElements = 0; +const int VectorRegBytes = NumVectorRegElements * sizeof(VectorRegElement); +typedef std::array<VectorRegElement, NumVectorRegElements> VectorReg; + // Constants Related to the number of registers const int NumIntArchRegs = NUM_ARCH_INTREGS; // The number of single precision floating point registers @@ -82,6 +88,7 @@ const int NumFloatSpecialRegs = 32; const int NumIntRegs = NUM_INTREGS; const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs; const int NumCCRegs = NUM_CCREGS; +const int NumVectorRegs = 0; const int NumMiscRegs = NUM_MISCREGS; #define ISA_HAS_CC_REGS @@ -112,7 +119,8 @@ const int SyscallSuccessReg = ReturnValueReg; // These help enumerate all the registers for dependence tracking. const int FP_Reg_Base = NumIntRegs * (MODE_MAXMODE + 1); const int CC_Reg_Base = FP_Reg_Base + NumFloatRegs; -const int Misc_Reg_Base = CC_Reg_Base + NumCCRegs; +const int Vector_Reg_Base = CC_Reg_Base + NumCCRegs; +const int Misc_Reg_Base = Vector_Reg_Base + NumVectorRegs; const int Max_Reg_Index = Misc_Reg_Base + NumMiscRegs; typedef union { diff --git a/src/arch/arm/utility.cc b/src/arch/arm/utility.cc index 34fcfd482..e1f9dfe04 100644 --- a/src/arch/arm/utility.cc +++ b/src/arch/arm/utility.cc @@ -156,6 +156,9 @@ copyRegs(ThreadContext *src, ThreadContext *dest) for (int i = 0; i < NumCCRegs; i++) dest->setCCReg(i, src->readCCReg(i)); + // Copy vector registers when vector registers put to use.
+ assert(NumVectorRegs == 0); + for (int i = 0; i < NumMiscRegs; i++) dest->setMiscRegNoEffect(i, src->readMiscRegNoEffect(i)); diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py index f756161ea..5050d24d4 100755 --- a/src/arch/isa_parser.py +++ b/src/arch/isa_parser.py @@ -515,6 +515,9 @@ class Operand(object): def isCCReg(self): return 0 + def isVectorReg(self): + return 0 + def isControlReg(self): return 0 @@ -751,6 +754,106 @@ class CCRegOperand(Operand): return wb +class VectorRegOperand(Operand): + def isReg(self): + return 1 + + def isVectorReg(self): + return 1 + + def __init__(self, parser, full_name, ext, is_src, is_dest): + ## Vector registers are always treated as source registers since + ## not the whole of them might be written, in which case we need + ## to retain the earlier value. + super(VectorRegOperand, self).__init__(parser, full_name, ext, + True, is_dest) + self.size = 0 + + def finalize(self, predRead, predWrite): + self.flags = self.getFlags() + self.constructor = self.makeConstructor(predRead, predWrite) + self.op_decl = self.makeDecl() + + if self.is_src: + self.op_rd = self.makeRead(predRead) + self.op_src_decl = self.makeDecl() + else: + self.op_rd = '' + self.op_src_decl = '' + + if self.is_dest: + self.op_wb = self.makeWrite(predWrite) + self.op_dest_decl = self.makeDecl() + else: + self.op_wb = '' + self.op_dest_decl = '' + + def makeConstructor(self, predRead, predWrite): + c_src = '' + c_dest = '' + + if self.is_src: + c_src = '\n\t_srcRegIdx[_numSrcRegs++] = %s + Vector_Reg_Base;' % \ + (self.reg_spec) + if self.hasReadPred(): + c_src = '\n\tif (%s) {%s\n\t}' % \ + (self.read_predicate, c_src) + + if self.is_dest: + c_dest = '\n\t_destRegIdx[_numDestRegs++] = %s + Vector_Reg_Base;' % \ + (self.reg_spec) + c_dest += '\n\t_numVectorDestRegs++;' + if self.hasWritePred(): + c_dest = '\n\tif (%s) {%s\n\t}' % \ + (self.write_predicate, c_dest) + + return c_src + c_dest + + def makeRead(self, predRead): + if self.read_code != 
None: + return self.buildReadCode('readVectorRegOperand') + + vector_reg_val = '' + if predRead: + vector_reg_val = 'xc->readVectorRegOperand(this, _sourceIndex++)' + if self.hasReadPred(): + vector_reg_val = '(%s) ? %s : 0' % \ + (self.read_predicate, vector_reg_val) + else: + vector_reg_val = 'xc->readVectorRegOperand(this, %d)' % \ + self.src_reg_idx + + return '%s = %s;\n' % (self.base_name, vector_reg_val) + + def makeWrite(self, predWrite): + if self.write_code != None: + return self.buildWriteCode('setVectorRegOperand') + + if predWrite: + wp = 'true' + if self.hasWritePred(): + wp = self.write_predicate + + wcond = 'if (%s)' % (wp) + windex = '_destIndex++' + else: + wcond = '' + windex = '%d' % self.dest_reg_idx + + wb = ''' + %s + { + TheISA::VectorReg final_val = %s; + xc->setVectorRegOperand(this, %s, final_val);\n + if (traceData) { traceData->setData(final_val); } + }''' % (wcond, self.base_name, windex) + + return wb + + def makeDecl(self): + ctype = 'TheISA::VectorReg' + return '%s %s;\n' % (ctype, self.base_name) + class ControlRegOperand(Operand): def isReg(self): return 1 @@ -818,7 +921,10 @@ class MemOperand(Operand): # Note that initializations in the declarations are solely # to avoid 'uninitialized variable' errors from the compiler. # Declare memory data variable. 
- return '%s %s = 0;\n' % (self.ctype, self.base_name) + if 'IsVector' in self.flags: + return 'TheISA::VectorReg %s;\n' % self.base_name + else: + return '%s %s = 0;\n' % (self.ctype, self.base_name) def makeRead(self, predRead): if self.read_code != None: @@ -909,6 +1015,7 @@ class OperandList(object): self.numFPDestRegs = 0 self.numIntDestRegs = 0 self.numCCDestRegs = 0 + self.numVectorDestRegs = 0 self.numMiscDestRegs = 0 self.memOperand = None @@ -931,6 +1038,8 @@ class OperandList(object): self.numIntDestRegs += 1 elif op_desc.isCCReg(): self.numCCDestRegs += 1 + elif op_desc.isVectorReg(): + self.numVectorDestRegs += 1 elif op_desc.isControlReg(): self.numMiscDestRegs += 1 elif op_desc.isMem(): @@ -1127,6 +1236,7 @@ class InstObjParams(object): header += '\n\t_numFPDestRegs = 0;' header += '\n\t_numIntDestRegs = 0;' header += '\n\t_numCCDestRegs = 0;' + header += '\n\t_numVectorDestRegs = 0;' self.constructor = header + \ self.operands.concatAttrStrings('constructor') @@ -2292,7 +2402,8 @@ StaticInstPtr operandsREString = r''' (? VectorReg; + typedef union { IntReg intreg; FloatReg fpreg; diff --git a/src/arch/mips/utility.cc b/src/arch/mips/utility.cc index 80047fbfd..92ca8c6f0 100644 --- a/src/arch/mips/utility.cc +++ b/src/arch/mips/utility.cc @@ -252,6 +252,9 @@ copyRegs(ThreadContext *src, ThreadContext *dest) // Would need to add condition-code regs if implemented assert(NumCCRegs == 0); + // Copy vector registers when vector registers put to use. + assert(NumVectorRegs == 0); + // Copy misc. 
registers for (int i = 0; i < NumMiscRegs; i++) dest->setMiscRegNoEffect(i, src->readMiscRegNoEffect(i)); diff --git a/src/arch/null/registers.hh b/src/arch/null/registers.hh index 1e52fc5a6..3f1524554 100644 --- a/src/arch/null/registers.hh +++ b/src/arch/null/registers.hh @@ -49,6 +49,8 @@ typedef uint32_t FloatRegBits; typedef float FloatReg; typedef uint8_t CCReg; typedef uint64_t MiscReg; +typedef uint64_t VectorRegElement; +typedef std::array VectorReg; } diff --git a/src/arch/power/insts/static_inst.cc b/src/arch/power/insts/static_inst.cc index 087e1f740..5bd16b40d 100644 --- a/src/arch/power/insts/static_inst.cc +++ b/src/arch/power/insts/static_inst.cc @@ -57,6 +57,8 @@ PowerStaticInst::printReg(std::ostream &os, int reg) const } case CCRegClass: panic("printReg: POWER does not implement CCRegClass\n"); + case VectorRegClass: + panic("printReg: POWER does not implement VectorRegClass\n"); } } diff --git a/src/arch/power/isa.hh b/src/arch/power/isa.hh index aaf5bd92a..08ee82d5d 100644 --- a/src/arch/power/isa.hh +++ b/src/arch/power/isa.hh @@ -105,6 +105,13 @@ class ISA : public SimObject return reg; } + // dummy + int + flattenVectorIndex(int reg) const + { + return reg; + } + int flattenMiscIndex(int reg) const { diff --git a/src/arch/power/registers.hh b/src/arch/power/registers.hh index abee516fc..1d0b4a21f 100644 --- a/src/arch/power/registers.hh +++ b/src/arch/power/registers.hh @@ -55,6 +55,12 @@ typedef uint64_t MiscReg; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; +// typedefs for Vector registers +const int NumVectorRegElements = 0; +typedef uint64_t VectorRegElement; +const int VectorRegBytes = NumVectorRegElements * sizeof(VectorRegElement); +typedef std::array VectorReg; + // Constants Related to the number of registers const int NumIntArchRegs = 32; @@ -68,6 +74,7 @@ const int NumInternalProcRegs = 0; const int NumIntRegs = NumIntArchRegs + NumIntSpecialRegs; const int NumFloatRegs = NumFloatArchRegs + 
NumFloatSpecialRegs; const int NumCCRegs = 0; +const int NumVectorRegs = 0; const int NumMiscRegs = NUM_MISCREGS; // Semantically meaningful register indices @@ -90,7 +97,8 @@ const int SyscallSuccessReg = 3; // These help enumerate all the registers for dependence tracking. const int FP_Reg_Base = NumIntRegs; const int CC_Reg_Base = FP_Reg_Base + NumFloatRegs; -const int Misc_Reg_Base = CC_Reg_Base + NumCCRegs; // NumCCRegs == 0 +const int Vector_Reg_Base = CC_Reg_Base + NumCCRegs; // NumCCRegs == 0 +const int Misc_Reg_Base = Vector_Reg_Base + NumVectorRegs; // NumVectorRegs == 0 const int Max_Reg_Index = Misc_Reg_Base + NumMiscRegs; typedef union { diff --git a/src/arch/power/utility.cc b/src/arch/power/utility.cc index 7be195b8d..fa2a1d89b 100644 --- a/src/arch/power/utility.cc +++ b/src/arch/power/utility.cc @@ -51,6 +51,9 @@ copyRegs(ThreadContext *src, ThreadContext *dest) // Would need to add condition-code regs if implemented assert(NumCCRegs == 0); + // Copy vector registers when vector registers put to use. + assert(NumVectorRegs == 0); + // Copy misc. 
registers copyMiscRegs(src, dest); diff --git a/src/arch/sparc/isa.hh b/src/arch/sparc/isa.hh index 1d2a457d2..51e797c90 100644 --- a/src/arch/sparc/isa.hh +++ b/src/arch/sparc/isa.hh @@ -211,6 +211,13 @@ class ISA : public SimObject return reg; } + // dummy + int + flattenVectorIndex(int reg) const + { + return reg; + } + int flattenMiscIndex(int reg) const { diff --git a/src/arch/sparc/registers.hh b/src/arch/sparc/registers.hh index b25f34584..a59139ba2 100644 --- a/src/arch/sparc/registers.hh +++ b/src/arch/sparc/registers.hh @@ -51,6 +51,11 @@ typedef uint32_t FloatRegBits; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; +// vector register file entry type +typedef uint64_t VectorRegElement; +const int NumVectorRegElements = 0; +const int VectorRegBytes = NumVectorRegElements * sizeof(VectorRegElement); +typedef std::array<VectorRegElement, NumVectorRegElements> VectorReg; typedef union { @@ -75,6 +80,7 @@ const int SyscallPseudoReturnReg = 9; const int NumIntArchRegs = 32; const int NumIntRegs = (MaxGL + 1) * 8 + NWindows * 16 + NumMicroIntRegs; const int NumCCRegs = 0; +const int NumVectorRegs = 0; const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumMiscRegs; @@ -82,7 +88,8 @@ const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumMiscRegs; enum DependenceTags { FP_Reg_Base = NumIntRegs, CC_Reg_Base = FP_Reg_Base + NumFloatRegs, - Misc_Reg_Base = CC_Reg_Base + NumCCRegs, // NumCCRegs == 0 + Vector_Reg_Base = CC_Reg_Base + NumCCRegs, // NumCCRegs == 0 + Misc_Reg_Base = Vector_Reg_Base + NumVectorRegs, // NumVectorRegs == 0 Max_Reg_Index = Misc_Reg_Base + NumMiscRegs, }; diff --git a/src/arch/sparc/utility.cc b/src/arch/sparc/utility.cc index 34d4f79b3..6d7a1ba95 100644 --- a/src/arch/sparc/utility.cc +++ b/src/arch/sparc/utility.cc @@ -237,6 +237,9 @@ copyRegs(ThreadContext *src, ThreadContext *dest) // Would need to add condition-code regs if implemented assert(NumCCRegs == 0); + // Copy vector registers when vector registers put to use.
+ assert(NumVectorRegs == 0); + // Copy misc. registers copyMiscRegs(src, dest); diff --git a/src/arch/x86/insts/static_inst.cc b/src/arch/x86/insts/static_inst.cc index 39091289f..49ea6ef4e 100644 --- a/src/arch/x86/insts/static_inst.cc +++ b/src/arch/x86/insts/static_inst.cc @@ -225,12 +225,19 @@ namespace X86ISA ccprintf(os, "%%cc%d", rel_reg); break; + case VectorRegClass: + ccprintf(os, "%%vec%d", rel_reg); + break; + case MiscRegClass: switch (rel_reg) { default: ccprintf(os, "%%ctrl%d", rel_reg); } break; + + default: + panic("Invalid register class!\n"); } } diff --git a/src/arch/x86/isa.hh b/src/arch/x86/isa.hh index 88f4980ae..779241c55 100644 --- a/src/arch/x86/isa.hh +++ b/src/arch/x86/isa.hh @@ -91,6 +91,12 @@ namespace X86ISA return reg; } + int + flattenVectorIndex(int reg) const + { + return reg; + } + int flattenMiscIndex(int reg) const { diff --git a/src/arch/x86/registers.hh b/src/arch/x86/registers.hh index ebd88136e..ad40fe17f 100644 --- a/src/arch/x86/registers.hh +++ b/src/arch/x86/registers.hh @@ -57,6 +57,7 @@ const int NumMiscRegs = NUM_MISCREGS; const int NumIntArchRegs = NUM_INTREGS; const int NumIntRegs = NumIntArchRegs + NumMicroIntRegs + NumImplicitIntRegs; const int NumCCRegs = NUM_CCREGS; +const int NumVectorRegs = 0; #define ISA_HAS_CC_REGS @@ -72,7 +73,8 @@ enum DependenceTags { // we just start at (1 << 7) == 128.
FP_Reg_Base = 128, CC_Reg_Base = FP_Reg_Base + NumFloatRegs, - Misc_Reg_Base = CC_Reg_Base + NumCCRegs, + Vector_Reg_Base = CC_Reg_Base + NumCCRegs, + Misc_Reg_Base = Vector_Reg_Base + NumVectorRegs, Max_Reg_Index = Misc_Reg_Base + NumMiscRegs }; @@ -91,6 +93,13 @@ const int SyscallPseudoReturnReg = INTREG_RDX; typedef uint64_t IntReg; typedef uint64_t CCReg; + +// vector register file entry type +typedef uint64_t VectorRegElement; +const int NumVectorRegElements = 0; +const int VectorRegBytes = NumVectorRegElements * sizeof(VectorRegElement); +typedef std::array<VectorRegElement, NumVectorRegElements> VectorReg; + //XXX Should this be a 128 bit structure for XMM memory ops? typedef uint64_t LargestRead; typedef uint64_t MiscReg; diff --git a/src/arch/x86/utility.cc b/src/arch/x86/utility.cc index f7d0f816e..e1be61180 100644 --- a/src/arch/x86/utility.cc +++ b/src/arch/x86/utility.cc @@ -245,6 +245,10 @@ copyRegs(ThreadContext *src, ThreadContext *dest) //copy condition-code regs for (int i = 0; i < NumCCRegs; ++i) dest->setCCRegFlat(i, src->readCCRegFlat(i)); + + // copy vector regs when added to the architecture + assert(NumVectorRegs == 0); + copyMiscRegs(src, dest); dest->pcState(src->pcState()); } diff --git a/src/cpu/StaticInstFlags.py b/src/cpu/StaticInstFlags.py index ef29726fc..3b00e5df8 100644 --- a/src/cpu/StaticInstFlags.py +++ b/src/cpu/StaticInstFlags.py @@ -55,8 +55,8 @@ class StaticInstFlags(Enum): vals = [ 'IsNop', # Is a no-op (no effect at all). - 'IsInteger', # References integer regs. - 'IsFloating', # References FP regs. + 'IsInteger', # References scalar integer regs. + 'IsFloating', # References scalar FP regs. 'IsCC', # References CC regs.
'IsMemRef', # References memory (load, store, or prefetch) @@ -108,5 +108,6 @@ class StaticInstFlags(Enum): 'IsMicroBranch', # This microop branches within the microcode for # a macroop 'IsDspOp', - 'IsSquashAfter' # Squash all uncommitted state after executed + 'IsSquashAfter', # Squash all uncommitted state after executed + 'IsVector', # References vector register. ] diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 5b54679c9..515df6821 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -99,10 +99,19 @@ class BaseDynInst : public ExecContext, public RefCounted union Result { uint64_t integer; double dbl; + + // I am assuming that vector register type is different from the two + // types used above. Else it seems useless to have a separate typedef + // for vector registers. + VectorReg vector; + void set(uint64_t i) { integer = i; } void set(double d) { dbl = d; } + void set(const VectorReg &v) { vector = v; } + void get(uint64_t& i) { i = integer; } void get(double& d) { d = dbl; } + void get(VectorReg& v) { v = vector; } }; protected: @@ -521,6 +530,9 @@ class BaseDynInst : public ExecContext, public RefCounted bool isDataPrefetch() const { return staticInst->isDataPrefetch(); } bool isInteger() const { return staticInst->isInteger(); } bool isFloating() const { return staticInst->isFloating(); } + bool isVector() const { return staticInst->isVector(); } + bool isCC() const { return staticInst->isCC(); } + bool isControl() const { return staticInst->isControl(); } bool isCall() const { return staticInst->isCall(); } bool isReturn() const { return staticInst->isReturn(); } @@ -550,6 +562,11 @@ class BaseDynInst : public ExecContext, public RefCounted bool isFirstMicroop() const { return staticInst->isFirstMicroop(); } bool isMicroBranch() const { return staticInst->isMicroBranch(); } + void printFlags(std::ostream &outs, const std::string &separator) const + { staticInst->printFlags(outs, separator); } + + std::string 
getName() const { return staticInst->getName(); } + /** Temporarily sets this instruction as a serialize before instruction. */ void setSerializeBefore() { status.set(SerializeBefore); } @@ -596,6 +613,8 @@ class BaseDynInst : public ExecContext, public RefCounted int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); } int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); } int8_t numCCDestRegs() const { return staticInst->numCCDestRegs(); } + int8_t numVectorDestRegs() const + { return staticInst->numVectorDestRegs(); } /** Returns the logical register index of the i'th destination register. */ RegIndex destRegIdx(int i) const { return staticInst->destRegIdx(i); } @@ -655,6 +674,13 @@ class BaseDynInst : public ExecContext, public RefCounted setResult(val); } + /** Records a vector register being set to a value. */ + void setVectorRegOperand(const StaticInst *si, int idx, + const VectorReg &val) + { + setResult(val); + } + /** Records that one of the source registers is ready. */ void markSrcRegReady(); diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index a363b6d0f..6d75f7c12 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -94,6 +94,7 @@ class CheckerCPU : public BaseCPU, public ExecContext typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::MiscReg MiscReg; + typedef TheISA::VectorReg VectorReg; /** id attached to all issued requests */ MasterID masterId; @@ -145,10 +146,19 @@ class CheckerCPU : public BaseCPU, public ExecContext union Result { uint64_t integer; double dbl; + + // I am assuming that vector register type is different from the two + // types used above. Else it seems useless to have a separate typedef + // for vector registers. 
+ VectorReg vector; + void set(uint64_t i) { integer = i; } void set(double d) { dbl = d; } + void set(const VectorReg &v) { vector = v; } + void get(uint64_t& i) { i = integer; } void get(double& d) { d = dbl; } + void get(VectorReg& v) { v = vector; } }; // ISAs like ARM can have multiple destination registers to check, @@ -231,6 +241,11 @@ class CheckerCPU : public BaseCPU, public ExecContext return thread->readCCReg(reg_idx); } + const VectorReg &readVectorRegOperand(const StaticInst *si, int idx) + { + return thread->readVectorReg(si->srcRegIdx(idx)); + } + template void setResult(T t) { @@ -267,6 +282,13 @@ class CheckerCPU : public BaseCPU, public ExecContext setResult(val); } + void setVectorRegOperand(const StaticInst *si, int idx, + const VectorReg &val) + { + thread->setVectorReg(si->destRegIdx(idx), val); + setResult(val); + } + bool readPredicate() { return thread->readPredicate(); } void setPredicate(bool val) { @@ -441,7 +463,7 @@ class Checker : public CheckerCPU void validateExecution(DynInstPtr &inst); void validateState(); - void copyResult(DynInstPtr &inst, uint64_t mismatch_val, int start_idx); + void copyResult(DynInstPtr &inst, Result mismatch_val, int start_idx); void handlePendingInt(); private: diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh index 289861521..d6a467358 100644 --- a/src/cpu/checker/cpu_impl.hh +++ b/src/cpu/checker/cpu_impl.hh @@ -491,7 +491,9 @@ Checker::validateExecution(DynInstPtr &inst) // Unverifiable instructions assume they were executed // properly by the CPU. Grab the result from the // instruction and write it to the register. 
- copyResult(inst, 0, idx); + Result r; + r.integer = 0; + copyResult(inst, r, idx); } else if (inst->numDestRegs() > 0 && !result.empty()) { DPRINTF(Checker, "Dest regs %d, number of checker dest regs %d\n", inst->numDestRegs(), result.size()); @@ -525,7 +527,9 @@ Checker::validateExecution(DynInstPtr &inst) // The load/store queue in Detailed CPU can also cause problems // if load/store forwarding is allowed. if (inst->isLoad() && warnOnlyOnLoadError) { - copyResult(inst, inst_val, idx); + Result r; + r.integer = inst_val; + copyResult(inst, r, idx); } else { handleError(inst); } @@ -590,7 +594,7 @@ Checker::validateState() template void -Checker::copyResult(DynInstPtr &inst, uint64_t mismatch_val, +Checker::copyResult(DynInstPtr &inst, Result mismatch_val, int start_idx) { // We've already popped one dest off the queue, @@ -599,39 +603,65 @@ Checker::copyResult(DynInstPtr &inst, uint64_t mismatch_val, RegIndex idx = inst->destRegIdx(start_idx); switch (regIdxToClass(idx)) { case IntRegClass: - thread->setIntReg(idx, mismatch_val); + thread->setIntReg(idx, mismatch_val.integer); break; case FloatRegClass: - thread->setFloatRegBits(idx - TheISA::FP_Reg_Base, mismatch_val); + thread->setFloatRegBits(idx - TheISA::FP_Reg_Base, + mismatch_val.integer); break; case CCRegClass: - thread->setCCReg(idx - TheISA::CC_Reg_Base, mismatch_val); + thread->setCCReg(idx - TheISA::CC_Reg_Base, mismatch_val.integer); + break; + case VectorRegClass: + thread->setVectorReg(idx - TheISA::Vector_Reg_Base, + mismatch_val.vector); break; case MiscRegClass: thread->setMiscReg(idx - TheISA::Misc_Reg_Base, - mismatch_val); + mismatch_val.integer); break; } } + start_idx++; - uint64_t res = 0; for (int i = start_idx; i < inst->numDestRegs(); i++) { RegIndex idx = inst->destRegIdx(i); - inst->template popResult(res); switch (regIdxToClass(idx)) { - case IntRegClass: - thread->setIntReg(idx, res); - break; - case FloatRegClass: - thread->setFloatRegBits(idx - TheISA::FP_Reg_Base, res); - 
break; - case CCRegClass: - thread->setCCReg(idx - TheISA::CC_Reg_Base, res); - break; - case MiscRegClass: - // Try to get the proper misc register index for ARM here... - thread->setMiscReg(idx - TheISA::Misc_Reg_Base, res); - break; + case IntRegClass: { + uint64_t res = 0; + inst->template popResult(res); + thread->setIntReg(idx, res); + } + break; + + case FloatRegClass: { + uint64_t res = 0; + inst->template popResult(res); + thread->setFloatRegBits(idx - TheISA::FP_Reg_Base, res); + } + break; + + case CCRegClass: { + uint64_t res = 0; + inst->template popResult(res); + thread->setCCReg(idx - TheISA::CC_Reg_Base, res); + } + break; + + case VectorRegClass: { + VectorReg res; + inst->template popResult(res); + thread->setVectorReg(idx - TheISA::Vector_Reg_Base, res); + } + break; + + case MiscRegClass: { + // Try to get the proper misc register index for ARM here... + uint64_t res = 0; + inst->template popResult(res); + thread->setMiscReg(idx - TheISA::Misc_Reg_Base, res); + } + break; // else Register is out of range... } } diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index 71c231ba0..436c97847 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -216,6 +216,9 @@ class CheckerThreadContext : public ThreadContext CCReg readCCReg(int reg_idx) { return actualTC->readCCReg(reg_idx); } + const VectorReg &readVectorReg(int reg_idx) + { return actualTC->readVectorReg(reg_idx); } + void setIntReg(int reg_idx, uint64_t val) { actualTC->setIntReg(reg_idx, val); @@ -240,6 +243,12 @@ class CheckerThreadContext : public ThreadContext checkerTC->setCCReg(reg_idx, val); } + void setVectorReg(int reg_idx, const VectorReg &val) + { + actualTC->setVectorReg(reg_idx, val); + checkerTC->setVectorReg(reg_idx, val); + } + /** Reads this thread's PC state. 
*/ TheISA::PCState pcState() { return actualTC->pcState(); } @@ -296,6 +305,7 @@ class CheckerThreadContext : public ThreadContext int flattenIntIndex(int reg) { return actualTC->flattenIntIndex(reg); } int flattenFloatIndex(int reg) { return actualTC->flattenFloatIndex(reg); } int flattenCCIndex(int reg) { return actualTC->flattenCCIndex(reg); } + int flattenVectorIndex(int reg) { return actualTC->flattenVectorIndex(reg); } int flattenMiscIndex(int reg) { return actualTC->flattenMiscIndex(reg); } unsigned readStCondFailures() @@ -331,6 +341,12 @@ class CheckerThreadContext : public ThreadContext void setCCRegFlat(int idx, CCReg val) { actualTC->setCCRegFlat(idx, val); } + + const VectorReg &readVectorRegFlat(int idx) + { return actualTC->readVectorRegFlat(idx); } + + void setVectorRegFlat(int idx, const VectorReg &val) + { actualTC->setVectorRegFlat(idx, val); } }; #endif // __CPU_CHECKER_EXEC_CONTEXT_HH__ diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh index c65841db2..5c6b3fad7 100644 --- a/src/cpu/exec_context.hh +++ b/src/cpu/exec_context.hh @@ -76,6 +76,7 @@ class ExecContext { typedef TheISA::MiscReg MiscReg; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; public: /** @@ -126,6 +127,22 @@ class ExecContext { virtual void setCCRegOperand(const StaticInst *si, int idx, CCReg val) = 0; /** @} */ + /** + * @{ + * @name Vector Register Interfaces + * + */ + + /** Reads a vector register. */ + virtual const VectorReg &readVectorRegOperand (const StaticInst *si, + int idx) = 0; + + /** Sets a vector register to a value. 
*/ + virtual void setVectorRegOperand(const StaticInst *si, + int idx, const VectorReg &val) = 0; + + /** @} */ + /** * @{ * @name Misc Register Interfaces diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc index ab08e6b4a..03cf785ef 100644 --- a/src/cpu/minor/dyn_inst.cc +++ b/src/cpu/minor/dyn_inst.cc @@ -157,6 +157,10 @@ printRegName(std::ostream &os, TheISA::RegIndex reg) break; case CCRegClass: os << 'c' << static_cast<unsigned int>(reg - TheISA::CC_Reg_Base); + break; + case VectorRegClass: + os << 'v' << static_cast<unsigned int>(reg - TheISA::Vector_Reg_Base); + break; } } diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh index 80d5d9872..6ea74047c 100644 --- a/src/cpu/minor/exec_context.hh +++ b/src/cpu/minor/exec_context.hh @@ -140,6 +140,20 @@ class ExecContext : public ::ExecContext return thread.readFloatRegBits(reg_idx); } + TheISA::CCReg + readCCRegOperand(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::CC_Reg_Base; + return thread.readCCReg(reg_idx); + } + + const TheISA::VectorReg & + readVectorRegOperand(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::Vector_Reg_Base; + return thread.readVectorReg(reg_idx); + } + void setIntRegOperand(const StaticInst *si, int idx, IntReg val) { @@ -162,6 +176,21 @@ class ExecContext : public ::ExecContext thread.setFloatRegBits(reg_idx, val); } + void + setCCRegOperand(const StaticInst *si, int idx, TheISA::CCReg val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::CC_Reg_Base; + thread.setCCReg(reg_idx, val); + } + + void + setVectorRegOperand(const StaticInst *si, int idx, + const TheISA::VectorReg &val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::Vector_Reg_Base; + thread.setVectorReg(reg_idx, val); + } + bool readPredicate() { @@ -265,20 +294,6 @@ class ExecContext : public ::ExecContext thread.getDTBPtr()->demapPage(vaddr, asn); } - TheISA::CCReg - readCCRegOperand(const StaticInst *si, int idx) - { - int reg_idx = si->srcRegIdx(idx) -
TheISA::CC_Reg_Base; - return thread.readCCReg(reg_idx); - } - - void - setCCRegOperand(const StaticInst *si, int idx, TheISA::CCReg val) - { - int reg_idx = si->destRegIdx(idx) - TheISA::CC_Reg_Base; - thread.setCCReg(reg_idx, val); - } - void demapInstPage(Addr vaddr, uint64_t asn) { diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc index f6b1f7944..3eb09271a 100644 --- a/src/cpu/minor/scoreboard.cc +++ b/src/cpu/minor/scoreboard.cc @@ -71,6 +71,11 @@ Scoreboard::findIndex(RegIndex reg, Index &scoreboard_index) scoreboard_index = TheISA::NumIntRegs + reg - TheISA::FP_Reg_Base; ret = true; break; + case VectorRegClass: + scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs + + TheISA::NumFloatRegs + reg - TheISA::Vector_Reg_Base; + ret = true; + break; case MiscRegClass: /* Don't bother with Misc registers */ ret = false; @@ -99,6 +104,9 @@ flattenRegIndex(TheISA::RegIndex reg, ThreadContext *thread_context) case CCRegClass: ret = thread_context->flattenCCIndex(reg); break; + case VectorRegClass: + ret = thread_context->flattenVectorIndex(reg); + break; case MiscRegClass: /* Don't bother to flatten misc regs as we don't need them here */ /* return thread_context->flattenMiscIndex(reg); */ diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh index 711bcafb2..3a3a9d3c3 100644 --- a/src/cpu/minor/scoreboard.hh +++ b/src/cpu/minor/scoreboard.hh @@ -60,11 +60,13 @@ class Scoreboard : public Named { public: /** The number of registers in the Scoreboard. 
These - * are just the integer, CC and float registers packed + * are just the integer, CC, float and vector registers packed * together with integer regs in the range [0,NumIntRegs-1], - * CC regs in the range [NumIntRegs, NumIntRegs+NumCCRegs-1] - * and float regs in the range - * [NumIntRegs+NumCCRegs, NumFloatRegs+NumIntRegs+NumCCRegs-1] */ + * CC regs in the range [NumIntRegs, NumIntRegs + NumCCRegs - 1], + * float regs in the range + * [NumIntRegs + NumCCRegs, NumFloatRegs + NumIntRegs + NumCCRegs - 1] + * and vector regs in the range [NumFloatRegs + NumIntRegs + NumCCRegs, + * NumFloatRegs + NumIntRegs + NumCCRegs + NumVectorRegs - 1]*/ const unsigned numRegs; /** Type to use for thread context registers */ @@ -97,7 +99,7 @@ class Scoreboard : public Named Scoreboard(const std::string &name) : Named(name), numRegs(TheISA::NumIntRegs + TheISA::NumCCRegs + - TheISA::NumFloatRegs), + TheISA::NumFloatRegs + TheISA::NumVectorRegs), numResults(numRegs, 0), numUnpredictableResults(numRegs, 0), fuIndices(numRegs, 0), diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index 92f96a3b6..d2220de82 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -114,6 +114,7 @@ class DerivO3CPU(BaseCPU): numPhysIntRegs = Param.Unsigned(256, "Number of physical integer registers") numPhysFloatRegs = Param.Unsigned(256, "Number of physical floating point " "registers") + # most ISAs don't use condition-code regs, so default is 0 _defaultNumPhysCCRegs = 0 if buildEnv['TARGET_ISA'] in ('arm','x86'): @@ -126,6 +127,12 @@ class DerivO3CPU(BaseCPU): _defaultNumPhysCCRegs = Self.numPhysIntRegs * 5 numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs, "Number of physical cc registers") + + # most ISAs don't use vector regs, so default is 0 + _defaultNumPhysVectorRegs = 0 + numPhysVectorRegs = Param.Unsigned(_defaultNumPhysVectorRegs, + "Number of physical vector registers") + numIQEntries = Param.Unsigned(64, "Number of instruction queue entries") numROBEntries = 
Param.Unsigned(192, "Number of reorder buffer entries") diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 026907a94..d8f39bbe4 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -170,7 +170,8 @@ FullO3CPU::FullO3CPU(DerivO3CPUParams *params) regFile(params->numPhysIntRegs, params->numPhysFloatRegs, - params->numPhysCCRegs), + params->numPhysCCRegs, + params->numPhysVectorRegs), freeList(name() + ".freelist", &regFile), @@ -269,6 +270,7 @@ FullO3CPU::FullO3CPU(DerivO3CPUParams *params) assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); + assert(params->numPhysVectorRegs >= numThreads * TheISA::NumVectorRegs); rename.setScoreboard(&scoreboard); iew.setScoreboard(&scoreboard); @@ -313,6 +315,12 @@ FullO3CPU::FullO3CPU(DerivO3CPUParams *params) renameMap[tid].setCCEntry(ridx, phys_reg); commitRenameMap[tid].setCCEntry(ridx, phys_reg); } + + for (RegIndex ridx = 0; ridx < TheISA::NumVectorRegs; ++ridx) { + PhysRegIndex phys_reg = freeList.getVectorReg(); + renameMap[tid].setVectorEntry(ridx, phys_reg); + commitRenameMap[tid].setVectorEntry(ridx, phys_reg); + } } rename.setRenameMap(renameMap); @@ -521,6 +529,16 @@ FullO3CPU::regStats() .desc("number of cc regfile writes") .prereq(ccRegfileWrites); + vectorRegfileReads + .name(name() + ".vector_regfile_reads") + .desc("number of vector regfile reads") + .prereq(vectorRegfileReads); + + vectorRegfileWrites + .name(name() + ".vector_regfile_writes") + .desc("number of vector regfile writes") + .prereq(vectorRegfileWrites); + miscRegfileReads .name(name() + ".misc_regfile_reads") .desc("number of misc regfile reads") @@ -807,6 +825,18 @@ FullO3CPU::insertThread(ThreadID tid) scoreboard.setReg(phys_reg); } + //Bind vector Regs to Rename Map + max_reg = TheISA::NumIntRegs + TheISA::NumFloatRegs + TheISA::NumCCRegs + + TheISA::NumVectorRegs; + for (int vreg = 
TheISA::NumIntRegs + TheISA::NumFloatRegs + + TheISA::NumCCRegs; + vreg < max_reg; vreg++) { + PhysRegIndex phys_reg = freeList.getVectorReg(); + + renameMap[tid].setEntry(vreg, phys_reg); + scoreboard.setReg(phys_reg); + } + //Copy Thread Data Into RegFile //this->copyFromTC(tid); @@ -860,6 +890,14 @@ FullO3CPU::removeThread(ThreadID tid) freeList.addReg(phys_reg); } + // Unbind vector Regs from Rename Map + max_reg = TheISA::Vector_Reg_Base + TheISA::NumVectorRegs; + for (int vreg = TheISA::Vector_Reg_Base; vreg < max_reg; vreg++) { + PhysRegIndex phys_reg = renameMap[tid].lookup(vreg); + scoreboard.unsetReg(phys_reg); + freeList.addReg(phys_reg); + } + // Squash Throughout Pipeline DynInstPtr inst = commit.rob->readHeadInst(tid); InstSeqNum squash_seq_num = inst->seqNum; @@ -1258,6 +1296,14 @@ FullO3CPU::readCCReg(int reg_idx) return regFile.readCCReg(reg_idx); } +template +const VectorReg & +FullO3CPU::readVectorReg(int reg_idx) +{ + vectorRegfileReads++; + return regFile.readVectorReg(reg_idx); +} + template void FullO3CPU::setIntReg(int reg_idx, uint64_t val) @@ -1290,6 +1336,14 @@ FullO3CPU::setCCReg(int reg_idx, CCReg val) regFile.setCCReg(reg_idx, val); } +template +void +FullO3CPU::setVectorReg(int reg_idx, const VectorReg &val) +{ + vectorRegfileWrites++; + regFile.setVectorReg(reg_idx, val); +} + template uint64_t FullO3CPU::readArchIntReg(int reg_idx, ThreadID tid) @@ -1330,6 +1384,16 @@ FullO3CPU::readArchCCReg(int reg_idx, ThreadID tid) return regFile.readCCReg(phys_reg); } +template +const VectorReg& +FullO3CPU::readArchVectorReg(int reg_idx, ThreadID tid) +{ + vectorRegfileReads++; + PhysRegIndex phys_reg = commitRenameMap[tid].lookupVector(reg_idx); + + return regFile.readVectorReg(phys_reg); +} + template void FullO3CPU::setArchIntReg(int reg_idx, uint64_t val, ThreadID tid) @@ -1370,6 +1434,16 @@ FullO3CPU::setArchCCReg(int reg_idx, CCReg val, ThreadID tid) regFile.setCCReg(phys_reg, val); } +template +void 
+FullO3CPU::setArchVectorReg(int reg_idx, const VectorReg &val, + ThreadID tid) +{ + vectorRegfileWrites++; + PhysRegIndex phys_reg = commitRenameMap[tid].lookupVector(reg_idx); + regFile.setVectorReg(phys_reg, val); +} + template TheISA::PCState FullO3CPU::pcState(ThreadID tid) diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index aa02ee2ea..f16450d19 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -427,6 +427,8 @@ class FullO3CPU : public BaseO3CPU TheISA::CCReg readCCReg(int reg_idx); + const TheISA::VectorReg &readVectorReg(int reg_idx); + void setIntReg(int reg_idx, uint64_t val); void setFloatReg(int reg_idx, TheISA::FloatReg val); @@ -435,6 +437,8 @@ class FullO3CPU : public BaseO3CPU void setCCReg(int reg_idx, TheISA::CCReg val); + void setVectorReg(int reg_idx, const TheISA::VectorReg &val); + uint64_t readArchIntReg(int reg_idx, ThreadID tid); float readArchFloatReg(int reg_idx, ThreadID tid); @@ -443,6 +447,8 @@ class FullO3CPU : public BaseO3CPU TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid); + const TheISA::VectorReg &readArchVectorReg(int reg_idx, ThreadID tid); + /** Architectural register accessors. Looks up in the commit * rename table to obtain the true physical index of the * architected register first, then accesses that physical @@ -456,6 +462,9 @@ class FullO3CPU : public BaseO3CPU void setArchCCReg(int reg_idx, TheISA::CCReg val, ThreadID tid); + void setArchVectorReg(int reg_idx, const TheISA::VectorReg &val, + ThreadID tid); + /** Sets the commit PC state of a specific thread. 
*/ void pcState(const TheISA::PCState &newPCState, ThreadID tid); @@ -734,6 +743,9 @@ class FullO3CPU : public BaseO3CPU //number of CC register file accesses Stats::Scalar ccRegfileReads; Stats::Scalar ccRegfileWrites; + //number of vector register file accesses + Stats::Scalar vectorRegfileReads; + Stats::Scalar vectorRegfileWrites; //number of misc Stats::Scalar miscRegfileReads; Stats::Scalar miscRegfileWrites; diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index 6740c601d..d19e4d461 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -74,6 +74,7 @@ class BaseO3DynInst : public BaseDynInst typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; /** Misc register index type. */ typedef TheISA::MiscReg MiscReg; @@ -206,7 +207,6 @@ class BaseO3DynInst : public BaseDynInst void forwardOldRegs() { - for (int idx = 0; idx < this->numDestRegs(); idx++) { PhysRegIndex prev_phys_reg = this->prevDestRegIdx(idx); TheISA::RegIndex original_dest_reg = @@ -224,6 +224,11 @@ class BaseO3DynInst : public BaseDynInst this->setCCRegOperand(this->staticInst.get(), idx, this->cpu->readCCReg(prev_phys_reg)); break; + case VectorRegClass: + this->setVectorRegOperand(this->staticInst.get(), idx, + this->cpu->readVectorReg(prev_phys_reg)); + break; + case MiscRegClass: // no need to forward misc reg values break; @@ -272,6 +277,11 @@ class BaseO3DynInst : public BaseDynInst return this->cpu->readCCReg(this->_srcRegIdx[idx]); } + const VectorReg &readVectorRegOperand(const StaticInst *si, int idx) + { + return this->cpu->readVectorReg(this->_srcRegIdx[idx]); + } + /** @todo: Make results into arrays so they can handle multiple dest * registers. 
*/ @@ -300,6 +310,13 @@ class BaseO3DynInst : public BaseDynInst BaseDynInst::setCCRegOperand(si, idx, val); } + void setVectorRegOperand(const StaticInst *si, int idx, + const VectorReg &val) + { + this->cpu->setVectorReg(this->_destRegIdx[idx], val); + BaseDynInst::setVectorRegOperand(si, idx, val); + } + #if THE_ISA == MIPS_ISA MiscReg readRegOtherThread(int misc_reg, ThreadID tid) { diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh index aa805e26e..d345d7ac8 100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@ -109,6 +109,9 @@ class UnifiedFreeList /** The list of free condition-code registers. */ SimpleFreeList ccList; + /** The list of free vector registers. */ + SimpleFreeList vectorList; + /** * The register file object is used only to distinguish integer * from floating-point physical register indices. @@ -148,6 +151,9 @@ class UnifiedFreeList /** Gets a free cc register. */ PhysRegIndex getCCReg() { return ccList.getReg(); } + /** Gets a free vector register. */ + PhysRegIndex getVectorReg() { return vectorList.getReg(); } + /** Adds a register back to the free list. */ void addReg(PhysRegIndex freed_reg); @@ -160,6 +166,9 @@ class UnifiedFreeList /** Adds a cc register back to the free list. */ void addCCReg(PhysRegIndex freed_reg) { ccList.addReg(freed_reg); } + /** Adds a vector register back to the free list. */ + void addVectorReg(PhysRegIndex freed_reg) { vectorList.addReg(freed_reg); } + /** Checks if there are any free integer registers. */ bool hasFreeIntRegs() const { return intList.hasFreeRegs(); } @@ -169,6 +178,9 @@ class UnifiedFreeList /** Checks if there are any free cc registers. */ bool hasFreeCCRegs() const { return ccList.hasFreeRegs(); } + /** Checks if there are any free vector registers. */ + bool hasFreeVectorRegs() const { return vectorList.hasFreeRegs(); } + /** Returns the number of free integer registers. 
*/ unsigned numFreeIntRegs() const { return intList.numFreeRegs(); } @@ -177,6 +189,9 @@ class UnifiedFreeList /** Returns the number of free cc registers. */ unsigned numFreeCCRegs() const { return ccList.numFreeRegs(); } + + /** Returns the number of free vector registers. */ + unsigned numFreeVectorRegs() const { return vectorList.numFreeRegs(); } }; inline void @@ -189,9 +204,11 @@ UnifiedFreeList::addReg(PhysRegIndex freed_reg) intList.addReg(freed_reg); } else if (regFile->isFloatPhysReg(freed_reg)) { floatList.addReg(freed_reg); - } else { - assert(regFile->isCCPhysReg(freed_reg)); + } else if (regFile->isCCPhysReg(freed_reg)) { ccList.addReg(freed_reg); + } else { + assert(regFile->isVectorPhysReg(freed_reg)); + vectorList.addReg(freed_reg); } // These assert conditions ensure that the number of free diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 7d359b992..e16843160 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -99,7 +99,7 @@ InstructionQueue::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, // Set the number of total physical registers numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs + - params->numPhysCCRegs; + params->numPhysCCRegs + params->numPhysVectorRegs; //Create an entry for each physical register within the //dependency graph. 
diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc index 96ce44bdd..a7476c5ec 100644 --- a/src/cpu/o3/regfile.cc +++ b/src/cpu/o3/regfile.cc @@ -37,15 +37,20 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, - unsigned _numPhysicalCCRegs) + unsigned _numPhysicalCCRegs, + unsigned _numPhysicalVectorRegs) : intRegFile(_numPhysicalIntRegs), floatRegFile(_numPhysicalFloatRegs), ccRegFile(_numPhysicalCCRegs), + vectorRegFile(_numPhysicalVectorRegs), baseFloatRegIndex(_numPhysicalIntRegs), baseCCRegIndex(_numPhysicalIntRegs + _numPhysicalFloatRegs), + baseVectorRegIndex(_numPhysicalIntRegs + _numPhysicalFloatRegs + + _numPhysicalCCRegs), totalNumRegs(_numPhysicalIntRegs + _numPhysicalFloatRegs - + _numPhysicalCCRegs) + + _numPhysicalCCRegs + + _numPhysicalVectorRegs) { if (TheISA::NumCCRegs == 0 && _numPhysicalCCRegs != 0) { // Just make this a warning and go ahead and allocate them @@ -53,6 +58,13 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, warn("Non-zero number of physical CC regs specified, even though\n" " ISA does not use them.\n"); } + + if (TheISA::NumVectorRegs == 0 && _numPhysicalVectorRegs != 0) { + // Just make this a warning and go ahead and allocate them + // anyway, to keep from having to add checks everywhere + warn("Non-zero number of physical vector regs specified, even though\n" + " ISA does not use them.\n"); + } } @@ -73,9 +85,15 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList) freeList->addFloatReg(reg_idx++); } - // The rest of the registers are the condition-code physical + // The next batch of registers are the condition-code physical // registers; put them onto the condition-code free list. - while (reg_idx < totalNumRegs) { + while (reg_idx < baseVectorRegIndex) { freeList->addCCReg(reg_idx++); } + + // The rest of the registers are the vector physical + // registers; put them onto the vector free list. 
+ while (reg_idx < totalNumRegs) { + freeList->addVectorReg(reg_idx++); + } } diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 8b87725ca..71ca5015f 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -56,6 +56,7 @@ class PhysRegFile typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; typedef union { FloatReg d; @@ -71,6 +72,9 @@ class PhysRegFile /** Condition-code register file. */ std::vector ccRegFile; + /** Vector register file. */ + std::vector vectorRegFile; + /** * The first floating-point physical register index. The physical * register file has a single continuous index space, with the @@ -93,6 +97,12 @@ class PhysRegFile */ unsigned baseCCRegIndex; + /** + * The first vector physical register index. The vector registers follow + * the condition-code registers. + */ + unsigned baseVectorRegIndex; + /** Total number of physical registers. */ unsigned totalNumRegs; @@ -103,7 +113,8 @@ class PhysRegFile */ PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, - unsigned _numPhysicalCCRegs); + unsigned _numPhysicalCCRegs, + unsigned _numPhysicalVectorRegs); /** * Destructor to free resources @@ -122,7 +133,11 @@ class PhysRegFile /** @return the number of condition-code physical registers. */ unsigned numCCPhysRegs() const - { return totalNumRegs - baseCCRegIndex; } + { return baseVectorRegIndex - baseCCRegIndex; } + + /** @return the number of vector physical registers. */ + unsigned numVectorPhysRegs() const + { return totalNumRegs - baseVectorRegIndex; } /** @return the total number of physical registers. 
*/ unsigned totalNumPhysRegs() const { return totalNumRegs; } @@ -151,7 +166,16 @@ class PhysRegFile */ bool isCCPhysReg(PhysRegIndex reg_idx) { - return (baseCCRegIndex <= reg_idx && reg_idx < totalNumRegs); + return (baseCCRegIndex <= reg_idx && reg_idx < baseVectorRegIndex); + } + + /** + * @return true if the specified physical register index + * corresponds to a vector physical register. + */ + bool isVectorPhysReg(PhysRegIndex reg_idx) const + { + return baseVectorRegIndex <= reg_idx && reg_idx < totalNumRegs; } /** Reads an integer register. */ @@ -207,6 +231,18 @@ class PhysRegFile return ccRegFile[reg_offset]; } + /** Reads a vector register. */ + const VectorReg &readVectorReg(PhysRegIndex reg_idx) const + { + assert(isVectorPhysReg(reg_idx)); + + // Remove the base vector reg dependency. + PhysRegIndex reg_offset = reg_idx - baseVectorRegIndex; + + DPRINTF(IEW, "RegFile: Access to vector register %i\n", int(reg_idx)); + return vectorRegFile[reg_offset]; + } + /** Sets an integer register to the given value. */ void setIntReg(PhysRegIndex reg_idx, uint64_t val) { @@ -262,6 +298,16 @@ class PhysRegFile ccRegFile[reg_offset] = val; } + + /** Sets a vector register to the given value. */ + void setVectorReg(PhysRegIndex reg_idx, const VectorReg &val) + { + assert(isVectorPhysReg(reg_idx)); + // Remove the base vector reg dependency. 
+ PhysRegIndex reg_offset = reg_idx - baseVectorRegIndex; + DPRINTF(IEW, "RegFile: Setting vector register %i\n", int(reg_idx)); + vectorRegFile[reg_offset] = val; + } }; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 43b7ba9aa..3da6fd4fa 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -69,7 +69,7 @@ DefaultRename::DefaultRename(O3CPU *_cpu, DerivO3CPUParams *params) commitWidth(params->commitWidth), numThreads(params->numThreads), maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs - + params->numPhysCCRegs) + + params->numPhysCCRegs + params->numPhysVectorRegs) { if (renameWidth > Impl::MaxWidth) fatal("renameWidth (%d) is larger than compiled limit (%d),\n" @@ -635,7 +635,8 @@ DefaultRename::renameInsts(ThreadID tid) // to rename to. Otherwise block. if (!renameMap[tid]->canRename(inst->numIntDestRegs(), inst->numFPDestRegs(), - inst->numCCDestRegs())) { + inst->numCCDestRegs(), + inst->numVectorDestRegs())) { DPRINTF(Rename, "Blocking due to lack of free " "physical registers to rename to.\n"); blockThisCycle = true; @@ -1016,6 +1017,11 @@ DefaultRename::renameSrcRegs(DynInstPtr &inst, ThreadID tid) renamed_reg = map->lookupCC(flat_rel_src_reg); break; + case VectorRegClass: + flat_rel_src_reg = tc->flattenVectorIndex(rel_src_reg); + renamed_reg = map->lookupVector(flat_rel_src_reg); + break; + case MiscRegClass: // misc regs don't get flattened flat_rel_src_reg = rel_src_reg; @@ -1082,6 +1088,12 @@ DefaultRename::renameDestRegs(DynInstPtr &inst, ThreadID tid) flat_uni_dest_reg = flat_rel_dest_reg + TheISA::CC_Reg_Base; break; + case VectorRegClass: + flat_rel_dest_reg = tc->flattenVectorIndex(rel_dest_reg); + rename_result = map->renameVector(flat_rel_dest_reg); + flat_uni_dest_reg = flat_rel_dest_reg + TheISA::Vector_Reg_Base; + break; + case MiscRegClass: // misc regs don't get flattened flat_rel_dest_reg = rel_dest_reg; @@ -1156,7 +1168,7 @@ inline int 
DefaultRename::calcFreeLQEntries(ThreadID tid) { int num_free = freeEntries[tid].lqEntries - - (loadsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLQ); + (loadsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLQ); DPRINTF(Rename, "calcFreeLQEntries: free lqEntries: %d, loadsInProgress: %d, " "loads dispatchedToLQ: %d\n", freeEntries[tid].lqEntries, loadsInProgress[tid], fromIEW->iewInfo[tid].dispatchedToLQ); @@ -1168,7 +1180,7 @@ inline int DefaultRename::calcFreeSQEntries(ThreadID tid) { int num_free = freeEntries[tid].sqEntries - - (storesInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToSQ); + (storesInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToSQ); DPRINTF(Rename, "calcFreeSQEntries: free sqEntries: %d, storesInProgress: %d, " "stores dispatchedToSQ: %d\n", freeEntries[tid].sqEntries, storesInProgress[tid], fromIEW->iewInfo[tid].dispatchedToSQ); diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index b0232df20..27ddd8c63 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -99,6 +99,9 @@ UnifiedRenameMap::init(PhysRegFile *_regFile, floatMap.init(TheISA::NumFloatRegs, &(freeList->floatList), _floatZeroReg); ccMap.init(TheISA::NumCCRegs, &(freeList->ccList), (RegIndex)-1); + + vectorMap.init(TheISA::NumVectorRegs, &(freeList->vectorList), + (RegIndex)-1); } @@ -117,6 +120,9 @@ UnifiedRenameMap::rename(RegIndex arch_reg) case CCRegClass: return renameCC(rel_arch_reg); + case VectorRegClass: + return renameVector(rel_arch_reg); + case MiscRegClass: return renameMisc(rel_arch_reg); @@ -142,6 +148,9 @@ UnifiedRenameMap::lookup(RegIndex arch_reg) const case CCRegClass: return lookupCC(rel_arch_reg); + case VectorRegClass: + return lookupVector(rel_arch_reg); + case MiscRegClass: return lookupMisc(rel_arch_reg); @@ -166,6 +175,9 @@ UnifiedRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex phys_reg) case CCRegClass: return setCCEntry(rel_arch_reg, phys_reg); + case VectorRegClass: + return 
setVectorEntry(rel_arch_reg, phys_reg); + case MiscRegClass: // Misc registers do not actually rename, so don't change // their mappings. We end up here when a commit or squash diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh index 9d91f232e..37487c3d3 100644 --- a/src/cpu/o3/rename_map.hh +++ b/src/cpu/o3/rename_map.hh @@ -178,6 +178,9 @@ class UnifiedRenameMap /** The condition-code register rename map */ SimpleRenameMap ccMap; + /** The vector register rename map */ + SimpleRenameMap vectorMap; + public: typedef TheISA::RegIndex RegIndex; @@ -239,6 +242,17 @@ class UnifiedRenameMap return info; } + /** + * Perform rename() on a vector register, given a relative vector register + * index. + */ + RenameInfo renameVector(RegIndex rel_arch_reg) + { + RenameInfo info = vectorMap.rename(rel_arch_reg); + assert(regFile->isVectorPhysReg(info.first)); + return info; + } + /** * Perform rename() on a misc register, given a relative * misc register index. @@ -296,6 +310,17 @@ class UnifiedRenameMap return phys_reg; } + /** + * Perform lookup() on a vector register, given a relative + * vector register index. + */ + PhysRegIndex lookupVector(RegIndex rel_arch_reg) const + { + PhysRegIndex phys_reg = vectorMap.lookup(rel_arch_reg); + assert(regFile->isVectorPhysReg(phys_reg)); + return phys_reg; + } + /** * Perform lookup() on a misc register, given a relative * misc register index. @@ -348,6 +373,16 @@ class UnifiedRenameMap ccMap.setEntry(arch_reg, phys_reg); } + /** + * Perform setEntry() on a vector register, given a relative vector + * register index. + */ + void setVectorEntry(RegIndex arch_reg, PhysRegIndex phys_reg) + { + assert(regFile->isVectorPhysReg(phys_reg)); + vectorMap.setEntry(arch_reg, phys_reg); + } + /** * Return the minimum number of free entries across all of the * register classes. The minimum is used so we guarantee that @@ -362,11 +397,13 @@ class UnifiedRenameMap /** * Return whether there are enough registers to serve the request. 
*/ - bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t ccRegs) const + bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t ccRegs, + uint32_t vectorRegs) const { return intRegs <= intMap.numFreeEntries() && floatRegs <= floatMap.numFreeEntries() && - ccRegs <= ccMap.numFreeEntries(); + ccRegs <= ccMap.numFreeEntries() && + vectorRegs <= vectorMap.numFreeEntries(); } }; diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index 87d87900c..6e9b054da 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -189,6 +189,10 @@ class O3ThreadContext : public ThreadContext return readCCRegFlat(flattenCCIndex(reg_idx)); } + virtual const VectorReg &readVectorReg(int reg_idx) { + return readVectorRegFlat(flattenVectorIndex(reg_idx)); + } + /** Sets an integer register to a value. */ virtual void setIntReg(int reg_idx, uint64_t val) { setIntRegFlat(flattenIntIndex(reg_idx), val); @@ -206,6 +210,10 @@ class O3ThreadContext : public ThreadContext setCCRegFlat(flattenCCIndex(reg_idx), val); } + virtual void setVectorReg(int reg_idx, const VectorReg &val) { + setVectorRegFlat(flattenVectorIndex(reg_idx), val); + } + /** Reads this thread's PC state. */ virtual TheISA::PCState pcState() { return cpu->pcState(thread->threadId()); } @@ -246,6 +254,7 @@ class O3ThreadContext : public ThreadContext virtual int flattenIntIndex(int reg); virtual int flattenFloatIndex(int reg); virtual int flattenCCIndex(int reg); + virtual int flattenVectorIndex(int reg); virtual int flattenMiscIndex(int reg); /** Returns the number of consecutive store conditional failures. 
*/ @@ -291,6 +300,9 @@ class O3ThreadContext : public ThreadContext virtual CCReg readCCRegFlat(int idx); virtual void setCCRegFlat(int idx, CCReg val); + + virtual const VectorReg &readVectorRegFlat(int idx); + virtual void setVectorRegFlat(int idx, const VectorReg &val); }; #endif diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index e6a3d5083..ecdd9ebb9 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -215,6 +215,13 @@ O3ThreadContext::readCCRegFlat(int reg_idx) return cpu->readArchCCReg(reg_idx, thread->threadId()); } +template +const TheISA::VectorReg & +O3ThreadContext::readVectorRegFlat(int reg_idx) +{ + return cpu->readArchVectorReg(reg_idx, thread->threadId()); +} + template void O3ThreadContext::setIntRegFlat(int reg_idx, uint64_t val) @@ -251,6 +258,15 @@ O3ThreadContext::setCCRegFlat(int reg_idx, TheISA::CCReg val) conditionalSquash(); } +template +void +O3ThreadContext::setVectorRegFlat(int reg_idx, + const TheISA::VectorReg &val) +{ + cpu->setArchVectorReg(reg_idx, val, thread->threadId()); + conditionalSquash(); +} + template void O3ThreadContext::pcState(const TheISA::PCState &val) @@ -290,6 +306,13 @@ O3ThreadContext::flattenCCIndex(int reg) return cpu->isa[thread->threadId()]->flattenCCIndex(reg); } +template +int +O3ThreadContext::flattenVectorIndex(int reg) +{ + return cpu->isa[thread->threadId()]->flattenVectorIndex(reg); +} + template int O3ThreadContext::flattenMiscIndex(int reg) diff --git a/src/cpu/reg_class.cc b/src/cpu/reg_class.cc index 1805eae13..0cb789fe1 100644 --- a/src/cpu/reg_class.cc +++ b/src/cpu/reg_class.cc @@ -34,5 +34,6 @@ const char *RegClassStrings[] = { "IntRegClass", "FloatRegClass", "CCRegClass", + "VectorRegClass", "MiscRegClass" }; diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh index 549ebab26..6c7b1b55d 100644 --- a/src/cpu/reg_class.hh +++ b/src/cpu/reg_class.hh @@ -42,6 +42,7 @@ enum RegClass { IntRegClass, ///< Integer register 
FloatRegClass, ///< Floating-point register CCRegClass, ///< Condition-code register + VectorRegClass, ///< Vector register MiscRegClass ///< Control (misc) register }; @@ -76,12 +77,15 @@ RegClass regIdxToClass(TheISA::RegIndex reg_idx, } else if (reg_idx < TheISA::CC_Reg_Base) { cl = FloatRegClass; offset = TheISA::FP_Reg_Base; - } else if (reg_idx < TheISA::Misc_Reg_Base) { + } else if (reg_idx < TheISA::Vector_Reg_Base) { // if there are no CC regs, the ISA should set // CC_Reg_Base == Misc_Reg_Base so the if above // never succeeds cl = CCRegClass; offset = TheISA::CC_Reg_Base; + } else if (reg_idx < TheISA::Misc_Reg_Base) { + cl = VectorRegClass; + offset = TheISA::Vector_Reg_Base; } else { cl = MiscRegClass; offset = TheISA::Misc_Reg_Base; diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 2f7247010..27e434132 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -87,6 +87,7 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; BPredUnit *branchPred; @@ -239,6 +240,10 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext Stats::Scalar numCCRegReads; Stats::Scalar numCCRegWrites; + //number of vector register file accesses + Stats::Scalar numVectorRegReads; + Stats::Scalar numVectorRegWrites; + // number of simulated memory references Stats::Scalar numMemRefs; Stats::Scalar numLoadInsts; @@ -325,6 +330,13 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext return thread->readCCReg(reg_idx); } + const VectorReg &readVectorRegOperand(const StaticInst *si, int idx) + { + numVectorRegReads++; + int reg_idx = si->srcRegIdx(idx) - TheISA::Vector_Reg_Base; + return thread->readVectorReg(reg_idx); + } + void setIntRegOperand(const StaticInst *si, int idx, IntReg val) { numIntRegWrites++; @@ -353,6 +365,14 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext 
thread->setCCReg(reg_idx, val); } + void setVectorRegOperand(const StaticInst *si, int idx, + const VectorReg &val) + { + numVectorRegWrites++; + int reg_idx = si->destRegIdx(idx) - TheISA::Vector_Reg_Base; + thread->setVectorReg(reg_idx, val); + } + bool readPredicate() { return thread->readPredicate(); } void setPredicate(bool val) { diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index 20acff6ee..070a00dc8 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -58,6 +58,7 @@ #include "debug/CCRegs.hh" #include "debug/FloatRegs.hh" #include "debug/IntRegs.hh" +#include "debug/VectorRegs.hh" #include "mem/page_table.hh" #include "mem/request.hh" #include "sim/byteswap.hh" @@ -102,6 +103,8 @@ class SimpleThread : public ThreadState typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; + typedef TheISA::VectorReg VectorReg; + public: typedef ThreadContext::Status Status; @@ -111,9 +114,15 @@ class SimpleThread : public ThreadState FloatRegBits i[TheISA::NumFloatRegs]; } floatRegs; TheISA::IntReg intRegs[TheISA::NumIntRegs]; + #ifdef ISA_HAS_CC_REGS TheISA::CCReg ccRegs[TheISA::NumCCRegs]; #endif + +#ifdef ISA_HAS_VECTOR_REGS + TheISA::VectorReg vectorRegs[TheISA::NumVectorRegs]; +#endif + TheISA::ISA *const isa; // one "instance" of the current ISA. 
TheISA::PCState _pcState; @@ -282,6 +291,16 @@ class SimpleThread : public ThreadState #endif } + const VectorReg &readVectorReg(int reg_idx) + { + int flatIndex = isa->flattenVectorIndex(reg_idx); + assert(0 <= flatIndex); + assert(flatIndex < TheISA::NumVectorRegs); + DPRINTF(VectorRegs, "Reading vector reg %d (%d).\n", + reg_idx, flatIndex); + return readVectorRegFlat(flatIndex); + } + void setIntReg(int reg_idx, uint64_t val) { int flatIndex = isa->flattenIntIndex(reg_idx); @@ -325,6 +344,19 @@ class SimpleThread : public ThreadState #endif } + void setVectorReg(int reg_idx, const VectorReg &val) + { +#ifdef ISA_HAS_VECTOR_REGS + int flatIndex = isa->flattenVectorIndex(reg_idx); + assert(flatIndex < TheISA::NumVectorRegs); + DPRINTF(VectorRegs, "Setting vector reg %d (%d).\n", + reg_idx, flatIndex); + setVectorRegFlat(flatIndex, val); +#else + panic("Tried to set a vector register."); +#endif + } + TheISA::PCState pcState() { @@ -413,6 +445,12 @@ class SimpleThread : public ThreadState return isa->flattenCCIndex(reg); } + int + flattenVectorIndex(int reg) + { + return isa->flattenVectorIndex(reg); + } + int flattenMiscIndex(int reg) { @@ -450,6 +488,18 @@ class SimpleThread : public ThreadState void setCCRegFlat(int idx, CCReg val) { panic("setCCRegFlat w/no CC regs!\n"); } #endif + +#ifdef ISA_HAS_VECTOR_REGS + const VectorReg &readVectorRegFlat(int idx) { return vectorRegs[idx]; } + void setVectorRegFlat(int idx, const VectorReg &val) + { vectorRegs[idx] = val; } +#else + const VectorReg &readVectorRegFlat(int idx) + { panic("readVectorRegFlat w/no Vector regs!\n"); } + + void setVectorRegFlat(int idx, const VectorReg &val) + { panic("setVectorRegFlat w/no Vector regs!\n"); } +#endif }; diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index 684a22856..58cf752b7 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -98,6 +98,7 @@ class StaticInst : public RefCounted, public StaticInstFlags int8_t _numFPDestRegs; int8_t 
_numIntDestRegs;
     int8_t _numCCDestRegs;
+    int8_t _numVectorDestRegs;  // vector destination register count
     //@}
 
   public:
@@ -116,9 +117,10 @@ class StaticInst : public RefCounted, public StaticInstFlags
     int8_t numFPDestRegs()  const { return _numFPDestRegs; }
     /// Number of integer destination regs.
     int8_t numIntDestRegs() const { return _numIntDestRegs; }
-    //@}
-    /// Number of coprocesor destination regs.
+    /// Number of condition code destination regs.
     int8_t numCCDestRegs() const { return _numCCDestRegs; }
+    /// Number of vector destination regs.
+    int8_t numVectorDestRegs() const { return _numVectorDestRegs; }
     //@}
 
     /// @name Flag accessors.
@@ -140,6 +142,7 @@ class StaticInst : public RefCounted, public StaticInstFlags
 
     bool isInteger()      const { return flags[IsInteger]; }
     bool isFloating()     const { return flags[IsFloating]; }
+    bool isVector()       const { return flags[IsVector]; }  // tests the IsVector flag
     bool isCC()           const { return flags[IsCC]; }
 
     bool isControl()      const { return flags[IsControl]; }
@@ -252,7 +255,8 @@ class StaticInst : public RefCounted, public StaticInstFlags
     StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass)
         : _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0),
           _numFPDestRegs(0), _numIntDestRegs(0), _numCCDestRegs(0),
-          machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0)
+          _numVectorDestRegs(0), machInst(_machInst), mnemonic(_mnemonic),
+          cachedDisassembly(0)  // vector dest count starts at zero like the others
     { }
 
   public:
@@ -326,7 +330,7 @@ class StaticInst : public RefCounted, public StaticInstFlags
     void printFlags(std::ostream &outs, const std::string &separator) const;
 
     /// Return name of machine instruction
-    std::string getName() { return mnemonic; }
+    std::string getName() const { return mnemonic; }  // now callable on const objects
 };
 
 #endif // __CPU_STATIC_INST_HH__
diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc
index fe1ae69dd..ce7604d3c 100644
--- a/src/cpu/thread_context.cc
+++ b/src/cpu/thread_context.cc
@@ -88,6 +88,15 @@ ThreadContext::compare(ThreadContext *one, ThreadContext *two)
             panic("CC reg idx %d doesn't match, one:
%#x, two: %#x",
                   i, t1, t2);
     }
+
+    // loop through the Vector registers.
+    for (int i = 0; i < TheISA::NumVectorRegs; ++i) {
+        const TheISA::VectorReg &t1 = one->readVectorReg(i);
+        const TheISA::VectorReg &t2 = two->readVectorReg(i);
+        if (t1 != t2)
+            panic("Vector reg idx %d doesn't match", i);
+    }
+
     if (!(one->pcState() == two->pcState()))
         panic("PC state doesn't match.");
     int id1 = one->cpuId();
@@ -127,6 +136,19 @@ serialize(ThreadContext &tc, CheckpointOut &cp)
     SERIALIZE_ARRAY(ccRegs, NumCCRegs);
 #endif
 
+#ifdef ISA_HAS_VECTOR_REGS
+    // Pack the vector register file into one flat element array so it
+    // can be written with SERIALIZE_ARRAY; register i occupies
+    // elements [i * NumVectorRegElements, (i + 1) * NumVectorRegElements).
+    VectorRegElement vectorRegs[NumVectorRegs * NumVectorRegElements];
+    for (int i = 0; i < NumVectorRegs; ++i) {
+        const VectorReg &v = tc.readVectorRegFlat(i);
+        for (int j = 0; j < NumVectorRegElements; ++j)
+            vectorRegs[i * NumVectorRegElements + j] = v[j];
+    }
+    SERIALIZE_ARRAY(vectorRegs, NumVectorRegs * NumVectorRegElements);
+#endif
+
     tc.pcState().serialize(cp);
 
     // thread_num and cpu_id are deterministic from the config
@@ -156,6 +178,19 @@ unserialize(ThreadContext &tc, CheckpointIn &cp)
         tc.setCCRegFlat(i, ccRegs[i]);
 #endif
 
+#ifdef ISA_HAS_VECTOR_REGS
+    // Inverse of serialize(): read the flat element array back and
+    // rebuild each register from its NumVectorRegElements elements.
+    VectorRegElement vectorRegs[NumVectorRegs * NumVectorRegElements];
+    UNSERIALIZE_ARRAY(vectorRegs, NumVectorRegs * NumVectorRegElements);
+    for (int i = 0; i < NumVectorRegs; ++i) {
+        VectorReg v;
+        for (int j = 0; j < NumVectorRegElements; ++j)
+            v[j] = vectorRegs[i * NumVectorRegElements + j];
+        tc.setVectorRegFlat(i, v);
+    }
+#endif
+
     PCState pcState;
     pcState.unserialize(cp);
     tc.pcState(pcState);
diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh
index 2544b19c6..cd8b98f0c 100644
--- a/src/cpu/thread_context.hh
+++ b/src/cpu/thread_context.hh
@@ -98,6 +98,7 @@ class ThreadContext
     typedef TheISA::FloatReg FloatReg;
     typedef TheISA::FloatRegBits FloatRegBits;
     typedef TheISA::CCReg CCReg;
+    typedef TheISA::VectorReg VectorReg;
     typedef TheISA::MiscReg MiscReg;
   public:
 
@@ -205,6 +206,8 @@ class ThreadContext
 
     virtual CCReg readCCReg(int reg_idx) = 0;
 
+    virtual
const VectorReg &readVectorReg(int reg_idx) = 0;
+
     virtual void setIntReg(int reg_idx, uint64_t val) = 0;
 
     virtual void setFloatReg(int reg_idx, FloatReg val) = 0;
@@ -213,6 +216,8 @@ class ThreadContext
 
     virtual void setCCReg(int reg_idx, CCReg val) = 0;
 
+    virtual void setVectorReg(int reg_idx, const VectorReg &val) = 0;  // val passed by const reference
+
     virtual TheISA::PCState pcState() = 0;
 
     virtual void pcState(const TheISA::PCState &val) = 0;
@@ -236,6 +241,7 @@ class ThreadContext
     virtual int flattenIntIndex(int reg) = 0;
     virtual int flattenFloatIndex(int reg) = 0;
     virtual int flattenCCIndex(int reg) = 0;
+    virtual int flattenVectorIndex(int reg) = 0;  // vector counterpart of the flatten*Index family
     virtual int flattenMiscIndex(int reg) = 0;
 
     virtual uint64_t
@@ -291,6 +297,9 @@ class ThreadContext
 
     virtual CCReg readCCRegFlat(int idx) = 0;
     virtual void setCCRegFlat(int idx, CCReg val) = 0;
+
+    virtual const VectorReg &readVectorRegFlat(int idx) = 0;  // returns a const reference
+    virtual void setVectorRegFlat(int idx, const VectorReg &val) = 0;
     /** @} */
 
 };
@@ -402,6 +411,9 @@ class ProxyThreadContext : public ThreadContext
     CCReg readCCReg(int reg_idx)
     { return actualTC->readCCReg(reg_idx); }
 
+    const VectorReg &readVectorReg(int reg_idx)
+    { return actualTC->readVectorReg(reg_idx); }  // forwards to actualTC
+
     void setIntReg(int reg_idx, uint64_t val)
     { actualTC->setIntReg(reg_idx, val); }
 
@@ -414,6 +426,9 @@ class ProxyThreadContext : public ThreadContext
     void setCCReg(int reg_idx, CCReg val)
     { actualTC->setCCReg(reg_idx, val); }
 
+    void setVectorReg(int reg_idx, const VectorReg &val)
+    { actualTC->setVectorReg(reg_idx, val); }  // forwards to actualTC
+
     TheISA::PCState pcState() { return actualTC->pcState(); }
 
     void pcState(const TheISA::PCState &val) { actualTC->pcState(val); }
@@ -450,6 +465,9 @@ class ProxyThreadContext : public ThreadContext
     int flattenCCIndex(int reg)
     { return actualTC->flattenCCIndex(reg); }
 
+    int flattenVectorIndex(int reg)
+    { return actualTC->flattenVectorIndex(reg); }  // forwards to actualTC
+
     int flattenMiscIndex(int reg)
     { return actualTC->flattenMiscIndex(reg); }
 
@@ -487,6 +505,12 @@ class ProxyThreadContext :
public ThreadContext
 
     void setCCRegFlat(int idx, CCReg val)
     { actualTC->setCCRegFlat(idx, val); }
+
+    const VectorReg &readVectorRegFlat(int idx)
+    { return actualTC->readVectorRegFlat(idx); }  // forwards to actualTC
+
+    void setVectorRegFlat(int idx, const VectorReg &val)
+    { actualTC->setVectorRegFlat(idx, val); }  // forwards to actualTC
 };
 
 /** @{ */
diff --git a/src/sim/insttracer.hh b/src/sim/insttracer.hh
index 6819c2199..3c954df26 100644
--- a/src/sim/insttracer.hh
+++ b/src/sim/insttracer.hh
@@ -58,6 +58,8 @@ namespace Trace {
 class InstRecord
 {
   protected:
+    typedef TheISA::VectorReg VectorReg;  // shorthand for the ISA's vector type
+
     Tick when;
 
     // The following fields are initialized by the constructor and
@@ -97,6 +99,7 @@ class InstRecord
     union {
         uint64_t as_int;
         double as_double;
+        VectorReg as_vector;  // shares storage with as_int / as_double
     } data;
 
     /** @defgroup fetch_seq
@@ -120,7 +123,8 @@ class InstRecord
         DataInt16 = 2,
         DataInt32 = 4,
         DataInt64 = 8,
-        DataDouble = 3
+        DataDouble = 3,
+        DataVector = sizeof(VectorReg),  // NOTE(review): may equal another enumerator if sizeof(VectorReg) is 1, 2, 3, 4 or 8 -- confirm
     } data_status;
 
     /** @ingroup memory
@@ -173,6 +177,8 @@ class InstRecord
     void setData(int8_t d) { setData((uint8_t)d); }
 
     void setData(double d) { data.as_double = d; data_status = DataDouble; }
+    void setData(const VectorReg& v)
+    { data.as_vector = v; data_status = DataVector; }  // copies v and tags the record as vector data
 
     void setFetchSeq(InstSeqNum seq)
     { fetch_seq = seq; fetch_seq_valid = true; }
@@ -198,6 +204,7 @@ class InstRecord
 
     uint64_t getIntData() const { return data.as_int; }
     double getFloatData() const { return data.as_double; }
+    const VectorReg &getVectorData() const { return data.as_vector; }  // reference into the data union
     int getDataStatus() const { return data_status; }
 
     InstSeqNum getFetchSeq() const { return fetch_seq; }
-- 
2.30.2