From c1f7bf7f0e97f8470eb4280870244b6b673dbff4 Mon Sep 17 00:00:00 2001
From: Gabe Black
Date: Wed, 2 Jun 2010 12:58:14 -0500
Subject: [PATCH] ARM: Add support for VFP vector mode.

When FPSCR.len is non-zero and the destination register is outside the
scalar bank (the first 8 registers of every group of 32), the VFP data
processing instructions touched here are decoded through the new
decodeVfp*Op helpers into a macro-op made of FPSCR.len + 1 micro-ops
instead of a single instruction. Otherwise the plain instruction is
built exactly as before.

After each micro-op the register indices are advanced by 1 when
FPSCR.stride is 0 and by 2 otherwise (doubled again for the "wide"
double precision variants), wrapping around inside their group of 8
registers. The last source operand is left untouched when it lives in
the scalar bank.

The predecoder copies FPSCR.len and FPSCR.stride into two new
ExtMachInst fields so the decoder can see them, and setVfpMicroFlags now
sets IsDelayedCommit for the VfpFirstMicroop and VfpMicroop modes.

---
 src/arch/arm/insts/vfp.hh       |  65 ++++++++++++++
 src/arch/arm/isa/formats/fp.isa |  84 ++++++++++++------
 src/arch/arm/isa/insts/fp.isa   | 148 ++++++++++++++++++++++++++++++++
 src/arch/arm/miscregs.hh        |  26 ++++++
 src/arch/arm/predecoder.hh      |   3 +
 src/arch/arm/types.hh           |   4 +
 6 files changed, 302 insertions(+), 28 deletions(-)

diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh
index 8cffb276e..77e104a13 100644
--- a/src/arch/arm/insts/vfp.hh
+++ b/src/arch/arm/insts/vfp.hh
@@ -68,8 +68,73 @@ setVfpMicroFlags(VfpMicroMode mode, T &flags)
       case VfpNotAMicroop:
         break;
     }
+    if (mode == VfpMicroop || mode == VfpFirstMicroop) {
+        flags[StaticInst::IsDelayedCommit] = true;
+    }
 }
 
+class VfpMacroOp : public PredMacroOp
+{
+  public:
+    static bool
+    inScalarBank(IntRegIndex idx)
+    {
+        return (idx % 32) < 8;
+    }
+
+  protected:
+    bool wide;
+
+    VfpMacroOp(const char *mnem, ExtMachInst _machInst,
+            OpClass __opClass, bool _wide) :
+        PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
+    {}
+
+    IntRegIndex
+    addStride(IntRegIndex idx, unsigned stride)
+    {
+        if (wide) {
+            stride *= 2;
+        }
+        unsigned offset = idx % 8;
+        idx = (IntRegIndex)(idx - offset);
+        offset += stride;
+        idx = (IntRegIndex)(idx + (offset % 8));
+        return idx;
+    }
+
+    void
+    nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
+    {
+        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
+        assert(!inScalarBank(dest));
+        dest = addStride(dest, stride);
+        op1 = addStride(op1, stride);
+        if (!inScalarBank(op2)) {
+            op2 = addStride(op2, stride);
+        }
+    }
+
+    void
+    nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
+    {
+        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
+        assert(!inScalarBank(dest));
+        dest = addStride(dest, stride);
+        if (!inScalarBank(op1)) {
+            op1 = addStride(op1, stride);
+        }
+    }
+
+    void
+    nextIdxs(IntRegIndex &dest)
+    {
+        unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
+        assert(!inScalarBank(dest));
+        dest = addStride(dest, stride);
+    }
+};
+
 class VfpRegRegOp : public RegRegOp
 {
   protected:
diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index e92757096..d15412825 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -504,65 +504,83 @@ let {{
               case 0x0:
                 if (bits(machInst, 6) == 0) {
                     if (single) {
-                        return new VmlaS(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VmlaS>(
+                                machInst, vd, vn, vm, false);
                     } else {
-                        return new VmlaD(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VmlaD>(
+                                machInst, vd, vn, vm, true);
                     }
                 } else {
                     if (single) {
-                        return new VmlsS(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VmlsS>(
+                                machInst, vd, vn, vm, false);
                     } else {
-                        return new VmlsD(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VmlsD>(
+                                machInst, vd, vn, vm, true);
                     }
                 }
               case 0x1:
                 if (bits(machInst, 6) == 1) {
                     if (single) {
-                        return new VnmlaS(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VnmlaS>(
+                                machInst, vd, vn, vm, false);
                     } else {
-                        return new VnmlaD(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VnmlaD>(
+                                machInst, vd, vn, vm, true);
                     }
                 } else {
                     if (single) {
-                        return new VnmlsS(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VnmlsS>(
+                                machInst, vd, vn, vm, false);
                     } else {
-                        return new VnmlsD(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VnmlsD>(
+                                machInst, vd, vn, vm, true);
                     }
                 }
               case 0x2:
                 if ((opc3 & 0x1) == 0) {
                     if (single) {
-                        return new VmulS(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VmulS>(
+                                machInst, vd, vn, vm, false);
                     } else {
-                        return new VmulD(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VmulD>(
+                                machInst, vd, vn, vm, true);
                     }
                 } else {
                     if (single) {
-                        return new VnmulS(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VnmulS>(
+                                machInst, vd, vn, vm, false);
                     } else {
-                        return new VnmulD(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VnmulD>(
+                                machInst, vd, vn, vm, true);
                     }
                 }
               case 0x3:
                 if ((opc3 & 0x1) == 0) {
                     if (single) {
-                        return new VaddS(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VaddS>(
+                                machInst, vd, vn, vm, false);
                     } else {
-                        return new VaddD(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VaddD>(
+                                machInst, vd, vn, vm, true);
                     }
                 } else {
                     if (single) {
-                        return new VsubS(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VsubS>(
+                                machInst, vd, vn, vm, false);
                     } else {
-                        return new VsubD(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VsubD>(
+                                machInst, vd, vn, vm, true);
                     }
                 }
               case 0x8:
                 if ((opc3 & 0x1) == 0) {
                     if (single) {
-                        return new VdivS(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VdivS>(
+                                machInst, vd, vn, vm, false);
                     } else {
-                        return new VdivD(machInst, vd, vn, vm);
+                        return decodeVfpRegRegRegOp<VdivD>(
+                                machInst, vd, vn, vm, true);
                     }
                 }
                 break;
@@ -572,39 +590,49 @@ let {{
                     bits(machInst, 3, 0) | (bits(machInst, 19, 16) << 4);
                 if (single) {
                     uint32_t imm = vfp_modified_imm(baseImm, false);
-                    return new VmovImmS(machInst, vd, imm);
+                    return decodeVfpRegImmOp<VmovImmS>(
+                            machInst, vd, imm, false);
                 } else {
                     uint64_t imm = vfp_modified_imm(baseImm, true);
-                    return new VmovImmD(machInst, vd, imm);
+                    return decodeVfpRegImmOp<VmovImmD>(
+                            machInst, vd, imm, true);
                 }
             }
             switch (opc2) {
               case 0x0:
                 if (opc3 == 1) {
                     if (single) {
-                        return new VmovRegS(machInst, vd, vm);
+                        return decodeVfpRegRegOp<VmovRegS>(
+                                machInst, vd, vm, false);
                     } else {
-                        return new VmovRegD(machInst, vd, vm);
+                        return decodeVfpRegRegOp<VmovRegD>(
+                                machInst, vd, vm, true);
                     }
                 } else {
                     if (single) {
-                        return new VabsS(machInst, vd, vm);
+                        return decodeVfpRegRegOp<VabsS>(
+                                machInst, vd, vm, false);
                     } else {
-                        return new VabsD(machInst, vd, vm);
+                        return decodeVfpRegRegOp<VabsD>(
+                                machInst, vd, vm, true);
                     }
                 }
               case 0x1:
                 if (opc3 == 1) {
                     if (single) {
-                        return new VnegS(machInst, vd, vm);
+                        return decodeVfpRegRegOp<VnegS>(
+                                machInst, vd, vm, false);
                     } else {
-                        return new VnegD(machInst, vd, vm);
+                        return decodeVfpRegRegOp<VnegD>(
+                                machInst, vd, vm, true);
                     }
                 } else {
                     if (single) {
-                        return new VsqrtS(machInst, vd, vm);
+                        return decodeVfpRegRegOp<VsqrtS>(
+                                machInst, vd, vm, false);
                     } else {
-                        return new VsqrtD(machInst, vd, vm);
+                        return decodeVfpRegRegOp<VsqrtD>(
+                                machInst, vd, vm, true);
                     }
                 }
               case 0x2:
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index 1402da61a..e2c7dd79b 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -37,6 +37,154 @@
 //
 // Authors: Gabe Black
 
+output header {{
+
+template <class Micro>
+class VfpMacroRegRegOp : public VfpMacroOp
+{
+  public:
+    VfpMacroRegRegOp(ExtMachInst _machInst, IntRegIndex _dest,
+            IntRegIndex _op1, bool _wide) :
+        VfpMacroOp("VfpMacroRegRegOp", _machInst, No_OpClass, _wide)
+    {
+        numMicroops = machInst.fpscrLen + 1;
+        assert(numMicroops > 1);
+        microOps = new StaticInstPtr[numMicroops];
+        for (unsigned i = 0; i < numMicroops; i++) {
+            VfpMicroMode mode = VfpMicroop;
+            if (i == 0)
+                mode = VfpFirstMicroop;
+            else if (i == numMicroops - 1)
+                mode = VfpLastMicroop;
+            microOps[i] = new Micro(_machInst, _dest, _op1, mode);
+            nextIdxs(_dest, _op1);
+        }
+    }
+
+    %(BasicExecPanic)s
+};
+
+template <class VfpOp>
+static StaticInstPtr
+decodeVfpRegRegOp(ExtMachInst machInst,
+        IntRegIndex dest, IntRegIndex op1, bool wide)
+{
+    if (machInst.fpscrLen == 0 || VfpMacroOp::inScalarBank(dest)) {
+        return new VfpOp(machInst, dest, op1);
+    } else {
+        return new VfpMacroRegRegOp<VfpOp>(machInst, dest, op1, wide);
+    }
+}
+
+template <class Micro>
+class VfpMacroRegImmOp : public VfpMacroOp
+{
+  public:
+    VfpMacroRegImmOp(ExtMachInst _machInst, IntRegIndex _dest, uint64_t _imm,
+            bool _wide) :
+        VfpMacroOp("VfpMacroRegImmOp", _machInst, No_OpClass, _wide)
+    {
+        numMicroops = machInst.fpscrLen + 1;
+        microOps = new StaticInstPtr[numMicroops];
+        for (unsigned i = 0; i < numMicroops; i++) {
+            VfpMicroMode mode = VfpMicroop;
+            if (i == 0)
+                mode = VfpFirstMicroop;
+            else if (i == numMicroops - 1)
+                mode = VfpLastMicroop;
+            microOps[i] = new Micro(_machInst, _dest, _imm, mode);
+            nextIdxs(_dest);
+        }
+    }
+
+    %(BasicExecPanic)s
+};
+
+template <class VfpOp>
+static StaticInstPtr
+decodeVfpRegImmOp(ExtMachInst machInst,
+        IntRegIndex dest, uint64_t imm, bool wide)
+{
+    if (machInst.fpscrLen == 0 || VfpMacroOp::inScalarBank(dest)) {
+        return new VfpOp(machInst, dest, imm);
+    } else {
+        return new VfpMacroRegImmOp<VfpOp>(machInst, dest, imm, wide);
+    }
+}
+
+template <class Micro>
+class VfpMacroRegRegImmOp : public VfpMacroOp
+{
+  public:
+    VfpMacroRegRegImmOp(ExtMachInst _machInst, IntRegIndex _dest,
+            IntRegIndex _op1, uint64_t _imm, bool _wide) :
+        VfpMacroOp("VfpMacroRegRegImmOp", _machInst, No_OpClass, _wide)
+    {
+        numMicroops = machInst.fpscrLen + 1;
+        microOps = new StaticInstPtr[numMicroops];
+        for (unsigned i = 0; i < numMicroops; i++) {
+            VfpMicroMode mode = VfpMicroop;
+            if (i == 0)
+                mode = VfpFirstMicroop;
+            else if (i == numMicroops - 1)
+                mode = VfpLastMicroop;
+            microOps[i] = new Micro(_machInst, _dest, _op1, _imm, mode);
+            nextIdxs(_dest, _op1);
+        }
+    }
+
+    %(BasicExecPanic)s
+};
+
+template <class VfpOp>
+static StaticInstPtr
+decodeVfpRegRegImmOp(ExtMachInst machInst, IntRegIndex dest,
+        IntRegIndex op1, uint64_t imm, bool wide)
+{
+    if (machInst.fpscrLen == 0 || VfpMacroOp::inScalarBank(dest)) {
+        return new VfpOp(machInst, dest, op1, imm);
+    } else {
+        return new VfpMacroRegRegImmOp<VfpOp>(machInst, dest, op1, imm, wide);
+    }
+}
+
+template <class Micro>
+class VfpMacroRegRegRegOp : public VfpMacroOp
+{
+  public:
+    VfpMacroRegRegRegOp(ExtMachInst _machInst, IntRegIndex _dest,
+            IntRegIndex _op1, IntRegIndex _op2, bool _wide) :
+        VfpMacroOp("VfpMacroRegRegRegOp", _machInst, No_OpClass, _wide)
+    {
+        numMicroops = machInst.fpscrLen + 1;
+        microOps = new StaticInstPtr[numMicroops];
+        for (unsigned i = 0; i < numMicroops; i++) {
+            VfpMicroMode mode = VfpMicroop;
+            if (i == 0)
+                mode = VfpFirstMicroop;
+            else if (i == numMicroops - 1)
+                mode = VfpLastMicroop;
+            microOps[i] = new Micro(_machInst, _dest, _op1, _op2, mode);
+            nextIdxs(_dest, _op1, _op2);
+        }
+    }
+
+    %(BasicExecPanic)s
+};
+
+template <class VfpOp>
+static StaticInstPtr
+decodeVfpRegRegRegOp(ExtMachInst machInst, IntRegIndex dest,
+        IntRegIndex op1, IntRegIndex op2, bool wide)
+{
+    if (machInst.fpscrLen == 0 || VfpMacroOp::inScalarBank(dest)) {
+        return new VfpOp(machInst, dest, op1, op2);
+    } else {
+        return new VfpMacroRegRegRegOp<VfpOp>(machInst, dest, op1, op2, wide);
+    }
+}
+}};
+
 let {{
 
     header_output = ""
diff --git a/src/arch/arm/miscregs.hh b/src/arch/arm/miscregs.hh
index 9c781f515..b7521cbd0 100644
--- a/src/arch/arm/miscregs.hh
+++ b/src/arch/arm/miscregs.hh
@@ -294,6 +294,32 @@ namespace ArmISA
         Bitfield<11> wnr;
         Bitfield<12> ext;
     EndBitUnion(FSR)
+
+    BitUnion32(FPSCR)
+        Bitfield<0> ioc;
+        Bitfield<1> dzc;
+        Bitfield<2> ofc;
+        Bitfield<3> ufc;
+        Bitfield<4> ixc;
+        Bitfield<7> idc;
+        Bitfield<8> ioe;
+        Bitfield<9> dze;
+        Bitfield<10> ofe;
+        Bitfield<11> ufe;
+        Bitfield<12> ixe;
+        Bitfield<15> ide;
+        Bitfield<18, 16> len;
+        Bitfield<21, 20> stride;
+        Bitfield<23, 22> rMode;
+        Bitfield<24> fz;
+        Bitfield<25> dn;
+        Bitfield<26> ahp;
+        Bitfield<27> qc;
+        Bitfield<28> v;
+        Bitfield<29> c;
+        Bitfield<30> z;
+        Bitfield<31> n;
+    EndBitUnion(FPSCR)
 };
 
 #endif // __ARCH_ARM_MISCREGS_HH__
diff --git a/src/arch/arm/predecoder.hh b/src/arch/arm/predecoder.hh
index 5aba16a6f..a75199755 100644
--- a/src/arch/arm/predecoder.hh
+++ b/src/arch/arm/predecoder.hh
@@ -142,6 +142,9 @@ namespace ArmISA
             data = inst;
             offset = (fetchPC >= pc) ? 0 : pc - fetchPC;
             emi.thumb = (pc & (ULL(1) << PcTBitShift)) ? 1 : 0;
+            FPSCR fpscr = tc->readMiscReg(MISCREG_FPSCR);
+            emi.fpscrLen = fpscr.len;
+            emi.fpscrStride = fpscr.stride;
             process();
         }
 
diff --git a/src/arch/arm/types.hh b/src/arch/arm/types.hh
index 2fe4f4aa7..95e5a18a0 100644
--- a/src/arch/arm/types.hh
+++ b/src/arch/arm/types.hh
@@ -51,6 +51,10 @@ namespace ArmISA
     typedef uint32_t MachInst;
 
     BitUnion64(ExtMachInst)
+        // FPSCR fields
+        Bitfield<41, 40> fpscrStride;
+        Bitfield<39, 37> fpscrLen;
+
         // Bitfields to select mode.
         Bitfield<36> thumb;
         Bitfield<35> bigThumb;
-- 
2.30.2