From: Gabe Black
Date: Wed, 2 Jun 2010 17:58:15 +0000 (-0500)
Subject: ARM: Implement the floating/fixed point VCVT instructions.
X-Git-Tag: stable_2012_02_02~1114
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=aa05e5401c37c7e60f28e13f8e6de5c5f74e904d;p=gem5.git

ARM: Implement the floating/fixed point VCVT instructions.
---

diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh
index 465384304..ceeaaa3cd 100644
--- a/src/arch/arm/insts/vfp.hh
+++ b/src/arch/arm/insts/vfp.hh
@@ -101,6 +101,184 @@ enum VfpRoundingMode
     VfpRoundZero = 3
 };
 
+/*
+ * Convert a single precision value to fixed point. val is scaled by 2^imm
+ * with the host rounding mode set to FeRoundZero and then cast to the
+ * destination type picked by isSigned and half (int16_t, int32_t, uint16_t
+ * or uint32_t). Values outside that type's range raise FeInvalid and return
+ * the range bound that was crossed.
+ */
+static inline uint64_t
+vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
+{
+    fesetround(FeRoundZero);
+    val = val * powf(2.0, imm);
+    // NOTE(review): the empty asm with val as a memory operand presumably
+    // keeps the multiply ordered before feclearexcept -- confirm.
+    __asm__ __volatile__("" : "=m" (val) : "m" (val));
+    feclearexcept(FeAllExceptions);
+    if (isSigned) {
+        if (half) {
+            if (val < (int16_t)(1 << 15)) {
+                feraiseexcept(FeInvalid);
+                return (int16_t)(1 << 15);
+            }
+            if (val > (int16_t)mask(15)) {
+                feraiseexcept(FeInvalid);
+                return (int16_t)mask(15);
+            }
+            return (int16_t)val;
+        } else {
+            if (val < (int32_t)(1 << 31)) {
+                feraiseexcept(FeInvalid);
+                return (int32_t)(1 << 31);
+            }
+            if (val > (int32_t)mask(31)) {
+                feraiseexcept(FeInvalid);
+                return (int32_t)mask(31);
+            }
+            return (int32_t)val;
+        }
+    } else {
+        if (half) {
+            if (val < 0) {
+                feraiseexcept(FeInvalid);
+                return 0;
+            }
+            if (val > (mask(16))) {
+                feraiseexcept(FeInvalid);
+                return mask(16);
+            }
+            return (uint16_t)val;
+        } else {
+            if (val < 0) {
+                feraiseexcept(FeInvalid);
+                return 0;
+            }
+            if (val > (mask(32))) {
+                feraiseexcept(FeInvalid);
+                return mask(32);
+            }
+            return (uint32_t)val;
+        }
+    }
+}
+
+/*
+ * Convert an unsigned fixed point value to single precision by dividing it
+ * by 2^imm. If half is set, only the low 16 bits of val are used.
+ */
+static inline float
+vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm)
+{
+    fesetround(FeRoundNearest);
+    if (half)
+        val = (uint16_t)val;
+    return val / powf(2.0, imm);
+}
+
+/*
+ * Convert a signed fixed point value to single precision by dividing it by
+ * 2^imm. If half is set, the low 16 bits of val are sign extended first.
+ */
+static inline float
+vfpSFixedToFpS(int32_t val, bool half, uint8_t imm)
+{
+    fesetround(FeRoundNearest);
+    if (half)
+        val = sext<16>(val & mask(16));
+    return val / powf(2.0, imm);
+}
+
+/*
+ * Convert a double precision value to fixed point. val is scaled by 2^imm
+ * with the host rounding mode set to FeRoundZero and then cast to the
+ * destination type picked by isSigned and half (int16_t, int32_t, uint16_t
+ * or uint32_t). Values outside that type's range raise FeInvalid and return
+ * the range bound that was crossed.
+ */
+static inline uint64_t
+vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
+{
+    fesetround(FeRoundZero);
+    val = val * pow(2.0, imm);
+    // NOTE(review): the empty asm with val as a memory operand presumably
+    // keeps the multiply ordered before feclearexcept -- confirm.
+    __asm__ __volatile__("" : "=m" (val) : "m" (val));
+    feclearexcept(FeAllExceptions);
+    if (isSigned) {
+        if (half) {
+            if (val < (int16_t)(1 << 15)) {
+                feraiseexcept(FeInvalid);
+                return (int16_t)(1 << 15);
+            }
+            if (val > (int16_t)mask(15)) {
+                feraiseexcept(FeInvalid);
+                return (int16_t)mask(15);
+            }
+            return (int16_t)val;
+        } else {
+            if (val < (int32_t)(1 << 31)) {
+                feraiseexcept(FeInvalid);
+                return (int32_t)(1 << 31);
+            }
+            if (val > (int32_t)mask(31)) {
+                feraiseexcept(FeInvalid);
+                return (int32_t)mask(31);
+            }
+            return (int32_t)val;
+        }
+    } else {
+        if (half) {
+            if (val < 0) {
+                feraiseexcept(FeInvalid);
+                return 0;
+            }
+            if (val > mask(16)) {
+                feraiseexcept(FeInvalid);
+                return mask(16);
+            }
+            return (uint16_t)val;
+        } else {
+            if (val < 0) {
+                feraiseexcept(FeInvalid);
+                return 0;
+            }
+            if (val > mask(32)) {
+                feraiseexcept(FeInvalid);
+                return mask(32);
+            }
+            return (uint32_t)val;
+        }
+    }
+}
+
+/*
+ * Convert an unsigned fixed point value to double precision by dividing it
+ * by 2^imm. If half is set, only the low 16 bits of val are used.
+ */
+static inline double
+vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm)
+{
+    fesetround(FeRoundNearest);
+    if (half)
+        val = (uint16_t)val;
+    return val / pow(2.0, imm);
+}
+
+/*
+ * Convert a signed fixed point value to double precision by dividing it by
+ * 2^imm. If half is set, the low 16 bits of val are sign extended first.
+ */
+static inline double
+vfpSFixedToFpD(int32_t val, bool half, uint8_t imm)
+{
+    fesetround(FeRoundNearest);
+    if (half)
+        val = sext<16>(val & mask(16));
+    return val / pow(2.0, imm);
+}
+
 typedef int VfpSavedState;
 
 static inline VfpSavedState
diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index e553b180d..3d40caf9e 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -683,9 +683,49 @@ let {{
                 }
             }
           case 0xa:
+            // Signed fixed point to floating point.
+            {
+                const bool half = (bits(machInst, 7) == 0);
+                const uint32_t imm = bits(machInst, 5) |
+                                     (bits(machInst, 3, 0) << 1);
+                const uint32_t size =
+                    (half ? 16 : 32) - imm;
+                if (single) {
+                    if (half) {
+                        return new VcvtSHFixedFpS(machInst, vd, vd, size);
+                    } else {
+                        return new VcvtSFixedFpS(machInst, vd, vd, size);
+                    }
+                } else {
+                    if (half) {
+                        return new VcvtSHFixedFpD(machInst, vd, vd, size);
+                    } else {
+                        return new VcvtSFixedFpD(machInst, vd, vd, size);
+                    }
+                }
+            }
           case 0xb:
-            // Between FP and fixed point.
-            return new WarnUnimplemented("vcvt", machInst);
+            // Unsigned fixed point to floating point.
+            {
+                const bool half = (bits(machInst, 7) == 0);
+                const uint32_t imm = bits(machInst, 5) |
+                                     (bits(machInst, 3, 0) << 1);
+                const uint32_t size =
+                    (half ? 16 : 32) - imm;
+                if (single) {
+                    if (half) {
+                        return new VcvtUHFixedFpS(machInst, vd, vd, size);
+                    } else {
+                        return new VcvtUFixedFpS(machInst, vd, vd, size);
+                    }
+                } else {
+                    if (half) {
+                        return new VcvtUHFixedFpD(machInst, vd, vd, size);
+                    } else {
+                        return new VcvtUFixedFpD(machInst, vd, vd, size);
+                    }
+                }
+            }
           case 0xc:
             if (single) {
                 return new VcvtFpUIntS(machInst, vd, vm);
@@ -703,9 +743,49 @@ let {{
                 return new VcvtFpSIntD(machInst, vd, vm);
             }
           case 0xe:
+            // Floating point to signed fixed point.
+            {
+                const bool half = (bits(machInst, 7) == 0);
+                const uint32_t imm = bits(machInst, 5) |
+                                     (bits(machInst, 3, 0) << 1);
+                const uint32_t size =
+                    (half ? 16 : 32) - imm;
+                if (single) {
+                    if (half) {
+                        return new VcvtFpSHFixedS(machInst, vd, vd, size);
+                    } else {
+                        return new VcvtFpSFixedS(machInst, vd, vd, size);
+                    }
+                } else {
+                    if (half) {
+                        return new VcvtFpSHFixedD(machInst, vd, vd, size);
+                    } else {
+                        return new VcvtFpSFixedD(machInst, vd, vd, size);
+                    }
+                }
+            }
           case 0xf:
-            // Between FP and fixed point.
-            return new WarnUnimplemented("vcvt", machInst);
+            // Floating point to unsigned fixed point.
+            {
+                const bool half = (bits(machInst, 7) == 0);
+                const uint32_t imm = bits(machInst, 5) |
+                                     (bits(machInst, 3, 0) << 1);
+                const uint32_t size =
+                    (half ? 16 : 32) - imm;
+                if (single) {
+                    if (half) {
+                        return new VcvtFpUHFixedS(machInst, vd, vd, size);
+                    } else {
+                        return new VcvtFpUFixedS(machInst, vd, vd, size);
+                    }
+                } else {
+                    if (half) {
+                        return new VcvtFpUHFixedD(machInst, vd, vd, size);
+                    } else {
+                        return new VcvtFpUFixedD(machInst, vd, vd, size);
+                    }
+                }
+            }
         }
         break;
     }
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index 1a8f25c5a..db1c5bf6b 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -997,3 +997,245 @@ let {{
     decoder_output += VfpRegImmOpConstructor.subst(vcmpZeroDIop);
     exec_output += PredOpExecute.subst(vcmpZeroDIop);
 }};
+
+let {{
+
+    # VCVT between floating point and fixed point. The second-to-last
+    # argument of the vfp*Fixed* helpers selects the 16 bit (half) variants,
+    # so it is true only in the *H* instructions below.
+    header_output = ""
+    decoder_output = ""
+    exec_output = ""
+
+    vcvtFpSFixedSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest.sw = vfpFpSToFixed(FpOp1, true, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtFpSFixedSIop = InstObjParams("vcvt", "VcvtFpSFixedS", "VfpRegRegImmOp",
+                                     { "code": vcvtFpSFixedSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpSFixedSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpSFixedSIop);
+    exec_output += PredOpExecute.subst(vcvtFpSFixedSIop);
+
+    vcvtFpSFixedDCode = '''
+        IntDoubleUnion cOp1;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        uint64_t mid = vfpFpDToFixed(cOp1.fp, true, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = mid;
+        FpDestP1.uw = mid >> 32;
+    '''
+    vcvtFpSFixedDIop = InstObjParams("vcvt", "VcvtFpSFixedD", "VfpRegRegImmOp",
+                                     { "code": vcvtFpSFixedDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpSFixedDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpSFixedDIop);
+    exec_output += PredOpExecute.subst(vcvtFpSFixedDIop);
+
+    vcvtFpUFixedSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest.uw = vfpFpSToFixed(FpOp1, false, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtFpUFixedSIop = InstObjParams("vcvt", "VcvtFpUFixedS", "VfpRegRegImmOp",
+                                     { "code": vcvtFpUFixedSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpUFixedSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpUFixedSIop);
+    exec_output += PredOpExecute.subst(vcvtFpUFixedSIop);
+
+    vcvtFpUFixedDCode = '''
+        IntDoubleUnion cOp1;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        uint64_t mid = vfpFpDToFixed(cOp1.fp, false, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = mid;
+        FpDestP1.uw = mid >> 32;
+    '''
+    vcvtFpUFixedDIop = InstObjParams("vcvt", "VcvtFpUFixedD", "VfpRegRegImmOp",
+                                     { "code": vcvtFpUFixedDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpUFixedDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpUFixedDIop);
+    exec_output += PredOpExecute.subst(vcvtFpUFixedDIop);
+
+    vcvtSFixedFpSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest = vfpSFixedToFpS(FpOp1.sw, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtSFixedFpSIop = InstObjParams("vcvt", "VcvtSFixedFpS", "VfpRegRegImmOp",
+                                     { "code": vcvtSFixedFpSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtSFixedFpSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtSFixedFpSIop);
+    exec_output += PredOpExecute.subst(vcvtSFixedFpSIop);
+
+    vcvtSFixedFpDCode = '''
+        IntDoubleUnion cDest;
+        uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        cDest.fp = vfpSFixedToFpD(mid, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vcvtSFixedFpDIop = InstObjParams("vcvt", "VcvtSFixedFpD", "VfpRegRegImmOp",
+                                     { "code": vcvtSFixedFpDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtSFixedFpDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtSFixedFpDIop);
+    exec_output += PredOpExecute.subst(vcvtSFixedFpDIop);
+
+    vcvtUFixedFpSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest = vfpUFixedToFpS(FpOp1.uw, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtUFixedFpSIop = InstObjParams("vcvt", "VcvtUFixedFpS", "VfpRegRegImmOp",
+                                     { "code": vcvtUFixedFpSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtUFixedFpSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtUFixedFpSIop);
+    exec_output += PredOpExecute.subst(vcvtUFixedFpSIop);
+
+    vcvtUFixedFpDCode = '''
+        IntDoubleUnion cDest;
+        uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        cDest.fp = vfpUFixedToFpD(mid, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vcvtUFixedFpDIop = InstObjParams("vcvt", "VcvtUFixedFpD", "VfpRegRegImmOp",
+                                     { "code": vcvtUFixedFpDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtUFixedFpDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtUFixedFpDIop);
+    exec_output += PredOpExecute.subst(vcvtUFixedFpDIop);
+
+    vcvtFpSHFixedSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest.sh = vfpFpSToFixed(FpOp1, true, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtFpSHFixedSIop = InstObjParams("vcvt", "VcvtFpSHFixedS",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtFpSHFixedSCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpSHFixedSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpSHFixedSIop);
+    exec_output += PredOpExecute.subst(vcvtFpSHFixedSIop);
+
+    vcvtFpSHFixedDCode = '''
+        IntDoubleUnion cOp1;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        uint64_t mid = vfpFpDToFixed(cOp1.fp, true, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = mid;
+        FpDestP1.uw = mid >> 32;
+    '''
+    vcvtFpSHFixedDIop = InstObjParams("vcvt", "VcvtFpSHFixedD",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtFpSHFixedDCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpSHFixedDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpSHFixedDIop);
+    exec_output += PredOpExecute.subst(vcvtFpSHFixedDIop);
+
+    vcvtFpUHFixedSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest.uh = vfpFpSToFixed(FpOp1, false, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtFpUHFixedSIop = InstObjParams("vcvt", "VcvtFpUHFixedS",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtFpUHFixedSCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpUHFixedSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpUHFixedSIop);
+    exec_output += PredOpExecute.subst(vcvtFpUHFixedSIop);
+
+    vcvtFpUHFixedDCode = '''
+        IntDoubleUnion cOp1;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        uint64_t mid = vfpFpDToFixed(cOp1.fp, false, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = mid;
+        FpDestP1.uw = mid >> 32;
+    '''
+    vcvtFpUHFixedDIop = InstObjParams("vcvt", "VcvtFpUHFixedD",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtFpUHFixedDCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpUHFixedDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpUHFixedDIop);
+    exec_output += PredOpExecute.subst(vcvtFpUHFixedDIop);
+
+    vcvtSHFixedFpSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest = vfpSFixedToFpS(FpOp1.sh, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtSHFixedFpSIop = InstObjParams("vcvt", "VcvtSHFixedFpS",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtSHFixedFpSCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtSHFixedFpSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtSHFixedFpSIop);
+    exec_output += PredOpExecute.subst(vcvtSHFixedFpSIop);
+
+    vcvtSHFixedFpDCode = '''
+        IntDoubleUnion cDest;
+        uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        cDest.fp = vfpSFixedToFpD(mid, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vcvtSHFixedFpDIop = InstObjParams("vcvt", "VcvtSHFixedFpD",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtSHFixedFpDCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtSHFixedFpDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtSHFixedFpDIop);
+    exec_output += PredOpExecute.subst(vcvtSHFixedFpDIop);
+
+    vcvtUHFixedFpSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest = vfpUFixedToFpS(FpOp1.uh, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtUHFixedFpSIop = InstObjParams("vcvt", "VcvtUHFixedFpS",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtUHFixedFpSCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtUHFixedFpSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtUHFixedFpSIop);
+    exec_output += PredOpExecute.subst(vcvtUHFixedFpSIop);
+
+    vcvtUHFixedFpDCode = '''
+        IntDoubleUnion cDest;
+        uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        cDest.fp = vfpUFixedToFpD(mid, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vcvtUHFixedFpDIop = InstObjParams("vcvt", "VcvtUHFixedFpD",
+                                      "VfpRegRegImmOp",
+                                      { "code": vcvtUHFixedFpDCode,
+                                        "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtUHFixedFpDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtUHFixedFpDIop);
+    exec_output += PredOpExecute.subst(vcvtUHFixedFpDIop);
+}};