ARM: Implement the floating/fixed point VCVT instructions.
author Gabe Black <gblack@eecs.umich.edu>
Wed, 2 Jun 2010 17:58:15 +0000 (12:58 -0500)
committer Gabe Black <gblack@eecs.umich.edu>
Wed, 2 Jun 2010 17:58:15 +0000 (12:58 -0500)
src/arch/arm/insts/vfp.hh
src/arch/arm/isa/formats/fp.isa
src/arch/arm/isa/insts/fp.isa

index 4653843042af15f48531e9069b3dcefd9c35d058..ceeaaa3cdbe88aa8104de4b4e34122437bc8152c 100644 (file)
@@ -101,6 +101,150 @@ enum VfpRoundingMode
     VfpRoundZero = 3
 };
 
+static inline uint64_t
+vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
+{
+    fesetround(FeRoundZero);
+    val = val * powf(2.0, imm);
+    __asm__ __volatile__("" : "=m" (val) : "m" (val));
+    feclearexcept(FeAllExceptions);
+    if (isSigned) {
+        if (half) {
+            if (val < (int16_t)(1 << 15)) {
+                feraiseexcept(FeInvalid);
+                return (int16_t)(1 << 15);
+            }
+            if (val > (int16_t)mask(15)) {
+                feraiseexcept(FeInvalid);
+                return (int16_t)mask(15);
+            }
+            return (int16_t)val;
+        } else {
+            if (val < (int32_t)(1 << 31)) {
+                feraiseexcept(FeInvalid);
+                return (int32_t)(1 << 31);
+            }
+            if (val > (int32_t)mask(31)) {
+                feraiseexcept(FeInvalid);
+                return (int32_t)mask(31);
+            }
+            return (int32_t)val;
+        }
+    } else {
+        if (half) {
+            if (val < 0) {
+                feraiseexcept(FeInvalid);
+                return 0;
+            }
+            if (val > (mask(16))) {
+                feraiseexcept(FeInvalid);
+                return mask(16);
+            }
+            return (uint16_t)val;
+        } else {
+            if (val < 0) {
+                feraiseexcept(FeInvalid);
+                return 0;
+            }
+            if (val > (mask(32))) {
+                feraiseexcept(FeInvalid);
+                return mask(32);
+            }
+            return (uint32_t)val;
+        }
+    }
+}
+
+/**
+ * Convert an unsigned fixed point value to single precision.
+ *
+ * The host rounding mode is set to FeRoundNearest before the conversion.
+ *
+ * @param val The fixed point value.
+ * @param half When set, only the low 16 bits of val are used.
+ * @param imm The number of fraction bits in val.
+ * @return val divided by 2^imm.
+ */
+static inline float
+vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm)
+{
+    fesetround(FeRoundNearest);
+    uint32_t fixed = val;
+    if (half) {
+        fixed = (uint16_t)val;
+    }
+    const float scale = powf(2.0, imm);
+    return fixed / scale;
+}
+
+/**
+ * Convert a signed fixed point value to single precision.
+ *
+ * The host rounding mode is set to FeRoundNearest before the conversion.
+ *
+ * @param val The fixed point value.
+ * @param half When set, the low 16 bits of val are sign extended and the
+ *             rest of val is ignored.
+ * @param imm The number of fraction bits in val.
+ * @return val divided by 2^imm.
+ */
+static inline float
+vfpSFixedToFpS(int32_t val, bool half, uint8_t imm)
+{
+    fesetround(FeRoundNearest);
+    int32_t fixed = val;
+    if (half) {
+        fixed = sext<16>(val & mask(16));
+    }
+    const float scale = powf(2.0, imm);
+    return fixed / scale;
+}
+
+static inline uint64_t
+vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
+{
+    fesetround(FeRoundZero);
+    val = val * pow(2.0, imm);
+    __asm__ __volatile__("" : "=m" (val) : "m" (val));
+    feclearexcept(FeAllExceptions);
+    if (isSigned) {
+        if (half) {
+            if (val < (int16_t)(1 << 15)) {
+                feraiseexcept(FeInvalid);
+                return (int16_t)(1 << 15);
+            }
+            if (val > (int16_t)mask(15)) {
+                feraiseexcept(FeInvalid);
+                return (int16_t)mask(15);
+            }
+            return (int16_t)val;
+        } else {
+            if (val < (int32_t)(1 << 31)) {
+                feraiseexcept(FeInvalid);
+                return (int32_t)(1 << 31);
+            }
+            if (val > (int32_t)mask(31)) {
+                feraiseexcept(FeInvalid);
+                return (int32_t)mask(31);
+            }
+            return (int32_t)val;
+        }
+    } else {
+        if (half) {
+            if (val < 0) {
+                feraiseexcept(FeInvalid);
+                return 0;
+            }
+            if (val > mask(16)) {
+                feraiseexcept(FeInvalid);
+                return mask(16);
+            }
+            return (uint16_t)val;
+        } else {
+            if (val < 0) {
+                feraiseexcept(FeInvalid);
+                return 0;
+            }
+            if (val > mask(32)) {
+                feraiseexcept(FeInvalid);
+                return mask(32);
+            }
+            return (uint32_t)val;
+        }
+    }
+}
+
+/**
+ * Convert an unsigned fixed point value to double precision.
+ *
+ * The host rounding mode is set to FeRoundNearest before the conversion.
+ *
+ * @param val The fixed point value.
+ * @param half When set, only the low 16 bits of val are used.
+ * @param imm The number of fraction bits in val.
+ * @return val divided by 2^imm.
+ */
+static inline double
+vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm)
+{
+    fesetround(FeRoundNearest);
+    uint32_t fixed = val;
+    if (half) {
+        fixed = (uint16_t)val;
+    }
+    const double scale = pow(2.0, imm);
+    return fixed / scale;
+}
+
+/**
+ * Convert a signed fixed point value to double precision.
+ *
+ * The host rounding mode is set to FeRoundNearest before the conversion.
+ *
+ * @param val The fixed point value.
+ * @param half When set, the low 16 bits of val are sign extended and the
+ *             rest of val is ignored.
+ * @param imm The number of fraction bits in val.
+ * @return val divided by 2^imm.
+ */
+static inline double
+vfpSFixedToFpD(int32_t val, bool half, uint8_t imm)
+{
+    fesetround(FeRoundNearest);
+    int32_t fixed = val;
+    if (half) {
+        fixed = sext<16>(val & mask(16));
+    }
+    const double scale = pow(2.0, imm);
+    return fixed / scale;
+}
+
 typedef int VfpSavedState;
 
 static inline VfpSavedState
index e553b180dc479486c3447f4d593ced63942f5b92..3d40caf9e88d47c19f36253b518c3f90ed17de14 100644 (file)
@@ -683,9 +683,47 @@ let {{
                     }
                 }
               case 0xa:
+                {
+                    const bool half = (bits(machInst, 7) == 0);
+                    const uint32_t imm = bits(machInst, 5) |
+                                         (bits(machInst, 3, 0) << 1);
+                    const uint32_t size =
+                        (bits(machInst, 7) == 0 ? 16 : 32) - imm;
+                    if (single) {
+                        if (half) {
+                            return new VcvtSHFixedFpS(machInst, vd, vd, size);
+                        } else {
+                            return new VcvtSFixedFpS(machInst, vd, vd, size);
+                        }
+                    } else {
+                        if (half) {
+                            return new VcvtSHFixedFpD(machInst, vd, vd, size);
+                        } else {
+                            return new VcvtSFixedFpD(machInst, vd, vd, size);
+                        }
+                    }
+                }
               case 0xb:
-                // Between FP and fixed point.
-                return new WarnUnimplemented("vcvt", machInst);
+                {
+                    const bool half = (bits(machInst, 7) == 0);
+                    const uint32_t imm = bits(machInst, 5) |
+                                         (bits(machInst, 3, 0) << 1);
+                    const uint32_t size =
+                        (bits(machInst, 7) == 0 ? 16 : 32) - imm;
+                    if (single) {
+                        if (half) {
+                            return new VcvtUHFixedFpS(machInst, vd, vd, size);
+                        } else {
+                            return new VcvtUFixedFpS(machInst, vd, vd, size);
+                        }
+                    } else {
+                        if (half) {
+                            return new VcvtUHFixedFpD(machInst, vd, vd, size);
+                        } else {
+                            return new VcvtUFixedFpD(machInst, vd, vd, size);
+                        }
+                    }
+                }
               case 0xc:
                 if (single) {
                     return new VcvtFpUIntS(machInst, vd, vm);
@@ -703,9 +741,47 @@ let {{
                     return new VcvtFpSIntD(machInst, vd, vm);
                 }
               case 0xe:
+                {
+                    const bool half = (bits(machInst, 7) == 0);
+                    const uint32_t imm = bits(machInst, 5) |
+                                         (bits(machInst, 3, 0) << 1);
+                    const uint32_t size =
+                        (bits(machInst, 7) == 0 ? 16 : 32) - imm;
+                    if (single) {
+                        if (half) {
+                            return new VcvtFpSHFixedS(machInst, vd, vd, size);
+                        } else {
+                            return new VcvtFpSFixedS(machInst, vd, vd, size);
+                        }
+                    } else {
+                        if (half) {
+                            return new VcvtFpSHFixedD(machInst, vd, vd, size);
+                        } else {
+                            return new VcvtFpSFixedD(machInst, vd, vd, size);
+                        }
+                    }
+                }
               case 0xf:
-                // Between FP and fixed point.
-                return new WarnUnimplemented("vcvt", machInst);
+                {
+                    const bool half = (bits(machInst, 7) == 0);
+                    const uint32_t imm = bits(machInst, 5) |
+                                         (bits(machInst, 3, 0) << 1);
+                    const uint32_t size =
+                        (bits(machInst, 7) == 0 ? 16 : 32) - imm;
+                    if (single) {
+                        if (half) {
+                            return new VcvtFpUHFixedS(machInst, vd, vd, size);
+                        } else {
+                            return new VcvtFpUFixedS(machInst, vd, vd, size);
+                        }
+                    } else {
+                        if (half) {
+                            return new VcvtFpUHFixedD(machInst, vd, vd, size);
+                        } else {
+                            return new VcvtFpUFixedD(machInst, vd, vd, size);
+                        }
+                    }
+                }
             }
             break;
         }
index 1a8f25c5a937ff00c175d2223a67aae385e1f403..db1c5bf6b848c172548c5aa5e112aefbafdc087f 100644 (file)
@@ -997,3 +997,242 @@ let {{
     decoder_output += VfpRegImmOpConstructor.subst(vcmpZeroDIop);
     exec_output += PredOpExecute.subst(vcmpZeroDIop);
 }};
+
+let {{
+
+    header_output = ""
+    decoder_output = ""
+    exec_output = ""
+
+    # Floating point to 32 bit fixed point conversions. Each conversion is
+    # bracketed by prepVfpFpscr()/setVfpFpscr(); presumably these save host
+    # FP state and fold raised exceptions back into Fpscr (confirm in vfp.hh).
+
+    # Single precision to signed fixed point (isSigned = true, half = false).
+    # The result is stored through FpDest.sw.
+    vcvtFpSFixedSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest.sw = vfpFpSToFixed(FpOp1, true, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtFpSFixedSIop = InstObjParams("vcvt", "VcvtFpSFixedS", "VfpRegRegImmOp",
+                                     { "code": vcvtFpSFixedSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpSFixedSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpSFixedSIop);
+    exec_output += PredOpExecute.subst(vcvtFpSFixedSIop);
+
+    # Double precision to signed fixed point (isSigned = true, half = false).
+    # The operand is assembled from the FpOp1P0/FpOp1P1 word pair and the
+    # 64 bit result is split across FpDestP0/FpDestP1.
+    vcvtFpSFixedDCode = '''
+        IntDoubleUnion cOp1;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        uint64_t mid = vfpFpDToFixed(cOp1.fp, true, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = mid;
+        FpDestP1.uw = mid >> 32;
+    '''
+    vcvtFpSFixedDIop = InstObjParams("vcvt", "VcvtFpSFixedD", "VfpRegRegImmOp",
+                                     { "code": vcvtFpSFixedDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpSFixedDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpSFixedDIop);
+    exec_output += PredOpExecute.subst(vcvtFpSFixedDIop);
+
+    # Single precision to unsigned fixed point (isSigned = false,
+    # half = false). The result is stored through FpDest.uw.
+    vcvtFpUFixedSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest.uw = vfpFpSToFixed(FpOp1, false, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtFpUFixedSIop = InstObjParams("vcvt", "VcvtFpUFixedS", "VfpRegRegImmOp",
+                                     { "code": vcvtFpUFixedSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpUFixedSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpUFixedSIop);
+    exec_output += PredOpExecute.subst(vcvtFpUFixedSIop);
+
+    # Double precision to unsigned fixed point (isSigned = false,
+    # half = false). Operand and result use the P0/P1 word pairs as above.
+    vcvtFpUFixedDCode = '''
+        IntDoubleUnion cOp1;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        uint64_t mid = vfpFpDToFixed(cOp1.fp, false, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = mid;
+        FpDestP1.uw = mid >> 32;
+    '''
+    vcvtFpUFixedDIop = InstObjParams("vcvt", "VcvtFpUFixedD", "VfpRegRegImmOp",
+                                     { "code": vcvtFpUFixedDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpUFixedDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpUFixedDIop);
+    exec_output += PredOpExecute.subst(vcvtFpUFixedDIop);
+
+    vcvtSFixedFpSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest = vfpSFixedToFpS(FpOp1.sw, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtSFixedFpSIop = InstObjParams("vcvt", "VcvtSFixedFpS", "VfpRegRegImmOp",
+                                     { "code": vcvtSFixedFpSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtSFixedFpSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtSFixedFpSIop);
+    exec_output += PredOpExecute.subst(vcvtSFixedFpSIop);
+
+    vcvtSFixedFpDCode = '''
+        IntDoubleUnion cDest;
+        uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        cDest.fp = vfpSFixedToFpD(mid, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vcvtSFixedFpDIop = InstObjParams("vcvt", "VcvtSFixedFpD", "VfpRegRegImmOp",
+                                     { "code": vcvtSFixedFpDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtSFixedFpDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtSFixedFpDIop);
+    exec_output += PredOpExecute.subst(vcvtSFixedFpDIop);
+
+    # Unsigned 32 bit fixed point to single precision (half = false). The
+    # operand is read through FpOp1.uw.
+    vcvtUFixedFpSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest = vfpUFixedToFpS(FpOp1.uw, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtUFixedFpSIop = InstObjParams("vcvt", "VcvtUFixedFpS", "VfpRegRegImmOp",
+                                     { "code": vcvtUFixedFpSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtUFixedFpSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtUFixedFpSIop);
+    exec_output += PredOpExecute.subst(vcvtUFixedFpSIop);
+
+    # Unsigned 32 bit fixed point to double precision (half = false). The
+    # source word pair is combined into a 64 bit value, of which the helper's
+    # uint32_t parameter keeps the low word. The double result is split
+    # across FpDestP0/FpDestP1.
+    vcvtUFixedFpDCode = '''
+        IntDoubleUnion cDest;
+        uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        cDest.fp = vfpUFixedToFpD(mid, false, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vcvtUFixedFpDIop = InstObjParams("vcvt", "VcvtUFixedFpD", "VfpRegRegImmOp",
+                                     { "code": vcvtUFixedFpDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtUFixedFpDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtUFixedFpDIop);
+    exec_output += PredOpExecute.subst(vcvtUFixedFpDIop);
+
+    # Floating point to 16 bit fixed point conversions (half = true).
+
+    # Single precision to signed half word fixed point. The result is stored
+    # through FpDest.sh.
+    vcvtFpSHFixedSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest.sh = vfpFpSToFixed(FpOp1, true, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtFpSHFixedSIop = InstObjParams("vcvt", "VcvtFpSHFixedS",
+                                      "VfpRegRegImmOp",
+                                     { "code": vcvtFpSHFixedSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpSHFixedSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpSHFixedSIop);
+    exec_output += PredOpExecute.subst(vcvtFpSHFixedSIop);
+
+    # Double precision to signed half word fixed point. The operand is
+    # assembled from the FpOp1P0/FpOp1P1 word pair and the 64 bit result is
+    # split across FpDestP0/FpDestP1.
+    vcvtFpSHFixedDCode = '''
+        IntDoubleUnion cOp1;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        uint64_t result = vfpFpDToFixed(cOp1.fp, true, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = result;
+        FpDestP1.uw = result >> 32;
+    '''
+    vcvtFpSHFixedDIop = InstObjParams("vcvt", "VcvtFpSHFixedD",
+                                      "VfpRegRegImmOp",
+                                     { "code": vcvtFpSHFixedDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpSHFixedDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpSHFixedDIop);
+    exec_output += PredOpExecute.subst(vcvtFpSHFixedDIop);
+
+    # Single precision to unsigned half word fixed point. The result is
+    # stored through FpDest.uh.
+    vcvtFpUHFixedSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest.uh = vfpFpSToFixed(FpOp1, false, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtFpUHFixedSIop = InstObjParams("vcvt", "VcvtFpUHFixedS",
+                                      "VfpRegRegImmOp",
+                                     { "code": vcvtFpUHFixedSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpUHFixedSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpUHFixedSIop);
+    exec_output += PredOpExecute.subst(vcvtFpUHFixedSIop);
+
+    # Double precision to unsigned half word fixed point. Operand and result
+    # use the P0/P1 word pairs as above.
+    vcvtFpUHFixedDCode = '''
+        IntDoubleUnion cOp1;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        uint64_t mid = vfpFpDToFixed(cOp1.fp, false, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = mid;
+        FpDestP1.uw = mid >> 32;
+    '''
+    vcvtFpUHFixedDIop = InstObjParams("vcvt", "VcvtFpUHFixedD",
+                                      "VfpRegRegImmOp",
+                                     { "code": vcvtFpUHFixedDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtFpUHFixedDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtFpUHFixedDIop);
+    exec_output += PredOpExecute.subst(vcvtFpUHFixedDIop);
+
+    # 16 bit fixed point to floating point conversions (half = true).
+
+    # Signed half word fixed point to single precision. The operand is read
+    # through FpOp1.sh.
+    vcvtSHFixedFpSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest = vfpSFixedToFpS(FpOp1.sh, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtSHFixedFpSIop = InstObjParams("vcvt", "VcvtSHFixedFpS",
+                                      "VfpRegRegImmOp",
+                                     { "code": vcvtSHFixedFpSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtSHFixedFpSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtSHFixedFpSIop);
+    exec_output += PredOpExecute.subst(vcvtSHFixedFpSIop);
+
+    # Signed half word fixed point to double precision. The source word pair
+    # is combined into a 64 bit value; the helper sign extends its low
+    # 16 bits. The double result is split across FpDestP0/FpDestP1.
+    vcvtSHFixedFpDCode = '''
+        IntDoubleUnion cDest;
+        uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        cDest.fp = vfpSFixedToFpD(mid, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vcvtSHFixedFpDIop = InstObjParams("vcvt", "VcvtSHFixedFpD",
+                                      "VfpRegRegImmOp",
+                                     { "code": vcvtSHFixedFpDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtSHFixedFpDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtSHFixedFpDIop);
+    exec_output += PredOpExecute.subst(vcvtSHFixedFpDIop);
+
+    # Unsigned half word fixed point to single precision. The operand is
+    # read through FpOp1.uh.
+    vcvtUHFixedFpSCode = '''
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        FpDest = vfpUFixedToFpS(FpOp1.uh, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+    '''
+    vcvtUHFixedFpSIop = InstObjParams("vcvt", "VcvtUHFixedFpS",
+                                      "VfpRegRegImmOp",
+                                     { "code": vcvtUHFixedFpSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtUHFixedFpSIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtUHFixedFpSIop);
+    exec_output += PredOpExecute.subst(vcvtUHFixedFpSIop);
+
+    # Unsigned half word fixed point to double precision. The source word
+    # pair is combined into a 64 bit value; the helper keeps its low 16 bits.
+    # The double result is split across FpDestP0/FpDestP1.
+    vcvtUHFixedFpDCode = '''
+        IntDoubleUnion cDest;
+        uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        VfpSavedState state = prepVfpFpscr(Fpscr);
+        cDest.fp = vfpUFixedToFpD(mid, true, imm);
+        Fpscr = setVfpFpscr(Fpscr, state);
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vcvtUHFixedFpDIop = InstObjParams("vcvt", "VcvtUHFixedFpD",
+                                      "VfpRegRegImmOp",
+                                     { "code": vcvtUHFixedFpDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += VfpRegRegImmOpDeclare.subst(vcvtUHFixedFpDIop);
+    decoder_output += VfpRegRegImmOpConstructor.subst(vcvtUHFixedFpDIop);
+    exec_output += PredOpExecute.subst(vcvtUHFixedFpDIop);
+}};