ARM: Implement the VFP version of vdiv and vsqrt.
author Gabe Black <gblack@eecs.umich.edu>
Wed, 2 Jun 2010 17:58:14 +0000 (12:58 -0500)
committer Gabe Black <gblack@eecs.umich.edu>
Wed, 2 Jun 2010 17:58:14 +0000 (12:58 -0500)
src/arch/arm/isa/formats/fp.isa
src/arch/arm/isa/insts/fp.isa

index b6fcf4ac7c210c511010348dd503e50b541a1a72..2cca96beabec883df92ee1550ce770367a3c1c97 100644 (file)
@@ -552,7 +552,25 @@ let {{
             }
           case 0x8:
             if ((opc3 & 0x1) == 0) {
-                return new WarnUnimplemented("vdiv", machInst);
+                uint32_t vd;
+                uint32_t vm;
+                uint32_t vn;
+                if (bits(machInst, 8) == 0) {
+                    vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                    vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                    vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+                    return new VdivS(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                } else {
+                    vd = (bits(machInst, 22) << 5) |
+                         (bits(machInst, 15, 12) << 1);
+                    vm = (bits(machInst, 5) << 5) |
+                         (bits(machInst, 3, 0) << 1);
+                    vn = (bits(machInst, 7) << 5) |
+                         (bits(machInst, 19, 16) << 1);
+                    return new VdivD(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                }
             }
             break;
           case 0xb:
@@ -624,7 +642,21 @@ let {{
                                 (IntRegIndex)vd, (IntRegIndex)vm);
                     }
                 } else {
-                    return new WarnUnimplemented("vsqrt", machInst);
+                    uint32_t vd;
+                    uint32_t vm;
+                    if (bits(machInst, 8) == 0) {
+                        vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                        vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                        return new VsqrtS(machInst,
+                                (IntRegIndex)vd, (IntRegIndex)vm);
+                    } else {
+                        vd = (bits(machInst, 22) << 5) |
+                             (bits(machInst, 15, 12) << 1);
+                        vm = (bits(machInst, 5) << 5) |
+                             (bits(machInst, 3, 0) << 1);
+                        return new VsqrtD(machInst,
+                                (IntRegIndex)vd, (IntRegIndex)vm);
+                    }
                 }
               case 0x2:
               case 0x3:
index dd3f6598c41f746df17390dd62066e08032001b4..99efcec3276fc002abacb804df08e98242421fb6 100644 (file)
@@ -356,4 +356,59 @@ let {{
     header_output += RegRegRegOpDeclare.subst(vsubDIop);
     decoder_output += RegRegRegOpConstructor.subst(vsubDIop);
     exec_output += PredOpExecute.subst(vsubDIop);
+
+    vdivSCode = '''
+        FpDest = FpOp1 / FpOp2;
+    '''
+    vdivSIop = InstObjParams("vdivs", "VdivS", "RegRegRegOp",
+                                     { "code": vdivSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vdivSIop);
+    decoder_output += RegRegRegOpConstructor.subst(vdivSIop);
+    exec_output += PredOpExecute.subst(vdivSIop);
+
+    vdivDCode = '''
+        IntDoubleUnion cOp1, cOp2, cDest;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+        cDest.fp = cOp1.fp / cOp2.fp;
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vdivDIop = InstObjParams("vdivd", "VdivD", "RegRegRegOp",
+                                     { "code": vdivDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vdivDIop);
+    decoder_output += RegRegRegOpConstructor.subst(vdivDIop);
+    exec_output += PredOpExecute.subst(vdivDIop);
+
+    vsqrtSCode = '''
+        FpDest = sqrtf(FpOp1);
+        if (FpOp1 < 0) {
+            FpDest = NAN;
+        }
+    '''
+    vsqrtSIop = InstObjParams("vsqrts", "VsqrtS", "RegRegOp",
+                                     { "code": vsqrtSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegOpDeclare.subst(vsqrtSIop);
+    decoder_output += RegRegOpConstructor.subst(vsqrtSIop);
+    exec_output += PredOpExecute.subst(vsqrtSIop);
+
+    vsqrtDCode = '''
+        IntDoubleUnion cOp1, cDest;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        cDest.fp = sqrt(cOp1.fp);
+        if (cOp1.fp < 0) {
+            cDest.fp = NAN;
+        }
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vsqrtDIop = InstObjParams("vsqrtd", "VsqrtD", "RegRegOp",
+                                     { "code": vsqrtDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegOpDeclare.subst(vsqrtDIop);
+    decoder_output += RegRegOpConstructor.subst(vsqrtDIop);
+    exec_output += PredOpExecute.subst(vsqrtDIop);
 }};