ARM: Implement the VFP negated multiplies.
author Gabe Black <gblack@eecs.umich.edu>
Wed, 2 Jun 2010 17:58:14 +0000 (12:58 -0500)
committer Gabe Black <gblack@eecs.umich.edu>
Wed, 2 Jun 2010 17:58:14 +0000 (12:58 -0500)
src/arch/arm/isa/formats/fp.isa
src/arch/arm/isa/insts/fp.isa

index 9bb062a2ed7cdde40eb5aef2a9b3aea496291e45..850f761d764674783663d64deeedf3a464e17066 100644 (file)
@@ -524,6 +524,48 @@ let {{
                             (IntRegIndex)vn, (IntRegIndex)vm);
                 }
             }
+          case 0x1:
+            if (bits(machInst, 6) == 1) {
+                uint32_t vd;
+                uint32_t vm;
+                uint32_t vn;
+                if (bits(machInst, 8) == 0) {
+                    vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                    vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                    vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+                    return new VnmlaS(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                } else {
+                    vd = (bits(machInst, 22) << 5) |
+                         (bits(machInst, 15, 12) << 1);
+                    vm = (bits(machInst, 5) << 5) |
+                         (bits(machInst, 3, 0) << 1);
+                    vn = (bits(machInst, 7) << 5) |
+                         (bits(machInst, 19, 16) << 1);
+                    return new VnmlaD(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                }
+            } else {
+                uint32_t vd;
+                uint32_t vm;
+                uint32_t vn;
+                if (bits(machInst, 8) == 0) {
+                    vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                    vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                    vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+                    return new VnmlsS(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                } else {
+                    vd = (bits(machInst, 22) << 5) |
+                         (bits(machInst, 15, 12) << 1);
+                    vm = (bits(machInst, 5) << 5) |
+                         (bits(machInst, 3, 0) << 1);
+                    vn = (bits(machInst, 7) << 5) |
+                         (bits(machInst, 19, 16) << 1);
+                    return new VnmlsD(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                }
+            }
           case 0x2:
             if ((opc3 & 0x1) == 0) {
                 uint32_t vd;
@@ -545,9 +587,27 @@ let {{
                     return new VmulD(machInst, (IntRegIndex)vd,
                             (IntRegIndex)vn, (IntRegIndex)vm);
                 }
+            } else {
+                uint32_t vd;
+                uint32_t vm;
+                uint32_t vn;
+                if (bits(machInst, 8) == 0) {
+                    vd = bits(machInst, 22) | (bits(machInst, 15, 12) << 1);
+                    vm = bits(machInst, 5) | (bits(machInst, 3, 0) << 1);
+                    vn = bits(machInst, 7) | (bits(machInst, 19, 16) << 1);
+                    return new VnmulS(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                } else {
+                    vd = (bits(machInst, 22) << 5) |
+                         (bits(machInst, 15, 12) << 1);
+                    vm = (bits(machInst, 5) << 5) |
+                         (bits(machInst, 3, 0) << 1);
+                    vn = (bits(machInst, 7) << 5) |
+                         (bits(machInst, 19, 16) << 1);
+                    return new VnmulD(machInst, (IntRegIndex)vd,
+                            (IntRegIndex)vn, (IntRegIndex)vm);
+                }
             }
-          case 0x1:
-            return new WarnUnimplemented("vnmla, vnmls, vnmul", machInst);
           case 0x3:
             if ((opc3 & 0x1) == 0) {
                 uint32_t vd;
index 58c2cafa7861fe5ee6557fcb0d9f26d006c3d177..d40b001769775faf1fb5520b8e0e1189f93fd3a2 100644 (file)
@@ -481,4 +481,109 @@ let {{
     header_output += RegRegRegOpDeclare.subst(vmlsDIop);
     decoder_output += RegRegRegOpConstructor.subst(vmlsDIop);
     exec_output += PredOpExecute.subst(vmlsDIop);
+
+    vnmlaSCode = '''
+        float mid = FpOp1 * FpOp2;
+        if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
+            mid = NAN;
+        }
+        FpDest = -FpDest - mid;
+    '''
+    vnmlaSIop = InstObjParams("vnmlas", "VnmlaS", "RegRegRegOp",
+                                     { "code": vnmlaSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vnmlaSIop);
+    decoder_output += RegRegRegOpConstructor.subst(vnmlaSIop);
+    exec_output += PredOpExecute.subst(vnmlaSIop);
+
+    vnmlaDCode = '''
+        IntDoubleUnion cOp1, cOp2, cDest;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+        cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+        double mid = cOp1.fp * cOp2.fp;
+        if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
+                (isinf(cOp2.fp) && cOp1.fp == 0)) {
+            mid = NAN;
+        }
+        cDest.fp = -cDest.fp - mid;
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vnmlaDIop = InstObjParams("vnmlad", "VnmlaD", "RegRegRegOp",
+                                     { "code": vnmlaDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vnmlaDIop);
+    decoder_output += RegRegRegOpConstructor.subst(vnmlaDIop);
+    exec_output += PredOpExecute.subst(vnmlaDIop);
+
+    vnmlsSCode = '''
+        float mid = FpOp1 * FpOp2;
+        if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
+            mid = NAN;
+        }
+        FpDest = -FpDest + mid;
+    '''
+    vnmlsSIop = InstObjParams("vnmlss", "VnmlsS", "RegRegRegOp",
+                                     { "code": vnmlsSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vnmlsSIop);
+    decoder_output += RegRegRegOpConstructor.subst(vnmlsSIop);
+    exec_output += PredOpExecute.subst(vnmlsSIop);
+
+    vnmlsDCode = '''
+        IntDoubleUnion cOp1, cOp2, cDest;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+        cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+        double mid = cOp1.fp * cOp2.fp;
+        if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
+                (isinf(cOp2.fp) && cOp1.fp == 0)) {
+            mid = NAN;
+        }
+        cDest.fp = -cDest.fp + mid;
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vnmlsDIop = InstObjParams("vnmlsd", "VnmlsD", "RegRegRegOp",
+                                     { "code": vnmlsDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vnmlsDIop);
+    decoder_output += RegRegRegOpConstructor.subst(vnmlsDIop);
+    exec_output += PredOpExecute.subst(vnmlsDIop);
+
+    vnmulSCode = '''
+        float mid = FpOp1 * FpOp2;
+        if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
+            mid = NAN;
+        }
+        FpDest = -mid;
+    '''
+    vnmulSIop = InstObjParams("vnmuls", "VnmulS", "RegRegRegOp",
+                                     { "code": vnmulSCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vnmulSIop);
+    decoder_output += RegRegRegOpConstructor.subst(vnmulSIop);
+    exec_output += PredOpExecute.subst(vnmulSIop);
+
+    vnmulDCode = '''
+        IntDoubleUnion cOp1, cOp2, cDest;
+        cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+        cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+        double mid = cOp1.fp * cOp2.fp;
+        if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
+                (isinf(cOp2.fp) && cOp1.fp == 0)) {
+            mid = NAN;
+        }
+        cDest.fp = -mid;
+        FpDestP0.uw = cDest.bits;
+        FpDestP1.uw = cDest.bits >> 32;
+    '''
+    vnmulDIop = InstObjParams("vnmuld", "VnmulD", "RegRegRegOp",
+                                     { "code": vnmulDCode,
+                                       "predicate_test": predicateTest }, [])
+    header_output += RegRegRegOpDeclare.subst(vnmulDIop);
+    decoder_output += RegRegRegOpConstructor.subst(vnmulDIop);
+    exec_output += PredOpExecute.subst(vnmulDIop);
 }};