arch-arm: implement VMINNM and VMAXNM scalar version
author Ciro Santilli <ciro.santilli@arm.com>
Tue, 30 Apr 2019 17:24:00 +0000 (18:24 +0100)
committer Ciro Santilli <ciro.santilli@arm.com>
Fri, 17 May 2019 10:02:40 +0000 (10:02 +0000)
ARMv8.2 16-bit versions have not yet been implemented, but placeholders
were created for them.

Refactor the nearby decoding tree to closely match the ARM spec A32 decode
table.

That piece of the tree can also be called from Thumb, which decodes it in
the same way, although the Thumb decode table uses different terminology.

The old code matched neither the A32 nor the T32 terminology, so it is
better to match at least one of them to help verify correctness.

Change-Id: Iabbbca2932557cf6c98ce36690c385c3ddf39ed8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/18690
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
src/arch/arm/isa/formats/fp.isa
src/arch/arm/isa/insts/fp.isa
src/arch/arm/isa/insts/neon.isa

index e730833dbfd233a5d169060585486774b26a155e..da439acb8051f6ea7b2fc264a73cdbbb75b01766 100644 (file)
@@ -2034,6 +2034,7 @@ let {{
                               (bits(machInst, 15, 12) << 1));
         }
     }
+
     IntRegIndex decodeFpVm(ExtMachInst machInst, uint32_t size, bool isInt)
     {
         if (!isInt and size == 3) {
@@ -2044,31 +2045,64 @@ let {{
                               (bits(machInst, 3, 0) << 1));
         }
     }
-    StaticInstPtr
-    decodeShortFpTransfer(ExtMachInst machInst)
+
+    IntRegIndex decodeFpVn(ExtMachInst machInst, uint32_t size)
     {
-        const uint32_t l = bits(machInst, 20);
-        const uint32_t c = bits(machInst, 8);
-        const uint32_t a = bits(machInst, 23, 21);
-        const uint32_t q = bits(machInst, 6, 5);
-        const uint32_t o1 = bits(machInst, 18);
-        if ((machInst.thumb == 1 && bits(machInst, 28) == 1) ||
-            (machInst.thumb == 0 && machInst.condCode == 0xf)) {
-            // Determine if this is backported aarch64 FP instruction
-            const bool b31_b24 = bits(machInst, 31, 24) == 0xFE;
-            const bool b23 = bits(machInst, 23);
-            const bool b21_b19 = bits(machInst, 21, 19) == 0x7;
-            const bool b11_b9  = bits(machInst, 11, 9) == 0x5;
-            const uint32_t size = bits(machInst, 9, 8);
-            const bool op3 = bits(machInst, 6);
-            const bool b4 = bits(machInst, 4) == 0x0;
-            const uint32_t rm = bits(machInst, 17, 16);
-            IntRegIndex vd = decodeFpVd(machInst, size, false);
-            IntRegIndex vm = decodeFpVm(machInst, size, false);
-            IntRegIndex vdInt = decodeFpVd(machInst, size, true);
-            if (b31_b24 && b23 && b21_b19 && b11_b9 && op3 && b4) {
+        if (size == 3) {
+            return (IntRegIndex)((bits(machInst, 7) << 5) |
+                            (bits(machInst, 19, 16) << 1));
+        } else {
+            return (IntRegIndex)(bits(machInst, 7) |
+                            (bits(machInst, 19, 16) << 1));
+        }
+    }
+
+    StaticInstPtr
+    decodeFloatingPointDataProcessing(ExtMachInst machInst) {
+        const uint32_t op0 = bits(machInst, 23, 20);
+        const uint32_t op1 = bits(machInst, 19, 16);
+        const uint32_t op2 = bits(machInst, 9, 8);
+        const uint32_t op3 = bits(machInst, 6);
+        const uint32_t rm = bits(machInst, 17, 16);
+        const uint32_t size = bits(machInst, 9, 8);
+        IntRegIndex vd = decodeFpVd(machInst, size, false);
+        IntRegIndex vm = decodeFpVm(machInst, size, false);
+        IntRegIndex vdInt = decodeFpVd(machInst, size, true);
+        IntRegIndex vn = decodeFpVn(machInst, size);
+        if (bits(machInst, 31, 24) == 0xFE && !bits(machInst, 4)) {
+            if (bits(op0, 3) == 0 && op2 != 0 && !op3){
+                ConditionCode cond;
+                switch(bits(machInst, 21, 20)) {
+                case 0x0: cond = COND_EQ; break;
+                case 0x1: cond = COND_VS; break;
+                case 0x2: cond = COND_GE; break;
+                case 0x3: cond = COND_GT; break;
+                }
+                if (size == 3) {
+                    return new VselD(machInst, vd, vn, vm, cond);
+                } else {
+                    return new VselS(machInst, vd, vn, vm, cond);
+                }
+            } else if (bits(op0, 3) == 1 && bits(op0, 1, 0) == 0 && op2 != 0) {
+                const bool op = bits(machInst, 6);
+                if (op) {
+                    if (size == 1) {
+                        return new FailUnimplemented("vminnm.f16", machInst);
+                    }
+                    return decodeNeonSizeSingleDouble<VminnmS, VminnmD>(
+                        size, machInst, vd, vn, vm);
+                } else {
+                    if (size == 1) {
+                        return new FailUnimplemented("vmaxnm.f16", machInst);
+                    }
+                    return decodeNeonSizeSingleDouble<VmaxnmS, VmaxnmD>(
+                        size, machInst, vd, vn, vm);
+                }
+            } else if (bits(op0, 3) && bits(op0, 1, 0) == 3 &&
+                        bits(op1, 3) && op2 != 0 && op3)
+                    {
+                const uint32_t o1 = bits(machInst, 18);
                 if (o1 == 0) {
-                    // VINT* Integer Rounding Instruction
                     if (size == 3) {
                         switch(rm) {
                             case 0x0:
@@ -2105,119 +2139,112 @@ let {{
                 } else {
                     const bool op = bits(machInst, 7);
                     switch(rm) {
-                      case 0x0:
+                    case 0x0:
                         switch(size) {
-                          case 0x0:
+                        case 0x0:
                             return new Unknown(machInst);
-                          case 0x1:
+                        case 0x1:
                             return new FailUnimplemented(
                                 "vcvta.u32.f16", machInst);
-                          case 0x2:
+                        case 0x2:
                             if (op) {
                                 return new VcvtaFpSIntS(machInst, vdInt, vm);
                             } else {
                                 return new VcvtaFpUIntS(machInst, vdInt, vm);
                             }
-                          case 0x3:
+                        case 0x3:
                             if (op) {
                                 return new VcvtaFpSIntD(machInst, vdInt, vm);
                             } else {
                                 return new VcvtaFpUIntD(machInst, vdInt, vm);
                             }
-                          default: return new Unknown(machInst);
+                        default: return new Unknown(machInst);
                         }
-                      case 0x1:
+                    case 0x1:
                         switch(size) {
-                          case 0x0:
+                        case 0x0:
                             return new Unknown(machInst);
-                          case 0x1:
+                        case 0x1:
                             return new FailUnimplemented(
                                 "vcvtn.u32.f16", machInst);
-                          case 0x2:
+                        case 0x2:
                             if (op) {
                                 return new VcvtnFpSIntS(machInst, vdInt, vm);
                             } else {
                                 return new VcvtnFpUIntS(machInst, vdInt, vm);
                             }
-                          case 0x3:
+                        case 0x3:
                             if (op) {
                                 return new VcvtnFpSIntD(machInst, vdInt, vm);
                             } else {
                                 return new VcvtnFpUIntD(machInst, vdInt, vm);
                             }
-                          default: return new Unknown(machInst);
+                        default: return new Unknown(machInst);
                         }
-                      case 0x2:
+                    case 0x2:
                         switch(size) {
-                          case 0x0:
+                        case 0x0:
                             return new Unknown(machInst);
-                          case 0x1:
+                        case 0x1:
                             return new FailUnimplemented(
                                 "vcvtp.u32.f16", machInst);
-                          case 0x2:
+                        case 0x2:
                             if (op) {
                                 return new VcvtpFpSIntS(machInst, vdInt, vm);
                             } else {
                                 return new VcvtpFpUIntS(machInst, vdInt, vm);
                             }
-                          case 0x3:
+                        case 0x3:
                             if (op) {
                                 return new VcvtpFpSIntD(machInst, vdInt, vm);
                             } else {
                                 return new VcvtpFpUIntD(machInst, vdInt, vm);
                             }
-                          default: return new Unknown(machInst);
+                        default: return new Unknown(machInst);
                         }
-                      case 0x3:
+                    case 0x3:
                         switch(size) {
-                          case 0x0:
+                        case 0x0:
                             return new Unknown(machInst);
-                          case 0x1:
+                        case 0x1:
                             return new FailUnimplemented(
                                 "vcvtm.u32.f16", machInst);
-                          case 0x2:
+                        case 0x2:
                             if (op) {
                                 return new VcvtmFpSIntS(machInst, vdInt, vm);
                             } else {
                                 return new VcvtmFpUIntS(machInst, vdInt, vm);
                             }
-                          case 0x3:
+                        case 0x3:
                             if (op) {
                                 return new VcvtmFpSIntD(machInst, vdInt, vm);
                             } else {
                                 return new VcvtmFpUIntD(machInst, vdInt, vm);
                             }
-                          default: return new Unknown(machInst);
+                        default: return new Unknown(machInst);
                         }
-                      default: return new Unknown(machInst);
+                    default: return new Unknown(machInst);
                     }
                 }
-            } else if (b31_b24 && !b23 && b11_b9 && !op3 && b4){
-                // VSEL* floating point conditional select
-
-                ConditionCode cond;
-                switch(bits(machInst, 21, 20)) {
-                  case 0x0: cond = COND_EQ; break;
-                  case 0x1: cond = COND_VS; break;
-                  case 0x2: cond = COND_GE; break;
-                  case 0x3: cond = COND_GT; break;
-                }
-
-                if (size == 3) {
-                      const IntRegIndex vn =
-                          (IntRegIndex)((bits(machInst, 7) << 5) |
-                                       (bits(machInst, 19, 16) << 1));
-                    return new VselD(machInst, vd, vn, vm, cond);
-                } else {
-                      const IntRegIndex vn =
-                          (IntRegIndex)((bits(machInst, 19, 16) << 1) |
-                                        bits(machInst, 7));
-                      return new VselS(machInst, vd, vn, vm, cond);
-                }
             } else {
                 return new Unknown(machInst);
             }
+        } else {
+            return new Unknown(machInst);
+        }
+    }
+
+    StaticInstPtr
+    decodeShortFpTransfer(ExtMachInst machInst)
+    {
+        if ((machInst.thumb == 1 && bits(machInst, 28) == 1) ||
+            (machInst.thumb == 0 && machInst.condCode == 0xf)) {
+                return decodeFloatingPointDataProcessing(machInst);
         }
+        const uint32_t l = bits(machInst, 20);
+        const uint32_t c = bits(machInst, 8);
+        const uint32_t a = bits(machInst, 23, 21);
+        const uint32_t q = bits(machInst, 6, 5);
         if (l == 0 && c == 0) {
             if (a == 0) {
                 const uint32_t vn = (bits(machInst, 19, 16) << 1) |
index d8323c455f7f465dd1b9885b917bb9ca5118dffd..df4d58308bbd282c4481b434909bb8544f651bdc 100644 (file)
@@ -578,6 +578,66 @@ let {{
     buildBinFpOp("vmul", "Vmul", "FpRegRegRegOp", "SimdFloatMultOp", "fpMulS",
                  "fpMulD")
 
+    def buildBinOp(name, base, opClass, op):
+        '''
+        Create backported aarch64 instructions that use fplib.
+
+        Because they are backported, these instructions are unconditional.
+        '''
+        global header_output, decoder_output, exec_output
+        inst_datas = [
+            (
+                "s",
+                '''
+                FpDest_uw = fplib%(op)s<>(FpOp1_uw, FpOp2_uw, fpscr);
+                '''
+            ),
+            (
+                "d",
+                '''
+                uint64_t op1 = ((uint64_t)FpOp1P0_uw |
+                               ((uint64_t)FpOp1P1_uw << 32));
+                uint64_t op2 = ((uint64_t)FpOp2P0_uw |
+                               ((uint64_t)FpOp2P1_uw << 32));
+                uint64_t dest = fplib%(op)s<>(op1, op2, fpscr);
+                FpDestP0_uw = dest;
+                FpDestP1_uw = dest >> 32;
+                '''
+            )
+        ]
+        Name = name[0].upper() + name[1:]
+        declareTempl = eval(base + "Declare");
+        constructorTempl = eval(base + "Constructor");
+        for size_suffix, code in inst_datas:
+            code = (
+                '''
+                FPSCR fpscr = (FPSCR)FpscrExc;
+                ''' +
+                code +
+                '''
+                FpscrExc = fpscr;
+                '''
+            )
+            iop = InstObjParams(
+                name + size_suffix,
+                Name + size_suffix.upper(),
+                base,
+                {
+                    "code": code % {"op": op},
+                    "op_class": opClass
+                },
+                []
+            )
+            header_output += declareTempl.subst(iop)
+            decoder_output += constructorTempl.subst(iop)
+            exec_output += BasicExecute.subst(iop)
+    ops = [
+        ("vminnm", "FpRegRegRegOp", "SimdFloatCmpOp", "MinNum"),
+        ("vmaxnm", "FpRegRegRegOp", "SimdFloatCmpOp", "MaxNum"),
+    ]
+    for op in ops:
+        buildBinOp(*op)
+
     def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None):
         if doubleOp is None:
             doubleOp = singleOp
index bfebd103de10477161aee8c7cb812662895f74e6..f242451b2b5225eb94a80f4faf34a3cc09875105 100644 (file)
@@ -58,6 +58,22 @@ output header {{
         }
     }
 
+    template <class BaseS, class BaseD>
+    StaticInstPtr
+    decodeNeonSizeSingleDouble(unsigned size,
+                         ExtMachInst machInst, IntRegIndex dest,
+                         IntRegIndex op1, IntRegIndex op2)
+    {
+        switch (size) {
+          case 2:
+            return new BaseS(machInst, dest, op1, op2);
+          case 3:
+            return new BaseD(machInst, dest, op1, op2);
+          default:
+            return new Unknown(machInst);
+        }
+    }
+
     template <template <typename T> class Base>
     StaticInstPtr
     decodeNeonSThreeUReg(unsigned size,