arch-arm: implement VMINNM and VMAXNM scalar version

author Ciro Santilli <ciro.santilli@arm.com>

Tue, 30 Apr 2019 17:24:00 +0000 (18:24 +0100)

committer Ciro Santilli <ciro.santilli@arm.com>

Fri, 17 May 2019 10:02:40 +0000 (10:02 +0000)
author Ciro Santilli <ciro.santilli@arm.com>
Tue, 30 Apr 2019 17:24:00 +0000 (18:24 +0100)
committer Ciro Santilli <ciro.santilli@arm.com>
Fri, 17 May 2019 10:02:40 +0000 (10:02 +0000)
diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa

index e730833dbfd233a5d169060585486774b26a155e..da439acb8051f6ea7b2fc264a73cdbbb75b01766 100644 (file)
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -2034,6 +2034,7 @@ let {{
                                (bits(machInst, 15, 12) << 1));
          }
      }
+
      IntRegIndex decodeFpVm(ExtMachInst machInst, uint32_t size, bool isInt)
      {
          if (!isInt and size == 3) {
@@ -2044,31 +2045,64 @@ let {{
                                (bits(machInst, 3, 0) << 1));
          }
      }
-    StaticInstPtr
-    decodeShortFpTransfer(ExtMachInst machInst)
+
+    IntRegIndex decodeFpVn(ExtMachInst machInst, uint32_t size)
      {
-        const uint32_t l = bits(machInst, 20);
-        const uint32_t c = bits(machInst, 8);
-        const uint32_t a = bits(machInst, 23, 21);
-        const uint32_t q = bits(machInst, 6, 5);
-        const uint32_t o1 = bits(machInst, 18);
-        if ((machInst.thumb == 1 && bits(machInst, 28) == 1) ||
-            (machInst.thumb == 0 && machInst.condCode == 0xf)) {
-            // Determine if this is backported aarch64 FP instruction
-            const bool b31_b24 = bits(machInst, 31, 24) == 0xFE;
-            const bool b23 = bits(machInst, 23);
-            const bool b21_b19 = bits(machInst, 21, 19) == 0x7;
-            const bool b11_b9  = bits(machInst, 11, 9) == 0x5;
-            const uint32_t size = bits(machInst, 9, 8);
-            const bool op3 = bits(machInst, 6);
-            const bool b4 = bits(machInst, 4) == 0x0;
-            const uint32_t rm = bits(machInst, 17, 16);
-            IntRegIndex vd = decodeFpVd(machInst, size, false);
-            IntRegIndex vm = decodeFpVm(machInst, size, false);
-            IntRegIndex vdInt = decodeFpVd(machInst, size, true);
-            if (b31_b24 && b23 && b21_b19 && b11_b9 && op3 && b4) {
+        if (size == 3) {
+            return (IntRegIndex)((bits(machInst, 7) << 5) |
+                            (bits(machInst, 19, 16) << 1));
+        } else {
+            return (IntRegIndex)(bits(machInst, 7) |
+                            (bits(machInst, 19, 16) << 1));
+        }
+    }
+
+    StaticInstPtr
+    decodeFloatingPointDataProcessing(ExtMachInst machInst) {
+        const uint32_t op0 = bits(machInst, 23, 20);
+        const uint32_t op1 = bits(machInst, 19, 16);
+        const uint32_t op2 = bits(machInst, 9, 8);
+        const uint32_t op3 = bits(machInst, 6);
+        const uint32_t rm = bits(machInst, 17, 16);
+        const uint32_t size = bits(machInst, 9, 8);
+        IntRegIndex vd = decodeFpVd(machInst, size, false);
+        IntRegIndex vm = decodeFpVm(machInst, size, false);
+        IntRegIndex vdInt = decodeFpVd(machInst, size, true);
+        IntRegIndex vn = decodeFpVn(machInst, size);
+        if (bits(machInst, 31, 24) == 0xFE && !bits(machInst, 4)) {
+            if (bits(op0, 3) == 0 && op2 != 0 && !op3){
+                ConditionCode cond;
+                switch(bits(machInst, 21, 20)) {
+                case 0x0: cond = COND_EQ; break;
+                case 0x1: cond = COND_VS; break;
+                case 0x2: cond = COND_GE; break;
+                case 0x3: cond = COND_GT; break;
+                }
+                if (size == 3) {
+                    return new VselD(machInst, vd, vn, vm, cond);
+                } else {
+                    return new VselS(machInst, vd, vn, vm, cond);
+                }
+            } else if (bits(op0, 3) == 1 && bits(op0, 1, 0) == 0 && op2 != 0) {
+                const bool op = bits(machInst, 6);
+                if (op) {
+                    if (size == 1) {
+                        return new FailUnimplemented("vminnm.f16", machInst);
+                    }
+                    return decodeNeonSizeSingleDouble<VminnmS, VminnmD>(
+                        size, machInst, vd, vn, vm);
+                } else {
+                    if (size == 1) {
+                        return new FailUnimplemented("vmaxnm.f16", machInst);
+                    }
+                    return decodeNeonSizeSingleDouble<VmaxnmS, VmaxnmD>(
+                        size, machInst, vd, vn, vm);
+                }
+            } else if (bits(op0, 3) && bits(op0, 1, 0) == 3 &&
+                        bits(op1, 3) && op2 != 0 && op3)
+                    {
+                const uint32_t o1 = bits(machInst, 18);
                  if (o1 == 0) {
-                    // VINT* Integer Rounding Instruction
                      if (size == 3) {
                          switch(rm) {
                              case 0x0:
@@ -2105,119 +2139,112 @@ let {{
                  } else {
                      const bool op = bits(machInst, 7);
                      switch(rm) {
-                      case 0x0:
+                    case 0x0:
                          switch(size) {
-                          case 0x0:
+                        case 0x0:
                              return new Unknown(machInst);
-                          case 0x1:
+                        case 0x1:
                              return new FailUnimplemented(
                                  "vcvta.u32.f16", machInst);
-                          case 0x2:
+                        case 0x2:
                              if (op) {
                                  return new VcvtaFpSIntS(machInst, vdInt, vm);
                              } else {
                                  return new VcvtaFpUIntS(machInst, vdInt, vm);
                              }
-                          case 0x3:
+                        case 0x3:
                              if (op) {
                                  return new VcvtaFpSIntD(machInst, vdInt, vm);
                              } else {
                                  return new VcvtaFpUIntD(machInst, vdInt, vm);
                              }
-                          default: return new Unknown(machInst);
+                        default: return new Unknown(machInst);
                          }
-                      case 0x1:
+                    case 0x1:
                          switch(size) {
-                          case 0x0:
+                        case 0x0:
                              return new Unknown(machInst);
-                          case 0x1:
+                        case 0x1:
                              return new FailUnimplemented(
                                  "vcvtn.u32.f16", machInst);
-                          case 0x2:
+                        case 0x2:
                              if (op) {
                                  return new VcvtnFpSIntS(machInst, vdInt, vm);
                              } else {
                                  return new VcvtnFpUIntS(machInst, vdInt, vm);
                              }
-                          case 0x3:
+                        case 0x3:
                              if (op) {
                                  return new VcvtnFpSIntD(machInst, vdInt, vm);
                              } else {
                                  return new VcvtnFpUIntD(machInst, vdInt, vm);
                              }
-                          default: return new Unknown(machInst);
+                        default: return new Unknown(machInst);
                          }
-                      case 0x2:
+                    case 0x2:
                          switch(size) {
-                          case 0x0:
+                        case 0x0:
                              return new Unknown(machInst);
-                          case 0x1:
+                        case 0x1:
                              return new FailUnimplemented(
                                  "vcvtp.u32.f16", machInst);
-                          case 0x2:
+                        case 0x2:
                              if (op) {
                                  return new VcvtpFpSIntS(machInst, vdInt, vm);
                              } else {
                                  return new VcvtpFpUIntS(machInst, vdInt, vm);
                              }
-                          case 0x3:
+                        case 0x3:
                              if (op) {
                                  return new VcvtpFpSIntD(machInst, vdInt, vm);
                              } else {
                                  return new VcvtpFpUIntD(machInst, vdInt, vm);
                              }
-                          default: return new Unknown(machInst);
+                        default: return new Unknown(machInst);
                          }
-                      case 0x3:
+                    case 0x3:
                          switch(size) {
-                          case 0x0:
+                        case 0x0:
                              return new Unknown(machInst);
-                          case 0x1:
+                        case 0x1:
                              return new FailUnimplemented(
                                  "vcvtm.u32.f16", machInst);
-                          case 0x2:
+                        case 0x2:
                              if (op) {
                                  return new VcvtmFpSIntS(machInst, vdInt, vm);
                              } else {
                                  return new VcvtmFpUIntS(machInst, vdInt, vm);
                              }
-                          case 0x3:
+                        case 0x3:
                              if (op) {
                                  return new VcvtmFpSIntD(machInst, vdInt, vm);
                              } else {
                                  return new VcvtmFpUIntD(machInst, vdInt, vm);
                              }
-                          default: return new Unknown(machInst);
+                        default: return new Unknown(machInst);
                          }
-                      default: return new Unknown(machInst);
+                    default: return new Unknown(machInst);
                      }
                  }
-            } else if (b31_b24 && !b23 && b11_b9 && !op3 && b4){
-                // VSEL* floating point conditional select
-
-                ConditionCode cond;
-                switch(bits(machInst, 21, 20)) {
-                  case 0x0: cond = COND_EQ; break;
-                  case 0x1: cond = COND_VS; break;
-                  case 0x2: cond = COND_GE; break;
-                  case 0x3: cond = COND_GT; break;
-                }
-
-                if (size == 3) {
-                      const IntRegIndex vn =
-                          (IntRegIndex)((bits(machInst, 7) << 5) |
-                                       (bits(machInst, 19, 16) << 1));
-                    return new VselD(machInst, vd, vn, vm, cond);
-                } else {
-                      const IntRegIndex vn =
-                          (IntRegIndex)((bits(machInst, 19, 16) << 1) |
-                                        bits(machInst, 7));
-                      return new VselS(machInst, vd, vn, vm, cond);
-                }
              } else {
                  return new Unknown(machInst);
              }
+        } else {
+            return new Unknown(machInst);
+        }
+    }
+
+    StaticInstPtr
+    decodeShortFpTransfer(ExtMachInst machInst)
+    {
+        if ((machInst.thumb == 1 && bits(machInst, 28) == 1) ||
+            (machInst.thumb == 0 && machInst.condCode == 0xf)) {
+                return decodeFloatingPointDataProcessing(machInst);
          }
+        const uint32_t l = bits(machInst, 20);
+        const uint32_t c = bits(machInst, 8);
+        const uint32_t a = bits(machInst, 23, 21);
+        const uint32_t q = bits(machInst, 6, 5);
          if (l == 0 && c == 0) {
              if (a == 0) {
                  const uint32_t vn = (bits(machInst, 19, 16) << 1) |
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa

index d8323c455f7f465dd1b9885b917bb9ca5118dffd..df4d58308bbd282c4481b434909bb8544f651bdc 100644 (file)
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -578,6 +578,66 @@ let {{
      buildBinFpOp("vmul", "Vmul", "FpRegRegRegOp", "SimdFloatMultOp", "fpMulS",
                   "fpMulD")
  
+    def buildBinOp(name, base, opClass, op):
+        '''
+        Create backported aarch64 instructions that use fplib.
+
+        Because they are backported, these instructions are unconditional.
+        '''
+        global header_output, decoder_output, exec_output
+        inst_datas = [
+            (
+                "s",
+                '''
+                FpDest_uw = fplib%(op)s<>(FpOp1_uw, FpOp2_uw, fpscr);
+                '''
+            ),
+            (
+                "d",
+                '''
+                uint64_t op1 = ((uint64_t)FpOp1P0_uw |
+                               ((uint64_t)FpOp1P1_uw << 32));
+                uint64_t op2 = ((uint64_t)FpOp2P0_uw |
+                               ((uint64_t)FpOp2P1_uw << 32));
+                uint64_t dest = fplib%(op)s<>(op1, op2, fpscr);
+                FpDestP0_uw = dest;
+                FpDestP1_uw = dest >> 32;
+                '''
+            )
+        ]
+        Name = name[0].upper() + name[1:]
+        declareTempl = eval(base + "Declare");
+        constructorTempl = eval(base + "Constructor");
+        for size_suffix, code in inst_datas:
+            code = (
+                '''
+                FPSCR fpscr = (FPSCR)FpscrExc;
+                ''' +
+                code +
+                '''
+                FpscrExc = fpscr;
+                '''
+            )
+            iop = InstObjParams(
+                name + size_suffix,
+                Name + size_suffix.upper(),
+                base,
+                {
+                    "code": code % {"op": op},
+                    "op_class": opClass
+                },
+                []
+            )
+            header_output += declareTempl.subst(iop)
+            decoder_output += constructorTempl.subst(iop)
+            exec_output += BasicExecute.subst(iop)
+    # Instructions generated through buildBinOp. Each entry supplies its
+    # arguments in order: (name, base, opClass, op), where op is the suffix
+    # appended to "fplib" to pick the routine that computes the result.
+    ops = [
+        ("vminnm", "FpRegRegRegOp", "SimdFloatCmpOp", "MinNum"),
+        ("vmaxnm", "FpRegRegRegOp", "SimdFloatCmpOp", "MaxNum"),
+    ]
+    for op in ops:
+        buildBinOp(*op)
+
      def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None):
          if doubleOp is None:
              doubleOp = singleOp
diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa

index bfebd103de10477161aee8c7cb812662895f74e6..f242451b2b5225eb94a80f4faf34a3cc09875105 100644 (file)
--- a/src/arch/arm/isa/insts/neon.isa
+++ b/src/arch/arm/isa/insts/neon.isa
@@ -58,6 +58,22 @@ output header {{
          }
      }
  
+    template <class BaseS, class BaseD>
+    StaticInstPtr
+    decodeNeonSizeSingleDouble(unsigned size,
+                         ExtMachInst machInst, IntRegIndex dest,
+                         IntRegIndex op1, IntRegIndex op2)
+    {
+        switch (size) {
+          case 2:
+            return new BaseS(machInst, dest, op1, op2);
+          case 3:
+            return new BaseD(machInst, dest, op1, op2);
+          default:
+            return new Unknown(machInst);
+        }
+    }
+
      template <template <typename T> class Base>
      StaticInstPtr
      decodeNeonSThreeUReg(unsigned size,
author	Ciro Santilli <ciro.santilli@arm.com>
	Tue, 30 Apr 2019 17:24:00 +0000 (18:24 +0100)
committer	Ciro Santilli <ciro.santilli@arm.com>
	Fri, 17 May 2019 10:02:40 +0000 (10:02 +0000)
src/arch/arm/isa/formats/fp.isa		patch \| blob \| history
src/arch/arm/isa/insts/fp.isa		patch \| blob \| history
src/arch/arm/isa/insts/neon.isa		patch \| blob \| history