arch-arm: Implementation of ARMv8.1 RDMA
author Jordi Vaquero <jordi.vaquero@metempsy.com>
Mon, 14 Sep 2020 16:08:38 +0000 (18:08 +0200)
committer Jordi Vaquero <jordi.vaquero@metempsy.com>
Tue, 17 Nov 2020 11:45:35 +0000 (11:45 +0000)
Adding RDMA implementation for ARMv8.1
    + isa/formats/*: Adding decoding of AArch64 and AArch32 instructions
    + isa/insts/neon.isa, neon64.isa: Adding function instructions

Change-Id: I430e8880723f373ffffa50079a87fd4ecc634d86
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36015
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
src/arch/arm/ArmISA.py
src/arch/arm/isa/formats/aarch64.isa
src/arch/arm/isa/formats/fp.isa
src/arch/arm/isa/formats/neon64.isa
src/arch/arm/isa/insts/neon.isa
src/arch/arm/isa/insts/neon64.isa

index 02f24d3f3e7593b839cf1def9f7cc29007ecdecb..3e1866538fb4010760843bc7ef7881170f8dadaf 100644 (file)
@@ -80,7 +80,7 @@ class ArmISA(BaseISA):
     id_isar2 = Param.UInt32(0x21232141, "Instruction Set Attribute Register 2")
     id_isar3 = Param.UInt32(0x01112131, "Instruction Set Attribute Register 3")
     id_isar4 = Param.UInt32(0x10010142, "Instruction Set Attribute Register 4")
-    id_isar5 = Param.UInt32(0x10000000, "Instruction Set Attribute Register 5")
+    id_isar5 = Param.UInt32(0x11000000, "Instruction Set Attribute Register 5")
 
     fpsid = Param.UInt32(0x410430a0, "Floating-point System ID Register")
 
@@ -98,8 +98,8 @@ class ArmISA(BaseISA):
     id_aa64dfr1_el1 = Param.UInt64(0x0000000000000000,
         "AArch64 Debug Feature Register 1")
 
-    # !TME | !Atomic | !CRC32 | !SHA2 | !SHA1 | !AES
-    id_aa64isar0_el1 = Param.UInt64(0x0000000000000000,
+    # !TME | !Atomic | !CRC32 | !SHA2 | RDM | !SHA1 | !AES
+    id_aa64isar0_el1 = Param.UInt64(0x0000000010000000,
         "AArch64 Instruction Set Attribute Register 0")
 
     # GPI = 0x0 | GPA = 0x1 | API=0x0 | FCMA | JSCVT | APA=0x1
index f1a0cdba5b5956f51f289cc2f3843669a05d1ae0..9a487ea8137be6775de7571dca7e7e34e9c66fe1 100644 (file)
@@ -2975,6 +2975,8 @@ namespace Aarch64
             } else {
                 return new Unknown64(machInst);
             }
+        } else if (bits(machInst, 15) && bits(machInst, 10) == 1) {
+            return decodeNeonSc3SameExtra(machInst);
         } else if (bits(machInst, 23, 22) == 0 &&
                    bits(machInst, 15) == 0) {
             if (bits(machInst, 10) == 1) {
index f1b387e2fef65fa98458d36607fad296de7c042d..5e7880eb1c5f1304d8363101a7394b3518c57b49 100644 (file)
@@ -652,7 +652,10 @@ let {{
             }
           case 0xb:
             if (o1) {
-                if (u || q) {
+                if (u) {
+                    return decodeNeonSThreeSReg<VqrdmlahD, VqrdmlahQ>(
+                            q, size, machInst, vd, vn, vm);
+                } else if (q) {
                     return new Unknown(machInst);
                 } else {
                     return decodeNeonUThreeUSReg<NVpaddD>(
@@ -669,7 +672,10 @@ let {{
             }
           case 0xc:
             if (o1) {
-                if (!u) {
+                if (u) {
+                    return decodeNeonSThreeSReg<VqrdmlshD, VqrdmlshQ>(
+                            q, size, machInst, vd, vn, vm);
+                } else {
                     if (bits(size, 1) == 0) {
                         if (q) {
                             return new NVfmaQFp<float>(machInst, vd, vn, vm);
@@ -1504,6 +1510,54 @@ let {{
                     return new Unknown(machInst);
                 }
             }
+          case 0xe:
+            if (u) {
+                switch (size) {
+                  case 1:
+                    return new VqrdmlahsQ<int16_t>(
+                            machInst, vd, vn, vm, index);
+                  case 2:
+                    return new VqrdmlahsQ<int32_t>(
+                            machInst, vd, vn, vm, index);
+                  default:
+                    return new Unknown(machInst);
+                }
+            } else {
+                switch (size) {
+                  case 1:
+                    return new VqrdmlahsD<int16_t>(
+                            machInst, vd, vn, vm, index);
+                  case 2:
+                    return new VqrdmlahsD<int32_t>(
+                            machInst, vd, vn, vm, index);
+                  default:
+                    return new Unknown(machInst);
+                }
+            }
+          case 0xf:
+            if (u) {
+                switch (size) {
+                  case 1:
+                    return new VqrdmlshsQ<int16_t>(
+                            machInst, vd, vn, vm, index);
+                  case 2:
+                    return new VqrdmlshsQ<int32_t>(
+                            machInst, vd, vn, vm, index);
+                  default:
+                    return new Unknown(machInst);
+                }
+            } else {
+                switch (size) {
+                  case 1:
+                    return new VqrdmlshsD<int16_t>(
+                            machInst, vd, vn, vm, index);
+                  case 2:
+                    return new VqrdmlshsD<int32_t>(
+                            machInst, vd, vn, vm, index);
+                  default:
+                    return new Unknown(machInst);
+                }
+            }
         }
         return new Unknown(machInst);
     }
index 6c2b2e02c15a6745652e8061ea4a2fc8f693288e..835909ad0505f863f8cba0e4b8e866ae9a31f03e 100644 (file)
@@ -66,6 +66,8 @@ namespace Aarch64
 
     // AdvSIMD scalar three same
     inline StaticInstPtr decodeNeonSc3Same(ExtMachInst machInst);
+    // AdvSIMD scalar three same extra
+    inline StaticInstPtr decodeNeonSc3SameExtra(ExtMachInst machInst);
     // AdvSIMD scalar three different
     inline StaticInstPtr decodeNeonSc3Diff(ExtMachInst machInst);
     // AdvSIMD scalar two-reg misc
@@ -516,6 +518,20 @@ namespace Aarch64
         IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
 
         switch (opcode) {
+          case 0x10:
+            if (q)
+                return decodeNeonSThreeHAndWReg<SqrdmlahQX>(
+                    size, machInst, vd, vn, vm);
+            else
+                return decodeNeonSThreeHAndWReg<SqrdmlahDX>(
+                    size, machInst, vd, vn, vm);
+          case 0x11:
+            if (q)
+                return decodeNeonSThreeHAndWReg<SqrdmlshQX>(
+                    size, machInst, vd, vn, vm);
+            else
+                return decodeNeonSThreeHAndWReg<SqrdmlshDX>(
+                    size, machInst, vd, vn, vm);
           case 0x18:
           case 0x19:
           case 0x1a:
@@ -1531,10 +1547,16 @@ namespace Aarch64
                 return decodeNeonSThreeImmHAndWReg<SqdmulhElemDX, SqdmulhElemQX>(
                     q, size, machInst, vd, vn, vm, index);
           case 0xd:
-            if (u || (size == 0x0 || size == 0x3))
-                return new Unknown64(machInst);
+            if (u)
+                return decodeNeonSThreeImmHAndWReg<SqrdmlahElemDX,
+                                                   SqrdmlahElemQX>(
+                    q, size, machInst, vd, vn, vm, index);
             else
-                return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX, SqrdmulhElemQX>(
+                return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX,
+                                                   SqrdmulhElemQX>(
+                    q, size, machInst, vd, vn, vm, index);
+          case 0xf:
+            return decodeNeonSThreeImmHAndWReg<SqrdmlshElemDX, SqrdmlshElemQX>(
                     q, size, machInst, vd, vn, vm, index);
           default:
             return new Unknown64(machInst);
@@ -2105,6 +2127,28 @@ namespace Aarch64
         }
     }
 
+    StaticInstPtr
+    decodeNeonSc3SameExtra(ExtMachInst machInst)
+    {
+        uint8_t size = bits(machInst, 23, 22);
+        uint8_t opcode = bits(machInst, 15, 11);
+
+        IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
+
+        switch (opcode) {
+          case 0x10:
+            return decodeNeonSThreeHAndWReg<SqrdmlahScX>(
+                size, machInst, vd, vn, vm);
+          case 0x11:
+            return decodeNeonSThreeHAndWReg<SqrdmlshScX>(
+                size, machInst, vd, vn, vm);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
     StaticInstPtr
     decodeNeonSc3Diff(ExtMachInst machInst)
     {
@@ -2434,10 +2478,9 @@ namespace Aarch64
         }
         IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf);
 
-        if (u && opcode != 9)
-            return new Unknown64(machInst);
+        uint8_t u_opcode = opcode | u << 4;
 
-        switch (opcode) {
+        switch (u_opcode) {
           case 0x1:
             if (size < 2 || sz_L == 0x3)
                 return new Unknown64(machInst);
@@ -2465,11 +2508,7 @@ namespace Aarch64
           case 0x9:
             if (size < 2 || sz_L == 0x3)
                 return new Unknown64(machInst);
-            if (u)
-                return decodeNeonUThreeImmScFpReg<FmulxElemScX>(
-                    size & 0x1, machInst, vd, vn, vm_fp, index_fp);
-            else
-                return decodeNeonUThreeImmScFpReg<FmulElemScX>(
+            return decodeNeonUThreeImmScFpReg<FmulElemScX>(
                     size & 0x1, machInst, vd, vn, vm_fp, index_fp);
           case 0xb:
             if (size == 0x0 || size == 0x3)
@@ -2484,10 +2523,20 @@ namespace Aarch64
                 return decodeNeonSThreeImmHAndWReg<SqdmulhElemScX>(
                     size, machInst, vd, vn, vm, index);
           case 0xd:
-            if (size == 0x0 || size == 0x3)
+            return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>(
+                    size, machInst, vd, vn, vm, index);
+          case 0x19:
+            if (size < 2 || sz_L == 0x3)
                 return new Unknown64(machInst);
-            else
-                return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>(
+            return decodeNeonUThreeImmScFpReg<FmulxElemScX>(
+                    size & 0x1, machInst, vd, vn, vm_fp, index_fp);
+
+          case 0x1d:
+            return decodeNeonSThreeImmHAndWReg<SqrdmlahElemScX>(
+                    size, machInst, vd, vn, vm, index);
+
+          case 0x1f:
+            return decodeNeonSThreeImmHAndWReg<SqrdmlshElemScX>(
                     size, machInst, vd, vn, vm, index);
           default:
             return new Unknown64(machInst);
index 6290203e1812306cc93900503fbfbfe74396d719..2d25f3611d6504dcbca5737e6c858b75e2f4d114 100644 (file)
@@ -1147,7 +1147,7 @@ let {{
 
     def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                           readDest=False, pairwise=False, byElem=False,
-                          standardFpcsr=False, complex=False):
+                          standardFpcsr=False, complex=False, extra=''):
         global header_output, exec_output
         eWalkCode = simdEnabledCheckCode + '''
                     RegVect srcReg1, destReg;
@@ -1203,6 +1203,7 @@ let {{
             }
             ''' % { "op" : op, "readDest" : readDestCode }
         else:
+            eWalkCode += extra
             eWalkCode += '''
             for (unsigned i = 0; i < eCount; i++) {
                 Element srcElem1 = letoh(srcReg1.elements[i]);
@@ -1398,7 +1399,8 @@ let {{
         threeUnequalRegInst(name, Name, opClass, types, op,
                             True, False, True, readDest)
 
-    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
+    def twoEqualRegInst(name, Name, opClass, types, rCount, op,
+                        readDest=False, extra=''):
         global header_output, exec_output
         eWalkCode = simdEnabledCheckCode + '''
         RegVect srcReg1, srcReg2, destReg;
@@ -1415,6 +1417,7 @@ let {{
         readDestCode = ''
         if readDest:
             readDestCode = 'destElem = letoh(destReg.elements[i]);'
+        eWalkCode += extra
         eWalkCode += '''
         if (imm >= eCount) {
             return std::make_shared<UndefinedInstruction>(machInst, false,
@@ -2783,6 +2786,55 @@ let {{
     threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
     threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
 
+
+    vqrdmCode = '''
+          FPSCR fpscr = (FPSCR) FpscrQc;
+          int nbits = sizeof(Element)*8;
+
+          auto val_max = std::numeric_limits<Element>::max();
+          auto val_min = std::numeric_limits<Element>::min();
+          BigElement unsat_value = ((BigElement)destElem << nbits) %(code)s
+                ((BigElement)srcElem1 * (BigElement)srcElem2 * 2) +
+                ((BigElement)1 << (nbits - 1));
+          unsat_value >>= nbits;
+
+          if (unsat_value > val_max) {
+              fpscr.qc = 1;
+              destElem = val_max;
+          } else if (unsat_value < val_min) {
+              fpscr.qc = 1;
+              destElem = val_min;
+          } else {
+              destElem = unsat_value;
+          }
+          FpscrQc = fpscr;
+    '''
+    code_add = "+"
+    vqrdmlahCode = vqrdmCode % {'code': code_add}
+    rdm_check = '''
+      int sz = bits(machInst, 21, 20);
+      RegVal isar5 = xc->tcBase()->readMiscReg(MISCREG_ID_ISAR5);
+      if (!(bits(isar5, 27, 24) == 0x1) || sz == 3 || sz == 0)
+          return std::make_shared<UndefinedInstruction>(machInst, true);
+      typedef __int128_t BigElement;
+    '''
+    threeEqualRegInst("vqrdmlah", "VqrdmlahD",
+            "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True,
+            extra=rdm_check)
+    threeEqualRegInst("vqrdmlah", "VqrdmlahQ",
+            "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True,
+            extra=rdm_check)
+
+    code_sub = "-"
+    vqrdmlshCode = vqrdmCode % {'code': code_sub}
+    threeEqualRegInst("vqrdmlsh", "VqrdmlshD",
+            "SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True,
+            extra=rdm_check)
+    threeEqualRegInst("vqrdmlsh", "VqrdmlshQ",
+            "SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True,
+            extra=rdm_check)
+
+
     vqrdmulhCode = '''
         FPSCR fpscr = (FPSCR) FpscrQc;
         destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
@@ -3033,6 +3085,18 @@ let {{
             "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
     twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
             "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
+    twoEqualRegInst("vqrdmlah", "VqrdmlahsD",
+            "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True,
+            extra=rdm_check)
+    twoEqualRegInst("vqrdmlah", "VqrdmlahsQ",
+            "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True,
+            extra=rdm_check)
+    twoEqualRegInst("vqrdmlsh", "VqrdmlshsD",
+            "SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True,
+            extra=rdm_check)
+    twoEqualRegInst("vqrdmlsh", "VqrdmlshsQ",
+            "SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True,
+            extra=rdm_check)
 
     vshrCode = '''
         if (imm >= sizeof(srcElem1) * 8) {
index f049c3ead299bcf2987b255790cfe00cb8e3764e..36db47451ab85fb09aa4e9bb3382938bf23d7332 100644 (file)
@@ -52,7 +52,8 @@ let {{
 
     def threeEqualRegInstX(name, Name, opClass, types, rCount, op,
                            readDest=False, pairwise=False, scalar=False,
-                           byElem=False, decoder='Generic', complex=False):
+                           byElem=False, decoder='Generic', complex=False,
+                           extra=''):
         assert (not pairwise) or ((not byElem) and (not scalar))
         global header_output, exec_output, decoders
         eWalkCode = simd64EnabledCheckCode + '''
@@ -110,6 +111,7 @@ let {{
                 continue;
             }
             '''
+            eWalkCode += extra
             eWalkCode += '''
         for (unsigned i = 0; i < eCount; i++) {
             %(scalarCheck)s
@@ -2336,7 +2338,81 @@ let {{
                      sqnegCode)
     twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
                      sqnegCode, scalar=True)
-    # SQRDMULH (by element)
+    sqrdmCode = '''
+
+          FPSCR fpscr = (FPSCR) FpscrQc;
+          int nbits = sizeof(Element)*8;
+
+          auto val_max = std::numeric_limits<Element>::max();
+          auto val_min = std::numeric_limits<Element>::min();
+          BigElement unsat_value = ((BigElement)destElem << nbits) %(code)s
+                ((BigElement)srcElem1 * (BigElement)srcElem2 * 2) +
+                ((BigElement)1 << (nbits - 1));
+          unsat_value >>= nbits;
+
+          if (unsat_value > val_max) {
+              fpscr.qc = 1;
+              destElem = val_max;
+          } else if (unsat_value < val_min) {
+              fpscr.qc = 1;
+              destElem = val_min;
+          } else {
+              destElem = unsat_value;
+          }
+          FpscrQc = fpscr;
+    '''
+    code_add = "+"
+    sqrdmlahCode = sqrdmCode % {'code': code_add}
+    rdm_check = '''
+      int sz = bits(machInst, 23, 22);
+      AA64ISAR0 isar0 = xc->tcBase()->readMiscReg( MISCREG_ID_AA64ISAR0_EL1);
+      if (!isar0.rdm || sz == 3 || sz == 0)
+          return std::make_shared<UndefinedInstruction>(machInst, true);
+      typedef __int128_t BigElement;
+    '''
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahElemDX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 2, sqrdmlahCode, byElem=True,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahElemQX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlahCode, byElem=True,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahElemScX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlahCode, byElem=True,
+                       readDest=True, scalar=True, extra=rdm_check)
+    # SQRDMLAH (vector)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahDX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 2, sqrdmlahCode,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahQX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlahCode,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahScX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlahCode, scalar=True,
+                       readDest=True, extra=rdm_check)
+    # SQRDMLSH (by element)
+    code_sub = "-"
+    sqrdmlshCode = sqrdmCode % {'code': code_sub}
+
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemDX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 2, sqrdmlshCode, byElem=True,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemQX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlshCode, byElem=True,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemScX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlshCode, byElem=True,
+                       readDest=True, scalar=True, extra=rdm_check)
+    # SQRDMLSH (vector)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshDX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 2, sqrdmlshCode,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshQX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlshCode,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshScX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlshCode, scalar=True,
+                       readDest=True, extra=rdm_check)
+    # SQRDMULH (by element)
     sqrdmulhCode = '''
             FPSCR fpscr = (FPSCR) FpscrQc;
             destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +