From: Jordi Vaquero Date: Mon, 14 Sep 2020 16:08:38 +0000 (+0200) Subject: arch-arm: Implementation ARMv8.1 RDMA X-Git-Tag: develop-gem5-snapshot~464 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b0dbc09b3f6f2c238efa841a1728459fc1431c03;p=gem5.git arch-arm: Implementation ARMv8.1 RDMA Adding RDMA implementation for ARMv8.1 + isa/formats/*: Adding decoding of Aarch64 and aarch32 instructions + isa/insts/neon.isa\neon64.isa: Adding function instructions Change-Id: I430e8880723f373ffffa50079a87fd4ecc634d86 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36015 Reviewed-by: Giacomo Travaglini Maintainer: Giacomo Travaglini Tested-by: kokoro --- diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py index 02f24d3f3..3e1866538 100644 --- a/src/arch/arm/ArmISA.py +++ b/src/arch/arm/ArmISA.py @@ -80,7 +80,7 @@ class ArmISA(BaseISA): id_isar2 = Param.UInt32(0x21232141, "Instruction Set Attribute Register 2") id_isar3 = Param.UInt32(0x01112131, "Instruction Set Attribute Register 3") id_isar4 = Param.UInt32(0x10010142, "Instruction Set Attribute Register 4") - id_isar5 = Param.UInt32(0x10000000, "Instruction Set Attribute Register 5") + id_isar5 = Param.UInt32(0x11000000, "Instruction Set Attribute Register 5") fpsid = Param.UInt32(0x410430a0, "Floating-point System ID Register") @@ -98,8 +98,8 @@ class ArmISA(BaseISA): id_aa64dfr1_el1 = Param.UInt64(0x0000000000000000, "AArch64 Debug Feature Register 1") - # !TME | !Atomic | !CRC32 | !SHA2 | !SHA1 | !AES - id_aa64isar0_el1 = Param.UInt64(0x0000000000000000, + # !TME | !Atomic | !CRC32 | !SHA2 | RDM | !SHA1 | !AES + id_aa64isar0_el1 = Param.UInt64(0x0000000010000000, "AArch64 Instruction Set Attribute Register 0") # GPI = 0x0 | GPA = 0x1 | API=0x0 | FCMA | JSCVT | APA=0x1 diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa index f1a0cdba5..9a487ea81 100644 --- a/src/arch/arm/isa/formats/aarch64.isa +++ b/src/arch/arm/isa/formats/aarch64.isa @@ -2975,6 +2975,8 @@ namespace Aarch64 } else { return new Unknown64(machInst); } + } else if (bits(machInst, 15) && bits(machInst, 10) == 1) { + return decodeNeonSc3SameExtra(machInst); } else if (bits(machInst, 23, 22) == 0 && bits(machInst, 15) == 0) { if (bits(machInst, 10) == 1) { diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa index f1b387e2f..5e7880eb1 100644 --- a/src/arch/arm/isa/formats/fp.isa +++ b/src/arch/arm/isa/formats/fp.isa @@ -652,7 +652,10 @@ let {{ } case 0xb: if (o1) { - if (u || q) { + if (u) { + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + } else if (q) { return new Unknown(machInst); } else { return decodeNeonUThreeUSReg( @@ -669,7 +672,10 @@ let {{ } case 0xc: if (o1) { - if (!u) { + if (u) { + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + } else { if (bits(size, 1) == 0) { if (q) { return new NVfmaQFp(machInst, vd, vn, vm); @@ -1504,6 +1510,54 @@ let {{ return new Unknown(machInst); } } + case 0xe: + if (u) { + switch (size) { + case 1: + return new VqrdmlahsQ( + machInst, vd, vn, vm, index); + case 2: + return new VqrdmlahsQ( + machInst, vd, vn, vm, index); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 1: + return new VqrdmlahsD( + machInst, vd, vn, vm, index); + case 2: + return new VqrdmlahsD( + machInst, vd, vn, vm, index); + default: + return new Unknown(machInst); + } + } + case 0xf: + if (u) { + switch (size) { + case 1: + return new VqrdmlshsQ( + machInst, vd, vn, vm, index); + case 2: + return new VqrdmlshsQ( + machInst, vd, vn, vm, index); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 1: + return new VqrdmlshsD( + machInst, vd, vn, vm, index); + case 2: + return new VqrdmlshsD( + machInst, vd, vn, vm, index); + default: + return new Unknown(machInst); + } + } } return new Unknown(machInst); } diff --git a/src/arch/arm/isa/formats/neon64.isa b/src/arch/arm/isa/formats/neon64.isa index 6c2b2e02c..835909ad0 100644 --- a/src/arch/arm/isa/formats/neon64.isa +++ b/src/arch/arm/isa/formats/neon64.isa @@ -66,6 +66,8 @@ namespace Aarch64 // AdvSIMD scalar three same inline StaticInstPtr decodeNeonSc3Same(ExtMachInst machInst); + // AdvSIMD scalar three same extra + inline StaticInstPtr decodeNeonSc3SameExtra(ExtMachInst machInst); // AdvSIMD scalar three different inline StaticInstPtr decodeNeonSc3Diff(ExtMachInst machInst); // AdvSIMD scalar two-reg misc @@ -516,6 +518,20 @@ namespace Aarch64 IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); switch (opcode) { + case 0x10: + if (q) + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + case 0x11: + if (q) + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); case 0x18: case 0x19: case 0x1a: @@ -1531,10 +1547,16 @@ namespace Aarch64 return decodeNeonSThreeImmHAndWReg( q, size, machInst, vd, vn, vm, index); case 0xd: - if (u || (size == 0x0 || size == 0x3)) - return new Unknown64(machInst); + if (u) + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); else - return decodeNeonSThreeImmHAndWReg( + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0xf: + return decodeNeonSThreeImmHAndWReg( q, size, machInst, vd, vn, vm, index); default: return new Unknown64(machInst); @@ -2105,6 +2127,28 @@ namespace Aarch64 } } + StaticInstPtr + decodeNeonSc3SameExtra(ExtMachInst machInst) + { + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x10: + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + case 0x11: + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + } + StaticInstPtr decodeNeonSc3Diff(ExtMachInst machInst) { @@ -2434,10 +2478,9 @@ namespace Aarch64 } IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); - if (u && opcode != 9) - return new Unknown64(machInst); + uint8_t u_opcode = opcode | u << 4; - switch (opcode) { + switch (u_opcode) { case 0x1: if (size < 2 || sz_L == 0x3) return new Unknown64(machInst); @@ -2465,11 +2508,7 @@ namespace Aarch64 case 0x9: if (size < 2 || sz_L == 0x3) return new Unknown64(machInst); - if (u) - return decodeNeonUThreeImmScFpReg( - size & 0x1, machInst, vd, vn, vm_fp, index_fp); - else - return decodeNeonUThreeImmScFpReg( + return decodeNeonUThreeImmScFpReg( size & 0x1, machInst, vd, vn, vm_fp, index_fp); case 0xb: if (size == 0x0 || size == 0x3) @@ -2484,10 +2523,20 @@ namespace Aarch64 return decodeNeonSThreeImmHAndWReg( size, machInst, vd, vn, vm, index); case 0xd: - if (size == 0x0 || size == 0x3) + return decodeNeonSThreeImmHAndWReg( + size, machInst, vd, vn, vm, index); + case 0x19: + if (size < 2 || sz_L == 0x3) return new Unknown64(machInst); - else - return decodeNeonSThreeImmHAndWReg( + return decodeNeonUThreeImmScFpReg( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + + case 0x1d: + return decodeNeonSThreeImmHAndWReg( + size, machInst, vd, vn, vm, index); + + case 0x1f: + return decodeNeonSThreeImmHAndWReg( size, machInst, vd, vn, vm, index); default: return new Unknown64(machInst); diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa index 6290203e1..2d25f3611 100644 --- a/src/arch/arm/isa/insts/neon.isa +++ b/src/arch/arm/isa/insts/neon.isa @@ -1147,7 +1147,7 @@ let {{ def threeEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False, pairwise=False, byElem=False, - standardFpcsr=False, complex=False): + standardFpcsr=False, complex=False, extra=''): global header_output, exec_output eWalkCode = simdEnabledCheckCode + ''' RegVect srcReg1, destReg; @@ -1203,6 +1203,7 @@ let {{ } ''' % { "op" : op, "readDest" : readDestCode } else: + eWalkCode += extra eWalkCode += ''' for (unsigned i = 0; i < eCount; i++) { Element srcElem1 = letoh(srcReg1.elements[i]); @@ -1398,7 +1399,8 @@ let {{ threeUnequalRegInst(name, Name, opClass, types, op, True, False, True, readDest) - def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False): + def twoEqualRegInst(name, Name, opClass, types, rCount, op, + readDest=False, extra=''): global header_output, exec_output eWalkCode = simdEnabledCheckCode + ''' RegVect srcReg1, srcReg2, destReg; @@ -1415,6 +1417,7 @@ let {{ readDestCode = '' if readDest: readDestCode = 'destElem = letoh(destReg.elements[i]);' + eWalkCode += extra eWalkCode += ''' if (imm >= eCount) { return std::make_shared(machInst, false, @@ -2783,6 +2786,55 @@ let {{ threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode) threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode) + + vqrdmCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + int nbits = sizeof(Element)*8; + + auto val_max = std::numeric_limits::max(); + auto val_min = std::numeric_limits::min(); + BigElement unsat_value = ((BigElement)destElem << nbits) %(code)s + ((BigElement)srcElem1 * (BigElement)srcElem2 * 2) + + ((BigElement)1 << (nbits - 1)); + unsat_value >>= nbits; + + if (unsat_value > val_max) { + fpscr.qc = 1; + destElem = val_max; + } else if (unsat_value < val_min) { + fpscr.qc = 1; + destElem = val_min; + } else { + destElem = unsat_value; + } + FpscrQc = fpscr; + ''' + code_add = "+" + vqrdmlahCode = vqrdmCode % {'code': code_add} + rdm_check = ''' + int sz = bits(machInst, 21, 20); + RegVal isar5 = xc->tcBase()->readMiscReg(MISCREG_ID_ISAR5); + if (!(bits(isar5, 27, 24) == 0x1) || sz == 3 || sz == 0) + return std::make_shared(machInst, true); + typedef __int128_t BigElement; + ''' + threeEqualRegInst("vqrdmlah", "VqrdmlahD", + "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True, + extra=rdm_check) + threeEqualRegInst("vqrdmlah", "VqrdmlahQ", + "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True, + extra=rdm_check) + + code_sub = "-" + vqrdmlshCode = vqrdmCode % {'code': code_sub} + threeEqualRegInst("vqrdmlsh", "VqrdmlshD", + "SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True, + extra=rdm_check) + threeEqualRegInst("vqrdmlsh", "VqrdmlshQ", + "SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True, + extra=rdm_check) + + vqrdmulhCode = ''' FPSCR fpscr = (FPSCR) FpscrQc; destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 + @@ -3033,6 +3085,18 @@ let {{ "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode) twoEqualRegInst("vqrdmulh", "VqrdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode) + twoEqualRegInst("vqrdmlah", "VqrdmlahsD", + "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True, + extra=rdm_check) + twoEqualRegInst("vqrdmlah", "VqrdmlahsQ", + "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True, + extra=rdm_check) + twoEqualRegInst("vqrdmlsh", "VqrdmlshsD", + "SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True, + extra=rdm_check) + twoEqualRegInst("vqrdmlsh", "VqrdmlshsQ", + "SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True, + extra=rdm_check) vshrCode = ''' if (imm >= sizeof(srcElem1) * 8) { diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa index f049c3ead..36db47451 100644 --- a/src/arch/arm/isa/insts/neon64.isa +++ b/src/arch/arm/isa/insts/neon64.isa @@ -52,7 +52,8 @@ let {{ def threeEqualRegInstX(name, Name, opClass, types, rCount, op, readDest=False, pairwise=False, scalar=False, - byElem=False, decoder='Generic', complex=False): + byElem=False, decoder='Generic', complex=False, + extra=''): assert (not pairwise) or ((not byElem) and (not scalar)) global header_output, exec_output, decoders eWalkCode = simd64EnabledCheckCode + ''' @@ -110,6 +111,7 @@ let {{ continue; } ''' + eWalkCode += extra eWalkCode += ''' for (unsigned i = 0; i < eCount; i++) { %(scalarCheck)s @@ -2336,7 +2338,81 @@ let {{ sqnegCode) twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4, sqnegCode, scalar=True) - # SQRDMULH (by element) + sqrdmCode = ''' + + FPSCR fpscr = (FPSCR) FpscrQc; + int nbits = sizeof(Element)*8; + + auto val_max = std::numeric_limits::max(); + auto val_min = std::numeric_limits::min(); + BigElement unsat_value = ((BigElement)destElem << nbits) %(code)s + ((BigElement)srcElem1 * (BigElement)srcElem2 * 2) + + ((BigElement)1 << (nbits - 1)); + unsat_value >>= nbits; + + if (unsat_value > val_max) { + fpscr.qc = 1; + destElem = val_max; + } else if (unsat_value < val_min) { + fpscr.qc = 1; + destElem = val_min; + } else { + destElem = unsat_value; + } + FpscrQc = fpscr; + ''' + code_add = "+" + sqrdmlahCode = sqrdmCode % {'code': code_add} + rdm_check = ''' + int sz = bits(machInst, 23, 22); + AA64ISAR0 isar0 = xc->tcBase()->readMiscReg( MISCREG_ID_AA64ISAR0_EL1); + if (!isar0.rdm || sz == 3 || sz == 0) + return std::make_shared(machInst, true); + typedef __int128_t BigElement; + ''' + threeEqualRegInstX("sqrdmlah", "SqrdmlahElemDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqrdmlahCode, byElem=True, + readDest=True, extra=rdm_check) + threeEqualRegInstX("sqrdmlah", "SqrdmlahElemQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmlahCode, byElem=True, + readDest=True, extra=rdm_check) + threeEqualRegInstX("sqrdmlah", "SqrdmlahElemScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmlahCode, byElem=True, + readDest=True, scalar=True, extra=rdm_check) + # SQRDMLAH (vector) + threeEqualRegInstX("sqrdmlah", "SqrdmlahDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqrdmlahCode, + readDest=True, extra=rdm_check) + threeEqualRegInstX("sqrdmlah", "SqrdmlahQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmlahCode, + readDest=True, extra=rdm_check) + threeEqualRegInstX("sqrdmlah", "SqrdmlahScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmlahCode, scalar=True, + readDest=True, extra=rdm_check) + # SQRDMLSH (by element) + code_sub = "-" + sqrdmlshCode = sqrdmCode % {'code': code_sub} + + threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqrdmlshCode, byElem=True, + readDest=True, extra=rdm_check) + threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmlshCode, byElem=True, + readDest=True, extra=rdm_check) + threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmlshCode, byElem=True, + readDest=True, scalar=True, extra=rdm_check) + # SQRDMLSH (vector) + threeEqualRegInstX("sqrdmlsh", "SqrdmlshDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqrdmlshCode, + readDest=True, extra=rdm_check) + threeEqualRegInstX("sqrdmlsh", "SqrdmlshQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmlshCode, + readDest=True, extra=rdm_check) + threeEqualRegInstX("sqrdmlsh", "SqrdmlshScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmlshCode, scalar=True, + readDest=True, extra=rdm_check) + # SQRDMULby element) sqrdmulhCode = ''' FPSCR fpscr = (FPSCR) FpscrQc; destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +