From 2c242d665f42614e9fe159ff81950a5d0f2661b7 Mon Sep 17 00:00:00 2001 From: Ciro Santilli Date: Mon, 18 Feb 2019 18:06:45 +0000 Subject: [PATCH] arch-arm: implement floating point aarch32 VCVTA family These instructions round floating point to integer, and were added to aarch32 as an extension to ARMv7. Change-Id: I62d1705badc95a4e8954a5ad62b2b6bc9e4ffe00 Reviewed-on: https://gem5-review.googlesource.com/c/16788 Reviewed-by: Giacomo Travaglini Maintainer: Andreas Sandberg --- src/arch/arm/isa/formats/fp.isa | 220 ++++++++++++++++++++++---------- src/arch/arm/isa/insts/fp.isa | 85 ++++++------ 2 files changed, 200 insertions(+), 105 deletions(-) diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa index 77a33e6fe..c159dc626 100644 --- a/src/arch/arm/isa/formats/fp.isa +++ b/src/arch/arm/isa/formats/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2011, 2016-2018 ARM Limited +// Copyright (c) 2010-2011, 2016-2019 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -2001,6 +2001,26 @@ let {{ decodeShortFpTransfer(ExtMachInst machInst); ''' decoder_output = ''' + IntRegIndex decodeFpVd(ExtMachInst machInst, uint32_t size, bool isInt) + { + if (!isInt and size == 3) { + return (IntRegIndex)((bits(machInst, 22) << 5) | + (bits(machInst, 15, 12) << 1)); + } else { + return (IntRegIndex)(bits(machInst, 22) | + (bits(machInst, 15, 12) << 1)); + } + } + IntRegIndex decodeFpVm(ExtMachInst machInst, uint32_t size, bool isInt) + { + if (!isInt and size == 3) { + return (IntRegIndex)((bits(machInst, 5) << 5) | + (bits(machInst, 3, 0) << 1)); + } else { + return (IntRegIndex)(bits(machInst, 5) | + (bits(machInst, 3, 0) << 1)); + } + } StaticInstPtr decodeShortFpTransfer(ExtMachInst machInst) { @@ -2008,67 +2028,143 @@ let {{ const uint32_t c = bits(machInst, 8); const uint32_t a = bits(machInst, 23, 21); const uint32_t b = bits(machInst, 6, 5); + const uint32_t o1 = bits(machInst, 18); if ((machInst.thumb == 1 && bits(machInst, 28) == 1) || (machInst.thumb == 0 && machInst.condCode == 0xf)) { // Determine if this is backported aarch64 FP instruction const bool b31_b24 = bits(machInst, 31, 24) == 0xFE; const bool b23 = bits(machInst, 23); - const bool b21_b18 = bits(machInst, 21, 18) == 0xE; + const bool b21_b19 = bits(machInst, 21, 19) == 0x7; const bool b11_b9 = bits(machInst, 11, 9) == 0x5; - const bool sz = bits(machInst, 8); - const bool b7_b6 = bits(machInst, 7, 6) == 0x1; - const bool b6 = bits(machInst, 6) == 0x0; + const uint32_t size = bits(machInst, 9, 8); + const bool op3 = bits(machInst, 6); const bool b4 = bits(machInst, 4) == 0x0; - if (b31_b24 && b23 && b21_b18 && b11_b9 && b7_b6 && b4) { - // VINT* Integer Rounding Instructon - const uint32_t rm = bits(machInst, 17, 16); - - if (sz) { - const IntRegIndex vd = - (IntRegIndex)((bits(machInst, 22) << 5) | - (bits(machInst, 15, 12) << 1)); - const IntRegIndex vm = - (IntRegIndex)((bits(machInst, 5) << 5) | - (bits(machInst, 3, 0) << 1)); - switch(rm) { - case 0x0: - return decodeVfpRegRegOp(machInst, vd, vm, - true); - case 0x1: - return decodeVfpRegRegOp(machInst, vd, vm, - true); - case 0x2: - return decodeVfpRegRegOp(machInst, vd, vm, - true); - case 0x3: - return decodeVfpRegRegOp(machInst, vd, vm, - true); - default: return new Unknown(machInst); - } - } else { - const IntRegIndex vd = - (IntRegIndex)(bits(machInst, 22) | - (bits(machInst, 15, 12) << 1)); - const IntRegIndex vm = - (IntRegIndex)(bits(machInst, 5) | - (bits(machInst, 3, 0) << 1)); - switch(rm) { - case 0x0: - return decodeVfpRegRegOp(machInst, vd, vm, - false); - case 0x1: - return decodeVfpRegRegOp(machInst, vd, vm, - false); - case 0x2: - return decodeVfpRegRegOp(machInst, vd, vm, - false); - case 0x3: - return decodeVfpRegRegOp(machInst, vd, vm, - false); - default: return new Unknown(machInst); - } - } - } else if (b31_b24 && !b23 && b11_b9 && b6 && b4){ + const uint32_t rm = bits(machInst, 17, 16); + IntRegIndex vd = decodeFpVd(machInst, size, false); + IntRegIndex vm = decodeFpVm(machInst, size, false); + IntRegIndex vdInt = decodeFpVd(machInst, size, true); + if (b31_b24 && b23 && b21_b19 && b11_b9 && op3 && b4) { + if (o1 == 0) { + // VINT* Integer Rounding Instruction + if (size == 3) { + switch(rm) { + case 0x0: + return decodeVfpRegRegOp(machInst, vd, vm, + true); + case 0x1: + return decodeVfpRegRegOp(machInst, vd, vm, + true); + case 0x2: + return decodeVfpRegRegOp(machInst, vd, vm, + true); + case 0x3: + return decodeVfpRegRegOp(machInst, vd, vm, + true); + default: return new Unknown(machInst); + } + } else { + switch(rm) { + case 0x0: + return decodeVfpRegRegOp(machInst, vd, vm, + false); + case 0x1: + return decodeVfpRegRegOp(machInst, vd, vm, + false); + case 0x2: + return decodeVfpRegRegOp(machInst, vd, vm, + false); + case 0x3: + return decodeVfpRegRegOp(machInst, vd, vm, + false); + default: return new Unknown(machInst); + } + } + } else { + const bool op = bits(machInst, 7); + switch(rm) { + case 0x0: + switch(size) { + case 0x0: + return new Unknown(machInst); + case 0x1: + return new FailUnimplemented( + "vcvta.u32.f16", machInst); + case 0x2: + if (op) { + return new VcvtaFpSIntS(machInst, vdInt, vm); + } else { + return new VcvtaFpUIntS(machInst, vdInt, vm); + } + case 0x3: + if (op) { + return new VcvtaFpSIntD(machInst, vdInt, vm); + } else { + return new VcvtaFpUIntD(machInst, vdInt, vm); + } + } + case 0x1: + switch(size) { + case 0x0: + return new Unknown(machInst); + case 0x1: + return new FailUnimplemented( + "vcvtn.u32.f16", machInst); + case 0x2: + if (op) { + return new VcvtnFpSIntS(machInst, vdInt, vm); + } else { + return new VcvtnFpUIntS(machInst, vdInt, vm); + } + case 0x3: + if (op) { + return new VcvtnFpSIntD(machInst, vdInt, vm); + } else { + return new VcvtnFpUIntD(machInst, vdInt, vm); + } + } + case 0x2: + switch(size) { + case 0x0: + return new Unknown(machInst); + case 0x1: + return new FailUnimplemented( + "vcvtp.u32.f16", machInst); + case 0x2: + if (op) { + return new VcvtpFpSIntS(machInst, vdInt, vm); + } else { + return new VcvtpFpUIntS(machInst, vdInt, vm); + } + case 0x3: + if (op) { + return new VcvtpFpSIntD(machInst, vdInt, vm); + } else { + return new VcvtpFpUIntD(machInst, vdInt, vm); + } + } + case 0x3: + switch(size) { + case 0x0: + return new Unknown(machInst); + case 0x1: + return new FailUnimplemented( + "vcvtm.u32.f16", machInst); + case 0x2: + if (op) { + return new VcvtmFpSIntS(machInst, vdInt, vm); + } else { + return new VcvtmFpUIntS(machInst, vdInt, vm); + } + case 0x3: + if (op) { + return new VcvtmFpSIntD(machInst, vdInt, vm); + } else { + return new VcvtmFpUIntD(machInst, vdInt, vm); + } + } + } + } + } else if (b31_b24 && !b23 && b11_b9 && !op3 && b4){ // VSEL* floating point conditional select ConditionCode cond; @@ -2079,24 +2175,12 @@ let {{ case 0x3: cond = COND_GT; break; } - if (sz) { - const IntRegIndex vd = - (IntRegIndex)((bits(machInst, 22) << 5) | - (bits(machInst, 15, 12) << 1)); - const IntRegIndex vm = - (IntRegIndex)((bits(machInst, 5) << 5) | - (bits(machInst, 3, 0) << 1)); + if (size == 3) { const IntRegIndex vn = (IntRegIndex)((bits(machInst, 7) << 5) | (bits(machInst, 19, 16) << 1)); return new VselD(machInst, vd, vn, vm, cond); } else { - const IntRegIndex vd = - (IntRegIndex)(bits(machInst, 22) | - (bits(machInst, 15, 12) << 1)); - const IntRegIndex vm = - (IntRegIndex)(bits(machInst, 5) | - (bits(machInst, 3, 0) << 1)); const IntRegIndex vn = (IntRegIndex)((bits(machInst, 19, 16) << 1) | bits(machInst, 7)); diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa index dcf5889fb..d8323c455 100644 --- a/src/arch/arm/isa/insts/fp.isa +++ b/src/arch/arm/isa/insts/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2013,2016 ARM Limited +// Copyright (c) 2010-2013,2016,2018-2019 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -993,85 +993,96 @@ let {{ decoder_output += FpRegRegOpConstructor.subst(vcvtFpSIntDRIop); exec_output += PredOpExecute.subst(vcvtFpSIntDRIop); - vcvtFpUIntSCode = vfpEnabledCheckCode + ''' + round_mode_suffix_to_mode = { + '': 'VfpRoundZero', + 'a': 'VfpRoundAway', + 'm': 'VfpRoundDown', + 'n': 'VfpRoundNearest', + 'p': 'VfpRoundUpward', + } + + def buildVcvt(code, className, roundModeSuffix): + global header_output, decoder_output, exec_output, \ + vfpEnabledCheckCode, round_mode_suffix_to_mode + full_code = vfpEnabledCheckCode + code.format( + round_mode=round_mode_suffix_to_mode[roundModeSuffix], + ) + iop = InstObjParams( + "vcvt{}".format(roundModeSuffix), + className.format(roundModeSuffix), + "FpRegRegOp", + { "code": full_code, + "predicate_test": predicateTest, + "op_class": "SimdFloatCvtOp" }, + [] + ) + header_output += FpRegRegOpDeclare.subst(iop); + decoder_output += FpRegRegOpConstructor.subst(iop); + exec_output += PredOpExecute.subst(iop); + + code = ''' FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpToFixed(FpOp1, false, 32, 0); + FpDest_uw = vfpFpToFixed( + FpOp1, false, 32, 0, true, {round_mode}); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; ''' - vcvtFpUIntSIop = InstObjParams("vcvt", "VcvtFpUIntS", "FpRegRegOp", - { "code": vcvtFpUIntSCode, - "predicate_test": predicateTest, - "op_class": "SimdFloatCvtOp" }, []) - header_output += FpRegRegOpDeclare.subst(vcvtFpUIntSIop); - decoder_output += FpRegRegOpConstructor.subst(vcvtFpUIntSIop); - exec_output += PredOpExecute.subst(vcvtFpUIntSIop); + for round_mode_suffix in round_mode_suffix_to_mode: + buildVcvt(code, "Vcvt{}FpUIntS", round_mode_suffix) - vcvtFpUIntDCode = vfpEnabledCheckCode + ''' + code = ''' FPSCR fpscr = (FPSCR) FpscrExc; double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw); vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpToFixed(cOp1, false, 32, 0); + uint64_t result = vfpFpToFixed( + cOp1, false, 32, 0, true, {round_mode}); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; FpscrExc = fpscr; ''' - vcvtFpUIntDIop = InstObjParams("vcvt", "VcvtFpUIntD", "FpRegRegOp", - { "code": vcvtFpUIntDCode, - "predicate_test": predicateTest, - "op_class": "SimdFloatCvtOp" }, []) - header_output += FpRegRegOpDeclare.subst(vcvtFpUIntDIop); - decoder_output += FpRegRegOpConstructor.subst(vcvtFpUIntDIop); - exec_output += PredOpExecute.subst(vcvtFpUIntDIop); + for round_mode_suffix in round_mode_suffix_to_mode: + buildVcvt(code, "Vcvt{}FpUIntD", round_mode_suffix) - vcvtFpSIntSCode = vfpEnabledCheckCode + ''' + code = ''' FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpToFixed(FpOp1, true, 32, 0); + FpDest_sw = vfpFpToFixed( + FpOp1, true, 32, 0, true, {round_mode}); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; ''' - vcvtFpSIntSIop = InstObjParams("vcvt", "VcvtFpSIntS", "FpRegRegOp", - { "code": vcvtFpSIntSCode, - "predicate_test": predicateTest, - "op_class": "SimdFloatCvtOp" }, []) - header_output += FpRegRegOpDeclare.subst(vcvtFpSIntSIop); - decoder_output += FpRegRegOpConstructor.subst(vcvtFpSIntSIop); - exec_output += PredOpExecute.subst(vcvtFpSIntSIop); + for round_mode_suffix in round_mode_suffix_to_mode: + buildVcvt(code, "Vcvt{}FpSIntS", round_mode_suffix) - vcvtFpSIntDCode = vfpEnabledCheckCode + ''' + code = ''' FPSCR fpscr = (FPSCR) FpscrExc; double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw); vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - int64_t result = vfpFpToFixed(cOp1, true, 32, 0); + int64_t result = vfpFpToFixed( + cOp1, true, 32, 0, true, {round_mode}); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; FpscrExc = fpscr; ''' - vcvtFpSIntDIop = InstObjParams("vcvt", "VcvtFpSIntD", "FpRegRegOp", - { "code": vcvtFpSIntDCode, - "predicate_test": predicateTest, - "op_class": "SimdFloatCvtOp" }, []) - header_output += FpRegRegOpDeclare.subst(vcvtFpSIntDIop); - decoder_output += FpRegRegOpConstructor.subst(vcvtFpSIntDIop); - exec_output += PredOpExecute.subst(vcvtFpSIntDIop); + for round_mode_suffix in round_mode_suffix_to_mode: + buildVcvt(code, "Vcvt{}FpSIntD", round_mode_suffix) vcvtFpSFpDCode = vfpEnabledCheckCode + ''' FPSCR fpscr = (FPSCR) FpscrExc; -- 2.30.2