From 352d666fa1e9b5ae960127c95d19cf63c8ff0df7 Mon Sep 17 00:00:00 2001 From: Edmund Grimley Evans Date: Thu, 28 Jun 2018 14:32:01 +0100 Subject: [PATCH] arch-arm: Add FP16 support introduced by Armv8.2-A This changeset adds support for FP/SIMD instructions with half-precision floating-point operands. Change-Id: I4957f111c9c5e5d6a3747fe9d169d394d642fee8 Signed-off-by: Giacomo Gabrielli Reviewed-on: https://gem5-review.googlesource.com/13084 Reviewed-by: Andreas Sandberg Maintainer: Andreas Sandberg --- src/arch/arm/insts/pred_inst.hh | 42 +++++-- src/arch/arm/isa/formats/aarch64.isa | 6 +- src/arch/arm/isa/formats/fp.isa | 6 +- src/arch/arm/isa/insts/fp64.isa | 162 ++++++++++++++++++++------- src/arch/arm/isa/insts/neon64.isa | 4 +- 5 files changed, 164 insertions(+), 56 deletions(-) diff --git a/src/arch/arm/insts/pred_inst.hh b/src/arch/arm/insts/pred_inst.hh index d2a9f7080..62d1c09ab 100644 --- a/src/arch/arm/insts/pred_inst.hh +++ b/src/arch/arm/insts/pred_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2012-2013 ARM Limited + * Copyright (c) 2010, 2012-2013, 2017-2018 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -158,25 +158,51 @@ simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid, return bigData; } +/** Floating point data types. */ +enum class FpDataType { Fp16, Fp32, Fp64 }; + static inline uint64_t -vfp_modified_imm(uint8_t data, bool wide) +vfp_modified_imm(uint8_t data, FpDataType dtype) { uint64_t bigData = data; uint64_t repData; - if (wide) { - repData = bits(data, 6) ? 0xFF : 0; - bigData = (bits(bigData, 5, 0) << 48) | - (repData << 54) | (bits(~bigData, 6) << 62) | - (bits(bigData, 7) << 63); - } else { + switch (dtype) { + case FpDataType::Fp16: + repData = bits(data, 6) ? 0x3 : 0; + bigData = (bits(bigData, 5, 0) << 6) | + (repData << 12) | (bits(~bigData, 6) << 14) | + (bits(bigData, 7) << 15); + break; + case FpDataType::Fp32: repData = bits(data, 6) ? 0x1F : 0; bigData = (bits(bigData, 5, 0) << 19) | (repData << 25) | (bits(~bigData, 6) << 30) | (bits(bigData, 7) << 31); + break; + case FpDataType::Fp64: + repData = bits(data, 6) ? 0xFF : 0; + bigData = (bits(bigData, 5, 0) << 48) | + (repData << 54) | (bits(~bigData, 6) << 62) | + (bits(bigData, 7) << 63); + break; + default: + assert(0); } return bigData; } +static inline FpDataType +decode_fp_data_type(uint8_t encoding) +{ + switch (encoding) { + case 1: return FpDataType::Fp16; + case 2: return FpDataType::Fp32; + case 3: return FpDataType::Fp64; + default: + panic( + "Invalid floating point data type in VFP/SIMD or SVE instruction"); + } +} /** * Base class for predicated integer operations. diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa index 43dd557aa..77e598c7f 100644 --- a/src/arch/arm/isa/formats/aarch64.isa +++ b/src/arch/arm/isa/formats/aarch64.isa @@ -1637,12 +1637,14 @@ namespace Aarch64 if (type == 0) { // FMOV S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,5) // :imm8<5:0>:Zeros(19) - uint32_t imm = vfp_modified_imm(imm8, false); + uint32_t imm = vfp_modified_imm(imm8, + FpDataType::Fp32); return new FmovImmS(machInst, rd, imm); } else if (type == 1) { // FMOV D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,8) // :imm8<5:0>:Zeros(48) - uint64_t imm = vfp_modified_imm(imm8, true); + uint64_t imm = vfp_modified_imm(imm8, + FpDataType::Fp64); return new FmovImmD(machInst, rd, imm); } else { return new Unknown64(machInst); diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa index 009b27c8f..2412a1f10 100644 --- a/src/arch/arm/isa/formats/fp.isa +++ b/src/arch/arm/isa/formats/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2011,2016 ARM Limited +// Copyright (c) 2010-2011, 2016-2018 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -2401,11 +2401,11 @@ let {{ const uint32_t baseImm = bits(machInst, 3, 0) | (bits(machInst, 19, 16) << 4); if (single) { - uint32_t imm = vfp_modified_imm(baseImm, false); + uint32_t imm = vfp_modified_imm(baseImm, FpDataType::Fp32); return decodeVfpRegImmOp( machInst, vd, imm, false); } else { - uint64_t imm = vfp_modified_imm(baseImm, true); + uint64_t imm = vfp_modified_imm(baseImm, FpDataType::Fp64); return decodeVfpRegImmOp( machInst, vd, imm, true); } diff --git a/src/arch/arm/isa/insts/fp64.isa b/src/arch/arm/isa/insts/fp64.isa index a5e1085de..6c0c6b808 100644 --- a/src/arch/arm/isa/insts/fp64.isa +++ b/src/arch/arm/isa/insts/fp64.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2012-2013, 2016 ARM Limited +// Copyright (c) 2012-2013, 2016-2018 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -172,6 +172,34 @@ let {{ decoder_output = "" exec_output = "" + halfIntConvCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint16_t cOp1 = AA64FpOp1P0_uw; + uint16_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + halfIntConvCode2 = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint16_t cOp1 = AA64FpOp1P0_uw; + uint16_t cOp2 = AA64FpOp2P0_uw; + uint16_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + halfBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \ + "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)" + halfUnaryOp = "unaryOp(fpscr, AA64FpOp1P0," + \ + "%(func)s, fpscr.fz, fpscr.rMode)" + singleIntConvCode = vfp64EnabledCheckCode + ''' FPSCR fpscr = (FPSCR) FpscrExc; uint32_t cOp1 = AA64FpOp1P0_uw; @@ -232,23 +260,23 @@ let {{ fpscr.fz, fpscr.rMode) ''' - def buildTernaryFpOp(name, opClass, sOp, dOp): + def buildTernaryFpOp(name, opClass, hOp, sOp, dOp): global header_output, decoder_output, exec_output - for isDouble in True, False: + for suffix in "D", "S", "H": code = vfp64EnabledCheckCode + ''' FPSCR fpscr = (FPSCR) FpscrExc; ''' - if isDouble: + if suffix == "H": code += ''' - uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; - uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32; - uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32; - uint64_t cDest; - ''' "cDest = " + dOp + ";" + ''' + uint16_t cOp1 = AA64FpOp1P0_uw; + uint16_t cOp2 = AA64FpOp2P0_uw; + uint16_t cOp3 = AA64FpOp3P0_uw; + uint16_t cDest; + ''' "cDest = " + hOp + ";" + ''' AA64FpDestP0_uw = cDest; - AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP1_uw = 0; ''' - else: + elif suffix == "S": code += ''' uint32_t cOp1 = AA64FpOp1P0_uw; uint32_t cOp2 = AA64FpOp2P0_uw; @@ -258,13 +286,23 @@ let {{ AA64FpDestP0_uw = cDest; AA64FpDestP1_uw = 0; ''' + elif suffix == "D": + code += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32; + uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32; + uint64_t cDest; + ''' "cDest = " + dOp + ";" + ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' code += ''' AA64FpDestP2_uw = 0; AA64FpDestP3_uw = 0; FpscrExc = fpscr; ''' - iop = InstObjParams(name.lower(), name + ("D" if isDouble else "S"), + iop = InstObjParams(name.lower(), name + suffix, "FpRegRegRegRegOp", { "code": code, "op_class": opClass }, []) @@ -273,21 +311,33 @@ let {{ exec_output += BasicExecute.subst(iop) buildTernaryFpOp("FMAdd", "FloatMultAccOp", + "fplibMulAdd(cOp3, cOp1, cOp2, fpscr)", "fplibMulAdd(cOp3, cOp1, cOp2, fpscr)", "fplibMulAdd(cOp3, cOp1, cOp2, fpscr)" ) buildTernaryFpOp("FMSub", "FloatMultAccOp", - "fplibMulAdd(cOp3, fplibNeg(cOp1), cOp2, fpscr)", - "fplibMulAdd(cOp3, fplibNeg(cOp1), cOp2, fpscr)" ) + "fplibMulAdd(cOp3, fplibNeg(cOp1), cOp2, fpscr)", + "fplibMulAdd(cOp3, fplibNeg(cOp1), cOp2, fpscr)", + "fplibMulAdd(cOp3, fplibNeg(cOp1), cOp2, fpscr)" ) buildTernaryFpOp("FNMAdd", "FloatMultAccOp", - "fplibMulAdd(fplibNeg(cOp3), fplibNeg(cOp1), cOp2, fpscr)", - "fplibMulAdd(fplibNeg(cOp3), fplibNeg(cOp1), cOp2, fpscr)" ) + "fplibMulAdd(fplibNeg(cOp3), " + + "fplibNeg(cOp1), cOp2, fpscr)", + "fplibMulAdd(fplibNeg(cOp3), " + + "fplibNeg(cOp1), cOp2, fpscr)", + "fplibMulAdd(fplibNeg(cOp3), " + + "fplibNeg(cOp1), cOp2, fpscr)" ) buildTernaryFpOp("FNMSub", "FloatMultAccOp", - "fplibMulAdd(fplibNeg(cOp3), cOp1, cOp2, fpscr)", - "fplibMulAdd(fplibNeg(cOp3), cOp1, cOp2, fpscr)" ) + "fplibMulAdd(fplibNeg(cOp3), cOp1, cOp2, fpscr)", + "fplibMulAdd(fplibNeg(cOp3), cOp1, cOp2, fpscr)", + "fplibMulAdd(fplibNeg(cOp3), cOp1, cOp2, fpscr)" ) - def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp): + def buildBinFpOp(name, Name, base, opClass, halfOp, singleOp, doubleOp): global header_output, decoder_output, exec_output + code = halfIntConvCode2 % { "op": halfOp } + hIop = InstObjParams(name, Name + "H", base, + { "code": code, + "op_class": opClass }, []) + code = singleIntConvCode2 % { "op": singleOp } sIop = InstObjParams(name, Name + "S", base, { "code": code, @@ -301,44 +351,58 @@ let {{ declareTempl = eval( base + "Declare"); constructorTempl = eval("AA64" + base + "Constructor"); - for iop in sIop, dIop: + for iop in hIop, sIop, dIop: header_output += declareTempl.subst(iop) decoder_output += constructorTempl.subst(iop) exec_output += BasicExecute.subst(iop) buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "FloatAddOp", + "fplibAdd(cOp1, cOp2, fpscr)", "fplibAdd(cOp1, cOp2, fpscr)", "fplibAdd(cOp1, cOp2, fpscr)") buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "FloatAddOp", + "fplibSub(cOp1, cOp2, fpscr)", "fplibSub(cOp1, cOp2, fpscr)", "fplibSub(cOp1, cOp2, fpscr)") buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "FloatDivOp", + "fplibDiv(cOp1, cOp2, fpscr)", "fplibDiv(cOp1, cOp2, fpscr)", "fplibDiv(cOp1, cOp2, fpscr)") buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "FloatMultOp", + "fplibMul(cOp1, cOp2, fpscr)", "fplibMul(cOp1, cOp2, fpscr)", "fplibMul(cOp1, cOp2, fpscr)") buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "FloatMultOp", + "fplibNeg(fplibMul(cOp1, cOp2, fpscr))", "fplibNeg(fplibMul(cOp1, cOp2, fpscr))", "fplibNeg(fplibMul(cOp1, cOp2, fpscr))") buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "FloatCmpOp", + "fplibMin(cOp1, cOp2, fpscr)", "fplibMin(cOp1, cOp2, fpscr)", "fplibMin(cOp1, cOp2, fpscr)") buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "FloatCmpOp", + "fplibMax(cOp1, cOp2, fpscr)", "fplibMax(cOp1, cOp2, fpscr)", "fplibMax(cOp1, cOp2, fpscr)") buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "FloatCmpOp", + "fplibMinNum(cOp1, cOp2, fpscr)", "fplibMinNum(cOp1, cOp2, fpscr)", "fplibMinNum(cOp1, cOp2, fpscr)") buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "FloatCmpOp", + "fplibMaxNum(cOp1, cOp2, fpscr)", "fplibMaxNum(cOp1, cOp2, fpscr)", "fplibMaxNum(cOp1, cOp2, fpscr)") - def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None): + def buildUnaryFpOp(name, Name, base, opClass, + halfOp, singleOp, doubleOp = None): if doubleOp is None: doubleOp = singleOp global header_output, decoder_output, exec_output + code = halfIntConvCode % { "op": halfOp } + hIop = InstObjParams(name, Name + "H", base, + { "code": code, + "op_class": opClass }, []) code = singleIntConvCode % { "op": singleOp } sIop = InstObjParams(name, Name + "S", base, { "code": code, @@ -351,28 +415,33 @@ let {{ declareTempl = eval( base + "Declare"); constructorTempl = eval("AA64" + base + "Constructor"); - for iop in sIop, dIop: + for iop in hIop, sIop, dIop: header_output += declareTempl.subst(iop) decoder_output += constructorTempl.subst(iop) exec_output += BasicExecute.subst(iop) buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "FloatSqrtOp", - "fplibSqrt(cOp1, fpscr)", "fplibSqrt(cOp1, fpscr)") + "fplibSqrt(cOp1, fpscr)", + "fplibSqrt(cOp1, fpscr)", + "fplibSqrt(cOp1, fpscr)") - def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp, + def buildSimpleUnaryFpOp(name, Name, base, opClass, halfOp, singleOp, doubleOp = None, isIntConv = True): if doubleOp is None: doubleOp = singleOp global header_output, decoder_output, exec_output if isIntConv: + hCode = halfIntConvCode sCode = singleIntConvCode dCode = doubleIntConvCode else: + hCode = halfCode sCode = singleCode dCode = doubleCode - for code, op, suffix in [[sCode, singleOp, "S"], + for code, op, suffix in [[hCode, halfOp, "H"], + [sCode, singleOp, "S"], [dCode, doubleOp, "D"]]: iop = InstObjParams(name, Name + suffix, base, { "code": code % { "op": op }, @@ -386,30 +455,41 @@ let {{ exec_output += BasicExecute.subst(iop) buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "FloatMiscOp", - "fplibNeg(cOp1)", "fplibNeg(cOp1)") + "fplibNeg(cOp1)", + "fplibNeg(cOp1)", + "fplibNeg(cOp1)") buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "FloatMiscOp", - "fplibAbs(cOp1)", "fplibAbs(cOp1)") + "fplibAbs(cOp1)", + "fplibAbs(cOp1)", + "fplibAbs(cOp1)") buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPRounding_TIEEVEN, false, fpscr)", - "fplibRoundInt(cOp1, FPRounding_TIEEVEN, false, fpscr)") + "fplibRoundInt(cOp1, FPRounding_TIEEVEN, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_TIEEVEN, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_TIEEVEN, false, fpscr)") buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPRounding_POSINF, false, fpscr)", - "fplibRoundInt(cOp1, FPRounding_POSINF, false, fpscr)") + "fplibRoundInt(cOp1, FPRounding_POSINF, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_POSINF, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_POSINF, false, fpscr)") buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPRounding_NEGINF, false, fpscr)", - "fplibRoundInt(cOp1, FPRounding_NEGINF, false, fpscr)") + "fplibRoundInt(cOp1, FPRounding_NEGINF, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_NEGINF, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_NEGINF, false, fpscr)") buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPRounding_ZERO, false, fpscr)", - "fplibRoundInt(cOp1, FPRounding_ZERO, false, fpscr)") + "fplibRoundInt(cOp1, FPRounding_ZERO, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_ZERO, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_ZERO, false, fpscr)") buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPRounding_TIEAWAY, false, fpscr)", - "fplibRoundInt(cOp1, FPRounding_TIEAWAY, false, fpscr)") + "fplibRoundInt(cOp1, FPRounding_TIEAWAY, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_TIEAWAY, false, fpscr)", + "fplibRoundInt(cOp1, FPRounding_TIEAWAY, false, fpscr)") buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPCRRounding(fpscr), false, fpscr)", - "fplibRoundInt(cOp1, FPCRRounding(fpscr), false, fpscr)") + "fplibRoundInt(cOp1, FPCRRounding(fpscr), false, fpscr)", + "fplibRoundInt(cOp1, FPCRRounding(fpscr), false, fpscr)", + "fplibRoundInt(cOp1, FPCRRounding(fpscr), false, fpscr)") buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "FloatMiscOp", - "fplibRoundInt(cOp1, FPCRRounding(fpscr), true, fpscr)", - "fplibRoundInt(cOp1, FPCRRounding(fpscr), true, fpscr)") + "fplibRoundInt(cOp1, FPCRRounding(fpscr), true, fpscr)", + "fplibRoundInt(cOp1, FPCRRounding(fpscr), true, fpscr)", + "fplibRoundInt(cOp1, FPCRRounding(fpscr), true, fpscr)") }}; let {{ diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa index 4897e7c91..eb130dbbd 100644 --- a/src/arch/arm/isa/insts/neon64.isa +++ b/src/arch/arm/isa/insts/neon64.isa @@ -1,6 +1,6 @@ // -*- mode: c++ -*- -// Copyright (c) 2012-2013, 2015-2016 ARM Limited +// Copyright (c) 2012-2013, 2015-2018 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -45,7 +45,7 @@ let {{ decoders = { 'Generic' : {} } # FP types (FP operations always work with unsigned representations) - floatTypes = ("uint32_t", "uint64_t") + floatTypes = ("uint16_t", "uint32_t", "uint64_t") smallFloatTypes = ("uint32_t",) def threeEqualRegInstX(name, Name, opClass, types, rCount, op, -- 2.30.2