From 5b23b6ea018575428f4119e3042ec6d8fb03f120 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Wed, 22 Apr 2020 17:24:26 +0100 Subject: [PATCH] arch-arm: SVE instructions do not use AHP format SVE half-precision floating-point instructions support only IEEE 754-2008 half-precision format and ignore the value of the FPCR.AHP bit, behaving as if it has an Effective value of 0. This patch is addressing this by masking the FPSCR.AHP bit before passing it to fplib. Change-Id: I1432fc3f7fefb81445fe042ae7d681f5cec40e64 Signed-off-by: Giacomo Travaglini Reviewed-by: Nikos Nikoleris Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28108 Tested-by: kokoro --- src/arch/arm/isa/insts/sve.isa | 14 +++++++------- src/arch/arm/miscregs.hh | 6 ++++-- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa index b4c7fe5be..deb12bca4 100644 --- a/src/arch/arm/isa/insts/sve.isa +++ b/src/arch/arm/isa/insts/sve.isa @@ -2949,7 +2949,7 @@ let {{ if (sub_i) { elt2_i = fplibNeg(elt2_i); } - fpscr = (FPSCR) FpscrExc; + fpscr = FpscrExc & ~FpscrAhpMask; acc_r = fplibAdd(acc_r, elt2_i, fpscr); FpscrExc = fpscr; } @@ -2957,7 +2957,7 @@ let {{ if (sub_r) { elt2_r = fplibNeg(elt2_r); } - fpscr = (FPSCR) FpscrExc; + fpscr = FpscrExc & ~FpscrAhpMask; acc_i = fplibAdd(acc_i, elt2_r, fpscr); FpscrExc = fpscr; } @@ -3015,7 +3015,7 @@ let {{ if (neg_r) { elt2_a = fplibNeg(elt2_a); } - fpscr = (FPSCR) FpscrExc; + fpscr = FpscrExc & ~FpscrAhpMask; addend_r = fplibMulAdd(addend_r, elt1_a, elt2_a, fpscr); FpscrExc = fpscr;''' if predType != PredType.NONE: @@ -3028,7 +3028,7 @@ let {{ if (neg_i) { elt2_b = fplibNeg(elt2_b); } - fpscr = (FPSCR) FpscrExc; + fpscr = FpscrExc & ~FpscrAhpMask; addend_i = fplibMulAdd(addend_i, elt1_a, elt2_b, fpscr); FpscrExc = fpscr;''' if predType != PredType.NONE: @@ -3466,7 +3466,7 @@ let {{ sveExtInst('ext', 'Ext', 'SimdAluOp') # FABD fpOp = ''' - FPSCR fpscr = (FPSCR) FpscrExc; 
+ FPSCR fpscr = FpscrExc & ~FpscrAhpMask; destElem = %s; FpscrExc = fpscr; ''' @@ -3497,7 +3497,7 @@ let {{ sveBinInst('fadd', 'FaddUnpred', 'SimdFloatAddOp', floatTypes, faddCode) # FADDA fpAddaOp = ''' - FPSCR fpscr = (FPSCR) FpscrExc; + FPSCR fpscr = FpscrExc & ~FpscrAhpMask; destElem = fplibAdd(destElem, srcElem1, fpscr); FpscrExc = FpscrExc | fpscr; ''' @@ -3505,7 +3505,7 @@ let {{ fpAddaOp) # FADDV fpReduceOp = ''' - FPSCR fpscr = (FPSCR) FpscrExc; + FPSCR fpscr = FpscrExc & ~FpscrAhpMask; destElem = fplib%s(srcElem1, srcElem2, fpscr); FpscrExc = FpscrExc | fpscr; ''' diff --git a/src/arch/arm/miscregs.hh b/src/arch/arm/miscregs.hh index 3900a4ccd..550b51cd9 100644 --- a/src/arch/arm/miscregs.hh +++ b/src/arch/arm/miscregs.hh @@ -1936,10 +1936,12 @@ namespace ArmISA // This mask selects bits of the FPSCR that actually go in the FpCondCodes // integer register to allow renaming. static const uint32_t FpCondCodesMask = 0xF0000000; - // This mask selects the cumulative FP exception flags of the FPSCR. - static const uint32_t FpscrExcMask = 0x0000009F; // This mask selects the cumulative saturation flag of the FPSCR. static const uint32_t FpscrQcMask = 0x08000000; + // This mask selects the AHP bit of the FPSCR. + static const uint32_t FpscrAhpMask = 0x04000000; + // This mask selects the cumulative FP exception flags of the FPSCR. + static const uint32_t FpscrExcMask = 0x0000009F; /** * Check for permission to read coprocessor registers. -- 2.30.2