From 3eab6ea51e4c2b97e7a68835dddacb989d7559c4 Mon Sep 17 00:00:00 2001 From: Jordi Vaquero Date: Fri, 17 Apr 2020 16:00:13 +0200 Subject: [PATCH] arch-arm: Fix SVE indx inst by sizeof error and dest overwrite This patch includes two fixes for the SVE FMUL, FMLA, FMLS and FCMLA instructions + Fixes indexed functions like FMUL, FMLA, FMLS, FCMLA, whose destination register was overwritten with temporary values, since the imm can make changes in vector positions that will be read in the future. + sizeof returns bytes, not bits, so the dividend should be 16 instead of 128 Change-Id: I304d1b254a299069c85bbc3319e5a6d4119436d0 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28228 Reviewed-by: Giacomo Travaglini Maintainer: Giacomo Travaglini Tested-by: kokoro --- src/arch/arm/isa/insts/sve.isa | 54 +++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa index 16597d6d6..9314ba9a6 100644 --- a/src/arch/arm/isa/insts/sve.isa +++ b/src/arch/arm/isa/insts/sve.isa @@ -1835,26 +1835,25 @@ let {{ xc->tcBase()); // Number of elements in a 128 bit segment - constexpr unsigned ePerSegment = 128 / sizeof(Element); + constexpr unsigned ePerSegment = 16 / sizeof(Element); - ''' - - code += ''' + ArmISA::VecRegContainer tmpC; + auto auxDest = tmpC.as(); for (unsigned i = 0; i < eCount; i++) { - const auto segmentBase = i - i % ePerSegment; - const auto segmentIdx = segmentBase + index; - - const Element& srcElem1 = AA64FpOp1_x[i]; - const Element& srcElem2 = AA64FpOp2_x[segmentIdx]; - Element destElem = 0; + const auto segmentBase = i - i %% ePerSegment; + const auto segmentIdx = segmentBase + index; - ''' + const Element& srcElem1 = AA64FpOp1_x[i]; + const Element& srcElem2 = AA64FpOp2_x[segmentIdx]; + Element destElem = 0; - code += ''' - %(op)s - AA64FpDest_x[i] = destElem; + %(op)s + auxDest[i] = destElem; } - ''' % {'op': op} + + for (unsigned i = 0; i < eCount; i++) { 
+ AA64FpDest_x[i] = auxDest[i]; + }''' % {'op':op} baseClass = 'SveBinIdxUnpredOp' @@ -2067,8 +2066,10 @@ let {{ xc->tcBase()); // Number of elements in a 128 bit segment - constexpr unsigned ePerSegment = 128 / sizeof(Element); + constexpr unsigned ePerSegment = 16 / sizeof(Element); + ArmISA::VecRegContainer tmpC; + auto auxDest = tmpC.as(); for (unsigned i = 0; i < eCount; i++) { const auto segmentBase = i - i % ePerSegment; const auto segmentIdx = segmentBase + index; @@ -2077,10 +2078,13 @@ let {{ const Element& srcElem2 = AA64FpOp2_x[segmentIdx]; Element destElem = AA64FpDestMerge_x[i]; ''' - code += ''' %(op)s - AA64FpDest_x[i] = destElem; + auxDest[i] = destElem; + } + + for (unsigned i = 0; i < eCount; i++) { + AA64FpDest_x[i] = auxDest[i]; }''' % {'op': op} iop = InstObjParams(name, 'Sve' + Name, 'SveBinIdxUnpredOp', @@ -3024,6 +3028,9 @@ let {{ code += ''' uint32_t eltspersegment = 16 / (2 * sizeof(Element));''' code += ''' + ArmISA::VecRegContainer tmpC; + auto auxDest = tmpC.as(); + for (int i = 0; i < eCount / 2; ++i) {''' if predType == PredType.NONE: code += ''' @@ -3067,9 +3074,14 @@ let {{ code += ''' }''' code += ''' - AA64FpDest_x[2 * i] = addend_r; - AA64FpDest_x[2 * i + 1] = addend_i; - }''' + auxDest[2 * i] = addend_r; + auxDest[2 * i + 1] = addend_i; + } + + for (unsigned i = 0; i < eCount; i++) { + AA64FpDest_x[i] = auxDest[i]; + } + ''' iop = InstObjParams(name, 'Sve' + Name, 'SveComplexIdxOp' if predType == PredType.NONE else 'SveComplexOp', -- 2.30.2