From: Sandipan Das
Date: Sat, 6 Feb 2021 11:47:53 +0000 (+0530)
Subject: arch-power: Add doubleword multiply-add instructions
X-Git-Tag: develop-gem5-snapshot~46
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7b1f5e75cf7029e66b871fa312a9234674d46014;p=gem5.git

arch-power: Add doubleword multiply-add instructions

This introduces 128-bit addition helpers and adds the following
instructions.
  * Multiply-Add Low Doubleword (maddld)
  * Multiply-Add High Doubleword (maddhd)
  * Multiply-Add High Doubleword Unsigned (maddhdu)

Change-Id: I04e6ea5fb4978b341a6e648424de2930ad41f449
Signed-off-by: Sandipan Das
---

diff --git a/src/arch/power/insts/integer.cc b/src/arch/power/insts/integer.cc
index d2862ff39..da7f392ea 100644
--- a/src/arch/power/insts/integer.cc
+++ b/src/arch/power/insts/integer.cc
@@ -119,6 +119,7 @@ IntArithOp::generateDisassembly(
 {
     std::stringstream ss;
     bool printSecondSrc = true;
+    bool printThirdSrc = false;
 
     // Generate the correct mnemonic
     std::string myMnemonic(mnemonic);
@@ -130,6 +131,10 @@ IntArithOp::generateDisassembly(
         !myMnemonic.compare("subfze") ||
         !myMnemonic.compare("neg")){
         printSecondSrc = false;
+    } else if (!myMnemonic.compare("maddhd") ||
+               !myMnemonic.compare("maddhdu") ||
+               !myMnemonic.compare("maddld")) {
+        printThirdSrc = true;
     }
 
     // Additional characters depending on isa bits being set
@@ -153,6 +158,12 @@ IntArithOp::generateDisassembly(
         if (_numSrcRegs > 1 && printSecondSrc) {
             ss << ", ";
             printReg(ss, srcRegIdx(1));
+
+            // Print the third source register
+            if (_numSrcRegs > 2 && printThirdSrc) {
+                ss << ", ";
+                printReg(ss, srcRegIdx(2));
+            }
         }
     }
 
diff --git a/src/arch/power/insts/integer.hh b/src/arch/power/insts/integer.hh
index 12ad8fc99..366d36adb 100644
--- a/src/arch/power/insts/integer.hh
+++ b/src/arch/power/insts/integer.hh
@@ -154,6 +154,52 @@ class IntArithOp : public IntOp
     {
     }
 
+    /* Compute 128-bit sum of 128-bit to 64-bit unsigned integer addition */
+    inline std::tuple<uint64_t, uint64_t>
+    add(uint64_t ralo, uint64_t rahi, uint64_t rb) const
+    {
+        uint64_t slo, shi;
+    #if defined(__SIZEOF_INT128__)
+        __uint128_t ra = ((__uint128_t)rahi << 64) | ralo;
+        __uint128_t sum = ra + rb;
+        slo = sum;
+        shi = sum >> 64;
+    #else
+        shi = rahi + ((ralo + rb) < ralo);
+        slo = ralo + rb;
+    #endif
+        return std::make_tuple(slo, shi);
+    }
+
+    /* Compute 128-bit sum of 128-bit to 64-bit signed integer addition */
+    inline std::tuple<uint64_t, int64_t>
+    add(uint64_t ralo, int64_t rahi, int64_t rb) const
+    {
+        uint64_t slo;
+        int64_t shi;
+    #if defined(__SIZEOF_INT128__)
+        __int128_t ra = ((__int128_t)rahi << 64) | ralo;
+        __int128_t sum = (__int128_t)ra + rb;
+        slo = sum;
+        shi = sum >> 64;
+    #else
+        if (rb < 0) {
+            shi = rahi - 1;
+            slo = ralo + rb;
+            if (slo < rb) {
+                shi++;
+            }
+        } else {
+            shi = rahi;
+            slo = ralo + rb;
+            if (slo < rb) {
+                shi++;
+            }
+        }
+    #endif
+        return std::make_tuple(slo, shi);
+    }
+
     /**
      * Compute 128-bit product of 64-bit unsigned integer multiplication
      * based on https://stackoverflow.com/a/28904636
@@ -197,6 +243,48 @@ class IntArithOp : public IntOp
         return std::make_tuple(plo, (int64_t)phi);
     }
 
+    /**
+     * Compute 128-bit result of 64-bit unsigned integer multiplication
+     * followed by addition
+     */
+    inline std::tuple<uint64_t, uint64_t>
+    multiplyAdd(uint64_t ra, uint64_t rb, uint64_t rc) const
+    {
+        uint64_t rlo, rhi;
+    #if defined(__SIZEOF_INT128__)
+        __uint128_t res = ((__uint128_t)ra * rb) + rc;
+        rlo = res;
+        rhi = res >> 64;
+    #else
+        uint64_t plo, phi;
+        std::tie(plo, phi) = multiply(ra, rb);
+        std::tie(rlo, rhi) = add(plo, phi, rc);
+    #endif
+        return std::make_tuple(rlo, rhi);
+    }
+
+    /**
+     * Compute 128-bit result of 64-bit signed integer multiplication
+     * followed by addition
+     */
+    inline std::tuple<uint64_t, int64_t>
+    multiplyAdd(int64_t ra, int64_t rb, int64_t rc) const
+    {
+        uint64_t rlo;
+        int64_t rhi;
+    #if defined(__SIZEOF_INT128__)
+        __int128_t res = (__int128_t)ra * rb + rc;
+        rlo = res;
+        rhi = res >> 64;
+    #else
+        uint64_t plo;
+        int64_t phi;
+        std::tie(plo, phi) = multiply(ra, rb);
+        std::tie(rlo, rhi) = add(plo, phi, rc);
+    #endif
+        return std::make_tuple(rlo, rhi);
+    }
+
     std::string generateDisassembly(
             Addr pc, const Loader::SymbolTable *symtab) const override;
 };
diff --git a/src/arch/power/isa/decoder.isa b/src/arch/power/isa/decoder.isa
index 13bbe87c7..4297d7d14 100644
--- a/src/arch/power/isa/decoder.isa
+++ b/src/arch/power/isa/decoder.isa
@@ -209,6 +209,31 @@ decode PO default Unknown::unknown() {
         }});
     }
 
+    4: decode VA_XO {
+
+        // Arithmetic instructions that use source registers Ra, Rb and Rc,
+        // with destination register Rt.
+        format IntArithOp {
+            48: maddhd({{
+                int64_t res;
+                std::tie(std::ignore, res) = multiplyAdd(Ra_sd, Rb_sd, Rc_sd);
+                Rt = res;
+            }});
+
+            49: maddhdu({{
+                uint64_t res;
+                std::tie(std::ignore, res) = multiplyAdd(Ra, Rb, Rc);
+                Rt = res;
+            }});
+
+            51: maddld({{
+                uint64_t res;
+                std::tie(res, std::ignore) = multiplyAdd(Ra_sd, Rb_sd, Rc_sd);
+                Rt = res;
+            }});
+        }
+    }
+
     format IntImmOp {
         10: cmpli({{
             Xer xer = XER;
diff --git a/src/arch/power/isa/formats/integer.isa b/src/arch/power/isa/formats/integer.isa
index cb858a8e3..6860c26a7 100644
--- a/src/arch/power/isa/formats/integer.isa
+++ b/src/arch/power/isa/formats/integer.isa
@@ -328,6 +328,16 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0,
 }};
 
 
+// Instructions that use source registers Ra and Rb, with the result
+// placed into Rt but do not check for carry, overflow or the Rc bit.
+def format IntArithOp(code, inst_flags = []) {{
+
+    # Generate the class
+    (header_output, decoder_output, decode_block, exec_output) = \
+        GenAluOp(name, Name, 'IntArithOp', code, inst_flags, BasicDecode,
+                 BasicConstructor)
+}};
+
 // Instructions that use source registers Ra and Rb, with the result
 // placed into Rt. Basically multiply and divide instructions. The