From 33da368e99955230a7d83c33c49c42fb59ec5015 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 2 Jun 2010 12:58:03 -0500 Subject: [PATCH] ARM: Implement all integer multiply instructions. --- src/arch/arm/isa/insts/insts.isa | 3 + src/arch/arm/isa/insts/mult.isa | 359 +++++++++++++++++++++++++++++++ src/arch/arm/isa/operands.isa | 8 + 3 files changed, 370 insertions(+) create mode 100644 src/arch/arm/isa/insts/mult.isa diff --git a/src/arch/arm/isa/insts/insts.isa b/src/arch/arm/isa/insts/insts.isa index a7a96a51c..2e94a43c9 100644 --- a/src/arch/arm/isa/insts/insts.isa +++ b/src/arch/arm/isa/insts/insts.isa @@ -57,3 +57,6 @@ //Branches ##include "branch.isa" + +//Multiply +##include "mult.isa" diff --git a/src/arch/arm/isa/insts/mult.isa b/src/arch/arm/isa/insts/mult.isa new file mode 100644 index 000000000..f1a5096fd --- /dev/null +++ b/src/arch/arm/isa/insts/mult.isa @@ -0,0 +1,359 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2010 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + calcQCode = ''' + cprintf("canOverflow: %%d\\n", Reg0 < resTemp); + replaceBits(CondCodes, 27, Reg0 < resTemp); + ''' + + calcCcCode = ''' + uint16_t _iz, _in; + _in = (resTemp >> %(negBit)d) & 1; + _iz = ((%(zType)s)resTemp == 0); + + CondCodes = _in << 31 | _iz << 30 | (CondCodes & 0x3FFFFFFF); + + DPRINTF(Arm, "(in, iz) = (%%d, %%d)\\n", _in, _iz); + ''' + + def buildMultInst(mnem, doCc, unCc, regs, code, flagType): + global header_output, decoder_output, exec_output + cCode = carryCode[flagType] + vCode = overflowCode[flagType] + zType = "uint32_t" + negBit = 31 + if flagType == "llbit": + zType = "uint64_t" + negBit = 63 + if flagType == "overflow": + ccCode = calcQCode + else: + ccCode = calcCcCode % { + "negBit": negBit, + "zType": zType + } + + if not regs in (3, 4): + raise Exception, "Multiplication instructions with %d " + \ + "registers are not implemented" + + if regs == 3: + base = 'Mult3' + else: + base = 'Mult4' + + Name = "New" + mnem.capitalize() + + if unCc: + iop = InstObjParams(mnem, Name, base, + {"code" : code, + "predicate_test": predicateTest}) + if doCc: + iopCc = InstObjParams(mnem + "s", Name + "Cc", base, + {"code" : code + ccCode, + "predicate_test": predicateTest}) + + if regs == 3: + declare = Mult3Declare + constructor = Mult3Constructor + else: + declare = Mult4Declare + constructor = Mult4Constructor + + if unCc: + header_output += declare.subst(iop) + decoder_output += constructor.subst(iop) + exec_output += PredOpExecute.subst(iop) + if doCc: + header_output += declare.subst(iopCc) + decoder_output += constructor.subst(iopCc) + exec_output += PredOpExecute.subst(iopCc) + + def buildMult3Inst(mnem, code, flagType = "logic"): + buildMultInst(mnem, True, True, 3, code, flagType) + + def buildMult3InstCc(mnem, code, flagType = "logic"): + buildMultInst(mnem, True, False, 3, code, flagType) + + def buildMult3InstUnCc(mnem, code, flagType = "logic"): + buildMultInst(mnem, False, True, 3, code, flagType) + + def buildMult4Inst(mnem, code, flagType = "logic"): + buildMultInst(mnem, True, True, 4, code, flagType) + + def buildMult4InstCc(mnem, code, flagType = "logic"): + buildMultInst(mnem, True, False, 4, code, flagType) + + def buildMult4InstUnCc(mnem, code, flagType = "logic"): + buildMultInst(mnem, False, True, 4, code, flagType) + + buildMult4Inst ("mla", "Reg0 = resTemp = Reg1 * Reg2 + Reg3;") + buildMult4InstUnCc("mls", "Reg0 = resTemp = Reg3 - Reg1 * Reg2;") + buildMult3Inst ("mul", "Reg0 = resTemp = Reg1 * Reg2;") + buildMult4InstCc ("smlabb", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 15, 0)) * + sext<16>(bits(Reg2, 15, 0)) + + Reg3.sw; + ''', "overflow") + buildMult4InstCc ("smlabt", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 15, 0)) * + sext<16>(bits(Reg2, 31, 16)) + + Reg3.sw; + ''', "overflow") + buildMult4InstCc ("smlatb", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 31, 16)) * + sext<16>(bits(Reg2, 15, 0)) + + Reg3.sw; + ''', "overflow") + buildMult4InstCc ("smlatt", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 31, 16)) * + sext<16>(bits(Reg2, 31, 16)) + + Reg3.sw; + ''', "overflow") + buildMult4InstCc ("smlad", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 31, 16)) * + sext<16>(bits(Reg2, 31, 16)) + + sext<16>(bits(Reg1, 15, 0)) * + sext<16>(bits(Reg2, 15, 0)) + + Reg3.sw; + ''', "overflow") + buildMult4InstCc ("smladx", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 31, 16)) * + sext<16>(bits(Reg2, 15, 0)) + + sext<16>(bits(Reg1, 15, 0)) * + sext<16>(bits(Reg2, 31, 16)) + + Reg3.sw; + ''', "overflow") + buildMult4Inst ("smlal", '''resTemp = sext<32>(Reg2) * sext<32>(Reg3) + + (int64_t)((Reg1.ud << 32) | Reg0.ud); + Reg0.ud = (uint32_t)resTemp; + Reg1.ud = (uint32_t)(resTemp >> 32); + ''', "llbit") + buildMult4InstUnCc("smlalbb", '''resTemp = sext<16>(bits(Reg2, 15, 0)) * + sext<16>(bits(Reg3, 15, 0)) + + (int64_t)((Reg1.ud << 32) | + Reg0.ud); + Reg0.ud = (uint32_t)resTemp; + Reg1.ud = (uint32_t)(resTemp >> 32); + ''') + buildMult4InstUnCc("smlalbt", '''resTemp = sext<16>(bits(Reg2, 15, 0)) * + sext<16>(bits(Reg3, 31, 16)) + + (int64_t)((Reg1.ud << 32) | + Reg0.ud); + Reg0.ud = (uint32_t)resTemp; + Reg1.ud = (uint32_t)(resTemp >> 32); + ''') + buildMult4InstUnCc("smlaltb", '''resTemp = sext<16>(bits(Reg2, 31, 16)) * + sext<16>(bits(Reg3, 15, 0)) + + (int64_t)((Reg1.ud << 32) | + Reg0.ud); + Reg0.ud = (uint32_t)resTemp; + Reg1.ud = (uint32_t)(resTemp >> 32); + ''') + buildMult4InstUnCc("smlaltt", '''resTemp = sext<16>(bits(Reg2, 31, 16)) * + sext<16>(bits(Reg3, 31, 16)) + + (int64_t)((Reg1.ud << 32) | + Reg0.ud); + Reg0.ud = (uint32_t)resTemp; + Reg1.ud = (uint32_t)(resTemp >> 32); + ''') + buildMult4InstUnCc("smlald", '''resTemp = + sext<16>(bits(Reg2, 31, 16)) * + sext<16>(bits(Reg3, 31, 16)) + + sext<16>(bits(Reg2, 15, 0)) * + sext<16>(bits(Reg3, 15, 0)) + + (int64_t)((Reg1.ud << 32) | + Reg0.ud); + Reg0.ud = (uint32_t)resTemp; + Reg1.ud = (uint32_t)(resTemp >> 32); + ''') + buildMult4InstUnCc("smlaldx", '''resTemp = + sext<16>(bits(Reg2, 31, 16)) * + sext<16>(bits(Reg3, 15, 0)) + + sext<16>(bits(Reg2, 15, 0)) * + sext<16>(bits(Reg3, 31, 16)) + + (int64_t)((Reg1.ud << 32) | + Reg0.ud); + Reg0.ud = (uint32_t)resTemp; + Reg1.ud = (uint32_t)(resTemp >> 32); + ''') + buildMult4InstCc ("smlawb", '''Reg0 = resTemp = + (Reg1.sw * + sext<16>(bits(Reg2, 15, 0)) + + (Reg3.sw << 16)) >> 16; + ''', "overflow") + buildMult4InstCc ("smlawt", '''Reg0 = resTemp = + (Reg1.sw * + sext<16>(bits(Reg2, 31, 16)) + + (Reg3.sw << 16)) >> 16; + ''', "overflow") + buildMult4InstCc ("smlsd", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 15, 0)) * + sext<16>(bits(Reg2, 15, 0)) - + sext<16>(bits(Reg1, 31, 16)) * + sext<16>(bits(Reg2, 31, 16)) + + Reg3.sw; + ''', "overflow") + buildMult4InstCc ("smlsdx", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 15, 0)) * + sext<16>(bits(Reg2, 31, 16)) - + sext<16>(bits(Reg1, 31, 16)) * + sext<16>(bits(Reg2, 15, 0)) + + Reg3.sw; + ''', "overflow") + buildMult4InstUnCc("smlsld", '''resTemp = + sext<16>(bits(Reg2, 15, 0)) * + sext<16>(bits(Reg3, 15, 0)) - + sext<16>(bits(Reg2, 31, 16)) * + sext<16>(bits(Reg3, 31, 16)) + + (int64_t)((Reg1.ud << 32) | + Reg0.ud); + Reg0.ud = (uint32_t)resTemp; + Reg1.ud = (uint32_t)(resTemp >> 32); + ''') + buildMult4InstUnCc("smlsldx", '''resTemp = + sext<16>(bits(Reg2, 15, 0)) * + sext<16>(bits(Reg3, 31, 16)) - + sext<16>(bits(Reg2, 31, 16)) * + sext<16>(bits(Reg3, 15, 0)) + + (int64_t)((Reg1.ud << 32) | + Reg0.ud); + Reg0.ud = (uint32_t)resTemp; + Reg1.ud = (uint32_t)(resTemp >> 32); + ''') + buildMult4InstUnCc("smmla", '''Reg0 = resTemp = + ((int64_t)(Reg3.ud << 32) + + Reg1.sw * Reg2.sw) >> 32; + ''') + buildMult4InstUnCc("smmlar", '''Reg0 = resTemp = + ((int64_t)(Reg3.ud << 32) + + Reg1.sw * Reg2.sw + + ULL(0x80000000)) >> 32; + ''') + buildMult4InstUnCc("smmls", '''Reg0 = resTemp = + ((int64_t)(Reg3.ud << 32) - + Reg1.sw * Reg2.sw) >> 32; + ''') + buildMult4InstUnCc("smmlsr", '''Reg0 = resTemp = + ((int64_t)(Reg3.ud << 32) - + Reg1.sw * Reg2.sw + + ULL(0x80000000)) >> 32; + ''') + buildMult3InstUnCc("smmul", '''Reg0 = resTemp = + ((int64_t)Reg1 * + (int64_t)Reg2) >> 32; + ''') + buildMult3InstUnCc("smmulr", '''Reg0 = resTemp = + ((int64_t)Reg1 * + (int64_t)Reg2 + + ULL(0x80000000)) >> 32; + ''') + buildMult3InstCc ("smuad", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 15, 0)) * + sext<16>(bits(Reg2, 15, 0)) + + sext<16>(bits(Reg1, 31, 16)) * + sext<16>(bits(Reg2, 31, 16)); + ''', "overflow") + buildMult3InstCc ("smuadx", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 15, 0)) * + sext<16>(bits(Reg2, 31, 16)) + + sext<16>(bits(Reg1, 31, 16)) * + sext<16>(bits(Reg2, 15, 0)); + ''', "overflow") + buildMult3InstUnCc("smulbb", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 15, 0)) * + sext<16>(bits(Reg2, 15, 0)); + ''') + buildMult3InstUnCc("smulbt", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 31, 16)) * + sext<16>(bits(Reg2, 15, 0)); + ''') + buildMult3InstUnCc("smultb", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 15, 0)) * + sext<16>(bits(Reg2, 31, 16)); + ''') + buildMult3InstUnCc("smultt", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 31, 16)) * + sext<16>(bits(Reg2, 31, 16)); + ''') + buildMult4Inst ("smull", '''resTemp = Reg2.sw * Reg3.sw; + Reg0 = (int32_t)resTemp; + Reg1 = (int32_t)(resTemp >> 32); + ''', "llbit") + buildMult3InstUnCc("smulwb", '''Reg0 = resTemp = + (Reg1.sw * + sext<16>(bits(Reg2, 15, 0))) >> 16; + ''') + buildMult3InstUnCc("smulwt", '''Reg0 = resTemp = + (Reg1.sw * + sext<16>(bits(Reg2, 31, 16))) >> 16; + ''') + buildMult3InstUnCc("smusd", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 15, 0)) * + sext<16>(bits(Reg2, 15, 0)) - + sext<16>(bits(Reg1, 31, 16)) * + sext<16>(bits(Reg2, 31, 16)); + ''') + buildMult3InstUnCc("smusdx", '''Reg0 = resTemp = + sext<16>(bits(Reg1, 15, 0)) * + sext<16>(bits(Reg2, 31, 16)) - + sext<16>(bits(Reg1, 31, 16)) * + sext<16>(bits(Reg2, 15, 0)); + ''') + buildMult4InstUnCc("umaal", '''resTemp = Reg2.ud * Reg3.ud + + Reg0.ud + Reg1.ud; + Reg0.ud = (uint32_t)resTemp; + Reg1.ud = (uint32_t)(resTemp >> 32); + ''') + buildMult4Inst ("umlal", '''resTemp = Reg2.ud * Reg3.ud + Reg0.ud + + (Reg1.ud << 32); + Reg0.ud = (uint32_t)resTemp; + Reg1.ud = (uint32_t)(resTemp >> 32); + ''', "llbit") + buildMult4Inst ("umull", '''resTemp = Reg2.ud * Reg3.ud; + Reg0 = (uint32_t)resTemp; + Reg1 = (uint32_t)(resTemp >> 32); + ''', "llbit") +}}; diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa index ab4d95d47..2621106ac 100644 --- a/src/arch/arm/isa/operands.isa +++ b/src/arch/arm/isa/operands.isa @@ -89,6 +89,14 @@ def operands {{ maybePCRead, maybePCWrite), 'Shift': ('IntReg', 'uw', 'shift', 'IsInteger', 5, maybePCRead, maybePCWrite), + 'Reg0': ('IntReg', 'uw', 'reg0', 'IsInteger', 6, + maybePCRead, maybePCWrite), + 'Reg1': ('IntReg', 'uw', 'reg1', 'IsInteger', 7, + maybePCRead, maybePCWrite), + 'Reg2': ('IntReg', 'uw', 'reg2', 'IsInteger', 8, + maybePCRead, maybePCWrite), + 'Reg3': ('IntReg', 'uw', 'reg3', 'IsInteger', 9, + maybePCRead, maybePCWrite), #General Purpose Integer Reg Operands 'Rd': ('IntReg', 'uw', 'RD', 'IsInteger', 1, maybePCRead, maybePCWrite), 'Rm': ('IntReg', 'uw', 'RM', 'IsInteger', 2, maybePCRead, maybePCWrite), -- 2.30.2