arm: Add support for ARMv8 (AArch64 & AArch32)
[gem5.git] / src / arch / arm / isa / insts / fp64.isa
1 // -*- mode:c++ -*-
2
3 // Copyright (c) 2012-2013 ARM Limited
4 // All rights reserved
5 //
6 // The license below extends only to copyright in the software and shall
7 // not be construed as granting a license to any other intellectual
8 // property including but not limited to intellectual property relating
9 // to a hardware implementation of the functionality of the software
10 // licensed hereunder. You may use the software subject to the license
11 // terms below provided that you ensure that this notice is replicated
12 // unmodified and in its entirety in all distributions of the software,
13 // modified or unmodified, in source code or in binary form.
14 //
15 // Redistribution and use in source and binary forms, with or without
16 // modification, are permitted provided that the following conditions are
17 // met: redistributions of source code must retain the above copyright
18 // notice, this list of conditions and the following disclaimer;
19 // redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution;
22 // neither the name of the copyright holders nor the names of its
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Authors: Thomas Grocutt
39 // Edmund Grimley Evans
40
41 let {{
42
# Scalar FMOV generators.  Each *Code string below becomes the C++ execute
# body of one generated instruction.  AA64FpDestP0..P3_uw are the four
# 32-bit words of the 128-bit SIMD/FP destination register (P0 = bits
# [31:0], P3 = bits [127:96]); unused upper words are explicitly zeroed.
43 header_output = ""
44 decoder_output = ""
45 exec_output = ""
46
# FMOV (immediate), single precision: immediate into the low word.
47 fmovImmSCode = vfp64EnabledCheckCode + '''
48 AA64FpDestP0_uw = bits(imm, 31, 0);
49 AA64FpDestP1_uw = 0;
50 AA64FpDestP2_uw = 0;
51 AA64FpDestP3_uw = 0;
52 '''
53 fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp",
54 { "code": fmovImmSCode,
55 "op_class": "SimdFloatMiscOp" }, [])
56 header_output += FpRegImmOpDeclare.subst(fmovImmSIop);
# NOTE(review): this uses FpRegImmOpConstructor while the D variant below
# uses AA64FpRegImmOpConstructor -- confirm the asymmetry is intentional.
57 decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop);
58 exec_output += BasicExecute.subst(fmovImmSIop);
59
# FMOV (immediate), double precision: 64-bit immediate split across P0/P1.
60 fmovImmDCode = vfp64EnabledCheckCode + '''
61 AA64FpDestP0_uw = bits(imm, 31, 0);
62 AA64FpDestP1_uw = bits(imm, 63, 32);
63 AA64FpDestP2_uw = 0;
64 AA64FpDestP3_uw = 0;
65 '''
66 fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp",
67 { "code": fmovImmDCode,
68 "op_class": "SimdFloatMiscOp" }, [])
69 header_output += FpRegImmOpDeclare.subst(fmovImmDIop);
70 decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop);
71 exec_output += BasicExecute.subst(fmovImmDIop);
72
# FMOV (register), single precision: copy low word, zero the rest.
73 fmovRegSCode = vfp64EnabledCheckCode + '''
74 AA64FpDestP0_uw = AA64FpOp1P0_uw;
75 AA64FpDestP1_uw = 0;
76 AA64FpDestP2_uw = 0;
77 AA64FpDestP3_uw = 0;
78 '''
79 fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp",
80 { "code": fmovRegSCode,
81 "op_class": "SimdFloatMiscOp" }, [])
82 header_output += FpRegRegOpDeclare.subst(fmovRegSIop);
83 decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop);
84 exec_output += BasicExecute.subst(fmovRegSIop);
85
# FMOV (register), double precision: copy low two words, zero the rest.
86 fmovRegDCode = vfp64EnabledCheckCode + '''
87 AA64FpDestP0_uw = AA64FpOp1P0_uw;
88 AA64FpDestP1_uw = AA64FpOp1P1_uw;
89 AA64FpDestP2_uw = 0;
90 AA64FpDestP3_uw = 0;
91 '''
92 fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp",
93 { "code": fmovRegDCode,
94 "op_class": "SimdFloatMiscOp" }, [])
95 header_output += FpRegRegOpDeclare.subst(fmovRegDIop);
96 decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop);
97 exec_output += BasicExecute.subst(fmovRegDIop);
98
# FMOV from a 32-bit core (W) register into the FP register's low word.
99 fmovCoreRegWCode = vfp64EnabledCheckCode + '''
100 AA64FpDestP0_uw = WOp1_uw;
101 AA64FpDestP1_uw = 0;
102 AA64FpDestP2_uw = 0;
103 AA64FpDestP3_uw = 0;
104 '''
105 fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp",
106 { "code": fmovCoreRegWCode,
107 "op_class": "SimdFloatMiscOp" }, [])
108 header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop);
109 decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop);
110 exec_output += BasicExecute.subst(fmovCoreRegWIop);
111
# FMOV from a 64-bit core (X) register into the FP register's low 64 bits.
# The assignment to P0 truncates XOp1_ud to its low 32 bits; P1 takes the
# high 32 bits via the shift.
112 fmovCoreRegXCode = vfp64EnabledCheckCode + '''
113 AA64FpDestP0_uw = XOp1_ud;
114 AA64FpDestP1_uw = XOp1_ud >> 32;
115 AA64FpDestP2_uw = 0;
116 AA64FpDestP3_uw = 0;
117 '''
118 fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp",
119 { "code": fmovCoreRegXCode,
120 "op_class": "SimdFloatMiscOp" }, [])
121 header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop);
122 decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop);
123 exec_output += BasicExecute.subst(fmovCoreRegXIop);
124
# Upper-half variant ("U"): writes only words P2/P3 (bits [127:64]) from
# the X register; the generated code does not touch P0/P1.
125 fmovUCoreRegXCode = vfp64EnabledCheckCode + '''
126 AA64FpDestP2_uw = XOp1_ud;
127 AA64FpDestP3_uw = XOp1_ud >> 32;
128 '''
129 fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp",
130 { "code": fmovUCoreRegXCode,
131 "op_class": "SimdFloatMiscOp" }, [])
132 header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop);
133 decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop);
134 exec_output += BasicExecute.subst(fmovUCoreRegXIop);
135
# FMOV from the FP register's low word to a 32-bit core (W) register.
136 fmovRegCoreWCode = vfp64EnabledCheckCode + '''
137 WDest = AA64FpOp1P0_uw;
138 '''
139 fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp",
140 { "code": fmovRegCoreWCode,
141 "op_class": "SimdFloatMiscOp" }, [])
142 header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop);
143 decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop);
144 exec_output += BasicExecute.subst(fmovRegCoreWIop);
145
# FMOV from the FP register's low 64 bits (P1:P0) to an X register.
146 fmovRegCoreXCode = vfp64EnabledCheckCode + '''
147 XDest = ( ((uint64_t) AA64FpOp1P1_uw) << 32) | AA64FpOp1P0_uw;
148 '''
149 fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp",
150 { "code": fmovRegCoreXCode,
151 "op_class": "SimdFloatMiscOp" }, [])
152 header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop);
153 decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop);
154 exec_output += BasicExecute.subst(fmovRegCoreXIop);
155
# Upper-half variant: FP register bits [127:64] (P3:P2) to an X register.
156 fmovURegCoreXCode = vfp64EnabledCheckCode + '''
157 XDest = ( ((uint64_t) AA64FpOp1P3_uw) << 32) | AA64FpOp1P2_uw;
158 '''
159 fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp",
160 { "code": fmovURegCoreXCode,
161 "op_class": "SimdFloatMiscOp" }, [])
162 header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop);
163 decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop);
164 exec_output += BasicExecute.subst(fmovURegCoreXIop);
165 }};
166
167 let {{
168
# Generators for scalar FP arithmetic: unary, binary and ternary
# (fused multiply-add family) operations, in S and D variants.
169 header_output = ""
170 decoder_output = ""
171 exec_output = ""
172
# C++ body template for a unary single-precision op; %(op)s is substituted
# with an fplib call that consumes cOp1 and fpscr.
173 singleIntConvCode = vfp64EnabledCheckCode + '''
174 FPSCR fpscr = (FPSCR) FpscrExc;
175 uint32_t cOp1 = AA64FpOp1P0_uw;
176 uint32_t cDest = %(op)s;
177 AA64FpDestP0_uw = cDest;
178 AA64FpDestP1_uw = 0;
179 AA64FpDestP2_uw = 0;
180 AA64FpDestP3_uw = 0;
181 FpscrExc = fpscr;
182 '''
183
# Same as above but with two source operands (binary ops).
184 singleIntConvCode2 = vfp64EnabledCheckCode + '''
185 FPSCR fpscr = (FPSCR) FpscrExc;
186 uint32_t cOp1 = AA64FpOp1P0_uw;
187 uint32_t cOp2 = AA64FpOp2P0_uw;
188 uint32_t cDest = %(op)s;
189 AA64FpDestP0_uw = cDest;
190 AA64FpDestP1_uw = 0;
191 AA64FpDestP2_uw = 0;
192 AA64FpDestP3_uw = 0;
193 FpscrExc = fpscr;
194 '''
195
# Expression templates for the binaryOp/unaryOp helpers; %(func)s is the
# function pointer/functor to apply.
196 singleBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \
197 "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
198 singleUnaryOp = "unaryOp(fpscr, AA64FpOp1P0, %(func)s, fpscr.fz, fpscr.rMode)"
199
# Double-precision counterparts: the 64-bit operand is reassembled from
# the two 32-bit register words before the fplib call.
200 doubleIntConvCode = vfp64EnabledCheckCode + '''
201 FPSCR fpscr = (FPSCR) FpscrExc;
202 uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
203 uint64_t cDest = %(op)s;
204 AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
205 AA64FpDestP1_uw = cDest >> 32;
206 AA64FpDestP2_uw = 0;
207 AA64FpDestP3_uw = 0;
208 FpscrExc = fpscr;
209 '''
210
211 doubleIntConvCode2 = vfp64EnabledCheckCode + '''
212 FPSCR fpscr = (FPSCR) FpscrExc;
213 uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw;
214 uint64_t cOp2 = ((uint64_t) AA64FpOp2P1_uw) << 32 | AA64FpOp2P0_uw;
215 uint64_t cDest = %(op)s;
216 AA64FpDestP0_uw = cDest & 0xFFFFFFFF;
217 AA64FpDestP1_uw = cDest >> 32;
218 AA64FpDestP2_uw = 0;
219 AA64FpDestP3_uw = 0;
220 FpscrExc = fpscr;
221 '''
222
223 doubleBinOp = '''
224 binaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw),
225 dbl(AA64FpOp2P0_uw, AA64FpOp2P1_uw),
226 %(func)s, fpscr.fz, fpscr.dn, fpscr.rMode);
227 '''
228 doubleUnaryOp = '''
229 unaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), %(func)s,
230 fpscr.fz, fpscr.rMode)
231 '''
232
# Emit S and D variants of a four-register (dest + 3 sources) FP op.
# sOp/dOp are C++ expressions (strings) computing cDest from cOp1..cOp3.
233 def buildTernaryFpOp(name, opClass, sOp, dOp):
234 global header_output, decoder_output, exec_output
235 for isDouble in True, False:
236 code = vfp64EnabledCheckCode + '''
237 FPSCR fpscr = (FPSCR) FpscrExc;
238 '''
239 if isDouble:
240 code += '''
241 uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
242 uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32;
243 uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32;
244 uint64_t cDest;
245 ''' "cDest = " + dOp + ";" + '''
246 AA64FpDestP0_uw = cDest;
247 AA64FpDestP1_uw = cDest >> 32;
248 '''
249 else:
250 code += '''
251 uint32_t cOp1 = AA64FpOp1P0_uw;
252 uint32_t cOp2 = AA64FpOp2P0_uw;
253 uint32_t cOp3 = AA64FpOp3P0_uw;
254 uint32_t cDest;
255 ''' "cDest = " + sOp + ";" + '''
256 AA64FpDestP0_uw = cDest;
257 AA64FpDestP1_uw = 0;
258 '''
259 code += '''
260 AA64FpDestP2_uw = 0;
261 AA64FpDestP3_uw = 0;
262 FpscrExc = fpscr;
263 '''
264
265 iop = InstObjParams(name.lower(), name + ("D" if isDouble else "S"),
266 "FpRegRegRegRegOp",
267 { "code": code, "op_class": opClass }, [])
268
269 header_output += AA64FpRegRegRegRegOpDeclare.subst(iop)
270 decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop)
271 exec_output += BasicExecute.subst(iop)
272
# FMADD/FMSUB/FNMADD/FNMSUB: all map onto fplibMulAdd with the operands
# negated as required; the negated-sign placement follows the expressions
# passed in below.
273 buildTernaryFpOp("FMAdd", "SimdFloatMultAccOp",
274 "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)",
275 "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" )
276 buildTernaryFpOp("FMSub", "SimdFloatMultAccOp",
277 "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
278 "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
279 buildTernaryFpOp("FNMAdd", "SimdFloatMultAccOp",
280 "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
281 "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
282 buildTernaryFpOp("FNMSub", "SimdFloatMultAccOp",
283 "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)",
284 "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" )
285
# Emit S and D variants of a two-source FP op.  singleOp/doubleOp are C++
# expressions substituted into the *IntConvCode2 templates above.
286 def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp):
287 global header_output, decoder_output, exec_output
288
289 code = singleIntConvCode2 % { "op": singleOp }
290 sIop = InstObjParams(name, Name + "S", base,
291 { "code": code,
292 "op_class": opClass }, [])
293
294 code = doubleIntConvCode2 % { "op": doubleOp }
295 dIop = InstObjParams(name, Name + "D", base,
296 { "code": code,
297 "op_class": opClass }, [])
298
# Templates are looked up by name: declaration uses the plain base name,
# the constructor uses the AA64-prefixed variant.
299 declareTempl = eval( base + "Declare");
300 constructorTempl = eval("AA64" + base + "Constructor");
301
302 for iop in sIop, dIop:
303 header_output += declareTempl.subst(iop)
304 decoder_output += constructorTempl.subst(iop)
305 exec_output += BasicExecute.subst(iop)
306
307 buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "SimdFloatAddOp",
308 "fplibAdd<uint32_t>(cOp1, cOp2, fpscr)",
309 "fplibAdd<uint64_t>(cOp1, cOp2, fpscr)")
310 buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "SimdFloatAddOp",
311 "fplibSub<uint32_t>(cOp1, cOp2, fpscr)",
312 "fplibSub<uint64_t>(cOp1, cOp2, fpscr)")
313 buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "SimdFloatDivOp",
314 "fplibDiv<uint32_t>(cOp1, cOp2, fpscr)",
315 "fplibDiv<uint64_t>(cOp1, cOp2, fpscr)")
316 buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "SimdFloatMultOp",
317 "fplibMul<uint32_t>(cOp1, cOp2, fpscr)",
318 "fplibMul<uint64_t>(cOp1, cOp2, fpscr)")
319 buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "SimdFloatMultOp",
320 "fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))",
321 "fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))")
322 buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "SimdFloatCmpOp",
323 "fplibMin<uint32_t>(cOp1, cOp2, fpscr)",
324 "fplibMin<uint64_t>(cOp1, cOp2, fpscr)")
325 buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "SimdFloatCmpOp",
326 "fplibMax<uint32_t>(cOp1, cOp2, fpscr)",
327 "fplibMax<uint64_t>(cOp1, cOp2, fpscr)")
328 buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "SimdFloatCmpOp",
329 "fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)",
330 "fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)")
331 buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "SimdFloatCmpOp",
332 "fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)",
333 "fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)")
334
# Emit S and D variants of a one-source FP op; doubleOp defaults to
# singleOp when the same expression works for both widths.
335 def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None):
336 if doubleOp is None:
337 doubleOp = singleOp
338 global header_output, decoder_output, exec_output
339
340 code = singleIntConvCode % { "op": singleOp }
341 sIop = InstObjParams(name, Name + "S", base,
342 { "code": code,
343 "op_class": opClass }, [])
344 code = doubleIntConvCode % { "op": doubleOp }
345 dIop = InstObjParams(name, Name + "D", base,
346 { "code": code,
347 "op_class": opClass }, [])
348
349 declareTempl = eval( base + "Declare");
350 constructorTempl = eval("AA64" + base + "Constructor");
351
352 for iop in sIop, dIop:
353 header_output += declareTempl.subst(iop)
354 decoder_output += constructorTempl.subst(iop)
355 exec_output += BasicExecute.subst(iop)
356
357 buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "SimdFloatSqrtOp",
358 "fplibSqrt<uint32_t>(cOp1, fpscr)", "fplibSqrt<uint64_t>(cOp1, fpscr)")
359
# Like buildUnaryFpOp but with a selectable body template.
360 def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp,
361 doubleOp = None, isIntConv = True):
362 if doubleOp is None:
363 doubleOp = singleOp
364 global header_output, decoder_output, exec_output
365
366 if isIntConv:
367 sCode = singleIntConvCode
368 dCode = doubleIntConvCode
369 else:
370 sCode = singleCode
371 dCode = doubleCode
# NOTE(review): singleCode/doubleCode are not defined anywhere in this
# let block, so the isIntConv=False path would raise a NameError if taken.
# All call sites below use the default isIntConv=True -- confirm whether
# the False path is dead code or a missing definition.
372
373 for code, op, suffix in [[sCode, singleOp, "S"],
374 [dCode, doubleOp, "D"]]:
375 iop = InstObjParams(name, Name + suffix, base,
376 { "code": code % { "op": op },
377 "op_class": opClass }, [])
378
379 declareTempl = eval( base + "Declare");
380 constructorTempl = eval("AA64" + base + "Constructor");
381
382 header_output += declareTempl.subst(iop)
383 decoder_output += constructorTempl.subst(iop)
384 exec_output += BasicExecute.subst(iop)
385
# FNEG/FABS and the FRINT* rounding family; the second argument of
# fplibRoundInt selects the rounding mode, the third ("exact") is true
# only for FRINTX.
386 buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "SimdFloatMiscOp",
387 "fplibNeg<uint32_t>(cOp1)", "fplibNeg<uint64_t>(cOp1)")
388 buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "SimdFloatMiscOp",
389 "fplibAbs<uint32_t>(cOp1)", "fplibAbs<uint64_t>(cOp1)")
390 buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "SimdFloatMiscOp",
391 "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)",
392 "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)")
393 buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "SimdFloatMiscOp",
394 "fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)",
395 "fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)")
396 buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "SimdFloatMiscOp",
397 "fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)",
398 "fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)")
399 buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "SimdFloatMiscOp",
400 "fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)",
401 "fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)")
402 buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "SimdFloatMiscOp",
403 "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)",
404 "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)")
405 buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "SimdFloatMiscOp",
406 "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)",
407 "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)")
408 buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "SimdFloatMiscOp",
409 "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), true, fpscr)",
410 "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)")
411 }};
412
413 let {{
414
# Generators for int<->FP conversions, FP<->FP precision conversions
# (including half precision), and the FCMP/FCCMP compare families.
415 header_output = ""
416 decoder_output = ""
417 exec_output = ""
418
419 # Creates the integer to floating point instructions, including variants for
420 # signed/unsigned, float/double, etc
# Loop axes: source core register width (W/X), destination precision
# (S/D), and signedness (U/S).  fplibFixedToFP with 0 fraction bits is an
# integer-to-FP conversion; its third argument is "unsigned".
421 for regL, regOpL, width in [["W", "w", 32],
422 ["X", "d", 64]]:
423 for isDouble in True, False:
424 for us, usCode in [["U", "uint%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)],
425 ["S", "int%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)]]:
426 fcvtIntFpDCode = vfp64EnabledCheckCode + '''
427 FPSCR fpscr = (FPSCR) FpscrExc;
428 %s
429 ''' %(usCode)
430
431 if isDouble:
432 fcvtIntFpDCode += '''
433 uint64_t cDest = fplibFixedToFP<uint64_t>(cSrc, 0,
434 %s, FPCRRounding(fpscr), fpscr);
435 AA64FpDestP0_uw = cDest;
436 AA64FpDestP1_uw = cDest >> 32;
437 ''' % ("true" if us == "U" else "false")
438 else:
439 fcvtIntFpDCode += '''
440 uint32_t cDest = fplibFixedToFP<uint32_t>(cSrc, 0,
441 %s, FPCRRounding(fpscr), fpscr);
442 AA64FpDestP0_uw = cDest;
443 AA64FpDestP1_uw = 0;
444 ''' % ("true" if us == "U" else "false")
445 fcvtIntFpDCode += '''
446 AA64FpDestP2_uw = 0;
447 AA64FpDestP3_uw = 0;
448 FpscrExc = fpscr;
449 '''
450
451 instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else "S")
452 mnem = "%scvtf" %(us.lower())
453 fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp",
454 { "code": fcvtIntFpDCode,
455 "op_class": "SimdFloatCvtOp" }, [])
456 header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop);
457 decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop);
458 exec_output += BasicExecute.subst(fcvtIntFpDIop);
459
460 # Generates the floating point to integer conversion instructions in various
461 # variants, eg signed/unsigned
# One instruction per explicit rounding mode (FCVTN/P/M/Z/A families).
462 def buildFpCvtIntOp(isDouble, isSigned, isXReg):
463 global header_output, decoder_output, exec_output
464
465 for rmode, roundingMode in [["N", "FPRounding_TIEEVEN"],
466 ["P", "FPRounding_POSINF"],
467 ["M", "FPRounding_NEGINF"],
468 ["Z", "FPRounding_ZERO"],
469 ["A", "FPRounding_TIEAWAY"]]:
470 fcvtFpIntCode = vfp64EnabledCheckCode + '''
471 FPSCR fpscr = (FPSCR) FpscrExc;'''
472 if isDouble:
473 fcvtFpIntCode += '''
474 uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
475 '''
476 else:
477 fcvtFpIntCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"
478
# fplibFPToFixed args: value, fraction bits (0 = integer), "unsigned"
# flag, rounding mode, status.
479 fcvtFpIntCode += '''
480 %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 0, %s, %s, fpscr);
481 FpscrExc = fpscr;
482 ''' %("X" if isXReg else "W",
483 "64" if isDouble else "32",
484 "64" if isXReg else "32",
485 "false" if isSigned else "true",
486 roundingMode)
487
488 instName = "FcvtFp%sInt%s%s%s" %("S" if isSigned else "U",
489 "X" if isXReg else "W",
490 "D" if isDouble else "S", rmode)
491 mnem = "fcvt%s%s" %(rmode, "s" if isSigned else "u")
492 fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp",
493 { "code": fcvtFpIntCode,
494 "op_class": "SimdFloatCvtOp" }, [])
495 header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop);
# NOTE(review): this uses FpRegRegOpConstructor while most other
# generators in this file use AA64FpRegRegOpConstructor -- confirm
# whether the non-AA64 template is intended here.
496 decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop);
497 exec_output += BasicExecute.subst(fcvtFpIntIop);
498
499 # Now actually do the building with the different variants
500 for isDouble in True, False:
501 for isSigned in True, False:
502 for isXReg in True, False:
503 buildFpCvtIntOp(isDouble, isSigned, isXReg)
504
# FCVT single -> double precision widening conversion.
505 fcvtFpSFpDCode = vfp64EnabledCheckCode + '''
506 FPSCR fpscr = (FPSCR) FpscrExc;
507 uint64_t cDest = fplibConvert<uint32_t, uint64_t>(AA64FpOp1P0_uw,
508 FPCRRounding(fpscr), fpscr);
509 AA64FpDestP0_uw = cDest;
510 AA64FpDestP1_uw = cDest >> 32;
511 AA64FpDestP2_uw = 0;
512 AA64FpDestP3_uw = 0;
513 FpscrExc = fpscr;
514 '''
515 fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp",
516 { "code": fcvtFpSFpDCode,
517 "op_class": "SimdFloatCvtOp" }, [])
518 header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop);
519 decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop);
520 exec_output += BasicExecute.subst(fcvtFpSFpDIop);
521
# FCVT double -> single precision narrowing conversion.
522 fcvtFpDFpSCode = vfp64EnabledCheckCode + '''
523 FPSCR fpscr = (FPSCR) FpscrExc;
524 uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
525 AA64FpDestP0_uw = fplibConvert<uint64_t, uint32_t>(cOp1,
526 FPCRRounding(fpscr), fpscr);
527 AA64FpDestP1_uw = 0;
528 AA64FpDestP2_uw = 0;
529 AA64FpDestP3_uw = 0;
530 FpscrExc = fpscr;
531 '''
532 fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp",
533 {"code": fcvtFpDFpSCode,
534 "op_class": "SimdFloatCvtOp" }, [])
535 header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop);
536 decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop);
537 exec_output += BasicExecute.subst(fcvtFpDFpSIop);
538
539 # Half precision to single or double precision conversion
540 for isDouble in True, False:
541 code = vfp64EnabledCheckCode + '''
542 FPSCR fpscr = (FPSCR) FpscrExc;
543 %s cDest = fplibConvert<uint16_t, uint%s_t>(AA64FpOp1P0_uw,
544 FPCRRounding(fpscr), fpscr);
545 ''' % ("uint64_t" if isDouble else "uint32_t",
546 "64" if isDouble else "32")
547 if isDouble:
548 code += '''
549 AA64FpDestP0_uw = cDest;
550 AA64FpDestP1_uw = cDest >> 32;
551 '''
552 else:
553 code += '''
554 AA64FpDestP0_uw = cDest;
555 AA64FpDestP1_uw = 0;
556 '''
557 code += '''
558 AA64FpDestP2_uw = 0;
559 AA64FpDestP3_uw = 0;
560 FpscrExc = fpscr;
561 '''
562
563 instName = "FcvtFpHFp%s" %("D" if isDouble else "S")
564 fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp",
565 { "code": code,
566 "op_class": "SimdFloatCvtOp" }, [])
567 header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop);
568 decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop);
569 exec_output += BasicExecute.subst(fcvtFpHFpIop);
570
571 # single or double precision to Half precision conversion
572 for isDouble in True, False:
573 code = vfp64EnabledCheckCode + '''
574 FPSCR fpscr = (FPSCR) FpscrExc;
575 %s;
576 AA64FpDestP0_uw = fplibConvert<uint%s_t, uint16_t>(cOp1,
577 FPCRRounding(fpscr), fpscr);
578 AA64FpDestP1_uw = 0;
579 AA64FpDestP2_uw = 0;
580 AA64FpDestP3_uw = 0;
581 FpscrExc = fpscr;
582 ''' % ("uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
583 if isDouble else "uint32_t cOp1 = AA64FpOp1P0_uw",
584 "64" if isDouble else "32")
585
586 instName = "FcvtFp%sFpH" %("D" if isDouble else "S")
587 fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp",
588 { "code": code,
589 "op_class": "SimdFloatCvtOp" }, [])
590 header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop);
591 decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop);
592 exec_output += BasicExecute.subst(fcvtFpFpHIop);
593
594 # Build the various versions of the floating point compare instructions
# isQuiet selects FCMP vs FCMPE (the signaling variant passes true to
# fplibCompare); isImm compares against an immediate instead of a second
# register.  Note the first compare operand is read from the AA64FpDest
# slots, i.e. the destination register fields carry the first source.
595 def buildFCmpOp(isQuiet, isDouble, isImm):
596 global header_output, decoder_output, exec_output
597
598 fcmpCode = vfp64EnabledCheckCode + '''
599 FPSCR fpscr = (FPSCR) FpscrExc;
600 %s cOp1 = %s;
601 ''' % ("uint64_t" if isDouble else "uint32_t",
602 "AA64FpDestP0_uw | (uint64_t)AA64FpDestP1_uw << 32"
603 if isDouble else "AA64FpDestP0_uw")
604 if isImm:
605 fcmpCode += '''
606 %s cOp2 = imm;
607 ''' % ("uint64_t" if isDouble else "uint32_t")
608 else:
609 fcmpCode += '''
610 %s cOp2 = %s;
611 ''' % ("uint64_t" if isDouble else "uint32_t",
612 "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
613 if isDouble else "AA64FpOp1P0_uw")
# fplibCompare packs N:Z:C:V into a 4-bit value; unpack into the
# condition-code registers.
614 fcmpCode += '''
615 int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
616 CondCodesNZ = cc >> 2 & 3;
617 CondCodesC = cc >> 1 & 1;
618 CondCodesV = cc & 1;
619 FpCondCodes = fpscr & FpCondCodesMask;
620 FpscrExc = fpscr;
621 ''' % ("64" if isDouble else "32", "false" if isQuiet else "true")
622
623 typeName = "Imm" if isImm else "Reg"
624 instName = "FCmp%s%s%s" %("" if isQuiet else "E", typeName,
625 "D" if isDouble else "S")
626 fcmpIop = InstObjParams("fcmp%s" %("" if isQuiet else "e"), instName,
627 "FpReg%sOp" %(typeName),
628 {"code": fcmpCode,
629 "op_class": "SimdFloatCmpOp"}, [])
630
631 declareTemp = eval("FpReg%sOpDeclare" %(typeName));
632 constructorTemp = eval("AA64FpReg%sOpConstructor" %(typeName));
633 header_output += declareTemp.subst(fcmpIop);
634 decoder_output += constructorTemp.subst(fcmpIop);
635 exec_output += BasicExecute.subst(fcmpIop);
636
637 for isQuiet in True, False:
638 for isDouble in True, False:
639 for isImm in True, False:
640 buildFCmpOp(isQuiet, isDouble, isImm)
641
642 # Build the various versions of the conditional floating point compare
643 # instructions
# FCCMP/FCCMPE: compare only when the condition holds, otherwise load the
# default NZCV value (defCc) supplied by the instruction.
644 def buildFCCmpOp(isQuiet, isDouble):
645 global header_output, decoder_output, exec_output
646
647 fccmpCode = vfp64EnabledCheckCode + '''
648 FPSCR fpscr = (FPSCR) FpscrExc;
649 if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
650 %s cOp1 = %s;
651 %s cOp2 = %s;
652 int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr);
653 CondCodesNZ = cc >> 2 & 3;
654 CondCodesC = cc >> 1 & 1;
655 CondCodesV = cc & 1;
656 } else {
657 CondCodesNZ = (defCc >> 2) & 0x3;
658 CondCodesC = (defCc >> 1) & 0x1;
659 CondCodesV = defCc & 0x1;
660 }
661 FpCondCodes = fpscr & FpCondCodesMask;
662 FpscrExc = fpscr;
663 ''' % ("uint64_t" if isDouble else "uint32_t",
664 "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32"
665 if isDouble else "AA64FpOp1P0_uw",
666 "uint64_t" if isDouble else "uint32_t",
667 "AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32"
668 if isDouble else "AA64FpOp2P0_uw",
669 "64" if isDouble else "32", "false" if isQuiet else "true")
670
671 instName = "FCCmp%sReg%s" %("" if isQuiet else "E",
672 "D" if isDouble else "S")
673 fccmpIop = InstObjParams("fccmp%s" %("" if isQuiet else "e"),
674 instName, "FpCondCompRegOp",
675 {"code": fccmpCode,
676 "op_class": "SimdFloatCmpOp"}, [])
677 header_output += DataXCondCompRegDeclare.subst(fccmpIop);
678 decoder_output += DataXCondCompRegConstructor.subst(fccmpIop);
679 exec_output += BasicExecute.subst(fccmpIop);
680
681 for isQuiet in True, False:
682 for isDouble in True, False:
683 buildFCCmpOp(isQuiet, isDouble)
684
685 }};
686
687 let {{
688
# Generators for fixed-point <-> floating-point conversions with an
# explicit fraction-bit count.  The instruction immediate encodes the
# scale as (64 - fbits), hence the "64 - imm" passed to fplib below.
689 header_output = ""
690 decoder_output = ""
691 exec_output = ""
692
693 # Generates the variants of the floating to fixed point instructions
694 def buildFpCvtFixedOp(isSigned, isDouble, isXReg):
695 global header_output, decoder_output, exec_output
696
697 fcvtFpFixedCode = vfp64EnabledCheckCode + '''
698 FPSCR fpscr = (FPSCR) FpscrExc;
699 '''
700 if isDouble:
701 fcvtFpFixedCode += '''
702 uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32;
703 '''
704 else:
705 fcvtFpFixedCode += "uint32_t cOp1 = AA64FpOp1P0_uw;"
# FP-to-fixed always rounds toward zero (FPRounding_ZERO); the third
# fplib argument is the "unsigned" flag.
706 fcvtFpFixedCode += '''
707 %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 64 - imm, %s,
708 FPRounding_ZERO, fpscr);
709 FpscrExc = fpscr;
710 ''' %("X" if isXReg else "W",
711 "64" if isDouble else "32",
712 "64" if isXReg else "32",
713 "false" if isSigned else "true")
714
715 instName = "FcvtFp%sFixed%s%s" %("S" if isSigned else "U",
716 "D" if isDouble else "S",
717 "X" if isXReg else "W")
718 mnem = "fcvtz%s" %("s" if isSigned else "u")
719 fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
720 { "code": fcvtFpFixedCode,
721 "op_class": "SimdFloatCvtOp" }, [])
722 header_output += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop);
723 decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop);
724 exec_output += BasicExecute.subst(fcvtFpFixedIop);
725
726 # Generates the variants of the fixed to floating point instructions
727 def buildFixedCvtFpOp(isSigned, isDouble, isXReg):
728 global header_output, decoder_output, exec_output
729
730 srcRegType = "X" if isXReg else "W"
# The cast to (int64/32_t or uint64/32_t) applies the source signedness
# before fplibFixedToFP scales by 64 - imm fraction bits.
731 fcvtFixedFpCode = vfp64EnabledCheckCode + '''
732 FPSCR fpscr = (FPSCR) FpscrExc;
733 %s result = fplibFixedToFP<uint%s_t>((%s%s_t)%sOp1, 64 - imm,
734 %s, FPCRRounding(fpscr), fpscr);
735 ''' %("uint64_t" if isDouble else "uint32_t",
736 "64" if isDouble else "32",
737 "int" if isSigned else "uint", "64" if isXReg else "32",
738 srcRegType,
739 "false" if isSigned else "true")
740 if isDouble:
741 fcvtFixedFpCode += '''
742 AA64FpDestP0_uw = result;
743 AA64FpDestP1_uw = result >> 32;
744 '''
745 else:
746 fcvtFixedFpCode += '''
747 AA64FpDestP0_uw = result;
748 AA64FpDestP1_uw = 0;
749 '''
750 fcvtFixedFpCode += '''
751 AA64FpDestP2_uw = 0;
752 AA64FpDestP3_uw = 0;
753 FpscrExc = fpscr;
754 '''
755
756 instName = "Fcvt%sFixedFp%s%s" %("S" if isSigned else "U",
757 "D" if isDouble else "S",
758 srcRegType)
759 mnem = "%scvtf" %("s" if isSigned else "u")
760 fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp",
761 { "code": fcvtFixedFpCode,
762 "op_class": "SimdFloatCvtOp" }, [])
763 header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop);
# NOTE(review): uses FpRegRegImmOpConstructor while buildFpCvtFixedOp
# above uses the AA64-prefixed constructor -- confirm this asymmetry.
764 decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop);
765 exec_output += BasicExecute.subst(fcvtFixedFpIop);
766
767 # loop over the variants building the instructions for each
768 for isXReg in True, False:
769 for isDouble in True, False:
770 for isSigned in True, False:
771 buildFpCvtFixedOp(isSigned, isDouble, isXReg)
772 buildFixedCvtFpOp(isSigned, isDouble, isXReg)
773 }};
774
775 let {{
776
# FCSEL generator: conditionally select Op1 (condition true) or Op2
# (condition false); upper destination words P2/P3 are always zeroed.
# NOTE(review): unlike every other generator in this file, this code does
# not prepend vfp64EnabledCheckCode -- confirm whether FCSEL should also
# perform the FP-enabled check.
777 header_output = ""
778 decoder_output = ""
779 exec_output = ""
780
781 for isDouble in True, False:
782 code = '''
783 if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) {
784 AA64FpDestP0_uw = AA64FpOp1P0_uw;
785 '''
786 if isDouble:
787 code += '''
788 AA64FpDestP1_uw = AA64FpOp1P1_uw;
789 } else {
790 AA64FpDestP0_uw = AA64FpOp2P0_uw;
791 AA64FpDestP1_uw = AA64FpOp2P1_uw;
792 }
793 '''
794 else:
# Single precision: P1 is zeroed unconditionally after the select.
795 code += '''
796 } else {
797 AA64FpDestP0_uw = AA64FpOp2P0_uw;
798 }
799 AA64FpDestP1_uw = 0;
800 '''
801 code += '''
802 AA64FpDestP2_uw = 0;
803 AA64FpDestP3_uw = 0;
804 '''
805
806 iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"),
807 "FpCondSelOp", code)
808 header_output += DataXCondSelDeclare.subst(iop)
809 decoder_output += DataXCondSelConstructor.subst(iop)
810 exec_output += BasicExecute.subst(iop)
811 }};