src/arch/arm/insts/macromem.cc

   1 /*
   2  * Copyright (c) 2010-2014 ARM Limited
   3  * All rights reserved
   4  *
   5  * The license below extends only to copyright in the software and shall
   6  * not be construed as granting a license to any other intellectual
   7  * property including but not limited to intellectual property relating
   8  * to a hardware implementation of the functionality of the software
   9  * licensed hereunder.  You may use the software subject to the license
  10  * terms below provided that you ensure that this notice is replicated
  11  * unmodified and in its entirety in all distributions of the software,
  12  * modified or unmodified, in source code or in binary form.
  13  *
  14  * Copyright (c) 2007-2008 The Florida State University
  15  * All rights reserved.
  16  *
  17  * Redistribution and use in source and binary forms, with or without
  18  * modification, are permitted provided that the following conditions are
  19  * met: redistributions of source code must retain the above copyright
  20  * notice, this list of conditions and the following disclaimer;
  21  * redistributions in binary form must reproduce the above copyright
  22  * notice, this list of conditions and the following disclaimer in the
  23  * documentation and/or other materials provided with the distribution;
  24  * neither the name of the copyright holders nor the names of its
  25  * contributors may be used to endorse or promote products derived from
  26  * this software without specific prior written permission.
  27  *
  28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  31  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  32  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  33  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  34  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  35  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  36  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  37  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  38  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39  *
  40  * Authors: Stephen Hines
  41  */
  42
  43 #include <sstream>
  44
  45 #include "arch/arm/insts/macromem.hh"
  46
  47 #include "arch/arm/generated/decoder.hh"
  48 #include "arch/arm/insts/neon64_mem.hh"
  49
  50 using namespace std;
  51 using namespace ArmISAInst;
  52
  53 namespace ArmISA
  54 {
  55
  56 MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
  57                        OpClass __opClass, IntRegIndex rn,
  58                        bool index, bool up, bool user, bool writeback,
  59                        bool load, uint32_t reglist) :
  60     PredMacroOp(mnem, machInst, __opClass)
  61 {
  62     uint32_t regs = reglist;
  63     uint32_t ones = number_of_ones(reglist);
  64     uint32_t mem_ops = ones;
  65
  66     // Copy the base address register if we overwrite it, or if this instruction
  67     // is basically a no-op (we have to do something)
  68     bool copy_base =  (bits(reglist, rn) && load) || !ones;
  69     bool force_user = user & !bits(reglist, 15);
  70     bool exception_ret = user & bits(reglist, 15);
  71     bool pc_temp = load && writeback && bits(reglist, 15);
  72
  73     if (!ones) {
  74         numMicroops = 1;
  75     } else if (load) {
  76         numMicroops = ((ones + 1) / 2)
  77                     + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
  78                     + (copy_base ? 1 : 0)
  79                     + (writeback? 1 : 0)
  80                     + (pc_temp ? 1 : 0);
  81     } else {
  82         numMicroops = ones + (writeback ? 1 : 0);
  83     }
  84
  85     microOps = new StaticInstPtr[numMicroops];
  86
  87     uint32_t addr = 0;
  88
  89     if (!up)
  90         addr = (ones << 2) - 4;
  91
  92     if (!index)
  93         addr += 4;
  94
  95     StaticInstPtr *uop = microOps;
  96
  97     // Add 0 to Rn and stick it in ureg0.
  98     // This is equivalent to a move.
  99     if (copy_base)
 100         *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
 101
 102     unsigned reg = 0;
 103     while (mem_ops != 0) {
 104         // Do load operations in pairs if possible
 105         if (load && mem_ops >= 2 &&
 106             !(mem_ops == 2 && bits(regs,INTREG_PC) && exception_ret)) {
 107             // 64-bit memory operation
 108             // Find 2 set register bits (clear them after finding)
 109             unsigned reg_idx1;
 110             unsigned reg_idx2;
 111
 112             // Find the first register
 113             while (!bits(regs, reg)) reg++;
 114             replaceBits(regs, reg, 0);
 115             reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg;
 116
 117             // Find the second register
 118             while (!bits(regs, reg)) reg++;
 119             replaceBits(regs, reg, 0);
 120             reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg;
 121
 122             // Load into temp reg if necessary
 123             if (reg_idx2 == INTREG_PC && pc_temp)
 124                 reg_idx2 = INTREG_UREG1;
 125
 126             // Actually load both registers from memory
 127             *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
 128                     copy_base ? INTREG_UREG0 : rn, up, addr);
 129
 130             if (!writeback && reg_idx2 == INTREG_PC) {
 131                 // No writeback if idx==pc, set appropriate flags
 132                 (*uop)->setFlag(StaticInst::IsControl);
 133                 (*uop)->setFlag(StaticInst::IsIndirectControl);
 134
 135                 if (!(condCode == COND_AL || condCode == COND_UC))
 136                     (*uop)->setFlag(StaticInst::IsCondControl);
 137                 else
 138                     (*uop)->setFlag(StaticInst::IsUncondControl);
 139             }
 140
 141             if (up) addr += 8;
 142             else addr -= 8;
 143             mem_ops -= 2;
 144         } else {
 145             // 32-bit memory operation
 146             // Find register for operation
 147             unsigned reg_idx;
 148             while (!bits(regs, reg)) reg++;
 149             replaceBits(regs, reg, 0);
 150             reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg;
 151
 152             if (load) {
 153                 if (writeback && reg_idx == INTREG_PC) {
 154                     // If this instruction changes the PC and performs a
 155                     // writeback, ensure the pc load/branch is the last uop.
 156                     // Load into a temp reg here.
 157                     *uop = new MicroLdrUop(machInst, INTREG_UREG1,
 158                             copy_base ? INTREG_UREG0 : rn, up, addr);
 159                 } else if (reg_idx == INTREG_PC && exception_ret) {
 160                     // Special handling for exception return
 161                     *uop = new MicroLdrRetUop(machInst, reg_idx,
 162                             copy_base ? INTREG_UREG0 : rn, up, addr);
 163                 } else {
 164                     // standard single load uop
 165                     *uop = new MicroLdrUop(machInst, reg_idx,
 166                             copy_base ? INTREG_UREG0 : rn, up, addr);
 167                 }
 168
 169                 // Loading pc as last operation?  Set appropriate flags.
 170                 if (!writeback && reg_idx == INTREG_PC) {
 171                     (*uop)->setFlag(StaticInst::IsControl);
 172                     (*uop)->setFlag(StaticInst::IsIndirectControl);
 173
 174                     if (!(condCode == COND_AL || condCode == COND_UC))
 175                         (*uop)->setFlag(StaticInst::IsCondControl);
 176                     else
 177                         (*uop)->setFlag(StaticInst::IsUncondControl);
 178                 }
 179             } else {
 180                 *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
 181             }
 182
 183             if (up) addr += 4;
 184             else addr -= 4;
 185             --mem_ops;
 186         }
 187
 188         // Load/store micro-op generated, go to next uop
 189         ++uop;
 190     }
 191
 192     if (writeback && ones) {
 193         // Perform writeback uop operation
 194         if (up)
 195             *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
 196         else
 197             *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);
 198
 199         // Write PC after address writeback?
 200         if (pc_temp) {
 201             if (exception_ret) {
 202                 *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
 203             } else {
 204                 *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1);
 205             }
 206             (*uop)->setFlag(StaticInst::IsControl);
 207             (*uop)->setFlag(StaticInst::IsIndirectControl);
 208
 209             if (!(condCode == COND_AL || condCode == COND_UC))
 210                 (*uop)->setFlag(StaticInst::IsCondControl);
 211             else
 212                 (*uop)->setFlag(StaticInst::IsUncondControl);
 213
 214             if (rn == INTREG_SP)
 215                 (*uop)->setFlag(StaticInst::IsReturn);
 216
 217             ++uop;
 218         }
 219     }
 220
 221     --uop;
 222     (*uop)->setLastMicroop();
 223     microOps[0]->setFirstMicroop();
 224
 225     /* Take the control flags from the last microop for the macroop */
 226     if ((*uop)->isControl())
 227         setFlag(StaticInst::IsControl);
 228     if ((*uop)->isCondCtrl())
 229         setFlag(StaticInst::IsCondControl);
 230     if ((*uop)->isUncondCtrl())
 231         setFlag(StaticInst::IsUncondControl);
 232     if ((*uop)->isIndirectCtrl())
 233         setFlag(StaticInst::IsIndirectControl);
 234     if ((*uop)->isReturn())
 235         setFlag(StaticInst::IsReturn);
 236
 237     for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
 238         (*uop)->setDelayedCommit();
 239     }
 240 }
 241
 242 PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
 243                      uint32_t size, bool fp, bool load, bool noAlloc,
 244                      bool signExt, bool exclusive, bool acrel,
 245                      int64_t imm, AddrMode mode,
 246                      IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
 247     PredMacroOp(mnem, machInst, __opClass)
 248 {
 249     bool post = (mode == AddrMd_PostIndex);
 250     bool writeback = (mode != AddrMd_Offset);
 251
 252     if (load) {
 253         // Use integer rounding to round up loads of size 4
 254         numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
 255     } else {
 256         numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
 257     }
 258     microOps = new StaticInstPtr[numMicroops];
 259
 260     StaticInstPtr *uop = microOps;
 261
 262     rn = makeSP(rn);
 263
 264     if (!post) {
 265         *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn,
 266                 post ? 0 : imm);
 267     }
 268
 269     if (fp) {
 270         if (size == 16) {
 271             if (load) {
 272                 *uop++ = new MicroLdFp16Uop(machInst, rt,
 273                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 274                 *uop++ = new MicroLdFp16Uop(machInst, rt2,
 275                         post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
 276             } else {
 277                 *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
 278                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 279                 *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
 280                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 281                 *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
 282                         post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
 283                 *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
 284                         post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
 285             }
 286         } else if (size == 8) {
 287             if (load) {
 288                 *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
 289                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 290             } else {
 291                 *uop++ = new MicroStrFpXImmUop(machInst, rt,
 292                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 293                 *uop++ = new MicroStrFpXImmUop(machInst, rt2,
 294                         post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel);
 295             }
 296         } else if (size == 4) {
 297             if (load) {
 298                 *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
 299                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 300             } else {
 301                 *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
 302                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 303             }
 304         }
 305     } else {
 306         if (size == 8) {
 307             if (load) {
 308                 *uop++ = new MicroLdPairUop(machInst, rt, rt2,
 309                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 310             } else {
 311                 *uop++ = new MicroStrXImmUop(machInst, rt, post ? rn : INTREG_UREG0,
 312                         0, noAlloc, exclusive, acrel);
 313                 *uop++ = new MicroStrXImmUop(machInst, rt2, post ? rn : INTREG_UREG0,
 314                         size, noAlloc, exclusive, acrel);
 315             }
 316         } else if (size == 4) {
 317             if (load) {
 318                 if (signExt) {
 319                     *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
 320                             post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 321                 } else {
 322                     *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
 323                             post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 324                 }
 325             } else {
 326                 *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
 327                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 328             }
 329         }
 330     }
 331
 332     if (writeback) {
 333         *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0,
 334                                    post ? imm : 0);
 335     }
 336
 337     assert(uop == &microOps[numMicroops]);
 338     (*--uop)->setLastMicroop();
 339     microOps[0]->setFirstMicroop();
 340
 341     for (StaticInstPtr *curUop = microOps;
 342             !(*curUop)->isLastMicroop(); curUop++) {
 343         (*curUop)->setDelayedCommit();
 344     }
 345 }
 346
 347 BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
 348                              OpClass __opClass, bool load, IntRegIndex dest,
 349                              IntRegIndex base, int64_t imm) :
 350     PredMacroOp(mnem, machInst, __opClass)
 351 {
 352     numMicroops = load ? 1 : 2;
 353     microOps = new StaticInstPtr[numMicroops];
 354
 355     StaticInstPtr *uop = microOps;
 356
 357     if (load) {
 358         *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
 359     } else {
 360         *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
 361         (*uop)->setDelayedCommit();
 362         *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
 363     }
 364     (*uop)->setLastMicroop();
 365     microOps[0]->setFirstMicroop();
 366 }
 367
 368 BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
 369                                OpClass __opClass, bool load, IntRegIndex dest,
 370                                IntRegIndex base, int64_t imm) :
 371     PredMacroOp(mnem, machInst, __opClass)
 372 {
 373     numMicroops = load ? 2 : 3;
 374     microOps = new StaticInstPtr[numMicroops];
 375
 376     StaticInstPtr *uop = microOps;
 377
 378     if (load) {
 379         *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
 380     } else {
 381         *uop++= new MicroStrQBFpXImmUop(machInst, dest, base, 0);
 382         *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
 383     }
 384     *uop = new MicroAddXiUop(machInst, base, base, imm);
 385     (*uop)->setLastMicroop();
 386     microOps[0]->setFirstMicroop();
 387
 388     for (StaticInstPtr *curUop = microOps;
 389             !(*curUop)->isLastMicroop(); curUop++) {
 390         (*curUop)->setDelayedCommit();
 391     }
 392 }
 393
 394 BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
 395                              OpClass __opClass, bool load, IntRegIndex dest,
 396                              IntRegIndex base, int64_t imm) :
 397     PredMacroOp(mnem, machInst, __opClass)
 398 {
 399     numMicroops = load ? 2 : 3;
 400     microOps = new StaticInstPtr[numMicroops];
 401
 402     StaticInstPtr *uop = microOps;
 403
 404     if (load) {
 405         *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
 406     } else {
 407         *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
 408         *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
 409     }
 410     *uop = new MicroAddXiUop(machInst, base, base, imm);
 411     (*uop)->setLastMicroop();
 412     microOps[0]->setFirstMicroop();
 413
 414     for (StaticInstPtr *curUop = microOps;
 415             !(*curUop)->isLastMicroop(); curUop++) {
 416         (*curUop)->setDelayedCommit();
 417     }
 418 }
 419
 420 BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
 421                              OpClass __opClass, bool load, IntRegIndex dest,
 422                              IntRegIndex base, IntRegIndex offset,
 423                              ArmExtendType type, int64_t imm) :
 424     PredMacroOp(mnem, machInst, __opClass)
 425 {
 426     numMicroops = load ? 1 : 2;
 427     microOps = new StaticInstPtr[numMicroops];
 428
 429     StaticInstPtr *uop = microOps;
 430
 431     if (load) {
 432         *uop = new MicroLdFp16RegUop(machInst, dest, base,
 433                                   offset, type, imm);
 434     } else {
 435         *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
 436                                        offset, type, imm);
 437         (*uop)->setDelayedCommit();
 438         *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
 439                                          offset, type, imm);
 440     }
 441
 442     (*uop)->setLastMicroop();
 443     microOps[0]->setFirstMicroop();
 444 }
 445
 446 BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
 447                              OpClass __opClass, IntRegIndex dest,
 448                              int64_t imm) :
 449     PredMacroOp(mnem, machInst, __opClass)
 450 {
 451     numMicroops = 1;
 452     microOps = new StaticInstPtr[numMicroops];
 453
 454     microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
 455     microOps[0]->setLastMicroop();
 456     microOps[0]->setFirstMicroop();
 457 }
 458
 459 VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
 460                      unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
 461                      unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
 462     PredMacroOp(mnem, machInst, __opClass)
 463 {
 464     assert(regs > 0 && regs <= 4);
 465     assert(regs % elems == 0);
 466
 467     numMicroops = (regs > 2) ? 2 : 1;
 468     bool wb = (rm != 15);
 469     bool deinterleave = (elems > 1);
 470
 471     if (wb) numMicroops++;
 472     if (deinterleave) numMicroops += (regs / elems);
 473     microOps = new StaticInstPtr[numMicroops];
 474
 475     RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;
 476
 477     uint32_t noAlign = TLB::MustBeOne;
 478
 479     unsigned uopIdx = 0;
 480     switch (regs) {
 481       case 4:
 482         microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
 483                 size, machInst, rMid, rn, 0, align);
 484         microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
 485                 size, machInst, rMid + 4, rn, 16, noAlign);
 486         break;
 487       case 3:
 488         microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
 489                 size, machInst, rMid, rn, 0, align);
 490         microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
 491                 size, machInst, rMid + 4, rn, 16, noAlign);
 492         break;
 493       case 2:
 494         microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
 495                 size, machInst, rMid, rn, 0, align);
 496         break;
 497       case 1:
 498         microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
 499                 size, machInst, rMid, rn, 0, align);
 500         break;
 501       default:
 502         // Unknown number of registers
 503         microOps[uopIdx++] = new Unknown(machInst);
 504     }
 505     if (wb) {
 506         if (rm != 15 && rm != 13) {
 507             microOps[uopIdx++] =
 508                 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
 509         } else {
 510             microOps[uopIdx++] =
 511                 new MicroAddiUop(machInst, rn, rn, regs * 8);
 512         }
 513     }
 514     if (deinterleave) {
 515         switch (elems) {
 516           case 4:
 517             assert(regs == 4);
 518             microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
 519                     size, machInst, vd * 2, rMid, inc * 2);
 520             break;
 521           case 3:
 522             assert(regs == 3);
 523             microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
 524                     size, machInst, vd * 2, rMid, inc * 2);
 525             break;
 526           case 2:
 527             assert(regs == 4 || regs == 2);
 528             if (regs == 4) {
 529                 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
 530                         size, machInst, vd * 2, rMid, inc * 2);
 531                 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
 532                         size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
 533             } else {
 534                 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
 535                         size, machInst, vd * 2, rMid, inc * 2);
 536             }
 537             break;
 538           default:
 539             // Bad number of elements to deinterleave
 540             microOps[uopIdx++] = new Unknown(machInst);
 541         }
 542     }
 543     assert(uopIdx == numMicroops);
 544
 545     for (unsigned i = 0; i < numMicroops - 1; i++) {
 546         MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
 547         assert(uopPtr);
 548         uopPtr->setDelayedCommit();
 549     }
 550     microOps[0]->setFirstMicroop();
 551     microOps[numMicroops - 1]->setLastMicroop();
 552 }
 553
 554 VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
 555                          OpClass __opClass, bool all, unsigned elems,
 556                          RegIndex rn, RegIndex vd, unsigned regs,
 557                          unsigned inc, uint32_t size, uint32_t align,
 558                          RegIndex rm, unsigned lane) :
 559     PredMacroOp(mnem, machInst, __opClass)
 560 {
 561     assert(regs > 0 && regs <= 4);
 562     assert(regs % elems == 0);
 563
 564     unsigned eBytes = (1 << size);
 565     unsigned loadSize = eBytes * elems;
 566     unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
 567                         sizeof(FloatRegBits);
 568
 569     assert(loadRegs > 0 && loadRegs <= 4);
 570
 571     numMicroops = 1;
 572     bool wb = (rm != 15);
 573
 574     if (wb) numMicroops++;
 575     numMicroops += (regs / elems);
 576     microOps = new StaticInstPtr[numMicroops];
 577
 578     RegIndex ufp0 = NumFloatV7ArchRegs;
 579
 580     unsigned uopIdx = 0;
 581     switch (loadSize) {
 582       case 1:
 583         microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
 584                 machInst, ufp0, rn, 0, align);
 585         break;
 586       case 2:
 587         if (eBytes == 2) {
 588             microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
 589                     machInst, ufp0, rn, 0, align);
 590         } else {
 591             microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
 592                     machInst, ufp0, rn, 0, align);
 593         }
 594         break;
 595       case 3:
 596         microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
 597                 machInst, ufp0, rn, 0, align);
 598         break;
 599       case 4:
 600         switch (eBytes) {
 601           case 1:
 602             microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
 603                     machInst, ufp0, rn, 0, align);
 604             break;
 605           case 2:
 606             microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
 607                     machInst, ufp0, rn, 0, align);
 608             break;
 609           case 4:
 610             microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
 611                     machInst, ufp0, rn, 0, align);
 612             break;
 613         }
 614         break;
 615       case 6:
 616         microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
 617                 machInst, ufp0, rn, 0, align);
 618         break;
 619       case 8:
 620         switch (eBytes) {
 621           case 2:
 622             microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
 623                     machInst, ufp0, rn, 0, align);
 624             break;
 625           case 4:
 626             microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
 627                     machInst, ufp0, rn, 0, align);
 628             break;
 629         }
 630         break;
 631       case 12:
 632         microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
 633                 machInst, ufp0, rn, 0, align);
 634         break;
 635       case 16:
 636         microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
 637                 machInst, ufp0, rn, 0, align);
 638         break;
 639       default:
 640         // Unrecognized load size
 641         microOps[uopIdx++] = new Unknown(machInst);
 642     }
 643     if (wb) {
 644         if (rm != 15 && rm != 13) {
 645             microOps[uopIdx++] =
 646                 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
 647         } else {
 648             microOps[uopIdx++] =
 649                 new MicroAddiUop(machInst, rn, rn, loadSize);
 650         }
 651     }
 652     switch (elems) {
 653       case 4:
 654         assert(regs == 4);
 655         switch (size) {
 656           case 0:
 657             if (all) {
 658                 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
 659                         machInst, vd * 2, ufp0, inc * 2);
 660             } else {
 661                 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
 662                         machInst, vd * 2, ufp0, inc * 2, lane);
 663             }
 664             break;
 665           case 1:
 666             if (all) {
 667                 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
 668                         machInst, vd * 2, ufp0, inc * 2);
 669             } else {
 670                 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
 671                         machInst, vd * 2, ufp0, inc * 2, lane);
 672             }
 673             break;
 674           case 2:
 675             if (all) {
 676                 microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
 677                         machInst, vd * 2, ufp0, inc * 2);
 678             } else {
 679                 microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
 680                         machInst, vd * 2, ufp0, inc * 2, lane);
 681             }
 682             break;
 683           default:
 684             // Bad size
 685             microOps[uopIdx++] = new Unknown(machInst);
 686             break;
 687         }
 688         break;
 689       case 3:
 690         assert(regs == 3);
 691         switch (size) {
 692           case 0:
 693             if (all) {
 694                 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
 695                         machInst, vd * 2, ufp0, inc * 2);
 696             } else {
 697                 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
 698                         machInst, vd * 2, ufp0, inc * 2, lane);
 699             }
 700             break;
 701           case 1:
 702             if (all) {
 703                 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
 704                         machInst, vd * 2, ufp0, inc * 2);
 705             } else {
 706                 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
 707                         machInst, vd * 2, ufp0, inc * 2, lane);
 708             }
 709             break;
 710           case 2:
 711             if (all) {
 712                 microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
 713                         machInst, vd * 2, ufp0, inc * 2);
 714             } else {
 715                 microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
 716                         machInst, vd * 2, ufp0, inc * 2, lane);
 717             }
 718             break;
 719           default:
 720             // Bad size
 721             microOps[uopIdx++] = new Unknown(machInst);
 722             break;
 723         }
 724         break;
 725       case 2:
 726         assert(regs == 2);
 727         assert(loadRegs <= 2);
 728         switch (size) {
 729           case 0:
 730             if (all) {
 731                 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
 732                         machInst, vd * 2, ufp0, inc * 2);
 733             } else {
 734                 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
 735                         machInst, vd * 2, ufp0, inc * 2, lane);
 736             }
 737             break;
 738           case 1:
 739             if (all) {
 740                 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
 741                         machInst, vd * 2, ufp0, inc * 2);
 742             } else {
 743                 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
 744                         machInst, vd * 2, ufp0, inc * 2, lane);
 745             }
 746             break;
 747           case 2:
 748             if (all) {
 749                 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
 750                         machInst, vd * 2, ufp0, inc * 2);
 751             } else {
 752                 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
 753                         machInst, vd * 2, ufp0, inc * 2, lane);
 754             }
 755             break;
 756           default:
 757             // Bad size
 758             microOps[uopIdx++] = new Unknown(machInst);
 759             break;
 760         }
 761         break;
 762       case 1:
 763         assert(regs == 1 || (all && regs == 2));
 764         assert(loadRegs <= 2);
 765         for (unsigned offset = 0; offset < regs; offset++) {
 766             switch (size) {
 767               case 0:
 768                 if (all) {
 769                     microOps[uopIdx++] =
 770                         new MicroUnpackAllNeon2to2Uop<uint8_t>(
 771                             machInst, (vd + offset) * 2, ufp0, inc * 2);
 772                 } else {
 773                     microOps[uopIdx++] =
 774                         new MicroUnpackNeon2to2Uop<uint8_t>(
 775                             machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
 776                 }
 777                 break;
 778               case 1:
 779                 if (all) {
 780                     microOps[uopIdx++] =
 781                         new MicroUnpackAllNeon2to2Uop<uint16_t>(
 782                             machInst, (vd + offset) * 2, ufp0, inc * 2);
 783                 } else {
 784                     microOps[uopIdx++] =
 785                         new MicroUnpackNeon2to2Uop<uint16_t>(
 786                             machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
 787                 }
 788                 break;
 789               case 2:
 790                 if (all) {
 791                     microOps[uopIdx++] =
 792                         new MicroUnpackAllNeon2to2Uop<uint32_t>(
 793                             machInst, (vd + offset) * 2, ufp0, inc * 2);
 794                 } else {
 795                     microOps[uopIdx++] =
 796                         new MicroUnpackNeon2to2Uop<uint32_t>(
 797                             machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
 798                 }
 799                 break;
 800               default:
 801                 // Bad size
 802                 microOps[uopIdx++] = new Unknown(machInst);
 803                 break;
 804             }
 805         }
 806         break;
 807       default:
 808         // Bad number of elements to unpack
 809         microOps[uopIdx++] = new Unknown(machInst);
 810     }
 811     assert(uopIdx == numMicroops);
 812
 813     for (unsigned i = 0; i < numMicroops - 1; i++) {
 814         MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
 815         assert(uopPtr);
 816         uopPtr->setDelayedCommit();
 817     }
 818     microOps[0]->setFirstMicroop();
 819     microOps[numMicroops - 1]->setLastMicroop();
 820 }
 821
 822 VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
 823                      unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
 824                      unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
 825     PredMacroOp(mnem, machInst, __opClass)
 826 {
 827     assert(regs > 0 && regs <= 4);
 828     assert(regs % elems == 0);
 829
 830     numMicroops = (regs > 2) ? 2 : 1;
 831     bool wb = (rm != 15);
 832     bool interleave = (elems > 1);
 833
 834     if (wb) numMicroops++;
 835     if (interleave) numMicroops += (regs / elems);
 836     microOps = new StaticInstPtr[numMicroops];
 837
 838     uint32_t noAlign = TLB::MustBeOne;
 839
 840     RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;
 841
 842     unsigned uopIdx = 0;
 843     if (interleave) {
 844         switch (elems) {
 845           case 4:
 846             assert(regs == 4);
 847             microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
 848                     size, machInst, rMid, vd * 2, inc * 2);
 849             break;
 850           case 3:
 851             assert(regs == 3);
 852             microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
 853                     size, machInst, rMid, vd * 2, inc * 2);
 854             break;
 855           case 2:
 856             assert(regs == 4 || regs == 2);
 857             if (regs == 4) {
 858                 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
 859                         size, machInst, rMid, vd * 2, inc * 2);
 860                 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
 861                         size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
 862             } else {
 863                 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
 864                         size, machInst, rMid, vd * 2, inc * 2);
 865             }
 866             break;
 867           default:
 868             // Bad number of elements to interleave
 869             microOps[uopIdx++] = new Unknown(machInst);
 870         }
 871     }
 872     switch (regs) {
 873       case 4:
 874         microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
 875                 size, machInst, rMid, rn, 0, align);
 876         microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
 877                 size, machInst, rMid + 4, rn, 16, noAlign);
 878         break;
 879       case 3:
 880         microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
 881                 size, machInst, rMid, rn, 0, align);
 882         microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
 883                 size, machInst, rMid + 4, rn, 16, noAlign);
 884         break;
 885       case 2:
 886         microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
 887                 size, machInst, rMid, rn, 0, align);
 888         break;
 889       case 1:
 890         microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
 891                 size, machInst, rMid, rn, 0, align);
 892         break;
 893       default:
 894         // Unknown number of registers
 895         microOps[uopIdx++] = new Unknown(machInst);
 896     }
 897     if (wb) {
 898         if (rm != 15 && rm != 13) {
 899             microOps[uopIdx++] =
 900                 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
 901         } else {
 902             microOps[uopIdx++] =
 903                 new MicroAddiUop(machInst, rn, rn, regs * 8);
 904         }
 905     }
 906     assert(uopIdx == numMicroops);
 907
 908     for (unsigned i = 0; i < numMicroops - 1; i++) {
 909         MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
 910         assert(uopPtr);
 911         uopPtr->setDelayedCommit();
 912     }
 913     microOps[0]->setFirstMicroop();
 914     microOps[numMicroops - 1]->setLastMicroop();
 915 }
 916
 917 VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
 918                          OpClass __opClass, bool all, unsigned elems,
 919                          RegIndex rn, RegIndex vd, unsigned regs,
 920                          unsigned inc, uint32_t size, uint32_t align,
 921                          RegIndex rm, unsigned lane) :
 922     PredMacroOp(mnem, machInst, __opClass)
 923 {
 924     assert(!all);
 925     assert(regs > 0 && regs <= 4);
 926     assert(regs % elems == 0);
 927
 928     unsigned eBytes = (1 << size);
 929     unsigned storeSize = eBytes * elems;
 930     unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
 931                          sizeof(FloatRegBits);
 932
 933     assert(storeRegs > 0 && storeRegs <= 4);
 934
 935     numMicroops = 1;
 936     bool wb = (rm != 15);
 937
 938     if (wb) numMicroops++;
 939     numMicroops += (regs / elems);
 940     microOps = new StaticInstPtr[numMicroops];
 941
 942     RegIndex ufp0 = NumFloatV7ArchRegs;
 943
 944     unsigned uopIdx = 0;
 945     switch (elems) {
 946       case 4:
 947         assert(regs == 4);
 948         switch (size) {
 949           case 0:
 950             microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
 951                     machInst, ufp0, vd * 2, inc * 2, lane);
 952             break;
 953           case 1:
 954             microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
 955                     machInst, ufp0, vd * 2, inc * 2, lane);
 956             break;
 957           case 2:
 958             microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
 959                     machInst, ufp0, vd * 2, inc * 2, lane);
 960             break;
 961           default:
 962             // Bad size
 963             microOps[uopIdx++] = new Unknown(machInst);
 964             break;
 965         }
 966         break;
 967       case 3:
 968         assert(regs == 3);
 969         switch (size) {
 970           case 0:
 971             microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
 972                     machInst, ufp0, vd * 2, inc * 2, lane);
 973             break;
 974           case 1:
 975             microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
 976                     machInst, ufp0, vd * 2, inc * 2, lane);
 977             break;
 978           case 2:
 979             microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
 980                     machInst, ufp0, vd * 2, inc * 2, lane);
 981             break;
 982           default:
 983             // Bad size
 984             microOps[uopIdx++] = new Unknown(machInst);
 985             break;
 986         }
 987         break;
 988       case 2:
 989         assert(regs == 2);
 990         assert(storeRegs <= 2);
 991         switch (size) {
 992           case 0:
 993             microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
 994                     machInst, ufp0, vd * 2, inc * 2, lane);
 995             break;
 996           case 1:
 997             microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
 998                     machInst, ufp0, vd * 2, inc * 2, lane);
 999             break;
1000           case 2:
1001             microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
1002                     machInst, ufp0, vd * 2, inc * 2, lane);
1003             break;
1004           default:
1005             // Bad size
1006             microOps[uopIdx++] = new Unknown(machInst);
1007             break;
1008         }
1009         break;
1010       case 1:
1011         assert(regs == 1 || (all && regs == 2));
1012         assert(storeRegs <= 2);
1013         for (unsigned offset = 0; offset < regs; offset++) {
1014             switch (size) {
1015               case 0:
1016                 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
1017                         machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1018                 break;
1019               case 1:
1020                 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
1021                         machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1022                 break;
1023               case 2:
1024                 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
1025                         machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1026                 break;
1027               default:
1028                 // Bad size
1029                 microOps[uopIdx++] = new Unknown(machInst);
1030                 break;
1031             }
1032         }
1033         break;
1034       default:
1035         // Bad number of elements to unpack
1036         microOps[uopIdx++] = new Unknown(machInst);
1037     }
1038     switch (storeSize) {
1039       case 1:
1040         microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
1041                 machInst, ufp0, rn, 0, align);
1042         break;
1043       case 2:
1044         if (eBytes == 2) {
1045             microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
1046                     machInst, ufp0, rn, 0, align);
1047         } else {
1048             microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
1049                     machInst, ufp0, rn, 0, align);
1050         }
1051         break;
1052       case 3:
1053         microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
1054                 machInst, ufp0, rn, 0, align);
1055         break;
1056       case 4:
1057         switch (eBytes) {
1058           case 1:
1059             microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
1060                     machInst, ufp0, rn, 0, align);
1061             break;
1062           case 2:
1063             microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
1064                     machInst, ufp0, rn, 0, align);
1065             break;
1066           case 4:
1067             microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
1068                     machInst, ufp0, rn, 0, align);
1069             break;
1070         }
1071         break;
1072       case 6:
1073         microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1074                 machInst, ufp0, rn, 0, align);
1075         break;
1076       case 8:
1077         switch (eBytes) {
1078           case 2:
1079             microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1080                     machInst, ufp0, rn, 0, align);
1081             break;
1082           case 4:
1083             microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1084                     machInst, ufp0, rn, 0, align);
1085             break;
1086         }
1087         break;
1088       case 12:
1089         microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1090                 machInst, ufp0, rn, 0, align);
1091         break;
1092       case 16:
1093         microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1094                 machInst, ufp0, rn, 0, align);
1095         break;
1096       default:
1097         // Bad store size
1098         microOps[uopIdx++] = new Unknown(machInst);
1099     }
1100     if (wb) {
1101         if (rm != 15 && rm != 13) {
1102             microOps[uopIdx++] =
1103                 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1104         } else {
1105             microOps[uopIdx++] =
1106                 new MicroAddiUop(machInst, rn, rn, storeSize);
1107         }
1108     }
1109     assert(uopIdx == numMicroops);
1110
1111     for (unsigned i = 0; i < numMicroops - 1; i++) {
1112         MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1113         assert(uopPtr);
1114         uopPtr->setDelayedCommit();
1115     }
1116     microOps[0]->setFirstMicroop();
1117     microOps[numMicroops - 1]->setLastMicroop();
1118 }
1119
1120 VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
1121                          OpClass __opClass, RegIndex rn, RegIndex vd,
1122                          RegIndex rm, uint8_t eSize, uint8_t dataSize,
1123                          uint8_t numStructElems, uint8_t numRegs, bool wb) :
1124     PredMacroOp(mnem, machInst, __opClass)
1125 {
1126     RegIndex vx = NumFloatV8ArchRegs / 4;
1127     RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1128     bool baseIsSP = isSP((IntRegIndex) rnsp);
1129
1130     numMicroops = wb ? 1 : 0;
1131
1132     int totNumBytes = numRegs * dataSize / 8;
1133     assert(totNumBytes <= 64);
1134
1135     // The guiding principle here is that no more than 16 bytes can be
1136     // transferred at a time
1137     int numMemMicroops = totNumBytes / 16;
1138     int residuum = totNumBytes % 16;
1139     if (residuum)
1140         ++numMemMicroops;
1141     numMicroops += numMemMicroops;
1142
1143     int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1144     numMicroops += numMarshalMicroops;
1145
1146     microOps = new StaticInstPtr[numMicroops];
1147     unsigned uopIdx = 0;
1148     uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1149         TLB::AllowUnaligned;
1150
1151     int i = 0;
1152     for (; i < numMemMicroops - 1; ++i) {
1153         microOps[uopIdx++] = new MicroNeonLoad64(
1154             machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1155             baseIsSP, 16 /* accSize */, eSize);
1156     }
1157     microOps[uopIdx++] =  new MicroNeonLoad64(
1158         machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1159         residuum ? residuum : 16 /* accSize */, eSize);
1160
1161     // Writeback microop: the post-increment amount is encoded in "Rm": a
1162     // 64-bit general register OR as '11111' for an immediate value equal to
1163     // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1164     if (wb) {
1165         if (rm != ((RegIndex) INTREG_X31)) {
1166             microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1167                                                       UXTX, 0);
1168         } else {
1169             microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1170                                                    totNumBytes);
1171         }
1172     }
1173
1174     for (int i = 0; i < numMarshalMicroops; ++i) {
1175         switch(numRegs) {
1176             case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
1177                         machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1178                         numStructElems, 1, i /* step */);
1179                     break;
1180             case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
1181                         machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1182                         numStructElems, 2, i /* step */);
1183                     break;
1184             case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
1185                         machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1186                         numStructElems, 3, i /* step */);
1187                     break;
1188             case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
1189                         machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1190                         numStructElems, 4, i /* step */);
1191                     break;
1192             default: panic("Invalid number of registers");
1193         }
1194
1195     }
1196
1197     assert(uopIdx == numMicroops);
1198
1199     for (int i = 0; i < numMicroops - 1; ++i) {
1200         microOps[i]->setDelayedCommit();
1201     }
1202     microOps[numMicroops - 1]->setLastMicroop();
1203 }
1204
1205 VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
1206                          OpClass __opClass, RegIndex rn, RegIndex vd,
1207                          RegIndex rm, uint8_t eSize, uint8_t dataSize,
1208                          uint8_t numStructElems, uint8_t numRegs, bool wb) :
1209     PredMacroOp(mnem, machInst, __opClass)
1210 {
1211     RegIndex vx = NumFloatV8ArchRegs / 4;
1212     RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1213     bool baseIsSP = isSP((IntRegIndex) rnsp);
1214
1215     numMicroops = wb ? 1 : 0;
1216
1217     int totNumBytes = numRegs * dataSize / 8;
1218     assert(totNumBytes <= 64);
1219
1220     // The guiding principle here is that no more than 16 bytes can be
1221     // transferred at a time
1222     int numMemMicroops = totNumBytes / 16;
1223     int residuum = totNumBytes % 16;
1224     if (residuum)
1225         ++numMemMicroops;
1226     numMicroops += numMemMicroops;
1227
1228     int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1229     numMicroops += numMarshalMicroops;
1230
1231     microOps = new StaticInstPtr[numMicroops];
1232     unsigned uopIdx = 0;
1233
1234     for (int i = 0; i < numMarshalMicroops; ++i) {
1235         switch (numRegs) {
1236             case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
1237                         machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1238                         numStructElems, 1, i /* step */);
1239                     break;
1240             case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
1241                         machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1242                         numStructElems, 2, i /* step */);
1243                     break;
1244             case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
1245                         machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1246                         numStructElems, 3, i /* step */);
1247                     break;
1248             case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
1249                         machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1250                         numStructElems, 4, i /* step */);
1251                     break;
1252             default: panic("Invalid number of registers");
1253         }
1254     }
1255
1256     uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1257         TLB::AllowUnaligned;
1258
1259     int i = 0;
1260     for (; i < numMemMicroops - 1; ++i) {
1261         microOps[uopIdx++] = new MicroNeonStore64(
1262             machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1263             baseIsSP, 16 /* accSize */, eSize);
1264     }
1265     microOps[uopIdx++] = new MicroNeonStore64(
1266         machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1267         residuum ? residuum : 16 /* accSize */, eSize);
1268
1269     // Writeback microop: the post-increment amount is encoded in "Rm": a
1270     // 64-bit general register OR as '11111' for an immediate value equal to
1271     // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1272     if (wb) {
1273         if (rm != ((RegIndex) INTREG_X31)) {
1274             microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1275                                                       UXTX, 0);
1276         } else {
1277             microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1278                                                    totNumBytes);
1279         }
1280     }
1281
1282     assert(uopIdx == numMicroops);
1283
1284     for (int i = 0; i < numMicroops - 1; i++) {
1285         microOps[i]->setDelayedCommit();
1286     }
1287     microOps[numMicroops - 1]->setLastMicroop();
1288 }
1289
1290 VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
1291                              OpClass __opClass, RegIndex rn, RegIndex vd,
1292                              RegIndex rm, uint8_t eSize, uint8_t dataSize,
1293                              uint8_t numStructElems, uint8_t index, bool wb,
1294                              bool replicate) :
1295     PredMacroOp(mnem, machInst, __opClass),
1296     eSize(0), dataSize(0), numStructElems(0), index(0),
1297     wb(false), replicate(false)
1298
1299 {
1300     RegIndex vx = NumFloatV8ArchRegs / 4;
1301     RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1302     bool baseIsSP = isSP((IntRegIndex) rnsp);
1303
1304     numMicroops = wb ? 1 : 0;
1305
1306     int eSizeBytes = 1 << eSize;
1307     int totNumBytes = numStructElems * eSizeBytes;
1308     assert(totNumBytes <= 64);
1309
1310     // The guiding principle here is that no more than 16 bytes can be
1311     // transferred at a time
1312     int numMemMicroops = totNumBytes / 16;
1313     int residuum = totNumBytes % 16;
1314     if (residuum)
1315         ++numMemMicroops;
1316     numMicroops += numMemMicroops;
1317
1318     int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1319     numMicroops += numMarshalMicroops;
1320
1321     microOps = new StaticInstPtr[numMicroops];
1322     unsigned uopIdx = 0;
1323
1324     uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1325         TLB::AllowUnaligned;
1326
1327     int i = 0;
1328     for (; i < numMemMicroops - 1; ++i) {
1329         microOps[uopIdx++] = new MicroNeonLoad64(
1330             machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1331             baseIsSP, 16 /* accSize */, eSize);
1332     }
1333     microOps[uopIdx++] = new MicroNeonLoad64(
1334         machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1335         residuum ? residuum : 16 /* accSize */, eSize);
1336
1337     // Writeback microop: the post-increment amount is encoded in "Rm": a
1338     // 64-bit general register OR as '11111' for an immediate value equal to
1339     // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1340     if (wb) {
1341         if (rm != ((RegIndex) INTREG_X31)) {
1342             microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1343                                                       UXTX, 0);
1344         } else {
1345             microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1346                                                    totNumBytes);
1347         }
1348     }
1349
1350     for (int i = 0; i < numMarshalMicroops; ++i) {
1351         microOps[uopIdx++] = new MicroUnpackNeon64(
1352             machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1353             numStructElems, index, i /* step */, replicate);
1354     }
1355
1356     assert(uopIdx == numMicroops);
1357
1358     for (int i = 0; i < numMicroops - 1; i++) {
1359         microOps[i]->setDelayedCommit();
1360     }
1361     microOps[numMicroops - 1]->setLastMicroop();
1362 }
1363
1364 VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
1365                              OpClass __opClass, RegIndex rn, RegIndex vd,
1366                              RegIndex rm, uint8_t eSize, uint8_t dataSize,
1367                              uint8_t numStructElems, uint8_t index, bool wb,
1368                              bool replicate) :
1369     PredMacroOp(mnem, machInst, __opClass),
1370     eSize(0), dataSize(0), numStructElems(0), index(0),
1371     wb(false), replicate(false)
1372 {
1373     RegIndex vx = NumFloatV8ArchRegs / 4;
1374     RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1375     bool baseIsSP = isSP((IntRegIndex) rnsp);
1376
1377     numMicroops = wb ? 1 : 0;
1378
1379     int eSizeBytes = 1 << eSize;
1380     int totNumBytes = numStructElems * eSizeBytes;
1381     assert(totNumBytes <= 64);
1382
1383     // The guiding principle here is that no more than 16 bytes can be
1384     // transferred at a time
1385     int numMemMicroops = totNumBytes / 16;
1386     int residuum = totNumBytes % 16;
1387     if (residuum)
1388         ++numMemMicroops;
1389     numMicroops += numMemMicroops;
1390
1391     int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1392     numMicroops += numMarshalMicroops;
1393
1394     microOps = new StaticInstPtr[numMicroops];
1395     unsigned uopIdx = 0;
1396
1397     for (int i = 0; i < numMarshalMicroops; ++i) {
1398         microOps[uopIdx++] = new MicroPackNeon64(
1399             machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1400             numStructElems, index, i /* step */, replicate);
1401     }
1402
1403     uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1404         TLB::AllowUnaligned;
1405
1406     int i = 0;
1407     for (; i < numMemMicroops - 1; ++i) {
1408         microOps[uopIdx++] = new MicroNeonStore64(
1409             machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1410             baseIsSP, 16 /* accsize */, eSize);
1411     }
1412     microOps[uopIdx++] = new MicroNeonStore64(
1413         machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1414         residuum ? residuum : 16 /* accSize */, eSize);
1415
1416     // Writeback microop: the post-increment amount is encoded in "Rm": a
1417     // 64-bit general register OR as '11111' for an immediate value equal to
1418     // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1419     if (wb) {
1420         if (rm != ((RegIndex) INTREG_X31)) {
1421             microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1422                                                       UXTX, 0);
1423         } else {
1424             microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1425                                                    totNumBytes);
1426         }
1427     }
1428
1429     assert(uopIdx == numMicroops);
1430
1431     for (int i = 0; i < numMicroops - 1; i++) {
1432         microOps[i]->setDelayedCommit();
1433     }
1434     microOps[numMicroops - 1]->setLastMicroop();
1435 }
1436
1437 MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
1438                              OpClass __opClass, IntRegIndex rn,
1439                              RegIndex vd, bool single, bool up,
1440                              bool writeback, bool load, uint32_t offset) :
1441     PredMacroOp(mnem, machInst, __opClass)
1442 {
1443     int i = 0;
1444
1445     // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1446     // to be functionally identical except that fldmx is deprecated. For now
1447     // we'll assume they're otherwise interchangable.
1448     int count = (single ? offset : (offset / 2));
1449     if (count == 0 || count > NumFloatV7ArchRegs)
1450         warn_once("Bad offset field for VFP load/store multiple.\n");
1451     if (count == 0) {
1452         // Force there to be at least one microop so the macroop makes sense.
1453         writeback = true;
1454     }
1455     if (count > NumFloatV7ArchRegs)
1456         count = NumFloatV7ArchRegs;
1457
1458     numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
1459     microOps = new StaticInstPtr[numMicroops];
1460
1461     int64_t addr = 0;
1462
1463     if (!up)
1464         addr = 4 * offset;
1465
1466     bool tempUp = up;
1467     for (int j = 0; j < count; j++) {
1468         if (load) {
1469             if (single) {
1470                 microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1471                                                   tempUp, addr);
1472             } else {
1473                 microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1474                                                     tempUp, addr);
1475                 microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1476                                                     addr + (up ? 4 : -4));
1477             }
1478         } else {
1479             if (single) {
1480                 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1481                                                   tempUp, addr);
1482             } else {
1483                 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1484                                                     tempUp, addr);
1485                 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1486                                                     addr + (up ? 4 : -4));
1487             }
1488         }
1489         if (!tempUp) {
1490             addr -= (single ? 4 : 8);
1491             // The microops don't handle negative displacement, so turn if we
1492             // hit zero, flip polarity and start adding.
1493             if (addr <= 0) {
1494                 tempUp = true;
1495                 addr = -addr;
1496             }
1497         } else {
1498             addr += (single ? 4 : 8);
1499         }
1500     }
1501
1502     if (writeback) {
1503         if (up) {
1504             microOps[i++] =
1505                 new MicroAddiUop(machInst, rn, rn, 4 * offset);
1506         } else {
1507             microOps[i++] =
1508                 new MicroSubiUop(machInst, rn, rn, 4 * offset);
1509         }
1510     }
1511
1512     assert(numMicroops == i);
1513     microOps[numMicroops - 1]->setLastMicroop();
1514
1515     for (StaticInstPtr *curUop = microOps;
1516             !(*curUop)->isLastMicroop(); curUop++) {
1517         MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1518         assert(uopPtr);
1519         uopPtr->setDelayedCommit();
1520     }
1521 }
1522
1523 std::string
1524 MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1525 {
1526     std::stringstream ss;
1527     printMnemonic(ss);
1528     printReg(ss, ura);
1529     ss << ", ";
1530     printReg(ss, urb);
1531     ss << ", ";
1532     ccprintf(ss, "#%d", imm);
1533     return ss.str();
1534 }
1535
1536 std::string
1537 MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1538 {
1539     std::stringstream ss;
1540     printMnemonic(ss);
1541     printReg(ss, ura);
1542     ss << ", ";
1543     printReg(ss, urb);
1544     ss << ", ";
1545     ccprintf(ss, "#%d", imm);
1546     return ss.str();
1547 }
1548
1549 std::string
1550 MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1551 {
1552     std::stringstream ss;
1553     printMnemonic(ss);
1554     ss << "[PC,CPSR]";
1555     return ss.str();
1556 }
1557
1558 std::string
1559 MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1560 {
1561     std::stringstream ss;
1562     printMnemonic(ss);
1563     printReg(ss, ura);
1564     ccprintf(ss, ", ");
1565     printReg(ss, urb);
1566     printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
1567     return ss.str();
1568 }
1569
1570 std::string
1571 MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1572 {
1573     std::stringstream ss;
1574     printMnemonic(ss);
1575     printReg(ss, ura);
1576     ss << ", ";
1577     printReg(ss, urb);
1578     return ss.str();
1579 }
1580
1581 std::string
1582 MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1583 {
1584     std::stringstream ss;
1585     printMnemonic(ss);
1586     printReg(ss, ura);
1587     ss << ", ";
1588     printReg(ss, urb);
1589     ss << ", ";
1590     printReg(ss, urc);
1591     return ss.str();
1592 }
1593
1594 std::string
1595 MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1596 {
1597     std::stringstream ss;
1598     printMnemonic(ss);
1599     if (isFloating())
1600         printReg(ss, ura + FP_Reg_Base);
1601     else
1602         printReg(ss, ura);
1603     ss << ", [";
1604     printReg(ss, urb);
1605     ss << ", ";
1606     ccprintf(ss, "#%d", imm);
1607     ss << "]";
1608     return ss.str();
1609 }
1610
1611 std::string
1612 MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1613 {
1614     std::stringstream ss;
1615     printMnemonic(ss);
1616     printReg(ss, dest);
1617     ss << ",";
1618     printReg(ss, dest2);
1619     ss << ", [";
1620     printReg(ss, urb);
1621     ss << ", ";
1622     ccprintf(ss, "#%d", imm);
1623     ss << "]";
1624     return ss.str();
1625 }
1626
1627 }