src/arch/arm/insts/macromem.cc

   1 /*
   2  * Copyright (c) 2010-2014 ARM Limited
   3  * All rights reserved
   4  *
   5  * The license below extends only to copyright in the software and shall
   6  * not be construed as granting a license to any other intellectual
   7  * property including but not limited to intellectual property relating
   8  * to a hardware implementation of the functionality of the software
   9  * licensed hereunder.  You may use the software subject to the license
  10  * terms below provided that you ensure that this notice is replicated
  11  * unmodified and in its entirety in all distributions of the software,
  12  * modified or unmodified, in source code or in binary form.
  13  *
  14  * Copyright (c) 2007-2008 The Florida State University
  15  * All rights reserved.
  16  *
  17  * Redistribution and use in source and binary forms, with or without
  18  * modification, are permitted provided that the following conditions are
  19  * met: redistributions of source code must retain the above copyright
  20  * notice, this list of conditions and the following disclaimer;
  21  * redistributions in binary form must reproduce the above copyright
  22  * notice, this list of conditions and the following disclaimer in the
  23  * documentation and/or other materials provided with the distribution;
  24  * neither the name of the copyright holders nor the names of its
  25  * contributors may be used to endorse or promote products derived from
  26  * this software without specific prior written permission.
  27  *
  28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  31  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  32  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  33  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  34  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  35  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  36  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  37  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  38  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39  *
  40  * Authors: Stephen Hines
  41  */
  42
  43 #include <sstream>
  44
  45 #include "arch/arm/insts/macromem.hh"
  46
  47 #include "arch/arm/generated/decoder.hh"
  48 #include "arch/arm/insts/neon64_mem.hh"
  49
  50 using namespace std;
  51 using namespace ArmISAInst;
  52
  53 namespace ArmISA
  54 {
  55
  56 MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
  57                        OpClass __opClass, IntRegIndex rn,
  58                        bool index, bool up, bool user, bool writeback,
  59                        bool load, uint32_t reglist) :
  60     PredMacroOp(mnem, machInst, __opClass)
  61 {
  62     uint32_t regs = reglist;
  63     uint32_t ones = number_of_ones(reglist);
  64     uint32_t mem_ops = ones;
  65
  66     // Copy the base address register if we overwrite it, or if this instruction
  67     // is basically a no-op (we have to do something)
  68     bool copy_base =  (bits(reglist, rn) && load) || !ones;
  69     bool force_user = user & !bits(reglist, 15);
  70     bool exception_ret = user & bits(reglist, 15);
  71     bool pc_temp = load && writeback && bits(reglist, 15);
  72
  73     if (!ones) {
  74         numMicroops = 1;
  75     } else if (load) {
  76         numMicroops = ((ones + 1) / 2)
  77                     + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
  78                     + (copy_base ? 1 : 0)
  79                     + (writeback? 1 : 0)
  80                     + (pc_temp ? 1 : 0);
  81     } else {
  82         numMicroops = ones + (writeback ? 1 : 0);
  83     }
  84
  85     microOps = new StaticInstPtr[numMicroops];
  86
  87     uint32_t addr = 0;
  88
  89     if (!up)
  90         addr = (ones << 2) - 4;
  91
  92     if (!index)
  93         addr += 4;
  94
  95     StaticInstPtr *uop = microOps;
  96
  97     // Add 0 to Rn and stick it in ureg0.
  98     // This is equivalent to a move.
  99     if (copy_base)
 100         *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
 101
 102     unsigned reg = 0;
 103     while (mem_ops != 0) {
 104         // Do load operations in pairs if possible
 105         if (load && mem_ops >= 2 &&
 106             !(mem_ops == 2 && bits(regs,INTREG_PC) && exception_ret)) {
 107             // 64-bit memory operation
 108             // Find 2 set register bits (clear them after finding)
 109             unsigned reg_idx1;
 110             unsigned reg_idx2;
 111
 112             // Find the first register
 113             while (!bits(regs, reg)) reg++;
 114             replaceBits(regs, reg, 0);
 115             reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg;
 116
 117             // Find the second register
 118             while (!bits(regs, reg)) reg++;
 119             replaceBits(regs, reg, 0);
 120             reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg;
 121
 122             // Load into temp reg if necessary
 123             if (reg_idx2 == INTREG_PC && pc_temp)
 124                 reg_idx2 = INTREG_UREG1;
 125
 126             // Actually load both registers from memory
 127             *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
 128                     copy_base ? INTREG_UREG0 : rn, up, addr);
 129
 130             if (!writeback && reg_idx2 == INTREG_PC) {
 131                 // No writeback if idx==pc, set appropriate flags
 132                 (*uop)->setFlag(StaticInst::IsControl);
 133                 (*uop)->setFlag(StaticInst::IsIndirectControl);
 134
 135                 if (!(condCode == COND_AL || condCode == COND_UC))
 136                     (*uop)->setFlag(StaticInst::IsCondControl);
 137                 else
 138                     (*uop)->setFlag(StaticInst::IsUncondControl);
 139             }
 140
 141             if (up) addr += 8;
 142             else addr -= 8;
 143             mem_ops -= 2;
 144         } else {
 145             // 32-bit memory operation
 146             // Find register for operation
 147             unsigned reg_idx;
 148             while(!bits(regs, reg)) reg++;
 149             replaceBits(regs, reg, 0);
 150             reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg;
 151
 152             if (load) {
 153                 if (writeback && reg_idx == INTREG_PC) {
 154                     // If this instruction changes the PC and performs a
 155                     // writeback, ensure the pc load/branch is the last uop.
 156                     // Load into a temp reg here.
 157                     *uop = new MicroLdrUop(machInst, INTREG_UREG1,
 158                             copy_base ? INTREG_UREG0 : rn, up, addr);
 159                 } else if (reg_idx == INTREG_PC && exception_ret) {
 160                     // Special handling for exception return
 161                     *uop = new MicroLdrRetUop(machInst, reg_idx,
 162                             copy_base ? INTREG_UREG0 : rn, up, addr);
 163                 } else {
 164                     // standard single load uop
 165                     *uop = new MicroLdrUop(machInst, reg_idx,
 166                             copy_base ? INTREG_UREG0 : rn, up, addr);
 167                 }
 168
 169                 // Loading pc as last operation?  Set appropriate flags.
 170                 if (!writeback && reg_idx == INTREG_PC) {
 171                     (*uop)->setFlag(StaticInst::IsControl);
 172                     (*uop)->setFlag(StaticInst::IsIndirectControl);
 173
 174                     if (!(condCode == COND_AL || condCode == COND_UC))
 175                         (*uop)->setFlag(StaticInst::IsCondControl);
 176                     else
 177                         (*uop)->setFlag(StaticInst::IsUncondControl);
 178                 }
 179             } else {
 180                 *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
 181             }
 182
 183             if (up) addr += 4;
 184             else addr -= 4;
 185             --mem_ops;
 186         }
 187
 188         // Load/store micro-op generated, go to next uop
 189         ++uop;
 190     }
 191
 192     if (writeback && ones) {
 193         // Perform writeback uop operation
 194         if (up)
 195             *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
 196         else
 197             *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);
 198
 199         // Write PC after address writeback?
 200         if (pc_temp) {
 201             if (exception_ret) {
 202                 *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
 203             } else {
 204                 *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1);
 205             }
 206             (*uop)->setFlag(StaticInst::IsControl);
 207             (*uop)->setFlag(StaticInst::IsIndirectControl);
 208
 209             if (!(condCode == COND_AL || condCode == COND_UC))
 210                 (*uop)->setFlag(StaticInst::IsCondControl);
 211             else
 212                 (*uop)->setFlag(StaticInst::IsUncondControl);
 213
 214             if (rn == INTREG_SP)
 215                 (*uop)->setFlag(StaticInst::IsReturn);
 216
 217             ++uop;
 218         }
 219     }
 220
 221     --uop;
 222     (*uop)->setLastMicroop();
 223
 224     /* Take the control flags from the last microop for the macroop */
 225     if ((*uop)->isControl())
 226         setFlag(StaticInst::IsControl);
 227     if ((*uop)->isCondCtrl())
 228         setFlag(StaticInst::IsCondControl);
 229     if ((*uop)->isUncondCtrl())
 230         setFlag(StaticInst::IsUncondControl);
 231     if ((*uop)->isIndirectCtrl())
 232         setFlag(StaticInst::IsIndirectControl);
 233     if ((*uop)->isReturn())
 234         setFlag(StaticInst::IsReturn);
 235
 236     for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
 237         (*uop)->setDelayedCommit();
 238     }
 239 }
 240
 241 PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
 242                      uint32_t size, bool fp, bool load, bool noAlloc,
 243                      bool signExt, bool exclusive, bool acrel,
 244                      int64_t imm, AddrMode mode,
 245                      IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
 246     PredMacroOp(mnem, machInst, __opClass)
 247 {
 248     bool post = (mode == AddrMd_PostIndex);
 249     bool writeback = (mode != AddrMd_Offset);
 250
 251     if (load) {
 252         // Use integer rounding to round up loads of size 4
 253         numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
 254     } else {
 255         numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
 256     }
 257     microOps = new StaticInstPtr[numMicroops];
 258
 259     StaticInstPtr *uop = microOps;
 260
 261     rn = makeSP(rn);
 262
 263     if (!post) {
 264         *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn,
 265                 post ? 0 : imm);
 266     }
 267
 268     if (fp) {
 269         if (size == 16) {
 270             if (load) {
 271                 *uop++ = new MicroLdFp16Uop(machInst, rt,
 272                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 273                 *uop++ = new MicroLdFp16Uop(machInst, rt2,
 274                         post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
 275             } else {
 276                 *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
 277                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 278                 *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
 279                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 280                 *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
 281                         post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
 282                 *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
 283                         post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
 284             }
 285         } else if (size == 8) {
 286             if (load) {
 287                 *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
 288                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 289             } else {
 290                 *uop++ = new MicroStrFpXImmUop(machInst, rt,
 291                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 292                 *uop++ = new MicroStrFpXImmUop(machInst, rt2,
 293                         post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel);
 294             }
 295         } else if (size == 4) {
 296             if (load) {
 297                 *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
 298                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 299             } else {
 300                 *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
 301                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 302             }
 303         }
 304     } else {
 305         if (size == 8) {
 306             if (load) {
 307                 *uop++ = new MicroLdPairUop(machInst, rt, rt2,
 308                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 309             } else {
 310                 *uop++ = new MicroStrXImmUop(machInst, rt, post ? rn : INTREG_UREG0,
 311                         0, noAlloc, exclusive, acrel);
 312                 *uop++ = new MicroStrXImmUop(machInst, rt2, post ? rn : INTREG_UREG0,
 313                         size, noAlloc, exclusive, acrel);
 314             }
 315         } else if (size == 4) {
 316             if (load) {
 317                 if (signExt) {
 318                     *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
 319                             post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 320                 } else {
 321                     *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
 322                             post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 323                 }
 324             } else {
 325                 *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
 326                         post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
 327             }
 328         }
 329     }
 330
 331     if (writeback) {
 332         *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0,
 333                                    post ? imm : 0);
 334     }
 335
 336     assert(uop == &microOps[numMicroops]);
 337     (*--uop)->setLastMicroop();
 338
 339     for (StaticInstPtr *curUop = microOps;
 340             !(*curUop)->isLastMicroop(); curUop++) {
 341         (*curUop)->setDelayedCommit();
 342     }
 343 }
 344
 345 BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
 346                              OpClass __opClass, bool load, IntRegIndex dest,
 347                              IntRegIndex base, int64_t imm) :
 348     PredMacroOp(mnem, machInst, __opClass)
 349 {
 350     numMicroops = load ? 1 : 2;
 351     microOps = new StaticInstPtr[numMicroops];
 352
 353     StaticInstPtr *uop = microOps;
 354
 355     if (load) {
 356         *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
 357     } else {
 358         *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
 359         (*uop)->setDelayedCommit();
 360         *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
 361     }
 362     (*uop)->setLastMicroop();
 363 }
 364
 365 BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
 366                                OpClass __opClass, bool load, IntRegIndex dest,
 367                                IntRegIndex base, int64_t imm) :
 368     PredMacroOp(mnem, machInst, __opClass)
 369 {
 370     numMicroops = load ? 2 : 3;
 371     microOps = new StaticInstPtr[numMicroops];
 372
 373     StaticInstPtr *uop = microOps;
 374
 375     if (load) {
 376         *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
 377     } else {
 378         *uop++= new MicroStrQBFpXImmUop(machInst, dest, base, 0);
 379         *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
 380     }
 381     *uop = new MicroAddXiUop(machInst, base, base, imm);
 382     (*uop)->setLastMicroop();
 383
 384     for (StaticInstPtr *curUop = microOps;
 385             !(*curUop)->isLastMicroop(); curUop++) {
 386         (*curUop)->setDelayedCommit();
 387     }
 388 }
 389
 390 BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
 391                              OpClass __opClass, bool load, IntRegIndex dest,
 392                              IntRegIndex base, int64_t imm) :
 393     PredMacroOp(mnem, machInst, __opClass)
 394 {
 395     numMicroops = load ? 2 : 3;
 396     microOps = new StaticInstPtr[numMicroops];
 397
 398     StaticInstPtr *uop = microOps;
 399
 400     if (load) {
 401         *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
 402     } else {
 403         *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
 404         *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
 405     }
 406     *uop = new MicroAddXiUop(machInst, base, base, imm);
 407     (*uop)->setLastMicroop();
 408
 409     for (StaticInstPtr *curUop = microOps;
 410             !(*curUop)->isLastMicroop(); curUop++) {
 411         (*curUop)->setDelayedCommit();
 412     }
 413 }
 414
 415 BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
 416                              OpClass __opClass, bool load, IntRegIndex dest,
 417                              IntRegIndex base, IntRegIndex offset,
 418                              ArmExtendType type, int64_t imm) :
 419     PredMacroOp(mnem, machInst, __opClass)
 420 {
 421     numMicroops = load ? 1 : 2;
 422     microOps = new StaticInstPtr[numMicroops];
 423
 424     StaticInstPtr *uop = microOps;
 425
 426     if (load) {
 427         *uop = new MicroLdFp16RegUop(machInst, dest, base,
 428                                   offset, type, imm);
 429     } else {
 430         *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
 431                                        offset, type, imm);
 432         (*uop)->setDelayedCommit();
 433         *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
 434                                          offset, type, imm);
 435     }
 436
 437     (*uop)->setLastMicroop();
 438 }
 439
 440 BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
 441                              OpClass __opClass, IntRegIndex dest,
 442                              int64_t imm) :
 443     PredMacroOp(mnem, machInst, __opClass)
 444 {
 445     numMicroops = 1;
 446     microOps = new StaticInstPtr[numMicroops];
 447
 448     microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
 449     microOps[0]->setLastMicroop();
 450 }
 451
 452 VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
 453                      unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
 454                      unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
 455     PredMacroOp(mnem, machInst, __opClass)
 456 {
 457     assert(regs > 0 && regs <= 4);
 458     assert(regs % elems == 0);
 459
 460     numMicroops = (regs > 2) ? 2 : 1;
 461     bool wb = (rm != 15);
 462     bool deinterleave = (elems > 1);
 463
 464     if (wb) numMicroops++;
 465     if (deinterleave) numMicroops += (regs / elems);
 466     microOps = new StaticInstPtr[numMicroops];
 467
 468     RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;
 469
 470     uint32_t noAlign = TLB::MustBeOne;
 471
 472     unsigned uopIdx = 0;
 473     switch (regs) {
 474       case 4:
 475         microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
 476                 size, machInst, rMid, rn, 0, align);
 477         microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
 478                 size, machInst, rMid + 4, rn, 16, noAlign);
 479         break;
 480       case 3:
 481         microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
 482                 size, machInst, rMid, rn, 0, align);
 483         microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
 484                 size, machInst, rMid + 4, rn, 16, noAlign);
 485         break;
 486       case 2:
 487         microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
 488                 size, machInst, rMid, rn, 0, align);
 489         break;
 490       case 1:
 491         microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
 492                 size, machInst, rMid, rn, 0, align);
 493         break;
 494       default:
 495         // Unknown number of registers
 496         microOps[uopIdx++] = new Unknown(machInst);
 497     }
 498     if (wb) {
 499         if (rm != 15 && rm != 13) {
 500             microOps[uopIdx++] =
 501                 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
 502         } else {
 503             microOps[uopIdx++] =
 504                 new MicroAddiUop(machInst, rn, rn, regs * 8);
 505         }
 506     }
 507     if (deinterleave) {
 508         switch (elems) {
 509           case 4:
 510             assert(regs == 4);
 511             microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
 512                     size, machInst, vd * 2, rMid, inc * 2);
 513             break;
 514           case 3:
 515             assert(regs == 3);
 516             microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
 517                     size, machInst, vd * 2, rMid, inc * 2);
 518             break;
 519           case 2:
 520             assert(regs == 4 || regs == 2);
 521             if (regs == 4) {
 522                 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
 523                         size, machInst, vd * 2, rMid, inc * 2);
 524                 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
 525                         size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
 526             } else {
 527                 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
 528                         size, machInst, vd * 2, rMid, inc * 2);
 529             }
 530             break;
 531           default:
 532             // Bad number of elements to deinterleave
 533             microOps[uopIdx++] = new Unknown(machInst);
 534         }
 535     }
 536     assert(uopIdx == numMicroops);
 537
 538     for (unsigned i = 0; i < numMicroops - 1; i++) {
 539         MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
 540         assert(uopPtr);
 541         uopPtr->setDelayedCommit();
 542     }
 543     microOps[numMicroops - 1]->setLastMicroop();
 544 }
 545
 546 VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
 547                          OpClass __opClass, bool all, unsigned elems,
 548                          RegIndex rn, RegIndex vd, unsigned regs,
 549                          unsigned inc, uint32_t size, uint32_t align,
 550                          RegIndex rm, unsigned lane) :
 551     PredMacroOp(mnem, machInst, __opClass)
 552 {
 553     assert(regs > 0 && regs <= 4);
 554     assert(regs % elems == 0);
 555
 556     unsigned eBytes = (1 << size);
 557     unsigned loadSize = eBytes * elems;
 558     unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
 559                         sizeof(FloatRegBits);
 560
 561     assert(loadRegs > 0 && loadRegs <= 4);
 562
 563     numMicroops = 1;
 564     bool wb = (rm != 15);
 565
 566     if (wb) numMicroops++;
 567     numMicroops += (regs / elems);
 568     microOps = new StaticInstPtr[numMicroops];
 569
 570     RegIndex ufp0 = NumFloatV7ArchRegs;
 571
 572     unsigned uopIdx = 0;
 573     switch (loadSize) {
 574       case 1:
 575         microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
 576                 machInst, ufp0, rn, 0, align);
 577         break;
 578       case 2:
 579         if (eBytes == 2) {
 580             microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
 581                     machInst, ufp0, rn, 0, align);
 582         } else {
 583             microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
 584                     machInst, ufp0, rn, 0, align);
 585         }
 586         break;
 587       case 3:
 588         microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
 589                 machInst, ufp0, rn, 0, align);
 590         break;
 591       case 4:
 592         switch (eBytes) {
 593           case 1:
 594             microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
 595                     machInst, ufp0, rn, 0, align);
 596             break;
 597           case 2:
 598             microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
 599                     machInst, ufp0, rn, 0, align);
 600             break;
 601           case 4:
 602             microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
 603                     machInst, ufp0, rn, 0, align);
 604             break;
 605         }
 606         break;
 607       case 6:
 608         microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
 609                 machInst, ufp0, rn, 0, align);
 610         break;
 611       case 8:
 612         switch (eBytes) {
 613           case 2:
 614             microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
 615                     machInst, ufp0, rn, 0, align);
 616             break;
 617           case 4:
 618             microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
 619                     machInst, ufp0, rn, 0, align);
 620             break;
 621         }
 622         break;
 623       case 12:
 624         microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
 625                 machInst, ufp0, rn, 0, align);
 626         break;
 627       case 16:
 628         microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
 629                 machInst, ufp0, rn, 0, align);
 630         break;
 631       default:
 632         // Unrecognized load size
 633         microOps[uopIdx++] = new Unknown(machInst);
 634     }
 635     if (wb) {
 636         if (rm != 15 && rm != 13) {
 637             microOps[uopIdx++] =
 638                 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
 639         } else {
 640             microOps[uopIdx++] =
 641                 new MicroAddiUop(machInst, rn, rn, loadSize);
 642         }
 643     }
 644     switch (elems) {
 645       case 4:
 646         assert(regs == 4);
 647         switch (size) {
 648           case 0:
 649             if (all) {
 650                 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
 651                         machInst, vd * 2, ufp0, inc * 2);
 652             } else {
 653                 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
 654                         machInst, vd * 2, ufp0, inc * 2, lane);
 655             }
 656             break;
 657           case 1:
 658             if (all) {
 659                 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
 660                         machInst, vd * 2, ufp0, inc * 2);
 661             } else {
 662                 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
 663                         machInst, vd * 2, ufp0, inc * 2, lane);
 664             }
 665             break;
 666           case 2:
 667             if (all) {
 668                 microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
 669                         machInst, vd * 2, ufp0, inc * 2);
 670             } else {
 671                 microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
 672                         machInst, vd * 2, ufp0, inc * 2, lane);
 673             }
 674             break;
 675           default:
 676             // Bad size
 677             microOps[uopIdx++] = new Unknown(machInst);
 678             break;
 679         }
 680         break;
 681       case 3:
 682         assert(regs == 3);
 683         switch (size) {
 684           case 0:
 685             if (all) {
 686                 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
 687                         machInst, vd * 2, ufp0, inc * 2);
 688             } else {
 689                 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
 690                         machInst, vd * 2, ufp0, inc * 2, lane);
 691             }
 692             break;
 693           case 1:
 694             if (all) {
 695                 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
 696                         machInst, vd * 2, ufp0, inc * 2);
 697             } else {
 698                 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
 699                         machInst, vd * 2, ufp0, inc * 2, lane);
 700             }
 701             break;
 702           case 2:
 703             if (all) {
 704                 microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
 705                         machInst, vd * 2, ufp0, inc * 2);
 706             } else {
 707                 microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
 708                         machInst, vd * 2, ufp0, inc * 2, lane);
 709             }
 710             break;
 711           default:
 712             // Bad size
 713             microOps[uopIdx++] = new Unknown(machInst);
 714             break;
 715         }
 716         break;
 717       case 2:
 718         assert(regs == 2);
 719         assert(loadRegs <= 2);
 720         switch (size) {
 721           case 0:
 722             if (all) {
 723                 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
 724                         machInst, vd * 2, ufp0, inc * 2);
 725             } else {
 726                 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
 727                         machInst, vd * 2, ufp0, inc * 2, lane);
 728             }
 729             break;
 730           case 1:
 731             if (all) {
 732                 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
 733                         machInst, vd * 2, ufp0, inc * 2);
 734             } else {
 735                 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
 736                         machInst, vd * 2, ufp0, inc * 2, lane);
 737             }
 738             break;
 739           case 2:
 740             if (all) {
 741                 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
 742                         machInst, vd * 2, ufp0, inc * 2);
 743             } else {
 744                 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
 745                         machInst, vd * 2, ufp0, inc * 2, lane);
 746             }
 747             break;
 748           default:
 749             // Bad size
 750             microOps[uopIdx++] = new Unknown(machInst);
 751             break;
 752         }
 753         break;
 754       case 1:
 755         assert(regs == 1 || (all && regs == 2));
 756         assert(loadRegs <= 2);
 757         for (unsigned offset = 0; offset < regs; offset++) {
 758             switch (size) {
 759               case 0:
 760                 if (all) {
 761                     microOps[uopIdx++] =
 762                         new MicroUnpackAllNeon2to2Uop<uint8_t>(
 763                             machInst, (vd + offset) * 2, ufp0, inc * 2);
 764                 } else {
 765                     microOps[uopIdx++] =
 766                         new MicroUnpackNeon2to2Uop<uint8_t>(
 767                             machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
 768                 }
 769                 break;
 770               case 1:
 771                 if (all) {
 772                     microOps[uopIdx++] =
 773                         new MicroUnpackAllNeon2to2Uop<uint16_t>(
 774                             machInst, (vd + offset) * 2, ufp0, inc * 2);
 775                 } else {
 776                     microOps[uopIdx++] =
 777                         new MicroUnpackNeon2to2Uop<uint16_t>(
 778                             machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
 779                 }
 780                 break;
 781               case 2:
 782                 if (all) {
 783                     microOps[uopIdx++] =
 784                         new MicroUnpackAllNeon2to2Uop<uint32_t>(
 785                             machInst, (vd + offset) * 2, ufp0, inc * 2);
 786                 } else {
 787                     microOps[uopIdx++] =
 788                         new MicroUnpackNeon2to2Uop<uint32_t>(
 789                             machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
 790                 }
 791                 break;
 792               default:
 793                 // Bad size
 794                 microOps[uopIdx++] = new Unknown(machInst);
 795                 break;
 796             }
 797         }
 798         break;
 799       default:
 800         // Bad number of elements to unpack
 801         microOps[uopIdx++] = new Unknown(machInst);
 802     }
 803     assert(uopIdx == numMicroops);
 804
 805     for (unsigned i = 0; i < numMicroops - 1; i++) {
 806         MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
 807         assert(uopPtr);
 808         uopPtr->setDelayedCommit();
 809     }
 810     microOps[numMicroops - 1]->setLastMicroop();
 811 }
 812
 813 VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
 814                      unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
 815                      unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
 816     PredMacroOp(mnem, machInst, __opClass)
 817 {
 818     assert(regs > 0 && regs <= 4);
 819     assert(regs % elems == 0);
 820
 821     numMicroops = (regs > 2) ? 2 : 1;
 822     bool wb = (rm != 15);
 823     bool interleave = (elems > 1);
 824
 825     if (wb) numMicroops++;
 826     if (interleave) numMicroops += (regs / elems);
 827     microOps = new StaticInstPtr[numMicroops];
 828
 829     uint32_t noAlign = TLB::MustBeOne;
 830
 831     RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;
 832
 833     unsigned uopIdx = 0;
 834     if (interleave) {
 835         switch (elems) {
 836           case 4:
 837             assert(regs == 4);
 838             microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
 839                     size, machInst, rMid, vd * 2, inc * 2);
 840             break;
 841           case 3:
 842             assert(regs == 3);
 843             microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
 844                     size, machInst, rMid, vd * 2, inc * 2);
 845             break;
 846           case 2:
 847             assert(regs == 4 || regs == 2);
 848             if (regs == 4) {
 849                 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
 850                         size, machInst, rMid, vd * 2, inc * 2);
 851                 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
 852                         size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
 853             } else {
 854                 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
 855                         size, machInst, rMid, vd * 2, inc * 2);
 856             }
 857             break;
 858           default:
 859             // Bad number of elements to interleave
 860             microOps[uopIdx++] = new Unknown(machInst);
 861         }
 862     }
 863     switch (regs) {
 864       case 4:
 865         microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
 866                 size, machInst, rMid, rn, 0, align);
 867         microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
 868                 size, machInst, rMid + 4, rn, 16, noAlign);
 869         break;
 870       case 3:
 871         microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
 872                 size, machInst, rMid, rn, 0, align);
 873         microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
 874                 size, machInst, rMid + 4, rn, 16, noAlign);
 875         break;
 876       case 2:
 877         microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
 878                 size, machInst, rMid, rn, 0, align);
 879         break;
 880       case 1:
 881         microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
 882                 size, machInst, rMid, rn, 0, align);
 883         break;
 884       default:
 885         // Unknown number of registers
 886         microOps[uopIdx++] = new Unknown(machInst);
 887     }
 888     if (wb) {
 889         if (rm != 15 && rm != 13) {
 890             microOps[uopIdx++] =
 891                 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
 892         } else {
 893             microOps[uopIdx++] =
 894                 new MicroAddiUop(machInst, rn, rn, regs * 8);
 895         }
 896     }
 897     assert(uopIdx == numMicroops);
 898
 899     for (unsigned i = 0; i < numMicroops - 1; i++) {
 900         MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
 901         assert(uopPtr);
 902         uopPtr->setDelayedCommit();
 903     }
 904     microOps[numMicroops - 1]->setLastMicroop();
 905 }
 906
 907 VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
 908                          OpClass __opClass, bool all, unsigned elems,
 909                          RegIndex rn, RegIndex vd, unsigned regs,
 910                          unsigned inc, uint32_t size, uint32_t align,
 911                          RegIndex rm, unsigned lane) :
 912     PredMacroOp(mnem, machInst, __opClass)
 913 {
 914     assert(!all);
 915     assert(regs > 0 && regs <= 4);
 916     assert(regs % elems == 0);
 917
 918     unsigned eBytes = (1 << size);
 919     unsigned storeSize = eBytes * elems;
 920     unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
 921                          sizeof(FloatRegBits);
 922
 923     assert(storeRegs > 0 && storeRegs <= 4);
 924
 925     numMicroops = 1;
 926     bool wb = (rm != 15);
 927
 928     if (wb) numMicroops++;
 929     numMicroops += (regs / elems);
 930     microOps = new StaticInstPtr[numMicroops];
 931
 932     RegIndex ufp0 = NumFloatV7ArchRegs;
 933
 934     unsigned uopIdx = 0;
 935     switch (elems) {
 936       case 4:
 937         assert(regs == 4);
 938         switch (size) {
 939           case 0:
 940             microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
 941                     machInst, ufp0, vd * 2, inc * 2, lane);
 942             break;
 943           case 1:
 944             microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
 945                     machInst, ufp0, vd * 2, inc * 2, lane);
 946             break;
 947           case 2:
 948             microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
 949                     machInst, ufp0, vd * 2, inc * 2, lane);
 950             break;
 951           default:
 952             // Bad size
 953             microOps[uopIdx++] = new Unknown(machInst);
 954             break;
 955         }
 956         break;
 957       case 3:
 958         assert(regs == 3);
 959         switch (size) {
 960           case 0:
 961             microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
 962                     machInst, ufp0, vd * 2, inc * 2, lane);
 963             break;
 964           case 1:
 965             microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
 966                     machInst, ufp0, vd * 2, inc * 2, lane);
 967             break;
 968           case 2:
 969             microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
 970                     machInst, ufp0, vd * 2, inc * 2, lane);
 971             break;
 972           default:
 973             // Bad size
 974             microOps[uopIdx++] = new Unknown(machInst);
 975             break;
 976         }
 977         break;
 978       case 2:
 979         assert(regs == 2);
 980         assert(storeRegs <= 2);
 981         switch (size) {
 982           case 0:
 983             microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
 984                     machInst, ufp0, vd * 2, inc * 2, lane);
 985             break;
 986           case 1:
 987             microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
 988                     machInst, ufp0, vd * 2, inc * 2, lane);
 989             break;
 990           case 2:
 991             microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
 992                     machInst, ufp0, vd * 2, inc * 2, lane);
 993             break;
 994           default:
 995             // Bad size
 996             microOps[uopIdx++] = new Unknown(machInst);
 997             break;
 998         }
 999         break;
1000       case 1:
1001         assert(regs == 1 || (all && regs == 2));
1002         assert(storeRegs <= 2);
1003         for (unsigned offset = 0; offset < regs; offset++) {
1004             switch (size) {
1005               case 0:
1006                 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
1007                         machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1008                 break;
1009               case 1:
1010                 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
1011                         machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1012                 break;
1013               case 2:
1014                 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
1015                         machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1016                 break;
1017               default:
1018                 // Bad size
1019                 microOps[uopIdx++] = new Unknown(machInst);
1020                 break;
1021             }
1022         }
1023         break;
1024       default:
1025         // Bad number of elements to unpack
1026         microOps[uopIdx++] = new Unknown(machInst);
1027     }
1028     switch (storeSize) {
1029       case 1:
1030         microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
1031                 machInst, ufp0, rn, 0, align);
1032         break;
1033       case 2:
1034         if (eBytes == 2) {
1035             microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
1036                     machInst, ufp0, rn, 0, align);
1037         } else {
1038             microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
1039                     machInst, ufp0, rn, 0, align);
1040         }
1041         break;
1042       case 3:
1043         microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
1044                 machInst, ufp0, rn, 0, align);
1045         break;
1046       case 4:
1047         switch (eBytes) {
1048           case 1:
1049             microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
1050                     machInst, ufp0, rn, 0, align);
1051             break;
1052           case 2:
1053             microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
1054                     machInst, ufp0, rn, 0, align);
1055             break;
1056           case 4:
1057             microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
1058                     machInst, ufp0, rn, 0, align);
1059             break;
1060         }
1061         break;
1062       case 6:
1063         microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1064                 machInst, ufp0, rn, 0, align);
1065         break;
1066       case 8:
1067         switch (eBytes) {
1068           case 2:
1069             microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1070                     machInst, ufp0, rn, 0, align);
1071             break;
1072           case 4:
1073             microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1074                     machInst, ufp0, rn, 0, align);
1075             break;
1076         }
1077         break;
1078       case 12:
1079         microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1080                 machInst, ufp0, rn, 0, align);
1081         break;
1082       case 16:
1083         microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1084                 machInst, ufp0, rn, 0, align);
1085         break;
1086       default:
1087         // Bad store size
1088         microOps[uopIdx++] = new Unknown(machInst);
1089     }
1090     if (wb) {
1091         if (rm != 15 && rm != 13) {
1092             microOps[uopIdx++] =
1093                 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1094         } else {
1095             microOps[uopIdx++] =
1096                 new MicroAddiUop(machInst, rn, rn, storeSize);
1097         }
1098     }
1099     assert(uopIdx == numMicroops);
1100
1101     for (unsigned i = 0; i < numMicroops - 1; i++) {
1102         MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1103         assert(uopPtr);
1104         uopPtr->setDelayedCommit();
1105     }
1106     microOps[numMicroops - 1]->setLastMicroop();
1107 }
1108
1109 VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
1110                          OpClass __opClass, RegIndex rn, RegIndex vd,
1111                          RegIndex rm, uint8_t eSize, uint8_t dataSize,
1112                          uint8_t numStructElems, uint8_t numRegs, bool wb) :
1113     PredMacroOp(mnem, machInst, __opClass)
1114 {
1115     RegIndex vx = NumFloatV8ArchRegs / 4;
1116     RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1117     bool baseIsSP = isSP((IntRegIndex) rnsp);
1118
1119     numMicroops = wb ? 1 : 0;
1120
1121     int totNumBytes = numRegs * dataSize / 8;
1122     assert(totNumBytes <= 64);
1123
1124     // The guiding principle here is that no more than 16 bytes can be
1125     // transferred at a time
1126     int numMemMicroops = totNumBytes / 16;
1127     int residuum = totNumBytes % 16;
1128     if (residuum)
1129         ++numMemMicroops;
1130     numMicroops += numMemMicroops;
1131
1132     int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1133     numMicroops += numMarshalMicroops;
1134
1135     microOps = new StaticInstPtr[numMicroops];
1136     unsigned uopIdx = 0;
1137     uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1138         TLB::AllowUnaligned;
1139
1140     int i = 0;
1141     for(; i < numMemMicroops - 1; ++i) {
1142         microOps[uopIdx++] = new MicroNeonLoad64(
1143             machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1144             baseIsSP, 16 /* accSize */, eSize);
1145     }
1146     microOps[uopIdx++] =  new MicroNeonLoad64(
1147         machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1148         residuum ? residuum : 16 /* accSize */, eSize);
1149
1150     // Writeback microop: the post-increment amount is encoded in "Rm": a
1151     // 64-bit general register OR as '11111' for an immediate value equal to
1152     // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1153     if (wb) {
1154         if (rm != ((RegIndex) INTREG_X31)) {
1155             microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1156                                                       UXTX, 0);
1157         } else {
1158             microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1159                                                    totNumBytes);
1160         }
1161     }
1162
1163     for (int i = 0; i < numMarshalMicroops; ++i) {
1164         switch(numRegs) {
1165             case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
1166                         machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1167                         numStructElems, 1, i /* step */);
1168                     break;
1169             case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
1170                         machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1171                         numStructElems, 2, i /* step */);
1172                     break;
1173             case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
1174                         machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1175                         numStructElems, 3, i /* step */);
1176                     break;
1177             case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
1178                         machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1179                         numStructElems, 4, i /* step */);
1180                     break;
1181             default: panic("Invalid number of registers");
1182         }
1183
1184     }
1185
1186     assert(uopIdx == numMicroops);
1187
1188     for (int i = 0; i < numMicroops - 1; ++i) {
1189         microOps[i]->setDelayedCommit();
1190     }
1191     microOps[numMicroops - 1]->setLastMicroop();
1192 }
1193
1194 VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
1195                          OpClass __opClass, RegIndex rn, RegIndex vd,
1196                          RegIndex rm, uint8_t eSize, uint8_t dataSize,
1197                          uint8_t numStructElems, uint8_t numRegs, bool wb) :
1198     PredMacroOp(mnem, machInst, __opClass)
1199 {
1200     RegIndex vx = NumFloatV8ArchRegs / 4;
1201     RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1202     bool baseIsSP = isSP((IntRegIndex) rnsp);
1203
1204     numMicroops = wb ? 1 : 0;
1205
1206     int totNumBytes = numRegs * dataSize / 8;
1207     assert(totNumBytes <= 64);
1208
1209     // The guiding principle here is that no more than 16 bytes can be
1210     // transferred at a time
1211     int numMemMicroops = totNumBytes / 16;
1212     int residuum = totNumBytes % 16;
1213     if (residuum)
1214         ++numMemMicroops;
1215     numMicroops += numMemMicroops;
1216
1217     int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1218     numMicroops += numMarshalMicroops;
1219
1220     microOps = new StaticInstPtr[numMicroops];
1221     unsigned uopIdx = 0;
1222
1223     for(int i = 0; i < numMarshalMicroops; ++i) {
1224         switch (numRegs) {
1225             case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
1226                         machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1227                         numStructElems, 1, i /* step */);
1228                     break;
1229             case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
1230                         machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1231                         numStructElems, 2, i /* step */);
1232                     break;
1233             case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
1234                         machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1235                         numStructElems, 3, i /* step */);
1236                     break;
1237             case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
1238                         machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1239                         numStructElems, 4, i /* step */);
1240                     break;
1241             default: panic("Invalid number of registers");
1242         }
1243     }
1244
1245     uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1246         TLB::AllowUnaligned;
1247
1248     int i = 0;
1249     for(; i < numMemMicroops - 1; ++i) {
1250         microOps[uopIdx++] = new MicroNeonStore64(
1251             machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1252             baseIsSP, 16 /* accSize */, eSize);
1253     }
1254     microOps[uopIdx++] = new MicroNeonStore64(
1255         machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1256         residuum ? residuum : 16 /* accSize */, eSize);
1257
1258     // Writeback microop: the post-increment amount is encoded in "Rm": a
1259     // 64-bit general register OR as '11111' for an immediate value equal to
1260     // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1261     if (wb) {
1262         if (rm != ((RegIndex) INTREG_X31)) {
1263             microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1264                                                       UXTX, 0);
1265         } else {
1266             microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1267                                                    totNumBytes);
1268         }
1269     }
1270
1271     assert(uopIdx == numMicroops);
1272
1273     for (int i = 0; i < numMicroops - 1; i++) {
1274         microOps[i]->setDelayedCommit();
1275     }
1276     microOps[numMicroops - 1]->setLastMicroop();
1277 }
1278
1279 VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
1280                              OpClass __opClass, RegIndex rn, RegIndex vd,
1281                              RegIndex rm, uint8_t eSize, uint8_t dataSize,
1282                              uint8_t numStructElems, uint8_t index, bool wb,
1283                              bool replicate) :
1284     PredMacroOp(mnem, machInst, __opClass),
1285     eSize(0), dataSize(0), numStructElems(0), index(0),
1286     wb(false), replicate(false)
1287
1288 {
1289     RegIndex vx = NumFloatV8ArchRegs / 4;
1290     RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1291     bool baseIsSP = isSP((IntRegIndex) rnsp);
1292
1293     numMicroops = wb ? 1 : 0;
1294
1295     int eSizeBytes = 1 << eSize;
1296     int totNumBytes = numStructElems * eSizeBytes;
1297     assert(totNumBytes <= 64);
1298
1299     // The guiding principle here is that no more than 16 bytes can be
1300     // transferred at a time
1301     int numMemMicroops = totNumBytes / 16;
1302     int residuum = totNumBytes % 16;
1303     if (residuum)
1304         ++numMemMicroops;
1305     numMicroops += numMemMicroops;
1306
1307     int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1308     numMicroops += numMarshalMicroops;
1309
1310     microOps = new StaticInstPtr[numMicroops];
1311     unsigned uopIdx = 0;
1312
1313     uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1314         TLB::AllowUnaligned;
1315
1316     int i = 0;
1317     for (; i < numMemMicroops - 1; ++i) {
1318         microOps[uopIdx++] = new MicroNeonLoad64(
1319             machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1320             baseIsSP, 16 /* accSize */, eSize);
1321     }
1322     microOps[uopIdx++] = new MicroNeonLoad64(
1323         machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1324         residuum ? residuum : 16 /* accSize */, eSize);
1325
1326     // Writeback microop: the post-increment amount is encoded in "Rm": a
1327     // 64-bit general register OR as '11111' for an immediate value equal to
1328     // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1329     if (wb) {
1330         if (rm != ((RegIndex) INTREG_X31)) {
1331             microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1332                                                       UXTX, 0);
1333         } else {
1334             microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1335                                                    totNumBytes);
1336         }
1337     }
1338
1339     for(int i = 0; i < numMarshalMicroops; ++i) {
1340         microOps[uopIdx++] = new MicroUnpackNeon64(
1341             machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1342             numStructElems, index, i /* step */, replicate);
1343     }
1344
1345     assert(uopIdx == numMicroops);
1346
1347     for (int i = 0; i < numMicroops - 1; i++) {
1348         microOps[i]->setDelayedCommit();
1349     }
1350     microOps[numMicroops - 1]->setLastMicroop();
1351 }
1352
1353 VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
1354                              OpClass __opClass, RegIndex rn, RegIndex vd,
1355                              RegIndex rm, uint8_t eSize, uint8_t dataSize,
1356                              uint8_t numStructElems, uint8_t index, bool wb,
1357                              bool replicate) :
1358     PredMacroOp(mnem, machInst, __opClass),
1359     eSize(0), dataSize(0), numStructElems(0), index(0),
1360     wb(false), replicate(false)
1361 {
1362     RegIndex vx = NumFloatV8ArchRegs / 4;
1363     RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1364     bool baseIsSP = isSP((IntRegIndex) rnsp);
1365
1366     numMicroops = wb ? 1 : 0;
1367
1368     int eSizeBytes = 1 << eSize;
1369     int totNumBytes = numStructElems * eSizeBytes;
1370     assert(totNumBytes <= 64);
1371
1372     // The guiding principle here is that no more than 16 bytes can be
1373     // transferred at a time
1374     int numMemMicroops = totNumBytes / 16;
1375     int residuum = totNumBytes % 16;
1376     if (residuum)
1377         ++numMemMicroops;
1378     numMicroops += numMemMicroops;
1379
1380     int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1381     numMicroops += numMarshalMicroops;
1382
1383     microOps = new StaticInstPtr[numMicroops];
1384     unsigned uopIdx = 0;
1385
1386     for(int i = 0; i < numMarshalMicroops; ++i) {
1387         microOps[uopIdx++] = new MicroPackNeon64(
1388             machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1389             numStructElems, index, i /* step */, replicate);
1390     }
1391
1392     uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1393         TLB::AllowUnaligned;
1394
1395     int i = 0;
1396     for(; i < numMemMicroops - 1; ++i) {
1397         microOps[uopIdx++] = new MicroNeonStore64(
1398             machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1399             baseIsSP, 16 /* accsize */, eSize);
1400     }
1401     microOps[uopIdx++] = new MicroNeonStore64(
1402         machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1403         residuum ? residuum : 16 /* accSize */, eSize);
1404
1405     // Writeback microop: the post-increment amount is encoded in "Rm": a
1406     // 64-bit general register OR as '11111' for an immediate value equal to
1407     // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1408     if (wb) {
1409         if (rm != ((RegIndex) INTREG_X31)) {
1410             microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1411                                                       UXTX, 0);
1412         } else {
1413             microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1414                                                    totNumBytes);
1415         }
1416     }
1417
1418     assert(uopIdx == numMicroops);
1419
1420     for (int i = 0; i < numMicroops - 1; i++) {
1421         microOps[i]->setDelayedCommit();
1422     }
1423     microOps[numMicroops - 1]->setLastMicroop();
1424 }
1425
1426 MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
1427                              OpClass __opClass, IntRegIndex rn,
1428                              RegIndex vd, bool single, bool up,
1429                              bool writeback, bool load, uint32_t offset) :
1430     PredMacroOp(mnem, machInst, __opClass)
1431 {
1432     int i = 0;
1433
1434     // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1435     // to be functionally identical except that fldmx is deprecated. For now
1436     // we'll assume they're otherwise interchangable.
1437     int count = (single ? offset : (offset / 2));
1438     if (count == 0 || count > NumFloatV7ArchRegs)
1439         warn_once("Bad offset field for VFP load/store multiple.\n");
1440     if (count == 0) {
1441         // Force there to be at least one microop so the macroop makes sense.
1442         writeback = true;
1443     }
1444     if (count > NumFloatV7ArchRegs)
1445         count = NumFloatV7ArchRegs;
1446
1447     numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
1448     microOps = new StaticInstPtr[numMicroops];
1449
1450     int64_t addr = 0;
1451
1452     if (!up)
1453         addr = 4 * offset;
1454
1455     bool tempUp = up;
1456     for (int j = 0; j < count; j++) {
1457         if (load) {
1458             if (single) {
1459                 microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1460                                                   tempUp, addr);
1461             } else {
1462                 microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1463                                                     tempUp, addr);
1464                 microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1465                                                     addr + (up ? 4 : -4));
1466             }
1467         } else {
1468             if (single) {
1469                 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1470                                                   tempUp, addr);
1471             } else {
1472                 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1473                                                     tempUp, addr);
1474                 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1475                                                     addr + (up ? 4 : -4));
1476             }
1477         }
1478         if (!tempUp) {
1479             addr -= (single ? 4 : 8);
1480             // The microops don't handle negative displacement, so turn if we
1481             // hit zero, flip polarity and start adding.
1482             if (addr <= 0) {
1483                 tempUp = true;
1484                 addr = -addr;
1485             }
1486         } else {
1487             addr += (single ? 4 : 8);
1488         }
1489     }
1490
1491     if (writeback) {
1492         if (up) {
1493             microOps[i++] =
1494                 new MicroAddiUop(machInst, rn, rn, 4 * offset);
1495         } else {
1496             microOps[i++] =
1497                 new MicroSubiUop(machInst, rn, rn, 4 * offset);
1498         }
1499     }
1500
1501     assert(numMicroops == i);
1502     microOps[numMicroops - 1]->setLastMicroop();
1503
1504     for (StaticInstPtr *curUop = microOps;
1505             !(*curUop)->isLastMicroop(); curUop++) {
1506         MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1507         assert(uopPtr);
1508         uopPtr->setDelayedCommit();
1509     }
1510 }
1511
1512 std::string
1513 MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1514 {
1515     std::stringstream ss;
1516     printMnemonic(ss);
1517     printReg(ss, ura);
1518     ss << ", ";
1519     printReg(ss, urb);
1520     ss << ", ";
1521     ccprintf(ss, "#%d", imm);
1522     return ss.str();
1523 }
1524
1525 std::string
1526 MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1527 {
1528     std::stringstream ss;
1529     printMnemonic(ss);
1530     printReg(ss, ura);
1531     ss << ", ";
1532     printReg(ss, urb);
1533     ss << ", ";
1534     ccprintf(ss, "#%d", imm);
1535     return ss.str();
1536 }
1537
1538 std::string
1539 MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1540 {
1541     std::stringstream ss;
1542     printMnemonic(ss);
1543     ss << "[PC,CPSR]";
1544     return ss.str();
1545 }
1546
1547 std::string
1548 MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1549 {
1550     std::stringstream ss;
1551     printMnemonic(ss);
1552     printReg(ss, ura);
1553     ccprintf(ss, ", ");
1554     printReg(ss, urb);
1555     printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
1556     return ss.str();
1557 }
1558
1559 std::string
1560 MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1561 {
1562     std::stringstream ss;
1563     printMnemonic(ss);
1564     printReg(ss, ura);
1565     ss << ", ";
1566     printReg(ss, urb);
1567     return ss.str();
1568 }
1569
1570 std::string
1571 MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1572 {
1573     std::stringstream ss;
1574     printMnemonic(ss);
1575     printReg(ss, ura);
1576     ss << ", ";
1577     printReg(ss, urb);
1578     ss << ", ";
1579     printReg(ss, urc);
1580     return ss.str();
1581 }
1582
1583 std::string
1584 MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1585 {
1586     std::stringstream ss;
1587     printMnemonic(ss);
1588     if (isFloating())
1589         printReg(ss, ura + FP_Reg_Base);
1590     else
1591         printReg(ss, ura);
1592     ss << ", [";
1593     printReg(ss, urb);
1594     ss << ", ";
1595     ccprintf(ss, "#%d", imm);
1596     ss << "]";
1597     return ss.str();
1598 }
1599
1600 std::string
1601 MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1602 {
1603     std::stringstream ss;
1604     printMnemonic(ss);
1605     printReg(ss, dest);
1606     ss << ",";
1607     printReg(ss, dest2);
1608     ss << ", [";
1609     printReg(ss, urb);
1610     ss << ", ";
1611     ccprintf(ss, "#%d", imm);
1612     ss << "]";
1613     return ss.str();
1614 }
1615
1616 }