/*
 * Copyright (c) 2010-2014 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2007-2008 The Florida State University
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "arch/arm/insts/macromem.hh"

#include <sstream>

#include "arch/arm/generated/decoder.hh"
#include "arch/arm/insts/neon64_mem.hh"

using namespace ArmISAInst;

namespace ArmISA
{
MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
                       OpClass __opClass, IntRegIndex rn,
                       bool index, bool up, bool user, bool writeback,
                       bool load, uint32_t reglist) :
    PredMacroOp(mnem, machInst, __opClass)
{
    uint32_t regs = reglist;
    uint32_t ones = number_of_ones(reglist);
    uint32_t mem_ops = ones;

    // Copy the base address register if we overwrite it, or if this
    // instruction is basically a no-op (we have to do something).
    bool copy_base = (bits(reglist, rn) && load) || !ones;
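    // For example, "ldmia r1, {r1, r2}" reloads its own base register
    // mid-sequence, so the loads below address memory relative to a copy of
    // the base kept in ureg0 instead of relative to rn itself.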
    bool force_user = user && !bits(reglist, 15);
    bool exception_ret = user && bits(reglist, 15);
    bool pc_temp = load && writeback && bits(reglist, 15);
    if (load) {
        numMicroops = ((ones + 1) / 2)
                    + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
                    + (copy_base ? 1 : 0)
                    + (writeback ? 1 : 0)
                    + (pc_temp ? 1 : 0);
    } else {
        numMicroops = ones + (writeback ? 1 : 0);
    }
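    // For example, "ldmia r0!, {r1-r5}" has ones == 5, giving
    // (5 + 1) / 2 == 3 paired-load microops plus one writeback microop;
    // the corresponding stmia takes one store microop per register instead.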
    microOps = new StaticInstPtr[numMicroops];

    uint32_t addr = 0;

    if (!up)
        addr = (ones << 2) - 4;

    if (index)
        addr += 4;

    StaticInstPtr *uop = microOps;

    // Add 0 to Rn and stick it in ureg0.
    // This is equivalent to a move.
    if (copy_base)
        *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
    while (mem_ops != 0) {
        // Do load operations in pairs if possible
        if (load && mem_ops >= 2 &&
            !(mem_ops == 2 && bits(regs, INTREG_PC) && exception_ret)) {
            // 64-bit memory operation
            // Find 2 set register bits (clear them after finding)
            unsigned reg = 0;
            unsigned reg_idx1;
            unsigned reg_idx2;

            // Find the first register
            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg;

            // Find the second register
            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg;

            // Load into temp reg if necessary
            if (reg_idx2 == INTREG_PC && pc_temp)
                reg_idx2 = INTREG_UREG1;

            // Actually load both registers from memory
            *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
                    copy_base ? INTREG_UREG0 : rn, up, addr);

            if (!writeback && reg_idx2 == INTREG_PC) {
                // No writeback if idx==pc, set appropriate flags
                (*uop)->setFlag(StaticInst::IsControl);
                (*uop)->setFlag(StaticInst::IsIndirectControl);

                if (!(condCode == COND_AL || condCode == COND_UC))
                    (*uop)->setFlag(StaticInst::IsCondControl);
                else
                    (*uop)->setFlag(StaticInst::IsUncondControl);
            }

            if (up)
                addr += 8;
            else
                addr -= 8;
            mem_ops -= 2;
        } else {
            // 32-bit memory operation
            // Find register for operation
            unsigned reg = 0;
            unsigned reg_idx;

            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg;

            if (load) {
                if (writeback && reg_idx == INTREG_PC) {
                    // If this instruction changes the PC and performs a
                    // writeback, ensure the pc load/branch is the last uop.
                    // Load into a temp reg here.
                    *uop = new MicroLdrUop(machInst, INTREG_UREG1,
                            copy_base ? INTREG_UREG0 : rn, up, addr);
                } else if (reg_idx == INTREG_PC && exception_ret) {
                    // Special handling for exception return
                    *uop = new MicroLdrRetUop(machInst, reg_idx,
                            copy_base ? INTREG_UREG0 : rn, up, addr);
                } else {
                    // Standard single load uop
                    *uop = new MicroLdrUop(machInst, reg_idx,
                            copy_base ? INTREG_UREG0 : rn, up, addr);
                }

                // Loading pc as last operation? Set appropriate flags.
                if (!writeback && reg_idx == INTREG_PC) {
                    (*uop)->setFlag(StaticInst::IsControl);
                    (*uop)->setFlag(StaticInst::IsIndirectControl);

                    if (!(condCode == COND_AL || condCode == COND_UC))
                        (*uop)->setFlag(StaticInst::IsCondControl);
                    else
                        (*uop)->setFlag(StaticInst::IsUncondControl);
                }
            } else {
                *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
            }

            if (up)
                addr += 4;
            else
                addr -= 4;
            --mem_ops;
        }

        // Load/store micro-op generated, go to next uop
        uop++;
    }

    if (writeback && ones) {
        // Perform writeback uop operation
        if (up)
            *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
        else
            *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);

        // Write PC after address writeback?
        if (pc_temp) {
            if (exception_ret) {
                *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
            } else {
                *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1);
            }
            (*uop)->setFlag(StaticInst::IsControl);
            (*uop)->setFlag(StaticInst::IsIndirectControl);

            if (!(condCode == COND_AL || condCode == COND_UC))
                (*uop)->setFlag(StaticInst::IsCondControl);
            else
                (*uop)->setFlag(StaticInst::IsUncondControl);

            if (rn == INTREG_SP)
                (*uop)->setFlag(StaticInst::IsReturn);

            ++uop;
        }
    }

    --uop;
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    /* Take the control flags from the last microop for the macroop */
    if ((*uop)->isControl())
        setFlag(StaticInst::IsControl);
    if ((*uop)->isCondCtrl())
        setFlag(StaticInst::IsCondControl);
    if ((*uop)->isUncondCtrl())
        setFlag(StaticInst::IsUncondControl);
    if ((*uop)->isIndirectCtrl())
        setFlag(StaticInst::IsIndirectControl);
    if ((*uop)->isReturn())
        setFlag(StaticInst::IsReturn);

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}
PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     uint32_t size, bool fp, bool load, bool noAlloc,
                     bool signExt, bool exclusive, bool acrel,
                     int64_t imm, AddrMode mode,
                     IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
    PredMacroOp(mnem, machInst, __opClass)
{
    bool post = (mode == AddrMd_PostIndex);
    bool writeback = (mode != AddrMd_Offset);
    if (load) {
        // Use integer rounding to round up loads of size 4
        numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
    } else {
        numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
    }
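    // For example, a 64-bit ldp (size == 8) rounds up to (8 + 4) / 8 == 1
    // paired-load microop, while the matching stp takes 8 / 4 == 2 store
    // microops, one per register, as emitted below.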
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (!post) {
        *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn,
                imm);
    }

    if (fp) {
        if (size == 16) {
            if (load) {
                *uop++ = new MicroLdFp16Uop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroLdFp16Uop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive,
                        acrel);
            } else {
                *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive,
                        acrel);
            }
        } else if (size == 8) {
            if (load) {
                *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
            } else {
                *uop++ = new MicroStrFpXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroStrFpXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 8, noAlloc, exclusive,
                        acrel);
            }
        } else if (size == 4) {
            if (load) {
                *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
            } else {
                *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
            }
        }
    } else {
        if (size == 8) {
            if (load) {
                *uop++ = new MicroLdPairUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
            } else {
                *uop++ = new MicroStrXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0,
                        0, noAlloc, exclusive, acrel);
                *uop++ = new MicroStrXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0,
                        size, noAlloc, exclusive, acrel);
            }
        } else if (size == 4) {
            if (load) {
                if (signExt) {
                    *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
                            post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                            acrel);
                } else {
                    *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
                            post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                            acrel);
                }
            } else {
                *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
            }
        }
    }

    if (writeback) {
        *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0,
                                   post ? imm : 0);
    }

    assert(uop == &microOps[numMicroops]);
    (*--uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}
BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 1 : 2;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
    } else {
        *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
        (*uop)->setDelayedCommit();
        *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
    }

    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();
}
BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
                               OpClass __opClass, bool load, IntRegIndex dest,
                               IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 2 : 3;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
    } else {
        *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
    }

    *uop = new MicroAddXiUop(machInst, base, base, imm);
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}
BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 2 : 3;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
    } else {
        *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
    }

    *uop = new MicroAddXiUop(machInst, base, base, imm);
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}
BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, IntRegIndex offset,
                             ArmExtendType type, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 1 : 2;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop = new MicroLdFp16RegUop(machInst, dest, base,
                                     offset, type, imm);
    } else {
        *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
                                       offset, type, imm);
        (*uop)->setDelayedCommit();
        *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
                                         offset, type, imm);
    }

    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();
}
BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex dest,
                             int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = 1;
    microOps = new StaticInstPtr[numMicroops];

    microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
    microOps[0]->setLastMicroop();
    microOps[0]->setFirstMicroop();
}
VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);
    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool deinterleave = (elems > 1);

    if (wb) numMicroops++;
    if (deinterleave) numMicroops += (regs / elems);
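    // For example, a vld2 of four registers (regs == 4, elems == 2) costs
    // two load microops plus two deinterleave microops, and one more
    // microop when rm requests a base-register writeback.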
    microOps = new StaticInstPtr[numMicroops];

    RegIndex rMid = deinterleave ? VecSpecialElem : vd * 2;

    uint32_t noAlign = TLB::MustBeOne;

    unsigned uopIdx = 0;
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }

    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }

    if (deinterleave) {
        switch (elems) {
          case 4:
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 3:
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
            }
            break;
          default:
            // Bad number of elements to deinterleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }

    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}
VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned loadSize = eBytes * elems;
    unsigned loadRegs M5_VAR_USED =
        (loadSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
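    // For example, a vld4 to one lane of 16-bit elements (eBytes == 2,
    // elems == 4) transfers loadSize == 8 bytes, which arrive as
    // loadRegs == 2 32-bit chunks.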
    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    RegIndex ufp0 = VecSpecialElem;

    unsigned uopIdx = 0;
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized load size
        microOps[uopIdx++] = new Unknown(machInst);
    }

    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }

    switch (elems) {
      case 4:
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            microOps[uopIdx++] = new Unknown(machInst);
        }
        break;
      case 3:
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            microOps[uopIdx++] = new Unknown(machInst);
        }
        break;
      case 2:
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            microOps[uopIdx++] = new Unknown(machInst);
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                microOps[uopIdx++] = new Unknown(machInst);
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }

    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}
VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);
    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool interleave = (elems > 1);

    if (wb) numMicroops++;
    if (interleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    uint32_t noAlign = TLB::MustBeOne;

    RegIndex rMid = interleave ? VecSpecialElem : vd * 2;

    unsigned uopIdx = 0;
    if (interleave) {
        switch (elems) {
          case 4:
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 3:
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
            }
            break;
          default:
            // Bad number of elements to interleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }

    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }

    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }

    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}
VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned storeSize = eBytes * elems;
    unsigned storeRegs M5_VAR_USED =
        (storeSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);

    assert(storeRegs > 0 && storeRegs <= 4);
    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    RegIndex ufp0 = VecSpecialElem;

    unsigned uopIdx = 0;
    switch (elems) {
      case 4:
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            microOps[uopIdx++] = new Unknown(machInst);
        }
        break;
      case 3:
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            microOps[uopIdx++] = new Unknown(machInst);
        }
        break;
      case 2:
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            microOps[uopIdx++] = new Unknown(machInst);
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                microOps[uopIdx++] = new Unknown(machInst);
            }
        }
        break;
      default:
        // Bad number of elements to pack
        microOps[uopIdx++] = new Unknown(machInst);
    }

    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized store size
        microOps[uopIdx++] = new Unknown(machInst);
    }

    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }

    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}
VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumVecV8ArchRegs;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;
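    // For example, three 64-bit registers transfer 24 bytes: one full
    // 16-byte memory microop plus one residuum microop for the final
    // 8 bytes.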
    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;
    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);
    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
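    // For example, "ld1 {v0.16b, v1.16b}, [x0], #32" uses the immediate
    // form (Rm == '11111'), so the base update below adds totNumBytes.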
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }
    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
          case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 1, i /* step */);
                  break;
          case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 2, i /* step */);
                  break;
          case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 3, i /* step */);
                  break;
          case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 4, i /* step */);
                  break;
          default: panic("Invalid number of registers");
        }
    }
    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; ++i) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumVecV8ArchRegs;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;
    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;
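    // Each interleave (marshaling) microop below handles at most 32 bytes
    // of source data, so transfers larger than 32 bytes need a second one.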
    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
          case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
                      machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                      numStructElems, 1, i /* step */);
                  break;
          case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
                      machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                      numStructElems, 2, i /* step */);
                  break;
          case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
                      machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                      numStructElems, 3, i /* step */);
                  break;
          case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
                      machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                      numStructElems, 4, i /* step */);
                  break;
          default: panic("Invalid number of registers");
        }
    }
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);
    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }
    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass),
    eSize(0), dataSize(0), numStructElems(0), index(0),
    wb(false), replicate(false)
{
    RegIndex vx = NumVecV8ArchRegs;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;
    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;
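    // Each unpack microop below distributes two structure elements, so an
    // ld3 to a single lane (numStructElems == 3) needs 3 / 2 + 1 == 2 of
    // them.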
    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);
    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }
    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroUnpackNeon64(
            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }
    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass),
    eSize(0), dataSize(0), numStructElems(0), index(0),
    wb(false), replicate(false)
{
    RegIndex vx = NumVecV8ArchRegs;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;
    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;
    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroPackNeon64(
            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);
    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }
    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex rn,
                             RegIndex vd, bool single, bool up,
                             bool writeback, bool load, uint32_t offset) :
    PredMacroOp(mnem, machInst, __opClass)
{
    int i = 0;
    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
    // to be functionally identical except that fldmx is deprecated. For now
    // we'll assume they're otherwise interchangeable.
    int count = (single ? offset : (offset / 2));
    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
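    // For example, "vldmia r0, {d0-d2}" encodes offset == 6 words, so
    // count == 3 doubles, and each double takes two microops (bottom and
    // top halves): six load microops in total.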
    microOps = new StaticInstPtr[numMicroops];

    int64_t addr = 0;

    if (!up)
        addr = 4 * offset;

    bool tempUp = up;
    for (int j = 0; j < count; j++) {
        if (load) {
            if (single) {
                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        } else {
            if (single) {
                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        }

        if (!tempUp) {
            addr -= (single ? 4 : 8);
            // The microops don't handle negative displacement, so once the
            // address hits zero, flip the polarity and start adding.
            if (addr <= 0) {
                tempUp = true;
                addr = -addr;
            }
        } else {
            addr += (single ? 4 : 8);
        }
    }

    if (writeback) {
        if (up) {
            microOps[i++] =
                new MicroAddiUop(machInst, rn, rn, 4 * offset);
        } else {
            microOps[i++] =
                new MicroSubiUop(machInst, rn, rn, 4 * offset);
        }
    }

    assert(numMicroops == i);
    microOps[numMicroops - 1]->setLastMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(curUop->get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
}
std::string
MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}
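// The disassemblers here and below produce strings of the rough shape
// "<mnemonic> ura, urb, #imm"; the exact mnemonic text comes from each
// microop's constructor, so any concrete rendering shown in comments is
// illustrative only.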
std::string
MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}
std::string
MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    ss << "[PC, CPSR]";
    return ss.str();
}
std::string
MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
    return ss.str();
}
std::string
MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    return ss.str();
}
std::string
MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    ss << ", ";
    printIntReg(ss, urc);
    return ss.str();
}
std::string
MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    if (isFloating())
        printFloatReg(ss, ura);
    else
        printIntReg(ss, ura);
    ss << ", [";
    printIntReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    ss << "]";
    return ss.str();
}
std::string
MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, dest);
    ss << ", ";
    printIntReg(ss, dest2);
    ss << ", [";
    printIntReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    ss << "]";
    return ss.str();
}

}