// ARM: Fix subtle bug in LDM.
// [gem5.git] / src / arch / arm / insts / macromem.cc
1 /*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007-2008 The Florida State University
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * Authors: Stephen Hines
41 */
42
43 #include "arch/arm/insts/macromem.hh"
44 #include "arch/arm/decoder.hh"
45 #include <sstream>
46
47 using namespace std;
48 using namespace ArmISAInst;
49
50 namespace ArmISA
51 {
52
// Break an LDM/STM (load/store multiple) macro-op into micro-ops:
// one uop that copies the base register into scratch register UREG0,
// one load/store uop per bit set in the register list, and, for the
// writeback forms, a base-update uop plus (for loads) a final move of
// the last loaded value out of scratch register UREG1.
MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
                       OpClass __opClass, IntRegIndex rn,
                       bool index, bool up, bool user, bool writeback,
                       bool load, uint32_t reglist) :
    PredMacroOp(mnem, machInst, __opClass)
{
    uint32_t regs = reglist;
    uint32_t ones = number_of_ones(reglist);
    // Remember that writeback adds a uop or two and the temp register adds one
    numMicroops = ones + (writeback ? (load ? 2 : 1) : 0) + 1;

    // It's technically legal to do a lot of nothing: an empty register
    // list still produces the single base-copy uop below.
    if (!ones)
        numMicroops = 1;

    microOps = new StaticInstPtr[numMicroops];
    // Byte offset (relative to the base copied into UREG0) of the first
    // transfer. Registers are always transferred lowest-numbered first,
    // so decrementing forms start at the top of the block.
    uint32_t addr = 0;

    if (!up)
        addr = (ones << 2) - 4;

    // Adjust for the before/after (index) addressing variant.
    if (!index)
        addr += 4;

    StaticInstPtr *uop = microOps;

    // Add 0 to Rn and stick it in ureg0.
    // This is equivalent to a move.
    *uop = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);

    unsigned reg = 0;
    unsigned regIdx = 0;
    // user with PC not in the list: LDM/STM of user-mode registers;
    // user with PC in the list: exception-return form of LDM.
    bool force_user = user & !bits(reglist, 15);
    bool exception_ret = user & bits(reglist, 15);

    for (int i = 0; i < ones; i++) {
        // Find the next register.
        while (!bits(regs, reg))
            reg++;
        replaceBits(regs, reg, 0);

        regIdx = reg;
        if (force_user) {
            // Redirect the access to the user-mode copy of the register.
            regIdx = intRegInMode(MODE_USER, regIdx);
        }

        if (load) {
            if (writeback && i == ones - 1) {
                // If it's a writeback and this is the last register
                // do the load into a temporary register which we'll move
                // into the final one later
                *++uop = new MicroLdrUop(machInst, INTREG_UREG1, INTREG_UREG0,
                        up, addr);
            } else {
                // Otherwise just do it normally
                if (reg == INTREG_PC && exception_ret) {
                    // This must be the exception return form of ldm.
                    *++uop = new MicroLdrRetUop(machInst, regIdx,
                                               INTREG_UREG0, up, addr);
                } else {
                    *++uop = new MicroLdrUop(machInst, regIdx,
                                             INTREG_UREG0, up, addr);
                }
            }
        } else {
            *++uop = new MicroStrUop(machInst, regIdx, INTREG_UREG0, up, addr);
        }

        // Step to the next word in transfer order.
        if (up)
            addr += 4;
        else
            addr -= 4;
    }

    if (writeback && ones) {
        // put the register update after we're done all loading
        if (up)
            *++uop = new MicroAddiUop(machInst, rn, rn, ones * 4);
        else
            *++uop = new MicroSubiUop(machInst, rn, rn, ones * 4);

        // If this was a load move the last temporary value into place
        // this way we can't take an exception after we update the base
        // register.
        // Note: 'reg'/'regIdx' still hold the last (highest) register
        // from the loop above.
        if (load && reg == INTREG_PC && exception_ret) {
            *++uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
            // NOTE(review): this warn() looks like leftover debug output
            // on a relatively common instruction form — confirm whether
            // it should be removed or demoted.
            warn("creating instruction with exception return at curTick:%d\n",
                 curTick());
        } else if (load) {
            *++uop = new MicroUopRegMov(machInst, regIdx, INTREG_UREG1);
            if (reg == INTREG_PC) {
                // The final move writes the PC, so mark it as a control
                // instruction for the branch predictor.
                (*uop)->setFlag(StaticInstBase::IsControl);
                (*uop)->setFlag(StaticInstBase::IsCondControl);
                (*uop)->setFlag(StaticInstBase::IsIndirectControl);
                // This is created as a RAS POP
                if (rn == INTREG_SP)
                    (*uop)->setFlag(StaticInstBase::IsReturn);

            }
        }
    }

    (*uop)->setLastMicroop();

    // All uops before the last must delay their commit so the whole
    // macro-op can be squashed as a unit.
    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
}
164
// Break a NEON VLDn (multiple n-element structures) into micro-ops:
// one or two wide memory-load uops, an optional base writeback uop, and
// (for n > 1) deinterleave uops that shuffle the loaded data from
// scratch FP registers into the architectural destination registers.
// regs is the number of double registers transferred, elems is the
// interleave factor n, inc is the register spacing of the destination.
VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    // Up to 16 bytes per memory uop, so more than two double regs needs
    // a second load uop.
    numMicroops = (regs > 2) ? 2 : 1;
    // rm == 15 encodes "no writeback".
    bool wb = (rm != 15);
    bool deinterleave = (elems > 1);

    if (wb) numMicroops++;
    if (deinterleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // When deinterleaving, load into scratch FP registers (starting just
    // past the architectural ones) and shuffle afterwards; otherwise load
    // straight into the destination (vd in double regs -> single-reg index).
    RegIndex rMid = deinterleave ? NumFloatArchRegs : vd * 2;

    uint32_t noAlign = TLB::MustBeOne;

    unsigned uopIdx = 0;
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        // Only the first access carries the alignment requirement.
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Writeback by register: Rn += Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // rm == 13 encodes writeback by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    if (deinterleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                // Two deinterleave uops, one per pair of double regs.
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
            }
            break;
          default:
            // Bad number of elements to deinterleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    // Every uop slot must have been filled exactly once.
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
258
// Break a NEON VLDn "single structure" form into micro-ops: one memory
// load of the whole structure into scratch FP register(s), an optional
// base writeback uop, and unpack uops that place the loaded element(s)
// into one lane (or, for the to-all-lanes form, all lanes) of the
// destination registers. 'all' selects the to-all-lanes variant, 'lane'
// the destination lane otherwise.
VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    // Total bytes moved: one element per structure slot.
    unsigned loadSize = eBytes * elems;
    // Number of FP registers needed to hold the loaded data; only used
    // by the asserts below (hence M5_VAR_USED).
    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
                                    sizeof(FloatRegBits);

    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;
    // rm == 15 encodes "no writeback".
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Scratch FP register just past the architectural ones.
    RegIndex ufp0 = NumFloatArchRegs;

    unsigned uopIdx = 0;
    // Pick a load uop sized to the whole structure; the element type
    // selects the memory access granularity.
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized load size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Writeback by register: Rn += Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // rm == 13 encodes writeback by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }
    // Unpack the loaded structure into the destination lane(s); the
    // uop type is chosen by the number of structure elements and the
    // element size, with the "All" variants replicating to every lane.
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        // VLD1 to-all-lanes may spread over two registers; one unpack
        // uop per destination register.
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    // Every uop slot must have been filled exactly once.
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
525
// Break a NEON VSTn (multiple n-element structures) into micro-ops:
// the mirror of VldMultOp — for n > 1, interleave uops first shuffle the
// source registers into scratch FP registers, then one or two wide
// memory-store uops write them out, followed by an optional base
// writeback uop.
VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    // Up to 16 bytes per memory uop, so more than two double regs needs
    // a second store uop.
    numMicroops = (regs > 2) ? 2 : 1;
    // rm == 15 encodes "no writeback".
    bool wb = (rm != 15);
    bool interleave = (elems > 1);

    if (wb) numMicroops++;
    if (interleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    uint32_t noAlign = TLB::MustBeOne;

    // When interleaving, store from scratch FP registers filled by the
    // interleave uops; otherwise store straight from the source
    // registers (vd in double regs -> single-reg index).
    RegIndex rMid = interleave ? NumFloatArchRegs : vd * 2;

    unsigned uopIdx = 0;
    if (interleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                // Two interleave uops, one per pair of double regs.
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
            }
            break;
          default:
            // Bad number of elements to interleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        // Only the first access carries the alignment requirement.
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Writeback by register: Rn += Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // rm == 13 encodes writeback by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    // Every uop slot must have been filled exactly once.
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
619
// Break a NEON VSTn "single structure" form into micro-ops: pack uops
// gather one lane of the source registers into scratch FP register(s),
// then one memory store writes the structure out, followed by an
// optional base writeback uop. There is no store-to-all-lanes form,
// hence the assert on 'all'.
VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(!all);
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    // Total bytes moved: one element per structure slot.
    unsigned storeSize = eBytes * elems;
    // Number of FP registers holding the packed data; only used by the
    // asserts below (hence M5_VAR_USED).
    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
                                     sizeof(FloatRegBits);

    assert(storeRegs > 0 && storeRegs <= 4);

    numMicroops = 1;
    // rm == 15 encodes "no writeback".
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Scratch FP register just past the architectural ones.
    RegIndex ufp0 = NumFloatArchRegs;

    unsigned uopIdx = 0;
    // Pack the selected lane of each source register into the scratch
    // register(s); the uop type is chosen by element count and size.
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        // One pack uop per source register.
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    // Pick a store uop sized to the whole structure; the element type
    // selects the memory access granularity.
    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Bad store size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Writeback by register: Rn += Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // rm == 13 encodes writeback by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }
    // Every uop slot must have been filled exactly once.
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
821
// Break a VFP load/store multiple (VLDM/VSTM and friends) into per-word
// micro-ops: one uop per single-precision register, two per
// double-precision register, plus an optional base writeback uop.
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex rn,
                             RegIndex vd, bool single, bool up,
                             bool writeback, bool load, uint32_t offset) :
    PredMacroOp(mnem, machInst, __opClass)
{
    int i = 0;

    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
    // to be functionally identical except that fldmx is deprecated. For now
    // we'll assume they're otherwise interchangeable.
    int count = (single ? offset : (offset / 2));
    if (count == 0 || count > NumFloatArchRegs)
        warn_once("Bad offset field for VFP load/store multiple.\n");
    if (count == 0) {
        // Force there to be at least one microop so the macroop makes sense.
        writeback = true;
    }
    if (count > NumFloatArchRegs)
        count = NumFloatArchRegs;

    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
    microOps = new StaticInstPtr[numMicroops];

    // Byte offset of the current transfer relative to the base register.
    int64_t addr = 0;

    // Decrementing forms start at the top of the block and work down.
    if (!up)
        addr = 4 * offset;

    bool tempUp = up;
    for (int j = 0; j < count; j++) {
        if (load) {
            if (single) {
                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                // A double is transferred as two word-sized uops
                // (bottom then top half).
                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                // NOTE(review): the extra +/-4 for the top half uses the
                // original 'up', not 'tempUp' — confirm this is intended
                // once the offset polarity has been flipped below.
                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        } else {
            if (single) {
                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        }
        if (!tempUp) {
            addr -= (single ? 4 : 8);
            // The microops don't handle negative displacement, so if we
            // hit zero, flip the polarity and start adding.
            if (addr <= 0) {
                tempUp = true;
                addr = -addr;
            }
        } else {
            addr += (single ? 4 : 8);
        }
    }

    if (writeback) {
        // Update the base register by the total transfer size.
        if (up) {
            microOps[i++] =
                new MicroAddiUop(machInst, rn, rn, 4 * offset);
        } else {
            microOps[i++] =
                new MicroSubiUop(machInst, rn, rn, 4 * offset);
        }
    }

    // Every uop slot must have been filled exactly once.
    assert(numMicroops == i);
    microOps[numMicroops - 1]->setLastMicroop();

    // All uops before the last must delay their commit so the whole
    // macro-op can be squashed as a unit.
    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
}
907
908 std::string
909 MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
910 {
911 std::stringstream ss;
912 printMnemonic(ss);
913 printReg(ss, ura);
914 ss << ", ";
915 printReg(ss, urb);
916 ss << ", ";
917 ccprintf(ss, "#%d", imm);
918 return ss.str();
919 }
920
921 std::string
922 MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
923 {
924 std::stringstream ss;
925 printMnemonic(ss);
926 ss << "[PC,CPSR]";
927 return ss.str();
928 }
929
930 std::string
931 MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
932 {
933 std::stringstream ss;
934 printMnemonic(ss);
935 printReg(ss, ura);
936 ss << ", ";
937 printReg(ss, urb);
938 return ss.str();
939 }
940
941 std::string
942 MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
943 {
944 std::stringstream ss;
945 printMnemonic(ss);
946 printReg(ss, ura);
947 ss << ", ";
948 printReg(ss, urb);
949 ss << ", ";
950 printReg(ss, urc);
951 return ss.str();
952 }
953
954 std::string
955 MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
956 {
957 std::stringstream ss;
958 printMnemonic(ss);
959 printReg(ss, ura);
960 ss << ", [";
961 printReg(ss, urb);
962 ss << ", ";
963 ccprintf(ss, "#%d", imm);
964 ss << "]";
965 return ss.str();
966 }
967
968 }