src/arch/power/isa/decoder.isa

   1 // -*- mode:c++ -*-
   2
   3 // Copyright (c) 2009 The University of Edinburgh
   4 // Copyright (c) 2021 IBM Corporation
   5 // All rights reserved.
   6 //
   7 // Redistribution and use in source and binary forms, with or without
   8 // modification, are permitted provided that the following conditions are
   9 // met: redistributions of source code must retain the above copyright
  10 // notice, this list of conditions and the following disclaimer;
  11 // redistributions in binary form must reproduce the above copyright
  12 // notice, this list of conditions and the following disclaimer in the
  13 // documentation and/or other materials provided with the distribution;
  14 // neither the name of the copyright holders nor the names of its
  15 // contributors may be used to endorse or promote products derived from
  16 // this software without specific prior written permission.
  17 //
  18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30 ////////////////////////////////////////////////////////////////////
  31 //
  32 // The actual Power ISA decoder
  33 // ------------------------------
  34 //
  35 // I've used the Power ISA Book I v2.06 for instruction formats,
  36 // opcode numbers, register names, etc.
  37 //
  38 decode PO default Unknown::unknown() {
  39
  40     // Unconditionally branch to a PC-relative or absolute address.
  41     format BranchOp {
             // First snippet: the target NIA is CIA + disp (relative form).
             // Second snippet: the target NIA is disp itself (absolute form).
  42         18: b({{ NIA = CIA + disp; }},
  43               {{ NIA = disp; }});
  44     }
  45
  46     // Conditionally branch to a PC-relative or absolute address based
  47     // on CR and CTR.
  48     format BranchDispCondOp {
             // First snippet: the target NIA is CIA + disp (relative form).
             // Second snippet: the target NIA is disp itself (absolute form).
  49         16: bc({{ NIA = CIA + disp; }},
  50                {{ NIA = disp; }});
  51     }
  52
  53     19: decode XL_XO {
  54
  55         // Conditionally branch to an address in a register based on
  56         // either CR only or both CR and CTR.
  57         format BranchRegCondOp {
                 // The target is read from LR, CTR or TAR and ANDed with
                 // -4ULL, which clears its two low-order bits. Only bclr
                 // carries the IsReturn flag.
  58             16: bclr({{ NIA = LR & -4ULL; }}, true, [ IsReturn ]);
  59             528: bcctr({{ NIA = CTR & -4ULL; }});
  60             560: bctar({{ NIA = TAR & -4ULL; }}, true);
  61         }
  62
  63         // Condition register manipulation instructions.
             // Every instruction below reads the single CR bits selected by ba
             // and bb, combines them, and inserts the one-bit result at the
             // position selected by bt. Bit positions are handed to bits() and
             // insertBits() as (31 - n).
  64         format CondLogicOp {
                 // bt = ba AND bb
  65             257: crand({{
  66                 uint32_t crBa = bits(CR, 31 - ba);
  67                 uint32_t crBb = bits(CR, 31 - bb);
  68                 CR = insertBits(CR, 31 - bt, crBa & crBb);
  69             }});
  70
                 // bt = ba OR bb
  71             449: cror({{
  72                 uint32_t crBa = bits(CR, 31 - ba);
  73                 uint32_t crBb = bits(CR, 31 - bb);
  74                 CR = insertBits(CR, 31 - bt, crBa | crBb);
  75             }});
  76
                 // bt = NOT (ba AND bb)
  77             255: crnand({{
  78                 uint32_t crBa = bits(CR, 31 - ba);
  79                 uint32_t crBb = bits(CR, 31 - bb);
  80                 CR = insertBits(CR, 31 - bt, !(crBa & crBb));
  81             }});
  82
                 // bt = ba XOR bb
  83             193: crxor({{
  84                 uint32_t crBa = bits(CR, 31 - ba);
  85                 uint32_t crBb = bits(CR, 31 - bb);
  86                 CR = insertBits(CR, 31 - bt, crBa ^ crBb);
  87             }});
  88
                 // bt = NOT (ba OR bb)
  89             33: crnor({{
  90                 uint32_t crBa = bits(CR, 31 - ba);
  91                 uint32_t crBb = bits(CR, 31 - bb);
  92                 CR = insertBits(CR, 31 - bt, !(crBa | crBb));
  93             }});
  94
                 // bt = 1 when ba and bb hold the same value, else 0
  95             289: creqv({{
  96                 uint32_t crBa = bits(CR, 31 - ba);
  97                 uint32_t crBb = bits(CR, 31 - bb);
  98                 CR = insertBits(CR, 31 - bt, crBa == crBb);
  99             }});
 100
                 // bt = ba AND (NOT bb); the logical ! keeps the value one bit
                 // wide
 101             129: crandc({{
 102                 uint32_t crBa = bits(CR, 31 - ba);
 103                 uint32_t crBb = bits(CR, 31 - bb);
 104                 CR = insertBits(CR, 31 - bt, crBa & !crBb);
 105             }});
 106
                 // bt = ba OR (NOT bb)
 107             417: crorc({{
 108                 uint32_t crBa = bits(CR, 31 - ba);
 109                 uint32_t crBb = bits(CR, 31 - bb);
 110                 CR = insertBits(CR, 31 - bt, crBa | !crBb);
 111             }});
 112         }
 113
 114         format CondMoveOp {
                 // mcrf copies the four-bit CR field selected by bfa into the
                 // four-bit CR field selected by bf; the other CR bits are
                 // carried over unchanged by insertBits().
 115             0: mcrf({{
 116                 uint32_t crBfa = bits(CR, 31 - bfa*4, 28 - bfa*4);
 117                 CR = insertBits(CR, 31 - bf*4, 28 - bf*4, crBfa);
 118             }});
 119         }
 120
 121         format MiscOp {
                 // isync has an empty execute snippet; the only thing this
                 // entry attaches to it is the IsSerializeAfter flag.
 122             150: isync({{ }}, [ IsSerializeAfter ]);
 123         }
 124
 125         default: decode DX_XO {
                 // Encodings of primary opcode 19 that match none of the
                 // XL_XO cases above are decoded again on the DX_XO field.
 126             format IntDispArithOp {
                     // addpcis: Rt = NIA plus disp shifted left by 16 bits.
 127                 2: addpcis({{ Rt = NIA + (disp << 16); }});
 128             }
 129         }
 130     }
 131
         // System call: the snippet does not touch any register itself, it
         // returns a freshly allocated SESyscallFault.
 132     17: IntOp::sc({{ return std::make_shared<SESyscallFault>(); }});
 133
 134     format LoadDispOp {
             // Each load writes Rt from a typed view of Mem. The suffix
             // presumably selects width and signedness (ub/uh/uw = unsigned
             // byte/halfword/word, sh = signed halfword) -- confirm against
             // the operand definitions.
 135         34: lbz({{ Rt = Mem_ub; }});
 136         40: lhz({{ Rt = Mem_uh; }});
 137         42: lha({{ Rt = Mem_sh; }});
 138         32: lwz({{ Rt = Mem_uw; }});
 139     }
 140
 141     58: decode DS_XO {
             // Primary opcode 58 is split further on the DS_XO field.
             // lwa reads the Mem_sw view (presumably a signed word -- confirm);
             // ld and ldu read the untyped Mem operand.
 142         format LoadDispShiftOp {
 143             2: lwa({{ Rt = Mem_sw; }});
 144             0: ld({{ Rt = Mem; }});
 145         }
 146
 147         format LoadDispShiftUpdateOp {
 148             1: ldu({{ Rt = Mem; }});
 149         }
 150     }
 151
 152     62: decode DS_XO {
             // Primary opcode 62 is split further on the DS_XO field. Both
             // entries store the full Rs operand to Mem; they differ only in
             // the format used (plain vs. update).
 153         format StoreDispShiftOp {
 154             0: std({{ Mem = Rs; }});
 155         }
 156
 157         format StoreDispShiftUpdateOp {
 158             1: stdu({{ Mem = Rs; }});
 159         }
 160     }
 161
 162     format LoadDispUpdateOp {
             // Same data snippets as lbz/lhz/lha/lwz, but generated through
             // the LoadDispUpdateOp format.
 163         35: lbzu({{ Rt = Mem_ub; }});
 164         41: lhzu({{ Rt = Mem_uh; }});
 165         43: lhau({{ Rt = Mem_sh; }});
 166         33: lwzu({{ Rt = Mem_uw; }});
 167     }
 168
 169     format StoreDispOp {
             // Each store copies the same-width view of Rs into Mem
             // (ub, uh and uw views on both sides).
 170         38: stb({{ Mem_ub = Rs_ub; }});
 171         44: sth({{ Mem_uh = Rs_uh; }});
 172         36: stw({{ Mem_uw = Rs_uw; }});
 173     }
 174
 175     format StoreDispUpdateOp {
             // Same data snippets as stb/sth/stw, but generated through the
             // StoreDispUpdateOp format.
 176         39: stbu({{ Mem_ub = Rs_ub; }});
 177         45: sthu({{ Mem_uh = Rs_uh; }});
 178         37: stwu({{ Mem_uw = Rs_uw; }});
 179     }
 180
 181     format IntImmArithCheckRaOp {
             // Each instruction supplies two snippets: the first uses Ra as a
             // source operand, the second does not read Ra at all (going by
             // the format name, presumably the variant chosen when the Ra
             // field is 0 -- confirm in the format definition).
             // Every snippet is a complete, semicolon-terminated statement.
 182         14: addi({{ Rt = Ra + simm; }},
 183                  {{ Rt = simm; }});
 184         15: addis({{ Rt = Ra + (simm << 16); }},
 185                   {{ Rt = simm << 16; }});
 186     }
 187
 188     format IntImmArithOp {
             // The trailing boolean arguments are options of this format,
             // whose definition is not part of this section (presumably they
             // request carry and CR0 updates -- confirm).
 189         12: addic({{
 190             uint64_t src = Ra;
 191             Rt = src + simm;
 192         }},
 193         true);
 194
 195         13: addic_({{
 196             uint64_t src = Ra;
 197             Rt = src + simm;
 198         }},
 199         true, true);
 200
             // subfic: simm - Ra, written as ~Ra + simm + 1.
 201         8: subfic({{
 202             uint64_t src = ~Ra;
 203             Rt = src + simm + 1;
 204         }},
 205         true);
 206
             // mulli: product of the Ra_sd view and simm, kept in 64 bits.
 207         7: mulli({{
 208             int64_t res = Ra_sd * simm;
 209             Rt = res;
 210         }});
 211     }
 212
 213     format IntImmTrapOp {
             // Each snippet is only the expression for the register-side
             // operand: twi passes the Ra_sw view, tdi the full Ra. The
             // comparison itself is supplied by the format.
 214         3: twi({{ Ra_sw }});
 215         2: tdi({{ Ra }});
 216     }
 217
 218     4: decode VA_XO {
 219
 220         // Arithmetic instructions that use source registers Ra, Rb and Rc,
 221         // with destination register Rt.
             // multiplyAdd() yields a two-element tuple; maddhd and maddhdu keep
             // the second element, maddld keeps the first (presumably the high
             // and low halves of the wide result -- confirm in multiplyAdd()).
 222         format IntArithOp {
                 // Signed operands, second tuple element.
 223             48: maddhd({{
 224                 int64_t res;
 225                 std::tie(std::ignore, res) = multiplyAdd(Ra_sd, Rb_sd, Rc_sd);
 226                 Rt = res;
 227             }});
 228
                 // Unsigned operands, second tuple element.
 229             49: maddhdu({{
 230                 uint64_t res;
 231                 std::tie(std::ignore, res) = multiplyAdd(Ra, Rb, Rc);
 232                 Rt = res;
 233             }});
 234
                 // Signed operands, first tuple element.
 235             51: maddld({{
 236                 uint64_t res;
 237                 std::tie(res, std::ignore) = multiplyAdd(Ra_sd, Rb_sd, Rc_sd);
 238                 Rt = res;
 239             }});
 240         }
 241     }
 242
 243     format IntImmCompOp {
             // cmpi: signed compare of Ra with simm. When length is set the
             // full Ra_sd value is compared; otherwise Ra_sd is first truncated
             // to int32_t. xer.so is forwarded to makeCRField().
 244         11: cmpi({{
 245             if (length) {
 246                 cr = makeCRField(Ra_sd, simm, xer.so);
 247             } else {
 248                 cr = makeCRField((int32_t)Ra_sd, simm, xer.so);
 249             }
 250         }});
 251     }
 252
 253     format IntImmCompLogicOp {
             // cmpli: unsigned compare of Ra with uimm. When length is set the
             // full Ra value is compared; otherwise Ra is first truncated to
             // uint32_t. xer.so is forwarded to makeCRField().
 254         10: cmpli({{
 255             if (length) {
 256                 cr = makeCRField(Ra, uimm, xer.so);
 257             } else {
 258                 cr = makeCRField((uint32_t)Ra, uimm, xer.so);
 259             }
 260         }});
 261      }
 262
 263     format IntImmLogicOp {
             // Rs is the source and Ra the destination. The "s" variants
             // shift uimm left by 16 bits before combining. Only the two AND
             // forms pass the extra `true` option to the format.
 264         24: ori({{ Ra = Rs | uimm; }});
 265         25: oris({{ Ra = Rs | (uimm << 16); }});
 266         26: xori({{ Ra = Rs ^ uimm; }});
 267         27: xoris({{ Ra = Rs ^ (uimm << 16); }});
 268         28: andi_({{ Ra = Rs & uimm; }},
 269                   true);
 270         29: andis_({{ Ra = Rs & (uimm << 16); }},
 271                    true);
 272     }
 273
 274     format IntRotateOp {
             // rlwinm: rotate Rs by the immediate shift, then keep only the
             // bits selected by bitmask(maskBeg, maskEnd).
 275         21: rlwinm({{
 276             uint64_t res;
 277             res = rotate(Rs, shift);
 278             res = res & bitmask(maskBeg, maskEnd);
 279             Ra = res;
 280         }});
 281
             // rlwnm: as rlwinm, but the rotate amount comes from Rb.
 282         23: rlwnm({{
 283             uint64_t res;
 284             res = rotate(Rs, Rb);
 285             res = res & bitmask(maskBeg, maskEnd);
 286             Ra = res;
 287         }});
 288
             // rlwimi: the rotated Rs bits replace Ra under the mask; the Ra
             // bits outside the mask are preserved.
 289         20: rlwimi({{
 290             uint64_t res, mask;
 291             mask = bitmask(maskBeg, maskEnd);
 292             res = rotate(Rs, shift);
 293             res = (res & mask) | (Ra & ~mask);
 294             Ra = res;
 295         }});
 296     }
 297
 298     // There are a large number of instructions that have the same primary
 299     // opcode (PO) of 31. In this case, the instructions are of different
 300     // forms. For every form, the XO fields may vary in position and width.
 301     // The X, XFL, XFX and XL form instructions use bits 21 - 30 and the
 302     // XO form instructions use bits 22 - 30 as extended opcode (XO). To
 303     // avoid conflicts, instructions of each form have to be defined under
 304     // separate decode blocks. However, only a single decode block can be
 305     // associated with a particular PO and it will recognize only one type
 306     // of XO field. A solution for associating decode blocks for the other
 307     // types of XO fields with the same PO is to have the other blocks as
 308     // nested default cases.
 309     31: decode X_XO {
 310
 311         // All loads with an index register. The non-update versions
 312         // all use the value 0 if Ra == R0, not the value contained in
 313         // R0. Others update Ra with the effective address. In all cases,
 314         // Ra and Rb are source registers, Rt is the destination (the
             // floating-point loads at the end of the format write Ft instead).
             // The l*arx entries carry a second snippet that records a
             // reservation: Rsv = 1, RsvLen = access size in bytes, RsvAddr = EA.
             // The l*brx entries pass the loaded value through swap_byte().
 315         format LoadIndexOp {
 316             87: lbzx({{ Rt = Mem_ub; }});
 317             52: lbarx({{ Rt = Mem_ub; }},
 318                       {{ Rsv = 1; RsvLen = 1; RsvAddr = EA; }});
 319             279: lhzx({{ Rt = Mem_uh; }});
 320             343: lhax({{ Rt = Mem_sh; }});
 321             116: lharx({{ Rt = Mem_uh;}},
 322                        {{  Rsv = 1; RsvLen = 2; RsvAddr = EA; }});
 323             790: lhbrx({{ Rt = swap_byte(Mem_uh); }});
 324             23: lwzx({{ Rt = Mem_uw; }});
 325             341: lwax({{ Rt = Mem_sw; }});
 326             20: lwarx({{ Rt = Mem_uw; }},
 327                       {{ Rsv = 1; RsvLen = 4; RsvAddr = EA; }});
 328             534: lwbrx({{ Rt = swap_byte(Mem_uw); }});
 329             21: ldx({{ Rt = Mem; }});
 330             84: ldarx({{ Rt = Mem_ud; }},
 331                       {{ Rsv = 1; RsvLen = 8; RsvAddr = EA; }});
 332             532: ldbrx({{ Rt = swap_byte(Mem); }});
 333             535: lfsx({{ Ft_sf = Mem_sf; }});
 334             599: lfdx({{ Ft = Mem_df; }});
 335             855: lfiwax({{ Ft_uw = Mem; }});
 336         }
 337
 338         format LoadIndexUpdateOp {
                 // Indexed loads generated through the update format. The data
                 // snippets match the corresponding non-update entries; the
                 // last two write the floating-point register Ft.
 339             119: lbzux({{ Rt = Mem_ub; }});
 340             311: lhzux({{ Rt = Mem_uh; }});
 341             375: lhaux({{ Rt = Mem_sh; }});
 342             55: lwzux({{ Rt = Mem_uw; }});
 343             373: lwaux({{ Rt = Mem_sw; }});
 344             53: ldux({{ Rt = Mem; }});
 345             567: lfsux({{ Ft_sf = Mem_sf; }});
 346             631: lfdux({{ Ft = Mem_df; }});
 347         }
 348
 349         format StoreIndexOp {
                 // Indexed stores. Plain entries copy a view of Rs into Mem; the
                 // st*brx entries pass the value through swap_byte() first.
                 //
                 // The st*cx entries carry a second snippet that evaluates the
                 // reservation: store_performed is true only when Rsv is set,
                 // RsvLen equals the access size in bytes and RsvAddr equals EA.
                 // cr0 then receives 0x2 (or 0x0) ORed with xer.so, and the
                 // reservation is cleared (Rsv = 0) in either case.
                 // NOTE(review): as written, store_performed only feeds cr0;
                 // whether the memory write itself is suppressed on failure is
                 // decided by the format, not visible here.
                 //
                 // Every snippet is a complete, semicolon-terminated statement.
 350             215: stbx({{ Mem_ub = Rs_ub; }});
 351             694: stbcx({{
 352                 Mem_ub = Rs_ub;
 353             }}, {{
 354                 bool store_performed = false;
 355                 if (Rsv) {
 356                     if (RsvLen == 1) {
 357                         if (RsvAddr == EA) {
 358                             store_performed = true;
 359                         }
 360                     }
 361                 }
 362                 Xer xer = XER;
 363                 Cr cr = CR;
 364                 cr.cr0 = ((store_performed ? 0x2 : 0x0) | xer.so);
 365                 CR = cr;
 366                 Rsv = 0;
 367             }});
 368             407: sthx({{ Mem_uh = Rs_uh; }});
 369             726: sthcx({{
 370                 Mem_uh = Rs_uh;
 371             }}, {{
 372                 bool store_performed = false;
 373                 if (Rsv) {
 374                     if (RsvLen == 2) {
 375                         if (RsvAddr == EA) {
 376                             store_performed = true;
 377                         }
 378                     }
 379                 }
 380                 Xer xer = XER;
 381                 Cr cr = CR;
 382                 cr.cr0 = ((store_performed ? 0x2 : 0x0) | xer.so);
 383                 CR = cr;
 384                 Rsv = 0;
 385             }});
 386             918: sthbrx({{ Mem_uh = swap_byte(Rs_uh); }});
 387             151: stwx({{ Mem_uw = Rs_uw; }});
 388             150: stwcx({{
 389                 Mem_uw = Rs_uw;
 390             }}, {{
 391                 bool store_performed = false;
 392                 if (Rsv) {
 393                     if (RsvLen == 4) {
 394                         if (RsvAddr == EA) {
 395                             store_performed = true;
 396                         }
 397                     }
 398                 }
 399                 Xer xer = XER;
 400                 Cr cr = CR;
 401                 cr.cr0 = ((store_performed ? 0x2 : 0x0) | xer.so);
 402                 CR = cr;
 403                 Rsv = 0;
 404             }});
 405             662: stwbrx({{ Mem_uw = swap_byte(Rs_uw); }});
 406             149: stdx({{ Mem = Rs; }});
 407             214: stdcx({{
 408                 Mem = Rs;
 409             }}, {{
 410                 bool store_performed = false;
 411                 if (Rsv) {
 412                     if (RsvLen == 8) {
 413                         if (RsvAddr == EA) {
 414                             store_performed = true;
 415                         }
 416                     }
 417                 }
 418                 Xer xer = XER;
 419                 Cr cr = CR;
 420                 cr.cr0 = ((store_performed ? 0x2 : 0x0) | xer.so);
 421                 CR = cr;
 422                 Rsv = 0;
 423             }});
 424             660: stdbrx({{ Mem = swap_byte(Rs); }});
 425         }
 426
 427         format StoreIndexUpdateOp {
                 // Indexed stores generated through the update format; the data
                 // snippets match stbx/sthx/stwx/stdx.
 428             247: stbux({{ Mem_ub = Rs_ub; }});
 429             439: sthux({{ Mem_uh = Rs_uh; }});
 430             183: stwux({{ Mem_uw = Rs_uw; }});
 431             181: stdux({{ Mem = Rs; }});
 432         }
 433
 434         format IntArithOp {
                 // Remainder instructions. Rt is set to 0 when the divisor is
                 // zero and, in the signed forms, when the most negative value
                 // is divided by -1; otherwise Rt = src1 % src2.
                 //
                 // modsw: signed, operands taken from the Ra_sw / Rb_sw views
                 // and widened to 64 bits before the operation.
 435             779: modsw({{
 436                 int64_t src1 = Ra_sw;
 437                 int64_t src2 = Rb_sw;
 438                 if ((src1 != INT32_MIN || src2 != -1) && src2 != 0) {
 439                     Rt = src1 % src2;
 440                 } else {
 441                     Rt = 0;
 442                 }
 443             }});
 444
                 // moduw: unsigned, operands taken from the Ra_uw / Rb_uw views.
 445             267: moduw({{
 446                 uint64_t src1 = Ra_uw;
 447                 uint64_t src2 = Rb_uw;
 448                 if (src2 != 0) {
 449                     Rt = src1 % src2;
 450                 } else {
 451                     Rt = 0;
 452                 }
 453             }});
 454
                 // modsd: signed 64-bit; the INT64_MIN % -1 case is excluded
                 // before the % operator is evaluated.
 455             777: modsd({{
 456                 int64_t src1 = Ra_sd;
 457                 int64_t src2 = Rb_sd;
 458                 if ((src1 != INT64_MIN || src2 != -1) && src2 != 0) {
 459                     Rt = src1 % src2;
 460                 } else {
 461                     Rt = 0;
 462                 }
 463             }});
 464
                 // modud: unsigned 64-bit.
 465             265: modud({{
 466                 uint64_t src1 = Ra;
 467                 uint64_t src2 = Rb;
 468                 if (src2 != 0) {
 469                     Rt = src1 % src2;
 470                 } else {
 471                     Rt = 0;
 472                 }
 473             }});
 474         }
 475
 476         format IntCompOp {
                 // cmp: signed compare of Ra with Rb; both are truncated to
                 // int32_t first unless length is set.
 477             0: cmp({{
 478                 if (length) {
 479                     cr = makeCRField(Ra_sd, Rb_sd, xer.so);
 480                 } else {
 481                     cr = makeCRField((int32_t)Ra_sd, (int32_t)Rb_sd, xer.so);
 482                 }
 483             }});
 484
                 // cmpl: unsigned compare of Ra with Rb; both are truncated to
                 // uint32_t first unless length is set.
 485             32: cmpl({{
 486                 if (length) {
 487                     cr = makeCRField(Ra, Rb, xer.so);
 488                 } else {
 489                     cr = makeCRField((uint32_t)Ra, (uint32_t)Rb, xer.so);
 490                 }
 491             }});
 492
                 // cmprb: tests whether the Ra_ub byte lies in a byte range held
                 // in Rb_uw. The lowest byte of src2 is the lower bound and the
                 // next byte the upper bound (both inclusive). When length is
                 // set, the following two bytes form a second range and either
                 // match counts. src2 is shifted in place (>>=) as the bytes
                 // are peeled off. The match flag ends up in bit 2 of cr.
 493             192: cmprb({{
 494                 uint32_t src1 = Ra_ub;
 495                 uint32_t src2 = Rb_uw;
 496                 uint8_t src2lo = src2 & 0xff;
 497                 uint8_t src2hi = (src2 >>= 8) & 0xff;
 498                 uint32_t res = (src2lo <= src1) & (src1 <= src2hi);
 499                 if (length) {
 500                     src2lo = (src2 >>= 8) & 0xff;
 501                     src2hi = (src2 >>= 8) & 0xff;
 502                     res = ((src2lo <= src1) & (src1 <= src2hi)) | res;
 503                 }
 504                 cr = res << 2;
 505             }});
 506
                 // cmpeqb: bit 2 of cr is set when any of the eight bytes of Rb
                 // equals the Ra_ub byte. Ra_ub * m1 replicates the byte into
                 // every lane, the XOR turns matching lanes into zero, and the
                 // (x - m1) & ~x & m2 expression is non-zero exactly when some
                 // lane of x is zero.
 507             224: cmpeqb({{
 508                 // Based on "Determine if a word has a byte equal to n"
 509                 // from https://graphics.stanford.edu/~seander/bithacks.html
 510                 const uint64_t m1 = 0x0101010101010101;
 511                 const uint64_t m2 = 0x8080808080808080;
 512                 uint64_t res = Rb ^ (Ra_ub * m1);
 513                 res = (res - m1) & ~res & m2;
 514                 cr = (res != 0) << 2;
 515             }});
 516         }
 517
 518         // Integer logic instructions use source registers Rs and Rb,
 519         // with destination register Ra.
             // Entries that end in `, true` pass an extra option to the format
             // (its meaning is defined with the format, not in this section).
 520         format IntLogicOp {
 521             28: and({{ Ra = Rs & Rb; }}, true);
 522             316: xor({{ Ra = Rs ^ Rb; }}, true);
 523             476: nand({{ Ra = ~(Rs & Rb); }}, true);
 524             444: or({{ Ra = Rs | Rb; }}, true);
 525             124: nor({{ Ra = ~(Rs | Rb); }}, true);
 526             60: andc({{ Ra = Rs & ~Rb; }}, true);
 527             284: eqv({{ Ra = ~(Rs ^ Rb); }}, true);
 528             412: orc({{ Ra = Rs | ~Rb; }}, true);
 529             954: extsb({{ Ra = Rs_sb; }}, true);
 530             922: extsh({{ Ra = Rs_sh; }}, true);
 531             986: extsw({{ Ra = Rs_sw; }}, true);
 532             26: cntlzw({{ Ra = findLeadingZeros(Rs_uw); }}, true);
 533             58: cntlzd({{ Ra = findLeadingZeros(Rs); }}, true);
 534             538: cnttzw({{ Ra = findTrailingZeros(Rs_uw); }}, true);
 535             570: cnttzd({{ Ra = findTrailingZeros(Rs); }}, true);
 536
                 // cmpb: for each of the eight byte lanes, the result lane is
                 // 0xff when Rs and Rb agree in that lane and 0x00 otherwise.
 537             508: cmpb({{
 538                 uint64_t mask = 0xff;
 539                 uint64_t res = 0;
 540                 for (int i = 0; i < 8; ++i) {
 541                     if ((Rs & mask) == (Rb & mask)) {
 542                         res |= mask;
 543                     }
 544                     mask <<= 8;
 545                 }
 546                 Ra = res;
 547             }});
 548
                 // popcntb: the pairwise summation stops after the 4-bit step,
                 // so each byte of Ra holds the number of set bits in the
                 // corresponding byte of Rs.
 549             122: popcntb({{
 550                 // Based on "Counting bits set, in parallel"
 551                 // from https://graphics.stanford.edu/~seander/bithacks.html
 552                 const uint64_t m1 = 0x5555555555555555ULL;
 553                 const uint64_t m2 = 0x3333333333333333ULL;
 554                 const uint64_t m4 = 0x0f0f0f0f0f0f0f0fULL;
 555                 uint64_t res = Rs;
 556                 res = (res & m1) + ((res >> 1) & m1);
 557                 res = (res & m2) + ((res >> 2) & m2);
 558                 res = (res & m4) + ((res >> 4) & m4);
 559                 Ra = res;
 560             }});
 561
                 // popcntw: each 32-bit half of Ra holds the number of set bits
                 // in the corresponding half of Rs. The builtin path counts the
                 // two halves separately; the fallback continues the pairwise
                 // summation up to the 16-bit step.
 562             378: popcntw({{
 563             #if defined(__GNUC__) || (defined(__clang__) && \
 564                     __has_builtin(__builtin_popcount))
 565                 uint64_t src = Rs;
 566                 uint64_t res = __builtin_popcount(src >> 32);
 567                 res = (res << 32) | __builtin_popcount(src);
 568             #else
 569                 // Based on "Counting bits set, in parallel"
 570                 // from https://graphics.stanford.edu/~seander/bithacks.html
 571                 const uint64_t m1 = 0x5555555555555555ULL;
 572                 const uint64_t m2 = 0x3333333333333333ULL;
 573                 const uint64_t m4 = 0x0f0f0f0f0f0f0f0fULL;
 574                 const uint64_t m8 = 0x00ff00ff00ff00ffULL;
 575                 const uint64_t m16 = 0x0000ffff0000ffffULL;
 576                 uint64_t res = Rs;
 577                 res = (res & m1) + ((res >> 1) & m1);
 578                 res = (res & m2) + ((res >> 2) & m2);
 579                 res = (res & m4) + ((res >> 4) & m4);
 580                 res = (res & m8) + ((res >> 8) & m8);
 581                 res = (res & m16) + ((res >> 16) & m16);
 582             #endif
 583                 Ra = res;
 584             }});
 585
 586             506: popcntd({{ Ra = popCount(Rs); }});
 587
                 // prtyw: after the two XOR folds, bit 0 is the XOR of the
                 // lowest bit of the four bytes of the low word and bit 32 the
                 // same for the high word; all other bits are masked off.
 588             154: prtyw({{
 589                 uint64_t res = Rs;
 590                 res = res ^ (res >> 16);
 591                 res = res ^ (res >> 8);
 592                 res = res & 0x100000001;
 593                 Ra = res;
 594             }});
 595
                 // prtyd: after the three XOR folds, bit 0 is the XOR of the
                 // lowest bit of all eight bytes; all other bits are masked off.
 596             186: prtyd({{
 597                 uint64_t res = Rs;
 598                 res = res ^ (res >> 32);
 599                 res = res ^ (res >> 16);
 600                 res = res ^ (res >> 8);
 601                 res = res & 0x1;
 602                 Ra = res;
 603             }});
 604
                 // bpermd: byte i of Rs (counted from the least-significant
                 // end) is an index; when it is below 64 and bit (63 - index)
                 // of Rb is set, bit i of the result is set. Indices of 64 or
                 // more contribute a zero bit.
 605             252: bpermd({{
 606                 uint64_t res = 0;
 607                 for (int i = 0; i < 8; ++i) {
 608                     int index = (Rs >> (i * 8)) & 0xff;
 609                     if (index < 64) {
 610                         if (Rb & (1ULL << (63 - index))) {
 611                             res |= 1 << i;
 612                         }
 613                     }
 614                 }
 615                 Ra = res;
 616             }});
 617         }
 618
 619         // Integer instructions with a shift value.
 620         format IntShiftOp {
                 // slw: (shift << 26) >> 31 spreads bit 5 of the shift amount
                 // over the whole word (this relies on >> of a negative int32_t
                 // being an arithmetic shift), so the source is zeroed when
                 // bit 5 is set. Otherwise the low five bits give the left
                 // shift distance.
 621             24: slw({{
 622                 int32_t shift = Rb_sw;
 623                 uint32_t res = Rs_uw & ~((shift << 26) >> 31);
 624                 if (shift != 0) {
 625                     shift = shift & 0x1f;
 626                     res = res << shift;
 627                 }
 628                 Ra = res;
 629             }});
 630
                 // srw: same zeroing rule as slw, with a logical right shift of
                 // the unsigned 32-bit value.
 631             536: srw({{
 632                 int32_t shift = Rb_sw;
 633                 uint32_t res = Rs_uw & ~((shift << 26) >> 31);
 634                 if (shift != 0) {
 635                     shift = shift & 0x1f;
 636                     res = res >> shift;
 637                 }
 638                 Ra = res;
 639             }});
 640
                 // sraw: right shift of the signed 32-bit source. With bit 5 of
                 // the shift amount set the result is src >> 31 (all sign bits)
                 // and setCA is raised when that is non-zero. Otherwise setCA is
                 // raised when the source is negative and (src & mask(shift)) is
                 // non-zero (mask() presumably selects the low `shift` bits,
                 // i.e. the bits shifted out -- confirm).
 641             792: sraw({{
 642                 int32_t src = Rs_sw;
 643                 uint32_t shift = Rb_uw;
 644                 int64_t res;
 645                 if ((shift & 0x20) != 0) {
 646                     res = src >> 31;
 647                     if (res != 0) {
 648                         setCA = true;
 649                     }
 650                 } else {
 651                     if (shift != 0) {
 652                         shift = shift & 0x1f;
 653                         res = src >> shift;
 654                         if (src < 0 && (src & mask(shift)) != 0) {
 655                             setCA = true;
 656                         }
 657                     } else {
 658                         res = src;
 659                     }
 660                 }
 661                 Ra = res;
 662             }},
 663             true);
 664
                 // srawi: as sraw, but `shift` is not declared in the snippet,
                 // so it is supplied from outside it (presumably the
                 // instruction's immediate field).
 665             824: srawi({{
 666                 int32_t src = Rs_sw;
 667                 int64_t res;
 668                 if (shift != 0) {
 669                     res = src >> shift;
 670                     if (src < 0 && (src & mask(shift)) != 0) {
 671                         setCA = true;
 672                     }
 673                 } else {
 674                     res = src;
 675                 }
 676                 Ra = res;
 677             }},
 678             true);
 679         }
 680
 681         format IntConcatShiftOp {
                 // sld: (shift << 57) >> 63 spreads bit 6 of the shift amount
                 // over the doubleword (relies on an arithmetic >> of a signed
                 // value), so the source is zeroed when bit 6 is set. Otherwise
                 // the low six bits give the left shift distance.
 682             27: sld({{
 683                 int64_t shift = Rb_sd;
 684                 uint64_t res = Rs & ~((shift << 57) >> 63);
 685                 if (shift != 0) {
 686                     shift = shift & 0x3f;
 687                     res = res << shift;
 688                 }
 689                 Ra = res;
 690             }});
 691
                 // srd: same zeroing rule as sld, with a logical right shift.
 692             539: srd({{
 693                 int64_t shift = Rb_sd;
 694                 uint64_t res = Rs & ~((shift << 57) >> 63);
 695                 if (shift != 0) {
 696                     shift = shift & 0x3f;
 697                     res = res >> shift;
 698                 }
 699                 Ra = res;
 700             }});
 701
                 // srad: right shift of the signed 64-bit source. With bit 6 of
                 // the shift amount set the result is src >> 63 and setCA is
                 // raised when that is non-zero. Otherwise setCA is raised when
                 // the source is negative and (src & mask(shift)) is non-zero.
 702             794: srad({{
 703                 int64_t src = Rs_sd;
 704                 uint64_t shift = Rb;
 705                 int64_t res;
 706                 if ((shift & 0x40) != 0) {
 707                     res = src >> 63;
 708                     if (res != 0) {
 709                         setCA = true;
 710                     }
 711                 } else {
 712                     if (shift != 0) {
 713                         shift = shift & 0x3f;
 714                         res = src >> shift;
 715                         if (src < 0 && (src & mask(shift)) != 0) {
 716                             setCA = true;
 717                         }
 718                     } else {
 719                         res = src;
 720                     }
 721                 }
 722                 Ra = res;
 723             }},
 724             true);
 725         }
 726
 727         format IntTrapOp {
                 // Each entry supplies two operand expressions: tw passes the
                 // Ra_sw / Rb_sw views, td the full Ra / Rb. The comparison
                 // itself is supplied by the format.
 728             4: tw({{ Ra_sw }}, {{ Rb_sw }});
 729             68: td({{ Ra }}, {{ Rb }});
 730         }
 731
 732         format IntOp {
                 // mcrxrx: packs four XER bits into a 4-bit value, from most to
                 // least significant ov, ov32, ca, ca32, and inserts it into
                 // the CR field selected by BF.
 733             576: mcrxrx({{
 734                 uint8_t res;
 735                 Xer xer = XER;
 736                 res = (xer.ov << 3) | (xer.ov32 << 2) |
 737                       (xer.ca << 1) | xer.ca32;
 738                 CR = insertCRField(CR, BF, res);
 739             }});
 740         }
 741
 742         format StoreIndexOp {
                 // Indexed stores whose source is the floating-point register
                 // Fs: through the sf views, into the df view of Mem, and from
                 // the Fs_uw view into untyped Mem respectively.
 743             663: stfsx({{ Mem_sf = Fs_sf; }});
 744             727: stfdx({{ Mem_df = Fs; }});
 745             983: stfiwx({{ Mem = Fs_uw; }});
 746         }
 747
 748         format StoreIndexUpdateOp {
                 // Floating-point indexed stores generated through the update
                 // format; the data snippets match stfsx / stfdx.
 749             695: stfsux({{ Mem_sf = Fs_sf; }});
 750             759: stfdux({{ Mem_df = Fs; }});
 751         }
 752
 753         // dcbt and dcbtst provide data cache hints; sync and eieio are
             // flagged as both read and write barriers. All four have empty
             // execute snippets.
 754         format MiscOp {
 755             278: dcbt({{ }});
 756             246: dcbtst({{ }});
 757             598: sync({{ }}, [ IsReadBarrier, IsWriteBarrier ]);
 758             854: eieio({{ }}, [ IsReadBarrier, IsWriteBarrier ]);
 759         }
 760
 761         // These instructions are of XO form with bit 21 as the OE bit.
 762         default: decode XO_XO {
 763
 764             // These instructions can all be reduced to the form
 765             // Rt = src1 + src2 [+ CA], therefore we just give src1 and src2
 766             // (and, if necessary, CA) definitions and let the python script
 767             // deal with setting things up correctly. We also give flags to
 768             // say which control registers to set.
                 // Subtractions are written as ~Ra + src2 + carry-in, with a
                 // constant carry-in of 1 for subf/subfc and neg. The "extended"
                 // forms take xer.ca as the carry-in, and the -1ULL operand of
                 // addme/subfme adds minus one. Entries whose trailing argument
                 // is a bare `true` or `computeCA = true` request carry
                 // computation (the positional form presumably binds to the
                 // same computeCA parameter -- confirm in the format).
 769             format IntSumOp {
 770                 266: add({{ Ra }}, {{ Rb }});
 771                 40: subf({{ ~Ra }}, {{ Rb }}, {{ 1 }});
 772                 10: addc({{ Ra }}, {{ Rb }},
 773                          computeCA = true);
 774                 8: subfc({{ ~Ra }}, {{ Rb }}, {{ 1 }},
 775                          true);
 776                 104: neg({{ ~Ra }}, {{ 1 }});
 777                 138: adde({{ Ra }}, {{ Rb }}, {{ xer.ca }},
 778                           true);
 779                 234: addme({{ Ra }}, {{ -1ULL }}, {{ xer.ca }},
 780                            true);
 781                 136: subfe({{ ~Ra }}, {{ Rb }}, {{ xer.ca }},
 782                            true);
 783                 232: subfme({{ ~Ra }}, {{ -1ULL }}, {{ xer.ca }},
 784                             true);
 785                 202: addze({{ Ra }}, {{ xer.ca }},
 786                            computeCA = true);
 787                 200: subfze({{ ~Ra }}, {{ xer.ca }},
 788                             computeCA = true);
 789             }
 790
 791             // Arithmetic instructions all use source registers Ra and Rb,
 792             // with destination register Rt.
 793             format IntArithCheckRcOp {
 794                 75: mulhw({{
                         // Signed 32 x 32 product formed in 64 bits. Because res
                         // is unsigned, the >> 32 leaves the upper product word
                         // in the low half of Rt with the high half zero.
 795                     uint64_t res = (int64_t)Ra_sw * Rb_sw;
 796                     res = res >> 32;
 797                     Rt = res;
 798                 }});
 799
 800                 11: mulhwu({{
                         // Unsigned 32 x 32 product formed in 64 bits; Rt gets
                         // its upper word.
 801                     uint64_t res = (uint64_t)Ra_uw * Rb_uw;
 802                     res = res >> 32;
 803                     Rt = res;
 804                 }});
 805
 806                 235: mullw({{
                         // The full 64-bit signed product is written to Rt;
                         // setOV is raised when it does not fit in an int32_t.
 807                     int64_t res = (int64_t)Ra_sw * Rb_sw;
 808                     if (res != (int32_t)res) {
 809                         setOV = true;
 810                     }
 811                     Rt = res;
 812                 }},
 813                 true);
 814
 815                 73: mulhd({{
                         // Keeps the second element of the tuple returned by
                         // multiply() for the signed operands (presumably the
                         // high doubleword of the product -- confirm).
 816                     int64_t res;
 817                     std::tie(std::ignore, res) = multiply(Ra_sd, Rb_sd);
 818                     Rt = res;
 819                 }});
 820
 821                 9: mulhdu({{
                         // Unsigned counterpart of mulhd: keeps the second
                         // element of the tuple returned by multiply(Ra, Rb).
 822                     uint64_t res;
 823                     std::tie(std::ignore, res) = multiply(Ra, Rb);
 824                     Rt = res;
 825                 }});
 826
 827                 233: mulld({{
                         // Rt receives the low doubleword of the signed product
                         // (first element of the multiply() tuple). Overflow is
                         // detected from the second element, the high
                         // doubleword: the product fits in 64 bits exactly when
                         // the high doubleword equals the sign extension of the
                         // low one (0 or -1).
                         // This avoids a 64-bit signed multiply that can
                         // overflow, and a divide-back check that evaluates
                         // INT64_MIN / -1 (undefined behaviour, a trap on x86
                         // hosts) for Ra = -1, Rb = INT64_MIN.
 828                     int64_t src1 = Ra_sd;
 829                     int64_t src2 = Rb_sd;
 830                     uint64_t res;
                         int64_t hi;
 831                     std::tie(res, hi) = multiply(src1, src2);
 832                     if (hi != -(int64_t)(res >> 63)) {
 833                         setOV = true;
 834                     }
 835                     Rt = res;
 836                 }},
 837                 true);
 838
 839                 // divw: signed word quotient, cast to uint32_t for Rt. A zero
 840                 // divisor or INT32_MIN / -1 gives Rt = 0 and raises setOV.
 841                 491: divw({{
 842                     const int32_t dividend = Ra_sw;
 843                     const int32_t divisor = Rb_sw;
 844                     const bool noQuotient = (divisor == 0) ||
 845                         (dividend == INT32_MIN && divisor == -1);
 846                     if (noQuotient) { setOV = true; }
 847                     Rt = noQuotient ? 0 : (uint32_t)(dividend / divisor);
 848                 }},
 849                 true);
 850
 851                 // divwu: unsigned word quotient; a zero divisor gives Rt = 0
 852                 // and raises setOV.
 853                 459: divwu({{
 854                     const uint32_t dividend = Ra_uw;
 855                     const uint32_t divisor = Rb_uw;
 856                     if (divisor == 0) {
 857                         setOV = true;
 858                     }
 859                     Rt = (divisor == 0) ? 0 : dividend / divisor;
 860                 }},
 861                 true);
 862
 863                 // divwe: signed (Ra word * 2^32) / (Rb word). Rt = 0 with setOV
 864                 // raised when there is no quotient or it does not fit int32_t.
 865                 427: divwe({{
 866                     const int32_t dividend = Ra_sw;
 867                     const int32_t divisor = Rb_sw;
 868                     bool overflow = (divisor == 0) ||
 869                         (dividend == INT32_MIN && divisor == -1);
 870                     int64_t quotient = 0;
 871                     if (!overflow) {
 872                         // Scale with a multiply: left-shifting a negative
 873                         // value is undefined behaviour before C++20.
 874                         quotient = ((int64_t)dividend * (1LL << 32)) / divisor;
 875                         overflow = (quotient != (int32_t)quotient);
 876                     }
 877                     if (overflow) { setOV = true; }
 878                     Rt = overflow ? 0 : (uint32_t)quotient;
 879                 }},
 880                 true);
 881
 882                 395: divweu({{
 883                     uint32_t src1 = Ra_ud;
 884                     uint32_t src2 = Rb_ud;
 885                     uint64_t res;
 886                     if (src2 != 0) {
 887                         res = ((uint64_t)src1 << 32) / src2;
 888                         if (res <= UINT32_MAX) {
 889                             Rt = (uint32_t)res;
 890                         } else {
 891                             Rt = 0;
 892                             setOV = true;
 893                         }
 894                     } else {
 895                         Rt = 0;
 896                         setOV = true;
 897                     }
 898                 }},
 899                 true);
 900
 901                 // divd: signed doubleword quotient. A zero divisor or
 902                 // INT64_MIN / -1 gives Rt = 0 and raises setOV.
 903                 489: divd({{
 904                     const int64_t dividend = Ra_sd;
 905                     const int64_t divisor = Rb_sd;
 906                     const bool noQuotient = (divisor == 0) ||
 907                         (dividend == INT64_MIN && divisor == -1);
 908                     if (noQuotient) { setOV = true; }
 909                     Rt = noQuotient ? 0 : dividend / divisor;
 910                 }},
 911                 true);
 912
 913                 // divdu: unsigned doubleword quotient; a zero divisor gives
 914                 // Rt = 0 and raises setOV.
 915                 457: divdu({{
 916                     const uint64_t dividend = Ra;
 917                     const uint64_t divisor = Rb;
 918                     if (divisor == 0) {
 919                         setOV = true;
 920                     }
 921                     Rt = (divisor == 0) ? 0 : dividend / divisor;
 922                 }},
 923                 true);
 924
 925                 // divde: divide(0, Ra, Rb) supplies both the overflow flag and
 926                 // the quotient; Rt is zeroed whenever the flag comes back set.
 927                 425: divde({{
 928                     const int64_t dividend = Ra_sd;
 929                     const int64_t divisor = Rb_sd;
 930                     int64_t quotient;
 931                     // The third tuple element is not needed here.
 932                     std::tie(setOV, quotient, std::ignore) =
 933                         divide(0, dividend, divisor);
 934                     Rt = setOV ? 0 : quotient;
 935                 }},
 936                 true);
 937
 938                 // divdeu: unsigned form; divide(0, Ra, Rb) supplies the overflow
 939                 // flag and the quotient, and Rt is zeroed when the flag is set.
 940                 393: divdeu({{
 941                     const uint64_t dividend = Ra;
 942                     const uint64_t divisor = Rb;
 943                     uint64_t quotient;
 944                     // The third tuple element is not needed here.
 945                     std::tie(setOV, quotient, std::ignore) =
 946                         divide(0, dividend, divisor);
 947                     Rt = setOV ? 0 : quotient;
 948                 }},
 949                 true);
 950             }
 951
 952             // These instructions are of XS form and use bits 21 - 29 as XO.
 953             default: decode XS_XO {
 954                 format IntConcatShiftOp {
 955                     // sradi: right shift of the signed Rs by shift. setCA is raised
 956                     // when Rs is negative and has bits set under mask(shift).
 957                     413: sradi({{
 958                         const int64_t value = Rs_sd;
 959                         if (shift != 0) {
 960                             const bool lostOnes = (value & mask(shift)) != 0;
 961                             if (value < 0 && lostOnes) { setCA = true; }
 962                         }
 963                         // A zero shift leaves the value as it is.
 964                         Ra = (shift != 0) ? (value >> shift) : value;
 965                     }},
 966                     true);
 967
 968                     // extswsli: Rs is read as a signed word, widened to 64 bits
 969                     // and shifted left by shift.
 970                     445: extswsli({{
 971                         const int64_t widened = Rs_sw;
 972                         // A zero shift hands the widened value through
 973                         // without evaluating the shift expression.
 974                         Ra = (shift != 0) ? (widened << shift) : widened;
 975                     }});
 976                 }
 977
 978                 default: decode XFX_XO {
 979                     format IntOp {
 980                         339: decode SPR {  // register reads into Rt; NOTE(review): keys look like SPR numbers with their 5-bit halves swapped - confirm
 981                             0x20: mfxer({{ Rt = XER; }});
 982                             0x100: mflr({{ Rt = LR; }});
 983                             0x120: mfctr({{ Rt = CTR; }});
 984                             0x1f9: mftar({{ Rt = TAR; }});
 985                             0x188: mftb({{ Rt = curTick(); }});  // returns the raw curTick() value
 986                             0x1a8: mftbu({{ Rt_uw = curTick() >> 32; }});  // curTick() shifted down by 32, written through the _uw view of Rt
 987                         }
 988
 989                         467: decode SPR {  // register writes: Rs is copied into the register selected by SPR
 990                             0x20: mtxer({{ XER = Rs; }});
 991                             0x100: mtlr({{ LR = Rs; }});
 992                             0x120: mtctr({{ CTR = Rs; }});
 993                             0x1f9: mttar({{ TAR = Rs; }});
 994                         }  // only these four selectors are listed in this table
 995
 996                         144: decode S {
 997                             // mtcrf: copy FXM-selected CR nibbles from Rs.
 998                             0: mtcrf({{
 999                                 uint32_t chosen = 0;
1000                                 for (int nib = 0; nib != 8; ++nib) {
1001                                     if ((FXM >> nib) & 0x1)
1002                                         chosen |= 0xf << (4 * nib);
1003                                 }
1004                                 CR = (CR & ~chosen) | (Rs & chosen);
1005                             }});
1006
1007                             // mtocrf: CR changes only if one FXM bit is set.
1008                             1: mtocrf({{
1009                                 if (popCount(FXM) == 1) {
1010                                     uint32_t sel = 0xf << (4 * findMsbSet(FXM));
1011                                     CR = (CR & ~sel) | (Rs & sel);
1012                                 }
1013                             }});
1014                         }
1015
1016                         19: decode S {
1017                             // mfcr: Rt receives the whole of CR.
1018                             0: mfcr({{ Rt = CR; }});
1019                             // mfocrf: one CR nibble if FXM has one bit set.
1020                             1: mfocrf({{
1021                                 if (popCount(FXM) == 1) {
1022                                     // 0xfULL: a shifted int would sign-extend.
1023                                     Rt = CR & (0xfULL << (4 * findMsbSet(FXM)));
1024                                 }
1025                             }});
1026                         }
1027
1028                         512: mcrxr({{
1029                             CR = insertCRField(CR, BF, bits(XER, 31, 28));
1030                             XER = bits(XER, 27, 0);
1031                         }});  // XER bits 31..28 go to CR field BF; XER keeps only bits 27..0
1032                     }
1033                 }
1034             }
1035         }
1036     }
1037
1038     // These instructions are of MD form and use bits 27 - 29 as XO.
1039     30: decode MD_XO {
1040         format IntConcatRotateOp {
1041             // rldicl: rotates Rs by shift and keeps only the bits selected
1042             // by bitmask(maskBeg, 63).
1043             0: rldicl({{
1044                 uint64_t rotated = Rs;
1045                 // rotate() is only called for a non-zero shift.
1046                 if (shift != 0) {
1047                     rotated = rotate(Rs, shift);
1048                 }
1049                 Ra = rotated & bitmask(maskBeg, 63);
1050             }});
1051
1052             // rldicr: rotates Rs by shift and keeps only the bits selected
1053             // by bitmask(0, maskEnd).
1054             1: rldicr({{
1055                 uint64_t rotated = Rs;
1056                 // rotate() is only called for a non-zero shift.
1057                 if (shift != 0) {
1058                     rotated = rotate(Rs, shift);
1059                 }
1060                 Ra = rotated & bitmask(0, maskEnd);
1061             }});
1062
1063             // rldic: rotates Rs by shift and keeps only the bits selected
1064             // by bitmask(maskBeg, ~shift).
1065             2: rldic({{
1066                 uint64_t rotated = Rs;
1067                 // rotate() is only called for a non-zero shift.
1068                 if (shift != 0) {
1069                     rotated = rotate(Rs, shift);
1070                 }
1071                 Ra = rotated & bitmask(maskBeg, ~shift);
1072             }});
1073
1074             // rldimi: rotates Rs and inserts it into Ra under
1075             // bitmask(maskBeg, ~shift); Ra bits outside that mask survive.
1076             3: rldimi({{
1077                 const uint64_t window = bitmask(maskBeg, ~shift);
1078                 uint64_t rotated = Rs;
1079                 // rotate() is only called for a non-zero shift.
1080                 if (shift != 0) {
1081                     rotated = rotate(Rs, shift);
1082                 }
1083                 // Rotated bits inside the window, previous Ra outside it.
1084                 Ra = (rotated & window) | (Ra & ~window);
1085             }});
1086
1087             // These instructions are of MDS form and use bits 27 - 30 as XO.
1088             default: decode MDS_XO {
1089                 // rldcl: as rldicl, except that the rotate amount is taken
1090                 // from the low six bits of Rb.
1091                 8: rldcl({{
1092                     const uint32_t amount = Rb & 0x3f;
1093                     uint64_t rotated = Rs;
1094                     // rotate() is only called for a non-zero amount.
1095                     if (amount != 0) {
1096                         rotated = rotate(Rs, amount);
1097                     }
1098                     Ra = rotated & bitmask(maskBeg, 63);
1099                 }});
1100
1101                 // rldcr: as rldicr, except that the rotate amount is taken
1102                 // from the low six bits of Rb.
1103                 9: rldcr({{
1104                     const uint32_t amount = Rb & 0x3f;
1105                     uint64_t rotated = Rs;
1106                     // rotate() is only called for a non-zero amount.
1107                     if (amount != 0) {
1108                         rotated = rotate(Rs, amount);
1109                     }
1110                     Ra = rotated & bitmask(0, maskEnd);
1111                 }});
1112             }
1113         }
1114     }
1115
1116     format LoadDispOp {  // floating-point loads: Mem is copied into Ft
1117         48: lfs({{ Ft_sf = Mem_sf; }});  // both sides use the _sf view (presumably single precision - confirm in operand definitions)
1118         50: lfd({{ Ft = Mem_df; }});  // Mem through the _df view into the default view of Ft
1119     }
1120
1121     format LoadDispUpdateOp {  // same snippets as the LoadDispOp entries; any base-register update must come from the format - TODO confirm
1122         49: lfsu({{ Ft_sf = Mem_sf; }});
1123         51: lfdu({{ Ft = Mem_df; }});
1124     }
1125
1126     format StoreDispOp {  // floating-point stores: Fs is copied into Mem
1127         52: stfs({{ Mem_sf = Fs_sf; }});  // both sides use the _sf view
1128         54: stfd({{ Mem_df = Fs; }});  // default view of Fs into the _df view of Mem
1129     }
1130
1131     format StoreDispUpdateOp {  // same snippets as the StoreDispOp entries; any base-register update must come from the format - TODO confirm
1132         53: stfsu({{ Mem_sf = Fs_sf; }});
1133         55: stfdu({{ Mem_df = Fs; }});
1134     }
1135
1136     format FloatArithOp {  // arithmetic under selector 59; each expression matches its counterpart in the selector 63 table
1137         59: decode A_XO {
1138             21: fadds({{ Ft = Fa + Fb; }});
1139             20: fsubs({{ Ft = Fa - Fb; }});
1140             25: fmuls({{ Ft = Fa * Fc; }});  // the multiplier is Fc, not Fb
1141             18: fdivs({{ Ft = Fa / Fb; }});
1142             29: fmadds({{ Ft = (Fa * Fc) + Fb; }});  // written as a separate multiply and add
1143             28: fmsubs({{ Ft = (Fa * Fc) - Fb; }});
1144             31: fnmadds({{ Ft = -((Fa * Fc) + Fb); }});  // the negation covers the whole sum
1145             30: fnmsubs({{ Ft = -((Fa * Fc) - Fb); }});
1146         }  // NOTE(review): nothing here narrows results to single precision; presumably left to the format - confirm
1147     }
1148
1149     63: decode A_XO {
1150         format FloatArithOp {  // arithmetic under selector 63, keyed on A_XO
1151             21: fadd({{ Ft = Fa + Fb; }});
1152             20: fsub({{ Ft = Fa - Fb; }});
1153             25: fmul({{ Ft = Fa * Fc; }});  // the multiplier is Fc, not Fb
1154             18: fdiv({{ Ft = Fa / Fb; }});
1155             29: fmadd({{ Ft = (Fa * Fc) + Fb; }});  // written as a separate multiply and add
1156             28: fmsub({{ Ft = (Fa * Fc) - Fb; }});
1157             31: fnmadd({{ Ft = -((Fa * Fc) + Fb); }});  // the negation covers the whole sum
1158             30: fnmsub({{ Ft = -((Fa * Fc) - Fb); }});
1159         }
1160
1161         default: decode X_XO {
1162             // Register moves and sign manipulation. The bit-level entries
1163             // build the result straight from Fb_ud so that Ft is write-only;
1164             // reading Ft in a snippet would also register it as a source.
1165             format FloatRCCheckOp {
1166                 72: fmr({{ Ft = Fb; }});
1167                 // fabs / fnabs: Fb with bit 63 forced to 0 / 1.
1168                 264: fabs({{ Ft_ud = insertBits(Fb_ud, 63, 0); }});
1169                 136: fnabs({{ Ft_ud = insertBits(Fb_ud, 63, 1); }});
1170                 40: fneg({{ Ft = -Fb; }});
1171                 // fcpsgn: Fb with bit 63 taken from Fa.
1172                 8: fcpsgn({{
1173                     Ft_ud = insertBits(Fb_ud, 63, Fa_ud<63:63>);
1174                 }});
1175             }
1176
1177             format FloatConvertOp {
1178                 12: frsp({{ Ft_sf = Fb; }});  // Fb assigned through the _sf view of Ft
1179                 15: fctiwz({{ Ft_sw = (int32_t)trunc(Fb); }});  // NOTE(review): the cast is unchecked; NaN or out-of-range inputs are not saturated - confirm intended
1180             }
1181
1182             format FloatOp {
1183               0: fcmpu({{
1184                   const uint32_t cmpField = makeCRField(Fa, Fb);
1185                   CR = insertCRField(CR, BF, cmpField);
1186                   Fpscr updated = FPSCR;
1187                   updated.fprf.fpcc = cmpField;
1188                   FPSCR = updated;
1189               }});  // makeCRField(Fa, Fb) goes to CR field BF and to FPSCR.fprf.fpcc
1190             }
1191
1192             format FloatRCCheckOp {  // FPSCR access: mffs reads it, the other entries modify it
1193                 583: mffs({{ Ft_ud = FPSCR; }});  // FPSCR value written through the _ud view of Ft
1194                 134: mtfsfi({{
1195                     FPSCR = insertCRField(FPSCR, BF + (8 * (1 - W_FIELD)),
1196                                           U_FIELD);  // field index is BF, raised by 8 when W_FIELD is 0
1197                 }});
1198                 70: mtfsb0({{ FPSCR = insertBits(FPSCR, 31 - BT, 0); }});  // writes 0 to FPSCR bit 31 - BT
1199                 38: mtfsb1({{ FPSCR = insertBits(FPSCR, 31 - BT, 1); }});  // writes 1 to FPSCR bit 31 - BT
1200
1201                 default: decode XFL_XO {  // mtfsf is selected by a further decode on XFL_XO
1202                     711: mtfsf({{
1203                         if (L_FIELD == 1) { FPSCR = Fb_ud; }  // L_FIELD == 1: whole FPSCR replaced from Fb_ud
1204                         else {
1205                             for (int i = 0; i < 8; ++i) {
1206                                 if (bits(FLM, i) == 1) {  // one FLM bit per 4-bit group
1207                                     int k = 4 * (i + (8 * (1 - W_FIELD)));  // lowest bit of group i, raised by 32 when W_FIELD is 0
1208                                     FPSCR = insertBits(FPSCR, k + 3, k,
1209                                                        bits(Fb_ud, k + 3, k));  // copy bits k+3..k across from Fb_ud
1210                                 }
1211                             }
1212                         }
1213                     }});
1214                 }
1215             }
1216         }
1217     }
1218 }