src/gallium/auxiliary/rtasm/rtasm_ppc.c

   1 /**************************************************************************
   2  *
   3  * Copyright (C) 2008 Tungsten Graphics, Inc.   All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included
  13  * in all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  16  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  19  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  20  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  **************************************************************************/
  23
  24 /**
  25  * PPC code generation.
  26  * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf
  27  * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf
  28  *
  29  * Other PPC refs:
  30  * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2
  31  * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html
  32  * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
  33  *
  34  * \author Brian Paul
  35  */
  36
  37
  38 #include <stdio.h>
  39 #include "util/u_memory.h"
  40 #include "pipe/p_debug.h"
  41 #include "rtasm_execmem.h"
  42 #include "rtasm_ppc.h"
  43
  44
  45 void
  46 ppc_init_func(struct ppc_function *p)
  47 {
  48    uint i;
  49
  50    p->num_inst = 0;
  51    p->max_inst = 100; /* first guess at buffer size */
  52    p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
  53    p->reg_used = 0x0;
  54    p->fp_used = 0x0;
  55    p->vec_used = 0x0;
  56
  57    /* only allow using gp registers 3..12 for now */
  58    for (i = 0; i < 3; i++)
  59       ppc_reserve_register(p, i);
  60    for (i = 12; i < PPC_NUM_REGS; i++)
  61       ppc_reserve_register(p, i);
  62 }
  63
  64
  65 void
  66 ppc_release_func(struct ppc_function *p)
  67 {
  68    assert(p->num_inst <= p->max_inst);
  69    if (p->store != NULL) {
  70       rtasm_exec_free(p->store);
  71    }
  72    p->store = NULL;
  73 }
  74
  75
  76 uint
  77 ppc_num_instructions(const struct ppc_function *p)
  78 {
  79    return p->num_inst;
  80 }
  81
  82
  83 void (*ppc_get_func(struct ppc_function *p))(void)
  84 {
  85 #if 0
  86    DUMP_END();
  87    if (DISASSEM && p->store)
  88       debug_printf("disassemble %p %p\n", p->store, p->csr);
  89
  90    if (p->store == p->error_overflow)
  91       return (void (*)(void)) NULL;
  92    else
  93 #endif
  94       return (void (*)(void)) p->store;
  95 }
  96
  97
  98 void
  99 ppc_dump_func(const struct ppc_function *p)
 100 {
 101    uint i;
 102    for (i = 0; i < p->num_inst; i++) {
 103       debug_printf("%3u: 0x%08x\n", i, p->store[i]);
 104    }
 105 }
 106
 107
 108 /**
 109  * Mark a register as being unavailable.
 110  */
 111 int
 112 ppc_reserve_register(struct ppc_function *p, int reg)
 113 {
 114    assert(reg < PPC_NUM_REGS);
 115    p->reg_used |= (1 << reg);
 116    return reg;
 117 }
 118
 119
 120 /**
 121  * Allocate a general purpose register.
 122  * \return register index or -1 if none left.
 123  */
 124 int
 125 ppc_allocate_register(struct ppc_function *p)
 126 {
 127    unsigned i;
 128    for (i = 0; i < PPC_NUM_REGS; i++) {
 129       const uint64_t mask = 1 << i;
 130       if ((p->reg_used & mask) == 0) {
 131          p->reg_used |= mask;
 132          return i;
 133       }
 134    }
 135    return -1;
 136 }
 137
 138
 139 /**
 140  * Mark the given general purpose register as "unallocated".
 141  */
 142 void
 143 ppc_release_register(struct ppc_function *p, int reg)
 144 {
 145    assert(reg < PPC_NUM_REGS);
 146    assert(p->reg_used & (1 << reg));
 147    p->reg_used &= ~(1 << reg);
 148 }
 149
 150
 151 /**
 152  * Allocate a floating point register.
 153  * \return register index or -1 if none left.
 154  */
 155 int
 156 ppc_allocate_fp_register(struct ppc_function *p)
 157 {
 158    unsigned i;
 159    for (i = 0; i < PPC_NUM_FP_REGS; i++) {
 160       const uint64_t mask = 1 << i;
 161       if ((p->fp_used & mask) == 0) {
 162          p->fp_used |= mask;
 163          return i;
 164       }
 165    }
 166    return -1;
 167 }
 168
 169
 170 /**
 171  * Mark the given floating point register as "unallocated".
 172  */
 173 void
 174 ppc_release_fp_register(struct ppc_function *p, int reg)
 175 {
 176    assert(reg < PPC_NUM_FP_REGS);
 177    assert(p->fp_used & (1 << reg));
 178    p->fp_used &= ~(1 << reg);
 179 }
 180
 181
 182 /**
 183  * Allocate a vector register.
 184  * \return register index or -1 if none left.
 185  */
 186 int
 187 ppc_allocate_vec_register(struct ppc_function *p)
 188 {
 189    unsigned i;
 190    for (i = 0; i < PPC_NUM_VEC_REGS; i++) {
 191       const uint64_t mask = 1 << i;
 192       if ((p->vec_used & mask) == 0) {
 193          p->vec_used |= mask;
 194          return i;
 195       }
 196    }
 197    return -1;
 198 }
 199
 200
 201 /**
 202  * Mark the given vector register as "unallocated".
 203  */
 204 void
 205 ppc_release_vec_register(struct ppc_function *p, int reg)
 206 {
 207    assert(reg < PPC_NUM_VEC_REGS);
 208    assert(p->vec_used & (1 << reg));
 209    p->vec_used &= ~(1 << reg);
 210 }
 211
 212
 213 /**
 214  * Append instruction to instruction buffer.  Grow buffer if out of room.
 215  */
 216 static void
 217 emit_instruction(struct ppc_function *p, uint32_t inst_bits)
 218 {
 219    if (!p->store)
 220       return;  /* out of memory, drop the instruction */
 221
 222    if (p->num_inst == p->max_inst) {
 223       /* allocate larger buffer */
 224       uint32_t *newbuf;
 225       p->max_inst *= 2;  /* 2x larger */
 226       newbuf = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
 227       if (newbuf) {
 228          memcpy(newbuf, p->store, p->num_inst * PPC_INST_SIZE);
 229       }
 230       rtasm_exec_free(p->store);
 231       p->store = newbuf;
 232       if (!p->store) {
 233          /* out of memory */
 234          p->num_inst = 0;
 235          return;
 236       }
 237    }
 238
 239    p->store[p->num_inst++] = inst_bits;
 240 }
 241
 242
 243 union vx_inst {
 244    uint32_t bits;
 245    struct {
 246       unsigned op:6;
 247       unsigned vD:5;
 248       unsigned vA:5;
 249       unsigned vB:5;
 250       unsigned op2:11;
 251    } inst;
 252 };
 253
 254 static INLINE void
 255 emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB)
 256 {
 257    union vx_inst inst;
 258    inst.inst.op = 4;
 259    inst.inst.vD = vD;
 260    inst.inst.vA = vA;
 261    inst.inst.vB = vB;
 262    inst.inst.op2 = op2;
 263    emit_instruction(p, inst.bits);
 264 }
 265
 266
 267 union vxr_inst {
 268    uint32_t bits;
 269    struct {
 270       unsigned op:6;
 271       unsigned vD:5;
 272       unsigned vA:5;
 273       unsigned vB:5;
 274       unsigned rC:1;
 275       unsigned op2:10;
 276    } inst;
 277 };
 278
 279 static INLINE void
 280 emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB)
 281 {
 282    union vxr_inst inst;
 283    inst.inst.op = 4;
 284    inst.inst.vD = vD;
 285    inst.inst.vA = vA;
 286    inst.inst.vB = vB;
 287    inst.inst.rC = 0;
 288    inst.inst.op2 = op2;
 289    emit_instruction(p, inst.bits);
 290 }
 291
 292
 293 union va_inst {
 294    uint32_t bits;
 295    struct {
 296       unsigned op:6;
 297       unsigned vD:5;
 298       unsigned vA:5;
 299       unsigned vB:5;
 300       unsigned vC:5;
 301       unsigned op2:6;
 302    } inst;
 303 };
 304
 305 static INLINE void
 306 emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC)
 307 {
 308    union va_inst inst;
 309    inst.inst.op = 4;
 310    inst.inst.vD = vD;
 311    inst.inst.vA = vA;
 312    inst.inst.vB = vB;
 313    inst.inst.vC = vC;
 314    inst.inst.op2 = op2;
 315    emit_instruction(p, inst.bits);
 316 }
 317
 318
 319 union i_inst {
 320    uint32_t bits;
 321    struct {
 322       unsigned op:6;
 323       unsigned li:24;
 324       unsigned aa:1;
 325       unsigned lk:1;
 326    } inst;
 327 };
 328
 329 static INLINE void
 330 emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk)
 331 {
 332    union i_inst inst;
 333    inst.inst.op = op;
 334    inst.inst.li = li;
 335    inst.inst.aa = aa;
 336    inst.inst.lk = lk;
 337    emit_instruction(p, inst.bits);
 338 }
 339
 340
 341 union xl_inst {
 342    uint32_t bits;
 343    struct {
 344       unsigned op:6;
 345       unsigned bo:5;
 346       unsigned bi:5;
 347       unsigned unused:3;
 348       unsigned bh:2;
 349       unsigned op2:10;
 350       unsigned lk:1;
 351    } inst;
 352 };
 353
 354 static INLINE void
 355 emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh,
 356         uint op2, uint lk)
 357 {
 358    union xl_inst inst;
 359    inst.inst.op = op;
 360    inst.inst.bo = bo;
 361    inst.inst.bi = bi;
 362    inst.inst.unused = 0x0;
 363    inst.inst.bh = bh;
 364    inst.inst.op2 = op2;
 365    inst.inst.lk = lk;
 366    emit_instruction(p, inst.bits);
 367 }
 368
 369 static INLINE void
 370 dump_xl(const char *name, uint inst)
 371 {
 372    union xl_inst i;
 373
 374    i.bits = inst;
 375    debug_printf("%s = 0x%08x\n", name, inst);
 376    debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op);
 377    debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo);
 378    debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi);
 379    debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused);
 380    debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh);
 381    debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2);
 382    debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk);
 383 }
 384
 385
 386 union x_inst {
 387    uint32_t bits;
 388    struct {
 389       unsigned op:6;
 390       unsigned vrs:5;
 391       unsigned ra:5;
 392       unsigned rb:5;
 393       unsigned op2:10;
 394       unsigned unused:1;
 395    } inst;
 396 };
 397
 398 static INLINE void
 399 emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2)
 400 {
 401    union x_inst inst;
 402    inst.inst.op = op;
 403    inst.inst.vrs = vrs;
 404    inst.inst.ra = ra;
 405    inst.inst.rb = rb;
 406    inst.inst.op2 = op2;
 407    inst.inst.unused = 0x0;
 408    emit_instruction(p, inst.bits);
 409 }
 410
 411
 412 union d_inst {
 413    uint32_t bits;
 414    struct {
 415       unsigned op:6;
 416       unsigned rt:5;
 417       unsigned ra:5;
 418       unsigned si:16;
 419    } inst;
 420 };
 421
 422 static INLINE void
 423 emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si)
 424 {
 425    union d_inst inst;
 426    assert(si >= -32768);
 427    assert(si <= 32767);
 428    inst.inst.op = op;
 429    inst.inst.rt = rt;
 430    inst.inst.ra = ra;
 431    inst.inst.si = (unsigned) (si & 0xffff);
 432    emit_instruction(p, inst.bits);
 433 }
 434
 435
 436 union a_inst {
 437    uint32_t bits;
 438    struct {
 439       unsigned op:6;
 440       unsigned frt:5;
 441       unsigned fra:5;
 442       unsigned frb:5;
 443       unsigned unused:5;
 444       unsigned op2:5;
 445       unsigned rc:1;
 446    } inst;
 447 };
 448
 449 static INLINE void
 450 emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2,
 451        uint rc)
 452 {
 453    union a_inst inst;
 454    inst.inst.op = op;
 455    inst.inst.frt = frt;
 456    inst.inst.fra = fra;
 457    inst.inst.frb = frb;
 458    inst.inst.unused = 0x0;
 459    inst.inst.op2 = op2;
 460    inst.inst.rc = rc;
 461    emit_instruction(p, inst.bits);
 462 }
 463
 464
 465 union xo_inst {
 466    uint32_t bits;
 467    struct {
 468       unsigned op:6;
 469       unsigned rt:5;
 470       unsigned ra:5;
 471       unsigned rb:5;
 472       unsigned oe:1;
 473       unsigned op2:9;
 474       unsigned rc:1;
 475    } inst;
 476 };
 477
 478 static INLINE void
 479 emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe,
 480         uint op2, uint rc)
 481 {
 482    union xo_inst inst;
 483    inst.inst.op = op;
 484    inst.inst.rt = rt;
 485    inst.inst.ra = ra;
 486    inst.inst.rb = rb;
 487    inst.inst.oe = oe;
 488    inst.inst.op2 = op2;
 489    inst.inst.rc = rc;
 490    emit_instruction(p, inst.bits);
 491 }
 492
 493
 494
 495
 496
 497 /**
 498  ** float vector arithmetic
 499  **/
 500
 501 /** vector float add */
 502 void
 503 ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB)
 504 {
 505    emit_vx(p, 10, vD, vA, vB);
 506 }
 507
 508 /** vector float substract */
 509 void
 510 ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB)
 511 {
 512    emit_vx(p, 74, vD, vA, vB);
 513 }
 514
 515 /** vector float min */
 516 void
 517 ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB)
 518 {
 519    emit_vx(p, 1098, vD, vA, vB);
 520 }
 521
 522 /** vector float max */
 523 void
 524 ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB)
 525 {
 526    emit_vx(p, 1034, vD, vA, vB);
 527 }
 528
 529 /** vector float mult add: vD = vA * vB + vC */
 530 void
 531 ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
 532 {
 533    emit_va(p, 46, vD, vA, vC, vB); /* note arg order */
 534 }
 535
 536 /** vector float negative mult subtract: vD = vA - vB * vC */
 537 void
 538 ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
 539 {
 540    emit_va(p, 47, vD, vB, vA, vC); /* note arg order */
 541 }
 542
 543 /** vector float compare greater than */
 544 void
 545 ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
 546 {
 547    emit_vxr(p, 710, vD, vA, vB);
 548 }
 549
 550 /** vector float compare greater than or equal to */
 551 void
 552 ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB)
 553 {
 554    emit_vxr(p, 454, vD, vA, vB);
 555 }
 556
 557 /** vector float compare equal */
 558 void
 559 ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
 560 {
 561    emit_vxr(p, 198, vD, vA, vB);
 562 }
 563
 564 /** vector float 2^x */
 565 void
 566 ppc_vexptefp(struct ppc_function *p, uint vD, uint vB)
 567 {
 568    emit_vx(p, 394, vD, 0, vB);
 569 }
 570
 571 /** vector float log2(x) */
 572 void
 573 ppc_vlogefp(struct ppc_function *p, uint vD, uint vB)
 574 {
 575    emit_vx(p, 458, vD, 0, vB);
 576 }
 577
 578 /** vector float reciprocol */
 579 void
 580 ppc_vrefp(struct ppc_function *p, uint vD, uint vB)
 581 {
 582    emit_vx(p, 266, vD, 0, vB);
 583 }
 584
 585 /** vector float reciprocol sqrt estimate */
 586 void
 587 ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB)
 588 {
 589    emit_vx(p, 330, vD, 0, vB);
 590 }
 591
 592 /** vector float round to negative infinity */
 593 void
 594 ppc_vrfim(struct ppc_function *p, uint vD, uint vB)
 595 {
 596    emit_vx(p, 714, vD, 0, vB);
 597 }
 598
 599 /** vector float round to positive infinity */
 600 void
 601 ppc_vrfip(struct ppc_function *p, uint vD, uint vB)
 602 {
 603    emit_vx(p, 650, vD, 0, vB);
 604 }
 605
 606 /** vector float round to nearest int */
 607 void
 608 ppc_vrfin(struct ppc_function *p, uint vD, uint vB)
 609 {
 610    emit_vx(p, 522, vD, 0, vB);
 611 }
 612
 613 /** vector float round to int toward zero */
 614 void
 615 ppc_vrfiz(struct ppc_function *p, uint vD, uint vB)
 616 {
 617    emit_vx(p, 586, vD, 0, vB);
 618 }
 619
 620 /** vector store: store vR at mem[vA+vB] */
 621 void
 622 ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB)
 623 {
 624    emit_x(p, 31, vR, vA, vB, 231);
 625 }
 626
 627 /** vector load: vR = mem[vA+vB] */
 628 void
 629 ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB)
 630 {
 631    emit_x(p, 31, vR, vA, vB, 103);
 632 }
 633
 634 /** load vector element word: vR = mem_word[ra+rb] */
 635 void
 636 ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb)
 637 {
 638    emit_x(p, 31, vr, ra, rb, 71);
 639 }
 640
 641
 642
 643
 644 /**
 645  ** vector bitwise operations
 646  **/
 647
 648 /** vector and */
 649 void
 650 ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB)
 651 {
 652    emit_vx(p, 1028, vD, vA, vB);
 653 }
 654
 655 /** vector and complement */
 656 void
 657 ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB)
 658 {
 659    emit_vx(p, 1092, vD, vA, vB);
 660 }
 661
 662 /** vector or */
 663 void
 664 ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB)
 665 {
 666    emit_vx(p, 1156, vD, vA, vB);
 667 }
 668
 669 /** vector nor */
 670 void
 671 ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB)
 672 {
 673    emit_vx(p, 1284, vD, vA, vB);
 674 }
 675
 676 /** vector xor */
 677 void
 678 ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB)
 679 {
 680    emit_vx(p, 1220, vD, vA, vB);
 681 }
 682
 683 /** Pseudo-instruction: vector move */
 684 void
 685 ppc_vmove(struct ppc_function *p, uint vD, uint vA)
 686 {
 687    ppc_vor(p, vD, vA, vA);
 688 }
 689
 690 /** Set vector register to {0,0,0,0} */
 691 void
 692 ppc_vzero(struct ppc_function *p, uint vr)
 693 {
 694    ppc_vxor(p, vr, vr, vr);
 695 }
 696
 697
 698
 699
 700 /**
 701  ** Vector shuffle / select / splat / etc
 702  **/
 703
 704 /** vector permute */
 705 void
 706 ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
 707 {
 708    emit_va(p, 43, vD, vA, vB, vC);
 709 }
 710
 711 /** vector select */
 712 void
 713 ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
 714 {
 715    emit_va(p, 42, vD, vA, vB, vC);
 716 }
 717
 718 /** vector splat byte */
 719 void
 720 ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm)
 721 {
 722    emit_vx(p, 42, vD, imm, vB);
 723 }
 724
 725 /** vector splat half word */
 726 void
 727 ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm)
 728 {
 729    emit_vx(p, 588, vD, imm, vB);
 730 }
 731
 732 /** vector splat word */
 733 void
 734 ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm)
 735 {
 736    emit_vx(p, 652, vD, imm, vB);
 737 }
 738
 739 /** vector splat signed immediate word */
 740 void
 741 ppc_vspltisw(struct ppc_function *p, uint vD, int imm)
 742 {
 743    assert(imm >= -16);
 744    assert(imm < 15);
 745    emit_vx(p, 908, vD, imm, 0);
 746 }
 747
 748 /** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */
 749 void
 750 ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB)
 751 {
 752    emit_vx(p, 388, vD, vA, vB);
 753 }
 754
 755
 756
 757
 758 /**
 759  ** integer arithmetic
 760  **/
 761
 762 /** rt = ra + imm */
 763 void
 764 ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm)
 765 {
 766    emit_d(p, 14, rt, ra, imm);
 767 }
 768
 769 /** rt = ra + (imm << 16) */
 770 void
 771 ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm)
 772 {
 773    emit_d(p, 15, rt, ra, imm);
 774 }
 775
 776 /** rt = ra + rb */
 777 void
 778 ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb)
 779 {
 780    emit_xo(p, 31, rt, ra, rb, 0, 266, 0);
 781 }
 782
 783 /** rt = ra AND ra */
 784 void
 785 ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb)
 786 {
 787    emit_x(p, 31, ra, rt, rb, 28);  /* note argument order */
 788 }
 789
 790 /** rt = ra AND imm */
 791 void
 792 ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm)
 793 {
 794    emit_d(p, 28, ra, rt, imm);  /* note argument order */
 795 }
 796
 797 /** rt = ra OR ra */
 798 void
 799 ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb)
 800 {
 801    emit_x(p, 31, ra, rt, rb, 444);  /* note argument order */
 802 }
 803
 804 /** rt = ra OR imm */
 805 void
 806 ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm)
 807 {
 808    emit_d(p, 24, ra, rt, imm);  /* note argument order */
 809 }
 810
 811 /** rt = ra XOR ra */
 812 void
 813 ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb)
 814 {
 815    emit_x(p, 31, ra, rt, rb, 316);  /* note argument order */
 816 }
 817
 818 /** rt = ra XOR imm */
 819 void
 820 ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm)
 821 {
 822    emit_d(p, 26, ra, rt, imm);  /* note argument order */
 823 }
 824
 825 /** pseudo instruction: move: rt = ra */
 826 void
 827 ppc_mr(struct ppc_function *p, uint rt, uint ra)
 828 {
 829    ppc_or(p, rt, ra, ra);
 830 }
 831
 832 /** pseudo instruction: load immediate: rt = imm */
 833 void
 834 ppc_li(struct ppc_function *p, uint rt, int imm)
 835 {
 836    ppc_addi(p, rt, 0, imm);
 837 }
 838
 839 /** rt = imm << 16 */
 840 void
 841 ppc_lis(struct ppc_function *p, uint rt, int imm)
 842 {
 843    ppc_addis(p, rt, 0, imm);
 844 }
 845
 846 /** rt = imm */
 847 void
 848 ppc_load_int(struct ppc_function *p, uint rt, int imm)
 849 {
 850    ppc_lis(p, rt, (imm >> 16));          /* rt = imm >> 16 */
 851    ppc_ori(p, rt, rt, (imm & 0xffff));   /* rt = rt | (imm & 0xffff) */
 852 }
 853
 854
 855
 856
 857 /**
 858  ** integer load/store
 859  **/
 860
 861 /** store rs at memory[(ra)+d],
 862  * then update ra = (ra)+d
 863  */
 864 void
 865 ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d)
 866 {
 867    emit_d(p, 37, rs, ra, d);
 868 }
 869
 870 /** store rs at memory[(ra)+d] */
 871 void
 872 ppc_stw(struct ppc_function *p, uint rs, uint ra, int d)
 873 {
 874    emit_d(p, 36, rs, ra, d);
 875 }
 876
 877 /** Load rt = mem[(ra)+d];  then zero set high 32 bits to zero. */
 878 void
 879 ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d)
 880 {
 881    emit_d(p, 32, rt, ra, d);
 882 }
 883
 884
 885
 886 /**
 887  ** Float (non-vector) arithmetic
 888  **/
 889
 890 /** add: frt = fra + frb */
 891 void
 892 ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb)
 893 {
 894    emit_a(p, 63, frt, fra, frb, 21, 0);
 895 }
 896
 897 /** sub: frt = fra - frb */
 898 void
 899 ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb)
 900 {
 901    emit_a(p, 63, frt, fra, frb, 20, 0);
 902 }
 903
 904 /** convert to int: rt = (int) ra */
 905 void
 906 ppc_fctiwz(struct ppc_function *p, uint rt, uint fra)
 907 {
 908    emit_x(p, 63, rt, 0, fra, 15);
 909 }
 910
 911 /** store frs at mem[(ra)+offset] */
 912 void
 913 ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset)
 914 {
 915    emit_d(p, 52, frs, ra, offset);
 916 }
 917
 918 /** store frs at mem[(ra)+(rb)] */
 919 void
 920 ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb)
 921 {
 922    emit_x(p, 31, frs, ra, rb, 983);
 923 }
 924
 925 /** load frt = mem[(ra)+offset] */
 926 void
 927 ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset)
 928 {
 929    emit_d(p, 48, frt, ra, offset);
 930 }
 931
 932
 933
 934
 935
 936 /**
 937  ** branch instructions
 938  **/
 939
 940 /** BLR: Branch to link register (p. 35) */
 941 void
 942 ppc_blr(struct ppc_function *p)
 943 {
 944    emit_i(p, 18, 0, 0, 1);
 945 }
 946
 947 /** Branch Conditional to Link Register (p. 36) */
 948 void
 949 ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg)
 950 {
 951    emit_xl(p, 19, condOp, condReg, branchHint, 16, 0);
 952 }
 953
 954 /** Pseudo instruction: return from subroutine */
 955 void
 956 ppc_return(struct ppc_function *p)
 957 {
 958    ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0);
 959 }