src/gallium/drivers/nvc0/nvc0_pc_optimize.c

   1 /*
   2  * Copyright 2010 Christoph Bumiller
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
  19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  */
  22
  23 #include "nvc0_pc.h"
  24 #include "nvc0_program.h"
  25
  26 #define DESCEND_ARBITRARY(j, f)                                 \
  27 do {                                                            \
  28    b->pass_seq = ctx->pc->pass_seq;                             \
  29                                                                 \
  30    for (j = 0; j < 2; ++j)                                      \
  31       if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
  32          f(ctx, b->out[j]);                                       \
  33 } while (0)
  34
  35 static INLINE boolean
  36 registers_interfere(struct nv_value *a, struct nv_value *b)
  37 {
  38    if (a->reg.file != b->reg.file)
  39       return FALSE;
  40    if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
  41       return FALSE;
  42
  43    assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);
  44
  45    if (a->join->reg.id < b->join->reg.id) {
  46       return (a->join->reg.id + a->reg.size >= b->join->reg.id);
  47    } else
  48    if (a->join->reg.id > b->join->reg.id) {
  49       return (b->join->reg.id + b->reg.size >= a->join->reg.id);
  50    }
  51
  52    return FALSE;
  53 }
  54
  55 static INLINE boolean
  56 values_equal(struct nv_value *a, struct nv_value *b)
  57 {
  58    if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
  59       return FALSE;
  60    if (NV_IS_MEMORY_FILE(a->reg.file))
  61       return a->reg.address == b->reg.address;
  62    else
  63       return a->join->reg.id == b->join->reg.id;
  64 }
  65
  66 #if 0
  67 static INLINE boolean
  68 inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
  69 {
  70    int si, di;
  71
  72    for (di = 0; di < 4 && a->def[di]; ++di)
  73       for (si = 0; si < 5 && b->src[si]; ++si)
  74          if (registers_interfere(a->def[di], b->src[si]->value))
  75             return FALSE;
  76
  77    return TRUE;
  78 }
  79
  80 /* Check whether we can swap the order of the instructions,
  81  * where a & b may be either the earlier or the later one.
  82  */
  83 static boolean
  84 inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
  85 {
  86    return inst_commutation_check(a, b) && inst_commutation_check(b, a);
  87 }
  88 #endif
  89
  90 static INLINE boolean
  91 inst_removable(struct nv_instruction *nvi)
  92 {
  93    if (nvi->opcode == NV_OP_ST)
  94       return FALSE;
  95    return (!(nvi->terminator ||
  96              nvi->join ||
  97              nvi->target ||
  98              nvi->fixed ||
  99              nvc0_insn_refcount(nvi)));
 100 }
 101
 102 /* Check if we do not actually have to emit this instruction. */
 103 static INLINE boolean
 104 inst_is_noop(struct nv_instruction *nvi)
 105 {
 106    if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
 107       return TRUE;
 108    if (nvi->terminator || nvi->join)
 109       return FALSE;
 110    if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
 111       return TRUE;
 112    if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
 113       return FALSE;
 114    if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
 115       return FALSE;
 116
 117    if (nvi->src[0]->value->join->reg.id < 0) {
 118       NOUVEAU_DBG("inst_is_noop: orphaned value detected\n");
 119       return TRUE;
 120    }
 121
 122    if (nvi->opcode == NV_OP_SELECT)
 123       if (!values_equal(nvi->def[0], nvi->src[1]->value))
 124          return FALSE;
 125    return values_equal(nvi->def[0], nvi->src[0]->value);
 126 }
 127
 128 struct nv_pass {
 129    struct nv_pc *pc;
 130    int n;
 131    void *priv;
 132 };
 133
 134 static int
 135 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
 136
 137 static void
 138 nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
 139 {
 140    struct nv_pc *pc = (struct nv_pc *)priv;
 141    struct nv_basic_block *in;
 142    struct nv_instruction *nvi, *next;
 143    int j;
 144
 145    for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);
 146
 147    if (j >= 0) {
 148       in = pc->bb_list[j];
 149
 150       /* check for no-op branches (BRA $PC+8) */
 151       if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
 152          in->emit_size -= 8;
 153          pc->emit_size -= 8;
 154
 155          for (++j; j < pc->num_blocks; ++j)
 156             pc->bb_list[j]->emit_pos -= 8;
 157
 158          nvc0_insn_delete(in->exit);
 159       }
 160       b->emit_pos = in->emit_pos + in->emit_size;
 161    }
 162
 163    pc->bb_list[pc->num_blocks++] = b;
 164
 165    /* visit node */
 166
 167    for (nvi = b->entry; nvi; nvi = next) {
 168       next = nvi->next;
 169       if (inst_is_noop(nvi) ||
 170           (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
 171          nvc0_insn_delete(nvi);
 172       } else
 173          b->emit_size += 8;
 174    }
 175    pc->emit_size += b->emit_size;
 176
 177 #ifdef NOUVEAU_DEBUG
 178    if (!b->entry)
 179       debug_printf("BB:%i is now empty\n", b->id);
 180    else
 181       debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
 182 #endif
 183 }
 184
 185 static int
 186 nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
 187 {
 188    struct nv_pass pass;
 189
 190    pass.pc = pc;
 191
 192    pc->pass_seq++;
 193    nv_pass_flatten(&pass, root);
 194
 195    nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
 196
 197    return 0;
 198 }
 199
 200 int
 201 nvc0_pc_exec_pass2(struct nv_pc *pc)
 202 {
 203    int i, ret;
 204
 205    NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks);
 206
 207    pc->num_blocks = 0; /* will reorder bb_list */
 208
 209    for (i = 0; i < pc->num_subroutines + 1; ++i)
 210       if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
 211          return ret;
 212    return 0;
 213 }
 214
 215 static INLINE boolean
 216 is_cspace_load(struct nv_instruction *nvi)
 217 {
 218    if (!nvi)
 219       return FALSE;
 220    assert(nvi->indirect != 0);
 221    return (nvi->opcode == NV_OP_LD &&
 222            nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
 223            nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
 224 }
 225
 226 static INLINE boolean
 227 is_immd32_load(struct nv_instruction *nvi)
 228 {
 229    if (!nvi)
 230       return FALSE;
 231    return (nvi->opcode == NV_OP_MOV &&
 232            nvi->src[0]->value->reg.file == NV_FILE_IMM &&
 233            nvi->src[0]->value->reg.size == 4);
 234 }
 235
 236 static INLINE void
 237 check_swap_src_0_1(struct nv_instruction *nvi)
 238 {
 239    static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
 240
 241    struct nv_ref *src0 = nvi->src[0];
 242    struct nv_ref *src1 = nvi->src[1];
 243
 244    if (!nv_op_commutative(nvi->opcode) && NV_BASEOP(nvi->opcode) != NV_OP_SET)
 245       return;
 246    assert(src0 && src1 && src0->value && src1->value);
 247
 248    if (src1->value->reg.file != NV_FILE_GPR)
 249       return;
 250
 251    if (is_cspace_load(src0->value->insn)) {
 252       if (!is_cspace_load(src1->value->insn)) {
 253          nvi->src[0] = src1;
 254          nvi->src[1] = src0;
 255       }
 256    } else
 257    if (is_immd32_load(src0->value->insn)) {
 258       if (!is_cspace_load(src1->value->insn) &&
 259           !is_immd32_load(src1->value->insn)) {
 260          nvi->src[0] = src1;
 261          nvi->src[1] = src0;
 262       }
 263    }
 264
 265    if (nvi->src[0] != src0 && NV_BASEOP(nvi->opcode) == NV_OP_SET)
 266       nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7];
 267 }
 268
 269 static void
 270 nvi_set_indirect_load(struct nv_pc *pc,
 271                       struct nv_instruction *nvi, struct nv_value *val)
 272 {
 273    for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
 274         ++nvi->indirect);
 275    assert(nvi->indirect < 6);
 276    nv_reference(pc, nvi, nvi->indirect, val);
 277 }
 278
 279 static int
 280 nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
 281 {
 282    struct nv_instruction *nvi, *ld;
 283    int s;
 284
 285    for (nvi = b->entry; nvi; nvi = nvi->next) {
 286       check_swap_src_0_1(nvi);
 287
 288       for (s = 0; s < 3 && nvi->src[s]; ++s) {
 289          ld = nvi->src[s]->value->insn;
 290          if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
 291             continue;
 292          if (!nvc0_insn_can_load(nvi, s, ld))
 293             continue;
 294
 295          /* fold it ! */
 296          nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
 297          if (ld->indirect >= 0)
 298             nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);
 299
 300          if (!nvc0_insn_refcount(ld))
 301             nvc0_insn_delete(ld);
 302       }
 303    }
 304    DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);
 305
 306    return 0;
 307 }
 308
 309 /* NOTE: Assumes loads have not yet been folded. */
 310 static int
 311 nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
 312 {
 313    struct nv_instruction *nvi, *mi, *next;
 314    int j;
 315    uint8_t mod;
 316
 317    for (nvi = b->entry; nvi; nvi = next) {
 318       next = nvi->next;
 319       if (nvi->opcode == NV_OP_SUB) {
 320          nvi->src[1]->mod ^= NV_MOD_NEG;
 321          nvi->opcode = NV_OP_ADD;
 322       }
 323
 324       for (j = 0; j < 3 && nvi->src[j]; ++j) {
 325          mi = nvi->src[j]->value->insn;
 326          if (!mi)
 327             continue;
 328          if (mi->def[0]->refc > 1 || mi->predicate >= 0)
 329             continue;
 330
 331          if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
 332          else
 333          if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
 334          else
 335             continue;
 336          assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));
 337
 338          mod |= mi->src[0]->mod;
 339
 340          if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
 341             /* abs neg [abs] = abs */
 342             mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
 343          } else
 344          if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
 345             /* neg as opcode and modifier on same insn cannot occur */
 346             /* neg neg abs = abs, neg neg = identity */
 347             assert(j == 0);
 348             if (mod & NV_MOD_ABS)
 349                nvi->opcode = NV_OP_ABS;
 350             else
 351                nvi->opcode = NV_OP_MOV;
 352             mod = 0;
 353          }
 354
 355          if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod)
 356             continue;
 357
 358          nv_reference(ctx->pc, nvi, j, mi->src[0]->value);
 359
 360          nvi->src[j]->mod ^= mod;
 361       }
 362
 363       if (nvi->opcode == NV_OP_SAT) {
 364          mi = nvi->src[0]->value->insn;
 365
 366          if (mi->def[0]->refc > 1 ||
 367              (mi->opcode != NV_OP_ADD &&
 368               mi->opcode != NV_OP_MUL &&
 369               mi->opcode != NV_OP_MAD))
 370             continue;
 371          mi->saturate = 1;
 372          mi->def[0] = nvi->def[0];
 373          mi->def[0]->insn = mi;
 374          nvc0_insn_delete(nvi);
 375       }
 376    }
 377    DESCEND_ARBITRARY(j, nv_pass_lower_mods);
 378
 379    return 0;
 380 }
 381
 382 #define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
 383
 384 static void
 385 apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
 386 {
 387    if (mod & NV_MOD_ABS) {
 388       if (type == NV_TYPE_F32)
 389          *val &= 0x7fffffff;
 390       else
 391       if ((*val) & (1 << 31))
 392          *val = ~(*val) + 1;
 393    }
 394    if (mod & NV_MOD_NEG) {
 395       if (type == NV_TYPE_F32)
 396          *val ^= 0x80000000;
 397       else
 398          *val = ~(*val) + 1;
 399    }
 400    if (mod & NV_MOD_SAT) {
 401       union {
 402          float f;
 403          uint32_t u;
 404          int32_t i;
 405       } u;
 406       u.u = *val;
 407       if (type == NV_TYPE_F32) {
 408          u.f = CLAMP(u.f, -1.0f, 1.0f);
 409       } else
 410       if (type == NV_TYPE_U16) {
 411          u.u = MIN2(u.u, 0xffff);
 412       } else
 413       if (type == NV_TYPE_S16) {
 414          u.i = CLAMP(u.i, -32768, 32767);
 415       }
 416       *val = u.u;
 417    }
 418    if (mod & NV_MOD_NOT)
 419       *val = ~*val;
 420 }
 421
 422 static void
 423 constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
 424                     struct nv_value *src0, struct nv_value *src1)
 425 {
 426    struct nv_value *val;
 427    union {
 428       float f32;
 429       uint32_t u32;
 430       int32_t s32;
 431    } u0, u1, u;
 432    ubyte type;
 433
 434    if (!nvi->def[0])
 435       return;
 436    type = NV_OPTYPE(nvi->opcode);
 437
 438    u.u32 = 0;
 439    u0.u32 = src0->reg.imm.u32;
 440    u1.u32 = src1->reg.imm.u32;
 441
 442    apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
 443    apply_modifiers(&u1.u32, type, nvi->src[1]->mod);
 444
 445    switch (nvi->opcode) {
 446    case NV_OP_MAD_F32:
 447       if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
 448          return;
 449       /* fall through */
 450    case NV_OP_MUL_F32:
 451       u.f32 = u0.f32 * u1.f32;
 452       break;
 453    case NV_OP_MUL_B32:
 454       u.u32 = u0.u32 * u1.u32;
 455       break;
 456    case NV_OP_ADD_F32:
 457       u.f32 = u0.f32 + u1.f32;
 458       break;
 459    case NV_OP_ADD_B32:
 460       u.u32 = u0.u32 + u1.u32;
 461       break;
 462    case NV_OP_SUB_F32:
 463       u.f32 = u0.f32 - u1.f32;
 464       break;
 465       /*
 466    case NV_OP_SUB_B32:
 467       u.u32 = u0.u32 - u1.u32;
 468       break;
 469       */
 470    default:
 471       return;
 472    }
 473
 474    val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
 475    val->reg.imm.u32 = u.u32;
 476
 477    nv_reference(pc, nvi, 1, NULL);
 478    nv_reference(pc, nvi, 0, val);
 479
 480    if (nvi->opcode == NV_OP_MAD_F32) {
 481       nvi->src[1] = nvi->src[0];
 482       nvi->src[0] = nvi->src[2];
 483       nvi->src[2] = NULL;
 484       nvi->opcode = NV_OP_ADD_F32;
 485
 486       if (val->reg.imm.u32 == 0) {
 487          nvi->src[1] = NULL;
 488          nvi->opcode = NV_OP_MOV;
 489       }
 490    } else {
 491       nvi->opcode = NV_OP_MOV;
 492    }
 493 }
 494
 495 static void
 496 constant_operand(struct nv_pc *pc,
 497                  struct nv_instruction *nvi, struct nv_value *val, int s)
 498 {
 499    union {
 500       float f32;
 501       uint32_t u32;
 502       int32_t s32;
 503    } u;
 504    int shift;
 505    int t = s ? 0 : 1;
 506    uint op;
 507    ubyte type;
 508
 509    if (!nvi->def[0])
 510       return;
 511    type = NV_OPTYPE(nvi->opcode);
 512
 513    u.u32 = val->reg.imm.u32;
 514    apply_modifiers(&u.u32, type, nvi->src[s]->mod);
 515
 516    if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
 517       nvi->opcode = NV_OP_MOV;
 518       nv_reference(pc, nvi, t, NULL);
 519       if (s) {
 520          nvi->src[0] = nvi->src[1];
 521          nvi->src[1] = NULL;
 522       }
 523       return;
 524    }
 525
 526    switch (nvi->opcode) {
 527    case NV_OP_MUL_F32:
 528       if (u.f32 == 1.0f || u.f32 == -1.0f) {
 529          if (u.f32 == -1.0f)
 530             nvi->src[t]->mod ^= NV_MOD_NEG;
 531          switch (nvi->src[t]->mod) {
 532          case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
 533          case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
 534          case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
 535          default:
 536             return;
 537          }
 538          nvi->opcode = op;
 539          nv_reference(pc, nvi, 0, nvi->src[t]->value);
 540          nv_reference(pc, nvi, 1, NULL);
 541          nvi->src[0]->mod = 0;
 542       } else
 543       if (u.f32 == 2.0f || u.f32 == -2.0f) {
 544          if (u.f32 == -2.0f)
 545             nvi->src[t]->mod ^= NV_MOD_NEG;
 546          nvi->opcode = NV_OP_ADD_F32;
 547          nv_reference(pc, nvi, s, nvi->src[t]->value);
 548          nvi->src[s]->mod = nvi->src[t]->mod;
 549       }
 550       break;
 551    case NV_OP_ADD_F32:
 552       if (u.u32 == 0) {
 553          switch (nvi->src[t]->mod) {
 554          case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
 555          case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
 556          case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
 557          case NV_MOD_NEG | NV_MOD_ABS:
 558             op = NV_OP_CVT;
 559             nvi->ext.cvt.s = nvi->ext.cvt.d = type;
 560             break;
 561          default:
 562             return;
 563          }
 564          nvi->opcode = op;
 565          nv_reference(pc, nvi, 0, nvi->src[t]->value);
 566          nv_reference(pc, nvi, 1, NULL);
 567          if (nvi->opcode != NV_OP_CVT)
 568             nvi->src[0]->mod = 0;
 569       }
 570       break;
 571    case NV_OP_ADD_B32:
 572       if (u.u32 == 0) {
 573          assert(nvi->src[t]->mod == 0);
 574          nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
 575          nvi->ext.cvt.s = nvi->ext.cvt.d = type;
 576          nv_reference(pc, nvi, 0, nvi->src[t]->value);
 577          nv_reference(pc, nvi, 1, NULL);
 578       }
 579       break;
 580    case NV_OP_MUL_B32:
 581       /* multiplication by 0 already handled above */
 582       assert(nvi->src[s]->mod == 0);
 583       shift = ffs(u.s32) - 1;
 584       if (shift == 0) {
 585          nvi->opcode = NV_OP_MOV;
 586          nv_reference(pc, nvi, 0, nvi->src[t]->value);
 587          nv_reference(pc, nvi, 1, NULL);
 588       } else
 589       if (u.s32 > 0 && u.s32 == (1 << shift)) {
 590          nvi->opcode = NV_OP_SHL;
 591          (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift;
 592          nv_reference(pc, nvi, 0, nvi->src[t]->value);
 593          nv_reference(pc, nvi, 1, val);
 594          break;
 595       }
 596       break;
 597    case NV_OP_RCP:
 598       u.f32 = 1.0f / u.f32;
 599       (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
 600       nvi->opcode = NV_OP_MOV;
 601       assert(s == 0);
 602       nv_reference(pc, nvi, 0, val);
 603       break;
 604    case NV_OP_RSQ:
 605       u.f32 = 1.0f / sqrtf(u.f32);
 606       (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
 607       nvi->opcode = NV_OP_MOV;
 608       assert(s == 0);
 609       nv_reference(pc, nvi, 0, val);
 610       break;
 611    default:
 612       break;
 613    }
 614 }
 615
 616 static void
 617 handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
 618 {
 619    struct nv_value *src0 = nvi->src[0]->value;
 620    struct nv_value *src1 = nvi->src[1]->value;
 621
 622    if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
 623       return;
 624    if (src0->reg.file != NV_FILE_GPR)
 625       return;
 626    nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
 627    nvc0_insn_delete(nvi);
 628 }
 629
 630 /* check if we can MUL + ADD -> MAD/FMA */
 631 static void
 632 handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
 633 {
 634    struct nv_value *src0 = nvi->src[0]->value;
 635    struct nv_value *src1 = nvi->src[1]->value;
 636    struct nv_value *src;
 637    int s;
 638    uint8_t mod[4];
 639
 640    if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
 641    else
 642    if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
 643    else
 644       return;
 645
 646    if ((src0->insn && src0->insn->bb != nvi->bb) ||
 647        (src1->insn && src1->insn->bb != nvi->bb))
 648       return;
 649
 650    /* check for immediates from prior constant folding */
 651    if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
 652       return;
 653    src = nvi->src[s]->value;
 654
 655    mod[0] = nvi->src[0]->mod;
 656    mod[1] = nvi->src[1]->mod;
 657    mod[2] = src->insn->src[0]->mod;
 658    mod[3] = src->insn->src[1]->mod;
 659
 660    if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
 661       return;
 662
 663    nvi->opcode = NV_OP_MAD_F32;
 664
 665    nv_reference(ctx->pc, nvi, s, NULL);
 666    nvi->src[2] = nvi->src[!s];
 667    nvi->src[!s] = NULL;
 668
 669    nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
 670    nvi->src[0]->mod = mod[2] ^ mod[s];
 671    nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
 672    nvi->src[1]->mod = mod[3];
 673 }
 674
 675 static int
 676 nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
 677 {
 678    struct nv_instruction *nvi, *next;
 679    int j;
 680
 681    for (nvi = b->entry; nvi; nvi = next) {
 682       struct nv_value *src0, *src1;
 683       uint baseop = NV_BASEOP(nvi->opcode);
 684
 685       next = nvi->next;
 686
 687       src0 = nvc0_pc_find_immediate(nvi->src[0]);
 688       src1 = nvc0_pc_find_immediate(nvi->src[1]);
 689
 690       if (src0 && src1) {
 691          constant_expression(ctx->pc, nvi, src0, src1);
 692       } else {
 693          if (src0)
 694             constant_operand(ctx->pc, nvi, src0, 0);
 695          else
 696          if (src1)
 697             constant_operand(ctx->pc, nvi, src1, 1);
 698       }
 699
 700       if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
 701          handle_min_max(ctx, nvi);
 702       else
 703       if (nvi->opcode == NV_OP_ADD_F32)
 704          handle_add_mul(ctx, nvi);
 705    }
 706    DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);
 707
 708    return 0;
 709 }
 710
 711 /* TODO: redundant store elimination */
 712
 713 struct mem_record {
 714    struct mem_record *next;
 715    struct nv_instruction *insn;
 716    uint32_t ofst;
 717    uint32_t base;
 718    uint32_t size;
 719 };
 720
 721 #define MEM_RECORD_POOL_SIZE 1024
 722
 723 struct pass_reld_elim {
 724    struct nv_pc *pc;
 725
 726    struct mem_record *imm;
 727    struct mem_record *mem_v;
 728    struct mem_record *mem_a;
 729    struct mem_record *mem_c[16];
 730    struct mem_record *mem_l;
 731
 732    struct mem_record pool[MEM_RECORD_POOL_SIZE];
 733    int alloc;
 734 };
 735
 736 /* Extend the load operation in @rec to also cover the data loaded by @ld.
 737  * The two loads may not overlap but reference adjacent memory locations.
 738  */
 739 static void
 740 combine_load(struct nv_pc *pc, struct mem_record *rec,
 741              struct nv_instruction *ld)
 742 {
 743    struct nv_instruction *fv = rec->insn;
 744    struct nv_value *mem = ld->src[0]->value;
 745    uint32_t size = rec->size + mem->reg.size;
 746    int j;
 747    int d = rec->size / 4;
 748
 749    assert(rec->size < 16);
 750    if (rec->ofst > mem->reg.address) {
 751       if ((size == 8 && mem->reg.address & 3) ||
 752           (size > 8 && mem->reg.address & 7))
 753          return;
 754       rec->ofst = mem->reg.address;
 755       for (j = 0; j < d; ++j)
 756          fv->def[mem->reg.size / 4 + j] = fv->def[j];
 757       d = 0;
 758    } else
 759    if ((size == 8 && rec->ofst & 3) ||
 760        (size > 8 && rec->ofst & 7)) {
 761       return;
 762    }
 763
 764    for (j = 0; j < mem->reg.size / 4; ++j) {
 765       fv->def[d] = ld->def[j];
 766       fv->def[d++]->insn = fv;
 767    }
 768
 769    if (fv->src[0]->value->refc > 1)
 770       nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value));
 771    fv->src[0]->value->reg.address = rec->ofst;
 772    fv->src[0]->value->reg.size = rec->size = size;
 773
 774    nvc0_insn_delete(ld);
 775 }
 776
 777 static void
 778 combine_export(struct mem_record *rec, struct nv_instruction *ex)
 779 {
 780
 781 }
 782
 783 static INLINE void
 784 add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
 785                uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
 786 {
 787    struct mem_record *it = &ctx->pool[ctx->alloc++];
 788
 789    it->next = *rec;
 790    *rec = it;
 791    it->base = base;
 792    it->ofst = ofst;
 793    it->insn = nvi;
 794    it->size = nvi->src[0]->value->reg.size;
 795 }
 796
 797 /* vectorize and reuse loads from memory or of immediates */
 798 static int
 799 nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
 800 {
 801    struct mem_record **rec, *it;
 802    struct nv_instruction *ld, *next;
 803    struct nv_value *mem;
 804    uint32_t base, ofst;
 805    int s;
 806
 807    for (ld = b->entry; ld; ld = next) {
 808       next = ld->next;
 809
 810       if (is_cspace_load(ld)) {
 811          mem = ld->src[0]->value;
 812          rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
 813       } else
 814       if (ld->opcode == NV_OP_VFETCH) {
 815          mem = ld->src[0]->value;
 816          rec = &ctx->mem_a;
 817       } else
 818       if (ld->opcode == NV_OP_EXPORT) {
 819          mem = ld->src[0]->value;
 820          if (mem->reg.file != NV_FILE_MEM_V)
 821             continue;
 822          rec = &ctx->mem_v;
 823       } else {
 824          continue;
 825       }
 826       if (ld->def[0] && ld->def[0]->refc == 0)
 827          continue;
 828       ofst = mem->reg.address;
 829       base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;
 830
 831       for (it = *rec; it; it = it->next) {
 832          if (it->base == base &&
 833              ((it->ofst >> 4) == (ofst >> 4)) &&
 834              ((it->ofst + it->size == ofst) ||
 835               (it->ofst - mem->reg.size == ofst))) {
 836             /* only NV_OP_VFETCH can load exactly 12 bytes */
 837             if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
 838                continue;
 839             if (it->ofst < ofst) {
 840                if ((it->ofst & 0xf) == 4)
 841                   continue;
 842             } else
 843             if ((ofst & 0xf) == 4)
 844                continue;
 845             break;
 846          }
 847       }
 848       if (it) {
 849          switch (ld->opcode) {
 850          case NV_OP_EXPORT: combine_export(it, ld); break;
 851          default:
 852             combine_load(ctx->pc, it, ld);
 853             break;
 854          }
 855       } else
 856       if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
 857          add_mem_record(ctx, rec, base, ofst, ld);
 858       }
 859    }
 860
 861    ctx->alloc = 0;
 862    ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL;
 863    for (s = 0; s < 16; ++s)
 864       ctx->mem_c[s] = NULL;
 865
 866    DESCEND_ARBITRARY(s, nv_pass_mem_opt);
 867    return 0;
 868 }
 869
 870 static void
 871 eliminate_store(struct mem_record *rec, struct nv_instruction *st)
 872 {
 873 }
 874
 875 /* elimination of redundant stores */
 876 static int
 877 pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
 878 {
 879    struct mem_record **rec, *it;
 880    struct nv_instruction *st, *next;
 881    struct nv_value *mem;
 882    uint32_t base, ofst, size;
 883    int s;
 884
 885    for (st = b->entry; st; st = next) {
 886       next = st->next;
 887
 888       if (st->opcode == NV_OP_ST) {
 889          mem = st->src[0]->value;
 890          rec = &ctx->mem_l;
 891       } else
 892       if (st->opcode == NV_OP_EXPORT) {
 893          mem = st->src[0]->value;
 894          if (mem->reg.file != NV_FILE_MEM_V)
 895             continue;
 896          rec = &ctx->mem_v;
 897       } else
 898       if (st->opcode == NV_OP_ST) {
 899          /* TODO: purge */
 900       }
 901       ofst = mem->reg.address;
 902       base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
 903       size = mem->reg.size;
 904
 905       for (it = *rec; it; it = it->next) {
 906          if (it->base == base &&
 907              (it->ofst <= ofst && (it->ofst + size) > ofst))
 908             break;
 909       }
 910       if (it)
 911          eliminate_store(it, st);
 912       else
 913          add_mem_record(ctx, rec, base, ofst, st);
 914    }
 915
 916    DESCEND_ARBITRARY(s, nv_pass_mem_opt);
 917    return 0;
 918 }
 919
 920 /* TODO: properly handle loads from l[] memory in the presence of stores */
 921 static int
 922 nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
 923 {
 924 #if 0
 925    struct load_record **rec, *it;
 926    struct nv_instruction *ld, *next;
 927    uint64_t data[2];
 928    struct nv_value *val;
 929    int j;
 930
 931    for (ld = b->entry; ld; ld = next) {
 932       next = ld->next;
 933       if (!ld->src[0])
 934          continue;
 935       val = ld->src[0]->value;
 936       rec = NULL;
 937
 938       if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
 939          data[0] = val->reg.id;
 940          data[1] = 0;
 941          rec = &ctx->mem_v;
 942       } else
 943       if (ld->opcode == NV_OP_LDA) {
 944          data[0] = val->reg.id;
 945          data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
 946          if (val->reg.file >= NV_FILE_MEM_C(0) &&
 947              val->reg.file <= NV_FILE_MEM_C(15))
 948             rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
 949          else
 950          if (val->reg.file == NV_FILE_MEM_S)
 951             rec = &ctx->mem_s;
 952          else
 953          if (val->reg.file == NV_FILE_MEM_L)
 954             rec = &ctx->mem_l;
 955       } else
 956       if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
 957          data[0] = val->reg.imm.u32;
 958          data[1] = 0;
 959          rec = &ctx->imm;
 960       }
 961
 962       if (!rec || !ld->def[0]->refc)
 963          continue;
 964
 965       for (it = *rec; it; it = it->next)
 966          if (it->data[0] == data[0] && it->data[1] == data[1])
 967             break;
 968
 969       if (it) {
 970          if (ld->def[0]->reg.id >= 0)
 971             it->value = ld->def[0];
 972          else
 973          if (!ld->fixed)
 974             nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
 975       } else {
 976          if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
 977             continue;
 978          it = &ctx->pool[ctx->alloc++];
 979          it->next = *rec;
 980          it->data[0] = data[0];
 981          it->data[1] = data[1];
 982          it->value = ld->def[0];
 983          *rec = it;
 984       }
 985    }
 986
 987    ctx->imm = NULL;
 988    ctx->mem_s = NULL;
 989    ctx->mem_v = NULL;
 990    for (j = 0; j < 16; ++j)
 991       ctx->mem_c[j] = NULL;
 992    ctx->mem_l = NULL;
 993    ctx->alloc = 0;
 994
 995    DESCEND_ARBITRARY(j, nv_pass_reload_elim);
 996 #endif
 997    return 0;
 998 }
 999
1000 static int
1001 nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
1002 {
1003    int i, c, j;
1004
1005    for (i = 0; i < ctx->pc->num_instructions; ++i) {
1006       struct nv_instruction *nvi = &ctx->pc->instructions[i];
1007       struct nv_value *def[4];
1008
1009       if (!nv_is_texture_op(nvi->opcode))
1010          continue;
1011       nvi->tex_mask = 0;
1012
1013       for (c = 0; c < 4; ++c) {
1014          if (nvi->def[c]->refc)
1015             nvi->tex_mask |= 1 << c;
1016          def[c] = nvi->def[c];
1017       }
1018
1019       j = 0;
1020       for (c = 0; c < 4; ++c)
1021          if (nvi->tex_mask & (1 << c))
1022             nvi->def[j++] = def[c];
1023       for (c = 0; c < 4; ++c)
1024          if (!(nvi->tex_mask & (1 << c)))
1025            nvi->def[j++] = def[c];
1026       assert(j == 4);
1027    }
1028    return 0;
1029 }
1030
1031 struct nv_pass_dce {
1032    struct nv_pc *pc;
1033    uint removed;
1034 };
1035
1036 static int
1037 nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
1038 {
1039    int j;
1040    struct nv_instruction *nvi, *next;
1041
1042    for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
1043       next = nvi->next;
1044
1045       if (inst_removable(nvi)) {
1046          nvc0_insn_delete(nvi);
1047          ++ctx->removed;
1048       }
1049    }
1050    DESCEND_ARBITRARY(j, nv_pass_dce);
1051
1052    return 0;
1053 }
1054
1055 /* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
1056  * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
1057  * BREAK and dummy ELSE block.
1058  */
1059 static INLINE boolean
1060 bb_is_if_else_endif(struct nv_basic_block *bb)
1061 {
1062    if (!bb->out[0] || !bb->out[1])
1063       return FALSE;
1064
1065    if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
1066       return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
1067               !bb->out[1]->out[1]);
1068    } else {
1069       return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
1070               !bb->out[0]->out[1] &&
1071               !bb->out[1]->out[1]);
1072    }
1073 }
1074
1075 /* Predicate instructions and delete any branch at the end if it is
1076  * not a break from a loop.
1077  */
1078 static void
1079 predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
1080                        struct nv_value *pred, uint8_t cc)
1081 {
1082    struct nv_instruction *nvi, *prev;
1083    int s;
1084
1085    if (!b->entry)
1086       return;
1087    for (nvi = b->entry; nvi; nvi = nvi->next) {
1088       prev = nvi;
1089       if (inst_is_noop(nvi))
1090          continue;
1091       for (s = 0; nvi->src[s]; ++s);
1092       assert(s < 6);
1093       nvi->predicate = s;
1094       nvi->cc = cc;
1095       nv_reference(pc, nvi, nvi->predicate, pred);
1096    }
1097    if (prev->opcode == NV_OP_BRA &&
1098        b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
1099        b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
1100       nvc0_insn_delete(prev);
1101 }
1102
1103 static INLINE boolean
1104 may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
1105 {
1106    if (nvi->def[0] && values_equal(nvi->def[0], pred))
1107       return FALSE;
1108    return nvc0_insn_is_predicateable(nvi);
1109 }
1110
1111 /* Transform IF/ELSE/ENDIF constructs into predicated instructions
1112  * where feasible.
1113  */
1114 static int
1115 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
1116 {
1117    struct nv_instruction *nvi;
1118    struct nv_value *pred;
1119    int k;
1120    int n0, n1; /* instruction counts of outgoing blocks */
1121
1122    if (bb_is_if_else_endif(b)) {
1123       assert(b->exit && b->exit->opcode == NV_OP_BRA);
1124
1125       assert(b->exit->predicate >= 0);
1126       pred = b->exit->src[b->exit->predicate]->value;
1127
1128       n1 = n0 = 0;
1129       for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
1130          if (!may_predicate_insn(nvi, pred))
1131             break;
1132       if (!nvi) {
1133          /* we're after register allocation, so there always is an ELSE block */
1134          for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
1135             if (!may_predicate_insn(nvi, pred))
1136                break;
1137       }
1138
1139       /* 12 is an arbitrary limit */
1140       if (!nvi && n0 < 12 && n1 < 12) {
1141          predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
1142          predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);
1143
1144          nvc0_insn_delete(b->exit); /* delete the branch */
1145
1146          /* and a potential joinat before it */
1147          if (b->exit && b->exit->opcode == NV_OP_JOINAT)
1148             nvc0_insn_delete(b->exit);
1149
1150          /* remove join operations at the end of the conditional */
1151          k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
1152          if ((nvi = b->out[0]->out[k]->entry)) {
1153             nvi->join = 0;
1154             if (nvi->opcode == NV_OP_JOIN)
1155                nvc0_insn_delete(nvi);
1156          }
1157       }
1158    }
1159    DESCEND_ARBITRARY(k, nv_pass_flatten);
1160
1161    return 0;
1162 }
1163
1164 /* Tests instructions for equality, but independently of sources. */
1165 static boolean
1166 is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
1167 {
1168    if (a->opcode != b->opcode)
1169       return FALSE;
1170    if (nv_is_texture_op(a->opcode)) {
1171       if (a->ext.tex.t != b->ext.tex.t ||
1172           a->ext.tex.s != b->ext.tex.s)
1173          return FALSE;
1174       if (a->tex_dim != b->tex_dim ||
1175           a->tex_array != b->tex_array ||
1176           a->tex_cube != b->tex_cube ||
1177           a->tex_shadow != b->tex_shadow ||
1178           a->tex_live != b->tex_live)
1179          return FALSE;
1180    } else
1181    if (a->opcode == NV_OP_CVT) {
1182       if (a->ext.cvt.s != b->ext.cvt.s ||
1183           a->ext.cvt.d != b->ext.cvt.d)
1184          return FALSE;
1185    } else
1186    if (NV_BASEOP(a->opcode) == NV_OP_SET ||
1187        NV_BASEOP(a->opcode) == NV_OP_SLCT) {
1188       if (a->set_cond != b->set_cond)
1189          return FALSE;
1190    } else
1191    if (a->opcode == NV_OP_LINTERP ||
1192        a->opcode == NV_OP_PINTERP) {
1193       if (a->centroid != b->centroid ||
1194           a->flat != b->flat)
1195          return FALSE;
1196    }
1197    if (a->cc != b->cc)
1198       return FALSE;
1199    if (a->lanes != b->lanes ||
1200        a->patch != b->patch ||
1201        a->saturate != b->saturate)
1202       return FALSE;
1203    if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
1204       return FALSE;
1205    return TRUE;
1206 }
1207
1208 /* local common subexpression elimination, stupid O(n^2) implementation */
1209 static int
1210 nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
1211 {
1212    struct nv_instruction *ir, *ik, *next;
1213    struct nv_instruction *entry = b->phi ? b->phi : b->entry;
1214    int s, d;
1215    unsigned int reps;
1216
1217    do {
1218       reps = 0;
1219       for (ir = entry; ir; ir = next) {
1220          next = ir->next;
1221          if (ir->fixed)
1222             continue;
1223          for (ik = entry; ik != ir; ik = ik->next) {
1224             if (!is_operation_equal(ir, ik))
1225                continue;
1226             if (!ir->def[0] || !ik->def[0])
1227                continue;
1228
1229             if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
1230                continue;
1231
1232             for (d = 0; d < 4; ++d) {
1233                if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
1234                   break;
1235                if (ir->def[d]) {
1236                   if (!values_equal(ik->def[0], ir->def[0]))
1237                      break;
1238                } else {
1239                   d = 4;
1240                   break;
1241                }
1242             }
1243             if (d != 4)
1244                continue;
1245
1246             for (s = 0; s < 5; ++s) {
1247                struct nv_value *a, *b;
1248
1249                if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
1250                   break;
1251                if (!ir->src[s]) {
1252                   s = 5;
1253                   break;
1254                }
1255
1256                if (ik->src[s]->mod != ir->src[s]->mod)
1257                   break;
1258                a = ik->src[s]->value;
1259                b = ir->src[s]->value;
1260                if (a == b)
1261                   continue;
1262                if (a->reg.file != b->reg.file ||
1263                    a->reg.id < 0 || /* this excludes memory loads/stores */
1264                    a->reg.id != b->reg.id)
1265                   break;
1266             }
1267             if (s == 5) {
1268                nvc0_insn_delete(ir);
1269                for (d = 0; d < 4 && ir->def[d]; ++d)
1270                   nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
1271                ++reps;
1272                break;
1273             }
1274          }
1275       }
1276    } while(reps);
1277
1278    DESCEND_ARBITRARY(s, nv_pass_cse);
1279
1280    return 0;
1281 }
1282
1283 /* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
1284  * neighbouring registers. CSE might have messed this up.
1285  * Just generate a MOV for each source to avoid conflicts if they're used in
1286  * multiple NV_OP_BIND at different positions.
1287  */
1288 static int
1289 nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
1290 {
1291    struct nv_value *val;
1292    struct nv_instruction *bnd, *nvi, *next;
1293    int s;
1294
1295    for (bnd = b->entry; bnd; bnd = next) {
1296       next = bnd->next;
1297       if (bnd->opcode != NV_OP_BIND)
1298          continue;
1299       for (s = 0; s < 4 && bnd->src[s]; ++s) {
1300          val = bnd->src[s]->value;
1301
1302          nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
1303          nvi->def[0] = new_value_like(ctx->pc, val);
1304          nvi->def[0]->insn = nvi;
1305          nv_reference(ctx->pc, nvi, 0, val);
1306          nv_reference(ctx->pc, bnd, s, nvi->def[0]);
1307
1308          nvc0_insn_insert_before(bnd, nvi);
1309       }
1310    }
1311    DESCEND_ARBITRARY(s, nv_pass_fix_bind);
1312
1313    return 0;
1314 }
1315
1316 static int
1317 nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
1318 {
1319    struct pass_reld_elim *reldelim;
1320    struct nv_pass pass;
1321    struct nv_pass_dce dce;
1322    int ret;
1323
1324    pass.n = 0;
1325    pass.pc = pc;
1326
1327    /* Do CSE so we can just compare values by pointer in subsequent passes. */
1328    pc->pass_seq++;
1329    ret = nv_pass_cse(&pass, root);
1330    if (ret)
1331       return ret;
1332
1333    /* Do this first, so we don't have to pay attention
1334     * to whether sources are supported memory loads.
1335     */
1336    pc->pass_seq++;
1337    ret = nv_pass_algebraic_opt(&pass, root);
1338    if (ret)
1339       return ret;
1340
1341    pc->pass_seq++;
1342    ret = nv_pass_lower_mods(&pass, root);
1343    if (ret)
1344       return ret;
1345
1346    pc->pass_seq++;
1347    ret = nvc0_pass_fold_loads(&pass, root);
1348    if (ret)
1349       return ret;
1350
1351    if (pc->opt_reload_elim) {
1352       reldelim = CALLOC_STRUCT(pass_reld_elim);
1353       reldelim->pc = pc;
1354
1355       pc->pass_seq++;
1356       ret = nv_pass_reload_elim(reldelim, root);
1357       if (ret) {
1358          FREE(reldelim);
1359          return ret;
1360       }
1361       memset(reldelim, 0, sizeof(struct pass_reld_elim));
1362       reldelim->pc = pc;
1363    }
1364
1365    /* May run DCE before load-combining since that pass will clean up
1366     * after itself.
1367     */
1368    dce.pc = pc;
1369    do {
1370       dce.removed = 0;
1371       pc->pass_seq++;
1372       ret = nv_pass_dce(&dce, root);
1373       if (ret)
1374          return ret;
1375    } while (dce.removed);
1376
1377    if (pc->opt_reload_elim) {
1378       pc->pass_seq++;
1379       ret = nv_pass_mem_opt(reldelim, root);
1380       if (!ret) {
1381          memset(reldelim, 0, sizeof(struct pass_reld_elim));
1382          reldelim->pc = pc;
1383
1384          pc->pass_seq++;
1385          ret = nv_pass_mem_opt(reldelim, root);
1386       }
1387       FREE(reldelim);
1388       if (ret)
1389          return ret;
1390    }
1391
1392    ret = nv_pass_tex_mask(&pass, root);
1393    if (ret)
1394       return ret;
1395
1396    pc->pass_seq++;
1397    ret = nv_pass_fix_bind(&pass, root);
1398
1399    return ret;
1400 }
1401
1402 int
1403 nvc0_pc_exec_pass0(struct nv_pc *pc)
1404 {
1405    int i, ret;
1406
1407    for (i = 0; i < pc->num_subroutines + 1; ++i)
1408       if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
1409          return ret;
1410    return 0;
1411 }