src/gallium/drivers/nvc0/nvc0_pc_optimize.c

   1 /*
   2  * Copyright 2010 Christoph Bumiller
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
  19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  */
  22
  23 #include "nvc0_pc.h"
  24 #include "nvc0_program.h"
  25
  26 #define DESCEND_ARBITRARY(j, f)                                 \
  27 do {                                                            \
  28    b->pass_seq = ctx->pc->pass_seq;                             \
  29                                                                 \
  30    for (j = 0; j < 2; ++j)                                      \
  31       if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
  32          f(ctx, b->out[j]);                                       \
  33 } while (0)
  34
  35 static INLINE boolean
  36 registers_interfere(struct nv_value *a, struct nv_value *b)
  37 {
  38    if (a->reg.file != b->reg.file)
  39       return FALSE;
  40    if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
  41       return FALSE;
  42
  43    assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);
  44
  45    if (a->join->reg.id < b->join->reg.id) {
  46       return (a->join->reg.id + a->reg.size >= b->join->reg.id);
  47    } else
  48    if (a->join->reg.id > b->join->reg.id) {
  49       return (b->join->reg.id + b->reg.size >= a->join->reg.id);
  50    }
  51
  52    return FALSE;
  53 }
  54
  55 static INLINE boolean
  56 values_equal(struct nv_value *a, struct nv_value *b)
  57 {
  58    if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
  59       return FALSE;
  60    if (NV_IS_MEMORY_FILE(a->reg.file))
  61       return a->reg.address == b->reg.address;
  62    else
  63       return a->join->reg.id == b->join->reg.id;
  64 }
  65
  66 #if 0
  67 static INLINE boolean
  68 inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
  69 {
  70    int si, di;
  71
  72    for (di = 0; di < 4 && a->def[di]; ++di)
  73       for (si = 0; si < 5 && b->src[si]; ++si)
  74          if (registers_interfere(a->def[di], b->src[si]->value))
  75             return FALSE;
  76
  77    return TRUE;
  78 }
  79
  80 /* Check whether we can swap the order of the instructions,
  81  * where a & b may be either the earlier or the later one.
  82  */
  83 static boolean
  84 inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
  85 {
  86    return inst_commutation_check(a, b) && inst_commutation_check(b, a);
  87 }
  88 #endif
  89
  90 static INLINE boolean
  91 inst_removable(struct nv_instruction *nvi)
  92 {
  93    if (nvi->opcode == NV_OP_ST)
  94       return FALSE;
  95    return (!(nvi->terminator ||
  96              nvi->join ||
  97              nvi->target ||
  98              nvi->fixed ||
  99              nvc0_insn_refcount(nvi)));
 100 }
 101
 102 /* Check if we do not actually have to emit this instruction. */
 103 static INLINE boolean
 104 inst_is_noop(struct nv_instruction *nvi)
 105 {
 106    if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
 107       return TRUE;
 108    if (nvi->terminator || nvi->join)
 109       return FALSE;
 110    if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
 111       return TRUE;
 112    if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
 113       return FALSE;
 114    if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
 115       return FALSE;
 116
 117    if (nvi->src[0]->value->join->reg.id < 0) {
 118       NOUVEAU_DBG("inst_is_noop: orphaned value detected\n");
 119       return TRUE;
 120    }
 121
 122    if (nvi->opcode == NV_OP_SELECT)
 123       if (!values_equal(nvi->def[0], nvi->src[1]->value))
 124          return FALSE;
 125    return values_equal(nvi->def[0], nvi->src[0]->value);
 126 }
 127
 128 struct nv_pass {
 129    struct nv_pc *pc;
 130    int n;
 131    void *priv;
 132 };
 133
 134 static int
 135 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
 136
 137 static void
 138 nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
 139 {
 140    struct nv_pc *pc = (struct nv_pc *)priv;
 141    struct nv_basic_block *in;
 142    struct nv_instruction *nvi, *next;
 143    int j;
 144
 145    for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);
 146
 147    if (j >= 0) {
 148       in = pc->bb_list[j];
 149
 150       /* check for no-op branches (BRA $PC+8) */
 151       if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
 152          in->emit_size -= 8;
 153          pc->emit_size -= 8;
 154
 155          for (++j; j < pc->num_blocks; ++j)
 156             pc->bb_list[j]->emit_pos -= 8;
 157
 158          nvc0_insn_delete(in->exit);
 159       }
 160       b->emit_pos = in->emit_pos + in->emit_size;
 161    }
 162
 163    pc->bb_list[pc->num_blocks++] = b;
 164
 165    /* visit node */
 166
 167    for (nvi = b->entry; nvi; nvi = next) {
 168       next = nvi->next;
 169       if (inst_is_noop(nvi) ||
 170           (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
 171          nvc0_insn_delete(nvi);
 172       } else
 173          b->emit_size += 8;
 174    }
 175    pc->emit_size += b->emit_size;
 176
 177 #ifdef NOUVEAU_DEBUG
 178    if (!b->entry)
 179       debug_printf("BB:%i is now empty\n", b->id);
 180    else
 181       debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
 182 #endif
 183 }
 184
 185 static int
 186 nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
 187 {
 188    struct nv_pass pass;
 189
 190    pass.pc = pc;
 191
 192    pc->pass_seq++;
 193    nv_pass_flatten(&pass, root);
 194
 195    nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
 196
 197    return 0;
 198 }
 199
 200 int
 201 nvc0_pc_exec_pass2(struct nv_pc *pc)
 202 {
 203    int i, ret;
 204
 205    NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks);
 206
 207    pc->num_blocks = 0; /* will reorder bb_list */
 208
 209    for (i = 0; i < pc->num_subroutines + 1; ++i)
 210       if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
 211          return ret;
 212    return 0;
 213 }
 214
 215 static INLINE boolean
 216 is_cspace_load(struct nv_instruction *nvi)
 217 {
 218    if (!nvi)
 219       return FALSE;
 220    assert(nvi->indirect != 0);
 221    return (nvi->opcode == NV_OP_LD &&
 222            nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
 223            nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
 224 }
 225
 226 static INLINE boolean
 227 is_immd32_load(struct nv_instruction *nvi)
 228 {
 229    if (!nvi)
 230       return FALSE;
 231    return (nvi->opcode == NV_OP_MOV &&
 232            nvi->src[0]->value->reg.file == NV_FILE_IMM &&
 233            nvi->src[0]->value->reg.size == 4);
 234 }
 235
 236 static INLINE void
 237 check_swap_src_0_1(struct nv_instruction *nvi)
 238 {
 239    static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
 240
 241    struct nv_ref *src0 = nvi->src[0];
 242    struct nv_ref *src1 = nvi->src[1];
 243
 244    if (!nv_op_commutative(nvi->opcode) && NV_BASEOP(nvi->opcode) != NV_OP_SET)
 245       return;
 246    assert(src0 && src1 && src0->value && src1->value);
 247
 248    if (is_cspace_load(src0->value->insn)) {
 249       if (!is_cspace_load(src1->value->insn)) {
 250          nvi->src[0] = src1;
 251          nvi->src[1] = src0;
 252       }
 253    } else
 254    if (is_immd32_load(src0->value->insn)) {
 255       if (!is_cspace_load(src1->value->insn) &&
 256           !is_immd32_load(src1->value->insn)) {
 257          nvi->src[0] = src1;
 258          nvi->src[1] = src0;
 259       }
 260    }
 261
 262    if (nvi->src[0] != src0 && NV_BASEOP(nvi->opcode) == NV_OP_SET)
 263       nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7];
 264 }
 265
 266 static void
 267 nvi_set_indirect_load(struct nv_pc *pc,
 268                       struct nv_instruction *nvi, struct nv_value *val)
 269 {
 270    for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
 271         ++nvi->indirect);
 272    assert(nvi->indirect < 6);
 273    nv_reference(pc, nvi, nvi->indirect, val);
 274 }
 275
 276 static int
 277 nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
 278 {
 279    struct nv_instruction *nvi, *ld;
 280    int s;
 281
 282    for (nvi = b->entry; nvi; nvi = nvi->next) {
 283       check_swap_src_0_1(nvi);
 284
 285       for (s = 0; s < 3 && nvi->src[s]; ++s) {
 286          ld = nvi->src[s]->value->insn;
 287          if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
 288             continue;
 289          if (!nvc0_insn_can_load(nvi, s, ld))
 290             continue;
 291
 292          /* fold it ! */
 293          nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
 294          if (ld->indirect >= 0)
 295             nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);
 296
 297          if (!nvc0_insn_refcount(ld))
 298             nvc0_insn_delete(ld);
 299       }
 300    }
 301    DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);
 302
 303    return 0;
 304 }
 305
 306 /* NOTE: Assumes loads have not yet been folded. */
 307 static int
 308 nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
 309 {
 310    struct nv_instruction *nvi, *mi, *next;
 311    int j;
 312    uint8_t mod;
 313
 314    for (nvi = b->entry; nvi; nvi = next) {
 315       next = nvi->next;
 316       if (nvi->opcode == NV_OP_SUB) {
 317          nvi->src[1]->mod ^= NV_MOD_NEG;
 318          nvi->opcode = NV_OP_ADD;
 319       }
 320
 321       for (j = 0; j < 3 && nvi->src[j]; ++j) {
 322          mi = nvi->src[j]->value->insn;
 323          if (!mi)
 324             continue;
 325          if (mi->def[0]->refc > 1 || mi->predicate >= 0)
 326             continue;
 327
 328          if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
 329          else
 330          if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
 331          else
 332             continue;
 333          assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));
 334
 335          mod |= mi->src[0]->mod;
 336
 337          if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
 338             /* abs neg [abs] = abs */
 339             mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
 340          } else
 341          if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
 342             /* neg as opcode and modifier on same insn cannot occur */
 343             /* neg neg abs = abs, neg neg = identity */
 344             assert(j == 0);
 345             if (mod & NV_MOD_ABS)
 346                nvi->opcode = NV_OP_ABS;
 347             else
 348                nvi->opcode = NV_OP_MOV;
 349             mod = 0;
 350          }
 351
 352          if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod)
 353             continue;
 354
 355          nv_reference(ctx->pc, nvi, j, mi->src[0]->value);
 356
 357          nvi->src[j]->mod ^= mod;
 358       }
 359
 360       if (nvi->opcode == NV_OP_SAT) {
 361          mi = nvi->src[0]->value->insn;
 362
 363          if (mi->def[0]->refc > 1 ||
 364              (mi->opcode != NV_OP_ADD &&
 365               mi->opcode != NV_OP_MUL &&
 366               mi->opcode != NV_OP_MAD))
 367             continue;
 368          mi->saturate = 1;
 369          mi->def[0] = nvi->def[0];
 370          mi->def[0]->insn = mi;
 371          nvc0_insn_delete(nvi);
 372       }
 373    }
 374    DESCEND_ARBITRARY(j, nv_pass_lower_mods);
 375
 376    return 0;
 377 }
 378
 379 #define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
 380
 381 static void
 382 apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
 383 {
 384    if (mod & NV_MOD_ABS) {
 385       if (type == NV_TYPE_F32)
 386          *val &= 0x7fffffff;
 387       else
 388       if ((*val) & (1 << 31))
 389          *val = ~(*val) + 1;
 390    }
 391    if (mod & NV_MOD_NEG) {
 392       if (type == NV_TYPE_F32)
 393          *val ^= 0x80000000;
 394       else
 395          *val = ~(*val) + 1;
 396    }
 397    if (mod & NV_MOD_SAT) {
 398       union {
 399          float f;
 400          uint32_t u;
 401          int32_t i;
 402       } u;
 403       u.u = *val;
 404       if (type == NV_TYPE_F32) {
 405          u.f = CLAMP(u.f, -1.0f, 1.0f);
 406       } else
 407       if (type == NV_TYPE_U16) {
 408          u.u = MIN2(u.u, 0xffff);
 409       } else
 410       if (type == NV_TYPE_S16) {
 411          u.i = CLAMP(u.i, -32768, 32767);
 412       }
 413       *val = u.u;
 414    }
 415    if (mod & NV_MOD_NOT)
 416       *val = ~*val;
 417 }
 418
 419 static void
 420 constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
 421                     struct nv_value *src0, struct nv_value *src1)
 422 {
 423    struct nv_value *val;
 424    union {
 425       float f32;
 426       uint32_t u32;
 427       int32_t s32;
 428    } u0, u1, u;
 429    ubyte type;
 430
 431    if (!nvi->def[0])
 432       return;
 433    type = NV_OPTYPE(nvi->opcode);
 434
 435    u.u32 = 0;
 436    u0.u32 = src0->reg.imm.u32;
 437    u1.u32 = src1->reg.imm.u32;
 438
 439    apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
 440    apply_modifiers(&u1.u32, type, nvi->src[1]->mod);
 441
 442    switch (nvi->opcode) {
 443    case NV_OP_MAD_F32:
 444       if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
 445          return;
 446       /* fall through */
 447    case NV_OP_MUL_F32:
 448       u.f32 = u0.f32 * u1.f32;
 449       break;
 450    case NV_OP_MUL_B32:
 451       u.u32 = u0.u32 * u1.u32;
 452       break;
 453    case NV_OP_ADD_F32:
 454       u.f32 = u0.f32 + u1.f32;
 455       break;
 456    case NV_OP_ADD_B32:
 457       u.u32 = u0.u32 + u1.u32;
 458       break;
 459    case NV_OP_SUB_F32:
 460       u.f32 = u0.f32 - u1.f32;
 461       break;
 462       /*
 463    case NV_OP_SUB_B32:
 464       u.u32 = u0.u32 - u1.u32;
 465       break;
 466       */
 467    default:
 468       return;
 469    }
 470
 471    val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
 472    val->reg.imm.u32 = u.u32;
 473
 474    nv_reference(pc, nvi, 1, NULL);
 475    nv_reference(pc, nvi, 0, val);
 476
 477    if (nvi->opcode == NV_OP_MAD_F32) {
 478       nvi->src[1] = nvi->src[0];
 479       nvi->src[0] = nvi->src[2];
 480       nvi->src[2] = NULL;
 481       nvi->opcode = NV_OP_ADD_F32;
 482
 483       if (val->reg.imm.u32 == 0) {
 484          nvi->src[1] = NULL;
 485          nvi->opcode = NV_OP_MOV;
 486       }
 487    } else {
 488       nvi->opcode = NV_OP_MOV;
 489    }
 490 }
 491
 492 static void
 493 constant_operand(struct nv_pc *pc,
 494                  struct nv_instruction *nvi, struct nv_value *val, int s)
 495 {
 496    union {
 497       float f32;
 498       uint32_t u32;
 499       int32_t s32;
 500    } u;
 501    int shift;
 502    int t = s ? 0 : 1;
 503    uint op;
 504    ubyte type;
 505
 506    if (!nvi->def[0])
 507       return;
 508    type = NV_OPTYPE(nvi->opcode);
 509
 510    u.u32 = val->reg.imm.u32;
 511    apply_modifiers(&u.u32, type, nvi->src[s]->mod);
 512
 513    if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
 514       nvi->opcode = NV_OP_MOV;
 515       nv_reference(pc, nvi, t, NULL);
 516       if (s) {
 517          nvi->src[0] = nvi->src[1];
 518          nvi->src[1] = NULL;
 519       }
 520       return;
 521    }
 522
 523    switch (nvi->opcode) {
 524    case NV_OP_MUL_F32:
 525       if (u.f32 == 1.0f || u.f32 == -1.0f) {
 526          if (u.f32 == -1.0f)
 527             nvi->src[t]->mod ^= NV_MOD_NEG;
 528          switch (nvi->src[t]->mod) {
 529          case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
 530          case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
 531          case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
 532          default:
 533             return;
 534          }
 535          nvi->opcode = op;
 536          nv_reference(pc, nvi, 0, nvi->src[t]->value);
 537          nv_reference(pc, nvi, 1, NULL);
 538          nvi->src[0]->mod = 0;
 539       } else
 540       if (u.f32 == 2.0f || u.f32 == -2.0f) {
 541          if (u.f32 == -2.0f)
 542             nvi->src[t]->mod ^= NV_MOD_NEG;
 543          nvi->opcode = NV_OP_ADD_F32;
 544          nv_reference(pc, nvi, s, nvi->src[t]->value);
 545          nvi->src[s]->mod = nvi->src[t]->mod;
 546       }
 547    case NV_OP_ADD_F32:
 548       if (u.u32 == 0) {
 549          switch (nvi->src[t]->mod) {
 550          case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
 551          case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
 552          case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
 553          case NV_MOD_NEG | NV_MOD_ABS:
 554             op = NV_OP_CVT;
 555             nvi->ext.cvt.s = nvi->ext.cvt.d = type;
 556             break;
 557          default:
 558             return;
 559          }
 560          nvi->opcode = op;
 561          nv_reference(pc, nvi, 0, nvi->src[t]->value);
 562          nv_reference(pc, nvi, 1, NULL);
 563          if (nvi->opcode != NV_OP_CVT)
 564             nvi->src[0]->mod = 0;
 565       }
 566    case NV_OP_ADD_B32:
 567       if (u.u32 == 0) {
 568          assert(nvi->src[t]->mod == 0);
 569          nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
 570          nvi->ext.cvt.s = nvi->ext.cvt.d = type;
 571          nv_reference(pc, nvi, 0, nvi->src[t]->value);
 572          nv_reference(pc, nvi, 1, NULL);
 573       }
 574       break;
 575    case NV_OP_MUL_B32:
 576       /* multiplication by 0 already handled above */
 577       assert(nvi->src[s]->mod == 0);
 578       shift = ffs(u.s32) - 1;
 579       if (shift == 0) {
 580          nvi->opcode = NV_OP_MOV;
 581          nv_reference(pc, nvi, 0, nvi->src[t]->value);
 582          nv_reference(pc, nvi, 1, NULL);
 583       } else
 584       if (u.s32 > 0 && u.s32 == (1 << shift)) {
 585          nvi->opcode = NV_OP_SHL;
 586          (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift;
 587          nv_reference(pc, nvi, 0, nvi->src[t]->value);
 588          nv_reference(pc, nvi, 1, val);
 589          break;
 590       }
 591       break;
 592    case NV_OP_RCP:
 593       u.f32 = 1.0f / u.f32;
 594       (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
 595       nvi->opcode = NV_OP_MOV;
 596       assert(s == 0);
 597       nv_reference(pc, nvi, 0, val);
 598       break;
 599    case NV_OP_RSQ:
 600       u.f32 = 1.0f / sqrtf(u.f32);
 601       (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
 602       nvi->opcode = NV_OP_MOV;
 603       assert(s == 0);
 604       nv_reference(pc, nvi, 0, val);
 605       break;
 606    default:
 607       break;
 608    }
 609 }
 610
 611 static void
 612 handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
 613 {
 614    struct nv_value *src0 = nvi->src[0]->value;
 615    struct nv_value *src1 = nvi->src[1]->value;
 616
 617    if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
 618       return;
 619    if (src0->reg.file != NV_FILE_GPR)
 620       return;
 621    nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
 622    nvc0_insn_delete(nvi);
 623 }
 624
 625 /* check if we can MUL + ADD -> MAD/FMA */
 626 static void
 627 handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
 628 {
 629    struct nv_value *src0 = nvi->src[0]->value;
 630    struct nv_value *src1 = nvi->src[1]->value;
 631    struct nv_value *src;
 632    int s;
 633    uint8_t mod[4];
 634
 635    if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
 636    else
 637    if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
 638    else
 639       return;
 640
 641    if ((src0->insn && src0->insn->bb != nvi->bb) ||
 642        (src1->insn && src1->insn->bb != nvi->bb))
 643       return;
 644
 645    /* check for immediates from prior constant folding */
 646    if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
 647       return;
 648    src = nvi->src[s]->value;
 649
 650    mod[0] = nvi->src[0]->mod;
 651    mod[1] = nvi->src[1]->mod;
 652    mod[2] = src->insn->src[0]->mod;
 653    mod[3] = src->insn->src[1]->mod;
 654
 655    if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
 656       return;
 657
 658    nvi->opcode = NV_OP_MAD_F32;
 659
 660    nv_reference(ctx->pc, nvi, s, NULL);
 661    nvi->src[2] = nvi->src[!s];
 662    nvi->src[!s] = NULL;
 663
 664    nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
 665    nvi->src[0]->mod = mod[2] ^ mod[s];
 666    nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
 667    nvi->src[1]->mod = mod[3];
 668 }
 669
 670 static int
 671 nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
 672 {
 673    struct nv_instruction *nvi, *next;
 674    int j;
 675
 676    for (nvi = b->entry; nvi; nvi = next) {
 677       struct nv_value *src0, *src1;
 678       uint baseop = NV_BASEOP(nvi->opcode);
 679
 680       next = nvi->next;
 681
 682       src0 = nvc0_pc_find_immediate(nvi->src[0]);
 683       src1 = nvc0_pc_find_immediate(nvi->src[1]);
 684
 685       if (src0 && src1) {
 686          constant_expression(ctx->pc, nvi, src0, src1);
 687       } else {
 688          if (src0)
 689             constant_operand(ctx->pc, nvi, src0, 0);
 690          else
 691          if (src1)
 692             constant_operand(ctx->pc, nvi, src1, 1);
 693       }
 694
 695       if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
 696          handle_min_max(ctx, nvi);
 697       else
 698       if (nvi->opcode == NV_OP_ADD_F32)
 699          handle_add_mul(ctx, nvi);
 700    }
 701    DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);
 702
 703    return 0;
 704 }
 705
 706 /* TODO: redundant store elimination */
 707
 708 struct mem_record {
 709    struct mem_record *next;
 710    struct nv_instruction *insn;
 711    uint32_t ofst;
 712    uint32_t base;
 713    uint32_t size;
 714 };
 715
 716 #define MEM_RECORD_POOL_SIZE 1024
 717
 718 struct pass_reld_elim {
 719    struct nv_pc *pc;
 720
 721    struct mem_record *imm;
 722    struct mem_record *mem_v;
 723    struct mem_record *mem_a;
 724    struct mem_record *mem_c[16];
 725    struct mem_record *mem_l;
 726
 727    struct mem_record pool[MEM_RECORD_POOL_SIZE];
 728    int alloc;
 729 };
 730
 731 /* Extend the load operation in @rec to also cover the data loaded by @ld.
 732  * The two loads may not overlap but reference adjacent memory locations.
 733  */
 734 static void
 735 combine_load(struct mem_record *rec, struct nv_instruction *ld)
 736 {
 737    struct nv_instruction *fv = rec->insn;
 738    struct nv_value *mem = ld->src[0]->value;
 739    uint32_t size = rec->size + mem->reg.size;
 740    int j;
 741    int d = rec->size / 4;
 742
 743    assert(rec->size < 16);
 744    if (rec->ofst > mem->reg.address) {
 745       if ((size == 8 && mem->reg.address & 3) ||
 746           (size > 8 && mem->reg.address & 7))
 747          return;
 748       rec->ofst = mem->reg.address;
 749       for (j = 0; j < d; ++j)
 750          fv->def[mem->reg.size / 4 + j] = fv->def[j];
 751       d = 0;
 752    } else
 753    if ((size == 8 && rec->ofst & 3) ||
 754        (size > 8 && rec->ofst & 7)) {
 755       return;
 756    }
 757
 758    for (j = 0; j < mem->reg.size / 4; ++j) {
 759       fv->def[d] = ld->def[j];
 760       fv->def[d++]->insn = fv;
 761    }
 762
 763    fv->src[0]->value->reg.address = rec->ofst;
 764    fv->src[0]->value->reg.size = rec->size = size;
 765
 766    nvc0_insn_delete(ld);
 767 }
 768
 769 static void
 770 combine_export(struct mem_record *rec, struct nv_instruction *ex)
 771 {
 772
 773 }
 774
 775 static INLINE void
 776 add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
 777                uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
 778 {
 779    struct mem_record *it = &ctx->pool[ctx->alloc++];
 780
 781    it->next = *rec;
 782    *rec = it;
 783    it->base = base;
 784    it->ofst = ofst;
 785    it->insn = nvi;
 786    it->size = nvi->src[0]->value->reg.size;
 787 }
 788
 789 /* vectorize and reuse loads from memory or of immediates */
 790 static int
 791 nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
 792 {
 793    struct mem_record **rec, *it;
 794    struct nv_instruction *ld, *next;
 795    struct nv_value *mem;
 796    uint32_t base, ofst;
 797    int s;
 798
 799    for (ld = b->entry; ld; ld = next) {
 800       next = ld->next;
 801
 802       if (is_cspace_load(ld)) {
 803          mem = ld->src[0]->value;
 804          rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
 805       } else
 806       if (ld->opcode == NV_OP_VFETCH) {
 807          mem = ld->src[0]->value;
 808          rec = &ctx->mem_a;
 809       } else
 810       if (ld->opcode == NV_OP_EXPORT) {
 811          mem = ld->src[0]->value;
 812          if (mem->reg.file != NV_FILE_MEM_V)
 813             continue;
 814          rec = &ctx->mem_v;
 815       } else {
 816          continue;
 817       }
 818       if (ld->def[0] && ld->def[0]->refc == 0)
 819          continue;
 820       ofst = mem->reg.address;
 821       base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;
 822
 823       for (it = *rec; it; it = it->next) {
 824          if (it->base == base &&
 825              ((it->ofst >> 4) == (ofst >> 4)) &&
 826              ((it->ofst + it->size == ofst) ||
 827               (it->ofst - mem->reg.size == ofst))) {
 828             /* only NV_OP_VFETCH can load exactly 12 bytes */
 829             if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
 830                continue;
 831             if (it->ofst < ofst) {
 832                if ((it->ofst & 0xf) == 4)
 833                   continue;
 834             } else
 835             if ((ofst & 0xf) == 4)
 836                continue;
 837             break;
 838          }
 839       }
 840       if (it) {
 841          switch (ld->opcode) {
 842          case NV_OP_EXPORT: combine_export(it, ld); break;
 843          default:
 844             combine_load(it, ld);
 845             break;
 846          }
 847       } else
 848       if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
 849          add_mem_record(ctx, rec, base, ofst, ld);
 850       }
 851    }
 852
 853    ctx->alloc = 0;
 854    ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL;
 855    for (s = 0; s < 16; ++s)
 856       ctx->mem_c[s] = NULL;
 857
 858    DESCEND_ARBITRARY(s, nv_pass_mem_opt);
 859    return 0;
 860 }
 861
 862 static void
 863 eliminate_store(struct mem_record *rec, struct nv_instruction *st)
 864 {
 865 }
 866
 867 /* elimination of redundant stores */
 868 static int
 869 pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
 870 {
 871    struct mem_record **rec, *it;
 872    struct nv_instruction *st, *next;
 873    struct nv_value *mem;
 874    uint32_t base, ofst, size;
 875    int s;
 876
 877    for (st = b->entry; st; st = next) {
 878       next = st->next;
 879
 880       if (st->opcode == NV_OP_ST) {
 881          mem = st->src[0]->value;
 882          rec = &ctx->mem_l;
 883       } else
 884       if (st->opcode == NV_OP_EXPORT) {
 885          mem = st->src[0]->value;
 886          if (mem->reg.file != NV_FILE_MEM_V)
 887             continue;
 888          rec = &ctx->mem_v;
 889       } else
 890       if (st->opcode == NV_OP_ST) {
 891          /* TODO: purge */
 892       }
 893       ofst = mem->reg.address;
 894       base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
 895       size = mem->reg.size;
 896
 897       for (it = *rec; it; it = it->next) {
 898          if (it->base == base &&
 899              (it->ofst <= ofst && (it->ofst + size) > ofst))
 900             break;
 901       }
 902       if (it)
 903          eliminate_store(it, st);
 904       else
 905          add_mem_record(ctx, rec, base, ofst, st);
 906    }
 907
 908    DESCEND_ARBITRARY(s, nv_pass_mem_opt);
 909    return 0;
 910 }
 911
 912 /* TODO: properly handle loads from l[] memory in the presence of stores */
 913 static int
 914 nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
 915 {
 916 #if 0
 917    struct load_record **rec, *it;
 918    struct nv_instruction *ld, *next;
 919    uint64_t data[2];
 920    struct nv_value *val;
 921    int j;
 922
 923    for (ld = b->entry; ld; ld = next) {
 924       next = ld->next;
 925       if (!ld->src[0])
 926          continue;
 927       val = ld->src[0]->value;
 928       rec = NULL;
 929
 930       if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
 931          data[0] = val->reg.id;
 932          data[1] = 0;
 933          rec = &ctx->mem_v;
 934       } else
 935       if (ld->opcode == NV_OP_LDA) {
 936          data[0] = val->reg.id;
 937          data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
 938          if (val->reg.file >= NV_FILE_MEM_C(0) &&
 939              val->reg.file <= NV_FILE_MEM_C(15))
 940             rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
 941          else
 942          if (val->reg.file == NV_FILE_MEM_S)
 943             rec = &ctx->mem_s;
 944          else
 945          if (val->reg.file == NV_FILE_MEM_L)
 946             rec = &ctx->mem_l;
 947       } else
 948       if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
 949          data[0] = val->reg.imm.u32;
 950          data[1] = 0;
 951          rec = &ctx->imm;
 952       }
 953
 954       if (!rec || !ld->def[0]->refc)
 955          continue;
 956
 957       for (it = *rec; it; it = it->next)
 958          if (it->data[0] == data[0] && it->data[1] == data[1])
 959             break;
 960
 961       if (it) {
 962          if (ld->def[0]->reg.id >= 0)
 963             it->value = ld->def[0];
 964          else
 965          if (!ld->fixed)
 966             nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
 967       } else {
 968          if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
 969             continue;
 970          it = &ctx->pool[ctx->alloc++];
 971          it->next = *rec;
 972          it->data[0] = data[0];
 973          it->data[1] = data[1];
 974          it->value = ld->def[0];
 975          *rec = it;
 976       }
 977    }
 978
 979    ctx->imm = NULL;
 980    ctx->mem_s = NULL;
 981    ctx->mem_v = NULL;
 982    for (j = 0; j < 16; ++j)
 983       ctx->mem_c[j] = NULL;
 984    ctx->mem_l = NULL;
 985    ctx->alloc = 0;
 986
 987    DESCEND_ARBITRARY(j, nv_pass_reload_elim);
 988 #endif
 989    return 0;
 990 }
 991
 992 static int
 993 nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
 994 {
 995    int i, c, j;
 996
 997    for (i = 0; i < ctx->pc->num_instructions; ++i) {
 998       struct nv_instruction *nvi = &ctx->pc->instructions[i];
 999       struct nv_value *def[4];
1000
1001       if (!nv_is_texture_op(nvi->opcode))
1002          continue;
1003       nvi->tex_mask = 0;
1004
1005       for (c = 0; c < 4; ++c) {
1006          if (nvi->def[c]->refc)
1007             nvi->tex_mask |= 1 << c;
1008          def[c] = nvi->def[c];
1009       }
1010
1011       j = 0;
1012       for (c = 0; c < 4; ++c)
1013          if (nvi->tex_mask & (1 << c))
1014             nvi->def[j++] = def[c];
1015       for (c = 0; c < 4; ++c)
1016          if (!(nvi->tex_mask & (1 << c)))
1017            nvi->def[j++] = def[c];
1018       assert(j == 4);
1019    }
1020    return 0;
1021 }
1022
1023 struct nv_pass_dce {
1024    struct nv_pc *pc;
1025    uint removed;
1026 };
1027
1028 static int
1029 nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
1030 {
1031    int j;
1032    struct nv_instruction *nvi, *next;
1033
1034    for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
1035       next = nvi->next;
1036
1037       if (inst_removable(nvi)) {
1038          nvc0_insn_delete(nvi);
1039          ++ctx->removed;
1040       }
1041    }
1042    DESCEND_ARBITRARY(j, nv_pass_dce);
1043
1044    return 0;
1045 }
1046
1047 /* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
1048  * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
1049  * BREAK and dummy ELSE block.
1050  */
1051 static INLINE boolean
1052 bb_is_if_else_endif(struct nv_basic_block *bb)
1053 {
1054    if (!bb->out[0] || !bb->out[1])
1055       return FALSE;
1056
1057    if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
1058       return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
1059               !bb->out[1]->out[1]);
1060    } else {
1061       return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
1062               !bb->out[0]->out[1] &&
1063               !bb->out[1]->out[1]);
1064    }
1065 }
1066
1067 /* Predicate instructions and delete any branch at the end if it is
1068  * not a break from a loop.
1069  */
1070 static void
1071 predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
1072                        struct nv_value *pred, uint8_t cc)
1073 {
1074    struct nv_instruction *nvi, *prev;
1075    int s;
1076
1077    if (!b->entry)
1078       return;
1079    for (nvi = b->entry; nvi; nvi = nvi->next) {
1080       prev = nvi;
1081       if (inst_is_noop(nvi))
1082          continue;
1083       for (s = 0; nvi->src[s]; ++s);
1084       assert(s < 6);
1085       nvi->predicate = s;
1086       nvi->cc = cc;
1087       nv_reference(pc, nvi, nvi->predicate, pred);
1088    }
1089    if (prev->opcode == NV_OP_BRA &&
1090        b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
1091        b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
1092       nvc0_insn_delete(prev);
1093 }
1094
1095 static INLINE boolean
1096 may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
1097 {
1098    if (nvi->def[0] && values_equal(nvi->def[0], pred))
1099       return FALSE;
1100    return nvc0_insn_is_predicateable(nvi);
1101 }
1102
1103 /* Transform IF/ELSE/ENDIF constructs into predicated instructions
1104  * where feasible.
1105  */
1106 static int
1107 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
1108 {
1109    struct nv_instruction *nvi;
1110    struct nv_value *pred;
1111    int k;
1112    int n0, n1; /* instruction counts of outgoing blocks */
1113
1114    if (bb_is_if_else_endif(b)) {
1115       assert(b->exit && b->exit->opcode == NV_OP_BRA);
1116
1117       assert(b->exit->predicate >= 0);
1118       pred = b->exit->src[b->exit->predicate]->value;
1119
1120       n1 = n0 = 0;
1121       for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
1122          if (!may_predicate_insn(nvi, pred))
1123             break;
1124       if (!nvi) {
1125          /* we're after register allocation, so there always is an ELSE block */
1126          for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
1127             if (!may_predicate_insn(nvi, pred))
1128                break;
1129       }
1130
1131       /* 12 is an arbitrary limit */
1132       if (!nvi && n0 < 12 && n1 < 12) {
1133          predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
1134          predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);
1135
1136          nvc0_insn_delete(b->exit); /* delete the branch */
1137
1138          /* and a potential joinat before it */
1139          if (b->exit && b->exit->opcode == NV_OP_JOINAT)
1140             nvc0_insn_delete(b->exit);
1141
1142          /* remove join operations at the end of the conditional */
1143          k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
1144          if ((nvi = b->out[0]->out[k]->entry)) {
1145             nvi->join = 0;
1146             if (nvi->opcode == NV_OP_JOIN)
1147                nvc0_insn_delete(nvi);
1148          }
1149       }
1150    }
1151    DESCEND_ARBITRARY(k, nv_pass_flatten);
1152
1153    return 0;
1154 }
1155
1156 /* Tests instructions for equality, but independently of sources. */
1157 static boolean
1158 is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
1159 {
1160    if (a->opcode != b->opcode)
1161       return FALSE;
1162    if (nv_is_texture_op(a->opcode)) {
1163       if (a->ext.tex.t != b->ext.tex.t ||
1164           a->ext.tex.s != b->ext.tex.s)
1165          return FALSE;
1166       if (a->tex_dim != b->tex_dim ||
1167           a->tex_array != b->tex_array ||
1168           a->tex_cube != b->tex_cube ||
1169           a->tex_shadow != b->tex_shadow ||
1170           a->tex_live != b->tex_live)
1171          return FALSE;
1172    } else
1173    if (a->opcode == NV_OP_CVT) {
1174       if (a->ext.cvt.s != b->ext.cvt.s ||
1175           a->ext.cvt.d != b->ext.cvt.d)
1176          return FALSE;
1177    } else
1178    if (NV_BASEOP(a->opcode) == NV_OP_SET ||
1179        NV_BASEOP(a->opcode) == NV_OP_SLCT) {
1180       if (a->set_cond != b->set_cond)
1181          return FALSE;
1182    } else
1183    if (a->opcode == NV_OP_LINTERP ||
1184        a->opcode == NV_OP_PINTERP) {
1185       if (a->centroid != b->centroid ||
1186           a->flat != b->flat)
1187          return FALSE;
1188    }
1189    if (a->cc != b->cc)
1190       return FALSE;
1191    if (a->lanes != b->lanes ||
1192        a->patch != b->patch ||
1193        a->saturate != b->saturate)
1194       return FALSE;
1195    if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
1196       return FALSE;
1197    return TRUE;
1198 }
1199
1200 /* local common subexpression elimination, stupid O(n^2) implementation */
1201 static int
1202 nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
1203 {
1204    struct nv_instruction *ir, *ik, *next;
1205    struct nv_instruction *entry = b->phi ? b->phi : b->entry;
1206    int s, d;
1207    unsigned int reps;
1208
1209    do {
1210       reps = 0;
1211       for (ir = entry; ir; ir = next) {
1212          next = ir->next;
1213          if (ir->fixed)
1214             continue;
1215          for (ik = entry; ik != ir; ik = ik->next) {
1216             if (!is_operation_equal(ir, ik))
1217                continue;
1218             if (!ir->def[0] || !ik->def[0])
1219                continue;
1220
1221             if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
1222                continue;
1223
1224             for (d = 0; d < 4; ++d) {
1225                if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
1226                   break;
1227                if (ir->def[d]) {
1228                   if (!values_equal(ik->def[0], ir->def[0]))
1229                      break;
1230                } else {
1231                   d = 4;
1232                   break;
1233                }
1234             }
1235             if (d != 4)
1236                continue;
1237
1238             for (s = 0; s < 5; ++s) {
1239                struct nv_value *a, *b;
1240
1241                if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
1242                   break;
1243                if (!ir->src[s]) {
1244                   s = 5;
1245                   break;
1246                }
1247
1248                if (ik->src[s]->mod != ir->src[s]->mod)
1249                   break;
1250                a = ik->src[s]->value;
1251                b = ir->src[s]->value;
1252                if (a == b)
1253                   continue;
1254                if (a->reg.file != b->reg.file ||
1255                    a->reg.id < 0 || /* this excludes memory loads/stores */
1256                    a->reg.id != b->reg.id)
1257                   break;
1258             }
1259             if (s == 5) {
1260                nvc0_insn_delete(ir);
1261                for (d = 0; d < 4 && ir->def[d]; ++d)
1262                   nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
1263                ++reps;
1264                break;
1265             }
1266          }
1267       }
1268    } while(reps);
1269
1270    DESCEND_ARBITRARY(s, nv_pass_cse);
1271
1272    return 0;
1273 }
1274
1275 /* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
1276  * neighbouring registers. CSE might have messed this up.
1277  * Just generate a MOV for each source to avoid conflicts if they're used in
1278  * multiple NV_OP_BIND at different positions.
1279  */
1280 static int
1281 nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
1282 {
1283    struct nv_value *val;
1284    struct nv_instruction *bnd, *nvi, *next;
1285    int s;
1286
1287    for (bnd = b->entry; bnd; bnd = next) {
1288       next = bnd->next;
1289       if (bnd->opcode != NV_OP_BIND)
1290          continue;
1291       for (s = 0; s < 4 && bnd->src[s]; ++s) {
1292          val = bnd->src[s]->value;
1293
1294          nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
1295          nvi->def[0] = new_value_like(ctx->pc, val);
1296          nvi->def[0]->insn = nvi;
1297          nv_reference(ctx->pc, nvi, 0, val);
1298          nv_reference(ctx->pc, bnd, s, nvi->def[0]);
1299
1300          nvc0_insn_insert_before(bnd, nvi);
1301       }
1302    }
1303    DESCEND_ARBITRARY(s, nv_pass_fix_bind);
1304
1305    return 0;
1306 }
1307
1308 static int
1309 nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
1310 {
1311    struct pass_reld_elim *reldelim;
1312    struct nv_pass pass;
1313    struct nv_pass_dce dce;
1314    int ret;
1315
1316    pass.n = 0;
1317    pass.pc = pc;
1318
1319    /* Do CSE so we can just compare values by pointer in subsequent passes. */
1320    pc->pass_seq++;
1321    ret = nv_pass_cse(&pass, root);
1322    if (ret)
1323       return ret;
1324
1325    /* Do this first, so we don't have to pay attention
1326     * to whether sources are supported memory loads.
1327     */
1328    pc->pass_seq++;
1329    ret = nv_pass_algebraic_opt(&pass, root);
1330    if (ret)
1331       return ret;
1332
1333    pc->pass_seq++;
1334    ret = nv_pass_lower_mods(&pass, root);
1335    if (ret)
1336       return ret;
1337
1338    pc->pass_seq++;
1339    ret = nvc0_pass_fold_loads(&pass, root);
1340    if (ret)
1341       return ret;
1342
1343    if (pc->opt_reload_elim) {
1344       reldelim = CALLOC_STRUCT(pass_reld_elim);
1345       reldelim->pc = pc;
1346
1347       pc->pass_seq++;
1348       ret = nv_pass_reload_elim(reldelim, root);
1349       if (ret) {
1350          FREE(reldelim);
1351          return ret;
1352       }
1353       memset(reldelim, 0, sizeof(struct pass_reld_elim));
1354       reldelim->pc = pc;
1355    }
1356
1357    /* May run DCE before load-combining since that pass will clean up
1358     * after itself.
1359     */
1360    dce.pc = pc;
1361    do {
1362       dce.removed = 0;
1363       pc->pass_seq++;
1364       ret = nv_pass_dce(&dce, root);
1365       if (ret)
1366          return ret;
1367    } while (dce.removed);
1368
1369    if (pc->opt_reload_elim) {
1370       pc->pass_seq++;
1371       ret = nv_pass_mem_opt(reldelim, root);
1372       if (!ret) {
1373          memset(reldelim, 0, sizeof(struct pass_reld_elim));
1374          reldelim->pc = pc;
1375
1376          pc->pass_seq++;
1377          ret = nv_pass_mem_opt(reldelim, root);
1378       }
1379       FREE(reldelim);
1380       if (ret)
1381          return ret;
1382    }
1383
1384    ret = nv_pass_tex_mask(&pass, root);
1385    if (ret)
1386       return ret;
1387
1388    pc->pass_seq++;
1389    ret = nv_pass_fix_bind(&pass, root);
1390
1391    return ret;
1392 }
1393
1394 int
1395 nvc0_pc_exec_pass0(struct nv_pc *pc)
1396 {
1397    int i, ret;
1398
1399    for (i = 0; i < pc->num_subroutines + 1; ++i)
1400       if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
1401          return ret;
1402    return 0;
1403 }