nv50: use actual loads/stores if TEMPs are accessed indirectly
[mesa.git] / src/gallium/drivers/nv50/nv50_pc_optimize.c
/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* #define NV50PC_DEBUG */

#include "nv50_pc.h"

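/* Visit the not yet processed successors of block @b in arbitrary order,
 * using pc->pass_seq as the visited marker; used by passes that do not
 * depend on dominance or any particular traversal order.
 */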
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)

extern unsigned nv50_inst_min_size(struct nv_instruction *);

struct nv_pc_pass {
   struct nv_pc *pc;
};

static INLINE boolean
values_equal(struct nv_value *a, struct nv_value *b)
{
   /* XXX: sizes */
   return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id);
}

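/* TRUE if instruction @b reads neither a value that @a defines nor the
 * flags @a writes; checked in both directions this tells whether the two
 * instructions may trade places.
 */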
static INLINE boolean
inst_commutation_check(struct nv_instruction *a,
                       struct nv_instruction *b)
{
   int si, di;

   for (di = 0; di < 4; ++di) {
      if (!a->def[di])
         break;
      for (si = 0; si < 5; ++si) {
         if (!b->src[si])
            continue;
         if (values_equal(a->def[di], b->src[si]->value))
            return FALSE;
      }
   }

   if (b->flags_src && b->flags_src->value == a->flags_def)
      return FALSE;

   return TRUE;
}

/* Check whether we can swap the order of the instructions,
 * where a & b may be either the earlier or the later one.
 */
static boolean
inst_commutation_legal(struct nv_instruction *a,
                       struct nv_instruction *b)
{
   return inst_commutation_check(a, b) && inst_commutation_check(b, a);
}

static INLINE boolean
inst_cullable(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_STA)
      return FALSE;
   return (!(nvi->is_terminator || nvi->is_join ||
             nvi->target ||
             nvi->fixed ||
             nv_nvi_refcount(nvi)));
}

static INLINE boolean
nvi_isnop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_EXPORT || nvi->opcode == NV_OP_UNDEF)
      return TRUE;

   /* NOTE: 'fixed' now only means that it shouldn't be optimized away,
    * but we can still remove it if it is a no-op move.
    */
   if (/* nvi->fixed || */
       /* nvi->flags_src || */ /* cond. MOV to same register is still NOP */
       nvi->flags_def ||
       nvi->is_terminator ||
       nvi->is_join)
      return FALSE;

   if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
      return TRUE;

   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;

   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      NV50_DBGMSG("nvi_isnop: orphaned value detected\n");
      return TRUE;
   }

   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;

   return values_equal(nvi->def[0], nvi->src[0]->value);
}

struct nv_pass {
   struct nv_pc *pc;
   int n;
   void *priv;
};

static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);

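/* Pre-emission layout pass: delete no-op instructions and no-op branches
 * (a BRA to the block immediately following), then compute each block's
 * bin_pos and bin_size.  nv50 instructions are 4 or 8 bytes long and two
 * short (4 byte) instructions have to pair up within an 8 byte slot, so a
 * lone short instruction is either swapped with a short neighbour (if that
 * reordering is legal) or promoted to the long encoding.  Sizes are
 * counted in 32-bit words here and converted to bytes at the end.
 */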
static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;
   uint size, n32 = 0;

   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
   if (j >= 0) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->bin_size -= 8;
         pc->bin_size -= 8;

         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->bin_pos -= 8;

         nv_nvi_delete(in->exit);
      }
      b->bin_pos = in->bin_pos + in->bin_size;
   }

   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi_isnop(nvi))
         nv_nvi_delete(nvi);
   }

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      size = nv50_inst_min_size(nvi);
      if (nvi->next && size < 8)
         ++n32;
      else
      if ((n32 & 1) && nvi->next &&
          nv50_inst_min_size(nvi->next) == 4 &&
          inst_commutation_legal(nvi, nvi->next)) {
         ++n32;
         nv_nvi_permute(nvi, nvi->next);
         next = nvi;
      } else {
         nvi->is_long = 1;

         b->bin_size += n32 & 1;
         if (n32 & 1)
            nvi->prev->is_long = 1;
         n32 = 0;
      }
      b->bin_size += 1 + nvi->is_long;
   }

   if (!b->entry) {
      NV50_DBGMSG("block %p is now empty\n", b);
   } else
   if (!b->exit->is_long) {
      assert(n32);
      b->exit->is_long = 1;
      b->bin_size += 1;

      /* might have deleted a whole tail of instructions */
      if (!b->exit->prev->is_long && !(n32 & 1)) {
         b->bin_size += 1;
         b->exit->prev->is_long = 1;
      }
   }
   assert(!b->entry || (b->exit && b->exit->is_long));

   pc->bin_size += b->bin_size *= 4;
}

static int
nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pass pass;

   pass.pc = pc;

   pc->pass_seq++;

   nv_pass_flatten(&pass, root);

   nv_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);

   return 0;
}

int
nv_pc_exec_pass2(struct nv_pc *pc)
{
   int i, ret;

   NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks);

   pc->bb_list = CALLOC(pc->num_blocks, sizeof(pc->bb_list[0]));

   pc->num_blocks = 0;

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
         return ret;
   return 0;
}

static INLINE boolean
is_cmem_load(struct nv_instruction *nvi)
{
   return (nvi->opcode == NV_OP_LDA &&
           nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
           nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
}

static INLINE boolean
is_smem_load(struct nv_instruction *nvi)
{
   return (nvi->opcode == NV_OP_LDA &&
           (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
            nvi->src[0]->value->reg.file == NV_FILE_MEM_P));
}

static INLINE boolean
is_immd_move(struct nv_instruction *nvi)
{
   return (nvi->opcode == NV_OP_MOV &&
           nvi->src[0]->value->reg.file == NV_FILE_IMM);
}

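/* For commutative instructions, swap sources so that each operand lands in
 * a slot that can encode it: a c[] (const space) load is moved to src1 and
 * an s[] (shader input) load to src0, which appear to be the only source
 * slots able to reference those files directly.  For SET, the condition
 * code is mirrored accordingly (e.g. LT becomes GT); cc_swapped below maps
 * each of the 8 condition codes to its operand-swapped equivalent.
 */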
static INLINE void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode))
      return;
   assert(src0 && src1);

   if (src1->value->reg.file == NV_FILE_IMM)
      return;

   if (is_cmem_load(src0->value->insn)) {
      if (!is_cmem_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping cmem load to 1\n"); */
      }
   } else
   if (is_smem_load(src1->value->insn)) {
      if (!is_smem_load(src0->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
         /* debug_printf("swapping smem load to 0\n"); */
      }
   }

   if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
      nvi->set_cond = cc_swapped[nvi->set_cond];
}

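/* Fold a plain move (MOV/STA) of a temporary to an output register into
 * the instruction computing the temporary, e.g.
 *
 *    add $r0 a b
 *    mov $o0 $r0    -->    add $o0 a b
 *
 * Only done if the temporary has no other uses and the producer reads no
 * immediate (instructions writing $oX cannot encode immediates).
 */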
static int
nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *sti, *next;
   int j;

   for (sti = b->entry; sti; sti = next) {
      next = sti->next;

      /* only handling MOV to $oX here */
      if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
         continue;
      if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
         continue;

      nvi = sti->src[0]->value->insn;
      if (!nvi || nvi->opcode == NV_OP_PHI || nv_is_vector_op(nvi->opcode))
         continue;
      assert(nvi->def[0] == sti->src[0]->value);

      if (nvi->def[0]->refc > 1)
         continue;

      /* cannot write to $oX when using immediate */
      for (j = 0; j < 4 && nvi->src[j]; ++j)
         if (nvi->src[j]->value->reg.file == NV_FILE_IMM)
            break;
      if (j < 4 && nvi->src[j])
         continue;

      nvi->def[0] = sti->def[0];
      nvi->fixed = sti->fixed;

      nv_nvi_delete(sti);
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_stores);

   return 0;
}

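/* Fold loads (LDA from memory, MOV from immediates) directly into the
 * instructions that use their results, where the target opcode and source
 * slot permit it, and delete a load once it becomes unreferenced.
 */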
static int
nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int j;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (j = 0; j < 3; ++j) {
         if (!nvi->src[j])
            break;
         ld = nvi->src[j]->value->insn;
         if (!ld)
            continue;

         if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
            nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
            continue;
         }

         if (ld->opcode != NV_OP_LDA)
            continue;
         if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
            continue;

         if (j == 0 && ld->src[4]) /* can't load shared mem */
            continue;

         /* fold it ! */ /* XXX: ref->insn */
         nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
         if (ld->src[4])
            nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);

         if (!nv_nvi_refcount(ld))
            nv_nvi_delete(ld);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_fold_loads);

   return 0;
}

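/* Lower SUB to ADD with a negated second source, absorb NEG/ABS
 * instructions into source modifiers of their single user where the
 * opcode supports them, e.g.
 *
 *    neg $r1 $r0
 *    add $r2 a $r1    -->    add $r2 a -$r0
 *
 * and absorb SAT into a preceding MAD as its saturate flag.
 */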
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *mi, *next;
   ubyte mod;

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi->opcode == NV_OP_SUB) {
         nvi->opcode = NV_OP_ADD;
         nvi->src[1]->mod ^= NV_MOD_NEG;
      }

      /* should not put any modifiers on NEG and ABS */
      assert(nvi->opcode != NV_OP_NEG || !nvi->src[0]->mod);
      assert(nvi->opcode != NV_OP_ABS || !nvi->src[0]->mod);

      for (j = 0; j < 4; ++j) {
         if (!nvi->src[j])
            break;

         mi = nvi->src[j]->value->insn;
         if (!mi)
            continue;
         if (mi->def[0]->refc > 1)
            continue;

         if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
         else
         if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
         else
            continue;

         if (nvi->opcode == NV_OP_ABS)
            mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
         else
         if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
            nvi->opcode = NV_OP_MOV;
            mod = 0;
         }

         if ((nv50_supported_src_mods(nvi->opcode, j) & mod) != mod)
            continue;

         nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);

         nvi->src[j]->mod ^= mod;
      }

      if (nvi->opcode == NV_OP_SAT) {
         mi = nvi->src[0]->value->insn;

         if (mi && mi->opcode == NV_OP_MAD && !mi->flags_def) {
            mi->saturate = 1;
            mi->def[0] = nvi->def[0];
            nv_nvi_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_mods);

   return 0;
}

#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)

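/* Apply NEG/ABS modifiers to an immediate value: sign-bit operations for
 * 32-bit floats, two's complement negation for integers.  modifiers_opcode
 * returns the instruction a lone modifier corresponds to.
 */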
static void
modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
{
   if (mod & NV_MOD_ABS) {
      if (type == NV_TYPE_F32)
         *val &= 0x7fffffff;
      else
      if ((*val) & (1 << 31))
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_NEG) {
      if (type == NV_TYPE_F32)
         *val ^= 0x80000000;
      else
         *val = ~(*val) + 1;
   }
}

static INLINE uint
modifiers_opcode(ubyte mod)
{
   switch (mod) {
   case NV_MOD_NEG: return NV_OP_NEG;
   case NV_MOD_ABS: return NV_OP_ABS;
   case 0:
      return NV_OP_MOV;
   default:
      return NV_OP_NOP;
   }
}

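/* Fold an instruction whose sources are all immediates into a MOV of the
 * precomputed result; for MAD, only the MUL part is folded and the
 * instruction becomes an ADD of the remaining source and the constant.
 */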
static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
                    struct nv_value *src0, struct nv_value *src1)
{
   struct nv_value *val;
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u0, u1, u;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   u.u32 = 0;
   u0.u32 = src0->reg.imm.u32;
   u1.u32 = src1->reg.imm.u32;

   modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
   modifiers_apply(&u1.u32, type, nvi->src[1]->mod);

   switch (nvi->opcode) {
   case NV_OP_MAD:
      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
         return;
      /* fall through */
   case NV_OP_MUL:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   case NV_OP_ADD:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   case NV_OP_SUB:
      switch (type) {
      case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break;
      case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; break;
      case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break;
      default:
         assert(0);
         break;
      }
      break;
   default:
      return;
   }

   nvi->opcode = NV_OP_MOV;

   val = new_value(pc, NV_FILE_IMM, type);

   val->reg.imm.u32 = u.u32;

   nv_reference(pc, &nvi->src[1], NULL);
   nv_reference(pc, &nvi->src[0], val);

   if (nvi->src[2]) { /* from MAD */
      nvi->src[1] = nvi->src[0];
      nvi->src[0] = nvi->src[2];
      nvi->src[2] = NULL;
      nvi->opcode = NV_OP_ADD;
   }
}

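/* Algebraic simplifications for an instruction with one immediate source
 * @s (@t denotes the other source), e.g.
 *
 *    x * 1 -> x,  x * 2 -> x + x,  x * -1 -> -x,  x * 0 -> 0,  x + 0 -> x
 *
 * RCP and RSQ of an immediate are evaluated at compile time.  If the
 * instruction decays into a MOV but also wrote flags, a CVT reading the
 * MOV's result is inserted to preserve the flags definition.
 */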
static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u;
   int t = s ? 0 : 1;
   uint op;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = nvi->def[0]->reg.type;

   u.u32 = val->reg.imm.u32;
   modifiers_apply(&u.u32, type, nvi->src[s]->mod);

   switch (nvi->opcode) {
   case NV_OP_MUL:
      if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 1)) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
          (NV_TYPE_ISINT(type) && u.u32 == 2)) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = nvi->src[t]->mod;
      } else
      if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
         if (nvi->src[t]->mod & NV_MOD_NEG)
            nvi->opcode = NV_OP_MOV;
         else
            nvi->opcode = NV_OP_NEG;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      } else
      if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
         nvi->opcode = NV_OP_ADD;
         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
         nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
      } else
      if (u.u32 == 0) {
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, &nvi->src[t], NULL);
         if (s) {
            nvi->src[0] = nvi->src[1];
            nvi->src[1] = NULL;
         }
      }
      break;
   case NV_OP_ADD:
      if (u.u32 == 0) {
         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
            break;
         nvi->opcode = op;
         nv_reference(pc, &nvi->src[s], NULL);
         nvi->src[0] = nvi->src[t];
         nvi->src[1] = NULL;
      }
      break;
   case NV_OP_RCP:
      u.f32 = 1.0f / u.f32;
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, &nvi->src[0], val);
      break;
   case NV_OP_RSQ:
      u.f32 = 1.0f / sqrtf(u.f32);
      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, &nvi->src[0], val);
      break;
   default:
      break;
   }

   if (nvi->opcode == NV_OP_MOV && nvi->flags_def) {
      struct nv_instruction *cvt = new_instruction_at(pc, nvi, NV_OP_CVT);

      nv_reference(pc, &cvt->src[0], nvi->def[0]);

      cvt->flags_def = nvi->flags_def;
      nvi->flags_def = NULL;
   }
}

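/* Perform constant folding on immediate sources, then fuse a MUL whose
 * only use is an ADD into a single MAD:
 *
 *    mul $r0 a b
 *    add $r2 $r0 c    -->    mad $r2 a b c
 *
 * A NEG modifier on the consumed MUL result is pushed into the first MAD
 * source.
 */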
static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1, *src;
      int mod;

      next = nvi->next;

      src0 = nvcg_find_immediate(nvi->src[0]);
      src1 = nvcg_find_immediate(nvi->src[1]);

      if (src0 && src1)
         constant_expression(ctx->pc, nvi, src0, src1);
      else {
         if (src0)
            constant_operand(ctx->pc, nvi, src0, 0);
         else
         if (src1)
            constant_operand(ctx->pc, nvi, src1, 1);
      }

      /* try to combine MUL, ADD into MAD */
      if (nvi->opcode != NV_OP_ADD)
         continue;

      src0 = nvi->src[0]->value;
      src1 = nvi->src[1]->value;

      if (SRC_IS_MUL(src0) && src0->refc == 1)
         src = src0;
      else
      if (SRC_IS_MUL(src1) && src1->refc == 1)
         src = src1;
      else
         continue;

      nvi->opcode = NV_OP_MAD;
      mod = nvi->src[(src == src0) ? 0 : 1]->mod;
      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];

      assert(!(mod & ~NV_MOD_NEG));
      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
      nvi->src[1]->mod = src->insn->src[1]->mod;
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_arith);

   return 0;
}

/* TODO: redundant store elimination */

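/* Per-block records of already loaded values, keyed by their source
 * (memory offset or immediate bits) and bucketed by file, so repeated
 * loads of the same datum can be replaced by the first load's result.
 */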
struct load_record {
   struct load_record *next;
   uint64_t data;
   struct nv_value *value;
};

#define LOAD_RECORD_POOL_SIZE 1024

struct nv_pass_reld_elim {
   struct nv_pc *pc;

   struct load_record *imm;
   struct load_record *mem_s;
   struct load_record *mem_v;
   struct load_record *mem_c[16];
   struct load_record *mem_l;

   struct load_record pool[LOAD_RECORD_POOL_SIZE];
   int alloc;
};

/* TODO: properly handle loads from l[] memory in the presence of stores */
static int
nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data;
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data = val->reg.id;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data = val->reg.id;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data = val->reg.imm.u32;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data == data)
            break;

      if (it) {
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
         if (!ld->fixed)
            nvcg_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data = data;
         it->value = ld->def[0];
         *rec = it;
      }
   }

   ctx->imm = NULL;
   ctx->mem_s = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}

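/* For vector instructions (TEX), record which destination components are
 * actually referenced in tex_mask and move the live definitions to the
 * front of the def[] array, presumably so the emitter can fetch only
 * those components.
 */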
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_vector_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }

      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}

struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};

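/* Dead code elimination: delete instructions whose results are never used
 * and that have no side effects (stores, branches, joins and 'fixed'
 * instructions are kept).  The caller reruns this until nothing more is
 * removed.
 */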
static int
nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *next;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
      next = nvi->next;

      if (inst_cullable(nvi)) {
         nv_nvi_delete(nvi);

         ++ctx->removed;
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_dce);

   return 0;
}

/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
 * BREAK and dummy ELSE block.
 */
static INLINE boolean
bb_is_if_else_endif(struct nv_basic_block *bb)
{
   if (!bb->out[0] || !bb->out[1])
      return FALSE;

   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
      return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
              !bb->out[1]->out[1]);
   } else {
      return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
              !bb->out[0]->out[1] &&
              !bb->out[1]->out[1]);
   }
}

/* predicate instructions and remove branch at the end */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
                       struct nv_value *p, ubyte cc)
{
   struct nv_instruction *nvi;

   if (!b->entry)
      return;
   for (nvi = b->entry; nvi->next; nvi = nvi->next) {
      if (!nvi_isnop(nvi)) {
         nvi->cc = cc;
         nv_reference(pc, &nvi->flags_src, p);
      }
   }

   if (nvi->opcode == NV_OP_BRA)
      nv_nvi_delete(nvi);
   else
   if (!nvi_isnop(nvi)) {
      nvi->cc = cc;
      nv_reference(pc, &nvi->flags_src, p);
   }
}

/* NOTE: Run this after register allocation, we can just cut out the cflow
 * instructions and hook the predicates to the conditional OPs if they are
 * not using immediates; better than inserting SELECT to join definitions.
 *
 * NOTE: Should adapt prior optimization to make this possible more often.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi;
   struct nv_value *pred;
   int i;
   int n0 = 0, n1 = 0;

   if (bb_is_if_else_endif(b)) {

      NV50_DBGMSG("pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id);

      for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
         if (!nv50_nvi_can_predicate(nvi))
            break;
      if (!nvi) {
         for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
            if (!nv50_nvi_can_predicate(nvi))
               break;
#ifdef NV50PC_DEBUG
         if (nvi) {
            debug_printf("cannot predicate: "); nv_print_instruction(nvi);
         }
      } else {
         debug_printf("cannot predicate: "); nv_print_instruction(nvi);
#endif
      }

      if (!nvi && n0 < 12 && n1 < 12) { /* 12 as arbitrary limit */
         assert(b->exit && b->exit->flags_src);
         pred = b->exit->flags_src->value;

         predicate_instructions(ctx->pc, b->out[0], pred, NV_CC_NE | NV_CC_U);
         predicate_instructions(ctx->pc, b->out[1], pred, NV_CC_EQ);

         assert(b->exit && b->exit->opcode == NV_OP_BRA);
         nv_nvi_delete(b->exit);

         if (b->exit && b->exit->opcode == NV_OP_JOINAT)
            nv_nvi_delete(b->exit);

         i = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;

         if ((nvi = b->out[0]->out[i]->entry)) {
            nvi->is_join = 0;
            if (nvi->opcode == NV_OP_JOIN)
               nv_nvi_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(i, nv_pass_flatten);

   return 0;
}

/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s;
   unsigned int reps;

   do {
      reps = 0;
      for (ir = entry; ir; ir = next) {
         next = ir->next;
         for (ik = entry; ik != ir; ik = ik->next) {
            if (ir->opcode != ik->opcode || ir->fixed)
               continue;

            if (!ir->def[0] || !ik->def[0] ||
                ik->opcode == NV_OP_LDA ||
                ik->opcode == NV_OP_STA ||
                ik->opcode == NV_OP_MOV ||
                nv_is_vector_op(ik->opcode))
               continue; /* ignore loads, stores & moves */

            if (ik->src[4] || ir->src[4])
               continue; /* don't mess with address registers */

            if (ik->flags_src || ir->flags_src ||
                ik->flags_def || ir->flags_def)
               continue; /* and also not with flags, for now */

            if (ik->def[0]->reg.file == NV_FILE_OUT ||
                ir->def[0]->reg.file == NV_FILE_OUT ||
                !values_equal(ik->def[0], ir->def[0]))
               continue;

            for (s = 0; s < 3; ++s) {
               struct nv_value *va, *vb;

               if (!ik->src[s]) {
                  if (ir->src[s])
                     break;
                  continue;
               }
               if (ik->src[s]->mod != ir->src[s]->mod)
                  break;
               va = ik->src[s]->value;
               vb = ir->src[s]->value;
               if (va == vb)
                  continue;
               if (va->reg.file != vb->reg.file ||
                   va->reg.id < 0 ||
                   va->reg.id != vb->reg.id)
                  break;
            }
            if (s == 3) {
               nv_nvi_delete(ir);
               ++reps;
               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
               break;
            }
         }
      }
   } while (reps);

   DESCEND_ARBITRARY(s, nv_pass_cse);

   return 0;
}

static int
nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pass_reld_elim *reldelim;
   struct nv_pass pass;
   struct nv_pass_dce dce;
   int ret;

   pass.n = 0;
   pass.pc = pc;

   /* Do this first, so we don't have to pay attention
    * to whether sources are supported memory loads.
    */
   pc->pass_seq++;
   ret = nv_pass_lower_arith(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fold_loads(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fold_stores(&pass, root);
   if (ret)
      return ret;

   if (pc->opt_reload_elim) {
      reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
      reldelim->pc = pc;
      pc->pass_seq++;
      ret = nv_pass_reload_elim(reldelim, root);
      FREE(reldelim);
      if (ret)
         return ret;
   }

   pc->pass_seq++;
   ret = nv_pass_cse(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_lower_mods(&pass, root);
   if (ret)
      return ret;

   dce.pc = pc;
   do {
      dce.removed = 0;
      pc->pass_seq++;
      ret = nv_pass_dce(&dce, root);
      if (ret)
         return ret;
   } while (dce.removed);

   ret = nv_pass_tex_mask(&pass, root);
   if (ret)
      return ret;

   return ret;
}

int
nv_pc_exec_pass0(struct nv_pc *pc)
{
   int i, ret;

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
         return ret;
   return 0;
}