nvc0_insn_refcount(nvi)));
}
+/* Check if we do not actually have to emit this instruction. */
static INLINE boolean
inst_is_noop(struct nv_instruction *nvi)
{
return;
assert(src0 && src1 && src0->value && src1->value);
+ if (src1->value->reg.file != NV_FILE_GPR)
+ return;
+
if (is_cspace_load(src0->value->insn)) {
if (!is_cspace_load(src1->value->insn)) {
nvi->src[0] = src1;
}
}
- if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET)
- nvi->set_cond = cc_swapped[nvi->set_cond];
+ if (nvi->src[0] != src0 && NV_BASEOP(nvi->opcode) == NV_OP_SET)
+ nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7];
}
static void
nv_reference(pc, nvi, s, nvi->src[t]->value);
nvi->src[s]->mod = nvi->src[t]->mod;
}
+ break;
case NV_OP_ADD_F32:
if (u.u32 == 0) {
switch (nvi->src[t]->mod) {
if (nvi->opcode != NV_OP_CVT)
nvi->src[0]->mod = 0;
}
+ break;
case NV_OP_ADD_B32:
if (u.u32 == 0) {
assert(nvi->src[t]->mod == 0);
* The two loads may not overlap but reference adjacent memory locations.
*/
static void
-combine_load(struct mem_record *rec, struct nv_instruction *ld)
+combine_load(struct nv_pc *pc, struct mem_record *rec,
+ struct nv_instruction *ld)
{
struct nv_instruction *fv = rec->insn;
struct nv_value *mem = ld->src[0]->value;
fv->def[d++]->insn = fv;
}
+ if (fv->src[0]->value->refc > 1)
+ nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value));
fv->src[0]->value->reg.address = rec->ofst;
fv->src[0]->value->reg.size = rec->size = size;
switch (ld->opcode) {
case NV_OP_EXPORT: combine_export(it, ld); break;
default:
- combine_load(it, ld);
+ combine_load(ctx->pc, it, ld);
break;
}
} else
return 0;
}
-#if 0
/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
* Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
* BREAK and dummy ELSE block.
}
}
-/* predicate instructions and remove branch at the end */
+/* Predicate instructions and delete any branch at the end if it is
+ * not a break from a loop.
+ *
+ * Walks the instructions of @b starting at b->entry. For every
+ * instruction that inst_is_noop() does not report as a no-op, the index
+ * of the first NULL source slot is stored in nvi->predicate, @cc is
+ * stored in nvi->cc and nv_reference() is called with @pred for that
+ * slot index. Returns immediately if @b has no entry instruction.
+ *
+ * Afterwards the last instruction visited is deleted if it is an
+ * NV_OP_BRA and neither out_kind[0] nor out_kind[1] of @b is
+ * CFG_EDGE_LOOP_LEAVE.
+ */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
- struct nv_value *p, ubyte cc)
+ struct nv_value *pred, uint8_t cc)
{
+ struct nv_instruction *nvi, *prev;
+ int s;
+ if (!b->entry)
+ return;
+ for (nvi = b->entry; nvi; nvi = nvi->next) {
+ prev = nvi; /* remembered even for skipped no-ops; ends up as the last instruction visited */
+ if (inst_is_noop(nvi))
+ continue;
+ for (s = 0; nvi->src[s]; ++s); /* empty body: s stops at the first NULL source slot */
+ assert(s < 6); /* NOTE(review): checked only after the scan above, which itself is unbounded */
+ nvi->predicate = s;
+ nvi->cc = cc;
+ nv_reference(pc, nvi, nvi->predicate, pred);
+ }
+ if (prev->opcode == NV_OP_BRA && /* prev is always set here: b->entry was non-NULL */
+ b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
+ b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
+ nvc0_insn_delete(prev);
}
-#endif
-/* NOTE: Run this after register allocation, we can just cut out the cflow
- * instructions and hook the predicates to the conditional OPs if they are
- * not using immediates; better than inserting SELECT to join definitions.
- *
- * NOTE: Should adapt prior optimization to make this possible more often.
+static INLINE boolean
+may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
+{ /* Decide whether @nvi may be guarded by @pred.
+ *
+ * Returns FALSE when @nvi has a first definition that values_equal()
+ * reports as equal to @pred (presumably because the instruction would
+ * then overwrite its own guard - confirm against values_equal()).
+ * Otherwise returns whatever nvc0_insn_is_predicateable() says.
+ */
+ if (nvi->def[0] && values_equal(nvi->def[0], pred))
+ return FALSE;
+ return nvc0_insn_is_predicateable(nvi);
+}
+
+/* Transform IF/ELSE/ENDIF constructs into predicated instructions
+ * where feasible.
+ *
+ * Only acts when bb_is_if_else_endif(@b) holds. The predicate value is
+ * taken from the source slot that the block's closing NV_OP_BRA names
+ * in its predicate field. Both outgoing blocks are scanned with
+ * may_predicate_insn(); if every instruction of both passes and each
+ * block holds fewer than 12 instructions, out[0] is predicated with
+ * !cc and out[1] with cc of the branch, the branch is deleted, a
+ * JOINAT found at the block exit afterwards is deleted, and the join
+ * marker on the first instruction of the successor block is cleared.
+ *
+ * DESCEND_ARBITRARY is invoked in every case, after which 0 is returned.
*/
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
+ struct nv_instruction *nvi;
+ struct nv_value *pred;
+ int k;
+ int n0, n1; /* instruction counts of outgoing blocks */
+
+ if (bb_is_if_else_endif(b)) {
+ assert(b->exit && b->exit->opcode == NV_OP_BRA);
+
+ assert(b->exit->predicate >= 0);
+ pred = b->exit->src[b->exit->predicate]->value; /* value the branch is conditional on */
+
+ n1 = n0 = 0;
+ for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0) /* nvi stays non-NULL if an instruction is rejected */
+ if (!may_predicate_insn(nvi, pred))
+ break;
+ if (!nvi) { /* first block fully accepted, so check the second one too */
+ /* we're after register allocation, so there always is an ELSE block */
+ for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
+ if (!may_predicate_insn(nvi, pred))
+ break;
+ }
+
+ /* 12 is an arbitrary limit
+ * (nvi == NULL here means no instruction in either block was rejected)
+ */
+ if (!nvi && n0 < 12 && n1 < 12) {
+ predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
+ predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);
+
+ nvc0_insn_delete(b->exit); /* delete the branch */
+
+ /* and a potential joinat before it
+ * NOTE(review): b->exit is read again after being deleted above, so
+ * this relies on nvc0_insn_delete() updating b->exit - TODO confirm.
+ */
+ if (b->exit && b->exit->opcode == NV_OP_JOINAT)
+ nvc0_insn_delete(b->exit);
+
+ /* remove join operations at the end of the conditional
+ * (the successor is taken from out[1] of the first branch block when
+ * its out[0] edge is a loop-leave edge, from out[0] otherwise)
+ */
+ k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
+ if ((nvi = b->out[0]->out[k]->entry)) {
+ nvi->join = 0;
+ if (nvi->opcode == NV_OP_JOIN)
+ nvc0_insn_delete(nvi);
+ }
+ }
+ }
+ DESCEND_ARBITRARY(k, nv_pass_flatten); /* k is reused as the macro's scratch variable */
+
return 0;
}
+/* Tests instructions for equality, but independently of sources.
+ *
+ * Returns FALSE if the opcodes differ. Then compares the state that
+ * belongs to the opcode class:
+ *  - texture ops: ext.tex.t, ext.tex.s and the tex_dim, tex_array,
+ *    tex_cube, tex_shadow and tex_live fields
+ *  - NV_OP_CVT: ext.cvt.s and ext.cvt.d
+ *  - ops whose NV_BASEOP is NV_OP_SET or NV_OP_SLCT: set_cond
+ *  - NV_OP_LINTERP / NV_OP_PINTERP: centroid and flat
+ * and finally cc, lanes, patch and saturate for every opcode.
+ * NV_OP_QUADOP instructions are never reported as equal.
+ * Sources and definitions are not looked at here.
+ */
+static boolean
+is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
+{
+ if (a->opcode != b->opcode)
+ return FALSE;
+ if (nv_is_texture_op(a->opcode)) { /* opcodes are equal from here on, so testing a's is enough */
+ if (a->ext.tex.t != b->ext.tex.t ||
+ a->ext.tex.s != b->ext.tex.s)
+ return FALSE;
+ if (a->tex_dim != b->tex_dim ||
+ a->tex_array != b->tex_array ||
+ a->tex_cube != b->tex_cube ||
+ a->tex_shadow != b->tex_shadow ||
+ a->tex_live != b->tex_live)
+ return FALSE;
+ } else
+ if (a->opcode == NV_OP_CVT) {
+ if (a->ext.cvt.s != b->ext.cvt.s ||
+ a->ext.cvt.d != b->ext.cvt.d)
+ return FALSE;
+ } else
+ if (NV_BASEOP(a->opcode) == NV_OP_SET ||
+ NV_BASEOP(a->opcode) == NV_OP_SLCT) {
+ if (a->set_cond != b->set_cond)
+ return FALSE;
+ } else
+ if (a->opcode == NV_OP_LINTERP ||
+ a->opcode == NV_OP_PINTERP) {
+ if (a->centroid != b->centroid ||
+ a->flat != b->flat)
+ return FALSE;
+ }
+ if (a->cc != b->cc) /* the remaining checks apply to every opcode class */
+ return FALSE;
+ if (a->lanes != b->lanes ||
+ a->patch != b->patch ||
+ a->saturate != b->saturate)
+ return FALSE;
+ if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
+ return FALSE;
+ return TRUE;
+}
+
/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
struct nv_instruction *ir, *ik, *next;
struct nv_instruction *entry = b->phi ? b->phi : b->entry;
- int s;
+ int s, d;
unsigned int reps;
do {
reps = 0;
for (ir = entry; ir; ir = next) {
next = ir->next;
+ if (ir->fixed)
+ continue;
for (ik = entry; ik != ir; ik = ik->next) {
- if (ir->opcode != ik->opcode || ir->fixed)
+ if (!is_operation_equal(ir, ik))
continue;
-
- if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1])
+ if (!ir->def[0] || !ik->def[0])
continue;
if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
continue;
- if (!values_equal(ik->def[0], ir->def[0]))
+ for (d = 0; d < 4; ++d) {
+ if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
+ break;
+ if (ir->def[d]) {
+ if (!values_equal(ik->def[0], ir->def[0]))
+ break;
+ } else {
+ d = 4;
+ break;
+ }
+ }
+ if (d != 4)
continue;
- for (s = 0; s < 3; ++s) {
+ for (s = 0; s < 5; ++s) {
struct nv_value *a, *b;
- if (!ik->src[s]) {
- if (ir->src[s])
- break;
- continue;
+ if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
+ break;
+ if (!ir->src[s]) {
+ s = 5;
+ break;
}
+
if (ik->src[s]->mod != ir->src[s]->mod)
break;
a = ik->src[s]->value;
if (a == b)
continue;
if (a->reg.file != b->reg.file ||
- a->reg.id < 0 ||
+ a->reg.id < 0 || /* this excludes memory loads/stores */
a->reg.id != b->reg.id)
break;
}
- if (s == 3) {
+ if (s == 5) {
nvc0_insn_delete(ir);
+ for (d = 0; d < 4 && ir->def[d]; ++d)
+ nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
++reps;
- nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]);
break;
}
}