nvc0_insn_refcount(nvi)));
}
+/* Check if we do not actually have to emit this instruction.
+ * predicate_instructions() leaves alone every instruction for which this
+ * returns a non-zero value.
+ */
static INLINE boolean
inst_is_noop(struct nv_instruction *nvi)
{
return 0;
}
-#if 0
/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
* Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
* BREAK and dummy ELSE block.
}
}
-/* predicate instructions and remove branch at the end */
+/* Predicate instructions and delete any branch at the end if it is
+ * not a break from a loop.
+ *
+ * Walks the instruction list of @b starting at b->entry. For every
+ * instruction that inst_is_noop() does not reject, the index of the first
+ * NULL entry in src[] is stored in nvi->predicate, @cc is stored in
+ * nvi->cc, and @pred is referenced in that slot through nv_reference().
+ * Afterwards the last instruction of the block is deleted if it is a BRA
+ * and neither out_kind[0] nor out_kind[1] is CFG_EDGE_LOOP_LEAVE.
+ *
+ * @pc:   passed through to nv_reference()
+ * @b:    block to predicate; nothing is done if it has no entry instruction
+ * @pred: value used as the predicate of each instruction
+ * @cc:   condition code written to each predicated instruction
+ */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
- struct nv_value *p, ubyte cc)
+ struct nv_value *pred, uint8_t cc)
{
+ struct nv_instruction *nvi, *prev;
+ int s;
+ if (!b->entry) /* empty block: also guarantees prev is set below */
+ return;
+ for (nvi = b->entry; nvi; nvi = nvi->next) {
+ prev = nvi; /* tracks the last instruction visited, no-ops included */
+ if (inst_is_noop(nvi))
+ continue;
+ for (s = 0; nvi->src[s]; ++s); /* s = index of the first NULL source slot */
+ assert(s < 6); /* NOTE(review): the scan above reads src[s] before this bound is checked */
+ nvi->predicate = s;
+ nvi->cc = cc;
+ nv_reference(pc, nvi, nvi->predicate, pred);
+ }
+ if (prev->opcode == NV_OP_BRA && /* trailing branch is kept if either out edge leaves a loop */
+ b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
+ b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
+ nvc0_insn_delete(prev);
}
-#endif
-/* NOTE: Run this after register allocation, we can just cut out the cflow
- * instructions and hook the predicates to the conditional OPs if they are
- * not using immediates; better than inserting SELECT to join definitions.
- *
- * NOTE: Should adapt prior optimization to make this possible more often.
+static INLINE boolean
+may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
+{ /* Tell whether @nvi may be predicated on @pred; used by nv_pass_flatten(). */
+ if (nvi->def[0] && values_equal(nvi->def[0], pred)) /* first result of @nvi equals @pred itself */
+ return FALSE;
+ return nvc0_insn_is_predicateable(nvi); /* otherwise defer to the per-instruction check */
+}
+
+/* Transform IF/ELSE/ENDIF constructs into predicated instructions
+ * where feasible.
+ *
+ * Applies when bb_is_if_else_endif(@b) holds. The predicate is taken from
+ * the predicate source of the BRA that ends @b. The construct is flattened
+ * only if every instruction of both outgoing blocks passes
+ * may_predicate_insn() and each block holds fewer than 12 instructions.
+ *
+ * When flattening, b->out[0] is predicated with the logical negation (!)
+ * of the branch's cc and b->out[1] with the branch's cc itself. The branch
+ * is then deleted, as is a JOINAT if that is what b->exit is afterwards.
+ * Finally the join flag on the first instruction of the block following
+ * b->out[0] is cleared, and that instruction is deleted if it is a JOIN.
+ *
+ * Always returns 0.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
+ struct nv_instruction *nvi;
+ struct nv_value *pred;
+ int k;
+ int n0, n1; /* instruction counts of outgoing blocks */
+
+ if (bb_is_if_else_endif(b)) {
+ assert(b->exit && b->exit->opcode == NV_OP_BRA);
+
+ assert(b->exit->predicate >= 0);
+ pred = b->exit->src[b->exit->predicate]->value;
+
+ n1 = n0 = 0;
+ for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
+ if (!may_predicate_insn(nvi, pred))
+ break;
+ if (!nvi) { /* the whole first block passed the check */
+ /* We are after register allocation, so there always is an ELSE block. */
+ for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
+ if (!may_predicate_insn(nvi, pred))
+ break;
+ }
+
+ /* 12 is an arbitrary limit on the instruction count of each block */
+ if (!nvi && n0 < 12 && n1 < 12) { /* nvi is non-NULL if either scan stopped early */
+ predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
+ predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);
+
+ nvc0_insn_delete(b->exit); /* delete the branch */
+
+ /* and a potential joinat before it (b->exit is read again after the delete) */
+ if (b->exit && b->exit->opcode == NV_OP_JOINAT)
+ nvc0_insn_delete(b->exit);
+
+ /* remove join operations at the end of the conditional */
+ k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0; /* use out[1] if out[0] leaves a loop */
+ if ((nvi = b->out[0]->out[k]->entry)) {
+ nvi->join = 0;
+ if (nvi->opcode == NV_OP_JOIN)
+ nvc0_insn_delete(nvi);
+ }
+ }
+ }
+ DESCEND_ARBITRARY(k, nv_pass_flatten); /* NOTE(review): presumably continues the pass on successor blocks; macro not visible here */
+
return 0;
}
PRINT("%s", gree);
if (NV_BASEOP(i->opcode) == NV_OP_SET)
- PRINT("set %s", nv_cond_name(i->set_cond));
+ PRINT("%s %s", nvc0_opcode_name(i->opcode), nv_cond_name(i->set_cond));
else
if (i->saturate)
PRINT("sat %s", nvc0_opcode_name(i->opcode));
{ NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
{ NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
{ NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
- { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
{ NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
{ NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
- { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
- { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 },
{ NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 },
- { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 },
- { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
+ { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
{ NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
static void
bld_flow(struct bld_context *bld, uint opcode,
- struct nv_value *src, struct nv_basic_block *target,
+ struct nv_value *pred, uint8_t cc, struct nv_basic_block *target,
boolean reconverge)
{
struct nv_instruction *nvi;
nvi = new_instruction(bld->pc, opcode);
nvi->target = target;
nvi->terminator = 1;
- if (src)
- bld_src_predicate(bld, nvi, 0, src);
+ if (pred) { /* @cc is only stored when a predicate is supplied */
+ nvi->cc = cc;
+ bld_src_predicate(bld, nvi, 0, pred); /* presumably hooks @pred up as the instruction's predicate; helper not visible here */
+ }
}
static ubyte
case TGSI_OPCODE_IF:
{
struct nv_basic_block *b = new_basic_block(bld->pc);
+ struct nv_value *pred = emit_fetch(bld, insn, 0, 0);
assert(bld->cond_lvl < BLD_MAX_COND_NESTING);
bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
- src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ,
- emit_fetch(bld, insn, 0, 0), bld->zero);
+ if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) {
+ pred = bld_clone(bld, pred->insn);
+ pred->reg.size = 1;
+ pred->reg.file = NV_FILE_PRED;
+ if (pred->insn->opcode == NV_OP_FSET_F32)
+ pred->insn->opcode = NV_OP_SET_F32;
+ } else {
+ pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U,
+ pred, bld->zero);
+ }
+ assert(!mask);
- bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0));
+ bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL, (bld->cond_lvl == 0));
++bld->cond_lvl;
bld_new_block(bld, b);
{
struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];
- bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);
{
struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
- bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */
- bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
}