case NV_OP_XOR:
case NV_OP_SHL:
case NV_OP_SHR:
- return (s == 1) && (nvi->def[0]->reg.file == NV_FILE_GPR);
+ return (s == 1) && (nvi->src[0]->value->reg.file == NV_FILE_GPR) &&
+ (nvi->def[0]->reg.file == NV_FILE_GPR);
case NV_OP_MOV:
assert(s == 0);
return (nvi->def[0]->reg.file == NV_FILE_GPR);
boolean
nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value)
{
+ int i;
+
+ for (i = 0; i < 3 && nvi->src[i]; ++i)
+ if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
+ return FALSE;
+
switch (nvi->opcode) {
case NV_OP_ABS:
case NV_OP_ADD:
FREE(pc->refs[i]);
}
+/* Return a short printable label for a CFG edge kind; "?" for any kind
+ * that is not one of the four handled below.
+ */
+static const char *
+edge_name(ubyte type)
+{
+   if (type == CFG_EDGE_FORWARD)
+      return "forward";
+   if (type == CFG_EDGE_BACK)
+      return "back";
+   if (type == CFG_EDGE_LOOP_ENTER)
+      return "loop";
+   if (type == CFG_EDGE_LOOP_LEAVE)
+      return "break";
+
+   return "?";
+}
+
void
-nv_print_program(struct nv_basic_block *b)
+nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
{
- struct nv_instruction *i = b->phi;
+ struct nv_basic_block *bb[64], *bbb[16], *b;
+ int j, p, pp;
+
+ bb[0] = root;
+ p = 1;
+ pp = 0;
+
+ while (p > 0) {
+ b = bb[--p];
+ b->priv = 0;
+
+ for (j = 1; j >= 0; --j) {
+ if (!b->out[j])
+ continue;
+
+ switch (b->out_kind[j]) {
+ case CFG_EDGE_BACK:
+ continue;
+ case CFG_EDGE_FORWARD:
+ if (++b->out[j]->priv == b->out[j]->num_in)
+ bb[p++] = b->out[j];
+ break;
+ case CFG_EDGE_LOOP_ENTER:
+ bb[p++] = b->out[j];
+ break;
+ case CFG_EDGE_LOOP_LEAVE:
+ bbb[pp++] = b->out[j];
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ f(priv, b);
- b->priv = 0;
+ if (!p)
+ while (pp > 0)
+ bb[p++] = bbb[--pp];
+ }
+}
+
+/* Per-block callback for nv_pc_pass_in_order(): print the block id, each
+ * outgoing edge with its kind and target id, and then the instruction list
+ * starting at b->phi or, if that is NULL, at b->entry.
+ */
+static void
+nv_do_print_program(void *priv, struct nv_basic_block *b)
+{
+ struct nv_instruction *i = b->phi;
+
+ (void)priv; /* not used; the parameter exists because f is called as f(priv, b) */
+
debug_printf("=== BB %i ", b->id);
if (b->out[0])
- debug_printf("(--0> %i) ", b->out[0]->id);
+ debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id);
if (b->out[1])
- debug_printf("(--1> %i) ", b->out[1]->id);
+ debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id);
debug_printf("===\n");
if (!i)
i = b->entry;
for (; i; i = i->next)
nv_print_instruction(i);
+}
- if (!b->out[0]) {
- debug_printf("END\n\n");
- return;
- }
- if (!b->out[1] && ++(b->out[0]->priv) != b->out[0]->num_in)
- return;
-
- if (b->out[0] != b)
- nv_print_program(b->out[0]);
+void /* Debug helper: run nv_do_print_program on the blocks visited by
+      * nv_pc_pass_in_order() starting at root, then print an END marker. */
+nv_print_program(struct nv_basic_block *root)
+{
+ nv_pc_pass_in_order(root, nv_do_print_program, root); /* 3rd argument (priv) is not read by nv_do_print_program */
- if (b->out[1] && b->out[1] != b)
- nv_print_program(b->out[1]);
+ debug_printf("END\n\n");
}
static INLINE void
assert(pc->emit == &code[pc->bin_size / 4]);
/* XXX: we can do better than this ... */
- if ((pc->emit[-2] & 2) || (pc->emit[-1] & 3) == 3) {
+ if (!(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3) == 3) {
pc->emit[0] = 0xf0000001;
pc->emit[1] = 0xe0000000;
pc->bin_size += 8;
ret = nv50_tgsi_to_nc(pc, ti);
if (ret)
goto out;
+ nv_print_program(pc->root);
/* optimization */
ret = nv_pc_exec_pass0(pc);
i1->next->prev = i1;
}
-void nvbb_attach_block(struct nv_basic_block *parent, struct nv_basic_block *b)
+void /* Add the CFG edge parent -> b: b is stored in parent's first free
+      * out[] slot, parent is appended to b->in[], and edge_kind is recorded
+      * for the edge on both sides (out_kind[] and in_kind[]). */
+nvbb_attach_block(struct nv_basic_block *parent,
+ struct nv_basic_block *b, ubyte edge_kind)
{
+ assert(b->num_in < 8); /* NOTE(review): 8 is presumably the capacity of b->in[] / b->in_kind[] -- confirm against the struct */
+
if (parent->out[0]) {
assert(!parent->out[1]);
parent->out[1] = b;
- } else
+ parent->out_kind[1] = edge_kind; /* out[0] already taken: b is the second successor */
+ } else {
parent->out[0] = b;
+ parent->out_kind[0] = edge_kind;
+ }
- b->in[b->num_in++] = parent;
+ b->in[b->num_in] = parent;
+ b->in_kind[b->num_in++] = edge_kind; /* same index as the in[] entry written above; num_in is bumped once */
}
-int
+/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
+
+boolean /* TRUE if b == d, or if b has at least one incoming edge and every
+         * predecessor reached through a non-back edge is itself dominated
+         * by d (checked recursively); FALSE otherwise. */
nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
{
- int j, n;
+ int j;
if (b == d)
- return 1;
+ return TRUE;
- n = 0;
for (j = 0; j < b->num_in; ++j)
- n += nvbb_dominated_by(b->in[j], d);
+ if ((b->in_kind[j] != CFG_EDGE_BACK) && !nvbb_dominated_by(b->in[j], d)) /* back edges are skipped */
+ return FALSE;
- return (n && (n == b->num_in)) ? 1 : 0;
+ return j ? TRUE : FALSE; /* the loop ran to completion, so j == b->num_in: FALSE only when b has no predecessors */
}
/* check if bf (future) can be reached from bp (past) */
if (bp == bt)
return FALSE;
- if (bp->out[0] && bp->out[0] != bp &&
+ if (bp->out[0] && bp->out_kind[0] != CFG_EDGE_BACK &&
nvbb_reachable_by(bf, bp->out[0], bt))
return TRUE;
- if (bp->out[1] && bp->out[1] != bp &&
+ if (bp->out[1] && bp->out_kind[1] != CFG_EDGE_BACK &&
nvbb_reachable_by(bf, bp->out[1], bt))
return TRUE;
return FALSE;
}
+/* Depth-first search along non-back edges, starting at df, for a block on
+ * the dominance frontier of b: a block that is not dominated by b but has
+ * at least one predecessor (reached through a non-back edge) that is.
+ *
+ * Returns the first such block found, or NULL if none is reachable from df.
+ *
+ * The successor walk must follow df's out edges. Walking b's out edges
+ * instead would call this function again with the same arguments and never
+ * terminate when b's successor is not itself a frontier block.
+ */
+static struct nv_basic_block *
+nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
+{
+   struct nv_basic_block *found;
+   int i;
+
+   if (!nvbb_dominated_by(df, b)) {
+      for (i = 0; i < df->num_in; ++i) {
+         if (df->in_kind[i] == CFG_EDGE_BACK)
+            continue;
+         if (nvbb_dominated_by(df->in[i], b))
+            return df;
+      }
+   }
+
+   /* df is not a frontier block itself: keep searching from its successors */
+   for (i = 0; i < 2 && df->out[i]; ++i) {
+      if (df->out_kind[i] == CFG_EDGE_BACK)
+         continue;
+      if ((found = nvbb_find_dom_frontier(b, df->out[i])))
+         return found;
+   }
+   return NULL;
+}
+
struct nv_basic_block *
nvbb_dom_frontier(struct nv_basic_block *b)
{
- struct nv_basic_block *df = b->out[0];
-
- assert(df);
- while (nvbb_dominated_by(df, b) ||
- (!nvbb_dominated_by(df->in[0], b) &&
- (!df->in[1] || !nvbb_dominated_by(df->in[1], b)))) {
- df = df->out[0];
- assert(df);
- }
- assert(df);
- return df;
+ struct nv_basic_block *df; /* first frontier block reported by nvbb_find_dom_frontier, if any */
+ int i;
+
+ for (i = 0; i < 2 && b->out[i]; ++i) /* try each successor of b in turn, stopping at the first empty slot */
+ if ((df = nvbb_find_dom_frontier(b, b->out[i])))
+ return df;
+ return NULL; /* nothing found; the removed implementation asserted a non-NULL result, this one can return NULL */
}
/* XXX: need to clean this up so we get the typecasting right more naturally */
+/* LOOP FIXME 1:
+ * In bld_store_loop_var, only replace values that belong to the TGSI register
+ * written.
+ * For TGSI MOV, we only associate the source value with the value tracker of
+ * the destination, instead of generating an actual MOV.
+ *
+ * Possible solution: generate PHI functions in loop headers in advance.
+ */
+/* LOOP FIXME 2:
+ * In fetch_by_bb, when going back through a break-block, we miss all of the
+ * definitions from inside the loop.
+ */
+
#include <unistd.h>
#include "nv50_context.h"
struct nv_value *top;
struct nv_value **body;
unsigned size;
+ uint16_t loop_use; /* 1 bit per loop level, indicates if used/defd */
+ uint16_t loop_def;
};
static INLINE void
bld_vals_push(&stacks[i * 4 + c]);
}
-#define FETCH_TEMP(i, c) (bld->tvs[i][c].top)
-#define STORE_TEMP(i, c, v) (bld->tvs[i][c].top = (v))
-#define FETCH_ADDR(i, c) (bld->avs[i][c].top)
-#define STORE_ADDR(i, c, v) (bld->avs[i][c].top = (v))
-#define FETCH_PRED(i, c) (bld->pvs[i][c].top)
-#define STORE_PRED(i, c, v) (bld->pvs[i][c].top = (v))
-#define FETCH_OUTR(i, c) (bld->ovs[i][c].top)
-#define STORE_OUTR(i, c, v) \
- do { \
- bld->ovs[i][c].top = (v); \
- bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
- } while (0)
-
struct bld_context {
struct nv50_translation_info *ti;
struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING];
int cond_lvl;
struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING];
+ struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];
int loop_lvl;
struct bld_value_stack tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */
uint num_immds;
};
+/* Return the current top value of component c of register i in the tracker
+ * array stk (four consecutive entries per register) and set the loop_use
+ * bit for the current loop level on that entry.
+ */
+static INLINE struct nv_value *
+bld_fetch(struct bld_context *bld, struct bld_value_stack *stk, int i, int c)
+{
+   struct bld_value_stack *slot = &stk[i * 4 + c];
+
+   slot->loop_use |= 1 << bld->loop_lvl;
+   return slot->top;
+}
+
+static void
+bld_store_loop_var(struct bld_context *, struct bld_value_stack *);
+
+/* Make val the new top value of component c of register i in the tracker
+ * array stk. bld_store_loop_var() runs first on the same entry, before top
+ * is overwritten.
+ */
+static INLINE void
+bld_store(struct bld_context *bld, struct bld_value_stack *stk, int i, int c,
+          struct nv_value *val)
+{
+   struct bld_value_stack *slot = &stk[i * 4 + c];
+
+   bld_store_loop_var(bld, slot);
+   slot->top = val;
+}
+
+/* Clear bit lvl in both loop_def and loop_use of the first n * 4 entries
+ * of stk (n registers, four components each).
+ */
+static INLINE void
+bld_clear_def_use(struct bld_value_stack *stk, int n, int lvl)
+{
+   const uint16_t keep = ~(1 << lvl);
+   const int count = n * 4;
+   int k = 0;
+
+   while (k < count) {
+      stk[k].loop_def &= keep;
+      stk[k].loop_use &= keep;
+      ++k;
+   }
+}
+
+#define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c) /* all of these macros expand to uses of a variable named bld in the caller's scope */
+#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
+#define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c)
+#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
+#define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c)
+#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))
+
+#define STORE_OUTR(i, c, v) /* writes top directly instead of going through bld_store, and sets a bit in outputs_written */ \
+ do { \
+ bld->ovs[i][c].top = (v); \
+ bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
+ } while (0)
+
static INLINE void
bld_warn_uninitialized(struct bld_context *bld, int kind,
struct bld_value_stack *stk, struct nv_basic_block *b)
long i = (stk - &bld->tvs[0][0]) / 4;
long c = (stk - &bld->tvs[0][0]) & 3;
- debug_printf("WARNING: TEMP[%li].%li %s used uninitialized in BB:%i\n",
- i, c, kind ? "may be" : "is", b->id);
+ debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
+ i, (int)('x' + c), kind ? "may be" : "is", b->id);
}
static INLINE struct nv_value *
return;
}
for (i = 0; i < b->num_in; ++i)
- fetch_by_bb(stack, vals, n, b->in[i]);
+ if (b->in_kind[i] != CFG_EDGE_BACK)
+ fetch_by_bb(stack, vals, n, b->in[i]);
}
static INLINE struct nv_value *
}
for (i = 0; i < n; ++i) {
+ /* if value dominates b, continue to the redefinitions */
if (nvbb_dominated_by(b, vals[i]->insn->bb))
continue;
+ /* if value dominates any in-block, b should be the dom frontier */
for (j = 0; j < b->num_in; ++j)
if (nvbb_dominated_by(b->in[j], vals[i]->insn->bb))
break;
+ /* otherwise, find the dominance frontier and put the phi there */
if (j == b->num_in) {
in = nvbb_dom_frontier(vals[i]->insn->bb);
val = bld_phi(bld, in, stack);
static INLINE struct nv_value *
bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
{
+ stack->loop_use |= 1 << bld->loop_lvl; /* record a use of this variable at the current loop level before resolving it via bld_phi */
return bld_phi(bld, bld->pc->current_block, stack);
}
return bld->saved_immd[n];
}
+static void
+bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
+ struct nv_value *);
+
+/* When setting a variable inside a loop, and we have used it before in the
+ * loop, we need to insert a phi function in the loop header.
+ *
+ * Called for the tracker entry stk right before a new value is stored to
+ * it. Outside of any loop (loop_lvl == 0) nothing happens. Otherwise the
+ * entry is marked as defined at the current loop level, and if it was used
+ * at this level before being defined at this level, a phi is created in
+ * the loop header block bld->loop_bb[ll - 1]:
+ *  - src[0] is the last value recorded in stk->body,
+ *  - src[1] initially refers to the phi's own result; bld_loop_end()
+ *    replaces it,
+ *  - references to the old value are rewritten to the phi result by
+ *    bld_replace_value(), starting at the loop header.
+ */
+static void
+bld_store_loop_var(struct bld_context *bld, struct bld_value_stack *stk)
+{
+   struct nv_basic_block *bb;
+   struct nv_instruction *phi;
+   struct nv_value *val;
+   int ll;
+   uint16_t loop_def = stk->loop_def; /* state before this store */
+
+   if (!(ll = bld->loop_lvl))
+      return;
+   stk->loop_def |= 1 << ll;
+
+   /* nothing to do if not used at this level yet, or already defined here */
+   if ((~stk->loop_use | loop_def) & (1 << ll))
+      return;
+
+#if 0
+   debug_printf("TEMP[%li].%c used before loop redef (def=%x/use=%x)\n",
+                (stk - &bld->tvs[0][0]) / 4,
+                (int)('x' + ((stk - &bld->tvs[0][0]) & 3)),
+                loop_def, stk->loop_use);
+#endif
+
+   assert(bld->loop_bb[ll - 1]->num_in == 1);
+
+   /* get last assignment from outside this loop, could be from bld_phi */
+   assert(stk->size > 0); /* body[size - 1] below requires a recorded value */
+   val = stk->body[stk->size - 1];
+
+   /* create the phi in the loop entry block */
+
+   bb = bld->pc->current_block;
+   bld->pc->current_block = bld->loop_bb[ll - 1];
+
+   phi = new_instruction(bld->pc, NV_OP_PHI);
+
+   bld_def(phi, 0, new_value(bld->pc, val->reg.file, val->reg.type));
+
+   bld->pc->pass_seq++;
+   bld_replace_value(bld->pc, bld->loop_bb[ll - 1], val, phi->def[0]);
+
+   assert(!stk->top);
+   bld_vals_push_val(stk, phi->def[0]);
+
+   /* stash the tracker entry in the phi so bld_loop_end() can find it */
+   phi->target = (struct nv_basic_block *)stk; /* cheat */
+
+   nv_reference(bld->pc, &phi->src[0], val);
+   nv_reference(bld->pc, &phi->src[1], phi->def[0]);
+
+   bld->pc->current_block = bb;
+}
+
+/* For each leading NV_OP_PHI instruction of bb: take the tracker entry that
+ * was stored in phi->target, fetch its current value with
+ * bld_fetch_global(), make that value src[1] of the phi, and clear target.
+ */
+static void
+bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
+{
+   struct nv_instruction *insn = bb->phi;
+
+   while (insn && insn->opcode == NV_OP_PHI) {
+      struct bld_value_stack *tracker = (struct bld_value_stack *)insn->target;
+      struct nv_value *loop_val = bld_fetch_global(bld, tracker);
+
+      nv_reference(bld->pc, &insn->src[1], loop_val);
+      insn->target = NULL;
+
+      insn = insn->next;
+   }
+}
+
static INLINE struct nv_value *
bld_imm_f32(struct bld_context *bld, float f)
{
static void
bld_flow(struct bld_context *bld, uint opcode, ubyte cc,
- struct nv_value *src, boolean plan_reconverge)
+ struct nv_value *src, struct nv_basic_block *target, /* src may be NULL (no flags source is attached then); target is stored as given, callers also pass NULL */
+ boolean plan_reconverge) /* NOTE(review): not read anywhere in the lines visible here */
{
struct nv_instruction *nvi;
nvi = new_instruction(bld->pc, opcode);
nvi->is_terminator = 1;
nvi->cc = cc;
- nvi->flags_src = new_ref(bld->pc, src);
+ nvi->target = target;
+ if (src) /* only create a flags reference when a source value was supplied */
+ nvi->flags_src = new_ref(bld->pc, src);
}
static ubyte
{
struct nv_basic_block *b = new_basic_block(bld->pc);
- nvbb_attach_block(bld->pc->current_block, b);
+ nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD);
bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
src1 = bld_predicate(bld, emit_fetch(bld, insn, 0, 0));
- bld_flow(bld, NV_OP_BRA, NV_CC_EQ, src1, FALSE);
+ bld_flow(bld, NV_OP_BRA, NV_CC_EQ, src1, NULL, FALSE);
++bld->cond_lvl;
bld_new_block(bld, b);
struct nv_basic_block *b = new_basic_block(bld->pc);
--bld->cond_lvl;
- nvbb_attach_block(bld->join_bb[bld->cond_lvl], b);
+ nvbb_attach_block(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
bld->cond_bb[bld->cond_lvl]->exit->target = b;
bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
bld_new_block(bld, b);
}
break;
- case TGSI_OPCODE_ENDIF: /* XXX: deal with ENDIF; ENDIF; */
+ case TGSI_OPCODE_ENDIF:
{
struct nv_basic_block *b = new_basic_block(bld->pc);
--bld->cond_lvl;
- nvbb_attach_block(bld->pc->current_block, b);
- nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b);
+ nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD);
+ nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
bld->cond_bb[bld->cond_lvl]->exit->target = b;
}
break;
case TGSI_OPCODE_BGNLOOP:
- assert(0);
+ {
+ struct nv_basic_block *bl = new_basic_block(bld->pc);
+ struct nv_basic_block *bb = new_basic_block(bld->pc);
+
+ bld->loop_bb[bld->loop_lvl] = bl;
+ bld->brkt_bb[bld->loop_lvl] = bb;
+
+ bld_flow(bld, NV_OP_BREAKADDR, NV_CC_TR, NULL, bb, FALSE);
+
+ nvbb_attach_block(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);
+
+ bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);
+
+ if (bld->loop_lvl == bld->pc->loop_nesting_bound)
+ bld->pc->loop_nesting_bound++;
+
+ bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
+ bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
+ bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
+ }
break;
case TGSI_OPCODE_BRK:
- assert(0);
+ {
+ struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];
+
+ bld_flow(bld, NV_OP_BREAK, NV_CC_TR, NULL, bb, FALSE);
+
+ /* XXX: don't do this for redundant BRKs */
+ nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);
+ }
break;
case TGSI_OPCODE_CONT:
- assert(0);
+ {
+ struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
+
+ bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);
+
+ nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);
+ }
break;
case TGSI_OPCODE_ENDLOOP:
- assert(0);
+ {
+ struct nv_basic_block *bb = bld->loop_bb[--bld->loop_lvl];
+
+ bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);
+
+ nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);
+
+ bld_loop_end(bld, bb); /* replace loop-side operand of the phis */
+
+ bld_new_block(bld, bld->brkt_bb[bld->loop_lvl]);
+ }
break;
case TGSI_OPCODE_ABS:
case TGSI_OPCODE_CEIL:
emit_store(bld, insn, c, dst0[c]);
}
+/* Release the body array of each of the n * 4 tracker entries starting at
+ * base (n registers, four components each); entries whose body is NULL are
+ * left alone.
+ */
+static INLINE void
+bld_free_value_trackers(struct bld_value_stack *base, int n)
+{
+   const int count = n * 4;
+   int k;
+
+   for (k = 0; k < count; ++k) {
+      struct bld_value_stack *slot = &base[k];
+
+      if (!slot->body)
+         continue;
+      FREE(slot->body);
+   }
+}
+
int
nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti)
{
bld->pc = pc;
bld->ti = ti;
- pc->loop_nesting_bound = 1; /* XXX: should work with 0 */
+ pc->loop_nesting_bound = 1;
c = util_bitcount(bld->ti->p->fp.interp >> 24);
if (c && ti->p->type == PIPE_SHADER_FRAGMENT) {
}
}
+ bld_free_value_trackers(&bld->tvs[0][0], BLD_MAX_TEMPS);
+ bld_free_value_trackers(&bld->avs[0][0], BLD_MAX_ADDRS);
+ bld_free_value_trackers(&bld->pvs[0][0], BLD_MAX_PREDS);
+
+ bld_free_value_trackers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);
+
FREE(bld);
return 0;
}
-#if 0
/* If a variable is assigned in a loop, replace all references to the value
* from outside the loop with a phi value.
*/
static void
-bld_adjust_nv_refs(struct nv_pc *pc, struct nv_basic_block *b,
- struct nv_value *old_val,
- struct nv_value *new_val)
+bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
+ struct nv_value *old_val,
+ struct nv_value *new_val)
{
struct nv_instruction *nvi;
if (nvi->flags_src && nvi->flags_src->value == old_val)
nv_reference(pc, &nvi->flags_src, new_val);
}
+
b->pass_seq = pc->pass_seq;
if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
- bld_adjust_nv_refs(pc, b, old_val, new_val);
+ bld_replace_value(pc, b->out[0], old_val, new_val);
if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
- bld_adjust_nv_refs(pc, b, old_val, new_val);
+ bld_replace_value(pc, b->out[1], old_val, new_val);
}
-#endif