switch (i->opcode) {
case NV_OP_MOV:
case NV_OP_LDA:
+ case NV_OP_STA:
return 0;
default:
return 1;
case NV_OP_FLOOR:
case NV_OP_TRUNC:
case NV_OP_CVT:
+ case NV_OP_ROUND:
+ case NV_OP_NEG:
case NV_OP_MAD:
case NV_OP_MUL:
case NV_OP_SAT:
if (s == 0 && (value->reg.file == NV_FILE_MEM_S ||
value->reg.file == NV_FILE_MEM_P))
return TRUE;
- if (s == 1 &&
- value->reg.file >= NV_FILE_MEM_C(0) &&
- value->reg.file <= NV_FILE_MEM_C(15))
- return TRUE;
- if (s == 2 && nvi->src[1]->value->reg.file == NV_FILE_GPR)
- return TRUE;
- return FALSE;
+ if (value->reg.file < NV_FILE_MEM_C(0) ||
+ value->reg.file > NV_FILE_MEM_C(15))
+ return FALSE;
+ return (s == 1) ||
+ ((s == 2) && (nvi->src[1]->value->reg.file == NV_FILE_GPR));
case NV_OP_MOV:
assert(s == 0);
- return TRUE;
+ return /* TRUE */ FALSE; /* don't turn MOVs into loads */
default:
return FALSE;
}
}
}
+/* Opcode predicate: FALSE for vector ops, for everything in the
+ * NV_OP_RCP..NV_OP_PREEX2 range and for the opcodes listed in the switch
+ * below; TRUE for any other opcode.  Going by the name, TRUE means an
+ * instruction with this opcode is able to write the flags register.
+ *
+ * We may want an opcode table.
+ */
+boolean
+nv50_op_can_write_flags(uint opcode)
+{
+   if (nv_is_vector_op(opcode) ||
+       (opcode >= NV_OP_RCP && opcode <= NV_OP_PREEX2))
+      return FALSE;
+
+   switch (opcode) { /* obvious ones like KIL, CALL, etc. not included */
+   case NV_OP_LDA:
+   case NV_OP_PINTERP:
+   case NV_OP_LINTERP:
+   case NV_OP_SELECT:
+   case NV_OP_MOV:
+   case NV_OP_PHI:
+      return FALSE;
+   default:
+      return TRUE;
+   }
+}
+
int
nv_nvi_refcount(struct nv_instruction *nvi)
{
return n;
}
+/* Resolve @ref to the constant value that ultimately feeds it, if any.
+ *
+ * Starting at the referenced value, chains of NV_OP_MOV are followed through
+ * their first source (asserting that this source carries no modifier).  The
+ * value reached is returned when its register file is NV_FILE_IMM, or when
+ * it is defined by an NV_OP_LDA whose first source lies in one of the
+ * NV_FILE_MEM_C(0) .. NV_FILE_MEM_C(15) files.
+ *
+ * Returns NULL for a NULL @ref or when neither condition holds.
+ */
+struct nv_value *
+nvcg_find_constant(struct nv_ref *ref)
+{
+   struct nv_value *val;
+
+   if (!ref)
+      return NULL;
+
+   /* look through plain copies */
+   for (val = ref->value;
+        val->insn && val->insn->opcode == NV_OP_MOV;
+        val = val->insn->src[0]->value)
+      assert(!val->insn->src[0]->mod);
+
+   if (val->reg.file == NV_FILE_IMM)
+      return val;
+
+   /* otherwise only a load from a c[] file qualifies */
+   if (!val->insn || val->insn->opcode != NV_OP_LDA)
+      return NULL;
+   if (val->insn->src[0]->value->reg.file < NV_FILE_MEM_C(0) ||
+       val->insn->src[0]->value->reg.file > NV_FILE_MEM_C(15))
+      return NULL;
+
+   return val;
+}
+
+/* Like nvcg_find_constant(), but only accepts a result whose register file
+ * is NV_FILE_IMM; anything else (including no result) yields NULL.
+ */
+struct nv_value *
+nvcg_find_immediate(struct nv_ref *ref)
+{
+   struct nv_value *val = nvcg_find_constant(ref);
+
+   if (!val || val->reg.file != NV_FILE_IMM)
+      return NULL;
+
+   return val;
+}
+
/* Release pc->refs.
 *
 * Only every 64th slot of pc->refs (index 0, 64, 128, ... below
 * pc->num_refs) is handed to FREE(); afterwards the pointer array itself is
 * released as well.
 * NOTE(review): presumably the refs are allocated in chunks of 64 and slot
 * i with i % 64 == 0 points at the start of a chunk -- confirm against the
 * allocation code.
 */
static void
nv_pc_free_refs(struct nv_pc *pc)
{
int i;
for (i = 0; i < pc->num_refs; i += 64)
FREE(pc->refs[i]);
+ FREE(pc->refs);
}
static const char *
case CFG_EDGE_BACK: return "back";
case CFG_EDGE_LOOP_ENTER: return "loop";
case CFG_EDGE_LOOP_LEAVE: return "break";
+ case CFG_EDGE_FAKE: return "fake";
default:
return "?";
}
case CFG_EDGE_BACK:
continue;
case CFG_EDGE_FORWARD:
+ case CFG_EDGE_FAKE:
if (++b->out[j]->priv == b->out[j]->num_in)
bb[p++] = b->out[j];
break;
bb[p++] = b->out[j];
break;
case CFG_EDGE_LOOP_LEAVE:
- bbb[pp++] = b->out[j];
+ if (!b->out[j]->priv) {
+ bbb[pp++] = b->out[j];
+ b->out[j]->priv = 1;
+ }
break;
default:
assert(0);
f(priv, b);
- if (!p)
- while (pp > 0)
- bb[p++] = bbb[--pp];
+ if (!p) {
+ p = pp;
+ for (; pp > 0; --pp)
+ bb[pp - 1] = bbb[pp - 1];
+ }
}
}
static void
-nv_do_print_program(void *priv, struct nv_basic_block *b)
+nv_do_print_function(void *priv, struct nv_basic_block *b)
{
- struct nv_instruction *i = b->phi;
+ struct nv_instruction *i;
debug_printf("=== BB %i ", b->id);
if (b->out[0])
}
/* Print one function: a "SUBROUTINE <n>" banner when root->subroutine is
 * non-zero, "MAIN" otherwise, then hand nv_do_print_function() to
 * nv_pc_pass_in_order() as the per-block callback starting at @root.
 */
void
-nv_print_program(struct nv_basic_block *root)
+nv_print_function(struct nv_basic_block *root)
{
- nv_pc_pass_in_order(root, nv_do_print_program, root);
+ if (root->subroutine)
+ debug_printf("SUBROUTINE %i\n", root->subroutine);
+ else
+ debug_printf("MAIN\n");
- debug_printf("END\n\n");
+ nv_pc_pass_in_order(root, nv_do_print_function, root);
+}
+
+void
+nv_print_program(struct nv_pc *pc) /* print every function that has a root block */
+{
+ int i;
+ for (i = 0; i < pc->num_subroutines + 1; ++i) /* roots 0 .. num_subroutines */
+ if (pc->root[i]) /* skip empty slots */
+ nv_print_function(pc->root[i]);
}
+#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW
+/* Recursive worker for nv_print_cfgraph().
+ *
+ * Stamps @b with the current pc->pass_seq, writes its node line to @f and
+ * then one edge line per existing successor (at most two), styled by edge
+ * kind.  Successors whose pass_seq is still older than pc->pass_seq are
+ * visited recursively, except across CFG_EDGE_BACK edges, which are printed
+ * but never followed.
+ */
+static void
+nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
+{
+   int e;
+
+   b->pass_seq = pc->pass_seq;
+
+   fprintf(f, "\t%i [shape=box]\n", b->id);
+
+   for (e = 0; e < 2; ++e) {
+      struct nv_basic_block *succ = b->out[e];
+      boolean descend = TRUE;
+
+      if (!succ)
+         continue;
+
+      switch (b->out_kind[e]) {
+      case CFG_EDGE_FORWARD:
+         fprintf(f, "\t%i -> %i;\n", b->id, succ->id);
+         break;
+      case CFG_EDGE_LOOP_ENTER:
+         fprintf(f, "\t%i -> %i [color=green];\n", b->id, succ->id);
+         break;
+      case CFG_EDGE_LOOP_LEAVE:
+         fprintf(f, "\t%i -> %i [color=red];\n", b->id, succ->id);
+         break;
+      case CFG_EDGE_BACK:
+         fprintf(f, "\t%i -> %i;\n", b->id, succ->id);
+         descend = FALSE; /* never walk back edges */
+         break;
+      case CFG_EDGE_FAKE:
+         fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, succ->id);
+         break;
+      default:
+         assert(0);
+         break;
+      }
+
+      if (descend && succ->pass_seq < pc->pass_seq)
+         nv_do_print_cfgraph(pc, f, succ);
+   }
+}
+
+/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file.
+ *
+ * The graph is appended to @filepath as one DOT "digraph G { ... }" section.
+ * Nothing is written when @subr is outside 0 .. pc->num_subroutines, when
+ * that subroutine has no root block, or when the file cannot be opened.
+ */
+static void
+nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
+{
+   FILE *f;
+
+   /* The worker dereferences the root block right away, so reject missing
+    * roots up front (and before creating an empty graph in the file).
+    */
+   if (subr < 0 || subr > pc->num_subroutines || !pc->root[subr])
+      return;
+
+   f = fopen(filepath, "a");
+   if (!f)
+      return;
+
+   fprintf(f, "digraph G {\n");
+
+   /* start a new pass so blocks stamped earlier count as not yet visited */
+   ++pc->pass_seq;
+
+   nv_do_print_cfgraph(pc, f, pc->root[subr]);
+
+   fprintf(f, "}\n");
+
+   fclose(f);
+}
+#endif /* NV50_DEBUG_PROG_CFLOW */
+
/* Dump the pc->bin_size / 4 words of pc->emit through debug_printf(), each
 * as "0x%08x ", with a line break after every 16th word and a final newline.
 */
static INLINE void
nvcg_show_bincode(struct nv_pc *pc)
{
- int i;
+ unsigned i;
- for (i = 0; i < pc->bin_size / 4; ++i)
+ for (i = 0; i < pc->bin_size / 4; ++i) {
debug_printf("0x%08x ", pc->emit[i]);
+ if ((i % 16) == 15) /* 16 words per output row */
+ debug_printf("\n");
+ }
debug_printf("\n");
}
uint32_t *code = pc->emit;
int n;
- debug_printf("emitting program: size = %u\n", pc->bin_size);
+ NV50_DBGMSG(SHADER, "emitting program: size = %u\n", pc->bin_size);
for (n = 0; n < pc->num_blocks; ++n) {
struct nv_instruction *i;
assert(pc->emit == &code[pc->bin_size / 4]);
/* XXX: we can do better than this ... */
- if (!(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3)) {
+ if (!pc->bin_size ||
+ !(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3)) {
pc->emit[0] = 0xf0000001;
pc->emit[1] = 0xe0000000;
pc->bin_size += 8;
pc->emit = code;
code[pc->bin_size / 4 - 1] |= 1;
+#if NV50_DEBUG & NV50_DEBUG_SHADER
nvcg_show_bincode(pc);
+#endif
return 0;
}
{
struct nv_pc *pc;
int ret;
+ int i;
pc = CALLOC_STRUCT(nv_pc);
if (!pc)
return 1;
+ pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0]));
+ if (!pc->root) {
+ FREE(pc);
+ return 1;
+ }
+ pc->num_subroutines = ti->subr_nr;
+
ret = nv50_tgsi_to_nc(pc, ti);
if (ret)
goto out;
- nv_print_program(pc->root);
+#if NV50_DEBUG & NV50_DEBUG_PROG_IR
+ nv_print_program(pc);
+#endif
+
+ pc->opt_reload_elim = ti->store_to_memory ? FALSE : TRUE;
/* optimization */
ret = nv_pc_exec_pass0(pc);
if (ret)
goto out;
+#if NV50_DEBUG & NV50_DEBUG_PROG_IR
+ nv_print_program(pc);
+#endif
/* register allocation */
ret = nv_pc_exec_pass1(pc);
if (ret)
goto out;
+#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW
+ nv_print_program(pc);
+ nv_print_cfgraph(pc, "nv50_shader_cfgraph.dot", 0);
+#endif
/* prepare for emission */
ret = nv_pc_exec_pass2(pc);
if (ret)
goto out;
+ assert(!(pc->bin_size % 8));
pc->emit = CALLOC(pc->bin_size / 4 + 2, 4);
if (!pc->emit) {
ti->p->immd_size = pc->immd_count * 4;
ti->p->immd = pc->immd_buf;
- /* highest 16 bit reg to num of 32 bit regs */
- ti->p->max_gpr = (pc->max_reg[NV_FILE_GPR] >> 1) + 1;
+ /* highest 16 bit reg to num of 32 bit regs, limit to >= 4 */
+ ti->p->max_gpr = MAX2(4, (pc->max_reg[NV_FILE_GPR] >> 1) + 1);
ti->p->fixups = pc->fixups;
ti->p->num_fixups = pc->num_fixups;
- debug_printf("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
+ ti->p->uses_lmem = ti->store_to_memory;
+
+ NV50_DBGMSG(SHADER, "SHADER TRANSLATION - %s\n", ret ? "failed" : "success");
out:
nv_pc_free_refs(pc);
- if (ret) {
+
+ for (i = 0; i < pc->num_blocks; ++i)
+ FREE(pc->bb_list[i]);
+ if (pc->root)
+ FREE(pc->root);
+ if (ret) { /* on success, these will be referenced by nv50_program */
if (pc->emit)
- free(pc->emit);
+ FREE(pc->emit);
if (pc->immd_buf)
- free(pc->immd_buf);
+ FREE(pc->immd_buf);
if (pc->fixups)
- free(pc->fixups);
+ FREE(pc->fixups);
}
- free(pc);
-
+ FREE(pc);
return ret;
}
i->bb = b;
b->num_instructions++;
+
+ if (i->prev && i->prev->is_terminator)
+ nv_nvi_permute(i->prev, i);
+}
+
+/* Insert @ni into the instruction list directly behind @at.
+ *
+ * When @at is the last instruction the work is delegated to
+ * nvbb_insert_tail() on at->bb.  Otherwise @ni is linked between @at and
+ * at->next, and the block bookkeeping is updated here: @ni is assigned to
+ * @at's basic block and that block's instruction count is incremented.
+ * NOTE(review): this assumes nvbb_insert_tail() performs the same
+ * bookkeeping for the tail case and that callers do not do it themselves.
+ */
+void
+nvi_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
+{
+   if (!at->next) {
+      nvbb_insert_tail(at->bb, ni);
+      return;
+   }
+   ni->next = at->next;
+   ni->prev = at;
+   ni->next->prev = ni;
+   ni->prev->next = ni;
+
+   /* keep block ownership and instruction count in sync with the list */
+   ni->bb = at->bb;
+   ni->bb->num_instructions++;
}
void
if (nvi == b->phi) {
if (nvi->opcode != NV_OP_PHI)
- debug_printf("NOTE: b->phi points to non-PHI instruction\n");
+ NV50_DBGMSG(PROG_IR, "NOTE: b->phi points to non-PHI instruction\n");
assert(!nvi->prev);
if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
return j ? TRUE : FALSE;
}
-/* check if bf (future) can be reached from bp (past) */
+/* check if @bf (future) can be reached from @bp (past), stop at @bt
+ *
+ * Iterative breadth-first walk over the out edges starting at @bp; edges for
+ * which IS_WALL_EDGE() holds are ignored.  The successors of @bt are never
+ * queued, so a path that has to pass through @bt does not count.  b->priv is
+ * used as the "already queued" mark and is reset to 0 for every queued block
+ * before returning.
+ *
+ * Returns TRUE-ish (b == bf) if @bf was dequeued, 0 otherwise.
+ */
boolean
nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
struct nv_basic_block *bt)
{
- if (bf == bp)
- return TRUE;
- if (bp == bt)
- return FALSE;
+ struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
+ int i, p, n;
- if (bp->out[0] && bp->out_kind[0] != CFG_EDGE_BACK &&
- nvbb_reachable_by(bf, bp->out[0], bt))
- return TRUE;
- if (bp->out[1] && bp->out_kind[1] != CFG_EDGE_BACK &&
- nvbb_reachable_by(bf, bp->out[1], bt))
- return TRUE;
- return FALSE;
+ p = 0; /* queue head */
+ n = 1; /* queue tail (number of queued blocks) */
+ q[0] = bp;
+
+ while (p < n) {
+ b = q[p++];
+
+ if (b == bf)
+ break;
+ if (b == bt)
+ continue;
+ /* up to two blocks are queued below; bound by the real size of q[] */
+ assert(n <= (NV_PC_MAX_BASIC_BLOCKS - 2));
+
+ for (i = 0; i < 2; ++i) {
+ if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
+ q[n] = b->out[i];
+ q[n++]->priv = 1;
+ }
+ }
+ }
+ /* clear the marks of everything that was queued */
+ for (--n; n >= 0; --n)
+ q[n]->priv = 0;
+
+ return (b == bf);
}
static struct nv_basic_block *
nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
{
+ struct nv_basic_block *out;
int i;
if (!nvbb_dominated_by(df, b)) {
return df;
}
}
- for (i = 0; i < 2 && b->out[i]; ++i) {
- if (b->out_kind[i] == CFG_EDGE_BACK)
+ for (i = 0; i < 2 && df->out[i]; ++i) {
+ if (df->out_kind[i] == CFG_EDGE_BACK)
continue;
- if ((df = nvbb_find_dom_frontier(b, b->out[i])))
- return df;
+ if ((out = nvbb_find_dom_frontier(b, df->out[i])))
+ return out;
}
return NULL;
}