nv50/ir: Add convenience method for calculating the live sets of a function.

[mesa.git] / src / gallium / drivers / nv50 / nv50_pc.c
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c

index b9d274414d15e336866706e5c162f474d3f41828..9137f871f5a7be643cded5daa994cc782be875c9 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_pc.c
+++ b/src/gallium/drivers/nv50/nv50_pc.c
@@ -55,6 +55,7 @@ nv50_indirect_opnd(struct nv_instruction *i)
     switch (i->opcode) {
     case NV_OP_MOV:
     case NV_OP_LDA:
+   case NV_OP_STA:
        return 0;
     default:
        return 1;
@@ -101,6 +102,8 @@ nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value)
     case NV_OP_FLOOR:
     case NV_OP_TRUNC:
     case NV_OP_CVT:
+   case NV_OP_ROUND:
+   case NV_OP_NEG:
     case NV_OP_MAD:
     case NV_OP_MUL:
     case NV_OP_SAT:
@@ -110,16 +113,14 @@ nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value)
        if (s == 0 && (value->reg.file == NV_FILE_MEM_S ||
                       value->reg.file == NV_FILE_MEM_P))
           return TRUE;
-      if (s == 1 &&
-          value->reg.file >= NV_FILE_MEM_C(0) &&
-          value->reg.file <= NV_FILE_MEM_C(15))
-         return TRUE;
-      if (s == 2 && nvi->src[1]->value->reg.file == NV_FILE_GPR)
-         return TRUE;
-      return FALSE;
+      if (value->reg.file < NV_FILE_MEM_C(0) ||
+          value->reg.file > NV_FILE_MEM_C(15))
+         return FALSE;
+      return (s == 1) ||
+         ((s == 2) && (nvi->src[1]->value->reg.file == NV_FILE_GPR));
     case NV_OP_MOV:
        assert(s == 0);
-      return TRUE;
+      return /* TRUE */ FALSE; /* don't turn MOVs into loads */
     default:
        return FALSE;
     }
@@ -169,6 +170,28 @@ nv50_supported_src_mods(uint opcode, int s)
     }
  }
  
+/* Returns FALSE for opcodes that are treated as unable to write flags and
+ * TRUE for everything else.
+ *
+ * Rejected are vector ops, PHI/MOV/SELECT/LINTERP/PINTERP/LDA and every
+ * opcode in the range NV_OP_RCP..NV_OP_PREEX2. Obvious ones like KIL, CALL,
+ * etc. are not included in the rejection list.
+ *
+ * We may want an opcode table.
+ */
+boolean
+nv50_op_can_write_flags(uint opcode)
+{
+   const boolean in_rcp_preex2_range =
+      (opcode >= NV_OP_RCP) && (opcode <= NV_OP_PREEX2);
+
+   if (nv_is_vector_op(opcode))
+      return FALSE;
+   if (in_rcp_preex2_range)
+      return FALSE;
+
+   switch (opcode) {
+   case NV_OP_LDA:
+   case NV_OP_PINTERP:
+   case NV_OP_LINTERP:
+   case NV_OP_SELECT:
+   case NV_OP_MOV:
+   case NV_OP_PHI:
+      return FALSE;
+   default:
+      return TRUE;
+   }
+}
+
  int
  nv_nvi_refcount(struct nv_instruction *nvi)
  {
@@ -202,12 +225,42 @@ nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
     return n;
  }
  
+/* Trace @ref back through MOV instructions to a constant value.
+ *
+ * Returns the value found if it is an immediate (NV_FILE_IMM) or the result
+ * of an LDA whose source is in one of the files NV_FILE_MEM_C(0) to
+ * NV_FILE_MEM_C(15). Returns NULL if @ref is NULL or no such value is found.
+ */
+struct nv_value *
+nvcg_find_constant(struct nv_ref *ref)
+{
+   struct nv_value *val;
+   struct nv_value *ld_src;
+
+   if (ref == NULL)
+      return NULL;
+
+   /* step over MOVs (which must not carry a source modifier) */
+   for (val = ref->value;
+        val->insn && val->insn->opcode == NV_OP_MOV;
+        val = val->insn->src[0]->value)
+      assert(!val->insn->src[0]->mod);
+
+   if (val->reg.file == NV_FILE_IMM)
+      return val;
+
+   if (!val->insn || val->insn->opcode != NV_OP_LDA)
+      return NULL;
+
+   ld_src = val->insn->src[0]->value;
+   if (ld_src->reg.file < NV_FILE_MEM_C(0) ||
+       ld_src->reg.file > NV_FILE_MEM_C(15))
+      return NULL;
+   return val;
+}
+
+/* Same search as nvcg_find_constant(), but only an immediate is accepted:
+ * returns the value found if its file is NV_FILE_IMM, NULL otherwise.
+ */
+struct nv_value *
+nvcg_find_immediate(struct nv_ref *ref)
+{
+   struct nv_value *val = nvcg_find_constant(ref);
+
+   if (!val || val->reg.file != NV_FILE_IMM)
+      return NULL;
+   return val;
+}
+
  static void
  nv_pc_free_refs(struct nv_pc *pc)
  {
     int i;
+   /* Only every 64th entry is passed to FREE: presumably the refs are
+    * allocated in chunks of 64 and refs[i] is the start of a chunk -
+    * TODO confirm against the allocation site.
+    */
     for (i = 0; i < pc->num_refs; i += 64)
        FREE(pc->refs[i]);
+   FREE(pc->refs); /* finally release the pointer array itself */
  }
  
  static const char *
@@ -218,6 +271,7 @@ edge_name(ubyte type)
     case CFG_EDGE_BACK: return "back";
     case CFG_EDGE_LOOP_ENTER: return "loop";
     case CFG_EDGE_LOOP_LEAVE: return "break";
+   case CFG_EDGE_FAKE: return "fake";
     default:
        return "?";
     }
@@ -245,6 +299,7 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
           case CFG_EDGE_BACK:
              continue;
           case CFG_EDGE_FORWARD:
+         case CFG_EDGE_FAKE:
              if (++b->out[j]->priv == b->out[j]->num_in)
                 bb[p++] = b->out[j];
              break;
@@ -252,7 +307,10 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
              bb[p++] = b->out[j];
              break;
           case CFG_EDGE_LOOP_LEAVE:
-            bbb[pp++] = b->out[j];
+            if (!b->out[j]->priv) {
+               bbb[pp++] = b->out[j];
+               b->out[j]->priv = 1;
+            }
              break;
           default:
              assert(0);
@@ -262,16 +320,18 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
  
        f(priv, b);
  
-      if (!p)
-         while (pp > 0)
-            bb[p++] = bbb[--pp];
+      if (!p) {
+         p = pp;
+         for (; pp > 0; --pp)
+            bb[pp - 1] = bbb[pp - 1];
+      }
     }
  }
  
  static void
-nv_do_print_program(void *priv, struct nv_basic_block *b)
+nv_do_print_function(void *priv, struct nv_basic_block *b)
  {
-   struct nv_instruction *i = b->phi;
+   struct nv_instruction *i;
  
     debug_printf("=== BB %i ", b->id);
     if (b->out[0])
@@ -288,20 +348,95 @@ nv_do_print_program(void *priv, struct nv_basic_block *b)
  }
  
  void
-nv_print_program(struct nv_basic_block *root)
+nv_print_function(struct nv_basic_block *root)
  {
-   nv_pc_pass_in_order(root, nv_do_print_program, root);
+   /* header line: a non-zero root->subroutine is printed as the subroutine
+    * number, zero is printed as MAIN
+    */
+   if (root->subroutine)
+      debug_printf("SUBROUTINE %i\n", root->subroutine);
+   else
+      debug_printf("MAIN\n");
  
-   debug_printf("END\n\n");
+   /* nv_do_print_function is invoked on the blocks starting at @root, in
+    * the order chosen by nv_pc_pass_in_order
+    */
+   nv_pc_pass_in_order(root, nv_do_print_function, root);
+}
+
+/* Print every function of the program @pc via nv_print_function(). */
+void
+nv_print_program(struct nv_pc *pc)
+{
+   int i;
+   /* pc->root is walked over num_subroutines + 1 entries; entries that are
+    * NULL are skipped
+    */
+   for (i = 0; i < pc->num_subroutines + 1; ++i)
+      if (pc->root[i])
+         nv_print_function(pc->root[i]);
  }
  
+#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW
+/* Recursive helper for nv_print_cfgraph(): writes the node for @b and its
+ * outgoing edges to @f in dot syntax, then descends into successors that
+ * have not been reached during the current pass.
+ */
+static void
+nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
+{
+   int i;
+
+   b->pass_seq = pc->pass_seq; /* stamp @b as visited in this pass */
+
+   fprintf(f, "\t%i [shape=box]\n", b->id);
+
+   /* a block has at most two outgoing edges, out[0] and out[1] */
+   for (i = 0; i < 2; ++i) {
+      if (!b->out[i])
+         continue;
+      /* the edge kind selects the dot attributes of the edge */
+      switch (b->out_kind[i]) {
+      case CFG_EDGE_FORWARD:
+         fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
+         break;
+      case CFG_EDGE_LOOP_ENTER:
+         fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
+         break;
+      case CFG_EDGE_LOOP_LEAVE:
+         fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
+         break;
+      case CFG_EDGE_BACK:
+         fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
+         continue; /* back edges are drawn but never followed */
+      case CFG_EDGE_FAKE:
+         fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
+         break;
+      default:
+         assert(0);
+         break;
+      }
+      /* recurse only into blocks not yet stamped with the current pass_seq */
+      if (b->out[i]->pass_seq < pc->pass_seq)
+         nv_do_print_cfgraph(pc, f, b->out[i]);
+   }
+}
+
+/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file.
+ *
+ * The graph is appended to @filepath as a "digraph G { ... }" block in dot
+ * syntax. Nothing is written if @subr is out of range, if that subroutine
+ * has no root block, or if the file cannot be opened.
+ */
+static void
+nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
+{
+   FILE *f;
+
+   /* pc->root holds num_subroutines + 1 entries, and nv_print_program()
+    * treats entries as possibly NULL, so check before touching the file
+    */
+   if (subr < 0 || subr > pc->num_subroutines || !pc->root[subr])
+      return;
+
+   f = fopen(filepath, "a");
+   if (!f)
+      return;
+
+   fprintf(f, "digraph G {\n");
+
+   /* start a new pass: nv_do_print_cfgraph() only descends into blocks
+    * whose pass_seq is below pc->pass_seq
+    */
+   ++pc->pass_seq;
+
+   nv_do_print_cfgraph(pc, f, pc->root[subr]);
+
+   fprintf(f, "}\n");
+
+   fclose(f);
+}
+#endif /* NV50_DEBUG_PROG_CFLOW */
+
  static INLINE void
  nvcg_show_bincode(struct nv_pc *pc)
  {
-   int i;
+   unsigned i;
  
-   for (i = 0; i < pc->bin_size / 4; ++i)
+   /* dump pc->emit as hex words; bin_size / 4 of them are printed */
+   for (i = 0; i < pc->bin_size / 4; ++i) {
        debug_printf("0x%08x ", pc->emit[i]);
+      if ((i % 16) == 15) /* line break after every 16th word */
+         debug_printf("\n");
+   }
     debug_printf("\n");
  }
  
@@ -311,7 +446,7 @@ nv50_emit_program(struct nv_pc *pc)
     uint32_t *code = pc->emit;
     int n;
  
-   debug_printf("emitting program: size = %u\n", pc->bin_size);
+   NV50_DBGMSG(SHADER, "emitting program: size = %u\n", pc->bin_size);
  
     for (n = 0; n < pc->num_blocks; ++n) {
        struct nv_instruction *i;
@@ -327,7 +462,8 @@ nv50_emit_program(struct nv_pc *pc)
     assert(pc->emit == &code[pc->bin_size / 4]);
  
     /* XXX: we can do better than this ... */
-   if (!(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3)) {
+   if (!pc->bin_size ||
+       !(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3)) {
        pc->emit[0] = 0xf0000001;
        pc->emit[1] = 0xe0000000;
        pc->bin_size += 8;
@@ -336,7 +472,9 @@ nv50_emit_program(struct nv_pc *pc)
     pc->emit = code;
     code[pc->bin_size / 4 - 1] |= 1;
  
+#if NV50_DEBUG & NV50_DEBUG_SHADER
     nvcg_show_bincode(pc);
+#endif
  
     return 0;
  }
@@ -346,30 +484,50 @@ nv50_generate_code(struct nv50_translation_info *ti)
  {
     struct nv_pc *pc;
     int ret;
+   int i;
  
     pc = CALLOC_STRUCT(nv_pc);
     if (!pc)
        return 1;
  
+   pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0]));
+   if (!pc->root) {
+      FREE(pc);
+      return 1;
+   }
+   pc->num_subroutines = ti->subr_nr;
+
     ret = nv50_tgsi_to_nc(pc, ti);
     if (ret)
        goto out;
-   nv_print_program(pc->root);
+#if NV50_DEBUG & NV50_DEBUG_PROG_IR
+   nv_print_program(pc);
+#endif
+
+   pc->opt_reload_elim = ti->store_to_memory ? FALSE : TRUE;
  
     /* optimization */
     ret = nv_pc_exec_pass0(pc);
     if (ret)
        goto out;
+#if NV50_DEBUG & NV50_DEBUG_PROG_IR
+   nv_print_program(pc);
+#endif
  
     /* register allocation */
     ret = nv_pc_exec_pass1(pc);
     if (ret)
        goto out;
+#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW
+   nv_print_program(pc);
+   nv_print_cfgraph(pc, "nv50_shader_cfgraph.dot", 0);
+#endif
  
     /* prepare for emission */
     ret = nv_pc_exec_pass2(pc);
     if (ret)
        goto out;
+   assert(!(pc->bin_size % 8));
  
     pc->emit = CALLOC(pc->bin_size / 4 + 2, 4);
     if (!pc->emit) {
@@ -386,26 +544,32 @@ nv50_generate_code(struct nv50_translation_info *ti)
     ti->p->immd_size = pc->immd_count * 4;
     ti->p->immd = pc->immd_buf;
  
-   /* highest 16 bit reg to num of 32 bit regs */
-   ti->p->max_gpr = (pc->max_reg[NV_FILE_GPR] >> 1) + 1;
+   /* highest 16 bit reg to num of 32 bit regs, limit to >= 4 */
+   ti->p->max_gpr = MAX2(4, (pc->max_reg[NV_FILE_GPR] >> 1) + 1);
  
     ti->p->fixups = pc->fixups;
     ti->p->num_fixups = pc->num_fixups;
  
-   debug_printf("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
+   ti->p->uses_lmem = ti->store_to_memory;
+
+   NV50_DBGMSG(SHADER, "SHADER TRANSLATION - %s\n", ret ? "failed" : "success");
  
  out:
     nv_pc_free_refs(pc);
-   if (ret) {
+
+   for (i = 0; i < pc->num_blocks; ++i)
+      FREE(pc->bb_list[i]);
+   if (pc->root)
+      FREE(pc->root);
+   if (ret) { /* on success, these will be referenced by nv50_program */
        if (pc->emit)
-         free(pc->emit);
+         FREE(pc->emit);
        if (pc->immd_buf)
-         free(pc->immd_buf);
+         FREE(pc->immd_buf);
        if (pc->fixups)
-         free(pc->fixups);
+         FREE(pc->fixups);
     }
-   free(pc);
-
+   FREE(pc);
     return ret;
  }
  
@@ -460,6 +624,22 @@ nvbb_insert_tail(struct nv_basic_block *b, struct nv_instruction *i)
  
     i->bb = b;
     b->num_instructions++;
+
+   if (i->prev && i->prev->is_terminator)
+      nv_nvi_permute(i->prev, i);
+}
+
+/* Insert @ni directly after @at in @at's basic block.
+ *
+ * If @at is the last instruction, nvbb_insert_tail() does the work.
+ * Otherwise @ni is linked between @at and its successor and receives the
+ * same block bookkeeping that nvbb_insert_tail() performs: @ni->bb is set
+ * and the block's instruction count is incremented.
+ */
+void
+nvi_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
+{
+   if (!at->next) {
+      nvbb_insert_tail(at->bb, ni);
+      return;
+   }
+   ni->next = at->next;
+   ni->prev = at;
+   ni->next->prev = ni;
+   ni->prev->next = ni;
+
+   /* keep block membership and count consistent with the tail path */
+   ni->bb = at->bb;
+   at->bb->num_instructions++;
  }
  
  void
@@ -492,7 +672,7 @@ nv_nvi_delete(struct nv_instruction *nvi)
  
     if (nvi == b->phi) {
        if (nvi->opcode != NV_OP_PHI)
-         debug_printf("NOTE: b->phi points to non-PHI instruction\n");
+         NV50_DBGMSG(PROG_IR, "NOTE: b->phi points to non-PHI instruction\n");
  
        assert(!nvi->prev);
        if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
@@ -564,28 +744,44 @@ nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
     return j ? TRUE : FALSE;
  }
  
-/* check if bf (future) can be reached from bp (past) */
+/* check if @bf (future) can be reached from @bp (past), stop at @bt */
  boolean
  nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
                    struct nv_basic_block *bt)
  {
-   if (bf == bp)
-      return TRUE;
-   if (bp == bt)
-      return FALSE;
+   /* q is a work list processed first-in first-out: p indexes the next
+    * block to examine, n the next free slot
+    */
+   struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
+   int i, p, n;
  
-   if (bp->out[0] && bp->out_kind[0] != CFG_EDGE_BACK &&
-       nvbb_reachable_by(bf, bp->out[0], bt))
-      return TRUE;
-   if (bp->out[1] && bp->out_kind[1] != CFG_EDGE_BACK &&
-       nvbb_reachable_by(bf, bp->out[1], bt))
-      return TRUE;
-   return FALSE;
+   p = 0;
+   n = 1;
+   q[0] = bp;
+
+   while (p < n) {
+      b = q[p++];
+
+      if (b == bf)
+         break;
+      if (b == bt)
+         continue; /* successors of @bt are not explored */
+      /* up to two blocks are appended below.
+       * NOTE(review): 1024 presumably mirrors NV_PC_MAX_BASIC_BLOCKS - confirm
+       */
+      assert(n <= (1024 - 2));
+
+      /* queue each successor once; priv serves as the "already queued"
+       * mark, and edges for which IS_WALL_EDGE() is true are not followed
+       */
+      for (i = 0; i < 2; ++i) {
+         if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
+            q[n] = b->out[i];
+            q[n++]->priv = 1;
+         }
+      }
+   }
+   /* reset priv on every block that was placed in the queue (q[0] == @bp
+    * included, although it was never marked above)
+    */
+   for (--n; n >= 0; --n)
+      q[n]->priv = 0;
+
+   /* b is the last block taken from the queue */
+   return (b == bf);
  }
  
  static struct nv_basic_block *
  nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
  {
+   struct nv_basic_block *out;
     int i;
  
     if (!nvbb_dominated_by(df, b)) {
@@ -596,11 +792,11 @@ nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
              return df;
        }
     }
-   for (i = 0; i < 2 && b->out[i]; ++i) {
-      if (b->out_kind[i] == CFG_EDGE_BACK)
+   for (i = 0; i < 2 && df->out[i]; ++i) {
+      if (df->out_kind[i] == CFG_EDGE_BACK)
           continue;
-      if ((df = nvbb_find_dom_frontier(b, b->out[i])))
-         return df;
+      if ((out = nvbb_find_dom_frontier(b, df->out[i])))
+         return out;
     }
     return NULL;
  }