nv50: prepare for having multiple functions

author Christoph Bumiller <e0425955@student.tuwien.ac.at>

Tue, 7 Sep 2010 13:40:34 +0000 (15:40 +0200)

committer Christoph Bumiller <e0425955@student.tuwien.ac.at>

Thu, 9 Sep 2010 17:21:34 +0000 (19:21 +0200)
author Christoph Bumiller <e0425955@student.tuwien.ac.at>
Tue, 7 Sep 2010 13:40:34 +0000 (15:40 +0200)
committer Christoph Bumiller <e0425955@student.tuwien.ac.at>
Thu, 9 Sep 2010 17:21:34 +0000 (19:21 +0200)
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c

index e34c0553eb4aff843e6e55b5a71ff760df6687c8..c54f16e4c53f674ec13a533ff6e74f05b1e52cc0 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_pc.c
+++ b/src/gallium/drivers/nv50/nv50_pc.c
@@ -304,7 +304,7 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
  }
  
  static void
-nv_do_print_program(void *priv, struct nv_basic_block *b)
+nv_do_print_function(void *priv, struct nv_basic_block *b)
  {
     struct nv_instruction *i = b->phi;
  
@@ -323,11 +323,23 @@ nv_do_print_program(void *priv, struct nv_basic_block *b)
  }
  
  void
-nv_print_program(struct nv_basic_block *root)
+nv_print_function(struct nv_basic_block *root)
  {
-   nv_pc_pass_in_order(root, nv_do_print_program, root);
+   if (root->subroutine)
+      debug_printf("SUBROUTINE %i\n", root->subroutine);
+   else
+      debug_printf("MAIN\n");
  
-   debug_printf("END\n\n");
+   nv_pc_pass_in_order(root, nv_do_print_function, root);
+}
+
+void
+nv_print_program(struct nv_pc *pc)
+{
+   int i;
+   for (i = 0; i < pc->num_subroutines + 1; ++i)
+      if (pc->root[i])
+         nv_print_function(pc->root[i]);
  }
  
  static INLINE void
@@ -388,11 +400,18 @@ nv50_generate_code(struct nv50_translation_info *ti)
     if (!pc)
        return 1;
  
+   pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0]));
+   if (!pc->root) {
+      FREE(pc);
+      return 1;
+   }
+   pc->num_subroutines = ti->subr_nr;
+
     ret = nv50_tgsi_to_nc(pc, ti);
     if (ret)
        goto out;
  #ifdef NV50PC_DEBUG
-   nv_print_program(pc->root);
+   nv_print_program(pc);
  #endif
  
     /* optimization */
@@ -400,7 +419,7 @@ nv50_generate_code(struct nv50_translation_info *ti)
     if (ret)
        goto out;
  #ifdef NV50PC_DEBUG
-   nv_print_program(pc->root);
+   nv_print_program(pc);
  #endif
  
     /* register allocation */
@@ -408,7 +427,7 @@ nv50_generate_code(struct nv50_translation_info *ti)
     if (ret)
        goto out;
  #ifdef NV50PC_DEBUG
-   nv_print_program(pc->root);
+   nv_print_program(pc);
  #endif
  
     /* prepare for emission */
@@ -441,16 +460,19 @@ nv50_generate_code(struct nv50_translation_info *ti)
  
  out:
     nv_pc_free_refs(pc);
-   if (ret) {
+
+   if (pc->bb_list)
+      FREE(pc->bb_list);
+   FREE(pc->root); /* per-function root array allocated above */
+   if (ret) { /* on success, these will be referenced by nv50_program */
        if (pc->emit)
-         free(pc->emit);
+         FREE(pc->emit);
        if (pc->immd_buf)
-         free(pc->immd_buf);
+         FREE(pc->immd_buf);
        if (pc->fixups)
-         free(pc->fixups);
+         FREE(pc->fixups);
     }
-   free(pc);
-
+   FREE(pc);
     return ret;
  }
  
diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h

index 703d32d334e662622489e9ce1187e43951822a79..d9cc775572e3e39d13c575fbeec2f0502dbda851 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_pc.h
+++ b/src/gallium/drivers/nv50/nv50_pc.h
@@ -282,7 +282,7 @@ struct nv_basic_block {
     ubyte in_kind[8];
  
     int id;
-   struct nv_basic_block *last_visitor;
+   int subroutine;
     uint priv;
     uint pass_seq;
  
@@ -314,10 +314,10 @@ nv_fixup_apply(uint32_t *bin, struct nv_fixup *fixup, uint32_t data)
     bin[fixup->offset / 4] = val;
  }
  
-struct nv_pc {
-   struct nv50_translation_info *ti;
+struct nv50_translation_info;
  
-   struct nv_basic_block *root;
+struct nv_pc {
+   struct nv_basic_block **root;
     struct nv_basic_block *current_block;
     struct nv_basic_block *parent_block;
  
@@ -332,6 +332,7 @@ struct nv_pc {
     int num_instructions;
     int num_refs;
     int num_blocks;
+   int num_subroutines;
  
     int max_reg[4];
  
@@ -463,7 +464,8 @@ void nv_print_instruction(struct nv_instruction *);
  
  /* nv50_pc.c */
  
-void nv_print_program(struct nv_basic_block *b);
+void nv_print_function(struct nv_basic_block *root);
+void nv_print_program(struct nv_pc *);
  
  boolean nv_op_commutative(uint opcode);
  int nv50_indirect_opnd(struct nv_instruction *);
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c

index 1ed5032175493294b491dce3786a9f80eadf71fd..4f5bdc1f9fb6d6bc791a4532870464e7515c653d 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -213,23 +213,36 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
     pc->bin_size += b->bin_size *= 4;
  }
  
-int
-nv_pc_exec_pass2(struct nv_pc *pc)
+static int
+nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
  {
     struct nv_pass pass;
  
     pass.pc = pc;
  
     pc->pass_seq++;
-   nv_pass_flatten(&pass, pc->root);
+
+   nv_pass_flatten(&pass, root);
+
+   nv_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
+
+   return 0;
+}
+
+int
+nv_pc_exec_pass2(struct nv_pc *pc)
+{
+   int i, ret;
  
     NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks);
  
-   pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
-   pc->num_blocks = 0;
+   pc->bb_list = CALLOC(pc->num_blocks, sizeof(pc->bb_list[0]));
  
-   nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc);
+   pc->num_blocks = 0;
  
+   for (i = 0; i < pc->num_subroutines + 1; ++i)
+      if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
+         return ret;
     return 0;
  }
  
@@ -1032,8 +1045,8 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
     return 0;
  }
  
-int
-nv_pc_exec_pass0(struct nv_pc *pc)
+static int
+nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
  {
     struct nv_pass_reld_elim *reldelim;
     struct nv_pass pass;
@@ -1047,35 +1060,35 @@ nv_pc_exec_pass0(struct nv_pc *pc)
      * to whether sources are supported memory loads.
      */
     pc->pass_seq++;
-   ret = nv_pass_lower_arith(&pass, pc->root);
+   ret = nv_pass_lower_arith(&pass, root);
     if (ret)
        return ret;
  
     pc->pass_seq++;
-   ret = nv_pass_fold_loads(&pass, pc->root);
+   ret = nv_pass_fold_loads(&pass, root);
     if (ret)
        return ret;
  
     pc->pass_seq++;
-   ret = nv_pass_fold_stores(&pass, pc->root);
+   ret = nv_pass_fold_stores(&pass, root);
     if (ret)
        return ret;
  
     reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
     reldelim->pc = pc;
     pc->pass_seq++;
-   ret = nv_pass_reload_elim(reldelim, pc->root);
+   ret = nv_pass_reload_elim(reldelim, root);
     FREE(reldelim);
     if (ret)
        return ret;
  
     pc->pass_seq++;
-   ret = nv_pass_cse(&pass, pc->root);
+   ret = nv_pass_cse(&pass, root);
     if (ret)
        return ret;
  
     pc->pass_seq++;
-   ret = nv_pass_lower_mods(&pass, pc->root);
+   ret = nv_pass_lower_mods(&pass, root);
     if (ret)
        return ret;
  
@@ -1083,14 +1096,25 @@ nv_pc_exec_pass0(struct nv_pc *pc)
     do {
        dce.removed = 0;
        pc->pass_seq++;
-      ret = nv_pass_dce(&dce, pc->root);
+      ret = nv_pass_dce(&dce, root);
        if (ret)
           return ret;
     } while (dce.removed);
  
-   ret = nv_pass_tex_mask(&pass, pc->root);
+   ret = nv_pass_tex_mask(&pass, root);
     if (ret)
        return ret;
  
     return ret;
  }
+
+int
+nv_pc_exec_pass0(struct nv_pc *pc)
+{
+   int i, ret;
+
+   for (i = 0; i < pc->num_subroutines + 1; ++i)
+      if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
+         return ret;
+   return 0;
+}
diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c

index d401706b5bc22e8afe83882e553d06ae63608b08..2998343db52d36a2c429f463d7e8783bea864969 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c
+++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
@@ -874,8 +874,8 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
     return 0;
  }
  
-int
-nv_pc_exec_pass1(struct nv_pc *pc)
+static int
+nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
  {
     struct nv_pc_pass *ctx;
     int i, ret;
@@ -890,12 +890,12 @@ nv_pc_exec_pass1(struct nv_pc *pc)
     ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
  
     pc->pass_seq++;
-   ret = pass_generate_phi_movs(ctx, pc->root);
+   ret = pass_generate_phi_movs(ctx, root);
     assert(!ret);
  
     for (i = 0; i < pc->loop_nesting_bound; ++i) {
        pc->pass_seq++;
-      ret = pass_build_live_sets(ctx, pc->root);
+      ret = pass_build_live_sets(ctx, root);
        assert(!ret && "live sets");
        if (ret) {
           NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
@@ -904,10 +904,10 @@ nv_pc_exec_pass1(struct nv_pc *pc)
     }
  
     pc->pass_seq++;
-   nv_pc_pass_in_order(pc->root, pass_order_instructions, ctx);
+   nv_pc_pass_in_order(root, pass_order_instructions, ctx);
  
     pc->pass_seq++;
-   ret = pass_build_intervals(ctx, pc->root);
+   ret = pass_build_intervals(ctx, root);
     assert(!ret && "build intervals");
     if (ret) {
        NOUVEAU_ERR("failed to build live intervals\n");
@@ -944,3 +944,14 @@ out:
     FREE(ctx);
     return ret;
  }
+
+int
+nv_pc_exec_pass1(struct nv_pc *pc)
+{
+   int i, ret;
+
+   for (i = 0; i < pc->num_subroutines + 1; ++i)
+      if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
+         return ret;
+   return 0;
+}
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c

index d7d3030e2f6880b46324d37fab8ea9d9778bee7c..925028700cdf5936315684f4f11978a5850593d6 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -147,10 +147,17 @@ prog_inst(struct nv50_translation_info *ti,
     int s, c, k;
     unsigned mask;
  
+   if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
+      ti->subr[ti->subr_nr].pos = id - 1;
+      ti->subr[ti->subr_nr].id = ti->subr_nr + 1; /* id 0 is main program */
+      ++ti->subr_nr;
+   }
+
     if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
+      dst = &inst->Dst[0].Register;
+
        for (c = 0; c < 4; ++c) {
-         dst = &inst->Dst[0].Register;
-         if (inst->Dst[0].Register.Indirect)
+         if (dst->Indirect)
              nv50_indirect_outputs(ti, id);
           if (!(dst->WriteMask & (1 << c)))
              continue;
@@ -182,6 +189,44 @@ prog_inst(struct nv50_translation_info *ti,
     }
  }
  
+/* Guess subroutine inputs (argv: TEMP read before written) and outputs (retv:
+ * TEMP written); struct tgsi_function_declaration would be better than this.
+ */
+static void
+prog_subroutine_inst(struct nv50_subroutine *subr,
+                     const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_dst_register *dst;
+   const struct tgsi_src_register *src;
+   int s, c, k;
+   unsigned mask;
+
+   for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
+      src = &inst->Src[s].Register;
+      if (src->File != TGSI_FILE_TEMPORARY)
+         continue;
+      mask = nv50_tgsi_src_mask(inst, s);
+
+      assert(!inst->Src[s].Register.Indirect);
+
+      for (c = 0; c < 4; ++c) {
+         k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+
+         if ((mask & (1 << c)) && k <= TGSI_SWIZZLE_W) /* W is a valid component */
+            if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
+               subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
+      }
+   }
+
+   if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
+      dst = &inst->Dst[0].Register;
+
+      for (c = 0; c < 4; ++c)
+         if (dst->WriteMask & (1 << c))
+            subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
+   }
+}
+
  static void
  prog_immediate(struct nv50_translation_info *ti,
                 const struct tgsi_full_immediate *imm)
@@ -482,7 +527,7 @@ nv50_prog_scan(struct nv50_translation_info *ti)
  {
     struct nv50_program *p = ti->p;
     struct tgsi_parse_context parse;
-   int ret;
+   int ret, i;
  
     p->vp.edgeflag = 0x40;
     p->vp.psiz = 0x40;
@@ -496,6 +541,9 @@ nv50_prog_scan(struct nv50_translation_info *ti)
     tgsi_dump(p->pipe.tokens, 0);
  #endif
  
+   ti->subr =
+      CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
+
     ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
     ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
  
@@ -519,6 +567,13 @@ nv50_prog_scan(struct nv50_translation_info *ti)
        }
     }
  
+   /* Scan to determine which registers are inputs/outputs of a subroutine. */
+   for (i = 0; i < ti->subr_nr; ++i) {
+      int pc = ti->subr[i].pos; /* instruction position; .id is the subroutine number */
+      while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
+         prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
+   }
+
     p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1;
     p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1;
  
@@ -572,6 +627,8 @@ out:
        FREE(ti->immd32_ty);
     if (ti->insns)
        FREE(ti->insns);
+   if (ti->subr)
+      FREE(ti->subr);
     FREE(ti);
     return ret ? FALSE : TRUE;
  }
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h

index 3c3f1f7f9701fb828678000b42cb3ccd7290c7ba..918baf325f5bb7d59e31464d073db15f7dd4be49 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -27,6 +27,8 @@
  #include "tgsi/tgsi_scan.h"
  #include "nouveau/nouveau_class.h"
  
+#define NV50_CAP_MAX_PROGRAM_TEMPS (128 / 4)
+
  struct nv50_varying {
     uint8_t id; /* tgsi index */
     uint8_t hw; /* hw index, nv50 wants flat FP inputs last */
@@ -92,13 +94,13 @@ struct nv50_program {
  #define NV50_INTERP_FLAT     (1 << 1)
  #define NV50_INTERP_CENTROID (1 << 2)
  
-#define NV50_PROG_MAX_SUBROUTINES 8
-
  /* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */
  struct nv50_subroutine {
-   int id;
-   uint32_t argv[4][1]; /* 4 bitmasks, for each of xyzw, only allow 32 TEMPs */
-   uint32_t retv[4][1];
+   unsigned id; /* subroutine number; 0 is the main program */
+   unsigned pos; /* recorded as (id argument of prog_inst) - 1 at BGNSUB */
+   /* TEMP input/output bitmasks: [index / 32][xyzw component], bit index % 32 */
+   uint32_t argv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
+   uint32_t retv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
  };
  
  struct nv50_translation_info {
@@ -119,8 +121,8 @@ struct nv50_translation_info {
     unsigned immd32_nr;
     ubyte *immd32_ty;
     ubyte edgeflag_out;
-   struct nv50_subroutine subr[NV50_PROG_MAX_SUBROUTINES];
-   int subr_nr;
+   struct nv50_subroutine *subr;
+   unsigned subr_nr;
  };
  
  int nv50_generate_code(struct nv50_translation_info *ti);
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c

index fc75d81d54971302b1b3ffd0d280c8024065aa2e..c1efa443dafb63e02b281f5ec1d0085a443f77a0 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -26,6 +26,7 @@
  #include "nv50_context.h"
  #include "nv50_screen.h"
  #include "nv50_resource.h"
+#include "nv50_program.h"
  
  #include "nouveau/nouveau_stateobj.h"
  
@@ -152,7 +153,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
                 return 0;
         case PIPE_CAP_MAX_VS_TEMPS:
         case PIPE_CAP_MAX_FS_TEMPS: /* no spilling atm */
-               return 128 / 4;
+               return NV50_CAP_MAX_PROGRAM_TEMPS;
         case PIPE_CAP_DEPTH_CLAMP:
                 return 1;
         default:
diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c

index 386dbda423d94f0e8f489040a039ebd39f4c2e06..dea8fa0663e5dfe8bc44aae7b067bcdcc1dc1c01 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
+++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
@@ -1850,7 +1850,7 @@ nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti)
     struct bld_context *bld = CALLOC_STRUCT(bld_context);
     int c;
  
-   pc->root = pc->current_block = new_basic_block(pc);
+   pc->root[0] = pc->current_block = new_basic_block(pc);
  
     bld->pc = pc;
     bld->ti = ti;
author	Christoph Bumiller <e0425955@student.tuwien.ac.at>
	Tue, 7 Sep 2010 13:40:34 +0000 (15:40 +0200)
committer	Christoph Bumiller <e0425955@student.tuwien.ac.at>
	Thu, 9 Sep 2010 17:21:34 +0000 (19:21 +0200)
src/gallium/drivers/nv50/nv50_pc.c		patch \| blob \| history
src/gallium/drivers/nv50/nv50_pc.h		patch \| blob \| history
src/gallium/drivers/nv50/nv50_pc_optimize.c		patch \| blob \| history
src/gallium/drivers/nv50/nv50_pc_regalloc.c		patch \| blob \| history
src/gallium/drivers/nv50/nv50_program.c		patch \| blob \| history
src/gallium/drivers/nv50/nv50_program.h		patch \| blob \| history
src/gallium/drivers/nv50/nv50_screen.c		patch \| blob \| history
src/gallium/drivers/nv50/nv50_tgsi_to_nc.c		patch \| blob \| history