i965g: consult fs inputs when laying out vs output regs

author Keith Whitwell <keithw@vmware.com>

Wed, 11 Nov 2009 02:07:11 +0000 (18:07 -0800)

committer Keith Whitwell <keithw@vmware.com>

Thu, 12 Nov 2009 02:51:58 +0000 (18:51 -0800)
author Keith Whitwell <keithw@vmware.com>
Wed, 11 Nov 2009 02:07:11 +0000 (18:07 -0800)
committer Keith Whitwell <keithw@vmware.com>
Thu, 12 Nov 2009 02:51:58 +0000 (18:51 -0800)
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h

index 4a975ecd7ecca476dfabfd61b4eec71e6800fd86..31f3cf36855f1380ce93aefe59d1cbe3b9714370 100644 (file)
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -161,11 +161,24 @@ struct brw_vertex_shader {
     GLboolean use_const_buffer;
  };
  
+struct brw_fs_signature {
+   GLuint nr_inputs;
+   struct {
+      GLuint semantic:5;
+      GLuint semantic_index:27;
+   } input[PIPE_MAX_SHADER_INPUTS];
+};
+
+#define brw_fs_signature_size(s) (offsetof(struct brw_fs_signature, input) + \
+                                  ((s)->nr_inputs * sizeof (s)->input[0])) 
+
  
  struct brw_fragment_shader {
     const struct tgsi_token *tokens;
     struct tgsi_shader_info info;
  
+   struct brw_fs_signature signature;
+
     unsigned iz_lookup;
     //unsigned wm_lookup;
     
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c

index 44f9ad6f9cd18256083f5c56d9211ced6cf45d1b..7febf9e0c2fcc5d3ad2f2fd62a169732eecba71e 100644 (file)
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -96,6 +96,12 @@ static void *brw_create_fs_state( struct pipe_context *pipe,
  
     tgsi_scan_shader(fs->tokens, &fs->info);
  
+   fs->signature.nr_inputs = fs->info.num_inputs;
+   for (i = 0; i < fs->info.num_inputs; i++) {
+      fs->signature.input[i].semantic = fs->info.input_semantic_name[i];
+      fs->signature.input[i].semantic_index = fs->info.input_semantic_index[i];
+   }
+
     for (i = 0; i < fs->info.num_inputs; i++)
        if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION)
          fs->uses_depth = 1;
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c

index 966940ceacb7370cd23cf383cd4589df3d045b00..05a62ed9745e58469141970821d918c8e3b7dc34 100644 (file)
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -90,22 +90,24 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
  {
     struct brw_vs_prog_key key;
     struct brw_vertex_shader *vp = brw->curr.vertex_shader;
+   struct brw_fragment_shader *fs = brw->curr.fragment_shader;
     enum pipe_error ret;
  
     memset(&key, 0, sizeof(key));
  
-   /* Just upload the program verbatim for now.  Always send it all
-    * the inputs it asks for, whether they are varying or not.
-    */
     key.program_string_id = vp->id;
     key.nr_userclip = brw->curr.ucp.nr;
     key.copy_edgeflag = (brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL ||
                         brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL);
  
+   memcpy(&key.fs_signature, &fs->signature,
+          brw_fs_signature_size(&fs->signature));
+
+
     /* Make an early check for the key.
      */
     if (brw_search_cache(&brw->cache, BRW_VS_PROG,
-                        &key, sizeof(key),
+                        &key, brw_vs_prog_key_size(&key),
                          NULL, 0,
                          &brw->vs.prog_data,
                          &brw->vs.prog_bo))
@@ -123,7 +125,9 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
   */
  const struct brw_tracked_state brw_vs_prog = {
     .dirty = {
-      .mesa  = PIPE_NEW_CLIP | PIPE_NEW_RAST,
+      .mesa  = (PIPE_NEW_CLIP | 
+                PIPE_NEW_RAST |
+                PIPE_NEW_FRAGMENT_SHADER),
        .brw   = BRW_NEW_VERTEX_PROGRAM,
        .cache = 0
     },
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h

index b4e450d89bfe731b118d0c66b431327d36c17222..3d1598d02b9a2204c7647cdb9465a79fd6c7aa42 100644 (file)
--- a/src/gallium/drivers/i965/brw_vs.h
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -43,8 +43,11 @@ struct brw_vs_prog_key {
     GLuint nr_userclip:4;
     GLuint copy_edgeflag:1;
     GLuint pad:26;
+   struct brw_fs_signature fs_signature;
  };
  
+#define brw_vs_prog_key_size(s) (offsetof(struct brw_vs_prog_key, fs_signature) + \
+                                 brw_fs_signature_size(&(s)->fs_signature))
  
  
  #define MAX_IF_DEPTH 32
@@ -65,8 +68,8 @@ struct brw_vs_compile {
  
     GLboolean copy_edgeflag;
  
-   GLuint first_output;
-   GLuint first_overflow_output; /**< VERT_ATTRIB_x */
+   GLuint overflow_grf_start;
+   GLuint overflow_count;
  
     GLuint first_tmp;
     GLuint last_tmp;
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c

index 26f0ec5a11a731bea153c65a91818fc880d30f39..933c9c4d63ce010a8a2b914cfcde8ede799df6b8 100644 (file)
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -66,6 +66,38 @@ static void release_tmps( struct brw_vs_compile *c )
  }
  
  
+static boolean is_position_output( struct brw_vs_compile *c,
+                                   unsigned vs_output )
+{
+   struct brw_vertex_shader *vs = c->vp;
+   unsigned semantic = vs->info.output_semantic_name[vs_output];
+   unsigned index = vs->info.output_semantic_index[vs_output];
+
+   return (semantic == TGSI_SEMANTIC_POSITION &&
+           index == 0);
+}
+
+
+static boolean find_output_slot( struct brw_vs_compile *c,
+                                  unsigned vs_output,
+                                  unsigned *fs_input_slot )
+{
+   struct brw_vertex_shader *vs = c->vp;
+   unsigned semantic = vs->info.output_semantic_name[vs_output];
+   unsigned index = vs->info.output_semantic_index[vs_output];
+   unsigned i;
+
+   for (i = 0; i < c->key.fs_signature.nr_inputs; i++) {
+      if (c->key.fs_signature.input[i].semantic == semantic &&
+          c->key.fs_signature.input[i].semantic_index == index) {
+         *fs_input_slot = i;
+         return TRUE;
+      }
+   }
+
+   return FALSE;
+}
+
  
  /**
   * Preallocate GRF register before code emit.
@@ -172,42 +204,50 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     /* Allocate outputs.  The non-position outputs go straight into message regs.
      */
     c->nr_outputs = c->prog_data.nr_outputs;
-   c->first_output = reg;
-   c->first_overflow_output = 0;
  
     if (c->chipset.is_igdng)
        mrf = 8;
     else
        mrf = 4;
  
+   
+   if (c->key.fs_signature.nr_inputs > BRW_MAX_MRF) {
+      c->overflow_grf_start = reg;
+      c->overflow_count = c->key.fs_signature.nr_inputs - BRW_MAX_MRF;
+      reg += c->overflow_count;
+   }
+
     /* XXX: need to access vertex output semantics here:
      */
     for (i = 0; i < c->prog_data.nr_outputs; i++) {
-      assert(i < Elements(c->regs[TGSI_FILE_OUTPUT]));
+      unsigned slot;
  
-      /* XXX: Hardwire position to zero:
-       */
-      if (i == 0) {
-        c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
-        reg++;
-      }
-      /* XXX: disable psiz:
+      /* XXX: Put output position in slot zero always.  Clipper, etc,
+       * need access to this reg.
         */
-      else if (0) {
-        c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+      if (is_position_output(c, i)) {
+        c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); /* copy to mrf 0 */
          reg++;
-        mrf++;         /* just a placeholder?  XXX fix later stages & remove this */
        }
-      else if (mrf < 16) {
-        c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
-        mrf++;
+      else if (find_output_slot(c, i, &slot)) {
+         
+         if (0 /* is_psize_output(c, i) */ ) {
+            /* c->psize_out.grf = reg; */
+            /* c->psize_out.mrf = i; */
+         }
+         
+         /* Slots with slot + mrf < BRW_MAX_MRF go straight into message regs.
+          * NOTE(review): GRF fallback subtracts BRW_MAX_MRF, not MAX - mrf; confirm. */
+         if (slot + mrf < BRW_MAX_MRF) {
+            c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(slot + mrf);
+         }
+         else {
+            int grf = c->overflow_grf_start + slot - BRW_MAX_MRF;
+            c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(grf, 0);
+         }
        }
        else {
-        /* too many vertex results to fit in MRF, use GRF for overflow */
-        if (!c->first_overflow_output)
-           c->first_overflow_output = i;
-        c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
-        reg++;
+         c->regs[TGSI_FILE_OUTPUT][i] = brw_null_reg();
        }
     }     
  
@@ -1072,6 +1112,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
     struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
     struct brw_reg ndc;
     int eot;
+   int i;
     GLuint len_vertext_header = 2;
  
     if (c->key.copy_edgeflag) {
@@ -1167,7 +1208,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
         len_vertext_header = 2;
     }
  
-   eot = (c->first_overflow_output == 0);
+   eot = (c->overflow_count == 0);
  
     brw_urb_WRITE(p, 
                  brw_null_reg(), /* dest */
@@ -1182,19 +1223,22 @@ static void emit_vertex_write( struct brw_vs_compile *c)
                  0,             /* urb destination offset */
                  BRW_URB_SWIZZLE_INTERLEAVE);
  
-   if (c->first_overflow_output > 0) {
-      /* Not all of the vertex outputs/results fit into the MRF.
-       * Move the overflowed attributes from the GRF to the MRF and
-       * issue another brw_urb_WRITE().
-       */
+   /* If not all of the vertex outputs/results fit into the MRF,
+    * move the overflowed attributes from the GRF to the MRF in
+    * batches and issue a further brw_urb_WRITE() for each batch.
+    */
+   for (i = 0; i < c->overflow_count; i += BRW_MAX_MRF) {
+      unsigned nr = MIN2(c->overflow_count - i, BRW_MAX_MRF);
+      GLuint j;
+
+      eot = (i + nr >= c->overflow_count);
+
        /* XXX I'm not 100% sure about which MRF regs to use here.  Starting
         * at mrf[4] atm...
         */
-      GLuint i, mrf = 0;
-      for (i = c->first_overflow_output; i < c->prog_data.nr_outputs; i++) {
-        /* move from GRF to MRF */
-        brw_MOV(p, brw_message_reg(4+mrf), c->regs[TGSI_FILE_OUTPUT][i]);
-        mrf++;
+      for (j = 0; j < nr; j++) {
+        brw_MOV(p, brw_message_reg(4+j), 
+                 brw_vec8_grf(c->overflow_grf_start + i + j, 0));
        }
  
        brw_urb_WRITE(p,
@@ -1203,11 +1247,11 @@ static void emit_vertex_write( struct brw_vs_compile *c)
                      c->r0,          /* src */
                      0,              /* allocate */
                      1,              /* used */
-                    mrf+1,          /* msg len */
+                    nr+1,          /* msg len */
                      0,              /* response len */
-                    1,              /* eot */
-                    1,              /* writes complete */
-                    BRW_MAX_MRF-1,  /* urb destination offset */
+                    eot,            /* eot */
+                    eot,            /* writes complete */
+                    i-1,            /* urb destination offset; NOTE(review): -1 when i == 0, confirm */
                      BRW_URB_SWIZZLE_INTERLEAVE);
     }
  }
author	Keith Whitwell <keithw@vmware.com>
	Wed, 11 Nov 2009 02:07:11 +0000 (18:07 -0800)
committer	Keith Whitwell <keithw@vmware.com>
	Thu, 12 Nov 2009 02:51:58 +0000 (18:51 -0800)
src/gallium/drivers/i965/brw_context.h		patch \| blob \| history
src/gallium/drivers/i965/brw_pipe_shader.c		patch \| blob \| history
src/gallium/drivers/i965/brw_vs.c		patch \| blob \| history
src/gallium/drivers/i965/brw_vs.h		patch \| blob \| history
src/gallium/drivers/i965/brw_vs_emit.c		patch \| blob \| history