i965g: consult fs inputs when laying out vs output regs
authorKeith Whitwell <keithw@vmware.com>
Wed, 11 Nov 2009 02:07:11 +0000 (18:07 -0800)
committerKeith Whitwell <keithw@vmware.com>
Thu, 12 Nov 2009 02:51:58 +0000 (18:51 -0800)
Vertex shader now emits just the FS inputs, in the positions and order
expected by the fragment shader.

This means potentially regenerating the vertex shader to match
different fragment shader's input layouts.

src/gallium/drivers/i965/brw_context.h
src/gallium/drivers/i965/brw_pipe_shader.c
src/gallium/drivers/i965/brw_vs.c
src/gallium/drivers/i965/brw_vs.h
src/gallium/drivers/i965/brw_vs_emit.c

index 4a975ecd7ecca476dfabfd61b4eec71e6800fd86..31f3cf36855f1380ce93aefe59d1cbe3b9714370 100644 (file)
@@ -161,11 +161,24 @@ struct brw_vertex_shader {
    GLboolean use_const_buffer;
 };
 
+struct brw_fs_signature {
+   GLuint nr_inputs;
+   struct {
+      GLuint semantic:5;
+      GLuint semantic_index:27;
+   } input[PIPE_MAX_SHADER_INPUTS];
+};
+
+#define brw_fs_signature_size(s) (offsetof(struct brw_fs_signature, input) + \
+                                  ((s)->nr_inputs * sizeof (s)->input[0])) 
+
 
 struct brw_fragment_shader {
    const struct tgsi_token *tokens;
    struct tgsi_shader_info info;
 
+   struct brw_fs_signature signature;
+
    unsigned iz_lookup;
    //unsigned wm_lookup;
    
index 44f9ad6f9cd18256083f5c56d9211ced6cf45d1b..7febf9e0c2fcc5d3ad2f2fd62a169732eecba71e 100644 (file)
@@ -96,6 +96,12 @@ static void *brw_create_fs_state( struct pipe_context *pipe,
 
    tgsi_scan_shader(fs->tokens, &fs->info);
 
+   fs->signature.nr_inputs = fs->info.num_inputs;
+   for (i = 0; i < fs->info.num_inputs; i++) {
+      fs->signature.input[i].semantic = fs->info.input_semantic_name[i];
+      fs->signature.input[i].semantic_index = fs->info.input_semantic_index[i];
+   }
+
    for (i = 0; i < fs->info.num_inputs; i++)
       if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION)
         fs->uses_depth = 1;
index 966940ceacb7370cd23cf383cd4589df3d045b00..05a62ed9745e58469141970821d918c8e3b7dc34 100644 (file)
@@ -90,22 +90,24 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
 {
    struct brw_vs_prog_key key;
    struct brw_vertex_shader *vp = brw->curr.vertex_shader;
+   struct brw_fragment_shader *fs = brw->curr.fragment_shader;
    enum pipe_error ret;
 
    memset(&key, 0, sizeof(key));
 
-   /* Just upload the program verbatim for now.  Always send it all
-    * the inputs it asks for, whether they are varying or not.
-    */
    key.program_string_id = vp->id;
    key.nr_userclip = brw->curr.ucp.nr;
    key.copy_edgeflag = (brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL ||
                        brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL);
 
+   memcpy(&key.fs_signature, &fs->signature,
+          brw_fs_signature_size(&fs->signature));
+
+
    /* Make an early check for the key.
     */
    if (brw_search_cache(&brw->cache, BRW_VS_PROG,
-                        &key, sizeof(key),
+                        &key, brw_vs_prog_key_size(&key),
                         NULL, 0,
                         &brw->vs.prog_data,
                         &brw->vs.prog_bo))
@@ -123,7 +125,9 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
  */
 const struct brw_tracked_state brw_vs_prog = {
    .dirty = {
-      .mesa  = PIPE_NEW_CLIP | PIPE_NEW_RAST,
+      .mesa  = (PIPE_NEW_CLIP | 
+                PIPE_NEW_RAST |
+                PIPE_NEW_FRAGMENT_SHADER),
       .brw   = BRW_NEW_VERTEX_PROGRAM,
       .cache = 0
    },
index b4e450d89bfe731b118d0c66b431327d36c17222..3d1598d02b9a2204c7647cdb9465a79fd6c7aa42 100644 (file)
@@ -43,8 +43,11 @@ struct brw_vs_prog_key {
    GLuint nr_userclip:4;
    GLuint copy_edgeflag:1;
    GLuint pad:26;
+   struct brw_fs_signature fs_signature;
 };
 
+#define brw_vs_prog_key_size(s) (offsetof(struct brw_vs_prog_key, fs_signature) + \
+                                 brw_fs_signature_size(&(s)->fs_signature))
 
 
 #define MAX_IF_DEPTH 32
@@ -65,8 +68,8 @@ struct brw_vs_compile {
 
    GLboolean copy_edgeflag;
 
-   GLuint first_output;
-   GLuint first_overflow_output; /**< VERT_ATTRIB_x */
+   GLuint overflow_grf_start;
+   GLuint overflow_count;
 
    GLuint first_tmp;
    GLuint last_tmp;
index 26f0ec5a11a731bea153c65a91818fc880d30f39..933c9c4d63ce010a8a2b914cfcde8ede799df6b8 100644 (file)
@@ -66,6 +66,38 @@ static void release_tmps( struct brw_vs_compile *c )
 }
 
 
+static boolean is_position_output( struct brw_vs_compile *c,
+                                   unsigned vs_output )
+{
+   struct brw_vertex_shader *vs = c->vp;
+   unsigned semantic = vs->info.output_semantic_name[vs_output];
+   unsigned index = vs->info.output_semantic_index[vs_output];
+
+   return (semantic == TGSI_SEMANTIC_POSITION &&
+           index == 0);
+}
+
+
+static boolean find_output_slot( struct brw_vs_compile *c,
+                                  unsigned vs_output,
+                                  unsigned *fs_input_slot )
+{
+   struct brw_vertex_shader *vs = c->vp;
+   unsigned semantic = vs->info.output_semantic_name[vs_output];
+   unsigned index = vs->info.output_semantic_index[vs_output];
+   unsigned i;
+
+   for (i = 0; i < c->key.fs_signature.nr_inputs; i++) {
+      if (c->key.fs_signature.input[i].semantic == semantic &&
+          c->key.fs_signature.input[i].semantic_index == index) {
+         *fs_input_slot = i;
+         return TRUE;
+      }
+   }
+
+   return FALSE;
+}
+
 
 /**
  * Preallocate GRF register before code emit.
@@ -172,42 +204,50 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    /* Allocate outputs.  The non-position outputs go straight into message regs.
     */
    c->nr_outputs = c->prog_data.nr_outputs;
-   c->first_output = reg;
-   c->first_overflow_output = 0;
 
    if (c->chipset.is_igdng)
       mrf = 8;
    else
       mrf = 4;
 
+   
+   if (c->key.fs_signature.nr_inputs > BRW_MAX_MRF) {
+      c->overflow_grf_start = reg;
+      c->overflow_count = c->key.fs_signature.nr_inputs - BRW_MAX_MRF;
+      reg += c->overflow_count;
+   }
+
    /* XXX: need to access vertex output semantics here:
     */
    for (i = 0; i < c->prog_data.nr_outputs; i++) {
-      assert(i < Elements(c->regs[TGSI_FILE_OUTPUT]));
+      unsigned slot;
 
-      /* XXX: Hardwire position to zero:
-       */
-      if (i == 0) {
-        c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
-        reg++;
-      }
-      /* XXX: disable psiz:
+      /* XXX: Put output position in slot zero always.  Clipper, etc,
+       * need access to this reg.
        */
-      else if (0) {
-        c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+      if (is_position_output(c, i)) {
+        c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); /* copy to mrf 0 */
         reg++;
-        mrf++;         /* just a placeholder?  XXX fix later stages & remove this */
       }
-      else if (mrf < 16) {
-        c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
-        mrf++;
+      else if (find_output_slot(c, i, &slot)) {
+         
+         if (0 /* is_psize_output(c, i) */ ) {
+            /* c->psize_out.grf = reg; */
+            /* c->psize_out.mrf = i; */
+         }
+         
+         /* The first (16-4) outputs can go straight into the message regs.
+          */
+         if (slot + mrf < BRW_MAX_MRF) {
+            c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(slot + mrf);
+         }
+         else {
+            int grf = c->overflow_grf_start + slot - BRW_MAX_MRF;
+            c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(grf, 0);
+         }
       }
       else {
-        /* too many vertex results to fit in MRF, use GRF for overflow */
-        if (!c->first_overflow_output)
-           c->first_overflow_output = i;
-        c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
-        reg++;
+         c->regs[TGSI_FILE_OUTPUT][i] = brw_null_reg();
       }
    }     
 
@@ -1072,6 +1112,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
    struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
    struct brw_reg ndc;
    int eot;
+   int i;
    GLuint len_vertext_header = 2;
 
    if (c->key.copy_edgeflag) {
@@ -1167,7 +1208,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
        len_vertext_header = 2;
    }
 
-   eot = (c->first_overflow_output == 0);
+   eot = (c->overflow_count == 0);
 
    brw_urb_WRITE(p, 
                 brw_null_reg(), /* dest */
@@ -1182,19 +1223,22 @@ static void emit_vertex_write( struct brw_vs_compile *c)
                 0,             /* urb destination offset */
                 BRW_URB_SWIZZLE_INTERLEAVE);
 
-   if (c->first_overflow_output > 0) {
-      /* Not all of the vertex outputs/results fit into the MRF.
-       * Move the overflowed attributes from the GRF to the MRF and
-       * issue another brw_urb_WRITE().
-       */
+   /* Not all of the vertex outputs/results fit into the MRF.
+    * Move the overflowed attributes from the GRF to the MRF and
+    * issue another brw_urb_WRITE().
+    */
+   for (i = 0; i < c->overflow_count; i += BRW_MAX_MRF) {
+      unsigned nr = MIN2(c->overflow_count - i, BRW_MAX_MRF);
+      GLuint j;
+
+      eot = (i + nr >= c->overflow_count);
+
       /* XXX I'm not 100% sure about which MRF regs to use here.  Starting
        * at mrf[4] atm...
        */
-      GLuint i, mrf = 0;
-      for (i = c->first_overflow_output; i < c->prog_data.nr_outputs; i++) {
-        /* move from GRF to MRF */
-        brw_MOV(p, brw_message_reg(4+mrf), c->regs[TGSI_FILE_OUTPUT][i]);
-        mrf++;
+      for (j = 0; j < nr; j++) {
+        brw_MOV(p, brw_message_reg(4+j), 
+                 brw_vec8_grf(c->overflow_grf_start + i + j, 0));
       }
 
       brw_urb_WRITE(p,
@@ -1203,11 +1247,11 @@ static void emit_vertex_write( struct brw_vs_compile *c)
                     c->r0,          /* src */
                     0,              /* allocate */
                     1,              /* used */
-                    mrf+1,          /* msg len */
+                    nr+1,          /* msg len */
                     0,              /* response len */
-                    1,              /* eot */
-                    1,              /* writes complete */
-                    BRW_MAX_MRF-1,  /* urb destination offset */
+                    eot,            /* eot */
+                    eot,            /* writes complete */
+                    i-1,            /* urb destination offset */
                     BRW_URB_SWIZZLE_INTERLEAVE);
    }
 }