mesa: avoid generating constant vertex attributes in fixedfunc programs

author Keith Whitwell <keith@tungstengraphics.com>

Fri, 3 Oct 2008 16:30:59 +0000 (17:30 +0100)

committer Keith Whitwell <keith@tungstengraphics.com>

Fri, 3 Oct 2008 16:30:59 +0000 (17:30 +0100)
author Keith Whitwell <keith@tungstengraphics.com>
Fri, 3 Oct 2008 16:30:59 +0000 (17:30 +0100)
committer Keith Whitwell <keith@tungstengraphics.com>
Fri, 3 Oct 2008 16:30:59 +0000 (17:30 +0100)
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h

index bc099dabeb08efe225801d839ada7f29bfb64708..ca1e369a357cc4270b6e1daecf6c257c50d7d394 100644 (file)
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3073,6 +3073,8 @@ struct __GLcontextRec
     GLenum RenderMode;        /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */
     GLbitfield NewState;      /**< bitwise-or of _NEW_* flags */
  
+   GLuint varying_vp_inputs;
+
     /** \name Derived state */
     /*@{*/
     GLbitfield _TriangleCaps;      /**< bitwise-or of DD_* flags */
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c

index eb8dc2a3398f2ed42360ea56b30b4e9d1b2f37ef..e0eb5f81e2b873202c78e3cc48a2da1a469bb79c 100644 (file)
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -465,7 +465,8 @@ _mesa_update_state_locked( GLcontext *ctx )
        _mesa_update_tnl_spaces( ctx, new_state );
  
     if (ctx->FragmentProgram._MaintainTexEnvProgram) {
-      prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
+      prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE_MATRIX | _NEW_LIGHT |
+                     _NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
     }
     if (ctx->VertexProgram._MaintainTnlProgram) {
        prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
@@ -504,3 +505,38 @@ _mesa_update_state( GLcontext *ctx )
     _mesa_update_state_locked(ctx);
     _mesa_unlock_context_textures(ctx);
  }
+
+
+
+
+/* Want to figure out which fragment program inputs are actually
+ * constant/current values from ctx->Current.  These should be
+ * referenced as a tracked state variable rather than a fragment
+ * program input, to save the overhead of putting a constant value in
+ * every submitted vertex, transferring it to hardware, interpolating
+ * it across the triangle, etc...
+ *
+ * When there is a VP bound, just use vp->outputs.  But when we're
+ * generating vp from fixed function state, basically want to
+ * calculate:
+ *
+ * vp_out_2_fp_in( vp_in_2_vp_out( varying_inputs ) | 
+ *                 potential_vp_outputs )
+ *
+ * Where potential_vp_outputs is calculated by looking at enabled
+ * texgen, etc.
+ * 
+ * The generated fragment program should then only declare inputs that
+ * may vary or otherwise differ from the ctx->Current values.
+ * Otherwise, the fp should track them as state values instead.
+ */
+void
+_mesa_set_varying_vp_inputs( GLcontext *ctx,
+                             unsigned varying_inputs )
+{
+   /* An unchanged mask needs no work, and must not raise _NEW_ARRAY. */
+   if (ctx->varying_vp_inputs == varying_inputs)
+      return;
+
+   /* Record the new mask and flag _NEW_ARRAY in ctx->NewState so the
+    * change is picked up by the next state update.
+    */
+   ctx->NewState |= _NEW_ARRAY;
+   ctx->varying_vp_inputs = varying_inputs;
+}
diff --git a/src/mesa/main/state.h b/src/mesa/main/state.h

index bb7cb8f32a35e3f8396987784d46278351535edc..dc08043a7620c4d468a34380eb67ec46e71d1d4f 100644 (file)
--- a/src/mesa/main/state.h
+++ b/src/mesa/main/state.h
@@ -37,5 +37,8 @@ _mesa_update_state( GLcontext *ctx );
  extern void
  _mesa_update_state_locked( GLcontext *ctx );
  
+void
+_mesa_set_varying_vp_inputs( GLcontext *ctx,
+                             unsigned varying_inputs );
  
  #endif
diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c

index ac49373604df10865cb80a7e9a7c31dc685aec82..7cd82f98b0d2c244688d8de2694f41802adde554 100644 (file)
--- a/src/mesa/main/texenvprogram.c
+++ b/src/mesa/main/texenvprogram.c
@@ -189,6 +189,63 @@ static GLuint translate_tex_src_bit( GLbitfield bit )
     }
  }
  
+#define VERT_BIT_TEX_ANY    (0xff << VERT_ATTRIB_TEX0)
+#define VERT_RESULT_TEX_ANY (0xff << VERT_RESULT_TEX0)
+
+/* Identify all possible varying inputs.  The fragment program will
+ * never reference non-varying inputs, but will track them via state
+ * constants instead.
+ *
+ * This function figures out all the inputs that the fragment program
+ * has access to.  The bitmask is later reduced to just those which
+ * are actually referenced.
+ */
+static GLuint get_fp_input_mask( GLcontext *ctx )
+{
+   GLuint fp_inputs = 0;
+
+   /* The condition is hard-wired, so only the fixed-function branch
+    * runs at present; the else branch is a placeholder for deriving
+    * the mask from a vertex program's outputs.
+    */
+   if (1) {
+      GLuint varying_inputs = ctx->varying_vp_inputs;
+
+      /* First look at what values may be computed by the generated
+       * vertex program:
+       */
+      if (ctx->Light.Enabled) {
+         fp_inputs |= FRAG_BIT_COL0;
+
+         if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
+            fp_inputs |= FRAG_BIT_COL1;
+      }
+
+      /* Units with texgen or a texture matrix enabled are treated as
+       * producing a texcoord that may differ from the current value.
+       */
+      fp_inputs |= (ctx->Texture._TexGenEnabled |
+                    ctx->Texture._TexMatEnabled) << FRAG_ATTRIB_TEX0;
+
+      /* Then look at what might be varying as a result of enabled
+       * arrays, etc:
+       */
+      if (varying_inputs & VERT_BIT_COLOR0) fp_inputs |= FRAG_BIT_COL0;
+      if (varying_inputs & VERT_BIT_COLOR1) fp_inputs |= FRAG_BIT_COL1;
+
+      /* Move the vertex texcoord bits down to bit 0, then up into the
+       * fragment texcoord slots.
+       */
+      fp_inputs |= (((varying_inputs & VERT_BIT_TEX_ANY) >> VERT_ATTRIB_TEX0)
+                    << FRAG_ATTRIB_TEX0);
+
+   }
+   else {
+      /* calculate from vp->outputs */
+      GLuint vp_outputs = 0;
+
+      if (vp_outputs & (1 << VERT_RESULT_COL0)) fp_inputs |= FRAG_BIT_COL0;
+      if (vp_outputs & (1 << VERT_RESULT_COL1)) fp_inputs |= FRAG_BIT_COL1;
+
+      /* Same remapping as the branch above: shift the texcoord results
+       * down by VERT_RESULT_TEX0 first, then up by FRAG_ATTRIB_TEX0.
+       * (The shifts were previously applied in the opposite direction.)
+       */
+      fp_inputs |= (((vp_outputs & VERT_RESULT_TEX_ANY) >> VERT_RESULT_TEX0)
+                    << FRAG_ATTRIB_TEX0);
+   }
+   
+   return fp_inputs;
+}
+
+
  /**
   * Examine current texture environment state and generate a unique
   * key to identify it.
@@ -196,17 +253,21 @@ static GLuint translate_tex_src_bit( GLbitfield bit )
  static void make_state_key( GLcontext *ctx,  struct state_key *key )
  {
     GLuint i, j;
-       
+   GLuint inputs_referenced = FRAG_BIT_COL0;
+   GLuint inputs_available = get_fp_input_mask( ctx );
+
     memset(key, 0, sizeof(*key));
  
     for (i=0;i<MAX_TEXTURE_UNITS;i++) {
        const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
                 
-      if (!texUnit->_ReallyEnabled)
+      if (!texUnit->_ReallyEnabled) 
           continue;
  
        key->unit[i].enabled = 1;
        key->enabled_units |= (1<<i);
+      key->nr_enabled_units = i+1;
+      inputs_referenced |= FRAG_BIT_TEX(i);
  
        key->unit[i].source_index = 
          translate_tex_src_bit(texUnit->_ReallyEnabled);                
@@ -234,13 +295,18 @@ static void make_state_key( GLcontext *ctx,  struct state_key *key )
        }
     }
         
-   if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
+   if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
        key->separate_specular = 1;
+      inputs_referenced |= FRAG_BIT_COL1;
+   }
  
     if (ctx->Fog.Enabled) {
        key->fog_enabled = 1;
        key->fog_mode = translate_fog_mode(ctx->Fog.Mode);
+      inputs_referenced |= FRAG_BIT_FOGC; /* maybe */
     }
+
+   key->inputs_available = (inputs_available & inputs_referenced);
  }
  
  /* Use uregs to represent registers internally, translate to Mesa's
@@ -446,11 +512,29 @@ static struct ureg register_param5( struct texenv_fragment_program *p,
  #define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
  #define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
  
+static GLuint frag_to_vert_attrib( GLuint attrib )
+{
+   /* The two colour inputs map one-to-one. */
+   if (attrib == FRAG_ATTRIB_COL0)
+      return VERT_ATTRIB_COLOR0;
+
+   if (attrib == FRAG_ATTRIB_COL1)
+      return VERT_ATTRIB_COLOR1;
+
+   /* Anything else must be one of the texcoord slots; keep the same
+    * unit offset relative to VERT_ATTRIB_TEX0.
+    */
+   assert(attrib >= FRAG_ATTRIB_TEX0);
+   assert(attrib <= FRAG_ATTRIB_TEX7);
+   return VERT_ATTRIB_TEX0 + (attrib - FRAG_ATTRIB_TEX0);
+}
+
  
  static struct ureg register_input( struct texenv_fragment_program *p, GLuint input )
  {
-   p->program->Base.InputsRead |= (1 << input);
-   return make_ureg(PROGRAM_INPUT, input);
+   if (p->state->inputs_available & (1<<input)) {
+      p->program->Base.InputsRead |= (1 << input);
+      return make_ureg(PROGRAM_INPUT, input);
+   }
+   else {
+      GLuint idx = frag_to_vert_attrib( input );
+      return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, idx );
+   }
  }
  
  
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c

index 0f9d8da3568d918d0267b43e9862b56586104b95..3d74f9f43195adefcb1f546901e57eeb7dbf8293 100644 (file)
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -127,6 +127,7 @@ static void recalculate_input_bindings( GLcontext *ctx )
     struct vbo_context *vbo = vbo_context(ctx);
     struct vbo_exec_context *exec = &vbo->exec;
     const struct gl_client_array **inputs = &exec->array.inputs[0];
+   GLuint const_inputs = 0;
     GLuint i;
  
     exec->array.program_mode = get_program_mode(ctx);
@@ -141,19 +142,24 @@ static void recalculate_input_bindings( GLcontext *ctx )
        for (i = 0; i <= VERT_ATTRIB_TEX7; i++) {
          if (exec->array.legacy_array[i]->Enabled)
             inputs[i] = exec->array.legacy_array[i];
-        else
+        else {
             inputs[i] = &vbo->legacy_currval[i];
+            const_inputs |= 1 << i;
+         }
        }
  
        for (i = 0; i < MAT_ATTRIB_MAX; i++) {
          inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i];
+         const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
        }
  
        /* Could use just about anything, just to fill in the empty
         * slots:
         */
-      for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++)
+      for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) {
          inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
+         const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
+      }
  
        break;
     case VP_NV:
@@ -166,15 +172,19 @@ static void recalculate_input_bindings( GLcontext *ctx )
             inputs[i] = exec->array.generic_array[i];
          else if (exec->array.legacy_array[i]->Enabled)
             inputs[i] = exec->array.legacy_array[i];
-        else
+        else {
             inputs[i] = &vbo->legacy_currval[i];
+            const_inputs |= 1 << i;
+         }
        }
  
        /* Could use just about anything, just to fill in the empty
         * slots:
         */
-      for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++)
+      for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
          inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0];
+         const_inputs |= 1 << i;
+      }
  
        break;
     case VP_ARB:
@@ -189,25 +199,34 @@ static void recalculate_input_bindings( GLcontext *ctx )
          inputs[0] = exec->array.generic_array[0];
        else if (exec->array.legacy_array[0]->Enabled)
          inputs[0] = exec->array.legacy_array[0];
-      else
+      else {
          inputs[0] = &vbo->legacy_currval[0];
+         const_inputs |= 1 << 0;
+      }
  
  
        for (i = 1; i <= VERT_ATTRIB_TEX7; i++) {
          if (exec->array.legacy_array[i]->Enabled)
             inputs[i] = exec->array.legacy_array[i];
-        else
+        else {
             inputs[i] = &vbo->legacy_currval[i];
+            const_inputs |= 1 << i;
+         }
        }
  
        for (i = 0; i < 16; i++) {
          if (exec->array.generic_array[i]->Enabled)
             inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i];
-        else
+        else {
             inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
+            const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
+         }
+
        }
        break;
     }
+
+   _mesa_set_varying_vp_inputs( ctx, ~const_inputs );
  }
  
  static void bind_arrays( GLcontext *ctx )
@@ -257,6 +276,11 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count)
  
     bind_arrays( ctx );
  
+   /* Binding the arrays may have raised new state flags, so
+    * validate state again before drawing.
+    */
+   if (ctx->NewState)
+      _mesa_update_state( ctx );
+
     prim[0].begin = 1;
     prim[0].end = 1;
     prim[0].weak = 0;
@@ -297,6 +321,9 @@ vbo_exec_DrawRangeElements(GLenum mode,
  
     bind_arrays( ctx );
  
+   if (ctx->NewState)
+      _mesa_update_state( ctx );
+
     ib.count = count;
     ib.type = type; 
     ib.obj = ctx->Array.ElementArrayBufferObj;
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c

index f497e9a5a55f454f1da2b8326a5be6e362c89bf4..ad60c9b05f03a30472f2456c7c4c238378799a75 100644 (file)
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -150,6 +150,7 @@ static void vbo_exec_bind_arrays( GLcontext *ctx )
     GLubyte *data = exec->vtx.buffer_map;
     const GLuint *map;
     GLuint attr;
+   GLuint varying_inputs = 0;
  
     /* Install the default (ie Current) attributes first, then overlay
      * all active ones.
@@ -211,8 +212,11 @@ static void vbo_exec_bind_arrays( GLcontext *ctx )
          arrays[attr]._MaxElement = count; /* ??? */
  
          data += exec->vtx.attrsz[src] * sizeof(GLfloat);
+         varying_inputs |= 1<<attr;
        }
     }
+
+   _mesa_set_varying_vp_inputs( ctx, varying_inputs );
  }
  
  
@@ -242,6 +246,10 @@ void vbo_exec_vtx_flush( struct vbo_exec_context *exec )
           */
          vbo_exec_bind_arrays( ctx );
  
+         if (ctx->NewState)
+            _mesa_update_state( ctx );
+
+
          ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj);
          exec->vtx.buffer_map = NULL;
  
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c

index 4c97acddb9ff4850cc9ab343c9bf994ce88aadc7..b015bf278648ae7dc2d7004a545cb8faee4eaaca 100644 (file)
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -118,6 +118,7 @@ static void vbo_bind_vertex_list( GLcontext *ctx,
     GLuint data = node->buffer_offset;
     const GLuint *map;
     GLuint attr;
+   GLuint varying_inputs = 0;
  
     /* Install the default (ie Current) attributes first, then overlay
      * all active ones.
@@ -167,8 +168,11 @@ static void vbo_bind_vertex_list( GLcontext *ctx,
          assert(arrays[attr].BufferObj->Name);
  
          data += node->attrsz[src] * sizeof(GLfloat);
+         varying_inputs |= 1<<attr;
        }
     }
+
+   _mesa_set_varying_vp_inputs( ctx, varying_inputs );
  }
  
  static void vbo_save_loopback_vertex_list( GLcontext *ctx,
author	Keith Whitwell <keith@tungstengraphics.com>
	Fri, 3 Oct 2008 16:30:59 +0000 (17:30 +0100)
committer	Keith Whitwell <keith@tungstengraphics.com>
	Fri, 3 Oct 2008 16:30:59 +0000 (17:30 +0100)
src/mesa/main/mtypes.h		patch \| blob \| history
src/mesa/main/state.c		patch \| blob \| history
src/mesa/main/state.h		patch \| blob \| history
src/mesa/main/texenvprogram.c		patch \| blob \| history
src/mesa/vbo/vbo_exec_array.c		patch \| blob \| history
src/mesa/vbo/vbo_exec_draw.c		patch \| blob \| history
src/mesa/vbo/vbo_save_draw.c		patch \| blob \| history