mesa: more complete fix for transform_invariant glitches

author Keith Whitwell <keithw@vmware.com>

Tue, 5 May 2009 11:12:28 +0000 (12:12 +0100)

committer Keith Whitwell <keithw@vmware.com>

Tue, 5 May 2009 12:14:53 +0000 (13:14 +0100)
author Keith Whitwell <keithw@vmware.com>
Tue, 5 May 2009 11:12:28 +0000 (12:12 +0100)
committer Keith Whitwell <keithw@vmware.com>
Tue, 5 May 2009 12:14:53 +0000 (13:14 +0100)
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c

index 016284de9aa2c272ac9c181bfa9a2167e11bd59d..d780f91f048b95dbb296aa8f2dddb10ab0296b96 100644 (file)
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -1522,4 +1522,17 @@ _mesa_Flush(void)
  }
  
  
+/**
+ * Register the driver's preference for how matrix transforms are
+ * generated: DP4 instructions when \p flag is true, MUL/MAD when
+ * false.  The value is stored in \p ctx; drivers that never call
+ * this get the MUL/MAD default.
+ */
+void
+_mesa_set_mvp_with_dp4(GLcontext *ctx, GLboolean flag)
+{
+   ctx->mvp_with_dp4 = flag;
+}
+
+
  /*@}*/
diff --git a/src/mesa/main/context.h b/src/mesa/main/context.h

index ecc1cec77998afff97738b1e714114edd3c4e16b..5b57d88029c253f17a77b35524994ac6a3bdddd9 100644 (file)
--- a/src/mesa/main/context.h
+++ b/src/mesa/main/context.h
@@ -151,6 +151,10 @@ extern struct _glapi_table *
  _mesa_get_dispatch(GLcontext *ctx);
  
  
+void
+_mesa_set_mvp_with_dp4( GLcontext *ctx,
+                        GLboolean flag );
+
  
  /** \name Miscellaneous */
  /*@{*/
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c

index 82e1c4af665c2a0a2cfc2473870b34b57981d1fb..43325b13529be1927e211449aa0f542cabb2422e 100644 (file)
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -315,12 +315,6 @@ static void make_state_key( GLcontext *ctx, struct state_key *key )
   */
  #define DISASSEM 0
  
-/* Should be tunable by the driver - do we want to do matrix
- * multiplications with DP4's or with MUL/MAD's?  SSE works better
- * with the latter, drivers may differ.
- */
-#define PREFER_DP4 1
-
  
  /* Use uregs to represent registers internally, translate to Mesa's
   * expected formats on emit.  
@@ -348,6 +342,7 @@ struct tnl_program {
     const struct state_key *state;
     struct gl_vertex_program *program;
     GLint max_inst;  /** number of instructions allocated for program */
+   GLboolean mvp_with_dp4;
     
     GLuint temp_in_use;
     GLuint temp_reserved;
@@ -775,7 +770,7 @@ static struct ureg get_eye_position( struct tnl_program *p )
  
        p->eye_position = reserve_temp(p);
  
-      if (PREFER_DP4) {
+      if (p->mvp_with_dp4) {
          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
                                   0, modelview );
  
@@ -881,7 +876,7 @@ static void build_hpos( struct tnl_program *p )
     struct ureg hpos = register_output( p, VERT_RESULT_HPOS );
     struct ureg mvp[4];
  
-   if (PREFER_DP4) {
+   if (p->mvp_with_dp4) {
        register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 
                               0, mvp );
        emit_matrix_transform_vec4( p, hpos, mvp, pos );
@@ -1574,7 +1569,7 @@ static void build_texture_transform( struct tnl_program *p )
             struct ureg in = (!is_undef(out_texgen) ? 
                               out_texgen : 
                               register_input(p, VERT_ATTRIB_TEX0+i));
-           if (PREFER_DP4) {
+           if (p->mvp_with_dp4) {
                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
                                        0, texmat );
                emit_matrix_transform_vec4( p, out, texmat, in );
@@ -1708,6 +1703,7 @@ static void build_tnl_program( struct tnl_program *p )
  static void
  create_new_program( const struct state_key *key,
                      struct gl_vertex_program *program,
+                    GLboolean mvp_with_dp4,
                      GLuint max_temps)
  {
     struct tnl_program p;
@@ -1721,6 +1717,7 @@ create_new_program( const struct state_key *key,
     p.transformed_normal = undef;
     p.identity = undef;
     p.temp_in_use = 0;
+   p.mvp_with_dp4 = mvp_with_dp4;
     
     if (max_temps >= sizeof(int) * 8)
        p.temp_reserved = 0;
@@ -1776,6 +1773,7 @@ _mesa_get_fixed_func_vertex_program(GLcontext *ctx)
           return NULL;
  
        create_new_program( &key, prog,
+                          ctx->mvp_with_dp4,
                            ctx->Const.VertexProgram.MaxTemps );
  
  #if 0
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h

index cf7107772862ca434b71219a29733969569577fe..587dc801466927abeaa35c9dd4f85d9ce47f89b5 100644 (file)
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2980,6 +2980,12 @@ struct __GLcontextRec
     /** software compression/decompression supported or not */
     GLboolean Mesa_DXTn;
  
+   /** 
+    * Use dp4 (rather than mul/mad) instructions for position
+    * transformation?
+    */
+   GLboolean mvp_with_dp4;
+
     /** Core tnl module support */
     struct gl_tnl_module TnlModule;
  
diff --git a/src/mesa/shader/programopt.c b/src/mesa/shader/programopt.c

index ecd98dc85c5490079c8b9762f6e38b7afcec63f4..f70c75cec8e3c4a6e74777a8ed436723bae1da36 100644 (file)
--- a/src/mesa/shader/programopt.c
+++ b/src/mesa/shader/programopt.c
@@ -45,8 +45,8 @@
   * into a vertex program.
   * May be used to implement the position_invariant option.
   */
-void
-_mesa_insert_mvp_code(GLcontext *ctx, struct gl_vertex_program *vprog)
+static void
+_mesa_insert_mvp_dp4_code(GLcontext *ctx, struct gl_vertex_program *vprog)
  {
     struct prog_instruction *newInst;
     const GLuint origLen = vprog->Base.NumInstructions;
@@ -113,6 +113,121 @@ _mesa_insert_mvp_code(GLcontext *ctx, struct gl_vertex_program *vprog)
  }
  
  
+/**
+ * Insert MUL/MAD instructions at the start of a vertex program to
+ * compute the output position from the input position and the
+ * transposed MVP matrix:
+ *    MUL tmp, pos.xxxx, mvpT[0];
+ *    MAD tmp, pos.yyyy, mvpT[1], tmp;
+ *    MAD tmp, pos.zzzz, mvpT[2], tmp;
+ *    MAD result.position, pos.wwww, mvpT[3], tmp;
+ * If allocation fails, GL_OUT_OF_MEMORY is set and nothing is inserted.
+ */
+static void
+_mesa_insert_mvp_mad_code(GLcontext *ctx, struct gl_vertex_program *vprog)
+{
+   struct prog_instruction *newInst;
+   const GLuint origLen = vprog->Base.NumInstructions;
+   const GLuint newLen = origLen + 4;
+   GLuint hposTemp;
+   GLuint i;
+
+   /* State references for the four rows of the transposed MVP matrix.
+    * XXX we should check if these state vars are already declared. */
+   static const gl_state_index mvpState[4][STATE_LENGTH] = {
+      { STATE_MVP_MATRIX, 0, 0, 0, STATE_MATRIX_TRANSPOSE },
+      { STATE_MVP_MATRIX, 0, 1, 1, STATE_MATRIX_TRANSPOSE },
+      { STATE_MVP_MATRIX, 0, 2, 2, STATE_MATRIX_TRANSPOSE },
+      { STATE_MVP_MATRIX, 0, 3, 3, STATE_MATRIX_TRANSPOSE },
+   };
+   GLint mvpRef[4];
+
+   for (i = 0; i < 4; i++) {
+      mvpRef[i] = _mesa_add_state_reference(vprog->Base.Parameters,
+                                            mvpState[i]);
+   }
+
+   /* Alloc storage for new instructions */
+   newInst = _mesa_alloc_instructions(newLen);
+   if (!newInst) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY,
+                  "glProgramString(inserting position_invariant code)");
+      return;
+   }
+
+   /* TEMP hposTemp; */
+   hposTemp = vprog->Base.NumTemporaries++;
+
+   _mesa_init_instructions(newInst, 4);
+
+   newInst[0].Opcode = OPCODE_MUL;
+   newInst[0].DstReg.File = PROGRAM_TEMPORARY;
+   newInst[0].DstReg.Index = hposTemp;
+   newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
+   newInst[0].SrcReg[0].File = PROGRAM_INPUT;
+   newInst[0].SrcReg[0].Index = VERT_ATTRIB_POS;
+   newInst[0].SrcReg[0].Swizzle = SWIZZLE_XXXX;
+   newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
+   newInst[0].SrcReg[1].Index = mvpRef[0];
+   newInst[0].SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+   for (i = 1; i <= 2; i++) {
+      newInst[i].Opcode = OPCODE_MAD;
+      newInst[i].DstReg.File = PROGRAM_TEMPORARY;
+      newInst[i].DstReg.Index = hposTemp;
+      newInst[i].DstReg.WriteMask = WRITEMASK_XYZW;
+      newInst[i].SrcReg[0].File = PROGRAM_INPUT;
+      newInst[i].SrcReg[0].Index = VERT_ATTRIB_POS;
+      newInst[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(i,i,i,i);
+      newInst[i].SrcReg[1].File = PROGRAM_STATE_VAR;
+      newInst[i].SrcReg[1].Index = mvpRef[i];
+      newInst[i].SrcReg[1].Swizzle = SWIZZLE_NOOP;
+      newInst[i].SrcReg[2].File = PROGRAM_TEMPORARY;
+      newInst[i].SrcReg[2].Index = hposTemp;
+      newInst[i].SrcReg[2].Swizzle = SWIZZLE_NOOP;
+   }
+
+   newInst[3].Opcode = OPCODE_MAD;
+   newInst[3].DstReg.File = PROGRAM_OUTPUT;
+   newInst[3].DstReg.Index = VERT_RESULT_HPOS;
+   newInst[3].DstReg.WriteMask = WRITEMASK_XYZW;
+   newInst[3].SrcReg[0].File = PROGRAM_INPUT;
+   newInst[3].SrcReg[0].Index = VERT_ATTRIB_POS;
+   newInst[3].SrcReg[0].Swizzle = SWIZZLE_WWWW;
+   newInst[3].SrcReg[1].File = PROGRAM_STATE_VAR;
+   newInst[3].SrcReg[1].Index = mvpRef[3];
+   newInst[3].SrcReg[1].Swizzle = SWIZZLE_NOOP;
+   newInst[3].SrcReg[2].File = PROGRAM_TEMPORARY;
+   newInst[3].SrcReg[2].Index = hposTemp;
+   newInst[3].SrcReg[2].Swizzle = SWIZZLE_NOOP;
+
+   /* Append original instructions after new instructions */
+   _mesa_copy_instructions (newInst + 4, vprog->Base.Instructions, origLen);
+
+   /* free old instructions */
+   _mesa_free_instructions(vprog->Base.Instructions, origLen);
+
+   /* install new instructions */
+   vprog->Base.Instructions = newInst;
+   vprog->Base.NumInstructions = newLen;
+   vprog->Base.InputsRead |= VERT_BIT_POS;
+   vprog->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS);
+}
+
+
+/**
+ * Insert MVP position-transform code into the given vertex program,
+ * using the DP4 or the MUL/MAD variant as ctx->mvp_with_dp4 selects.
+ */
+void
+_mesa_insert_mvp_code(GLcontext *ctx, struct gl_vertex_program *vprog)
+{
+   if (!ctx->mvp_with_dp4)
+      _mesa_insert_mvp_mad_code( ctx, vprog );
+   else
+      _mesa_insert_mvp_dp4_code( ctx, vprog );
+}
+
  
  /**
   * Append extra instructions onto the given fragment program to implement
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c

index 92a630eff97c15c8b6b32e507396ac959f51fa6f..2a1f21c51cad7b46d6a27ddbf191a27ea3ce792b 100644 (file)
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -177,6 +177,12 @@ struct st_context *st_create_context(struct pipe_context *pipe,
  
     ctx = _mesa_create_context(visual, shareCtx, &funcs, NULL);
  
+   /* XXX: need a capability bit in gallium to query if the pipe
+    * driver prefers DP4 or MUL/MAD for vertex transformation.
+    */
+   if (debug_get_bool_option("MESA_MVP_DP4", FALSE))
+      _mesa_set_mvp_with_dp4( ctx, GL_TRUE );
+
     return st_create_context_priv(ctx, pipe);
  }
author	Keith Whitwell <keithw@vmware.com>
	Tue, 5 May 2009 11:12:28 +0000 (12:12 +0100)
committer	Keith Whitwell <keithw@vmware.com>
	Tue, 5 May 2009 12:14:53 +0000 (13:14 +0100)
src/mesa/main/context.c		patch \| blob \| history
src/mesa/main/context.h		patch \| blob \| history
src/mesa/main/ffvertex_prog.c		patch \| blob \| history
src/mesa/main/mtypes.h		patch \| blob \| history
src/mesa/shader/programopt.c		patch \| blob \| history
src/mesa/state_tracker/st_context.c		patch \| blob \| history