optimize generated vertex programs a bit

author Roland Scheidegger <sroland@tungstengraphics.com>

Thu, 8 Feb 2007 23:36:40 +0000 (00:36 +0100)

committer Roland Scheidegger <sroland@tungstengraphics.com>

Thu, 8 Feb 2007 23:36:40 +0000 (00:36 +0100)
author Roland Scheidegger <sroland@tungstengraphics.com>
Thu, 8 Feb 2007 23:36:40 +0000 (00:36 +0100)
committer Roland Scheidegger <sroland@tungstengraphics.com>
Thu, 8 Feb 2007 23:36:40 +0000 (00:36 +0100)
diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h

index d9885dbeec45ee232ac2d8f81f1a3f8fc0c0aaf6..0633b3b8bf760017b030a99e1593dc31b825559e 100644 (file)
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -138,6 +138,14 @@ typedef union { GLfloat f; GLint i; } fi_type;
  #define M_E (2.7182818284590452354)
  #endif
  
+#ifndef ONE_DIV_LN2
+#define ONE_DIV_LN2 (1.442695040888963456)
+#endif
+
+#ifndef ONE_DIV_SQRT_LN2
+#define ONE_DIV_SQRT_LN2 (1.201122408786449815)
+#endif
+
  #ifndef FLT_MAX_EXP
  #define FLT_MAX_EXP 128
  #endif
diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c

index 72d49093721bb2a162837f8afa06b4580f9227d0..b9ff08de5fcc4e9ae7ab5d55dcf178c74f935e26 100644 (file)
--- a/src/mesa/shader/arbprogparse.c
+++ b/src/mesa/shader/arbprogparse.c
@@ -4101,7 +4101,7 @@ _mesa_parse_arb_vertex_program(GLcontext *ctx, GLenum target,
     program->Base.Parameters = ap.Base.Parameters; 
  
  #if DEBUG_VP
-   _mesa_printf("____________Vertex program %u __________\n", program->Base.ID);
+   _mesa_printf("____________Vertex program %u __________\n", program->Base.Id);
     _mesa_print_program(&program->Base);
  #endif
  }
diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c

index d301f19090a6f3b274cfd0e78615c117223c8643..7e6cd26c55e7f2b8a68ad27fb71ed26dcd4762fb 100644 (file)
--- a/src/mesa/shader/program.c
+++ b/src/mesa/shader/program.c
@@ -996,6 +996,30 @@ _mesa_fetch_state(GLcontext *ctx, const enum state_index state[],
                }
                 break;
             }
+           case STATE_FOG_PARAMS_OPTIMIZED:
+              /* this makes it possible to use simpler per-vertex fog calcs. POW
+                 (for EXP/EXP2 fog) might be more expensive than EX2 on some hw,
+                 plus it needs another constant (e) anyway. Linear fog can now be
+                 done with a single MAD.
+                 linear: fogcoord * -1/(end-start) + end/(end-start)
+                 exp: 2^-(density/ln(2) * fogcoord)
+                 exp2: 2^-((density/sqrt(ln(2)) * fogcoord)^2) */
+              value[0] = -1.0F / (ctx->Fog.End - ctx->Fog.Start);
+              value[1] = ctx->Fog.End / (ctx->Fog.End - ctx->Fog.Start);
+              value[2] = ctx->Fog.Density * ONE_DIV_LN2;
+              value[3] = ctx->Fog.Density * ONE_DIV_SQRT_LN2;
+              break;
+           case STATE_SPOT_DIR_NORMALIZED: {
+              /* here, state[2] is the light number */
+              /* pre-normalize spot dir */
+              const GLuint ln = (GLuint) state[2];
+              value[0] = ctx->Light.Light[ln].EyeDirection[0];
+              value[1] = ctx->Light.Light[ln].EyeDirection[1];
+              value[2] = ctx->Light.Light[ln].EyeDirection[2];
+              NORMALIZE_3FV(value);
+              value[3] = ctx->Light.Light[ln]._CosCutoff;
+              break;
+           }
             default:
                /* unknown state indexes are silently ignored
                *  should be handled by the driver.
@@ -1075,6 +1099,10 @@ make_state_flags(const GLint state[])
          return _NEW_MODELVIEW;
        case STATE_TEXRECT_SCALE:
          return _NEW_TEXTURE;
+      case STATE_FOG_PARAMS_OPTIMIZED:
+        return _NEW_FOG;
+      case STATE_SPOT_DIR_NORMALIZED:
+        return _NEW_LIGHT;
        default:
           /* unknown state indexes are silently ignored and
           *  no flag set, since it is handled by the driver.
@@ -1232,6 +1260,8 @@ append_token(char *dst, enum state_index k)
     case STATE_INTERNAL:
     case STATE_NORMAL_SCALE:
     case STATE_POSITION_NORMALIZED:
+   case STATE_FOG_PARAMS_OPTIMIZED:
+   case STATE_SPOT_DIR_NORMALIZED:
        append(dst, "(internal)");
        break;
     default:
diff --git a/src/mesa/shader/program.h b/src/mesa/shader/program.h

index af06c03598fec04846ba3585a05e248e80440a72..a0bde0776299d672c455b3cf3597a0936500eaef 100644 (file)
--- a/src/mesa/shader/program.h
+++ b/src/mesa/shader/program.h
@@ -190,6 +190,8 @@ enum state_index {
     STATE_NORMAL_SCALE,
     STATE_TEXRECT_SCALE,
     STATE_POSITION_NORMALIZED,   /* normalized light position */
+   STATE_FOG_PARAMS_OPTIMIZED,  /* for faster fog calc */
+   STATE_SPOT_DIR_NORMALIZED,   /* pre-normalized spot dir */
     STATE_INTERNAL_DRIVER       /* first available state index for drivers (must be last) */
  };
  
diff --git a/src/mesa/tnl/t_vp_build.c b/src/mesa/tnl/t_vp_build.c

index 805d05ae7288583616964b6494dc620010c28b9c..0b6f506f4e5e06b51f105c91978c2ff3cbf7199b 100644 (file)
--- a/src/mesa/tnl/t_vp_build.c
+++ b/src/mesa/tnl/t_vp_build.c
@@ -806,14 +806,13 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p,
     /* Calculate spot attenuation:
      */
     if (!p->state->unit[i].light_spotcutoff_is_180) {
-      struct ureg spot_dir = register_param3(p, STATE_LIGHT, i,
-                                            STATE_SPOT_DIRECTION);
+      struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
+                                                 STATE_SPOT_DIR_NORMALIZED, i);
        struct ureg spot = get_temp(p);
        struct ureg slt = get_temp(p);
-              
-      emit_normalize_vec3( p, spot, spot_dir ); /* XXX: precompute! */
-      emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot);
-      emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir,W), spot);
+
+      emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
+      emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
        emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
        emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
  
@@ -1103,29 +1102,26 @@ static void build_fog( struct tnl_program *p )
     }
  
     if (p->state->tnl_do_vertex_fog) {
-      struct ureg params = register_param1(p, STATE_FOG_PARAMS);
+      struct ureg params = register_param1(p, STATE_FOG_PARAMS_OPTIMIZED);
        struct ureg tmp = get_temp(p);
  
        switch (p->state->fog_mode) {
        case FOG_LINEAR: {
          struct ureg id = get_identity_param(p);
-        emit_op2(p, OPCODE_SUB, tmp, 0, swizzle1(params,Z), input); 
-        emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,W)); 
+        emit_op3(p, OPCODE_MAD, tmp, 0, input, swizzle1(params,X), swizzle1(params,Y));
          emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */
          emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W));
          break;
        }
        case FOG_EXP:
          emit_op1(p, OPCODE_ABS, tmp, 0, input); 
-        emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,X)); 
-        emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, 
-                 register_const1f(p, M_E), negate(tmp)); 
+        emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,Z));
+        emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp));
          break;
        case FOG_EXP2:
-        emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,X)); 
+        emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W));
          emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); 
-        emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, 
-                 register_const1f(p, M_E), negate(tmp)); 
+        emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp));
          break;
        }
        
@@ -1317,8 +1313,6 @@ static void build_texture_transform( struct tnl_program *p )
  }
  
  
-/* Seems like it could be tighter:
- */
  static void build_pointsize( struct tnl_program *p )
  {
     struct ureg eye = get_eye_position(p);
@@ -1327,20 +1321,25 @@ static void build_pointsize( struct tnl_program *p )
     struct ureg out = register_output(p, VERT_RESULT_PSIZ);
     struct ureg ut = get_temp(p);
  
-   /* 1, -Z, Z * Z, 1 */      
-   emit_op1(p, OPCODE_MOV, ut, 0, swizzle1(get_identity_param(p), W));
-   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_YZ, ut, negate(swizzle1(eye, Z)));
-   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_Z, ut, negate(swizzle1(eye, Z)));
-
-
-   /* p1 +  p2 * dist + p3 * dist * dist, 0 */
-   emit_op2(p, OPCODE_DP3, ut, 0, ut, state_attenuation);
-
-   /* 1 / factor */
-   emit_op1(p, OPCODE_RCP, ut, 0, ut ); 
-
-   /* out = pointSize / factor */
-   emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 
+   /* p1 + dist * (p2 + dist * p3); */
+   emit_op3(p, OPCODE_MAD, ut, 0, negate(swizzle1(eye, Z)),
+               swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
+   emit_op3(p, OPCODE_MAD, ut, 0, negate(swizzle1(eye, Z)),
+               ut, swizzle1(state_attenuation, X));
+
+   /* 1 / sqrt(factor) */
+   emit_op1(p, OPCODE_RSQ, ut, 0, ut );
+
+#if 1
+   /* out = pointSize / sqrt(factor) */
+   emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
+#else
+   /* not sure, might make sense to do clamping here,
+      but it's not done in t_vb_points either */
+   emit_op2(p, OPCODE_MUL, ut, 0, ut, state_size);
+   emit_op2(p, OPCODE_MAX, ut, 0, ut, swizzle1(state_size, Y));
+   emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
+#endif
  
     release_temp(p, ut);
  }
author	Roland Scheidegger <sroland@tungstengraphics.com>
	Thu, 8 Feb 2007 23:36:40 +0000 (00:36 +0100)
committer	Roland Scheidegger <sroland@tungstengraphics.com>
	Thu, 8 Feb 2007 23:36:40 +0000 (00:36 +0100)
src/mesa/main/imports.h		patch \| blob \| history
src/mesa/shader/arbprogparse.c		patch \| blob \| history
src/mesa/shader/program.c		patch \| blob \| history
src/mesa/shader/program.h		patch \| blob \| history
src/mesa/tnl/t_vp_build.c		patch \| blob \| history