Merge branch 'master' into drm-gem

[mesa.git] / src / mesa / tnl / t_vp_build.c
diff --git a/src/mesa/tnl/t_vp_build.c b/src/mesa/tnl/t_vp_build.c

index f9e5045be71855ac3f01a0bc3614e4165f11f6be..d79f84f1eb48327b26ed96cf8e3cc347c1887fdf 100644 (file)
--- a/src/mesa/tnl/t_vp_build.c
+++ b/src/mesa/tnl/t_vp_build.c
@@ -1,8 +1,8 @@
  /*
   * Mesa 3-D graphics library
- * Version:  6.5
+ * Version:  7.1
   *
- * Copyright (C) 2006  Tungsten Graphics   All Rights Reserved.
+ * Copyright (C) 2007  Tungsten Graphics   All Rights Reserved.
   *
   * Permission is hereby granted, free of charge, to any person obtaining a
   * copy of this software and associated documentation files (the "Software"),
@@ -33,11 +33,11 @@
  #include "glheader.h"
  #include "macros.h"
  #include "enums.h"
-#include "program.h"
-#include "prog_instruction.h"
-#include "prog_parameter.h"
-#include "prog_print.h"
-#include "prog_statevars.h"
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_statevars.h"
  #include "t_context.h" /* NOTE: very light dependency on this */
  #include "t_vp_build.h"
  
@@ -457,9 +457,13 @@ static void register_matrix_param5( struct tnl_program *p,
  }
  
  
+/**
+ * Convert a ureg source register to a prog_src_register.
+ */
  static void emit_arg( struct prog_src_register *src,
                       struct ureg reg )
  {
+   assert(reg.file != PROGRAM_OUTPUT);
     src->File = reg.file;
     src->Index = reg.idx;
     src->Swizzle = reg.swz;
@@ -469,15 +473,24 @@ static void emit_arg( struct prog_src_register *src,
     src->RelAddr = 0;
  }
  
+/**
+ * Convert a ureg dest register to a prog_dst_register.
+ */
  static void emit_dst( struct prog_dst_register *dst,
                       struct ureg reg, GLuint mask )
  {
+   /* Check for legal output register type.  UNDEFINED will occur in
+    * instructions that don't produce a result (like END).
+    */
+   assert(reg.file == PROGRAM_TEMPORARY ||
+          reg.file == PROGRAM_OUTPUT ||
+          reg.file == PROGRAM_UNDEFINED);
     dst->File = reg.file;
     dst->Index = reg.idx;
     /* allow zero as a shorthand for xyzw */
     dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 
-   dst->CondMask = COND_TR;
-   dst->CondSwizzle = 0;
+   dst->CondMask = COND_TR;  /* always pass cond test */
+   dst->CondSwizzle = SWIZZLE_NOOP;
     dst->CondSrc = 0;
     dst->pad = 0;
  }
@@ -500,7 +513,7 @@ static void debug_insn( struct prog_instruction *inst, const char *fn,
  
  
  static void emit_op3fn(struct tnl_program *p,
-                      GLuint op,
+                       enum prog_opcode op,
                        struct ureg dest,
                        GLuint mask,
                        struct ureg src0,
@@ -686,7 +699,7 @@ static struct ureg get_eye_normal( struct tnl_program *p )
          struct ureg rescale = register_param2(p, STATE_INTERNAL,
                                                STATE_NORMAL_SCALE);
  
-        emit_op2( p, OPCODE_MUL, p->eye_normal, 0, normal, 
+        emit_op2( p, OPCODE_MUL, p->eye_normal, 0, p->eye_normal,
                    swizzle1(rescale, X));
        }
     }
@@ -853,7 +866,7 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p,
  
  
  /* Need to add some addtional parameters to allow lighting in object
- * space - STATE_SPOT_DIRECTION and STATE_HALF implicitly assume eye
+ * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
   * space lighting.
   */
  static void build_lighting( struct tnl_program *p )
@@ -877,7 +890,6 @@ static void build_lighting( struct tnl_program *p )
     {
        struct ureg shininess = get_material(p, 0, STATE_SHININESS);
        emit_op1(p, OPCODE_MOV, dots,  WRITEMASK_W, swizzle1(shininess,X));
-      release_temp(p, shininess);
  
        _col0 = make_temp(p, get_scenecolor(p, 0));
        if (separate)
@@ -891,7 +903,6 @@ static void build_lighting( struct tnl_program *p )
        struct ureg shininess = get_material(p, 1, STATE_SHININESS);
        emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 
                negate(swizzle1(shininess,X)));
-      release_temp(p, shininess);
  
        _bfc0 = make_temp(p, get_scenecolor(p, 1));
        if (separate)
@@ -942,20 +953,35 @@ static void build_lighting( struct tnl_program *p )
              */
             VPpli = register_param3(p, STATE_LIGHT, i, 
                                     STATE_POSITION_NORMALIZED); 
-           half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
+            if (p->state->light_local_viewer) {
+                struct ureg eye_hat = get_eye_position_normalized(p);
+                half = get_temp(p);
+                emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
+                emit_normalize_vec3(p, half, half);
+            } else {
+                half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
+            }
          } 
          else {
             struct ureg Ppli = register_param3(p, STATE_LIGHT, i, 
                                                STATE_POSITION); 
             struct ureg V = get_eye_position(p);
             struct ureg dist = get_temp(p);
+           struct ureg tmpPpli = get_temp(p);
  
             VPpli = get_temp(p); 
-           half = get_temp(p);
   
-           /* Calulate VPpli vector
+            /* In homogeneous object coordinates
+             */
+            emit_op1(p, OPCODE_RCP, dist, 0, swizzle1(Ppli, W));
+            emit_op2(p, OPCODE_MUL, tmpPpli, 0, Ppli, dist);
+
+           /* Calculate VPpli vector
              */
-           emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); 
+           emit_op2(p, OPCODE_SUB, VPpli, 0, tmpPpli, V); 
+
+            /* we're done with tmpPpli now */
+           release_temp(p, tmpPpli);
  
             /* Normalize VPpli.  The dist value also used in
              * attenuation below.
@@ -971,10 +997,14 @@ static void build_lighting( struct tnl_program *p )
                 p->state->unit[i].light_attenuated) {
                att = calculate_light_attenuation(p, i, VPpli, dist);
             }
+           
+           /* We're done with dist now */
+           release_temp(p, dist);
          
        
             /* Calculate viewer direction, or use infinite viewer:
              */
+           half = get_temp(p);
             if (p->state->light_local_viewer) {
                struct ureg eye_hat = get_eye_position_normalized(p);
                emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
@@ -985,8 +1015,6 @@ static void build_lighting( struct tnl_program *p )
             }
  
             emit_normalize_vec3(p, half, half);
-
-           release_temp(p, dist);
          }
  
          /* Calculate dot products:
@@ -994,6 +1022,10 @@ static void build_lighting( struct tnl_program *p )
          emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
          emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
  
+        /* we're done with VPpli and half now, so free them so as not to drive
+           up our temp usage unnecessarily */
+        release_temp(p, VPpli);
+        release_temp(p, half);
         
          /* Front face lighting:
           */
@@ -1082,8 +1114,6 @@ static void build_lighting( struct tnl_program *p )
             release_temp(p, specular);
          }
  
-        release_temp(p, half);
-        release_temp(p, VPpli);
          release_temp(p, att);
        }
     }
@@ -1096,7 +1126,7 @@ static void build_fog( struct tnl_program *p )
  {
     struct ureg fog = register_output(p, VERT_RESULT_FOGC);
     struct ureg input;
-   
+
     if (p->state->fog_source_is_depth) {
        input = swizzle1(get_eye_position(p), Z);
     }
@@ -1104,39 +1134,48 @@ static void build_fog( struct tnl_program *p )
        input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
     }
  
-   if (p->state->tnl_do_vertex_fog) {
+   if (p->state->fog_mode && p->state->tnl_do_vertex_fog) {
        struct ureg params = register_param2(p, STATE_INTERNAL,
                                            STATE_FOG_PARAMS_OPTIMIZED);
        struct ureg tmp = get_temp(p);
+      GLboolean useabs = (p->state->fog_mode != FOG_EXP2);
+
+      if (useabs) {
+        emit_op1(p, OPCODE_ABS, tmp, 0, input);
+      }
  
        switch (p->state->fog_mode) {
        case FOG_LINEAR: {
          struct ureg id = get_identity_param(p);
-        emit_op3(p, OPCODE_MAD, tmp, 0, input, swizzle1(params,X), swizzle1(params,Y));
+        emit_op3(p, OPCODE_MAD, tmp, 0, useabs ? tmp : input,
+                       swizzle1(params,X), swizzle1(params,Y));
          emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */
          emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W));
          break;
        }
        case FOG_EXP:
-        emit_op1(p, OPCODE_ABS, tmp, 0, input); 
-        emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,Z));
+        emit_op2(p, OPCODE_MUL, tmp, 0, useabs ? tmp : input,
+                       swizzle1(params,Z));
          emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp));
          break;
        case FOG_EXP2:
          emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W));
-        emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); 
+        emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp);
          emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp));
          break;
        }
-      
+
        release_temp(p, tmp);
     }
     else {
        /* results = incoming fog coords (compute fog per-fragment later) 
         *
         * KW:  Is it really necessary to do anything in this case?
+       * BP: Yes, we always need to compute the absolute value, unless
+       * we want to push that down into the fragment program...
         */
-      emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input);
+      GLboolean useabs = GL_TRUE;
+      emit_op1(p, useabs ? OPCODE_ABS : OPCODE_MOV, fog, WRITEMASK_X, input);
     }
  }
   
@@ -1325,14 +1364,16 @@ static void build_pointsize( struct tnl_program *p )
     struct ureg out = register_output(p, VERT_RESULT_PSIZ);
     struct ureg ut = get_temp(p);
  
+   /* dist = |eyez| */
+   emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
     /* p1 + dist * (p2 + dist * p3); */
-   emit_op3(p, OPCODE_MAD, ut, 0, negate(swizzle1(eye, Z)),
+   emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
                 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
-   emit_op3(p, OPCODE_MAD, ut, 0, negate(swizzle1(eye, Z)),
+   emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
                 ut, swizzle1(state_attenuation, X));
  
     /* 1 / sqrt(factor) */
-   emit_op1(p, OPCODE_RSQ, ut, 0, ut );
+   emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
  
  #if 1
     /* out = pointSize / sqrt(factor) */
@@ -1340,8 +1381,8 @@ static void build_pointsize( struct tnl_program *p )
  #else
     /* not sure, might make sense to do clamping here,
        but it's not done in t_vb_points neither */
-   emit_op2(p, OPCODE_MUL, ut, 0, ut, state_size);
-   emit_op2(p, OPCODE_MAX, ut, 0, ut, swizzle1(state_size, Y));
+   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
+   emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
     emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
  #endif
  
@@ -1410,9 +1451,8 @@ create_new_program( const struct state_key *key,
     else
        p.temp_reserved = ~((1<<max_temps)-1);
  
-   p.program->Base.Instructions
-      = (struct prog_instruction*) MALLOC(sizeof(struct prog_instruction) * MAX_INSN);
-   p.program->Base.String = 0;
+   p.program->Base.Instructions = _mesa_alloc_instructions(MAX_INSN);
+   p.program->Base.String = NULL;
     p.program->Base.NumInstructions =
     p.program->Base.NumTemporaries =
     p.program->Base.NumParameters =
@@ -1424,21 +1464,22 @@ create_new_program( const struct state_key *key,
     build_tnl_program( &p );
  }
  
-static void *search_cache( struct tnl_cache *cache,
-                          GLuint hash,
-                          const void *key,
-                          GLuint keysize)
+
+static struct gl_vertex_program *
+search_cache(struct tnl_cache *cache, GLuint hash,
+             const void *key, GLuint keysize)
  {
     struct tnl_cache_item *c;
  
     for (c = cache->items[hash % cache->size]; c; c = c->next) {
        if (c->hash == hash && _mesa_memcmp(c->key, key, keysize) == 0)
-        return c->data;
+        return c->prog;
     }
  
     return NULL;
  }
  
+
  static void rehash( struct tnl_cache *cache )
  {
     struct tnl_cache_item **items;
@@ -1461,15 +1502,17 @@ static void rehash( struct tnl_cache *cache )
     cache->size = size;
  }
  
-static void cache_item( struct tnl_cache *cache,
+static void cache_item( GLcontext *ctx,
+                        struct tnl_cache *cache,
                         GLuint hash,
                         void *key,
-                       void *data )
+                       struct gl_vertex_program *prog )
  {
-   struct tnl_cache_item *c = (struct tnl_cache_item*) _mesa_malloc(sizeof(*c));
+   struct tnl_cache_item *c = CALLOC_STRUCT(tnl_cache_item);
     c->hash = hash;
     c->key = key;
-   c->data = data;
+
+   c->prog = prog;
  
     if (++cache->n_items > cache->size * 1.5)
        rehash(cache);
@@ -1498,7 +1541,10 @@ void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx )
     GLuint hash;
     const struct gl_vertex_program *prev = ctx->VertexProgram._Current;
  
-   if (!ctx->VertexProgram._Current) {
+   if (!ctx->VertexProgram._Current ||
+       ctx->VertexProgram._Current == ctx->VertexProgram._TnlProgram) {
+      struct gl_vertex_program *newProg;
+
        /* Grab all the relevent state and put it in a single structure:
         */
        key = make_state_key(ctx);
@@ -1506,33 +1552,33 @@ void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx )
  
        /* Look for an already-prepared program for this state:
         */
-      ctx->VertexProgram._TnlProgram = (struct gl_vertex_program *)
-        search_cache( tnl->vp_cache, hash, key, sizeof(*key) );
+      newProg = search_cache( tnl->vp_cache, hash, key, sizeof(*key));
     
        /* OK, we'll have to build a new one:
         */
-      if (!ctx->VertexProgram._TnlProgram) {
+      if (!newProg) {
+
          if (0)
             _mesa_printf("Build new TNL program\n");
          
-        ctx->VertexProgram._TnlProgram = (struct gl_vertex_program *)
+        newProg = (struct gl_vertex_program *)
             ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); 
  
-        create_new_program( key, ctx->VertexProgram._TnlProgram, 
-                            ctx->Const.VertexProgram.MaxTemps );
+        create_new_program( key, newProg, ctx->Const.VertexProgram.MaxTemps );
  
          if (ctx->Driver.ProgramStringNotify)
             ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, 
-                                       &ctx->VertexProgram._TnlProgram->Base );
+                                             &newProg->Base );
  
-        cache_item(tnl->vp_cache, hash, key, ctx->VertexProgram._TnlProgram );
+         /* Our ownership of newProg is transferred to the cache */
+        cache_item(ctx, tnl->vp_cache, hash, key, newProg);
        }
        else {
          FREE(key);
-        if (0) 
-           _mesa_printf("Found existing TNL program for key %x\n", hash);
        }
-      ctx->VertexProgram._Current = ctx->VertexProgram._TnlProgram;
+
+      _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, newProg);
+      _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, newProg);
     }
  
     /* Tell the driver about the change.  Could define a new target for
@@ -1565,7 +1611,7 @@ void _tnl_ProgramCacheDestroy( GLcontext *ctx )
        for (c = tnl->vp_cache->items[i]; c; c = next) {
          next = c->next;
          FREE(c->key);
-        FREE(c->data);
+        _mesa_reference_vertprog(ctx, &c->prog, NULL);
          FREE(c);
        }