fixes to _mesa_combine_programs(), from gallium-0.1
[mesa.git] / src / mesa / shader / prog_execute.c
index 013d65ce8699a04dd16a201196d466aa26919871..8ce2ca396411266c45dc3fc6d5bf37897c9a418e 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Mesa 3-D graphics library
- * Version:  6.5.3
+ * Version:  7.0.3
  *
  * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
  *
 #include "prog_instruction.h"
 #include "prog_parameter.h"
 #include "prog_print.h"
-#include "slang_library_noise.h"
+#include "shader/slang/slang_library_noise.h"
 
 
-/* See comments below for info about this */
-#define LAMBDA_ZERO 1
-
 /* debug predicate */
 #define DEBUG_PROG 0
 
@@ -92,6 +89,7 @@ get_register_pointer(const struct prog_src_register *source,
       else {
          const struct gl_program_parameter_list *params;
          ASSERT(source->File == PROGRAM_LOCAL_PARAM ||
+                source->File == PROGRAM_CONSTANT ||
                 source->File == PROGRAM_STATE_VAR);
          params = machine->CurProgram->Parameters;
          if (reg < 0 || reg >= params->NumParameters)
@@ -218,120 +216,64 @@ fetch_vector4(const struct prog_src_register *source,
    }
 }
 
-#if 0
+
 /**
- * Fetch the derivative with respect to X for the given register.
- * \return GL_TRUE if it was easily computed or GL_FALSE if we
- * need to execute another instance of the program (ugh)!
+ * Fetch the derivative with respect to X or Y for the given register.
+ * XXX this currently only works for fragment program input attribs.
  */
-static GLboolean
+static void
 fetch_vector4_deriv(GLcontext * ctx,
                     const struct prog_src_register *source,
-                    const SWspan * span,
-                    char xOrY, GLint column, GLfloat result[4])
+                    const struct gl_program_machine *machine,
+                    char xOrY, GLfloat result[4])
 {
-   GLfloat src[4];
+   if (source->File == PROGRAM_INPUT && source->Index < machine->NumDeriv) {
+      const GLint col = machine->CurElement;
+      const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
+      const GLfloat invQ = 1.0f / w;
+      GLfloat deriv[4];
 
-   ASSERT(xOrY == 'X' || xOrY == 'Y');
-
-   switch (source->Index) {
-   case FRAG_ATTRIB_WPOS:
       if (xOrY == 'X') {
-         src[0] = 1.0;
-         src[1] = 0.0;
-         src[2] = span->attrStepX[FRAG_ATTRIB_WPOS][2]
-            / ctx->DrawBuffer->_DepthMaxF;
-         src[3] = span->attrStepX[FRAG_ATTRIB_WPOS][3];
+         deriv[0] = machine->DerivX[source->Index][0] * invQ;
+         deriv[1] = machine->DerivX[source->Index][1] * invQ;
+         deriv[2] = machine->DerivX[source->Index][2] * invQ;
+         deriv[3] = machine->DerivX[source->Index][3] * invQ;
       }
       else {
-         src[0] = 0.0;
-         src[1] = 1.0;
-         src[2] = span->attrStepY[FRAG_ATTRIB_WPOS][2]
-            / ctx->DrawBuffer->_DepthMaxF;
-         src[3] = span->attrStepY[FRAG_ATTRIB_WPOS][3];
-      }
-      break;
-   case FRAG_ATTRIB_COL0:
-   case FRAG_ATTRIB_COL1:
-      if (xOrY == 'X') {
-         src[0] = span->attrStepX[source->Index][0] * (1.0F / CHAN_MAXF);
-         src[1] = span->attrStepX[source->Index][1] * (1.0F / CHAN_MAXF);
-         src[2] = span->attrStepX[source->Index][2] * (1.0F / CHAN_MAXF);
-         src[3] = span->attrStepX[source->Index][3] * (1.0F / CHAN_MAXF);
+         deriv[0] = machine->DerivY[source->Index][0] * invQ;
+         deriv[1] = machine->DerivY[source->Index][1] * invQ;
+         deriv[2] = machine->DerivY[source->Index][2] * invQ;
+         deriv[3] = machine->DerivY[source->Index][3] * invQ;
       }
-      else {
-         src[0] = span->attrStepY[source->Index][0] * (1.0F / CHAN_MAXF);
-         src[1] = span->attrStepY[source->Index][1] * (1.0F / CHAN_MAXF);
-         src[2] = span->attrStepY[source->Index][2] * (1.0F / CHAN_MAXF);
-         src[3] = span->attrStepY[source->Index][3] * (1.0F / CHAN_MAXF);
+
+      result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
+      result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
+      result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
+      result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
+      
+      if (source->NegateBase) {
+         result[0] = -result[0];
+         result[1] = -result[1];
+         result[2] = -result[2];
+         result[3] = -result[3];
       }
-      break;
-   case FRAG_ATTRIB_FOGC:
-      if (xOrY == 'X') {
-         src[0] = span->attrStepX[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
-         src[1] = 0.0;
-         src[2] = 0.0;
-         src[3] = 0.0;
+      if (source->Abs) {
+         result[0] = FABSF(result[0]);
+         result[1] = FABSF(result[1]);
+         result[2] = FABSF(result[2]);
+         result[3] = FABSF(result[3]);
       }
-      else {
-         src[0] = span->attrStepY[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
-         src[1] = 0.0;
-         src[2] = 0.0;
-         src[3] = 0.0;
+      if (source->NegateAbs) {
+         result[0] = -result[0];
+         result[1] = -result[1];
+         result[2] = -result[2];
+         result[3] = -result[3];
       }
-      break;
-   default:
-      assert(source->Index < FRAG_ATTRIB_MAX);
-      /* texcoord or varying */
-      if (xOrY == 'X') {
-         /* this is a little tricky - I think I've got it right */
-         const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
-                                      +
-                                      span->attrStepX[source->Index][3] *
-                                      column);
-         src[0] = span->attrStepX[source->Index][0] * invQ;
-         src[1] = span->attrStepX[source->Index][1] * invQ;
-         src[2] = span->attrStepX[source->Index][2] * invQ;
-         src[3] = span->attrStepX[source->Index][3] * invQ;
-      }
-      else {
-         /* Tricky, as above, but in Y direction */
-         const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
-                                      + span->attrStepY[source->Index][3]);
-         src[0] = span->attrStepY[source->Index][0] * invQ;
-         src[1] = span->attrStepY[source->Index][1] * invQ;
-         src[2] = span->attrStepY[source->Index][2] * invQ;
-         src[3] = span->attrStepY[source->Index][3] * invQ;
-      }
-      break;
-   }
-
-   result[0] = src[GET_SWZ(source->Swizzle, 0)];
-   result[1] = src[GET_SWZ(source->Swizzle, 1)];
-   result[2] = src[GET_SWZ(source->Swizzle, 2)];
-   result[3] = src[GET_SWZ(source->Swizzle, 3)];
-
-   if (source->NegateBase) {
-      result[0] = -result[0];
-      result[1] = -result[1];
-      result[2] = -result[2];
-      result[3] = -result[3];
-   }
-   if (source->Abs) {
-      result[0] = FABSF(result[0]);
-      result[1] = FABSF(result[1]);
-      result[2] = FABSF(result[2]);
-      result[3] = FABSF(result[3]);
    }
-   if (source->NegateAbs) {
-      result[0] = -result[0];
-      result[1] = -result[1];
-      result[2] = -result[2];
-      result[3] = -result[3];
+   else {
+      ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
    }
-   return GL_TRUE;
 }
-#endif
 
 
 /**
@@ -358,6 +300,36 @@ fetch_vector1(const struct prog_src_register *source,
 }
 
 
+/**
+ * Fetch texel from texture.  Use partial derivatives when possible.
+ */
+static INLINE void
+fetch_texel(GLcontext *ctx,
+            const struct gl_program_machine *machine,
+            const struct prog_instruction *inst,
+            const GLfloat texcoord[4], GLfloat lodBias,
+            GLfloat color[4])
+{
+   const GLuint unit = machine->Samplers[inst->TexSrcUnit];
+
+   /* Note: we only have the right derivatives for fragment input attribs.
+    */
+   if (machine->NumDeriv > 0 &&
+       inst->SrcReg[0].File == PROGRAM_INPUT &&
+       inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
+      /* simple texture fetch for which we should have derivatives */
+      GLuint attr = inst->SrcReg[0].Index;
+      machine->FetchTexelDeriv(ctx, texcoord,
+                               machine->DerivX[attr],
+                               machine->DerivY[attr],
+                               lodBias, unit, color);
+   }
+   else {
+      machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
+   }
+}
+
+
 /**
  * Test value against zero and return GT, LT, EQ or UN if NaN.
  */
@@ -519,106 +491,6 @@ store_vector4(const struct prog_instruction *inst,
 }
 
 
-#if 0
-/**
- * Initialize a new machine state instance from an existing one, adding
- * the partial derivatives onto the input registers.
- * Used to implement DDX and DDY instructions in non-trivial cases.
- */
-static void
-init_machine_deriv(GLcontext * ctx,
-                   const struct gl_program_machine *machine,
-                   const struct gl_fragment_program *program,
-                   const SWspan * span, char xOrY,
-                   struct gl_program_machine *dMachine)
-{
-   GLuint attr;
-
-   ASSERT(xOrY == 'X' || xOrY == 'Y');
-
-   /* copy existing machine */
-   _mesa_memcpy(dMachine, machine, sizeof(struct gl_program_machine));
-
-   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
-      /* XXX also need to do this when using valgrind */
-      /* Clear temporary registers (undefined for ARB_f_p) */
-      _mesa_bzero((void *) machine->Temporaries,
-                  MAX_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
-   }
-
-   /* Add derivatives */
-   if (program->Base.InputsRead & FRAG_BIT_WPOS) {
-      GLfloat *wpos = machine->Attribs[FRAG_ATTRIB_WPOS][machine->CurElement];
-      if (xOrY == 'X') {
-         wpos[0] += 1.0F;
-         wpos[1] += 0.0F;
-         wpos[2] += span->attrStepX[FRAG_ATTRIB_WPOS][2];
-         wpos[3] += span->attrStepX[FRAG_ATTRIB_WPOS][3];
-      }
-      else {
-         wpos[0] += 0.0F;
-         wpos[1] += 1.0F;
-         wpos[2] += span->attrStepY[FRAG_ATTRIB_WPOS][2];
-         wpos[3] += span->attrStepY[FRAG_ATTRIB_WPOS][3];
-      }
-   }
-
-   /* primary, secondary colors */
-   for (attr = FRAG_ATTRIB_COL0; attr <= FRAG_ATTRIB_COL1; attr++) {
-      if (program->Base.InputsRead & (1 << attr)) {
-         GLfloat *col = machine->Attribs[attr][machine->CurElement];
-         if (xOrY == 'X') {
-            col[0] += span->attrStepX[attr][0] * (1.0F / CHAN_MAXF);
-            col[1] += span->attrStepX[attr][1] * (1.0F / CHAN_MAXF);
-            col[2] += span->attrStepX[attr][2] * (1.0F / CHAN_MAXF);
-            col[3] += span->attrStepX[attr][3] * (1.0F / CHAN_MAXF);
-         }
-         else {
-            col[0] += span->attrStepY[attr][0] * (1.0F / CHAN_MAXF);
-            col[1] += span->attrStepY[attr][1] * (1.0F / CHAN_MAXF);
-            col[2] += span->attrStepY[attr][2] * (1.0F / CHAN_MAXF);
-            col[3] += span->attrStepY[attr][3] * (1.0F / CHAN_MAXF);
-         }
-      }
-   }
-   if (program->Base.InputsRead & FRAG_BIT_FOGC) {
-      GLfloat *fogc = machine->Attribs[FRAG_ATTRIB_FOGC][machine->CurElement];
-      if (xOrY == 'X') {
-         fogc[0] += span->attrStepX[FRAG_ATTRIB_FOGC][0];
-      }
-      else {
-         fogc[0] += span->attrStepY[FRAG_ATTRIB_FOGC][0];
-      }
-   }
-   /* texcoord and varying vars */
-   for (attr = FRAG_ATTRIB_TEX0; attr < FRAG_ATTRIB_MAX; attr++) {
-      if (program->Base.InputsRead & (1 << attr)) {
-         GLfloat *val = machine->Attribs[attr][machine->CurElement];
-         /* XXX perspective-correct interpolation */
-         if (xOrY == 'X') {
-            val[0] += span->attrStepX[attr][0];
-            val[1] += span->attrStepX[attr][1];
-            val[2] += span->attrStepX[attr][2];
-            val[3] += span->attrStepX[attr][3];
-         }
-         else {
-            val[0] += span->attrStepY[attr][0];
-            val[1] += span->attrStepY[attr][1];
-            val[2] += span->attrStepY[attr][2];
-            val[3] += span->attrStepY[attr][3];
-         }
-      }
-   }
-
-   /* init condition codes */
-   dMachine->CondCodes[0] = COND_EQ;
-   dMachine->CondCodes[1] = COND_EQ;
-   dMachine->CondCodes[2] = COND_EQ;
-   dMachine->CondCodes[3] = COND_EQ;
-}
-#endif
-
-
 /**
  * Execute the given vertex/fragment program.
  *
@@ -762,57 +634,18 @@ _mesa_execute_program(GLcontext * ctx,
          break;
       case OPCODE_DDX:         /* Partial derivative with respect to X */
          {
-#if 0
-            GLfloat a[4], aNext[4], result[4];
-            struct gl_program_machine dMachine;
-            if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
-                                     column, result)) {
-               /* This is tricky.  Make a copy of the current machine state,
-                * increment the input registers by the dx or dy partial
-                * derivatives, then re-execute the program up to the
-                * preceeding instruction, then fetch the source register.
-                * Finally, find the difference in the register values for
-                * the original and derivative runs.
-                */
-               fetch_vector4(&inst->SrcReg[0], machine, program, a);
-               init_machine_deriv(ctx, machine, program, span,
-                                  'X', &dMachine);
-               execute_program(ctx, program, pc, &dMachine, span, column);
-               fetch_vector4(&inst->SrcReg[0], &dMachine, program,
-                             aNext);
-               result[0] = aNext[0] - a[0];
-               result[1] = aNext[1] - a[1];
-               result[2] = aNext[2] - a[2];
-               result[3] = aNext[3] - a[3];
-            }
+            GLfloat result[4];
+            fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
+                                'X', result);
             store_vector4(inst, machine, result);
-#else
-            store_vector4(inst, machine, ZeroVec);
-#endif
          }
          break;
       case OPCODE_DDY:         /* Partial derivative with respect to Y */
          {
-#if 0
-            GLfloat a[4], aNext[4], result[4];
-            struct gl_program_machine dMachine;
-            if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
-                                     column, result)) {
-               init_machine_deriv(ctx, machine, program, span,
-                                  'Y', &dMachine);
-               fetch_vector4(&inst->SrcReg[0], machine, program, a);
-               execute_program(ctx, program, pc, &dMachine, span, column);
-               fetch_vector4(&inst->SrcReg[0], &dMachine, program,
-                             aNext);
-               result[0] = aNext[0] - a[0];
-               result[1] = aNext[1] - a[1];
-               result[2] = aNext[2] - a[2];
-               result[3] = aNext[3] - a[3];
-            }
+            GLfloat result[4];
+            fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
+                                'Y', result);
             store_vector4(inst, machine, result);
-#else
-            store_vector4(inst, machine, ZeroVec);
-#endif
          }
          break;
       case OPCODE_DP3:
@@ -933,6 +766,9 @@ _mesa_execute_program(GLcontext * ctx,
             else {
                cond = eval_condition(machine, inst);
             }
+            if (DEBUG_PROG) {
+               printf("IF: %d\n", cond);
+            }
             /* do if/else */
             if (cond) {
                /* do if-clause (just continue execution) */
@@ -1498,33 +1334,18 @@ _mesa_execute_program(GLcontext * ctx,
          }
          break;
       case OPCODE_TEX:         /* Both ARB and NV frag prog */
-         /* Texel lookup */
+         /* Simple texel lookup */
          {
-            /* Note: only use the precomputed lambda value when we're
-             * sampling texture unit [K] with texcoord[K].
-             * Otherwise, the lambda value may have no relation to the
-             * instruction's texcoord or texture image.  Using the wrong
-             * lambda is usually bad news.
-             * The rest of the time, just use zero (until we get a more
-             * sophisticated way of computing lambda).
-             */
-            GLfloat coord[4], color[4], lambda;
-#if 0
-            if (inst->SrcReg[0].File == PROGRAM_INPUT &&
-                inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
-               lambda = span->array->lambda[inst->TexSrcUnit][column];
-            else
-#endif
-               lambda = 0.0;
-            fetch_vector4(&inst->SrcReg[0], machine, coord);
-            machine->FetchTexelLod(ctx, coord, lambda, inst->TexSrcUnit,
-                                   color);
+            GLfloat texcoord[4], color[4];
+            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+
+            fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
+
             if (DEBUG_PROG) {
-               printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
-                      "lod %f\n",
+               printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
                       color[0], color[1], color[2], color[3],
                       inst->TexSrcUnit,
-                      coord[0], coord[1], coord[2], coord[3], lambda);
+                      texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
             }
             store_vector4(inst, machine, color);
          }
@@ -1534,21 +1355,18 @@ _mesa_execute_program(GLcontext * ctx,
          {
             const struct gl_texture_unit *texUnit
                = &ctx->Texture.Unit[inst->TexSrcUnit];
-            GLfloat coord[4], color[4], lambda, bias;
-#if 0
-            if (inst->SrcReg[0].File == PROGRAM_INPUT &&
-                inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
-               lambda = span->array->lambda[inst->TexSrcUnit][column];
-            else
-#endif
-               lambda = 0.0;
-            fetch_vector4(&inst->SrcReg[0], machine, coord);
-            /* coord[3] is the bias to add to lambda */
-            bias = texUnit->LodBias + coord[3];
-            if (texUnit->_Current)
-               bias += texUnit->_Current->LodBias;
-            machine->FetchTexelLod(ctx, coord, lambda + bias,
-                                   inst->TexSrcUnit, color);
+            GLfloat texcoord[4], color[4], lodBias;
+
+            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+
+            /* texcoord[3] is the bias to add to lambda */
+            lodBias = texUnit->LodBias + texcoord[3];
+            if (texUnit->_Current) {
+               lodBias += texUnit->_Current->LodBias;
+            }
+
+            fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
+
             store_vector4(inst, machine, color);
          }
          break;
@@ -1560,6 +1378,7 @@ _mesa_execute_program(GLcontext * ctx,
             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
+                                     0.0, /* lodBias */
                                      inst->TexSrcUnit, color);
             store_vector4(inst, machine, color);
          }
@@ -1567,14 +1386,8 @@ _mesa_execute_program(GLcontext * ctx,
       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
          /* Texture lookup w/ projective divide */
          {
-            GLfloat texcoord[4], color[4], lambda;
-#if 0
-            if (inst->SrcReg[0].File == PROGRAM_INPUT &&
-                inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
-               lambda = span->array->lambda[inst->TexSrcUnit][column];
-            else
-#endif
-               lambda = 0.0;
+            GLfloat texcoord[4], color[4];
+
             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
             /* Not so sure about this test - if texcoord[3] is
              * zero, we'd probably be fine except for an ASSERT in
@@ -1585,22 +1398,19 @@ _mesa_execute_program(GLcontext * ctx,
                texcoord[1] /= texcoord[3];
                texcoord[2] /= texcoord[3];
             }
-            machine->FetchTexelLod(ctx, texcoord, lambda,
-                                   inst->TexSrcUnit, color);
+
+            fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
+
             store_vector4(inst, machine, color);
          }
          break;
       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
-         /* Texture lookup w/ projective divide */
+         /* Texture lookup w/ projective divide, as above, but do not
+          * do the divide by w if sampling from a cube map.
+          */
          {
-            GLfloat texcoord[4], color[4], lambda;
-#if 0
-            if (inst->SrcReg[0].File == PROGRAM_INPUT &&
-                inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit)
-               lambda = span->array->lambda[inst->TexSrcUnit][column];
-            else
-#endif
-               lambda = 0.0;
+            GLfloat texcoord[4], color[4];
+
             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
                 texcoord[3] != 0.0) {
@@ -1608,8 +1418,9 @@ _mesa_execute_program(GLcontext * ctx,
                texcoord[1] /= texcoord[3];
                texcoord[2] /= texcoord[3];
             }
-            machine->FetchTexelLod(ctx, texcoord, lambda,
-                                   inst->TexSrcUnit, color);
+
+            fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
+
             store_vector4(inst, machine, color);
          }
          break;
@@ -1709,10 +1520,9 @@ _mesa_execute_program(GLcontext * ctx,
       case OPCODE_END:
          return GL_TRUE;
       default:
-         _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
+         _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
                        inst->Opcode);
          return GL_TRUE;        /* return value doesn't matter */
-
       }
 
       numExec++;