swrast: silence double->float assignment warnings
[mesa.git] / src / mesa / swrast / s_atifragshader.c
index aeadefc8caa870379ff5cffe7fbcc39aa0e8b6ae..05da64de3acea2f7b91289d7ea44f1305f8e3eb4 100644 (file)
@@ -1,5 +1,4 @@
 /*
- *
  * Copyright (C) 2004  David Airlie   All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include "glheader.h"
-#include "colormac.h"
-#include "context.h"
-#include "atifragshader.h"
-#include "macros.h"
-#include "program.h"
+#include "main/glheader.h"
+#include "main/colormac.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "shader/atifragshader.h"
+#include "swrast/s_atifragshader.h"
+
+
+/**
+ * State for executing an ATI fragment shader.
+ */
+struct atifs_machine
+{
+   GLfloat Registers[6][4];         /** six temporary registers */
+   GLfloat PrevPassRegisters[6][4];
+   GLfloat Inputs[2][4];   /** Primary, secondary input colors */
+};
+
 
-#include "s_atifragshader.h"
-#include "s_nvfragprog.h"
-#include "s_span.h"
-#include "s_texture.h"
 
 /**
  * Fetch a texel.
@@ -39,52 +46,50 @@ static void
 fetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda,
            GLuint unit, GLfloat color[4])
 {
-   GLchan rgba[4];
    SWcontext *swrast = SWRAST_CONTEXT(ctx);
 
    /* XXX use a float-valued TextureSample routine here!!! */
-   swrast->TextureSample[unit] (ctx, unit, ctx->Texture.Unit[unit]._Current,
-                               1, (const GLfloat(*)[4]) texcoord,
-                               &lambda, &rgba);
-   color[0] = CHAN_TO_FLOAT(rgba[0]);
-   color[1] = CHAN_TO_FLOAT(rgba[1]);
-   color[2] = CHAN_TO_FLOAT(rgba[2]);
-   color[3] = CHAN_TO_FLOAT(rgba[3]);
+   swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
+                               1, (const GLfloat(*)[4]) texcoord,
+                               &lambda, (GLfloat (*)[4]) color);
 }
 
 static void
-apply_swizzle(struct atifs_machine *machine, GLuint reg, GLuint swizzle)
+apply_swizzle(GLfloat values[4], GLuint swizzle)
 {
    GLfloat s, t, r, q;
 
-   s = machine->Registers[reg][0];
-   t = machine->Registers[reg][1];
-   r = machine->Registers[reg][2];
-   q = machine->Registers[reg][3];
+   s = values[0];
+   t = values[1];
+   r = values[2];
+   q = values[3];
 
    switch (swizzle) {
    case GL_SWIZZLE_STR_ATI:
-      machine->Registers[reg][0] = s;
-      machine->Registers[reg][1] = t;
-      machine->Registers[reg][2] = r;
+      values[0] = s;
+      values[1] = t;
+      values[2] = r;
       break;
    case GL_SWIZZLE_STQ_ATI:
-      machine->Registers[reg][0] = s;
-      machine->Registers[reg][1] = t;
-      machine->Registers[reg][2] = q;
+      values[0] = s;
+      values[1] = t;
+      values[2] = q;
       break;
    case GL_SWIZZLE_STR_DR_ATI:
-      machine->Registers[reg][0] = s / r;
-      machine->Registers[reg][1] = t / r;
-      machine->Registers[reg][2] = 1 / r;
+      values[0] = s / r;
+      values[1] = t / r;
+      values[2] = 1 / r;
       break;
    case GL_SWIZZLE_STQ_DQ_ATI:
-      machine->Registers[reg][0] = s / q;
-      machine->Registers[reg][1] = t / q;
-      machine->Registers[reg][2] = 1 / q;
+      /* Keep q nonzero so s/q, t/q and 1/q stay finite (infinite values may break the later texture lookup). */
+      if (q == 0.0F)
+         q = 0.000000001F;
+      values[0] = s / q;
+      values[1] = t / q;
+      values[2] = 1.0 / q;
       break;
    }
-   machine->Registers[reg][3] = 0.0;
+   values[3] = 0.0;
 }
 
 static void
@@ -96,7 +101,7 @@ apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
       return;
 
    start = optype ? 3 : 0;
-   end = optype ? 4 : 3;
+   end = 4;
 
    for (i = start; i < end; i++) {
       switch (rep) {
@@ -126,7 +131,7 @@ apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
       return;
 
    start = optype ? 3 : 0;
-   end = optype ? 4 : 3;
+   end = 4;
 
    for (i = start; i < end; i++) {
       if (mod & GL_COMP_BIT_ATI)
@@ -167,27 +172,27 @@ apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
         val[i] = 8 * val[i];
         break;
       case GL_HALF_BIT_ATI:
-        val[i] = val[i] * 0.5;
+        val[i] = val[i] * 0.5F;
         break;
       case GL_QUARTER_BIT_ATI:
-        val[i] = val[i] * 0.25;
+        val[i] = val[i] * 0.25F;
         break;
       case GL_EIGHTH_BIT_ATI:
-        val[i] = val[i] * 0.125;
+        val[i] = val[i] * 0.125F;
         break;
       }
 
       if (has_sat) {
-        if (val[i] < 0.0)
-           val[i] = 0;
-        else if (val[i] > 1.0)
-           val[i] = 1.0;
+        if (val[i] < 0.0F)
+           val[i] = 0.0F;
+        else if (val[i] > 1.0F)
+           val[i] = 1.0F;
       }
       else {
-        if (val[i] < -8.0)
-           val[i] = -8.0;
-        else if (val[i] > 8.0)
-           val[i] = 8.0;
+        if (val[i] < -8.0F)
+           val[i] = -8.0F;
+        else if (val[i] > 8.0F)
+           val[i] = 8.0F;
       }
    }
 }
@@ -230,17 +235,6 @@ finish_pass(struct atifs_machine *machine)
    }
 }
 
-/**
- * Execute the given fragment shader
- * NOTE: we do everything in single-precision floating point; we don't
- * currently observe the single/half/fixed-precision qualifiers.
- * \param ctx - rendering context
- * \param program - the fragment program to execute
- * \param machine - machine state (register file)
- * \param maxInst - max number of instructions to execute
- * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
- */
-
 struct ati_fs_opcode_st ati_fs_opcodes[] = {
    {GL_ADD_ATI, 2},
    {GL_SUB_ATI, 2},
@@ -259,7 +253,7 @@ struct ati_fs_opcode_st ati_fs_opcodes[] = {
 
 static void
 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
-              const struct sw_span *span, GLuint column, GLuint idx)
+              const SWspan *span, GLuint column, GLuint idx)
 {
    GLuint swizzle = texinst->swizzle;
    GLuint pass_tex = texinst->src;
@@ -267,57 +261,66 @@ handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
    if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
       pass_tex -= GL_TEXTURE0_ARB;
       COPY_4V(machine->Registers[idx],
-             span->array->texcoords[pass_tex][column]);
+             span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]);
    }
-   else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI
-           && machine->pass == 2) {
+   else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
       pass_tex -= GL_REG_0_ATI;
       COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
    }
-   apply_swizzle(machine, idx, swizzle);
+   apply_swizzle(machine->Registers[idx], swizzle);
 
 }
 
 static void
 handle_sample_op(GLcontext * ctx, struct atifs_machine *machine,
-                struct atifs_setupinst *texinst, const struct sw_span *span,
+                struct atifs_setupinst *texinst, const SWspan *span,
                 GLuint column, GLuint idx)
 {
+   /* Sample texture unit idx, taking the coordinates from the source named by texinst->src. */
    GLuint swizzle = texinst->swizzle;
-   GLuint sample_tex = texinst->src;
+   GLuint coord_source = texinst->src;
+   GLfloat tex_coords[4] = { 0 };
 
-   if (sample_tex >= GL_TEXTURE0_ARB && sample_tex <= GL_TEXTURE7_ARB) {
-      sample_tex -= GL_TEXTURE0_ARB;
-      fetch_texel(ctx, span->array->texcoords[sample_tex][column], 0.0F,
-                 sample_tex, machine->Registers[idx]);
+   if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
+      coord_source -= GL_TEXTURE0_ARB;
+      COPY_4V(tex_coords,
+              span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]);
    }
-   else if (sample_tex >= GL_REG_0_ATI && sample_tex <= GL_REG_5_ATI) {
-      /* this is wrong... */
-      sample_tex -= GL_REG_0_ATI;
-      fetch_texel(ctx, machine->Registers[sample_tex], 0, sample_tex,
-                 machine->Registers[idx]);
+   else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
+      coord_source -= GL_REG_0_ATI;
+      COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
    }
-   apply_swizzle(machine, idx, swizzle);
+   apply_swizzle(tex_coords, swizzle);
+   fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
 }
 
-#define SETUP_SRC_REG(optype, i, x)         do {       \
-    if (optype) \
-      src[optype][i][3] = x[3]; \
-    else \
-      COPY_3V(src[optype][i], x); \
-  } while (0)
-
-static GLboolean
-execute_shader(GLcontext * ctx,
-              const struct ati_fragment_shader *shader, GLuint maxInst,
-              struct atifs_machine *machine, const struct sw_span *span,
-              GLuint column)
+#define SETUP_SRC_REG(optype, i, x)            \
+do {                                           \
+   COPY_4V(src[optype][i], x);                         \
+} while (0)
+
+
+
+/**
+ * Execute the given fragment shader.
+ * NOTE: we do everything in single-precision floating point
+ * \param ctx - rendering context
+ * \param shader - the shader to execute
+ * \param machine - virtual machine state
+ * \param span - the SWspan we're operating on
+ * \param column - which pixel [i] we're operating on in the span
+ */
+static void
+execute_shader(GLcontext *ctx, const struct ati_fragment_shader *shader,
+              struct atifs_machine *machine, const SWspan *span,
+               GLuint column)
 {
    GLuint pc;
    struct atifs_instruction *inst;
    struct atifs_setupinst *texinst;
    GLint optype;
-   GLint i, j, pass;
+   GLuint i;
+   GLint j, pass;
    GLint dstreg;
    GLfloat src[2][3][4];
    GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
@@ -340,19 +343,19 @@ execute_shader(GLcontext * ctx,
 
         /* setup the source registers for color and alpha ops */
         for (optype = 0; optype < 2; optype++) {
-           for (i = 0; i < inst->ArgCount[optype]; i++) {
+           for (i = 0; i < inst->ArgCount[optype]; i++) {
               GLint index = inst->SrcReg[optype][i].Index;
 
               if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
                  SETUP_SRC_REG(optype, i,
                                machine->Registers[index - GL_REG_0_ATI]);
               else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
-                 if (shader->localConstDef & (1 << (index - GL_CON_0_ATI))) {
+                 if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
                     SETUP_SRC_REG(optype, i,
                                shader->Constants[index - GL_CON_0_ATI]);
                  } else {
                     SETUP_SRC_REG(optype, i,
-                               ctx->ATIFragmentShader.globalConstants[index - GL_CON_0_ATI]);
+                               ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
                  }
               }
               else if (index == GL_ONE)
@@ -475,6 +478,7 @@ execute_shader(GLcontext * ctx,
                     GLfloat result;
 
                     /* DOT 2 always uses the source from the color op */
+                    /* TODO: this dot product is recomputed for the alpha instruction; the color-op result could be reused. */
                     result = src[0][0][0] * src[0][1][0] +
                        src[0][0][1] * src[0][1][1] + src[0][2][2];
                     if (!optype) {
@@ -484,7 +488,6 @@ execute_shader(GLcontext * ctx,
                     }
                     else
                        dst[optype][3] = result;
-
                  }
                  break;
               case GL_DOT3_ATI:
@@ -510,7 +513,7 @@ execute_shader(GLcontext * ctx,
                     GLfloat result;
 
                     /* DOT 4 always uses the source from the color op */
-                    result = src[optype][0][0] * src[0][1][0] +
+                    result = src[0][0][0] * src[0][1][0] +
                        src[0][0][1] * src[0][1][1] +
                        src[0][0][2] * src[0][1][2] +
                        src[0][0][3] * src[0][1][3];
@@ -534,88 +537,68 @@ execute_shader(GLcontext * ctx,
               dstreg = inst->DstReg[optype].Index;
               dstp = machine->Registers[dstreg - GL_REG_0_ATI];
 
-              write_dst_addr(optype, inst->DstReg[optype].dstMod,
+              if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
+                 (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
+                 write_dst_addr(optype, inst->DstReg[optype].dstMod,
                              inst->DstReg[optype].dstMask, dst[optype],
                              dstp);
+              else
+                 write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
            }
         }
       }
    }
-   return GL_TRUE;
 }
 
+
+/**
+ * Init fragment shader virtual machine state.
+ */
 static void
 init_machine(GLcontext * ctx, struct atifs_machine *machine,
             const struct ati_fragment_shader *shader,
-            const struct sw_span *span, GLuint col)
+            const SWspan *span, GLuint col)
 {
+   GLfloat (*inputs)[4] = machine->Inputs;
    GLint i, j;
 
    for (i = 0; i < 6; i++) {
       for (j = 0; j < 4; j++)
-        ctx->ATIFragmentShader.Machine.Registers[i][j] = 0.0;
-
+        machine->Registers[i][j] = 0.0;
    }
 
-   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][0] =
-      CHAN_TO_FLOAT(span->array->rgba[col][0]);
-   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][1] =
-      CHAN_TO_FLOAT(span->array->rgba[col][1]);
-   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][2] =
-      CHAN_TO_FLOAT(span->array->rgba[col][2]);
-   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][3] =
-      CHAN_TO_FLOAT(span->array->rgba[col][3]);
-
-   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][0] =
-      CHAN_TO_FLOAT(span->array->spec[col][0]);
-   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][1] =
-      CHAN_TO_FLOAT(span->array->spec[col][1]);
-   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][2] =
-      CHAN_TO_FLOAT(span->array->spec[col][2]);
-   ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][3] =
-      CHAN_TO_FLOAT(span->array->spec[col][3]);
-
-   ctx->ATIFragmentShader.Machine.pass = 0;
+   COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]);
+   COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]);
 }
 
 
 
 /**
- * Execute the current fragment program, operating on the given span.
+ * Execute the current ATI shader program, operating on the given span.
  */
 void
-_swrast_exec_fragment_shader(GLcontext * ctx, struct sw_span *span)
+_swrast_exec_fragment_shader(GLcontext * ctx, SWspan *span)
 {
    const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
+   struct atifs_machine machine;
    GLuint i;
 
-   ctx->_CurrentProgram = GL_FRAGMENT_SHADER_ATI;
+   /* incoming colors should be floats */
+   ASSERT(span->array->ChanType == GL_FLOAT);
 
    for (i = 0; i < span->end; i++) {
       if (span->array->mask[i]) {
-        init_machine(ctx, &ctx->ATIFragmentShader.Machine,
-                     ctx->ATIFragmentShader.Current, span, i);
+        init_machine(ctx, &machine, shader, span, i);
 
-        if (execute_shader(ctx, shader, ~0,
-                           &ctx->ATIFragmentShader.Machine, span, i)) {
-           span->array->mask[i] = GL_FALSE;
-        }
+        execute_shader(ctx, shader, &machine, span, i);
 
+         /* store result color */
         {
-           const GLfloat *colOut =
-              ctx->ATIFragmentShader.Machine.Registers[0];
-
-           /*fprintf(stderr,"outputs %f %f %f %f\n", colOut[0], colOut[1], colOut[2], colOut[3]); */
-           UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
-           UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
-           UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
-           UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
+           const GLfloat *colOut = machine.Registers[0];
+            /*fprintf(stderr,"outputs %f %f %f %f\n",
+              colOut[0], colOut[1], colOut[2], colOut[3]); */
+            COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut);
         }
       }
-
    }
-
-
-   ctx->_CurrentProgram = 0;
-
 }