Fixes for 32-bit GLchans: smooth/flat/textured triangles seem to work now.

[mesa.git] / src / mesa / swrast / s_texture.c
diff --git a/src/mesa/swrast/s_texture.c b/src/mesa/swrast/s_texture.c

index 73e1b6718d161c186fafab1bab689e547abb36e9..9c3e2c5b5f6623e827492ea42bae807285653297 100644 (file)
--- a/src/mesa/swrast/s_texture.c
+++ b/src/mesa/swrast/s_texture.c
@@ -1,4 +1,4 @@
-/* $Id: s_texture.c,v 1.21 2001/03/28 20:40:52 gareth Exp $ */
+/* $Id: s_texture.c,v 1.38 2001/08/14 14:08:44 brianp Exp $ */
  
  /*
   * Mesa 3-D graphics library
@@ -39,20 +39,6 @@
  #include "s_texture.h"
  
  
-/* XXX this is temporary, until GL/glext.h is updated. */
-#ifndef GL_DOT3_RGB_ARB
-#define GL_DOT3_RGB_ARB 0x86AE
-#endif
-#ifndef GL_DOT3_RGBA_ARB
-#define GL_DOT3_RGBA_ARB 0x86AF
-#endif
-
-/* XXX this is temporary, until GL/glext.h is updated. */
-#ifndef GL_CLAMP_TO_BORDER_ARB
-#define GL_CLAMP_TO_BORDER_ARB 0x812D
-#endif
-
-
  /*
   * These values are used in the fixed-point arithmetic used
   * for linear filtering.
@@ -199,6 +185,16 @@
  
  
  
+/*
+ * FRAC(f) is f - IFLOOR(f); it supplies the blend weights for linear
+ * filtering and mipmap interpolation here, so it has to work perfectly.
+ * Otherwise you'll sometimes see 1-pixel bands of improperly weighted
+ * linear-sampled texels.  The tests/texwrap.c demo is a good test.
+ * For f < 0 the result is 1 - frac(|f|), not frac(|f|); f is expanded twice.
+ */
+#define FRAC(f)  ((f) - IFLOOR(f))
+
+
  
  /*
   * Bitflags for texture border color sampling.
@@ -285,9 +281,15 @@ sample_1d_nearest(GLcontext *ctx,
     /* skip over the border, if any */
     i += img->Border;
  
-   (*img->FetchTexel)(img, i, 0, 0, (GLvoid *) rgba);
-   if (img->Format == GL_COLOR_INDEX) {
-      palette_sample(ctx, tObj, rgba[0], rgba);
+   if (i < 0 || i >= (GLint) img->Width) {
+      /* Need this test for GL_CLAMP_TO_BORDER_ARB mode */
+      COPY_CHAN4(rgba, tObj->BorderColor);
+   }
+   else {
+      (*img->FetchTexel)(img, i, 0, 0, (GLvoid *) rgba);
+      if (img->Format == GL_COLOR_INDEX) {
+         palette_sample(ctx, tObj, rgba[0], rgba);
+      }
     }
  }
  
@@ -321,10 +323,15 @@ sample_1d_linear(GLcontext *ctx,
  
     {
        const GLfloat a = FRAC(u);
-      /* compute sample weights in fixed point in [0,WEIGHT_SCALE] */
-      const GLint w0 = IROUND((1.0F-a) * WEIGHT_SCALE);
-      const GLint w1 = IROUND(      a  * WEIGHT_SCALE);
  
+#if CHAN_TYPE == GL_FLOAT || CHAN_TYPE == GL_UNSIGNED_SHORT
+      const GLfloat w0 = (1.0F-a);
+      const GLfloat w1 =       a ;
+#else /* CHAN_BITS == 8 */
+      /* compute sample weights in fixed point in [0,WEIGHT_SCALE] */
+      const GLint w0 = IROUND_POS((1.0F - a) * WEIGHT_SCALE);
+      const GLint w1 = IROUND_POS(        a  * WEIGHT_SCALE);
+#endif
        GLchan t0[4], t1[4];  /* texels */
  
        if (useBorderColor & I0BIT) {
@@ -346,10 +353,23 @@ sample_1d_linear(GLcontext *ctx,
           }
        }
  
+#if CHAN_TYPE == GL_FLOAT
+      rgba[0] = w0 * t0[0] + w1 * t1[0];
+      rgba[1] = w0 * t0[1] + w1 * t1[1];
+      rgba[2] = w0 * t0[2] + w1 * t1[2];
+      rgba[3] = w0 * t0[3] + w1 * t1[3];
+#elif CHAN_TYPE == GL_UNSIGNED_SHORT
+      rgba[0] = (GLchan) (w0 * t0[0] + w1 * t1[0] + 0.5);
+      rgba[1] = (GLchan) (w0 * t0[1] + w1 * t1[1] + 0.5);
+      rgba[2] = (GLchan) (w0 * t0[2] + w1 * t1[2] + 0.5);
+      rgba[3] = (GLchan) (w0 * t0[3] + w1 * t1[3] + 0.5);
+#else /* CHAN_BITS == 8 */
        rgba[0] = (GLchan) ((w0 * t0[0] + w1 * t1[0]) >> WEIGHT_SHIFT);
        rgba[1] = (GLchan) ((w0 * t0[1] + w1 * t1[1]) >> WEIGHT_SHIFT);
        rgba[2] = (GLchan) ((w0 * t0[2] + w1 * t1[2]) >> WEIGHT_SHIFT);
        rgba[3] = (GLchan) ((w0 * t0[3] + w1 * t1[3]) >> WEIGHT_SHIFT);
+#endif
+
     }
  }
  
@@ -379,6 +399,16 @@ sample_1d_linear_mipmap_nearest(GLcontext *ctx,
  
  
  
+/*
+ * Cast placed before (GLchan) on blended values; just avoids compiler warnings.
+ */
+#if CHAN_TYPE == GL_FLOAT
+#define INTCAST             /* GL_FLOAT channels: expands to nothing */
+#else
+#define INTCAST (GLint)     /* other channel types: go through GLint first */
+#endif
+
+
  static void
  sample_1d_nearest_mipmap_linear(GLcontext *ctx,
                                  const struct gl_texture_object *tObj,
@@ -397,10 +427,10 @@ sample_1d_nearest_mipmap_linear(GLcontext *ctx,
        const GLfloat f = FRAC(lambda);
        sample_1d_nearest(ctx, tObj, tObj->Image[level  ], s, t0);
        sample_1d_nearest(ctx, tObj, tObj->Image[level+1], s, t1);
-      rgba[RCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
-      rgba[GCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
-      rgba[BCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
-      rgba[ACOMP] = (GLchan) (GLint) ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
+      rgba[RCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
+      rgba[GCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
+      rgba[BCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
+      rgba[ACOMP] = (GLchan) INTCAST ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
     }
  }
  
@@ -424,10 +454,10 @@ sample_1d_linear_mipmap_linear(GLcontext *ctx,
        const GLfloat f = FRAC(lambda);
        sample_1d_linear(ctx, tObj, tObj->Image[level  ], s, t0);
        sample_1d_linear(ctx, tObj, tObj->Image[level+1], s, t1);
-      rgba[RCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
-      rgba[GCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
-      rgba[BCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
-      rgba[ACOMP] = (GLchan) (GLint) ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
+      rgba[RCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
+      rgba[GCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
+      rgba[BCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
+      rgba[ACOMP] = (GLchan) INTCAST ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
     }
  }
  
@@ -569,9 +599,15 @@ sample_2d_nearest(GLcontext *ctx,
     i += img->Border;
     j += img->Border;
  
-   (*img->FetchTexel)(img, i, j, 0, (GLvoid *) rgba);
-   if (img->Format == GL_COLOR_INDEX) {
-      palette_sample(ctx, tObj, rgba[0], rgba);
+   if (i < 0 || i >= (GLint) img->Width || j < 0 || j >= (GLint) img->Height) {
+      /* Need this test for GL_CLAMP_TO_BORDER_ARB mode */
+      COPY_CHAN4(rgba, tObj->BorderColor);
+   }
+   else {
+      (*img->FetchTexel)(img, i, j, 0, (GLvoid *) rgba);
+      if (img->Format == GL_COLOR_INDEX) {
+         palette_sample(ctx, tObj, rgba[0], rgba);
+      }
     }
  }
  
@@ -614,11 +650,19 @@ sample_2d_linear(GLcontext *ctx,
     {
        const GLfloat a = FRAC(u);
        const GLfloat b = FRAC(v);
+
+#if CHAN_TYPE == GL_FLOAT || CHAN_TYPE == GL_UNSIGNED_SHORT
+      const GLfloat w00 = (1.0F-a) * (1.0F-b);
+      const GLfloat w10 =       a  * (1.0F-b);
+      const GLfloat w01 = (1.0F-a) *       b ;
+      const GLfloat w11 =       a  *       b ;
+#else /* CHAN_BITS == 8 */
        /* compute sample weights in fixed point in [0,WEIGHT_SCALE] */
-      const GLint w00 = IROUND((1.0F-a) * (1.0F-b) * WEIGHT_SCALE);
-      const GLint w10 = IROUND(      a  * (1.0F-b) * WEIGHT_SCALE);
-      const GLint w01 = IROUND((1.0F-a) *       b  * WEIGHT_SCALE);
-      const GLint w11 = IROUND(      a  *       b  * WEIGHT_SCALE);
+      const GLint w00 = IROUND_POS((1.0F-a) * (1.0F-b) * WEIGHT_SCALE);
+      const GLint w10 = IROUND_POS(      a  * (1.0F-b) * WEIGHT_SCALE);
+      const GLint w01 = IROUND_POS((1.0F-a) *       b  * WEIGHT_SCALE);
+      const GLint w11 = IROUND_POS(      a  *       b  * WEIGHT_SCALE);
+#endif
        GLchan t00[4];
        GLchan t10[4];
        GLchan t01[4];
@@ -660,11 +704,31 @@ sample_2d_linear(GLcontext *ctx,
              palette_sample(ctx, tObj, t11[0], t11);
           }
        }
+#if CHAN_TYPE == GL_FLOAT
+      rgba[0] = w00 * t00[0] + w10 * t10[0] + w01 * t01[0] + w11 * t11[0];
+      rgba[1] = w00 * t00[1] + w10 * t10[1] + w01 * t01[1] + w11 * t11[1];
+      rgba[2] = w00 * t00[2] + w10 * t10[2] + w01 * t01[2] + w11 * t11[2];
+      rgba[3] = w00 * t00[3] + w10 * t10[3] + w01 * t01[3] + w11 * t11[3];
+#elif CHAN_TYPE == GL_UNSIGNED_SHORT
+      rgba[0] = (GLchan) (w00 * t00[0] + w10 * t10[0] +
+                          w01 * t01[0] + w11 * t11[0] + 0.5);
+      rgba[1] = (GLchan) (w00 * t00[1] + w10 * t10[1] +
+                          w01 * t01[1] + w11 * t11[1] + 0.5);
+      rgba[2] = (GLchan) (w00 * t00[2] + w10 * t10[2] +
+                          w01 * t01[2] + w11 * t11[2] + 0.5);
+      rgba[3] = (GLchan) (w00 * t00[3] + w10 * t10[3] +
+                          w01 * t01[3] + w11 * t11[3] + 0.5);
+#else /* CHAN_BITS == 8 */
+      rgba[0] = (GLchan) ((w00 * t00[0] + w10 * t10[0] +
+                           w01 * t01[0] + w11 * t11[0]) >> WEIGHT_SHIFT);
+      rgba[1] = (GLchan) ((w00 * t00[1] + w10 * t10[1] +
+                           w01 * t01[1] + w11 * t11[1]) >> WEIGHT_SHIFT);
+      rgba[2] = (GLchan) ((w00 * t00[2] + w10 * t10[2] +
+                           w01 * t01[2] + w11 * t11[2]) >> WEIGHT_SHIFT);
+      rgba[3] = (GLchan) ((w00 * t00[3] + w10 * t10[3] +
+                           w01 * t01[3] + w11 * t11[3]) >> WEIGHT_SHIFT);
+#endif
  
-      rgba[0] = (GLchan) ((w00 * t00[0] + w10 * t10[0] + w01 * t01[0] + w11 * t11[0]) >> WEIGHT_SHIFT);
-      rgba[1] = (GLchan) ((w00 * t00[1] + w10 * t10[1] + w01 * t01[1] + w11 * t11[1]) >> WEIGHT_SHIFT);
-      rgba[2] = (GLchan) ((w00 * t00[2] + w10 * t10[2] + w01 * t01[2] + w11 * t11[2]) >> WEIGHT_SHIFT);
-      rgba[3] = (GLchan) ((w00 * t00[3] + w10 * t10[3] + w01 * t01[3] + w11 * t11[3]) >> WEIGHT_SHIFT);
     }
  
  }
@@ -715,10 +779,10 @@ sample_2d_nearest_mipmap_linear(GLcontext *ctx,
        const GLfloat f = FRAC(lambda);
        sample_2d_nearest(ctx, tObj, tObj->Image[level  ], s, t, t0);
        sample_2d_nearest(ctx, tObj, tObj->Image[level+1], s, t, t1);
-      rgba[RCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
-      rgba[GCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
-      rgba[BCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
-      rgba[ACOMP] = (GLchan) (GLint) ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
+      rgba[RCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
+      rgba[GCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
+      rgba[BCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
+      rgba[ACOMP] = (GLchan) INTCAST ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
     }
  }
  
@@ -742,10 +806,10 @@ sample_2d_linear_mipmap_linear(GLcontext *ctx,
        const GLfloat f = FRAC(lambda);
        sample_2d_linear(ctx, tObj, tObj->Image[level  ], s, t, t0);
        sample_2d_linear(ctx, tObj, tObj->Image[level+1], s, t, t1);
-      rgba[RCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
-      rgba[GCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
-      rgba[BCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
-      rgba[ACOMP] = (GLchan) (GLint) ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
+      rgba[RCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
+      rgba[GCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
+      rgba[BCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
+      rgba[ACOMP] = (GLchan) INTCAST ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
     }
  }
  
@@ -786,6 +850,84 @@ sample_linear_2d( GLcontext *ctx, GLuint texUnit,
  }
  
  
+/*
+ * Optimized 2-D nearest sampling of the base level image for n fragments.
+ * Caller must guarantee: S and T wrap mode == GL_REPEAT, no border,
+ * Format == GL_RGB (see the ASSERTs); data is read as packed GLchan RGB.
+ * The & mask / << shift indexing presumably relies on power-of-two
+ * width and height -- TODO confirm.  rgba[k][ACOMP] is left unwritten.
+ */
+static void
+opt_sample_rgb_2d( GLcontext *ctx, GLuint texUnit,
+                   const struct gl_texture_object *tObj,
+                   GLuint n, const GLfloat s[], const GLfloat t[],
+                   const GLfloat u[], const GLfloat lambda[],
+                   GLchan rgba[][4] )
+{
+   const struct gl_texture_image *img = tObj->Image[tObj->BaseLevel];
+   const GLfloat width = (GLfloat) img->Width;
+   const GLfloat height = (GLfloat) img->Height;
+   const GLint colMask = img->Width - 1;   /* wraps the column index */
+   const GLint rowMask = img->Height - 1;  /* wraps the row index */
+   const GLint shift = img->WidthLog2;     /* row * width done as a shift */
+   GLuint k;
+   (void) u;        /* unused */
+   (void) lambda;   /* unused */
+   ASSERT(tObj->WrapS==GL_REPEAT);
+   ASSERT(tObj->WrapT==GL_REPEAT);
+   ASSERT(img->Border==0);
+   ASSERT(img->Format==GL_RGB);
+
+   for (k=0; k<n; k++) {
+      GLint i = IFLOOR(s[k] * width) & colMask;
+      GLint j = IFLOOR(t[k] * height) & rowMask;
+      GLint pos = (j << shift) | i;   /* texel index within the image */
+      GLchan *texel = ((GLchan *) img->Data) + 3*pos;   /* 3 GLchans per texel */
+      rgba[k][RCOMP] = texel[0];
+      rgba[k][GCOMP] = texel[1];
+      rgba[k][BCOMP] = texel[2];
+   }
+}
+
+
+/*
+ * Optimized 2-D nearest sampling of the base level image for n fragments.
+ * Caller must guarantee: S and T wrap mode == GL_REPEAT, no border,
+ * Format == GL_RGBA (checked with ASSERT); data is read as packed
+ * GLchan RGBA and all four channels are copied with COPY_CHAN4.
+ * The mask/shift indexing presumably needs power-of-two sizes -- confirm.
+ */
+static void
+opt_sample_rgba_2d( GLcontext *ctx, GLuint texUnit,
+                    const struct gl_texture_object *tObj,
+                    GLuint n, const GLfloat s[], const GLfloat t[],
+                    const GLfloat u[], const GLfloat lambda[],
+                    GLchan rgba[][4] )
+{
+   const struct gl_texture_image *img = tObj->Image[tObj->BaseLevel];
+   const GLfloat width = (GLfloat) img->Width;
+   const GLfloat height = (GLfloat) img->Height;
+   const GLint colMask = img->Width - 1;   /* wraps the column index */
+   const GLint rowMask = img->Height - 1;  /* wraps the row index */
+   const GLint shift = img->WidthLog2;     /* row * width done as a shift */
+   GLuint i;
+   (void) u;        /* unused */
+   (void) lambda;   /* unused */
+   ASSERT(tObj->WrapS==GL_REPEAT);
+   ASSERT(tObj->WrapT==GL_REPEAT);
+   ASSERT(img->Border==0);
+   ASSERT(img->Format==GL_RGBA);
+
+   for (i = 0; i < n; i++) {
+      const GLint col = IFLOOR(s[i] * width) & colMask;
+      const GLint row = IFLOOR(t[i] * height) & rowMask;
+      const GLint pos = (row << shift) | col;   /* texel index within the image */
+      const GLchan *texel = ((GLchan *) img->Data) + (pos << 2);    /* pos*4 */
+      COPY_CHAN4(rgba[i], texel);
+   }
+}
+
+
  /*
   * Given an array of (s,t) texture coordinate and lambda (level of detail)
   * values, return an array of texture sample.
@@ -805,14 +947,33 @@ sample_lambda_2d( GLcontext *ctx, GLuint texUnit,
     /* since lambda is monotonous-array use this check first */
     if (lambda[0] <= minMagThresh && lambda[n-1] <= minMagThresh) {
        /* magnification for whole span */
+      const struct gl_texture_image *img = tObj->Image[tObj->BaseLevel];
        switch (tObj->MagFilter) {
        case GL_NEAREST:
-        sample_nearest_2d(ctx, texUnit, tObj, n, s, t, u,
-                           lambda, rgba);
+         if (tObj->WrapS == GL_REPEAT && tObj->WrapT == GL_REPEAT &&
+             img->Border == 0) {
+            switch (img->Format) {
+            case GL_RGB:
+               opt_sample_rgb_2d(ctx, texUnit, tObj, n, s, t, NULL,
+                                 NULL, rgba);
+               break;
+            case GL_RGBA:
+               opt_sample_rgba_2d(ctx, texUnit, tObj, n, s, t, NULL,
+                                  NULL, rgba);
+               break;
+            default:
+               sample_nearest_2d(ctx, texUnit, tObj, n, s, t, NULL,
+                                 NULL, rgba);
+            }
+         }
+         else {
+            sample_nearest_2d(ctx, texUnit, tObj, n, s, t, NULL,
+                              NULL, rgba);
+         }
           break;
        case GL_LINEAR:
-        sample_linear_2d(ctx, texUnit, tObj, n, s, t, u,
-                         lambda, rgba);
+        sample_linear_2d(ctx, texUnit, tObj, n, s, t, NULL,
+                         NULL, rgba);
           break;
        default:
           _mesa_problem(NULL, "Bad mag filter in sample_lambda_2d");
@@ -872,93 +1033,6 @@ sample_lambda_2d( GLcontext *ctx, GLuint texUnit,
  }
  
  
-/*
- * Optimized 2-D texture sampling:
- *    S and T wrap mode == GL_REPEAT
- *    GL_NEAREST min/mag filter
- *    No border
- *    Format = GL_RGB
- */
-static void
-opt_sample_rgb_2d( GLcontext *ctx, GLuint texUnit,
-                   const struct gl_texture_object *tObj,
-                   GLuint n, const GLfloat s[], const GLfloat t[],
-                   const GLfloat u[], const GLfloat lambda[],
-                   GLchan rgba[][4] )
-{
-   const struct gl_texture_image *img = tObj->Image[tObj->BaseLevel];
-   const GLfloat width = (GLfloat) img->Width;
-   const GLfloat height = (GLfloat) img->Height;
-   const GLint colMask = img->Width - 1;
-   const GLint rowMask = img->Height - 1;
-   const GLint shift = img->WidthLog2;
-   GLuint k;
-   (void) u;
-   (void) lambda;
-   ASSERT(tObj->WrapS==GL_REPEAT);
-   ASSERT(tObj->WrapT==GL_REPEAT);
-   ASSERT(tObj->MinFilter==GL_NEAREST);
-   ASSERT(tObj->MagFilter==GL_NEAREST);
-   ASSERT(img->Border==0);
-   ASSERT(img->Format==GL_RGB);
-
-   /* NOTE: negative float->int doesn't floor, add 10000 as to work-around */
-   for (k=0;k<n;k++) {
-      GLint i = (GLint) ((s[k] + 10000.0) * width) & colMask;
-      GLint j = (GLint) ((t[k] + 10000.0) * height) & rowMask;
-      GLint pos = (j << shift) | i;
-      GLchan *texel = ((GLchan *) img->Data) + pos + pos + pos;  /* pos*3 */
-      rgba[k][RCOMP] = texel[0];
-      rgba[k][GCOMP] = texel[1];
-      rgba[k][BCOMP] = texel[2];
-   }
-}
-
-
-/*
- * Optimized 2-D texture sampling:
- *    S and T wrap mode == GL_REPEAT
- *    GL_NEAREST min/mag filter
- *    No border
- *    Format = GL_RGBA
- */
-static void
-opt_sample_rgba_2d( GLcontext *ctx, GLuint texUnit,
-                    const struct gl_texture_object *tObj,
-                    GLuint n, const GLfloat s[], const GLfloat t[],
-                    const GLfloat u[], const GLfloat lambda[],
-                    GLchan rgba[][4] )
-{
-   const struct gl_texture_image *img = tObj->Image[tObj->BaseLevel];
-   const GLfloat width = (GLfloat) img->Width;
-   const GLfloat height = (GLfloat) img->Height;
-   const GLint colMask = img->Width - 1;
-   const GLint rowMask = img->Height - 1;
-   const GLint shift = img->WidthLog2;
-   GLuint k;
-   (void) u;
-   (void) lambda;
-   ASSERT(tObj->WrapS==GL_REPEAT);
-   ASSERT(tObj->WrapT==GL_REPEAT);
-   ASSERT(tObj->MinFilter==GL_NEAREST);
-   ASSERT(tObj->MagFilter==GL_NEAREST);
-   ASSERT(img->Border==0);
-   ASSERT(img->Format==GL_RGBA);
-
-   /* NOTE: negative float->int doesn't floor, add 10000 as to work-around */
-   for (k=0;k<n;k++) {
-      GLint i = (GLint) ((s[k] + 10000.0) * width) & colMask;
-      GLint j = (GLint) ((t[k] + 10000.0) * height) & rowMask;
-      GLint pos = (j << shift) | i;
-      GLchan *texel = ((GLchan *) img->Data) + (pos << 2);    /* pos*4 */
-      rgba[k][RCOMP] = texel[0];
-      rgba[k][GCOMP] = texel[1];
-      rgba[k][BCOMP] = texel[2];
-      rgba[k][ACOMP] = texel[3];
-   }
-}
-
-
  
  /**********************************************************************/
  /*                    3-D Texture Sampling Functions                  */
@@ -983,9 +1057,17 @@ sample_3d_nearest(GLcontext *ctx,
     COMPUTE_NEAREST_TEXEL_LOCATION(tObj->WrapT, t, height, j);
     COMPUTE_NEAREST_TEXEL_LOCATION(tObj->WrapR, r, depth,  k);
  
-   (*img->FetchTexel)(img, i, j, k, (GLvoid *) rgba);
-   if (img->Format == GL_COLOR_INDEX) {
-      palette_sample(ctx, tObj, rgba[0], rgba);
+   if (i < 0 || i >= (GLint) img->Width ||
+       j < 0 || j >= (GLint) img->Height ||
+       k < 0 || k >= (GLint) img->Depth) {
+      /* Need this test for GL_CLAMP_TO_BORDER_ARB mode */
+      COPY_CHAN4(rgba, tObj->BorderColor);
+   }
+   else {
+      (*img->FetchTexel)(img, i, j, k, (GLvoid *) rgba);
+      if (img->Format == GL_COLOR_INDEX) {
+         palette_sample(ctx, tObj, rgba[0], rgba);
+      }
     }
  }
  
@@ -1035,15 +1117,28 @@ sample_3d_linear(GLcontext *ctx,
        const GLfloat a = FRAC(u);
        const GLfloat b = FRAC(v);
        const GLfloat c = FRAC(w);
+
+#if CHAN_TYPE == GL_FLOAT || CHAN_TYPE == GL_UNSIGNED_SHORT
+      /* compute sample weights as floats in [0,1]; no WEIGHT_SCALE here */
+      GLfloat w000 = (1.0F-a) * (1.0F-b) * (1.0F-c);
+      GLfloat w100 =       a  * (1.0F-b) * (1.0F-c);
+      GLfloat w010 = (1.0F-a) *       b  * (1.0F-c);
+      GLfloat w110 =       a  *       b  * (1.0F-c);
+      GLfloat w001 = (1.0F-a) * (1.0F-b) *       c ;
+      GLfloat w101 =       a  * (1.0F-b) *       c ;
+      GLfloat w011 = (1.0F-a) *       b  *       c ;
+      GLfloat w111 =       a  *       b  *       c ;
+#else /* CHAN_BITS == 8 */
        /* compute sample weights in fixed point in [0,WEIGHT_SCALE] */
-      GLint w000 = IROUND((1.0F-a) * (1.0F-b) * (1.0F-c) * WEIGHT_SCALE);
-      GLint w100 = IROUND(      a  * (1.0F-b) * (1.0F-c) * WEIGHT_SCALE);
-      GLint w010 = IROUND((1.0F-a) *       b  * (1.0F-c) * WEIGHT_SCALE);
-      GLint w110 = IROUND(      a  *       b  * (1.0F-c) * WEIGHT_SCALE);
-      GLint w001 = IROUND((1.0F-a) * (1.0F-b) *       c  * WEIGHT_SCALE);
-      GLint w101 = IROUND(      a  * (1.0F-b) *       c  * WEIGHT_SCALE);
-      GLint w011 = IROUND((1.0F-a) *       b  *       c  * WEIGHT_SCALE);
-      GLint w111 = IROUND(      a  *       b  *       c  * WEIGHT_SCALE);
+      GLint w000 = IROUND_POS((1.0F-a) * (1.0F-b) * (1.0F-c) * WEIGHT_SCALE);
+      GLint w100 = IROUND_POS(      a  * (1.0F-b) * (1.0F-c) * WEIGHT_SCALE);
+      GLint w010 = IROUND_POS((1.0F-a) *       b  * (1.0F-c) * WEIGHT_SCALE);
+      GLint w110 = IROUND_POS(      a  *       b  * (1.0F-c) * WEIGHT_SCALE);
+      GLint w001 = IROUND_POS((1.0F-a) * (1.0F-b) *       c  * WEIGHT_SCALE);
+      GLint w101 = IROUND_POS(      a  * (1.0F-b) *       c  * WEIGHT_SCALE);
+      GLint w011 = IROUND_POS((1.0F-a) *       b  *       c  * WEIGHT_SCALE);
+      GLint w111 = IROUND_POS(      a  *       b  *       c  * WEIGHT_SCALE);
+#endif
  
        GLchan t000[4], t010[4], t001[4], t011[4];
        GLchan t100[4], t110[4], t101[4], t111[4];
@@ -1122,9 +1217,36 @@ sample_3d_linear(GLcontext *ctx,
           }
        }
  
+#if CHAN_TYPE == GL_FLOAT
+      rgba[0] = w000*t000[0] + w010*t010[0] + w001*t001[0] + w011*t011[0] +
+                w100*t100[0] + w110*t110[0] + w101*t101[0] + w111*t111[0];
+      rgba[1] = w000*t000[1] + w010*t010[1] + w001*t001[1] + w011*t011[1] +
+                w100*t100[1] + w110*t110[1] + w101*t101[1] + w111*t111[1];
+      rgba[2] = w000*t000[2] + w010*t010[2] + w001*t001[2] + w011*t011[2] +
+                w100*t100[2] + w110*t110[2] + w101*t101[2] + w111*t111[2];
+      rgba[3] = w000*t000[3] + w010*t010[3] + w001*t001[3] + w011*t011[3] +
+                w100*t100[3] + w110*t110[3] + w101*t101[3] + w111*t111[3];
+#elif CHAN_TYPE == GL_UNSIGNED_SHORT
+      rgba[0] = (GLchan) (w000*t000[0] + w010*t010[0] +
+                          w001*t001[0] + w011*t011[0] +
+                          w100*t100[0] + w110*t110[0] +
+                          w101*t101[0] + w111*t111[0] + 0.5);
+      rgba[1] = (GLchan) (w000*t000[1] + w010*t010[1] +
+                          w001*t001[1] + w011*t011[1] +
+                          w100*t100[1] + w110*t110[1] +
+                          w101*t101[1] + w111*t111[1] + 0.5);
+      rgba[2] = (GLchan) (w000*t000[2] + w010*t010[2] +
+                          w001*t001[2] + w011*t011[2] +
+                          w100*t100[2] + w110*t110[2] +
+                          w101*t101[2] + w111*t111[2] + 0.5);
+      rgba[3] = (GLchan) (w000*t000[3] + w010*t010[3] +
+                          w001*t001[3] + w011*t011[3] +
+                          w100*t100[3] + w110*t110[3] +
+                          w101*t101[3] + w111*t111[3] + 0.5);
+#else /* CHAN_BITS == 8 */
        rgba[0] = (GLchan) (
                   (w000*t000[0] + w010*t010[0] + w001*t001[0] + w011*t011[0] +
-                  w100*t100[0] + w110*t110[0] + w101*t101[0] + w111*t111[0]  )
+                  w100*t100[0] + w110*t110[0] + w101*t101[0] + w111*t111[0] )
                   >> WEIGHT_SHIFT);
        rgba[1] = (GLchan) (
                   (w000*t000[1] + w010*t010[1] + w001*t001[1] + w011*t011[1] +
@@ -1138,6 +1260,8 @@ sample_3d_linear(GLcontext *ctx,
                   (w000*t000[3] + w010*t010[3] + w001*t001[3] + w011*t011[3] +
                    w100*t100[3] + w110*t110[3] + w101*t101[3] + w111*t111[3] )
                   >> WEIGHT_SHIFT);
+#endif
+
     }
  }
  
@@ -1186,10 +1310,10 @@ sample_3d_nearest_mipmap_linear(GLcontext *ctx,
        const GLfloat f = FRAC(lambda);
        sample_3d_nearest(ctx, tObj, tObj->Image[level  ], s, t, r, t0);
        sample_3d_nearest(ctx, tObj, tObj->Image[level+1], s, t, r, t1);
-      rgba[RCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
-      rgba[GCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
-      rgba[BCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
-      rgba[ACOMP] = (GLchan) (GLint) ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
+      rgba[RCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
+      rgba[GCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
+      rgba[BCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
+      rgba[ACOMP] = (GLchan) INTCAST ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
     }
  }
  
@@ -1212,10 +1336,10 @@ sample_3d_linear_mipmap_linear(GLcontext *ctx,
        const GLfloat f = FRAC(lambda);
        sample_3d_linear(ctx, tObj, tObj->Image[level  ], s, t, r, t0);
        sample_3d_linear(ctx, tObj, tObj->Image[level+1], s, t, r, t1);
-      rgba[RCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
-      rgba[GCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
-      rgba[BCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
-      rgba[ACOMP] = (GLchan) (GLint) ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
+      rgba[RCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
+      rgba[GCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
+      rgba[BCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
+      rgba[ACOMP] = (GLchan) INTCAST ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
     }
  }
  
@@ -1491,10 +1615,10 @@ sample_cube_nearest_mipmap_linear(GLcontext *ctx,
        const GLfloat f = FRAC(lambda);
        sample_2d_nearest(ctx, tObj, images[level  ], newS, newT, t0);
        sample_2d_nearest(ctx, tObj, images[level+1], newS, newT, t1);
-      rgba[RCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
-      rgba[GCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
-      rgba[BCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
-      rgba[ACOMP] = (GLchan) (GLint) ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
+      rgba[RCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
+      rgba[GCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
+      rgba[BCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
+      rgba[ACOMP] = (GLchan) INTCAST ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
     }
  }
  
@@ -1521,10 +1645,10 @@ sample_cube_linear_mipmap_linear(GLcontext *ctx,
        const GLfloat f = FRAC(lambda);
        sample_2d_linear(ctx, tObj, images[level  ], newS, newT, t0);
        sample_2d_linear(ctx, tObj, images[level+1], newS, newT, t1);
-      rgba[RCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
-      rgba[GCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
-      rgba[BCOMP] = (GLchan) (GLint) ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
-      rgba[ACOMP] = (GLchan) (GLint) ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
+      rgba[RCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[RCOMP] + f * t1[RCOMP]);
+      rgba[GCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[GCOMP] + f * t1[GCOMP]);
+      rgba[BCOMP] = (GLchan) INTCAST ((1.0F-f) * t0[BCOMP] + f * t1[BCOMP]);
+      rgba[ACOMP] = (GLchan) INTCAST ((1.0F-f) * t0[ACOMP] + f * t1[ACOMP]);
     }
  }
  
@@ -1605,6 +1729,7 @@ sample_lambda_cube( GLcontext *ctx, GLuint texUnit,
     }
  }
  
+
  static void
  null_sample_func( GLcontext *ctx, GLuint texUnit,
                   const struct gl_texture_object *tObj, GLuint n,
@@ -1614,6 +1739,8 @@ null_sample_func( GLcontext *ctx, GLuint texUnit,
  {
  }
  
+
+
  /**********************************************************************/
  /*                       Texture Sampling Setup                       */
  /**********************************************************************/
@@ -1727,14 +1854,23 @@ texture_combine(const GLcontext *ctx,
                  CONST GLchan (*texel)[4],
                  GLchan (*rgba)[4])
  {
-   GLchan ccolor [3][3*MAX_WIDTH][4];
     const GLchan (*argRGB [3])[4];
     const GLchan (*argA [3])[4];
     GLuint i, j;
     const GLuint RGBshift = textureUnit->CombineScaleShiftRGB;
     const GLuint Ashift   = textureUnit->CombineScaleShiftA;
+#if CHAN_TYPE == GL_FLOAT
+   const GLchan RGBmult = (GLfloat) (1 << RGBshift);
+   const GLchan Amult = (GLfloat) (1 << Ashift);
+#else
+   const GLint half = (CHAN_MAX + 1) / 2;
+#endif
  
-   ASSERT(ctx->Extensions.EXT_texture_env_combine);
+   DEFMNARRAY(GLchan, ccolor, 3, 3 * MAX_WIDTH, 4);  /* mac 32k limitation */
+   CHECKARRAY(ccolor, return);  /* mac 32k limitation */
+
+   ASSERT(ctx->Extensions.EXT_texture_env_combine ||
+          ctx->Extensions.ARB_texture_env_combine);
  
     /*
      * Do operand setup for up to 3 operands.  Loop over the terms.
@@ -1776,14 +1912,16 @@ texture_combine(const GLcontext *ctx,
           case GL_CONSTANT_EXT:
              {
                 GLchan (*c)[4] = ccolor[j];
-               GLchan red, green, blue;
+               GLchan red, green, blue, alpha;
                 UNCLAMPED_FLOAT_TO_CHAN(red,   textureUnit->EnvColor[0]);
                 UNCLAMPED_FLOAT_TO_CHAN(green, textureUnit->EnvColor[1]);
                 UNCLAMPED_FLOAT_TO_CHAN(blue,  textureUnit->EnvColor[2]);
+               UNCLAMPED_FLOAT_TO_CHAN(alpha, textureUnit->EnvColor[3]);
                 for (i = 0; i < n; i++) {
                    c[i][RCOMP] = red;
                    c[i][GCOMP] = green;
                    c[i][BCOMP] = blue;
+                  c[i][ACOMP] = alpha;
                 }
                 argRGB[j] = (const GLchan (*)[4]) ccolor[j];
              }
@@ -1796,6 +1934,7 @@ texture_combine(const GLcontext *ctx,
           const GLchan (*src)[4] = argRGB[j];
           GLchan (*dst)[4] = ccolor[j];
  
+         /* point to new arg[j] storage */
           argRGB[j] = (const GLchan (*)[4]) ccolor[j];
  
           if (textureUnit->CombineOperandRGB[j] == GL_ONE_MINUS_SRC_COLOR) {
@@ -1806,7 +1945,6 @@ texture_combine(const GLcontext *ctx,
              }
           }
           else if (textureUnit->CombineOperandRGB[j] == GL_SRC_ALPHA) {
-            src = (const GLchan (*)[4]) argA[j];
              for (i = 0; i < n; i++) {
                 dst[i][RCOMP] = src[i][ACOMP];
                 dst[i][GCOMP] = src[i][ACOMP];
@@ -1815,7 +1953,6 @@ texture_combine(const GLcontext *ctx,
           }
           else {
              ASSERT(textureUnit->CombineOperandRGB[j] ==GL_ONE_MINUS_SRC_ALPHA);
-            src = (const GLchan (*)[4]) argA[j];
              for (i = 0; i < n; i++) {
                 dst[i][RCOMP] = CHAN_MAX - src[i][ACOMP];
                 dst[i][GCOMP] = CHAN_MAX - src[i][ACOMP];
@@ -1854,12 +1991,18 @@ texture_combine(const GLcontext *ctx,
              const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
              if (RGBshift) {
                 for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+                  rgba[i][RCOMP] = arg0[i][RCOMP] * RGBmult;
+                  rgba[i][GCOMP] = arg0[i][GCOMP] * RGBmult;
+                  rgba[i][BCOMP] = arg0[i][BCOMP] * RGBmult;
+#else
                    GLuint r = (GLuint) arg0[i][RCOMP] << RGBshift;
                    GLuint g = (GLuint) arg0[i][GCOMP] << RGBshift;
                    GLuint b = (GLuint) arg0[i][BCOMP] << RGBshift;
                    rgba[i][RCOMP] = MIN2(r, CHAN_MAX);
                    rgba[i][GCOMP] = MIN2(g, CHAN_MAX);
                    rgba[i][BCOMP] = MIN2(b, CHAN_MAX);
+#endif
                 }
              }
              else {
@@ -1875,14 +2018,22 @@ texture_combine(const GLcontext *ctx,
           {
              const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
              const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
+#if CHAN_TYPE != GL_FLOAT
              const GLint shift = 8 - RGBshift;
+#endif
              for (i = 0; i < n; i++) {
-               GLuint r = PROD(arg0[i][0], arg1[i][RCOMP]) >> shift;
-               GLuint g = PROD(arg0[i][1], arg1[i][GCOMP]) >> shift;
-               GLuint b = PROD(arg0[i][2], arg1[i][BCOMP]) >> shift;
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * RGBmult;
+               rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * RGBmult;
+               rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * RGBmult;
+#else
+               GLuint r = PROD(arg0[i][RCOMP], arg1[i][RCOMP]) >> shift;
+               GLuint g = PROD(arg0[i][GCOMP], arg1[i][GCOMP]) >> shift;
+               GLuint b = PROD(arg0[i][BCOMP], arg1[i][BCOMP]) >> shift;
                 rgba[i][RCOMP] = (GLchan) MIN2(r, CHAN_MAX);
                 rgba[i][GCOMP] = (GLchan) MIN2(g, CHAN_MAX);
                 rgba[i][BCOMP] = (GLchan) MIN2(b, CHAN_MAX);
+#endif
              }
           }
           break;
@@ -1891,12 +2042,18 @@ texture_combine(const GLcontext *ctx,
              const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
              const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
              for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * RGBmult;
+               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * RGBmult;
+               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * RGBmult;
+#else
                 GLint r = ((GLint) arg0[i][RCOMP] + (GLint) arg1[i][RCOMP]) << RGBshift;
                 GLint g = ((GLint) arg0[i][GCOMP] + (GLint) arg1[i][GCOMP]) << RGBshift;
                 GLint b = ((GLint) arg0[i][BCOMP] + (GLint) arg1[i][BCOMP]) << RGBshift;
                 rgba[i][RCOMP] = (GLchan) MIN2(r, CHAN_MAX);
                 rgba[i][GCOMP] = (GLchan) MIN2(g, CHAN_MAX);
                 rgba[i][BCOMP] = (GLchan) MIN2(b, CHAN_MAX);
+#endif
              }
           }
           break;
@@ -1905,15 +2062,21 @@ texture_combine(const GLcontext *ctx,
              const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
              const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
              for (i = 0; i < n; i++) {
-               GLint r = (GLint) arg0[i][RCOMP] + (GLint) arg1[i][RCOMP] - 128;
-               GLint g = (GLint) arg0[i][GCOMP] + (GLint) arg1[i][GCOMP] - 128;
-               GLint b = (GLint) arg0[i][BCOMP] + (GLint) arg1[i][BCOMP] - 128;
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5) * RGBmult;
+               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5) * RGBmult;
+               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5) * RGBmult;
+#else
+               GLint r = (GLint) arg0[i][RCOMP] + (GLint) arg1[i][RCOMP] -half;
+               GLint g = (GLint) arg0[i][GCOMP] + (GLint) arg1[i][GCOMP] -half;
+               GLint b = (GLint) arg0[i][BCOMP] + (GLint) arg1[i][BCOMP] -half;
                 r = (r < 0) ? 0 : r << RGBshift;
                 g = (g < 0) ? 0 : g << RGBshift;
                 b = (b < 0) ? 0 : b << RGBshift;
                 rgba[i][RCOMP] = (GLchan) MIN2(r, CHAN_MAX);
                 rgba[i][GCOMP] = (GLchan) MIN2(g, CHAN_MAX);
                 rgba[i][BCOMP] = (GLchan) MIN2(b, CHAN_MAX);
+#endif
              }
           }
           break;
@@ -1922,8 +2085,18 @@ texture_combine(const GLcontext *ctx,
              const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
              const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
              const GLchan (*arg2)[4] = (const GLchan (*)[4]) argRGB[2];
+#if CHAN_TYPE != GL_FLOAT
              const GLint shift = 8 - RGBshift;
+#endif
              for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
+                      arg1[i][RCOMP] * (CHAN_MAXF - arg2[i][RCOMP])) * RGBmult;
+               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
+                      arg1[i][GCOMP] * (CHAN_MAXF - arg2[i][GCOMP])) * RGBmult;
+               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
+                      arg1[i][BCOMP] * (CHAN_MAXF - arg2[i][BCOMP])) * RGBmult;
+#else
                 GLuint r = (PROD(arg0[i][RCOMP], arg2[i][RCOMP])
                             + PROD(arg1[i][RCOMP], CHAN_MAX - arg2[i][RCOMP]))
                                >> shift;
@@ -1936,6 +2109,27 @@ texture_combine(const GLcontext *ctx,
                 rgba[i][RCOMP] = (GLchan) MIN2(r, CHAN_MAX);
                 rgba[i][GCOMP] = (GLchan) MIN2(g, CHAN_MAX);
                 rgba[i][BCOMP] = (GLchan) MIN2(b, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_SUBTRACT_ARB:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * RGBmult;
+               rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * RGBmult;
+               rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * RGBmult;
+#else
+               GLint r = ((GLint) arg0[i][RCOMP] - (GLint) arg1[i][RCOMP]) << RGBshift;
+               GLint g = ((GLint) arg0[i][GCOMP] - (GLint) arg1[i][GCOMP]) << RGBshift;
+               GLint b = ((GLint) arg0[i][BCOMP] - (GLint) arg1[i][BCOMP]) << RGBshift;
+               rgba[i][RCOMP] = (GLchan) CLAMP(r, 0, CHAN_MAX);
+               rgba[i][GCOMP] = (GLchan) CLAMP(g, 0, CHAN_MAX);
+               rgba[i][BCOMP] = (GLchan) CLAMP(b, 0, CHAN_MAX);
+#endif
              }
           }
           break;
@@ -1944,22 +2138,28 @@ texture_combine(const GLcontext *ctx,
        case GL_DOT3_RGB_ARB:
        case GL_DOT3_RGBA_ARB:
           {
-            const GLubyte (*arg0)[4] = (const GLubyte (*)[4]) argRGB[0];
-            const GLubyte (*arg1)[4] = (const GLubyte (*)[4]) argRGB[1];
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
             /* ATI's EXT extension has a constant scale by 4.  The ARB
              * one will likely remove this restriction, and we should
              * drop the EXT extension in favour of the ARB one.
              */
              for (i = 0; i < n; i++) {
-               GLint dot = (S_PROD((GLint)arg0[i][RCOMP] - 128,
-                                  (GLint)arg1[i][RCOMP] - 128) +
-                           S_PROD((GLint)arg0[i][GCOMP] - 128,
-                                  (GLint)arg1[i][GCOMP] - 128) +
-                           S_PROD((GLint)arg0[i][BCOMP] - 128,
-                                  (GLint)arg1[i][BCOMP] - 128)) >> 6;
-               rgba[i][RCOMP] = (GLubyte) CLAMP(dot, 0, 255);
-               rgba[i][GCOMP] = (GLubyte) CLAMP(dot, 0, 255);
-               rgba[i][BCOMP] = (GLubyte) CLAMP(dot, 0, 255);
+#if CHAN_TYPE == GL_FLOAT
+               GLchan dot = ((arg0[i][RCOMP]-0.5F) * (arg1[i][RCOMP]-0.5F) +
+                             (arg0[i][GCOMP]-0.5F) * (arg1[i][GCOMP]-0.5F) +
+                             (arg0[i][BCOMP]-0.5F) * (arg1[i][BCOMP]-0.5F))
+                            * 4.0F;
+#else
+               GLint dot = (S_PROD((GLint)arg0[i][RCOMP] - half,
+                                  (GLint)arg1[i][RCOMP] - half) +
+                           S_PROD((GLint)arg0[i][GCOMP] - half,
+                                  (GLint)arg1[i][GCOMP] - half) +
+                           S_PROD((GLint)arg0[i][BCOMP] - half,
+                                  (GLint)arg1[i][BCOMP] - half)) >> 6;
+#endif
+               dot = CLAMP(dot, 0, CHAN_MAX);
+               rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = (GLchan) dot;
              }
           }
           break;
@@ -1973,7 +2173,11 @@ texture_combine(const GLcontext *ctx,
              const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
              if (Ashift) {
                 for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+                  GLchan a = arg0[i][ACOMP] * Amult;
+#else
                    GLuint a = (GLuint) arg0[i][ACOMP] << Ashift;
+#endif
                    rgba[i][ACOMP] = (GLchan) MIN2(a, CHAN_MAX);
                 }
              }
@@ -1988,10 +2192,16 @@ texture_combine(const GLcontext *ctx,
           {
              const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
              const GLchan (*arg1)[4] = (const GLchan (*)[4]) argA[1];
+#if CHAN_TYPE != GL_FLOAT
              const GLint shift = 8 - Ashift;
+#endif
              for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * Amult;
+#else
                 GLuint a = (PROD(arg0[i][ACOMP], arg1[i][ACOMP]) >> shift);
                 rgba[i][ACOMP] = (GLchan) MIN2(a, CHAN_MAX);
+#endif
              }
           }
           break;
@@ -2000,8 +2210,12 @@ texture_combine(const GLcontext *ctx,
              const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
              const GLchan  (*arg1)[4] = (const GLchan (*)[4]) argA[1];
              for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * Amult;
+#else
                 GLint a = ((GLint) arg0[i][ACOMP] + arg1[i][ACOMP]) << Ashift;
                 rgba[i][ACOMP] = (GLchan) MIN2(a, CHAN_MAX);
+#endif
              }
           }
           break;
@@ -2010,9 +2224,13 @@ texture_combine(const GLcontext *ctx,
              const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
              const GLchan (*arg1)[4] = (const GLchan (*)[4]) argA[1];
              for (i = 0; i < n; i++) {
-               GLint a = (GLint) arg0[i][ACOMP] + (GLint) arg1[i][ACOMP] - 128;
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * Amult;
+#else
+               GLint a = (GLint) arg0[i][ACOMP] + (GLint) arg1[i][ACOMP] -half;
                 a = (a < 0) ? 0 : a << Ashift;
                 rgba[i][ACOMP] = (GLchan) MIN2(a, CHAN_MAX);
+#endif
              }
           }
           break;
@@ -2021,15 +2239,38 @@ texture_combine(const GLcontext *ctx,
              const GLchan (*arg0)[4] = (const GLchan (*)[4]) argA[0];
              const GLchan (*arg1)[4] = (const GLchan (*)[4]) argA[1];
              const GLchan (*arg2)[4] = (const GLchan (*)[4]) argA[2];
+#if CHAN_TYPE != GL_FLOAT
              const GLint shift = 8 - Ashift;
+#endif
              for (i=0; i<n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
+                                 arg1[i][ACOMP] * (CHAN_MAXF - arg2[i][ACOMP]))
+                                * Amult;
+#else
                 GLuint a = (PROD(arg0[i][ACOMP], arg2[i][ACOMP])
                             + PROD(arg1[i][ACOMP], CHAN_MAX - arg2[i][ACOMP]))
                                >> shift;
                 rgba[i][ACOMP] = (GLchan) MIN2(a, CHAN_MAX);
+#endif
+            }
+         }
+         break;
+      case GL_SUBTRACT_ARB:
+         {
+            const GLchan (*arg0)[4] = (const GLchan (*)[4]) argRGB[0];
+            const GLchan (*arg1)[4] = (const GLchan (*)[4]) argRGB[1];
+            for (i = 0; i < n; i++) {
+#if CHAN_TYPE == GL_FLOAT
+               rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * Amult;
+#else
+               GLint a = ((GLint) arg0[i][ACOMP] - (GLint) arg1[i][ACOMP]) << RGBshift;
+               rgba[i][ACOMP] = (GLchan) CLAMP(a, 0, CHAN_MAX);
+#endif
              }
           }
           break;
+
        default:
           _mesa_problem(NULL, "invalid combine mode");
     }
@@ -2042,6 +2283,7 @@ texture_combine(const GLcontext *ctx,
          rgba[i][ACOMP] = rgba[i][RCOMP];
        }
     }
+   UNDEFARRAY(ccolor);  /* mac 32k limitation */
  }
  #undef PROD
  
@@ -2316,6 +2558,8 @@ apply_texture( const GLcontext *ctx,
          }
          break;
  
+     /* XXX don't clamp results if GLchan is float??? */
+
        case GL_ADD:  /* GL_EXT_texture_add_env */
           switch (format) {
              case GL_ALPHA:
@@ -2531,8 +2775,8 @@ sample_depth_texture(const GLcontext *ctx,
  
           if (0) {
              /* compute a single weighted depth sample and do one comparison */
-            const GLfloat a = FRAC(u);
-            const GLfloat b = FRAC(v);
+            const GLfloat a = FRAC(u + 1.0F);
+            const GLfloat b = FRAC(v + 1.0F);
              const GLfloat w00 = (1.0F - a) * (1.0F - b);
              const GLfloat w10 = (       a) * (1.0F - b);
              const GLfloat w01 = (1.0F - a) * (       b);