r100/r200: fix front rendering issue.
[mesa.git] / src / mesa / drivers / dri / r200 / r200_texstate.c
index 3d82aac323473ab7804d135ffa4be723d66c123c..12b828846f49d2fe611efaf1bbf316b0c3d8bf24 100644 (file)
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_texstate.c,v 1.3 2003/02/15 22:18:47 dawes Exp $ */
 /*
 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
 
@@ -33,13 +32,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *   Keith Whitwell <keith@tungstengraphics.com>
  */
 
-#include "glheader.h"
-#include "imports.h"
-#include "context.h"
-#include "macros.h"
-#include "texformat.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/texformat.h"
+#include "main/texobj.h"
+#include "main/enums.h"
 
+#include "common_context.h"
+#include "radeon_mipmap_tree.h"
 #include "r200_context.h"
 #include "r200_state.h"
 #include "r200_ioctl.h"
@@ -48,535 +50,101 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r200_tcl.h"
 
 
+#define R200_TXFORMAT_A8        R200_TXFORMAT_I8 /* A8 and L8 both alias the I8 hardware format code; the table entry (_ALPHA vs _COLOR) decides whether ALPHA_IN_MAP is set */
+#define R200_TXFORMAT_L8        R200_TXFORMAT_I8
 #define R200_TXFORMAT_AL88      R200_TXFORMAT_AI88
 #define R200_TXFORMAT_YCBCR     R200_TXFORMAT_YVYU422
 #define R200_TXFORMAT_YCBCR_REV R200_TXFORMAT_VYUY422
+#define R200_TXFORMAT_RGB_DXT1  R200_TXFORMAT_DXT1 /* DXT aliases: let the table macros paste the MESA_FORMAT_ suffix straight onto R200_TXFORMAT_ */
+#define R200_TXFORMAT_RGBA_DXT1 R200_TXFORMAT_DXT1
+#define R200_TXFORMAT_RGBA_DXT3 R200_TXFORMAT_DXT23
+#define R200_TXFORMAT_RGBA_DXT5 R200_TXFORMAT_DXT45
 
 #define _COLOR(f) \
     [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, 0 }
+#define _COLOR_REV(f) \
+    [ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f, 0 } /* like _COLOR, but fills the MESA_FORMAT_<f>_REV slot; the hardware format stays R200_TXFORMAT_<f> */
 #define _ALPHA(f) \
     [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f | R200_TXFORMAT_ALPHA_IN_MAP, 0 }
+#define _ALPHA_REV(f) \
+    [ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f | R200_TXFORMAT_ALPHA_IN_MAP, 0 } /* like _ALPHA, but fills the MESA_FORMAT_<f>_REV slot */
 #define _YUV(f) \
     [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, R200_YUV_TO_RGB }
 #define _INVALID(f) \
     [ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 }
-#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_YCBCR_REV) \
-                            && (tx_table[f].format != 0xffffffff) )
+#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
+                            && (tx_table_be[f].format != 0xffffffff) ) /* NOTE(review): always consults tx_table_be, yet tx_table_le marks RGB888 valid where tx_table_be marks it _INVALID -- confirm this is intended */
 
-static const struct {
+struct tx_table { /* row type, now named so that tx_table_be and tx_table_le can share it */
    GLuint format, filter;
-}
-tx_table[] =
+};
+
+static const struct tx_table tx_table_be[] = /* indexed by MESA_FORMAT_* via designated initializers; "_be" presumably means big-endian hosts -- TODO confirm at the use site */
 {
-   _ALPHA(RGBA8888),
+   [ MESA_FORMAT_RGBA8888 ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, /* written out by hand: the hardware format name (ABGR8888) differs from the Mesa one, so _ALPHA() cannot be used */
+   _ALPHA_REV(RGBA8888),
    _ALPHA(ARGB8888),
+   _ALPHA_REV(ARGB8888),
    _INVALID(RGB888),
    _COLOR(RGB565),
+   _COLOR_REV(RGB565),
    _ALPHA(ARGB4444),
+   _ALPHA_REV(ARGB4444),
    _ALPHA(ARGB1555),
+   _ALPHA_REV(ARGB1555),
    _ALPHA(AL88),
-   _INVALID(A8),
-   _INVALID(L8),
-   _COLOR(I8),
+   _ALPHA_REV(AL88),
+   _ALPHA(A8), /* A8 and L8 were _INVALID before this change; both now expand to R200_TXFORMAT_I8 through the alias defines */
+   _COLOR(L8),
+   _ALPHA(I8), /* I8 switches from _COLOR to _ALPHA, i.e. it now sets R200_TXFORMAT_ALPHA_IN_MAP */
    _INVALID(CI8),
    _YUV(YCBCR),
    _YUV(YCBCR_REV),
+   _INVALID(RGB_FXT1),
+   _INVALID(RGBA_FXT1),
+   _COLOR(RGB_DXT1),
+   _ALPHA(RGBA_DXT1),
+   _ALPHA(RGBA_DXT3),
+   _ALPHA(RGBA_DXT5),
+};
+
+static const struct tx_table tx_table_le[] = /* same layout as tx_table_be; "_le" presumably means little-endian hosts -- TODO confirm at the use site */
+{
+   _ALPHA(RGBA8888),
+   [ MESA_FORMAT_RGBA8888_REV ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, /* mirror image of tx_table_be: here the _REV slot takes ABGR8888 and the plain slot takes RGBA8888 */
+   _ALPHA(ARGB8888),
+   _ALPHA_REV(ARGB8888),
+   [ MESA_FORMAT_RGB888 ] = { R200_TXFORMAT_ARGB8888, 0 }, /* NOTE(review): valid here but _INVALID in tx_table_be, and VALID_FORMAT() reads only tx_table_be -- confirm this entry is reachable */
+   _COLOR(RGB565),
+   _COLOR_REV(RGB565),
+   _ALPHA(ARGB4444),
+   _ALPHA_REV(ARGB4444),
+   _ALPHA(ARGB1555),
+   _ALPHA_REV(ARGB1555),
+   _ALPHA(AL88),
+   _ALPHA_REV(AL88),
+   _ALPHA(A8),
+   _COLOR(L8),
+   _ALPHA(I8),
+   _INVALID(CI8),
+   _YUV(YCBCR),
+   _YUV(YCBCR_REV),
+   _INVALID(RGB_FXT1),
+   _INVALID(RGBA_FXT1),
+   _COLOR(RGB_DXT1),
+   _ALPHA(RGBA_DXT1),
+   _ALPHA(RGBA_DXT3),
+   _ALPHA(RGBA_DXT5),
 };
 
 #undef _COLOR
 #undef _ALPHA
 #undef _INVALID
 
-/**
- * This function computes the number of bytes of storage needed for
- * the given texture object (all mipmap levels, all cube faces).
- * The \c image[face][level].x/y/width/height parameters for upload/blitting
- * are computed here.  \c pp_txfilter, \c pp_txformat, etc. will be set here
- * too.
- * 
- * \param rmesa Context pointer
- * \param tObj GL texture object whose images are to be posted to
- *                 hardware state.
- */
-static void r200SetTexImages( r200ContextPtr rmesa,
-                             struct gl_texture_object *tObj )
-{
-   r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData;
-   const struct gl_texture_image *baseImage = tObj->Image[tObj->BaseLevel];
-   GLint curOffset;
-   GLint i;
-   GLint numLevels;
-   GLint log2Width, log2Height, log2Depth;
-
-   /* Set the hardware texture format
-    */
-
-   t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
-                      R200_TXFORMAT_ALPHA_IN_MAP);
-   t->pp_txfilter &= ~R200_YUV_TO_RGB;
-
-   if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
-      t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
-      t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
-   }
-   else {
-      _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
-      return;
-   }
-
-
-   /* Compute which mipmap levels we really want to send to the hardware.
-    */
-
-   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
-   log2Width  = tObj->Image[t->base.firstLevel]->WidthLog2;
-   log2Height = tObj->Image[t->base.firstLevel]->HeightLog2;
-   log2Depth  = tObj->Image[t->base.firstLevel]->DepthLog2;
-
-   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-
-   assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
-
-   /* Calculate mipmap offsets and dimensions for blitting (uploading)
-    * The idea is that we lay out the mipmap levels within a block of
-    * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
-    */
-   curOffset = 0;
-
-   for (i = 0; i < numLevels; i++) {
-      const struct gl_texture_image *texImage;
-      GLuint size;
-
-      texImage = tObj->Image[i + t->base.firstLevel];
-      if ( !texImage )
-        break;
-
-      /* find image size in bytes */
-      if (texImage->IsCompressed) {
-         size = texImage->CompressedSize;
-      }
-      else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-         size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63)
-                 & ~63) * texImage->Height;
-      }
-      else {
-         int w = texImage->Width * texImage->TexFormat->TexelBytes;
-         if (w < 32)
-            w = 32;
-         size = w * texImage->Height * texImage->Depth;
-      }
-      assert(size > 0);
-
-
-      /* Align to 32-byte offset.  It is faster to do this unconditionally
-       * (no branch penalty).
-       */
-
-      curOffset = (curOffset + 0x1f) & ~0x1f;
-
-      t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
-      t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
-      t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
-      t->image[0][i].height = size / t->image[0][i].width;
-
-#if 0
-      /* for debugging only and only  applicable to non-rectangle targets */
-      assert(size % t->image[0][i].width == 0);
-      assert(t->image[0][i].x == 0
-             || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
-#endif
-
-      if (0)
-         fprintf(stderr,
-                 "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
-                 i, texImage->Width, texImage->Height,
-                 t->image[0][i].x, t->image[0][i].y,
-                 t->image[0][i].width, t->image[0][i].height, size, curOffset);
-
-      curOffset += size;
-
-   }
-
-   /* Align the total size of texture memory block.
-    */
-   t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
-
-   /* Setup remaining cube face blits, if needed */
-   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
-      /* Round totalSize up to multiple of BLIT_WIDTH_BYTES */
-      const GLuint faceSize = (t->base.totalSize + BLIT_WIDTH_BYTES - 1)
-                              & ~(BLIT_WIDTH_BYTES-1);
-      const GLuint lines = faceSize / BLIT_WIDTH_BYTES;
-      GLuint face;
-      /* reuse face 0 x/y/width/height - just adjust y */
-      for (face = 1; face < 6; face++) {
-         for (i = 0; i < numLevels; i++) {
-            t->image[face][i].x =  t->image[0][i].x;
-            t->image[face][i].y =  t->image[0][i].y + face * lines;
-            t->image[face][i].width  = t->image[0][i].width;
-            t->image[face][i].height = t->image[0][i].height;
-         }
-      }
-      t->base.totalSize = 6 * faceSize; /* total texmem needed */
-   }
-
-
-   /* Hardware state:
-    */
-   t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
-   t->pp_txfilter |= (numLevels - 1) << R200_MAX_MIP_LEVEL_SHIFT;
-
-   t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
-                      R200_TXFORMAT_HEIGHT_MASK |
-                       R200_TXFORMAT_CUBIC_MAP_ENABLE |
-                       R200_TXFORMAT_F5_WIDTH_MASK |
-                       R200_TXFORMAT_F5_HEIGHT_MASK);
-   t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
-                     (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
-
-   t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
-   if (tObj->Target == GL_TEXTURE_3D) {
-      t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
-      t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
-   }
-   else if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
-      ASSERT(log2Width == log2height);
-      t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
-                         (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
-                         (R200_TXFORMAT_CUBIC_MAP_ENABLE));
-      t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
-      t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
-                           (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
-                           (log2Width << R200_FACE_WIDTH_2_SHIFT) |
-                           (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
-                           (log2Width << R200_FACE_WIDTH_3_SHIFT) |
-                           (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
-                           (log2Width << R200_FACE_WIDTH_4_SHIFT) |
-                           (log2Height << R200_FACE_HEIGHT_4_SHIFT));
-   }
-
-   t->pp_txsize = (((tObj->Image[t->base.firstLevel]->Width - 1) << 0) |
-                   ((tObj->Image[t->base.firstLevel]->Height - 1) << 16));
-
-   /* Only need to round to nearest 32 for textures, but the blitter
-    * requires 64-byte aligned pitches, and we may/may not need the
-    * blitter.   NPOT only!
-    */
-   if (baseImage->IsCompressed)
-      t->pp_txpitch = (tObj->Image[t->base.firstLevel]->Width + 63) & ~(63);
-   else
-      t->pp_txpitch = ((tObj->Image[t->base.firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63);
-   t->pp_txpitch -= 32;
-
-   t->dirty_state = TEX_ALL;
-
-   /* FYI: r200UploadTexImages( rmesa, t ) used to be called here */
-}
-
-
-
 /* ================================================================
  * Texture combine functions
  */
 
-#define R200_DISABLE           0
-#define R200_REPLACE           1
-#define R200_MODULATE          2
-#define R200_DECAL             3
-#define R200_BLEND             4
-#define R200_ADD               5
-#define R200_MAX_COMBFUNC      6
-
-static GLuint r200_color_combine[][R200_MAX_COMBFUNC] =
-{
-   /* Unit 0:
-    */
-   {
-      /* Disable combiner stage
-       */
-      (R200_TXC_ARG_A_ZERO  |
-       R200_TXC_ARG_B_ZERO |
-       R200_TXC_ARG_C_DIFFUSE_COLOR |
-       R200_TXC_OP_MADD),
-
-      /* GL_REPLACE = 0x00802800
-       */
-      (R200_TXC_ARG_A_ZERO |
-       R200_TXC_ARG_B_ZERO |
-       R200_TXC_ARG_C_R0_COLOR |
-       R200_TXC_OP_MADD),
-
-      /* GL_MODULATE = 0x00800142
-       */
-      (R200_TXC_ARG_A_DIFFUSE_COLOR | /* current starts in DIFFUSE */
-       R200_TXC_ARG_B_R0_COLOR |
-       R200_TXC_ARG_C_ZERO |
-       R200_TXC_OP_MADD),
-
-      /* GL_DECAL = 0x008c2d42
-       */
-      (R200_TXC_ARG_A_DIFFUSE_COLOR |
-       R200_TXC_ARG_B_R0_COLOR |
-       R200_TXC_ARG_C_R0_ALPHA |
-       R200_TXC_OP_LERP),
-
-      /* GL_BLEND = 0x008c2902
-       */
-      (R200_TXC_ARG_A_DIFFUSE_COLOR |
-       R200_TXC_ARG_B_TFACTOR_COLOR |
-       R200_TXC_ARG_C_R0_COLOR |
-       R200_TXC_OP_LERP),
-
-      /* GL_ADD = 0x00812802
-       */
-      (R200_TXC_ARG_A_DIFFUSE_COLOR |
-       R200_TXC_ARG_B_ZERO |
-       R200_TXC_ARG_C_R0_COLOR |
-       R200_TXC_COMP_ARG_B |
-       R200_TXC_OP_MADD),
-   },
-
-   /* Unit 1:
-    */
-   {
-      /* Disable combiner stage
-       */
-      (R200_TXC_ARG_A_ZERO |
-       R200_TXC_ARG_B_ZERO |
-       R200_TXC_ARG_C_R0_COLOR |
-       R200_TXC_OP_MADD),
-
-      /* GL_REPLACE = 0x00803000
-       */
-      (R200_TXC_ARG_A_ZERO |
-       R200_TXC_ARG_B_ZERO |
-       R200_TXC_ARG_C_R1_COLOR |
-       R200_TXC_OP_MADD),
-
-      /* GL_MODULATE = 0x00800182
-       */
-      (R200_TXC_ARG_A_R0_COLOR | /* current in R0 thereafter */
-       R200_TXC_ARG_B_R1_COLOR |
-       R200_TXC_ARG_C_ZERO |
-       R200_TXC_OP_MADD),
-
-      /* GL_DECAL = 0x008c3582
-       */
-      (R200_TXC_ARG_A_R0_COLOR |
-       R200_TXC_ARG_B_R1_COLOR |
-       R200_TXC_ARG_C_R1_ALPHA |
-       R200_TXC_OP_LERP),
-
-      /* GL_BLEND = 0x008c3102
-       */
-      (R200_TXC_ARG_A_R0_COLOR |
-       R200_TXC_ARG_B_TFACTOR_COLOR |
-       R200_TXC_ARG_C_R1_COLOR |
-       R200_TXC_OP_LERP),
-
-      /* GL_ADD = 0x00813002
-       */
-      (R200_TXC_ARG_A_R0_COLOR |
-       R200_TXC_ARG_B_ZERO |
-       R200_TXC_ARG_C_R1_COLOR |
-       R200_TXC_COMP_ARG_B |
-       R200_TXC_OP_MADD),
-   },
-
-   /* Unit 2:
-    */
-   {
-      /* Disable combiner stage
-       */
-      (R200_TXC_ARG_A_ZERO |
-       R200_TXC_ARG_B_ZERO |
-       R200_TXC_ARG_C_R0_COLOR |
-       R200_TXC_OP_MADD),
-
-      /* GL_REPLACE = 0x00803800
-       */
-      (R200_TXC_ARG_A_ZERO |
-       R200_TXC_ARG_B_ZERO |
-       R200_TXC_ARG_C_R2_COLOR |
-       R200_TXC_OP_MADD),
-
-      /* GL_MODULATE = 0x008001c2
-       */
-      (R200_TXC_ARG_A_R0_COLOR |
-       R200_TXC_ARG_B_R2_COLOR |
-       R200_TXC_ARG_C_ZERO |
-       R200_TXC_OP_MADD),
-
-      /* GL_DECAL = 0x008c3dc2
-       */
-      (R200_TXC_ARG_A_R0_COLOR |
-       R200_TXC_ARG_B_R2_COLOR |
-       R200_TXC_ARG_C_R2_ALPHA |
-       R200_TXC_OP_LERP),
-
-      /* GL_BLEND = 0x008c3902
-       */
-      (R200_TXC_ARG_A_R0_COLOR |
-       R200_TXC_ARG_B_TFACTOR_COLOR |
-       R200_TXC_ARG_C_R2_COLOR |
-       R200_TXC_OP_LERP),
-
-      /* GL_ADD = 0x00813802
-       */
-      (R200_TXC_ARG_A_R0_COLOR |
-       R200_TXC_ARG_B_ZERO |
-       R200_TXC_ARG_C_R2_COLOR |
-       R200_TXC_COMP_ARG_B |
-       R200_TXC_OP_MADD),
-   }
-};
-
-static GLuint r200_alpha_combine[][R200_MAX_COMBFUNC] =
-{
-   /* Unit 0:
-    */
-   {
-      /* Disable combiner stage
-       */
-      (R200_TXA_ARG_A_ZERO |
-       R200_TXA_ARG_B_ZERO |
-       R200_TXA_ARG_C_DIFFUSE_ALPHA |
-       R200_TXA_OP_MADD),
-
-
-      /* GL_REPLACE = 0x00800500
-       */
-      (R200_TXA_ARG_A_ZERO |
-       R200_TXA_ARG_B_ZERO |
-       R200_TXA_ARG_C_R0_ALPHA |
-       R200_TXA_OP_MADD),
-
-      /* GL_MODULATE = 0x00800051
-       */
-      (R200_TXA_ARG_A_DIFFUSE_ALPHA |
-       R200_TXA_ARG_B_R0_ALPHA |
-       R200_TXA_ARG_C_ZERO |
-       R200_TXA_OP_MADD),
-
-      /* GL_DECAL = 0x00800100
-       */
-      (R200_TXA_ARG_A_ZERO |
-       R200_TXA_ARG_B_ZERO |
-       R200_TXA_ARG_C_DIFFUSE_ALPHA |
-       R200_TXA_OP_MADD),
-
-      /* GL_BLEND = 0x00800051
-       */
-      (R200_TXA_ARG_A_DIFFUSE_ALPHA |
-       R200_TXA_ARG_B_TFACTOR_ALPHA |
-       R200_TXA_ARG_C_R0_ALPHA |
-       R200_TXA_OP_LERP),
-
-      /* GL_ADD = 0x00800051
-       */
-      (R200_TXA_ARG_A_DIFFUSE_ALPHA |
-       R200_TXA_ARG_B_ZERO |
-       R200_TXA_ARG_C_R0_ALPHA |
-       R200_TXA_COMP_ARG_B |
-       R200_TXA_OP_MADD),
-   },
-
-   /* Unit 1:
-    */
-   {
-      /* Disable combiner stage
-       */
-      (R200_TXA_ARG_A_ZERO |
-       R200_TXA_ARG_B_ZERO |
-       R200_TXA_ARG_C_R0_ALPHA |
-       R200_TXA_OP_MADD),
-
-      /* GL_REPLACE = 0x00800600
-       */
-      (R200_TXA_ARG_A_ZERO |
-       R200_TXA_ARG_B_ZERO |
-       R200_TXA_ARG_C_R1_ALPHA |
-       R200_TXA_OP_MADD),
-
-      /* GL_MODULATE = 0x00800061
-       */
-      (R200_TXA_ARG_A_R0_ALPHA |
-       R200_TXA_ARG_B_R1_ALPHA |
-       R200_TXA_ARG_C_ZERO |
-       R200_TXA_OP_MADD),
-
-      /* GL_DECAL = 0x00800100
-       */
-      (R200_TXA_ARG_A_ZERO |
-       R200_TXA_ARG_B_ZERO |
-       R200_TXA_ARG_C_R0_ALPHA |
-       R200_TXA_OP_MADD),
-
-      /* GL_BLEND = 0x00800061
-       */
-      (R200_TXA_ARG_A_R0_ALPHA |
-       R200_TXA_ARG_B_TFACTOR_ALPHA |
-       R200_TXA_ARG_C_R1_ALPHA |
-       R200_TXA_OP_LERP),
-
-      /* GL_ADD = 0x00800061
-       */
-      (R200_TXA_ARG_A_R0_ALPHA |
-       R200_TXA_ARG_B_ZERO |
-       R200_TXA_ARG_C_R1_ALPHA |
-       R200_TXA_COMP_ARG_B |
-       R200_TXA_OP_MADD),
-   },
-
-   /* Unit 2:
-    */
-   {
-      /* Disable combiner stage
-       */
-      (R200_TXA_ARG_A_ZERO |
-       R200_TXA_ARG_B_ZERO |
-       R200_TXA_ARG_C_R0_ALPHA |
-       R200_TXA_OP_MADD),
-
-      /* GL_REPLACE = 0x00800700
-       */
-      (R200_TXA_ARG_A_ZERO |
-       R200_TXA_ARG_B_ZERO |
-       R200_TXA_ARG_C_R2_ALPHA |
-       R200_TXA_OP_MADD),
-
-      /* GL_MODULATE = 0x00800071
-       */
-      (R200_TXA_ARG_A_R0_ALPHA |
-       R200_TXA_ARG_B_R2_ALPHA |
-       R200_TXA_ARG_C_ZERO |
-       R200_TXA_OP_MADD),
-
-      /* GL_DECAL = 0x00800100
-       */
-      (R200_TXA_ARG_A_ZERO |
-       R200_TXA_ARG_B_ZERO |
-       R200_TXA_ARG_C_R0_ALPHA |
-       R200_TXA_OP_MADD),
-
-      /* GL_BLEND = 0x00800071
-       */
-      (R200_TXA_ARG_A_R0_ALPHA |
-       R200_TXA_ARG_B_TFACTOR_ALPHA |
-       R200_TXA_ARG_C_R2_ALPHA |
-       R200_TXA_OP_LERP),
-
-      /* GL_ADD = 0x00800021
-       */
-      (R200_TXA_ARG_A_R0_ALPHA |
-       R200_TXA_ARG_B_ZERO |
-       R200_TXA_ARG_C_R2_ALPHA |
-       R200_TXA_COMP_ARG_B |
-       R200_TXA_OP_MADD),
-   }
-};
-
-
 /* GL_ARB_texture_env_combine support
  */
 
@@ -588,22 +156,34 @@ static GLuint r200_register_color[][R200_MAX_TEXTURE_UNITS] =
    {
       R200_TXC_ARG_A_R0_COLOR,
       R200_TXC_ARG_A_R1_COLOR,
-      R200_TXC_ARG_A_R2_COLOR
+      R200_TXC_ARG_A_R2_COLOR,
+      R200_TXC_ARG_A_R3_COLOR,
+      R200_TXC_ARG_A_R4_COLOR,
+      R200_TXC_ARG_A_R5_COLOR
    },
    {
       R200_TXC_ARG_A_R0_COLOR | R200_TXC_COMP_ARG_A,
       R200_TXC_ARG_A_R1_COLOR | R200_TXC_COMP_ARG_A,
-      R200_TXC_ARG_A_R2_COLOR | R200_TXC_COMP_ARG_A
+      R200_TXC_ARG_A_R2_COLOR | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R3_COLOR | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R4_COLOR | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R5_COLOR | R200_TXC_COMP_ARG_A
    },
    {
       R200_TXC_ARG_A_R0_ALPHA,
       R200_TXC_ARG_A_R1_ALPHA,
-      R200_TXC_ARG_A_R2_ALPHA
+      R200_TXC_ARG_A_R2_ALPHA,
+      R200_TXC_ARG_A_R3_ALPHA,
+      R200_TXC_ARG_A_R4_ALPHA,
+      R200_TXC_ARG_A_R5_ALPHA
    },
    {
       R200_TXC_ARG_A_R0_ALPHA | R200_TXC_COMP_ARG_A,
       R200_TXC_ARG_A_R1_ALPHA | R200_TXC_COMP_ARG_A,
-      R200_TXC_ARG_A_R2_ALPHA | R200_TXC_COMP_ARG_A
+      R200_TXC_ARG_A_R2_ALPHA | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R3_ALPHA | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R4_ALPHA | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R5_ALPHA | R200_TXC_COMP_ARG_A
    },
 };
 
@@ -615,6 +195,14 @@ static GLuint r200_tfactor_color[] =
    R200_TXC_ARG_A_TFACTOR_ALPHA | R200_TXC_COMP_ARG_A
 };
 
+static GLuint r200_tfactor1_color[] = /* TFACTOR1 colour-combiner arguments, indexed by the 0..3 operand index op computed in r200UpdateTextureEnv */
+{
+   R200_TXC_ARG_A_TFACTOR1_COLOR, /* entry order follows the GL operand enums: SRC_COLOR, ONE_MINUS_SRC_COLOR, SRC_ALPHA, ONE_MINUS_SRC_ALPHA */
+   R200_TXC_ARG_A_TFACTOR1_COLOR | R200_TXC_COMP_ARG_A,
+   R200_TXC_ARG_A_TFACTOR1_ALPHA,
+   R200_TXC_ARG_A_TFACTOR1_ALPHA | R200_TXC_COMP_ARG_A
+};
+
 static GLuint r200_primary_color[] =
 {
    R200_TXC_ARG_A_DIFFUSE_COLOR,
@@ -642,12 +230,18 @@ static GLuint r200_register_alpha[][R200_MAX_TEXTURE_UNITS] =
    {
       R200_TXA_ARG_A_R0_ALPHA,
       R200_TXA_ARG_A_R1_ALPHA,
-      R200_TXA_ARG_A_R2_ALPHA
+      R200_TXA_ARG_A_R2_ALPHA,
+      R200_TXA_ARG_A_R3_ALPHA,
+      R200_TXA_ARG_A_R4_ALPHA,
+      R200_TXA_ARG_A_R5_ALPHA
    },
    {
       R200_TXA_ARG_A_R0_ALPHA | R200_TXA_COMP_ARG_A,
       R200_TXA_ARG_A_R1_ALPHA | R200_TXA_COMP_ARG_A,
-      R200_TXA_ARG_A_R2_ALPHA | R200_TXA_COMP_ARG_A
+      R200_TXA_ARG_A_R2_ALPHA | R200_TXA_COMP_ARG_A,
+      R200_TXA_ARG_A_R3_ALPHA | R200_TXA_COMP_ARG_A,
+      R200_TXA_ARG_A_R4_ALPHA | R200_TXA_COMP_ARG_A,
+      R200_TXA_ARG_A_R5_ALPHA | R200_TXA_COMP_ARG_A
    },
 };
 
@@ -657,6 +251,12 @@ static GLuint r200_tfactor_alpha[] =
    R200_TXA_ARG_A_TFACTOR_ALPHA | R200_TXA_COMP_ARG_A
 };
 
+static GLuint r200_tfactor1_alpha[] = /* TFACTOR1 alpha-combiner arguments, indexed by the 0..1 operand index op computed in r200UpdateTextureEnv */
+{
+   R200_TXA_ARG_A_TFACTOR1_ALPHA, /* index 0 corresponds to GL_SRC_ALPHA, index 1 to GL_ONE_MINUS_SRC_ALPHA */
+   R200_TXA_ARG_A_TFACTOR1_ALPHA | R200_TXA_COMP_ARG_A
+};
+
 static GLuint r200_primary_alpha[] =
 {
    R200_TXA_ARG_A_DIFFUSE_ALPHA,
@@ -702,13 +302,17 @@ do {                                                      \
  * Texture unit state management
  */
 
-static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit )
+static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuint replaceargs )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
    GLuint color_combine, alpha_combine;
-   GLuint color_scale = rmesa->hw.pix[unit].cmd[PIX_PP_TXCBLEND2];
-   GLuint alpha_scale = rmesa->hw.pix[unit].cmd[PIX_PP_TXABLEND2];
+   GLuint color_scale = rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] &
+      ~(R200_TXC_SCALE_MASK | R200_TXC_OUTPUT_REG_MASK | R200_TXC_TFACTOR_SEL_MASK |
+       R200_TXC_TFACTOR1_SEL_MASK);
+   GLuint alpha_scale = rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] &
+      ~(R200_TXA_DOT_ALPHA | R200_TXA_SCALE_MASK | R200_TXA_OUTPUT_REG_MASK |
+       R200_TXA_TFACTOR_SEL_MASK | R200_TXA_TFACTOR1_SEL_MASK);
 
    /* texUnit->_Current can be NULL if and only if the texture unit is
     * not actually enabled.
@@ -717,7 +321,7 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit )
           || (texUnit->_Current != NULL) );
 
    if ( R200_DEBUG & DEBUG_TEXTURE ) {
-      fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, ctx, unit );
+      fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit );
    }
 
    /* Set the texture environment state.  Isn't this nice and clean?
@@ -726,464 +330,609 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit )
     * reduces the amount of special-casing we have to do, alpha-only
     * textures being a notable exception.
     */
+
+   color_scale |= ((rmesa->state.texture.unit[unit].outputreg + 1) << R200_TXC_OUTPUT_REG_SHIFT) |
+                       (unit << R200_TXC_TFACTOR_SEL_SHIFT) |
+                       (replaceargs << R200_TXC_TFACTOR1_SEL_SHIFT);
+   alpha_scale |= ((rmesa->state.texture.unit[unit].outputreg + 1) << R200_TXA_OUTPUT_REG_SHIFT) |
+                       (unit << R200_TXA_TFACTOR_SEL_SHIFT) |
+                       (replaceargs << R200_TXA_TFACTOR1_SEL_SHIFT);
+
    if ( !texUnit->_ReallyEnabled ) {
-      /* Don't cache these results.
-       */
-      rmesa->state.texture.unit[unit].format = 0;
-      rmesa->state.texture.unit[unit].envMode = 0;
-      color_combine = r200_color_combine[unit][R200_DISABLE];
-      alpha_combine = r200_alpha_combine[unit][R200_DISABLE];
+      assert( unit == 0);
+      color_combine = R200_TXC_ARG_A_ZERO | R200_TXC_ARG_B_ZERO
+         | R200_TXC_ARG_C_DIFFUSE_COLOR | R200_TXC_OP_MADD;
+      alpha_combine = R200_TXA_ARG_A_ZERO | R200_TXA_ARG_B_ZERO
+         | R200_TXA_ARG_C_DIFFUSE_ALPHA | R200_TXA_OP_MADD;
    }
    else {
-      const struct gl_texture_object *tObj = texUnit->_Current;
-      const GLenum format = tObj->Image[tObj->BaseLevel]->Format;
       GLuint color_arg[3], alpha_arg[3];
-      GLuint i, numColorArgs = 0, numAlphaArgs = 0;
-      GLuint RGBshift = texUnit->CombineScaleShiftRGB;
-      GLuint Ashift = texUnit->CombineScaleShiftA;
+      GLuint i;
+      const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
+      const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
+      GLuint RGBshift = texUnit->_CurrentCombine->ScaleShiftRGB;
+      GLuint Ashift = texUnit->_CurrentCombine->ScaleShiftA;
 
-      switch ( texUnit->EnvMode ) {
-      case GL_REPLACE:
-        switch ( format ) {
-        case GL_RGBA:
-        case GL_LUMINANCE_ALPHA:
-        case GL_INTENSITY:
-           color_combine = r200_color_combine[unit][R200_REPLACE];
-           alpha_combine = r200_alpha_combine[unit][R200_REPLACE];
-           break;
-        case GL_ALPHA:
-           color_combine = r200_color_combine[unit][R200_DISABLE];
-           alpha_combine = r200_alpha_combine[unit][R200_REPLACE];
+
+      const GLint replaceoprgb =
+        ctx->Texture.Unit[replaceargs]._CurrentCombine->OperandRGB[0] - GL_SRC_COLOR;
+      const GLint replaceopa =
+        ctx->Texture.Unit[replaceargs]._CurrentCombine->OperandA[0] - GL_SRC_ALPHA;
+
+      /* Step 1:
+       * Extract the color and alpha combine function arguments.
+       */
+      for ( i = 0 ; i < numColorArgs ; i++ ) {
+        GLint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR;
+        const GLint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
+        assert(op >= 0);
+        assert(op <= 3);
+        switch ( srcRGBi ) {
+        case GL_TEXTURE:
+           color_arg[i] = r200_register_color[op][unit];
            break;
-        case GL_LUMINANCE:
-        case GL_RGB:
-        case GL_YCBCR_MESA:
-           color_combine = r200_color_combine[unit][R200_REPLACE];
-           alpha_combine = r200_alpha_combine[unit][R200_DISABLE];
+        case GL_CONSTANT:
+           color_arg[i] = r200_tfactor_color[op];
            break;
-        case GL_COLOR_INDEX:
-        default:
-           return GL_FALSE;
-        }
-        break;
-
-      case GL_MODULATE:
-        switch ( format ) {
-        case GL_RGBA:
-        case GL_LUMINANCE_ALPHA:
-        case GL_INTENSITY:
-           color_combine = r200_color_combine[unit][R200_MODULATE];
-           alpha_combine = r200_alpha_combine[unit][R200_MODULATE];
+        case GL_PRIMARY_COLOR:
+           color_arg[i] = r200_primary_color[op];
            break;
-        case GL_ALPHA:
-           color_combine = r200_color_combine[unit][R200_DISABLE];
-           alpha_combine = r200_alpha_combine[unit][R200_MODULATE];
+        case GL_PREVIOUS:
+           if (replaceargs != unit) {
+              const GLint srcRGBreplace =
+                 ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceRGB[0];
+              if (op >= 2) {
+                 op = op ^ replaceopa;
+              }
+              else {
+                 op = op ^ replaceoprgb;
+              }
+              switch (srcRGBreplace) {
+              case GL_TEXTURE:
+                 color_arg[i] = r200_register_color[op][replaceargs];
+                 break;
+              case GL_CONSTANT:
+                 color_arg[i] = r200_tfactor1_color[op];
+                 break;
+              case GL_PRIMARY_COLOR:
+                 color_arg[i] = r200_primary_color[op];
+                 break;
+              case GL_PREVIOUS:
+                 if (slot == 0)
+                    color_arg[i] = r200_primary_color[op];
+                 else
+                    color_arg[i] = r200_register_color[op]
+                       [rmesa->state.texture.unit[replaceargs - 1].outputreg];
+                 break;
+              case GL_ZERO:
+                 color_arg[i] = r200_zero_color[op];
+                 break;
+              case GL_ONE:
+                 color_arg[i] = r200_zero_color[op+1];
+                 break;
+              case GL_TEXTURE0:
+              case GL_TEXTURE1:
+              case GL_TEXTURE2:
+              case GL_TEXTURE3:
+              case GL_TEXTURE4:
+              case GL_TEXTURE5:
+                 color_arg[i] = r200_register_color[op][srcRGBreplace - GL_TEXTURE0];
+                 break;
+              default:
+              return GL_FALSE;
+              }
+           }
+           else {
+              if (slot == 0)
+                 color_arg[i] = r200_primary_color[op];
+              else
+                 color_arg[i] = r200_register_color[op]
+                    [rmesa->state.texture.unit[unit - 1].outputreg];
+            }
            break;
-        case GL_RGB:
-        case GL_LUMINANCE:
-        case GL_YCBCR_MESA:
-           color_combine = r200_color_combine[unit][R200_MODULATE];
-           alpha_combine = r200_alpha_combine[unit][R200_DISABLE];
+        case GL_ZERO:
+           color_arg[i] = r200_zero_color[op];
            break;
-        case GL_COLOR_INDEX:
-        default:
-           return GL_FALSE;
-        }
-        break;
-
-      case GL_DECAL:
-        switch ( format ) {
-        case GL_RGBA:
-        case GL_RGB:
-        case GL_YCBCR_MESA:
-           color_combine = r200_color_combine[unit][R200_DECAL];
-           alpha_combine = r200_alpha_combine[unit][R200_DISABLE];
+        case GL_ONE:
+           color_arg[i] = r200_zero_color[op+1];
            break;
-        case GL_ALPHA:
-        case GL_LUMINANCE:
-        case GL_LUMINANCE_ALPHA:
-        case GL_INTENSITY:
-           color_combine = r200_color_combine[unit][R200_DISABLE];
-           alpha_combine = r200_alpha_combine[unit][R200_DISABLE];
+        case GL_TEXTURE0:
+        case GL_TEXTURE1:
+        case GL_TEXTURE2:
+        case GL_TEXTURE3:
+        case GL_TEXTURE4:
+        case GL_TEXTURE5:
+           color_arg[i] = r200_register_color[op][srcRGBi - GL_TEXTURE0];
            break;
-        case GL_COLOR_INDEX:
         default:
            return GL_FALSE;
         }
-        break;
+      }
 
-      case GL_BLEND:
-        switch ( format ) {
-        case GL_RGBA:
-        case GL_RGB:
-        case GL_LUMINANCE:
-        case GL_LUMINANCE_ALPHA:
-        case GL_YCBCR_MESA:
-           color_combine = r200_color_combine[unit][R200_BLEND];
-           alpha_combine = r200_alpha_combine[unit][R200_MODULATE];
+      for ( i = 0 ; i < numAlphaArgs ; i++ ) {
+        GLint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
+        const GLint srcAi = texUnit->_CurrentCombine->SourceA[i];
+        assert(op >= 0);
+        assert(op <= 1);
+        switch ( srcAi ) {
+        case GL_TEXTURE:
+           alpha_arg[i] = r200_register_alpha[op][unit];
            break;
-        case GL_ALPHA:
-           color_combine = r200_color_combine[unit][R200_DISABLE];
-           alpha_combine = r200_alpha_combine[unit][R200_MODULATE];
+        case GL_CONSTANT:
+           alpha_arg[i] = r200_tfactor_alpha[op];
            break;
-        case GL_INTENSITY:
-           color_combine = r200_color_combine[unit][R200_BLEND];
-           alpha_combine = r200_alpha_combine[unit][R200_BLEND];
+        case GL_PRIMARY_COLOR:
+           alpha_arg[i] = r200_primary_alpha[op];
            break;
-        case GL_COLOR_INDEX:
-        default:
-           return GL_FALSE;
-        }
-        break;
-
-      case GL_ADD:
-        switch ( format ) {
-        case GL_RGBA:
-        case GL_RGB:
-        case GL_LUMINANCE:
-        case GL_LUMINANCE_ALPHA:
-        case GL_YCBCR_MESA:
-           color_combine = r200_color_combine[unit][R200_ADD];
-           alpha_combine = r200_alpha_combine[unit][R200_MODULATE];
+        case GL_PREVIOUS:
+           if (replaceargs != unit) {
+              const GLint srcAreplace =
+                 ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceA[0];
+              op = op ^ replaceopa;
+              switch (srcAreplace) {
+              case GL_TEXTURE:
+                 alpha_arg[i] = r200_register_alpha[op][replaceargs];
+                 break;
+              case GL_CONSTANT:
+                 alpha_arg[i] = r200_tfactor1_alpha[op];
+                 break;
+              case GL_PRIMARY_COLOR:
+                 alpha_arg[i] = r200_primary_alpha[op];
+                 break;
+              case GL_PREVIOUS:
+                 if (slot == 0)
+                    alpha_arg[i] = r200_primary_alpha[op];
+                 else
+                    alpha_arg[i] = r200_register_alpha[op]
+                       [rmesa->state.texture.unit[replaceargs - 1].outputreg];
+                 break;
+              case GL_ZERO:
+                 alpha_arg[i] = r200_zero_alpha[op];
+                 break;
+              case GL_ONE:
+                 alpha_arg[i] = r200_zero_alpha[op+1];
+                 break;
+              case GL_TEXTURE0:
+              case GL_TEXTURE1:
+              case GL_TEXTURE2:
+              case GL_TEXTURE3:
+              case GL_TEXTURE4:
+              case GL_TEXTURE5:
+                 alpha_arg[i] = r200_register_alpha[op][srcAreplace - GL_TEXTURE0];
+                 break;
+              default:
+              return GL_FALSE;
+              }
+           }
+           else {
+              if (slot == 0)
+                 alpha_arg[i] = r200_primary_alpha[op];
+              else
+                 alpha_arg[i] = r200_register_alpha[op]
+                   [rmesa->state.texture.unit[unit - 1].outputreg];
+            }
            break;
-        case GL_ALPHA:
-           color_combine = r200_color_combine[unit][R200_DISABLE];
-           alpha_combine = r200_alpha_combine[unit][R200_MODULATE];
+        case GL_ZERO:
+           alpha_arg[i] = r200_zero_alpha[op];
            break;
-        case GL_INTENSITY:
-           color_combine = r200_color_combine[unit][R200_ADD];
-           alpha_combine = r200_alpha_combine[unit][R200_ADD];
+        case GL_ONE:
+           alpha_arg[i] = r200_zero_alpha[op+1];
+           break;
+        case GL_TEXTURE0:
+        case GL_TEXTURE1:
+        case GL_TEXTURE2:
+        case GL_TEXTURE3:
+        case GL_TEXTURE4:
+        case GL_TEXTURE5:
+           alpha_arg[i] = r200_register_alpha[op][srcAi - GL_TEXTURE0];
            break;
-        case GL_COLOR_INDEX:
         default:
            return GL_FALSE;
         }
+      }
+
+      /* Step 2:
+       * Build up the color and alpha combine functions.
+       */
+      switch ( texUnit->_CurrentCombine->ModeRGB ) {
+      case GL_REPLACE:
+        color_combine = (R200_TXC_ARG_A_ZERO |
+                         R200_TXC_ARG_B_ZERO |
+                         R200_TXC_OP_MADD);
+        R200_COLOR_ARG( 0, C );
+        break;
+      case GL_MODULATE:
+        color_combine = (R200_TXC_ARG_C_ZERO |
+                         R200_TXC_OP_MADD);
+        R200_COLOR_ARG( 0, A );
+        R200_COLOR_ARG( 1, B );
+        break;
+      case GL_ADD:
+        color_combine = (R200_TXC_ARG_B_ZERO |
+                         R200_TXC_COMP_ARG_B | 
+                         R200_TXC_OP_MADD);
+        R200_COLOR_ARG( 0, A );
+        R200_COLOR_ARG( 1, C );
+        break;
+      case GL_ADD_SIGNED:
+        color_combine = (R200_TXC_ARG_B_ZERO |
+                         R200_TXC_COMP_ARG_B |
+                         R200_TXC_BIAS_ARG_C | /* new */
+                         R200_TXC_OP_MADD); /* was ADDSIGNED */
+        R200_COLOR_ARG( 0, A );
+        R200_COLOR_ARG( 1, C );
+        break;
+      case GL_SUBTRACT:
+        color_combine = (R200_TXC_ARG_B_ZERO |
+                         R200_TXC_COMP_ARG_B | 
+                         R200_TXC_NEG_ARG_C |
+                         R200_TXC_OP_MADD);
+        R200_COLOR_ARG( 0, A );
+        R200_COLOR_ARG( 1, C );
+        break;
+      case GL_INTERPOLATE:
+        color_combine = (R200_TXC_OP_LERP);
+        R200_COLOR_ARG( 0, B );
+        R200_COLOR_ARG( 1, A );
+        R200_COLOR_ARG( 2, C );
         break;
 
-      case GL_COMBINE:
-        /* Don't cache these results.
+      case GL_DOT3_RGB_EXT:
+      case GL_DOT3_RGBA_EXT:
+        /* The EXT version of the DOT3 extension does not support the
+         * scale factor, but the ARB version (and the version in OpenGL
+         * 1.3) does.
          */
-        rmesa->state.texture.unit[unit].format = 0;
-        rmesa->state.texture.unit[unit].envMode = 0;
-
-        /* Step 0:
-         * Calculate how many arguments we need to process.
+        RGBshift = 0;
+        /* FALLTHROUGH */
+
+      case GL_DOT3_RGB:
+      case GL_DOT3_RGBA:
+        /* DOT3 works differently on R200 than on R100.  On R100, just
+         * setting the DOT3 mode did everything for you.  On R200, the
+         * driver has to enable the biasing and scale in the inputs to
+         * put them in the proper [-1,1] range.  This is what the 4x and
+         * the -0.5 in the DOT3 spec do.  The post-scale is then set
+         * normally.
          */
-        switch ( texUnit->CombineModeRGB ) {
-        case GL_REPLACE:
-           numColorArgs = 1;
-           break;
-        case GL_MODULATE:
-        case GL_ADD:
-        case GL_ADD_SIGNED:
-        case GL_SUBTRACT:
-        case GL_DOT3_RGB:
-        case GL_DOT3_RGBA:
-        case GL_DOT3_RGB_EXT:
-        case GL_DOT3_RGBA_EXT:
-           numColorArgs = 2;
-           break;
-        case GL_INTERPOLATE:
-        case GL_MODULATE_ADD_ATI:
-        case GL_MODULATE_SIGNED_ADD_ATI:
-        case GL_MODULATE_SUBTRACT_ATI:
-           numColorArgs = 3;
-           break;
-        default:
-           return GL_FALSE;
-        }
 
-        switch ( texUnit->CombineModeA ) {
-        case GL_REPLACE:
-           numAlphaArgs = 1;
-           break;
-        case GL_MODULATE:
-        case GL_ADD:
-        case GL_ADD_SIGNED:
-        case GL_SUBTRACT:
-           numAlphaArgs = 2;
-           break;
-        case GL_INTERPOLATE:
-        case GL_MODULATE_ADD_ATI:
-        case GL_MODULATE_SIGNED_ADD_ATI:
-        case GL_MODULATE_SUBTRACT_ATI:
-           numAlphaArgs = 3;
-           break;
-        default:
-           return GL_FALSE;
-        }
+        color_combine = (R200_TXC_ARG_C_ZERO |
+                         R200_TXC_OP_DOT3 |
+                         R200_TXC_BIAS_ARG_A |
+                         R200_TXC_BIAS_ARG_B |
+                         R200_TXC_SCALE_ARG_A |
+                         R200_TXC_SCALE_ARG_B);
+        R200_COLOR_ARG( 0, A );
+        R200_COLOR_ARG( 1, B );
+        break;
 
-        /* Step 1:
-         * Extract the color and alpha combine function arguments.
-         */
-        for ( i = 0 ; i < numColorArgs ; i++ ) {
-           const GLuint op = texUnit->CombineOperandRGB[i] - GL_SRC_COLOR;
-           assert(op >= 0);
-           assert(op <= 3);
-           switch ( texUnit->CombineSourceRGB[i] ) {
-           case GL_TEXTURE:
-              color_arg[i] = r200_register_color[op][unit];
-              break;
-           case GL_CONSTANT:
-              color_arg[i] = r200_tfactor_color[op];
-              break;
-           case GL_PRIMARY_COLOR:
-              color_arg[i] = r200_primary_color[op];
-              break;
-           case GL_PREVIOUS:
-              if (unit == 0)
-                 color_arg[i] = r200_primary_color[op];
-              else
-                 color_arg[i] = r200_register_color[op][0];
-              break;
-           case GL_ZERO:
-              color_arg[i] = r200_zero_color[op];
-              break;
-           case GL_ONE:
-              color_arg[i] = r200_zero_color[op+1];
-              break;
-           default:
-              return GL_FALSE;
-           }
-        }
+      case GL_MODULATE_ADD_ATI:
+        color_combine = (R200_TXC_OP_MADD);
+        R200_COLOR_ARG( 0, A );
+        R200_COLOR_ARG( 1, C );
+        R200_COLOR_ARG( 2, B );
+        break;
+      case GL_MODULATE_SIGNED_ADD_ATI:
+        color_combine = (R200_TXC_BIAS_ARG_C | /* new */
+                         R200_TXC_OP_MADD); /* was ADDSIGNED */
+        R200_COLOR_ARG( 0, A );
+        R200_COLOR_ARG( 1, C );
+        R200_COLOR_ARG( 2, B );
+        break;
+      case GL_MODULATE_SUBTRACT_ATI:
+        color_combine = (R200_TXC_NEG_ARG_C |
+                         R200_TXC_OP_MADD);
+        R200_COLOR_ARG( 0, A );
+        R200_COLOR_ARG( 1, C );
+        R200_COLOR_ARG( 2, B );
+        break;
+      default:
+        return GL_FALSE;
+      }
 
-        for ( i = 0 ; i < numAlphaArgs ; i++ ) {
-           const GLuint op = texUnit->CombineOperandA[i] - GL_SRC_ALPHA;
-           assert(op >= 0);
-           assert(op <= 1);
-           switch ( texUnit->CombineSourceA[i] ) {
-           case GL_TEXTURE:
-              alpha_arg[i] = r200_register_alpha[op][unit];
-              break;
-           case GL_CONSTANT:
-              alpha_arg[i] = r200_tfactor_alpha[op];
-              break;
-           case GL_PRIMARY_COLOR:
-              alpha_arg[i] = r200_primary_alpha[op];
-              break;
-           case GL_PREVIOUS:
-              if (unit == 0)
-                 alpha_arg[i] = r200_primary_alpha[op];
-              else
-                 alpha_arg[i] = r200_register_alpha[op][0];
-              break;
-           case GL_ZERO:
-              alpha_arg[i] = r200_zero_alpha[op];
-              break;
-           case GL_ONE:
-              alpha_arg[i] = r200_zero_alpha[op+1];
-              break;
-           default:
-              return GL_FALSE;
-           }
-        }
+      switch ( texUnit->_CurrentCombine->ModeA ) {
+      case GL_REPLACE:
+        alpha_combine = (R200_TXA_ARG_A_ZERO |
+                         R200_TXA_ARG_B_ZERO |
+                         R200_TXA_OP_MADD);
+        R200_ALPHA_ARG( 0, C );
+        break;
+      case GL_MODULATE:
+        alpha_combine = (R200_TXA_ARG_C_ZERO |
+                         R200_TXA_OP_MADD);
+        R200_ALPHA_ARG( 0, A );
+        R200_ALPHA_ARG( 1, B );
+        break;
+      case GL_ADD:
+        alpha_combine = (R200_TXA_ARG_B_ZERO |
+                         R200_TXA_COMP_ARG_B |
+                         R200_TXA_OP_MADD);
+        R200_ALPHA_ARG( 0, A );
+        R200_ALPHA_ARG( 1, C );
+        break;
+      case GL_ADD_SIGNED:
+        alpha_combine = (R200_TXA_ARG_B_ZERO |
+                         R200_TXA_COMP_ARG_B |
+                         R200_TXA_BIAS_ARG_C | /* new */
+                         R200_TXA_OP_MADD); /* was ADDSIGNED */
+        R200_ALPHA_ARG( 0, A );
+        R200_ALPHA_ARG( 1, C );
+        break;
+      case GL_SUBTRACT:
+        alpha_combine = (R200_TXA_ARG_B_ZERO |
+                         R200_TXA_COMP_ARG_B |
+                         R200_TXA_NEG_ARG_C |
+                         R200_TXA_OP_MADD);
+        R200_ALPHA_ARG( 0, A );
+        R200_ALPHA_ARG( 1, C );
+        break;
+      case GL_INTERPOLATE:
+        alpha_combine = (R200_TXA_OP_LERP);
+        R200_ALPHA_ARG( 0, B );
+        R200_ALPHA_ARG( 1, A );
+        R200_ALPHA_ARG( 2, C );
+        break;
+
+      case GL_MODULATE_ADD_ATI:
+        alpha_combine = (R200_TXA_OP_MADD);
+        R200_ALPHA_ARG( 0, A );
+        R200_ALPHA_ARG( 1, C );
+        R200_ALPHA_ARG( 2, B );
+        break;
+      case GL_MODULATE_SIGNED_ADD_ATI:
+        alpha_combine = (R200_TXA_BIAS_ARG_C | /* new */
+                         R200_TXA_OP_MADD); /* was ADDSIGNED */
+        R200_ALPHA_ARG( 0, A );
+        R200_ALPHA_ARG( 1, C );
+        R200_ALPHA_ARG( 2, B );
+        break;
+      case GL_MODULATE_SUBTRACT_ATI:
+        alpha_combine = (R200_TXA_NEG_ARG_C |
+                         R200_TXA_OP_MADD);
+        R200_ALPHA_ARG( 0, A );
+        R200_ALPHA_ARG( 1, C );
+        R200_ALPHA_ARG( 2, B );
+        break;
+      default:
+        return GL_FALSE;
+      }
+
+      if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT)
+          || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ) {
+        alpha_scale |= R200_TXA_DOT_ALPHA;
+        Ashift = RGBshift;
+      }
+
+      /* Step 3:
+       * Apply the scale factor.
+       */
+      color_scale |= (RGBshift << R200_TXC_SCALE_SHIFT);
+      alpha_scale |= (Ashift   << R200_TXA_SCALE_SHIFT);
+
+      /* All done!
+       */
+   }
+
+   if ( rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND] != color_combine ||
+       rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND] != alpha_combine ||
+       rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] != color_scale ||
+       rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] != alpha_scale) {
+      R200_STATECHANGE( rmesa, pix[slot] );
+      rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND] = color_combine;
+      rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND] = alpha_combine;
+      rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] = color_scale;
+      rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] = alpha_scale;
+   }
+
+   return GL_TRUE;
+}
+
+void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+                     unsigned long long offset, GLint depth, GLuint pitch)
+{
+       r200ContextPtr rmesa = pDRICtx->driverPrivate;
+       struct gl_texture_object *tObj =
+           _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+       radeonTexObjPtr t;
+       /* Override the named texture's hardware offset, pitch and format with the caller-supplied values; unknown texture names are silently ignored. */
+       if (!tObj)
+               return;
+
+       t = (radeonTexObjPtr) tObj->DriverData;
+       /* NOTE(review): DriverData is dereferenced below without a NULL check - confirm it is always set here */
+       t->image_override = GL_TRUE;
+       /* a zero offset only marks the texture as overridden; offset, pitch and format are left untouched */
+       if (!offset)
+               return;
+
+       t->pp_txoffset = offset;
+       t->pp_txpitch = pitch - 32; /* NOTE(review): presumably the register encodes pitch minus 32 - confirm */
+       /* select the little-endian table entry matching depth; the entry's filter bits are OR'ed in, never cleared */
+       switch (depth) {
+       case 32:
+               t->pp_txformat = tx_table_le[MESA_FORMAT_ARGB8888].format;
+               t->pp_txfilter |= tx_table_le[MESA_FORMAT_ARGB8888].filter;
+               break;
+       case 24:
+       default: /* any depth other than 32 or 16 is treated like 24 */
+               t->pp_txformat = tx_table_le[MESA_FORMAT_RGB888].format;
+               t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB888].filter;
+               break;
+       case 16:
+               t->pp_txformat = tx_table_le[MESA_FORMAT_RGB565].format;
+               t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB565].filter;
+               break;
+       }
+}
+
+#define REF_COLOR 1 /* stageref bit: a later stage reads the color of the previous result */
+#define REF_ALPHA 2 /* stageref bit: a later stage reads the alpha of the previous result */
+/* Assign an output register to every referenced texture unit, emit one env stage per enabled unit whose result is used, and program the blend-enable bits in PP_CNTL.  Returns GL_FALSE when no register is free or a stage cannot be set up. */
+static GLboolean r200UpdateAllTexEnv( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLint i, j, currslot;
+   GLint maxunitused = -1;
+   GLboolean texregfree[6] = {GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE};
+   GLubyte stageref[7] = {0, 0, 0, 0, 0, 0, 0};
+   GLint nextunit[R200_MAX_TEXTURE_UNITS] = {0, 0, 0, 0, 0, 0};
+   GLint currentnext = -1;
+   GLboolean ok;
 
-        /* Step 2:
-         * Build up the color and alpha combine functions.
-         */
-        switch ( texUnit->CombineModeRGB ) {
-        case GL_REPLACE:
-           color_combine = (R200_TXC_ARG_A_ZERO |
-                            R200_TXC_ARG_B_ZERO |
-                            R200_TXC_OP_MADD);
-           R200_COLOR_ARG( 0, C );
-           break;
-        case GL_MODULATE:
-           color_combine = (R200_TXC_ARG_C_ZERO |
-                            R200_TXC_OP_MADD);
-           R200_COLOR_ARG( 0, A );
-           R200_COLOR_ARG( 1, B );
-           break;
-        case GL_ADD:
-           color_combine = (R200_TXC_ARG_B_ZERO |
-                            R200_TXC_COMP_ARG_B | 
-                            R200_TXC_OP_MADD);
-           R200_COLOR_ARG( 0, A );
-           R200_COLOR_ARG( 1, C );
-           break;
-        case GL_ADD_SIGNED:
-           color_combine = (R200_TXC_ARG_B_ZERO |
-                            R200_TXC_COMP_ARG_B |
-                            R200_TXC_BIAS_ARG_C |      /* new */
-                            R200_TXC_OP_MADD); /* was ADDSIGNED */
-           R200_COLOR_ARG( 0, A );
-           R200_COLOR_ARG( 1, C );
-           break;
-        case GL_SUBTRACT:
-           color_combine = (R200_TXC_ARG_B_ZERO |
-                            R200_TXC_COMP_ARG_B | 
-                            R200_TXC_NEG_ARG_C |
-                            R200_TXC_OP_MADD);
-           R200_COLOR_ARG( 0, A );
-           R200_COLOR_ARG( 1, C );
-           break;
-        case GL_INTERPOLATE:
-           color_combine = (R200_TXC_OP_LERP);
-           R200_COLOR_ARG( 0, B );
-           R200_COLOR_ARG( 1, A );
-           R200_COLOR_ARG( 2, C );
-           break;
+   /* find highest used unit (stays -1 if none is enabled); the stage after it references both color and alpha */
+   for ( j = 0; j < R200_MAX_TEXTURE_UNITS; j++) {
+      if (ctx->Texture.Unit[j]._ReallyEnabled) {
+        maxunitused = j;
+      }
+   }
+   stageref[maxunitused + 1] = REF_COLOR | REF_ALPHA;
 
-        case GL_DOT3_RGB_EXT:
-        case GL_DOT3_RGBA_EXT:
-           /* The EXT version of the DOT3 extension does not support the
-            * scale factor, but the ARB version (and the version in OpenGL
-            * 1.3) does.
-            */
-           RGBshift = 0;
-           Ashift = 0;
-           /* FALLTHROUGH */
-
-        case GL_DOT3_RGB:
-        case GL_DOT3_RGBA:
-           /* DOT3 works differently on R200 than on R100.  On R100, just
-            * setting the DOT3 mode did everything for you.  On R200, the
-            * driver has to enable the biasing (the -0.5 in the combine
-            * equation), and it has add the 4x scale factor.  The hardware
-            * only supports up to 8x in the post filter, so 2x part of it
-            * happens on the inputs going into the combiner.
-            */
-
-           RGBshift++;
-           Ashift = RGBshift;
-
-           color_combine = (R200_TXC_ARG_C_ZERO |
-                            R200_TXC_OP_DOT3 |
-                            R200_TXC_BIAS_ARG_A |
-                            R200_TXC_BIAS_ARG_B |
-                            R200_TXC_SCALE_ARG_A |
-                            R200_TXC_SCALE_ARG_B);
-           R200_COLOR_ARG( 0, A );
-           R200_COLOR_ARG( 1, B );
-           break;
+   for ( j = maxunitused; j >= 0; j-- ) {
+      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[j];
 
-        case GL_MODULATE_ADD_ATI:
-           color_combine = (R200_TXC_OP_MADD);
-           R200_COLOR_ARG( 0, A );
-           R200_COLOR_ARG( 1, C );
-           R200_COLOR_ARG( 2, B );
-           break;
-        case GL_MODULATE_SIGNED_ADD_ATI:
-           color_combine = (R200_TXC_BIAS_ARG_C |      /* new */
-                            R200_TXC_OP_MADD); /* was ADDSIGNED */
-           R200_COLOR_ARG( 0, A );
-           R200_COLOR_ARG( 1, C );
-           R200_COLOR_ARG( 2, B );
-           break;
-        case GL_MODULATE_SUBTRACT_ATI:
-           color_combine = (R200_TXC_NEG_ARG_C |
-                            R200_TXC_OP_MADD);
-           R200_COLOR_ARG( 0, A );
-           R200_COLOR_ARG( 1, C );
-           R200_COLOR_ARG( 2, B );
-           break;
-        default:
-           return GL_FALSE;
-        }
+      rmesa->state.texture.unit[j].outputreg = -1;
 
-        switch ( texUnit->CombineModeA ) {
-        case GL_REPLACE:
-           alpha_combine = (R200_TXA_ARG_A_ZERO |
-                            R200_TXA_ARG_B_ZERO |
-                            R200_TXA_OP_MADD);
-           R200_ALPHA_ARG( 0, C );
-           break;
-        case GL_MODULATE:
-           alpha_combine = (R200_TXA_ARG_C_ZERO |
-                            R200_TXA_OP_MADD);
-           R200_ALPHA_ARG( 0, A );
-           R200_ALPHA_ARG( 1, B );
-           break;
-        case GL_ADD:
-           alpha_combine = (R200_TXA_ARG_B_ZERO |
-                            R200_TXA_COMP_ARG_B |
-                            R200_TXA_OP_MADD);
-           R200_ALPHA_ARG( 0, A );
-           R200_ALPHA_ARG( 1, C );
-           break;
-        case GL_ADD_SIGNED:
-           alpha_combine = (R200_TXA_ARG_B_ZERO |
-                            R200_TXA_COMP_ARG_B |
-                            R200_TXA_BIAS_ARG_C |      /* new */
-                            R200_TXA_OP_MADD); /* was ADDSIGNED */
-           R200_ALPHA_ARG( 0, A );
-           R200_ALPHA_ARG( 1, C );
-           break;
-        case GL_SUBTRACT:
-           alpha_combine = (R200_TXA_ARG_B_ZERO |
-                            R200_TXA_COMP_ARG_B |
-                            R200_TXA_NEG_ARG_C |
-                            R200_TXA_OP_MADD);
-           R200_ALPHA_ARG( 0, A );
-           R200_ALPHA_ARG( 1, C );
-           break;
-        case GL_INTERPOLATE:
-           alpha_combine = (R200_TXA_OP_LERP);
-           R200_ALPHA_ARG( 0, B );
-           R200_ALPHA_ARG( 1, A );
-           R200_ALPHA_ARG( 2, C );
-           break;
+      if (stageref[j + 1]) {
 
-        case GL_MODULATE_ADD_ATI:
-           alpha_combine = (R200_TXA_OP_MADD);
-           R200_ALPHA_ARG( 0, A );
-           R200_ALPHA_ARG( 1, C );
-           R200_ALPHA_ARG( 2, B );
-           break;
-        case GL_MODULATE_SIGNED_ADD_ATI:
-           alpha_combine = (R200_TXA_BIAS_ARG_C |      /* new */
-                            R200_TXA_OP_MADD); /* was ADDSIGNED */
-           R200_ALPHA_ARG( 0, A );
-           R200_ALPHA_ARG( 1, C );
-           R200_ALPHA_ARG( 2, B );
-           break;
-        case GL_MODULATE_SUBTRACT_ATI:
-           alpha_combine = (R200_TXA_NEG_ARG_C |
-                            R200_TXA_OP_MADD);
-           R200_ALPHA_ARG( 0, A );
-           R200_ALPHA_ARG( 1, C );
-           R200_ALPHA_ARG( 2, B );
-           break;
-        default:
+        /* Use the lowest-numbered free register; this automatically yields reg0 for
+           the last stage.  Needed even for disabled units, as one may get referenced
+           by the replace optimization. */
+        for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS; i++ ) {
+           if (texregfree[i]) {
+              rmesa->state.texture.unit[j].outputreg = i;
+              break;
+           }
+        }
+        if (rmesa->state.texture.unit[j].outputreg == -1) {
+           /* no free register left for this unit. Need a fallback :-( */
            return GL_FALSE;
+         }
+
+         nextunit[j] = currentnext;
+
+         if (!texUnit->_ReallyEnabled) {
+        /* disabled stages are only referenced "indirectly":
+            pass the references through so the lower stages are not cut off */
+           stageref[j] = REF_COLOR | REF_ALPHA;
+           continue;
+         }
+        currentnext = j;
+        const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
+        const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
+        const GLboolean isdot3rgba = (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ||
+                                     (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT);
+
+
+        /* Check whether the color part is needed.  dot3_rgba is a special case:
+           even if only its alpha is referenced later on, it still uses the color args. */
+        if ((stageref[j + 1] & REF_COLOR) || isdot3rgba) {
+           for ( i = 0 ; i < numColorArgs ; i++ ) {
+              const GLuint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
+              const GLuint op = texUnit->_CurrentCombine->OperandRGB[i];
+              switch ( srcRGBi ) {
+              case GL_PREVIOUS:
+                 /* op 0/1 reference color, op 2/3 alpha.  NOTE(review): op is the raw OperandRGB enum here (not rebased by GL_SRC_COLOR), so this relies on the enum's low bits - confirm */
+                 stageref[j] |= (op >> 1) + 1;
+                 break;
+              case GL_TEXTURE:
+                 texregfree[j] = GL_FALSE;
+                 break;
+              case GL_TEXTURE0:
+              case GL_TEXTURE1:
+              case GL_TEXTURE2:
+              case GL_TEXTURE3:
+              case GL_TEXTURE4:
+              case GL_TEXTURE5:
+                 texregfree[srcRGBi - GL_TEXTURE0] = GL_FALSE;
+                 break;
+              default: /* don't care about other sources here */
+                 break;
+              }
+           }
         }
 
-        if ( (texUnit->CombineModeRGB == GL_DOT3_RGB_EXT)
-             || (texUnit->CombineModeRGB == GL_DOT3_RGB) ) {
-           alpha_scale |= R200_TXA_DOT_ALPHA;
+        /* alpha args are ignored for dot3_rgba */
+        if ((stageref[j + 1] & REF_ALPHA) && !isdot3rgba) {
+
+           for ( i = 0 ; i < numAlphaArgs ; i++ ) {
+              const GLuint srcAi = texUnit->_CurrentCombine->SourceA[i];
+              switch ( srcAi ) {
+              case GL_PREVIOUS:
+                 stageref[j] |= REF_ALPHA;
+                 break;
+              case GL_TEXTURE:
+                 texregfree[j] = GL_FALSE;
+                 break;
+              case GL_TEXTURE0:
+              case GL_TEXTURE1:
+              case GL_TEXTURE2:
+              case GL_TEXTURE3:
+              case GL_TEXTURE4:
+              case GL_TEXTURE5:
+                 texregfree[srcAi - GL_TEXTURE0] = GL_FALSE;
+                 break;
+              default: /* don't care about other sources here */
+                 break;
+              }
+           }
         }
+      }
+   }
 
-        /* Step 3:
-         * Apply the scale factor.
-         */
-        color_scale &= ~R200_TXC_SCALE_MASK;
-        alpha_scale &= ~R200_TXA_SCALE_MASK;
-        color_scale |= (RGBshift << R200_TXC_SCALE_SHIFT);
-        alpha_scale |= (Ashift   << R200_TXA_SCALE_SHIFT);
+   /* only enable texture sampling for enabled units whose texture is actually read by some referenced stage */
+   for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) {
+      if (ctx->Texture.Unit[i]._ReallyEnabled && !texregfree[i])
+        rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled;
+      else rmesa->state.texture.unit[i].unitneeded = 0;
+   }
 
-        /* All done!
-         */
-        break;
+   ok = GL_TRUE;
+   currslot = 0;
+   rmesa->state.envneeded = 1;
+   /* second pass, lowest unit first: emit the env stages into consecutive slots */
+   i = 0;
+   while ((i <= maxunitused) && (i >= 0)) {
+      /* only emit an instruction for enabled units whose result is referenced */
+      if (ctx->Texture.Unit[i]._ReallyEnabled && stageref[i+1]) {
+         GLuint replaceunit = i;
+        /* try to optimize a plain, unscaled GL_REPLACE away by folding it into the next enabled unit (only one level deep though) */
+        if (   (ctx->Texture.Unit[i]._CurrentCombine->ModeRGB == GL_REPLACE) &&
+               (ctx->Texture.Unit[i]._CurrentCombine->ModeA == GL_REPLACE) &&
+               (ctx->Texture.Unit[i]._CurrentCombine->ScaleShiftRGB == 0) &&
+               (ctx->Texture.Unit[i]._CurrentCombine->ScaleShiftA == 0) &&
+               (nextunit[i] > 0) ) {
+           /* can optimize it away: emit the next unit instead, passing this one as replaceunit */
+           replaceunit = i;
+           i = nextunit[i];
+        }
 
-      default:
-        return GL_FALSE;
+        /* need env instruction slot */
+        rmesa->state.envneeded |= 1 << currslot;
+        ok = r200UpdateTextureEnv( ctx, i, currslot, replaceunit );
+        if (!ok) return GL_FALSE;
+        currslot++;
       }
+      i = i + 1;
    }
 
-   if ( rmesa->hw.pix[unit].cmd[PIX_PP_TXCBLEND] != color_combine ||
-       rmesa->hw.pix[unit].cmd[PIX_PP_TXABLEND] != alpha_combine ||
-       rmesa->hw.pix[unit].cmd[PIX_PP_TXCBLEND2] != color_scale ||
-       rmesa->hw.pix[unit].cmd[PIX_PP_TXABLEND2] != alpha_scale) {
-      R200_STATECHANGE( rmesa, pix[unit] );
-      rmesa->hw.pix[unit].cmd[PIX_PP_TXCBLEND] = color_combine;
-      rmesa->hw.pix[unit].cmd[PIX_PP_TXABLEND] = alpha_combine;
-      rmesa->hw.pix[unit].cmd[PIX_PP_TXCBLEND2] = color_scale;
-      rmesa->hw.pix[unit].cmd[PIX_PP_TXABLEND2] = alpha_scale;
+   if (currslot == 0) {
+      /* no stage was emitted above, but at least one is needed */
+      rmesa->state.texture.unit[0].outputreg = 0;
+      ok = r200UpdateTextureEnv( ctx, 0, 0, 0 );
    }
 
-   return GL_TRUE;
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_BLEND_ENABLE_MASK | R200_MULTI_PASS_ENABLE);
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= rmesa->state.envneeded << R200_TEX_BLEND_0_ENABLE_SHIFT;
+
+   return ok;
 }
 
+#undef REF_COLOR
+#undef REF_ALPHA
+
+
 #define TEXOBJ_TXFILTER_MASK (R200_MAX_MIP_LEVEL_MASK |                \
                              R200_MIN_FILTER_MASK |            \
                              R200_MAG_FILTER_MASK |            \
@@ -1197,22 +946,26 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit )
 #define TEXOBJ_TXFORMAT_MASK (R200_TXFORMAT_WIDTH_MASK |       \
                              R200_TXFORMAT_HEIGHT_MASK |       \
                              R200_TXFORMAT_FORMAT_MASK |       \
-                              R200_TXFORMAT_F5_WIDTH_MASK |    \
-                              R200_TXFORMAT_F5_HEIGHT_MASK |   \
+                             R200_TXFORMAT_F5_WIDTH_MASK |     \
+                             R200_TXFORMAT_F5_HEIGHT_MASK |    \
                              R200_TXFORMAT_ALPHA_IN_MAP |      \
                              R200_TXFORMAT_CUBIC_MAP_ENABLE |  \
-                              R200_TXFORMAT_NON_POWER2)
+                             R200_TXFORMAT_NON_POWER2)
 
 #define TEXOBJ_TXFORMAT_X_MASK (R200_DEPTH_LOG2_MASK |         \
                                 R200_TEXCOORD_MASK |           \
+                                R200_CLAMP_Q_MASK |            \
                                 R200_VOLUME_FILTER_MASK)
 
 
 static void import_tex_obj_state( r200ContextPtr rmesa,
                                  int unit,
-                                 r200TexObjPtr texobj )
+                                 radeonTexObjPtr texobj )
 {
-   GLuint *cmd = R200_DB_STATE( tex[unit] );
+/* do not use RADEON_DB_STATE to avoid stale texture caches */
+   GLuint *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
+
+   R200_STATECHANGE( rmesa, tex[unit] );
 
    cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK;
    cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
@@ -1222,92 +975,114 @@ static void import_tex_obj_state( r200ContextPtr rmesa,
    cmd[TEX_PP_TXFORMAT_X] |= texobj->pp_txformat_x & TEXOBJ_TXFORMAT_X_MASK;
    cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */
    cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */
-   cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
    cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
-   R200_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );
+   if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+      cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset;
+   }
+   else {
+      cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset;
+   }
+
+   if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
+      GLuint *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
+      //      GLuint bytesPerFace = texobj->base.totalSize / 6;
+      //      ASSERT(texobj->base.totalSize % 6 == 0);
+      GLuint bytesPerFace = 1; // TODO
 
-   if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
-      GLuint *cube_cmd = R200_DB_STATE( cube[unit] );
-      GLuint bytesPerFace = texobj->base.totalSize / 6;
-      ASSERT(texobj->totalSize % 6 == 0);
+      R200_STATECHANGE( rmesa, cube[unit] );
       cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+      if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+        /* that value is submitted twice. could change cube atom
+           to not include that command when new drm is used */
+        cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+      }
       cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace;
       cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace;
       cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace;
       cube_cmd[CUBE_PP_CUBIC_OFFSET_F4] = texobj->pp_txoffset + 4 * bytesPerFace;
       cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace;
-      R200_DB_STATECHANGE( rmesa, &rmesa->hw.cube[unit] );
    }
 
    texobj->dirty_state &= ~(1<<unit);
 }
 
-
-
-
 static void set_texgen_matrix( r200ContextPtr rmesa, 
                               GLuint unit,
                               const GLfloat *s_plane,
                               const GLfloat *t_plane,
-                              const GLfloat *r_plane )
+                              const GLfloat *r_plane,
+                              const GLfloat *q_plane )
 {
-   static const GLfloat scale_identity[4] = { 1,1,1,1 };
-
-   if (!TEST_EQ_4V( s_plane, scale_identity) ||
-       !TEST_EQ_4V( t_plane, scale_identity) ||
-       !TEST_EQ_4V( r_plane, scale_identity)) {
-      rmesa->TexGenEnabled |= R200_TEXMAT_0_ENABLE<<unit;
-      rmesa->TexGenMatrix[unit].m[0]  = s_plane[0];
-      rmesa->TexGenMatrix[unit].m[4]  = s_plane[1];
-      rmesa->TexGenMatrix[unit].m[8]  = s_plane[2];
-      rmesa->TexGenMatrix[unit].m[12] = s_plane[3];
-
-      rmesa->TexGenMatrix[unit].m[1]  = t_plane[0];
-      rmesa->TexGenMatrix[unit].m[5]  = t_plane[1];
-      rmesa->TexGenMatrix[unit].m[9]  = t_plane[2];
-      rmesa->TexGenMatrix[unit].m[13] = t_plane[3];
-
-      /* NOTE: r_plane goes in the 4th row, not 3rd! */
-      rmesa->TexGenMatrix[unit].m[3]  = r_plane[0];
-      rmesa->TexGenMatrix[unit].m[7]  = r_plane[1];
-      rmesa->TexGenMatrix[unit].m[11] = r_plane[2];
-      rmesa->TexGenMatrix[unit].m[15] = r_plane[3];
-
-      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
-   }
-}
+   GLfloat m[16];
+
+   m[0]  = s_plane[0];
+   m[4]  = s_plane[1];
+   m[8]  = s_plane[2];
+   m[12] = s_plane[3];
+
+   m[1]  = t_plane[0];
+   m[5]  = t_plane[1];
+   m[9]  = t_plane[2];
+   m[13] = t_plane[3];
+
+   m[2]  = r_plane[0];
+   m[6]  = r_plane[1];
+   m[10] = r_plane[2];
+   m[14] = r_plane[3];
+
+   m[3]  = q_plane[0];
+   m[7]  = q_plane[1];
+   m[11] = q_plane[2];
+   m[15] = q_plane[3];
 
-/* Need this special matrix to get correct reflection map coords */
-static void
-set_texgen_reflection_matrix( r200ContextPtr rmesa, GLuint unit )
-{
-   static const GLfloat m[16] = {
-      -1,  0,  0,  0,
-       0, -1,  0,  0,
-       0,  0,  0, -1,
-       0,  0, -1,  0 };
    _math_matrix_loadf( &(rmesa->TexGenMatrix[unit]), m);
    _math_matrix_analyse( &(rmesa->TexGenMatrix[unit]) );
    rmesa->TexGenEnabled |= R200_TEXMAT_0_ENABLE<<unit;
 }
 
-/* Need this special matrix to get correct normal map coords */
-static void
-set_texgen_normal_map_matrix( r200ContextPtr rmesa, GLuint unit )
+
+static GLuint r200_need_dis_texgen(const GLbitfield texGenEnabled,
+                                  const GLfloat *planeS,
+                                  const GLfloat *planeT,
+                                  const GLfloat *planeR,
+                                  const GLfloat *planeQ)
 {
-   static const GLfloat m[16] = {
-      1, 0, 0, 0,
-      0, 1, 0, 0,
-      0, 0, 0, 1,
-      0, 0, 1, 0 };
-   _math_matrix_loadf( &(rmesa->TexGenMatrix[unit]), m);
-   _math_matrix_analyse( &(rmesa->TexGenMatrix[unit]) );
-   rmesa->TexGenEnabled |= R200_TEXMAT_0_ENABLE<<unit;
+   GLuint needtgenable = 0;
+
+   if (!(texGenEnabled & S_BIT)) {
+      if (((texGenEnabled & T_BIT) && planeT[0] != 0.0) ||
+        ((texGenEnabled & R_BIT) && planeR[0] != 0.0) ||
+        ((texGenEnabled & Q_BIT) && planeQ[0] != 0.0)) {
+        needtgenable |= S_BIT;
+      }
+   }
+   if (!(texGenEnabled & T_BIT)) {
+      if (((texGenEnabled & S_BIT) && planeS[1] != 0.0) ||
+        ((texGenEnabled & R_BIT) && planeR[1] != 0.0) ||
+        ((texGenEnabled & Q_BIT) && planeQ[1] != 0.0)) {
+        needtgenable |= T_BIT;
+     }
+   }
+   if (!(texGenEnabled & R_BIT)) {
+      if (((texGenEnabled & S_BIT) && planeS[2] != 0.0) ||
+        ((texGenEnabled & T_BIT) && planeT[2] != 0.0) ||
+        ((texGenEnabled & Q_BIT) && planeQ[2] != 0.0)) {
+        needtgenable |= R_BIT;
+      }
+   }
+   if (!(texGenEnabled & Q_BIT)) {
+      if (((texGenEnabled & S_BIT) && planeS[3] != 0.0) ||
+        ((texGenEnabled & T_BIT) && planeT[3] != 0.0) ||
+        ((texGenEnabled & R_BIT) && planeR[3] != 0.0)) {
+        needtgenable |= Q_BIT;
+      }
+   }
+
+   return needtgenable;
 }
 
 
-/* Ignoring the Q texcoord for now.
- *
+/*
  * Returns GL_FALSE if fallback required.  
  */
 static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
@@ -1315,466 +1090,491 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
    GLuint inputshift = R200_TEXGEN_0_INPUT_SHIFT + unit*4;
-   GLuint tmp = rmesa->TexGenEnabled;
+   GLuint tgi, tgcm;
+   GLuint mode = 0;
+   GLboolean mixed_fallback = GL_FALSE;
+   static const GLfloat I[16] = {
+      1,  0,  0,  0,
+      0,  1,  0,  0,
+      0,  0,  1,  0,
+      0,  0,  0,  1 };
+   static const GLfloat reflect[16] = {
+      -1,  0,  0,  0,
+       0, -1,  0,  0,
+       0,  0,  -1, 0,
+       0,  0,  0,  1 };
 
    rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
    rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
    rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
-   rmesa->TexGenInputs &= ~(R200_TEXGEN_INPUT_MASK<<inputshift);
-   rmesa->TexGenNeedNormals[unit] = 0;
+   rmesa->TexGenNeedNormals[unit] = GL_FALSE;
+   tgi = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] & ~(R200_TEXGEN_INPUT_MASK <<
+                                                  inputshift);
+   tgcm = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] & ~(R200_TEXGEN_COMP_MASK <<
+                                                   (unit * 4));
 
    if (0) 
       fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit);
 
-   if ((texUnit->TexGenEnabled & (S_BIT|T_BIT|R_BIT)) == 0) {
-      /* Disabled, no fallback:
-       */
-      rmesa->TexGenInputs |= 
-        (R200_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
-      return GL_TRUE;
+   if (texUnit->TexGenEnabled & S_BIT) {
+      mode = texUnit->GenModeS;
+   } else {
+      tgcm |= R200_TEXGEN_COMP_S << (unit * 4);
    }
-   else if (texUnit->TexGenEnabled & Q_BIT) {
-      /* Very easy to do this, in fact would remove a fallback case
-       * elsewhere, but I haven't done it yet...  Fallback: 
-       */
-      /*fprintf(stderr, "fallback Q_BIT\n");*/
-      return GL_FALSE;
+
+   if (texUnit->TexGenEnabled & T_BIT) {
+      if (texUnit->GenModeT != mode)
+        mixed_fallback = GL_TRUE;
+   } else {
+      tgcm |= R200_TEXGEN_COMP_T << (unit * 4);
    }
-   else if (texUnit->TexGenEnabled == (S_BIT|T_BIT) &&
-           texUnit->GenModeS == texUnit->GenModeT) {
-      /* OK */
-      rmesa->TexGenEnabled |= R200_TEXGEN_TEXMAT_0_ENABLE << unit;
-      /* continue */
+   if (texUnit->TexGenEnabled & R_BIT) {
+      if (texUnit->GenModeR != mode)
+        mixed_fallback = GL_TRUE;
+   } else {
+      tgcm |= R200_TEXGEN_COMP_R << (unit * 4);
    }
-   else if (texUnit->TexGenEnabled == (S_BIT|T_BIT|R_BIT) &&
-           texUnit->GenModeS == texUnit->GenModeT &&
-            texUnit->GenModeT == texUnit->GenModeR) {
-      /* OK */
-      rmesa->TexGenEnabled |= R200_TEXGEN_TEXMAT_0_ENABLE << unit;
-      /* continue */
+
+   if (texUnit->TexGenEnabled & Q_BIT) {
+      if (texUnit->GenModeQ != mode)
+        mixed_fallback = GL_TRUE;
+   } else {
+      tgcm |= R200_TEXGEN_COMP_Q << (unit * 4);
    }
-   else {
-      /* Mixed modes, fallback:
-       */
-      /* fprintf(stderr, "fallback mixed texgen\n"); */
+
+   if (mixed_fallback) {
+      if (R200_DEBUG & DEBUG_FALLBACKS)
+        fprintf(stderr, "fallback mixed texgen, 0x%x (0x%x 0x%x 0x%x 0x%x)\n",
+                texUnit->TexGenEnabled, texUnit->GenModeS, texUnit->GenModeT,
+                texUnit->GenModeR, texUnit->GenModeQ);
       return GL_FALSE;
    }
 
-   rmesa->TexGenEnabled |= R200_TEXGEN_TEXMAT_0_ENABLE << unit;
+/* we CANNOT do mixed mode if the texgen mode requires a plane where the input
+   is not enabled for texgen, since the planes are concatenated into texmat,
+   and thus the input will come from texcoord rather than tex gen equation!
+   Either fallback or just hope that those texcoords aren't really needed...
+   Assuming the former will cause lots of unnecessary fallbacks, the latter will
+   generate bogus results sometimes - it's pretty much impossible to really know
+   when a fallback is needed, depends on texmat and what sort of texture is bound
+   etc, - for now fallback if we're missing either S or T bits, there's a high
+   probability we need the texcoords in that case.
+   That's a lot of work for some obscure texgen mixed mode fixup - why oh why
+   doesn't the chip just directly accept the plane parameters :-(. */
+   switch (mode) {
+   case GL_OBJECT_LINEAR: {
+      GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled,
+                               texUnit->ObjectPlaneS, texUnit->ObjectPlaneT,
+                               texUnit->ObjectPlaneR, texUnit->ObjectPlaneQ );
+      if (needtgenable & (S_BIT | T_BIT)) {
+        if (R200_DEBUG & DEBUG_FALLBACKS)
+        fprintf(stderr, "fallback mixed texgen / obj plane, 0x%x\n",
+                texUnit->TexGenEnabled);
+        return GL_FALSE;
+      }
+      if (needtgenable & (R_BIT)) {
+        tgcm &= ~(R200_TEXGEN_COMP_R << (unit * 4));
+      }
+      if (needtgenable & (Q_BIT)) {
+        tgcm &= ~(R200_TEXGEN_COMP_Q << (unit * 4));
+      }
 
-   switch (texUnit->GenModeS) {
-   case GL_OBJECT_LINEAR:
-      rmesa->TexGenInputs |= R200_TEXGEN_INPUT_OBJ << inputshift;
+      tgi |= R200_TEXGEN_INPUT_OBJ << inputshift;
       set_texgen_matrix( rmesa, unit, 
-                        texUnit->ObjectPlaneS,
-                        texUnit->ObjectPlaneT,
-                         texUnit->ObjectPlaneR);
+        (texUnit->TexGenEnabled & S_BIT) ? texUnit->ObjectPlaneS : I,
+        (texUnit->TexGenEnabled & T_BIT) ? texUnit->ObjectPlaneT : I + 4,
+        (texUnit->TexGenEnabled & R_BIT) ? texUnit->ObjectPlaneR : I + 8,
+        (texUnit->TexGenEnabled & Q_BIT) ? texUnit->ObjectPlaneQ : I + 12);
+      }
       break;
 
-   case GL_EYE_LINEAR:
-      rmesa->TexGenInputs |= R200_TEXGEN_INPUT_EYE << inputshift;
-      set_texgen_matrix( rmesa, unit, 
-                        texUnit->EyePlaneS,
-                        texUnit->EyePlaneT,
-                        texUnit->EyePlaneR);
+   case GL_EYE_LINEAR: {
+      GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled,
+                               texUnit->EyePlaneS, texUnit->EyePlaneT,
+                               texUnit->EyePlaneR, texUnit->EyePlaneQ );
+      if (needtgenable & (S_BIT | T_BIT)) {
+        if (R200_DEBUG & DEBUG_FALLBACKS)
+        fprintf(stderr, "fallback mixed texgen / eye plane, 0x%x\n",
+                texUnit->TexGenEnabled);
+        return GL_FALSE;
+      }
+      if (needtgenable & (R_BIT)) {
+        tgcm &= ~(R200_TEXGEN_COMP_R << (unit * 4));
+      }
+      if (needtgenable & (Q_BIT)) {
+        tgcm &= ~(R200_TEXGEN_COMP_Q << (unit * 4));
+      }
+      tgi |= R200_TEXGEN_INPUT_EYE << inputshift;
+      set_texgen_matrix( rmesa, unit,
+        (texUnit->TexGenEnabled & S_BIT) ? texUnit->EyePlaneS : I,
+        (texUnit->TexGenEnabled & T_BIT) ? texUnit->EyePlaneT : I + 4,
+        (texUnit->TexGenEnabled & R_BIT) ? texUnit->EyePlaneR : I + 8,
+        (texUnit->TexGenEnabled & Q_BIT) ? texUnit->EyePlaneQ : I + 12);
+      }
       break;
 
    case GL_REFLECTION_MAP_NV:
       rmesa->TexGenNeedNormals[unit] = GL_TRUE;
-      rmesa->TexGenInputs |= R200_TEXGEN_INPUT_EYE_REFLECT<<inputshift;
-      set_texgen_reflection_matrix(rmesa, unit);
+      tgi |= R200_TEXGEN_INPUT_EYE_REFLECT << inputshift;
+      /* pretty weird, must only negate when lighting is enabled? */
+      if (ctx->Light.Enabled)
+        set_texgen_matrix( rmesa, unit, 
+           (texUnit->TexGenEnabled & S_BIT) ? reflect : I,
+           (texUnit->TexGenEnabled & T_BIT) ? reflect + 4 : I + 4,
+           (texUnit->TexGenEnabled & R_BIT) ? reflect + 8 : I + 8,
+           I + 12);
       break;
 
    case GL_NORMAL_MAP_NV:
       rmesa->TexGenNeedNormals[unit] = GL_TRUE;
-      rmesa->TexGenInputs |= R200_TEXGEN_INPUT_EYE_NORMAL<<inputshift;
-      set_texgen_normal_map_matrix(rmesa, unit);
+      tgi |= R200_TEXGEN_INPUT_EYE_NORMAL<<inputshift;
       break;
 
    case GL_SPHERE_MAP:
       rmesa->TexGenNeedNormals[unit] = GL_TRUE;
-      rmesa->TexGenInputs |= R200_TEXGEN_INPUT_SPHERE<<inputshift;
+      tgi |= R200_TEXGEN_INPUT_SPHERE<<inputshift;
+      break;
+
+   case 0:
+      /* No texgen coordinate (S/T/R/Q) is enabled for this unit, so just pass coords through. */
+      tgi |= unit << inputshift;
       break;
 
    default:
       /* Unsupported mode, fallback:
        */
-      /*  fprintf(stderr, "fallback unsupported texgen\n"); */
+      if (R200_DEBUG & DEBUG_FALLBACKS)
+        fprintf(stderr, "fallback unsupported texgen, %d\n",
+                texUnit->GenModeS);
       return GL_FALSE;
    }
 
+   rmesa->TexGenEnabled |= R200_TEXGEN_TEXMAT_0_ENABLE << unit;
    rmesa->TexGenCompSel |= R200_OUTPUT_TEX_0 << unit;
 
-   if (tmp != rmesa->TexGenEnabled) {
-      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+   if (tgi != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] || 
+       tgcm != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2])
+   {
+      R200_STATECHANGE(rmesa, tcg);
+      rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] = tgi;
+      rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] = tgcm;
    }
 
    return GL_TRUE;
 }
 
-
-static void disable_tex( GLcontext *ctx, int unit )
+void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
-   if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit)) {
-      /* Texture unit disabled */
-      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
-        /* The old texture is no longer bound to this texture unit.
-         * Mark it as such.
-         */
-
-        rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
-        rmesa->state.texture.unit[unit].texobj = NULL;
-      }
-
-      R200_STATECHANGE( rmesa, ctx );
-      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~((R200_TEX_0_ENABLE |
-                                          R200_TEX_BLEND_0_ENABLE) << unit);
-      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_BLEND_0_ENABLE; 
-        
-      R200_STATECHANGE( rmesa, tcl );
-      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
-        
-      if (rmesa->TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
-        TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
-      }
-
-      /* Actually want to keep all units less than max active texture
-       * enabled, right?  Fix this for >2 texunits.
-       */
-      /* FIXME: What should happen here if r200UpdateTextureEnv fails? */
-      if (unit == 0) 
-        r200UpdateTextureEnv( ctx, unit ); 
+   GLuint re_cntl;
 
+   re_cntl = rmesa->hw.set.cmd[SET_RE_CNTL] & ~(R200_VTX_STQ0_D3D << (2 * unit));
+   if (use_d3d)
+      re_cntl |= R200_VTX_STQ0_D3D << (2 * unit);
 
-      {
-        GLuint inputshift = R200_TEXGEN_0_INPUT_SHIFT + unit*4;
-        GLuint tmp = rmesa->TexGenEnabled;
-
-        rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
-        rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
-        rmesa->TexGenEnabled &= ~(R200_TEXGEN_INPUT_MASK<<inputshift);
-        rmesa->TexGenNeedNormals[unit] = 0;
-        rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
-        rmesa->TexGenInputs &= ~(R200_TEXGEN_INPUT_MASK<<inputshift);
-
-        if (tmp != rmesa->TexGenEnabled) {
-           rmesa->recheck_texgen[unit] = GL_TRUE;
-           rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
-        }
-      }
+   if ( re_cntl != rmesa->hw.set.cmd[SET_RE_CNTL] ) {
+      R200_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_RE_CNTL] = re_cntl;
    }
 }
 
-static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
+/**
+ * Compute the cached hardware register values for the given texture object.
+ *
+ * \param rmesa Context pointer
+ * \param t the r200 texture object
+ */
+static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t)
 {
-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
-
-   /* Need to load the 2d images associated with this unit.
-    */
-   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
-      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
-      t->base.dirty_images[0] = ~0;
+   const struct gl_texture_image *firstImage =
+      t->base.Image[0][t->mt->firstLevel];
+   GLint log2Width, log2Height, log2Depth, texelBytes;
+   
+   log2Width  = firstImage->WidthLog2;
+   log2Height = firstImage->HeightLog2;
+   log2Depth  = firstImage->DepthLog2;
+   texelBytes = firstImage->TexFormat->TexelBytes;
+
+
+   if (!t->image_override) {
+      if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
+        const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
+           tx_table_be;
+        
+        t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
+                            R200_TXFORMAT_ALPHA_IN_MAP);
+        t->pp_txfilter &= ~R200_YUV_TO_RGB;
+        
+        t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
+        t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
+      } else {
+        _mesa_problem(NULL, "unexpected texture format in %s",
+                      __FUNCTION__);
+        return;
+      }
    }
+   
+   t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
+   t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << R200_MAX_MIP_LEVEL_SHIFT;
+       
+   t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
+                      R200_TXFORMAT_HEIGHT_MASK |
+                      R200_TXFORMAT_CUBIC_MAP_ENABLE |
+                      R200_TXFORMAT_F5_WIDTH_MASK |
+                      R200_TXFORMAT_F5_HEIGHT_MASK);
+   t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
+                     (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
+   
+   t->tile_bits = 0;
+   
+   t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
+   if (t->base.Target == GL_TEXTURE_3D) {
+      t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
+      t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
 
-   ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
-
-   if ( t->base.dirty_images[0] ) {
-      R200_FIREVERTICES( rmesa );
-      r200SetTexImages( rmesa, tObj );
-      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
-      if ( !t->base.memBlock ) 
-        return GL_FALSE;
    }
-
-   return GL_TRUE;
-}
-
-#if ENABLE_HW_3D_TEXTURE
-static GLboolean enable_tex_3d( GLcontext *ctx, int unit )
-{
-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
-
-   /* Need to load the 3d images associated with this unit.
-    */
-   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
-      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
-      t->base.dirty_images[0] = ~0;
+   else if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
+      ASSERT(log2Width == log2Height);
+      t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
+                        (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
+                        /* don't think we need this bit, if it exists at all - fglrx does not set it */
+                        (R200_TXFORMAT_CUBIC_MAP_ENABLE));
+      t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
+      t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
+                           (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
+                           (log2Width << R200_FACE_WIDTH_2_SHIFT) |
+                           (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
+                           (log2Width << R200_FACE_WIDTH_3_SHIFT) |
+                           (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
+                           (log2Width << R200_FACE_WIDTH_4_SHIFT) |
+                           (log2Height << R200_FACE_HEIGHT_4_SHIFT));
+   }
+   else {
+      /* If we don't in fact send enough texture coordinates, q will be 1,
+       * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
+       */
+      t->pp_txformat_x |= R200_TEXCOORD_PROJ;
    }
 
-   ASSERT(tObj->Target == GL_TEXTURE_3D);
+   t->pp_txsize = (((firstImage->Width - 1) << R200_PP_TX_WIDTHMASK_SHIFT)
+                  | ((firstImage->Height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT));
 
-   /* R100 & R200 do not support mipmaps for 3D textures.
-    */
-   if ( (tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR) ) {
-      return GL_FALSE;
+   if ( !t->image_override ) {
+      if (firstImage->IsCompressed)
+         t->pp_txpitch = (firstImage->Width + 63) & ~(63);
+      else
+         t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
+      t->pp_txpitch -= 32;
    }
 
-   if ( t->base.dirty_images[0] ) {
-      R200_FIREVERTICES( rmesa );
-      r200SetTexImages( rmesa, tObj );
-      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
-      if ( !t->base.memBlock ) 
-        return GL_FALSE;
+   if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
+      t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
    }
-
-   return GL_TRUE;
+   
 }
-#endif
 
-static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
+static GLboolean r200_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
-   GLuint face;
+   radeonTexObj *t = radeon_tex_obj(texObj);
 
-   /* Need to load the 2d images associated with this unit.
-    */
-   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
-      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
-      for (face = 0; face < 6; face++)
-         t->base.dirty_images[face] = ~0;
-   }
-
-   ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
+   if (!radeon_validate_texture_miptree(ctx, texObj))
+      return GL_FALSE;
 
-   if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
-        t->base.dirty_images[2] || t->base.dirty_images[3] ||
-        t->base.dirty_images[4] || t->base.dirty_images[5] ) {
-      /* flush */
-      R200_FIREVERTICES( rmesa );
-      /* layout memory space, once for all faces */
-      r200SetTexImages( rmesa, tObj );
-   }
+   r200_validate_texgen(ctx, unit);
+   /* Configure the hardware registers (more precisely, the cached version
+    * of the hardware registers). */
+   setup_hardware_state(rmesa, t);
 
-   /* upload (per face) */
-   for (face = 0; face < 6; face++) {
-      if (t->base.dirty_images[face]) {
-         r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, face );
-      }
-   }
-      
-   if ( !t->base.memBlock ) {
-      /* texmem alloc failed, use s/w fallback */
-      return GL_FALSE;
+   if (texObj->Target == GL_TEXTURE_RECTANGLE_NV ||
+       texObj->Target == GL_TEXTURE_2D ||
+       texObj->Target == GL_TEXTURE_1D)
+      set_re_cntl_d3d( ctx, unit, GL_FALSE );
+   else
+      set_re_cntl_d3d( ctx, unit, GL_TRUE );
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
+   
+   R200_STATECHANGE( rmesa, vtx );
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
+
+   rmesa->recheck_texgen[unit] = GL_TRUE;
+   if (t->dirty_state & (1<<unit)) {
+      import_tex_obj_state( rmesa, unit, t );
    }
 
+   t->dirty_state = R200_TEX_ALL;
+   
+   t->validated = GL_TRUE;
    return GL_TRUE;
 }
 
-static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
+GLboolean r200UpdateTextureUnit(GLcontext *ctx, int unit)
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
-
-   if (!(t->pp_txformat & R200_TXFORMAT_NON_POWER2)) {
-      t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
-      t->base.dirty_images[0] = ~0;
-   }
+   GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded;
 
-   ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
+   if (!unitneeded)
+     return GL_TRUE;
 
-   if ( t->base.dirty_images[0] ) {
-      R200_FIREVERTICES( rmesa );
-      r200SetTexImages( rmesa, tObj );
-      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
-      if ( !t->base.memBlock && !rmesa->prefer_gart_client_texturing ) 
-        return GL_FALSE;
-   }
+   if (!r200_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
+    _mesa_warning(ctx,
+                 "failed to validate texture for unit %d.\n",
+                 unit);
+    rmesa->state.texture.unit[unit].texobj = NULL;
+    return GL_FALSE;
+  }
 
-   return GL_TRUE;
+   rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
+  return GL_TRUE;
 }
 
 
-static GLboolean update_tex_common( GLcontext *ctx, int unit )
+void r200UpdateTextureState( GLcontext *ctx )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
-   GLenum format;
-
-   /* Fallback if there's a texture border */
-   if ( tObj->Image[tObj->BaseLevel]->Border > 0 )
-       return GL_FALSE;
+   GLboolean ok;
+   GLuint dbg;
 
-   /* Update state if this is a different texture object to last
-    * time.
-    */
-   if ( rmesa->state.texture.unit[unit].texobj != t ) {
-      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
-        /* The old texture is no longer bound to this texture unit.
-         * Mark it as such.
-         */
+   /* NOTE: must not manipulate rmesa->state.texture.unit[].unitneeded or
+      rmesa->state.envneeded before a R200_STATECHANGE (or R200_NEWPRIM) since
+      we use these to determine if we want to emit the corresponding state
+      atoms. */
+   R200_NEWPRIM( rmesa );
 
-        rmesa->state.texture.unit[unit].texobj->base.bound &= 
-            ~(1UL << unit);
+   if (ctx->ATIFragmentShader._Enabled) {
+      GLuint i;
+      for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) {
+        rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled;
       }
-
-      rmesa->state.texture.unit[unit].texobj = t;
-      t->base.bound |= (1UL << unit);
-      t->dirty_state |= 1<<unit;
-      driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
+      ok = GL_TRUE;
    }
-
-
-   /* Newly enabled?
-    */
-   if ( 1|| !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit))) {
-      R200_STATECHANGE( rmesa, ctx );
-      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= (R200_TEX_0_ENABLE | 
-                                        R200_TEX_BLEND_0_ENABLE) << unit;
-
-      R200_STATECHANGE( rmesa, vtx );
-      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
-
-      rmesa->recheck_texgen[unit] = GL_TRUE;
-   }
-
-   if (t->dirty_state & (1<<unit)) {
-      import_tex_obj_state( rmesa, unit, t );
+   else {
+      ok = r200UpdateAllTexEnv( ctx );
    }
-
-   if (rmesa->recheck_texgen[unit]) {
-      GLboolean fallback = !r200_validate_texgen( ctx, unit );
-      TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
-      rmesa->recheck_texgen[unit] = 0;
-      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+   if (ok) {
+      ok = (r200UpdateTextureUnit( ctx, 0 ) &&
+        r200UpdateTextureUnit( ctx, 1 ) &&
+        r200UpdateTextureUnit( ctx, 2 ) &&
+        r200UpdateTextureUnit( ctx, 3 ) &&
+        r200UpdateTextureUnit( ctx, 4 ) &&
+        r200UpdateTextureUnit( ctx, 5 ));
    }
 
-   format = tObj->Image[tObj->BaseLevel]->Format;
-   if ( rmesa->state.texture.unit[unit].format != format ||
-       rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) {
-      rmesa->state.texture.unit[unit].format = format;
-      rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode;
-      if ( ! r200UpdateTextureEnv( ctx, unit ) ) {
-        return GL_FALSE;
-      }
+   if (ok && ctx->ATIFragmentShader._Enabled) {
+      r200UpdateFragmentShader(ctx);
    }
 
-   FALLBACK( rmesa, R200_FALLBACK_BORDER_MODE, t->border_fallback );
-   return !t->border_fallback;
-}
-
+   FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
 
+   if (rmesa->radeon.TclFallback)
+      r200ChooseVertexState( ctx );
 
-static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit )
-{
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 
-   if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) {
-      return (enable_tex_rect( ctx, unit ) &&
-             update_tex_common( ctx, unit ));
-   }
-   else if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
-      return (enable_tex_2d( ctx, unit ) &&
-             update_tex_common( ctx, unit ));
-   }
-#if ENABLE_HW_3D_TEXTURE
-   else if ( texUnit->_ReallyEnabled & (TEXTURE_3D_BIT) ) {
-      return (enable_tex_3d( ctx, unit ) &&
-             update_tex_common( ctx, unit ));
-   }
-#endif
-   else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
-      return (enable_tex_cube( ctx, unit ) &&
-             update_tex_common( ctx, unit ));
-   }
-   else if ( texUnit->_ReallyEnabled ) {
-      return GL_FALSE;
-   }
-   else {
-      disable_tex( ctx, unit );
-      return GL_TRUE;
-   }
-}
+   if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
 
+      /*
+       * T0 hang workaround -------------
+       * Not needed for r200 derivatives, hence the chip family check above.
+       */
+      if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_ENABLE_MASK) == R200_TEX_0_ENABLE &&
+        (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
 
-void r200UpdateTextureState( GLcontext *ctx )
-{
-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLboolean ok;
-   GLuint dbg;
+        R200_STATECHANGE(rmesa, ctx);
+        R200_STATECHANGE(rmesa, tex[1]);
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
+        if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE))
+          rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+        rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE;
+      }
+      else if (!ctx->ATIFragmentShader._Enabled) {
+        if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
+           (rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & R200_TXFORMAT_LOOKUP_DISABLE)) {
+           R200_STATECHANGE(rmesa, tex[1]);
+           rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~R200_TXFORMAT_LOOKUP_DISABLE;
+         }
+      }
+      /* Apply the same workaround to the first pass of a fragment shader.
+       * It is unknown whether this is necessary or sufficient.
+       */
+      if ((rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_ENABLE_MASK) == R200_PPX_TEX_0_ENABLE &&
+        (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
 
-   ok = (r200UpdateTextureUnit( ctx, 0 ) &&
-        r200UpdateTextureUnit( ctx, 1 ));
+        R200_STATECHANGE(rmesa, cst);
+        R200_STATECHANGE(rmesa, tex[1]);
+        rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_1_ENABLE;
+        if (!(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE))
+           rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+        rmesa->hw.tex[1].cmd[TEX_PP_TXMULTI_CTL] |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE;
+      }
 
-   FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
+      /* May need to be done pairwise due to 2 parallel (physical) tex units?
+         That does not seem to be the case; if 8500/9100 owners don't complain, remove this...
+      for ( i = 0; i < ctx->Const.MaxTextureUnits; i += 2) {
+         if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & ((R200_TEX_0_ENABLE |
+            R200_TEX_1_ENABLE ) << i)) == (R200_TEX_0_ENABLE << i)) &&
+            ((rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) >
+            R200_MIN_FILTER_LINEAR)) {
+            R200_STATECHANGE(rmesa, ctx);
+            R200_STATECHANGE(rmesa, tex[i+1]);
+            rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= (R200_TEX_1_ENABLE << i);
+            rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+            rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] |= 0x08000000;
+         }
+         else {
+            if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_1_ENABLE << i)) &&
+               (rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] & 0x08000000)) {
+               R200_STATECHANGE(rmesa, tex[i+1]);
+               rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] &= ~0x08000000;
+            }
+         }
+      } */
 
-   if (rmesa->TclFallback)
-      r200ChooseVertexState( ctx );
+      /*
+       * Texture cache LRU hang workaround -------------
+       * not needed for r200 derivatives
+       * hopefully this covers first pass of a shader as well
+       */
 
-   /*
-    * T0 hang workaround -------------
-    */
-#if 1
-   if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_ENABLE_MASK) == R200_TEX_0_ENABLE &&
-       (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
-
-      R200_STATECHANGE(rmesa, ctx);
-      R200_STATECHANGE(rmesa, tex[1]);
-      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
-      rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
-      rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= 0x08000000;
-   }
-   else {
-      if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
-         (rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & 0x08000000)) {
-        R200_STATECHANGE(rmesa, tex[1]);
-        rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~0x08000000;
+      /* The commented-out tests below tried to enable the workaround only where
+       * needed, but proved insufficient (see bugzilla #1519, #729, #814), so both
+       * bits (0x02 | 0x04) are always set.  Tests with quake3 showed no impact on performance.
+       */
+      dbg = 0x6;
+
+      /*
+      if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE )) &&
+         ((((rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
+         0x04) == 0)) ||
+         ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_2_ENABLE) &&
+         ((((rmesa->hw.tex[2].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
+         0x04) == 0)) ||
+         ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_4_ENABLE) &&
+         ((((rmesa->hw.tex[4].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
+         0x04) == 0)))
+      {
+         dbg |= 0x02;
       }
-   }
-#endif
-
-#if 1
-   /*
-    * Texture cache LRU hang workaround -------------
-    */
-   dbg = 0x0;
-   if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_0_ENABLE) &&
-       ((((rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & 
-         0x04) == 0)))
-   {
-      dbg |= 0x02;
-   }
 
-   if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
-       ((((rmesa->hw.tex[1].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & 
-         0x04) == 0)))
-   {
-      dbg |= 0x04;
-   }
+      if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_1_ENABLE )) &&
+         ((((rmesa->hw.tex[1].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
+         0x04) == 0)) ||
+         ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_3_ENABLE) &&
+         ((((rmesa->hw.tex[3].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
+         0x04) == 0)) ||
+         ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_5_ENABLE) &&
+         ((((rmesa->hw.tex[5].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
+         0x04) == 0)))
+      {
+         dbg |= 0x04;
+      }*/
 
-   if (dbg != rmesa->hw.tam.cmd[TAM_DEBUG3]) {
-      R200_STATECHANGE( rmesa, tam );
-      rmesa->hw.tam.cmd[TAM_DEBUG3] = dbg;
-      if (0) printf("TEXCACHE LRU HANG WORKAROUND %x\n", dbg);
+      if (dbg != rmesa->hw.tam.cmd[TAM_DEBUG3]) {
+         R200_STATECHANGE( rmesa, tam );
+         rmesa->hw.tam.cmd[TAM_DEBUG3] = dbg;
+         if (0) printf("TEXCACHE LRU HANG WORKAROUND %x\n", dbg);
+      }
    }
-#endif
 }
-
-/*
-  also tests for higher texunits:
-
-       ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_2_ENABLE) &&
-       ((((rmesa->hw.tex[2].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & 0x04) == 0)) ||
-       ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_4_ENABLE) &&
-       ((((rmesa->hw.tex[4].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & 0x04) == 0)))
-
-       ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_3_ENABLE) &&
-       ((((rmesa->hw.tex[3].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & 0x04) == 0)) ||
-       ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_5_ENABLE) &&
-       ((((rmesa->hw.tex[5].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & 0x04) == 0)))
-
-*/