remove accidentally committed printf for tiling support

[mesa.git] / src / mesa / drivers / dri / radeon / radeon_texstate.c
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c

index a3be9280c800a46f529e4cd85ff484f3ebafb11f..b96ad740d15d343de15ac9a37600c40393e69abf 100644 (file)
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -50,19 +50,29 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #include "radeon_tcl.h"
  
  
+#define RADEON_TXFORMAT_A8        RADEON_TXFORMAT_I8
+#define RADEON_TXFORMAT_L8        RADEON_TXFORMAT_I8
  #define RADEON_TXFORMAT_AL88      RADEON_TXFORMAT_AI88
  #define RADEON_TXFORMAT_YCBCR     RADEON_TXFORMAT_YVYU422
  #define RADEON_TXFORMAT_YCBCR_REV RADEON_TXFORMAT_VYUY422
+#define RADEON_TXFORMAT_RGB_DXT1  RADEON_TXFORMAT_DXT1
+#define RADEON_TXFORMAT_RGBA_DXT1 RADEON_TXFORMAT_DXT1
+#define RADEON_TXFORMAT_RGBA_DXT3 RADEON_TXFORMAT_DXT23
+#define RADEON_TXFORMAT_RGBA_DXT5 RADEON_TXFORMAT_DXT45
  
  #define _COLOR(f) \
      [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, 0 }
+#define _COLOR_REV(f) \
+    [ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f, 0 }
  #define _ALPHA(f) \
      [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }
+#define _ALPHA_REV(f) \
+    [ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }
  #define _YUV(f) \
     [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, RADEON_YUV_TO_RGB }
  #define _INVALID(f) \
      [ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 }
-#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_YCBCR_REV) \
+#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
                              && (tx_table[f].format != 0xffffffff) )
  
  static const struct {
@@ -71,18 +81,30 @@ static const struct {
  tx_table[] =
  {
     _ALPHA(RGBA8888),
+   _ALPHA_REV(RGBA8888),
     _ALPHA(ARGB8888),
+   _ALPHA_REV(ARGB8888),
     _INVALID(RGB888),
     _COLOR(RGB565),
+   _COLOR_REV(RGB565),
     _ALPHA(ARGB4444),
+   _ALPHA_REV(ARGB4444),
     _ALPHA(ARGB1555),
+   _ALPHA_REV(ARGB1555),
     _ALPHA(AL88),
-   _INVALID(A8),
-   _INVALID(L8),
-   _COLOR(I8),
+   _ALPHA_REV(AL88),
+   _ALPHA(A8),
+   _COLOR(L8),
+   _ALPHA(I8),
     _INVALID(CI8),
     _YUV(YCBCR),
     _YUV(YCBCR_REV),
+   _INVALID(RGB_FXT1),
+   _INVALID(RGBA_FXT1),
+   _COLOR(RGB_DXT1),
+   _ALPHA(RGBA_DXT1),
+   _ALPHA(RGBA_DXT3),
+   _ALPHA(RGBA_DXT5),
  };
  
  #undef _COLOR
@@ -105,8 +127,8 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
  {
     radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
     const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
-   GLint curOffset;
-   GLint i;
+   GLint curOffset, blitWidth;
+   GLint i, texelBytes;
     GLint numLevels;
     GLint log2Width, log2Height, log2Depth;
  
@@ -126,6 +148,7 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
        return;
     }
  
+   texelBytes = baseImage->TexFormat->TexelBytes;
  
     /* Compute which mipmap levels we really want to send to the hardware.
      */
@@ -144,6 +167,34 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
      * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
      */
     curOffset = 0;
+   blitWidth = BLIT_WIDTH_BYTES;
+   t->tile_bits = 0;
+
+   /* figure out if this texture is suitable for tiling. */
+   if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) {
+      if (rmesa->texmicrotile && (baseImage->Height > 1)) {
+        /* allow 32 (bytes) x 1 mip (which will use two times the space
+           the non-tiled version would use) max if base texture is large enough */
+        if ((numLevels == 1) ||
+          (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
+              (baseImage->Width * texelBytes > 64)) ||
+           ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
+           /* R100 has two microtile bits (only the txoffset reg, not the blitter)
+              weird: X2 + OPT: 32bit correct, 16bit completely hosed
+                     X2: 32bit correct, 16bit correct
+                     OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */
+           t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/;
+        }
+      }
+      if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) {
+        /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not
+           in the case if height is smaller than 16 (not 100% sure), as does the r200,
+           so need to disable macro tiling in that case */
+        if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) {
+           t->tile_bits |= RADEON_TXO_MACRO_TILE;
+        }
+      }
+   }
  
     for (i = 0; i < numLevels; i++) {
        const struct gl_texture_image *texImage;
@@ -155,31 +206,61 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
  
        /* find image size in bytes */
        if (texImage->IsCompressed) {
-         size = texImage->CompressedSize;
+      /* need to calculate the size AFTER padding even though the texture is
+         submitted without padding.
+         Only handle pot textures currently - don't know if npot is even possible,
+         size calculation would certainly need (trivial) adjustments.
+         Align (and later pad) to 32byte, not sure what that 64byte blit width is
+         good for? */
+         if ((t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) == RADEON_TXFORMAT_DXT1) {
+            /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
+            if ((texImage->Width + 3) < 8) /* width one block */
+               size = texImage->CompressedSize * 4;
+            else if ((texImage->Width + 3) < 16)
+               size = texImage->CompressedSize * 2;
+            else size = texImage->CompressedSize;
+         }
+         else /* DXT3/5, 16 bytes per block */
+            if ((texImage->Width + 3) < 8)
+               size = texImage->CompressedSize * 2;
+            else size = texImage->CompressedSize;
        }
        else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-        size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63)
-                & ~63) * texImage->Height;
+        size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
+      }
+      else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
+        /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+           though the actual offset may be different (if texture is less than
+           32 bytes width) to the untiled case */
+        int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+        size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
+        blitWidth = MAX2(texImage->Width, 64 / texelBytes);
        }
        else {
-         int w = texImage->Width * texImage->TexFormat->TexelBytes;
-         if (w < 32)
-            w = 32;
-         size = w * texImage->Height * texImage->Depth;
+        int w = (texImage->Width * texelBytes + 31) & ~31;
+        size = w * texImage->Height * texImage->Depth;
+        blitWidth = MAX2(texImage->Width, 64 / texelBytes);
        }
        assert(size > 0);
  
-
        /* Align to 32-byte offset.  It is faster to do this unconditionally
         * (no branch penalty).
         */
  
        curOffset = (curOffset + 0x1f) & ~0x1f;
  
-      t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
-      t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
-      t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
-      t->image[0][i].height = size / t->image[0][i].width;
+      if (texelBytes) {
+        t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
+        t->image[0][i].y = 0;
+        t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
+        t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
+      }
+      else {
+         t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+         t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+         t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
+         t->image[0][i].height = size / t->image[0][i].width;     
+      }
  
  #if 0
        /* for debugging only and only  applicable to non-rectangle targets */
@@ -224,7 +305,7 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
     if (baseImage->IsCompressed)
        t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
     else
-      t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63);
+      t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
     t->pp_txpitch -= 32;
  
     t->dirty_state = TEX_ALL;
@@ -383,6 +464,13 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
     radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
     const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
     GLuint color_combine, alpha_combine;
+   const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
+         | RADEON_COLOR_ARG_C_CURRENT_COLOR | RADEON_BLEND_CTL_ADD
+         | RADEON_SCALE_1X | RADEON_CLAMP_TX;
+   const GLuint alpha_combine0 = RADEON_ALPHA_ARG_A_ZERO | RADEON_ALPHA_ARG_B_ZERO
+         | RADEON_ALPHA_ARG_C_CURRENT_ALPHA | RADEON_BLEND_CTL_ADD
+         | RADEON_SCALE_1X | RADEON_CLAMP_TX;
+
  
     /* texUnit->_Current can be NULL if and only if the texture unit is
      * not actually enabled.
@@ -400,17 +488,14 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
      * reduces the amount of special-casing we have to do, alpha-only
      * textures being a notable exception.
      */
+    /* Don't cache these results.
+    */
+   rmesa->state.texture.unit[unit].format = 0;
+   rmesa->state.texture.unit[unit].envMode = 0;
+
     if ( !texUnit->_ReallyEnabled ) {
-      /* Don't cache these results.
-       */
-      rmesa->state.texture.unit[unit].format = 0;
-      rmesa->state.texture.unit[unit].envMode = 0;
-      color_combine = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
-         | RADEON_COLOR_ARG_C_CURRENT_COLOR | RADEON_BLEND_CTL_ADD
-         | RADEON_SCALE_1X | RADEON_CLAMP_TX;
-      alpha_combine = RADEON_ALPHA_ARG_A_ZERO | RADEON_ALPHA_ARG_B_ZERO
-         | RADEON_ALPHA_ARG_C_CURRENT_ALPHA | RADEON_BLEND_CTL_ADD
-         | RADEON_SCALE_1X | RADEON_CLAMP_TX;
+      color_combine = color_combine0;
+      alpha_combine = alpha_combine0;
     }
     else {
        GLuint color_arg[3], alpha_arg[3];
@@ -420,20 +505,16 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
        GLuint RGBshift = texUnit->_CurrentCombine->ScaleShiftRGB;
        GLuint Ashift = texUnit->_CurrentCombine->ScaleShiftA;
  
-      /* Don't cache these results.
-       */
-      rmesa->state.texture.unit[unit].format = 0;
-      rmesa->state.texture.unit[unit].envMode = 0;
-
  
        /* Step 1:
         * Extract the color and alpha combine function arguments.
         */
        for ( i = 0 ; i < numColorArgs ; i++ ) {
-        const GLuint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR;
+        const GLint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR;
+        const GLuint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
          assert(op >= 0);
          assert(op <= 3);
-        switch ( texUnit->_CurrentCombine->SourceRGB[i] ) {
+        switch ( srcRGBi ) {
          case GL_TEXTURE:
             color_arg[i] = radeon_texture_color[op][unit];
             break;
@@ -452,16 +533,25 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
          case GL_ONE:
             color_arg[i] = radeon_zero_color[op+1];
             break;
+        case GL_TEXTURE0:
+        case GL_TEXTURE1:
+        case GL_TEXTURE2:
+        /* implement ogl 1.4/1.5 core spec here, not specification of
+         * GL_ARB_texture_env_crossbar (which would require disabling blending
+         * instead of undefined results when referencing not enabled texunit) */
+          color_arg[i] = radeon_texture_color[op][srcRGBi - GL_TEXTURE0];
+          break;
          default:
             return GL_FALSE;
          }
        }
  
        for ( i = 0 ; i < numAlphaArgs ; i++ ) {
-        const GLuint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
+        const GLint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
+        const GLuint srcAi = texUnit->_CurrentCombine->SourceA[i];
          assert(op >= 0);
          assert(op <= 1);
-        switch ( texUnit->_CurrentCombine->SourceA[i] ) {
+        switch ( srcAi ) {
          case GL_TEXTURE:
             alpha_arg[i] = radeon_texture_alpha[op][unit];
             break;
@@ -480,6 +570,11 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
          case GL_ONE:
             alpha_arg[i] = radeon_zero_alpha[op+1];
             break;
+        case GL_TEXTURE0:
+        case GL_TEXTURE1:
+        case GL_TEXTURE2:
+          alpha_arg[i] = radeon_texture_alpha[op][srcAi - GL_TEXTURE0];
+          break;
          default:
             return GL_FALSE;
          }
@@ -542,7 +637,6 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
           * 1.3) does.
           */
          RGBshift = 0;
-        Ashift = 0;
          /* FALLTHROUGH */
  
        case GL_DOT3_RGB:
@@ -555,7 +649,11 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
          }
  
          RGBshift += 2;
-        Ashift = RGBshift;
+        if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT)
+           || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ) {
+            /* is it necessary to set this or will it be ignored anyway? */
+           Ashift = RGBshift;
+        }
  
          color_combine = (RADEON_COLOR_ARG_C_ZERO |
                           RADEON_BLEND_CTL_DOT3 |