add texture micro and macro tiling to radeon/r200 driver. This can improve performance...

author Roland Scheidegger <rscheidegger@gmx.ch>

Thu, 10 Feb 2005 22:36:06 +0000 (22:36 +0000)

committer Roland Scheidegger <rscheidegger@gmx.ch>

Thu, 10 Feb 2005 22:36:06 +0000 (22:36 +0000)
author Roland Scheidegger <rscheidegger@gmx.ch>
Thu, 10 Feb 2005 22:36:06 +0000 (22:36 +0000)
committer Roland Scheidegger <rscheidegger@gmx.ch>
Thu, 10 Feb 2005 22:36:06 +0000 (22:36 +0000)
diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c

index 4eca4ad7e506f6f939ac7589a1a931a55d4e2dcd..baaca087555238d6f732862d82e9a04fad2e70f9 100644 (file)
--- a/src/mesa/drivers/dri/r200/r200_context.c
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -272,6 +272,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
        else
          rmesa->using_hyperz = GL_TRUE;
     }
+ 
+   if ( sPriv->drmMinor >= 15 )
+      rmesa->texmicrotile = GL_TRUE;
  
     /* Init default driver functions then plug in our R200-specific functions
      * (the texture functions are especially important)
diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h

index cedf1b974f15dc2457b57f2a0294b861c199badb..7e0a46ae51c674aa1c71605672a9bef6d071f76d 100644 (file)
--- a/src/mesa/drivers/dri/r200/r200_context.h
+++ b/src/mesa/drivers/dri/r200/r200_context.h
@@ -167,6 +167,8 @@ struct r200_tex_obj {
     GLuint pp_cubic_faces;              /* cube face 1,2,3,4 log2 sizes */
  
     GLboolean  border_fallback;
+
+   GLuint tile_bits;                   /* hw texture tile bits used on this texture */
  };
  
  
@@ -931,6 +933,7 @@ struct r200_context {
     driOptionCache optionCache;
  
     GLboolean using_hyperz;
+   GLboolean texmicrotile;
  };
  
  #define R200_CONTEXT(ctx)              ((r200ContextPtr)(ctx->DriverCtx))
diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h

index c1132e54ab7a6319b36533e5ceb22510e8de2025..2468c6cebfabf604207dc5d9413b04cadeadad49 100644 (file)
--- a/src/mesa/drivers/dri/r200/r200_reg.h
+++ b/src/mesa/drivers/dri/r200/r200_reg.h
@@ -968,6 +968,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #define     R200_TXO_ENDIAN_BYTE_SWAP   (1 << 0)
  #define     R200_TXO_ENDIAN_WORD_SWAP   (2 << 0)
  #define     R200_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
+#define     R200_TXO_MACRO_TILE         (1 << 2)
+#define     R200_TXO_MICRO_TILE         (1 << 3)
  #define     R200_TXO_OFFSET_MASK        0xffffffe0
  #define     R200_TXO_OFFSET_SHIFT       5
  #define R200_PP_CUBIC_OFFSET_F1_0         0x2d04
diff --git a/src/mesa/drivers/dri/r200/r200_texmem.c b/src/mesa/drivers/dri/r200/r200_texmem.c

index 3f8e5d6e7f89345a14b6202a53dd5d039b27c5c4..7472afeedd7888e46bfaed2d4bc8fedfd3010a58 100644 (file)
--- a/src/mesa/drivers/dri/r200/r200_texmem.c
+++ b/src/mesa/drivers/dri/r200/r200_texmem.c
@@ -43,12 +43,10 @@ SOFTWARE.
  #include "context.h"
  #include "colormac.h"
  #include "macros.h"
-#include "radeon_reg.h" /* gets definition for usleep */
  #include "r200_context.h"
-#include "r200_state.h"
  #include "r200_ioctl.h"
-#include "r200_swtcl.h"
  #include "r200_tex.h"
+#include "radeon_reg.h"
  
  #include <unistd.h>  /* for usleep() */
  
@@ -253,12 +251,13 @@ static void r200UploadRectSubImage( r200ContextPtr rmesa,
  
          /* Blit to framebuffer
           */
-        r200EmitBlit( rmesa, 
-                      blit_format, 
-                      dstPitch, GET_START( &region ),   
-                      dstPitch, t->bufAddr,
-                      0, 0, 
-                      0, done, 
+        r200EmitBlit( rmesa,
+                      blit_format,
+                      dstPitch, GET_START( &region ),
+                      dstPitch | (t->tile_bits >> 16),
+                      t->bufAddr,
+                      0, 0,
+                      0, done,
                        width, lines );
          
          r200EmitWait( rmesa, RADEON_WAIT_2D );
@@ -339,7 +338,7 @@ static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t,
     imageWidth = texImage->Width;
     imageHeight = texImage->Height;
  
-   offset = t->bufAddr;
+   offset = t->bufAddr + t->base.totalSize / 6 * face;
  
     if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
        GLint imageX = 0;
@@ -363,19 +362,47 @@ static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t,
      * We used to use 1, 2 and 4-byte texels and used to use the texture
      * width to dictate the blit width - but that won't work for compressed
      * textures. (Brian)
+    * NOTE: can't do that with texture tiling. (sroland)
      */
     tex.offset = offset;
-   tex.pitch = BLIT_WIDTH_BYTES / 64;
-   tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */
+   tex.image = &tmp;
+   /* copy (x,y,width,height,data) */
+   memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) );
+   
     if (texImage->TexFormat->TexelBytes) {
-      tex.width = imageWidth * texImage->TexFormat->TexelBytes; /* in bytes */
+      /* use multi-byte upload scheme */
        tex.height = imageHeight;
+      tex.width = imageWidth;
+      tex.format = t->pp_txformat & R200_TXFORMAT_FORMAT_MASK;
+      tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
+      tex.offset += tmp.x & ~1023;
+      tmp.x = tmp.x % 1024;
+      if (t->tile_bits & R200_TXO_MICRO_TILE) {
+        /* need something like "tiled coordinates" ? */
+        tmp.y = tmp.x / (tex.pitch * 128) * 2;
+        tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
+        tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+      }
+      else {
+        tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+      }
+      if ((t->tile_bits & R200_TXO_MACRO_TILE) &&
+        (texImage->Width * texImage->TexFormat->TexelBytes >= 256) &&
+        ((!(t->tile_bits & R200_TXO_MICRO_TILE) && (texImage->Height >= 8)) ||
+           (texImage->Height >= 16))) {
+        /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
+           OR if height is smaller than 8 automatically, but if micro tiling is active
+           the limit is height 16 instead ? */
+        tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+      }
     }
     else {
        /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
           needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
        /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
           so the kernel module reads the right amount of data. */
+      tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */
+      tex.pitch = (BLIT_WIDTH_BYTES / 64);
        tex.height = (imageHeight + 3) / 4;
        tex.width = (imageWidth + 3) / 4;
        switch (t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) {
@@ -390,19 +417,7 @@ static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t,
            fprintf(stderr, "unknown compressed tex format in uploadSubImage\n");
        }
     }
-   tex.image = &tmp;
  
-   /* copy (x,y,width,height,data) */
-   memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) );
-
-   /* Adjust the base offset to account for the Y-offset.  This is done,
-    * instead of just letting the Y-offset automatically take care of it,
-    * because it is possible, for very large textures, for the Y-offset
-    * to exceede the [-8192,+8191] range.
-    */
-   tex.offset += tmp.y * 1024;
-   tmp.y = 0;
-    
     LOCK_HARDWARE( rmesa );
     do {
        ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
@@ -473,7 +488,11 @@ int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face )
        t->bufAddr = rmesa->r200Screen->texOffset[heap] 
            + t->base.memBlock->ofs;
        t->pp_txoffset = t->bufAddr;
-
+       
+      if (!(t->base.tObj->Image[0][0]->IsClientData)) {
+        /* hope it's safe to add that here... */
+        t->pp_txoffset |= t->tile_bits;
+      }
  
        /* Mark this texobj as dirty on all units:
         */
diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c

index 1e56c78f9bee0800d63dc3b3565e057cffb54aea..3fba25d0b5086ed4fdda82a24d57461a1e0003c9 100644 (file)
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -125,8 +125,8 @@ static void r200SetTexImages( r200ContextPtr rmesa,
  {
     r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData;
     const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
-   GLint curOffset;
-   GLint i;
+   GLint curOffset, blitWidth;
+   GLint i, texelBytes;
     GLint numLevels;
     GLint log2Width, log2Height, log2Depth;
  
@@ -146,6 +146,7 @@ static void r200SetTexImages( r200ContextPtr rmesa,
        return;
     }
  
+   texelBytes = baseImage->TexFormat->TexelBytes;
  
     /* Compute which mipmap levels we really want to send to the hardware.
      */
@@ -164,6 +165,28 @@ static void r200SetTexImages( r200ContextPtr rmesa,
      * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
      */
     curOffset = 0;
+   blitWidth = BLIT_WIDTH_BYTES;
+   t->tile_bits = 0;
+
+   /* figure out if this texture is suitable for tiling. */
+   if (texelBytes) {
+      if (rmesa->texmicrotile  && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
+      /* texrect might be able to use micro tiling too in theory? */
+        (baseImage->Height > 1)) {
+        /* allow 32 (bytes) x 1 mip (which will use two times the space
+        the non-tiled version would use) max if base texture is large enough */
+        if ((numLevels == 1) ||
+          (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
+              (baseImage->Width * texelBytes > 64)) ||
+           ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
+           t->tile_bits |= R200_TXO_MICRO_TILE;
+        }
+      }
+      if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
+        /* we can set macro tiling even for small textures, they will be untiled anyway */
+        t->tile_bits |= R200_TXO_MACRO_TILE;
+      }
+   }
  
     for (i = 0; i < numLevels; i++) {
        const struct gl_texture_image *texImage;
@@ -195,28 +218,41 @@ static void r200SetTexImages( r200ContextPtr rmesa,
              else size = texImage->CompressedSize;
        }
        else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-         size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63)
-                 & ~63) * texImage->Height;
+        size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
+      }
+      else if (t->tile_bits & R200_TXO_MICRO_TILE) {
+        /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+           though the actual offset may be different (if texture is less than
+           32 bytes width) to the untiled case */
+        int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+        size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
+        blitWidth = MAX2(texImage->Width, 64 / texelBytes);
        }
        else {
-         int w = texImage->Width * texImage->TexFormat->TexelBytes;
-         if (w < 32)
-            w = 32;
-         size = w * texImage->Height * texImage->Depth;
+        int w = (texImage->Width * texelBytes + 31) & ~31;
+        size = w * texImage->Height * texImage->Depth;
+        blitWidth = MAX2(texImage->Width, 64 / texelBytes);
        }
        assert(size > 0);
  
-
        /* Align to 32-byte offset.  It is faster to do this unconditionally
         * (no branch penalty).
         */
  
        curOffset = (curOffset + 0x1f) & ~0x1f;
  
-      t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
-      t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
-      t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
-      t->image[0][i].height = size / t->image[0][i].width;
+      if (texelBytes) {
+        t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
+        t->image[0][i].y = 0;
+        t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
+        t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
+      }
+      else {
+         t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+         t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+         t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
+         t->image[0][i].height = size / t->image[0][i].width;     
+      }
  
  #if 0
        /* for debugging only and only  applicable to non-rectangle targets */
@@ -242,16 +278,13 @@ static void r200SetTexImages( r200ContextPtr rmesa,
  
     /* Setup remaining cube face blits, if needed */
     if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
-      /* Round totalSize up to multiple of BLIT_WIDTH_BYTES */
-      const GLuint faceSize = (t->base.totalSize + BLIT_WIDTH_BYTES - 1)
-                              & ~(BLIT_WIDTH_BYTES-1);
-      const GLuint lines = faceSize / BLIT_WIDTH_BYTES;
+      const GLuint faceSize = t->base.totalSize;
        GLuint face;
-      /* reuse face 0 x/y/width/height - just adjust y */
+      /* reuse face 0 x/y/width/height - just update the offset when uploading */
        for (face = 1; face < 6; face++) {
           for (i = 0; i < numLevels; i++) {
              t->image[face][i].x =  t->image[0][i].x;
-            t->image[face][i].y =  t->image[0][i].y + face * lines;
+            t->image[face][i].y =  t->image[0][i].y;
              t->image[face][i].width  = t->image[0][i].width;
              t->image[face][i].height = t->image[0][i].height;
           }
@@ -310,7 +343,7 @@ static void r200SetTexImages( r200ContextPtr rmesa,
     if (baseImage->IsCompressed)
        t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
     else
-      t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63);
+      t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
     t->pp_txpitch -= 32;
  
     t->dirty_state = TEX_ALL;
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c

index 4229d5cb5e55da7932209f4aaab7f63c878ad36d..5d7e28cf89a0cc4ab6fa76355e8e357ba67d7ab3 100644 (file)
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -255,6 +255,9 @@ radeonCreateContext( const __GLcontextModes *glVisual,
          rmesa->using_hyperz = GL_TRUE;
     }
  
+   if ( sPriv->drmMinor >= 15 )
+      rmesa->texmicrotile = GL_TRUE;
+
     /* Init default driver functions then plug in our Radeon-specific functions
      * (the texture functions are especially important)
      */
@@ -445,6 +448,7 @@ radeonCreateContext( const __GLcontextModes *glVisual,
     }
     (*rmesa->get_ust)( & rmesa->swap_ust );
  
+   if (rmesa->sarea->tiling_enabled != 0) fprintf(stderr, "color tiling enabled!\n");
  
  #if DO_DEBUG
     RADEON_DEBUG = driParseDebugString( getenv( "RADEON_DEBUG" ),
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h

index 53860c12b843180e2650aa6315c6bbef7ae87bef..8d0637ca326f0beba11ad0640451f19fe556f588 100644 (file)
--- a/src/mesa/drivers/dri/radeon/radeon_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_context.h
@@ -162,6 +162,8 @@ struct radeon_tex_obj {
     GLuint pp_cubic_faces;              /* cube face 1,2,3,4 log2 sizes */
  
     GLboolean  border_fallback;
+
+   GLuint tile_bits;                   /* hw texture tile bits used on this texture */
  };
  
  
@@ -186,7 +188,7 @@ struct radeon_state_atom {
     GLboolean dirty;                      /* dirty-mark in emit_state_list */
     GLboolean (*check)( GLcontext * );    /* is this state active? */
  };
-   
+
  
  
  /* Trying to keep these relatively short as the variables are becoming
@@ -781,6 +783,7 @@ struct radeon_context {
     driOptionCache optionCache;
  
     GLboolean using_hyperz;
+   GLboolean texmicrotile;
  
     /* Performance counters
      */
diff --git a/src/mesa/drivers/dri/radeon/radeon_texmem.c b/src/mesa/drivers/dri/radeon/radeon_texmem.c

index d910a6c15a5e90ff7b69cdea3a34689d92143bff..d492e190c12c5c53795cd92461e28dee059d0474 100644 (file)
--- a/src/mesa/drivers/dri/radeon/radeon_texmem.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texmem.c
@@ -46,6 +46,8 @@ SOFTWARE.
  #include "radeon_ioctl.h"
  #include "radeon_tex.h"
  
+#include <unistd.h>  /* for usleep() */
+
  
  /**
   * Destroy any device-dependent state associated with the texture.  This may
@@ -151,12 +153,12 @@ static void radeonUploadRectSubImage( radeonContextPtr rmesa,
  
          /* Blit to framebuffer
           */
-        radeonEmitBlit( rmesa, 
-                      blit_format, 
-                      dstPitch, GET_START( &region ),    
-                      dstPitch, t->bufAddr, 
-                      0, 0, 
-                      0, done, 
+        radeonEmitBlit( rmesa,
+                      blit_format,
+                      dstPitch, GET_START( &region ),
+                      dstPitch, t->bufAddr,
+                      0, 0,
+                      0, done,
                        width, lines );
          
          radeonEmitWait( rmesa, RADEON_WAIT_2D );
@@ -248,19 +250,43 @@ static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t,
      * We used to use 1, 2 and 4-byte texels and used to use the texture
      * width to dictate the blit width - but that won't work for compressed
      * textures. (Brian)
+    * NOTE: can't do that with texture tiling. (sroland)
      */
     tex.offset = offset;
-   tex.pitch = BLIT_WIDTH_BYTES / 64;
-   tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+   tex.image = &tmp;
+   /* copy (x,y,width,height,data) */
+   memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) );
+
     if (texImage->TexFormat->TexelBytes) {
-      tex.width = imageWidth * texImage->TexFormat->TexelBytes; /* in bytes */
+      /* use multi-byte upload scheme */
        tex.height = imageHeight;
+      tex.width = imageWidth;
+      tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK;
+      tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
+      tex.offset += tmp.x & ~1023;
+      tmp.x = tmp.x % 1024;
+      if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
+        /* need something like "tiled coordinates" ? */
+        tmp.y = tmp.x / (tex.pitch * 128) * 2;
+        tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
+        tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+      }
+      else {
+        tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+      }
+      if ((t->tile_bits & RADEON_TXO_MACRO_TILE) &&
+        (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) {
+        /* radeon switches off macro tiling for small textures/mipmaps it seems */
+        tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+      }
     }
     else {
        /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
           needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
        /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
           so the kernel module reads the right amount of data. */
+      tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+      tex.pitch = (BLIT_WIDTH_BYTES / 64);
        tex.height = (imageHeight + 3) / 4;
        tex.width = (imageWidth + 3) / 4;
        switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) {
@@ -273,10 +299,6 @@ static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t,
           break;
        }
     }
-   tex.image = &tmp;
-
-   /* copy (x,y,width,height,data) */
-   memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) );
  
     LOCK_HARDWARE( rmesa );
     do {
@@ -344,6 +366,10 @@ int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint fac
            + t->base.memBlock->ofs;
        t->pp_txoffset = t->bufAddr;
  
+      if (!(t->base.tObj->Image[0][0]->IsClientData)) {
+        /* hope it's safe to add that here... */
+        t->pp_txoffset |= t->tile_bits;
+      }
  
        /* Mark this texobj as dirty on all units:
         */
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c

index 5e818da9fd17af04dfb0bcc8ab9160cae013f94f..b96ad740d15d343de15ac9a37600c40393e69abf 100644 (file)
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -127,8 +127,8 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
  {
     radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
     const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
-   GLint curOffset;
-   GLint i;
+   GLint curOffset, blitWidth;
+   GLint i, texelBytes;
     GLint numLevels;
     GLint log2Width, log2Height, log2Depth;
  
@@ -148,6 +148,7 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
        return;
     }
  
+   texelBytes = baseImage->TexFormat->TexelBytes;
  
     /* Compute which mipmap levels we really want to send to the hardware.
      */
@@ -166,6 +167,34 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
      * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
      */
     curOffset = 0;
+   blitWidth = BLIT_WIDTH_BYTES;
+   t->tile_bits = 0;
+
+   /* figure out if this texture is suitable for tiling. */
+   if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) {
+      if (rmesa->texmicrotile && (baseImage->Height > 1)) {
+        /* allow 32 (bytes) x 1 mip (which will use two times the space
+           the non-tiled version would use) max if base texture is large enough */
+        if ((numLevels == 1) ||
+          (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
+              (baseImage->Width * texelBytes > 64)) ||
+           ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
+           /* R100 has two microtile bits (only the txoffset reg, not the blitter)
+              weird: X2 + OPT: 32bit correct, 16bit completely hosed
+                     X2: 32bit correct, 16bit correct
+                     OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */
+           t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/;
+        }
+      }
+      if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) {
+        /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not
+           in the case if height is smaller than 16 (not 100% sure), as does the r200,
+           so need to disable macro tiling in that case */
+        if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) {
+           t->tile_bits |= RADEON_TXO_MACRO_TILE;
+        }
+      }
+   }
  
     for (i = 0; i < numLevels; i++) {
        const struct gl_texture_image *texImage;
@@ -197,28 +226,41 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
              else size = texImage->CompressedSize;
        }
        else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-        size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63)
-                & ~63) * texImage->Height;
+        size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
+      }
+      else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
+        /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+           though the actual offset may be different (if texture is less than
+           32 bytes width) to the untiled case */
+        int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+        size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
+        blitWidth = MAX2(texImage->Width, 64 / texelBytes);
        }
        else {
-         int w = texImage->Width * texImage->TexFormat->TexelBytes;
-         if (w < 32)
-            w = 32;
-         size = w * texImage->Height * texImage->Depth;
+        int w = (texImage->Width * texelBytes + 31) & ~31;
+        size = w * texImage->Height * texImage->Depth;
+        blitWidth = MAX2(texImage->Width, 64 / texelBytes);
        }
        assert(size > 0);
  
-
        /* Align to 32-byte offset.  It is faster to do this unconditionally
         * (no branch penalty).
         */
  
        curOffset = (curOffset + 0x1f) & ~0x1f;
  
-      t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
-      t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
-      t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
-      t->image[0][i].height = size / t->image[0][i].width;
+      if (texelBytes) {
+        t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
+        t->image[0][i].y = 0;
+        t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
+        t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
+      }
+      else {
+         t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+         t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+         t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
+         t->image[0][i].height = size / t->image[0][i].width;     
+      }
  
  #if 0
        /* for debugging only and only  applicable to non-rectangle targets */
@@ -263,7 +305,7 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
     if (baseImage->IsCompressed)
        t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
     else
-      t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63);
+      t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
     t->pp_txpitch -= 32;
  
     t->dirty_state = TEX_ALL;
author	Roland Scheidegger <rscheidegger@gmx.ch>
	Thu, 10 Feb 2005 22:36:06 +0000 (22:36 +0000)
committer	Roland Scheidegger <rscheidegger@gmx.ch>
	Thu, 10 Feb 2005 22:36:06 +0000 (22:36 +0000)
src/mesa/drivers/dri/r200/r200_context.c		patch \| blob \| history
src/mesa/drivers/dri/r200/r200_context.h		patch \| blob \| history
src/mesa/drivers/dri/r200/r200_reg.h		patch \| blob \| history
src/mesa/drivers/dri/r200/r200_texmem.c		patch \| blob \| history
src/mesa/drivers/dri/r200/r200_texstate.c		patch \| blob \| history
src/mesa/drivers/dri/radeon/radeon_context.c		patch \| blob \| history
src/mesa/drivers/dri/radeon/radeon_context.h		patch \| blob \| history
src/mesa/drivers/dri/radeon/radeon_texmem.c		patch \| blob \| history
src/mesa/drivers/dri/radeon/radeon_texstate.c		patch \| blob \| history