intel: Add support for tiled textures.
authorEric Anholt <eric@anholt.net>
Wed, 3 Jun 2009 16:40:20 +0000 (16:40 +0000)
committerEric Anholt <eric@anholt.net>
Thu, 4 Jun 2009 14:00:43 +0000 (14:00 +0000)
This is about a 30% performance win in OA with high settings on my GM45,
and experiments with 915GM indicate that it'll be around a 20% win there.
Currently, 915-class hardware is seriously hurt by the fact that we use
fence regs to control the tiling even for 3D instructions that could live
without them, so we spend a bunch of time waiting on previous rendering in
order to pull fences off.  Thus, the texture_tiling driconf option defaults
off there for now.

13 files changed:
src/mesa/drivers/dri/i915/i915_tex_layout.c
src/mesa/drivers/dri/i965/brw_tex_layout.c
src/mesa/drivers/dri/intel/intel_context.c
src/mesa/drivers/dri/intel/intel_context.h
src/mesa/drivers/dri/intel/intel_fbo.c
src/mesa/drivers/dri/intel/intel_mipmap_tree.c
src/mesa/drivers/dri/intel/intel_mipmap_tree.h
src/mesa/drivers/dri/intel/intel_regions.c
src/mesa/drivers/dri/intel/intel_regions.h
src/mesa/drivers/dri/intel/intel_screen.c
src/mesa/drivers/dri/intel/intel_tex_copy.c
src/mesa/drivers/dri/intel/intel_tex_layout.c
src/mesa/drivers/dri/intel/intel_tex_layout.h

index 40bcf7a9aff7ee6223be291d00094fb83c735a7b..d9588e5b56da655e89d8dbca5c4b190cd68bf295 100644 (file)
@@ -112,7 +112,8 @@ static GLint bottom_offsets[6] = {
  */
 static void
 i915_miptree_layout_cube(struct intel_context *intel,
-                        struct intel_mipmap_tree * mt)
+                        struct intel_mipmap_tree * mt,
+                        uint32_t tiling)
 {
    const GLuint dim = mt->width0;
    GLuint face;
@@ -122,7 +123,7 @@ i915_miptree_layout_cube(struct intel_context *intel,
    assert(lvlWidth == lvlHeight); /* cubemap images are square */
 
    /* double pitch for cube layouts */
-   mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2);
+   mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, dim * 2);
    mt->total_height = dim * 4;
 
    for (level = mt->first_level; level <= mt->last_level; level++) {
@@ -156,7 +157,8 @@ i915_miptree_layout_cube(struct intel_context *intel,
 
 static void
 i915_miptree_layout_3d(struct intel_context *intel,
-                      struct intel_mipmap_tree * mt)
+                      struct intel_mipmap_tree * mt,
+                      uint32_t tiling)
 {
    GLuint width = mt->width0;
    GLuint height = mt->height0;
@@ -165,7 +167,7 @@ i915_miptree_layout_3d(struct intel_context *intel,
    GLint level;
 
    /* Calculate the size of a single slice. */
-   mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
+   mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
 
    /* XXX: hardware expects/requires 9 levels at minimum. */
    for (level = mt->first_level; level <= MAX2(8, mt->last_level); level++) {
@@ -200,14 +202,15 @@ i915_miptree_layout_3d(struct intel_context *intel,
 
 static void
 i915_miptree_layout_2d(struct intel_context *intel,
-                      struct intel_mipmap_tree * mt)
+                      struct intel_mipmap_tree * mt,
+                      uint32_t tiling)
 {
    GLuint width = mt->width0;
    GLuint height = mt->height0;
    GLuint img_height;
    GLint level;
 
-   mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
+   mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
    mt->total_height = 0;
 
    for (level = mt->first_level; level <= mt->last_level; level++) {
@@ -228,19 +231,20 @@ i915_miptree_layout_2d(struct intel_context *intel,
 }
 
 GLboolean
-i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt)
+i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt,
+                   uint32_t tiling)
 {
    switch (mt->target) {
    case GL_TEXTURE_CUBE_MAP:
-      i915_miptree_layout_cube(intel, mt);
+      i915_miptree_layout_cube(intel, mt, tiling);
       break;
    case GL_TEXTURE_3D:
-      i915_miptree_layout_3d(intel, mt);
+      i915_miptree_layout_3d(intel, mt, tiling);
       break;
    case GL_TEXTURE_1D:
    case GL_TEXTURE_2D:
    case GL_TEXTURE_RECTANGLE_ARB:
-      i915_miptree_layout_2d(intel, mt);
+      i915_miptree_layout_2d(intel, mt, tiling);
       break;
    default:
       _mesa_problem(NULL, "Unexpected tex target in i915_miptree_layout()");
@@ -317,7 +321,8 @@ i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt)
 
 static void
 i945_miptree_layout_cube(struct intel_context *intel,
-                        struct intel_mipmap_tree * mt)
+                        struct intel_mipmap_tree * mt,
+                        uint32_t tiling)
 {
    const GLuint dim = mt->width0;
    GLuint face;
@@ -331,9 +336,9 @@ i945_miptree_layout_cube(struct intel_context *intel,
     * or the final row of 4x4, 2x2 and 1x1 faces below this.
     */
    if (dim > 32)
-      mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2);
+      mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, dim * 2);
    else
-      mt->pitch = intel_miptree_pitch_align (intel, mt, 14 * 8);
+      mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, 14 * 8);
 
    if (dim >= 4)
       mt->total_height = dim * 4 + 4;
@@ -408,7 +413,8 @@ i945_miptree_layout_cube(struct intel_context *intel,
 
 static void
 i945_miptree_layout_3d(struct intel_context *intel,
-                      struct intel_mipmap_tree * mt)
+                      struct intel_mipmap_tree * mt,
+                      uint32_t tiling)
 {
    GLuint width = mt->width0;
    GLuint height = mt->height0;
@@ -417,7 +423,7 @@ i945_miptree_layout_3d(struct intel_context *intel,
    GLuint pack_y_pitch;
    GLuint level;
 
-   mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
+   mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
    mt->total_height = 0;
 
    pack_y_pitch = MAX2(mt->height0, 2);
@@ -462,22 +468,23 @@ i945_miptree_layout_3d(struct intel_context *intel,
 }
 
 GLboolean
-i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt)
+i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt,
+                   uint32_t tiling)
 {
    switch (mt->target) {
    case GL_TEXTURE_CUBE_MAP:
       if (mt->compressed)
-        i945_miptree_layout_cube(intel, mt);
+        i945_miptree_layout_cube(intel, mt, tiling);
       else
-        i915_miptree_layout_cube(intel, mt);
+        i915_miptree_layout_cube(intel, mt, tiling);
       break;
    case GL_TEXTURE_3D:
-      i945_miptree_layout_3d(intel, mt);
+      i945_miptree_layout_3d(intel, mt, tiling);
       break;
    case GL_TEXTURE_1D:
    case GL_TEXTURE_2D:
    case GL_TEXTURE_RECTANGLE_ARB:
-      i945_miptree_layout_2d(intel, mt);
+      i945_miptree_layout_2d(intel, mt, tiling);
       break;
    default:
       _mesa_problem(NULL, "Unexpected tex target in i945_miptree_layout()");
index be8ce546a9ed1d60d949a50887140e7b414133d5..5c5455813a4500b4f4ac5e1e6ee4b4fc5af8e432 100644 (file)
@@ -40,7 +40,8 @@
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
 GLboolean brw_miptree_layout(struct intel_context *intel,
-                            struct intel_mipmap_tree *mt)
+                            struct intel_mipmap_tree *mt,
+                            uint32_t tiling)
 {
    /* XXX: these vary depending on image format: */
    /* GLint align_w = 4; */
@@ -64,8 +65,8 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
           mt->pitch = ALIGN(width, align_w);
           pack_y_pitch = (height + 3) / 4;
       } else {
-          mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
-          pack_y_pitch = ALIGN(mt->height0, align_h);
+        mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
+        pack_y_pitch = ALIGN(mt->height0, align_h);
       }
 
       pack_x_pitch = mt->pitch;
@@ -122,7 +123,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
    }
 
    default:
-      i945_miptree_layout_2d(intel, mt);
+      i945_miptree_layout_2d(intel, mt, tiling);
       break;
    }
    DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
index ea43009f4c8416954089b684f5c624145a4cd961..fa931d7f625df32cc4e93bee26dbea71ea16d25f 100644 (file)
@@ -724,6 +724,8 @@ intelInitContext(struct intel_context *intel,
    else if (driQueryOptionb(&intel->optionCache, "force_s3tc_enable")) {
       _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
    }
+   intel->use_texture_tiling = driQueryOptionb(&intel->optionCache,
+                                              "texture_tiling");
 
    intel->prim.primitive = ~0;
 
index 810f3e62d98741b5b8337301e94f9821b12fa4cd..4e45f1a91fb5408fff5060ee881eb594f3252170 100644 (file)
@@ -305,6 +305,8 @@ struct intel_context
     */
    GLboolean is_front_buffer_rendering;
 
+   GLboolean use_texture_tiling;
+
    drm_clip_rect_t fboRect;     /**< cliprect for FBO rendering */
 
    int perf_boxes;
index 04723a2f917916721134e07d25e5cefdca72b3d7..0ea413aee1da195a3dc8caa8f838aacfe8f58fbc 100644 (file)
@@ -217,7 +217,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
       DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width,
          height, pitch);
 
-      irb->region = intel_region_alloc(intel, cpp, width, height, pitch,
+      irb->region = intel_region_alloc(intel, I915_TILING_NONE,
+                                      cpp, width, height, pitch,
                                       GL_TRUE);
       if (!irb->region)
          return GL_FALSE;       /* out of memory? */
index f3652720ecea76de838be1adcdeedcdddd09ce6b..0d34f28311e10d3abd959aff2bca72b756f360d2 100644 (file)
@@ -57,7 +57,8 @@ intel_miptree_create_internal(struct intel_context *intel,
                              GLuint last_level,
                              GLuint width0,
                              GLuint height0,
-                             GLuint depth0, GLuint cpp, GLuint compress_byte)
+                             GLuint depth0, GLuint cpp, GLuint compress_byte,
+                             uint32_t tiling)
 {
    GLboolean ok;
    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
@@ -81,11 +82,11 @@ intel_miptree_create_internal(struct intel_context *intel,
 
 #ifdef I915
    if (IS_945(intel->intelScreen->deviceID))
-      ok = i945_miptree_layout(intel, mt);
+      ok = i945_miptree_layout(intel, mt, tiling);
    else
-      ok = i915_miptree_layout(intel, mt);
+      ok = i915_miptree_layout(intel, mt, tiling);
 #else
-   ok = brw_miptree_layout(intel, mt);
+   ok = brw_miptree_layout(intel, mt, tiling);
 #endif
 
    if (!ok) {
@@ -109,10 +110,18 @@ intel_miptree_create(struct intel_context *intel,
                     GLboolean expect_accelerated_upload)
 {
    struct intel_mipmap_tree *mt;
+   uint32_t tiling;
+
+   if (intel->use_texture_tiling && compress_byte == 0 &&
+       intel->intelScreen->kernel_exec_fencing)
+      tiling = I915_TILING_X;
+   else
+      tiling = I915_TILING_NONE;
 
    mt = intel_miptree_create_internal(intel, target, internal_format,
                                      first_level, last_level, width0,
-                                     height0, depth0, cpp, compress_byte);
+                                     height0, depth0, cpp, compress_byte,
+                                     tiling);
    /*
     * pitch == 0 || height == 0  indicates the null texture
     */
@@ -120,6 +129,7 @@ intel_miptree_create(struct intel_context *intel,
       return NULL;
 
    mt->region = intel_region_alloc(intel,
+                                  tiling,
                                   mt->cpp,
                                   mt->pitch,
                                   mt->total_height,
@@ -149,7 +159,8 @@ intel_miptree_create_for_region(struct intel_context *intel,
    mt = intel_miptree_create_internal(intel, target, internal_format,
                                      first_level, last_level,
                                      region->width, region->height, 1,
-                                     region->cpp, compress_byte);
+                                     region->cpp, compress_byte,
+                                     I915_TILING_NONE);
    if (!mt)
       return mt;
 #if 0
@@ -187,6 +198,7 @@ intel_miptree_create_for_region(struct intel_context *intel,
 
 int intel_miptree_pitch_align (struct intel_context *intel,
                               struct intel_mipmap_tree *mt,
+                              uint32_t tiling,
                               int pitch)
 {
 #ifdef I915
@@ -207,6 +219,11 @@ int intel_miptree_pitch_align (struct intel_context *intel,
         pitch_align = 4;
       }
 
+      if (tiling == I915_TILING_X)
+        pitch_align = 512;
+      else if (tiling == I915_TILING_Y)
+        pitch_align = 128;
+
       pitch = ALIGN(pitch * mt->cpp, pitch_align);
 
 #ifdef I915
index 4060b9df78f8857e1329c959a6d8e36958e69495..3af9966827f40edda7d06417234091627842b59d 100644 (file)
@@ -148,6 +148,7 @@ intel_miptree_create_for_region(struct intel_context *intel,
 
 int intel_miptree_pitch_align (struct intel_context *intel,
                               struct intel_mipmap_tree *mt,
+                              uint32_t tiling,
                               int pitch);
 
 void intel_miptree_reference(struct intel_mipmap_tree **dst,
@@ -218,10 +219,13 @@ void intel_miptree_image_copy(struct intel_context *intel,
 /* i915_mipmap_tree.c:
  */
 GLboolean i915_miptree_layout(struct intel_context *intel,
-                             struct intel_mipmap_tree *mt);
+                             struct intel_mipmap_tree *mt,
+                             uint32_t tiling);
 GLboolean i945_miptree_layout(struct intel_context *intel,
-                             struct intel_mipmap_tree *mt);
+                             struct intel_mipmap_tree *mt,
+                             uint32_t tiling);
 GLboolean brw_miptree_layout(struct intel_context *intel,
-                            struct intel_mipmap_tree *mt);
+                            struct intel_mipmap_tree *mt,
+                            uint32_t tiling);
 
 #endif
index 534e75efe1f78600a6c90a49e305eb86e01ba60b..fd9bf7b17410f768165d3e24f178d133e05f9875 100644 (file)
@@ -116,7 +116,10 @@ intel_region_map(struct intel_context *intel, struct intel_region *region)
       if (region->pbo)
          intel_region_cow(intel, region);
 
-      dri_bo_map(region->buffer, GL_TRUE);
+      if (intel->intelScreen->kernel_exec_fencing)
+        drm_intel_gem_bo_map_gtt(region->buffer);
+      else
+        dri_bo_map(region->buffer, GL_TRUE);
       region->map = region->buffer->virtual;
    }
 
@@ -128,7 +131,10 @@ intel_region_unmap(struct intel_context *intel, struct intel_region *region)
 {
    _DBG("%s %p\n", __FUNCTION__, region);
    if (!--region->map_refcount) {
-      dri_bo_unmap(region->buffer);
+      if (intel->intelScreen->kernel_exec_fencing)
+        drm_intel_gem_bo_map_gtt(region->buffer);
+      else
+        dri_bo_unmap(region->buffer);
       region->map = NULL;
    }
 }
@@ -164,10 +170,12 @@ intel_region_alloc_internal(struct intel_context *intel,
 
 struct intel_region *
 intel_region_alloc(struct intel_context *intel,
+                  uint32_t tiling,
                    GLuint cpp, GLuint width, GLuint height, GLuint pitch,
                   GLboolean expect_accelerated_upload)
 {
    dri_bo *buffer;
+   struct intel_region *region;
 
    if (expect_accelerated_upload) {
       buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region",
@@ -177,7 +185,16 @@ intel_region_alloc(struct intel_context *intel,
                                  pitch * cpp * height, 64);
    }
 
-   return intel_region_alloc_internal(intel, cpp, width, height, pitch, buffer);
+   region = intel_region_alloc_internal(intel, cpp, width, height,
+                                       pitch, buffer);
+
+   if (tiling != I915_TILING_NONE) {
+      assert(((pitch * cpp) & 511) == 0);
+      drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp);
+      drm_intel_bo_get_tiling(buffer, &region->tiling, &region->bit_6_swizzle);
+   }
+
+   return region;
 }
 
 struct intel_region *
index 45e2bf4e77ab3626b7c1493380b1561f428823a6..bd3c8e7325bb9e4628a90a0fec5920021a4ed350 100644 (file)
@@ -73,7 +73,8 @@ struct intel_region
  * copied by calling intel_reference_region().
  */
 struct intel_region *intel_region_alloc(struct intel_context *intel,
-                                        GLuint cpp, GLuint width,
+                                        uint32_t tiling,
+                                       GLuint cpp, GLuint width,
                                         GLuint height, GLuint pitch,
                                        GLboolean expect_accelerated_upload);
 
index 27288231426fbccc438375cc34990e6cb88af7c6..6521b4ef313f611afcc7bc53cd80a67e532db54a 100644 (file)
 #include "i915_drm.h"
 #include "i830_dri.h"
 
+#define DRI_CONF_TEXTURE_TILING(def) \
+       DRI_CONF_OPT_BEGIN(texture_tiling, bool, def)           \
+               DRI_CONF_DESC(en, "Enable texture tiling")      \
+       DRI_CONF_OPT_END                                        \
 
 PUBLIC const char __driConfigOptions[] =
    DRI_CONF_BEGIN
@@ -64,6 +68,13 @@ PUBLIC const char __driConfigOptions[] =
            DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
         DRI_CONF_DESC_END
       DRI_CONF_OPT_END
+
+#ifdef I915
+     DRI_CONF_TEXTURE_TILING(false)
+#else
+     DRI_CONF_TEXTURE_TILING(true)
+#endif
+
    DRI_CONF_SECTION_END
    DRI_CONF_SECTION_QUALITY
       DRI_CONF_FORCE_S3TC_ENABLE(false)
@@ -76,7 +87,7 @@ PUBLIC const char __driConfigOptions[] =
    DRI_CONF_SECTION_END
 DRI_CONF_END;
 
-const GLuint __driNConfigOptions = 8;
+const GLuint __driNConfigOptions = 9;
 
 #ifdef USE_NEW_INTERFACE
 static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
index a25626ae28adb02b42a56bc5974c8a5f08ec1cbe..673b8fa6a11241a7869916fad999bc1d5b15d427 100644 (file)
@@ -118,8 +118,12 @@ do_copy_texsubimage(struct intel_context *intel,
       dstx += x - orig_x;
       dsty += y - orig_y;
 
-      /* image_offset may be non-page-aligned, but that's illegal for tiling. */
-      assert(intelImage->mt->region->tiling == I915_TILING_NONE);
+      /* Can't blit to tiled buffers with non-tile-aligned offset. */
+      if (intelImage->mt->region->tiling != I915_TILING_NONE &&
+         (image_offset & 4095) != 0) {
+        UNLOCK_HARDWARE(intel);
+        return GL_FALSE;
+      }
 
       if (ctx->ReadBuffer->Name == 0) {
         /* reading from a window, adjust x, y */
index e6f9a417790fa2117e2997e87b130cf65e667e5e..b8be7ef41a2ffe4a974afaf7ec71a9465320ead6 100644 (file)
@@ -52,7 +52,9 @@ GLuint intel_compressed_alignment(GLenum internalFormat)
     return alignment;
 }
 
-void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt )
+void i945_miptree_layout_2d( struct intel_context *intel,
+                            struct intel_mipmap_tree *mt,
+                            uint32_t tiling )
 {
    GLint align_h = 2, align_w = 4;
    GLuint level;
@@ -86,13 +88,18 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr
 
        if (mip1_width > mt->pitch) {
            mt->pitch = mip1_width;
+
+          if (tiling == I915_TILING_X)
+             mt->pitch = ALIGN(mt->pitch * mt->cpp, 512) / mt->cpp;
+          if (tiling == I915_TILING_Y)
+             mt->pitch = ALIGN(mt->pitch * mt->cpp, 128) / mt->cpp;
        }
    }
 
    /* Pitch must be a whole number of dwords, even though we
     * express it in texels.
     */
-   mt->pitch = intel_miptree_pitch_align (intel, mt, mt->pitch);
+   mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch);
    mt->total_height = 0;
 
    for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
index dbc90e6f9b74edc3b66a43b072193b47b7cdbe8b..7bc25b6bcb195ff9cff2544651ebfa3af3378b20 100644 (file)
@@ -38,5 +38,7 @@ static GLuint minify( GLuint d )
    return MAX2(1, d>>1);
 }
 
-extern void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt );
+extern void i945_miptree_layout_2d(struct intel_context *intel,
+                                  struct intel_mipmap_tree *mt,
+                                  uint32_t tiling);
 extern GLuint intel_compressed_alignment(GLenum);