i965: fixup W-tile offset computation to take swizzling into account
author: Daniel Vetter <daniel.vetter@ffwll.ch>
Fri, 2 Mar 2012 20:38:44 +0000 (21:38 +0100)
committer: Kenneth Graunke <kenneth@whitecape.org>
Mon, 5 Mar 2012 20:02:47 +0000 (12:02 -0800)
There's even a comment in the code containing the right swizzling
computations!

Previously this went unnoticed because swizzling has to be manually
enabled on snb/ivb (kernel 3.4 will do that) and we
don't use the separate stencil on ilk (where the BIOS enables
swizzling). This fixes

piglit ./bin/fbo-stencil  readpixels GL_DEPTH32F_STENCIL8 -auto

on recent drm-intel-next kernels.

Also remove the comment about ivb, it's stale now.

Swizzling detection is done by allocating a temporary x-tiled
buffer object. Unfortunately kernels before v3.2 lie on snb/ivb
because they claim that swizzling is enabled, but it isn't. The
kernel commit that fixes this for backport to pre-v3.2 is

commit acc83eb5a1e0ae7dbbf89ca2a1a943ade224bb84
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Mon Sep 12 20:49:16 2011 +0200

    drm/i915: fix swizzling on gen6+

But if the kernel doesn't lie, this now works on both swizzling and
non-swizzling machines.

NOTE: This is a candidate for the 8.0 branch.

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/intel/intel_context.c
src/mesa/drivers/dri/intel/intel_context.h
src/mesa/drivers/dri/intel/intel_mipmap_tree.c
src/mesa/drivers/dri/intel/intel_screen.c
src/mesa/drivers/dri/intel/intel_screen.h
src/mesa/drivers/dri/intel/intel_span.c
src/mesa/drivers/dri/intel/intel_span.h

index c876694ceadfaa4e1c832c386943562a5cd6d38c..7b2bdadc9deaf683b9e807d05c91ca42cc4a2db0 100644 (file)
@@ -629,6 +629,7 @@ intelInitContext(struct intel_context *intel,
    intel->must_use_separate_stencil = intel->intelScreen->hw_must_use_separate_stencil;
    intel->has_hiz = intel->intelScreen->hw_has_hiz;
    intel->has_llc = intel->intelScreen->hw_has_llc;
+   intel->has_swizzling = intel->intelScreen->hw_has_swizzling;
 
    memset(&ctx->TextureFormatSupported,
          0, sizeof(ctx->TextureFormatSupported));
index 150e55f9797a02f0fafa5b462b1495eb07fad138..ef024b10e96dc6739b2f1e76674a8107b410c4ec 100644 (file)
@@ -214,6 +214,7 @@ struct intel_context
    bool must_use_separate_stencil;
    bool has_hiz;
    bool has_llc;
+   bool has_swizzling;
 
    int urb_size;
 
index 5290da4d6f337924ea4b46ce2e8f86b1a5962bb6..3601f5e8daf8ce22bb24c7f326ea7bd4e1da8b71 100644 (file)
@@ -830,7 +830,8 @@ intel_miptree_map_s8(struct intel_context *intel,
         for (uint32_t x = 0; x < map->w; x++) {
            ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
                                               x + image_x + map->x,
-                                              y + image_y + map->y);
+                                              y + image_y + map->y,
+                                              intel->has_swizzling);
            untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
         }
       }
@@ -865,7 +866,8 @@ intel_miptree_unmap_s8(struct intel_context *intel,
         for (uint32_t x = 0; x < map->w; x++) {
            ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
                                               x + map->x,
-                                              y + map->y);
+                                              y + map->y,
+                                              intel->has_swizzling);
            tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
         }
       }
@@ -925,7 +927,8 @@ intel_miptree_map_depthstencil(struct intel_context *intel,
            int map_x = map->x + x, map_y = map->y + y;
            ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
                                                 map_x + s_image_x,
-                                                map_y + s_image_y);
+                                                map_y + s_image_y,
+                                                intel->has_swizzling);
            ptrdiff_t z_offset = ((map_y + z_image_y) * z_mt->region->pitch +
                                  (map_x + z_image_x));
            uint8_t s = s_map[s_offset];
@@ -983,7 +986,8 @@ intel_miptree_unmap_depthstencil(struct intel_context *intel,
         for (uint32_t x = 0; x < map->w; x++) {
            ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
                                                 x + s_image_x + map->x,
-                                                y + s_image_y + map->y);
+                                                y + s_image_y + map->y,
+                                                intel->has_swizzling);
            ptrdiff_t z_offset = ((y + z_image_y) * z_mt->region->pitch +
                                  (x + z_image_x));
 
index ab15740026af4d668929ace02b21ac888be18689..48762d00a3ad3543fb591c4b4fc5761fbd8ce05f 100644 (file)
@@ -667,6 +667,30 @@ intel_override_separate_stencil(struct intel_screen *screen)
    }
 }
 
+static bool
+intel_detect_swizzling(struct intel_screen *screen)
+{
+   drm_intel_bo *buffer;
+   unsigned long flags = 0;
+   unsigned long aligned_pitch;
+   uint32_t tiling = I915_TILING_X;
+   uint32_t swizzle_mode = 0;
+
+   buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "swizzle test",
+                                    64, 64, 4,
+                                    &tiling, &aligned_pitch, flags);
+   if (buffer == NULL)
+      return false;
+
+   drm_intel_bo_get_tiling(buffer, &tiling, &swizzle_mode);
+   drm_intel_bo_unreference(buffer);
+
+   if (swizzle_mode == I915_BIT_6_SWIZZLE_NONE)
+      return false;
+   else
+      return true;
+}
+
 /**
  * This is the driver specific part of the createNewScreen entry point.
  * Called when using DRI2.
@@ -765,6 +789,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
    if (!intel_init_bufmgr(intelScreen))
        return false;
 
+   intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
+
    psp->extensions = intelScreenExtensions;
 
    msaa_samples_array[0] = 0;
index a6baf1601b55c5a25b25a40632836003c32a4164..1998f7eb675eaeb323cb95123b765151e5d08d78 100644 (file)
@@ -117,6 +117,7 @@ struct intel_screen
    bool kernel_has_gen7_sol_reset;
 
    bool hw_has_llc;
+   bool hw_has_swizzling;
 
    bool no_vbo;
    dri_bufmgr *bufmgr;
index 3645720c060b9780fee5cdfb10c6130af3d2e9b7..b0dc72023f30d6094b2c279f0c0ec3b3f3dd0e87 100644 (file)
@@ -65,7 +65,7 @@
  *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
  */
 intptr_t
-intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y)
+intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
 {
    uint32_t tile_size = 4096;
    uint32_t tile_width = 64;
@@ -90,22 +90,16 @@ intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y)
                +   2 * (byte_y % 2)
                +   1 * (byte_x % 2);
 
-   /*
-    * Errata for Gen5:
-    *
-    * An additional offset is needed which is not documented in the PRM.
-    *
-    * if ((byte_x / 8) % 2 == 1) {
-    *    if ((byte_y / 8) % 2) == 0) {
-    *       u += 64;
-    *    } else {
-    *       u -= 64;
-    *    }
-    * }
-    *
-    * The offset is expressed more tersely as
-    * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1));
-    */
+   if (swizzled) {
+      /* adjust for bit6 swizzling */
+      if (((byte_x / 8) % 2) == 1) {
+        if (((byte_y / 8) % 2) == 0) {
+           u += 64;
+        } else {
+           u -= 64;
+        }
+      }
+   }
 
    return u;
 }
index b2bd416412f62fb492ca2ed32e630e44ba5d8ba7..e5218691c6558777bddbd67860d757c01a8c0430 100644 (file)
@@ -38,6 +38,6 @@ extern void intelSpanRenderStart(struct gl_context * ctx);
 
 void intel_map_vertex_shader_textures(struct gl_context *ctx);
 void intel_unmap_vertex_shader_textures(struct gl_context *ctx);
-intptr_t intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y);
+intptr_t intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled);
 
 #endif