i965: Avoid unnecessary copy when depthstencil workaround invoked by clear.
author Paul Berry <stereotype441@gmail.com>
Fri, 8 Mar 2013 21:39:43 +0000 (13:39 -0800)
committer Paul Berry <stereotype441@gmail.com>
Tue, 19 Mar 2013 23:56:51 +0000 (16:56 -0700)
Since apps typically begin rendering with a call to glClear(), it is
likely that when brw_workaround_depthstencil_alignment() moves a
miplevel to a temporary buffer, it can avoid doing a blit, since the
contents of the miplevel are about to be erased.

This patch adds the necessary plumbing to determine when
brw_workaround_depthstencil_alignment() is being called as a
consequence of glClear(), and avoids the unnecessary blit when it is
safe to do so.

Reviewed-by: Chad Versace <chad.versace@linux.intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
v2: Eliminate unnecessary call to _mesa_is_depthstencil_format().  Fix
handling of depth buffer in depth/stencil format.

v3: Use correct bitfields for clear_mask.  Fix handling of depth
buffer in depth/stencil format when hardware uses separate stencil.
When invalidating, make sure we still reassociate the image to the new
miptree.

Reviewed-by: Eric Anholt <eric@anholt.net>
src/mesa/drivers/dri/i965/brw_clear.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_misc_state.c
src/mesa/drivers/dri/i965/brw_wm_surface_state.c
src/mesa/drivers/dri/intel/intel_fbo.c
src/mesa/drivers/dri/intel/intel_fbo.h
src/mesa/drivers/dri/intel/intel_mipmap_tree.c
src/mesa/drivers/dri/intel/intel_mipmap_tree.h
src/mesa/drivers/dri/intel/intel_tex_validate.c

index cde1a06193bdc434514564efce3188d962046d5d..e740f655f16f629752cce5b1bc3e2eaf6abaed1f 100644 (file)
@@ -223,6 +223,8 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
 {
    struct brw_context *brw = brw_context(ctx);
    struct intel_context *intel = &brw->intel;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   bool partial_clear = ctx->Scissor.Enabled && !noop_scissor(ctx, fb);
 
    if (!_mesa_check_conditional_render(ctx))
       return;
@@ -232,7 +234,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
    }
 
    intel_prepare_render(intel);
-   brw_workaround_depthstencil_alignment(brw);
+   brw_workaround_depthstencil_alignment(brw, partial_clear ? 0 : mask);
 
    if (mask & BUFFER_BIT_DEPTH) {
       if (brw_fast_clear_depth(ctx)) {
index 0f1d45196382e0d55cb91e4a52f66e4b768ffa03..9f1aaf5beb95936f794414637a062031037aeb3c 100644 (file)
@@ -1138,7 +1138,8 @@ void brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
                                      struct intel_mipmap_tree *stencil_mt,
                                      uint32_t *out_tile_mask_x,
                                      uint32_t *out_tile_mask_y);
-void brw_workaround_depthstencil_alignment(struct brw_context *brw);
+void brw_workaround_depthstencil_alignment(struct brw_context *brw,
+                                           GLbitfield clear_mask);
 
 /*======================================================================
  * brw_queryobj.c
index e40818597057fc7fdcb420571c2d97163161e075..809bcc5f432403e86d5ec2d39369bd16a68cb95c 100644 (file)
@@ -439,7 +439,7 @@ static bool brw_try_draw_prims( struct gl_context *ctx,
    /* This workaround has to happen outside of brw_upload_state() because it
     * may flush the batchbuffer for a blit, affecting the state flags.
     */
-   brw_workaround_depthstencil_alignment(brw);
+   brw_workaround_depthstencil_alignment(brw, 0);
 
    /* Resolves must occur after updating renderbuffers, updating context state,
     * and finalizing textures but before setting up any hardware state for
index 1024c4247fdc24f5d0dd247e88e22a7792a7b103..d6bd86c3a6c4c7a883874a8cdb8d559445c1ab51 100644 (file)
@@ -41,6 +41,7 @@
 #include "brw_defines.h"
 
 #include "main/fbobject.h"
+#include "main/glformats.h"
 
 /* Constant single cliprect for framebuffer object or DRI2 drawing */
 static void upload_drawing_rect(struct brw_context *brw)
@@ -328,7 +329,8 @@ get_stencil_miptree(struct intel_renderbuffer *irb)
 }
 
 void
-brw_workaround_depthstencil_alignment(struct brw_context *brw)
+brw_workaround_depthstencil_alignment(struct brw_context *brw,
+                                      GLbitfield clear_mask)
 {
    struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &intel->ctx;
@@ -341,10 +343,29 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
    struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
    uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
    uint32_t stencil_draw_x = 0, stencil_draw_y = 0;
+   bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
+   bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;
 
    if (depth_irb)
       depth_mt = depth_irb->mt;
 
+   /* Check if depth buffer is in depth/stencil format.  If so, then it's only
+    * safe to invalidate it if we're also clearing stencil, and both depth_irb
+    * and stencil_irb point to the same miptree.
+    *
+    * Note: it's not sufficient to check for the case where
+    * _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL,
+    * because this fails to catch depth/stencil buffers on hardware that uses
+    * separate stencil.  To catch that case, we check whether
+    * depth_mt->stencil_mt is non-NULL.
+    */
+   if (depth_irb && invalidate_depth &&
+       (_mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL ||
+        depth_mt->stencil_mt)) {
+      invalidate_depth = invalidate_stencil && depth_irb && stencil_irb
+         && depth_irb->mt == stencil_irb->mt;
+   }
+
    uint32_t tile_mask_x, tile_mask_y;
    brw_get_depthstencil_tile_masks(depth_mt, stencil_mt,
                                    &tile_mask_x, &tile_mask_y);
@@ -373,8 +394,7 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
          perf_debug("HW workaround: blitting depth level %d to a temporary "
                     "to fix alignment (depth tile offset %d,%d)\n",
                     depth_irb->mt_level, tile_x, tile_y);
-
-         intel_renderbuffer_move_to_temp(intel, depth_irb);
+         intel_renderbuffer_move_to_temp(intel, depth_irb, invalidate_depth);
          /* In the case of stencil_irb being the same packed depth/stencil
           * texture but not the same rb, make it point at our rebased mt, too.
           */
@@ -435,7 +455,7 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
                  "to fix alignment (stencil tile offset %d,%d)\n",
                  stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);
 
-      intel_renderbuffer_move_to_temp(intel, stencil_irb);
+      intel_renderbuffer_move_to_temp(intel, stencil_irb, invalidate_stencil);
       stencil_mt = get_stencil_miptree(stencil_irb);
 
       intel_miptree_get_image_offset(stencil_mt,
@@ -459,7 +479,8 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
                        tile_x, tile_y,
                        stencil_tile_x, stencil_tile_y);
 
-            intel_renderbuffer_move_to_temp(intel, depth_irb);
+            intel_renderbuffer_move_to_temp(intel, depth_irb,
+                                            invalidate_depth);
 
             tile_x = depth_irb->draw_x & tile_mask_x;
             tile_y = depth_irb->draw_y & tile_mask_y;
index 0cb4b2d8f962c1d0dae606d9b790e50ee07b9494..932e4728f27fffd5db3193566cd43a9b4f4b23e6 100644 (file)
@@ -1224,7 +1224,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
          * select the image.  So, instead, we just make a new single-level
          * miptree and render into that.
          */
-        intel_renderbuffer_move_to_temp(intel, irb);
+        intel_renderbuffer_move_to_temp(intel, irb, false);
         mt = irb->mt;
       }
    }
index 7186978b4a0c6eca08c46f58e75ca9fba276628d..b91d6e0b8d53806a6b09c4983d59987e4311d0af 100644 (file)
@@ -991,7 +991,8 @@ intel_renderbuffer_resolve_depth(struct intel_context *intel,
 
 void
 intel_renderbuffer_move_to_temp(struct intel_context *intel,
-                                struct intel_renderbuffer *irb)
+                                struct intel_renderbuffer *irb,
+                                bool invalidate)
 {
    struct intel_texture_image *intel_image =
       intel_texture_image(irb->tex_image);
@@ -1009,7 +1010,8 @@ intel_renderbuffer_move_to_temp(struct intel_context *intel,
                                  irb->mt->num_samples,
                                  false /* force_y_tiling */);
 
-   intel_miptree_copy_teximage(intel, intel_image, new_mt);
+   intel_miptree_copy_teximage(intel, intel_image, new_mt, invalidate);
+
    intel_miptree_reference(&irb->mt, intel_image->mt);
    intel_renderbuffer_set_draw_offset(irb);
    intel_miptree_release(&new_mt);
index ce744bf13843dd98e319c613c4cb47b6b40d78d4..9313c3506ebe9c8ad766eeea20ccacd63bd5786f 100644 (file)
@@ -198,7 +198,8 @@ intel_renderbuffer_resolve_depth(struct intel_context *intel,
                                 struct intel_renderbuffer *irb);
 
 void intel_renderbuffer_move_to_temp(struct intel_context *intel,
-                                     struct intel_renderbuffer *irb);
+                                     struct intel_renderbuffer *irb,
+                                     bool invalidate);
 
 unsigned
 intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples);
index a47f6d8367dba5dfe85b8cb3b5f619402e02899f..66cadebc90555c1aacca25364dd318018da201a1 100644 (file)
@@ -869,11 +869,16 @@ intel_miptree_copy_slice(struct intel_context *intel,
 /**
  * Copies the image's current data to the given miptree, and associates that
  * miptree with the image.
+ *
+ * If \c invalidate is true, then the actual image data does not need to be
+ * copied, but the image still needs to be associated to the new miptree (this
+ * is set to true if we're about to clear the image).
  */
 void
 intel_miptree_copy_teximage(struct intel_context *intel,
                            struct intel_texture_image *intelImage,
-                           struct intel_mipmap_tree *dst_mt)
+                           struct intel_mipmap_tree *dst_mt,
+                            bool invalidate)
 {
    struct intel_mipmap_tree *src_mt = intelImage->mt;
    struct intel_texture_object *intel_obj =
@@ -882,8 +887,10 @@ intel_miptree_copy_teximage(struct intel_context *intel,
    int face = intelImage->base.Base.Face;
    GLuint depth = intelImage->base.Base.Depth;
 
-   for (int slice = 0; slice < depth; slice++) {
-      intel_miptree_copy_slice(intel, dst_mt, src_mt, level, face, slice);
+   if (!invalidate) {
+      for (int slice = 0; slice < depth; slice++) {
+         intel_miptree_copy_slice(intel, dst_mt, src_mt, level, face, slice);
+      }
    }
 
    intel_miptree_reference(&intelImage->mt, dst_mt);
index 2de4bc2033af7ca9bd805016de00274c6e4e11c3..3bdda07edceae4b873c0f97462d60a89215b93eb 100644 (file)
@@ -482,7 +482,7 @@ void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
 void
 intel_miptree_copy_teximage(struct intel_context *intel,
                             struct intel_texture_image *intelImage,
-                            struct intel_mipmap_tree *dst_mt);
+                            struct intel_mipmap_tree *dst_mt, bool invalidate);
 
 /**
  * Copy the stencil data from \c mt->stencil_mt->region to \c mt->region for
index 654001d055dc6c5cc975aef3fcf4a503c57da6d9..c880bcee1027b03ea4b9113dd47967d80b05d641 100644 (file)
@@ -123,7 +123,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
                 break;
 
          if (intelObj->mt != intelImage->mt) {
-            intel_miptree_copy_teximage(intel, intelImage, intelObj->mt);
+            intel_miptree_copy_teximage(intel, intelImage, intelObj->mt,
+                                        false /* invalidate */);
          }
 
          /* After we're done, we'd better agree that our layout is