i965/fs: Handle instruction predication in SIMD lowering pass.
[mesa.git] / src / mesa / drivers / dri / i965 / gen6_depth_state.c
index 9e0357746b216947b7041189b84ff0eb5a5c3e68..1a29860580dfb67fd21f377b1c1416e32e40ef5a 100644 (file)
@@ -50,6 +50,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
    unsigned int depth = 1;
    GLenum gl_target = GL_TEXTURE_2D;
    unsigned int lod;
+   const struct intel_mipmap_tree *mt = depth_mt ? depth_mt : stencil_mt;
    const struct intel_renderbuffer *irb = NULL;
    const struct gl_renderbuffer *rb = NULL;
 
@@ -64,12 +65,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
     */
    bool enable_hiz_ss = hiz || separate_stencil;
 
-
-   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
-    * non-pipelined state that will need the PIPE_CONTROL workaround.
-    */
-   intel_emit_post_sync_nonzero_flush(brw);
-   intel_emit_depth_stall_flushes(brw);
+   brw_emit_depth_stall_flushes(brw);
 
    irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
    if (!irb)
@@ -77,7 +73,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
    rb = (struct gl_renderbuffer*) irb;
 
    if (rb) {
-      depth = MAX2(rb->Depth, 1);
+      depth = MAX2(irb->layer_count, 1);
       if (rb->TexImage)
          gl_target = rb->TexImage->TexObject->Target;
    }
@@ -93,15 +89,29 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
       surftype = BRW_SURFACE_2D;
       depth *= 6;
       break;
+   case GL_TEXTURE_3D:
+      assert(mt);
+      depth = MAX2(mt->logical_depth0, 1);
+      /* fallthrough */
    default:
       surftype = translate_tex_target(gl_target);
       break;
    }
 
+   const unsigned min_array_element = irb ? irb->mt_layer : 0;
+
    lod = irb ? irb->mt_level - irb->mt->first_level : 0;
 
+   if (mt) {
+      width = mt->logical_width0;
+      height = mt->logical_height0;
+   }
+
    BEGIN_BATCH(7);
+   /* 3DSTATE_DEPTH_BUFFER dw0 */
    OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
+
+   /* 3DSTATE_DEPTH_BUFFER dw1 */
    OUT_BATCH((depth_mt ? depth_mt->pitch - 1 : 0) |
              (depthbuffer_format << 18) |
              ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */
@@ -109,22 +119,32 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
              (BRW_TILEWALK_YMAJOR << 26) |
              ((depth_mt ? depth_mt->tiling != I915_TILING_NONE : 1)
               << 27) |
-             (depth_surface_type << 29));
+             (surftype << 29));
 
+   /* 3DSTATE_DEPTH_BUFFER dw2 */
    if (depth_mt) {
       OUT_RELOC(depth_mt->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-               depth_offset);
+               0);
    } else {
       OUT_BATCH(0);
    }
 
-   OUT_BATCH(((width + tile_x - 1) << 6) |
-             ((height + tile_y - 1) << 19));
-   OUT_BATCH(0);
+   /* 3DSTATE_DEPTH_BUFFER dw3 */
+   OUT_BATCH(((width - 1) << 6) |
+             ((height - 1) << 19) |
+             lod << 2);
+
+   /* 3DSTATE_DEPTH_BUFFER dw4 */
+   OUT_BATCH((depth - 1) << 21 |
+             min_array_element << 10 |
+             (depth - 1) << 1);
 
-   OUT_BATCH(tile_x | (tile_y << 16));
+   /* 3DSTATE_DEPTH_BUFFER dw5 */
+   OUT_BATCH(0);
+   assert(tile_x == 0 && tile_y == 0);
 
+   /* 3DSTATE_DEPTH_BUFFER dw6 */
    OUT_BATCH(0);
 
    ADVANCE_BATCH();
@@ -140,13 +160,24 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
 
       /* Emit hiz buffer. */
       if (hiz) {
-         struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
+         assert(depth_mt);
+         struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_buf->mt;
+         uint32_t offset = 0;
+
+         if (hiz_mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
+            offset = intel_miptree_get_aligned_offset(
+                        hiz_mt,
+                        hiz_mt->level[lod].level_x,
+                        hiz_mt->level[lod].level_y,
+                        false);
+         }
+
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(hiz_mt->pitch - 1);
         OUT_RELOC(hiz_mt->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                  brw->depthstencil.hiz_offset);
+                  offset);
         ADVANCE_BATCH();
       } else {
         BEGIN_BATCH(3);
@@ -158,6 +189,26 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
 
       /* Emit stencil buffer. */
       if (separate_stencil) {
+         uint32_t offset = 0;
+
+         if (stencil_mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
+            if (stencil_mt->format == MESA_FORMAT_S_UINT8) {
+               /* Note: we can't compute the stencil offset using
+                * intel_miptree_get_aligned_offset(), because the S8 stencil
+                * miptree claims to be untiled even though it's W tiled.
+                */
+               offset =
+                  stencil_mt->level[lod].level_y * stencil_mt->pitch +
+                  stencil_mt->level[lod].level_x * 64;
+            } else {
+               offset = intel_miptree_get_aligned_offset(
+                           stencil_mt,
+                           stencil_mt->level[lod].level_x,
+                           stencil_mt->level[lod].level_y,
+                           false);
+            }
+         }
+
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
          /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
@@ -168,7 +219,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
         OUT_BATCH(2 * stencil_mt->pitch - 1);
         OUT_RELOC(stencil_mt->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                  brw->depthstencil.stencil_offset);
+                  offset);
         ADVANCE_BATCH();
       } else {
         BEGIN_BATCH(3);
@@ -187,8 +238,6 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
     *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
     *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
     */
-   intel_emit_post_sync_nonzero_flush(brw);
-
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
              GEN5_DEPTH_CLEAR_VALID |