i965: Use force_compat_profile driconf option
[mesa.git] / src / mesa / drivers / dri / i965 / gen8_depth_state.c
index c961c8d7aedf2ca0f665d97172d5a5b0eab795ea..1ea5884f8182be2b16bc3bbb239e3484e672360a 100644 (file)
 #include "brw_wm.h"
 #include "main/framebuffer.h"
 
-/**
- * Helper function to emit depth related command packets.
- *
- * Unless the early-out below triggers, this writes four packets to the
- * batch, in order: 3DSTATE_DEPTH_BUFFER (8 dwords),
- * 3DSTATE_HIER_DEPTH_BUFFER (5 dwords), 3DSTATE_STENCIL_BUFFER (5 dwords)
- * and 3DSTATE_CLEAR_PARAMS (3 dwords).  brw_emit_depth_stall_flushes() is
- * called before the first packet.
- *
- * \param brw                 context; brw->gen selects the MOCS value and
- *                            brw->no_depth_or_stencil is read and updated.
- * \param depth_mt            depth miptree, or NULL for no depth buffer.
- *                            Must be non-NULL when \p hiz is true.
- * \param depthbuffer_format  value placed at bit 18 of the second
- *                            3DSTATE_DEPTH_BUFFER dword.
- * \param depth_surface_type  value placed at bit 29 of the same dword.
- * \param depth_writable      sets bit 28 of the same dword.
- * \param stencil_mt          stencil miptree, or NULL for no stencil buffer
- *                            (a zeroed 3DSTATE_STENCIL_BUFFER is emitted).
- * \param stencil_writable    sets bit 27 of the depth buffer dword, but only
- *                            when \p stencil_mt is non-NULL.
- * \param hiz                 when false a zeroed 3DSTATE_HIER_DEPTH_BUFFER
- *                            is emitted; when true depth_mt->hiz_buf
- *                            supplies pitch, buffer object and qpitch.
- * \param width               emitted as (width - 1) << 4.
- * \param height              emitted as (height - 1) << 18.
- * \param depth               emitted as (depth - 1) << 21 in two dwords.
- * \param lod                 OR'ed into the low bits of the width/height
- *                            dword.
- * \param min_array_element   emitted shifted left by 10.
- *
- * The last dword of 3DSTATE_CLEAR_PARAMS is always written as 1
- * (presumably the "clear value valid" bit -- TODO confirm against the PRM).
- *
- * NOTE(review): the "stencil_mt ?" test used when writing the stencil
- * qpitch is redundant; that branch is only reached when stencil_mt is
- * non-NULL.
- */
-static void
-emit_depth_packets(struct brw_context *brw,
-                   struct intel_mipmap_tree *depth_mt,
-                   uint32_t depthbuffer_format,
-                   uint32_t depth_surface_type,
-                   bool depth_writable,
-                   struct intel_mipmap_tree *stencil_mt,
-                   bool stencil_writable,
-                   bool hiz,
-                   uint32_t width,
-                   uint32_t height,
-                   uint32_t depth,
-                   uint32_t lod,
-                   uint32_t min_array_element)
-{
-   uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
-
-   /* Skip repeated NULL depth/stencil emits (think 2D rendering).  The
-    * no_depth_or_stencil flag tested here is the one stored at the bottom
-    * of this function by the previous emit.
-    */
-   if (!depth_mt && !stencil_mt && brw->no_depth_or_stencil) {
-      assert(brw->hw_ctx);
-      return;
-   }
-
-   brw_emit_depth_stall_flushes(brw);
-
-   /* _NEW_BUFFERS, _NEW_DEPTH, _NEW_STENCIL */
-   BEGIN_BATCH(8);
-   OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (8 - 2));
-   OUT_BATCH(depth_surface_type << 29 |
-             (depth_writable ? (1 << 28) : 0) |
-             (stencil_mt != NULL && stencil_writable) << 27 |
-             (hiz ? 1 : 0) << 22 |
-             depthbuffer_format << 18 |
-             (depth_mt ? depth_mt->pitch - 1 : 0));
-   if (depth_mt) {
-      OUT_RELOC64(depth_mt->bo,
-                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
-   } else {
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-   }
-   OUT_BATCH(((width - 1) << 4) | ((height - 1) << 18) | lod);
-   OUT_BATCH(((depth - 1) << 21) | (min_array_element << 10) | mocs_wb);
-   OUT_BATCH(0);
-   OUT_BATCH(((depth - 1) << 21) | (depth_mt ? depth_mt->qpitch >> 2 : 0));
-   ADVANCE_BATCH();
-
-   if (!hiz) {
-      BEGIN_BATCH(5);
-      OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (5 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   } else {
-      assert(depth_mt);
-      BEGIN_BATCH(5);
-      OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (5 - 2));
-      OUT_BATCH((depth_mt->hiz_buf->pitch - 1) | mocs_wb << 25);
-      OUT_RELOC64(depth_mt->hiz_buf->bo,
-                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
-      OUT_BATCH(depth_mt->hiz_buf->qpitch >> 2);
-      ADVANCE_BATCH();
-   }
-
-   if (stencil_mt == NULL) {
-      BEGIN_BATCH(5);
-      OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   } else {
-      BEGIN_BATCH(5);
-      OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2));
-      /* The stencil buffer has quirky pitch requirements.  From the Graphics
-       * BSpec: vol2a.11 3D Pipeline Windower > Early Depth/Stencil Processing
-       * > Depth/Stencil Buffer State > 3DSTATE_STENCIL_BUFFER [DevIVB+],
-       * field "Surface Pitch":
-       *
-       *    The pitch must be set to 2x the value computed based on width, as
-       *    the stencil buffer is stored with two rows interleaved.
-       *
-       * (Note that it is not 100% clear whether this is intended to apply to
-       * Gen7; the BSpec flags this comment as "DevILK,DevSNB" (which would
-       * imply that it doesn't), however the comment appears on a "DevIVB+"
-       * page (which would imply that it does).  Experiments with the hardware
-       * indicate that it does.)
-       */
-      OUT_BATCH(HSW_STENCIL_ENABLED | mocs_wb << 22 |
-                (2 * stencil_mt->pitch - 1));
-      OUT_RELOC64(stencil_mt->bo,
-                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
-      OUT_BATCH(stencil_mt ? stencil_mt->qpitch >> 2 : 0);
-      ADVANCE_BATCH();
-   }
-
-   BEGIN_BATCH(3);
-   OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
-   OUT_BATCH(depth_mt ? depth_mt->fast_clear_color.u32[0] : 0);
-   OUT_BATCH(1);
-   ADVANCE_BATCH();
-
-   brw->no_depth_or_stencil = !depth_mt && !stencil_mt;
-}
-
-/* Awful vtable-compatible function; should be cleaned up in the future.
- *
- * Derives the surface type, depth (layer count), LOD, minimum array element
- * and dimensions for the current draw framebuffer's depth/stencil
- * attachment, then hands everything to emit_depth_packets().
- *
- * The renderbuffer is taken from the framebuffer's BUFFER_DEPTH attachment,
- * falling back to BUFFER_STENCIL.  When a renderbuffer exists, depth starts
- * as MAX2(layer_count, 1) and the GL target comes from its texture image if
- * it has one; otherwise the target defaults to GL_TEXTURE_2D and depth to 1.
- *
- * When either miptree is present (depth_mt preferred over stencil_mt), the
- * width and height arguments are overridden with that miptree's
- * logical_width0 / logical_height0; the passed-in values are only used when
- * both miptrees are NULL.
- *
- * The depth format, depth-writable and stencil-writable values passed on
- * are queried from the context (brw_depthbuffer_format(),
- * brw_depth_writes_enabled(), ctx->Stencil._WriteEnabled).
- *
- * NOTE(review): the depth_offset, depthbuffer_format, depth_surface_type,
- * separate_stencil, tile_x and tile_y parameters are never read in this
- * body; presumably they exist only to match the vtable signature.
- */
-void
-gen8_emit_depth_stencil_hiz(struct brw_context *brw,
-                            struct intel_mipmap_tree *depth_mt,
-                            uint32_t depth_offset,
-                            uint32_t depthbuffer_format,
-                            uint32_t depth_surface_type,
-                            struct intel_mipmap_tree *stencil_mt,
-                            bool hiz, bool separate_stencil,
-                            uint32_t width, uint32_t height,
-                            uint32_t tile_x, uint32_t tile_y)
-{
-   struct gl_context *ctx = &brw->ctx;
-   struct gl_framebuffer *fb = ctx->DrawBuffer;
-   uint32_t surftype;
-   unsigned int depth = 1;
-   unsigned int min_array_element;
-   GLenum gl_target = GL_TEXTURE_2D;
-   unsigned int lod;
-   const struct intel_mipmap_tree *mt = depth_mt ? depth_mt : stencil_mt;
-   const struct intel_renderbuffer *irb = NULL;
-   const struct gl_renderbuffer *rb = NULL;
-
-   irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
-   if (!irb)
-      irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
-   rb = (struct gl_renderbuffer *) irb;
-
-   if (rb) {
-      depth = MAX2(irb->layer_count, 1);
-      if (rb->TexImage)
-         gl_target = rb->TexImage->TexObject->Target;
-   }
-
-   switch (gl_target) {
-   case GL_TEXTURE_CUBE_MAP_ARRAY:
-   case GL_TEXTURE_CUBE_MAP:
-      /* The PRM claims that we should use BRW_SURFACE_CUBE for this
-       * situation, but experiments show that gl_Layer doesn't work when we do
-       * this.  So we use BRW_SURFACE_2D, since for rendering purposes this is
-       * equivalent.
-       */
-      surftype = BRW_SURFACE_2D;
-      depth *= 6;
-      break;
-   case GL_TEXTURE_3D:
-      assert(mt);
-      depth = MAX2(mt->logical_depth0, 1);
-      surftype = translate_tex_target(gl_target);
-      break;
-   case GL_TEXTURE_1D_ARRAY:
-   case GL_TEXTURE_1D:
-      if (brw->gen >= 9) {
-         /* WaDisable1DDepthStencil. Skylake+ doesn't support 1D depth
-          * textures but it does allow pretending it's a 2D texture
-          * instead.
-          */
-         surftype = BRW_SURFACE_2D;
-         break;
-      }
-      /* fallthrough */
-   default:
-      surftype = translate_tex_target(gl_target);
-      break;
-   }
-
-   min_array_element = irb ? irb->mt_layer : 0;
-
-   lod = irb ? irb->mt_level - irb->mt->first_level : 0;
-
-   if (mt) {
-      width = mt->logical_width0;
-      height = mt->logical_height0;
-   }
-
-   emit_depth_packets(brw, depth_mt, brw_depthbuffer_format(brw), surftype,
-                      brw_depth_writes_enabled(brw),
-                      stencil_mt, ctx->Stencil._WriteEnabled,
-                      hiz, width, height, depth, lod, min_array_element);
-}
-
 /**
  * Should we set the PMA FIX ENABLE bit?
  *
@@ -287,7 +95,7 @@ pma_fix_enable(const struct brw_context *brw)
     * !3DSTATE_DEPTH_BUFFER::Stencil Buffer Enable ||
     * !3DSTATE_STENCIL_BUFFER::Stencil Buffer Enable
     */
-   const bool stencil_writes_enabled = ctx->Stencil._WriteEnabled;
+   const bool stencil_writes_enabled = brw->stencil_write_enabled;
 
    /* 3DSTATE_PS_EXTRA::Pixel Shader Computed Depth Mode != PSCDEPTH_OFF */
    const bool ps_computes_depth =
@@ -324,8 +132,6 @@ pma_fix_enable(const struct brw_context *brw)
 void
 gen8_write_pma_stall_bits(struct brw_context *brw, uint32_t pma_stall_bits)
 {
-   struct gl_context *ctx = &brw->ctx;
-
    /* If we haven't actually changed the value, bail now to avoid unnecessary
     * pipeline stalls and register writes.
     */
@@ -340,18 +146,16 @@ gen8_write_pma_stall_bits(struct brw_context *brw, uint32_t pma_stall_bits)
     * Flush is also necessary.
     */
    const uint32_t render_cache_flush =
-      ctx->Stencil._WriteEnabled ? PIPE_CONTROL_RENDER_TARGET_FLUSH : 0;
+      brw->stencil_write_enabled ? PIPE_CONTROL_RENDER_TARGET_FLUSH : 0;
    brw_emit_pipe_control_flush(brw,
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                render_cache_flush);
 
    /* CACHE_MODE_1 is a non-privileged register. */
-   BEGIN_BATCH(3);
-   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
-   OUT_BATCH(GEN7_CACHE_MODE_1);
-   OUT_BATCH(GEN8_HIZ_PMA_MASK_BITS | pma_stall_bits);
-   ADVANCE_BATCH();
+   brw_load_register_imm32(brw, GEN7_CACHE_MODE_1,
+                           GEN8_HIZ_PMA_MASK_BITS |
+                           pma_stall_bits );
 
    /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
     * Flush bits is often necessary.  We do it regardless because it's easier.
@@ -367,9 +171,10 @@ gen8_write_pma_stall_bits(struct brw_context *brw, uint32_t pma_stall_bits)
 static void
 gen8_emit_pma_stall_workaround(struct brw_context *brw)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
    uint32_t bits = 0;
 
-   if (brw->gen >= 9)
+   if (devinfo->gen >= 9)
       return;
 
    if (pma_fix_enable(brw))