i965: Refactor SIMD16-to-2xSIMD8 checks.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_misc_state.c
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c

index 3c908c8314a1c35cc08cca8f4925219148bc3667..bc810769793f8c816aaea41eaa6fb883abc6bb2b 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -1,6 +1,6 @@
  /*
   Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ Intel funded Tungsten Graphics to
   develop this 3D driver.
  
   Permission is hereby granted, free of charge, to any person obtaining
@@ -26,7 +26,7 @@
   **********************************************************************/
   /*
    * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
+  *   Keith Whitwell <keithw@vmware.com>
    */
  
  
@@ -34,7 +34,6 @@
  #include "intel_batchbuffer.h"
  #include "intel_fbo.h"
  #include "intel_mipmap_tree.h"
-#include "intel_regions.h"
  
  #include "brw_context.h"
  #include "brw_state.h"
@@ -48,10 +47,6 @@ static void upload_drawing_rect(struct brw_context *brw)
  {
     struct gl_context *ctx = &brw->ctx;
  
-   /* 3DSTATE_DRAWING_RECTANGLE is non-pipelined. */
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
     BEGIN_BATCH(4);
     OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
     OUT_BATCH(0); /* xmin, ymin */
@@ -65,7 +60,6 @@ const struct brw_tracked_state brw_drawing_rect = {
     .dirty = {
        .mesa = _NEW_BUFFERS,
        .brw = BRW_NEW_CONTEXT,
-      .cache = 0
     },
     .emit = upload_drawing_rect
  };
@@ -117,16 +111,11 @@ static void upload_psp_urb_cbs(struct brw_context *brw )
  const struct brw_tracked_state brw_psp_urb_cbs = {
     .dirty = {
        .mesa = 0,
-      .brw = (BRW_NEW_URB_FENCE |
-             BRW_NEW_BATCH |
-             BRW_NEW_STATE_BASE_ADDRESS),
-      .cache = (CACHE_NEW_VS_UNIT |
-               CACHE_NEW_FF_GS_UNIT |
-               CACHE_NEW_FF_GS_PROG |
-               CACHE_NEW_CLIP_UNIT |
-               CACHE_NEW_SF_UNIT |
-               CACHE_NEW_WM_UNIT |
-               CACHE_NEW_CC_UNIT)
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_FF_GS_PROG_DATA |
+             BRW_NEW_GEN4_UNIT_STATE |
+             BRW_NEW_STATE_BASE_ADDRESS |
+             BRW_NEW_URB_FENCE,
     },
     .emit = upload_psp_urb_cbs,
  };
@@ -142,46 +131,15 @@ brw_depthbuffer_format(struct brw_context *brw)
     if (!drb &&
         (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
         !srb->mt->stencil_mt &&
-       (intel_rb_format(srb) == MESA_FORMAT_S8_Z24 ||
-       intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_X24S8)) {
+       (intel_rb_format(srb) == MESA_FORMAT_Z24_UNORM_S8_UINT ||
+       intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_S8X24_UINT)) {
        drb = srb;
     }
  
     if (!drb)
        return BRW_DEPTHFORMAT_D32_FLOAT;
  
-   switch (drb->mt->format) {
-   case MESA_FORMAT_Z16:
-      return BRW_DEPTHFORMAT_D16_UNORM;
-   case MESA_FORMAT_Z32_FLOAT:
-      return BRW_DEPTHFORMAT_D32_FLOAT;
-   case MESA_FORMAT_X8_Z24:
-      if (brw->gen >= 6) {
-        return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
-      } else {
-        /* Use D24_UNORM_S8, not D24_UNORM_X8.
-         *
-         * D24_UNORM_X8 was not introduced until Gen5. (See the Ironlake PRM,
-         * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
-         * 3DSTATE_DEPTH_BUFFER.Surface_Format).
-         *
-         * However, on Gen5, D24_UNORM_X8 may be used only if separate
-         * stencil is enabled, and we never enable it. From the Ironlake PRM,
-         * same section as above, Bit 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Buffer_Enable:
-         *     If this field is disabled, the Surface Format of the depth
-         *     buffer cannot be D24_UNORM_X8_UINT.
-         */
-        return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
-      }
-   case MESA_FORMAT_S8_Z24:
-      return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
-   case MESA_FORMAT_Z32_FLOAT_X24S8:
-      return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
-   default:
-      _mesa_problem(ctx, "Unexpected depth format %s\n",
-                   _mesa_get_format_name(intel_rb_format(drb)));
-      return BRW_DEPTHFORMAT_D16_UNORM;
-   }
+   return brw_depth_format(brw, drb->mt->format);
  }
  
  /**
@@ -210,13 +168,13 @@ brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
     uint32_t tile_mask_x = 0, tile_mask_y = 0;
  
     if (depth_mt) {
-      intel_region_get_tile_masks(depth_mt->region,
-                                  &tile_mask_x, &tile_mask_y, false);
+      intel_miptree_get_tile_masks(depth_mt, &tile_mask_x, &tile_mask_y, false);
  
-      if (intel_miptree_slice_has_hiz(depth_mt, depth_level, depth_layer)) {
+      if (intel_miptree_level_has_hiz(depth_mt, depth_level)) {
           uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
-         intel_region_get_tile_masks(depth_mt->hiz_mt->region,
-                                     &hiz_tile_mask_x, &hiz_tile_mask_y, false);
+         intel_miptree_get_tile_masks(depth_mt->hiz_buf->mt,
+                                      &hiz_tile_mask_x, &hiz_tile_mask_y,
+                                      false);
  
           /* Each HiZ row represents 2 rows of pixels */
           hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
@@ -230,15 +188,15 @@ brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
        if (stencil_mt->stencil_mt)
          stencil_mt = stencil_mt->stencil_mt;
  
-      if (stencil_mt->format == MESA_FORMAT_S8) {
+      if (stencil_mt->format == MESA_FORMAT_S_UINT8) {
           /* Separate stencil buffer uses 64x64 tiles. */
           tile_mask_x |= 63;
           tile_mask_y |= 63;
        } else {
           uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
-         intel_region_get_tile_masks(stencil_mt->region,
-                                     &stencil_tile_mask_x,
-                                     &stencil_tile_mask_y, false);
+         intel_miptree_get_tile_masks(stencil_mt,
+                                      &stencil_tile_mask_x,
+                                      &stencil_tile_mask_y, false);
  
           tile_mask_x |= stencil_tile_mask_x;
           tile_mask_y |= stencil_tile_mask_y;
@@ -293,10 +251,10 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw,
     if (stencil_irb)
        brw->depthstencil.stencil_mt = get_stencil_miptree(stencil_irb);
  
-   /* Gen7+ doesn't require the workarounds, since we always program the
+   /* Gen6+ doesn't require the workarounds, since we always program the
      * surface state at the start of the whole surface.
      */
-   if (brw->gen >= 7)
+   if (brw->gen >= 6)
        return;
  
     /* Check if depth buffer is in depth/stencil format.  If so, then it's only
@@ -477,30 +435,29 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw,
        depth_mt = depth_irb->mt;
        brw->depthstencil.depth_mt = depth_mt;
        brw->depthstencil.depth_offset =
-         intel_region_get_aligned_offset(depth_mt->region,
-                                         depth_irb->draw_x & ~tile_mask_x,
-                                         depth_irb->draw_y & ~tile_mask_y,
-                                         false);
+         intel_miptree_get_aligned_offset(depth_mt,
+                                          depth_irb->draw_x & ~tile_mask_x,
+                                          depth_irb->draw_y & ~tile_mask_y,
+                                          false);
        if (intel_renderbuffer_has_hiz(depth_irb)) {
           brw->depthstencil.hiz_offset =
-            intel_region_get_aligned_offset(depth_mt->region,
-                                            depth_irb->draw_x & ~tile_mask_x,
-                                            (depth_irb->draw_y & ~tile_mask_y) /
-                                            2,
-                                            false);
+            intel_miptree_get_aligned_offset(depth_mt,
+                                             depth_irb->draw_x & ~tile_mask_x,
+                                             (depth_irb->draw_y & ~tile_mask_y) / 2,
+                                             false);
        }
     }
     if (stencil_irb) {
        stencil_mt = get_stencil_miptree(stencil_irb);
  
        brw->depthstencil.stencil_mt = stencil_mt;
-      if (stencil_mt->format == MESA_FORMAT_S8) {
+      if (stencil_mt->format == MESA_FORMAT_S_UINT8) {
           /* Note: we can't compute the stencil offset using
            * intel_miptree_get_aligned_offset(), because stencil_mt claims
            * that the miptree is untiled even though it's W tiled.
            */
           brw->depthstencil.stencil_offset =
-            (stencil_draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
+            (stencil_draw_y & ~tile_mask_y) * stencil_mt->pitch +
              (stencil_draw_x & ~tile_mask_x) * 64;
        }
     }
@@ -526,7 +483,7 @@ brw_emit_depthbuffer(struct brw_context *brw)
     uint32_t width = 1, height = 1;
  
     if (stencil_mt) {
-      separate_stencil = stencil_mt->format == MESA_FORMAT_S8;
+      separate_stencil = stencil_mt->format == MESA_FORMAT_S_UINT8;
  
        /* Gen7 supports only separate stencil */
        assert(separate_stencil || brw->gen < 7);
@@ -556,8 +513,8 @@ brw_emit_depthbuffer(struct brw_context *brw)
        /* Prior to Gen7, if using separate stencil, hiz must be enabled. */
        assert(brw->gen >= 7 || !separate_stencil || hiz);
  
-      assert(brw->gen < 6 || depth_mt->region->tiling == I915_TILING_Y);
-      assert(!hiz || depth_mt->region->tiling == I915_TILING_Y);
+      assert(brw->gen < 6 || depth_mt->tiling == I915_TILING_Y);
+      assert(!hiz || depth_mt->tiling == I915_TILING_Y);
  
        depthbuffer_format = brw_depthbuffer_format(brw);
        depth_surface_type = BRW_SURFACE_2D;
@@ -583,6 +540,11 @@ brw_emit_depthbuffer(struct brw_context *brw)
        height = stencil_irb->Base.Base.Height;
     }
  
+   if (depth_mt)
+      brw_render_cache_set_check_flush(brw, depth_mt->bo);
+   if (stencil_mt)
+      brw_render_cache_set_check_flush(brw, stencil_mt->bo);
+
     brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
                                      depthbuffer_format, depth_surface_type,
                                      stencil_mt, hiz, separate_stencil,
@@ -615,7 +577,6 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
      * non-pipelined state that will need the PIPE_CONTROL workaround.
      */
     if (brw->gen == 6) {
-      intel_emit_post_sync_nonzero_flush(brw);
        intel_emit_depth_stall_flushes(brw);
     }
  
@@ -629,17 +590,17 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
  
     BEGIN_BATCH(len);
     OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
-   OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
+   OUT_BATCH((depth_mt ? depth_mt->pitch - 1 : 0) |
               (depthbuffer_format << 18) |
               ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */
               ((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */
               (BRW_TILEWALK_YMAJOR << 26) |
-             ((depth_mt ? depth_mt->region->tiling != I915_TILING_NONE : 1)
+             ((depth_mt ? depth_mt->tiling != I915_TILING_NONE : 1)
                << 27) |
               (depth_surface_type << 29));
  
     if (depth_mt) {
-      OUT_RELOC(depth_mt->region->bo,
+      OUT_RELOC(depth_mt->bo,
                 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                 depth_offset);
     } else {
@@ -671,11 +632,11 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
  
        /* Emit hiz buffer. */
        if (hiz) {
-         struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
+         struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_buf->mt;
          BEGIN_BATCH(3);
          OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
-        OUT_BATCH(hiz_mt->region->pitch - 1);
-        OUT_RELOC(hiz_mt->region->bo,
+        OUT_BATCH(hiz_mt->pitch - 1);
+        OUT_RELOC(hiz_mt->bo,
                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                    brw->depthstencil.hiz_offset);
          ADVANCE_BATCH();
@@ -689,8 +650,6 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
  
        /* Emit stencil buffer. */
        if (separate_stencil) {
-        struct intel_region *region = stencil_mt->region;
-
          BEGIN_BATCH(3);
          OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
           /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
@@ -698,8 +657,8 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
            *    The pitch must be set to 2x the value computed based on width, as
            *    the stencil buffer is stored with two rows interleaved.
            */
-        OUT_BATCH(2 * region->pitch - 1);
-        OUT_RELOC(region->bo,
+        OUT_BATCH(2 * stencil_mt->pitch - 1);
+        OUT_RELOC(stencil_mt->bo,
                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                    brw->depthstencil.stencil_offset);
          ADVANCE_BATCH();
@@ -721,9 +680,6 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
      *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
      */
     if (brw->gen >= 6 || hiz) {
-      if (brw->gen == 6)
-        intel_emit_post_sync_nonzero_flush(brw);
-
        BEGIN_BATCH(2);
        OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
                 GEN5_DEPTH_CLEAR_VALID |
@@ -737,7 +693,6 @@ const struct brw_tracked_state brw_depthbuffer = {
     .dirty = {
        .mesa = _NEW_BUFFERS,
        .brw = BRW_NEW_BATCH,
-      .cache = 0,
     },
     .emit = brw_emit_depthbuffer,
  };
@@ -757,9 +712,6 @@ static void upload_polygon_stipple(struct brw_context *brw)
     if (!ctx->Polygon.StippleFlag)
        return;
  
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
     BEGIN_BATCH(33);
     OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
  
@@ -778,15 +730,14 @@ static void upload_polygon_stipple(struct brw_context *brw)
        for (i = 0; i < 32; i++)
          OUT_BATCH(ctx->PolygonStipple[i]);
     }
-   CACHED_BATCH();
+   ADVANCE_BATCH();
  }
  
  const struct brw_tracked_state brw_polygon_stipple = {
     .dirty = {
-      .mesa = (_NEW_POLYGONSTIPPLE |
-              _NEW_POLYGON),
+      .mesa = _NEW_POLYGON |
+              _NEW_POLYGONSTIPPLE,
        .brw = BRW_NEW_CONTEXT,
-      .cache = 0
     },
     .emit = upload_polygon_stipple
  };
@@ -804,9 +755,6 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
     if (!ctx->Polygon.StippleFlag)
        return;
  
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
     BEGIN_BATCH(2);
     OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
  
@@ -822,15 +770,14 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
        OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
     else
        OUT_BATCH(0);
-   CACHED_BATCH();
+   ADVANCE_BATCH();
  }
  
  const struct brw_tracked_state brw_polygon_stipple_offset = {
     .dirty = {
-      .mesa = (_NEW_BUFFERS |
-              _NEW_POLYGON),
+      .mesa = _NEW_BUFFERS |
+              _NEW_POLYGON,
        .brw = BRW_NEW_CONTEXT,
-      .cache = 0
     },
     .emit = upload_polygon_stipple_offset
  };
@@ -849,21 +796,18 @@ static void upload_aa_line_parameters(struct brw_context *brw)
     if (brw->gen == 4 && !brw->is_g4x)
        return;
  
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
+   BEGIN_BATCH(3);
     OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
     /* use legacy aa line coverage computation */
     OUT_BATCH(0);
     OUT_BATCH(0);
-   CACHED_BATCH();
+   ADVANCE_BATCH();
  }
  
  const struct brw_tracked_state brw_aa_line_parameters = {
     .dirty = {
        .mesa = _NEW_LINE,
        .brw = BRW_NEW_CONTEXT,
-      .cache = 0
     },
     .emit = upload_aa_line_parameters
  };
@@ -881,9 +825,6 @@ static void upload_line_stipple(struct brw_context *brw)
     if (!ctx->Line.StippleFlag)
        return;
  
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
     BEGIN_BATCH(3);
     OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
     OUT_BATCH(ctx->Line.StipplePattern);
@@ -901,14 +842,13 @@ static void upload_line_stipple(struct brw_context *brw)
        OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
     }
  
-   CACHED_BATCH();
+   ADVANCE_BATCH();
  }
  
  const struct brw_tracked_state brw_line_stipple = {
     .dirty = {
        .mesa = _NEW_LINE,
        .brw = BRW_NEW_CONTEXT,
-      .cache = 0
     },
     .emit = upload_line_stipple
  };
@@ -921,13 +861,13 @@ const struct brw_tracked_state brw_line_stipple = {
  void
  brw_upload_invariant_state(struct brw_context *brw)
  {
-   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
+   const bool is_965 = brw->gen == 4 && !brw->is_g4x;
  
     /* Select the 3D pipeline (as opposed to media) */
+   const uint32_t _3DSTATE_PIPELINE_SELECT =
+      is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;
     BEGIN_BATCH(1);
-   OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
+   OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 | (brw->gen >= 9 ? (3 << 8) : 0));
     ADVANCE_BATCH();
  
     if (brw->gen < 6) {
@@ -938,14 +878,23 @@ brw_upload_invariant_state(struct brw_context *brw)
        ADVANCE_BATCH();
     }
  
-   BEGIN_BATCH(2);
-   OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
+   if (brw->gen >= 8) {
+      BEGIN_BATCH(3);
+      OUT_BATCH(CMD_STATE_SIP << 16 | (3 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      BEGIN_BATCH(2);
+      OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
  
+   const uint32_t _3DSTATE_VF_STATISTICS =
+      is_965 ? GEN4_3DSTATE_VF_STATISTICS : GM45_3DSTATE_VF_STATISTICS;
     BEGIN_BATCH(1);
-   OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
-            (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
+   OUT_BATCH(_3DSTATE_VF_STATISTICS << 16 | 1);
     ADVANCE_BATCH();
  }
  
@@ -953,7 +902,6 @@ const struct brw_tracked_state brw_invariant_state = {
     .dirty = {
        .mesa = 0,
        .brw = BRW_NEW_CONTEXT,
-      .cache = 0
     },
     .emit = brw_upload_invariant_state
  };
@@ -982,9 +930,6 @@ static void upload_state_base_address( struct brw_context *brw )
     if (brw->gen >= 6) {
        uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
  
-      if (brw->gen == 6)
-        intel_emit_post_sync_nonzero_flush(brw);
-
         BEGIN_BATCH(10);
         OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
         OUT_BATCH(mocs << 8 | /* General State Memory Object Control State */
@@ -1075,9 +1020,8 @@ static void upload_state_base_address( struct brw_context *brw )
  const struct brw_tracked_state brw_state_base_address = {
     .dirty = {
        .mesa = 0,
-      .brw = (BRW_NEW_BATCH |
-             BRW_NEW_PROGRAM_CACHE),
-      .cache = 0,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_PROGRAM_CACHE,
     },
     .emit = upload_state_base_address
  };