i965: Enable hardware-generated binding tables on render path.
authorAbdiel Janulgue <abdiel.janulgue@linux.intel.com>
Wed, 15 Apr 2015 10:04:45 +0000 (13:04 +0300)
committerAbdiel Janulgue <abdiel.janulgue@linux.intel.com>
Sat, 18 Jul 2015 13:16:54 +0000 (16:16 +0300)
This patch implements the binding table enable command which is also
used to allocate a binding table pool where where hardware-generated
binding table entries are flushed into. Each binding table offset in
the binding table pool is unique per each shader stage that are
enabled within a batch.

Also insert the required brw_tracked_state objects to enable
hw-generated binding tables in normal render path.

v2: - Use MOCS in binding table pool alloc for GEN8
    - Fix spurious offset when allocating binding table pool entry
      and start from zero instead.
v3: - Include GEN8 fix for spurious offset above.
v4: - Fixup wrong packet length in enable/disable hw-binding table
      for GEN8 (Ville).
    - Don't invoke HW-binding table disable command when we dont
      have resource streamer (Chris).
v5: - Reorder the state cache invalidate flush so it happens in-between
      enabling hw-generated binding tables and the previous sw-binding
      table GPU state (Chris).
v6: - Do the same fix in v5 for gen7_disable_hw_binding_tables().
    - Adhere to coding guidelines and make comments more informative.

Cc: kenneth@whitecape.org
Cc: syrjala@sci.fi
Cc: chris@chris-wilson.co.uk
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
src/mesa/drivers/dri/i965/brw_binding_tables.c
src/mesa/drivers/dri/i965/brw_context.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/gen7_disable.c
src/mesa/drivers/dri/i965/gen8_disable.c
src/mesa/drivers/dri/i965/intel_batchbuffer.c

index 98ff0ddcd5828aa6f4cc950b0507df2ff4fcc940..6769f0cd1abd235cec0f9e70b3e905c0597ee057 100644 (file)
@@ -170,6 +170,106 @@ const struct brw_tracked_state brw_gs_binding_table = {
    .emit = brw_gs_upload_binding_table,
 };
 
+/**
+ * Disable hardware binding table support, falling back to the
+ * older software-generated binding table mechanism.
+ */
+void
+gen7_disable_hw_binding_tables(struct brw_context *brw)
+{
+   if (!brw->use_resource_streamer)
+      return;
+   /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
+    * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+    *
+    * "When switching between HW and SW binding table generation, SW must
+    * issue a state cache invalidate."
+    */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+
+   int pkt_len = brw->gen >= 8 ? 4 : 3;
+
+   BEGIN_BATCH(pkt_len);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
+   if (brw->gen >= 8) {
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+   } else {
+      OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE);
+      OUT_BATCH(0);
+   }
+   ADVANCE_BATCH();
+}
+
+/**
+ * Enable hardware binding tables and set up the binding table pool.
+ */
+void
+gen7_enable_hw_binding_tables(struct brw_context *brw)
+{
+   if (!brw->use_resource_streamer)
+      return;
+
+   if (!brw->hw_bt_pool.bo) {
+      /* We use a single re-usable buffer object for the lifetime of the
+       * context and size it to maximum allowed binding tables that can be
+       * programmed per batch:
+       *
+       * From the Haswell PRM, Volume 7: 3D Media GPGPU,
+       * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+       * "A maximum of 16,383 Binding tables are allowed in any batch buffer"
+       */
+      static const int max_size = 16383 * 4;
+      brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
+                                              max_size, 64);
+      brw->hw_bt_pool.next_offset = 0;
+   }
+
+   /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
+    * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+    *
+    * "When switching between HW and SW binding table generation, SW must
+    * issue a state cache invalidate."
+    */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+
+   int pkt_len = brw->gen >= 8 ? 4 : 3;
+   uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE;
+   if (brw->is_haswell) {
+      dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
+             HSW_BT_POOL_ALLOC_MUST_BE_ONE;
+   } else if (brw->gen >= 8) {
+      dw1 |= BDW_MOCS_WB;
+   }
+
+   BEGIN_BATCH(pkt_len);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
+   if (brw->gen >= 8) {
+      OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
+      OUT_BATCH(brw->hw_bt_pool.bo->size);
+   } else {
+      OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
+      OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+             brw->hw_bt_pool.bo->size);
+   }
+   ADVANCE_BATCH();
+}
+
+void
+gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
+{
+   brw->hw_bt_pool.next_offset = 0;
+}
+
+const struct brw_tracked_state gen7_hw_binding_tables = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+   },
+   .emit = gen7_enable_hw_binding_tables
+};
+
 /** @} */
 
 /**
index 05cb53b37110b8f3ed3f8de5ace28bccea958646..efcd91aad84916dbcbb1e951cc90e70405c35ce9 100644 (file)
@@ -941,6 +941,10 @@ intelDestroyContext(__DRIcontext * driContextPriv)
    if (brw->wm.base.scratch_bo)
       drm_intel_bo_unreference(brw->wm.base.scratch_bo);
 
+   gen7_reset_hw_bt_pool_offsets(brw);
+   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
+   brw->hw_bt_pool.bo = NULL;
+
    drm_intel_gem_context_destroy(brw->hw_ctx);
 
    if (ctx->swrast_context) {
index a9f1f61b2685dac98b2cc1d71d7662388362503c..8bbeb34075c1f8bb0e52546417e6fba2eb42becc 100644 (file)
@@ -1398,6 +1398,12 @@ struct brw_context
       struct brw_cs_prog_data *prog_data;
    } cs;
 
+   /* RS hardware binding table */
+   struct {
+      drm_intel_bo *bo;
+      uint32_t next_offset;
+   } hw_bt_pool;
+
    struct {
       uint32_t state_offset;
       uint32_t blend_state_offset;
index 987672f881592dbc15943367bfbe8b84f3ac2db6..f8ef98f2db96ab005b3e74e291c4557ce6e5e360 100644 (file)
@@ -132,6 +132,7 @@ extern const struct brw_tracked_state gen7_sol_state;
 extern const struct brw_tracked_state gen7_urb;
 extern const struct brw_tracked_state gen7_vs_state;
 extern const struct brw_tracked_state gen7_wm_state;
+extern const struct brw_tracked_state gen7_hw_binding_tables;
 extern const struct brw_tracked_state haswell_cut_index;
 extern const struct brw_tracked_state gen8_blend_state;
 extern const struct brw_tracked_state gen8_disable_stages;
@@ -372,6 +373,11 @@ gen7_upload_constant_state(struct brw_context *brw,
                            const struct brw_stage_state *stage_state,
                            bool active, unsigned opcode);
 
+void gen7_rs_control(struct brw_context *brw, int enable);
+void gen7_enable_hw_binding_tables(struct brw_context *brw);
+void gen7_disable_hw_binding_tables(struct brw_context *brw);
+void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw);
+
 #ifdef __cplusplus
 }
 #endif
index 7662c3b580c66aceff8c607d4e8dd17fb59cc860..6096b4946a00d7b6d20041a8130881426f137a74 100644 (file)
@@ -192,6 +192,8 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
    &gen6_color_calc_state,     /* must do before cc unit */
    &gen6_depth_stencil_state,  /* must do before cc unit */
 
+   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
+
    &gen6_vs_push_constants, /* Before vs_state */
    &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
@@ -268,6 +270,8 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
    &gen8_blend_state,
    &gen6_color_calc_state,
 
+   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
+
    &gen6_vs_push_constants, /* Before vs_state */
    &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
index 2c43cd77f07e13146947f60b28085dfabfb95ea2..bb509696d7270070c1cdfbd29d1c6e8e683c044c 100644 (file)
@@ -52,7 +52,7 @@ disable_stages(struct brw_context *brw)
 
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
-   OUT_BATCH(0);
+   OUT_BATCH(brw->hw_bt_pool.next_offset);
    ADVANCE_BATCH();
 
    /* Disable the TE */
@@ -85,7 +85,7 @@ disable_stages(struct brw_context *brw)
 
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
-   OUT_BATCH(0);
+   OUT_BATCH(brw->hw_bt_pool.next_offset);
    ADVANCE_BATCH();
 }
 
index da0d4a5fe7afd593cf8548faa70a1951908570b7..32508e377c94df433c659d20ad21271e51276776 100644 (file)
@@ -66,7 +66,7 @@ disable_stages(struct brw_context *brw)
 
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
-   OUT_BATCH(0);
+   OUT_BATCH(brw->hw_bt_pool.next_offset);
    ADVANCE_BATCH();
 
    /* Disable the TE */
@@ -101,7 +101,7 @@ disable_stages(struct brw_context *brw)
 
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
-   OUT_BATCH(0);
+   OUT_BATCH(brw->hw_bt_pool.next_offset);
    ADVANCE_BATCH();
 
    BEGIN_BATCH(2);
index d40e67133e2b55a45573686c8cc9a5cc882513ed..85f20a057295fba9677ab7b360d0da965b2cf4f9 100644 (file)
@@ -33,6 +33,7 @@
 #include "intel_fbo.h"
 #include "brw_context.h"
 #include "brw_defines.h"
+#include "brw_state.h"
 
 #include <xf86drm.h>
 #include <i915_drm.h>
@@ -391,6 +392,9 @@ _intel_batchbuffer_flush(struct brw_context *brw,
       drm_intel_bo_wait_rendering(brw->batch.bo);
    }
 
+   if (brw->use_resource_streamer)
+      gen7_reset_hw_bt_pool_offsets(brw);
+
    /* Start a new batch buffer. */
    brw_new_batch(brw);