From 190756482e62cb57e2bc8c798181e5f0171726fb Mon Sep 17 00:00:00 2001 From: Abdiel Janulgue Date: Wed, 15 Apr 2015 13:04:45 +0300 Subject: [PATCH] i965: Enable hardware-generated binding tables on render path. This patch implements the binding table enable command which is also used to allocate a binding table pool into which hardware-generated binding table entries are flushed. Each binding table offset in the binding table pool is unique to each shader stage that is enabled within a batch. Also insert the required brw_tracked_state objects to enable hw-generated binding tables in normal render path. v2: - Use MOCS in binding table pool alloc for GEN8 - Fix spurious offset when allocating binding table pool entry and start from zero instead. v3: - Include GEN8 fix for spurious offset above. v4: - Fixup wrong packet length in enable/disable hw-binding table for GEN8 (Ville). - Don't invoke HW-binding table disable command when we don't have resource streamer (Chris). v5: - Reorder the state cache invalidate flush so it happens in-between enabling hw-generated binding tables and the previous sw-binding table GPU state (Chris). v6: - Do the same fix in v5 for gen7_disable_hw_binding_tables(). - Adhere to coding guidelines and make comments more informative. 
Cc: kenneth@whitecape.org Cc: syrjala@sci.fi Cc: chris@chris-wilson.co.uk Reviewed-by: Kenneth Graunke Signed-off-by: Abdiel Janulgue --- .../drivers/dri/i965/brw_binding_tables.c | 100 ++++++++++++++++++ src/mesa/drivers/dri/i965/brw_context.c | 4 + src/mesa/drivers/dri/i965/brw_context.h | 6 ++ src/mesa/drivers/dri/i965/brw_state.h | 6 ++ src/mesa/drivers/dri/i965/brw_state_upload.c | 4 + src/mesa/drivers/dri/i965/gen7_disable.c | 4 +- src/mesa/drivers/dri/i965/gen8_disable.c | 4 +- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 + 8 files changed, 128 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c index 98ff0ddcd58..6769f0cd1ab 100644 --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c @@ -170,6 +170,106 @@ const struct brw_tracked_state brw_gs_binding_table = { .emit = brw_gs_upload_binding_table, }; +/** + * Disable hardware binding table support, falling back to the + * older software-generated binding table mechanism. + */ +void +gen7_disable_hw_binding_tables(struct brw_context *brw) +{ + if (!brw->use_resource_streamer) + return; + /* From the Haswell PRM, Volume 7: 3D Media GPGPU, + * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note: + * + * "When switching between HW and SW binding table generation, SW must + * issue a state cache invalidate." + */ + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE); + + int pkt_len = brw->gen >= 8 ? 4 : 3; + + BEGIN_BATCH(pkt_len); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2)); + if (brw->gen >= 8) { + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + } else { + OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE); + OUT_BATCH(0); + } + ADVANCE_BATCH(); +} + +/** + * Enable hardware binding tables and set up the binding table pool. 
+ */ +void +gen7_enable_hw_binding_tables(struct brw_context *brw) +{ + if (!brw->use_resource_streamer) + return; + + if (!brw->hw_bt_pool.bo) { + /* We use a single re-usable buffer object for the lifetime of the + * context and size it to maximum allowed binding tables that can be + * programmed per batch: + * + * From the Haswell PRM, Volume 7: 3D Media GPGPU, + * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note: + * "A maximum of 16,383 Binding tables are allowed in any batch buffer" + */ + static const int max_size = 16383 * 4; + brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt", + max_size, 64); + brw->hw_bt_pool.next_offset = 0; + } + + /* From the Haswell PRM, Volume 7: 3D Media GPGPU, + * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note: + * + * "When switching between HW and SW binding table generation, SW must + * issue a state cache invalidate." + */ + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE); + + int pkt_len = brw->gen >= 8 ? 4 : 3; + uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE; + if (brw->is_haswell) { + dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) | + HSW_BT_POOL_ALLOC_MUST_BE_ONE; + } else if (brw->gen >= 8) { + dw1 |= BDW_MOCS_WB; + } + + BEGIN_BATCH(pkt_len); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2)); + if (brw->gen >= 8) { + OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1); + OUT_BATCH(brw->hw_bt_pool.bo->size); + } else { + OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1); + OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, + brw->hw_bt_pool.bo->size); + } + ADVANCE_BATCH(); +} + +void +gen7_reset_hw_bt_pool_offsets(struct brw_context *brw) +{ + brw->hw_bt_pool.next_offset = 0; +} + +const struct brw_tracked_state gen7_hw_binding_tables = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + }, + .emit = gen7_enable_hw_binding_tables +}; + /** @} */ /** diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c index 05cb53b3711..efcd91aad84 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -941,6 +941,10 @@ intelDestroyContext(__DRIcontext * driContextPriv) if (brw->wm.base.scratch_bo) drm_intel_bo_unreference(brw->wm.base.scratch_bo); + gen7_reset_hw_bt_pool_offsets(brw); + drm_intel_bo_unreference(brw->hw_bt_pool.bo); + brw->hw_bt_pool.bo = NULL; + drm_intel_gem_context_destroy(brw->hw_ctx); if (ctx->swrast_context) { diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a9f1f61b268..8bbeb34075c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1398,6 +1398,12 @@ struct brw_context struct brw_cs_prog_data *prog_data; } cs; + /* RS hardware binding table */ + struct { + drm_intel_bo *bo; + uint32_t next_offset; + } hw_bt_pool; + struct { uint32_t state_offset; uint32_t blend_state_offset; diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 987672f8815..f8ef98f2db9 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -132,6 +132,7 @@ extern const struct brw_tracked_state gen7_sol_state; extern const struct brw_tracked_state gen7_urb; extern const struct brw_tracked_state gen7_vs_state; extern const struct brw_tracked_state gen7_wm_state; +extern const struct brw_tracked_state gen7_hw_binding_tables; extern const struct brw_tracked_state haswell_cut_index; extern const struct brw_tracked_state gen8_blend_state; extern const struct brw_tracked_state gen8_disable_stages; @@ -372,6 +373,11 @@ gen7_upload_constant_state(struct brw_context *brw, const struct brw_stage_state *stage_state, bool active, unsigned opcode); +void gen7_rs_control(struct brw_context *brw, int enable); +void gen7_enable_hw_binding_tables(struct brw_context *brw); +void gen7_disable_hw_binding_tables(struct brw_context 
*brw); +void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 7662c3b580c..6096b4946a0 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -192,6 +192,8 @@ static const struct brw_tracked_state *gen7_render_atoms[] = &gen6_color_calc_state, /* must do before cc unit */ &gen6_depth_stencil_state, /* must do before cc unit */ + &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */ + &gen6_vs_push_constants, /* Before vs_state */ &gen6_gs_push_constants, /* Before gs_state */ &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */ @@ -268,6 +270,8 @@ static const struct brw_tracked_state *gen8_render_atoms[] = &gen8_blend_state, &gen6_color_calc_state, + &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */ + &gen6_vs_push_constants, /* Before vs_state */ &gen6_gs_push_constants, /* Before gs_state */ &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */ diff --git a/src/mesa/drivers/dri/i965/gen7_disable.c b/src/mesa/drivers/dri/i965/gen7_disable.c index 2c43cd77f07..bb509696d72 100644 --- a/src/mesa/drivers/dri/i965/gen7_disable.c +++ b/src/mesa/drivers/dri/i965/gen7_disable.c @@ -52,7 +52,7 @@ disable_stages(struct brw_context *brw) BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2)); - OUT_BATCH(0); + OUT_BATCH(brw->hw_bt_pool.next_offset); ADVANCE_BATCH(); /* Disable the TE */ @@ -85,7 +85,7 @@ disable_stages(struct brw_context *brw) BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2)); - OUT_BATCH(0); + OUT_BATCH(brw->hw_bt_pool.next_offset); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen8_disable.c b/src/mesa/drivers/dri/i965/gen8_disable.c index da0d4a5fe7a..32508e377c9 100644 --- 
a/src/mesa/drivers/dri/i965/gen8_disable.c +++ b/src/mesa/drivers/dri/i965/gen8_disable.c @@ -66,7 +66,7 @@ disable_stages(struct brw_context *brw) BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2)); - OUT_BATCH(0); + OUT_BATCH(brw->hw_bt_pool.next_offset); ADVANCE_BATCH(); /* Disable the TE */ @@ -101,7 +101,7 @@ disable_stages(struct brw_context *brw) BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2)); - OUT_BATCH(0); + OUT_BATCH(brw->hw_bt_pool.next_offset); ADVANCE_BATCH(); BEGIN_BATCH(2); diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index d40e67133e2..85f20a05729 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -33,6 +33,7 @@ #include "intel_fbo.h" #include "brw_context.h" #include "brw_defines.h" +#include "brw_state.h" #include #include @@ -391,6 +392,9 @@ _intel_batchbuffer_flush(struct brw_context *brw, drm_intel_bo_wait_rendering(brw->batch.bo); } + if (brw->use_resource_streamer) + gen7_reset_hw_bt_pool_offsets(brw); + /* Start a new batch buffer. */ brw_new_batch(brw); -- 2.30.2