From 3fdf2bb313b7e91f223fc45ad68adea9d5e76407 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 25 Nov 2019 10:04:38 -0800 Subject: [PATCH] iris: Disable VF cache partial address workaround on Gen11+ The vertex cache uses the full 48-bit address on Gen11+. See the documentation for 3DSTATE_VERTEX_BUFFERS, which describes the workaround and lists it as pre-Icelake. Interestingly, the docs don't mention index buffers as needing a workaround at all. So either we've been overzealous, or the docs never got updated to record that. Which begs the question of whether the issue there was fixed, if there was one... Cuts 40% of the PIPE_CONTROLs from Civilization VI's benchmark; appears that it improves performance by about 1-2% on Icelake 8x8 (not frequency locked). --- src/gallium/drivers/iris/iris_blorp.c | 2 ++ src/gallium/drivers/iris/iris_state.c | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c index a1c0dbbcf4e..c2253ac913a 100644 --- a/src/gallium/drivers/iris/iris_blorp.c +++ b/src/gallium/drivers/iris/iris_blorp.c @@ -204,6 +204,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch, const struct blorp_address *addrs, unsigned num_vbs) { +#if GEN_GEN < 11 struct iris_context *ice = blorp_batch->blorp->driver_ctx; struct iris_batch *batch = blorp_batch->driver_batch; bool need_invalidate = false; @@ -224,6 +225,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch, PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_CS_STALL); } +#endif } static struct blorp_address diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index a30aa61b3ce..51dffb45eba 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -5784,6 +5784,15 @@ iris_upload_dirty_render_state(struct iris_context *ice, } if (count) { +#if GEN_GEN >= 11 + /* Gen11+ doesn't need the cache workaround below */ + uint64_t bound = dynamic_bound; + while (bound) { + const int i = u_bit_scan64(&bound); + iris_use_optional_res(batch, genx->vertex_buffers[i].resource, + false); + } +#else /* The VF cache designers cut corners, and made the cache key's * tuple only consider the bottom * 32 bits of the address. If you have two vertex buffers which get @@ -5819,6 +5828,7 @@ iris_upload_dirty_render_state(struct iris_context *ice, "workaround: VF cache 32-bit key [VB]", flush_flags); } +#endif const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length); @@ -6034,6 +6044,7 @@ iris_upload_render_state(struct iris_context *ice, iris_use_pinned_bo(batch, bo, false); } +#if GEN_GEN < 11 /* The VF cache key only uses 32-bits, see vertex buffer comment above */ uint16_t high_bits = bo->gtt_offset >> 32ull; if (high_bits != ice->state.last_index_bo_high_bits) { @@ -6043,6 +6054,7 @@ iris_upload_render_state(struct iris_context *ice, PIPE_CONTROL_CS_STALL); ice->state.last_index_bo_high_bits = high_bits; } +#endif } #define _3DPRIM_END_OFFSET 0x2420 -- 2.30.2