iris: Disable VF cache partial address workaround on Gen11+
authorKenneth Graunke <kenneth@whitecape.org>
Mon, 25 Nov 2019 18:04:38 +0000 (10:04 -0800)
committerKenneth Graunke <kenneth@whitecape.org>
Tue, 26 Nov 2019 20:13:34 +0000 (12:13 -0800)
The vertex cache uses the full 48-bit address on Gen11+.  See the
documentation for 3DSTATE_VERTEX_BUFFERS, which describes the
workaround and lists it as pre-Icelake.

Interestingly, the docs don't mention index buffers as needing a
workaround at all.  So either we've been overzealous, or the docs
never got updated to record that.  Which begs the question of whether
the issue there was fixed, if there was one...

Cuts 40% of the PIPE_CONTROLs from Civilization VI's benchmark; appears
that it improves performance by about 1-2% on Icelake 8x8 (not frequency
locked).

src/gallium/drivers/iris/iris_blorp.c
src/gallium/drivers/iris/iris_state.c

index a1c0dbbcf4e9c8464b610291c3d81ff78714007f..c2253ac913a23dd06945df4647bdb4ca6aa800fa 100644 (file)
@@ -204,6 +204,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch,
                                            const struct blorp_address *addrs,
                                            unsigned num_vbs)
 {
+#if GEN_GEN < 11
    struct iris_context *ice = blorp_batch->blorp->driver_ctx;
    struct iris_batch *batch = blorp_batch->driver_batch;
    bool need_invalidate = false;
@@ -224,6 +225,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch,
                                    PIPE_CONTROL_VF_CACHE_INVALIDATE |
                                    PIPE_CONTROL_CS_STALL);
    }
+#endif
 }
 
 static struct blorp_address
index a30aa61b3ce9465dc4fd4e897b70442ee23b822b..51dffb45eba515058e168f9ab5ad911b36727c0f 100644 (file)
@@ -5784,6 +5784,15 @@ iris_upload_dirty_render_state(struct iris_context *ice,
       }
 
       if (count) {
+#if GEN_GEN >= 11
+         /* Gen11+ doesn't need the cache workaround below */
+         uint64_t bound = dynamic_bound;
+         while (bound) {
+            const int i = u_bit_scan64(&bound);
+            iris_use_optional_res(batch, genx->vertex_buffers[i].resource,
+                                  false);
+         }
+#else
          /* The VF cache designers cut corners, and made the cache key's
           * <VertexBufferIndex, Memory Address> tuple only consider the bottom
           * 32 bits of the address.  If you have two vertex buffers which get
@@ -5819,6 +5828,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
                                          "workaround: VF cache 32-bit key [VB]",
                                          flush_flags);
          }
+#endif
 
          const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length);
 
@@ -6034,6 +6044,7 @@ iris_upload_render_state(struct iris_context *ice,
          iris_use_pinned_bo(batch, bo, false);
       }
 
+#if GEN_GEN < 11
       /* The VF cache key only uses 32-bits, see vertex buffer comment above */
       uint16_t high_bits = bo->gtt_offset >> 32ull;
       if (high_bits != ice->state.last_index_bo_high_bits) {
@@ -6043,6 +6054,7 @@ iris_upload_render_state(struct iris_context *ice,
                                       PIPE_CONTROL_CS_STALL);
          ice->state.last_index_bo_high_bits = high_bits;
       }
+#endif
    }
 
 #define _3DPRIM_END_OFFSET          0x2420