iris: Do the 48-bit vertex buffer address invalidation workaround
author Kenneth Graunke <kenneth@whitecape.org>
Wed, 21 Nov 2018 08:06:46 +0000 (00:06 -0800)
committer Kenneth Graunke <kenneth@whitecape.org>
Thu, 21 Feb 2019 18:26:10 +0000 (10:26 -0800)
src/gallium/drivers/iris/iris_blorp.c
src/gallium/drivers/iris/iris_context.h
src/gallium/drivers/iris/iris_state.c

index b474e1032ffd3408a02bb60f6711354e8d84f9af..6f359bb5cb9b726a9f39364fb4b02dabf88b40fc 100644 (file)
@@ -188,15 +188,14 @@ blorp_alloc_vertex_buffer(struct blorp_batch *blorp_batch,
 }
 
 /**
- * See vf_invalidate_for_vb_48b_transitions in iris_state.c.
- * XXX: actually add this
+ * See iris_upload_render_state's IRIS_DIRTY_VERTEX_BUFFERS handling for
+ * a comment about why these VF invalidations are needed.
  */
 static void
-blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
+blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch,
                                            const struct blorp_address *addrs,
                                            unsigned num_vbs)
 {
-#if 0
    struct iris_context *ice = blorp_batch->blorp->driver_ctx;
    struct iris_batch *batch = blorp_batch->driver_batch;
    bool need_invalidate = false;
@@ -214,7 +213,6 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
    if (need_invalidate) {
       iris_emit_pipe_control_flush(batch, PIPE_CONTROL_VF_CACHE_INVALIDATE);
    }
-#endif
 }
 
 static struct blorp_address
index 3eb50238b0cf5ba8868c6c0e7680732b178c4a91..42adbf60d4e34bfdb369305d80a507f4f11a3a35 100644 (file)
@@ -482,6 +482,10 @@ struct iris_context {
 
       struct iris_border_color_pool border_color_pool;
 
+      /** The high 16 bits of the last VBO/index buffer addresses */
+      uint16_t last_vbo_high_bits[33];
+      uint16_t last_index_bo_high_bits;
+
       /**
        * Resources containing streamed state which our render context
        * currently points to.  Used to re-add these to the validation
index 075753b170f5cbc6cf0ae39d9d9b65cbc8028b44..7c898a6cd851ff74e9ed1f19914049366d32aa55 100644 (file)
@@ -4337,14 +4337,39 @@ iris_upload_dirty_render_state(struct iris_context *ice,
       const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length);
 
       if (cso->num_buffers > 0) {
-         iris_batch_emit(batch, cso->vertex_buffers, sizeof(uint32_t) *
-                         (1 + vb_dwords * cso->num_buffers));
+         /* The VF cache designers cut corners, and made the cache key's
+          * <VertexBufferIndex, Memory Address> tuple only consider the bottom
+          * 32 bits of the address.  If you have two vertex buffers which get
+          * placed exactly 4 GiB apart and use them in back-to-back draw calls,
+          * you can get collisions (even within a single batch).
+          *
+          * So, we need to do a VF cache invalidate if the buffer for a VB
+          * slot changes its [47:32] address bits from the previous time.
+          */
+         bool need_invalidate = false;
 
          for (unsigned i = 0; i < cso->num_buffers; i++) {
+            uint16_t high_bits = 0;
+
             struct iris_resource *res = (void *) cso->resources[i];
-            if (res)
+            if (res) {
                iris_use_pinned_bo(batch, res->bo, false);
+
+               high_bits = res->bo->gtt_offset >> 32ull;
+               if (high_bits != ice->state.last_vbo_high_bits[i]) {
+                  need_invalidate = true;
+                  ice->state.last_vbo_high_bits[i] = high_bits;
+               }
+            }
          }
+
+         if (need_invalidate) {
+            iris_emit_pipe_control_flush(batch,
+                                         PIPE_CONTROL_VF_CACHE_INVALIDATE);
+         }
+
+         iris_batch_emit(batch, cso->vertex_buffers, sizeof(uint32_t) *
+                         (1 + vb_dwords * cso->num_buffers));
       }
    }
 
@@ -4424,6 +4449,13 @@ iris_upload_render_state(struct iris_context *ice,
          ib.BufferSize = bo->size;
          ib.BufferStartingAddress = ro_bo(bo, offset);
       }
+
+      /* The VF cache key only uses 32 bits; see vertex buffer comment above */
+      uint16_t high_bits = bo->gtt_offset >> 32ull;
+      if (high_bits != ice->state.last_index_bo_high_bits) {
+         iris_emit_pipe_control_flush(batch, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+         ice->state.last_index_bo_high_bits = high_bits;
+      }
    }
 
 #define _3DPRIM_END_OFFSET          0x2420