iris: CS stall for stream out -> VB
author Kenneth Graunke <kenneth@whitecape.org>
Sun, 2 Dec 2018 22:16:08 +0000 (14:16 -0800)
committer Kenneth Graunke <kenneth@whitecape.org>
Thu, 21 Feb 2019 18:26:10 +0000 (10:26 -0800)
i965 doesn't do this, but I suspect it just stalls a lot and doesn't hit
this.  Fixes ext_transform_feedback-position render among others.

src/gallium/drivers/iris/iris_state.c

index 9ef6df7770eb3721d1e5037daa7141a7ca801b89..d5cd6ae82012622d66d3461f45561a59aba3a926 100644 (file)
@@ -4368,7 +4368,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
           * So, we need to do a VF cache invalidate if the buffer for a VB
           * slot slot changes [48:32] address bits from the previous time.
           */
-         bool need_invalidate = false;
+         unsigned flush_flags = 0;
 
          for (unsigned i = 0; i < cso->num_buffers; i++) {
             uint16_t high_bits = 0;
@@ -4379,16 +4379,23 @@ iris_upload_dirty_render_state(struct iris_context *ice,
 
                high_bits = res->bo->gtt_offset >> 32ull;
                if (high_bits != ice->state.last_vbo_high_bits[i]) {
-                  need_invalidate = true;
+                  flush_flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
                   ice->state.last_vbo_high_bits[i] = high_bits;
                }
+
+               /* If the buffer was written to by streamout, we may need
+                * to stall so those writes land and become visible to the
+                * vertex fetcher.
+                *
+                * TODO: This may stall more than necessary.
+                */
+               if (res->bind_history & PIPE_BIND_STREAM_OUTPUT)
+                  flush_flags |= PIPE_CONTROL_CS_STALL;
             }
          }
 
-         if (need_invalidate) {
-            iris_emit_pipe_control_flush(batch,
-                                         PIPE_CONTROL_VF_CACHE_INVALIDATE);
-         }
+         if (flush_flags)
+            iris_emit_pipe_control_flush(batch, flush_flags);
 
          iris_batch_emit(batch, cso->vertex_buffers, sizeof(uint32_t) *
                          (1 + vb_dwords * cso->num_buffers));