From: Francisco Jerez
Date: Thu, 6 Feb 2020 02:59:46 +0000 (-0800)
Subject: iris: Implement buffer-local memory barrier based on cache coherency matrix.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e22659089837aacf6c97544fcc4c9acdda516297;p=mesa.git

iris: Implement buffer-local memory barrier based on cache coherency matrix.

This takes advantage of the previously introduced cache tracking
infrastructure in order to define a multi-purpose barrier operation
that allows the caller to order memory operations with respect to
previous operations performed on the same buffer from any other cache
domain.

v2: Assorted CPU overhead micro-optimizations (Francisco).
v3: Use C99 designated initializers (Ken).

Reviewed-by: Kenneth Graunke
Part-of:
---

diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index 68dcdb5a59a..eac01536334 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -813,6 +813,9 @@ void iris_emit_pipe_control_write(struct iris_batch *batch,
                                   uint64_t imm);
 void iris_emit_end_of_pipe_sync(struct iris_batch *batch,
                                 const char *reason, uint32_t flags);
+void iris_emit_buffer_barrier_for(struct iris_batch *batch,
+                                  struct iris_bo *bo,
+                                  enum iris_domain access);
 void iris_flush_all_caches(struct iris_batch *batch);
 
 #define iris_handle_always_flush_cache(batch) \
diff --git a/src/gallium/drivers/iris/iris_pipe_control.c b/src/gallium/drivers/iris/iris_pipe_control.c
index 193bdef6c26..74d21c8c589 100644
--- a/src/gallium/drivers/iris/iris_pipe_control.c
+++ b/src/gallium/drivers/iris/iris_pipe_control.c
@@ -152,6 +152,135 @@ iris_emit_end_of_pipe_sync(struct iris_batch *batch,
                                 batch->screen->workaround_address.offset, 0);
 }
 
+/**
+ * Emits appropriate flushes and invalidations for any previous memory
+ * operations on \p bo to be strictly ordered relative to any subsequent
+ * memory operations performed from the caching domain \p access.
+ *
+ * This is useful because the GPU has separate incoherent caches for the
+ * render target, sampler, etc., which need to be explicitly invalidated or
+ * flushed in order to obtain the expected memory ordering in cases where
+ * the same surface is accessed through multiple caches (e.g. due to
+ * render-to-texture).
+ *
+ * This provides the expected memory ordering guarantees whether the
+ * previous access was performed from the same batch or a different one,
+ * but only the former case needs to be handled explicitly here, since the
+ * kernel already inserts implicit flushes and synchronization in order to
+ * guarantee that any data dependencies between batches are satisfied.
+ *
+ * Even though no flushing or invalidation is required in order to account
+ * for concurrent updates from other batches, we provide the guarantee that
+ * a required synchronization operation due to a previous batch-local update
+ * will never be omitted due to the influence of another thread accessing
+ * the same buffer concurrently from the same caching domain: Such a
+ * concurrent update will only ever change the seqno of the last update to
+ * a value greater than the local value (see iris_bo_bump_seqno()), which
+ * means that we will always emit at least as much flushing and
+ * invalidation as we would have for the local seqno (see the
+ * coherent_seqnos comparisons below).
+ */
+void
+iris_emit_buffer_barrier_for(struct iris_batch *batch,
+                             struct iris_bo *bo,
+                             enum iris_domain access)
+{
+   const uint32_t all_flush_bits = (PIPE_CONTROL_CACHE_FLUSH_BITS |
+                                    PIPE_CONTROL_STALL_AT_SCOREBOARD |
+                                    PIPE_CONTROL_FLUSH_ENABLE);
+   const uint32_t flush_bits[NUM_IRIS_DOMAINS] = {
+      [IRIS_DOMAIN_RENDER_WRITE] = PIPE_CONTROL_RENDER_TARGET_FLUSH,
+      [IRIS_DOMAIN_DEPTH_WRITE] = PIPE_CONTROL_DEPTH_CACHE_FLUSH,
+      [IRIS_DOMAIN_OTHER_WRITE] = PIPE_CONTROL_FLUSH_ENABLE,
+      [IRIS_DOMAIN_OTHER_READ] = PIPE_CONTROL_STALL_AT_SCOREBOARD,
+   };
+   const uint32_t invalidate_bits[NUM_IRIS_DOMAINS] = {
+      [IRIS_DOMAIN_RENDER_WRITE] = PIPE_CONTROL_RENDER_TARGET_FLUSH,
+      [IRIS_DOMAIN_DEPTH_WRITE] = PIPE_CONTROL_DEPTH_CACHE_FLUSH,
+      [IRIS_DOMAIN_OTHER_WRITE] = PIPE_CONTROL_FLUSH_ENABLE,
+      [IRIS_DOMAIN_OTHER_READ] = (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+                                  PIPE_CONTROL_CONST_CACHE_INVALIDATE),
+   };
+   uint32_t bits = 0;
+
+   /* Iterate over all read/write domains first in order to handle RaW
+    * and WaW dependencies, which might involve flushing the domain of
+    * the previous access and invalidating the specified domain.
+    */
+   for (unsigned i = 0; i < IRIS_DOMAIN_OTHER_WRITE; i++) {
+      assert(!iris_domain_is_read_only(i));
+      if (i != access) {
+         const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]);
+
+         /* Invalidate unless the most recent read/write access from
+          * this domain is already guaranteed to be visible to the
+          * specified domain.  Flush if the most recent access from
+          * this domain occurred after its most recent flush.
+          */
+         if (seqno > batch->coherent_seqnos[access][i]) {
+            bits |= invalidate_bits[access];
+
+            if (seqno > batch->coherent_seqnos[i][i])
+               bits |= flush_bits[i];
+         }
+      }
+   }
+
+   /* All read-only domains can be considered mutually coherent since
+    * the order of read-only memory operations is immaterial.  If the
+    * specified domain is read/write we need to iterate over them too,
+    * in order to handle any WaR dependencies.
+    */
+   if (!iris_domain_is_read_only(access)) {
+      for (unsigned i = IRIS_DOMAIN_OTHER_READ; i < NUM_IRIS_DOMAINS; i++) {
+         assert(iris_domain_is_read_only(i));
+         const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]);
+
+         /* Flush if the most recent access from this domain occurred
+          * after its most recent flush.
+          */
+         if (seqno > batch->coherent_seqnos[i][i])
+            bits |= flush_bits[i];
+      }
+   }
+
+   /* The IRIS_DOMAIN_OTHER_WRITE kitchen-sink domain cannot be
+    * considered coherent with itself since it's really a collection
+    * of multiple incoherent read/write domains, so we special-case it
+    * here.
+    */
+   const unsigned i = IRIS_DOMAIN_OTHER_WRITE;
+   const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]);
+
+   /* Invalidate unless the most recent read/write access from this
+    * domain is already guaranteed to be visible to the specified
+    * domain.  Flush if the most recent access from this domain
+    * occurred after its most recent flush.
+    */
+   if (seqno > batch->coherent_seqnos[access][i]) {
+      bits |= invalidate_bits[access];
+
+      if (seqno > batch->coherent_seqnos[i][i])
+         bits |= flush_bits[i];
+   }
+
+   if (bits) {
+      /* Stall-at-scoreboard is not expected to work in combination with
+       * other flush bits.
+       */
+      if (bits & PIPE_CONTROL_CACHE_FLUSH_BITS)
+         bits &= ~PIPE_CONTROL_STALL_AT_SCOREBOARD;
+
+      /* Emit any required flushes and invalidations.
+       */
+      if (bits & all_flush_bits)
+         iris_emit_end_of_pipe_sync(batch, "cache tracker: flush",
+                                    bits & all_flush_bits);
+
+      if (bits & ~all_flush_bits)
+         iris_emit_pipe_control_flush(batch, "cache tracker: invalidate",
+                                      bits & ~all_flush_bits);
+   }
+}
+
 /**
  * Flush and invalidate all caches (for debugging purposes).
  */
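
For reference, the barrier above boils down to two seqno comparisons per
pair of (previous domain, requested domain). The following standalone
sketch models just that decision logic; it is illustrative, not Mesa code:
the model_batch/model_bo types, the domain enum, and the helper name
barrier_for_pair are simplified, hypothetical stand-ins for the iris
structures, which track a last_seqnos[] array per buffer and a
coherent_seqnos[domain][domain] matrix per batch.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    enum domain { RENDER_WRITE, DEPTH_WRITE, OTHER_WRITE, OTHER_READ,
                  NUM_DOMAINS };

    struct model_batch {
       /* coherent_seqnos[i][j]: most recent access from domain j whose
        * effects are already guaranteed visible to domain i. */
       uint64_t coherent_seqnos[NUM_DOMAINS][NUM_DOMAINS];
    };

    struct model_bo {
       /* last_seqnos[i]: most recent access to this buffer from domain i. */
       uint64_t last_seqnos[NUM_DOMAINS];
    };

    /* For a new access from 'access' ordered after a previous access from
     * domain 'prev', decide whether the caches of 'access' need to be
     * invalidated and whether domain 'prev' needs to be flushed -- the
     * same two comparisons made in iris_emit_buffer_barrier_for(). */
    static void
    barrier_for_pair(const struct model_batch *batch,
                     const struct model_bo *bo,
                     enum domain access, enum domain prev,
                     bool *need_invalidate, bool *need_flush)
    {
       const uint64_t seqno = bo->last_seqnos[prev];

       /* Invalidate unless prev's last access is already visible to
        * 'access'; also flush prev if its last access happened after its
        * most recent flush. */
       *need_invalidate = seqno > batch->coherent_seqnos[access][prev];
       *need_flush = *need_invalidate &&
                     seqno > batch->coherent_seqnos[prev][prev];
    }

    int
    main(void)
    {
       struct model_batch batch = {0};
       struct model_bo bo = {0};
       bool inv, flush;

       /* Render-to-texture hazard: a render-target write (seqno 42)
        * followed by a sampler read of the same buffer. */
       bo.last_seqnos[RENDER_WRITE] = 42;
       barrier_for_pair(&batch, &bo, OTHER_READ, RENDER_WRITE, &inv, &flush);
       printf("invalidate=%d flush=%d\n", inv, flush);   /* 1 1 */

       /* Once that flush has landed, every domain is coherent with the
        * render cache up to seqno 42, so repeating the read requires no
        * further barrier. */
       for (int j = 0; j < NUM_DOMAINS; j++)
          batch.coherent_seqnos[j][RENDER_WRITE] = 42;
       barrier_for_pair(&batch, &bo, OTHER_READ, RENDER_WRITE, &inv, &flush);
       printf("invalidate=%d flush=%d\n", inv, flush);   /* 0 0 */

       return 0;
    }

Note how the flush test indexes the matrix diagonally
(coherent_seqnos[prev][prev]): a domain is only "coherent with itself" up
to its most recent flush, which is what allows a single flush to be
amortized across any number of later barriers against the same write.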