From e31b703c4232fd59d512ab2a865161c9ce859706 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 18 Feb 2020 22:22:51 +0000 Subject: [PATCH] iris: Place a seqno at the end of every batch We can use seqno as a basis for fast userspace fences: where we can check a value directly to test for fence completion without having to query using the kernel. To do so we need to write a breadcrumb from the batch and track those writes as the basis for our lightweight fences. Reviewed-by: Kenneth Graunke Part-of: --- src/gallium/drivers/iris/iris_batch.c | 31 ++++++-- src/gallium/drivers/iris/iris_batch.h | 23 ++++-- src/gallium/drivers/iris/iris_fence.c | 6 +- src/gallium/drivers/iris/iris_seqno.c | 76 +++++++++++++++++++ src/gallium/drivers/iris/iris_seqno.h | 105 ++++++++++++++++++++++++++ src/gallium/drivers/iris/meson.build | 2 + 6 files changed, 230 insertions(+), 13 deletions(-) create mode 100644 src/gallium/drivers/iris/iris_seqno.c create mode 100644 src/gallium/drivers/iris/iris_seqno.h diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c index 324ae016ae3..6533e11349d 100644 --- a/src/gallium/drivers/iris/iris_batch.c +++ b/src/gallium/drivers/iris/iris_batch.c @@ -48,6 +48,7 @@ #include "intel/common/gen_gem.h" #include "util/hash_table.h" #include "util/set.h" +#include "util/u_upload_mgr.h" #include "main/macros.h" #include @@ -180,6 +181,11 @@ iris_init_batch(struct iris_context *ice, batch->state_sizes = ice->state.sizes; batch->name = name; + batch->seqno.uploader = + u_upload_create(&ice->ctx, 4096, PIPE_BIND_CUSTOM, + PIPE_USAGE_STAGING, 0); + iris_seqno_init(batch); + batch->hw_ctx_id = iris_create_hw_context(screen->bufmgr); assert(batch->hw_ctx_id); @@ -313,7 +319,8 @@ iris_use_pinned_bo(struct iris_batch *batch, if (other_entry && ((other_entry->flags & EXEC_OBJECT_WRITE) || writable)) { iris_batch_flush(batch->other_batches[b]); - iris_batch_add_syncobj(batch, batch->other_batches[b]->last_syncobj, 
iris_batch_add_syncobj(batch, + batch->other_batches[b]->last_seqno->syncobj, I915_EXEC_FENCE_WAIT); } } @@ -408,11 +415,14 @@ iris_batch_free(struct iris_batch *batch) ralloc_free(batch->exec_fences.mem_ctx); + pipe_resource_reference(&batch->seqno.ref.res, NULL); + util_dynarray_foreach(&batch->syncobjs, struct iris_syncobj *, s) iris_syncobj_reference(screen, s, NULL); ralloc_free(batch->syncobjs.mem_ctx); - iris_syncobj_reference(screen, &batch->last_syncobj, NULL); + iris_seqno_reference(batch->screen, &batch->last_seqno, NULL); + u_upload_destroy(batch->seqno.uploader); iris_bo_unreference(batch->bo); batch->bo = NULL; @@ -497,6 +507,17 @@ add_aux_map_bos_to_batch(struct iris_batch *batch) } } +static void +finish_seqno(struct iris_batch *batch) +{ + struct iris_seqno *sq = iris_seqno_new(batch, IRIS_SEQNO_END); + if (!sq) + return; + + iris_seqno_reference(batch->screen, &batch->last_seqno, sq); + iris_seqno_reference(batch->screen, &sq, NULL); +} + /** * Terminate a batch with MI_BATCH_BUFFER_END. */ @@ -505,6 +526,8 @@ iris_finish_batch(struct iris_batch *batch) { add_aux_map_bos_to_batch(batch); + finish_seqno(batch); + /* Emit MI_BATCH_BUFFER_END to finish our batch. 
*/ uint32_t *map = batch->map_next; @@ -687,10 +710,6 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line) batch->exec_count = 0; batch->aperture_space = 0; - struct iris_syncobj *syncobj = - ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0]; - iris_syncobj_reference(screen, &batch->last_syncobj, syncobj); - util_dynarray_foreach(&batch->syncobjs, struct iris_syncobj *, s) iris_syncobj_reference(screen, s, NULL); util_dynarray_clear(&batch->syncobjs); diff --git a/src/gallium/drivers/iris/iris_batch.h b/src/gallium/drivers/iris/iris_batch.h index 0afab81c8b2..d8278279551 100644 --- a/src/gallium/drivers/iris/iris_batch.h +++ b/src/gallium/drivers/iris/iris_batch.h @@ -34,6 +34,7 @@ #include "common/gen_decoder.h" #include "iris_fence.h" +#include "iris_seqno.h" struct iris_context; @@ -41,10 +42,10 @@ struct iris_context; #define MAX_BATCH_SIZE (256 * 1024) /* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END - * or 12 bytes for MI_BATCH_BUFFER_START (when chaining). Plus, we may - * need an extra 4 bytes to pad out to the nearest QWord. So reserve 16. + * or 12 bytes for MI_BATCH_BUFFER_START (when chaining). Plus another + * 24 bytes for the seqno write (using PIPE_CONTROL). */ -#define BATCH_RESERVED 16 +#define BATCH_RESERVED 36 /* Our target batch size - flush approximately at this point. */ #define BATCH_SZ (64 * 1024 - BATCH_RESERVED) @@ -112,8 +113,20 @@ struct iris_batch { /** The amount of aperture space (in bytes) used by all exec_bos */ int aperture_space; - /** A drm_syncobj for the last batch that was submitted. */ - struct iris_syncobj *last_syncobj; + struct { + /** Uploader to use for sequence numbers */ + struct u_upload_mgr *uploader; + + /** GPU buffer and CPU map where our seqno's will be written. */ + struct iris_state_ref ref; + uint32_t *map; + + /** The sequence number to write the next time we add a fence. 
*/ + uint32_t next; + } seqno; + + /** A seqno (and syncobj) for the last batch that was submitted. */ + struct iris_seqno *last_seqno; /** List of other batches which we might need to flush to use a BO */ struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1]; diff --git a/src/gallium/drivers/iris/iris_fence.c b/src/gallium/drivers/iris/iris_fence.c index 09c1b427ef8..69a3d608bdc 100644 --- a/src/gallium/drivers/iris/iris_fence.c +++ b/src/gallium/drivers/iris/iris_fence.c @@ -196,11 +196,13 @@ iris_fence_flush(struct pipe_context *ctx, pipe_reference_init(&fence->ref, 1); for (unsigned b = 0; b < IRIS_BATCH_COUNT; b++) { - if (!iris_wait_syncobj(ctx->screen, ice->batches[b].last_syncobj, 0)) + struct iris_batch *batch = &ice->batches[b]; + + if (!iris_wait_syncobj(ctx->screen, batch->last_seqno->syncobj, 0)) continue; iris_syncobj_reference(screen, &fence->syncobj[fence->count++], - ice->batches[b].last_syncobj); + batch->last_seqno->syncobj); } iris_fence_reference(ctx->screen, out_fence, NULL); diff --git a/src/gallium/drivers/iris/iris_seqno.c b/src/gallium/drivers/iris/iris_seqno.c new file mode 100644 index 00000000000..99f57fdf892 --- /dev/null +++ b/src/gallium/drivers/iris/iris_seqno.c @@ -0,0 +1,76 @@ +#include "iris_context.h" +#include "iris_seqno.h" +#include "util/u_upload_mgr.h" + +static void +iris_seqno_reset(struct iris_batch *batch) +{ + u_upload_alloc(batch->seqno.uploader, 0, sizeof(uint64_t), sizeof(uint64_t), + &batch->seqno.ref.offset, &batch->seqno.ref.res, + (void **)&batch->seqno.map); + WRITE_ONCE(*batch->seqno.map, 0); + batch->seqno.next++; +} + +void +iris_seqno_init(struct iris_batch *batch) +{ + batch->seqno.ref.res = NULL; + batch->seqno.next = 0; + iris_seqno_reset(batch); +} + +static uint32_t +iris_seqno_next(struct iris_batch *batch) +{ + uint32_t seqno = batch->seqno.next++; + + if (batch->seqno.next == 0) + iris_seqno_reset(batch); + + return seqno; +} + +void +iris_seqno_destroy(struct iris_screen *screen, struct iris_seqno 
*sq) +{ + iris_syncobj_reference(screen, &sq->syncobj, NULL); + pipe_resource_reference(&sq->ref.res, NULL); + free(sq); +} + +struct iris_seqno * +iris_seqno_new(struct iris_batch *batch, unsigned flags) +{ + struct iris_seqno *sq = calloc(1, sizeof(*sq)); + if (!sq) + return NULL; + + pipe_reference_init(&sq->reference, 1); + + sq->seqno = iris_seqno_next(batch); + + iris_syncobj_reference(batch->screen, &sq->syncobj, + iris_batch_get_signal_syncobj(batch)); + + pipe_resource_reference(&sq->ref.res, batch->seqno.ref.res); + sq->ref.offset = batch->seqno.ref.offset; + sq->map = batch->seqno.map; + sq->flags = flags; + + unsigned pc; + if (flags & IRIS_SEQNO_TOP_OF_PIPE) { + pc = PIPE_CONTROL_WRITE_IMMEDIATE | PIPE_CONTROL_CS_STALL; + } else { + pc = PIPE_CONTROL_WRITE_IMMEDIATE | + PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DATA_CACHE_FLUSH; + } + iris_emit_pipe_control_write(batch, "fence: seqno", pc, + iris_resource_bo(sq->ref.res), + sq->ref.offset, + sq->seqno); + + return sq; +} diff --git a/src/gallium/drivers/iris/iris_seqno.h b/src/gallium/drivers/iris/iris_seqno.h new file mode 100644 index 00000000000..70d8caf53b2 --- /dev/null +++ b/src/gallium/drivers/iris/iris_seqno.h @@ -0,0 +1,105 @@ +/* + * Copyright © 2020 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef IRIS_SEQNO_DOT_H +#define IRIS_SEQNO_DOT_H + +#include <stdbool.h> +#include <stdint.h> + +#include "iris_screen.h" +#include "iris_resource.h" + +/** + * A lightweight sequence number fence. + * + * We emit PIPE_CONTROLs inside a batch (possibly in the middle) + * which update a monotonically increasing, 32-bit counter. We + * can then check if that moment has passed by either: + * + * 1. Checking on the CPU by snooping on the DWord via a coherent map + * + * 2. Blocking on the GPU with MI_SEMAPHORE_WAIT from a second batch + * (relying on mid-batch preemption to switch GPU execution to the + * batch that writes it). + */ +struct iris_seqno { + struct pipe_reference reference; + + /** Buffer where the seqno lives */ + struct iris_state_ref ref; + + /** Coherent CPU map of the buffer containing the seqno DWord. */ + const uint32_t *map; + + /** + * A drm_syncobj which will be signaled at the end of the + * batch which writes this seqno. This can be used to block until + * the seqno has definitely passed (but may wait longer than necessary). 
+ */ + struct iris_syncobj *syncobj; + +#define IRIS_SEQNO_BOTTOM_OF_PIPE 0x0 /**< Written by bottom-of-pipe flush */ +#define IRIS_SEQNO_TOP_OF_PIPE 0x1 /**< Written by top-of-pipe flush */ +#define IRIS_SEQNO_END 0x2 /**< Written at the end of a batch */ + + /** Information about the type of flush involved (see IRIS_SEQNO_*) */ + uint32_t flags; + + /** + * Sequence number expected to be written by the flush we inserted + * when creating this fence. The iris_seqno is 'signaled' when *@map + * (written by the flush on the GPU) is greater-than-or-equal to @seqno. + */ + uint32_t seqno; +}; + +void iris_seqno_init(struct iris_batch *batch); + +struct iris_seqno *iris_seqno_new(struct iris_batch *batch, unsigned flags); + +void iris_seqno_destroy(struct iris_screen *screen, struct iris_seqno *sq); + +static inline void +iris_seqno_reference(struct iris_screen *screen, + struct iris_seqno **dst, + struct iris_seqno *src) +{ + if (pipe_reference(&(*dst)->reference, &src->reference)) + iris_seqno_destroy(screen, *dst); + + *dst = src; +} + +/** + * Return true if this seqno has passed. + * + * NULL is considered signaled. + */ +static inline bool +iris_seqno_signaled(const struct iris_seqno *sq) +{ + return !sq || (READ_ONCE(*sq->map) >= sq->seqno); +} + +#endif diff --git a/src/gallium/drivers/iris/meson.build b/src/gallium/drivers/iris/meson.build index 580391ac10e..0f3bf7ac1ee 100644 --- a/src/gallium/drivers/iris/meson.build +++ b/src/gallium/drivers/iris/meson.build @@ -50,6 +50,8 @@ files_libiris = files( 'iris_resource.h', 'iris_screen.c', 'iris_screen.h', + 'iris_seqno.c', + 'iris_seqno.h', 'iris_disk_cache.c', ) -- 2.30.2