From: Kenneth Graunke Date: Sun, 9 Sep 2018 02:43:34 +0000 (-0700) Subject: iris: Support multiple binder BOs, update Surface State Base Address X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=eff081cdd9524f787339c13433b6b7758be474df;p=mesa.git iris: Support multiple binder BOs, update Surface State Base Address --- diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c index 2271513f6c9..b35466d69f1 100644 --- a/src/gallium/drivers/iris/iris_batch.c +++ b/src/gallium/drivers/iris/iris_batch.c @@ -38,7 +38,6 @@ */ #include "iris_batch.h" -#include "iris_binder.h" #include "iris_bufmgr.h" #include "iris_context.h" @@ -158,8 +157,6 @@ iris_init_batch(struct iris_batch *batch, batch->validation_list = malloc(batch->exec_array_size * sizeof(batch->validation_list[0])); - batch->binder.bo = NULL; - batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); batch->cache.depth = _mesa_set_create(NULL, _mesa_hash_pointer, @@ -254,9 +251,6 @@ iris_batch_reset(struct iris_batch *batch) create_batch(batch); assert(batch->bo->index == 0); - iris_destroy_binder(&batch->binder); - iris_init_binder(&batch->binder, batch->bo->bufmgr); - if (batch->state_sizes) _mesa_hash_table_clear(batch->state_sizes, NULL); @@ -281,8 +275,6 @@ iris_batch_free(struct iris_batch *batch) _mesa_hash_table_destroy(batch->cache.render, NULL); _mesa_set_destroy(batch->cache.depth, NULL); - iris_destroy_binder(&batch->binder); - if (batch->state_sizes) { _mesa_hash_table_destroy(batch->state_sizes, NULL); gen_batch_decode_ctx_finish(&batch->decoder); @@ -432,18 +424,16 @@ _iris_batch_flush_fence(struct iris_batch *batch, if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) { int bytes_for_commands = iris_batch_bytes_used(batch); - int bytes_for_binder = batch->binder.insert_point; int second_bytes = 0; if (batch->bo != batch->exec_bos[0]) { second_bytes = bytes_for_commands; bytes_for_commands += 
batch->primary_batch_size; } fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5d+%5db (%0.1f%%) " - "(cmds), %5db (%0.1f%%) (binder), %4d BOs (%0.1fMb aperture)\n", + "(cmds), %4d BOs (%0.1fMb aperture)\n", file, line, batch->primary_batch_size, second_bytes, 100.0f * bytes_for_commands / BATCH_SZ, - bytes_for_binder, 100.0f * bytes_for_binder / IRIS_BINDER_SIZE, batch->exec_count, (float) batch->aperture_space / (1024 * 1024)); dump_validation_list(batch); diff --git a/src/gallium/drivers/iris/iris_batch.h b/src/gallium/drivers/iris/iris_batch.h index 7d446817d3d..8ff3f60fa9d 100644 --- a/src/gallium/drivers/iris/iris_batch.h +++ b/src/gallium/drivers/iris/iris_batch.h @@ -29,7 +29,6 @@ #include #include "i915_drm.h" #include "common/gen_decoder.h" -#include "iris_binder.h" /* The kernel assumes batchbuffers are smaller than 256kB. */ #define MAX_BATCH_SIZE (256 * 1024) @@ -58,6 +57,9 @@ struct iris_batch { /** Last BO submitted to the hardware. Used for glFinish(). */ struct iris_bo *last_bo; + /** Last Surface State Base Address set in this hardware context. */ + uint64_t last_surface_base_address; + uint32_t hw_ctx_id; /** Which engine this batch targets - a I915_EXEC_RING_MASK value */ @@ -72,9 +74,6 @@ struct iris_batch { /** The amount of aperture space (in bytes) used by all exec_bos */ int aperture_space; - /** Binder (containing binding tables) */ - struct iris_binder binder; - struct { /** * Set of struct brw_bo * that have been rendered to within this diff --git a/src/gallium/drivers/iris/iris_binder.c b/src/gallium/drivers/iris/iris_binder.c index cba84f5fa53..2cac1b71256 100644 --- a/src/gallium/drivers/iris/iris_binder.c +++ b/src/gallium/drivers/iris/iris_binder.c @@ -49,6 +49,8 @@ * and cycling back around where possible to avoid replacing it at all costs. * * XXX: if we do have to flush, we should emit a performance warning. 
+ * + * XXX: these comments are out of date */ #include @@ -62,98 +64,131 @@ /* Avoid using offset 0, tools consider it NULL */ #define INIT_INSERT_POINT BTP_ALIGNMENT -/** - * Reserve a block of space in the binder, given the raw size in bytes. - */ -uint32_t -iris_binder_reserve(struct iris_batch *batch, unsigned size) +static bool +binder_has_space(struct iris_binder *binder, unsigned size) +{ + return binder->insert_point + size <= IRIS_BINDER_SIZE; +} + +static void +binder_realloc(struct iris_context *ice) { - struct iris_binder *binder = &batch->binder; + struct iris_screen *screen = (void *) ice->ctx.screen; + struct iris_bufmgr *bufmgr = screen->bufmgr; + struct iris_binder *binder = &ice->state.binder; - assert(size > 0); - assert((binder->insert_point % BTP_ALIGNMENT) == 0); + iris_bo_unreference(binder->bo); + + binder->bo = + iris_bo_alloc(bufmgr, "binder", IRIS_BINDER_SIZE, IRIS_MEMZONE_BINDER); + binder->map = iris_bo_map(NULL, binder->bo, MAP_WRITE); + binder->insert_point = INIT_INSERT_POINT; - /* If we can't fit all stages in the binder, flush the batch which - * will cause us to gain a new empty binder. + /* Allocating a new binder requires changing Surface State Base Address, + * which also invalidates all our previous binding tables - each entry + * in those tables is an offset from the old base. + * + * We do this here so that iris_binder_reserve_3d correctly gets a new + * larger total_size when making the updated reservation. */ - if (binder->insert_point + size > IRIS_BINDER_SIZE) - iris_batch_flush(batch); + ice->state.dirty |= IRIS_ALL_DIRTY_BINDINGS; +} +static uint32_t +binder_insert(struct iris_binder *binder, unsigned size) +{ uint32_t offset = binder->insert_point; - /* It had better fit now. 
*/ - assert(offset + size <= IRIS_BINDER_SIZE); - binder->insert_point = align(binder->insert_point + size, BTP_ALIGNMENT); - iris_use_pinned_bo(batch, binder->bo, false); - return offset; } +/** + * Reserve a block of space in the binder, given the raw size in bytes. + */ +uint32_t +iris_binder_reserve(struct iris_context *ice, + unsigned size) +{ + struct iris_binder *binder = &ice->state.binder; + + if (!binder_has_space(binder, size)) + binder_realloc(ice); + + assert(size > 0); + return binder_insert(binder, size); +} + /** * Reserve and record binder space for 3D pipeline shader stages. * * Note that you must actually populate the new binding tables after * calling this command - the new area is uninitialized. */ -bool -iris_binder_reserve_3d(struct iris_batch *batch, - struct iris_context *ice) +void +iris_binder_reserve_3d(struct iris_context *ice) { struct iris_compiled_shader **shaders = ice->shaders.prog; - struct iris_binder *binder = &batch->binder; - unsigned total_size = 0; + struct iris_binder *binder = &ice->state.binder; unsigned sizes[MESA_SHADER_STAGES] = {}; + unsigned total_size; - for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { - if (!(ice->state.dirty & (IRIS_DIRTY_BINDINGS_VS << stage))) - continue; + /* If nothing is dirty, skip all this. */ + if (!(ice->state.dirty & IRIS_ALL_DIRTY_BINDINGS)) + return; + /* Get the binding table sizes for each stage */ + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { if (!shaders[stage]) continue; const struct brw_stage_prog_data *prog_data = (const void *) shaders[stage]->prog_data; + /* Round up the size so our next table has an aligned starting offset */ sizes[stage] = align(prog_data->binding_table.size_bytes, BTP_ALIGNMENT); - total_size += sizes[stage]; } - if (total_size == 0) - return false; + /* Make space for the new binding tables...this may take two tries. 
*/ + while (true) { + total_size = 0; + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { + if (ice->state.dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) + total_size += sizes[stage]; + } - uint32_t offset = iris_binder_reserve(batch, total_size); - bool flushed = offset == INIT_INSERT_POINT; + assert(total_size < IRIS_BINDER_SIZE); - /* Assign space and record the current binding table. */ - for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { - if (!(ice->state.dirty & (IRIS_DIRTY_BINDINGS_VS << stage))) - continue; + if (total_size == 0) + return; + + if (binder_has_space(binder, total_size)) + break; - binder->bt_offset[stage] = sizes[stage] > 0 ? offset : 0; - offset += sizes[stage]; + /* It didn't fit. Allocate a new buffer and try again. Note that + * this will flag all bindings dirty, which may increase total_size + * on the next iteration. + */ + binder_realloc(ice); } - return flushed; -} + /* Assign space and record the new binding table offsets. */ + uint32_t offset = binder_insert(binder, total_size); -void -iris_init_binder(struct iris_binder *binder, struct iris_bufmgr *bufmgr) -{ - binder->bo = - iris_bo_alloc(bufmgr, "binder", IRIS_BINDER_SIZE, IRIS_MEMZONE_BINDER); - binder->map = iris_bo_map(NULL, binder->bo, MAP_WRITE); - binder->insert_point = INIT_INSERT_POINT; + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { + if (ice->state.dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) { + binder->bt_offset[stage] = sizes[stage] > 0 ? offset : 0; + offset += sizes[stage]; + } + } } -/** - * Is the binder empty? (If so, old binding table pointers are stale.) 
- */ -bool -iris_binder_is_empty(struct iris_binder *binder) +void +iris_init_binder(struct iris_context *ice) { - return binder->insert_point <= INIT_INSERT_POINT; + memset(&ice->state.binder, 0, sizeof(struct iris_binder)); + binder_realloc(ice); } void diff --git a/src/gallium/drivers/iris/iris_binder.h b/src/gallium/drivers/iris/iris_binder.h index bd1e17ae4c4..e63170e298f 100644 --- a/src/gallium/drivers/iris/iris_binder.h +++ b/src/gallium/drivers/iris/iris_binder.h @@ -49,11 +49,9 @@ struct iris_binder uint32_t bt_offset[MESA_SHADER_STAGES]; }; -void iris_init_binder(struct iris_binder *binder, struct iris_bufmgr *bufmgr); -bool iris_binder_is_empty(struct iris_binder *binder); +void iris_init_binder(struct iris_context *ice); void iris_destroy_binder(struct iris_binder *binder); -uint32_t iris_binder_reserve(struct iris_batch *batch, unsigned size); -bool iris_binder_reserve_3d(struct iris_batch *batch, - struct iris_context *ice); +uint32_t iris_binder_reserve(struct iris_context *ice, unsigned size); +void iris_binder_reserve_3d(struct iris_context *ice); #endif diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c index 3ff48ed00f5..e7718eab7eb 100644 --- a/src/gallium/drivers/iris/iris_blorp.c +++ b/src/gallium/drivers/iris/iris_blorp.c @@ -120,7 +120,7 @@ blorp_get_surface_address(struct blorp_batch *blorp_batch, UNUSED static struct blorp_address blorp_get_surface_base_address(UNUSED struct blorp_batch *blorp_batch) { - return (struct blorp_address) { .offset = IRIS_MEMZONE_SURFACE_START }; + return (struct blorp_address) { .offset = IRIS_MEMZONE_BINDER_START }; } static void * @@ -146,17 +146,22 @@ blorp_alloc_binding_table(struct blorp_batch *blorp_batch, void **surface_maps) { struct iris_context *ice = blorp_batch->blorp->driver_ctx; + struct iris_binder *binder = &ice->state.binder; struct iris_batch *batch = blorp_batch->driver_batch; - *bt_offset = iris_binder_reserve(batch, num_entries * sizeof(uint32_t)); 
- uint32_t *bt_map = batch->binder.map + *bt_offset; + *bt_offset = iris_binder_reserve(ice, num_entries * sizeof(uint32_t)); + uint32_t *bt_map = binder->map + *bt_offset; for (unsigned i = 0; i < num_entries; i++) { surface_maps[i] = stream_state(batch, ice->state.surface_uploader, state_size, state_alignment, &surface_offsets[i], NULL); - bt_map[i] = surface_offsets[i]; + bt_map[i] = surface_offsets[i] - (uint32_t) binder->bo->gtt_offset; } + + iris_use_pinned_bo(batch, binder->bo, false); + + ice->vtbl.update_surface_base_address(batch, binder); } static void * diff --git a/src/gallium/drivers/iris/iris_bufmgr.c b/src/gallium/drivers/iris/iris_bufmgr.c index 058ae15ab81..50e7d4f715d 100644 --- a/src/gallium/drivers/iris/iris_bufmgr.c +++ b/src/gallium/drivers/iris/iris_bufmgr.c @@ -244,10 +244,10 @@ bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size) static enum iris_memory_zone memzone_for_address(uint64_t address) { - STATIC_ASSERT(IRIS_MEMZONE_OTHER_START > IRIS_MEMZONE_DYNAMIC_START); + STATIC_ASSERT(IRIS_MEMZONE_OTHER_START > IRIS_MEMZONE_DYNAMIC_START); STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START > IRIS_MEMZONE_SURFACE_START); - STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START > IRIS_MEMZONE_SHADER_START); - STATIC_ASSERT(IRIS_BINDER_ADDRESS == IRIS_MEMZONE_SURFACE_START); + STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START > IRIS_MEMZONE_BINDER_START); + STATIC_ASSERT(IRIS_MEMZONE_BINDER_START > IRIS_MEMZONE_SHADER_START); STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START); if (address >= IRIS_MEMZONE_OTHER_START) @@ -259,7 +259,7 @@ memzone_for_address(uint64_t address) if (address > IRIS_MEMZONE_DYNAMIC_START) return IRIS_MEMZONE_DYNAMIC; - if (address == IRIS_BINDER_ADDRESS) + if (address > IRIS_MEMZONE_BINDER_START) return IRIS_MEMZONE_BINDER; if (address > IRIS_MEMZONE_SURFACE_START) @@ -365,8 +365,14 @@ bucket_vma_free(struct bo_cache_bucket *bucket, uint64_t address) } static struct bo_cache_bucket * -get_bucket_allocator(struct 
iris_bufmgr *bufmgr, uint64_t size) +get_bucket_allocator(struct iris_bufmgr *bufmgr, + enum iris_memory_zone memzone, + uint64_t size) { + /* Bucketing is not worth using for binders...we'll never have 64... */ + if (memzone == IRIS_MEMZONE_BINDER) + return NULL; + /* Skip using the bucket allocator for very large sizes, as it allocates * 64 of them and this can balloon rather quickly. */ @@ -393,12 +399,11 @@ vma_alloc(struct iris_bufmgr *bufmgr, uint64_t size, uint64_t alignment) { - if (memzone == IRIS_MEMZONE_BINDER) - return IRIS_BINDER_ADDRESS; - else if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL) + if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL) return IRIS_BORDER_COLOR_POOL_ADDRESS; - struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size); + struct bo_cache_bucket *bucket = + get_bucket_allocator(bufmgr, memzone, size); uint64_t addr; if (bucket) { @@ -419,8 +424,7 @@ vma_free(struct iris_bufmgr *bufmgr, uint64_t address, uint64_t size) { - if (address == IRIS_BINDER_ADDRESS || - address == IRIS_BORDER_COLOR_POOL_ADDRESS) + if (address == IRIS_BORDER_COLOR_POOL_ADDRESS) return; /* Un-canonicalize the address. 
*/ @@ -429,12 +433,13 @@ vma_free(struct iris_bufmgr *bufmgr, if (address == 0ull) return; - struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size); + enum iris_memory_zone memzone = memzone_for_address(address); + struct bo_cache_bucket *bucket = + get_bucket_allocator(bufmgr, memzone, size); if (bucket) { bucket_vma_free(bucket, address); } else { - enum iris_memory_zone memzone = memzone_for_address(address); util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size); } } @@ -1599,9 +1604,12 @@ iris_bufmgr_init(struct gen_device_info *devinfo, int fd) util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SHADER], PAGE_SIZE, _4GB); + util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_BINDER], + IRIS_MEMZONE_BINDER_START, + IRIS_MAX_BINDERS * IRIS_BINDER_SIZE); util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SURFACE], - IRIS_MEMZONE_SURFACE_START + IRIS_BINDER_SIZE, - _4GB - IRIS_BINDER_SIZE); + IRIS_MEMZONE_SURFACE_START, + _4GB - IRIS_MAX_BINDERS * IRIS_BINDER_SIZE); util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_DYNAMIC], IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE, _4GB - IRIS_BORDER_COLOR_POOL_SIZE); diff --git a/src/gallium/drivers/iris/iris_bufmgr.h b/src/gallium/drivers/iris/iris_bufmgr.h index 8be545cb04b..9210f44c944 100644 --- a/src/gallium/drivers/iris/iris_bufmgr.h +++ b/src/gallium/drivers/iris/iris_bufmgr.h @@ -48,14 +48,11 @@ struct pipe_debug_callback; * * We lay out the virtual address space as follows: * - * - [0, 4K): Nothing (empty page for null address) - * - [4K, 4G): Shaders (Instruction Base Address) - * - [4G, 8G): Surfaces (Surface State Base Address, Bindless ...) - * - [8G, 12G): Dynamic (Dynamic State Base Address) - * - [12G, *): Other (everything else in the full 48-bit VMA) - * - * A special 64kB "binder" buffer lives at the start of the surface memory - * zone, holding binding tables referring to objects in the rest of the zone. 
+ * - [0, 4K): Nothing (empty page for null address) + * - [4K, 4G): Shaders (Instruction Base Address) + * - [4G, 8G): Surfaces & Binders (Surface State Base Address, Bindless ...) + * - [8G, 12G): Dynamic (Dynamic State Base Address) + * - [12G, *): Other (everything else in the full 48-bit VMA) * * A special buffer for border color lives at the start of the dynamic state * memory zone. This unfortunately has to be handled specially because the @@ -65,32 +62,29 @@ struct pipe_debug_callback; * each a separate VMA. However, we assign address globally, so buffers will * have the same address in all GEM contexts. This lets us have a single BO * field for the address, which is easy and cheap. - * - * One exception is the special "binder" BO. Binders are context-local, - * so while there are many of them, all binders are stored at the same - * fixed address (in different VMAs). */ enum iris_memory_zone { IRIS_MEMZONE_SHADER, + IRIS_MEMZONE_BINDER, IRIS_MEMZONE_SURFACE, IRIS_MEMZONE_DYNAMIC, IRIS_MEMZONE_OTHER, - IRIS_MEMZONE_BINDER, IRIS_MEMZONE_BORDER_COLOR_POOL, }; /* Intentionally exclude single buffer "zones" */ #define IRIS_MEMZONE_COUNT (IRIS_MEMZONE_OTHER + 2) +#define IRIS_BINDER_SIZE (64 * 1024) +#define IRIS_MAX_BINDERS 100 + #define IRIS_MEMZONE_SHADER_START (0ull * (1ull << 32)) -#define IRIS_MEMZONE_SURFACE_START (1ull * (1ull << 32)) +#define IRIS_MEMZONE_BINDER_START (1ull * (1ull << 32)) +#define IRIS_MEMZONE_SURFACE_START (IRIS_MEMZONE_BINDER_START + IRIS_MAX_BINDERS * IRIS_BINDER_SIZE) #define IRIS_MEMZONE_DYNAMIC_START (2ull * (1ull << 32)) #define IRIS_MEMZONE_OTHER_START (3ull * (1ull << 32)) -#define IRIS_BINDER_ADDRESS IRIS_MEMZONE_SURFACE_START -#define IRIS_BINDER_SIZE (64 * 1024) - #define IRIS_BORDER_COLOR_POOL_ADDRESS IRIS_MEMZONE_DYNAMIC_START #define IRIS_BORDER_COLOR_POOL_SIZE (64 * 1024) diff --git a/src/gallium/drivers/iris/iris_context.c b/src/gallium/drivers/iris/iris_context.c index daaa9409d2e..bc637ea0492 100644 --- 
a/src/gallium/drivers/iris/iris_context.c +++ b/src/gallium/drivers/iris/iris_context.c @@ -130,6 +130,7 @@ iris_destroy_context(struct pipe_context *ctx) slab_destroy_child(&ice->transfer_pool); iris_batch_free(&ice->render_batch); + iris_destroy_binder(&ice->state.binder); ralloc_free(ice); } @@ -189,14 +190,15 @@ iris_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags) iris_init_program_cache(ice); iris_init_border_color_pool(ice); + iris_init_binder(ice); slab_create_child(&ice->transfer_pool, &screen->transfer_pool); ice->state.surface_uploader = - u_upload_create(&ice->ctx, 16384, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, + u_upload_create(ctx, 16384, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, IRIS_RESOURCE_FLAG_SURFACE_MEMZONE); ice->state.dynamic_uploader = - u_upload_create(&ice->ctx, 16384, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, + u_upload_create(ctx, 16384, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, IRIS_RESOURCE_FLAG_DYNAMIC_MEMZONE); genX_call(devinfo, init_state, ice); diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index f7411727bb0..a01e0d13eb4 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -30,6 +30,7 @@ #include "intel/common/gen_debug.h" #include "intel/compiler/brw_compiler.h" #include "iris_batch.h" +#include "iris_binder.h" #include "iris_resource.h" #include "iris_screen.h" @@ -109,6 +110,13 @@ struct blorp_params; #define IRIS_DIRTY_VF (1ull << 52) #define IRIS_DIRTY_VF_TOPOLOGY (1ull << 53) +#define IRIS_ALL_DIRTY_BINDINGS (IRIS_DIRTY_BINDINGS_VS | \ + IRIS_DIRTY_BINDINGS_TCS | \ + IRIS_DIRTY_BINDINGS_TES | \ + IRIS_DIRTY_BINDINGS_GS | \ + IRIS_DIRTY_BINDINGS_FS | \ + IRIS_DIRTY_BINDINGS_CS) + /** * Non-orthogonal state (NOS) dependency flags. 
* @@ -262,6 +270,8 @@ struct iris_vtable { void (*upload_render_state)(struct iris_context *ice, struct iris_batch *batch, const struct pipe_draw_info *draw); + void (*update_surface_base_address)(struct iris_batch *batch, + struct iris_binder *binder); void (*emit_raw_pipe_control)(struct iris_batch *batch, uint32_t flags, struct iris_bo *bo, uint32_t offset, uint64_t imm); @@ -382,6 +392,8 @@ struct iris_context { // "I'm streaming this out at draw time and never want it again!" struct u_upload_mgr *dynamic_uploader; + struct iris_binder binder; + struct iris_border_color_pool border_color_pool; /** diff --git a/src/gallium/drivers/iris/iris_draw.c b/src/gallium/drivers/iris/iris_draw.c index f6911350a7b..0567bbac72e 100644 --- a/src/gallium/drivers/iris/iris_draw.c +++ b/src/gallium/drivers/iris/iris_draw.c @@ -80,21 +80,9 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) iris_predraw_resolve_inputs(ice, batch); iris_predraw_resolve_framebuffer(ice, batch); - if (iris_binder_is_empty(&batch->binder)) { - ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS | - IRIS_DIRTY_BINDINGS_TCS | - IRIS_DIRTY_BINDINGS_TES | - IRIS_DIRTY_BINDINGS_GS | - IRIS_DIRTY_BINDINGS_FS; - } + iris_binder_reserve_3d(ice); - if (iris_binder_reserve_3d(batch, ice)) { - ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS | - IRIS_DIRTY_BINDINGS_TCS | - IRIS_DIRTY_BINDINGS_TES | - IRIS_DIRTY_BINDINGS_GS | - IRIS_DIRTY_BINDINGS_FS; - } + ice->vtbl.update_surface_base_address(batch, &ice->state.binder); ice->vtbl.upload_render_state(ice, batch, info); ice->state.dirty = 0ull; diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 454e05979e4..54bf3fd6023 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -445,6 +445,36 @@ emit_state(struct iris_batch *batch, #define cso_changed_memcmp(x) \ (!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0) +static void 
+flush_for_state_base_change(struct iris_batch *batch)
+{
+   /* Flush before emitting STATE_BASE_ADDRESS.
+    *
+    * This isn't documented anywhere in the PRM.  However, it seems to be
+    * necessary prior to changing the surface state base address.  We've
+    * seen issues in Vulkan where we get GPU hangs when using multi-level
+    * command buffers which clear depth, reset state base address, and then
+    * go render stuff.
+    *
+    * Normally, in GL, we would trust the kernel to do sufficient stalls
+    * and flushes prior to executing our batch.  However, it doesn't seem
+    * as if the kernel's flushing is always sufficient and we don't want to
+    * rely on it.
+    *
+    * We make this an end-of-pipe sync instead of a normal flush because we
+    * do not know the current status of the GPU.  On Haswell at least,
+    * having a fast-clear operation in flight at the same time as a normal
+    * rendering operation can cause hangs.  Since the kernel's flushing is
+    * insufficient, we need to ensure that any rendering operations from
+    * other processes are definitely complete before we try to do our own
+    * rendering.  It's a bit of a big hammer but it appears to work.
+    */
+   iris_emit_end_of_pipe_sync(batch,
+                              PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                              PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                              PIPE_CONTROL_DATA_CACHE_FLUSH);
+}
+
 /**
  * Upload the initial GPU state for a render context.
  *
@@ -459,18 +489,19 @@ iris_init_render_context(struct iris_screen *screen,
 {
    iris_init_batch(batch, screen, vtbl, dbg, I915_EXEC_RENDER);
 
-   /* XXX: PIPE_CONTROLs */
+   flush_for_state_base_change(batch);
 
    /* We program STATE_BASE_ADDRESS once at context initialization time.
     * Each base address points at a 4GB memory zone, and never needs to
     * change.  See iris_bufmgr.h for a description of the memory zones.
+    *
+    * Except for Surface State Base Address.  That one changes.
     */
    iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
    #if 0 // XXX: MOCS is stupid for this.
sba.GeneralStateMemoryObjectControlState = MOCS_WB; sba.StatelessDataPortAccessMemoryObjectControlState = MOCS_WB; - sba.SurfaceStateMemoryObjectControlState = MOCS_WB; sba.DynamicStateMemoryObjectControlState = MOCS_WB; sba.IndirectObjectMemoryObjectControlState = MOCS_WB; sba.InstructionMemoryObjectControlState = MOCS_WB; @@ -478,7 +509,6 @@ iris_init_render_context(struct iris_screen *screen, #endif sba.GeneralStateBaseAddressModifyEnable = true; - sba.SurfaceStateBaseAddressModifyEnable = true; sba.DynamicStateBaseAddressModifyEnable = true; sba.IndirectObjectBaseAddressModifyEnable = true; sba.InstructionBaseAddressModifyEnable = true; @@ -489,7 +519,6 @@ iris_init_render_context(struct iris_screen *screen, sba.InstructionBuffersizeModifyEnable = true; sba.InstructionBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SHADER_START); - sba.SurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SURFACE_START); sba.DynamicStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_DYNAMIC_START); sba.GeneralStateBufferSize = 0xfffff; @@ -3063,6 +3092,9 @@ use_ssbo(struct iris_batch *batch, struct iris_context *ice, return surf_state->offset; } +#define push_bt_entry(addr) \ + assert(addr >= binder_addr); bt_map[s++] = (addr) - binder_addr; + /** * Populate the binding table for a given shader stage. 
* @@ -3075,13 +3107,14 @@ iris_populate_binding_table(struct iris_context *ice, struct iris_batch *batch, gl_shader_stage stage) { - const struct iris_binder *binder = &batch->binder; + const struct iris_binder *binder = &ice->state.binder; struct iris_compiled_shader *shader = ice->shaders.prog[stage]; if (!shader) return; const struct shader_info *info = iris_get_shader_info(ice, stage); struct iris_shader_state *shs = &ice->state.shaders[stage]; + uint32_t binder_addr = binder->bo->gtt_offset; //struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; uint32_t *bt_map = binder->map + binder->bt_offset[stage]; @@ -3092,13 +3125,14 @@ iris_populate_binding_table(struct iris_context *ice, /* Note that cso_fb->nr_cbufs == fs_key->nr_color_regions. */ if (cso_fb->nr_cbufs) { for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { - if (cso_fb->cbufs[i]) - bt_map[s++] = use_surface(batch, cso_fb->cbufs[i], true); - else - bt_map[s++] = use_null_fb_surface(batch, ice); + uint32_t addr = + cso_fb->cbufs[i] ? use_surface(batch, cso_fb->cbufs[i], true) + : use_null_fb_surface(batch, ice); + push_bt_entry(addr); } } else { - bt_map[s++] = use_null_fb_surface(batch, ice); + uint32_t addr = use_null_fb_surface(batch, ice); + push_bt_entry(addr); } } @@ -3107,8 +3141,9 @@ iris_populate_binding_table(struct iris_context *ice, for (int i = 0; i < shs->num_textures; i++) { struct iris_sampler_view *view = shs->textures[i]; - bt_map[s++] = view ? use_sampler_view(batch, view) - : use_null_surface(batch, ice); + uint32_t addr = view ? use_sampler_view(batch, view) + : use_null_surface(batch, ice); + push_bt_entry(addr); } for (int i = 0; i < 1 + info->num_ubos; i++) { @@ -3116,7 +3151,8 @@ iris_populate_binding_table(struct iris_context *ice, if (!cbuf->surface_state.res) break; - bt_map[s++] = use_const_buffer(batch, cbuf); + uint32_t addr = use_const_buffer(batch, cbuf); + push_bt_entry(addr); } /* XXX: st is wasting 16 binding table slots for ABOs. 
Should add a cap @@ -3126,7 +3162,8 @@ iris_populate_binding_table(struct iris_context *ice, */ if (info->num_abos + info->num_ssbos > 0) { for (int i = 0; i < IRIS_MAX_ABOS + info->num_ssbos; i++) { - bt_map[s++] = use_ssbo(batch, ice, shs, i); + uint32_t addr = use_ssbo(batch, ice, shs, i); + push_bt_entry(addr); } } @@ -3263,6 +3300,27 @@ iris_restore_context_saved_bos(struct iris_context *ice, } } +/** + * Possibly emit STATE_BASE_ADDRESS to update Surface State Base Address. + */ +static void +iris_update_surface_base_address(struct iris_batch *batch, + struct iris_binder *binder) +{ + if (batch->last_surface_base_address == binder->bo->gtt_offset) + return; + + flush_for_state_base_change(batch); + + iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) { + // XXX: sba.SurfaceStateMemoryObjectControlState = MOCS_WB; + sba.SurfaceStateBaseAddressModifyEnable = true; + sba.SurfaceStateBaseAddress = ro_bo(binder->bo, 0); + } + + batch->last_surface_base_address = binder->bo->gtt_offset; +} + static void iris_upload_dirty_render_state(struct iris_context *ice, struct iris_batch *batch, @@ -3274,6 +3332,7 @@ iris_upload_dirty_render_state(struct iris_context *ice, return; struct iris_genx_state *genx = ice->state.genx; + struct iris_binder *binder = &ice->state.binder; struct brw_wm_prog_data *wm_prog_data = (void *) ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; @@ -3426,7 +3485,12 @@ iris_upload_dirty_render_state(struct iris_context *ice, } } - struct iris_binder *binder = &batch->binder; + /* Always pin the binder. If we're emitting new binding table pointers, + * we need it. If not, we're probably inheriting old tables via the + * context, and need it anyway. Since true zero-bindings cases are + * practically non-existent, just pin it and avoid last_res tracking. 
+ */ + iris_use_pinned_bo(batch, binder->bo, false); for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) { @@ -4309,6 +4373,7 @@ genX(init_state)(struct iris_context *ice) ice->vtbl.destroy_state = iris_destroy_state; ice->vtbl.init_render_context = iris_init_render_context; ice->vtbl.upload_render_state = iris_upload_render_state; + ice->vtbl.update_surface_base_address = iris_update_surface_base_address; ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control; ice->vtbl.derived_program_state_size = iris_derived_program_state_size; ice->vtbl.store_derived_program_state = iris_store_derived_program_state;