From 60d708bb80f0b587b99f87aac4c9faa9ee9c760b Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 20 Apr 2018 23:28:03 -0700 Subject: [PATCH] iris: copy over i965's cache tracking needed to split out vtbl so I can pipe control without ice --- src/gallium/drivers/iris/iris_batch.c | 16 +- src/gallium/drivers/iris/iris_batch.h | 18 +++ src/gallium/drivers/iris/iris_context.c | 3 +- src/gallium/drivers/iris/iris_context.h | 77 +++++---- src/gallium/drivers/iris/iris_draw.c | 2 +- src/gallium/drivers/iris/iris_pipe_control.c | 147 ++++++++++++++++-- src/gallium/drivers/iris/iris_program.c | 6 +- src/gallium/drivers/iris/iris_program_cache.c | 4 +- src/gallium/drivers/iris/iris_state.c | 25 +-- 9 files changed, 233 insertions(+), 65 deletions(-) diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c index 7d2279981bb..8acd968911f 100644 --- a/src/gallium/drivers/iris/iris_batch.c +++ b/src/gallium/drivers/iris/iris_batch.c @@ -29,6 +29,7 @@ #include "drm-uapi/i915_drm.h" #include "util/hash_table.h" +#include "util/set.h" #include "main/macros.h" #include @@ -123,10 +124,12 @@ create_batch_buffer(struct iris_bufmgr *bufmgr, void iris_init_batch(struct iris_batch *batch, struct iris_screen *screen, + struct iris_vtable *vtbl, struct pipe_debug_callback *dbg, uint8_t ring) { batch->screen = screen; + batch->vtbl = vtbl; batch->dbg = dbg; /* ring should be one of I915_EXEC_RENDER, I915_EXEC_BLT, etc. 
*/ @@ -141,6 +144,10 @@ iris_init_batch(struct iris_batch *batch, batch->validation_list = malloc(batch->exec_array_size * sizeof(batch->validation_list[0])); + batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + batch->cache.depth = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); if (unlikely(INTEL_DEBUG)) { batch->state_sizes = _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare); @@ -223,10 +230,10 @@ iris_batch_reset(struct iris_batch *batch) } static void -iris_batch_reset_and_clear_render_cache(struct iris_batch *batch) +iris_batch_reset_and_clear_caches(struct iris_batch *batch) { iris_batch_reset(batch); - // XXX: iris_render_cache_set_clear(batch); + iris_cache_sets_clear(batch); } static void @@ -250,6 +257,9 @@ iris_batch_free(struct iris_batch *batch) iris_bo_unreference(batch->last_cmd_bo); + _mesa_hash_table_destroy(batch->cache.render, NULL); + _mesa_set_destroy(batch->cache.depth, NULL); + if (batch->state_sizes) { _mesa_hash_table_destroy(batch->state_sizes, NULL); gen_batch_decode_ctx_finish(&batch->decoder); @@ -581,7 +591,7 @@ _iris_batch_flush_fence(struct iris_batch *batch, batch->aperture_space = 0; /* Start a new batch buffer. */ - iris_batch_reset_and_clear_render_cache(batch); + iris_batch_reset_and_clear_caches(batch); return 0; } diff --git a/src/gallium/drivers/iris/iris_batch.h b/src/gallium/drivers/iris/iris_batch.h index 8af1415b649..b58f0836156 100644 --- a/src/gallium/drivers/iris/iris_batch.h +++ b/src/gallium/drivers/iris/iris_batch.h @@ -48,6 +48,7 @@ struct iris_batch_buffer { struct iris_batch { struct iris_screen *screen; + struct iris_vtable *vtbl; struct pipe_debug_callback *dbg; /** Current batchbuffer being queued up. 
*/ @@ -72,6 +73,22 @@ struct iris_batch { /** The amount of aperture space (in bytes) used by all exec_bos */ int aperture_space; + struct { + /** + * Set of struct iris_bo * that have been rendered to within this + * batchbuffer and would need flushing before being used from another + * cache domain that isn't coherent with it (i.e. the sampler). + */ + struct hash_table *render; + + /** + * Set of struct iris_bo * that have been used as a depth buffer within + * this batchbuffer and would need flushing before being used from + * another cache domain that isn't coherent with it (i.e. the sampler). + */ + struct set *depth; + } cache; + #if DEBUG /** Map from batch offset to iris_alloc_state data (with DEBUG_BATCH) */ // XXX: unused @@ -82,6 +99,7 @@ struct iris_batch { void iris_init_batch(struct iris_batch *batch, struct iris_screen *screen, + struct iris_vtable *vtbl, struct pipe_debug_callback *dbg, uint8_t ring); void iris_batch_free(struct iris_batch *batch); diff --git a/src/gallium/drivers/iris/iris_context.c b/src/gallium/drivers/iris/iris_context.c index dbf4759c96b..c6b1c3525fc 100644 --- a/src/gallium/drivers/iris/iris_context.c +++ b/src/gallium/drivers/iris/iris_context.c @@ -147,7 +147,8 @@ iris_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags) IRIS_RESOURCE_FLAG_DYNAMIC_MEMZONE); genX_call(devinfo, init_state, ice); - ice->state.init_render_context(screen, &ice->render_batch, &ice->dbg); + ice->vtbl.init_render_context(screen, &ice->render_batch, &ice->vtbl, + &ice->dbg); return ctx; } diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index cc79f72b767..185687c6564 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -33,6 +33,7 @@ #include "iris_screen.h" struct iris_bo; +struct iris_context; #define IRIS_RESOURCE_FLAG_SHADER_MEMZONE (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) #define IRIS_RESOURCE_FLAG_SURFACE_MEMZONE 
(PIPE_RESOURCE_FLAG_DRV_PRIV << 1) @@ -195,11 +196,41 @@ struct iris_shader_state { unsigned const_size; }; +struct iris_vtable { + void (*destroy_state)(struct iris_context *ice); + void (*init_render_context)(struct iris_screen *screen, + struct iris_batch *batch, + struct iris_vtable *vtbl, + struct pipe_debug_callback *dbg); + void (*upload_render_state)(struct iris_context *ice, + struct iris_batch *batch, + const struct pipe_draw_info *draw); + void (*emit_raw_pipe_control)(struct iris_batch *batch, uint32_t flags, + struct iris_bo *bo, uint32_t offset, + uint64_t imm); + unsigned (*derived_program_state_size)(enum iris_program_cache_id id); + void (*set_derived_program_state)(const struct gen_device_info *devinfo, + enum iris_program_cache_id cache_id, + struct iris_compiled_shader *shader); + void (*populate_vs_key)(const struct iris_context *ice, + struct brw_vs_prog_key *key); + void (*populate_tcs_key)(const struct iris_context *ice, + struct brw_tcs_prog_key *key); + void (*populate_tes_key)(const struct iris_context *ice, + struct brw_tes_prog_key *key); + void (*populate_gs_key)(const struct iris_context *ice, + struct brw_gs_prog_key *key); + void (*populate_fs_key)(const struct iris_context *ice, + struct brw_wm_prog_key *key); +}; + struct iris_context { struct pipe_context ctx; struct pipe_debug_callback dbg; + struct iris_vtable vtbl; + struct { struct iris_uncompiled_shader *uncompiled[MESA_SHADER_STAGES]; struct iris_compiled_shader *prog[MESA_SHADER_STAGES]; @@ -247,30 +278,6 @@ struct iris_context { // "I'm streaming this out at draw time and never want it again!" 
struct u_upload_mgr *dynamic_uploader; - void (*destroy_state)(struct iris_context *ice); - void (*init_render_context)(struct iris_screen *screen, - struct iris_batch *batch, - struct pipe_debug_callback *dbg); - void (*upload_render_state)(struct iris_context *ice, - struct iris_batch *batch, - const struct pipe_draw_info *draw); - void (*emit_raw_pipe_control)(struct iris_batch *batch, uint32_t flags, - struct iris_bo *bo, uint32_t offset, - uint64_t imm); - unsigned (*derived_program_state_size)(enum iris_program_cache_id id); - void (*set_derived_program_state)(const struct gen_device_info *devinfo, - enum iris_program_cache_id cache_id, - struct iris_compiled_shader *shader); - void (*populate_vs_key)(const struct iris_context *ice, - struct brw_vs_prog_key *key); - void (*populate_tcs_key)(const struct iris_context *ice, - struct brw_tcs_prog_key *key); - void (*populate_tes_key)(const struct iris_context *ice, - struct brw_tes_prog_key *key); - void (*populate_gs_key)(const struct iris_context *ice, - struct brw_gs_prog_key *key); - void (*populate_fs_key)(const struct iris_context *ice, - struct brw_wm_prog_key *key); } state; }; @@ -299,17 +306,27 @@ void iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); /* iris_pipe_control.c */ -void iris_emit_pipe_control_flush(struct iris_context *ice, - struct iris_batch *batch, +void iris_emit_pipe_control_flush(struct iris_batch *batch, uint32_t flags); -void iris_emit_pipe_control_write(struct iris_context *ice, - struct iris_batch *batch, uint32_t flags, +void iris_emit_pipe_control_write(struct iris_batch *batch, uint32_t flags, struct iris_bo *bo, uint32_t offset, uint64_t imm); -void iris_emit_end_of_pipe_sync(struct iris_context *ice, - struct iris_batch *batch, +void iris_emit_end_of_pipe_sync(struct iris_batch *batch, uint32_t flags); +void iris_cache_sets_clear(struct iris_batch *batch); +void iris_cache_flush_for_read(struct iris_batch *batch, struct iris_bo *bo); +void 
iris_cache_flush_for_render(struct iris_batch *batch, + struct iris_bo *bo, + enum isl_format format, + enum isl_aux_usage aux_usage); +void iris_render_cache_add_bo(struct iris_batch *batch, + struct iris_bo *bo, + enum isl_format format, + enum isl_aux_usage aux_usage); +void iris_cache_flush_for_depth(struct iris_batch *batch, struct iris_bo *bo); +void iris_depth_cache_add_bo(struct iris_batch *batch, struct iris_bo *bo); + /* iris_state.c */ void gen9_init_state(struct iris_context *ice); diff --git a/src/gallium/drivers/iris/iris_draw.c b/src/gallium/drivers/iris/iris_draw.c index 96e05fa5f24..d00419ad8d5 100644 --- a/src/gallium/drivers/iris/iris_draw.c +++ b/src/gallium/drivers/iris/iris_draw.c @@ -37,5 +37,5 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) struct iris_context *ice = (struct iris_context *) ctx; iris_update_compiled_shaders(ice); - ice->state.upload_render_state(ice, &ice->render_batch, info); + ice->vtbl.upload_render_state(ice, &ice->render_batch, info); } diff --git a/src/gallium/drivers/iris/iris_pipe_control.c b/src/gallium/drivers/iris/iris_pipe_control.c index 66a559d5cc3..a296dcc495a 100644 --- a/src/gallium/drivers/iris/iris_pipe_control.c +++ b/src/gallium/drivers/iris/iris_pipe_control.c @@ -22,6 +22,8 @@ */ #include "iris_context.h" +#include "util/hash_table.h" +#include "util/set.h" /** * Emit a PIPE_CONTROL with various flushing flags. @@ -30,9 +32,7 @@ * given generation. */ void -iris_emit_pipe_control_flush(struct iris_context *ice, - struct iris_batch *batch, - uint32_t flags) +iris_emit_pipe_control_flush(struct iris_batch *batch, uint32_t flags) { if ((flags & PIPE_CONTROL_CACHE_FLUSH_BITS) && (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) { @@ -47,12 +47,11 @@ iris_emit_pipe_control_flush(struct iris_context *ice, * with any write cache flush, so this shouldn't be a concern. In order * to ensure a full stall, we do an end-of-pipe sync. 
*/ - iris_emit_end_of_pipe_sync(ice, batch, - flags & PIPE_CONTROL_CACHE_FLUSH_BITS); + iris_emit_end_of_pipe_sync(batch, flags & PIPE_CONTROL_CACHE_FLUSH_BITS); flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL); } - ice->state.emit_raw_pipe_control(batch, flags, NULL, 0, 0); + batch->vtbl->emit_raw_pipe_control(batch, flags, NULL, 0, 0); } /** @@ -64,12 +63,11 @@ iris_emit_pipe_control_flush(struct iris_context *ice, * - PIPE_CONTROL_WRITE_DEPTH_COUNT */ void -iris_emit_pipe_control_write(struct iris_context *ice, - struct iris_batch *batch, uint32_t flags, +iris_emit_pipe_control_write(struct iris_batch *batch, uint32_t flags, struct iris_bo *bo, uint32_t offset, uint64_t imm) { - ice->state.emit_raw_pipe_control(batch, flags, bo, offset, imm); + batch->vtbl->emit_raw_pipe_control(batch, flags, bo, offset, imm); } /* @@ -95,9 +93,7 @@ iris_emit_pipe_control_write(struct iris_context *ice, * Data" in the PIPE_CONTROL command. */ void -iris_emit_end_of_pipe_sync(struct iris_context *ice, - struct iris_batch *batch, - uint32_t flags) +iris_emit_end_of_pipe_sync(struct iris_batch *batch, uint32_t flags) { /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory": * @@ -121,7 +117,132 @@ iris_emit_end_of_pipe_sync(struct iris_context *ice, * Data, Required Write Cache Flush bits set) * - Workload-2 (Can use the data produce or output by Workload-1) */ - iris_emit_pipe_control_write(ice, batch, flags | PIPE_CONTROL_CS_STALL | + iris_emit_pipe_control_write(batch, flags | PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE, batch->screen->workaround_bo, 0, 0); } + +void +iris_cache_sets_clear(struct iris_batch *batch) +{ + struct hash_entry *render_entry; + hash_table_foreach(batch->cache.render, render_entry) + _mesa_hash_table_remove(batch->cache.render, render_entry); + + struct set_entry *depth_entry; + set_foreach(batch->cache.depth, depth_entry) + _mesa_set_remove(batch->cache.depth, depth_entry); +} + +/** + * Emits an appropriate 
flush for a BO if it has been rendered to within the + * same batchbuffer as a read that's about to be emitted. + * + * The GPU has separate, incoherent caches for the render cache and the + * sampler cache, along with other caches. Usually data in the different + * caches don't interact (e.g. we don't render to our driver-generated + * immediate constant data), but for render-to-texture in FBOs we definitely + * do. When a batchbuffer is flushed, the kernel will ensure that everything + * necessary is flushed before another use of that BO, but for reuse from + * different caches within a batchbuffer, it's all our responsibility. + */ +static void +flush_depth_and_render_caches(struct iris_batch *batch, struct iris_bo *bo) +{ + iris_emit_pipe_control_flush(batch, + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_CS_STALL); + + iris_emit_pipe_control_flush(batch, + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | + PIPE_CONTROL_CONST_CACHE_INVALIDATE); + + iris_cache_sets_clear(batch); +} + +void +iris_cache_flush_for_read(struct iris_batch *batch, + struct iris_bo *bo) +{ + if (_mesa_hash_table_search(batch->cache.render, bo) || + _mesa_set_search(batch->cache.depth, bo)) + flush_depth_and_render_caches(batch, bo); +} + +static void * +format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage) +{ + return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage); +} + +void +iris_cache_flush_for_render(struct iris_batch *batch, + struct iris_bo *bo, + enum isl_format format, + enum isl_aux_usage aux_usage) +{ + if (_mesa_set_search(batch->cache.depth, bo)) + flush_depth_and_render_caches(batch, bo); + + /* Check to see if this bo has been used by a previous rendering operation + * but with a different format or aux usage. If it has, flush the render + * cache so we ensure that it's only in there with one format or aux usage + * at a time. + * + * Even though it's not obvious, this can easily happen in practice. 
+ * Suppose a client is blending on a surface with sRGB encode enabled on + * gen9. This implies that you get AUX_USAGE_CCS_D at best. If the client + * then disables sRGB encode and continues blending we will flip on + * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is + * perfectly valid since CCS_E is a subset of CCS_D). However, this means + * that we have fragments in-flight which are rendering with UNORM+CCS_E + * and other fragments in-flight with SRGB+CCS_D on the same surface at the + * same time and the pixel scoreboard and color blender are trying to sort + * it all out. This ends badly (i.e. GPU hangs). + * + * To date, we have never observed GPU hangs or even corruption to be + * associated with switching the format, only the aux usage. However, + * there are comments in various docs which indicate that the render cache + * isn't 100% resilient to format changes. We may as well be conservative + * and flush on format changes too. We can always relax this later if we + * find it to be a performance problem. + */ + struct hash_entry *entry = _mesa_hash_table_search(batch->cache.render, bo); + if (entry && entry->data != format_aux_tuple(format, aux_usage)) + flush_depth_and_render_caches(batch, bo); +} + +void +iris_render_cache_add_bo(struct iris_batch *batch, + struct iris_bo *bo, + enum isl_format format, + enum isl_aux_usage aux_usage) +{ +#ifndef NDEBUG + struct hash_entry *entry = _mesa_hash_table_search(batch->cache.render, bo); + if (entry) { + /* Otherwise, someone didn't do a flush_for_render and that would be + * very bad indeed. 
+ */ + assert(entry->data == format_aux_tuple(format, aux_usage)); + } +#endif + + _mesa_hash_table_insert(batch->cache.render, bo, + format_aux_tuple(format, aux_usage)); +} + +void +iris_cache_flush_for_depth(struct iris_batch *batch, + struct iris_bo *bo) +{ + if (_mesa_hash_table_search(batch->cache.render, bo)) + flush_depth_and_render_caches(batch, bo); +} + +void +iris_depth_cache_add_bo(struct iris_batch *batch, struct iris_bo *bo) +{ + _mesa_set_add(batch->cache.depth, bo); +} diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 3263723f11f..4d6853eae32 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -278,7 +278,7 @@ static void iris_update_compiled_vs(struct iris_context *ice) { struct brw_vs_prog_key key; - ice->state.populate_vs_key(ice, &key); + ice->vtbl.populate_vs_key(ice, &key); if (iris_bind_cached_shader(ice, IRIS_CACHE_VS, &key)) return; @@ -344,7 +344,7 @@ iris_update_compiled_tes(struct iris_context *ice) return; struct brw_tes_prog_key key; - ice->state.populate_tes_key(ice, &key); + ice->vtbl.populate_tes_key(ice, &key); if (iris_bind_cached_shader(ice, IRIS_CACHE_TES, &key)) return; @@ -404,7 +404,7 @@ static void iris_update_compiled_fs(struct iris_context *ice) { struct brw_wm_prog_key key; - ice->state.populate_fs_key(ice, &key); + ice->vtbl.populate_fs_key(ice, &key); if (iris_bind_cached_shader(ice, IRIS_CACHE_FS, &key)) return; diff --git a/src/gallium/drivers/iris/iris_program_cache.c b/src/gallium/drivers/iris/iris_program_cache.c index b10f62dd41e..e1fc21a08c1 100644 --- a/src/gallium/drivers/iris/iris_program_cache.c +++ b/src/gallium/drivers/iris/iris_program_cache.c @@ -193,7 +193,7 @@ iris_upload_and_bind_shader(struct iris_context *ice, struct hash_table *cache = ice->shaders.cache; struct iris_compiled_shader *shader = ralloc_size(cache, sizeof(struct iris_compiled_shader) + - ice->state.derived_program_state_size(cache_id)); 
+ ice->vtbl.derived_program_state_size(cache_id)); const struct iris_compiled_shader *existing = find_existing_assembly(cache, assembly, prog_data->program_size); @@ -219,7 +219,7 @@ iris_upload_and_bind_shader(struct iris_context *ice, ralloc_steal(shader->prog_data, prog_data->pull_param); /* Store the 3DSTATE shader packets and other derived state. */ - ice->state.set_derived_program_state(devinfo, cache_id, shader); + ice->vtbl.set_derived_program_state(devinfo, cache_id, shader); struct keybox *keybox = make_keybox(cache, cache_id, key); _mesa_hash_table_insert(ice->shaders.cache, keybox, shader); diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 38c5f9c03f3..2e10b3970ef 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -332,9 +332,10 @@ emit_state(struct iris_batch *batch, static void iris_init_render_context(struct iris_screen *screen, struct iris_batch *batch, + struct iris_vtable *vtbl, struct pipe_debug_callback *dbg) { - iris_init_batch(batch, screen, dbg, I915_EXEC_RENDER); + iris_init_batch(batch, screen, vtbl, dbg, I915_EXEC_RENDER); /* XXX: PIPE_CONTROLs */ @@ -2912,17 +2913,17 @@ genX(init_state)(struct iris_context *ice) ctx->stream_output_target_destroy = iris_stream_output_target_destroy; ctx->set_stream_output_targets = iris_set_stream_output_targets; - ice->state.destroy_state = iris_destroy_state; - ice->state.init_render_context = iris_init_render_context; - ice->state.upload_render_state = iris_upload_render_state; - ice->state.emit_raw_pipe_control = iris_emit_raw_pipe_control; - ice->state.derived_program_state_size = iris_derived_program_state_size; - ice->state.set_derived_program_state = iris_set_derived_program_state; - ice->state.populate_vs_key = iris_populate_vs_key; - ice->state.populate_tcs_key = iris_populate_tcs_key; - ice->state.populate_tes_key = iris_populate_tes_key; - ice->state.populate_gs_key = iris_populate_gs_key; - 
ice->state.populate_fs_key = iris_populate_fs_key; + ice->vtbl.destroy_state = iris_destroy_state; + ice->vtbl.init_render_context = iris_init_render_context; + ice->vtbl.upload_render_state = iris_upload_render_state; + ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control; + ice->vtbl.derived_program_state_size = iris_derived_program_state_size; + ice->vtbl.set_derived_program_state = iris_set_derived_program_state; + ice->vtbl.populate_vs_key = iris_populate_vs_key; + ice->vtbl.populate_tcs_key = iris_populate_tcs_key; + ice->vtbl.populate_tes_key = iris_populate_tes_key; + ice->vtbl.populate_gs_key = iris_populate_gs_key; + ice->vtbl.populate_fs_key = iris_populate_fs_key; ice->state.dirty = ~0ull; } -- 2.30.2