From: Kenneth Graunke Date: Tue, 27 Nov 2018 23:30:16 +0000 (-0800) Subject: iris: fix conditional compute, don't stomp predicate for pipelined queries X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=761a5fb36ac86516ffc16f2af7f7b07250756e43;p=mesa.git iris: fix conditional compute, don't stomp predicate for pipelined queries --- diff --git a/src/gallium/drivers/iris/iris_blit.c b/src/gallium/drivers/iris/iris_blit.c index b8562d08945..10b326e1884 100644 --- a/src/gallium/drivers/iris/iris_blit.c +++ b/src/gallium/drivers/iris/iris_blit.c @@ -257,10 +257,10 @@ iris_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) enum blorp_batch_flags blorp_flags = 0; if (info->render_condition_enable) { - if (ice->predicate == IRIS_PREDICATE_STATE_DONT_RENDER) + if (ice->state.predicate == IRIS_PREDICATE_STATE_DONT_RENDER) return; - if (ice->predicate == IRIS_PREDICATE_STATE_USE_BIT) + if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE; } diff --git a/src/gallium/drivers/iris/iris_clear.c b/src/gallium/drivers/iris/iris_clear.c index 493ebaa9ad7..be458a8dcc1 100644 --- a/src/gallium/drivers/iris/iris_clear.c +++ b/src/gallium/drivers/iris/iris_clear.c @@ -53,11 +53,11 @@ iris_clear(struct pipe_context *ctx, struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER]; - if (ice->predicate == IRIS_PREDICATE_STATE_DONT_RENDER) + if (ice->state.predicate == IRIS_PREDICATE_STATE_DONT_RENDER) return; enum blorp_batch_flags blorp_flags = 0; - if (ice->predicate == IRIS_PREDICATE_STATE_USE_BIT) + if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE; iris_batch_maybe_flush(batch, 1500); diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 91c0c3afcc5..601dabb4cef 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -430,7 +430,6 @@ struct iris_context { struct iris_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES]; } shaders; - enum iris_predicate_state predicate; struct { uint64_t dirty; uint64_t dirty_for_nos[IRIS_NOS_COUNT]; @@ -483,6 +482,15 @@ struct iris_context { bool statistics_counters_enabled; + /** Current conditional rendering mode */ + enum iris_predicate_state predicate; + + /** + * Query BO with a MI_PREDICATE_DATA snapshot calculated on the + * render context that needs to be uploaded to the compute context. + */ + struct iris_bo *compute_predicate; + /** Is a PIPE_QUERY_PRIMITIVES_GENERATED query active? */ bool prims_generated_query_active; diff --git a/src/gallium/drivers/iris/iris_draw.c b/src/gallium/drivers/iris/iris_draw.c index a5b4c63d54f..4b8205a2bbe 100644 --- a/src/gallium/drivers/iris/iris_draw.c +++ b/src/gallium/drivers/iris/iris_draw.c @@ -37,6 +37,7 @@ #include "util/u_upload_mgr.h" #include "intel/compiler/brw_compiler.h" #include "iris_context.h" +#include "iris_defines.h" /** * Record the current primitive mode and restart information, flagging @@ -70,6 +71,9 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) struct iris_context *ice = (struct iris_context *) ctx; struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER]; + if (ice->state.predicate == IRIS_PREDICATE_STATE_DONT_RENDER) + return; + if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) ice->state.dirty |= ~0ull; @@ -148,6 +152,9 @@ iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *grid) struct iris_context *ice = (struct iris_context *) ctx; struct iris_batch *batch = &ice->batches[IRIS_BATCH_COMPUTE]; + if (ice->state.predicate == IRIS_PREDICATE_STATE_DONT_RENDER) + return; + if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) ice->state.dirty |= ~0ull; @@ -164,6 +171,13 @@ iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *grid) iris_binder_reserve_compute(ice); ice->vtbl.update_surface_base_address(batch, &ice->state.binder); + + if (ice->state.compute_predicate) { + ice->vtbl.load_register_mem64(batch, MI_PREDICATE_DATA, + ice->state.compute_predicate, 0); + ice->state.compute_predicate = NULL; + } + ice->vtbl.upload_compute_state(ice, batch, grid); ice->state.dirty &= ~IRIS_ALL_DIRTY_FOR_COMPUTE; diff --git a/src/gallium/drivers/iris/iris_query.c b/src/gallium/drivers/iris/iris_query.c index 61b0e27a0cb..68bdb4b1ff1 100644 --- a/src/gallium/drivers/iris/iris_query.c +++ b/src/gallium/drivers/iris/iris_query.c @@ -94,6 +94,8 @@ struct iris_query { bool ready; + bool stalled; + uint64_t result; struct iris_bo *bo; @@ -103,13 +105,21 @@ struct iris_query { }; struct iris_query_snapshots { + /** iris_render_condition's saved MI_PREDICATE_DATA value. */ + uint64_t predicate_data; + + /** Have the start/end snapshots landed? */ uint64_t snapshots_landed; + + /** Starting and ending counter snapshots */ uint64_t start; uint64_t end; }; struct iris_query_so_overflow { + uint64_t predicate_data; uint64_t snapshots_landed; + struct { uint64_t prim_storage_needed[2]; uint64_t num_prims[2]; @@ -179,6 +189,7 @@ write_value(struct iris_context *ice, struct iris_query *q, unsigned offset) iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD); + q->stalled = true; } switch (q->type) { @@ -694,7 +705,7 @@ iris_get_query_result_resource(struct pipe_context *ctx, /* Calculate the result to CS_GPR0 */ calculate_result_on_gpu(ice, q); - bool predicated = !wait && iris_is_query_pipelined(q); + bool predicated = !wait && !q->stalled; if (predicated) { ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull); @@ -740,102 +751,90 @@ iris_set_active_query_state(struct pipe_context *ctx, boolean enable) } static void -set_predicate_enable(struct iris_context *ice, - bool value) +set_predicate_enable(struct iris_context *ice, bool value) { if (value) - ice->predicate = IRIS_PREDICATE_STATE_RENDER; + ice->state.predicate = IRIS_PREDICATE_STATE_RENDER; else - ice->predicate = IRIS_PREDICATE_STATE_DONT_RENDER; + ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER; } static void -set_predicate_for_overflow(struct iris_context *ice, - struct iris_query *q) +set_predicate_for_result(struct iris_context *ice, + struct iris_query *q, + bool inverted) { struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER]; - ice->predicate = IRIS_PREDICATE_STATE_USE_BIT; - /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM - * command when loading the values into the predicate source registers for - * conditional rendering. - */ - iris_emit_pipe_control_flush(batch, PIPE_CONTROL_FLUSH_ENABLE); - - overflow_result_to_gpr0(ice, q); - ice->vtbl.load_register_reg64(batch, CS_GPR(0), MI_PREDICATE_SRC0); - ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull); -} + /* The CPU doesn't have the query result yet; use hardware predication */ + ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT; -static void -set_predicate_for_occlusion(struct iris_context *ice, - struct iris_query *q) -{ - struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER]; - ice->predicate = IRIS_PREDICATE_STATE_USE_BIT; - - /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM - * command when loading the values into the predicate source registers for - * conditional rendering. - */ + /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */ iris_emit_pipe_control_flush(batch, PIPE_CONTROL_FLUSH_ENABLE); - - ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo, offsetof(struct iris_query_snapshots, start)); - ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, q->bo, offsetof(struct iris_query_snapshots, end)); -} - -static void -set_predicate_for_result(struct iris_context *ice, - struct iris_query *q, - bool condition) -{ - struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER]; - int load_op; + q->stalled = true; switch (q->type) { case PIPE_QUERY_SO_OVERFLOW_PREDICATE: case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - set_predicate_for_overflow(ice, q); + overflow_result_to_gpr0(ice, q); + + ice->vtbl.load_register_reg64(batch, CS_GPR(0), MI_PREDICATE_SRC0); + ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull); break; default: - set_predicate_for_occlusion(ice, q); + /* PIPE_QUERY_OCCLUSION_* */ + ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo, + offsetof(struct iris_query_snapshots, start)); + ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, q->bo, + offsetof(struct iris_query_snapshots, end)); break; } - if (ice->predicate == IRIS_PREDICATE_STATE_USE_BIT) { - if (condition) - load_op = MI_PREDICATE_LOADOP_LOAD; - else - load_op = MI_PREDICATE_LOADOP_LOADINV; - - // batch emit - uint32_t predicate = MI_PREDICATE | load_op | + uint32_t mi_predicate = MI_PREDICATE | MI_PREDICATE_COMBINEOP_SET | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL; - iris_batch_emit(batch, &predicate, sizeof(uint32_t)); - } + MI_PREDICATE_COMPAREOP_SRCS_EQUAL | + (inverted ? MI_PREDICATE_LOADOP_LOAD + : MI_PREDICATE_LOADOP_LOADINV); + iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t)); + + /* We immediately set the predicate on the render batch, as all the + * counters come from 3D operations. However, we may need to predicate + * a compute dispatch, which executes in a different GEM context and has + * a different MI_PREDICATE_DATA register. So, we save the result to + * memory and reload it in iris_launch_grid. + */ + unsigned offset = offsetof(struct iris_query_snapshots, predicate_data); + ice->vtbl.store_register_mem64(batch, MI_PREDICATE_DATA, + q->bo, offset, false); + ice->state.compute_predicate = q->bo; } static void iris_render_condition(struct pipe_context *ctx, - struct pipe_query *query, - boolean condition, - enum pipe_render_cond_flag mode) + struct pipe_query *query, + boolean condition, + enum pipe_render_cond_flag mode) { struct iris_context *ice = (void *) ctx; struct iris_query *q = (void *) query; if (!q) { - ice->predicate = IRIS_PREDICATE_STATE_RENDER; + ice->state.predicate = IRIS_PREDICATE_STATE_RENDER; return; } iris_check_query_no_flush(ice, q); - if (q->result || q->ready) + if (q->result || q->ready) { set_predicate_enable(ice, (q->result != 0) ^ condition); - else + } else { + if (mode == PIPE_RENDER_COND_NO_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) { + perf_debug(&ice->dbg, "Conditional rendering demoted from " + "\"no wait\" to \"wait\"."); + } set_predicate_for_result(ice, q, condition); + } } void diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 99073088b00..d241831edf8 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -4542,7 +4542,9 @@ iris_upload_render_state(struct iris_context *ice, prim.InstanceCount = draw->instance_count; prim.VertexCountPerInstance = draw->count; prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL; - prim.PredicateEnable = ice->predicate == IRIS_PREDICATE_STATE_USE_BIT ? 1 : 0; + prim.PredicateEnable = + ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT; + // XXX: this is probably bonkers. prim.StartVertexLocation = draw->start;