From ad412d63193c1a6cceff392f7ebd8e63e5742a54 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 1 Mar 2017 16:41:05 -0800 Subject: [PATCH] i965: Improve conditional rendering in fallback paths. We need to fall back in a couple of cases: - Sandybridge (it just doesn't do this in hardware) - Occlusion queries on Gen7-7.5 with command parser version < 2 - Transform feedback overflow queries on Gen7, or on Gen7.5 with command parser version < 7 In these cases, we printed a perf_debug message and fell back to _mesa_check_conditional_render(), which stalls until the full query result is available. Additionally, the code to handle this was a bit of a mess. We can do better by using our normal conditional rendering code, and setting a new state, BRW_PREDICATE_STATE_STALL_FOR_QUERY, when we would have set BRW_PREDICATE_STATE_USE_BIT. Only if that state is set do we perf_debug and potentially stall. This means we avoid stalls when we have a partial query result (i.e. we know it's > 0, but don't have the full value). The perf_debug should trigger less often as well. Still, this is primarily intended as a cleanup. Reviewed-by: Jason Ekstrand --- .../drivers/dri/i965/brw_conditional_render.c | 86 +++++++++---------- src/mesa/drivers/dri/i965/brw_context.c | 3 +- src/mesa/drivers/dri/i965/brw_context.h | 6 +- 3 files changed, 48 insertions(+), 47 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c b/src/mesa/drivers/dri/i965/brw_conditional_render.c index 046a42b5f52..6be4d48d21d 100644 --- a/src/mesa/drivers/dri/i965/brw_conditional_render.c +++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c @@ -52,6 +52,19 @@ set_predicate_for_overflow_query(struct brw_context *brw, struct brw_query_object *query, int stream_start, int count) { + if (!can_do_mi_math_and_lrr(brw->screen)) { + brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY; + return; + } + + brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT; + + /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM + * command when loading the values into the predicate source registers for + * conditional rendering. + */ + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); + hsw_overflow_result_to_gpr0(brw, query, count); brw_load_register_reg64(brw, HSW_CS_GPR(0), MI_PREDICATE_SRC0); brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull); @@ -61,6 +74,19 @@ static void set_predicate_for_occlusion_query(struct brw_context *brw, struct brw_query_object *query) { + if (!brw->predicate.supported) { + brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY; + return; + } + + brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT; + + /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM + * command when loading the values into the predicate source registers for + * conditional rendering. + */ + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); + brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query->bo, @@ -80,17 +106,10 @@ set_predicate_for_result(struct brw_context *brw, struct brw_query_object *query, bool inverted) { - int load_op; assert(query->bo != NULL); - /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM - * command when loading the values into the predicate source registers for - * conditional rendering. - */ - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); - switch (query->Base.Target) { case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: set_predicate_for_overflow_query(brw, query, 0, 1); @@ -102,19 +121,19 @@ set_predicate_for_result(struct brw_context *brw, set_predicate_for_occlusion_query(brw, query); } - if (inverted) - load_op = MI_PREDICATE_LOADOP_LOAD; - else - load_op = MI_PREDICATE_LOADOP_LOADINV; - - BEGIN_BATCH(1); - OUT_BATCH(GEN7_MI_PREDICATE | - load_op | - MI_PREDICATE_COMBINEOP_SET | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL); - ADVANCE_BATCH(); - - brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT; + if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) { + if (inverted) + load_op = MI_PREDICATE_LOADOP_LOAD; + else + load_op = MI_PREDICATE_LOADOP_LOADINV; + + BEGIN_BATCH(1); + OUT_BATCH(GEN7_MI_PREDICATE | + load_op | + MI_PREDICATE_COMBINEOP_SET | + MI_PREDICATE_COMPAREOP_SRCS_EQUAL); + ADVANCE_BATCH(); + } } static void @@ -126,14 +145,6 @@ brw_begin_conditional_render(struct gl_context *ctx, struct brw_query_object *query = (struct brw_query_object *) q; bool inverted; - if (!brw->predicate.supported) - return; - - if ((query->Base.Target == GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB || - query->Base.Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB) && - !can_do_mi_math_and_lrr(brw->screen)) - return; - switch (mode) { case GL_QUERY_WAIT: case GL_QUERY_NO_WAIT: @@ -183,24 +194,11 @@ brw_init_conditional_render_functions(struct dd_function_table *functions) bool brw_check_conditional_render(struct brw_context *brw) { - const struct gl_query_object *query = brw->ctx.Query.CondRenderQuery; - - const bool query_is_xfb = query && - (query->Target == GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB || - query->Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB); - - if (brw->predicate.supported && - (can_do_mi_math_and_lrr(brw->screen) || !query_is_xfb)) { - /* In some cases it is possible to determine that the primitives should - * be skipped without needing the predicate enable bit and still without - * stalling. - */ - return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER; - } else if (query) { + if (brw->predicate.state == BRW_PREDICATE_STATE_STALL_FOR_QUERY) { perf_debug("Conditional rendering is implemented in software and may " "stall.\n"); return _mesa_check_conditional_render(&brw->ctx); - } else { - return true; } + + return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER; } diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 5433f9080ee..2d91b37744a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -407,8 +407,7 @@ brw_init_driver_functions(struct brw_context *brw, else gen4_init_queryobj_functions(functions); brw_init_compute_functions(functions); - if (brw->gen >= 7) - brw_init_conditional_render_functions(functions); + brw_init_conditional_render_functions(functions); functions->QueryInternalFormat = brw_query_internal_format; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index b1374092bac..36a1ec6c9d1 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -576,7 +576,11 @@ enum brw_predicate_state { /* In this case whether to draw or not depends on the result of an * MI_PREDICATE command so the predicate enable bit needs to be checked. */ - BRW_PREDICATE_STATE_USE_BIT + BRW_PREDICATE_STATE_USE_BIT, + /* In this case, either MI_PREDICATE doesn't exist or we lack the + * necessary kernel features to use it. Stall for the query result. + */ + BRW_PREDICATE_STATE_STALL_FOR_QUERY, }; struct shader_times; -- 2.30.2