i965: Improve conditional rendering in fallback paths.
authorKenneth Graunke <kenneth@whitecape.org>
Thu, 2 Mar 2017 00:41:05 +0000 (16:41 -0800)
committerKenneth Graunke <kenneth@whitecape.org>
Fri, 16 Jun 2017 05:42:50 +0000 (22:42 -0700)
We need to fall back in a couple of cases:
- Sandybridge (it just doesn't do this in hardware)
- Occlusion queries on Gen7-7.5 with command parser version < 2
- Transform feedback overflow queries on Gen7, or on Gen7.5 with
  command parser version < 7

In these cases, we printed a perf_debug message and fell back to
_mesa_check_conditional_render(), which stalls until the full
query result is available.  Additionally, the code to handle this
was a bit of a mess.

We can do better by using our normal conditional rendering code,
and setting a new state, BRW_PREDICATE_STATE_STALL_FOR_QUERY, when
we would have set BRW_PREDICATE_STATE_USE_BIT.  Only if that state
is set do we perf_debug and potentially stall.  This means we avoid
stalls when we have a partial query result (i.e. we know it's > 0,
but don't have the full value).  The perf_debug should trigger less
often as well.

Still, this is primarily intended as a cleanup.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/mesa/drivers/dri/i965/brw_conditional_render.c
src/mesa/drivers/dri/i965/brw_context.c
src/mesa/drivers/dri/i965/brw_context.h

index 046a42b5f52076f75c2b875a8c87dfece73ceb75..6be4d48d21d4b6dcd6690edede83e6b9192a17ee 100644 (file)
@@ -52,6 +52,19 @@ set_predicate_for_overflow_query(struct brw_context *brw,
                                  struct brw_query_object *query,
                                  int stream_start, int count)
 {
+   if (!can_do_mi_math_and_lrr(brw->screen)) {
+      brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY;
+      return;
+   }
+
+   brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
+
+   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
+    * command when loading the values into the predicate source registers for
+    * conditional rendering.
+    */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
+
    hsw_overflow_result_to_gpr0(brw, query, count);
    brw_load_register_reg64(brw, HSW_CS_GPR(0), MI_PREDICATE_SRC0);
    brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull);
@@ -61,6 +74,19 @@ static void
 set_predicate_for_occlusion_query(struct brw_context *brw,
                                   struct brw_query_object *query)
 {
+   if (!brw->predicate.supported) {
+      brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY;
+      return;
+   }
+
+   brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
+
+   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
+    * command when loading the values into the predicate source registers for
+    * conditional rendering.
+    */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
+
    brw_load_register_mem64(brw,
                            MI_PREDICATE_SRC0,
                            query->bo,
@@ -80,17 +106,10 @@ set_predicate_for_result(struct brw_context *brw,
                          struct brw_query_object *query,
                          bool inverted)
 {
-
    int load_op;
 
    assert(query->bo != NULL);
 
-   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
-    * command when loading the values into the predicate source registers for
-    * conditional rendering.
-    */
-   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
-
    switch (query->Base.Target) {
    case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
       set_predicate_for_overflow_query(brw, query, 0, 1);
@@ -102,19 +121,19 @@ set_predicate_for_result(struct brw_context *brw,
       set_predicate_for_occlusion_query(brw, query);
    }
 
-   if (inverted)
-      load_op = MI_PREDICATE_LOADOP_LOAD;
-   else
-      load_op = MI_PREDICATE_LOADOP_LOADINV;
-
-   BEGIN_BATCH(1);
-   OUT_BATCH(GEN7_MI_PREDICATE |
-             load_op |
-             MI_PREDICATE_COMBINEOP_SET |
-             MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
-   ADVANCE_BATCH();
-
-   brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
+   if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) {
+      if (inverted)
+         load_op = MI_PREDICATE_LOADOP_LOAD;
+      else
+         load_op = MI_PREDICATE_LOADOP_LOADINV;
+
+      BEGIN_BATCH(1);
+      OUT_BATCH(GEN7_MI_PREDICATE |
+                load_op |
+                MI_PREDICATE_COMBINEOP_SET |
+                MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+      ADVANCE_BATCH();
+   }
 }
 
 static void
@@ -126,14 +145,6 @@ brw_begin_conditional_render(struct gl_context *ctx,
    struct brw_query_object *query = (struct brw_query_object *) q;
    bool inverted;
 
-   if (!brw->predicate.supported)
-      return;
-
-   if ((query->Base.Target == GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB ||
-        query->Base.Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB) &&
-       !can_do_mi_math_and_lrr(brw->screen))
-      return;
-
    switch (mode) {
    case GL_QUERY_WAIT:
    case GL_QUERY_NO_WAIT:
@@ -183,24 +194,11 @@ brw_init_conditional_render_functions(struct dd_function_table *functions)
 bool
 brw_check_conditional_render(struct brw_context *brw)
 {
-   const struct gl_query_object *query = brw->ctx.Query.CondRenderQuery;
-
-   const bool query_is_xfb = query &&
-      (query->Target == GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB ||
-       query->Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB);
-
-   if (brw->predicate.supported &&
-       (can_do_mi_math_and_lrr(brw->screen) || !query_is_xfb)) {
-      /* In some cases it is possible to determine that the primitives should
-       * be skipped without needing the predicate enable bit and still without
-       * stalling.
-       */
-      return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER;
-   } else if (query) {
+   if (brw->predicate.state == BRW_PREDICATE_STATE_STALL_FOR_QUERY) {
       perf_debug("Conditional rendering is implemented in software and may "
                  "stall.\n");
       return _mesa_check_conditional_render(&brw->ctx);
-   } else {
-      return true;
    }
+
+   return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER;
 }
index 5433f9080eefdb6eb81e3b7d9a07f0f2cf9942d8..2d91b37744a912027aa49f814d4ed70274a1f41c 100644 (file)
@@ -407,8 +407,7 @@ brw_init_driver_functions(struct brw_context *brw,
    else
       gen4_init_queryobj_functions(functions);
    brw_init_compute_functions(functions);
-   if (brw->gen >= 7)
-      brw_init_conditional_render_functions(functions);
+   brw_init_conditional_render_functions(functions);
 
    functions->QueryInternalFormat = brw_query_internal_format;
 
index b1374092bacade30de3b54d91b07f1fbaa3faee9..36a1ec6c9d1ad77a38f4cd63c08b0e52e47af2c2 100644 (file)
@@ -576,7 +576,11 @@ enum brw_predicate_state {
    /* In this case whether to draw or not depends on the result of an
     * MI_PREDICATE command so the predicate enable bit needs to be checked.
     */
-   BRW_PREDICATE_STATE_USE_BIT
+   BRW_PREDICATE_STATE_USE_BIT,
+   /* In this case, either MI_PREDICATE doesn't exist or we lack the
+    * necessary kernel features to use it.  Stall for the query result.
+    */
+   BRW_PREDICATE_STATE_STALL_FOR_QUERY,
 };
 
 struct shader_times;