i965: Use predicate enable bit for conditional rendering w/o stalling
authorNeil Roberts <neil@linux.intel.com>
Tue, 4 Nov 2014 19:15:00 +0000 (19:15 +0000)
committerNeil Roberts <neil@linux.intel.com>
Tue, 12 May 2015 10:20:47 +0000 (11:20 +0100)
Previously whenever a primitive is drawn the driver would call
_mesa_check_conditional_render which blocks waiting for the result of
the query to determine whether to render. On Gen7+ there is a bit in
the 3DPRIMITIVE command which can be used to disable the primitive
based on the value of a state bit. This state bit can be set based on
whether two registers have different values using the MI_PREDICATE
command. We can load these two registers with the pixel count values
stored in the query begin and end to implement conditional rendering
without stalling.

Unfortunately these two source registers were not in the whitelist of
available registers in the kernel driver until v3.19. This patch uses
the command parser version from intel_screen to detect whether to
attempt to set the predicate data registers.

The predicate enable bit is currently only used for drawing 3D
primitives. For blits, clears, bitmaps, copypixels and drawpixels it
still causes a stall. For most of these it would probably just work to
call the new brw_check_conditional_render function instead of
_mesa_check_conditional_render because they already work in terms of
rendering primitives. However it's a bit trickier for blits because it
can use the BLT ring or the blorp codepath. I think these operations
are less useful for conditional rendering than rendering primitives so
it might be best to leave it for a later patch.

v2: Use the command parser version to detect whether we can write to
    the predicate data registers instead of trying to execute a
    register load command.
v3: Simple rebase
v4: Changes suggested by Kenneth Graunke: Split the
    load_64bit_register function out to a separate patch so it can be
    a shared public function. Avoid calling
    _mesa_check_conditional_render if we've already determined that
    there's no query object. Some styling fixes.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/brw_conditional_render.c [new file with mode: 0644]
src/mesa/drivers/dri/i965/brw_context.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_queryobj.c
src/mesa/drivers/dri/i965/intel_extensions.c
src/mesa/drivers/dri/i965/intel_reg.h

index 1ae93e1d5f3e3c8942b9c0e5a55026184ebb98ac..a24c20aada4fbb74d9cad72dbbc78028e1fcc7c5 100644 (file)
@@ -18,6 +18,7 @@ i965_FILES = \
        brw_clip_unfilled.c \
        brw_clip_util.c \
        brw_compute.c \
+       brw_conditional_render.c \
        brw_context.c \
        brw_context.h \
        brw_cs.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c b/src/mesa/drivers/dri/i965/brw_conditional_render.c
new file mode 100644 (file)
index 0000000..6d37c3b
--- /dev/null
@@ -0,0 +1,161 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Neil Roberts <neil@linux.intel.com>
+ */
+
+/** @file brw_conditional_render.c
+ *
+ * Support for conditional rendering based on query objects
+ * (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gen7+.
+ */
+
+#include "main/imports.h"
+#include "main/condrender.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+set_predicate_enable(struct brw_context *brw,
+                     bool value)
+{
+   if (value)
+      brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
+   else
+      brw->predicate.state = BRW_PREDICATE_STATE_DONT_RENDER;
+}
+
+static void
+set_predicate_for_result(struct brw_context *brw,
+                         struct brw_query_object *query,
+                         bool inverted)
+{
+   int load_op;
+
+   assert(query->bo != NULL);
+
+   brw_load_register_mem64(brw,
+                           MI_PREDICATE_SRC0,
+                           query->bo,
+                           I915_GEM_DOMAIN_INSTRUCTION,
+                           0, /* write domain */
+                           0 /* offset */);
+   brw_load_register_mem64(brw,
+                           MI_PREDICATE_SRC1,
+                           query->bo,
+                           I915_GEM_DOMAIN_INSTRUCTION,
+                           0, /* write domain */
+                           8 /* offset */);
+
+   if (inverted)
+      load_op = MI_PREDICATE_LOADOP_LOAD;
+   else
+      load_op = MI_PREDICATE_LOADOP_LOADINV;
+
+   BEGIN_BATCH(1);
+   OUT_BATCH(GEN7_MI_PREDICATE |
+             load_op |
+             MI_PREDICATE_COMBINEOP_SET |
+             MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+   ADVANCE_BATCH();
+
+   brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
+}
+
+static void
+brw_begin_conditional_render(struct gl_context *ctx,
+                             struct gl_query_object *q,
+                             GLenum mode)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_query_object *query = (struct brw_query_object *) q;
+   bool inverted;
+
+   if (!brw->predicate.supported)
+      return;
+
+   switch (mode) {
+   case GL_QUERY_WAIT:
+   case GL_QUERY_NO_WAIT:
+   case GL_QUERY_BY_REGION_WAIT:
+   case GL_QUERY_BY_REGION_NO_WAIT:
+      inverted = false;
+      break;
+   case GL_QUERY_WAIT_INVERTED:
+   case GL_QUERY_NO_WAIT_INVERTED:
+   case GL_QUERY_BY_REGION_WAIT_INVERTED:
+   case GL_QUERY_BY_REGION_NO_WAIT_INVERTED:
+      inverted = true;
+      break;
+   default:
+      unreachable("Unexpected conditional render mode");
+   }
+
+   /* If there are already samples from a BLT operation or if the query object
+    * is ready then we can avoid looking at the values in the buffer and just
+    * decide whether to draw using the CPU without stalling.
+    */
+   if (query->Base.Result || query->Base.Ready)
+      set_predicate_enable(brw, (query->Base.Result != 0) ^ inverted);
+   else
+      set_predicate_for_result(brw, query, inverted);
+}
+
+static void
+brw_end_conditional_render(struct gl_context *ctx,
+                           struct gl_query_object *q)
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   /* When there is no longer a conditional render in progress it should
+    * always render.
+    */
+   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
+}
+
+void
+brw_init_conditional_render_functions(struct dd_function_table *functions)
+{
+   functions->BeginConditionalRender = brw_begin_conditional_render;
+   functions->EndConditionalRender = brw_end_conditional_render;
+}
+
+bool
+brw_check_conditional_render(struct brw_context *brw)
+{
+   if (brw->predicate.supported) {
+      /* In some cases it is possible to determine that the primitives should
+       * be skipped without needing the predicate enable bit and still without
+       * stalling.
+       */
+      return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER;
+   } else if (brw->ctx.Query.CondRenderQuery) {
+      perf_debug("Conditional rendering is implemented in software and may "
+                 "stall.\n");
+      return _mesa_check_conditional_render(&brw->ctx);
+   } else {
+      return true;
+   }
+}
index fd7420a6c6f4d808458eb1671adbd7547718054c..673529a28cdbcad6034cf8a5779e468e70ae5493 100644 (file)
@@ -289,6 +289,8 @@ brw_init_driver_functions(struct brw_context *brw,
    else
       gen4_init_queryobj_functions(functions);
    brw_init_compute_functions(functions);
+   if (brw->gen >= 7)
+      brw_init_conditional_render_functions(functions);
 
    functions->QuerySamplesForFormat = brw_query_samples_for_format;
 
@@ -891,6 +893,8 @@ brwCreateContext(gl_api api,
    brw->gs.enabled = false;
    brw->sf.viewport_transform_enable = true;
 
+   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
+
    ctx->VertexProgram._MaintainTnlProgram = true;
    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 
index c794fa4397353a82b3ea8aa15552f51eb2d8ac24..2dcc23c5fc65cc2a5a45860d44788c5e94c5786c 100644 (file)
@@ -966,6 +966,20 @@ struct brw_stage_state
    uint32_t sampler_offset;
 };
 
+enum brw_predicate_state {
+   /* The first two states are used if we can determine whether to draw
+    * without having to look at the values in the query object buffer. This
+    * will happen if there is no conditional render in progress, if the query
+    * object is already completed or if something else has already added
+    * samples to the preliminary result such as via a BLT command.
+    */
+   BRW_PREDICATE_STATE_RENDER,
+   BRW_PREDICATE_STATE_DONT_RENDER,
+   /* In this case whether to draw or not depends on the result of an
+    * MI_PREDICATE command so the predicate enable bit needs to be checked.
+    */
+   BRW_PREDICATE_STATE_USE_BIT
+};
 
 /**
  * brw_context is derived from gl_context.
@@ -1401,6 +1415,11 @@ struct brw_context
       bool begin_emitted;
    } query;
 
+   struct {
+      enum brw_predicate_state state;
+      bool supported;
+   } predicate;
+
    struct {
       /** A map from pipeline statistics counter IDs to MMIO addresses. */
       const int *statistics_registers;
@@ -1600,6 +1619,10 @@ void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *bo, int idx);
 void brw_store_register_mem64(struct brw_context *brw,
                               drm_intel_bo *bo, uint32_t reg, int idx);
 
+/** brw_conditional_render.c */
+void brw_init_conditional_render_functions(struct dd_function_table *functions);
+bool brw_check_conditional_render(struct brw_context *brw);
+
 /** intel_batchbuffer.c */
 void brw_load_register_mem(struct brw_context *brw,
                            uint32_t reg,
index 83d7a3535e4f37bee583564bee94adbc70742b3b..11cb3fa490b65003db5e3246d7b277ac27845fa9 100644 (file)
@@ -51,6 +51,7 @@
 # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
 # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 15)
 # define GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE      (1 << 10)
+# define GEN7_3DPRIM_PREDICATE_ENABLE               (1 << 8)
 /* DW1 */
 # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
 # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 8)
index 96e2369792394d806595eb27fa533481f9dd6bc1..a7164dbf7d8bd5fcc42c02e8bfa7b00f0c71e134 100644 (file)
@@ -178,6 +178,7 @@ static void brw_emit_prim(struct brw_context *brw,
    int verts_per_instance;
    int vertex_access_type;
    int indirect_flag;
+   int predicate_enable;
 
    DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
        prim->start, prim->count);
@@ -258,10 +259,14 @@ static void brw_emit_prim(struct brw_context *brw,
       indirect_flag = 0;
    }
 
-
    if (brw->gen >= 7) {
+      if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
+         predicate_enable = GEN7_3DPRIM_PREDICATE_ENABLE;
+      else
+         predicate_enable = 0;
+
       BEGIN_BATCH(7);
-      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag);
+      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable);
       OUT_BATCH(hw_prim | vertex_access_type);
    } else {
       BEGIN_BATCH(6);
@@ -561,12 +566,7 @@ void brw_draw_prims( struct gl_context *ctx,
 
    assert(unused_tfb_object == NULL);
 
-   if (ctx->Query.CondRenderQuery) {
-      perf_debug("Conditional rendering is implemented in software and may "
-                 "stall.  This should be fixed in the driver.\n");
-   }
-
-   if (!_mesa_check_conditional_render(ctx))
+   if (!brw_check_conditional_render(brw))
       return;
 
    /* Handle primitive restart if needed */
index 667c90093047748d09b106f1a06d6fcf44842bf9..aea4d9b77d340d894768bef6fca5f447a0bc72dd 100644 (file)
@@ -66,10 +66,20 @@ brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
 void
 brw_write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
 {
-   brw_emit_pipe_control_write(brw,
-                               PIPE_CONTROL_WRITE_DEPTH_COUNT
-                               | PIPE_CONTROL_DEPTH_STALL,
-                               query_bo, idx * sizeof(uint64_t), 0, 0);
+   uint32_t flags;
+
+   flags = (PIPE_CONTROL_WRITE_DEPTH_COUNT |
+            PIPE_CONTROL_DEPTH_STALL);
+
+   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
+    * command when loading the values into the predicate source registers for
+    * conditional rendering.
+    */
+   if (brw->predicate.supported)
+      flags |= PIPE_CONTROL_FLUSH_ENABLE;
+
+   brw_emit_pipe_control_write(brw, flags, query_bo,
+                               idx * sizeof(uint64_t), 0, 0);
 }
 
 /**
index c3eee31d017173349bd7299db7c97fc5d32d50e0..cafb77455d77b359872177ed61d11bed3483ac94 100644 (file)
@@ -320,6 +320,8 @@ intelInitExtensions(struct gl_context *ctx)
       }
    }
 
+   brw->predicate.supported = false;
+
    if (brw->gen >= 7) {
       ctx->Extensions.ARB_conservative_depth = true;
       ctx->Extensions.ARB_derivative_control = true;
@@ -333,6 +335,9 @@ intelInitExtensions(struct gl_context *ctx)
          ctx->Extensions.ARB_transform_feedback2 = true;
          ctx->Extensions.ARB_transform_feedback3 = true;
          ctx->Extensions.ARB_transform_feedback_instanced = true;
+
+         if (brw->intelScreen->cmd_parser_version >= 2)
+            brw->predicate.supported = true;
       }
 
       /* Only enable this in core profile because other parts of Mesa behave
index 488fb5b98f8a0013afaaab6cc3346a50d0097761..bd14e189da3818c037ea55076275549f0363f290 100644 (file)
 #define GEN7_MI_LOAD_REGISTER_MEM      (CMD_MI | (0x29 << 23))
 # define MI_LOAD_REGISTER_MEM_USE_GGTT         (1 << 22)
 
+/* Manipulate the predicate bit based on some register values. Only on Gen7+ */
+#define GEN7_MI_PREDICATE              (CMD_MI | (0xC << 23))
+# define MI_PREDICATE_LOADOP_KEEP              (0 << 6)
+# define MI_PREDICATE_LOADOP_LOAD              (2 << 6)
+# define MI_PREDICATE_LOADOP_LOADINV           (3 << 6)
+# define MI_PREDICATE_COMBINEOP_SET            (0 << 3)
+# define MI_PREDICATE_COMBINEOP_AND            (1 << 3)
+# define MI_PREDICATE_COMBINEOP_OR             (2 << 3)
+# define MI_PREDICATE_COMBINEOP_XOR            (3 << 3)
+# define MI_PREDICATE_COMPAREOP_TRUE           (0 << 0)
+# define MI_PREDICATE_COMPAREOP_FALSE          (1 << 0)
+# define MI_PREDICATE_COMPAREOP_SRCS_EQUAL     (2 << 0)
+# define MI_PREDICATE_COMPAREOP_DELTAS_EQUAL   (3 << 0)
+
 /** @{
  *
  * PIPE_CONTROL operation, a combination MI_FLUSH and register write with
@@ -69,6 +83,7 @@
 #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE  (1 << 10) /* GM45+ only */
 #define PIPE_CONTROL_ISP_DIS           (1 << 9)
 #define PIPE_CONTROL_INTERRUPT_ENABLE  (1 << 8)
+#define PIPE_CONTROL_FLUSH_ENABLE      (1 << 7) /* Gen7+ only */
 /* GT */
 #define PIPE_CONTROL_DATA_CACHE_INVALIDATE     (1 << 5)
 #define PIPE_CONTROL_VF_CACHE_INVALIDATE       (1 << 4)
 # define GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC (1 << 1)
 # define GEN8_HIZ_PMA_MASK_BITS \
    ((GEN8_HIZ_NP_PMA_FIX_ENABLE | GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE) << 16)
+
+/* Predicate registers */
+#define MI_PREDICATE_SRC0               0x2400
+#define MI_PREDICATE_SRC1               0x2408
+#define MI_PREDICATE_DATA               0x2410
+#define MI_PREDICATE_RESULT             0x2418
+#define MI_PREDICATE_RESULT_1           0x241C
+#define MI_PREDICATE_RESULT_2           0x2214