iris: fix conditional compute, don't stomp predicate for pipelined queries

author Kenneth Graunke <kenneth@whitecape.org>
Tue, 27 Nov 2018 23:30:16 +0000 (15:30 -0800)

committer Kenneth Graunke <kenneth@whitecape.org>
Thu, 21 Feb 2019 18:26:10 +0000 (10:26 -0800)
diff --git a/src/gallium/drivers/iris/iris_blit.c b/src/gallium/drivers/iris/iris_blit.c

index b8562d0894501358ca699d35998218c875c20ab2..10b326e1884f6a9d322f9d91977d689dd8fcda62 100644 (file)
--- a/src/gallium/drivers/iris/iris_blit.c
+++ b/src/gallium/drivers/iris/iris_blit.c
@@ -257,10 +257,10 @@ iris_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
     enum blorp_batch_flags blorp_flags = 0;
  
     if (info->render_condition_enable) {
-      if (ice->predicate == IRIS_PREDICATE_STATE_DONT_RENDER)
+      if (ice->state.predicate == IRIS_PREDICATE_STATE_DONT_RENDER)
           return;
  
-      if (ice->predicate == IRIS_PREDICATE_STATE_USE_BIT)
+      if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT)
           blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE;
     }
  
diff --git a/src/gallium/drivers/iris/iris_clear.c b/src/gallium/drivers/iris/iris_clear.c

index 493ebaa9ad71f9261c2aed7e86774e019d7b18aa..be458a8dcc1e0842662d1bf21a5e73815f6888ea 100644 (file)
--- a/src/gallium/drivers/iris/iris_clear.c
+++ b/src/gallium/drivers/iris/iris_clear.c
@@ -53,11 +53,11 @@ iris_clear(struct pipe_context *ctx,
  
     struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
  
-   if (ice->predicate == IRIS_PREDICATE_STATE_DONT_RENDER)
+   if (ice->state.predicate == IRIS_PREDICATE_STATE_DONT_RENDER)
        return;
  
     enum blorp_batch_flags blorp_flags = 0;
-   if (ice->predicate == IRIS_PREDICATE_STATE_USE_BIT)
+   if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT)
        blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE;
  
     iris_batch_maybe_flush(batch, 1500);
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h

index 91c0c3afcc5b9603ec223e78368185a9e41187a9..601dabb4ceffbe7bd2d744738a843753cb20b036 100644 (file)
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -430,7 +430,6 @@ struct iris_context {
        struct iris_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES];
     } shaders;
  
-   enum iris_predicate_state predicate;
     struct {
        uint64_t dirty;
        uint64_t dirty_for_nos[IRIS_NOS_COUNT];
@@ -483,6 +482,15 @@ struct iris_context {
  
        bool statistics_counters_enabled;
  
+      /** Current conditional rendering mode */
+      enum iris_predicate_state predicate;
+
+      /**
+       * Query BO with a MI_PREDICATE_DATA snapshot calculated on the
+       * render context that needs to be uploaded to the compute context.
+       */
+      struct iris_bo *compute_predicate;
+
        /** Is a PIPE_QUERY_PRIMITIVES_GENERATED query active? */
        bool prims_generated_query_active;
  
diff --git a/src/gallium/drivers/iris/iris_draw.c b/src/gallium/drivers/iris/iris_draw.c

index a5b4c63d54f8a71a595ac2ba38f50771d56fad51..4b8205a2bbe41fac192f30786290b27060163fe4 100644 (file)
--- a/src/gallium/drivers/iris/iris_draw.c
+++ b/src/gallium/drivers/iris/iris_draw.c
@@ -37,6 +37,7 @@
  #include "util/u_upload_mgr.h"
  #include "intel/compiler/brw_compiler.h"
  #include "iris_context.h"
+#include "iris_defines.h"
  
  /**
   * Record the current primitive mode and restart information, flagging
@@ -70,6 +71,9 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
     struct iris_context *ice = (struct iris_context *) ctx;
     struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
  
+   if (ice->state.predicate == IRIS_PREDICATE_STATE_DONT_RENDER)
+      return;
+
     if (unlikely(INTEL_DEBUG & DEBUG_REEMIT))
        ice->state.dirty |= ~0ull;
  
@@ -148,6 +152,9 @@ iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *grid)
     struct iris_context *ice = (struct iris_context *) ctx;
     struct iris_batch *batch = &ice->batches[IRIS_BATCH_COMPUTE];
  
+   if (ice->state.predicate == IRIS_PREDICATE_STATE_DONT_RENDER)
+      return;
+
     if (unlikely(INTEL_DEBUG & DEBUG_REEMIT))
        ice->state.dirty |= ~0ull;
  
@@ -164,6 +171,13 @@ iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *grid)
  
     iris_binder_reserve_compute(ice);
     ice->vtbl.update_surface_base_address(batch, &ice->state.binder);
+
+   if (ice->state.compute_predicate) {
+      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_DATA,
+                                    ice->state.compute_predicate, 0);
+      ice->state.compute_predicate = NULL;
+   }
+
     ice->vtbl.upload_compute_state(ice, batch, grid);
  
     ice->state.dirty &= ~IRIS_ALL_DIRTY_FOR_COMPUTE;
diff --git a/src/gallium/drivers/iris/iris_query.c b/src/gallium/drivers/iris/iris_query.c

index 61b0e27a0cb359b992bd8772a721d935a12afe09..68bdb4b1ff18402ac7345e7389b2ad050df1c3f2 100644 (file)
--- a/src/gallium/drivers/iris/iris_query.c
+++ b/src/gallium/drivers/iris/iris_query.c
@@ -94,6 +94,8 @@ struct iris_query {
  
     bool ready;
  
+   bool stalled;
+
     uint64_t result;
  
     struct iris_bo *bo;
@@ -103,13 +105,21 @@ struct iris_query {
  };
  
  struct iris_query_snapshots {
+   /** iris_render_condition's saved MI_PREDICATE_DATA value. */
+   uint64_t predicate_data;
+
+   /** Have the start/end snapshots landed? */
     uint64_t snapshots_landed;
+
+   /** Starting and ending counter snapshots */
     uint64_t start;
     uint64_t end;
  };
  
  struct iris_query_so_overflow {
+   uint64_t predicate_data;
     uint64_t snapshots_landed;
+
     struct {
        uint64_t prim_storage_needed[2];
        uint64_t num_prims[2];
@@ -179,6 +189,7 @@ write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
        iris_emit_pipe_control_flush(batch,
                                     PIPE_CONTROL_CS_STALL |
                                     PIPE_CONTROL_STALL_AT_SCOREBOARD);
+      q->stalled = true;
     }
  
     switch (q->type) {
@@ -694,7 +705,7 @@ iris_get_query_result_resource(struct pipe_context *ctx,
     /* Calculate the result to CS_GPR0 */
     calculate_result_on_gpu(ice, q);
  
-   bool predicated = !wait && iris_is_query_pipelined(q);
+   bool predicated = !wait && !q->stalled;
  
     if (predicated) {
        ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
@@ -740,102 +751,90 @@ iris_set_active_query_state(struct pipe_context *ctx, boolean enable)
  }
  
  static void
-set_predicate_enable(struct iris_context *ice,
-                     bool value)
+set_predicate_enable(struct iris_context *ice, bool value)
  {
     if (value)
-      ice->predicate = IRIS_PREDICATE_STATE_RENDER;
+      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
     else
-      ice->predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
+      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
  }
  
  static void
-set_predicate_for_overflow(struct iris_context *ice,
-                           struct iris_query *q)
+set_predicate_for_result(struct iris_context *ice,
+                         struct iris_query *q,
+                         bool inverted)
  {
     struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
-   ice->predicate = IRIS_PREDICATE_STATE_USE_BIT;
  
-   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
-    * command when loading the values into the predicate source registers for
-    * conditional rendering.
-    */
-   iris_emit_pipe_control_flush(batch, PIPE_CONTROL_FLUSH_ENABLE);
-
-   overflow_result_to_gpr0(ice, q);
-   ice->vtbl.load_register_reg64(batch, CS_GPR(0), MI_PREDICATE_SRC0);
-   ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
-}
+   /* The CPU doesn't have the query result yet; use hardware predication */
+   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;
  
-static void
-set_predicate_for_occlusion(struct iris_context *ice,
-                     struct iris_query *q)
-{
-   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
-   ice->predicate = IRIS_PREDICATE_STATE_USE_BIT;
-
-   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
-    * command when loading the values into the predicate source registers for
-    * conditional rendering.
-    */
+   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
     iris_emit_pipe_control_flush(batch, PIPE_CONTROL_FLUSH_ENABLE);
-
-   ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo, offsetof(struct iris_query_snapshots, start));
-   ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, q->bo, offsetof(struct iris_query_snapshots, end));
-}
-
-static void
-set_predicate_for_result(struct iris_context *ice,
-                         struct iris_query *q,
-                         bool condition)
-{
-   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
-   int load_op;
+   q->stalled = true;
  
     switch (q->type) {
     case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
     case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
-      set_predicate_for_overflow(ice, q);
+      overflow_result_to_gpr0(ice, q);
+
+      ice->vtbl.load_register_reg64(batch, CS_GPR(0), MI_PREDICATE_SRC0);
+      ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
        break;
     default:
-      set_predicate_for_occlusion(ice, q);
+      /* PIPE_QUERY_OCCLUSION_* */
+      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo,
+         offsetof(struct iris_query_snapshots, start));
+      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, q->bo,
+         offsetof(struct iris_query_snapshots, end));
        break;
     }
  
-   if (ice->predicate == IRIS_PREDICATE_STATE_USE_BIT) {
-      if (condition)
-         load_op = MI_PREDICATE_LOADOP_LOAD;
-      else
-         load_op = MI_PREDICATE_LOADOP_LOADINV;
-
-      // batch emit
-      uint32_t predicate = MI_PREDICATE | load_op |
+   uint32_t mi_predicate = MI_PREDICATE |
                             MI_PREDICATE_COMBINEOP_SET |
-                           MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
-      iris_batch_emit(batch, &predicate, sizeof(uint32_t));
-   }
+                           MI_PREDICATE_COMPAREOP_SRCS_EQUAL |
+                           (inverted ? MI_PREDICATE_LOADOP_LOAD
+                                     : MI_PREDICATE_LOADOP_LOADINV);
+   iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
+
+   /* We immediately set the predicate on the render batch, as all the
+    * counters come from 3D operations.  However, we may need to predicate
+    * a compute dispatch, which executes in a different GEM context and has
+    * a different MI_PREDICATE_DATA register.  So, we save the result to
+    * memory and reload it in iris_launch_grid.
+    */
+   unsigned offset = offsetof(struct iris_query_snapshots, predicate_data);
+   ice->vtbl.store_register_mem64(batch, MI_PREDICATE_DATA,
+                                  q->bo, offset, false);
+   ice->state.compute_predicate = q->bo;
  }
  
  static void
  iris_render_condition(struct pipe_context *ctx,
-                     struct pipe_query *query,
-                     boolean condition,
-                     enum pipe_render_cond_flag mode)
+                      struct pipe_query *query,
+                      boolean condition,
+                      enum pipe_render_cond_flag mode)
  {
     struct iris_context *ice = (void *) ctx;
     struct iris_query *q = (void *) query;
  
     if (!q) {
-      ice->predicate = IRIS_PREDICATE_STATE_RENDER;
+      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
        return;
     }
  
     iris_check_query_no_flush(ice, q);
  
-   if (q->result || q->ready)
+   if (q->result || q->ready) {
        set_predicate_enable(ice, (q->result != 0) ^ condition);
-   else
+   } else {
+      if (mode == PIPE_RENDER_COND_NO_WAIT ||
+          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
+         perf_debug(&ice->dbg, "Conditional rendering demoted from "
+                    "\"no wait\" to \"wait\".");
+      }
        set_predicate_for_result(ice, q, condition);
+   }
  }
  
  void
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c

index 99073088b00e4846f083a02812f0a18bc5d34c60..d241831edf8a62e3aa4bd2cc51ee7747be1282b1 100644 (file)
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -4542,7 +4542,9 @@ iris_upload_render_state(struct iris_context *ice,
        prim.InstanceCount = draw->instance_count;
        prim.VertexCountPerInstance = draw->count;
        prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL;
-      prim.PredicateEnable = ice->predicate == IRIS_PREDICATE_STATE_USE_BIT ? 1 : 0;
+      prim.PredicateEnable =
+         ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT;
+
        // XXX: this is probably bonkers.
        prim.StartVertexLocation = draw->start;
author	Kenneth Graunke <kenneth@whitecape.org>
	Tue, 27 Nov 2018 23:30:16 +0000 (15:30 -0800)
committer	Kenneth Graunke <kenneth@whitecape.org>
	Thu, 21 Feb 2019 18:26:10 +0000 (10:26 -0800)
src/gallium/drivers/iris/iris_blit.c		patch \| blob \| history
src/gallium/drivers/iris/iris_clear.c		patch \| blob \| history
src/gallium/drivers/iris/iris_context.h		patch \| blob \| history
src/gallium/drivers/iris/iris_draw.c		patch \| blob \| history
src/gallium/drivers/iris/iris_query.c		patch \| blob \| history
src/gallium/drivers/iris/iris_state.c		patch \| blob \| history