i965 gen6: Ensure correct transform feedback indices on new batch.
author Paul Berry <stereotype441@gmail.com>
Mon, 19 Dec 2011 20:59:04 +0000 (12:59 -0800)
committer Paul Berry <stereotype441@gmail.com>
Tue, 20 Dec 2011 23:22:28 +0000 (15:22 -0800)
We don't currently have kernel support for saving GPU registers on a
context switch, so if multiple processes are performing transform
feedback at the same time, their SVBI registers will interfere with
each other.  To avoid this situation, we keep a software shadow of the
state of the SVBI 0 register (which is the only register we use), and
re-upload it on every new batch.

The function that updates the shadow state of SVBI 0 is called
brw_update_primitive_count, since it will also be used to update the
counters for the PRIMITIVES_GENERATED and
TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN queries.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/gen6_sol.c

index e8ebea80d1742db170d029a6758aa468fc5dd2a0..8840a83d49760a332e10f7fc47f29a71af5600b9 100644 (file)
@@ -145,6 +145,7 @@ enum brw_state_id {
    BRW_STATE_PROGRAM_CACHE,
    BRW_STATE_STATE_BASE_ADDRESS,
    BRW_STATE_HIZ,
+   BRW_STATE_SOL_INDICES,
 };
 
 #define BRW_NEW_URB_FENCE               (1 << BRW_STATE_URB_FENCE)
@@ -174,6 +175,7 @@ enum brw_state_id {
 #define BRW_NEW_PROGRAM_CACHE          (1 << BRW_STATE_PROGRAM_CACHE)
 #define BRW_NEW_STATE_BASE_ADDRESS     (1 << BRW_STATE_STATE_BASE_ADDRESS)
 #define BRW_NEW_HIZ                    (1 << BRW_STATE_HIZ)
+#define BRW_NEW_SOL_INDICES            (1 << BRW_STATE_SOL_INDICES)
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
@@ -983,6 +985,11 @@ struct brw_context
       struct gl_renderbuffer *depth_rb;
    } hiz;
 
+   struct brw_sol_state {
+      uint32_t svbi_0_starting_index; /* shadow: next index to load in SVBI 0 */
+      uint32_t svbi_0_max_index;      /* shadow: maximum index for SVBI 0 */
+   } sol;
+
    uint32_t render_target_format[MESA_FORMAT_COUNT];
    bool format_supported_as_render_target[MESA_FORMAT_COUNT];
 };
index 6627a484a42aba7da6b3784f4bb99e25aded7a6e..774a5ca46eeacd0120d57f95d35949fef4347b7e 100644 (file)
 #include "main/samplerobj.h"
 #include "main/state.h"
 #include "main/enums.h"
+#include "main/macros.h"
 #include "tnl/tnl.h"
 #include "vbo/vbo_context.h"
 #include "swrast/swrast.h"
 #include "swrast_setup/swrast_setup.h"
+#include "drivers/common/meta.h"
 
 #include "brw_draw.h"
 #include "brw_defines.h"
@@ -377,6 +379,34 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
    }
 }
 
+/**
+ * Update internal counters based on the drawing operation described in
+ * prim.
+ */
+static void
+brw_update_primitive_count(struct brw_context *brw,
+                           const struct _mesa_prim *prim)
+{
+   uint32_t count = count_tessellated_primitives(prim);
+   if (brw->intel.ctx.TransformFeedback.CurrentObject->Active) {
+      /* Update brw->sol.svbi_0_starting_index to reflect the amount by which
+       * the hardware is going to increment SVBI 0 when this drawing operation
+       * occurs.  This is necessary because the kernel does not (yet) save and
+       * restore GPU registers when context switching, so we'll need to be
+       * able to reload SVBI 0 with the correct value in case we have to start
+       * a new batch buffer.
+       */
+      unsigned svbi_postincrement_value =
+         brw->gs.prog_data->svbi_postincrement_value;
+      uint32_t space_avail =
+         (brw->sol.svbi_0_max_index - brw->sol.svbi_0_starting_index)
+         / svbi_postincrement_value;
+      uint32_t primitives_written = MIN2 (space_avail, count); /* clamp to room left */
+      brw->sol.svbi_0_starting_index +=
+         svbi_postincrement_value * primitives_written;
+   }
+}
+
 /* May fail if out of video memory for texture or vbo upload, or on
  * fallback conditions.
  */
@@ -498,6 +528,9 @@ retry:
            }
         }
       }
+
+      if (!_mesa_meta_in_progress(ctx))
+         brw_update_primitive_count(brw, &prim[i]);
    }
 
    if (intel->always_flush_batch)
index a3a470fee6b0a084a761b942e662c098a27fa289..d2715697c7233a2655ad21203c4cb0535278101d 100644 (file)
@@ -92,6 +92,7 @@ extern const struct brw_tracked_state gen6_gs_state;
 extern const struct brw_tracked_state gen6_renderbuffer_surfaces;
 extern const struct brw_tracked_state gen6_sampler_state;
 extern const struct brw_tracked_state gen6_scissor_state;
+extern const struct brw_tracked_state gen6_sol_indices;
 extern const struct brw_tracked_state gen6_sol_surface;
 extern const struct brw_tracked_state gen6_sf_state;
 extern const struct brw_tracked_state gen6_sf_vp;
index 463689224dfce59a615b2dc74a1db52eda7560e2..74d01d8b5f292a4e594c9944be7ec11b7d82d2dc 100644 (file)
@@ -171,6 +171,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
 
    &brw_drawing_rect,
 
+   &gen6_sol_indices,
    &brw_indices,
    &brw_index_buffer,
    &brw_vertices,
index 2f2051bc6138f6caf88c3b38cdddb422975f873a..5d11481cb033cbbe9e56ffe7356efe84dcf76473 100644 (file)
@@ -72,11 +72,34 @@ const struct brw_tracked_state gen6_sol_surface = {
    .emit = gen6_update_sol_surfaces,
 };
 
+static void
+gen6_update_sol_indices(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel; /* presumably read by BATCH macros */
+
+   BEGIN_BATCH(4); /* reload SVBI 0 from the software shadow in brw->sol */
+   OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
+   OUT_BATCH(0); /* SVBI 0 */
+   OUT_BATCH(brw->sol.svbi_0_starting_index); /* BRW_NEW_SOL_INDICES */
+   OUT_BATCH(brw->sol.svbi_0_max_index); /* BRW_NEW_SOL_INDICES */
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state gen6_sol_indices = {
+   .dirty = {
+      .mesa = 0,
+      .brw = (BRW_NEW_BATCH |        /* re-upload SVBI 0 on every new batch */
+              BRW_NEW_SOL_INDICES),  /* shadow values in brw->sol changed */
+      .cache = 0
+   },
+   .emit = gen6_update_sol_indices,
+};
+
 void
 brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
                             struct gl_transform_feedback_object *obj)
 {
-   struct intel_context *intel = intel_context(ctx);
+   struct brw_context *brw = brw_context(ctx);
    const struct gl_shader_program *vs_prog =
       ctx->Shader.CurrentVertexProgram;
    const struct gl_transform_feedback_info *linked_xfb_info =
@@ -100,13 +123,12 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
       max_index = MIN2(max_index, max_for_this_buffer);
    }
 
-   /* Initialize the SVBI 0 register to zero and set the maximum index. */
-   BEGIN_BATCH(4);
-   OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
-   OUT_BATCH(0); /* SVBI 0 */
-   OUT_BATCH(0);
-   OUT_BATCH(max_index);
-   ADVANCE_BATCH();
+   /* Initialize the SVBI 0 register to zero and set the maximum index.
+    * These values will be sent to the hardware on the next draw.
+    */
+   brw->state.dirty.brw |= BRW_NEW_SOL_INDICES;
+   brw->sol.svbi_0_starting_index = 0;
+   brw->sol.svbi_0_max_index = max_index;
 }
 
 void