i965: Use force_compat_profile driconf option
[mesa.git] / src / mesa / drivers / dri / i965 / gen6_sol.c
index 41158bd580c7d4111770a9a851631aa395ac37f8..bda015e6cc5b74961f7937a83c3b99160d80d600 100644 (file)
@@ -49,13 +49,13 @@ gen6_update_sol_surfaces(struct brw_context *brw)
    }
 
    for (int i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) {
-      const int surf_index = SURF_INDEX_GEN6_SOL_BINDING(i);
+      const int surf_index = BRW_GEN6_SOL_BINDING_START + i;
       if (xfb_active && i < linked_xfb_info->NumOutputs) {
          unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer;
          unsigned buffer_offset =
             xfb_obj->Offset[buffer] / 4 +
             linked_xfb_info->Outputs[i].DstOffset;
-         if (brw->geometry_program) {
+         if (brw->programs[MESA_SHADER_GEOMETRY]) {
             brw_update_sol_surface(
                brw, xfb_obj->Buffers[buffer],
                &brw->gs.base.surf_offset[surf_index],
@@ -69,7 +69,7 @@ gen6_update_sol_surfaces(struct brw_context *brw)
                linked_xfb_info->Buffers[buffer].Stride, buffer_offset);
          }
       } else {
-         if (!brw->geometry_program)
+         if (!brw->programs[MESA_SHADER_GEOMETRY])
             brw->ff_gs.surf_offset[surf_index] = 0;
          else
             brw->gs.base.surf_offset[surf_index] = 0;
@@ -103,14 +103,15 @@ brw_gs_upload_binding_table(struct brw_context *brw)
 
    /* We have two scenarios here:
     * 1) We are using a geometry shader only to implement transform feedback
-    *    for a vertex shader (brw->geometry_program == NULL). In this case, we
-    *    only need surfaces for transform feedback in the GS stage.
+    *    for a vertex shader (brw->programs[MESA_SHADER_GEOMETRY] == NULL).
+    *    In this case, we only need surfaces for transform feedback in the
+    *    GS stage.
     * 2) We have a user-provided geometry shader. In this case we may need
     *    surfaces for transform feedback and/or other stuff, like textures,
     *    in the GS stage.
     */
 
-   if (!brw->geometry_program) {
+   if (!brw->programs[MESA_SHADER_GEOMETRY]) {
       /* BRW_NEW_VERTEX_PROGRAM */
       prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
       if (prog) {
@@ -131,8 +132,7 @@ brw_gs_upload_binding_table(struct brw_context *brw)
        * space for the binding table. Anyway, in this case we know that we only
        * use BRW_MAX_SOL_BINDINGS surfaces at most.
        */
-      bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
-                             sizeof(uint32_t) * BRW_MAX_SOL_BINDINGS,
+      bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SOL_BINDINGS,
                              32, &brw->ff_gs.bind_bo_offset);
 
       /* BRW_NEW_SURFACES */
@@ -160,8 +160,7 @@ brw_gs_upload_binding_table(struct brw_context *brw)
       /* Might want to calculate nr_surfaces first, to avoid taking up so much
        * space for the binding table.
        */
-      bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
-                             sizeof(uint32_t) * BRW_MAX_SURFACES,
+      bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SURFACES,
                              32, &brw->gs.base.bind_bo_offset);
 
       /* BRW_NEW_SURFACES */
@@ -196,9 +195,11 @@ brw_new_transform_feedback(struct gl_context *ctx, GLuint name)
    _mesa_init_transform_feedback_object(&brw_obj->base, name);
 
    brw_obj->offset_bo =
-      drm_intel_bo_alloc(brw->bufmgr, "transform feedback offsets", 16, 64);
+      brw_bo_alloc(brw->bufmgr, "transform feedback offsets", 16,
+                   BRW_MEMZONE_OTHER);
    brw_obj->prim_count_bo =
-      drm_intel_bo_alloc(brw->bufmgr, "xfb primitive counts", 4096, 64);
+      brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 16384,
+                   BRW_MEMZONE_OTHER);
 
    return &brw_obj->base;
 }
@@ -210,14 +211,10 @@ brw_delete_transform_feedback(struct gl_context *ctx,
    struct brw_transform_feedback_object *brw_obj =
       (struct brw_transform_feedback_object *) obj;
 
-   for (unsigned i = 0; i < ARRAY_SIZE(obj->Buffers); i++) {
-      _mesa_reference_buffer_object(ctx, &obj->Buffers[i], NULL);
-   }
-
-   drm_intel_bo_unreference(brw_obj->offset_bo);
-   drm_intel_bo_unreference(brw_obj->prim_count_bo);
+   brw_bo_unreference(brw_obj->offset_bo);
+   brw_bo_unreference(brw_obj->prim_count_bo);
 
-   free(brw_obj);
+   _mesa_delete_transform_feedback_object(ctx, obj);
 }
 
 /**
@@ -234,38 +231,36 @@ brw_delete_transform_feedback(struct gl_context *ctx,
  * Note that we expose one stream pre-Gen7, so the above is just (start, end).
  */
 static void
-tally_prims_generated(struct brw_context *brw,
-                      struct brw_transform_feedback_object *obj)
+aggregate_transform_feedback_counter(
+   struct brw_context *brw,
+   struct brw_bo *bo,
+   struct brw_transform_feedback_counter *counter)
 {
-   const struct gl_context *ctx = &brw->ctx;
-   const int streams = ctx->Const.MaxVertexStreams;
+   const unsigned streams = brw->ctx.Const.MaxVertexStreams;
 
    /* If the current batch is still contributing to the number of primitives
     * generated, flush it now so the results will be present when mapped.
     */
-   if (drm_intel_bo_references(brw->batch.bo, obj->prim_count_bo))
+   if (brw_batch_references(&brw->batch, bo))
       intel_batchbuffer_flush(brw);
 
-   if (unlikely(brw->perf_debug && drm_intel_bo_busy(obj->prim_count_bo)))
+   if (unlikely(brw->perf_debug && brw_bo_busy(bo)))
       perf_debug("Stalling for # of transform feedback primitives written.\n");
 
-   drm_intel_bo_map(obj->prim_count_bo, false);
-   uint64_t *prim_counts = obj->prim_count_bo->virtual;
+   uint64_t *prim_counts = brw_bo_map(brw, bo, MAP_READ);
+   prim_counts += counter->bo_start * streams; /* skip to first snapshot */
 
-   assert(obj->prim_count_buffer_index % (2 * streams) == 0);
-   int pairs = obj->prim_count_buffer_index / (2 * streams);
+   for (unsigned i = counter->bo_start; i + 1 < counter->bo_end; i += 2) {
+      for (unsigned s = 0; s < streams; s++)
+         counter->accum[s] += prim_counts[streams + s] - prim_counts[s];
 
-   for (int i = 0; i < pairs; i++) {
-      for (int s = 0; s < streams; s++) {
-         obj->prims_generated[s] += prim_counts[streams + s] - prim_counts[s];
-      }
-      prim_counts += 2 * streams; /* move to the next pair */
+      prim_counts += 2 * streams; /* move to the next (start, end) pair */
    }
 
-   drm_intel_bo_unmap(obj->prim_count_bo);
+   brw_bo_unmap(bo);
 
    /* We've already gathered up the old data; we can safely overwrite it now. */
-   obj->prim_count_buffer_index = 0;
+   counter->bo_start = counter->bo_end = 0;
 }
 
 /**
@@ -283,43 +278,49 @@ void
 brw_save_primitives_written_counters(struct brw_context *brw,
                                      struct brw_transform_feedback_object *obj)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
    const struct gl_context *ctx = &brw->ctx;
    const int streams = ctx->Const.MaxVertexStreams;
 
+   assert(obj->prim_count_bo != NULL);
+
    /* Check if there's enough space for a new pair of four values. */
-   if (obj->prim_count_bo != NULL &&
-       obj->prim_count_buffer_index + 2 * streams >= 4096 / sizeof(uint64_t)) {
-      /* Gather up the results so far and release the BO. */
-      tally_prims_generated(brw, obj);
+   if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >=
+       obj->prim_count_bo->size) {
+      aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
+                                           &obj->previous_counter);
+      aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
+                                           &obj->counter);
    }
 
    /* Flush any drawing so that the counters have the right values. */
    brw_emit_mi_flush(brw);
 
    /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
-   for (int i = 0; i < streams; i++) {
-      int offset = (obj->prim_count_buffer_index + i) * sizeof(uint64_t);
+   if (devinfo->gen >= 7) {
+      for (int i = 0; i < streams; i++) {
+         int offset = (streams * obj->counter.bo_end + i) * sizeof(uint64_t);
+         brw_store_register_mem64(brw, obj->prim_count_bo,
+                                  GEN7_SO_NUM_PRIMS_WRITTEN(i),
+                                  offset);
+      }
+   } else { /* pre-Gen7: one counter register, one value per snapshot */
       brw_store_register_mem64(brw, obj->prim_count_bo,
-                               GEN7_SO_NUM_PRIMS_WRITTEN(i),
-                               offset);
+                               GEN6_SO_NUM_PRIMS_WRITTEN,
+                               obj->counter.bo_end * sizeof(uint64_t));
    }
 
    /* Update where to write data to. */
-   obj->prim_count_buffer_index += streams;
+   obj->counter.bo_end++; /* bo_end counts snapshots, not uint64_t slots */
 }
 
-/**
- * Compute the number of vertices written by this transform feedback operation.
- */
-void
-brw_compute_xfb_vertices_written(struct brw_context *brw,
-                                 struct brw_transform_feedback_object *obj)
+static void
+compute_vertices_written_so_far(struct brw_context *brw,
+                                struct brw_transform_feedback_object *obj,
+                                struct brw_transform_feedback_counter *counter,
+                                uint64_t *vertices_written)
 {
    const struct gl_context *ctx = &brw->ctx;
-
-   if (obj->vertices_written_valid || !obj->base.EndedAnytime)
-      return;
-
    unsigned vertices_per_prim = 0;
 
    switch (obj->primitive_mode) {
@@ -337,11 +338,26 @@ brw_compute_xfb_vertices_written(struct brw_context *brw,
    }
 
    /* Get the number of primitives generated. */
-   tally_prims_generated(brw, obj);
+   aggregate_transform_feedback_counter(brw, obj->prim_count_bo, counter);
 
    for (int i = 0; i < ctx->Const.MaxVertexStreams; i++) {
-      obj->vertices_written[i] = vertices_per_prim * obj->prims_generated[i];
+      vertices_written[i] = vertices_per_prim * counter->accum[i];
    }
+}
+
+/**
+ * Lazily compute the number of vertices written by the last transform
+ * feedback begin/end block (obj->previous_counter), caching the result.
+ */
+static void
+compute_xfb_vertices_written(struct brw_context *brw,
+                             struct brw_transform_feedback_object *obj)
+{
+   if (obj->vertices_written_valid || !obj->base.EndedAnytime)
+      return; /* already cached, or EndedAnytime is not set */
+
+   compute_vertices_written_so_far(brw, obj, &obj->previous_counter,
+                                   obj->vertices_written);
+   obj->vertices_written_valid = true;
+}
 
@@ -363,7 +379,7 @@ brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
    assert(obj->EndedAnytime);
    assert(stream < ctx->Const.MaxVertexStreams);
 
-   brw_compute_xfb_vertices_written(brw, brw_obj);
+   compute_xfb_vertices_written(brw, brw_obj);
    return brw_obj->vertices_written[stream];
 }
 
@@ -376,8 +392,10 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
    const struct gl_transform_feedback_info *linked_xfb_info;
    struct gl_transform_feedback_object *xfb_obj =
       ctx->TransformFeedback.CurrentObject;
+   struct brw_transform_feedback_object *brw_obj =
+      (struct brw_transform_feedback_object *) xfb_obj;
 
-   assert(brw->gen == 6);
+   assert(brw->screen->devinfo.gen == 6);
 
    if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]) {
       /* BRW_NEW_GEOMETRY_PROGRAM */
@@ -391,7 +409,7 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
    /* Compute the maximum number of vertices that we can write without
     * overflowing any of the buffers currently being used for feedback.
     */
-   unsigned max_index
+   brw_obj->max_index
       = _mesa_compute_max_transform_feedback_vertices(ctx, xfb_obj,
                                                       linked_xfb_info);
 
@@ -400,7 +418,7 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
    OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
    OUT_BATCH(0); /* SVBI 0 */
    OUT_BATCH(0); /* starting index */
-   OUT_BATCH(max_index);
+   OUT_BATCH(brw_obj->max_index);
    ADVANCE_BATCH();
 
    /* Initialize the rest of the unused streams to sane values.  Otherwise,
@@ -415,18 +433,89 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
       OUT_BATCH(0xffffffff);
       ADVANCE_BATCH();
    }
+
+   /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */
+   brw_save_primitives_written_counters(brw, brw_obj);
+
+   brw_obj->primitive_mode = mode;
 }
 
 void
 brw_end_transform_feedback(struct gl_context *ctx,
                            struct gl_transform_feedback_object *obj)
 {
-   /* After EndTransformFeedback, it's likely that the client program will try
-    * to draw using the contents of the transform feedback buffer as vertex
-    * input.  In order for this to work, we need to flush the data through at
-    * least the GS stage of the pipeline, and flush out the render cache.  For
-    * simplicity, just do a full flush.
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_transform_feedback_object *brw_obj =
+      (struct brw_transform_feedback_object *) obj;
+
+   /* Store the ending SO_NUM_PRIMS_WRITTEN values (this also flushes). */
+   if (!obj->Paused) /* when paused, the pause already stored them */
+      brw_save_primitives_written_counters(brw, brw_obj);
+
+   /* We've reached the end of a transform feedback begin/end block.  This
+    * means that future DrawTransformFeedback() calls will need to pick up the
+    * results of the current counter, and that it's time to roll back the
+    * current primitive counter to zero.
+    */
+   brw_obj->previous_counter = brw_obj->counter;
+   brw_reset_transform_feedback_counter(&brw_obj->counter);
+
+   /* EndTransformFeedback() means that we need to update the number of
+    * vertices written.  Since it's only necessary if DrawTransformFeedback()
+    * is called and it means mapping a buffer object, we delay computing it
+    * until it's absolutely necessary to try and avoid stalls.
+    */
+   brw_obj->vertices_written_valid = false;
+}
+
+void
+brw_pause_transform_feedback(struct gl_context *ctx,
+                             struct gl_transform_feedback_object *obj)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_transform_feedback_object *brw_obj =
+      (struct brw_transform_feedback_object *) obj;
+
+   /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters.
+    * While this operation is paused, other transform feedback actions may
+    * occur, which will contribute to the counters.  We need to exclude that
+    * from our counts.
     */
+   brw_save_primitives_written_counters(brw, brw_obj); /* flushes first */
+}
+
+void
+brw_resume_transform_feedback(struct gl_context *ctx,
+                              struct gl_transform_feedback_object *obj)
+{
    struct brw_context *brw = brw_context(ctx);
-   brw_emit_mi_flush(brw);
+   struct brw_transform_feedback_object *brw_obj =
+      (struct brw_transform_feedback_object *) obj;
+
+   /* Reload SVBI 0 with the count of vertices written so far. */
+   uint64_t svbi; /* one slot; assumes one vertex stream - TODO confirm */
+   compute_vertices_written_so_far(brw, brw_obj, &brw_obj->counter, &svbi);
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
+   OUT_BATCH(0); /* SVBI 0 */
+   OUT_BATCH((uint32_t) svbi); /* starting index */
+   OUT_BATCH(brw_obj->max_index); /* limit computed at Begin time */
+   ADVANCE_BATCH();
+
+   /* Initialize the rest of the unused streams to sane values.  Otherwise,
+    * they may indicate that there is no room to write data and prevent
+    * anything from happening at all.
+    */
+   for (int i = 1; i < 4; i++) {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
+      OUT_BATCH(i << SVB_INDEX_SHIFT);
+      OUT_BATCH(0); /* starting index */
+      OUT_BATCH(0xffffffff);
+      ADVANCE_BATCH();
+   }
+
+   /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
+   brw_save_primitives_written_counters(brw, brw_obj);
 }