v3d: fix primitive queries for geometry shaders
author: Iago Toral Quiroga <itoral@igalia.com>
Wed, 30 Oct 2019 13:19:30 +0000 (14:19 +0100)
committer: Iago Toral Quiroga <itoral@igalia.com>
Mon, 16 Dec 2019 07:42:37 +0000 (08:42 +0100)
With geometry shaders the number of emitted primitives is decided
at run time, so we cannot precompute it in the CPU and we need to
use the PRIMITIVE_COUNTS_FEEDBACK commands to have the GPU provide
the number like we do for the number of primitives written to
transform feedback. This may have a performance impact though, since
it requires a sync wait for the draw to complete, so we only do
it when geometry shaders are present.

v2: remove '> 0' comparison for pointer type (Alejandro)

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
src/gallium/drivers/v3d/v3d_context.c
src/gallium/drivers/v3d/v3d_context.h
src/gallium/drivers/v3d/v3d_job.c
src/gallium/drivers/v3d/v3d_query.c
src/gallium/drivers/v3d/v3dx_draw.c
src/gallium/drivers/v3d/v3dx_state.c

index 93f0caabc357e078e1e107306d8d599254879bfc..f3dc3a92fec397ff9a4faa5bc5e51fb47f99cc6f 100644 (file)
@@ -123,7 +123,7 @@ v3d_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
  * output targets.
  */
 void
-v3d_tf_update_counters(struct v3d_context *v3d)
+v3d_update_primitive_counters(struct v3d_context *v3d)
 {
         struct v3d_job *job = v3d_get_job_for_fbo(v3d);
         if (job->draw_calls_queued == 0)
index f8d146847de714515b17778d2db7068170e96ee2..e3a5be5fea2fce3f8d6a3651bdd9a431839d0da7 100644 (file)
@@ -693,7 +693,7 @@ bool v3d_generate_mipmap(struct pipe_context *pctx,
 
 struct v3d_fence *v3d_fence_create(struct v3d_context *v3d);
 
-void v3d_tf_update_counters(struct v3d_context *v3d);
+void v3d_update_primitive_counters(struct v3d_context *v3d);
 
 #ifdef v3dX
 #  include "v3dx_context.h"
index 0acd8b033c2b9b2c5579c8bc61fab7b0bab4f885..32c1157c06c650defe1857cfd5cddaf458fd4df3 100644 (file)
@@ -459,6 +459,11 @@ v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d)
         if (v3d_bo_wait(rsc->bo, PIPE_TIMEOUT_INFINITE, "prim-counts")) {
                 uint32_t *map = v3d_bo_map(rsc->bo) + v3d->prim_counts_offset;
                 v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN];
+                /* When we only have a vertex shader we determine the primitive
+                 * count in the CPU so don't update it here again.
+                 */
+                if (v3d->prog.gs)
+                        v3d->prims_generated += map[V3D_PRIM_COUNTS_WRITTEN];
         }
 }
 
index f8419605dc260e3f8d84529e477b27e631b0e35d..334713e5cc54f90892bbebcb1712939149b5fe1f 100644 (file)
@@ -72,6 +72,13 @@ v3d_begin_query(struct pipe_context *pctx, struct pipe_query *query)
 
         switch (q->type) {
         case PIPE_QUERY_PRIMITIVES_GENERATED:
+                /* If we are using PRIMITIVE_COUNTS_FEEDBACK to retrieve
+                 * primitive counts from the GPU (which we need when a GS
+                 * is present), then we need to update our counters now
+                 * to discard any primitives generated before this.
+                 */
+                if (v3d->prog.gs)
+                        v3d_update_primitive_counters(v3d);
                 q->start = v3d->prims_generated;
                 break;
         case PIPE_QUERY_PRIMITIVES_EMITTED:
@@ -79,7 +86,7 @@ v3d_begin_query(struct pipe_context *pctx, struct pipe_query *query)
                  * primitive counts to skip primtives recorded before this.
                  */
                 if (v3d->streamout.num_targets > 0)
-                        v3d_tf_update_counters(v3d);
+                        v3d_update_primitive_counters(v3d);
                 q->start = v3d->tf_prims_generated;
                 break;
         case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -107,6 +114,12 @@ v3d_end_query(struct pipe_context *pctx, struct pipe_query *query)
 
         switch (q->type) {
         case PIPE_QUERY_PRIMITIVES_GENERATED:
+                /* If we are using PRIMITIVE_COUNTS_FEEDBACK to retrieve
+                 * primitive counts from the GPU (which we need when a GS
+                 * is present), then we need to update our counters now.
+                 */
+                if (v3d->prog.gs)
+                        v3d_update_primitive_counters(v3d);
                 q->end = v3d->prims_generated;
                 break;
         case PIPE_QUERY_PRIMITIVES_EMITTED:
@@ -115,7 +128,7 @@ v3d_end_query(struct pipe_context *pctx, struct pipe_query *query)
                  * time. Otherwise, we have to do it now.
                  */
                 if (v3d->streamout.num_targets > 0)
-                        v3d_tf_update_counters(v3d);
+                        v3d_update_primitive_counters(v3d);
                 q->end = v3d->tf_prims_generated;
                 break;
         case PIPE_QUERY_OCCLUSION_COUNTER:
index 812ca3b94a763d77a04a7fe67fcf233be96af6bf..a0714d4a02efd7aee702cf38d482a4365ffbf899 100644 (file)
@@ -961,15 +961,18 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
 }
 
 /**
- * Updates the number of primitvies generated from the number of vertices
- * to draw. We do this here instead of using PRIMITIVE_COUNTS_FEEDBACK because
- * using the GPU packet for this might require sync waits and this is trivial
- * to handle in the CPU instead.
+ * Updates the number of primitives generated from the number of vertices
+ * to draw. This only works when no GS is present, since otherwise the number
+ * of primitives generated cannot be determined in advance and we need to
+ * use the PRIMITIVE_COUNTS_FEEDBACK command instead, however, that requires
+ * a sync wait for the draw to complete, so we only use that when GS is present.
  */
 static void
 v3d_update_primitives_generated_counter(struct v3d_context *v3d,
                                         const struct pipe_draw_info *info)
 {
+        assert(!v3d->prog.gs);
+
         if (!v3d->active_queries)
                 return;
 
@@ -1131,7 +1134,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
          */
         if (v3d->streamout.num_targets > 0 &&
             u_base_prim_type(info->mode) != u_base_prim_type(v3d->prim_mode)) {
-                v3d_tf_update_counters(v3d);
+                v3d_update_primitive_counters(v3d);
         }
 
         struct v3d_job *job = v3d_get_job_for_fbo(v3d);
@@ -1239,7 +1242,8 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS);
 #endif
 
-        v3d_update_primitives_generated_counter(v3d, info);
+        if (!v3d->prog.gs)
+                v3d_update_primitives_generated_counter(v3d, info);
 
         uint32_t hw_prim_type = v3d_hw_prim_type(info->mode);
         if (info->index_size) {
index 840d8288298e3996a7af548dfbd73cb4e81ab5aa..a0bb71ca540d3f4530dc985756b3c19f74e3eb5f 100644 (file)
@@ -1234,7 +1234,7 @@ v3d_set_stream_output_targets(struct pipe_context *pctx,
          * draw we need to do it here as well.
          */
         if (num_targets == 0 && so->num_targets > 0)
-                v3d_tf_update_counters(ctx);
+                v3d_update_primitive_counters(ctx);
 
         for (i = 0; i < num_targets; i++) {
                 if (offsets[i] != -1)