broadcom/vc5: Add occlusion query support.
author: Eric Anholt <eric@anholt.net>
Mon, 6 Nov 2017 23:41:40 +0000 (15:41 -0800)
committer: Eric Anholt <eric@anholt.net>
Tue, 7 Nov 2017 20:56:40 +0000 (12:56 -0800)
Fixes all of piglit's OQ tests.

src/broadcom/cle/v3d_packet_v33.xml
src/gallium/drivers/vc5/vc5_context.c
src/gallium/drivers/vc5/vc5_context.h
src/gallium/drivers/vc5/vc5_draw.c
src/gallium/drivers/vc5/vc5_emit.c
src/gallium/drivers/vc5/vc5_job.c
src/gallium/drivers/vc5/vc5_query.c

index 2b0665537e88104e1c85e5f26b26f259357d90c6..165e489d4cd426dfd16fb6ba4655aea1494ac304 100644 (file)
     <field name="Render Target 0 per colour component write masks" size="4" start="0" type="uint"/>
   </packet>
 
+  <packet code="92" name="Occlusion Query Counter">
+    <field name="address" size="32" start="0" type="address"/>
+  </packet>
+
   <packet code="96" name="Configuration Bits">
     <field name="Direct3D Provoking Vertex" size="1" start="21" type="bool"/>
     <field name="Direct3D 'Point-fill' mode" size="1" start="20" type="bool"/>
index f80020ab31e0c5ae35883c0a04f688608597b4a4..d27f41bb5f88a016395f143dffc7adf0c0ae1f14 100644 (file)
@@ -162,6 +162,7 @@ vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
         V3D_DEBUG |= saved_shaderdb_flag;
 
         vc5->sample_mask = (1 << VC5_MAX_SAMPLES) - 1;
+        vc5->active_queries = true;
 
         return &vc5->base;
 
index 298dfacf872be4f2b612af366ae8e132f1321a8e..2fec7a77da4a41c682a81d52ee09096311834b18 100644 (file)
@@ -77,6 +77,7 @@ void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo);
 #define VC5_DIRTY_COMPILED_FS   (1 << 25)
 #define VC5_DIRTY_FS_INPUTS     (1 << 26)
 #define VC5_DIRTY_STREAMOUT     (1 << 27)
+#define VC5_DIRTY_OQ            (1 << 28)
 
 #define VC5_MAX_FS_INPUTS 64
 
@@ -262,6 +263,13 @@ struct vc5_job {
          */
         bool needs_flush;
 
+        /**
+         * Set if there is a nonzero address for OCCLUSION_QUERY_COUNTER.  If
+         * so, we need to disable it and flush before ending the CL, to keep
+         * the next tile from starting with it enabled.
+         */
+        bool oq_enabled;
+
         bool uses_early_z;
 
         /**
@@ -353,12 +361,15 @@ struct vc5_context {
          */
         uint8_t blend_dst_alpha_one;
 
+        bool active_queries;
+
         struct pipe_poly_stipple stipple;
         struct pipe_clip_state clip;
         struct pipe_viewport_state viewport;
         struct vc5_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
         struct vc5_vertexbuf_stateobj vertexbuf;
         struct vc5_streamout_stateobj streamout;
+        struct vc5_bo *current_oq;
         /** @} */
 };
 
index edc528591542e50b3b974b0d341c83636f6ed9c3..8020e26802a8c2c394287ad12c72fd289b67dceb 100644 (file)
@@ -93,6 +93,9 @@ vc5_start_draw(struct vc5_context *vc5)
         /* There's definitely nothing in the VCD cache we want. */
         cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
 
+        /* Disable any leftover OQ state from another job. */
+        cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter);
+
         /* "Binning mode lists must have a Start Tile Binning item (6) after
          *  any prefix state data before the binning list proper starts."
          */
index de4737eeec860b387ccb2761aac1bb53bf342366..a4a1af7ddf48237018af585f31bce329b7e92bd1 100644 (file)
@@ -492,4 +492,13 @@ vc5_emit_state(struct pipe_context *pctx)
                         /* XXX? */
                 }
         }
+
+        if (vc5->dirty & VC5_DIRTY_OQ) {
+                cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
+                        job->oq_enabled = vc5->active_queries && vc5->current_oq;
+                        if (job->oq_enabled) {
+                                counter.address = cl_address(vc5->current_oq, 0);
+                        }
+                }
+        }
 }
index ed1a64be89196adf1309afbf22bc3bd63b931e20..46c85e7edf41c0fa5944f1d8efdcf488b96a92c4 100644 (file)
@@ -381,7 +381,17 @@ vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job)
         vc5_emit_rcl(job);
 
         if (cl_offset(&job->bcl) > 0) {
-                vc5_cl_ensure_space_with_branch(&job->bcl, 2);
+                vc5_cl_ensure_space_with_branch(&job->bcl,
+                                                7 +
+                                                cl_packet_length(OCCLUSION_QUERY_COUNTER));
+
+                if (job->oq_enabled) {
+                        /* Disable the OQ at the end of the CL, so that the
+                         * draw calls at the start of the CL don't inherit the
+                         * OQ counter.
+                         */
+                        cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter);
+                }
 
                 /* Increment the semaphore indicating that binning is done and
                  * unblocking the render thread.  Note that this doesn't act
@@ -389,10 +399,12 @@ vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job)
                  */
                 cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
 
-                /* The FLUSH caps all of our bin lists with a
-                 * VC5_PACKET_RETURN.
+                /* The FLUSH_ALL emits any unwritten state changes in each
+                 * tile.  We can use this to reset any state that needs to be
+                 * present at the start of the next tile, as we do with
+                 * OCCLUSION_QUERY_COUNTER above.
                  */
-                cl_emit(&job->bcl, FLUSH, flush);
+                cl_emit(&job->bcl, FLUSH_ALL_STATE, flush);
         }
 
         job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
index c114e76eef0663977e2074bc0ab3c1f0226ce80e..a412b384081feb13d1fa2ed377b11eefafcf8179 100644 (file)
  */
 
 /**
- * Stub support for occlusion queries.
+ * Gallium query object support.
  *
- * Since we expose support for GL 2.0, we have to expose occlusion queries,
- * but the spec allows you to expose 0 query counter bits, so we just return 0
- * as the result of all our queries.
+ * So far we just support occlusion queries.  The HW has native support for
+ * them, with the query result being loaded and stored by the TLB unit.
+ *
+ * From a SW perspective, we have to be careful to make sure that the jobs
+ * that need to be tracking queries are bracketed by the start and end of
+ * counting, even across FBO transitions.
  */
+
 #include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
 
 struct vc5_query
 {
-        uint8_t pad;
+        /* Query type recorded by vc5_create_query(). */
+        enum pipe_query_type type;
+        /* Counter BO: allocated in vc5_begin_query(), released when the
+         * result is read back or the query is destroyed.  NULL otherwise.
+         */
+        struct vc5_bo *bo;
 };
 
+/**
+ * Allocates a zero-initialized query object of the given type.
+ *
+ * Only the occlusion query types are accepted (checked by assert).  Returns
+ * NULL if the allocation fails.  No BO is attached here.
+ */
 static struct pipe_query *
-vc5_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
+vc5_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index)
 {
-        struct vc5_query *query = calloc(1, sizeof(*query));
+        struct vc5_query *q = calloc(1, sizeof(*q));
+
+        /* Don't write through a failed allocation. */
+        if (!q)
+                return NULL;
+
+        assert(query_type == PIPE_QUERY_OCCLUSION_COUNTER ||
+               query_type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+               query_type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE);
+
+        q->type = query_type;
 
         /* Note that struct pipe_query isn't actually defined anywhere. */
-        return (struct pipe_query *)query;
+        return (struct pipe_query *)q;
 }
 
+/**
+ * Drops the query's reference on its counter BO and frees the query object.
+ */
 static void
-vc5_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
+vc5_destroy_query(struct pipe_context *pctx, struct pipe_query *query)
 {
-        free(query);
+        struct vc5_query *q = (struct vc5_query *)query;
+
+        /* q->bo is still NULL here if the query was never begun. */
+        vc5_bo_unreference(&q->bo);
+        free(q);
 }
 
+/**
+ * Starts counting for a query: attaches a fresh counter BO whose first word
+ * is zeroed, makes it the context's current OQ, and flags the OQ state dirty
+ * so that it is emitted with the next state update.
+ *
+ * Always returns true.
+ */
 static boolean
-vc5_begin_query(struct pipe_context *ctx, struct pipe_query *query)
+vc5_begin_query(struct pipe_context *pctx, struct pipe_query *query)
 {
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_query *q = (struct vc5_query *)query;
+
+        /* If the query was begun before and its result never read back, it
+         * still holds the old BO.  Drop that reference so it isn't leaked
+         * when q->bo is overwritten below.
+         */
+        vc5_bo_unreference(&q->bo);
+
+        q->bo = vc5_bo_alloc(vc5->screen, 4096, "query");
+
+        /* Reset the counter value before it is used. */
+        uint32_t *map = vc5_bo_map(q->bo);
+        *map = 0;
+
+        vc5->current_oq = q->bo;
+        vc5->dirty |= VC5_DIRTY_OQ;
+
         return true;
 }
 
+/**
+ * Stops counting: clears the context's current OQ BO and flags the OQ state
+ * dirty so that the counter packet is re-emitted by the next
+ * vc5_emit_state().  The query argument itself is not consulted.
+ *
+ * Always returns true.
+ */
 static bool
-vc5_end_query(struct pipe_context *ctx, struct pipe_query *query)
+vc5_end_query(struct pipe_context *pctx, struct pipe_query *query)
 {
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        vc5->current_oq = NULL;
+        vc5->dirty |= VC5_DIRTY_OQ;
+
         return true;
 }
 
+/**
+ * Reads back the result of a query.
+ *
+ * If the query still has a counter BO, the context is flushed and the BO is
+ * waited on before its first word is read; the BO reference is then dropped.
+ * A query with no BO reports 0.
+ *
+ * \param wait     if true, block until the BO is idle; if false, only poll
+ *                 and return false when the result is not yet available.
+ * \param vresult  receives u64 for counter queries, b for predicate queries.
+ * \return true if vresult was written, false if the wait failed or timed out.
+ */
 static boolean
-vc5_get_query_result(struct pipe_context *ctx, struct pipe_query *query,
+vc5_get_query_result(struct pipe_context *pctx, struct pipe_query *query,
                      boolean wait, union pipe_query_result *vresult)
 {
-        uint64_t *result = &vresult->u64;
+        struct vc5_query *q = (struct vc5_query *)query;
+        uint32_t result = 0;
+
+        if (q->bo) {
+                /* XXX: Only flush the jobs using this BO. */
+                vc5_flush(pctx);
 
-        *result = 0;
+                /* A blocking request waits with the maximum timeout; a
+                 * non-blocking one polls with a zero timeout and reports
+                 * "not ready" by returning false.
+                 */
+                if (!vc5_bo_wait(q->bo, wait ? ~0ull : 0, "query"))
+                        return false;
+
+                /* XXX: Sum up per-core values. */
+                uint32_t *map = vc5_bo_map(q->bo);
+                result = *map;
+
+                vc5_bo_unreference(&q->bo);
+        }
+
+        switch (q->type) {
+        case PIPE_QUERY_OCCLUSION_COUNTER:
+                vresult->u64 = result;
+                break;
+        case PIPE_QUERY_OCCLUSION_PREDICATE:
+        case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+                vresult->b = result != 0;
+                break;
+        default:
+                unreachable("unsupported query type");
+        }
 
         return true;
 }
 
+/**
+ * Records whether queries are currently active for this context and flags
+ * the OQ state dirty so that vc5_emit_state() re-emits the counter packet
+ * with the new setting taken into account.
+ */
 static void
-vc5_set_active_query_state(struct pipe_context *pipe, boolean enable)
+vc5_set_active_query_state(struct pipe_context *pctx, boolean enable)
 {
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        vc5->active_queries = enable;
+        vc5->dirty |= VC5_DIRTY_OQ;
 }
 
 void