freedreno/a6xx: Implement primitive count queries on GPU
author: Kristian H. Kristensen <hoegsberg@google.com>
Thu, 5 Sep 2019 22:12:23 +0000 (15:12 -0700)
committer: Kristian H. Kristensen <hoegsberg@google.com>
Fri, 6 Sep 2019 16:53:28 +0000 (09:53 -0700)
The driver can't determine PIPE_QUERY_PRIMITIVES_GENERATED or
PIPE_QUERY_PRIMITIVES_EMITTED once we support geometry or
tessellation, since these stages add primitives at runtime.  Use the
WRITE_PRIMITIVE_COUNTS event to write back the primitive counts and
implement a hw query for this.

Reviewed-by: Rob Clark <robdclark@gmail.com>
15 files changed:
src/freedreno/registers/a6xx.xml
src/freedreno/registers/adreno_pm4.xml
src/gallium/drivers/freedreno/a2xx/fd2_query.c
src/gallium/drivers/freedreno/a5xx/fd5_query.c
src/gallium/drivers/freedreno/a6xx/fd6_query.c
src/gallium/drivers/freedreno/freedreno_batch.h
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_query.c
src/gallium/drivers/freedreno/freedreno_query.h
src/gallium/drivers/freedreno/freedreno_query_acc.c
src/gallium/drivers/freedreno/freedreno_query_acc.h
src/gallium/drivers/freedreno/freedreno_query_hw.c
src/gallium/drivers/freedreno/freedreno_query_hw.h
src/gallium/drivers/freedreno/freedreno_query_sw.c
src/gallium/drivers/freedreno/freedreno_query_sw.h

index 6868ed5528bbc46d82a21c96f9954bce994c07b3..13c3dcd5a01aee0a0bf1be95d416092c4eab7c01 100644 (file)
@@ -2394,6 +2394,10 @@ to upconvert to 32b float internally?
                <bitfield name="B_OFF" low="14" high="22" shr="2" type="uint"/>
                <bitfield name="B_EN" pos="23" type="boolean"/>
        </reg32>
+
+       <reg32 offset="0x9218" name="VPC_SO_STREAM_COUNTS_LO"/>
+       <reg32 offset="0x9219" name="VPC_SO_STREAM_COUNTS_HI"/>
+
        <array offset="0x921a" name="VPC_SO" stride="7" length="4">
                <reg32 offset="0" name="BUFFER_BASE_LO"/>
                <reg32 offset="1" name="BUFFER_BASE_HI"/>
index 06175d3e1aded293e69cc62824560962714dee19..efadcd9a3fa8a267f36951dadb3a351a8abc6e25 100644 (file)
@@ -15,6 +15,7 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
        <value name="VIZQUERY_START" value="7"/> <!-- on a2xx (??) -->
        <value name="VIZQUERY_END" value="8"/>
        <value name="SC_WAIT_WC" value="9"/>
+       <value name="WRITE_PRIMITIVE_COUNTS" value="9" variants="A6XX"/>
        <value name="RST_PIX_CNT" value="13"/>
        <value name="RST_VTX_CNT" value="14"/>
        <value name="TILE_FLUSH" value="15"/>
index 9e5bb450cd42f367c0c17a395fc080f41c61efbe..b80096c2c8178befef20d41feea8db19cbb46ab9 100644 (file)
@@ -218,7 +218,7 @@ fd2_create_batch_query(struct pipe_context *pctx,
                counters_per_group[entry->gid]++;
        }
 
-       q = fd_acc_create_query2(ctx, 0, &perfcntr);
+       q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
        aq = fd_acc_query(q);
 
        /* sample buffer size is based on # of queries: */
index b438c7a5634570a604b6b118de2c363953278516..28f296d57cff143b1b77a71692ccbfb5fe876127 100644 (file)
@@ -433,7 +433,7 @@ fd5_create_batch_query(struct pipe_context *pctx,
                counters_per_group[entry->gid]++;
        }
 
-       q = fd_acc_create_query2(ctx, 0, &perfcntr);
+       q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
        aq = fd_acc_query(q);
 
        /* sample buffer size is based on # of queries: */
index cdd2778757ec70a201e431cdd5467505f1d86b9a..d950144bc7e353015a741bf37083950d0ef98d3e 100644 (file)
@@ -252,6 +252,96 @@ static const struct fd_acc_sample_provider timestamp = {
                .result = timestamp_accumulate_result,
 };
 
+/*
+ * Layout of the buffer backing a primitive-count query.  The 'start' and
+ * 'stop' arrays are the destinations programmed into VPC_SO_STREAM_COUNTS
+ * by the resume and pause callbacks respectively; 'result' is the running
+ * total that the pause callback accumulates.  The slot used for the
+ * accumulation is selected by the query's index.
+ *
+ * NOTE(review): the four slots presumably correspond to the four
+ * stream-out streams -- confirm against the hardware documentation.
+ */
+struct PACKED fd6_primitives_sample {
+       struct {
+               uint64_t generated;
+               uint64_t emitted;
+       } start[4], stop[4], result;
+};
+
+
+/*
+ * Emit a relocation pointing at 'field' inside the query's
+ * fd6_primitives_sample buffer: primitives_relocw goes through OUT_RELOCW
+ * and primitives_reloc through OUT_RELOC.  'field' is spliced into
+ * offsetof(), so it may be any member designator (e.g. stop[i].emitted).
+ *
+ * The expansions intentionally carry no trailing semicolon; the caller
+ * supplies it, so an invocation behaves like a single statement and stays
+ * safe as the body of an unbraced if/else.
+ */
+#define primitives_relocw(ring, aq, field) \
+       OUT_RELOCW(ring, fd_resource((aq)->prsc)->bo, offsetof(struct fd6_primitives_sample, field), 0, 0)
+#define primitives_reloc(ring, aq, field) \
+       OUT_RELOC(ring, fd_resource((aq)->prsc)->bo, offsetof(struct fd6_primitives_sample, field), 0, 0)
+
+/*
+ * Resume callback shared by the primitive-count providers: points
+ * VPC_SO_STREAM_COUNTS at the 'start' slots of the query buffer and emits
+ * a WRITE_PRIMITIVE_COUNTS event on the batch's draw ring.
+ */
+static void
+primitive_counts_resume(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+       struct fd_ringbuffer *draw_ring = batch->draw;
+
+       /* two register dwords (_LO/_HI), both supplied by the single reloc */
+       OUT_PKT4(draw_ring, REG_A6XX_VPC_SO_STREAM_COUNTS_LO, 2);
+       primitives_relocw(draw_ring, aq, start[0]);
+
+       /* NOTE(review): presumably makes the hw write its counters to the
+        * address programmed above -- confirm */
+       fd6_event_write(batch, draw_ring, WRITE_PRIMITIVE_COUNTS, false);
+}
+
+/*
+ * Pause callback shared by the primitive-count providers: requests a
+ * counter snapshot into the 'stop' slots, then queues two CP_MEM_TO_MEM
+ * packets that fold (stop - start) for this query's index into the
+ * 'emitted' and 'generated' totals kept in 'result'.
+ */
+static void
+primitive_counts_pause(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+       struct fd_ringbuffer *draw_ring = batch->draw;
+       const unsigned slot = aq->base.index;
+
+       OUT_PKT4(draw_ring, REG_A6XX_VPC_SO_STREAM_COUNTS_LO, 2);
+       primitives_relocw(draw_ring, aq, stop[0]);
+
+       fd6_event_write(batch, draw_ring, WRITE_PRIMITIVE_COUNTS, false);
+
+       /* NOTE(review): presumably orders the counter write-back ahead of
+        * the memory reads below -- confirm */
+       fd6_event_write(batch, draw_ring, CACHE_FLUSH_TS, true);
+
+       /*
+        * result.emitted += stop[slot].emitted - start[slot].emitted
+        *
+        * Bit 31 (0x80000000) has no symbolic name here; its meaning is not
+        * evident from this file.
+        */
+       OUT_PKT7(draw_ring, CP_MEM_TO_MEM, 9);
+       OUT_RING(draw_ring, CP_MEM_TO_MEM_0_DOUBLE |
+                       CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
+       primitives_relocw(draw_ring, aq, result.emitted);
+       primitives_reloc(draw_ring, aq, result.emitted);
+       primitives_reloc(draw_ring, aq, stop[slot].emitted);
+       primitives_reloc(draw_ring, aq, start[slot].emitted);
+
+       /* result.generated += stop[slot].generated - start[slot].generated */
+       OUT_PKT7(draw_ring, CP_MEM_TO_MEM, 9);
+       OUT_RING(draw_ring, CP_MEM_TO_MEM_0_DOUBLE |
+                       CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
+       primitives_relocw(draw_ring, aq, result.generated);
+       primitives_reloc(draw_ring, aq, result.generated);
+       primitives_reloc(draw_ring, aq, stop[slot].generated);
+       primitives_reloc(draw_ring, aq, start[slot].generated);
+}
+
+/*
+ * Result callback for PIPE_QUERY_PRIMITIVES_GENERATED: interprets 'buf' as
+ * a fd6_primitives_sample and reports its accumulated 'generated' total
+ * through result->u64.
+ */
+static void
+primitives_generated_result(struct fd_acc_query *aq, void *buf,
+               union pipe_query_result *result)
+{
+       const struct fd6_primitives_sample *sample = buf;
+
+       result->u64 = sample->result.generated;
+}
+
+/*
+ * Sample provider for PIPE_QUERY_PRIMITIVES_GENERATED.  Shares its
+ * resume/pause callbacks and buffer layout with the primitives-emitted
+ * provider; only the result callback differs.
+ */
+static const struct fd_acc_sample_provider primitives_generated = {
+       .query_type = PIPE_QUERY_PRIMITIVES_GENERATED,
+       .size = sizeof(struct fd6_primitives_sample),
+       .active = FD_STAGE_DRAW,
+       /* callbacks: */
+       .pause = primitive_counts_pause,
+       .resume = primitive_counts_resume,
+       .result = primitives_generated_result,
+};
+
+/*
+ * Result callback for PIPE_QUERY_PRIMITIVES_EMITTED: interprets 'buf' as a
+ * fd6_primitives_sample and reports its accumulated 'emitted' total
+ * through result->u64.
+ */
+static void
+primitives_emitted_result(struct fd_acc_query *aq, void *buf,
+               union pipe_query_result *result)
+{
+       const struct fd6_primitives_sample *sample = buf;
+
+       result->u64 = sample->result.emitted;
+}
+
+/*
+ * Sample provider for PIPE_QUERY_PRIMITIVES_EMITTED.  Shares its
+ * resume/pause callbacks and buffer layout with the primitives-generated
+ * provider; only the result callback differs.
+ */
+static const struct fd_acc_sample_provider primitives_emitted = {
+       .query_type = PIPE_QUERY_PRIMITIVES_EMITTED,
+       .size = sizeof(struct fd6_primitives_sample),
+       .active = FD_STAGE_DRAW,
+       /* callbacks: */
+       .pause = primitive_counts_pause,
+       .resume = primitive_counts_resume,
+       .result = primitives_emitted_result,
+};
+
 /*
  * Performance Counter (batch) queries:
  *
@@ -433,7 +523,7 @@ fd6_create_batch_query(struct pipe_context *pctx,
                counters_per_group[entry->gid]++;
        }
 
-       q = fd_acc_create_query2(ctx, 0, &perfcntr);
+       q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
        aq = fd_acc_query(q);
 
        /* sample buffer size is based on # of queries: */
@@ -463,4 +553,7 @@ fd6_query_context_init(struct pipe_context *pctx)
 
        fd_acc_query_register_provider(pctx, &time_elapsed);
        fd_acc_query_register_provider(pctx, &timestamp);
+
+       fd_acc_query_register_provider(pctx, &primitives_generated);
+       fd_acc_query_register_provider(pctx, &primitives_emitted);
 }
index edf0840825be470855be6c77bc112d1512b9aad7..f5ae07eb892898b73e2314f08d199d2ad952d566 100644 (file)
@@ -55,7 +55,7 @@ enum fd_render_stage {
        FD_STAGE_ALL      = 0xff,
 };
 
-#define MAX_HW_SAMPLE_PROVIDERS 5
+#define MAX_HW_SAMPLE_PROVIDERS 7
 struct fd_hw_sample_provider;
 struct fd_hw_sample;
 
index af3506fd685cbb6b416cca704946881c65ed6cfb..60318cfe9e2bf378a20cde4047e7bbe0fe59d235 100644 (file)
@@ -331,7 +331,7 @@ struct fd_context {
        void (*launch_grid)(struct fd_context *ctx, const struct pipe_grid_info *info);
 
        /* query: */
-       struct fd_query * (*create_query)(struct fd_context *ctx, unsigned query_type);
+       struct fd_query * (*create_query)(struct fd_context *ctx, unsigned query_type, unsigned index);
        void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles);
        void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
                        struct fd_ringbuffer *ring);
index def6166479b11fb6e6c10eb473c71239431e2525..1c98f4db15637de4d02950f4e13522fb4f62e46a 100644 (file)
@@ -41,11 +41,12 @@ static struct pipe_query *
 fd_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index)
 {
        struct fd_context *ctx = fd_context(pctx);
-       struct fd_query *q;
+       struct fd_query *q = NULL;
 
-       q = fd_sw_create_query(ctx, query_type);
-       if (!q && ctx->create_query)
-               q = ctx->create_query(ctx, query_type);
+       if (ctx->create_query)
+               q = ctx->create_query(ctx, query_type, index);
+       if (!q)
+               q = fd_sw_create_query(ctx, query_type, index);
 
        return (struct pipe_query *) q;
 }
index 15e9ce450dee8c523719a4610a2391c0b3bd32c1..e69ff7a88a1dfe5cd2c99d6f1df3a4d9c1cf5ef0 100644 (file)
@@ -46,6 +46,7 @@ struct fd_query {
        const struct fd_query_funcs *funcs;
        bool active;
        int type;
+       unsigned index;
 };
 
 static inline struct fd_query *
@@ -102,6 +103,12 @@ int pidx(unsigned query_type)
                return 3;
        case PIPE_QUERY_TIMESTAMP:
                return 4;
+
+       case PIPE_QUERY_PRIMITIVES_GENERATED:
+               return 5;
+       case PIPE_QUERY_PRIMITIVES_EMITTED:
+               return 6;
+
        default:
                return -1;
        }
index 2d4d79925bc6ba2e6f58b8c6f397b24569d104ee..b0be16c233e3130c795ccf85a7f5f29a9e6d8db6 100644 (file)
@@ -173,7 +173,7 @@ static const struct fd_query_funcs acc_query_funcs = {
 
 struct fd_query *
 fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
-               const struct fd_acc_sample_provider *provider)
+               unsigned index, const struct fd_acc_sample_provider *provider)
 {
        struct fd_acc_query *aq;
        struct fd_query *q;
@@ -192,19 +192,21 @@ fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
        q = &aq->base;
        q->funcs = &acc_query_funcs;
        q->type = query_type;
+       q->index = index;
 
        return q;
 }
 
 struct fd_query *
-fd_acc_create_query(struct fd_context *ctx, unsigned query_type)
+fd_acc_create_query(struct fd_context *ctx, unsigned query_type,
+               unsigned index)
 {
        int idx = pidx(query_type);
 
        if ((idx < 0) || !ctx->acc_sample_providers[idx])
                return NULL;
 
-       return fd_acc_create_query2(ctx, query_type,
+       return fd_acc_create_query2(ctx, query_type, index,
                        ctx->acc_sample_providers[idx]);
 }
 
index 3bbffe4436f02758c1da780ed30522edac92f20d..fdde07fcc4fee9525d0bdbaf6527fc197b60cbc7 100644 (file)
@@ -97,9 +97,10 @@ fd_acc_query(struct fd_query *q)
        return (struct fd_acc_query *)q;
 }
 
-struct fd_query * fd_acc_create_query(struct fd_context *ctx, unsigned query_type);
+struct fd_query * fd_acc_create_query(struct fd_context *ctx, unsigned query_type,
+       unsigned index);
 struct fd_query * fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
-               const struct fd_acc_sample_provider *provider);
+               unsigned index, const struct fd_acc_sample_provider *provider);
 void fd_acc_query_set_stage(struct fd_batch *batch, enum fd_render_stage stage);
 void fd_acc_query_register_provider(struct pipe_context *pctx,
                const struct fd_acc_sample_provider *provider);
index 1c7048d3f28e109fe24faf86db7d9bd4741ad0e1..b37126dc0da90eb2c9566813d96259d82098eaee 100644 (file)
@@ -266,7 +266,7 @@ static const struct fd_query_funcs hw_query_funcs = {
 };
 
 struct fd_query *
-fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
+fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
 {
        struct fd_hw_query *hq;
        struct fd_query *q;
@@ -289,6 +289,7 @@ fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
        q = &hq->base;
        q->funcs = &hw_query_funcs;
        q->type = query_type;
+       q->index = index;
 
        return q;
 }
index e711b837905e711b5e725535be1b95c74e365ff3..c9723ed16c039e452eb8f9b103a442d156457855 100644 (file)
@@ -136,7 +136,7 @@ fd_hw_query(struct fd_query *q)
        return (struct fd_hw_query *)q;
 }
 
-struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type);
+struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index);
 /* helper for sample providers: */
 struct fd_hw_sample * fd_hw_sample_init(struct fd_batch *batch, uint32_t size);
 /* don't call directly, use fd_hw_sample_reference() */
index 7a610d3b21b5800fa538cc296069b2554172b2a6..96bc814ccb1a27f5d99120320e76a737d23289f6 100644 (file)
@@ -162,7 +162,7 @@ static const struct fd_query_funcs sw_query_funcs = {
 };
 
 struct fd_query *
-fd_sw_create_query(struct fd_context *ctx, unsigned query_type)
+fd_sw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
 {
        struct fd_sw_query *sq;
        struct fd_query *q;
index 8b754e08783f6c9a716acc4a7e6681435e2b24e2..967e4af2117cc4916dcb0dba8732ba7c8d27d2c0 100644 (file)
@@ -48,6 +48,6 @@ fd_sw_query(struct fd_query *q)
 }
 
 struct fd_query * fd_sw_create_query(struct fd_context *ctx,
-               unsigned query_type);
+               unsigned query_type, unsigned index);
 
 #endif /* FREEDRENO_QUERY_SW_H_ */