From 30ab3e39fd4d9cceb8a1a7d06dc3d99b0f477a72 Mon Sep 17 00:00:00 2001 From: "Kristian H. Kristensen" Date: Thu, 5 Sep 2019 15:12:23 -0700 Subject: [PATCH] freedreno/a6xx: Implement primitive count queries on GPU The driver can't determine PIPE_QUERY_PRIMITIVES_GENERATED or PIPE_QUERY_PRIMITIVES_EMITTED once we support geometry or tessellation, since these stages add primitives at runtime. Use the WRITE_PRIMITIVE_COUNTS event to write back the primitive counts and implement a hw query for this. Reviewed-by: Rob Clark --- src/freedreno/registers/a6xx.xml | 4 + src/freedreno/registers/adreno_pm4.xml | 1 + .../drivers/freedreno/a2xx/fd2_query.c | 2 +- .../drivers/freedreno/a5xx/fd5_query.c | 2 +- .../drivers/freedreno/a6xx/fd6_query.c | 95 ++++++++++++++++++- .../drivers/freedreno/freedreno_batch.h | 2 +- .../drivers/freedreno/freedreno_context.h | 2 +- .../drivers/freedreno/freedreno_query.c | 9 +- .../drivers/freedreno/freedreno_query.h | 7 ++ .../drivers/freedreno/freedreno_query_acc.c | 8 +- .../drivers/freedreno/freedreno_query_acc.h | 5 +- .../drivers/freedreno/freedreno_query_hw.c | 3 +- .../drivers/freedreno/freedreno_query_hw.h | 2 +- .../drivers/freedreno/freedreno_query_sw.c | 2 +- .../drivers/freedreno/freedreno_query_sw.h | 2 +- 15 files changed, 128 insertions(+), 18 deletions(-) diff --git a/src/freedreno/registers/a6xx.xml b/src/freedreno/registers/a6xx.xml index 6868ed5528b..13c3dcd5a01 100644 --- a/src/freedreno/registers/a6xx.xml +++ b/src/freedreno/registers/a6xx.xml @@ -2394,6 +2394,10 @@ to upconvert to 32b float internally? 
+ + + + diff --git a/src/freedreno/registers/adreno_pm4.xml b/src/freedreno/registers/adreno_pm4.xml index 06175d3e1ad..efadcd9a3fa 100644 --- a/src/freedreno/registers/adreno_pm4.xml +++ b/src/freedreno/registers/adreno_pm4.xml @@ -15,6 +15,7 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> + diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_query.c b/src/gallium/drivers/freedreno/a2xx/fd2_query.c index 9e5bb450cd4..b80096c2c81 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_query.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_query.c @@ -218,7 +218,7 @@ fd2_create_batch_query(struct pipe_context *pctx, counters_per_group[entry->gid]++; } - q = fd_acc_create_query2(ctx, 0, &perfcntr); + q = fd_acc_create_query2(ctx, 0, 0, &perfcntr); aq = fd_acc_query(q); /* sample buffer size is based on # of queries: */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_query.c b/src/gallium/drivers/freedreno/a5xx/fd5_query.c index b438c7a5634..28f296d57cf 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_query.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_query.c @@ -433,7 +433,7 @@ fd5_create_batch_query(struct pipe_context *pctx, counters_per_group[entry->gid]++; } - q = fd_acc_create_query2(ctx, 0, &perfcntr); + q = fd_acc_create_query2(ctx, 0, 0, &perfcntr); aq = fd_acc_query(q); /* sample buffer size is based on # of queries: */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_query.c b/src/gallium/drivers/freedreno/a6xx/fd6_query.c index cdd2778757e..d950144bc7e 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_query.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_query.c @@ -252,6 +252,96 @@ static const struct fd_acc_sample_provider timestamp = { .result = timestamp_accumulate_result, }; +struct PACKED fd6_primitives_sample { + struct { + uint64_t generated, emitted; + } start[4], stop[4], result; +}; + + +#define primitives_relocw(ring, aq, field) \ + OUT_RELOCW(ring, fd_resource((aq)->prsc)->bo, offsetof(struct fd6_primitives_sample, 
field), 0, 0); +#define primitives_reloc(ring, aq, field) \ + OUT_RELOC(ring, fd_resource((aq)->prsc)->bo, offsetof(struct fd6_primitives_sample, field), 0, 0); + +static void +primitive_counts_resume(struct fd_acc_query *aq, struct fd_batch *batch) +{ + struct fd_ringbuffer *ring = batch->draw; + + OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS_LO, 2); + primitives_relocw(ring, aq, start[0]); + + fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false); +} + +static void +primitive_counts_pause(struct fd_acc_query *aq, struct fd_batch *batch) +{ + struct fd_ringbuffer *ring = batch->draw; + + OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS_LO, 2); + primitives_relocw(ring, aq, stop[0]); + + fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false); + + fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true); + + /* result += stop - start: */ + OUT_PKT7(ring, CP_MEM_TO_MEM, 9); + OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | + CP_MEM_TO_MEM_0_NEG_C | 0x80000000); + primitives_relocw(ring, aq, result.emitted); + primitives_reloc(ring, aq, result.emitted); + primitives_reloc(ring, aq, stop[aq->base.index].emitted); + primitives_reloc(ring, aq, start[aq->base.index].emitted); + + /* result += stop - start: */ + OUT_PKT7(ring, CP_MEM_TO_MEM, 9); + OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | + CP_MEM_TO_MEM_0_NEG_C | 0x80000000); + primitives_relocw(ring, aq, result.generated); + primitives_reloc(ring, aq, result.generated); + primitives_reloc(ring, aq, stop[aq->base.index].generated); + primitives_reloc(ring, aq, start[aq->base.index].generated); +} + +static void +primitives_generated_result(struct fd_acc_query *aq, void *buf, + union pipe_query_result *result) +{ + struct fd6_primitives_sample *ps = buf; + + result->u64 = ps->result.generated; +} + +static const struct fd_acc_sample_provider primitives_generated = { + .query_type = PIPE_QUERY_PRIMITIVES_GENERATED, + .active = FD_STAGE_DRAW, + .size = sizeof(struct fd6_primitives_sample), + .resume = primitive_counts_resume, 
+ .pause = primitive_counts_pause, + .result = primitives_generated_result, +}; + +static void +primitives_emitted_result(struct fd_acc_query *aq, void *buf, + union pipe_query_result *result) +{ + struct fd6_primitives_sample *ps = buf; + + result->u64 = ps->result.emitted; +} + +static const struct fd_acc_sample_provider primitives_emitted = { + .query_type = PIPE_QUERY_PRIMITIVES_EMITTED, + .active = FD_STAGE_DRAW, + .size = sizeof(struct fd6_primitives_sample), + .resume = primitive_counts_resume, + .pause = primitive_counts_pause, + .result = primitives_emitted_result, +}; + /* * Performance Counter (batch) queries: * @@ -433,7 +523,7 @@ fd6_create_batch_query(struct pipe_context *pctx, counters_per_group[entry->gid]++; } - q = fd_acc_create_query2(ctx, 0, &perfcntr); + q = fd_acc_create_query2(ctx, 0, 0, &perfcntr); aq = fd_acc_query(q); /* sample buffer size is based on # of queries: */ @@ -463,4 +553,7 @@ fd6_query_context_init(struct pipe_context *pctx) fd_acc_query_register_provider(pctx, &time_elapsed); fd_acc_query_register_provider(pctx, &timestamp); + + fd_acc_query_register_provider(pctx, &primitives_generated); + fd_acc_query_register_provider(pctx, &primitives_emitted); } diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h index edf0840825b..f5ae07eb892 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/src/gallium/drivers/freedreno/freedreno_batch.h @@ -55,7 +55,7 @@ enum fd_render_stage { FD_STAGE_ALL = 0xff, }; -#define MAX_HW_SAMPLE_PROVIDERS 5 +#define MAX_HW_SAMPLE_PROVIDERS 7 struct fd_hw_sample_provider; struct fd_hw_sample; diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index af3506fd685..60318cfe9e2 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -331,7 +331,7 @@ struct fd_context { void (*launch_grid)(struct fd_context *ctx, 
const struct pipe_grid_info *info); /* query: */ - struct fd_query * (*create_query)(struct fd_context *ctx, unsigned query_type); + struct fd_query * (*create_query)(struct fd_context *ctx, unsigned query_type, unsigned index); void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles); void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n, struct fd_ringbuffer *ring); diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c index def6166479b..1c98f4db156 100644 --- a/src/gallium/drivers/freedreno/freedreno_query.c +++ b/src/gallium/drivers/freedreno/freedreno_query.c @@ -41,11 +41,12 @@ static struct pipe_query * fd_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index) { struct fd_context *ctx = fd_context(pctx); - struct fd_query *q; + struct fd_query *q = NULL; - q = fd_sw_create_query(ctx, query_type); - if (!q && ctx->create_query) - q = ctx->create_query(ctx, query_type); + if (ctx->create_query) + q = ctx->create_query(ctx, query_type, index); + if (!q) + q = fd_sw_create_query(ctx, query_type, index); return (struct pipe_query *) q; } diff --git a/src/gallium/drivers/freedreno/freedreno_query.h b/src/gallium/drivers/freedreno/freedreno_query.h index 15e9ce450de..e69ff7a88a1 100644 --- a/src/gallium/drivers/freedreno/freedreno_query.h +++ b/src/gallium/drivers/freedreno/freedreno_query.h @@ -46,6 +46,7 @@ struct fd_query { const struct fd_query_funcs *funcs; bool active; int type; + unsigned index; }; static inline struct fd_query * @@ -102,6 +103,12 @@ int pidx(unsigned query_type) return 3; case PIPE_QUERY_TIMESTAMP: return 4; + + case PIPE_QUERY_PRIMITIVES_GENERATED: + return 5; + case PIPE_QUERY_PRIMITIVES_EMITTED: + return 6; + default: return -1; } diff --git a/src/gallium/drivers/freedreno/freedreno_query_acc.c b/src/gallium/drivers/freedreno/freedreno_query_acc.c index 2d4d79925bc..b0be16c233e 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_acc.c 
+++ b/src/gallium/drivers/freedreno/freedreno_query_acc.c @@ -173,7 +173,7 @@ static const struct fd_query_funcs acc_query_funcs = { struct fd_query * fd_acc_create_query2(struct fd_context *ctx, unsigned query_type, - const struct fd_acc_sample_provider *provider) + unsigned index, const struct fd_acc_sample_provider *provider) { struct fd_acc_query *aq; struct fd_query *q; @@ -192,19 +192,21 @@ fd_acc_create_query2(struct fd_context *ctx, unsigned query_type, q = &aq->base; q->funcs = &acc_query_funcs; q->type = query_type; + q->index = index; return q; } struct fd_query * -fd_acc_create_query(struct fd_context *ctx, unsigned query_type) +fd_acc_create_query(struct fd_context *ctx, unsigned query_type, + unsigned index) { int idx = pidx(query_type); if ((idx < 0) || !ctx->acc_sample_providers[idx]) return NULL; - return fd_acc_create_query2(ctx, query_type, + return fd_acc_create_query2(ctx, query_type, index, ctx->acc_sample_providers[idx]); } diff --git a/src/gallium/drivers/freedreno/freedreno_query_acc.h b/src/gallium/drivers/freedreno/freedreno_query_acc.h index 3bbffe4436f..fdde07fcc4f 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_acc.h +++ b/src/gallium/drivers/freedreno/freedreno_query_acc.h @@ -97,9 +97,10 @@ fd_acc_query(struct fd_query *q) return (struct fd_acc_query *)q; } -struct fd_query * fd_acc_create_query(struct fd_context *ctx, unsigned query_type); +struct fd_query * fd_acc_create_query(struct fd_context *ctx, unsigned query_type, + unsigned index); struct fd_query * fd_acc_create_query2(struct fd_context *ctx, unsigned query_type, - const struct fd_acc_sample_provider *provider); + unsigned index, const struct fd_acc_sample_provider *provider); void fd_acc_query_set_stage(struct fd_batch *batch, enum fd_render_stage stage); void fd_acc_query_register_provider(struct pipe_context *pctx, const struct fd_acc_sample_provider *provider); diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c 
b/src/gallium/drivers/freedreno/freedreno_query_hw.c index 1c7048d3f28..b37126dc0da 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_hw.c +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c @@ -266,7 +266,7 @@ static const struct fd_query_funcs hw_query_funcs = { }; struct fd_query * -fd_hw_create_query(struct fd_context *ctx, unsigned query_type) +fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index) { struct fd_hw_query *hq; struct fd_query *q; @@ -289,6 +289,7 @@ fd_hw_create_query(struct fd_context *ctx, unsigned query_type) q = &hq->base; q->funcs = &hw_query_funcs; q->type = query_type; + q->index = index; return q; } diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.h b/src/gallium/drivers/freedreno/freedreno_query_hw.h index e711b837905..c9723ed16c0 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_hw.h +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.h @@ -136,7 +136,7 @@ fd_hw_query(struct fd_query *q) return (struct fd_hw_query *)q; } -struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type); +struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index); /* helper for sample providers: */ struct fd_hw_sample * fd_hw_sample_init(struct fd_batch *batch, uint32_t size); /* don't call directly, use fd_hw_sample_reference() */ diff --git a/src/gallium/drivers/freedreno/freedreno_query_sw.c b/src/gallium/drivers/freedreno/freedreno_query_sw.c index 7a610d3b21b..96bc814ccb1 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_sw.c +++ b/src/gallium/drivers/freedreno/freedreno_query_sw.c @@ -162,7 +162,7 @@ static const struct fd_query_funcs sw_query_funcs = { }; struct fd_query * -fd_sw_create_query(struct fd_context *ctx, unsigned query_type) +fd_sw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index) { struct fd_sw_query *sq; struct fd_query *q; diff --git 
a/src/gallium/drivers/freedreno/freedreno_query_sw.h b/src/gallium/drivers/freedreno/freedreno_query_sw.h index 8b754e08783..967e4af2117 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_sw.h +++ b/src/gallium/drivers/freedreno/freedreno_query_sw.h @@ -48,6 +48,6 @@ fd_sw_query(struct fd_query *q) } struct fd_query * fd_sw_create_query(struct fd_context *ctx, - unsigned query_type); + unsigned query_type, unsigned index); #endif /* FREEDRENO_QUERY_SW_H_ */ -- 2.30.2