From d310ea0f3258b49f6268df08a7e372764ad5e2d8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 21 Apr 2017 15:30:33 -0400 Subject: [PATCH] freedreno: add support for hw accumulating queries Some queries on a4xx and all queries on a5xx can do result accumulation on CP so we don't need to track per-tile samples. We do still need to handle pausing/resuming while switching batches (in case the query is active over multiple draws which are executed out of order). So introduce new accumulated-query helpers for these sorts of queries, since it doesn't really fit in cleanly with the original query infra- structure. Signed-off-by: Rob Clark --- .../drivers/freedreno/Makefile.sources | 2 + .../drivers/freedreno/freedreno_context.c | 3 + .../drivers/freedreno/freedreno_context.h | 15 +- .../drivers/freedreno/freedreno_draw.c | 7 + .../drivers/freedreno/freedreno_query.h | 21 ++ .../drivers/freedreno/freedreno_query_acc.c | 230 ++++++++++++++++++ .../drivers/freedreno/freedreno_query_acc.h | 98 ++++++++ .../drivers/freedreno/freedreno_query_hw.c | 41 +--- 8 files changed, 384 insertions(+), 33 deletions(-) create mode 100644 src/gallium/drivers/freedreno/freedreno_query_acc.c create mode 100644 src/gallium/drivers/freedreno/freedreno_query_acc.h diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index b53a23e0480..21ad2ad10be 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -18,6 +18,8 @@ C_SOURCES := \ freedreno_program.h \ freedreno_query.c \ freedreno_query.h \ + freedreno_query_acc.c \ + freedreno_query_acc.h \ freedreno_query_hw.c \ freedreno_query_hw.h \ freedreno_query_sw.c \ diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index c8d2138cfea..d9d334e5926 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ 
-301,6 +301,9 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, if (!ctx->primconvert) goto fail; + list_inithead(&ctx->hw_active_queries); + list_inithead(&ctx->acc_active_queries); + return pctx; fail: diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 041e2260561..9bc87b4f33d 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -159,15 +159,26 @@ struct fd_context { /* slab for pipe_transfer allocations: */ struct slab_child_pool transfer_pool; + /** + * query related state: + */ + /*@{*/ /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */ struct slab_mempool sample_pool; struct slab_mempool sample_period_pool; /* sample-providers for hw queries: */ - const struct fd_hw_sample_provider *sample_providers[MAX_HW_SAMPLE_PROVIDERS]; + const struct fd_hw_sample_provider *hw_sample_providers[MAX_HW_SAMPLE_PROVIDERS]; /* list of active queries: */ - struct list_head active_queries; + struct list_head hw_active_queries; + + /* sample-providers for accumulating hw queries: */ + const struct fd_acc_sample_provider *acc_sample_providers[MAX_HW_SAMPLE_PROVIDERS]; + + /* list of active accumulating queries: */ + struct list_head acc_active_queries; + /*@}*/ /* table with PIPE_PRIM_MAX entries mapping PIPE_PRIM_x to * DI_PT_x value to use for draw initiator. 
There are some diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index dc9effbb7b7..7d3fc8e32da 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -37,6 +37,7 @@ #include "freedreno_context.h" #include "freedreno_state.h" #include "freedreno_resource.h" +#include "freedreno_query_acc.h" #include "freedreno_query_hw.h" #include "freedreno_util.h" @@ -177,6 +178,9 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) resource_written(batch, batch->query_buf); + list_for_each_entry(struct fd_acc_query, aq, &ctx->acc_active_queries, node) + resource_written(batch, aq->prsc); + mtx_unlock(&ctx->screen->lock); batch->num_draws++; @@ -353,6 +357,9 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, resource_written(batch, batch->query_buf); + list_for_each_entry(struct fd_acc_query, aq, &ctx->acc_active_queries, node) + resource_written(batch, aq->prsc); + mtx_unlock(&ctx->screen->lock); DBG("%p: %x %ux%u depth=%f, stencil=%u (%s/%s)", batch, buffers, diff --git a/src/gallium/drivers/freedreno/freedreno_query.h b/src/gallium/drivers/freedreno/freedreno_query.h index 1e4f45ffcd3..49a86803c4d 100644 --- a/src/gallium/drivers/freedreno/freedreno_query.h +++ b/src/gallium/drivers/freedreno/freedreno_query.h @@ -77,4 +77,25 @@ skip_begin_query(int type) } } +/* maps query_type to sample provider idx: */ +static inline +int pidx(unsigned query_type) +{ + switch (query_type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + return 0; + case PIPE_QUERY_OCCLUSION_PREDICATE: + return 1; + /* TODO currently queries only emitted in main pass (not in binning pass).. + * which is fine for occlusion query, but pretty much not anything else. 
+ */ + case PIPE_QUERY_TIME_ELAPSED: + return 2; + case PIPE_QUERY_TIMESTAMP: + return 3; + default: + return -1; + } +} + #endif /* FREEDRENO_QUERY_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_query_acc.c b/src/gallium/drivers/freedreno/freedreno_query_acc.c new file mode 100644 index 00000000000..96cee1aee84 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_query_acc.c @@ -0,0 +1,230 @@ +/* + * Copyright (C) 2017 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "util/u_memory.h" +#include "util/u_inlines.h" + +#include "freedreno_query_acc.h" +#include "freedreno_context.h" +#include "freedreno_resource.h" +#include "freedreno_util.h" + + +static bool +is_active(struct fd_acc_query *aq, enum fd_render_stage stage) +{ + return !!(aq->provider->active & stage); +} + +static void +fd_acc_destroy_query(struct fd_context *ctx, struct fd_query *q) +{ + struct fd_acc_query *aq = fd_acc_query(q); + + DBG("%p: active=%d", q, q->active); + + pipe_resource_reference(&aq->prsc, NULL); + list_del(&aq->node); + + free(aq); +} + +static void +realloc_query_bo(struct fd_context *ctx, struct fd_acc_query *aq) +{ + struct fd_resource *rsc; + void *map; + + pipe_resource_reference(&aq->prsc, NULL); + + aq->prsc = pipe_buffer_create(&ctx->screen->base, + PIPE_BIND_QUERY_BUFFER, 0, 0x1000); + + /* don't assume the buffer is zero-initialized: */ + rsc = fd_resource(aq->prsc); + + fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, DRM_FREEDRENO_PREP_WRITE); + + map = fd_bo_map(rsc->bo); + memset(map, 0, aq->provider->size); + fd_bo_cpu_fini(rsc->bo); +} + +static boolean +fd_acc_begin_query(struct fd_context *ctx, struct fd_query *q) +{ + struct fd_batch *batch = ctx->batch; + struct fd_acc_query *aq = fd_acc_query(q); + const struct fd_acc_sample_provider *p = aq->provider; + + DBG("%p: active=%d", q, q->active); + + /* ->begin_query() discards previous results, so realloc bo: */ + realloc_query_bo(ctx, aq); + + /* then resume query if needed to collect first sample: */ + if (batch && is_active(aq, batch->stage)) + p->resume(aq, batch); + + /* add to active list: */ + assert(list_empty(&aq->node)); + list_addtail(&aq->node, &ctx->acc_active_queries); + + return true; +} + +static void +fd_acc_end_query(struct fd_context *ctx, struct fd_query *q) +{ + struct fd_batch *batch = ctx->batch; + struct fd_acc_query *aq = fd_acc_query(q); + const struct fd_acc_sample_provider *p = aq->provider; + + DBG("%p: 
active=%d", q, q->active); + + if (batch && is_active(aq, batch->stage)) + p->pause(aq, batch); + + /* remove from active list: */ + list_delinit(&aq->node); +} + +static boolean +fd_acc_get_query_result(struct fd_context *ctx, struct fd_query *q, + boolean wait, union pipe_query_result *result) +{ + struct fd_acc_query *aq = fd_acc_query(q); + const struct fd_acc_sample_provider *p = aq->provider; + struct fd_resource *rsc = fd_resource(aq->prsc); + + DBG("%p: wait=%d, active=%d", q, wait, q->active); + + assert(LIST_IS_EMPTY(&aq->node)); + + /* if !wait, then check the last sample (the one most likely to + * not be ready yet) and bail if it is not ready: + */ + if (!wait) { + int ret; + + if (pending(rsc, false)) { + /* piglit spec@arb_occlusion_query@occlusion_query_conform + * test, and silly apps perhaps, get stuck in a loop trying + * to get query result forever with wait==false.. we don't + * want to flush unnecessarily but we also don't want to + * spin forever: + */ + if (aq->no_wait_cnt++ > 5) + fd_batch_flush(rsc->write_batch, false); + return false; + } + + ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, + DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC); + if (ret) + return false; + + fd_bo_cpu_fini(rsc->bo); + } + + if (rsc->write_batch) + fd_batch_flush(rsc->write_batch, true); + + /* get the result: */ + fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, DRM_FREEDRENO_PREP_READ); + + void *ptr = fd_bo_map(rsc->bo); + p->result(ctx, ptr, result); + fd_bo_cpu_fini(rsc->bo); + + return true; +} + +static const struct fd_query_funcs acc_query_funcs = { + .destroy_query = fd_acc_destroy_query, + .begin_query = fd_acc_begin_query, + .end_query = fd_acc_end_query, + .get_query_result = fd_acc_get_query_result, +}; + +struct fd_query * +fd_acc_create_query(struct fd_context *ctx, unsigned query_type) +{ + struct fd_acc_query *aq; + struct fd_query *q; + int idx = pidx(query_type); + + if ((idx < 0) || !ctx->acc_sample_providers[idx]) + return NULL; + + aq = 
CALLOC_STRUCT(fd_acc_query); + if (!aq) + return NULL; + + DBG("%p: query_type=%u", aq, query_type); + + aq->provider = ctx->acc_sample_providers[idx]; + + list_inithead(&aq->node); + + q = &aq->base; + q->funcs = &acc_query_funcs; + q->type = query_type; + + return q; +} + +void +fd_acc_query_set_stage(struct fd_batch *batch, enum fd_render_stage stage) +{ + if (stage != batch->stage) { + struct fd_acc_query *aq; + LIST_FOR_EACH_ENTRY(aq, &batch->ctx->acc_active_queries, node) { + const struct fd_acc_sample_provider *p = aq->provider; + + bool was_active = is_active(aq, batch->stage); + bool now_active = is_active(aq, stage); + + if (now_active && !was_active) + p->resume(aq, batch); + else if (was_active && !now_active) + p->pause(aq, batch); + } + } +} + +void +fd_acc_query_register_provider(struct pipe_context *pctx, + const struct fd_acc_sample_provider *provider) +{ + struct fd_context *ctx = fd_context(pctx); + int idx = pidx(provider->query_type); + + assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS)); + assert(!ctx->acc_sample_providers[idx]); + + ctx->acc_sample_providers[idx] = provider; +} diff --git a/src/gallium/drivers/freedreno/freedreno_query_acc.h b/src/gallium/drivers/freedreno/freedreno_query_acc.h new file mode 100644 index 00000000000..f8dfabc5e8e --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_query_acc.h @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2017 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in 
all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FREEDRENO_QUERY_ACC_H_ +#define FREEDRENO_QUERY_ACC_H_ + +#include "util/list.h" + +#include "freedreno_query.h" +#include "freedreno_context.h" + + +/* + * Accumulated HW Queries: + * + * Unlike the original HW Queries in earlier adreno generations (see + * freedreno_query_hw.[ch]), later generations can accumulate the per- + * tile results of some (a4xx) or all (a5xx+?) queries in the cmdstream. + * But we still need to handle pausing/resuming the query across stage + * changes (in particular when switching between batches). + * + * fd_acc_sample_provider: + * - one per accumulated query type, registered/implemented by gpu + * generation specific code + * - knows how to emit cmdstream to pause/resume a query instance + * + * fd_acc_query: + * - one instance per query object + * - each query object has its own result buffer, which may + * span multiple batches, etc. 
+ */ + + +struct fd_acc_query; + +struct fd_acc_sample_provider { + unsigned query_type; + + /* stages applicable to the query type: */ + enum fd_render_stage active; + + unsigned size; + + void (*resume)(struct fd_acc_query *aq, struct fd_batch *batch); + void (*pause)(struct fd_acc_query *aq, struct fd_batch *batch); + + void (*result)(struct fd_context *ctx, void *buf, + union pipe_query_result *result); +}; + +struct fd_acc_query { + struct fd_query base; + + const struct fd_acc_sample_provider *provider; + + struct pipe_resource *prsc; + unsigned offset; + + struct list_head node; /* list-node in ctx->acc_active_queries */ + + int no_wait_cnt; /* see fd_acc_get_query_result() */ +}; + +static inline struct fd_acc_query * +fd_acc_query(struct fd_query *q) +{ + return (struct fd_acc_query *)q; +} + +struct fd_query * fd_acc_create_query(struct fd_context *ctx, unsigned query_type); +void fd_acc_query_set_stage(struct fd_batch *batch, enum fd_render_stage stage); +void fd_acc_query_register_provider(struct pipe_context *pctx, + const struct fd_acc_sample_provider *provider); + +#endif /* FREEDRENO_QUERY_ACC_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c index ef458ce5db2..2ebf8ade585 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_hw.c +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c @@ -40,26 +40,6 @@ struct fd_hw_sample_period { struct list_head list; }; -/* maps query_type to sample provider idx: */ -static int pidx(unsigned query_type) -{ - switch (query_type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - return 0; - case PIPE_QUERY_OCCLUSION_PREDICATE: - return 1; - /* TODO currently queries only emitted in main pass (not in binning pass).. - * which is fine for occlusion query, but pretty much not anything else. 
- */ - case PIPE_QUERY_TIME_ELAPSED: - return 2; - case PIPE_QUERY_TIMESTAMP: - return 3; - default: - return -1; - } -} - static struct fd_hw_sample * get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring, unsigned query_type) @@ -72,7 +52,7 @@ get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring, if (!batch->sample_cache[idx]) { struct fd_hw_sample *new_samp = - ctx->sample_providers[idx]->get_sample(batch, ring); + ctx->hw_sample_providers[idx]->get_sample(batch, ring); fd_hw_sample_reference(ctx, &batch->sample_cache[idx], new_samp); util_dynarray_append(&batch->samples, struct fd_hw_sample *, new_samp); batch->needs_flush = true; @@ -170,7 +150,7 @@ fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q) /* add to active list: */ assert(list_empty(&hq->list)); - list_addtail(&hq->list, &ctx->active_queries); + list_addtail(&hq->list, &ctx->hw_active_queries); return true; } @@ -294,7 +274,7 @@ fd_hw_create_query(struct fd_context *ctx, unsigned query_type) struct fd_query *q; int idx = pidx(query_type); - if ((idx < 0) || !ctx->sample_providers[idx]) + if ((idx < 0) || !ctx->hw_sample_providers[idx]) return NULL; hq = CALLOC_STRUCT(fd_hw_query); @@ -303,7 +283,7 @@ fd_hw_create_query(struct fd_context *ctx, unsigned query_type) DBG("%p: query_type=%u", hq, query_type); - hq->provider = ctx->sample_providers[idx]; + hq->provider = ctx->hw_sample_providers[idx]; list_inithead(&hq->periods); list_inithead(&hq->list); @@ -405,7 +385,7 @@ fd_hw_query_set_stage(struct fd_batch *batch, struct fd_ringbuffer *ring, { if (stage != batch->stage) { struct fd_hw_query *hq; - LIST_FOR_EACH_ENTRY(hq, &batch->ctx->active_queries, list) { + LIST_FOR_EACH_ENTRY(hq, &batch->ctx->hw_active_queries, list) { bool was_active = is_active(hq, batch->stage); bool now_active = is_active(hq, stage); @@ -428,9 +408,9 @@ fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring) struct fd_context *ctx = batch->ctx; for (int idx = 0; idx < 
MAX_HW_SAMPLE_PROVIDERS; idx++) { if (batch->active_providers & (1 << idx)) { - assert(ctx->sample_providers[idx]); - if (ctx->sample_providers[idx]->enable) - ctx->sample_providers[idx]->enable(ctx, ring); + assert(ctx->hw_sample_providers[idx]); + if (ctx->hw_sample_providers[idx]->enable) + ctx->hw_sample_providers[idx]->enable(ctx, ring); } } batch->active_providers = 0; /* clear it for next frame */ @@ -444,9 +424,9 @@ fd_hw_query_register_provider(struct pipe_context *pctx, int idx = pidx(provider->query_type); assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS)); - assert(!ctx->sample_providers[idx]); + assert(!ctx->hw_sample_providers[idx]); - ctx->sample_providers[idx] = provider; + ctx->hw_sample_providers[idx] = provider; } void @@ -458,7 +438,6 @@ fd_hw_query_init(struct pipe_context *pctx) 16); slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period), 16); - list_inithead(&ctx->active_queries); } void -- 2.30.2