From b8f78e18907be379415c8c804b634808349fc1d9 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 10 May 2014 13:45:54 -0400 Subject: [PATCH] freedreno: add support for hw queries Real GPU queries need some infrastructure to track samples per tile and accumulate the results. But fortunately this can be shared across GPU generation. See: https://github.com/freedreno/freedreno/wiki/Queries#hardware-queries Signed-off-by: Rob Clark --- .../drivers/freedreno/Makefile.sources | 1 + .../drivers/freedreno/freedreno_context.c | 3 + .../drivers/freedreno/freedreno_context.h | 69 ++- .../drivers/freedreno/freedreno_draw.c | 4 + .../drivers/freedreno/freedreno_gmem.c | 19 +- .../drivers/freedreno/freedreno_query.c | 5 +- .../drivers/freedreno/freedreno_query_hw.c | 465 ++++++++++++++++++ .../drivers/freedreno/freedreno_query_hw.h | 164 ++++++ .../drivers/freedreno/freedreno_resource.c | 3 + .../drivers/freedreno/freedreno_util.h | 9 +- 10 files changed, 734 insertions(+), 8 deletions(-) create mode 100644 src/gallium/drivers/freedreno/freedreno_query_hw.c create mode 100644 src/gallium/drivers/freedreno/freedreno_query_hw.h diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 3621abddb28..afb4c35ba75 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -3,6 +3,7 @@ C_SOURCES := \ freedreno_lowering.c \ freedreno_program.c \ freedreno_query.c \ + freedreno_query_hw.c \ freedreno_query_sw.c \ freedreno_fence.c \ freedreno_resource.c \ diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index a8fe3111c3d..496a4227099 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -34,6 +34,7 @@ #include "freedreno_state.h" #include "freedreno_gmem.h" #include "freedreno_query.h" +#include "freedreno_query_hw.h" #include
"freedreno_util.h" static struct fd_ringbuffer *next_rb(struct fd_context *ctx) @@ -145,6 +146,7 @@ fd_context_destroy(struct pipe_context *pctx) DBG(""); fd_prog_fini(pctx); + fd_hw_query_fini(pctx); util_slab_destroy(&ctx->transfer_pool); @@ -221,6 +223,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, fd_query_context_init(pctx); fd_texture_init(pctx); fd_state_init(pctx); + fd_hw_query_init(pctx); ctx->blitter = util_blitter_create(pctx); if (!ctx->blitter) diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 2e7fdedd9ae..46984823427 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -33,6 +33,7 @@ #include "pipe/p_context.h" #include "indices/u_primconvert.h" #include "util/u_blitter.h" +#include "util/u_double_list.h" #include "util/u_slab.h" #include "util/u_string.h" @@ -82,16 +83,80 @@ struct fd_vertex_stateobj { unsigned num_elements; }; +/* Bitmask of stages in rendering that a particular query is + * active. Queries will be automatically started/stopped (generating + * additional fd_hw_sample_period's) on entrance/exit from stages that + * are applicable to the query. + * + * NOTE: set the stage to NULL at end of IB to ensure no query is still + * active. Things aren't going to work out the way you want if a query + * is active across IB's (or between tile IB and draw IB) + */ +enum fd_render_stage { + FD_STAGE_NULL = 0x00, + FD_STAGE_DRAW = 0x01, + FD_STAGE_CLEAR = 0x02, + /* TODO before queries which include MEM2GMEM or GMEM2MEM will + * work we will need to call fd_hw_query_prepare() from somewhere + * appropriate so that queries in the tiling IB get backed with + * memory to write results to. + */ + FD_STAGE_MEM2GMEM = 0x04, + FD_STAGE_GMEM2MEM = 0x08, + /* used for driver internal draws (ie.
util_blitter_blit()): */ + FD_STAGE_BLIT = 0x10, +}; + +#define MAX_HW_SAMPLE_PROVIDERS 4 +struct fd_hw_sample_provider; +struct fd_hw_sample; + struct fd_context { struct pipe_context base; struct fd_device *dev; struct fd_screen *screen; + struct blitter_context *blitter; struct primconvert_context *primconvert; + /* slab for pipe_transfer allocations: */ struct util_slab_mempool transfer_pool; + /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */ + struct util_slab_mempool sample_pool; + struct util_slab_mempool sample_period_pool; + + /* next sample offset.. incremented for each sample in the batch/ + * submit, reset to zero on next submit. + */ + uint32_t next_sample_offset; + + /* sample-providers for hw queries: */ + const struct fd_hw_sample_provider *sample_providers[MAX_HW_SAMPLE_PROVIDERS]; + + /* cached samples (in case multiple queries need to reference + * the same sample snapshot) + */ + struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS]; + + /* tracking for current stage, to know when to start/stop + * any active queries: + */ + enum fd_render_stage stage; + + /* list of active queries: */ + struct list_head active_queries; + + /* list of queries that are not active, but were active in the + * current submit: + */ + struct list_head current_queries; + + /* current query result bo and tile stride: */ + struct fd_bo *query_bo; + uint32_t query_tile_stride; + /* table with PIPE_PRIM_MAX entries mapping PIPE_PRIM_x to * DI_PT_x value to use for draw initiator. 
There are some * slight differences between generation: @@ -258,10 +323,6 @@ struct fd_context { void (*draw)(struct fd_context *pctx, const struct pipe_draw_info *info); void (*clear)(struct fd_context *ctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil); - - /* queries: */ - struct fd_query * (*create_query)(struct fd_context *ctx, - unsigned query_type); }; static INLINE struct fd_context * diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index 11bb8d8333d..1289bb4b8f2 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -36,6 +36,7 @@ #include "freedreno_context.h" #include "freedreno_state.h" #include "freedreno_resource.h" +#include "freedreno_query_hw.h" #include "freedreno_util.h" @@ -156,6 +157,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) /* and any buffers used, need to be resolved: */ ctx->resolve |= buffers; + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_DRAW); ctx->draw(ctx, info); } @@ -188,6 +190,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, util_format_short_name(pipe_surface_format(pfb->cbufs[0])), util_format_short_name(pipe_surface_format(pfb->zsbuf))); + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_CLEAR); + ctx->clear(ctx, buffers, color, depth, stencil); ctx->dirty |= FD_DIRTY_ZSA | diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index 2d4de442452..861ebf5675e 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -35,6 +35,7 @@ #include "freedreno_gmem.h" #include "freedreno_context.h" #include "freedreno_resource.h" +#include "freedreno_query_hw.h" #include "freedreno_util.h" /* @@ -273,17 +274,24 @@ render_tiles(struct fd_context *ctx) ctx->emit_tile_prep(ctx, tile); - if (ctx->restore) + if (ctx->restore) { + 
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_MEM2GMEM); ctx->emit_tile_mem2gmem(ctx, tile); + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL); + } ctx->emit_tile_renderprep(ctx, tile); + fd_hw_query_prepare_tile(ctx, i, ctx->ring); + /* emit IB to drawcmds: */ OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end); fd_reset_wfi(ctx); /* emit gmem2mem to transfer tile back to system memory: */ + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_GMEM2MEM); ctx->emit_tile_gmem2mem(ctx, tile); + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL); } } @@ -292,6 +300,8 @@ render_sysmem(struct fd_context *ctx) { ctx->emit_sysmem_prep(ctx); + fd_hw_query_prepare_tile(ctx, 0, ctx->ring); + /* emit IB to drawcmds: */ OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end); fd_reset_wfi(ctx); @@ -314,6 +324,11 @@ fd_gmem_render_tiles(struct pipe_context *pctx) } } + /* close out the draw cmds by making sure any active queries are + * paused: + */ + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL); + /* mark the end of the clear/draw cmds before emitting per-tile cmds: */ fd_ringmarker_mark(ctx->draw_end); fd_ringmarker_mark(ctx->binning_end); @@ -326,6 +341,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx) DBG("rendering sysmem (%s/%s)", util_format_short_name(pipe_surface_format(pfb->cbufs[0])), util_format_short_name(pipe_surface_format(pfb->zsbuf))); + fd_hw_query_prepare(ctx, 1); render_sysmem(ctx); ctx->stats.batch_sysmem++; } else { @@ -334,6 +350,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx) DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y, util_format_short_name(pipe_surface_format(pfb->cbufs[0])), util_format_short_name(pipe_surface_format(pfb->zsbuf))); + fd_hw_query_prepare(ctx, gmem->nbins_x * gmem->nbins_y); render_tiles(ctx); ctx->stats.batch_gmem++; } diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c index fb9908b39ee..8753a4b02c9 100644 --- 
a/src/gallium/drivers/freedreno/freedreno_query.c +++ b/src/gallium/drivers/freedreno/freedreno_query.c @@ -31,6 +31,7 @@ #include "freedreno_query.h" #include "freedreno_query_sw.h" +#include "freedreno_query_hw.h" #include "freedreno_context.h" #include "freedreno_util.h" @@ -45,8 +46,8 @@ fd_create_query(struct pipe_context *pctx, unsigned query_type) struct fd_query *q; q = fd_sw_create_query(ctx, query_type); - if (ctx->create_query && !q) - q = ctx->create_query(ctx, query_type); + if (!q) + q = fd_hw_create_query(ctx, query_type); return (struct pipe_query *) q; } diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c new file mode 100644 index 00000000000..38bd3dedad4 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c @@ -0,0 +1,465 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "pipe/p_state.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" + +#include "freedreno_query_hw.h" +#include "freedreno_context.h" +#include "freedreno_util.h" + +struct fd_hw_sample_period { + struct fd_hw_sample *start, *end; + struct list_head list; +}; + +/* maps query_type to sample provider idx: */ +static int pidx(unsigned query_type) +{ + switch (query_type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + return 0; + case PIPE_QUERY_OCCLUSION_PREDICATE: + return 1; + default: + return -1; + } +} + +static struct fd_hw_sample * +get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring, + unsigned query_type) +{ + struct fd_hw_sample *samp = NULL; + int idx = pidx(query_type); + + if (!ctx->sample_cache[idx]) { + ctx->sample_cache[idx] = + ctx->sample_providers[idx]->get_sample(ctx, ring); + } + + fd_hw_sample_reference(ctx, &samp, ctx->sample_cache[idx]); + + return samp; +} + +static void +clear_sample_cache(struct fd_context *ctx) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ctx->sample_cache); i++) + fd_hw_sample_reference(ctx, &ctx->sample_cache[i], NULL); +} + +static bool +is_active(struct fd_hw_query *hq, enum fd_render_stage stage) +{ + return !!(hq->provider->active & stage); +} + + +static void +resume_query(struct fd_context *ctx, struct fd_hw_query *hq, + struct fd_ringbuffer *ring) +{ + assert(!hq->period); + hq->period = util_slab_alloc(&ctx->sample_period_pool); + list_inithead(&hq->period->list); + hq->period->start = get_sample(ctx, ring, hq->base.type); + /* NOTE: util_slab_alloc() does not zero out the buffer: */ + hq->period->end = NULL; +} + +static void +pause_query(struct fd_context *ctx, struct fd_hw_query *hq, 
+ struct fd_ringbuffer *ring) +{ + assert(hq->period && !hq->period->end); + hq->period->end = get_sample(ctx, ring, hq->base.type); + list_addtail(&hq->period->list, &hq->current_periods); + hq->period = NULL; +} + +static void +destroy_periods(struct fd_context *ctx, struct list_head *list) +{ + struct fd_hw_sample_period *period, *s; + LIST_FOR_EACH_ENTRY_SAFE(period, s, list, list) { + fd_hw_sample_reference(ctx, &period->start, NULL); + fd_hw_sample_reference(ctx, &period->end, NULL); + list_del(&period->list); + util_slab_free(&ctx->sample_period_pool, period); + } +} + +static void +fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q) +{ + struct fd_hw_query *hq = fd_hw_query(q); + + destroy_periods(ctx, &hq->periods); + destroy_periods(ctx, &hq->current_periods); + list_del(&hq->list); + + free(hq); +} + +static void +fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q) +{ + struct fd_hw_query *hq = fd_hw_query(q); + if (q->active) + return; + + /* begin_query() should clear previous results: */ + destroy_periods(ctx, &hq->periods); + + if (is_active(hq, ctx->stage)) + resume_query(ctx, hq, ctx->ring); + + q->active = true; + + /* add to active list: */ + list_del(&hq->list); + list_addtail(&hq->list, &ctx->active_queries); +} + +static void +fd_hw_end_query(struct fd_context *ctx, struct fd_query *q) +{ + struct fd_hw_query *hq = fd_hw_query(q); + if (!q->active) + return; + if (is_active(hq, ctx->stage)) + pause_query(ctx, hq, ctx->ring); + q->active = false; + /* move to current list: */ + list_del(&hq->list); + list_addtail(&hq->list, &ctx->current_queries); +} + +/* helper to get ptr to specified sample: */ +static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr) +{ + return ((char *)ptr) + (samp->tile_stride * n) + samp->offset; +} + +static boolean +fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q, + boolean wait, union pipe_query_result *result) +{ + struct fd_hw_query *hq = fd_hw_query(q); + 
const struct fd_hw_sample_provider *p = hq->provider; + struct fd_hw_sample_period *period; + + if (q->active) + return false; + + /* if the app tries to read back the query result before the + * batch is submitted, that forces us to flush so that there + * are actually results to wait for: + */ + if (!LIST_IS_EMPTY(&hq->list)) { + DBG("reading query result forces flush!"); + ctx->needs_flush = true; + fd_context_render(&ctx->base); + } + + util_query_clear_result(result, q->type); + + if (LIST_IS_EMPTY(&hq->periods)) + return true; + + assert(LIST_IS_EMPTY(&hq->list)); + assert(LIST_IS_EMPTY(&hq->current_periods)); + assert(!hq->period); + + if (LIST_IS_EMPTY(&hq->periods)) + return true; + + /* if !wait, then check the last sample (the one most likely to + * not be ready yet) and bail if it is not ready: + */ + if (!wait) { + int ret; + + period = LIST_ENTRY(struct fd_hw_sample_period, + hq->periods.prev, list); + + ret = fd_bo_cpu_prep(period->end->bo, ctx->screen->pipe, + DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC); + if (ret) + return false; + + fd_bo_cpu_fini(period->end->bo); + } + + /* sum the result across all sample periods: */ + LIST_FOR_EACH_ENTRY(period, &hq->periods, list) { + struct fd_hw_sample *start = period->start; + struct fd_hw_sample *end = period->end; + unsigned i; + + /* start and end samples should be from same batch: */ + assert(start->bo == end->bo); + assert(start->num_tiles == end->num_tiles); + + for (i = 0; i < start->num_tiles; i++) { + void *ptr; + + fd_bo_cpu_prep(start->bo, ctx->screen->pipe, + DRM_FREEDRENO_PREP_READ); + + ptr = fd_bo_map(start->bo); + + p->accumulate_result(ctx, sampptr(period->start, i, ptr), + sampptr(period->end, i, ptr), result); + + fd_bo_cpu_fini(start->bo); + } + } + + return true; +} + +static const struct fd_query_funcs hw_query_funcs = { + .destroy_query = fd_hw_destroy_query, + .begin_query = fd_hw_begin_query, + .end_query = fd_hw_end_query, + .get_query_result = fd_hw_get_query_result, +};
+ +struct fd_query * +fd_hw_create_query(struct fd_context *ctx, unsigned query_type) +{ + struct fd_hw_query *hq; + struct fd_query *q; + int idx = pidx(query_type); + + if ((idx < 0) || !ctx->sample_providers[idx]) + return NULL; + + hq = CALLOC_STRUCT(fd_hw_query); + if (!hq) + return NULL; + + hq->provider = ctx->sample_providers[idx]; + + list_inithead(&hq->periods); + list_inithead(&hq->current_periods); + list_inithead(&hq->list); + + q = &hq->base; + q->funcs = &hw_query_funcs; + q->type = query_type; + + return q; +} + +struct fd_hw_sample * +fd_hw_sample_init(struct fd_context *ctx, uint32_t size) +{ + struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool); + pipe_reference_init(&samp->reference, 1); + samp->size = size; + samp->offset = ctx->next_sample_offset; + /* NOTE: util_slab_alloc() does not zero out the buffer: */ + samp->bo = NULL; + samp->num_tiles = 0; + samp->tile_stride = 0; + ctx->next_sample_offset += size; + return samp; +} + +void +__fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp) +{ + if (samp->bo) + fd_bo_del(samp->bo); + util_slab_free(&ctx->sample_pool, samp); +} + +static void +prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo, + uint32_t num_tiles, uint32_t tile_stride) +{ + if (samp->bo) { + assert(samp->bo == bo); + assert(samp->num_tiles == num_tiles); + assert(samp->tile_stride == tile_stride); + return; + } + samp->bo = bo; + samp->num_tiles = num_tiles; + samp->tile_stride = tile_stride; +} + +static void +prepare_query(struct fd_hw_query *hq, struct fd_bo *bo, + uint32_t num_tiles, uint32_t tile_stride) +{ + struct fd_hw_sample_period *period, *s; + + /* prepare all the samples in the query: */ + LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->current_periods, list) { + prepare_sample(period->start, bo, num_tiles, tile_stride); + prepare_sample(period->end, bo, num_tiles, tile_stride); + + /* move from current_periods list to periods list: */ + list_del(&period->list); + 
list_addtail(&period->list, &hq->periods); + } +} + +static void +prepare_queries(struct fd_context *ctx, struct fd_bo *bo, + uint32_t num_tiles, uint32_t tile_stride, + struct list_head *list, bool remove) +{ + struct fd_hw_query *hq, *s; + LIST_FOR_EACH_ENTRY_SAFE(hq, s, list, list) { + prepare_query(hq, bo, num_tiles, tile_stride); + if (remove) + list_delinit(&hq->list); + } +} + +/* called from gmem code once total storage requirements are known (ie. + * number of samples times number of tiles) + */ +void +fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles) +{ + uint32_t tile_stride = ctx->next_sample_offset; + struct fd_bo *bo; + + if (ctx->query_bo) + fd_bo_del(ctx->query_bo); + + if (tile_stride > 0) { + bo = fd_bo_new(ctx->dev, tile_stride * num_tiles, + DRM_FREEDRENO_GEM_CACHE_WCOMBINE | + DRM_FREEDRENO_GEM_TYPE_KMEM); + } else { + bo = NULL; + } + + ctx->query_bo = bo; + ctx->query_tile_stride = tile_stride; + + prepare_queries(ctx, bo, num_tiles, tile_stride, + &ctx->active_queries, false); + prepare_queries(ctx, bo, num_tiles, tile_stride, + &ctx->current_queries, true); + + /* reset things for next batch: */ + ctx->next_sample_offset = 0; +} + +void +fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n, + struct fd_ringbuffer *ring) +{ + uint32_t tile_stride = ctx->query_tile_stride; + uint32_t offset = tile_stride * n; + + /* bail if no queries: */ + if (tile_stride == 0) + return; + + fd_wfi(ctx, ring); + OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1); + OUT_RELOCW(ring, ctx->query_bo, offset, 0, 0); +} + +void +fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum fd_render_stage stage) +{ + /* special case: internal blits (like mipmap level generation) + * go through normal draw path (via util_blitter_blit()).. 
but + * we need to ignore the FD_STAGE_DRAW which will be set, so we + * don't enable queries which should be paused during internal + * blits: + */ + if ((ctx->stage == FD_STAGE_BLIT) && + (stage != FD_STAGE_NULL)) + return; + + if (stage != ctx->stage) { + struct fd_hw_query *hq; + LIST_FOR_EACH_ENTRY(hq, &ctx->active_queries, list) { + bool was_active = is_active(hq, ctx->stage); + bool now_active = is_active(hq, stage); + + if (now_active && !was_active) + resume_query(ctx, hq, ring); + else if (was_active && !now_active) + pause_query(ctx, hq, ring); + } + } + clear_sample_cache(ctx); + ctx->stage = stage; +} + +void +fd_hw_query_register_provider(struct pipe_context *pctx, + const struct fd_hw_sample_provider *provider) +{ + struct fd_context *ctx = fd_context(pctx); + int idx = pidx(provider->query_type); + + assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS)); + assert(!ctx->sample_providers[idx]); + + ctx->sample_providers[idx] = provider; +} + +void +fd_hw_query_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + util_slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample), + 16, UTIL_SLAB_SINGLETHREADED); + util_slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period), + 16, UTIL_SLAB_SINGLETHREADED); + list_inithead(&ctx->active_queries); + list_inithead(&ctx->current_queries); +} + +void +fd_hw_query_fini(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + util_slab_destroy(&ctx->sample_pool); + util_slab_destroy(&ctx->sample_period_pool); +} diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.h b/src/gallium/drivers/freedreno/freedreno_query_hw.h new file mode 100644 index 00000000000..62baa3ac5b5 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.h @@ -0,0 +1,164 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any 
person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FREEDRENO_QUERY_HW_H_ +#define FREEDRENO_QUERY_HW_H_ + +#include "util/u_double_list.h" + +#include "freedreno_query.h" +#include "freedreno_context.h" + + +/* + * HW Queries: + * + * See: https://github.com/freedreno/freedreno/wiki/Queries#hardware-queries + * + * Hardware queries will be specific to gpu generation, but they need + * some common infrastructure for triggering start/stop samples at + * various points (for example, to exclude mem2gmem/gmem2mem or clear) + * as well as per tile tracking. + * + * NOTE: in at least some cases hw writes sample values to memory addr + * specified in some register. So we don't really have the option to + * just sample the same counter multiple times for multiple different + * queries with the same query_type. So we cache per sample provider + * the most recent sample since the last draw. 
This way multiple + * sample periods for multiple queries can reference the same sample. + * + * fd_hw_sample_provider: + * - one per query type, registered/implemented by gpu generation + * specific code + * - can construct fd_hw_samples on demand + * - most recent sample (since last draw) cached so multiple + * different queries can ref the same sample + * + * fd_hw_sample: + * - abstracts one snapshot of counter value(s) across N tiles + * - backing object not allocated until submit time when number + * of samples and number of tiles is known + * + * fd_hw_sample_period: + * - consists of start and stop sample + * - a query accumulates a list of sample periods + * - the query result is the sum of the sample periods + */ + +struct fd_hw_sample_provider { + unsigned query_type; + + /* stages applicable to the query type: */ + enum fd_render_stage active; + + /* when a new sample is required, emit appropriate cmdstream + * and return a sample object: + */ + struct fd_hw_sample *(*get_sample)(struct fd_context *ctx, + struct fd_ringbuffer *ring); + + /* accumulate the results from specified sample period: */ + void (*accumulate_result)(struct fd_context *ctx, + const void *start, const void *end, + union pipe_query_result *result); +}; + +struct fd_hw_sample { + struct pipe_reference reference; /* keep this first */ + + /* offset and size of the sample are known at the time the + * sample is constructed. + */ + uint32_t size; + uint32_t offset; + + /* backing object, offset/stride/etc are determined not when + * the sample is constructed, but when the batch is submitted. + * This way we can defer allocation until total # of requested + * samples, and total # of tiles, is known.
+ */ + struct fd_bo *bo; + uint32_t num_tiles; + uint32_t tile_stride; +}; + +struct fd_hw_sample_period; + +struct fd_hw_query { + struct fd_query base; + + const struct fd_hw_sample_provider *provider; + + /* list of fd_hw_sample_period in previous submits: */ + struct list_head periods; + + /* list of fd_hw_sample_period's in current submit: */ + struct list_head current_periods; + + /* if active and not paused, the current sample period (not + * yet added to current_periods): + */ + struct fd_hw_sample_period *period; + + struct list_head list; /* list-node in ctx->active_queries */ +}; + +static inline struct fd_hw_query * +fd_hw_query(struct fd_query *q) +{ + return (struct fd_hw_query *)q; +} + +struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type); +/* helper for sample providers: */ +struct fd_hw_sample * fd_hw_sample_init(struct fd_context *ctx, uint32_t size); +/* don't call directly, use fd_hw_sample_reference() */ +void __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp); +void fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles); +void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n, + struct fd_ringbuffer *ring); +void fd_hw_query_set_stage(struct fd_context *ctx, + struct fd_ringbuffer *ring, enum fd_render_stage stage); +void fd_hw_query_register_provider(struct pipe_context *pctx, + const struct fd_hw_sample_provider *provider); +void fd_hw_query_init(struct pipe_context *pctx); +void fd_hw_query_fini(struct pipe_context *pctx); + +static inline void +fd_hw_sample_reference(struct fd_context *ctx, + struct fd_hw_sample **ptr, struct fd_hw_sample *samp) +{ + struct fd_hw_sample *old_samp = *ptr; + + if (pipe_reference(&(*ptr)->reference, &samp->reference)) + __fd_hw_sample_destroy(ctx, old_samp); + if (ptr) + *ptr = samp; +} + +#endif /* FREEDRENO_QUERY_HW_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c 
index bd8c6cb9907..289f3653e12 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -36,6 +36,7 @@ #include "freedreno_screen.h" #include "freedreno_surface.h" #include "freedreno_context.h" +#include "freedreno_query_hw.h" #include "freedreno_util.h" #include @@ -401,7 +402,9 @@ render_blit(struct pipe_context *pctx, struct pipe_blit_info *info) util_blitter_save_fragment_sampler_views(ctx->blitter, ctx->fragtex.num_textures, ctx->fragtex.textures); + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT); util_blitter_blit(ctx->blitter, info); + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL); return true; } diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 356f4165792..b57702c54c8 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -223,11 +223,18 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start, emit_marker(ring, 6); } +/* CP_SCRATCH_REG4 is used to hold base address for query results: */ +#define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4 + static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx) { extern unsigned marker_cnt; - OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG0 + scratch_idx, 1); + unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx; + assert(reg != HW_QUERY_BASE_REG); + if (reg == HW_QUERY_BASE_REG) + return; + OUT_PKT0(ring, reg, 1); OUT_RING(ring, ++marker_cnt); } -- 2.30.2