freedreno: re-order support for hw queries
authorRob Clark <robdclark@gmail.com>
Mon, 11 Jul 2016 21:36:45 +0000 (17:36 -0400)
committerRob Clark <robdclark@gmail.com>
Sat, 30 Jul 2016 13:23:42 +0000 (09:23 -0400)
Push query state down to batch, and use the resource tracking to figure
out which batch(es) need to be flushed to get the query result.

This means we actually need to allocate the prsc up front, before we
know the size.  So we have to add a special way to allocate an un-
backed resource, and then later allocate the backing storage.

Signed-off-by: Rob Clark <robdclark@gmail.com>
19 files changed:
src/gallium/drivers/freedreno/a3xx/fd3_emit.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.h
src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
src/gallium/drivers/freedreno/a3xx/fd3_query.c
src/gallium/drivers/freedreno/a4xx/fd4_emit.c
src/gallium/drivers/freedreno/a4xx/fd4_emit.h
src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
src/gallium/drivers/freedreno/a4xx/fd4_query.c
src/gallium/drivers/freedreno/freedreno_batch.c
src/gallium/drivers/freedreno/freedreno_batch.h
src/gallium/drivers/freedreno/freedreno_context.c
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_draw.c
src/gallium/drivers/freedreno/freedreno_gmem.c
src/gallium/drivers/freedreno/freedreno_query_hw.c
src/gallium/drivers/freedreno/freedreno_query_hw.h
src/gallium/drivers/freedreno/freedreno_resource.c
src/gallium/drivers/freedreno/freedreno_resource.h
src/gallium/drivers/freedreno/freedreno_state.c

index eef5b52f12cf32578221a4ba2d64889e28dd8181..7e83157e38e9b334b9f5f3d5bcd14de34d4189c9 100644 (file)
@@ -757,8 +757,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
  * state, there could have been a context switch between ioctls):
  */
 void
-fd3_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
+fd3_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
+       struct fd_context *ctx = batch->ctx;
        struct fd3_context *fd3_ctx = fd3_context(ctx);
        int i;
 
@@ -894,7 +895,7 @@ fd3_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
 
        fd_wfi(ctx, ring);
 
-       fd_hw_query_enable(ctx, ring);
+       fd_hw_query_enable(batch, ring);
 
        ctx->needs_rb_fbd = true;
 }
index 110f30e89be2a2db5d5c2233e669eec9447ea2c9..dfe7758954219660c4eb0bbeb1249d11b8a5a31a 100644 (file)
@@ -93,7 +93,7 @@ void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit);
 void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                struct fd3_emit *emit);
 
-void fd3_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring);
+void fd3_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);
 
 void fd3_emit_init(struct pipe_context *pctx);
 
index b9af45683f957e8db27ad8a9d19698da86d3c72e..1788c0c738416045d838e66c061a6c1520033216 100644 (file)
@@ -734,7 +734,7 @@ fd3_emit_sysmem_prep(struct fd_batch *batch)
                pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch;
        }
 
-       fd3_emit_restore(batch->ctx, ring);
+       fd3_emit_restore(batch, ring);
 
        OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
        OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
@@ -927,7 +927,7 @@ fd3_emit_tile_init(struct fd_batch *batch)
        struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
        uint32_t rb_render_control;
 
-       fd3_emit_restore(batch->ctx, ring);
+       fd3_emit_restore(batch, ring);
 
        /* note: use gmem->bin_w/h, the bin_w/h parameters may be truncated
         * at the right and bottom edge tiles
index 8fc0a0d42295c69d9f051b1606f929d7c71cf03d..ec034fc127dc5d5bc1af53afde5cc7ff4f454faf 100644 (file)
@@ -46,10 +46,10 @@ struct fd_rb_samp_ctrs {
  */
 
 static struct fd_hw_sample *
-occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+occlusion_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
        struct fd_hw_sample *samp =
-                       fd_hw_sample_init(ctx, sizeof(struct fd_rb_samp_ctrs));
+                       fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
 
        /* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
         * HW_QUERY_BASE_REG register:
@@ -68,7 +68,7 @@ occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
                                                INDEX_SIZE_IGN, USE_VISIBILITY, 0));
        OUT_RING(ring, 0);             /* NumIndices */
 
-       fd_event_write(ctx, ring, ZPASS_DONE);
+       fd_event_write(batch->ctx, ring, ZPASS_DONE);
 
        OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1);
        OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE);
index 88e1a40ec90585d371dd08c19b110a71725e98c2..9ce93f6e33f93728b81b8a9663f5cdfee96baf4c 100644 (file)
@@ -736,8 +736,9 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
  * state, there could have been a context switch between ioctls):
  */
 void
-fd4_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
+fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
+       struct fd_context *ctx = batch->ctx;
        struct fd4_context *fd4_ctx = fd4_context(ctx);
 
        OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
@@ -885,7 +886,7 @@ fd4_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
        OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
        OUT_RING(ring, 0x0);
 
-       fd_hw_query_enable(ctx, ring);
+       fd_hw_query_enable(batch, ring);
 
        ctx->needs_rb_fbd = true;
 }
index 89dc51ad1ee8c8115501a1e2843926343dd9cc38..42e0e5e645a788af53be5a2f71dd4da313623d13 100644 (file)
@@ -102,7 +102,7 @@ void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit);
 void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                struct fd4_emit *emit);
 
-void fd4_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring);
+void fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);
 
 void fd4_emit_init(struct pipe_context *pctx);
 
index afd37a88f43fc3bfad68834c8b1217b5b3881032..3f3847c2a2861b35f5374d47c3ae316d404157f8 100644 (file)
@@ -527,7 +527,7 @@ fd4_emit_sysmem_prep(struct fd_batch *batch)
        struct pipe_framebuffer_state *pfb = &batch->framebuffer;
        struct fd_ringbuffer *ring = batch->gmem;
 
-       fd4_emit_restore(batch->ctx, ring);
+       fd4_emit_restore(batch, ring);
 
        OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
        OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
@@ -666,7 +666,7 @@ fd4_emit_tile_init(struct fd_batch *batch)
        struct fd_ringbuffer *ring = batch->gmem;
        struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
 
-       fd4_emit_restore(batch->ctx, ring);
+       fd4_emit_restore(batch, ring);
 
        OUT_PKT0(ring, REG_A4XX_VSC_BIN_SIZE, 1);
        OUT_RING(ring, A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
index 41e3e6506bdc38c1da2a83983ad9b66fb990892b..921384c19118367d28287c345cd3044cfd2a82cc 100644 (file)
@@ -48,10 +48,10 @@ struct fd_rb_samp_ctrs {
  */
 
 static struct fd_hw_sample *
-occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+occlusion_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
        struct fd_hw_sample *samp =
-                       fd_hw_sample_init(ctx, sizeof(struct fd_rb_samp_ctrs));
+                       fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
 
        /* low bits of sample addr should be zero (since they are control
         * flags in RB_SAMPLE_COUNT_CONTROL):
@@ -73,7 +73,7 @@ occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
        OUT_RING(ring, 1);             /* NumInstances */
        OUT_RING(ring, 0);             /* NumIndices */
 
-       fd_event_write(ctx, ring, ZPASS_DONE);
+       fd_event_write(batch->ctx, ring, ZPASS_DONE);
 
        return samp;
 }
@@ -123,18 +123,18 @@ time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
 }
 
 static struct fd_hw_sample *
-time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+time_elapsed_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
-       struct fd_hw_sample *samp = fd_hw_sample_init(ctx, sizeof(uint64_t));
+       struct fd_hw_sample *samp = fd_hw_sample_init(batch, sizeof(uint64_t));
 
        /* use unused part of vsc_size_mem as scratch space, to avoid
         * extra allocation:
         */
-       struct fd_bo *scratch_bo = fd4_context(ctx)->vsc_size_mem;
+       struct fd_bo *scratch_bo = fd4_context(batch->ctx)->vsc_size_mem;
        const int sample_off = 128;
        const int addr_off = sample_off + 8;
 
-       debug_assert(ctx->screen->max_freq > 0);
+       debug_assert(batch->ctx->screen->max_freq > 0);
 
        /* Basic issue is that we need to read counter value to a relative
         * destination (with per-tile offset) rather than absolute dest
@@ -161,7 +161,7 @@ time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
         * shot, but that's really just polishing a turd..
         */
 
-       fd_wfi(ctx, ring);
+       fd_wfi(batch->ctx, ring);
 
        /* copy sample counter _LO and _HI to scratch: */
        OUT_PKT3(ring, CP_REG_TO_MEM, 2);
index 2dd7eda72adfd1f9a507e9a9bc35a5e665249372..5008f5dbe5673e44bbf9f643257390679f3a0c22 100644 (file)
@@ -32,6 +32,7 @@
 #include "freedreno_batch.h"
 #include "freedreno_context.h"
 #include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
 
 static void
 batch_init(struct fd_batch *batch)
@@ -61,6 +62,7 @@ batch_init(struct fd_batch *batch)
        batch->needs_flush = false;
        batch->gmem_reason = 0;
        batch->num_draws = 0;
+       batch->stage = FD_STAGE_NULL;
 
        /* reset maximal bounds: */
        batch->max_scissor.minx = batch->max_scissor.miny = ~0;
@@ -72,6 +74,8 @@ batch_init(struct fd_batch *batch)
                util_dynarray_init(&batch->rbrc_patches);
 
        assert(batch->resources->entries == 0);
+
+       util_dynarray_init(&batch->samples);
 }
 
 struct fd_batch *
@@ -98,6 +102,8 @@ fd_batch_create(struct fd_context *ctx)
 static void
 batch_fini(struct fd_batch *batch)
 {
+       pipe_resource_reference(&batch->query_buf, NULL);
+
        fd_ringbuffer_del(batch->draw);
        fd_ringbuffer_del(batch->binning);
        fd_ringbuffer_del(batch->gmem);
@@ -106,6 +112,13 @@ batch_fini(struct fd_batch *batch)
 
        if (is_a3xx(batch->ctx->screen))
                util_dynarray_fini(&batch->rbrc_patches);
+
+       while (batch->samples.size > 0) {
+               struct fd_hw_sample *samp =
+                       util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
+               fd_hw_sample_reference(batch->ctx, &samp, NULL);
+       }
+       util_dynarray_fini(&batch->samples);
 }
 
 static void
index 89d1d9fea7b8e86e13d58ea25e24886c4b392643..228a1b72bf64d87006427935ec19a4ba1153147b 100644 (file)
@@ -28,6 +28,7 @@
 #define FREEDRENO_BATCH_H_
 
 #include "util/u_inlines.h"
+#include "util/list.h"
 
 #include "freedreno_util.h"
 
@@ -35,6 +36,35 @@ struct fd_context;
 struct fd_resource;
 enum fd_resource_status;
 
+/* Bitmask of stages in rendering that a particular query is
+ * active.  Queries will be automatically started/stopped (generating
+ * additional fd_hw_sample_period's) on entrance/exit from stages that
+ * are applicable to the query.
+ *
+ * NOTE: set the stage to NULL at end of IB to ensure no query is still
+ * active.  Things aren't going to work out the way you want if a query
+ * is active across IB's (or between tile IB and draw IB)
+ */
+enum fd_render_stage {
+       FD_STAGE_NULL     = 0x01,
+       FD_STAGE_DRAW     = 0x02,
+       FD_STAGE_CLEAR    = 0x04,
+       /* TODO before queries which include MEM2GMEM or GMEM2MEM will
+        * work we will need to call fd_hw_query_prepare() from somewhere
+        * appropriate so that queries in the tiling IB get backed with
+        * memory to write results to.
+        */
+       FD_STAGE_MEM2GMEM = 0x08,
+       FD_STAGE_GMEM2MEM = 0x10,
+       /* used for driver internal draws (ie. util_blitter_blit()): */
+       FD_STAGE_BLIT     = 0x20,
+       FD_STAGE_ALL      = 0xff,
+};
+
+#define MAX_HW_SAMPLE_PROVIDERS 4
+struct fd_hw_sample_provider;
+struct fd_hw_sample;
+
 /* A batch tracks everything about a cmdstream batch/submit, including the
  * ringbuffers used for binning, draw, and gmem cmds, list of associated
  * fd_resource-s, etc.
@@ -118,6 +148,37 @@ struct fd_batch {
        /** tiling/gmem (IB0) cmdstream: */
        struct fd_ringbuffer *gmem;
 
+       /**
+        * hw query related state:
+        */
+       /*@{*/
+       /* next sample offset.. incremented for each sample in the batch/
+        * submit, reset to zero on next submit.
+        */
+       uint32_t next_sample_offset;
+
+       /* cached samples (in case multiple queries need to reference
+        * the same sample snapshot)
+        */
+       struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
+
+       /* which sample providers were active in the current batch: */
+       uint32_t active_providers;
+
+       /* tracking for current stage, to know when to start/stop
+        * any active queries:
+        */
+       enum fd_render_stage stage;
+
+       /* list of samples in current batch: */
+       struct util_dynarray samples;
+
+       /* current query result bo and tile stride: */
+       struct pipe_resource *query_buf;
+       uint32_t query_tile_stride;
+       /*@}*/
+
+
        /* Set of resources used by currently-unsubmitted batch (read or
         * write).. does not hold a reference to the resource.
         */
index 13a17e2a78e46e99c779ef24e38a211e9c248d0d..1c32cd9ae92e6906e14ae60a802d6bc745a09152 100644 (file)
@@ -168,8 +168,6 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
         */
        ctx->sample_mask = 0xffff;
 
-       ctx->stage = FD_STAGE_NULL;
-
        pctx = &ctx->base;
        pctx->screen = pscreen;
        pctx->priv = priv;
index 74f53ee554f702827cb125cbf26a819373f99607..45876259fd87c72006e444b9e6f1c371a5c91b94 100644 (file)
@@ -107,34 +107,6 @@ struct fd_vertex_state {
        struct fd_vertexbuf_stateobj vertexbuf;
 };
 
-/* Bitmask of stages in rendering that a particular query query is
- * active.  Queries will be automatically started/stopped (generating
- * additional fd_hw_sample_period's) on entrance/exit from stages that
- * are applicable to the query.
- *
- * NOTE: set the stage to NULL at end of IB to ensure no query is still
- * active.  Things aren't going to work out the way you want if a query
- * is active across IB's (or between tile IB and draw IB)
- */
-enum fd_render_stage {
-       FD_STAGE_NULL     = 0x01,
-       FD_STAGE_DRAW     = 0x02,
-       FD_STAGE_CLEAR    = 0x04,
-       /* TODO before queries which include MEM2GMEM or GMEM2MEM will
-        * work we will need to call fd_hw_query_prepare() from somewhere
-        * appropriate so that queries in the tiling IB get backed with
-        * memory to write results to.
-        */
-       FD_STAGE_MEM2GMEM = 0x08,
-       FD_STAGE_GMEM2MEM = 0x10,
-       /* used for driver internal draws (ie. util_blitter_blit()): */
-       FD_STAGE_BLIT     = 0x20,
-       FD_STAGE_ALL      = 0xff,
-};
-
-#define MAX_HW_SAMPLE_PROVIDERS 4
-struct fd_hw_sample_provider;
-struct fd_hw_sample;
 
 struct fd_context {
        struct pipe_context base;
@@ -152,39 +124,12 @@ struct fd_context {
        struct util_slab_mempool sample_pool;
        struct util_slab_mempool sample_period_pool;
 
-       /* next sample offset.. incremented for each sample in the batch/
-        * submit, reset to zero on next submit.
-        */
-       uint32_t next_sample_offset;
-
        /* sample-providers for hw queries: */
        const struct fd_hw_sample_provider *sample_providers[MAX_HW_SAMPLE_PROVIDERS];
 
-       /* cached samples (in case multiple queries need to reference
-        * the same sample snapshot)
-        */
-       struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
-
-       /* which sample providers were active in the current batch: */
-       uint32_t active_providers;
-
-       /* tracking for current stage, to know when to start/stop
-        * any active queries:
-        */
-       enum fd_render_stage stage;
-
        /* list of active queries: */
        struct list_head active_queries;
 
-       /* list of queries that are not active, but were active in the
-        * current submit:
-        */
-       struct list_head current_queries;
-
-       /* current query result bo and tile stride: */
-       struct pipe_resource *query_buf;
-       uint32_t query_tile_stride;
-
        /* table with PIPE_PRIM_MAX entries mapping PIPE_PRIM_x to
         * DI_PT_x value to use for draw initiator.  There are some
         * slight differences between generation:
index 112bf5cb6244d122c58c6f7ba98c83249910b28e..fd3da1f20e5723efe36c175764769166ea2b53d1 100644 (file)
@@ -89,6 +89,10 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                ctx->discard = false;
        }
 
+       /* NOTE: needs to be before resource_written(batch->query_buf), otherwise
+        * query_buf may not be created yet.
+        */
+       fd_hw_query_set_stage(batch, batch->draw, FD_STAGE_DRAW);
        /*
         * Figure out the buffers/features we need:
         */
@@ -154,6 +158,8 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                if (ctx->streamout.targets[i])
                        resource_written(batch, ctx->streamout.targets[i]->buffer);
 
+       resource_written(batch, batch->query_buf);
+
        batch->num_draws++;
 
        prims = u_reduced_prims_for_vertices(info->mode, info->count);
@@ -180,7 +186,6 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
                util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 
-       fd_hw_query_set_stage(ctx, batch->draw, FD_STAGE_DRAW);
        if (ctx->draw_vbo(ctx, info))
                batch->needs_flush = true;
 
@@ -253,12 +258,14 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
                batch->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
        }
 
+       resource_written(batch, batch->query_buf);
+
        DBG("%p: %x %ux%u depth=%f, stencil=%u (%s/%s)", batch, buffers,
                pfb->width, pfb->height, depth, stencil,
                util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
                util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 
-       fd_hw_query_set_stage(ctx, batch->draw, FD_STAGE_CLEAR);
+       fd_hw_query_set_stage(batch, batch->draw, FD_STAGE_CLEAR);
 
        ctx->clear(ctx, buffers, color, depth, stencil);
 
index a075a8b5c95e0d84f06b2d781452513783870658..d57b6a36d8ba896224df87ba68518b653730c37a 100644 (file)
@@ -323,23 +323,23 @@ render_tiles(struct fd_batch *batch)
                ctx->emit_tile_prep(batch, tile);
 
                if (batch->restore) {
-                       fd_hw_query_set_stage(ctx, batch->gmem, FD_STAGE_MEM2GMEM);
+                       fd_hw_query_set_stage(batch, batch->gmem, FD_STAGE_MEM2GMEM);
                        ctx->emit_tile_mem2gmem(batch, tile);
-                       fd_hw_query_set_stage(ctx, batch->gmem, FD_STAGE_NULL);
+                       fd_hw_query_set_stage(batch, batch->gmem, FD_STAGE_NULL);
                }
 
                ctx->emit_tile_renderprep(batch, tile);
 
-               fd_hw_query_prepare_tile(ctx, i, batch->gmem);
+               fd_hw_query_prepare_tile(batch, i, batch->gmem);
 
                /* emit IB to drawcmds: */
                ctx->emit_ib(batch->gmem, batch->draw);
                fd_reset_wfi(ctx);
 
                /* emit gmem2mem to transfer tile back to system memory: */
-               fd_hw_query_set_stage(ctx, batch->gmem, FD_STAGE_GMEM2MEM);
+               fd_hw_query_set_stage(batch, batch->gmem, FD_STAGE_GMEM2MEM);
                ctx->emit_tile_gmem2mem(batch, tile);
-               fd_hw_query_set_stage(ctx, batch->gmem, FD_STAGE_NULL);
+               fd_hw_query_set_stage(batch, batch->gmem, FD_STAGE_NULL);
        }
 }
 
@@ -350,7 +350,7 @@ render_sysmem(struct fd_batch *batch)
 
        ctx->emit_sysmem_prep(batch);
 
-       fd_hw_query_prepare_tile(ctx, 0, batch->gmem);
+       fd_hw_query_prepare_tile(batch, 0, batch->gmem);
 
        /* emit IB to drawcmds: */
        ctx->emit_ib(batch->gmem, batch->draw);
@@ -376,7 +376,7 @@ fd_gmem_render_tiles(struct fd_batch *batch)
        /* close out the draw cmds by making sure any active queries are
         * paused:
         */
-       fd_hw_query_set_stage(ctx, batch->draw, FD_STAGE_NULL);
+       fd_hw_query_set_stage(batch, batch->draw, FD_STAGE_NULL);
 
        fd_reset_wfi(ctx);
 
@@ -387,7 +387,7 @@ fd_gmem_render_tiles(struct fd_batch *batch)
                        batch, pfb->width, pfb->height,
                        util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
                        util_format_short_name(pipe_surface_format(pfb->zsbuf)));
-               fd_hw_query_prepare(ctx, 1);
+               fd_hw_query_prepare(batch, 1);
                render_sysmem(batch);
                ctx->stats.batch_sysmem++;
        } else {
@@ -397,7 +397,7 @@ fd_gmem_render_tiles(struct fd_batch *batch)
                        batch, pfb->width, pfb->height, gmem->nbins_x, gmem->nbins_y,
                        util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
                        util_format_short_name(pipe_surface_format(pfb->zsbuf)));
-               fd_hw_query_prepare(ctx, gmem->nbins_x * gmem->nbins_y);
+               fd_hw_query_prepare(batch, gmem->nbins_x * gmem->nbins_y);
                render_tiles(batch);
                ctx->stats.batch_gmem++;
        }
index 808bcefc2ad37c97cf9269ebcbba9f702eb40669..12d40d04cda576cd2812082e1bbb475582d71df5 100644 (file)
@@ -61,32 +61,35 @@ static int pidx(unsigned query_type)
 }
 
 static struct fd_hw_sample *
-get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring,
+get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring,
                unsigned query_type)
 {
+       struct fd_context *ctx = batch->ctx;
        struct fd_hw_sample *samp = NULL;
        int idx = pidx(query_type);
 
        assume(idx >= 0);   /* query never would have been created otherwise */
 
-       if (!ctx->sample_cache[idx]) {
-               ctx->sample_cache[idx] =
-                       ctx->sample_providers[idx]->get_sample(ctx, ring);
-               ctx->batch->needs_flush = true;
+       if (!batch->sample_cache[idx]) {
+               struct fd_hw_sample *new_samp =
+                       ctx->sample_providers[idx]->get_sample(batch, ring);
+               fd_hw_sample_reference(ctx, &batch->sample_cache[idx], new_samp);
+               util_dynarray_append(&batch->samples, struct fd_hw_sample *, new_samp);
+               batch->needs_flush = true;
        }
 
-       fd_hw_sample_reference(ctx, &samp, ctx->sample_cache[idx]);
+       fd_hw_sample_reference(ctx, &samp, batch->sample_cache[idx]);
 
        return samp;
 }
 
 static void
-clear_sample_cache(struct fd_context *ctx)
+clear_sample_cache(struct fd_batch *batch)
 {
        int i;
 
-       for (i = 0; i < ARRAY_SIZE(ctx->sample_cache); i++)
-               fd_hw_sample_reference(ctx, &ctx->sample_cache[i], NULL);
+       for (i = 0; i < ARRAY_SIZE(batch->sample_cache); i++)
+               fd_hw_sample_reference(batch->ctx, &batch->sample_cache[i], NULL);
 }
 
 static bool
@@ -97,38 +100,38 @@ is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
 
 
 static void
-resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
+resume_query(struct fd_batch *batch, struct fd_hw_query *hq,
                struct fd_ringbuffer *ring)
 {
        int idx = pidx(hq->provider->query_type);
        assert(idx >= 0);   /* query never would have been created otherwise */
        assert(!hq->period);
-       ctx->active_providers |= (1 << idx);
-       hq->period = util_slab_alloc(&ctx->sample_period_pool);
+       batch->active_providers |= (1 << idx);
+       hq->period = util_slab_alloc(&batch->ctx->sample_period_pool);
        list_inithead(&hq->period->list);
-       hq->period->start = get_sample(ctx, ring, hq->base.type);
+       hq->period->start = get_sample(batch, ring, hq->base.type);
        /* NOTE: util_slab_alloc() does not zero out the buffer: */
        hq->period->end = NULL;
 }
 
 static void
-pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
+pause_query(struct fd_batch *batch, struct fd_hw_query *hq,
                struct fd_ringbuffer *ring)
 {
        int idx = pidx(hq->provider->query_type);
        assert(idx >= 0);   /* query never would have been created otherwise */
        assert(hq->period && !hq->period->end);
-       assert(ctx->active_providers & (1 << idx));
-       hq->period->end = get_sample(ctx, ring, hq->base.type);
-       list_addtail(&hq->period->list, &hq->current_periods);
+       assert(batch->active_providers & (1 << idx));
+       hq->period->end = get_sample(batch, ring, hq->base.type);
+       list_addtail(&hq->period->list, &hq->periods);
        hq->period = NULL;
 }
 
 static void
-destroy_periods(struct fd_context *ctx, struct list_head *list)
+destroy_periods(struct fd_context *ctx, struct fd_hw_query *hq)
 {
        struct fd_hw_sample_period *period, *s;
-       LIST_FOR_EACH_ENTRY_SAFE(period, s, list, list) {
+       LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->periods, list) {
                fd_hw_sample_reference(ctx, &period->start, NULL);
                fd_hw_sample_reference(ctx, &period->end, NULL);
                list_del(&period->list);
@@ -141,8 +144,7 @@ fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
 {
        struct fd_hw_query *hq = fd_hw_query(q);
 
-       destroy_periods(ctx, &hq->periods);
-       destroy_periods(ctx, &hq->current_periods);
+       destroy_periods(ctx, hq);
        list_del(&hq->list);
 
        free(hq);
@@ -151,27 +153,31 @@ fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
 static boolean
 fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
 {
+       struct fd_batch *batch = ctx->batch;
        struct fd_hw_query *hq = fd_hw_query(q);
+
        if (q->active)
                return false;
 
        /* begin_query() should clear previous results: */
-       destroy_periods(ctx, &hq->periods);
+       destroy_periods(ctx, hq);
 
-       if (is_active(hq, ctx->stage))
-               resume_query(ctx, hq, ctx->batch->draw);
+       if (batch && is_active(hq, batch->stage))
+               resume_query(batch, hq, batch->draw);
 
        q->active = true;
 
        /* add to active list: */
-       list_del(&hq->list);
+       assert(list_empty(&hq->list));
        list_addtail(&hq->list, &ctx->active_queries);
-   return true;
+
+       return true;
 }
 
 static void
 fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
 {
+       struct fd_batch *batch = ctx->batch;
        struct fd_hw_query *hq = fd_hw_query(q);
        /* there are a couple special cases, which don't have
         * a matching ->begin_query():
@@ -181,12 +187,11 @@ fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
        }
        if (!q->active)
                return;
-       if (is_active(hq, ctx->stage))
-               pause_query(ctx, hq, ctx->batch->draw);
+       if (batch && is_active(hq, batch->stage))
+               pause_query(batch, hq, batch->draw);
        q->active = false;
-       /* move to current list: */
-       list_del(&hq->list);
-       list_addtail(&hq->list, &ctx->current_queries);
+       /* remove from active list: */
+       list_delinit(&hq->list);
 }
 
 /* helper to get ptr to specified sample: */
@@ -206,27 +211,12 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
        if (q->active)
                return false;
 
-       /* if the app tries to read back the query result before the
-        * batch is submitted, that forces us to flush so that there
-        * are actually results to wait for:
-        */
-       if (!LIST_IS_EMPTY(&hq->list)) {
-               /* if app didn't actually trigger any cmdstream, then
-                * we have nothing to do:
-                */
-               if (!ctx->batch->needs_flush)
-                       return true;
-               DBG("reading query result forces flush!");
-               fd_batch_flush(ctx->batch);
-       }
-
        util_query_clear_result(result, q->type);
 
        if (LIST_IS_EMPTY(&hq->periods))
                return true;
 
        assert(LIST_IS_EMPTY(&hq->list));
-       assert(LIST_IS_EMPTY(&hq->current_periods));
        assert(!hq->period);
 
        /* if !wait, then check the last sample (the one most likely to
@@ -240,6 +230,21 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
 
                struct fd_resource *rsc = fd_resource(period->end->prsc);
 
+               if (pending(rsc, false)) {
+                       /* piglit spec@arb_occlusion_query@occlusion_query_conform
+                        * test, and silly apps perhaps, get stuck in a loop trying
+                        * to get  query result forever with wait==false..  we don't
+                        * want to flush unnecessarily but we also don't want to
+                        * spin forever:
+                        */
+                       if (hq->no_wait_cnt++ > 5)
+                               fd_batch_flush(rsc->write_batch);
+                       return false;
+               }
+
+               if (!rsc->bo)
+                       return false;
+
                ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe,
                                DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
                if (ret)
@@ -260,6 +265,13 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
 
                struct fd_resource *rsc = fd_resource(start->prsc);
 
+               if (rsc->write_batch)
+                       fd_batch_flush(rsc->write_batch);
+
+               /* some piglit tests at least do query with no draws, I guess: */
+               if (!rsc->bo)
+                       continue;
+
                fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, DRM_FREEDRENO_PREP_READ);
 
                void *ptr = fd_bo_map(rsc->bo);
@@ -299,7 +311,6 @@ fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
        hq->provider = ctx->sample_providers[idx];
 
        list_inithead(&hq->periods);
-       list_inithead(&hq->current_periods);
        list_inithead(&hq->list);
 
        q = &hq->base;
@@ -310,19 +321,38 @@ fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
 }
 
 struct fd_hw_sample *
-fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
+fd_hw_sample_init(struct fd_batch *batch, uint32_t size)
 {
-       struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
+       struct fd_hw_sample *samp = util_slab_alloc(&batch->ctx->sample_pool);
        pipe_reference_init(&samp->reference, 1);
        samp->size = size;
        debug_assert(util_is_power_of_two(size));
-       ctx->next_sample_offset = align(ctx->next_sample_offset, size);
-       samp->offset = ctx->next_sample_offset;
+       batch->next_sample_offset = align(batch->next_sample_offset, size);
+       samp->offset = batch->next_sample_offset;
        /* NOTE: util_slab_alloc() does not zero out the buffer: */
        samp->prsc = NULL;
        samp->num_tiles = 0;
        samp->tile_stride = 0;
-       ctx->next_sample_offset += size;
+       batch->next_sample_offset += size;
+
+       if (!batch->query_buf) {
+               struct pipe_screen *pscreen = &batch->ctx->screen->base;
+               struct pipe_resource templ = {
+                       .target  = PIPE_BUFFER,
+                       .format  = PIPE_FORMAT_R8_UNORM,
+                       .bind    = PIPE_BIND_QUERY_BUFFER,
+                       .width0  = 0,    /* create initially zero size buffer */
+                       .height0 = 1,
+                       .depth0  = 1,
+                       .array_size = 1,
+                       .last_level = 0,
+                       .nr_samples = 1,
+               };
+               batch->query_buf = pscreen->resource_create(pscreen, &templ);
+       }
+
+       pipe_resource_reference(&samp->prsc, batch->query_buf);
+
        return samp;
 }
 
@@ -333,110 +363,49 @@ __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
        util_slab_free(&ctx->sample_pool, samp);
 }
 
-static void
-prepare_sample(struct fd_hw_sample *samp, struct pipe_resource *prsc,
-               uint32_t num_tiles, uint32_t tile_stride)
-{
-       if (samp->prsc) {
-               assert(samp->prsc == prsc);
-               assert(samp->num_tiles == num_tiles);
-               assert(samp->tile_stride == tile_stride);
-               return;
-       }
-       pipe_resource_reference(&samp->prsc, prsc);
-       samp->num_tiles = num_tiles;
-       samp->tile_stride = tile_stride;
-}
-
-static void
-prepare_query(struct fd_hw_query *hq, struct pipe_resource *prsc,
-               uint32_t num_tiles, uint32_t tile_stride)
-{
-       struct fd_hw_sample_period *period, *s;
-
-       /* prepare all the samples in the query: */
-       LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->current_periods, list) {
-               prepare_sample(period->start, prsc, num_tiles, tile_stride);
-               prepare_sample(period->end, prsc, num_tiles, tile_stride);
-
-               /* move from current_periods list to periods list: */
-               list_del(&period->list);
-               list_addtail(&period->list, &hq->periods);
-       }
-}
-
-static void
-prepare_queries(struct fd_context *ctx, struct pipe_resource *prsc,
-               uint32_t num_tiles, uint32_t tile_stride,
-               struct list_head *list, bool remove)
-{
-       struct fd_hw_query *hq, *s;
-       LIST_FOR_EACH_ENTRY_SAFE(hq, s, list, list) {
-               prepare_query(hq, prsc, num_tiles, tile_stride);
-               if (remove)
-                       list_delinit(&hq->list);
-       }
-}
-
 /* called from gmem code once total storage requirements are known (ie.
  * number of samples times number of tiles)
  */
 void
-fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles)
+fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles)
 {
-       uint32_t tile_stride = ctx->next_sample_offset;
-       struct pipe_resource *prsc;
+       uint32_t tile_stride = batch->next_sample_offset;
 
-       pipe_resource_reference(&ctx->query_buf, NULL);
+       if (tile_stride > 0)
+               fd_resource_resize(batch->query_buf, tile_stride * num_tiles);
 
-       if (tile_stride > 0) {
-               struct pipe_screen *pscreen = &ctx->screen->base;
-               struct pipe_resource templ = {
-                       .target  = PIPE_BUFFER,
-                       .format  = PIPE_FORMAT_R8_UNORM,
-                       .bind    = PIPE_BIND_QUERY_BUFFER,
-                       .width0  = tile_stride * num_tiles,
-                       .height0 = 1,
-                       .depth0  = 1,
-                       .array_size = 1,
-                       .last_level = 0,
-                       .nr_samples = 1,
-               };
-               prsc = pscreen->resource_create(pscreen, &templ);
-       } else {
-               prsc = NULL;
-       }
-
-       ctx->query_buf = prsc;
-       ctx->query_tile_stride = tile_stride;
+       batch->query_tile_stride = tile_stride;
 
-       prepare_queries(ctx, prsc, num_tiles, tile_stride,
-                       &ctx->active_queries, false);
-       prepare_queries(ctx, prsc, num_tiles, tile_stride,
-                       &ctx->current_queries, true);
+       while (batch->samples.size > 0) {
+               struct fd_hw_sample *samp =
+                       util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
+               samp->num_tiles = num_tiles;
+               samp->tile_stride = tile_stride;
+               fd_hw_sample_reference(batch->ctx, &samp, NULL);
+       }
 
        /* reset things for next batch: */
-       ctx->next_sample_offset = 0;
+       batch->next_sample_offset = 0;
 }
 
 void
-fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
+fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
                struct fd_ringbuffer *ring)
 {
-       uint32_t tile_stride = ctx->query_tile_stride;
+       uint32_t tile_stride = batch->query_tile_stride;
        uint32_t offset = tile_stride * n;
 
        /* bail if no queries: */
        if (tile_stride == 0)
                return;
 
-       fd_wfi(ctx, ring);
+       fd_wfi(batch->ctx, ring);
        OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
-       OUT_RELOCW(ring, fd_resource(ctx->query_buf)->bo, offset, 0, 0);
+       OUT_RELOCW(ring, fd_resource(batch->query_buf)->bo, offset, 0, 0);
 }
 
 void
-fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
+fd_hw_query_set_stage(struct fd_batch *batch, struct fd_ringbuffer *ring,
                enum fd_render_stage stage)
 {
        /* special case: internal blits (like mipmap level generation)
@@ -445,24 +414,24 @@ fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
         * don't enable queries which should be paused during internal
         * blits:
         */
-       if ((ctx->stage == FD_STAGE_BLIT) &&
+       if ((batch->stage == FD_STAGE_BLIT) &&
                        (stage != FD_STAGE_NULL))
                return;
 
-       if (stage != ctx->stage) {
+       if (stage != batch->stage) {
                struct fd_hw_query *hq;
-               LIST_FOR_EACH_ENTRY(hq, &ctx->active_queries, list) {
-                       bool was_active = is_active(hq, ctx->stage);
+               LIST_FOR_EACH_ENTRY(hq, &batch->ctx->active_queries, list) {
+                       bool was_active = is_active(hq, batch->stage);
                        bool now_active = is_active(hq, stage);
 
                        if (now_active && !was_active)
-                               resume_query(ctx, hq, ring);
+                               resume_query(batch, hq, ring);
                        else if (was_active && !now_active)
-                               pause_query(ctx, hq, ring);
+                               pause_query(batch, hq, ring);
                }
        }
-       clear_sample_cache(ctx);
-       ctx->stage = stage;
+       clear_sample_cache(batch);
+       batch->stage = stage;
 }
 
 /* call the provider->enable() for all the hw queries that were active
@@ -470,16 +439,17 @@ fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
  * for the duration of the batch.
  */
 void
-fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
+fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
+       struct fd_context *ctx = batch->ctx;
        for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
-               if (ctx->active_providers & (1 << idx)) {
+               if (batch->active_providers & (1 << idx)) {
                        assert(ctx->sample_providers[idx]);
                        if (ctx->sample_providers[idx]->enable)
                                ctx->sample_providers[idx]->enable(ctx, ring);
                }
        }
-       ctx->active_providers = 0;  /* clear it for next frame */
+       batch->active_providers = 0;  /* clear it for next frame */
 }
 
 void
@@ -505,7 +475,6 @@ fd_hw_query_init(struct pipe_context *pctx)
        util_slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
                        16, UTIL_SLAB_SINGLETHREADED);
        list_inithead(&ctx->active_queries);
-       list_inithead(&ctx->current_queries);
 }
 
 void
index 0afece3495f8193135114b7cc836a12815b9e896..abd86682a9ff5f328081ba911ae27c59fb9824d5 100644 (file)
@@ -84,7 +84,7 @@ struct fd_hw_sample_provider {
        /* when a new sample is required, emit appropriate cmdstream
         * and return a sample object:
         */
-       struct fd_hw_sample *(*get_sample)(struct fd_context *ctx,
+       struct fd_hw_sample *(*get_sample)(struct fd_batch *batch,
                        struct fd_ringbuffer *ring);
 
        /* accumulate the results from specified sample period: */
@@ -119,18 +119,17 @@ struct fd_hw_query {
 
        const struct fd_hw_sample_provider *provider;
 
-       /* list of fd_hw_sample_period in previous submits: */
+       /* list of fd_hw_sample_periods: */
        struct list_head periods;
 
-       /* list of fd_hw_sample_period's in current submit: */
-       struct list_head current_periods;
-
        /* if active and not paused, the current sample period (not
         * yet added to current_periods):
         */
        struct fd_hw_sample_period *period;
 
-       struct list_head list;  /* list-node in ctx->active_queries */
+       struct list_head list;   /* list-node in batch->active_queries */
+
+       int no_wait_cnt;         /* see fd_hw_get_query_result */
 };
 
 static inline struct fd_hw_query *
@@ -141,15 +140,15 @@ fd_hw_query(struct fd_query *q)
 
 struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type);
 /* helper for sample providers: */
-struct fd_hw_sample * fd_hw_sample_init(struct fd_context *ctx, uint32_t size);
+struct fd_hw_sample * fd_hw_sample_init(struct fd_batch *batch, uint32_t size);
 /* don't call directly, use fd_hw_sample_reference() */
 void __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp);
-void fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles);
-void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
+void fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles);
+void fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
                struct fd_ringbuffer *ring);
-void fd_hw_query_set_stage(struct fd_context *ctx,
+void fd_hw_query_set_stage(struct fd_batch *batch,
                struct fd_ringbuffer *ring, enum fd_render_stage stage);
-void fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring);
+void fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring);
 void fd_hw_query_register_provider(struct pipe_context *pctx,
                const struct fd_hw_sample_provider *provider);
 void fd_hw_query_init(struct pipe_context *pctx);
index a9b94610e46be69bda9b8d7f15175d3448c9ab87..b6c9488ec655afc22676a14a498a9e14329138c1 100644 (file)
 /* XXX this should go away, needed for 'struct winsys_handle' */
 #include "state_tracker/drm_driver.h"
 
-static bool
-pending(struct fd_resource *rsc, bool write)
-{
-       /* if we have a pending GPU write, we are busy in any case: */
-       if (rsc->write_batch)
-               return true;
-
-       /* if CPU wants to write, but we are pending a GPU read, we are busy: */
-       if (write && rsc->batch_mask)
-               return true;
-
-       if (rsc->stencil && pending(rsc->stencil, write))
-               return true;
-
-       return false;
-}
-
 static void
 fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
 {
@@ -755,6 +738,20 @@ slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
        }
 }
 
+/* special case to resize query buf after allocated.. */
+void
+fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
+{
+       struct fd_resource *rsc = fd_resource(prsc);
+
+       debug_assert(prsc->width0 == 0);
+       debug_assert(prsc->target == PIPE_BUFFER);
+       debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
+
+       prsc->width0 = sz;
+       realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
+}
+
 /**
  * Create a new texture object, using the given template info.
  */
@@ -812,6 +809,15 @@ fd_resource_create(struct pipe_screen *pscreen,
 
        size = setup_slices(rsc, alignment, format);
 
+       /* special case for hw-query buffer, which we need to allocate before we
+        * know the size:
+        */
+       if (size == 0) {
+               /* note, semi-intentional == instead of & */
+               debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
+               return prsc;
+       }
+
        if (rsc->layer_first) {
                rsc->layer_size = align(size, 4096);
                size = rsc->layer_size * prsc->array_size;
@@ -1048,7 +1054,8 @@ fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard)
                util_blitter_save_render_condition(ctx->blitter,
                        ctx->cond_query, ctx->cond_cond, ctx->cond_mode);
 
-       fd_hw_query_set_stage(ctx, ctx->batch->draw, FD_STAGE_BLIT);
+       if (ctx->batch)
+               fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_BLIT);
 
        ctx->discard = discard;
 }
@@ -1056,7 +1063,8 @@ fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard)
 static void
 fd_blitter_pipe_end(struct fd_context *ctx)
 {
-       fd_hw_query_set_stage(ctx, ctx->batch->draw, FD_STAGE_NULL);
+       if (ctx->batch)
+               fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_NULL);
 }
 
 static void
index fcdb4c1e3648021bfec6f042149d97c5ad5d3112..8caab6b8a5a84dfd7981fb1e6d3d0510fd869297 100644 (file)
@@ -104,6 +104,23 @@ fd_resource(struct pipe_resource *ptex)
        return (struct fd_resource *)ptex;
 }
 
+static inline bool
+pending(struct fd_resource *rsc, bool write)
+{
+       /* if we have a pending GPU write, we are busy in any case: */
+       if (rsc->write_batch)
+               return true;
+
+       /* if CPU wants to write, but we are pending a GPU read, we are busy: */
+       if (write && rsc->batch_mask)
+               return true;
+
+       if (rsc->stencil && pending(rsc->stencil, write))
+               return true;
+
+       return false;
+}
+
 struct fd_transfer {
        struct pipe_transfer base;
        void *staging;
@@ -140,6 +157,8 @@ fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
 void fd_resource_screen_init(struct pipe_screen *pscreen);
 void fd_resource_context_init(struct pipe_context *pctx);
 
+void fd_resource_resize(struct pipe_resource *prsc, uint32_t sz);
+
 bool fd_render_condition_check(struct pipe_context *pctx);
 
 #endif /* FREEDRENO_RESOURCE_H_ */
index 8ac41d290778b5a924979980b87c4bac01c0d09e..849ea08037dbe8d54ac4abdd7b7bc7f9601a0fa9 100644 (file)
@@ -37,6 +37,7 @@
 #include "freedreno_resource.h"
 #include "freedreno_texture.h"
 #include "freedreno_gmem.h"
+#include "freedreno_query_hw.h"
 #include "freedreno_util.h"
 
 /* All the generic state handling.. In case of CSO's that are specific
@@ -118,8 +119,10 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
        struct pipe_framebuffer_state *cso;
 
        if (ctx->screen->reorder) {
-               struct fd_batch *batch =
-                       fd_batch_from_fb(&ctx->screen->batch_cache, ctx, framebuffer);
+               struct fd_batch *batch;
+               if (likely(ctx->batch))
+                       fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_NULL);
+               batch = fd_batch_from_fb(&ctx->screen->batch_cache, ctx, framebuffer);
                fd_batch_reference(&ctx->batch, NULL);
                ctx->batch = batch;
                ctx->dirty = ~0;