freedreno: Split ir3_const's user buffer and indirect upload APIs.
[mesa.git] / src / gallium / drivers / freedreno / a4xx / fd4_query.c
index 41e3e6506bdc38c1da2a83983ad9b66fb990892b..465e4b3c03cf0d1d35fc6221da0501c9439f5326 100644 (file)
@@ -1,5 +1,3 @@
-/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
-
 /*
  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
  *
@@ -48,10 +46,10 @@ struct fd_rb_samp_ctrs {
  */
 
 static struct fd_hw_sample *
-occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+occlusion_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
        struct fd_hw_sample *samp =
-                       fd_hw_sample_init(ctx, sizeof(struct fd_rb_samp_ctrs));
+                       fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
 
        /* low bits of sample addr should be zero (since they are control
         * flags in RB_SAMPLE_COUNT_CONTROL):
@@ -73,7 +71,7 @@ occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
        OUT_RING(ring, 1);             /* NumInstances */
        OUT_RING(ring, 0);             /* NumIndices */
 
-       fd_event_write(ctx, ring, ZPASS_DONE);
+       fd_event_write(batch, ring, ZPASS_DONE);
 
        return samp;
 }
@@ -117,24 +115,25 @@ time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
         * just hard coded.  If we start exposing more countables than we
         * have counters, we will need to be more clever.
         */
-       fd_wfi(ctx, ring);
+       struct fd_batch *batch = fd_context_batch(ctx);
+       fd_wfi(batch, ring);
        OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1);
        OUT_RING(ring, CP_ALWAYS_COUNT);
 }
 
 static struct fd_hw_sample *
-time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+time_elapsed_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
-       struct fd_hw_sample *samp = fd_hw_sample_init(ctx, sizeof(uint64_t));
+       struct fd_hw_sample *samp = fd_hw_sample_init(batch, sizeof(uint64_t));
 
        /* use unused part of vsc_size_mem as scratch space, to avoid
         * extra allocation:
         */
-       struct fd_bo *scratch_bo = fd4_context(ctx)->vsc_size_mem;
+       struct fd_bo *scratch_bo = fd4_context(batch->ctx)->vsc_size_mem;
        const int sample_off = 128;
        const int addr_off = sample_off + 8;
 
-       debug_assert(ctx->screen->max_freq > 0);
+       debug_assert(batch->ctx->screen->max_freq > 0);
 
        /* Basic issue is that we need to read counter value to a relative
         * destination (with per-tile offset) rather than absolute dest
@@ -161,14 +160,14 @@ time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
         * shot, but that's really just polishing a turd..
         */
 
-       fd_wfi(ctx, ring);
+       fd_wfi(batch, ring);
 
        /* copy sample counter _LO and _HI to scratch: */
        OUT_PKT3(ring, CP_REG_TO_MEM, 2);
        OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) |
                        CP_REG_TO_MEM_0_64B |
-                       CP_REG_TO_MEM_0_CNT(2-1)); /* write 2 regs to mem */
-       OUT_RELOCW(ring, scratch_bo, sample_off, 0, 0);
+                       CP_REG_TO_MEM_0_CNT(1)); /* write 2 regs to mem */
+       OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
 
        /* ok... here we really *would* like to use the CP_SET_CONSTANT
         * mode which can add a constant to value in reg2 and write to
@@ -182,15 +181,15 @@ time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
 
        /* per-sample offset to scratch bo: */
        OUT_PKT3(ring, CP_MEM_WRITE, 2);
-       OUT_RELOCW(ring, scratch_bo, addr_off, 0, 0);
+       OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
        OUT_RING(ring, samp->offset);
 
        /* now add to that the per-tile base: */
        OUT_PKT3(ring, CP_REG_TO_MEM, 2);
        OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) |
                        CP_REG_TO_MEM_0_ACCUMULATE |
-                       CP_REG_TO_MEM_0_CNT(1-1));       /* readback 1 regs */
-       OUT_RELOCW(ring, scratch_bo, addr_off, 0, 0);
+                       CP_REG_TO_MEM_0_CNT(0));       /* readback 1 regs */
+       OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
 
        /* now copy that back to CP_ME_NRT_ADDR: */
        OUT_PKT3(ring, CP_MEM_TO_REG, 2);
@@ -251,9 +250,16 @@ static const struct fd_hw_sample_provider occlusion_predicate = {
                .accumulate_result = occlusion_predicate_accumulate_result,
 };
 
+static const struct fd_hw_sample_provider occlusion_predicate_conservative = {
+               .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
+               .active = FD_STAGE_DRAW,
+               .get_sample = occlusion_get_sample,
+               .accumulate_result = occlusion_predicate_accumulate_result,
+};
+
 static const struct fd_hw_sample_provider time_elapsed = {
                .query_type = PIPE_QUERY_TIME_ELAPSED,
-               .active = FD_STAGE_DRAW | FD_STAGE_CLEAR,
+               .active = FD_STAGE_ALL,
                .enable = time_elapsed_enable,
                .get_sample = time_elapsed_get_sample,
                .accumulate_result = time_elapsed_accumulate_result,
@@ -275,8 +281,16 @@ static const struct fd_hw_sample_provider timestamp = {
 
 void fd4_query_context_init(struct pipe_context *pctx)
 {
+       struct fd_context *ctx = fd_context(pctx);
+
+       ctx->create_query = fd_hw_create_query;
+       ctx->query_prepare = fd_hw_query_prepare;
+       ctx->query_prepare_tile = fd_hw_query_prepare_tile;
+       ctx->query_set_stage = fd_hw_query_set_stage;
+
        fd_hw_query_register_provider(pctx, &occlusion_counter);
        fd_hw_query_register_provider(pctx, &occlusion_predicate);
+       fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative);
        fd_hw_query_register_provider(pctx, &time_elapsed);
        fd_hw_query_register_provider(pctx, &timestamp);
 }