freedreno/a6xx: Avoid stalling for occlusion queries
author Kristian H. Kristensen <hoegsberg@google.com>
Fri, 15 May 2020 19:23:18 +0000 (12:23 -0700)
committer Marge Bot <eric+marge@anholt.net>
Thu, 21 May 2020 00:16:55 +0000 (00:16 +0000)
If we postpone computing the counter delta until after each tile (or
sysmem pass), we don't have to stall in the middle of the draw stream.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5064>

src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
src/gallium/drivers/freedreno/a6xx/fd6_query.c
src/gallium/drivers/freedreno/freedreno_batch.c
src/gallium/drivers/freedreno/freedreno_batch.h

index 0e053d62ce38bcd03cf3e417ee00fbb117593cf7..b71c77ddc6cb046e5cbc800c287d850fd888e9d5 100644 (file)
@@ -1377,6 +1377,9 @@ fd6_emit_tile_fini(struct fd_batch *batch)
 {
        struct fd_ringbuffer *ring = batch->gmem;
 
+       if (batch->epilogue)
+               fd6_emit_ib(batch->gmem, batch->epilogue);
+
        OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
        OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_UNK3);
 
@@ -1527,6 +1530,9 @@ fd6_emit_sysmem_fini(struct fd_batch *batch)
 {
        struct fd_ringbuffer *ring = batch->gmem;
 
+       if (batch->epilogue)
+               fd6_emit_ib(batch->gmem, batch->epilogue);
+
        OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
        OUT_RING(ring, 0x0);
 
index 8258760e2e60255495b6749ed8a7d240c016ded4..a23b02f4e0bdadc34ef408dd7e65b897468e15e0 100644 (file)
@@ -95,21 +95,20 @@ occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
 
        fd6_event_write(batch, ring, ZPASS_DONE, false);
 
-       OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
-       OUT_RING(ring, 0x00000014);   // XXX
-       OUT_RELOC(ring, query_sample(aq, stop));
-       OUT_RING(ring, 0xffffffff);
-       OUT_RING(ring, 0xffffffff);
-       OUT_RING(ring, 0x00000010);   // XXX
+       /* To avoid stalling in the draw buffer, emit the code to compute the
+        * counter delta in the epilogue ring.
+        */
+       struct fd_ringbuffer *epilogue = fd_batch_get_epilogue(batch);
+       fd_wfi(batch, epilogue);
 
        /* result += stop - start: */
-       OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
-       OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
+       OUT_PKT7(epilogue, CP_MEM_TO_MEM, 9);
+       OUT_RING(epilogue, CP_MEM_TO_MEM_0_DOUBLE |
                        CP_MEM_TO_MEM_0_NEG_C);
-       OUT_RELOC(ring, query_sample(aq, result));     /* dst */
-       OUT_RELOC(ring, query_sample(aq, result));      /* srcA */
-       OUT_RELOC(ring, query_sample(aq, stop));        /* srcB */
-       OUT_RELOC(ring, query_sample(aq, start));       /* srcC */
+       OUT_RELOC(epilogue, query_sample(aq, result));     /* dst */
+       OUT_RELOC(epilogue, query_sample(aq, result));      /* srcA */
+       OUT_RELOC(epilogue, query_sample(aq, stop));        /* srcB */
+       OUT_RELOC(epilogue, query_sample(aq, start));       /* srcC */
 
        fd6_context(batch->ctx)->samples_passed_queries--;
 }
index 288f3307ad345d1929c104cbdce0bc4929fbc6b0..b31dd2c02c8240a04eff24b38df822d4201ccf3e 100644 (file)
@@ -161,6 +161,11 @@ batch_fini(struct fd_batch *batch)
                batch->lrz_clear = NULL;
        }
 
+       if (batch->epilogue) {
+               fd_ringbuffer_del(batch->epilogue);
+               batch->epilogue = NULL;
+       }
+
        if (batch->tile_setup) {
                fd_ringbuffer_del(batch->tile_setup);
                batch->tile_setup = NULL;
index 6a48c3435acd608331d6a2453d6d8734922fea72..e64d9061b6b854c1d0c7374ac77587ab5a664646 100644 (file)
@@ -184,6 +184,9 @@ struct fd_batch {
        /** tiling/gmem (IB0) cmdstream: */
        struct fd_ringbuffer *gmem;
 
+       /** epilogue cmdstream: */
+       struct fd_ringbuffer *epilogue;
+
        // TODO maybe more generically split out clear and clear_binning rings?
        struct fd_ringbuffer *lrz_clear;
        struct fd_ringbuffer *tile_setup;
@@ -336,4 +339,14 @@ fd_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
        fd_reset_wfi(batch);
 }
 
+/**
+ * Return the batch's epilogue ringbuffer, creating it on first use.
+ *
+ * If batch->epilogue is NULL, a new ringbuffer is requested from
+ * batch->submit and cached in batch->epilogue; later calls return the
+ * same ring.  The result of fd_submit_new_ringbuffer() is returned
+ * without any check here.
+ *
+ * NOTE(review): 0x1000 is presumably the initial ring size and 0 the
+ * ringbuffer flags -- confirm against fd_submit_new_ringbuffer().
+ */
+static inline struct fd_ringbuffer *
+fd_batch_get_epilogue(struct fd_batch *batch)
+{
+       if (batch->epilogue == NULL)
+               batch->epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000, 0);
+
+       return batch->epilogue;
+}
+
+
 #endif /* FREEDRENO_BATCH_H_ */