freedreno: a2xx: fix fast clear not being used for Z24X8 buffers
[mesa.git] / src / gallium / drivers / freedreno / freedreno_batch.h
index 228a1b72bf64d87006427935ec19a4ba1153147b..edf0840825be470855be6c77bc112d1512b9aad7 100644 (file)
@@ -28,6 +28,7 @@
 #define FREEDRENO_BATCH_H_
 
 #include "util/u_inlines.h"
+#include "util/u_queue.h"
 #include "util/list.h"
 
 #include "freedreno_util.h"
@@ -49,19 +50,12 @@ enum fd_render_stage {
        FD_STAGE_NULL     = 0x01,
        FD_STAGE_DRAW     = 0x02,
        FD_STAGE_CLEAR    = 0x04,
-       /* TODO before queries which include MEM2GMEM or GMEM2MEM will
-        * work we will need to call fd_hw_query_prepare() from somewhere
-        * appropriate so that queries in the tiling IB get backed with
-        * memory to write results to.
-        */
-       FD_STAGE_MEM2GMEM = 0x08,
-       FD_STAGE_GMEM2MEM = 0x10,
        /* used for driver internal draws (ie. util_blitter_blit()): */
-       FD_STAGE_BLIT     = 0x20,
+       FD_STAGE_BLIT     = 0x08,
        FD_STAGE_ALL      = 0xff,
 };
 
-#define MAX_HW_SAMPLE_PROVIDERS 4
+#define MAX_HW_SAMPLE_PROVIDERS 5
 struct fd_hw_sample_provider;
 struct fd_hw_sample;
 
@@ -72,10 +66,16 @@ struct fd_hw_sample;
 struct fd_batch {
        struct pipe_reference reference;
        unsigned seqno;
-       unsigned idx;
+       unsigned idx;       /* index into cache->batches[] */
+
+       int in_fence_fd;
+       bool needs_out_fence_fd;
+       struct pipe_fence_handle *fence;
 
        struct fd_context *ctx;
 
+       struct util_queue_fence flush_fence;
+
        /* do we need to mem2gmem before rendering.  We don't, if for example,
         * there was a glClear() that invalidated the entire previous buffer
         * contents.  Keep track of which buffer(s) are cleared, or needs
@@ -84,6 +84,10 @@ struct fd_batch {
         * The 'cleared' bits will be set for buffers which are *entirely*
         * cleared, and 'partial_cleared' bits will be set if you must
         * check cleared_scissor.
+        *
+        * The 'invalidated' bits are set for cleared buffers, and for buffers
+        * whose contents are undefined, ie. buffers which we don't need to
+        * restore to gmem.
         */
        enum {
                /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
@@ -91,9 +95,19 @@ struct fd_batch {
                FD_BUFFER_DEPTH   = PIPE_CLEAR_DEPTH,
                FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
                FD_BUFFER_ALL     = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
-       } cleared, partial_cleared, restore, resolve;
+       } invalidated, cleared, fast_cleared, restore, resolve;
+
+       /* is this a non-draw batch (ie compute/blit which has no pfb state)? */
+       bool nondraw : 1;
+       bool needs_flush : 1;
+       bool flushed : 1;
+       bool blit : 1;
+       bool back_blit : 1;      /* only blit so far is resource shadowing back-blit */
 
-       bool needs_flush;
+       /* Keep track if WAIT_FOR_IDLE is needed for registers we need
+        * to update via RMW:
+        */
+       bool needs_wfi : 1;
 
        /* To decide whether to render to system memory, keep track of the
         * number of draws, and whether any of them require multisample,
@@ -106,11 +120,12 @@ struct fd_batch {
                FD_GMEM_DEPTH_ENABLED        = 0x02,
                FD_GMEM_STENCIL_ENABLED      = 0x04,
 
-               FD_GMEM_MSAA_ENABLED         = 0x08,
                FD_GMEM_BLEND_ENABLED        = 0x10,
                FD_GMEM_LOGICOP_ENABLED      = 0x20,
+               FD_GMEM_FB_READ              = 0x40,
        } gmem_reason;
        unsigned num_draws;   /* number of draws in current batch */
+       unsigned num_vertices;   /* number of vertices in current batch */
 
        /* Track the maximal bounds of the scissor of all the draws within a
         * batch.  Used at the tile rendering step (fd_gmem_render_tiles(),
@@ -118,19 +133,14 @@ struct fd_batch {
         */
        struct pipe_scissor_state max_scissor;
 
-       /* Track the cleared scissor for color/depth/stencil, so we know
-        * which, if any, tiles need to be restored (mem2gmem).  Only valid
-        * if the corresponding bit in ctx->cleared is set.
-        */
-       struct {
-               struct pipe_scissor_state color, depth, stencil;
-       } cleared_scissor;
-
        /* Keep track of DRAW initiators that need to be patched up depending
         * on whether we using binning or not:
         */
        struct util_dynarray draw_patches;
 
+       /* texture state that needs patching for fb_read: */
+       struct util_dynarray fb_read_patches;
+
        /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
         * once we know whether or not to use GMEM, and GMEM tile pitch.
         *
@@ -139,8 +149,22 @@ struct fd_batch {
         */
        struct util_dynarray rbrc_patches;
 
+       /* Keep track of GMEM related values that need to be patched up once we
+        * know the gmem layout:
+        */
+       struct util_dynarray gmem_patches;
+
+       /* Keep track of pointer to start of MEM exports for a20x binning shaders
+        *
+        * this is so the end of the shader can be cut off at the right point
+        * depending on the GMEM configuration
+        */
+       struct util_dynarray shader_patches;
+
        struct pipe_framebuffer_state framebuffer;
 
+       struct fd_submit *submit;
+
        /** draw pass cmdstream: */
        struct fd_ringbuffer *draw;
        /** binning pass cmdstream: */
@@ -148,6 +172,15 @@ struct fd_batch {
        /** tiling/gmem (IB0) cmdstream: */
        struct fd_ringbuffer *gmem;
 
+       // TODO maybe more generically split out clear and clear_binning rings?
+       struct fd_ringbuffer *lrz_clear;
+       struct fd_ringbuffer *tile_setup;
+       struct fd_ringbuffer *tile_fini;
+
+       union pipe_color_union clear_color[MAX_RENDER_TARGETS];
+       double clear_depth;
+       unsigned clear_stencil;
+
        /**
         * hw query related state:
         */
@@ -192,10 +225,12 @@ struct fd_batch {
        uint32_t dependents_mask;
 };
 
-struct fd_batch * fd_batch_create(struct fd_context *ctx);
+struct fd_batch * fd_batch_create(struct fd_context *ctx, bool nondraw);
 
 void fd_batch_reset(struct fd_batch *batch);
-void fd_batch_flush(struct fd_batch *batch);
+void fd_batch_sync(struct fd_batch *batch);
+void fd_batch_flush(struct fd_batch *batch, bool sync);
+void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep);
 void fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc, bool write);
 void fd_batch_check_size(struct fd_batch *batch);
 
@@ -203,14 +238,76 @@ void fd_batch_check_size(struct fd_batch *batch);
 void __fd_batch_describe(char* buf, const struct fd_batch *batch);
 void __fd_batch_destroy(struct fd_batch *batch);
 
+/*
+ * NOTE the rule is, you need to hold the screen->lock when destroying
+ * a batch..  so either use fd_batch_reference() (which grabs the lock
+ * for you) if you don't hold the lock, or fd_batch_reference_locked()
+ * if you do hold the lock.
+ *
+ * WARNING the _locked() version can briefly drop the lock.  Without
+ * recursive mutexes, I'm not sure there is much else we can do (since
+ * __fd_batch_destroy() needs to unref resources)
+ *
+ * WARNING you must acquire the screen->lock and use the _locked()
+ * version if the batch being ref'd could otherwise disappear under
+ * you.
+ */
+
+/* fwd-decl prototypes to untangle header dependency :-/ */
+static inline void fd_context_assert_locked(struct fd_context *ctx);
+static inline void fd_context_lock(struct fd_context *ctx);
+static inline void fd_context_unlock(struct fd_context *ctx);
+
 static inline void
-fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
+fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
 {
        struct fd_batch *old_batch = *ptr;
+       /* The lock is only required when an old reference may be dropped,
+        * so only assert that it is held if *ptr currently holds a batch: */
+       if (old_batch)
+               fd_context_assert_locked(old_batch->ctx);
+       /* a true result from pipe_reference_described() means old_batch must be destroyed: */
        if (pipe_reference_described(&(*ptr)->reference, &batch->reference,
                        (debug_reference_descriptor)__fd_batch_describe))
                __fd_batch_destroy(old_batch);
+
        *ptr = batch;
 }
 
+static inline void
+fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
+{
+       struct fd_batch *old_batch = *ptr;
+       struct fd_context *ctx = old_batch ? old_batch->ctx : NULL;
+       /* take the lock of the context owning the batch currently in *ptr, if any: */
+       if (ctx)
+               fd_context_lock(ctx);
+
+       fd_batch_reference_locked(ptr, batch);
+       /* release the lock taken above (ctx was captured before *ptr was replaced): */
+       if (ctx)
+               fd_context_unlock(ctx);
+}
+
+#include "freedreno_context.h"
+
+static inline void
+fd_reset_wfi(struct fd_batch *batch)
+{
+       batch->needs_wfi = true;   /* flag that a WAIT_FOR_IDLE is needed before RMW register updates (see needs_wfi) */
+}
+
+void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring);
+
+/* Emit a CP_EVENT_WRITE packet into 'ring' with 'evt' as its single
+ * OUT_RING value, then set batch->needs_wfi via fd_reset_wfi(). */
+static inline void
+fd_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
+               enum vgt_event_type evt)
+{
+       OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+       OUT_RING(ring, evt);
+       fd_reset_wfi(batch);
+}
+
 #endif /* FREEDRENO_BATCH_H_ */