src/gallium/drivers/freedreno/freedreno_batch.h

   1 /*
   2  * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors:
  24  *    Rob Clark <robclark@freedesktop.org>
  25  */
  26
  27 #ifndef FREEDRENO_BATCH_H_
  28 #define FREEDRENO_BATCH_H_
  29
  30 #include "util/u_inlines.h"
  31 #include "util/u_queue.h"
  32 #include "util/list.h"
  33
  34 #include "freedreno_util.h"
  35
  36 struct fd_context;
  37 struct fd_resource;
  38 enum fd_resource_status;
  39
  40 /* Bitmask of stages in rendering that a particular query query is
  41  * active.  Queries will be automatically started/stopped (generating
  42  * additional fd_hw_sample_period's) on entrance/exit from stages that
  43  * are applicable to the query.
  44  *
  45  * NOTE: set the stage to NULL at end of IB to ensure no query is still
  46  * active.  Things aren't going to work out the way you want if a query
  47  * is active across IB's (or between tile IB and draw IB)
  48  */
  49 enum fd_render_stage {
  50         FD_STAGE_NULL     = 0x00,
  51         FD_STAGE_DRAW     = 0x01,
  52         FD_STAGE_CLEAR    = 0x02,
  53         /* used for driver internal draws (ie. util_blitter_blit()): */
  54         FD_STAGE_BLIT     = 0x04,
  55         FD_STAGE_ALL      = 0xff,
  56 };
  57
  58 #define MAX_HW_SAMPLE_PROVIDERS 7
  59 struct fd_hw_sample_provider;
  60 struct fd_hw_sample;
  61
  62 /* A batch tracks everything about a cmdstream batch/submit, including the
  63  * ringbuffers used for binning, draw, and gmem cmds, list of associated
  64  * fd_resource-s, etc.
  65  */
  66 struct fd_batch {
  67         struct pipe_reference reference;
  68         unsigned seqno;
  69         unsigned idx;       /* index into cache->batches[] */
  70
  71         int in_fence_fd;
  72         bool needs_out_fence_fd;
  73         struct pipe_fence_handle *fence;
  74
  75         struct fd_context *ctx;
  76
  77         /* do we need to mem2gmem before rendering.  We don't, if for example,
  78          * there was a glClear() that invalidated the entire previous buffer
  79          * contents.  Keep track of which buffer(s) are cleared, or needs
  80          * restore.  Masks of PIPE_CLEAR_*
  81          *
  82          * The 'cleared' bits will be set for buffers which are *entirely*
  83          * cleared, and 'partial_cleared' bits will be set if you must
  84          * check cleared_scissor.
  85          *
  86          * The 'invalidated' bits are set for cleared buffers, and buffers
  87          * where the contents are undefined, ie. what we don't need to restore
  88          * to gmem.
  89          */
  90         enum {
  91                 /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
  92                 FD_BUFFER_COLOR   = PIPE_CLEAR_COLOR,
  93                 FD_BUFFER_DEPTH   = PIPE_CLEAR_DEPTH,
  94                 FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
  95                 FD_BUFFER_ALL     = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
  96         } invalidated, cleared, fast_cleared, restore, resolve;
  97
  98         /* is this a non-draw batch (ie compute/blit which has no pfb state)? */
  99         bool nondraw : 1;
 100         bool needs_flush : 1;
 101         bool flushed : 1;
 102         bool blit : 1;
 103         bool back_blit : 1;      /* only blit so far is resource shadowing back-blit */
 104         bool tessellation : 1;      /* tessellation used in batch */
 105
 106         /* Keep track if WAIT_FOR_IDLE is needed for registers we need
 107          * to update via RMW:
 108          */
 109         bool needs_wfi : 1;
 110
 111         /* To decide whether to render to system memory, keep track of the
 112          * number of draws, and whether any of them require multisample,
 113          * depth_test (or depth write), stencil_test, blending, and
 114          * color_logic_Op (since those functions are disabled when by-
 115          * passing GMEM.
 116          */
 117         enum {
 118                 FD_GMEM_CLEARS_DEPTH_STENCIL = 0x01,
 119                 FD_GMEM_DEPTH_ENABLED        = 0x02,
 120                 FD_GMEM_STENCIL_ENABLED      = 0x04,
 121
 122                 FD_GMEM_BLEND_ENABLED        = 0x10,
 123                 FD_GMEM_LOGICOP_ENABLED      = 0x20,
 124                 FD_GMEM_FB_READ              = 0x40,
 125         } gmem_reason;
 126
 127         /* At submit time, once we've decided that this batch will use GMEM
 128          * rendering, the appropriate gmem state is looked up:
 129          */
 130         const struct fd_gmem_stateobj *gmem_state;
 131
 132         unsigned num_draws;      /* number of draws in current batch */
 133         unsigned num_vertices;   /* number of vertices in current batch */
 134
 135         /* Currently only used on a6xx, to calculate vsc prim/draw stream
 136          * sizes:
 137          */
 138         unsigned num_bins_per_pipe;
 139         unsigned prim_strm_bits;
 140         unsigned draw_strm_bits;
 141
 142         /* Track the maximal bounds of the scissor of all the draws within a
 143          * batch.  Used at the tile rendering step (fd_gmem_render_tiles(),
 144          * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
 145          */
 146         struct pipe_scissor_state max_scissor;
 147
 148         /* Keep track of DRAW initiators that need to be patched up depending
 149          * on whether we using binning or not:
 150          */
 151         struct util_dynarray draw_patches;
 152
 153         /* texture state that needs patching for fb_read: */
 154         struct util_dynarray fb_read_patches;
 155
 156         /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
 157          * once we know whether or not to use GMEM, and GMEM tile pitch.
 158          *
 159          * (only for a3xx.. but having gen specific subclasses of fd_batch
 160          * seemed overkill for now)
 161          */
 162         struct util_dynarray rbrc_patches;
 163
 164         /* Keep track of GMEM related values that need to be patched up once we
 165          * know the gmem layout:
 166          */
 167         struct util_dynarray gmem_patches;
 168
 169         /* Keep track of pointer to start of MEM exports for a20x binning shaders
 170          *
 171          * this is so the end of the shader can be cut off at the right point
 172          * depending on the GMEM configuration
 173          */
 174         struct util_dynarray shader_patches;
 175
 176         struct pipe_framebuffer_state framebuffer;
 177
 178         struct fd_submit *submit;
 179
 180         /** draw pass cmdstream: */
 181         struct fd_ringbuffer *draw;
 182         /** binning pass cmdstream: */
 183         struct fd_ringbuffer *binning;
 184         /** tiling/gmem (IB0) cmdstream: */
 185         struct fd_ringbuffer *gmem;
 186
 187         /** epilogue cmdstream: */
 188         struct fd_ringbuffer *epilogue;
 189
 190         // TODO maybe more generically split out clear and clear_binning rings?
 191         struct fd_ringbuffer *lrz_clear;
 192         struct fd_ringbuffer *tile_setup;
 193         struct fd_ringbuffer *tile_fini;
 194
 195         union pipe_color_union clear_color[MAX_RENDER_TARGETS];
 196         double clear_depth;
 197         unsigned clear_stencil;
 198
 199         /**
 200          * hw query related state:
 201          */
 202         /*@{*/
 203         /* next sample offset.. incremented for each sample in the batch/
 204          * submit, reset to zero on next submit.
 205          */
 206         uint32_t next_sample_offset;
 207
 208         /* cached samples (in case multiple queries need to reference
 209          * the same sample snapshot)
 210          */
 211         struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
 212
 213         /* which sample providers were active in the current batch: */
 214         uint32_t active_providers;
 215
 216         /* tracking for current stage, to know when to start/stop
 217          * any active queries:
 218          */
 219         enum fd_render_stage stage;
 220
 221         /* list of samples in current batch: */
 222         struct util_dynarray samples;
 223
 224         /* current query result bo and tile stride: */
 225         struct pipe_resource *query_buf;
 226         uint32_t query_tile_stride;
 227         /*@}*/
 228
 229
 230         /* Set of resources used by currently-unsubmitted batch (read or
 231          * write).. does not hold a reference to the resource.
 232          */
 233         struct set *resources;
 234
 235         /** key in batch-cache (if not null): */
 236         const void *key;
 237         uint32_t hash;
 238
 239         /** set of dependent batches.. holds refs to dependent batches: */
 240         uint32_t dependents_mask;
 241
 242         /* Buffer for tessellation engine input
 243          */
 244         struct fd_bo *tessfactor_bo;
 245         uint32_t tessfactor_size;
 246
 247         /* Buffer for passing parameters between TCS and TES
 248          */
 249         struct fd_bo *tessparam_bo;
 250         uint32_t tessparam_size;
 251
 252         struct fd_ringbuffer *tess_addrs_constobj;
 253
 254         struct list_head log_chunks;  /* list of unflushed log chunks in fifo order */
 255 };
 256
 257 struct fd_batch * fd_batch_create(struct fd_context *ctx, bool nondraw);
 258
 259 void fd_batch_reset(struct fd_batch *batch);
 260 void fd_batch_flush(struct fd_batch *batch);
 261 void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep);
 262 void fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc);
 263 void fd_batch_resource_read_slowpath(struct fd_batch *batch, struct fd_resource *rsc);
 264 void fd_batch_check_size(struct fd_batch *batch);
 265
 266 /* not called directly: */
 267 void __fd_batch_describe(char* buf, const struct fd_batch *batch);
 268 void __fd_batch_destroy(struct fd_batch *batch);
 269
 270 /*
 271  * NOTE the rule is, you need to hold the screen->lock when destroying
 272  * a batch..  so either use fd_batch_reference() (which grabs the lock
 273  * for you) if you don't hold the lock, or fd_batch_reference_locked()
 274  * if you do hold the lock.
 275  *
 276  * WARNING the _locked() version can briefly drop the lock.  Without
 277  * recursive mutexes, I'm not sure there is much else we can do (since
 278  * __fd_batch_destroy() needs to unref resources)
 279  *
 280  * WARNING you must acquire the screen->lock and use the _locked()
 281  * version in case that the batch being ref'd can disappear under
 282  * you.
 283  */
 284
 285 /* fwd-decl prototypes to untangle header dependency :-/ */
 286 static inline void fd_context_assert_locked(struct fd_context *ctx);
 287 static inline void fd_context_lock(struct fd_context *ctx);
 288 static inline void fd_context_unlock(struct fd_context *ctx);
 289
 290 static inline void
 291 fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
 292 {
 293         struct fd_batch *old_batch = *ptr;
 294
 295         /* only need lock if a reference is dropped: */
 296         if (old_batch)
 297                 fd_context_assert_locked(old_batch->ctx);
 298
 299         if (pipe_reference_described(&(*ptr)->reference, &batch->reference,
 300                         (debug_reference_descriptor)__fd_batch_describe))
 301                 __fd_batch_destroy(old_batch);
 302
 303         *ptr = batch;
 304 }
 305
 306 static inline void
 307 fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
 308 {
 309         struct fd_batch *old_batch = *ptr;
 310         struct fd_context *ctx = old_batch ? old_batch->ctx : NULL;
 311
 312         if (ctx)
 313                 fd_context_lock(ctx);
 314
 315         fd_batch_reference_locked(ptr, batch);
 316
 317         if (ctx)
 318                 fd_context_unlock(ctx);
 319 }
 320
 321 #include "freedreno_context.h"
 322
 323 static inline void
 324 fd_reset_wfi(struct fd_batch *batch)
 325 {
 326         batch->needs_wfi = true;
 327 }
 328
 329 void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring);
 330
 331 /* emit a CP_EVENT_WRITE:
 332  */
 333 static inline void
 334 fd_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
 335                 enum vgt_event_type evt)
 336 {
 337         OUT_PKT3(ring, CP_EVENT_WRITE, 1);
 338         OUT_RING(ring, evt);
 339         fd_reset_wfi(batch);
 340 }
 341
 342 static inline struct fd_ringbuffer *
 343 fd_batch_get_epilogue(struct fd_batch *batch)
 344 {
 345         if (batch->epilogue == NULL)
 346                 batch->epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000, 0);
 347
 348         return batch->epilogue;
 349 }
 350
 351
 352 #endif /* FREEDRENO_BATCH_H_ */