freedreno/a6xx: Avoid stalling for occlusion queries
[mesa.git] / src / gallium / drivers / freedreno / freedreno_batch.h
1 /*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #ifndef FREEDRENO_BATCH_H_
28 #define FREEDRENO_BATCH_H_
29
30 #include "util/u_inlines.h"
31 #include "util/u_queue.h"
32 #include "util/list.h"
33
34 #include "freedreno_util.h"
35
36 struct fd_context;
37 struct fd_resource;
38 enum fd_resource_status;
39
/* Bitmask of the rendering stages during which a particular query is
 * active.  Queries are automatically started/stopped (generating
 * additional fd_hw_sample_period's) on entrance to/exit from the stages
 * that are applicable to the query.
 *
 * NOTE: set the stage to FD_STAGE_NULL at the end of an IB to ensure no
 * query is still active.  Things aren't going to work out the way you
 * want if a query is active across IB's (or between tile IB and draw IB).
 */
enum fd_render_stage {
   FD_STAGE_NULL  = 0,
   FD_STAGE_DRAW  = 1 << 0,
   FD_STAGE_CLEAR = 1 << 1,
   /* used for driver internal draws (ie. util_blitter_blit()): */
   FD_STAGE_BLIT  = 1 << 2,
   /* mask covering every stage bit: */
   FD_STAGE_ALL   = 0xff,
};
57
/* Number of slots in fd_batch::sample_cache[] (one cached sample
 * pointer per slot).
 */
#define MAX_HW_SAMPLE_PROVIDERS 7

/* Only used by pointer in this header, so forward declarations suffice: */
struct fd_hw_sample_provider;
struct fd_hw_sample;
61
62 /* A batch tracks everything about a cmdstream batch/submit, including the
63 * ringbuffers used for binning, draw, and gmem cmds, list of associated
64 * fd_resource-s, etc.
65 */
66 struct fd_batch {
67 struct pipe_reference reference;
68 unsigned seqno;
69 unsigned idx; /* index into cache->batches[] */
70
71 int in_fence_fd;
72 bool needs_out_fence_fd;
73 struct pipe_fence_handle *fence;
74
75 struct fd_context *ctx;
76
77 /* do we need to mem2gmem before rendering. We don't, if for example,
78 * there was a glClear() that invalidated the entire previous buffer
79 * contents. Keep track of which buffer(s) are cleared, or needs
80 * restore. Masks of PIPE_CLEAR_*
81 *
82 * The 'cleared' bits will be set for buffers which are *entirely*
83 * cleared, and 'partial_cleared' bits will be set if you must
84 * check cleared_scissor.
85 *
86 * The 'invalidated' bits are set for cleared buffers, and buffers
87 * where the contents are undefined, ie. what we don't need to restore
88 * to gmem.
89 */
90 enum {
91 /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
92 FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
93 FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
94 FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
95 FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
96 } invalidated, cleared, fast_cleared, restore, resolve;
97
98 /* is this a non-draw batch (ie compute/blit which has no pfb state)? */
99 bool nondraw : 1;
100 bool needs_flush : 1;
101 bool flushed : 1;
102 bool blit : 1;
103 bool back_blit : 1; /* only blit so far is resource shadowing back-blit */
104 bool tessellation : 1; /* tessellation used in batch */
105
106 /* Keep track if WAIT_FOR_IDLE is needed for registers we need
107 * to update via RMW:
108 */
109 bool needs_wfi : 1;
110
111 /* To decide whether to render to system memory, keep track of the
112 * number of draws, and whether any of them require multisample,
113 * depth_test (or depth write), stencil_test, blending, and
114 * color_logic_Op (since those functions are disabled when by-
115 * passing GMEM.
116 */
117 enum {
118 FD_GMEM_CLEARS_DEPTH_STENCIL = 0x01,
119 FD_GMEM_DEPTH_ENABLED = 0x02,
120 FD_GMEM_STENCIL_ENABLED = 0x04,
121
122 FD_GMEM_BLEND_ENABLED = 0x10,
123 FD_GMEM_LOGICOP_ENABLED = 0x20,
124 FD_GMEM_FB_READ = 0x40,
125 } gmem_reason;
126
127 /* At submit time, once we've decided that this batch will use GMEM
128 * rendering, the appropriate gmem state is looked up:
129 */
130 const struct fd_gmem_stateobj *gmem_state;
131
132 unsigned num_draws; /* number of draws in current batch */
133 unsigned num_vertices; /* number of vertices in current batch */
134
135 /* Currently only used on a6xx, to calculate vsc prim/draw stream
136 * sizes:
137 */
138 unsigned num_bins_per_pipe;
139 unsigned prim_strm_bits;
140 unsigned draw_strm_bits;
141
142 /* Track the maximal bounds of the scissor of all the draws within a
143 * batch. Used at the tile rendering step (fd_gmem_render_tiles(),
144 * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
145 */
146 struct pipe_scissor_state max_scissor;
147
148 /* Keep track of DRAW initiators that need to be patched up depending
149 * on whether we using binning or not:
150 */
151 struct util_dynarray draw_patches;
152
153 /* texture state that needs patching for fb_read: */
154 struct util_dynarray fb_read_patches;
155
156 /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
157 * once we know whether or not to use GMEM, and GMEM tile pitch.
158 *
159 * (only for a3xx.. but having gen specific subclasses of fd_batch
160 * seemed overkill for now)
161 */
162 struct util_dynarray rbrc_patches;
163
164 /* Keep track of GMEM related values that need to be patched up once we
165 * know the gmem layout:
166 */
167 struct util_dynarray gmem_patches;
168
169 /* Keep track of pointer to start of MEM exports for a20x binning shaders
170 *
171 * this is so the end of the shader can be cut off at the right point
172 * depending on the GMEM configuration
173 */
174 struct util_dynarray shader_patches;
175
176 struct pipe_framebuffer_state framebuffer;
177
178 struct fd_submit *submit;
179
180 /** draw pass cmdstream: */
181 struct fd_ringbuffer *draw;
182 /** binning pass cmdstream: */
183 struct fd_ringbuffer *binning;
184 /** tiling/gmem (IB0) cmdstream: */
185 struct fd_ringbuffer *gmem;
186
187 /** epilogue cmdstream: */
188 struct fd_ringbuffer *epilogue;
189
190 // TODO maybe more generically split out clear and clear_binning rings?
191 struct fd_ringbuffer *lrz_clear;
192 struct fd_ringbuffer *tile_setup;
193 struct fd_ringbuffer *tile_fini;
194
195 union pipe_color_union clear_color[MAX_RENDER_TARGETS];
196 double clear_depth;
197 unsigned clear_stencil;
198
199 /**
200 * hw query related state:
201 */
202 /*@{*/
203 /* next sample offset.. incremented for each sample in the batch/
204 * submit, reset to zero on next submit.
205 */
206 uint32_t next_sample_offset;
207
208 /* cached samples (in case multiple queries need to reference
209 * the same sample snapshot)
210 */
211 struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
212
213 /* which sample providers were active in the current batch: */
214 uint32_t active_providers;
215
216 /* tracking for current stage, to know when to start/stop
217 * any active queries:
218 */
219 enum fd_render_stage stage;
220
221 /* list of samples in current batch: */
222 struct util_dynarray samples;
223
224 /* current query result bo and tile stride: */
225 struct pipe_resource *query_buf;
226 uint32_t query_tile_stride;
227 /*@}*/
228
229
230 /* Set of resources used by currently-unsubmitted batch (read or
231 * write).. does not hold a reference to the resource.
232 */
233 struct set *resources;
234
235 /** key in batch-cache (if not null): */
236 const void *key;
237 uint32_t hash;
238
239 /** set of dependent batches.. holds refs to dependent batches: */
240 uint32_t dependents_mask;
241
242 /* Buffer for tessellation engine input
243 */
244 struct fd_bo *tessfactor_bo;
245 uint32_t tessfactor_size;
246
247 /* Buffer for passing parameters between TCS and TES
248 */
249 struct fd_bo *tessparam_bo;
250 uint32_t tessparam_size;
251
252 struct fd_ringbuffer *tess_addrs_constobj;
253
254 struct list_head log_chunks; /* list of unflushed log chunks in fifo order */
255 };
256
/* Batch construction; 'nondraw' is passed through by the caller (see the
 * fd_batch::nondraw flag for what a non-draw batch is).
 */
struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw);

/* Batch lifecycle and resource/dependency tracking entry points
 * (implemented outside this header):
 */
void fd_batch_reset(struct fd_batch *batch);
void fd_batch_flush(struct fd_batch *batch);
void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep);
void fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc);
void fd_batch_resource_read_slowpath(struct fd_batch *batch, struct fd_resource *rsc);
void fd_batch_check_size(struct fd_batch *batch);

/* Not called directly; used by the fd_batch_reference*() helpers: */
void __fd_batch_describe(char *buf, const struct fd_batch *batch);
void __fd_batch_destroy(struct fd_batch *batch);
269
270 /*
271 * NOTE the rule is, you need to hold the screen->lock when destroying
272 * a batch.. so either use fd_batch_reference() (which grabs the lock
273 * for you) if you don't hold the lock, or fd_batch_reference_locked()
274 * if you do hold the lock.
275 *
276 * WARNING the _locked() version can briefly drop the lock. Without
277 * recursive mutexes, I'm not sure there is much else we can do (since
278 * __fd_batch_destroy() needs to unref resources)
279 *
280 * WARNING you must acquire the screen->lock and use the _locked()
281 * version in case that the batch being ref'd can disappear under
282 * you.
283 */
284
/* Forward-declared prototypes, to untangle the header dependency :-/
 * (freedreno_context.h is only included further down in this header.)
 */
static inline void fd_context_assert_locked(struct fd_context *ctx);
static inline void fd_context_lock(struct fd_context *ctx);
static inline void fd_context_unlock(struct fd_context *ctx);
289
290 static inline void
291 fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
292 {
293 struct fd_batch *old_batch = *ptr;
294
295 /* only need lock if a reference is dropped: */
296 if (old_batch)
297 fd_context_assert_locked(old_batch->ctx);
298
299 if (pipe_reference_described(&(*ptr)->reference, &batch->reference,
300 (debug_reference_descriptor)__fd_batch_describe))
301 __fd_batch_destroy(old_batch);
302
303 *ptr = batch;
304 }
305
306 static inline void
307 fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
308 {
309 struct fd_batch *old_batch = *ptr;
310 struct fd_context *ctx = old_batch ? old_batch->ctx : NULL;
311
312 if (ctx)
313 fd_context_lock(ctx);
314
315 fd_batch_reference_locked(ptr, batch);
316
317 if (ctx)
318 fd_context_unlock(ctx);
319 }
320
321 #include "freedreno_context.h"
322
323 static inline void
324 fd_reset_wfi(struct fd_batch *batch)
325 {
326 batch->needs_wfi = true;
327 }
328
329 void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring);
330
331 /* emit a CP_EVENT_WRITE:
332 */
333 static inline void
334 fd_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
335 enum vgt_event_type evt)
336 {
337 OUT_PKT3(ring, CP_EVENT_WRITE, 1);
338 OUT_RING(ring, evt);
339 fd_reset_wfi(batch);
340 }
341
342 static inline struct fd_ringbuffer *
343 fd_batch_get_epilogue(struct fd_batch *batch)
344 {
345 if (batch->epilogue == NULL)
346 batch->epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000, 0);
347
348 return batch->epilogue;
349 }
350
351
352 #endif /* FREEDRENO_BATCH_H_ */