freedreno/batch: replace lrz_clear with prologue
[mesa.git] src/gallium/drivers/freedreno/freedreno_batch.c
/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/list.h"
#include "util/set.h"
#include "util/hash_table.h"
#include "util/u_string.h"

#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "freedreno_fence.h"
#include "freedreno_resource.h"
#include "freedreno_query_hw.h"

static struct fd_ringbuffer *
alloc_ring(struct fd_batch *batch, unsigned sz, enum fd_ringbuffer_flags flags)
{
	struct fd_context *ctx = batch->ctx;

	/* if kernel is too old to support unlimited # of cmd buffers, we
	 * have no option but to allocate large worst-case sizes so that
	 * we don't need to grow the ringbuffer.  Performance is likely to
	 * suffer, but there is no good alternative.
	 *
	 * Otherwise if supported, allocate a growable ring with initial
	 * size of zero.
	 */
	if ((fd_device_version(ctx->screen->dev) >= FD_VERSION_UNLIMITED_CMDS) &&
			!(fd_mesa_debug & FD_DBG_NOGROW)) {
		flags |= FD_RINGBUFFER_GROWABLE;
		sz = 0;
	}

	return fd_submit_new_ringbuffer(batch->submit, sz, flags);
}

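/* Per-batch setup: allocate the submit and ringbuffers, create the fence,
 * and clear the per-batch tracking state.  Called from fd_batch_create()
 * and batch_reset().
 */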
static void
batch_init(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;

	batch->submit = fd_submit_new(ctx->pipe);
	if (batch->nondraw) {
		batch->draw = alloc_ring(batch, 0x100000, FD_RINGBUFFER_PRIMARY);
	} else {
		batch->gmem = alloc_ring(batch, 0x100000, FD_RINGBUFFER_PRIMARY);
		batch->draw = alloc_ring(batch, 0x100000, 0);

		/* a6xx+ re-uses draw rb for both draw and binning pass: */
		if (ctx->screen->gpu_id < 600) {
			batch->binning = alloc_ring(batch, 0x100000, 0);
		}
	}

	batch->in_fence_fd = -1;
	batch->fence = fd_fence_create(batch);

	batch->cleared = 0;
	batch->fast_cleared = 0;
	batch->invalidated = 0;
	batch->restore = batch->resolve = 0;
	batch->needs_flush = false;
	batch->flushed = false;
	batch->gmem_reason = 0;
	batch->num_draws = 0;
	batch->num_vertices = 0;
	batch->num_bins_per_pipe = 0;
	batch->prim_strm_bits = 0;
	batch->draw_strm_bits = 0;
	batch->stage = FD_STAGE_NULL;

	fd_reset_wfi(batch);

	util_dynarray_init(&batch->draw_patches, NULL);
	util_dynarray_init(&batch->fb_read_patches, NULL);

	if (is_a2xx(ctx->screen)) {
		util_dynarray_init(&batch->shader_patches, NULL);
		util_dynarray_init(&batch->gmem_patches, NULL);
	}

	if (is_a3xx(ctx->screen))
		util_dynarray_init(&batch->rbrc_patches, NULL);

	assert(batch->resources->entries == 0);

	util_dynarray_init(&batch->samples, NULL);

	list_inithead(&batch->log_chunks);
}

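/* Create a new batch.  The caller must hold the screen lock, since the
 * batch is added to the screen's live_batches set when BATCH_DEBUG is
 * enabled.
 */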
struct fd_batch *
fd_batch_create(struct fd_context *ctx, bool nondraw)
{
	struct fd_batch *batch = CALLOC_STRUCT(fd_batch);

	if (!batch)
		return NULL;

	DBG("%p", batch);

	pipe_reference_init(&batch->reference, 1);
	batch->ctx = ctx;
	batch->nondraw = nondraw;

	batch->resources = _mesa_set_create(NULL, _mesa_hash_pointer,
			_mesa_key_pointer_equal);

	batch_init(batch);

	fd_screen_assert_locked(ctx->screen);
	if (BATCH_DEBUG) {
		_mesa_set_add(ctx->screen->live_batches, batch);
	}

	return batch;
}

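/* Tear down everything allocated in batch_init(): ringbuffers, fence,
 * per-batch dynarrays and any outstanding hw samples.
 */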
static void
batch_fini(struct fd_batch *batch)
{
	DBG("%p", batch);

	pipe_resource_reference(&batch->query_buf, NULL);

	if (batch->in_fence_fd != -1)
		close(batch->in_fence_fd);

	/* in case batch wasn't flushed but fence was created: */
	fd_fence_populate(batch->fence, 0, -1);

	fd_fence_ref(&batch->fence, NULL);

	fd_ringbuffer_del(batch->draw);
	if (!batch->nondraw) {
		if (batch->binning)
			fd_ringbuffer_del(batch->binning);
		fd_ringbuffer_del(batch->gmem);
	} else {
		debug_assert(!batch->binning);
		debug_assert(!batch->gmem);
	}

	if (batch->prologue) {
		fd_ringbuffer_del(batch->prologue);
		batch->prologue = NULL;
	}

	if (batch->epilogue) {
		fd_ringbuffer_del(batch->epilogue);
		batch->epilogue = NULL;
	}

	if (batch->tile_setup) {
		fd_ringbuffer_del(batch->tile_setup);
		batch->tile_setup = NULL;
	}

	if (batch->tile_fini) {
		fd_ringbuffer_del(batch->tile_fini);
		batch->tile_fini = NULL;
	}

	if (batch->tessellation) {
		fd_bo_del(batch->tessfactor_bo);
		fd_bo_del(batch->tessparam_bo);
		fd_ringbuffer_del(batch->tess_addrs_constobj);
	}

	fd_submit_del(batch->submit);

	util_dynarray_fini(&batch->draw_patches);
	util_dynarray_fini(&batch->fb_read_patches);

	if (is_a2xx(batch->ctx->screen)) {
		util_dynarray_fini(&batch->shader_patches);
		util_dynarray_fini(&batch->gmem_patches);
	}

	if (is_a3xx(batch->ctx->screen))
		util_dynarray_fini(&batch->rbrc_patches);

	while (batch->samples.size > 0) {
		struct fd_hw_sample *samp =
			util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
		fd_hw_sample_reference(batch->ctx, &samp, NULL);
	}
	util_dynarray_fini(&batch->samples);

	assert(list_is_empty(&batch->log_chunks));
}

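/* Drop (and optionally flush) all batches this batch depends on, and
 * clear the dependents mask.
 */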
static void
batch_flush_reset_dependencies(struct fd_batch *batch, bool flush)
{
	struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
	struct fd_batch *dep;

	foreach_batch(dep, cache, batch->dependents_mask) {
		if (flush)
			fd_batch_flush(dep);
		fd_batch_reference(&dep, NULL);
	}

	batch->dependents_mask = 0;
}

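/* Detach all resources tracked by the batch, clearing their batch_mask
 * bit and write_batch pointer.  Caller must hold the screen lock.
 */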
static void
batch_reset_resources_locked(struct fd_batch *batch)
{
	fd_screen_assert_locked(batch->ctx->screen);

	set_foreach(batch->resources, entry) {
		struct fd_resource *rsc = (struct fd_resource *)entry->key;
		_mesa_set_remove(batch->resources, entry);
		debug_assert(rsc->batch_mask & (1 << batch->idx));
		rsc->batch_mask &= ~(1 << batch->idx);
		if (rsc->write_batch == batch)
			fd_batch_reference_locked(&rsc->write_batch, NULL);
	}
}

static void
batch_reset_resources(struct fd_batch *batch)
{
	fd_screen_lock(batch->ctx->screen);
	batch_reset_resources_locked(batch);
	fd_screen_unlock(batch->ctx->screen);
}

static void
batch_reset(struct fd_batch *batch)
{
	DBG("%p", batch);

	batch_flush_reset_dependencies(batch, false);
	batch_reset_resources(batch);

	batch_fini(batch);
	batch_init(batch);
}

void
fd_batch_reset(struct fd_batch *batch)
{
	if (batch->needs_flush)
		batch_reset(batch);
}

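/* Called when the last reference to the batch is dropped.  The context
 * lock is held on entry and temporarily released around the dependency
 * cleanup below.
 */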
void
__fd_batch_destroy(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;

	DBG("%p", batch);

	fd_context_assert_locked(batch->ctx);

	if (BATCH_DEBUG) {
		_mesa_set_remove_key(ctx->screen->live_batches, batch);
	}

	fd_bc_invalidate_batch(batch, true);

	batch_reset_resources_locked(batch);
	debug_assert(batch->resources->entries == 0);
	_mesa_set_destroy(batch->resources, NULL);

	fd_context_unlock(ctx);
	batch_flush_reset_dependencies(batch, false);
	debug_assert(batch->dependents_mask == 0);

	util_copy_framebuffer_state(&batch->framebuffer, NULL);
	batch_fini(batch);
	free(batch);
	fd_context_lock(ctx);
}

void
__fd_batch_describe(char* buf, const struct fd_batch *batch)
{
	sprintf(buf, "fd_batch<%u>", batch->seqno);
}

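/* Internal flush: pause any active queries, flush dependent batches,
 * kick off GMEM rendering, and drop the batch's resource tracking.
 */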
static void
batch_flush(struct fd_batch *batch)
{
	DBG("%p: needs_flush=%d", batch, batch->needs_flush);

	if (batch->flushed)
		return;

	batch->needs_flush = false;

	/* close out the draw cmds by making sure any active queries are
	 * paused:
	 */
	fd_batch_set_stage(batch, FD_STAGE_NULL);

	batch_flush_reset_dependencies(batch, true);

	batch->flushed = true;

	fd_fence_ref(&batch->ctx->last_fence, batch->fence);

	fd_gmem_render_tiles(batch);
	batch_reset_resources(batch);

	debug_assert(batch->reference.count > 0);

	fd_screen_lock(batch->ctx->screen);
	fd_bc_invalidate_batch(batch, false);
	fd_screen_unlock(batch->ctx->screen);
}

/* Get per-batch prologue */
struct fd_ringbuffer *
fd_batch_get_prologue(struct fd_batch *batch)
{
	if (!batch->prologue)
		batch->prologue = alloc_ring(batch, 0x1000, 0);
	return batch->prologue;
}

/* Flush the batch.  NOTE: this could drop the last ref to the batch.
 */
void
fd_batch_flush(struct fd_batch *batch)
{
	struct fd_batch *tmp = NULL;

	/* NOTE: we need to hold an extra ref across the body of flush,
	 * since the last ref to this batch could be dropped when cleaning
	 * up used_resources
	 */
	fd_batch_reference(&tmp, batch);

	batch_flush(tmp);

	if (batch == batch->ctx->batch) {
		fd_batch_reference(&batch->ctx->batch, NULL);
	}

	fd_batch_reference(&tmp, NULL);
}

/* find a batch's dependents mask, including recursive dependencies: */
static uint32_t
recursive_dependents_mask(struct fd_batch *batch)
{
	struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
	struct fd_batch *dep;
	uint32_t dependents_mask = batch->dependents_mask;

	foreach_batch(dep, cache, batch->dependents_mask)
		dependents_mask |= recursive_dependents_mask(dep);

	return dependents_mask;
}

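/* Record that 'batch' depends on 'dep', ie. that 'dep' must be flushed
 * before 'batch'.  Takes a reference to 'dep' which is dropped when the
 * dependency is cleared.
 */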
void
fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep)
{
	fd_screen_assert_locked(batch->ctx->screen);

	if (batch->dependents_mask & (1 << dep->idx))
		return;

	/* a loop should not be possible */
	debug_assert(!((1 << batch->idx) & recursive_dependents_mask(dep)));

	struct fd_batch *other = NULL;
	fd_batch_reference_locked(&other, dep);
	batch->dependents_mask |= (1 << dep->idx);
	DBG("%p: added dependency on %p", batch, dep);
}

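/* Flush the batch currently writing 'rsc'.  The screen lock is dropped
 * around the flush, since flushing needs to re-acquire it.
 */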
static void
flush_write_batch(struct fd_resource *rsc)
{
	struct fd_batch *b = NULL;
	fd_batch_reference_locked(&b, rsc->write_batch);

	fd_screen_unlock(b->ctx->screen);
	fd_batch_flush(b);
	fd_screen_lock(b->ctx->screen);

	fd_bc_invalidate_batch(b, false);
	fd_batch_reference_locked(&b, NULL);
}

static void
fd_batch_add_resource(struct fd_batch *batch, struct fd_resource *rsc)
{
	if (likely(fd_batch_references_resource(batch, rsc))) {
		debug_assert(_mesa_set_search(batch->resources, rsc));
		return;
	}

	debug_assert(!_mesa_set_search(batch->resources, rsc));

	_mesa_set_add(batch->resources, rsc);
	rsc->batch_mask |= (1 << batch->idx);
}

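/* Mark 'rsc' as written by 'batch'.  Any other batch already writing the
 * resource is flushed, and any other batches reading it become
 * dependencies of this one.
 */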
void
fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
{
	fd_screen_assert_locked(batch->ctx->screen);

	if (rsc->stencil)
		fd_batch_resource_write(batch, rsc->stencil);

	DBG("%p: write %p", batch, rsc);

	rsc->valid = true;

	/* note, invalidate write batch, to avoid further writes to rsc
	 * resulting in a write-after-read hazard.
	 */
	/* if we are pending read or write by any other batch: */
	if (unlikely(rsc->batch_mask & ~(1 << batch->idx))) {
		struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
		struct fd_batch *dep;

		if (rsc->write_batch && rsc->write_batch != batch)
			flush_write_batch(rsc);

		foreach_batch(dep, cache, rsc->batch_mask) {
			struct fd_batch *b = NULL;
			if (dep == batch)
				continue;
			/* note that batch_add_dep could flush and unref dep, so
			 * we need to hold a reference to keep it live for the
			 * fd_bc_invalidate_batch()
			 */
			fd_batch_reference(&b, dep);
			fd_batch_add_dep(batch, b);
			fd_bc_invalidate_batch(b, false);
			fd_batch_reference_locked(&b, NULL);
		}
	}
	fd_batch_reference_locked(&rsc->write_batch, batch);

	fd_batch_add_resource(batch, rsc);
}

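/* Mark 'rsc' as read by 'batch', first flushing any other batch with a
 * pending write to the resource.
 */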
void
fd_batch_resource_read_slowpath(struct fd_batch *batch, struct fd_resource *rsc)
{
	fd_screen_assert_locked(batch->ctx->screen);

	if (rsc->stencil)
		fd_batch_resource_read(batch, rsc->stencil);

	DBG("%p: read %p", batch, rsc);

	/* If reading a resource pending a write, go ahead and flush the
	 * writer.  This avoids situations where we end up having to
	 * flush the current batch in _resource_used()
	 */
	if (unlikely(rsc->write_batch && rsc->write_batch != batch))
		flush_write_batch(rsc);

	fd_batch_add_resource(batch, rsc);
}

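/* Flush the batch early if the draw ring is close to full (only relevant
 * for older kernels where the ring cannot grow), or unconditionally when
 * FD_DBG_FLUSH is set.
 */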
void
fd_batch_check_size(struct fd_batch *batch)
{
	debug_assert(!batch->flushed);

	if (unlikely(fd_mesa_debug & FD_DBG_FLUSH)) {
		fd_batch_flush(batch);
		return;
	}

	if (fd_device_version(batch->ctx->screen->dev) >= FD_VERSION_UNLIMITED_CMDS)
		return;

	struct fd_ringbuffer *ring = batch->draw;
	if ((ring->cur - ring->start) > (ring->size/4 - 0x1000))
		fd_batch_flush(batch);
}

/* emit a WAIT_FOR_IDLE only if needed, ie. if there has not already
 * been one since last draw:
 */
void
fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
	if (batch->needs_wfi) {
		if (batch->ctx->screen->gpu_id >= 500)
			OUT_WFI5(ring);
		else
			OUT_WFI(ring);
		batch->needs_wfi = false;
	}
}