vc4: Skip sending down the clear colors if not clearing.
[mesa.git] / src / gallium / drivers / vc4 / vc4_context.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <xf86drm.h>
25 #include <err.h>
26
27 #include "pipe/p_defines.h"
28 #include "util/ralloc.h"
29 #include "util/u_inlines.h"
30 #include "util/u_memory.h"
31 #include "util/u_blitter.h"
32 #include "indices/u_primconvert.h"
33 #include "pipe/p_screen.h"
34
35 #include "vc4_screen.h"
36 #include "vc4_context.h"
37 #include "vc4_resource.h"
38
39 /**
40 * Emits a no-op STORE_TILE_BUFFER_GENERAL.
41 *
42 * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
43 * some sort before another load is triggered.
44 */
45 static void
46 vc4_store_before_load(struct vc4_context *vc4, bool *coords_emitted)
47 {
48 if (!*coords_emitted)
49 return;
50
51 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
52 cl_u8(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
53 cl_u8(&vc4->rcl, (VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
54 VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
55 VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR));
56 cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
57
58 *coords_emitted = false;
59 }
60
61 /**
62 * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
63 *
64 * The tile coordinates packet triggers a pending load if there is one, are
65 * used for clipping during rendering, and determine where loads/stores happen
66 * relative to their base address.
67 */
68 static void
69 vc4_tile_coordinates(struct vc4_context *vc4, uint32_t x, uint32_t y,
70 bool *coords_emitted)
71 {
72 if (*coords_emitted)
73 return;
74
75 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
76 cl_u8(&vc4->rcl, x);
77 cl_u8(&vc4->rcl, y);
78
79 *coords_emitted = true;
80 }
81
82 static void
83 vc4_setup_rcl(struct vc4_context *vc4)
84 {
85 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
86 struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
87 struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
88 struct vc4_resource *ztex = zsurf ? vc4_resource(zsurf->base.texture) : NULL;
89
90 if (!csurf)
91 vc4->resolve &= ~PIPE_CLEAR_COLOR0;
92 if (!zsurf)
93 vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
94 uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
95 uint32_t width = vc4->framebuffer.width;
96 uint32_t height = vc4->framebuffer.height;
97 uint32_t stride_in_tiles = align(width, 64) / 64;
98
99 assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
100 uint32_t min_x_tile = vc4->draw_min_x / 64;
101 uint32_t min_y_tile = vc4->draw_min_y / 64;
102 uint32_t max_x_tile = (vc4->draw_max_x - 1) / 64;
103 uint32_t max_y_tile = (vc4->draw_max_y - 1) / 64;
104 uint32_t xtiles = max_x_tile - min_x_tile + 1;
105 uint32_t ytiles = max_y_tile - min_y_tile + 1;
106
107 #if 0
108 fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
109 vc4->resolve,
110 vc4->cleared,
111 resolve_uncleared);
112 #endif
113
114 uint32_t reloc_size = 9;
115 uint32_t clear_size = 14;
116 uint32_t config_size = 11 + reloc_size;
117 uint32_t loadstore_size = 7 + reloc_size;
118 uint32_t tilecoords_size = 3;
119 uint32_t branch_size = 5 + reloc_size;
120 uint32_t color_store_size = 1;
121 uint32_t semaphore_size = 1;
122 cl_ensure_space(&vc4->rcl,
123 clear_size +
124 config_size +
125 loadstore_size +
126 semaphore_size +
127 xtiles * ytiles * (loadstore_size * 4 +
128 tilecoords_size * 3 +
129 branch_size +
130 color_store_size));
131
132 if (vc4->cleared) {
133 cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
134 cl_u32(&vc4->rcl, vc4->clear_color[0]);
135 cl_u32(&vc4->rcl, vc4->clear_color[1]);
136 cl_u32(&vc4->rcl, vc4->clear_depth);
137 cl_u8(&vc4->rcl, vc4->clear_stencil);
138 }
139
140 /* The rendering mode config determines the pointer that's used for
141 * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. The kernel
142 * could handle a no-relocation rendering mode config and deny those
143 * packets, but instead we just tell the kernel we're doing our color
144 * rendering to the Z buffer, and just don't emit any of those
145 * packets.
146 */
147 struct vc4_surface *render_surf = csurf ? csurf : zsurf;
148 struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
149 cl_start_reloc(&vc4->rcl, 1);
150 cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
151 cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
152 cl_u16(&vc4->rcl, width);
153 cl_u16(&vc4->rcl, height);
154 cl_u16(&vc4->rcl, ((render_surf->tiling <<
155 VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
156 (vc4_rt_format_is_565(render_surf->base.format) ?
157 VC4_RENDER_CONFIG_FORMAT_BGR565 :
158 VC4_RENDER_CONFIG_FORMAT_RGBA8888)));
159
160 /* The tile buffer normally gets cleared when the previous tile is
161 * stored. If the clear values changed between frames, then the tile
162 * buffer has stale clear values in it, so we have to do a store in
163 * None mode (no writes) so that we trigger the tile buffer clear.
164 *
165 * Excess clearing is only a performance cost, since per-tile contents
166 * will be loaded/stored in the loop below.
167 */
168 if (vc4->cleared & (PIPE_CLEAR_COLOR0 |
169 PIPE_CLEAR_DEPTH |
170 PIPE_CLEAR_STENCIL)) {
171 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
172 cl_u8(&vc4->rcl, 0);
173 cl_u8(&vc4->rcl, 0);
174
175 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
176 cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
177 cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
178 }
179
180 uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0;
181 uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0;
182 uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc);
183
184 for (int y = min_y_tile; y <= max_y_tile; y++) {
185 for (int x = min_x_tile; x <= max_x_tile; x++) {
186 bool end_of_frame = (x == max_x_tile &&
187 y == max_y_tile);
188 bool coords_emitted = false;
189
190 /* Note that the load doesn't actually occur until the
191 * tile coords packet is processed, and only one load
192 * may be outstanding at a time.
193 */
194 if (resolve_uncleared & PIPE_CLEAR_COLOR) {
195 vc4_store_before_load(vc4, &coords_emitted);
196
197 cl_start_reloc(&vc4->rcl, 1);
198 cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
199 cl_u8(&vc4->rcl,
200 VC4_LOADSTORE_TILE_BUFFER_COLOR |
201 (csurf->tiling <<
202 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
203 cl_u8(&vc4->rcl,
204 vc4_rt_format_is_565(csurf->base.format) ?
205 VC4_LOADSTORE_TILE_BUFFER_BGR565 :
206 VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
207 cl_reloc_hindex(&vc4->rcl, color_hindex,
208 csurf->offset);
209
210 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
211 }
212
213 if (resolve_uncleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
214 vc4_store_before_load(vc4, &coords_emitted);
215
216 cl_start_reloc(&vc4->rcl, 1);
217 cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
218 cl_u8(&vc4->rcl,
219 VC4_LOADSTORE_TILE_BUFFER_ZS |
220 (zsurf->tiling <<
221 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
222 cl_u8(&vc4->rcl, 0);
223 cl_reloc_hindex(&vc4->rcl, depth_hindex,
224 zsurf->offset);
225
226 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
227 }
228
229 /* Clipping depends on tile coordinates having been
230 * emitted, so make sure it's happened even if
231 * everything was cleared to start.
232 */
233 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
234
235 /* Wait for the binner before jumping to the first
236 * tile's lists.
237 */
238 if (x == min_x_tile && y == min_y_tile)
239 cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE);
240
241 cl_start_reloc(&vc4->rcl, 1);
242 cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
243 cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex,
244 (y * stride_in_tiles + x) * 32);
245
246 if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
247 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
248
249 cl_start_reloc(&vc4->rcl, 1);
250 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
251 cl_u8(&vc4->rcl,
252 VC4_LOADSTORE_TILE_BUFFER_ZS |
253 (zsurf->tiling <<
254 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
255 cl_u8(&vc4->rcl,
256 VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
257 cl_reloc_hindex(&vc4->rcl, depth_hindex,
258 zsurf->offset |
259 ((end_of_frame &&
260 !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
261 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
262
263 coords_emitted = false;
264 }
265
266 if (vc4->resolve & PIPE_CLEAR_COLOR0) {
267 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
268 if (end_of_frame) {
269 cl_u8(&vc4->rcl,
270 VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
271 } else {
272 cl_u8(&vc4->rcl,
273 VC4_PACKET_STORE_MS_TILE_BUFFER);
274 }
275
276 coords_emitted = false;
277 }
278
279 /* One of the bits needs to have been set that would
280 * have triggered an EOF.
281 */
282 assert(vc4->resolve & (PIPE_CLEAR_COLOR0 |
283 PIPE_CLEAR_DEPTH |
284 PIPE_CLEAR_STENCIL));
285 /* Any coords emitted must also have been consumed by
286 * a store.
287 */
288 assert(!coords_emitted);
289 }
290 }
291
292 if (vc4->resolve & PIPE_CLEAR_COLOR0)
293 ctex->writes++;
294
295 if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
296 ztex->writes++;
297 }
298
299 static void
300 vc4_draw_reset(struct vc4_context *vc4)
301 {
302 struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
303 for (int i = 0; i < (vc4->bo_handles.next -
304 vc4->bo_handles.base) / 4; i++) {
305 vc4_bo_unreference(&referenced_bos[i]);
306 }
307 vc4_reset_cl(&vc4->bcl);
308 vc4_reset_cl(&vc4->rcl);
309 vc4_reset_cl(&vc4->shader_rec);
310 vc4_reset_cl(&vc4->uniforms);
311 vc4_reset_cl(&vc4->bo_handles);
312 vc4_reset_cl(&vc4->bo_pointers);
313 vc4->shader_rec_count = 0;
314
315 vc4->needs_flush = false;
316 vc4->draw_call_queued = false;
317
318 /* We have no hardware context saved between our draw calls, so we
319 * need to flag the next draw as needing all state emitted. Emitting
320 * all state at the start of our draws is also what ensures that we
321 * return to the state we need after a previous tile has finished.
322 */
323 vc4->dirty = ~0;
324 vc4->resolve = 0;
325 vc4->cleared = 0;
326
327 vc4->draw_min_x = ~0;
328 vc4->draw_min_y = ~0;
329 vc4->draw_max_x = 0;
330 vc4->draw_max_y = 0;
331 }
332
333 void
334 vc4_flush(struct pipe_context *pctx)
335 {
336 struct vc4_context *vc4 = vc4_context(pctx);
337
338 if (!vc4->needs_flush)
339 return;
340
341 /* The RCL setup would choke if the draw bounds cause no drawing, so
342 * just drop the drawing if that's the case.
343 */
344 if (vc4->draw_max_x <= vc4->draw_min_x ||
345 vc4->draw_max_y <= vc4->draw_min_y) {
346 vc4_draw_reset(vc4);
347 return;
348 }
349
350 /* Increment the semaphore indicating that binning is done and
351 * unblocking the render thread. Note that this doesn't act until the
352 * FLUSH completes.
353 */
354 cl_ensure_space(&vc4->bcl, 8);
355 cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
356 /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
357 cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
358
359 vc4_setup_rcl(vc4);
360
361 if (vc4_debug & VC4_DEBUG_CL) {
362 fprintf(stderr, "BCL:\n");
363 vc4_dump_cl(vc4->bcl.base, vc4->bcl.next - vc4->bcl.base, false);
364 fprintf(stderr, "RCL:\n");
365 vc4_dump_cl(vc4->rcl.base, vc4->rcl.next - vc4->rcl.base, true);
366 }
367
368 struct drm_vc4_submit_cl submit;
369 memset(&submit, 0, sizeof(submit));
370
371 submit.bo_handles = (uintptr_t)vc4->bo_handles.base;
372 submit.bo_handle_count = (vc4->bo_handles.next -
373 vc4->bo_handles.base) / 4;
374 submit.bin_cl = (uintptr_t)vc4->bcl.base;
375 submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base;
376 submit.render_cl = (uintptr_t)vc4->rcl.base;
377 submit.render_cl_size = vc4->rcl.next - vc4->rcl.base;
378 submit.shader_rec = (uintptr_t)vc4->shader_rec.base;
379 submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base;
380 submit.shader_rec_count = vc4->shader_rec_count;
381 submit.uniforms = (uintptr_t)vc4->uniforms.base;
382 submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base;
383
384 if (!(vc4_debug & VC4_DEBUG_NORAST)) {
385 int ret;
386
387 #ifndef USE_VC4_SIMULATOR
388 ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
389 #else
390 ret = vc4_simulator_flush(vc4, &submit);
391 #endif
392 if (ret) {
393 fprintf(stderr, "VC4 submit failed\n");
394 abort();
395 }
396 }
397
398 vc4->last_emit_seqno = submit.seqno;
399
400 if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
401 if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
402 PIPE_TIMEOUT_INFINITE)) {
403 fprintf(stderr, "Wait failed.\n");
404 abort();
405 }
406 }
407
408 vc4_draw_reset(vc4);
409 }
410
411 static void
412 vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
413 unsigned flags)
414 {
415 struct vc4_context *vc4 = vc4_context(pctx);
416
417 vc4_flush(pctx);
418
419 if (fence) {
420 struct vc4_fence *f = vc4_fence_create(vc4->screen,
421 vc4->last_emit_seqno);
422 *fence = (struct pipe_fence_handle *)f;
423 }
424 }
425
426 /**
427 * Flushes the current command lists if they reference the given BO.
428 *
429 * This helps avoid flushing the command buffers when unnecessary.
430 */
431 bool
432 vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo)
433 {
434 struct vc4_context *vc4 = vc4_context(pctx);
435
436 if (!vc4->needs_flush)
437 return false;
438
439 /* Walk all the referenced BOs in the drawing command list to see if
440 * they match.
441 */
442 struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
443 for (int i = 0; i < (vc4->bo_handles.next -
444 vc4->bo_handles.base) / 4; i++) {
445 if (referenced_bos[i] == bo) {
446 return true;
447 }
448 }
449
450 /* Also check for the Z/color buffers, since the references to those
451 * are only added immediately before submit.
452 */
453 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
454 if (csurf) {
455 struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
456 if (ctex->bo == bo) {
457 return true;
458 }
459 }
460
461 struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
462 if (zsurf) {
463 struct vc4_resource *ztex =
464 vc4_resource(zsurf->base.texture);
465 if (ztex->bo == bo) {
466 return true;
467 }
468 }
469
470 return false;
471 }
472
473 static void
474 vc4_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
475 {
476 struct vc4_context *vc4 = vc4_context(pctx);
477 struct pipe_surface *zsurf = vc4->framebuffer.zsbuf;
478
479 if (zsurf && zsurf->texture == prsc)
480 vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
481 }
482
483 static void
484 vc4_context_destroy(struct pipe_context *pctx)
485 {
486 struct vc4_context *vc4 = vc4_context(pctx);
487
488 if (vc4->blitter)
489 util_blitter_destroy(vc4->blitter);
490
491 if (vc4->primconvert)
492 util_primconvert_destroy(vc4->primconvert);
493
494 util_slab_destroy(&vc4->transfer_pool);
495
496 pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
497 pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL);
498 vc4_bo_unreference(&vc4->tile_alloc);
499 vc4_bo_unreference(&vc4->tile_state);
500
501 vc4_program_fini(pctx);
502
503 ralloc_free(vc4);
504 }
505
506 struct pipe_context *
507 vc4_context_create(struct pipe_screen *pscreen, void *priv)
508 {
509 struct vc4_screen *screen = vc4_screen(pscreen);
510 struct vc4_context *vc4;
511
512 /* Prevent dumping of the shaders built during context setup. */
513 uint32_t saved_shaderdb_flag = vc4_debug & VC4_DEBUG_SHADERDB;
514 vc4_debug &= ~VC4_DEBUG_SHADERDB;
515
516 vc4 = rzalloc(NULL, struct vc4_context);
517 if (vc4 == NULL)
518 return NULL;
519 struct pipe_context *pctx = &vc4->base;
520
521 vc4->screen = screen;
522
523 pctx->screen = pscreen;
524 pctx->priv = priv;
525 pctx->destroy = vc4_context_destroy;
526 pctx->flush = vc4_pipe_flush;
527 pctx->invalidate_resource = vc4_invalidate_resource;
528
529 vc4_draw_init(pctx);
530 vc4_state_init(pctx);
531 vc4_program_init(pctx);
532 vc4_query_init(pctx);
533 vc4_resource_context_init(pctx);
534
535 vc4_init_cl(vc4, &vc4->bcl);
536 vc4_init_cl(vc4, &vc4->rcl);
537 vc4_init_cl(vc4, &vc4->shader_rec);
538 vc4_init_cl(vc4, &vc4->uniforms);
539 vc4_init_cl(vc4, &vc4->bo_handles);
540 vc4_init_cl(vc4, &vc4->bo_pointers);
541 vc4_draw_reset(vc4);
542
543 vc4->fd = screen->fd;
544
545 util_slab_create(&vc4->transfer_pool, sizeof(struct vc4_transfer),
546 16, UTIL_SLAB_SINGLETHREADED);
547 vc4->blitter = util_blitter_create(pctx);
548 if (!vc4->blitter)
549 goto fail;
550
551 vc4->primconvert = util_primconvert_create(pctx,
552 (1 << PIPE_PRIM_QUADS) - 1);
553 if (!vc4->primconvert)
554 goto fail;
555
556 vc4_debug |= saved_shaderdb_flag;
557
558 return &vc4->base;
559
560 fail:
561 pctx->destroy(pctx);
562 return NULL;
563 }