d4a9eec7b081b249f628150ba183ec755ffd1d0d
[mesa.git] / src / gallium / drivers / vc4 / vc4_context.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <xf86drm.h>
25 #include <err.h>
26
27 #include "pipe/p_defines.h"
28 #include "util/ralloc.h"
29 #include "util/u_inlines.h"
30 #include "util/u_memory.h"
31 #include "util/u_blitter.h"
32 #include "indices/u_primconvert.h"
33 #include "pipe/p_screen.h"
34
35 #include "vc4_screen.h"
36 #include "vc4_context.h"
37 #include "vc4_resource.h"
38
39 /**
40 * Emits a no-op STORE_TILE_BUFFER_GENERAL.
41 *
42 * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
43 * some sort before another load is triggered.
44 */
45 static void
46 vc4_store_before_load(struct vc4_context *vc4, bool *coords_emitted)
47 {
48 if (!*coords_emitted)
49 return;
50
51 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
52 cl_u8(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
53 cl_u8(&vc4->rcl, (VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
54 VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
55 VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR));
56 cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
57
58 *coords_emitted = false;
59 }
60
61 /**
62 * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
63 *
64 * The tile coordinates packet triggers a pending load if there is one, are
65 * used for clipping during rendering, and determine where loads/stores happen
66 * relative to their base address.
67 */
68 static void
69 vc4_tile_coordinates(struct vc4_context *vc4, uint32_t x, uint32_t y,
70 bool *coords_emitted)
71 {
72 if (*coords_emitted)
73 return;
74
75 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
76 cl_u8(&vc4->rcl, x);
77 cl_u8(&vc4->rcl, y);
78
79 *coords_emitted = true;
80 }
81
82 static void
83 vc4_setup_rcl(struct vc4_context *vc4)
84 {
85 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
86 struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
87 struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
88 struct vc4_resource *ztex = zsurf ? vc4_resource(zsurf->base.texture) : NULL;
89
90 if (!csurf)
91 vc4->resolve &= ~PIPE_CLEAR_COLOR0;
92 if (!zsurf)
93 vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
94 uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
95 uint32_t width = vc4->framebuffer.width;
96 uint32_t height = vc4->framebuffer.height;
97 uint32_t xtiles = align(width, 64) / 64;
98 uint32_t ytiles = align(height, 64) / 64;
99
100 #if 0
101 fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
102 vc4->resolve,
103 vc4->cleared,
104 resolve_uncleared);
105 #endif
106
107 uint32_t reloc_size = 9;
108 uint32_t clear_size = 14;
109 uint32_t config_size = 11 + reloc_size;
110 uint32_t loadstore_size = 7 + reloc_size;
111 uint32_t tilecoords_size = 3;
112 uint32_t branch_size = 5 + reloc_size;
113 uint32_t color_store_size = 1;
114 cl_ensure_space(&vc4->rcl,
115 clear_size +
116 config_size +
117 loadstore_size +
118 xtiles * ytiles * (loadstore_size * 4 +
119 tilecoords_size * 3 +
120 branch_size +
121 color_store_size));
122
123 cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
124 cl_u32(&vc4->rcl, vc4->clear_color[0]);
125 cl_u32(&vc4->rcl, vc4->clear_color[1]);
126 cl_u32(&vc4->rcl, vc4->clear_depth);
127 cl_u8(&vc4->rcl, vc4->clear_stencil);
128
129 /* The rendering mode config determines the pointer that's used for
130 * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. The kernel
131 * could handle a no-relocation rendering mode config and deny those
132 * packets, but instead we just tell the kernel we're doing our color
133 * rendering to the Z buffer, and just don't emit any of those
134 * packets.
135 */
136 struct vc4_surface *render_surf = csurf ? csurf : zsurf;
137 struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
138 cl_start_reloc(&vc4->rcl, 1);
139 cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
140 cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
141 cl_u16(&vc4->rcl, width);
142 cl_u16(&vc4->rcl, height);
143 cl_u16(&vc4->rcl, ((render_surf->tiling <<
144 VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
145 (vc4_rt_format_is_565(render_surf->base.format) ?
146 VC4_RENDER_CONFIG_FORMAT_BGR565 :
147 VC4_RENDER_CONFIG_FORMAT_RGBA8888) |
148 VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE));
149
150 /* The tile buffer normally gets cleared when the previous tile is
151 * stored. If the clear values changed between frames, then the tile
152 * buffer has stale clear values in it, so we have to do a store in
153 * None mode (no writes) so that we trigger the tile buffer clear.
154 *
155 * Excess clearing is only a performance cost, since per-tile contents
156 * will be loaded/stored in the loop below.
157 */
158 if (vc4->cleared & (PIPE_CLEAR_COLOR0 |
159 PIPE_CLEAR_DEPTH |
160 PIPE_CLEAR_STENCIL)) {
161 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
162 cl_u8(&vc4->rcl, 0);
163 cl_u8(&vc4->rcl, 0);
164
165 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
166 cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
167 cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
168 }
169
170 uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0;
171 uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0;
172 uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc);
173
174 for (int y = 0; y < ytiles; y++) {
175 for (int x = 0; x < xtiles; x++) {
176 bool end_of_frame = (x == xtiles - 1 &&
177 y == ytiles - 1);
178 bool coords_emitted = false;
179
180 /* Note that the load doesn't actually occur until the
181 * tile coords packet is processed, and only one load
182 * may be outstanding at a time.
183 */
184 if (resolve_uncleared & PIPE_CLEAR_COLOR) {
185 vc4_store_before_load(vc4, &coords_emitted);
186
187 cl_start_reloc(&vc4->rcl, 1);
188 cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
189 cl_u8(&vc4->rcl,
190 VC4_LOADSTORE_TILE_BUFFER_COLOR |
191 (csurf->tiling <<
192 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
193 cl_u8(&vc4->rcl,
194 vc4_rt_format_is_565(csurf->base.format) ?
195 VC4_LOADSTORE_TILE_BUFFER_BGR565 :
196 VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
197 cl_reloc_hindex(&vc4->rcl, color_hindex,
198 csurf->offset);
199
200 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
201 }
202
203 if (resolve_uncleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
204 vc4_store_before_load(vc4, &coords_emitted);
205
206 cl_start_reloc(&vc4->rcl, 1);
207 cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
208 cl_u8(&vc4->rcl,
209 VC4_LOADSTORE_TILE_BUFFER_ZS |
210 (zsurf->tiling <<
211 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
212 cl_u8(&vc4->rcl, 0);
213 cl_reloc_hindex(&vc4->rcl, depth_hindex,
214 zsurf->offset);
215
216 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
217 }
218
219 /* Clipping depends on tile coordinates having been
220 * emitted, so make sure it's happened even if
221 * everything was cleared to start.
222 */
223 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
224
225 /* Wait for the binner before jumping to the first
226 * tile's lists.
227 */
228 if (x == 0 && y == 0)
229 cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE);
230
231 cl_start_reloc(&vc4->rcl, 1);
232 cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
233 cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex,
234 (y * xtiles + x) * 32);
235
236 if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
237 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
238
239 cl_start_reloc(&vc4->rcl, 1);
240 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
241 cl_u8(&vc4->rcl,
242 VC4_LOADSTORE_TILE_BUFFER_ZS |
243 (zsurf->tiling <<
244 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
245 cl_u8(&vc4->rcl,
246 VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
247 cl_reloc_hindex(&vc4->rcl, depth_hindex,
248 zsurf->offset |
249 ((end_of_frame &&
250 !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
251 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
252
253 coords_emitted = false;
254 }
255
256 if (vc4->resolve & PIPE_CLEAR_COLOR0) {
257 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
258 if (end_of_frame) {
259 cl_u8(&vc4->rcl,
260 VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
261 } else {
262 cl_u8(&vc4->rcl,
263 VC4_PACKET_STORE_MS_TILE_BUFFER);
264 }
265
266 coords_emitted = false;
267 }
268
269 /* One of the bits needs to have been set that would
270 * have triggered an EOF.
271 */
272 assert(vc4->resolve & (PIPE_CLEAR_COLOR0 |
273 PIPE_CLEAR_DEPTH |
274 PIPE_CLEAR_STENCIL));
275 /* Any coords emitted must also have been consumed by
276 * a store.
277 */
278 assert(!coords_emitted);
279 }
280 }
281
282 if (vc4->resolve & PIPE_CLEAR_COLOR0)
283 ctex->writes++;
284
285 if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
286 ztex->writes++;
287 }
288
289 void
290 vc4_flush(struct pipe_context *pctx)
291 {
292 struct vc4_context *vc4 = vc4_context(pctx);
293
294 if (!vc4->needs_flush)
295 return;
296
297 /* Increment the semaphore indicating that binning is done and
298 * unblocking the render thread. Note that this doesn't act until the
299 * FLUSH completes.
300 */
301 cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
302 /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
303 cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
304
305 vc4_setup_rcl(vc4);
306
307 if (vc4_debug & VC4_DEBUG_CL) {
308 fprintf(stderr, "BCL:\n");
309 vc4_dump_cl(vc4->bcl.base, vc4->bcl.size, false);
310 fprintf(stderr, "RCL:\n");
311 vc4_dump_cl(vc4->rcl.base, vc4->rcl.size, true);
312 }
313
314 struct drm_vc4_submit_cl submit;
315 memset(&submit, 0, sizeof(submit));
316
317 submit.bo_handles = vc4->bo_handles.base;
318 submit.bo_handle_count = (vc4->bo_handles.next -
319 vc4->bo_handles.base) / 4;
320 submit.bin_cl = vc4->bcl.base;
321 submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base;
322 submit.render_cl = vc4->rcl.base;
323 submit.render_cl_size = vc4->rcl.next - vc4->rcl.base;
324 submit.shader_rec = vc4->shader_rec.base;
325 submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base;
326 submit.shader_rec_count = vc4->shader_rec_count;
327 submit.uniforms = vc4->uniforms.base;
328 submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base;
329
330 if (!(vc4_debug & VC4_DEBUG_NORAST)) {
331 int ret;
332
333 #ifndef USE_VC4_SIMULATOR
334 ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
335 #else
336 ret = vc4_simulator_flush(vc4, &submit);
337 #endif
338 if (ret) {
339 fprintf(stderr, "VC4 submit failed\n");
340 abort();
341 }
342 }
343
344 vc4->last_emit_seqno = submit.seqno;
345
346 if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
347 if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
348 PIPE_TIMEOUT_INFINITE)) {
349 fprintf(stderr, "Wait failed.\n");
350 abort();
351 }
352 }
353
354 vc4_reset_cl(&vc4->bcl);
355 vc4_reset_cl(&vc4->rcl);
356 vc4_reset_cl(&vc4->shader_rec);
357 vc4_reset_cl(&vc4->uniforms);
358 vc4_reset_cl(&vc4->bo_handles);
359 struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
360 for (int i = 0; i < submit.bo_handle_count; i++)
361 vc4_bo_unreference(&referenced_bos[i]);
362 vc4_reset_cl(&vc4->bo_pointers);
363 vc4->shader_rec_count = 0;
364
365 vc4->needs_flush = false;
366 vc4->draw_call_queued = false;
367
368 /* We have no hardware context saved between our draw calls, so we
369 * need to flag the next draw as needing all state emitted. Emitting
370 * all state at the start of our draws is also what ensures that we
371 * return to the state we need after a previous tile has finished.
372 */
373 vc4->dirty = ~0;
374 vc4->resolve = 0;
375 vc4->cleared = 0;
376 }
377
378 static void
379 vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
380 unsigned flags)
381 {
382 struct vc4_context *vc4 = vc4_context(pctx);
383
384 vc4_flush(pctx);
385
386 if (fence) {
387 struct vc4_fence *f = vc4_fence_create(vc4->screen,
388 vc4->last_emit_seqno);
389 *fence = (struct pipe_fence_handle *)f;
390 }
391 }
392
393 /**
394 * Flushes the current command lists if they reference the given BO.
395 *
396 * This helps avoid flushing the command buffers when unnecessary.
397 */
398 bool
399 vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo)
400 {
401 struct vc4_context *vc4 = vc4_context(pctx);
402
403 if (!vc4->needs_flush)
404 return false;
405
406 /* Walk all the referenced BOs in the drawing command list to see if
407 * they match.
408 */
409 struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
410 for (int i = 0; i < (vc4->bo_handles.next -
411 vc4->bo_handles.base) / 4; i++) {
412 if (referenced_bos[i] == bo) {
413 return true;
414 }
415 }
416
417 /* Also check for the Z/color buffers, since the references to those
418 * are only added immediately before submit.
419 */
420 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
421 if (csurf) {
422 struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
423 if (ctex->bo == bo) {
424 return true;
425 }
426 }
427
428 struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
429 if (zsurf) {
430 struct vc4_resource *ztex =
431 vc4_resource(zsurf->base.texture);
432 if (ztex->bo == bo) {
433 return true;
434 }
435 }
436
437 return false;
438 }
439
440 static void
441 vc4_context_destroy(struct pipe_context *pctx)
442 {
443 struct vc4_context *vc4 = vc4_context(pctx);
444
445 if (vc4->blitter)
446 util_blitter_destroy(vc4->blitter);
447
448 if (vc4->primconvert)
449 util_primconvert_destroy(vc4->primconvert);
450
451 util_slab_destroy(&vc4->transfer_pool);
452
453 pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
454 pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL);
455 vc4_bo_unreference(&vc4->tile_alloc);
456 vc4_bo_unreference(&vc4->tile_state);
457
458 vc4_program_fini(pctx);
459
460 ralloc_free(vc4);
461 }
462
463 struct pipe_context *
464 vc4_context_create(struct pipe_screen *pscreen, void *priv)
465 {
466 struct vc4_screen *screen = vc4_screen(pscreen);
467 struct vc4_context *vc4;
468
469 /* Prevent dumping of the shaders built during context setup. */
470 uint32_t saved_shaderdb_flag = vc4_debug & VC4_DEBUG_SHADERDB;
471 vc4_debug &= ~VC4_DEBUG_SHADERDB;
472
473 vc4 = rzalloc(NULL, struct vc4_context);
474 if (vc4 == NULL)
475 return NULL;
476 struct pipe_context *pctx = &vc4->base;
477
478 vc4->screen = screen;
479
480 pctx->screen = pscreen;
481 pctx->priv = priv;
482 pctx->destroy = vc4_context_destroy;
483 pctx->flush = vc4_pipe_flush;
484
485 vc4_draw_init(pctx);
486 vc4_state_init(pctx);
487 vc4_program_init(pctx);
488 vc4_query_init(pctx);
489 vc4_resource_context_init(pctx);
490
491 vc4_init_cl(vc4, &vc4->bcl);
492 vc4_init_cl(vc4, &vc4->rcl);
493 vc4_init_cl(vc4, &vc4->shader_rec);
494 vc4_init_cl(vc4, &vc4->uniforms);
495 vc4_init_cl(vc4, &vc4->bo_handles);
496 vc4_init_cl(vc4, &vc4->bo_pointers);
497
498 vc4->dirty = ~0;
499 vc4->fd = screen->fd;
500
501 util_slab_create(&vc4->transfer_pool, sizeof(struct vc4_transfer),
502 16, UTIL_SLAB_SINGLETHREADED);
503 vc4->blitter = util_blitter_create(pctx);
504 if (!vc4->blitter)
505 goto fail;
506
507 vc4->primconvert = util_primconvert_create(pctx,
508 (1 << PIPE_PRIM_QUADS) - 1);
509 if (!vc4->primconvert)
510 goto fail;
511
512 vc4_debug |= saved_shaderdb_flag;
513
514 return &vc4->base;
515
516 fail:
517 pctx->destroy(pctx);
518 return NULL;
519 }