/*
 * Copyright © 2014 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
27 #include "pipe/p_defines.h"
28 #include "util/ralloc.h"
29 #include "util/u_inlines.h"
30 #include "util/u_memory.h"
31 #include "util/u_blitter.h"
32 #include "indices/u_primconvert.h"
33 #include "pipe/p_screen.h"
35 #include "vc4_screen.h"
36 #include "vc4_context.h"
37 #include "vc4_resource.h"
40 * Emits a no-op STORE_TILE_BUFFER_GENERAL.
42 * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
43 * some sort before another load is triggered.
46 vc4_store_before_load(struct vc4_context
*vc4
, bool *coords_emitted
)
51 cl_u8(&vc4
->rcl
, VC4_PACKET_STORE_TILE_BUFFER_GENERAL
);
52 cl_u8(&vc4
->rcl
, VC4_LOADSTORE_TILE_BUFFER_NONE
);
53 cl_u8(&vc4
->rcl
, (VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR
|
54 VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR
|
55 VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR
));
56 cl_u32(&vc4
->rcl
, 0); /* no address, since we're in None mode */
58 *coords_emitted
= false;
62 * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
64 * The tile coordinates packet triggers a pending load if there is one, are
65 * used for clipping during rendering, and determine where loads/stores happen
66 * relative to their base address.
69 vc4_tile_coordinates(struct vc4_context
*vc4
, uint32_t x
, uint32_t y
,
75 cl_u8(&vc4
->rcl
, VC4_PACKET_TILE_COORDINATES
);
79 *coords_emitted
= true;
83 vc4_setup_rcl(struct vc4_context
*vc4
)
85 struct vc4_surface
*csurf
= vc4_surface(vc4
->framebuffer
.cbufs
[0]);
86 struct vc4_resource
*ctex
= csurf
? vc4_resource(csurf
->base
.texture
) : NULL
;
87 struct vc4_surface
*zsurf
= vc4_surface(vc4
->framebuffer
.zsbuf
);
88 struct vc4_resource
*ztex
= zsurf
? vc4_resource(zsurf
->base
.texture
) : NULL
;
91 vc4
->resolve
&= ~PIPE_CLEAR_COLOR0
;
93 vc4
->resolve
&= ~(PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
);
94 uint32_t resolve_uncleared
= vc4
->resolve
& ~vc4
->cleared
;
95 uint32_t width
= vc4
->framebuffer
.width
;
96 uint32_t height
= vc4
->framebuffer
.height
;
97 uint32_t xtiles
= align(width
, 64) / 64;
98 uint32_t ytiles
= align(height
, 64) / 64;
101 fprintf(stderr
, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
107 cl_u8(&vc4
->rcl
, VC4_PACKET_CLEAR_COLORS
);
108 cl_u32(&vc4
->rcl
, vc4
->clear_color
[0]);
109 cl_u32(&vc4
->rcl
, vc4
->clear_color
[1]);
110 cl_u32(&vc4
->rcl
, vc4
->clear_depth
);
111 cl_u8(&vc4
->rcl
, vc4
->clear_stencil
);
113 /* The rendering mode config determines the pointer that's used for
114 * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. The kernel
115 * could handle a no-relocation rendering mode config and deny those
116 * packets, but instead we just tell the kernel we're doing our color
117 * rendering to the Z buffer, and just don't emit any of those
120 struct vc4_surface
*render_surf
= csurf
? csurf
: zsurf
;
121 struct vc4_resource
*render_tex
= vc4_resource(render_surf
->base
.texture
);
122 cl_start_reloc(&vc4
->rcl
, 1);
123 cl_u8(&vc4
->rcl
, VC4_PACKET_TILE_RENDERING_MODE_CONFIG
);
124 cl_reloc(vc4
, &vc4
->rcl
, render_tex
->bo
, render_surf
->offset
);
125 cl_u16(&vc4
->rcl
, width
);
126 cl_u16(&vc4
->rcl
, height
);
127 cl_u16(&vc4
->rcl
, ((render_surf
->tiling
<<
128 VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT
) |
129 (vc4_rt_format_is_565(render_surf
->base
.format
) ?
130 VC4_RENDER_CONFIG_FORMAT_BGR565
:
131 VC4_RENDER_CONFIG_FORMAT_RGBA8888
) |
132 VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE
));
134 /* The tile buffer normally gets cleared when the previous tile is
135 * stored. If the clear values changed between frames, then the tile
136 * buffer has stale clear values in it, so we have to do a store in
137 * None mode (no writes) so that we trigger the tile buffer clear.
139 * Excess clearing is only a performance cost, since per-tile contents
140 * will be loaded/stored in the loop below.
142 if (vc4
->cleared
& (PIPE_CLEAR_COLOR0
|
144 PIPE_CLEAR_STENCIL
)) {
145 cl_u8(&vc4
->rcl
, VC4_PACKET_TILE_COORDINATES
);
149 cl_u8(&vc4
->rcl
, VC4_PACKET_STORE_TILE_BUFFER_GENERAL
);
150 cl_u16(&vc4
->rcl
, VC4_LOADSTORE_TILE_BUFFER_NONE
);
151 cl_u32(&vc4
->rcl
, 0); /* no address, since we're in None mode */
154 uint32_t color_hindex
= ctex
? vc4_gem_hindex(vc4
, ctex
->bo
) : 0;
155 uint32_t depth_hindex
= ztex
? vc4_gem_hindex(vc4
, ztex
->bo
) : 0;
156 uint32_t tile_alloc_hindex
= vc4_gem_hindex(vc4
, vc4
->tile_alloc
);
158 for (int y
= 0; y
< ytiles
; y
++) {
159 for (int x
= 0; x
< xtiles
; x
++) {
160 bool end_of_frame
= (x
== xtiles
- 1 &&
162 bool coords_emitted
= false;
164 /* Note that the load doesn't actually occur until the
165 * tile coords packet is processed, and only one load
166 * may be outstanding at a time.
168 if (resolve_uncleared
& PIPE_CLEAR_COLOR
) {
169 vc4_store_before_load(vc4
, &coords_emitted
);
171 cl_start_reloc(&vc4
->rcl
, 1);
172 cl_u8(&vc4
->rcl
, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
);
174 VC4_LOADSTORE_TILE_BUFFER_COLOR
|
176 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT
));
178 vc4_rt_format_is_565(csurf
->base
.format
) ?
179 VC4_LOADSTORE_TILE_BUFFER_BGR565
:
180 VC4_LOADSTORE_TILE_BUFFER_RGBA8888
);
181 cl_reloc_hindex(&vc4
->rcl
, color_hindex
,
184 vc4_tile_coordinates(vc4
, x
, y
, &coords_emitted
);
187 if (resolve_uncleared
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
)) {
188 vc4_store_before_load(vc4
, &coords_emitted
);
190 cl_start_reloc(&vc4
->rcl
, 1);
191 cl_u8(&vc4
->rcl
, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
);
193 VC4_LOADSTORE_TILE_BUFFER_ZS
|
195 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT
));
197 cl_reloc_hindex(&vc4
->rcl
, depth_hindex
,
200 vc4_tile_coordinates(vc4
, x
, y
, &coords_emitted
);
203 /* Clipping depends on tile coordinates having been
204 * emitted, so make sure it's happened even if
205 * everything was cleared to start.
207 vc4_tile_coordinates(vc4
, x
, y
, &coords_emitted
);
209 /* Wait for the binner before jumping to the first
212 if (x
== 0 && y
== 0)
213 cl_u8(&vc4
->rcl
, VC4_PACKET_WAIT_ON_SEMAPHORE
);
215 cl_start_reloc(&vc4
->rcl
, 1);
216 cl_u8(&vc4
->rcl
, VC4_PACKET_BRANCH_TO_SUB_LIST
);
217 cl_reloc_hindex(&vc4
->rcl
, tile_alloc_hindex
,
218 (y
* xtiles
+ x
) * 32);
220 if (vc4
->resolve
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
)) {
221 vc4_tile_coordinates(vc4
, x
, y
, &coords_emitted
);
223 cl_start_reloc(&vc4
->rcl
, 1);
224 cl_u8(&vc4
->rcl
, VC4_PACKET_STORE_TILE_BUFFER_GENERAL
);
226 VC4_LOADSTORE_TILE_BUFFER_ZS
|
228 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT
));
230 VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR
);
231 cl_reloc_hindex(&vc4
->rcl
, depth_hindex
,
234 !(vc4
->resolve
& PIPE_CLEAR_COLOR0
)) ?
235 VC4_LOADSTORE_TILE_BUFFER_EOF
: 0));
237 coords_emitted
= false;
240 if (vc4
->resolve
& PIPE_CLEAR_COLOR0
) {
241 vc4_tile_coordinates(vc4
, x
, y
, &coords_emitted
);
244 VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF
);
247 VC4_PACKET_STORE_MS_TILE_BUFFER
);
250 coords_emitted
= false;
253 /* One of the bits needs to have been set that would
254 * have triggered an EOF.
256 assert(vc4
->resolve
& (PIPE_CLEAR_COLOR0
|
258 PIPE_CLEAR_STENCIL
));
259 /* Any coords emitted must also have been consumed by
262 assert(!coords_emitted
);
266 if (vc4
->resolve
& PIPE_CLEAR_COLOR0
)
269 if (vc4
->resolve
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
))
274 vc4_flush(struct pipe_context
*pctx
)
276 struct vc4_context
*vc4
= vc4_context(pctx
);
278 if (!vc4
->needs_flush
)
281 /* Increment the semaphore indicating that binning is done and
282 * unblocking the render thread. Note that this doesn't act until the
285 cl_u8(&vc4
->bcl
, VC4_PACKET_INCREMENT_SEMAPHORE
);
286 /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
287 cl_u8(&vc4
->bcl
, VC4_PACKET_FLUSH
);
291 if (vc4_debug
& VC4_DEBUG_CL
) {
292 fprintf(stderr
, "BCL:\n");
293 vc4_dump_cl(vc4
->bcl
.base
, vc4
->bcl
.end
- vc4
->bcl
.base
, false);
294 fprintf(stderr
, "RCL:\n");
295 vc4_dump_cl(vc4
->rcl
.base
, vc4
->rcl
.end
- vc4
->rcl
.base
, true);
298 struct drm_vc4_submit_cl submit
;
299 memset(&submit
, 0, sizeof(submit
));
301 submit
.bo_handles
= vc4
->bo_handles
.base
;
302 submit
.bo_handle_count
= (vc4
->bo_handles
.next
-
303 vc4
->bo_handles
.base
) / 4;
304 submit
.bin_cl
= vc4
->bcl
.base
;
305 submit
.bin_cl_size
= vc4
->bcl
.next
- vc4
->bcl
.base
;
306 submit
.render_cl
= vc4
->rcl
.base
;
307 submit
.render_cl_size
= vc4
->rcl
.next
- vc4
->rcl
.base
;
308 submit
.shader_rec
= vc4
->shader_rec
.base
;
309 submit
.shader_rec_size
= vc4
->shader_rec
.next
- vc4
->shader_rec
.base
;
310 submit
.shader_rec_count
= vc4
->shader_rec_count
;
311 submit
.uniforms
= vc4
->uniforms
.base
;
312 submit
.uniforms_size
= vc4
->uniforms
.next
- vc4
->uniforms
.base
;
314 if (!(vc4_debug
& VC4_DEBUG_NORAST
)) {
317 #ifndef USE_VC4_SIMULATOR
318 ret
= drmIoctl(vc4
->fd
, DRM_IOCTL_VC4_SUBMIT_CL
, &submit
);
320 ret
= vc4_simulator_flush(vc4
, &submit
);
323 fprintf(stderr
, "VC4 submit failed\n");
328 vc4
->last_emit_seqno
= submit
.seqno
;
330 if (vc4_debug
& VC4_DEBUG_ALWAYS_SYNC
) {
331 if (!vc4_wait_seqno(vc4
->screen
, vc4
->last_emit_seqno
,
332 PIPE_TIMEOUT_INFINITE
)) {
333 fprintf(stderr
, "Wait failed.\n");
338 vc4_reset_cl(&vc4
->bcl
);
339 vc4_reset_cl(&vc4
->rcl
);
340 vc4_reset_cl(&vc4
->shader_rec
);
341 vc4_reset_cl(&vc4
->uniforms
);
342 vc4_reset_cl(&vc4
->bo_handles
);
343 struct vc4_bo
**referenced_bos
= vc4
->bo_pointers
.base
;
344 for (int i
= 0; i
< submit
.bo_handle_count
; i
++)
345 vc4_bo_unreference(&referenced_bos
[i
]);
346 vc4_reset_cl(&vc4
->bo_pointers
);
347 vc4
->shader_rec_count
= 0;
349 vc4
->needs_flush
= false;
350 vc4
->draw_call_queued
= false;
352 /* We have no hardware context saved between our draw calls, so we
353 * need to flag the next draw as needing all state emitted. Emitting
354 * all state at the start of our draws is also what ensures that we
355 * return to the state we need after a previous tile has finished.
363 vc4_pipe_flush(struct pipe_context
*pctx
, struct pipe_fence_handle
**fence
,
366 struct vc4_context
*vc4
= vc4_context(pctx
);
371 struct vc4_fence
*f
= vc4_fence_create(vc4
->screen
,
372 vc4
->last_emit_seqno
);
373 *fence
= (struct pipe_fence_handle
*)f
;
378 * Flushes the current command lists if they reference the given BO.
380 * This helps avoid flushing the command buffers when unnecessary.
383 vc4_cl_references_bo(struct pipe_context
*pctx
, struct vc4_bo
*bo
)
385 struct vc4_context
*vc4
= vc4_context(pctx
);
387 if (!vc4
->needs_flush
)
390 /* Walk all the referenced BOs in the drawing command list to see if
393 struct vc4_bo
**referenced_bos
= vc4
->bo_pointers
.base
;
394 for (int i
= 0; i
< (vc4
->bo_handles
.next
-
395 vc4
->bo_handles
.base
) / 4; i
++) {
396 if (referenced_bos
[i
] == bo
) {
401 /* Also check for the Z/color buffers, since the references to those
402 * are only added immediately before submit.
404 struct vc4_surface
*csurf
= vc4_surface(vc4
->framebuffer
.cbufs
[0]);
406 struct vc4_resource
*ctex
= vc4_resource(csurf
->base
.texture
);
407 if (ctex
->bo
== bo
) {
412 struct vc4_surface
*zsurf
= vc4_surface(vc4
->framebuffer
.zsbuf
);
414 struct vc4_resource
*ztex
=
415 vc4_resource(zsurf
->base
.texture
);
416 if (ztex
->bo
== bo
) {
425 vc4_context_destroy(struct pipe_context
*pctx
)
427 struct vc4_context
*vc4
= vc4_context(pctx
);
430 util_blitter_destroy(vc4
->blitter
);
432 if (vc4
->primconvert
)
433 util_primconvert_destroy(vc4
->primconvert
);
435 util_slab_destroy(&vc4
->transfer_pool
);
437 pipe_surface_reference(&vc4
->framebuffer
.cbufs
[0], NULL
);
438 pipe_surface_reference(&vc4
->framebuffer
.zsbuf
, NULL
);
439 vc4_bo_unreference(&vc4
->tile_alloc
);
440 vc4_bo_unreference(&vc4
->tile_state
);
442 vc4_program_fini(pctx
);
447 struct pipe_context
*
448 vc4_context_create(struct pipe_screen
*pscreen
, void *priv
)
450 struct vc4_screen
*screen
= vc4_screen(pscreen
);
451 struct vc4_context
*vc4
;
453 /* Prevent dumping of the shaders built during context setup. */
454 uint32_t saved_shaderdb_flag
= vc4_debug
& VC4_DEBUG_SHADERDB
;
455 vc4_debug
&= ~VC4_DEBUG_SHADERDB
;
457 vc4
= rzalloc(NULL
, struct vc4_context
);
460 struct pipe_context
*pctx
= &vc4
->base
;
462 vc4
->screen
= screen
;
464 pctx
->screen
= pscreen
;
466 pctx
->destroy
= vc4_context_destroy
;
467 pctx
->flush
= vc4_pipe_flush
;
470 vc4_state_init(pctx
);
471 vc4_program_init(pctx
);
472 vc4_query_init(pctx
);
473 vc4_resource_context_init(pctx
);
475 vc4_init_cl(vc4
, &vc4
->bcl
);
476 vc4_init_cl(vc4
, &vc4
->rcl
);
477 vc4_init_cl(vc4
, &vc4
->shader_rec
);
478 vc4_init_cl(vc4
, &vc4
->uniforms
);
479 vc4_init_cl(vc4
, &vc4
->bo_handles
);
480 vc4_init_cl(vc4
, &vc4
->bo_pointers
);
483 vc4
->fd
= screen
->fd
;
485 util_slab_create(&vc4
->transfer_pool
, sizeof(struct vc4_transfer
),
486 16, UTIL_SLAB_SINGLETHREADED
);
487 vc4
->blitter
= util_blitter_create(pctx
);
491 vc4
->primconvert
= util_primconvert_create(pctx
,
492 (1 << PIPE_PRIM_QUADS
) - 1);
493 if (!vc4
->primconvert
)
496 vc4_debug
|= saved_shaderdb_flag
;