2 * Copyright © 2014 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27 #include "pipe/p_defines.h"
28 #include "util/ralloc.h"
29 #include "util/u_inlines.h"
30 #include "util/u_memory.h"
31 #include "util/u_blitter.h"
32 #include "util/u_upload_mgr.h"
33 #include "indices/u_primconvert.h"
34 #include "pipe/p_screen.h"
36 #include "vc4_screen.h"
37 #include "vc4_context.h"
38 #include "vc4_resource.h"
41 * Emits a no-op STORE_TILE_BUFFER_GENERAL.
43 * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
44 * some sort before another load is triggered.
47 vc4_store_before_load(struct vc4_context
*vc4
, bool *coords_emitted
)
52 cl_u8(&vc4
->rcl
, VC4_PACKET_STORE_TILE_BUFFER_GENERAL
);
53 cl_u8(&vc4
->rcl
, VC4_LOADSTORE_TILE_BUFFER_NONE
);
54 cl_u8(&vc4
->rcl
, (VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR
|
55 VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR
|
56 VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR
));
57 cl_u32(&vc4
->rcl
, 0); /* no address, since we're in None mode */
59 *coords_emitted
= false;
63 * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
65 * The tile coordinates packet triggers a pending load if there is one, is
66 * used for clipping during rendering, and determines where loads/stores
67 * happen relative to their base address.
70 vc4_tile_coordinates(struct vc4_context
*vc4
, uint32_t x
, uint32_t y
,
76 cl_u8(&vc4
->rcl
, VC4_PACKET_TILE_COORDINATES
);
80 *coords_emitted
= true;
84 vc4_setup_rcl(struct vc4_context
*vc4
)
86 struct vc4_surface
*csurf
= vc4_surface(vc4
->framebuffer
.cbufs
[0]);
87 struct vc4_resource
*ctex
= csurf
? vc4_resource(csurf
->base
.texture
) : NULL
;
88 struct vc4_surface
*zsurf
= vc4_surface(vc4
->framebuffer
.zsbuf
);
89 struct vc4_resource
*ztex
= zsurf
? vc4_resource(zsurf
->base
.texture
) : NULL
;
92 vc4
->resolve
&= ~PIPE_CLEAR_COLOR0
;
94 vc4
->resolve
&= ~(PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
);
95 uint32_t resolve_uncleared
= vc4
->resolve
& ~vc4
->cleared
;
96 uint32_t width
= vc4
->framebuffer
.width
;
97 uint32_t height
= vc4
->framebuffer
.height
;
98 uint32_t stride_in_tiles
= align(width
, 64) / 64;
100 assert(vc4
->draw_min_x
!= ~0 && vc4
->draw_min_y
!= ~0);
101 uint32_t min_x_tile
= vc4
->draw_min_x
/ 64;
102 uint32_t min_y_tile
= vc4
->draw_min_y
/ 64;
103 uint32_t max_x_tile
= (vc4
->draw_max_x
- 1) / 64;
104 uint32_t max_y_tile
= (vc4
->draw_max_y
- 1) / 64;
105 uint32_t xtiles
= max_x_tile
- min_x_tile
+ 1;
106 uint32_t ytiles
= max_y_tile
- min_y_tile
+ 1;
109 fprintf(stderr
, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
115 uint32_t reloc_size
= 9;
116 uint32_t clear_size
= 14;
117 uint32_t config_size
= 11 + reloc_size
;
118 uint32_t loadstore_size
= 7 + reloc_size
;
119 uint32_t tilecoords_size
= 3;
120 uint32_t branch_size
= 5 + reloc_size
;
121 uint32_t color_store_size
= 1;
122 uint32_t semaphore_size
= 1;
123 cl_ensure_space(&vc4
->rcl
,
128 xtiles
* ytiles
* (loadstore_size
* 4 +
129 tilecoords_size
* 3 +
134 cl_u8(&vc4
->rcl
, VC4_PACKET_CLEAR_COLORS
);
135 cl_u32(&vc4
->rcl
, vc4
->clear_color
[0]);
136 cl_u32(&vc4
->rcl
, vc4
->clear_color
[1]);
137 cl_u32(&vc4
->rcl
, vc4
->clear_depth
);
138 cl_u8(&vc4
->rcl
, vc4
->clear_stencil
);
141 /* The rendering mode config determines the pointer that's used for
142 * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. The kernel
143 * could handle a no-relocation rendering mode config and deny those
144 * packets, but instead we just tell the kernel we're doing our color
145 * rendering to the Z buffer, and just don't emit any of those
148 struct vc4_surface
*render_surf
= csurf
? csurf
: zsurf
;
149 struct vc4_resource
*render_tex
= vc4_resource(render_surf
->base
.texture
);
150 cl_start_reloc(&vc4
->rcl
, 1);
151 cl_u8(&vc4
->rcl
, VC4_PACKET_TILE_RENDERING_MODE_CONFIG
);
152 cl_reloc(vc4
, &vc4
->rcl
, render_tex
->bo
, render_surf
->offset
);
153 cl_u16(&vc4
->rcl
, width
);
154 cl_u16(&vc4
->rcl
, height
);
155 cl_u16(&vc4
->rcl
, ((render_surf
->tiling
<<
156 VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT
) |
157 (vc4_rt_format_is_565(render_surf
->base
.format
) ?
158 VC4_RENDER_CONFIG_FORMAT_BGR565
:
159 VC4_RENDER_CONFIG_FORMAT_RGBA8888
)));
161 /* The tile buffer normally gets cleared when the previous tile is
162 * stored. If the clear values changed between frames, then the tile
163 * buffer has stale clear values in it, so we have to do a store in
164 * None mode (no writes) so that we trigger the tile buffer clear.
166 * Excess clearing is only a performance cost, since per-tile contents
167 * will be loaded/stored in the loop below.
169 if (vc4
->cleared
& (PIPE_CLEAR_COLOR0
|
171 PIPE_CLEAR_STENCIL
)) {
172 cl_u8(&vc4
->rcl
, VC4_PACKET_TILE_COORDINATES
);
176 cl_u8(&vc4
->rcl
, VC4_PACKET_STORE_TILE_BUFFER_GENERAL
);
177 cl_u16(&vc4
->rcl
, VC4_LOADSTORE_TILE_BUFFER_NONE
);
178 cl_u32(&vc4
->rcl
, 0); /* no address, since we're in None mode */
181 uint32_t color_hindex
= ctex
? vc4_gem_hindex(vc4
, ctex
->bo
) : 0;
182 uint32_t depth_hindex
= ztex
? vc4_gem_hindex(vc4
, ztex
->bo
) : 0;
183 uint32_t tile_alloc_hindex
= vc4_gem_hindex(vc4
, vc4
->tile_alloc
);
185 for (int y
= min_y_tile
; y
<= max_y_tile
; y
++) {
186 for (int x
= min_x_tile
; x
<= max_x_tile
; x
++) {
187 bool end_of_frame
= (x
== max_x_tile
&&
189 bool coords_emitted
= false;
191 /* Note that the load doesn't actually occur until the
192 * tile coords packet is processed, and only one load
193 * may be outstanding at a time.
195 if (resolve_uncleared
& PIPE_CLEAR_COLOR
) {
196 vc4_store_before_load(vc4
, &coords_emitted
);
198 cl_start_reloc(&vc4
->rcl
, 1);
199 cl_u8(&vc4
->rcl
, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
);
201 VC4_LOADSTORE_TILE_BUFFER_COLOR
|
203 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT
));
205 vc4_rt_format_is_565(csurf
->base
.format
) ?
206 VC4_LOADSTORE_TILE_BUFFER_BGR565
:
207 VC4_LOADSTORE_TILE_BUFFER_RGBA8888
);
208 cl_reloc_hindex(&vc4
->rcl
, color_hindex
,
211 vc4_tile_coordinates(vc4
, x
, y
, &coords_emitted
);
214 if (resolve_uncleared
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
)) {
215 vc4_store_before_load(vc4
, &coords_emitted
);
217 cl_start_reloc(&vc4
->rcl
, 1);
218 cl_u8(&vc4
->rcl
, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
);
220 VC4_LOADSTORE_TILE_BUFFER_ZS
|
222 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT
));
224 cl_reloc_hindex(&vc4
->rcl
, depth_hindex
,
227 vc4_tile_coordinates(vc4
, x
, y
, &coords_emitted
);
230 /* Clipping depends on tile coordinates having been
231 * emitted, so make sure it's happened even if
232 * everything was cleared to start.
234 vc4_tile_coordinates(vc4
, x
, y
, &coords_emitted
);
236 /* Wait for the binner before jumping to the first
239 if (x
== min_x_tile
&& y
== min_y_tile
)
240 cl_u8(&vc4
->rcl
, VC4_PACKET_WAIT_ON_SEMAPHORE
);
242 cl_start_reloc(&vc4
->rcl
, 1);
243 cl_u8(&vc4
->rcl
, VC4_PACKET_BRANCH_TO_SUB_LIST
);
244 cl_reloc_hindex(&vc4
->rcl
, tile_alloc_hindex
,
245 (y
* stride_in_tiles
+ x
) * 32);
247 if (vc4
->resolve
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
)) {
248 vc4_tile_coordinates(vc4
, x
, y
, &coords_emitted
);
250 cl_start_reloc(&vc4
->rcl
, 1);
251 cl_u8(&vc4
->rcl
, VC4_PACKET_STORE_TILE_BUFFER_GENERAL
);
253 VC4_LOADSTORE_TILE_BUFFER_ZS
|
255 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT
));
257 VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR
);
258 cl_reloc_hindex(&vc4
->rcl
, depth_hindex
,
261 !(vc4
->resolve
& PIPE_CLEAR_COLOR0
)) ?
262 VC4_LOADSTORE_TILE_BUFFER_EOF
: 0));
264 coords_emitted
= false;
267 if (vc4
->resolve
& PIPE_CLEAR_COLOR0
) {
268 vc4_tile_coordinates(vc4
, x
, y
, &coords_emitted
);
271 VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF
);
274 VC4_PACKET_STORE_MS_TILE_BUFFER
);
277 coords_emitted
= false;
280 /* One of the bits needs to have been set that would
281 * have triggered an EOF.
283 assert(vc4
->resolve
& (PIPE_CLEAR_COLOR0
|
285 PIPE_CLEAR_STENCIL
));
286 /* Any coords emitted must also have been consumed by
289 assert(!coords_emitted
);
293 if (vc4
->resolve
& PIPE_CLEAR_COLOR0
)
296 if (vc4
->resolve
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
))
301 vc4_flush(struct pipe_context
*pctx
)
303 struct vc4_context
*vc4
= vc4_context(pctx
);
305 if (!vc4
->needs_flush
)
308 /* The RCL setup would choke if the draw bounds cause no drawing, so
309 * just drop the drawing if that's the case.
311 if (vc4
->draw_max_x
<= vc4
->draw_min_x
||
312 vc4
->draw_max_y
<= vc4
->draw_min_y
) {
317 /* Increment the semaphore indicating that binning is done and
318 * unblocking the render thread. Note that this doesn't act until the
321 cl_ensure_space(&vc4
->bcl
, 8);
322 cl_u8(&vc4
->bcl
, VC4_PACKET_INCREMENT_SEMAPHORE
);
323 /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
324 cl_u8(&vc4
->bcl
, VC4_PACKET_FLUSH
);
332 vc4_pipe_flush(struct pipe_context
*pctx
, struct pipe_fence_handle
**fence
,
335 struct vc4_context
*vc4
= vc4_context(pctx
);
340 struct vc4_fence
*f
= vc4_fence_create(vc4
->screen
,
341 vc4
->last_emit_seqno
);
342 *fence
= (struct pipe_fence_handle
*)f
;
347 * Flushes the current command lists if they reference the given BO.
349 * This helps avoid flushing the command buffers when unnecessary.
352 vc4_cl_references_bo(struct pipe_context
*pctx
, struct vc4_bo
*bo
)
354 struct vc4_context
*vc4
= vc4_context(pctx
);
356 if (!vc4
->needs_flush
)
359 /* Walk all the referenced BOs in the drawing command list to see if
362 struct vc4_bo
**referenced_bos
= vc4
->bo_pointers
.base
;
363 for (int i
= 0; i
< (vc4
->bo_handles
.next
-
364 vc4
->bo_handles
.base
) / 4; i
++) {
365 if (referenced_bos
[i
] == bo
) {
370 /* Also check for the Z/color buffers, since the references to those
371 * are only added immediately before submit.
373 struct vc4_surface
*csurf
= vc4_surface(vc4
->framebuffer
.cbufs
[0]);
375 struct vc4_resource
*ctex
= vc4_resource(csurf
->base
.texture
);
376 if (ctex
->bo
== bo
) {
381 struct vc4_surface
*zsurf
= vc4_surface(vc4
->framebuffer
.zsbuf
);
383 struct vc4_resource
*ztex
=
384 vc4_resource(zsurf
->base
.texture
);
385 if (ztex
->bo
== bo
) {
394 vc4_invalidate_resource(struct pipe_context
*pctx
, struct pipe_resource
*prsc
)
396 struct vc4_context
*vc4
= vc4_context(pctx
);
397 struct pipe_surface
*zsurf
= vc4
->framebuffer
.zsbuf
;
399 if (zsurf
&& zsurf
->texture
== prsc
)
400 vc4
->resolve
&= ~(PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
);
404 vc4_context_destroy(struct pipe_context
*pctx
)
406 struct vc4_context
*vc4
= vc4_context(pctx
);
409 util_blitter_destroy(vc4
->blitter
);
411 if (vc4
->primconvert
)
412 util_primconvert_destroy(vc4
->primconvert
);
415 u_upload_destroy(vc4
->uploader
);
417 util_slab_destroy(&vc4
->transfer_pool
);
419 pipe_surface_reference(&vc4
->framebuffer
.cbufs
[0], NULL
);
420 pipe_surface_reference(&vc4
->framebuffer
.zsbuf
, NULL
);
421 vc4_bo_unreference(&vc4
->tile_alloc
);
422 vc4_bo_unreference(&vc4
->tile_state
);
424 vc4_program_fini(pctx
);
429 struct pipe_context
*
430 vc4_context_create(struct pipe_screen
*pscreen
, void *priv
)
432 struct vc4_screen
*screen
= vc4_screen(pscreen
);
433 struct vc4_context
*vc4
;
435 /* Prevent dumping of the shaders built during context setup. */
436 uint32_t saved_shaderdb_flag
= vc4_debug
& VC4_DEBUG_SHADERDB
;
437 vc4_debug
&= ~VC4_DEBUG_SHADERDB
;
439 vc4
= rzalloc(NULL
, struct vc4_context
);
442 struct pipe_context
*pctx
= &vc4
->base
;
444 vc4
->screen
= screen
;
446 pctx
->screen
= pscreen
;
448 pctx
->destroy
= vc4_context_destroy
;
449 pctx
->flush
= vc4_pipe_flush
;
450 pctx
->invalidate_resource
= vc4_invalidate_resource
;
453 vc4_state_init(pctx
);
454 vc4_program_init(pctx
);
455 vc4_query_init(pctx
);
456 vc4_resource_context_init(pctx
);
460 vc4
->fd
= screen
->fd
;
462 util_slab_create(&vc4
->transfer_pool
, sizeof(struct vc4_transfer
),
463 16, UTIL_SLAB_SINGLETHREADED
);
464 vc4
->blitter
= util_blitter_create(pctx
);
468 vc4
->primconvert
= util_primconvert_create(pctx
,
469 (1 << PIPE_PRIM_QUADS
) - 1);
470 if (!vc4
->primconvert
)
473 vc4
->uploader
= u_upload_create(pctx
, 16 * 1024, 4,
474 PIPE_BIND_INDEX_BUFFER
);
476 vc4_debug
|= saved_shaderdb_flag
;