2 * Copyright (c) 2014 Scott Mansell
3 * Copyright © 2014 Broadcom
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "util/u_prim.h"
26 #include "util/u_format.h"
27 #include "util/u_pack_color.h"
28 #include "util/u_upload_mgr.h"
29 #include "indices/u_primconvert.h"
31 #include "vc4_context.h"
32 #include "vc4_resource.h"
35 vc4_get_draw_cl_space(struct vc4_context
*vc4
)
37 /* Binner gets our packet state -- vc4_emit.c contents,
38 * and the primitive itself.
40 cl_ensure_space(&vc4
->bcl
, 256);
42 /* Nothing for rcl -- that's covered by vc4_context.c */
44 /* shader_rec gets up to 12 dwords of reloc handles plus a maximally
45 * sized shader_rec (104 bytes base for 8 vattrs plus 32 bytes of
48 cl_ensure_space(&vc4
->shader_rec
, 12 * sizeof(uint32_t) + 104 + 8 * 32);
50 /* Uniforms are covered by vc4_write_uniforms(). */
52 /* There could be up to 16 textures per stage, plus misc other
55 cl_ensure_space(&vc4
->bo_handles
, (2 * 16 + 20) * sizeof(uint32_t));
56 cl_ensure_space(&vc4
->bo_pointers
,
57 (2 * 16 + 20) * sizeof(struct vc4_bo
*));
61 * Does the initial binning command list setup for drawing to a given FBO.
64 vc4_start_draw(struct vc4_context
*vc4
)
69 vc4_get_draw_cl_space(vc4
);
71 uint32_t width
= vc4
->framebuffer
.width
;
72 uint32_t height
= vc4
->framebuffer
.height
;
73 uint32_t tilew
= align(width
, 64) / 64;
74 uint32_t tileh
= align(height
, 64) / 64;
75 struct vc4_cl_out
*bcl
= cl_start(&vc4
->bcl
);
77 // Tile state data is 48 bytes per tile, I think it can be thrown away
78 // as soon as binning is finished.
79 cl_u8(&bcl
, VC4_PACKET_TILE_BINNING_MODE_CONFIG
);
80 cl_u32(&bcl
, 0); /* tile alloc addr, filled by kernel */
81 cl_u32(&bcl
, 0); /* tile alloc size, filled by kernel */
82 cl_u32(&bcl
, 0); /* tile state addr, filled by kernel */
85 cl_u8(&bcl
, 0); /* flags, filled by kernel. */
87 /* START_TILE_BINNING resets the statechange counters in the hardware,
88 * which are what is used when a primitive is binned to a tile to
89 * figure out what new state packets need to be written to that tile's
92 cl_u8(&bcl
, VC4_PACKET_START_TILE_BINNING
);
94 /* Reset the current compressed primitives format. This gets modified
95 * by VC4_PACKET_GL_INDEXED_PRIMITIVE and
96 * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
99 cl_u8(&bcl
, VC4_PACKET_PRIMITIVE_LIST_FORMAT
);
100 cl_u8(&bcl
, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX
|
101 VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES
));
103 vc4
->needs_flush
= true;
104 vc4
->draw_calls_queued
++;
105 vc4
->draw_width
= width
;
106 vc4
->draw_height
= height
;
108 cl_end(&vc4
->bcl
, bcl
);
112 vc4_update_shadow_textures(struct pipe_context
*pctx
,
113 struct vc4_texture_stateobj
*stage_tex
)
115 for (int i
= 0; i
< stage_tex
->num_textures
; i
++) {
116 struct pipe_sampler_view
*view
= stage_tex
->textures
[i
];
119 struct vc4_resource
*rsc
= vc4_resource(view
->texture
);
120 if (rsc
->shadow_parent
)
121 vc4_update_shadow_baselevel_texture(pctx
, view
);
126 vc4_emit_gl_shader_state(struct vc4_context
*vc4
, const struct pipe_draw_info
*info
)
128 /* VC4_DIRTY_VTXSTATE */
129 struct vc4_vertex_stateobj
*vtx
= vc4
->vtx
;
130 /* VC4_DIRTY_VTXBUF */
131 struct vc4_vertexbuf_stateobj
*vertexbuf
= &vc4
->vertexbuf
;
133 /* The simulator throws a fit if VS or CS don't read an attribute, so
134 * we emit a dummy read.
136 uint32_t num_elements_emit
= MAX2(vtx
->num_elements
, 1);
137 /* Emit the shader record. */
138 struct vc4_cl_out
*shader_rec
=
139 cl_start_shader_reloc(&vc4
->shader_rec
, 3 + num_elements_emit
);
140 /* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */
142 VC4_SHADER_FLAG_ENABLE_CLIPPING
|
143 VC4_SHADER_FLAG_FS_SINGLE_THREAD
|
144 ((info
->mode
== PIPE_PRIM_POINTS
&&
145 vc4
->rasterizer
->base
.point_size_per_vertex
) ?
146 VC4_SHADER_FLAG_VS_POINT_SIZE
: 0));
148 /* VC4_DIRTY_COMPILED_FS */
149 cl_u8(&shader_rec
, 0); /* fs num uniforms (unused) */
150 cl_u8(&shader_rec
, vc4
->prog
.fs
->num_inputs
);
151 cl_reloc(vc4
, &vc4
->shader_rec
, &shader_rec
, vc4
->prog
.fs
->bo
, 0);
152 cl_u32(&shader_rec
, 0); /* UBO offset written by kernel */
154 /* VC4_DIRTY_COMPILED_VS */
155 cl_u16(&shader_rec
, 0); /* vs num uniforms */
156 cl_u8(&shader_rec
, vc4
->prog
.vs
->vattrs_live
);
157 cl_u8(&shader_rec
, vc4
->prog
.vs
->vattr_offsets
[8]);
158 cl_reloc(vc4
, &vc4
->shader_rec
, &shader_rec
, vc4
->prog
.vs
->bo
, 0);
159 cl_u32(&shader_rec
, 0); /* UBO offset written by kernel */
161 /* VC4_DIRTY_COMPILED_CS */
162 cl_u16(&shader_rec
, 0); /* cs num uniforms */
163 cl_u8(&shader_rec
, vc4
->prog
.cs
->vattrs_live
);
164 cl_u8(&shader_rec
, vc4
->prog
.cs
->vattr_offsets
[8]);
165 cl_reloc(vc4
, &vc4
->shader_rec
, &shader_rec
, vc4
->prog
.cs
->bo
, 0);
166 cl_u32(&shader_rec
, 0); /* UBO offset written by kernel */
168 uint32_t max_index
= 0xffff;
169 for (int i
= 0; i
< vtx
->num_elements
; i
++) {
170 struct pipe_vertex_element
*elem
= &vtx
->pipe
[i
];
171 struct pipe_vertex_buffer
*vb
=
172 &vertexbuf
->vb
[elem
->vertex_buffer_index
];
173 struct vc4_resource
*rsc
= vc4_resource(vb
->buffer
);
174 /* not vc4->dirty tracked: vc4->last_index_bias */
175 uint32_t offset
= (vb
->buffer_offset
+
177 vb
->stride
* info
->index_bias
);
178 uint32_t vb_size
= rsc
->bo
->size
- offset
;
180 util_format_get_blocksize(elem
->src_format
);
182 cl_reloc(vc4
, &vc4
->shader_rec
, &shader_rec
, rsc
->bo
, offset
);
183 cl_u8(&shader_rec
, elem_size
- 1);
184 cl_u8(&shader_rec
, vb
->stride
);
185 cl_u8(&shader_rec
, vc4
->prog
.vs
->vattr_offsets
[i
]);
186 cl_u8(&shader_rec
, vc4
->prog
.cs
->vattr_offsets
[i
]);
188 if (vb
->stride
> 0) {
189 max_index
= MIN2(max_index
,
190 (vb_size
- elem_size
) / vb
->stride
);
194 if (vtx
->num_elements
== 0) {
195 assert(num_elements_emit
== 1);
196 struct vc4_bo
*bo
= vc4_bo_alloc(vc4
->screen
, 4096, "scratch VBO");
197 cl_reloc(vc4
, &vc4
->shader_rec
, &shader_rec
, bo
, 0);
198 cl_u8(&shader_rec
, 16 - 1); /* element size */
199 cl_u8(&shader_rec
, 0); /* stride */
200 cl_u8(&shader_rec
, 0); /* VS VPM offset */
201 cl_u8(&shader_rec
, 0); /* CS VPM offset */
202 vc4_bo_unreference(&bo
);
204 cl_end(&vc4
->shader_rec
, shader_rec
);
206 struct vc4_cl_out
*bcl
= cl_start(&vc4
->bcl
);
207 /* the actual draw call. */
208 cl_u8(&bcl
, VC4_PACKET_GL_SHADER_STATE
);
209 assert(vtx
->num_elements
<= 8);
210 /* Note that number of attributes == 0 in the packet means 8
211 * attributes. This field also contains the offset into shader_rec.
213 cl_u32(&bcl
, num_elements_emit
& 0x7);
214 cl_end(&vc4
->bcl
, bcl
);
216 vc4_write_uniforms(vc4
, vc4
->prog
.fs
,
217 &vc4
->constbuf
[PIPE_SHADER_FRAGMENT
],
219 vc4_write_uniforms(vc4
, vc4
->prog
.vs
,
220 &vc4
->constbuf
[PIPE_SHADER_VERTEX
],
222 vc4_write_uniforms(vc4
, vc4
->prog
.cs
,
223 &vc4
->constbuf
[PIPE_SHADER_VERTEX
],
226 vc4
->last_index_bias
= info
->index_bias
;
227 vc4
->max_index
= max_index
;
231 * HW-2116 workaround: Flush the batch before triggering the hardware state
232 * counter wraparound behavior.
234 * State updates are tracked by a global counter which increments at the first
235 * state update after a draw or a START_BINNING. Tiles can then have their
236 * state updated at draw time with a set of cheap checks for whether the
237 * state's copy of the global counter matches the global counter the last time
238 * that state was written to the tile.
240 * The state counters are relatively small and wrap around quickly, so you
241 * could get false negatives for needing to update a particular state in the
242 * tile. To avoid this, the hardware attempts to write all of the state in
243 * the tile at wraparound time. This apparently is broken, so we just flush
244 * everything before that behavior is triggered. A batch flush is sufficient
245 * to get our current contents drawn and reset the counters to 0.
247 * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the
248 * tiles with VC4_PACKET_RETURN_FROM_LIST.
251 vc4_hw_2116_workaround(struct pipe_context
*pctx
)
253 struct vc4_context
*vc4
= vc4_context(pctx
);
255 if (vc4
->draw_calls_queued
== 0x1ef0) {
256 perf_debug("Flushing batch due to HW-2116 workaround "
257 "(too many draw calls per scene\n");
263 vc4_draw_vbo(struct pipe_context
*pctx
, const struct pipe_draw_info
*info
)
265 struct vc4_context
*vc4
= vc4_context(pctx
);
267 if (info
->mode
>= PIPE_PRIM_QUADS
) {
268 util_primconvert_save_index_buffer(vc4
->primconvert
, &vc4
->indexbuf
);
269 util_primconvert_save_rasterizer_state(vc4
->primconvert
, &vc4
->rasterizer
->base
);
270 util_primconvert_draw_vbo(vc4
->primconvert
, info
);
271 perf_debug("Fallback conversion for %d %s vertices\n",
272 info
->count
, u_prim_name(info
->mode
));
276 /* Before setting up the draw, do any fixup blits necessary. */
277 vc4_update_shadow_textures(pctx
, &vc4
->verttex
);
278 vc4_update_shadow_textures(pctx
, &vc4
->fragtex
);
280 vc4_hw_2116_workaround(pctx
);
282 vc4_get_draw_cl_space(vc4
);
284 if (vc4
->prim_mode
!= info
->mode
) {
285 vc4
->prim_mode
= info
->mode
;
286 vc4
->dirty
|= VC4_DIRTY_PRIM_MODE
;
290 vc4_update_compiled_shaders(vc4
, info
->mode
);
292 vc4_emit_state(pctx
);
294 if ((vc4
->dirty
& (VC4_DIRTY_VTXBUF
|
296 VC4_DIRTY_PRIM_MODE
|
297 VC4_DIRTY_RASTERIZER
|
298 VC4_DIRTY_COMPILED_CS
|
299 VC4_DIRTY_COMPILED_VS
|
300 VC4_DIRTY_COMPILED_FS
|
301 vc4
->prog
.cs
->uniform_dirty_bits
|
302 vc4
->prog
.vs
->uniform_dirty_bits
|
303 vc4
->prog
.fs
->uniform_dirty_bits
)) ||
304 vc4
->last_index_bias
!= info
->index_bias
) {
305 vc4_emit_gl_shader_state(vc4
, info
);
310 /* Note that the primitive type fields match with OpenGL/gallium
311 * definitions, up to but not including QUADS.
313 struct vc4_cl_out
*bcl
= cl_start(&vc4
->bcl
);
315 uint32_t offset
= vc4
->indexbuf
.offset
;
316 uint32_t index_size
= vc4
->indexbuf
.index_size
;
317 struct pipe_resource
*prsc
;
318 if (vc4
->indexbuf
.index_size
== 4) {
319 prsc
= vc4_get_shadow_index_buffer(pctx
, &vc4
->indexbuf
,
320 info
->count
, &offset
);
323 if (vc4
->indexbuf
.user_buffer
) {
325 u_upload_data(vc4
->uploader
, 0,
326 info
->count
* index_size
,
327 vc4
->indexbuf
.user_buffer
,
330 prsc
= vc4
->indexbuf
.buffer
;
333 struct vc4_resource
*rsc
= vc4_resource(prsc
);
335 cl_start_reloc(&vc4
->bcl
, &bcl
, 1);
336 cl_u8(&bcl
, VC4_PACKET_GL_INDEXED_PRIMITIVE
);
340 VC4_INDEX_BUFFER_U16
:
341 VC4_INDEX_BUFFER_U8
));
342 cl_u32(&bcl
, info
->count
);
343 cl_reloc(vc4
, &vc4
->bcl
, &bcl
, rsc
->bo
, offset
);
344 cl_u32(&bcl
, vc4
->max_index
);
346 if (vc4
->indexbuf
.index_size
== 4 || vc4
->indexbuf
.user_buffer
)
347 pipe_resource_reference(&prsc
, NULL
);
349 cl_u8(&bcl
, VC4_PACKET_GL_ARRAY_PRIMITIVE
);
350 cl_u8(&bcl
, info
->mode
);
351 cl_u32(&bcl
, info
->count
);
352 cl_u32(&bcl
, info
->start
);
354 cl_end(&vc4
->bcl
, bcl
);
356 if (vc4
->zsa
&& vc4
->zsa
->base
.depth
.enabled
) {
357 vc4
->resolve
|= PIPE_CLEAR_DEPTH
;
359 if (vc4
->zsa
&& vc4
->zsa
->base
.stencil
[0].enabled
)
360 vc4
->resolve
|= PIPE_CLEAR_STENCIL
;
361 vc4
->resolve
|= PIPE_CLEAR_COLOR0
;
363 vc4
->shader_rec_count
++;
365 if (vc4_debug
& VC4_DEBUG_ALWAYS_FLUSH
)
370 pack_rgba(enum pipe_format format
, const float *rgba
)
373 util_pack_color(rgba
, format
, &uc
);
374 if (util_format_get_blocksize(format
) == 2)
381 vc4_clear(struct pipe_context
*pctx
, unsigned buffers
,
382 const union pipe_color_union
*color
, double depth
, unsigned stencil
)
384 struct vc4_context
*vc4
= vc4_context(pctx
);
386 /* We can't flag new buffers for clearing once we've queued draws. We
387 * could avoid this by using the 3d engine to clear.
389 if (vc4
->draw_calls_queued
) {
390 perf_debug("Flushing rendering to process new clear.\n");
394 if (buffers
& PIPE_CLEAR_COLOR0
) {
395 vc4
->clear_color
[0] = vc4
->clear_color
[1] =
396 pack_rgba(vc4
->framebuffer
.cbufs
[0]->format
,
400 if (buffers
& PIPE_CLEAR_DEPTH
) {
401 /* Though the depth buffer is stored with Z in the high 24,
402 * for this field we just need to store it in the low 24.
404 vc4
->clear_depth
= util_pack_z(PIPE_FORMAT_Z24X8_UNORM
, depth
);
407 if (buffers
& PIPE_CLEAR_STENCIL
)
408 vc4
->clear_stencil
= stencil
;
412 vc4
->draw_max_x
= vc4
->framebuffer
.width
;
413 vc4
->draw_max_y
= vc4
->framebuffer
.height
;
414 vc4
->cleared
|= buffers
;
415 vc4
->resolve
|= buffers
;
/* pipe_context::clear_render_target hook -- scissored RT clears are not
 * implemented; this only logs the fallback.
 */
static void
vc4_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
                        const union pipe_color_union *color,
                        unsigned x, unsigned y, unsigned w, unsigned h)
{
        fprintf(stderr, "unimpl: clear RT\n");
}
/* pipe_context::clear_depth_stencil hook -- scissored depth/stencil clears
 * are not implemented; this only logs the fallback.
 */
static void
vc4_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
                        unsigned buffers, double depth, unsigned stencil,
                        unsigned x, unsigned y, unsigned w, unsigned h)
{
        fprintf(stderr, "unimpl: clear DS\n");
}
437 vc4_draw_init(struct pipe_context
*pctx
)
439 pctx
->draw_vbo
= vc4_draw_vbo
;
440 pctx
->clear
= vc4_clear
;
441 pctx
->clear_render_target
= vc4_clear_render_target
;
442 pctx
->clear_depth_stencil
= vc4_clear_depth_stencil
;