2 * Copyright (c) 2014 Scott Mansell
3 * Copyright © 2014 Broadcom
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "util/u_format.h"
26 #include "util/u_pack_color.h"
27 #include "indices/u_primconvert.h"
29 #include "vc4_context.h"
30 #include "vc4_resource.h"
33 * Does the initial binning command list setup for drawing to a given FBO.
// vc4_start_draw: sizes (and, when missing or too small, reallocates) the
// tile_alloc and tile_state BOs from the framebuffer dimensions, then appends
// the TILE_BINNING_MODE_CONFIG, START_TILE_BINNING and PRIMITIVE_LIST_FORMAT
// packets to vc4->bcl and sets vc4->needs_flush / vc4->draw_call_queued.
// NOTE(review): the embedded line numbers skip (36 -> 41, 62 -> 65, 67 -> 71,
// 79 -> 81), so the return type, braces, anything between the signature and
// the first declaration, and the trailing vc4_bo_alloc() arguments are not
// visible in this listing.
36 vc4_start_draw(struct vc4_context
*vc4
)
// Framebuffer extent in pixels.
41 uint32_t width
= vc4
->framebuffer
.width
;
42 uint32_t height
= vc4
->framebuffer
.height
;
// Extent rounded up to a multiple of 64 and expressed in units of 64.
43 uint32_t tilew
= align(width
, 64) / 64;
44 uint32_t tileh
= align(height
, 64) / 64;
46 /* Tile alloc memory setup: We use an initial alloc size of 32b. The
47 * hardware then aligns that to 256b (we use 4096, because all of our
48 * BO allocations align to that anyway), then for some reason the
49 * simulator wants an extra page available, even if you have overflow
52 * XXX: The binner only does 28-bit addressing math, so the tile alloc
53 * and tile state should be in the same BO and that BO needs to not
54 * cross a 256MB boundary, somehow.
// 32 bytes per tile, rounded up to 4096, plus one further 4096-byte page.
56 uint32_t tile_alloc_size
= 32 * tilew
* tileh
;
57 tile_alloc_size
= align(tile_alloc_size
, 4096);
58 tile_alloc_size
+= 4096;
// 48 bytes per tile; unlike tile_alloc_size this is not rounded here.
59 uint32_t tile_state_size
= 48 * tilew
* tileh
;
// Replace the tile_alloc BO only when there is none or it is too small.
60 if (!vc4
->tile_alloc
|| vc4
->tile_alloc
->size
< tile_alloc_size
) {
61 vc4_bo_unreference(&vc4
->tile_alloc
);
// NOTE(review): the final vc4_bo_alloc() argument and the closing brace are
// on lines missing from this listing.
62 vc4
->tile_alloc
= vc4_bo_alloc(vc4
->screen
, tile_alloc_size
,
// Same replace-if-missing-or-too-small policy for the tile_state BO.
65 if (!vc4
->tile_state
|| vc4
->tile_state
->size
< tile_state_size
) {
66 vc4_bo_unreference(&vc4
->tile_state
);
67 vc4
->tile_state
= vc4_bo_alloc(vc4
->screen
, tile_state_size
,
71 // Tile state data is 48 bytes per tile, I think it can be thrown away
72 // as soon as binning is finished.
// The count of 2 matches the two cl_reloc() calls that follow (tile_alloc and
// tile_state).
73 cl_start_reloc(&vc4
->bcl
, 2);
74 cl_u8(&vc4
->bcl
, VC4_PACKET_TILE_BINNING_MODE_CONFIG
);
75 cl_reloc(vc4
, &vc4
->bcl
, vc4
->tile_alloc
, 0);
// The BO's actual size is written, not the tile_alloc_size computed above.
76 cl_u32(&vc4
->bcl
, vc4
->tile_alloc
->size
);
77 cl_reloc(vc4
, &vc4
->bcl
, vc4
->tile_state
, 0);
// NOTE(review): tilew/tileh are written as single bytes — confirm that the
// framebuffer size limits keep both within 255.
78 cl_u8(&vc4
->bcl
, tilew
);
79 cl_u8(&vc4
->bcl
, tileh
);
// Flag set for the config packet. The opening of the call that takes these
// flags (line 80) is missing from this listing.
81 VC4_BIN_CONFIG_AUTO_INIT_TSDA
|
82 VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32
|
83 VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32
);
85 /* START_TILE_BINNING resets the statechange counters in the hardware,
86 * which are what is used when a primitive is binned to a tile to
87 * figure out what new state packets need to be written to that tile's
90 cl_u8(&vc4
->bcl
, VC4_PACKET_START_TILE_BINNING
);
92 /* Reset the current compressed primitives format. This gets modified
93 * by VC4_PACKET_GL_INDEXED_PRIMITIVE and
94 * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
97 cl_u8(&vc4
->bcl
, VC4_PACKET_PRIMITIVE_LIST_FORMAT
);
98 cl_u8(&vc4
->bcl
, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX
|
99 VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES
));
// Record that the command list now has content and that a draw is queued;
// vc4_clear() reads draw_call_queued.
101 vc4
->needs_flush
= true;
102 vc4
->draw_call_queued
= true;
// vc4_draw_vbo: draw entry point installed as pctx->draw_vbo by
// vc4_draw_init(). As far as this listing shows it: hands QUADS-and-above
// primitive modes to the primconvert helper, tracks primitive-mode changes,
// updates shaders and state, writes uniforms for the fs/vs/cs programs, emits
// one shader record (three program entries plus one entry per vertex
// attribute) into vc4->shader_rec, emits the draw packet into vc4->bcl, and
// accumulates the buffers that need resolving.
// NOTE(review): the embedded line numbers skip in many places (113 -> 117,
// 122 -> 126, 176 -> 178, 209 -> 211 -> 215, 224 -> 228, 232 -> 234,
// 249 -> 254), so the return type, braces, early return, branch heads and
// several call arguments are not visible here.
106 vc4_draw_vbo(struct pipe_draw_info
*pctx
, const struct pipe_draw_info
*info
)
108 struct vc4_context
*vc4
= vc4_context(pctx
);
// Modes at or above PIPE_PRIM_QUADS go through primconvert, which is first
// given the current index buffer and rasterizer state.
// NOTE(review): whatever follows the primconvert draw (lines 114-116) is not
// visible; presumably the function returns there — confirm.
110 if (info
->mode
>= PIPE_PRIM_QUADS
) {
111 util_primconvert_save_index_buffer(vc4
->primconvert
, &vc4
->indexbuf
);
112 util_primconvert_save_rasterizer_state(vc4
->primconvert
, &vc4
->rasterizer
->base
);
113 util_primconvert_draw_vbo(vc4
->primconvert
, info
);
117 struct vc4_vertex_stateobj
*vtx
= vc4
->vtx
;
118 struct vc4_vertexbuf_stateobj
*vertexbuf
= &vc4
->vertexbuf
;
// Remember the primitive mode and flag it dirty when it changes.
120 if (vc4
->prim_mode
!= info
->mode
) {
121 vc4
->prim_mode
= info
->mode
;
122 vc4
->dirty
|= VC4_DIRTY_PRIM_MODE
;
126 vc4_update_compiled_shaders(vc4
, info
->mode
);
128 vc4_emit_state(pctx
);
// One uniform stream per program: fs, vs, cs. The cs call reads the
// PIPE_SHADER_VERTEX constant buffer, the same one the vs call uses.
// NOTE(review): the trailing arguments of these three calls are on lines
// missing from this listing.
131 vc4_write_uniforms(vc4
, vc4
->prog
.fs
,
132 &vc4
->constbuf
[PIPE_SHADER_FRAGMENT
],
134 vc4_write_uniforms(vc4
, vc4
->prog
.vs
,
135 &vc4
->constbuf
[PIPE_SHADER_VERTEX
],
137 vc4_write_uniforms(vc4
, vc4
->prog
.cs
,
138 &vc4
->constbuf
[PIPE_SHADER_VERTEX
],
141 /* The simulator throws a fit if VS or CS don't read an attribute, so
142 * we emit a dummy read.
// At least one attribute entry is always emitted, even with zero elements.
144 uint32_t num_elements_emit
= MAX2(vtx
->num_elements
, 1);
145 /* Emit the shader record. */
// 3 + num_elements_emit matches the cl_reloc() calls below: fs, vs and cs
// program BOs plus one BO per emitted attribute.
146 cl_start_shader_reloc(&vc4
->shader_rec
, 3 + num_elements_emit
);
// Flags word: clipping is always enabled; the VS point-size flag is added
// only for POINTS draws with per-vertex point size in the rasterizer state.
147 cl_u16(&vc4
->shader_rec
,
148 VC4_SHADER_FLAG_ENABLE_CLIPPING
|
149 ((info
->mode
== PIPE_PRIM_POINTS
&&
150 vc4
->rasterizer
->base
.point_size_per_vertex
) ?
151 VC4_SHADER_FLAG_VS_POINT_SIZE
: 0));
// Fragment shader entry.
152 cl_u8(&vc4
->shader_rec
, 0); /* fs num uniforms (unused) */
153 cl_u8(&vc4
->shader_rec
, vc4
->prog
.fs
->num_inputs
);
154 cl_reloc(vc4
, &vc4
->shader_rec
, vc4
->prog
.fs
->bo
, 0);
155 cl_u32(&vc4
->shader_rec
, 0); /* UBO offset written by kernel */
// Vertex shader entry. The bitfield has its low num_elements_emit bits set
// and the total size is 16 bytes per emitted attribute.
// NOTE(review): 16 * num_elements_emit is written as one byte; with the
// <= 8 elements asserted below the maximum is 128.
157 cl_u16(&vc4
->shader_rec
, 0); /* vs num uniforms */
158 cl_u8(&vc4
->shader_rec
, (1 << num_elements_emit
) - 1); /* vs attribute array bitfield */
159 cl_u8(&vc4
->shader_rec
, 16 * num_elements_emit
); /* vs total attribute size */
160 cl_reloc(vc4
, &vc4
->shader_rec
, vc4
->prog
.vs
->bo
, 0);
161 cl_u32(&vc4
->shader_rec
, 0); /* UBO offset written by kernel */
// Coordinate shader entry, using the same bitfield and size as the vs entry.
163 cl_u16(&vc4
->shader_rec
, 0); /* cs num uniforms */
164 cl_u8(&vc4
->shader_rec
, (1 << num_elements_emit
) - 1); /* cs attribute array bitfield */
165 cl_u8(&vc4
->shader_rec
, 16 * num_elements_emit
); /* cs total attribute size */
166 cl_reloc(vc4
, &vc4
->shader_rec
, vc4
->prog
.cs
->bo
, 0);
167 cl_u32(&vc4
->shader_rec
, 0); /* UBO offset written by kernel */
// max_index starts at 0xffff and is lowered per attribute in the loop below;
// it is later written into the indexed-primitive packet.
169 uint32_t max_index
= 0xffff;
// One attribute entry per vertex element.
170 for (int i
= 0; i
< vtx
->num_elements
; i
++) {
171 struct pipe_vertex_element
*elem
= &vtx
->pipe
[i
];
172 struct pipe_vertex_buffer
*vb
=
173 &vertexbuf
->vb
[elem
->vertex_buffer_index
];
174 struct vc4_resource
*rsc
= vc4_resource(vb
->buffer
);
// Byte offset of this element's first value inside the BO, and the number
// of bytes from there to the end of the BO.
// NOTE(review): unsigned subtraction — if offset could exceed the BO size
// this would wrap; confirm that callers rule that out.
175 uint32_t offset
= vb
->buffer_offset
+ elem
->src_offset
;
176 uint32_t vb_size
= rsc
->bo
->size
- offset
;
// NOTE(review): the start of this statement (line 177) is missing; elem_size
// is used below, so this call presumably initializes it — confirm.
178 util_format_get_blocksize(elem
->src_format
);
180 cl_reloc(vc4
, &vc4
->shader_rec
, rsc
->bo
, offset
);
// NOTE(review): element size minus one and the stride are each written as a
// single byte — confirm that strides above 255 cannot reach here.
181 cl_u8(&vc4
->shader_rec
, elem_size
- 1);
182 cl_u8(&vc4
->shader_rec
, vb
->stride
);
// Attribute i lands at VPM offset i * 16 for both the VS and the CS.
183 cl_u8(&vc4
->shader_rec
, i
* 16); /* VS VPM offset */
184 cl_u8(&vc4
->shader_rec
, i
* 16); /* CS VPM offset */
// Only buffers with a non-zero stride constrain the highest usable index.
// NOTE(review): vb_size - elem_size is unsigned and would wrap if the
// remaining buffer is smaller than one element — confirm.
186 if (vb
->stride
> 0) {
187 max_index
= MIN2(max_index
,
188 (vb_size
- elem_size
) / vb
->stride
);
// With no vertex elements, emit the single dummy attribute from a freshly
// allocated 4096-byte scratch BO; the local reference is dropped right after
// the reloc has been emitted.
192 if (vtx
->num_elements
== 0) {
193 assert(num_elements_emit
== 1);
194 struct vc4_bo
*bo
= vc4_bo_alloc(vc4
->screen
, 4096, "scratch VBO");
195 cl_reloc(vc4
, &vc4
->shader_rec
, bo
, 0);
196 cl_u8(&vc4
->shader_rec
, 16 - 1); /* element size */
197 cl_u8(&vc4
->shader_rec
, 0); /* stride */
198 cl_u8(&vc4
->shader_rec
, 0); /* VS VPM offset */
199 cl_u8(&vc4
->shader_rec
, 0); /* CS VPM offset */
200 vc4_bo_unreference(&bo
);
203 /* the actual draw call. */
204 cl_u8(&vc4
->bcl
, VC4_PACKET_GL_SHADER_STATE
);
205 assert(vtx
->num_elements
<= 8);
206 /* Note that number of attributes == 0 in the packet means 8
207 * attributes. This field also contains the offset into shader_rec.
// Masking with 0x7 turns a count of 8 into 0, the encoding described above.
209 cl_u32(&vc4
->bcl
, num_elements_emit
& 0x7);
211 /* Note that the primitive type fields match with OpenGL/gallium
212 * definitions, up to but not including QUADS.
// Index-buffer draw path.
// NOTE(review): the branch head that selects this path (lines 213-214) is
// missing from this listing.
215 struct vc4_resource
*rsc
= vc4_resource(vc4
->indexbuf
.buffer
);
216 uint32_t offset
= vc4
->indexbuf
.offset
;
217 uint32_t index_size
= vc4
->indexbuf
.index_size
;
// NOTE(review): the rest of this branch (lines 220-222) is not visible, so it
// cannot be told from here whether rsc/offset are re-read after the shadow
// update.
218 if (rsc
->shadow_parent
) {
219 vc4_update_shadow_index_buffer(pctx
, &vc4
->indexbuf
);
223 cl_start_reloc(&vc4
->bcl
, 1);
224 cl_u8(&vc4
->bcl
, VC4_PACKET_GL_INDEXED_PRIMITIVE
);
// Tail of a conditional choosing 16-bit or 8-bit indices; its condition and
// the enclosing call (lines 225-227) are missing from this listing.
228 VC4_INDEX_BUFFER_U16
:
229 VC4_INDEX_BUFFER_U8
));
230 cl_u32(&vc4
->bcl
, info
->count
);
231 cl_reloc(vc4
, &vc4
->bcl
, rsc
->bo
, offset
);
232 cl_u32(&vc4
->bcl
, max_index
);
// Non-indexed draw path: mode, vertex count, first vertex.
// NOTE(review): the line separating this from the indexed path (233) is
// missing from this listing.
234 cl_u8(&vc4
->bcl
, VC4_PACKET_GL_ARRAY_PRIMITIVE
);
235 cl_u8(&vc4
->bcl
, info
->mode
);
236 cl_u32(&vc4
->bcl
, info
->count
);
237 cl_u32(&vc4
->bcl
, info
->start
);
// Accumulate the buffers this draw may have written: depth and stencil only
// when enabled in the bound zsa state, color buffer 0 always.
240 if (vc4
->zsa
&& vc4
->zsa
->base
.depth
.enabled
) {
241 vc4
->resolve
|= PIPE_CLEAR_DEPTH
;
243 if (vc4
->zsa
&& vc4
->zsa
->base
.stencil
[0].enabled
)
244 vc4
->resolve
|= PIPE_CLEAR_STENCIL
;
245 vc4
->resolve
|= PIPE_CLEAR_COLOR0
;
// One shader record was emitted for this draw.
247 vc4
->shader_rec_count
++;
// NOTE(review): the statement controlled by this debug check (line 250) is
// missing from this listing.
249 if (vc4_debug
& VC4_DEBUG_ALWAYS_FLUSH
)
// pack_rgba: passes the float color `rgba` and the pipe format to
// util_pack_color(), which writes its result into `uc`. vc4_clear() stores
// this function's return value as the clear color.
// NOTE(review): the return type, the declaration of `uc` and the return
// statement (lines 253, 255-256, 258-259) are missing from this listing.
254 pack_rgba(enum pipe_format format
, const float *rgba
)
257 util_pack_color(rgba
, format
, &uc
);
// vc4_clear: clear entry point installed as pctx->clear by vc4_draw_init().
// It records the requested clear values (packed color, depth, stencil) in the
// context and ORs `buffers` into both vc4->cleared and vc4->resolve; no
// command-list writes are visible in this function.
// NOTE(review): the embedded line numbers skip (265 -> 267, 270 -> 273,
// 275 -> 279), so the return type, braces, the body of the draw_call_queued
// check and the color argument passed to pack_rgba() are not visible here.
262 vc4_clear(struct pipe_context
*pctx
, unsigned buffers
,
263 const union pipe_color_union
*color
, double depth
, unsigned stencil
)
265 struct vc4_context
*vc4
= vc4_context(pctx
);
267 /* We can't flag new buffers for clearing once we've queued draws. We
268 * could avoid this by using the 3d engine to clear.
// draw_call_queued is set by vc4_start_draw(). The statement this check
// controls (line 271) is missing from this listing.
270 if (vc4
->draw_call_queued
)
// Color: both clear_color slots receive the same value, packed using the
// format of color buffer 0.
// NOTE(review): cbufs[0] is dereferenced without a visible NULL check —
// confirm that a color clear cannot arrive with no color buffer bound.
273 if (buffers
& PIPE_CLEAR_COLOR0
) {
274 vc4
->clear_color
[0] = vc4
->clear_color
[1] =
275 pack_rgba(vc4
->framebuffer
.cbufs
[0]->format
,
279 if (buffers
& PIPE_CLEAR_DEPTH
) {
280 /* Though the depth buffer is stored with Z in the high 24,
281 * for this field we just need to store it in the low 24.
283 vc4
->clear_depth
= util_pack_z(PIPE_FORMAT_Z24X8_UNORM
, depth
);
// Stencil is stored as given.
286 if (buffers
& PIPE_CLEAR_STENCIL
)
287 vc4
->clear_stencil
= stencil
;
// Remember which buffers were cleared and mark them as needing a resolve.
289 vc4
->cleared
|= buffers
;
290 vc4
->resolve
|= buffers
;
// vc4_clear_render_target: unimplemented stub installed as
// pctx->clear_render_target by vc4_draw_init(). The only visible statement
// prints a notice to stderr; none of the parameters are read.
// NOTE(review): the return type and braces are on lines missing from this
// listing.
296 vc4_clear_render_target(struct pipe_context
*pctx
, struct pipe_surface
*ps
,
297 const union pipe_color_union
*color
,
298 unsigned x
, unsigned y
, unsigned w
, unsigned h
)
300 fprintf(stderr
, "unimpl: clear RT\n");
// vc4_clear_depth_stencil: unimplemented stub installed as
// pctx->clear_depth_stencil by vc4_draw_init(). The only visible statement
// prints a notice to stderr; none of the parameters are read.
// NOTE(review): the return type and braces are on lines missing from this
// listing.
304 vc4_clear_depth_stencil(struct pipe_context
*pctx
, struct pipe_surface
*ps
,
305 unsigned buffers
, double depth
, unsigned stencil
,
306 unsigned x
, unsigned y
, unsigned w
, unsigned h
)
308 fprintf(stderr
, "unimpl: clear DS\n");
// vc4_draw_init: installs this file's draw and clear implementations into the
// given pipe_context's draw_vbo, clear, clear_render_target and
// clear_depth_stencil function pointers.
// NOTE(review): the return type and braces are on lines missing from this
// listing.
312 vc4_draw_init(struct pipe_context
*pctx
)
314 pctx
->draw_vbo
= vc4_draw_vbo
;
315 pctx
->clear
= vc4_clear
;
316 pctx
->clear_render_target
= vc4_clear_render_target
;
317 pctx
->clear_depth_stencil
= vc4_clear_depth_stencil
;