vc4: Crank up the tile allocation BO size
[mesa.git] / src / gallium / drivers / vc4 / vc4_draw.c
1 /*
2 * Copyright (c) 2014 Scott Mansell
3 * Copyright © 2014 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include <stdio.h>
26
27 #include "util/u_format.h"
28 #include "indices/u_primconvert.h"
29
30 #include "vc4_context.h"
31 #include "vc4_resource.h"
32
33 static void
34 vc4_rcl_tile_calls(struct vc4_context *vc4,
35 struct vc4_surface *csurf,
36 uint32_t xtiles, uint32_t ytiles)
37 {
38 struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
39
40 for (int x = 0; x < xtiles; x++) {
41 for (int y = 0; y < ytiles; y++) {
42 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
43 cl_u8(&vc4->rcl, x);
44 cl_u8(&vc4->rcl, y);
45
46 cl_start_reloc(&vc4->rcl, 1);
47 cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
48 cl_u8(&vc4->rcl,
49 VC4_LOADSTORE_TILE_BUFFER_COLOR |
50 VC4_LOADSTORE_TILE_BUFFER_FORMAT_RASTER);
51 cl_u8(&vc4->rcl,
52 VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
53 cl_reloc(vc4, &vc4->rcl, ctex->bo, csurf->offset);
54
55 cl_start_reloc(&vc4->rcl, 1);
56 cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
57 cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc,
58 (y * xtiles + x) * 32);
59
60 if (x == xtiles - 1 && y == ytiles - 1) {
61 cl_u8(&vc4->rcl,
62 VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
63 } else {
64 cl_u8(&vc4->rcl,
65 VC4_PACKET_STORE_MS_TILE_BUFFER);
66 }
67 }
68 }
69 }
70
71 static void
72 vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
73 {
74 struct vc4_context *vc4 = vc4_context(pctx);
75
76 if (info->mode >= PIPE_PRIM_QUADS) {
77 util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
78 util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
79 util_primconvert_draw_vbo(vc4->primconvert, info);
80 return;
81 }
82
83 uint32_t width = vc4->framebuffer.width;
84 uint32_t height = vc4->framebuffer.height;
85 uint32_t tilew = align(width, 64) / 64;
86 uint32_t tileh = align(height, 64) / 64;
87
88 uint32_t tile_alloc_size = 32 * tilew * tileh * 16;
89 uint32_t tile_state_size = 48 * tilew * tileh;
90 if (!vc4->tile_alloc || vc4->tile_alloc->size < tile_alloc_size) {
91 vc4_bo_unreference(&vc4->tile_alloc);
92 vc4->tile_alloc = vc4_bo_alloc(vc4->screen, tile_alloc_size,
93 "tile_alloc");
94 }
95 if (!vc4->tile_state || vc4->tile_state->size < tile_state_size) {
96 vc4_bo_unreference(&vc4->tile_state);
97 vc4->tile_state = vc4_bo_alloc(vc4->screen, tile_state_size,
98 "tile_state");
99 }
100
101 vc4_update_compiled_shaders(vc4);
102
103 vc4->needs_flush = true;
104
105 // Tile state data is 48 bytes per tile, I think it can be thrown away
106 // as soon as binning is finished.
107 cl_start_reloc(&vc4->bcl, 2);
108 cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
109 cl_reloc(vc4, &vc4->bcl, vc4->tile_alloc, 0);
110 cl_u32(&vc4->bcl, vc4->tile_alloc->size);
111 cl_reloc(vc4, &vc4->bcl, vc4->tile_state, 0);
112 cl_u8(&vc4->bcl, tilew);
113 cl_u8(&vc4->bcl, tileh);
114 cl_u8(&vc4->bcl, VC4_BIN_CONFIG_AUTO_INIT_TSDA);
115
116 cl_u8(&vc4->bcl, VC4_PACKET_START_TILE_BINNING);
117
118 cl_u8(&vc4->bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
119 cl_u8(&vc4->bcl, 0x12); // 16 bit triangle
120
121 vc4_emit_state(pctx);
122
123 /* the actual draw call. */
124 struct vc4_vertex_stateobj *vtx = vc4->vtx;
125 struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;
126 cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE);
127 assert(vtx->num_elements <= 8);
128 #ifndef USE_VC4_SIMULATOR
129 /* Note that number of attributes == 0 in the packet means 8
130 * attributes. This field also contains the offset into shader_rec.
131 */
132 cl_u32(&vc4->bcl, vtx->num_elements & 0x7);
133 #else
134 cl_u32(&vc4->bcl, simpenrose_hw_addr(vc4->shader_rec.next) |
135 (vtx->num_elements & 0x7));
136 #endif
137
138 /* Note that the primitive type fields match with OpenGL/gallium
139 * definitions, up to but not including QUADS.
140 */
141 if (info->indexed) {
142 struct vc4_resource *rsc = vc4_resource(vc4->indexbuf.buffer);
143
144 assert(vc4->indexbuf.index_size == 1 ||
145 vc4->indexbuf.index_size == 2);
146
147 cl_start_reloc(&vc4->bcl, 1);
148 cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
149 cl_u8(&vc4->bcl,
150 info->mode |
151 (vc4->indexbuf.index_size == 2 ?
152 VC4_INDEX_BUFFER_U16:
153 VC4_INDEX_BUFFER_U8));
154 cl_u32(&vc4->bcl, info->count);
155 cl_reloc(vc4, &vc4->bcl, rsc->bo, vc4->indexbuf.offset);
156 cl_u32(&vc4->bcl, info->max_index);
157 } else {
158 cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
159 cl_u8(&vc4->bcl, info->mode);
160 cl_u32(&vc4->bcl, info->count);
161 cl_u32(&vc4->bcl, info->start);
162 }
163
164 cl_u8(&vc4->bcl, VC4_PACKET_FLUSH_ALL);
165 cl_u8(&vc4->bcl, VC4_PACKET_NOP);
166 cl_u8(&vc4->bcl, VC4_PACKET_HALT);
167
168 // Shader Record
169
170 struct vc4_bo *fs_ubo, *vs_ubo, *cs_ubo;
171 uint32_t fs_ubo_offset, vs_ubo_offset, cs_ubo_offset;
172 vc4_get_uniform_bo(vc4, vc4->prog.fs,
173 &vc4->constbuf[PIPE_SHADER_FRAGMENT],
174 0, &fs_ubo, &fs_ubo_offset);
175 vc4_get_uniform_bo(vc4, vc4->prog.vs,
176 &vc4->constbuf[PIPE_SHADER_VERTEX],
177 0, &vs_ubo, &vs_ubo_offset);
178 vc4_get_uniform_bo(vc4, vc4->prog.vs,
179 &vc4->constbuf[PIPE_SHADER_VERTEX],
180 1, &cs_ubo, &cs_ubo_offset);
181
182 cl_start_shader_reloc(&vc4->shader_rec, 6 + vtx->num_elements);
183 cl_u16(&vc4->shader_rec, VC4_SHADER_FLAG_ENABLE_CLIPPING);
184 cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */
185 cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs);
186 cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0);
187 cl_reloc(vc4, &vc4->shader_rec, fs_ubo, fs_ubo_offset);
188
189 cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
190 cl_u8(&vc4->shader_rec, (1 << vtx->num_elements) - 1); /* vs attribute array bitfield */
191 cl_u8(&vc4->shader_rec, 16 * vtx->num_elements); /* vs total attribute size */
192 cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
193 cl_reloc(vc4, &vc4->shader_rec, vs_ubo, vs_ubo_offset);
194
195 cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
196 cl_u8(&vc4->shader_rec, (1 << vtx->num_elements) - 1); /* cs attribute array bitfield */
197 cl_u8(&vc4->shader_rec, 16 * vtx->num_elements); /* vs total attribute size */
198 cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo,
199 vc4->prog.vs->coord_shader_offset);
200 cl_reloc(vc4, &vc4->shader_rec, cs_ubo, cs_ubo_offset);
201
202 for (int i = 0; i < vtx->num_elements; i++) {
203 struct pipe_vertex_element *elem = &vtx->pipe[i];
204 struct pipe_vertex_buffer *vb =
205 &vertexbuf->vb[elem->vertex_buffer_index];
206 struct vc4_resource *rsc = vc4_resource(vb->buffer);
207
208 if (elem->src_format != PIPE_FORMAT_R32G32B32A32_FLOAT) {
209 fprintf(stderr, "Unsupported attribute format %s\n",
210 util_format_name(elem->src_format));
211 }
212
213 cl_reloc(vc4, &vc4->shader_rec, rsc->bo,
214 vb->buffer_offset + elem->src_offset);
215 cl_u8(&vc4->shader_rec,
216 util_format_get_blocksize(elem->src_format) - 1);
217 cl_u8(&vc4->shader_rec, vb->stride);
218 cl_u8(&vc4->shader_rec, i * 16); /* VS VPM offset */
219 cl_u8(&vc4->shader_rec, i * 16); /* CS VPM offset */
220 }
221
222
223 vc4->shader_rec_count++;
224
225 cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
226 cl_u32(&vc4->rcl, 0xff000000); // Opaque Black
227 cl_u32(&vc4->rcl, 0xff000000); // 32 bit clear colours need to be repeated twice
228 cl_u32(&vc4->rcl, 0);
229 cl_u8(&vc4->rcl, 0);
230
231 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
232 struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
233
234 cl_start_reloc(&vc4->rcl, 1);
235 cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
236 cl_reloc(vc4, &vc4->rcl, ctex->bo, csurf->offset);
237 cl_u16(&vc4->rcl, width);
238 cl_u16(&vc4->rcl, height);
239 cl_u8(&vc4->rcl, (VC4_RENDER_CONFIG_MEMORY_FORMAT_LINEAR |
240 VC4_RENDER_CONFIG_FORMAT_RGBA8888));
241 cl_u8(&vc4->rcl, 0);
242
243 // Do a store of the first tile to force the tile buffer to be cleared
244 /* XXX: I think these two packets may be unnecessary. */
245 if (0) {
246 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
247 cl_u8(&vc4->rcl, 0);
248 cl_u8(&vc4->rcl, 0);
249
250 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
251 cl_u16(&vc4->rcl, 0); // Store nothing (just clear)
252 cl_u32(&vc4->rcl, 0); // no address is needed
253 }
254
255 vc4_rcl_tile_calls(vc4, csurf, tilew, tileh);
256
257 vc4_flush(pctx);
258 }
259
260 static void
261 vc4_clear(struct pipe_context *pctx, unsigned buffers,
262 const union pipe_color_union *color, double depth, unsigned stencil)
263 {
264 struct vc4_context *vc4 = vc4_context(pctx);
265
266 vc4->needs_flush = true;
267 }
268
/**
 * Render-target clear hook, installed as pctx->clear_render_target by
 * vc4_draw_init().
 *
 * Not implemented: every argument is ignored and a diagnostic is written to
 * stderr.
 */
static void
vc4_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
                        const union pipe_color_union *color,
                        unsigned x, unsigned y, unsigned w, unsigned h)
{
        fputs("unimpl: clear RT\n", stderr);
}
276
/**
 * Depth/stencil clear hook, installed as pctx->clear_depth_stencil by
 * vc4_draw_init().
 *
 * Not implemented: every argument is ignored and a diagnostic is written to
 * stderr.
 */
static void
vc4_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
                        unsigned buffers, double depth, unsigned stencil,
                        unsigned x, unsigned y, unsigned w, unsigned h)
{
        fputs("unimpl: clear DS\n", stderr);
}
284
285 void
286 vc4_draw_init(struct pipe_context *pctx)
287 {
288 pctx->draw_vbo = vc4_draw_vbo;
289 pctx->clear = vc4_clear;
290 pctx->clear_render_target = vc4_clear_render_target;
291 pctx->clear_depth_stencil = vc4_clear_depth_stencil;
292 }