34a100d2ab079d91b526cc826c898a658c9ba184
[mesa.git] / src / gallium / drivers / vc4 / vc4_context.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <xf86drm.h>
25 #include <err.h>
26
27 #include "pipe/p_defines.h"
28 #include "util/ralloc.h"
29 #include "util/u_inlines.h"
30 #include "util/u_memory.h"
31 #include "util/u_blitter.h"
32 #include "indices/u_primconvert.h"
33 #include "pipe/p_screen.h"
34
35 #include "vc4_screen.h"
36 #include "vc4_context.h"
37 #include "vc4_resource.h"
38
39 static void
40 vc4_setup_rcl(struct vc4_context *vc4)
41 {
42 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
43 struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
44 struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
45 struct vc4_resource *ztex = zsurf ? vc4_resource(zsurf->base.texture) : NULL;
46 uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
47 uint32_t width = vc4->framebuffer.width;
48 uint32_t height = vc4->framebuffer.height;
49 uint32_t xtiles = align(width, 64) / 64;
50 uint32_t ytiles = align(height, 64) / 64;
51
52 #if 0
53 fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
54 vc4->resolve,
55 vc4->cleared,
56 resolve_uncleared);
57 #endif
58
59 cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
60 cl_u32(&vc4->rcl, vc4->clear_color[0]);
61 cl_u32(&vc4->rcl, vc4->clear_color[1]);
62 cl_u32(&vc4->rcl, vc4->clear_depth);
63 cl_u8(&vc4->rcl, vc4->clear_stencil);
64
65 /* The rendering mode config determines the pointer that's used for
66 * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. The kernel
67 * could handle a no-relocation rendering mode config and deny those
68 * packets, but instead we just tell the kernel we're doing our color
69 * rendering to the Z buffer, and just don't emit any of those
70 * packets.
71 */
72 struct vc4_surface *render_surf = csurf ? csurf : zsurf;
73 struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
74
75 cl_start_reloc(&vc4->rcl, 1);
76 cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
77 cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
78 cl_u16(&vc4->rcl, width);
79 cl_u16(&vc4->rcl, height);
80 cl_u16(&vc4->rcl, ((render_surf->tiling <<
81 VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
82 (vc4_rt_format_is_565(render_surf->base.format) ?
83 VC4_RENDER_CONFIG_FORMAT_BGR565 :
84 VC4_RENDER_CONFIG_FORMAT_RGBA8888) |
85 VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE));
86
87 /* The tile buffer normally gets cleared when the previous tile is
88 * stored. If the clear values changed between frames, then the tile
89 * buffer has stale clear values in it, so we have to do a store in
90 * None mode (no writes) so that we trigger the tile buffer clear.
91 *
92 * Excess clearing is only a performance cost, since per-tile contents
93 * will be loaded/stored in the loop below.
94 */
95 if (vc4->cleared & (PIPE_CLEAR_COLOR0 |
96 PIPE_CLEAR_DEPTH |
97 PIPE_CLEAR_STENCIL)) {
98 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
99 cl_u8(&vc4->rcl, 0);
100 cl_u8(&vc4->rcl, 0);
101
102 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
103 cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
104 cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
105 }
106
107 for (int y = 0; y < ytiles; y++) {
108 for (int x = 0; x < xtiles; x++) {
109 bool end_of_frame = (x == xtiles - 1 &&
110 y == ytiles - 1);
111 bool coords_emitted = false;
112
113 /* Note that the load doesn't actually occur until the
114 * tile coords packet is processed.
115 */
116 if (csurf && (resolve_uncleared & PIPE_CLEAR_COLOR)) {
117 cl_start_reloc(&vc4->rcl, 1);
118 cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
119 cl_u8(&vc4->rcl,
120 VC4_LOADSTORE_TILE_BUFFER_COLOR |
121 (csurf->tiling <<
122 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
123 cl_u8(&vc4->rcl,
124 vc4_rt_format_is_565(csurf->base.format) ?
125 VC4_LOADSTORE_TILE_BUFFER_BGR565 :
126 VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
127 cl_reloc(vc4, &vc4->rcl, ctex->bo,
128 csurf->offset);
129
130 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
131 cl_u8(&vc4->rcl, x);
132 cl_u8(&vc4->rcl, y);
133 coords_emitted = true;
134 }
135
136 if (zsurf && (resolve_uncleared & (PIPE_CLEAR_DEPTH |
137 PIPE_CLEAR_STENCIL))) {
138 cl_start_reloc(&vc4->rcl, 1);
139 cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
140 cl_u8(&vc4->rcl,
141 VC4_LOADSTORE_TILE_BUFFER_ZS |
142 (zsurf->tiling <<
143 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
144 cl_u8(&vc4->rcl, 0);
145 cl_reloc(vc4, &vc4->rcl, ztex->bo,
146 zsurf->offset);
147
148 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
149 cl_u8(&vc4->rcl, x);
150 cl_u8(&vc4->rcl, y);
151 coords_emitted = true;
152 }
153
154 /* Clipping depends on tile coordinates having been
155 * emitted, so make sure it's happened even if
156 * everything was cleared to start.
157 */
158 if (!coords_emitted) {
159 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
160 cl_u8(&vc4->rcl, x);
161 cl_u8(&vc4->rcl, y);
162 }
163
164 cl_start_reloc(&vc4->rcl, 1);
165 cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
166 cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc,
167 (y * xtiles + x) * 32);
168
169 if (zsurf && (vc4->resolve & (PIPE_CLEAR_DEPTH |
170 PIPE_CLEAR_STENCIL))) {
171 cl_start_reloc(&vc4->rcl, 1);
172 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
173 cl_u8(&vc4->rcl,
174 VC4_LOADSTORE_TILE_BUFFER_Z |
175 (zsurf->tiling <<
176 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
177 cl_u8(&vc4->rcl,
178 VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
179 cl_reloc(vc4, &vc4->rcl, ztex->bo,
180 zsurf->offset |
181 ((end_of_frame &&
182 !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
183 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
184 }
185
186 if (vc4->resolve & PIPE_CLEAR_COLOR0) {
187 if (end_of_frame) {
188 cl_u8(&vc4->rcl,
189 VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
190 } else {
191 cl_u8(&vc4->rcl,
192 VC4_PACKET_STORE_MS_TILE_BUFFER);
193 }
194 }
195
196 /* One of the bits needs to have been set that would
197 * have triggered an EOFq
198 */
199 assert(vc4->resolve & (PIPE_CLEAR_COLOR0 |
200 PIPE_CLEAR_DEPTH |
201 PIPE_CLEAR_STENCIL));
202 }
203 }
204 }
205
206 void
207 vc4_flush(struct pipe_context *pctx)
208 {
209 struct vc4_context *vc4 = vc4_context(pctx);
210
211 if (!vc4->needs_flush)
212 return;
213
214 cl_u8(&vc4->bcl, VC4_PACKET_FLUSH_ALL);
215 cl_u8(&vc4->bcl, VC4_PACKET_NOP);
216 cl_u8(&vc4->bcl, VC4_PACKET_HALT);
217
218 vc4_setup_rcl(vc4);
219
220 if (vc4_debug & VC4_DEBUG_CL) {
221 fprintf(stderr, "BCL:\n");
222 vc4_dump_cl(&vc4->bcl, false);
223 fprintf(stderr, "RCL:\n");
224 vc4_dump_cl(&vc4->rcl, true);
225 }
226
227 struct drm_vc4_submit_cl submit;
228 memset(&submit, 0, sizeof(submit));
229
230 submit.bo_handles = vc4->bo_handles.base;
231 submit.bo_handle_count = (vc4->bo_handles.next -
232 vc4->bo_handles.base) / 4;
233 submit.bin_cl = vc4->bcl.base;
234 submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base;
235 submit.render_cl = vc4->rcl.base;
236 submit.render_cl_size = vc4->rcl.next - vc4->rcl.base;
237 submit.shader_rec = vc4->shader_rec.base;
238 submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base;
239 submit.shader_rec_count = vc4->shader_rec_count;
240 submit.uniforms = vc4->uniforms.base;
241 submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base;
242
243 if (!(vc4_debug & VC4_DEBUG_NORAST)) {
244 int ret;
245
246 #ifndef USE_VC4_SIMULATOR
247 ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
248 #else
249 ret = vc4_simulator_flush(vc4, &submit);
250 #endif
251 if (ret) {
252 fprintf(stderr, "VC4 submit failed\n");
253 abort();
254 }
255 }
256
257 vc4_reset_cl(&vc4->bcl);
258 vc4_reset_cl(&vc4->rcl);
259 vc4_reset_cl(&vc4->shader_rec);
260 vc4_reset_cl(&vc4->uniforms);
261 vc4_reset_cl(&vc4->bo_handles);
262 struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
263 for (int i = 0; i < submit.bo_handle_count; i++)
264 vc4_bo_unreference(&referenced_bos[i]);
265 vc4_reset_cl(&vc4->bo_pointers);
266 vc4->shader_rec_count = 0;
267
268 vc4->needs_flush = false;
269 vc4->draw_call_queued = false;
270 vc4->dirty = ~0;
271 vc4->resolve = 0;
272 vc4->cleared = 0;
273 }
274
/**
 * pipe_context::flush hook: forwards to vc4_flush().
 *
 * The fence and flags arguments are not used.
 */
static void
vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
               unsigned flags)
{
        (void)fence;
        (void)flags;

        vc4_flush(pctx);
}
281
282 /**
283 * Flushes the current command lists if they reference the given BO.
284 *
285 * This helps avoid flushing the command buffers when unnecessary.
286 */
287 void
288 vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo)
289 {
290 struct vc4_context *vc4 = vc4_context(pctx);
291
292 if (!vc4->needs_flush)
293 return;
294
295 /* Walk all the referenced BOs in the drawing command list to see if
296 * they match.
297 */
298 struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
299 for (int i = 0; i < (vc4->bo_handles.next -
300 vc4->bo_handles.base) / 4; i++) {
301 if (referenced_bos[i] == bo) {
302 vc4_flush(pctx);
303 return;
304 }
305 }
306
307 /* Also check for the Z/color buffers, since the references to those
308 * are only added immediately before submit.
309 */
310 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
311 if (csurf) {
312 struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
313 if (ctex->bo == bo) {
314 vc4_flush(pctx);
315 return;
316 }
317 }
318
319 struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
320 if (zsurf) {
321 struct vc4_resource *ztex =
322 vc4_resource(zsurf->base.texture);
323 if (ztex->bo == bo) {
324 vc4_flush(pctx);
325 return;
326 }
327 }
328 }
329
330 static void
331 vc4_context_destroy(struct pipe_context *pctx)
332 {
333 struct vc4_context *vc4 = vc4_context(pctx);
334
335 if (vc4->blitter)
336 util_blitter_destroy(vc4->blitter);
337
338 if (vc4->primconvert)
339 util_primconvert_destroy(vc4->primconvert);
340
341 util_slab_destroy(&vc4->transfer_pool);
342
343 ralloc_free(vc4);
344 }
345
346 struct pipe_context *
347 vc4_context_create(struct pipe_screen *pscreen, void *priv)
348 {
349 struct vc4_screen *screen = vc4_screen(pscreen);
350 struct vc4_context *vc4;
351
352 /* Prevent dumping of the shaders built during context setup. */
353 uint32_t saved_shaderdb_flag = vc4_debug & VC4_DEBUG_SHADERDB;
354 vc4_debug &= ~VC4_DEBUG_SHADERDB;
355
356 vc4 = rzalloc(NULL, struct vc4_context);
357 if (vc4 == NULL)
358 return NULL;
359 struct pipe_context *pctx = &vc4->base;
360
361 vc4->screen = screen;
362
363 pctx->screen = pscreen;
364 pctx->priv = priv;
365 pctx->destroy = vc4_context_destroy;
366 pctx->flush = vc4_pipe_flush;
367
368 vc4_draw_init(pctx);
369 vc4_state_init(pctx);
370 vc4_program_init(pctx);
371 vc4_query_init(pctx);
372 vc4_resource_context_init(pctx);
373
374 vc4_init_cl(vc4, &vc4->bcl);
375 vc4_init_cl(vc4, &vc4->rcl);
376 vc4_init_cl(vc4, &vc4->shader_rec);
377 vc4_init_cl(vc4, &vc4->bo_handles);
378
379 vc4->dirty = ~0;
380 vc4->fd = screen->fd;
381
382 util_slab_create(&vc4->transfer_pool, sizeof(struct vc4_transfer),
383 16, UTIL_SLAB_SINGLETHREADED);
384 vc4->blitter = util_blitter_create(pctx);
385 if (!vc4->blitter)
386 goto fail;
387
388 vc4->primconvert = util_primconvert_create(pctx,
389 (1 << PIPE_PRIM_QUADS) - 1);
390 if (!vc4->primconvert)
391 goto fail;
392
393 vc4_debug |= saved_shaderdb_flag;
394
395 return &vc4->base;
396
397 fail:
398 pctx->destroy(pctx);
399 return NULL;
400 }