vc4: Just stream out fallback IB contents.
[mesa.git] / src / gallium / drivers / vc4 / vc4_context.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <xf86drm.h>
25 #include <err.h>
26
27 #include "pipe/p_defines.h"
28 #include "util/ralloc.h"
29 #include "util/u_inlines.h"
30 #include "util/u_memory.h"
31 #include "util/u_blitter.h"
32 #include "util/u_upload_mgr.h"
33 #include "indices/u_primconvert.h"
34 #include "pipe/p_screen.h"
35
36 #include "vc4_screen.h"
37 #include "vc4_context.h"
38 #include "vc4_resource.h"
39
40 /**
41 * Emits a no-op STORE_TILE_BUFFER_GENERAL.
42 *
43 * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
44 * some sort before another load is triggered.
45 */
46 static void
47 vc4_store_before_load(struct vc4_context *vc4, bool *coords_emitted)
48 {
49 if (!*coords_emitted)
50 return;
51
52 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
53 cl_u8(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
54 cl_u8(&vc4->rcl, (VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
55 VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
56 VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR));
57 cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
58
59 *coords_emitted = false;
60 }
61
62 /**
63 * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
64 *
65 * The tile coordinates packet triggers a pending load if there is one, are
66 * used for clipping during rendering, and determine where loads/stores happen
67 * relative to their base address.
68 */
69 static void
70 vc4_tile_coordinates(struct vc4_context *vc4, uint32_t x, uint32_t y,
71 bool *coords_emitted)
72 {
73 if (*coords_emitted)
74 return;
75
76 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
77 cl_u8(&vc4->rcl, x);
78 cl_u8(&vc4->rcl, y);
79
80 *coords_emitted = true;
81 }
82
83 static void
84 vc4_setup_rcl(struct vc4_context *vc4)
85 {
86 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
87 struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
88 struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
89 struct vc4_resource *ztex = zsurf ? vc4_resource(zsurf->base.texture) : NULL;
90
91 if (!csurf)
92 vc4->resolve &= ~PIPE_CLEAR_COLOR0;
93 if (!zsurf)
94 vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
95 uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
96 uint32_t width = vc4->framebuffer.width;
97 uint32_t height = vc4->framebuffer.height;
98 uint32_t stride_in_tiles = align(width, 64) / 64;
99
100 assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
101 uint32_t min_x_tile = vc4->draw_min_x / 64;
102 uint32_t min_y_tile = vc4->draw_min_y / 64;
103 uint32_t max_x_tile = (vc4->draw_max_x - 1) / 64;
104 uint32_t max_y_tile = (vc4->draw_max_y - 1) / 64;
105 uint32_t xtiles = max_x_tile - min_x_tile + 1;
106 uint32_t ytiles = max_y_tile - min_y_tile + 1;
107
108 #if 0
109 fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
110 vc4->resolve,
111 vc4->cleared,
112 resolve_uncleared);
113 #endif
114
115 uint32_t reloc_size = 9;
116 uint32_t clear_size = 14;
117 uint32_t config_size = 11 + reloc_size;
118 uint32_t loadstore_size = 7 + reloc_size;
119 uint32_t tilecoords_size = 3;
120 uint32_t branch_size = 5 + reloc_size;
121 uint32_t color_store_size = 1;
122 uint32_t semaphore_size = 1;
123 cl_ensure_space(&vc4->rcl,
124 clear_size +
125 config_size +
126 loadstore_size +
127 semaphore_size +
128 xtiles * ytiles * (loadstore_size * 4 +
129 tilecoords_size * 3 +
130 branch_size +
131 color_store_size));
132
133 if (vc4->cleared) {
134 cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
135 cl_u32(&vc4->rcl, vc4->clear_color[0]);
136 cl_u32(&vc4->rcl, vc4->clear_color[1]);
137 cl_u32(&vc4->rcl, vc4->clear_depth);
138 cl_u8(&vc4->rcl, vc4->clear_stencil);
139 }
140
141 /* The rendering mode config determines the pointer that's used for
142 * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. The kernel
143 * could handle a no-relocation rendering mode config and deny those
144 * packets, but instead we just tell the kernel we're doing our color
145 * rendering to the Z buffer, and just don't emit any of those
146 * packets.
147 */
148 struct vc4_surface *render_surf = csurf ? csurf : zsurf;
149 struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
150 cl_start_reloc(&vc4->rcl, 1);
151 cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
152 cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
153 cl_u16(&vc4->rcl, width);
154 cl_u16(&vc4->rcl, height);
155 cl_u16(&vc4->rcl, ((render_surf->tiling <<
156 VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
157 (vc4_rt_format_is_565(render_surf->base.format) ?
158 VC4_RENDER_CONFIG_FORMAT_BGR565 :
159 VC4_RENDER_CONFIG_FORMAT_RGBA8888)));
160
161 /* The tile buffer normally gets cleared when the previous tile is
162 * stored. If the clear values changed between frames, then the tile
163 * buffer has stale clear values in it, so we have to do a store in
164 * None mode (no writes) so that we trigger the tile buffer clear.
165 *
166 * Excess clearing is only a performance cost, since per-tile contents
167 * will be loaded/stored in the loop below.
168 */
169 if (vc4->cleared & (PIPE_CLEAR_COLOR0 |
170 PIPE_CLEAR_DEPTH |
171 PIPE_CLEAR_STENCIL)) {
172 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
173 cl_u8(&vc4->rcl, 0);
174 cl_u8(&vc4->rcl, 0);
175
176 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
177 cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
178 cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
179 }
180
181 uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0;
182 uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0;
183 uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc);
184
185 for (int y = min_y_tile; y <= max_y_tile; y++) {
186 for (int x = min_x_tile; x <= max_x_tile; x++) {
187 bool end_of_frame = (x == max_x_tile &&
188 y == max_y_tile);
189 bool coords_emitted = false;
190
191 /* Note that the load doesn't actually occur until the
192 * tile coords packet is processed, and only one load
193 * may be outstanding at a time.
194 */
195 if (resolve_uncleared & PIPE_CLEAR_COLOR) {
196 vc4_store_before_load(vc4, &coords_emitted);
197
198 cl_start_reloc(&vc4->rcl, 1);
199 cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
200 cl_u8(&vc4->rcl,
201 VC4_LOADSTORE_TILE_BUFFER_COLOR |
202 (csurf->tiling <<
203 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
204 cl_u8(&vc4->rcl,
205 vc4_rt_format_is_565(csurf->base.format) ?
206 VC4_LOADSTORE_TILE_BUFFER_BGR565 :
207 VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
208 cl_reloc_hindex(&vc4->rcl, color_hindex,
209 csurf->offset);
210
211 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
212 }
213
214 if (resolve_uncleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
215 vc4_store_before_load(vc4, &coords_emitted);
216
217 cl_start_reloc(&vc4->rcl, 1);
218 cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
219 cl_u8(&vc4->rcl,
220 VC4_LOADSTORE_TILE_BUFFER_ZS |
221 (zsurf->tiling <<
222 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
223 cl_u8(&vc4->rcl, 0);
224 cl_reloc_hindex(&vc4->rcl, depth_hindex,
225 zsurf->offset);
226
227 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
228 }
229
230 /* Clipping depends on tile coordinates having been
231 * emitted, so make sure it's happened even if
232 * everything was cleared to start.
233 */
234 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
235
236 /* Wait for the binner before jumping to the first
237 * tile's lists.
238 */
239 if (x == min_x_tile && y == min_y_tile)
240 cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE);
241
242 cl_start_reloc(&vc4->rcl, 1);
243 cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
244 cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex,
245 (y * stride_in_tiles + x) * 32);
246
247 if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
248 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
249
250 cl_start_reloc(&vc4->rcl, 1);
251 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
252 cl_u8(&vc4->rcl,
253 VC4_LOADSTORE_TILE_BUFFER_ZS |
254 (zsurf->tiling <<
255 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
256 cl_u8(&vc4->rcl,
257 VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
258 cl_reloc_hindex(&vc4->rcl, depth_hindex,
259 zsurf->offset |
260 ((end_of_frame &&
261 !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
262 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
263
264 coords_emitted = false;
265 }
266
267 if (vc4->resolve & PIPE_CLEAR_COLOR0) {
268 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
269 if (end_of_frame) {
270 cl_u8(&vc4->rcl,
271 VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
272 } else {
273 cl_u8(&vc4->rcl,
274 VC4_PACKET_STORE_MS_TILE_BUFFER);
275 }
276
277 coords_emitted = false;
278 }
279
280 /* One of the bits needs to have been set that would
281 * have triggered an EOF.
282 */
283 assert(vc4->resolve & (PIPE_CLEAR_COLOR0 |
284 PIPE_CLEAR_DEPTH |
285 PIPE_CLEAR_STENCIL));
286 /* Any coords emitted must also have been consumed by
287 * a store.
288 */
289 assert(!coords_emitted);
290 }
291 }
292
293 if (vc4->resolve & PIPE_CLEAR_COLOR0)
294 ctex->writes++;
295
296 if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
297 ztex->writes++;
298 }
299
300 void
301 vc4_flush(struct pipe_context *pctx)
302 {
303 struct vc4_context *vc4 = vc4_context(pctx);
304
305 if (!vc4->needs_flush)
306 return;
307
308 /* The RCL setup would choke if the draw bounds cause no drawing, so
309 * just drop the drawing if that's the case.
310 */
311 if (vc4->draw_max_x <= vc4->draw_min_x ||
312 vc4->draw_max_y <= vc4->draw_min_y) {
313 vc4_job_reset(vc4);
314 return;
315 }
316
317 /* Increment the semaphore indicating that binning is done and
318 * unblocking the render thread. Note that this doesn't act until the
319 * FLUSH completes.
320 */
321 cl_ensure_space(&vc4->bcl, 8);
322 cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
323 /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
324 cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
325
326 vc4_setup_rcl(vc4);
327
328 vc4_job_submit(vc4);
329 }
330
331 static void
332 vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
333 unsigned flags)
334 {
335 struct vc4_context *vc4 = vc4_context(pctx);
336
337 vc4_flush(pctx);
338
339 if (fence) {
340 struct vc4_fence *f = vc4_fence_create(vc4->screen,
341 vc4->last_emit_seqno);
342 *fence = (struct pipe_fence_handle *)f;
343 }
344 }
345
346 /**
347 * Flushes the current command lists if they reference the given BO.
348 *
349 * This helps avoid flushing the command buffers when unnecessary.
350 */
351 bool
352 vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo)
353 {
354 struct vc4_context *vc4 = vc4_context(pctx);
355
356 if (!vc4->needs_flush)
357 return false;
358
359 /* Walk all the referenced BOs in the drawing command list to see if
360 * they match.
361 */
362 struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
363 for (int i = 0; i < (vc4->bo_handles.next -
364 vc4->bo_handles.base) / 4; i++) {
365 if (referenced_bos[i] == bo) {
366 return true;
367 }
368 }
369
370 /* Also check for the Z/color buffers, since the references to those
371 * are only added immediately before submit.
372 */
373 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
374 if (csurf) {
375 struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
376 if (ctex->bo == bo) {
377 return true;
378 }
379 }
380
381 struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
382 if (zsurf) {
383 struct vc4_resource *ztex =
384 vc4_resource(zsurf->base.texture);
385 if (ztex->bo == bo) {
386 return true;
387 }
388 }
389
390 return false;
391 }
392
393 static void
394 vc4_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
395 {
396 struct vc4_context *vc4 = vc4_context(pctx);
397 struct pipe_surface *zsurf = vc4->framebuffer.zsbuf;
398
399 if (zsurf && zsurf->texture == prsc)
400 vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
401 }
402
403 static void
404 vc4_context_destroy(struct pipe_context *pctx)
405 {
406 struct vc4_context *vc4 = vc4_context(pctx);
407
408 if (vc4->blitter)
409 util_blitter_destroy(vc4->blitter);
410
411 if (vc4->primconvert)
412 util_primconvert_destroy(vc4->primconvert);
413
414 if (vc4->uploader)
415 u_upload_destroy(vc4->uploader);
416
417 util_slab_destroy(&vc4->transfer_pool);
418
419 pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
420 pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL);
421 vc4_bo_unreference(&vc4->tile_alloc);
422 vc4_bo_unreference(&vc4->tile_state);
423
424 vc4_program_fini(pctx);
425
426 ralloc_free(vc4);
427 }
428
429 struct pipe_context *
430 vc4_context_create(struct pipe_screen *pscreen, void *priv)
431 {
432 struct vc4_screen *screen = vc4_screen(pscreen);
433 struct vc4_context *vc4;
434
435 /* Prevent dumping of the shaders built during context setup. */
436 uint32_t saved_shaderdb_flag = vc4_debug & VC4_DEBUG_SHADERDB;
437 vc4_debug &= ~VC4_DEBUG_SHADERDB;
438
439 vc4 = rzalloc(NULL, struct vc4_context);
440 if (vc4 == NULL)
441 return NULL;
442 struct pipe_context *pctx = &vc4->base;
443
444 vc4->screen = screen;
445
446 pctx->screen = pscreen;
447 pctx->priv = priv;
448 pctx->destroy = vc4_context_destroy;
449 pctx->flush = vc4_pipe_flush;
450 pctx->invalidate_resource = vc4_invalidate_resource;
451
452 vc4_draw_init(pctx);
453 vc4_state_init(pctx);
454 vc4_program_init(pctx);
455 vc4_query_init(pctx);
456 vc4_resource_context_init(pctx);
457
458 vc4_job_init(vc4);
459
460 vc4->fd = screen->fd;
461
462 util_slab_create(&vc4->transfer_pool, sizeof(struct vc4_transfer),
463 16, UTIL_SLAB_SINGLETHREADED);
464 vc4->blitter = util_blitter_create(pctx);
465 if (!vc4->blitter)
466 goto fail;
467
468 vc4->primconvert = util_primconvert_create(pctx,
469 (1 << PIPE_PRIM_QUADS) - 1);
470 if (!vc4->primconvert)
471 goto fail;
472
473 vc4->uploader = u_upload_create(pctx, 16 * 1024, 4,
474 PIPE_BIND_INDEX_BUFFER);
475
476 vc4_debug |= saved_shaderdb_flag;
477
478 return &vc4->base;
479
480 fail:
481 pctx->destroy(pctx);
482 return NULL;
483 }