vc4: Add support for rebasing texture levels so firstlevel == 0.
[mesa.git] / src / gallium / drivers / vc4 / vc4_context.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <xf86drm.h>
25 #include <err.h>
26
27 #include "pipe/p_defines.h"
28 #include "util/ralloc.h"
29 #include "util/u_inlines.h"
30 #include "util/u_memory.h"
31 #include "util/u_blitter.h"
32 #include "indices/u_primconvert.h"
33 #include "pipe/p_screen.h"
34
35 #include "vc4_screen.h"
36 #include "vc4_context.h"
37 #include "vc4_resource.h"
38
39 /**
40 * Emits a no-op STORE_TILE_BUFFER_GENERAL.
41 *
42 * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
43 * some sort before another load is triggered.
44 */
45 static void
46 vc4_store_before_load(struct vc4_context *vc4, bool *coords_emitted)
47 {
48 if (!*coords_emitted)
49 return;
50
51 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
52 cl_u8(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
53 cl_u8(&vc4->rcl, (VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
54 VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
55 VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR));
56 cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
57
58 *coords_emitted = false;
59 }
60
61 /**
62 * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
63 *
64 * The tile coordinates packet triggers a pending load if there is one, are
65 * used for clipping during rendering, and determine where loads/stores happen
66 * relative to their base address.
67 */
68 static void
69 vc4_tile_coordinates(struct vc4_context *vc4, uint32_t x, uint32_t y,
70 bool *coords_emitted)
71 {
72 if (*coords_emitted)
73 return;
74
75 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
76 cl_u8(&vc4->rcl, x);
77 cl_u8(&vc4->rcl, y);
78
79 *coords_emitted = true;
80 }
81
82 static void
83 vc4_setup_rcl(struct vc4_context *vc4)
84 {
85 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
86 struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
87 struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
88 struct vc4_resource *ztex = zsurf ? vc4_resource(zsurf->base.texture) : NULL;
89
90 if (!csurf)
91 vc4->resolve &= ~PIPE_CLEAR_COLOR0;
92 if (!zsurf)
93 vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
94 uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
95 uint32_t width = vc4->framebuffer.width;
96 uint32_t height = vc4->framebuffer.height;
97 uint32_t xtiles = align(width, 64) / 64;
98 uint32_t ytiles = align(height, 64) / 64;
99
100 #if 0
101 fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
102 vc4->resolve,
103 vc4->cleared,
104 resolve_uncleared);
105 #endif
106
107 cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
108 cl_u32(&vc4->rcl, vc4->clear_color[0]);
109 cl_u32(&vc4->rcl, vc4->clear_color[1]);
110 cl_u32(&vc4->rcl, vc4->clear_depth);
111 cl_u8(&vc4->rcl, vc4->clear_stencil);
112
113 /* The rendering mode config determines the pointer that's used for
114 * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. The kernel
115 * could handle a no-relocation rendering mode config and deny those
116 * packets, but instead we just tell the kernel we're doing our color
117 * rendering to the Z buffer, and just don't emit any of those
118 * packets.
119 */
120 struct vc4_surface *render_surf = csurf ? csurf : zsurf;
121 struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
122
123 cl_start_reloc(&vc4->rcl, 1);
124 cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
125 cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
126 cl_u16(&vc4->rcl, width);
127 cl_u16(&vc4->rcl, height);
128 cl_u16(&vc4->rcl, ((render_surf->tiling <<
129 VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
130 (vc4_rt_format_is_565(render_surf->base.format) ?
131 VC4_RENDER_CONFIG_FORMAT_BGR565 :
132 VC4_RENDER_CONFIG_FORMAT_RGBA8888) |
133 VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE));
134
135 /* The tile buffer normally gets cleared when the previous tile is
136 * stored. If the clear values changed between frames, then the tile
137 * buffer has stale clear values in it, so we have to do a store in
138 * None mode (no writes) so that we trigger the tile buffer clear.
139 *
140 * Excess clearing is only a performance cost, since per-tile contents
141 * will be loaded/stored in the loop below.
142 */
143 if (vc4->cleared & (PIPE_CLEAR_COLOR0 |
144 PIPE_CLEAR_DEPTH |
145 PIPE_CLEAR_STENCIL)) {
146 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
147 cl_u8(&vc4->rcl, 0);
148 cl_u8(&vc4->rcl, 0);
149
150 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
151 cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
152 cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
153 }
154
155 for (int y = 0; y < ytiles; y++) {
156 for (int x = 0; x < xtiles; x++) {
157 bool end_of_frame = (x == xtiles - 1 &&
158 y == ytiles - 1);
159 bool coords_emitted = false;
160
161 /* Note that the load doesn't actually occur until the
162 * tile coords packet is processed, and only one load
163 * may be outstanding at a time.
164 */
165 if (resolve_uncleared & PIPE_CLEAR_COLOR) {
166 vc4_store_before_load(vc4, &coords_emitted);
167
168 cl_start_reloc(&vc4->rcl, 1);
169 cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
170 cl_u8(&vc4->rcl,
171 VC4_LOADSTORE_TILE_BUFFER_COLOR |
172 (csurf->tiling <<
173 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
174 cl_u8(&vc4->rcl,
175 vc4_rt_format_is_565(csurf->base.format) ?
176 VC4_LOADSTORE_TILE_BUFFER_BGR565 :
177 VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
178 cl_reloc(vc4, &vc4->rcl, ctex->bo,
179 csurf->offset);
180
181 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
182 }
183
184 if (resolve_uncleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
185 vc4_store_before_load(vc4, &coords_emitted);
186
187 cl_start_reloc(&vc4->rcl, 1);
188 cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
189 cl_u8(&vc4->rcl,
190 VC4_LOADSTORE_TILE_BUFFER_ZS |
191 (zsurf->tiling <<
192 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
193 cl_u8(&vc4->rcl, 0);
194 cl_reloc(vc4, &vc4->rcl, ztex->bo,
195 zsurf->offset);
196
197 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
198 }
199
200 /* Clipping depends on tile coordinates having been
201 * emitted, so make sure it's happened even if
202 * everything was cleared to start.
203 */
204 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
205
206 cl_start_reloc(&vc4->rcl, 1);
207 cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
208 cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc,
209 (y * xtiles + x) * 32);
210
211 if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
212 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
213
214 cl_start_reloc(&vc4->rcl, 1);
215 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
216 cl_u8(&vc4->rcl,
217 VC4_LOADSTORE_TILE_BUFFER_ZS |
218 (zsurf->tiling <<
219 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
220 cl_u8(&vc4->rcl,
221 VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
222 cl_reloc(vc4, &vc4->rcl, ztex->bo,
223 zsurf->offset |
224 ((end_of_frame &&
225 !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
226 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
227
228 coords_emitted = false;
229 }
230
231 if (vc4->resolve & PIPE_CLEAR_COLOR0) {
232 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
233 if (end_of_frame) {
234 cl_u8(&vc4->rcl,
235 VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
236 } else {
237 cl_u8(&vc4->rcl,
238 VC4_PACKET_STORE_MS_TILE_BUFFER);
239 }
240
241 coords_emitted = false;
242 }
243
244 /* One of the bits needs to have been set that would
245 * have triggered an EOF.
246 */
247 assert(vc4->resolve & (PIPE_CLEAR_COLOR0 |
248 PIPE_CLEAR_DEPTH |
249 PIPE_CLEAR_STENCIL));
250 /* Any coords emitted must also have been consumed by
251 * a store.
252 */
253 assert(!coords_emitted);
254 }
255 }
256
257 if (vc4->resolve & PIPE_CLEAR_COLOR0)
258 ctex->writes++;
259
260 if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
261 ztex->writes++;
262 }
263
264 void
265 vc4_flush(struct pipe_context *pctx)
266 {
267 struct vc4_context *vc4 = vc4_context(pctx);
268
269 if (!vc4->needs_flush)
270 return;
271
272 /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
273 cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
274
275 cl_u8(&vc4->bcl, VC4_PACKET_NOP);
276 cl_u8(&vc4->bcl, VC4_PACKET_HALT);
277
278 vc4_setup_rcl(vc4);
279
280 if (vc4_debug & VC4_DEBUG_CL) {
281 fprintf(stderr, "BCL:\n");
282 vc4_dump_cl(vc4->bcl.base, vc4->bcl.end - vc4->bcl.base, false);
283 fprintf(stderr, "RCL:\n");
284 vc4_dump_cl(vc4->rcl.base, vc4->rcl.end - vc4->rcl.base, true);
285 }
286
287 struct drm_vc4_submit_cl submit;
288 memset(&submit, 0, sizeof(submit));
289
290 submit.bo_handles = vc4->bo_handles.base;
291 submit.bo_handle_count = (vc4->bo_handles.next -
292 vc4->bo_handles.base) / 4;
293 submit.bin_cl = vc4->bcl.base;
294 submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base;
295 submit.render_cl = vc4->rcl.base;
296 submit.render_cl_size = vc4->rcl.next - vc4->rcl.base;
297 submit.shader_rec = vc4->shader_rec.base;
298 submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base;
299 submit.shader_rec_count = vc4->shader_rec_count;
300 submit.uniforms = vc4->uniforms.base;
301 submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base;
302
303 if (!(vc4_debug & VC4_DEBUG_NORAST)) {
304 int ret;
305
306 #ifndef USE_VC4_SIMULATOR
307 ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
308 #else
309 ret = vc4_simulator_flush(vc4, &submit);
310 #endif
311 if (ret) {
312 fprintf(stderr, "VC4 submit failed\n");
313 abort();
314 }
315 }
316
317 vc4_reset_cl(&vc4->bcl);
318 vc4_reset_cl(&vc4->rcl);
319 vc4_reset_cl(&vc4->shader_rec);
320 vc4_reset_cl(&vc4->uniforms);
321 vc4_reset_cl(&vc4->bo_handles);
322 struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
323 for (int i = 0; i < submit.bo_handle_count; i++)
324 vc4_bo_unreference(&referenced_bos[i]);
325 vc4_reset_cl(&vc4->bo_pointers);
326 vc4->shader_rec_count = 0;
327
328 vc4->needs_flush = false;
329 vc4->draw_call_queued = false;
330
331 /* We have no hardware context saved between our draw calls, so we
332 * need to flag the next draw as needing all state emitted. Emitting
333 * all state at the start of our draws is also what ensures that we
334 * return to the state we need after a previous tile has finished.
335 */
336 vc4->dirty = ~0;
337 vc4->resolve = 0;
338 vc4->cleared = 0;
339 }
340
/**
 * pipe_context::flush hook: forwards to vc4_flush().
 *
 * Neither the fence output nor the flush flags are used here; *fence is
 * left untouched.
 */
static void
vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
               unsigned flags)
{
        (void)fence;
        (void)flags;

        vc4_flush(pctx);
}
347
348 /**
349 * Flushes the current command lists if they reference the given BO.
350 *
351 * This helps avoid flushing the command buffers when unnecessary.
352 */
353 void
354 vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo)
355 {
356 struct vc4_context *vc4 = vc4_context(pctx);
357
358 if (!vc4->needs_flush)
359 return;
360
361 /* Walk all the referenced BOs in the drawing command list to see if
362 * they match.
363 */
364 struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
365 for (int i = 0; i < (vc4->bo_handles.next -
366 vc4->bo_handles.base) / 4; i++) {
367 if (referenced_bos[i] == bo) {
368 vc4_flush(pctx);
369 return;
370 }
371 }
372
373 /* Also check for the Z/color buffers, since the references to those
374 * are only added immediately before submit.
375 */
376 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
377 if (csurf) {
378 struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
379 if (ctex->bo == bo) {
380 vc4_flush(pctx);
381 return;
382 }
383 }
384
385 struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
386 if (zsurf) {
387 struct vc4_resource *ztex =
388 vc4_resource(zsurf->base.texture);
389 if (ztex->bo == bo) {
390 vc4_flush(pctx);
391 return;
392 }
393 }
394 }
395
396 static void
397 vc4_context_destroy(struct pipe_context *pctx)
398 {
399 struct vc4_context *vc4 = vc4_context(pctx);
400
401 if (vc4->blitter)
402 util_blitter_destroy(vc4->blitter);
403
404 if (vc4->primconvert)
405 util_primconvert_destroy(vc4->primconvert);
406
407 util_slab_destroy(&vc4->transfer_pool);
408
409 ralloc_free(vc4);
410 }
411
412 struct pipe_context *
413 vc4_context_create(struct pipe_screen *pscreen, void *priv)
414 {
415 struct vc4_screen *screen = vc4_screen(pscreen);
416 struct vc4_context *vc4;
417
418 /* Prevent dumping of the shaders built during context setup. */
419 uint32_t saved_shaderdb_flag = vc4_debug & VC4_DEBUG_SHADERDB;
420 vc4_debug &= ~VC4_DEBUG_SHADERDB;
421
422 vc4 = rzalloc(NULL, struct vc4_context);
423 if (vc4 == NULL)
424 return NULL;
425 struct pipe_context *pctx = &vc4->base;
426
427 vc4->screen = screen;
428
429 pctx->screen = pscreen;
430 pctx->priv = priv;
431 pctx->destroy = vc4_context_destroy;
432 pctx->flush = vc4_pipe_flush;
433
434 vc4_draw_init(pctx);
435 vc4_state_init(pctx);
436 vc4_program_init(pctx);
437 vc4_query_init(pctx);
438 vc4_resource_context_init(pctx);
439
440 vc4_init_cl(vc4, &vc4->bcl);
441 vc4_init_cl(vc4, &vc4->rcl);
442 vc4_init_cl(vc4, &vc4->shader_rec);
443 vc4_init_cl(vc4, &vc4->bo_handles);
444
445 vc4->dirty = ~0;
446 vc4->fd = screen->fd;
447
448 util_slab_create(&vc4->transfer_pool, sizeof(struct vc4_transfer),
449 16, UTIL_SLAB_SINGLETHREADED);
450 vc4->blitter = util_blitter_create(pctx);
451 if (!vc4->blitter)
452 goto fail;
453
454 vc4->primconvert = util_primconvert_create(pctx,
455 (1 << PIPE_PRIM_QUADS) - 1);
456 if (!vc4->primconvert)
457 goto fail;
458
459 vc4_debug |= saved_shaderdb_flag;
460
461 return &vc4->base;
462
463 fail:
464 pctx->destroy(pctx);
465 return NULL;
466 }