vc4: Restructure depth input/output in fragment shaders.
[mesa.git] / src / gallium / drivers / vc4 / vc4_context.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <xf86drm.h>
25 #include <err.h>
26
27 #include "pipe/p_defines.h"
28 #include "util/u_inlines.h"
29 #include "util/u_memory.h"
30 #include "util/u_blitter.h"
31 #include "indices/u_primconvert.h"
32 #include "pipe/p_screen.h"
33
34 #include "vc4_screen.h"
35 #include "vc4_context.h"
36 #include "vc4_resource.h"
37
38 static void
39 vc4_setup_rcl(struct vc4_context *vc4)
40 {
41 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
42 struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
43 struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
44 struct vc4_resource *ztex = zsurf ? vc4_resource(zsurf->base.texture) : NULL;
45 uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
46 uint32_t width = vc4->framebuffer.width;
47 uint32_t height = vc4->framebuffer.height;
48 uint32_t xtiles = align(width, 64) / 64;
49 uint32_t ytiles = align(height, 64) / 64;
50
51 #if 0
52 fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
53 vc4->resolve,
54 vc4->cleared,
55 resolve_uncleared);
56 #endif
57
58 cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
59 cl_u32(&vc4->rcl, vc4->clear_color[0]);
60 cl_u32(&vc4->rcl, vc4->clear_color[1]);
61 cl_u32(&vc4->rcl, vc4->clear_depth);
62 cl_u8(&vc4->rcl, vc4->clear_stencil);
63
64 /* The rendering mode config determines the pointer that's used for
65 * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. The kernel
66 * could handle a no-relocation rendering mode config and deny those
67 * packets, but instead we just tell the kernel we're doing our color
68 * rendering to the Z buffer, and just don't emit any of those
69 * packets.
70 */
71 struct vc4_surface *render_surf = csurf ? csurf : zsurf;
72 struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
73
74 cl_start_reloc(&vc4->rcl, 1);
75 cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
76 cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
77 cl_u16(&vc4->rcl, width);
78 cl_u16(&vc4->rcl, height);
79 cl_u16(&vc4->rcl, ((render_surf->tiling <<
80 VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
81 (vc4_rt_format_is_565(render_surf->base.format) ?
82 VC4_RENDER_CONFIG_FORMAT_BGR565 :
83 VC4_RENDER_CONFIG_FORMAT_RGBA8888) |
84 VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE));
85
86 /* The tile buffer normally gets cleared when the previous tile is
87 * stored. If the clear values changed between frames, then the tile
88 * buffer has stale clear values in it, so we have to do a store in
89 * None mode (no writes) so that we trigger the tile buffer clear.
90 *
91 * Excess clearing is only a performance cost, since per-tile contents
92 * will be loaded/stored in the loop below.
93 */
94 if (vc4->cleared & (PIPE_CLEAR_COLOR0 |
95 PIPE_CLEAR_DEPTH |
96 PIPE_CLEAR_STENCIL)) {
97 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
98 cl_u8(&vc4->rcl, 0);
99 cl_u8(&vc4->rcl, 0);
100
101 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
102 cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
103 cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
104 }
105
106 for (int y = 0; y < ytiles; y++) {
107 for (int x = 0; x < xtiles; x++) {
108 bool end_of_frame = (x == xtiles - 1 &&
109 y == ytiles - 1);
110 bool coords_emitted = false;
111
112 /* Note that the load doesn't actually occur until the
113 * tile coords packet is processed.
114 */
115 if (csurf && (resolve_uncleared & PIPE_CLEAR_COLOR)) {
116 cl_start_reloc(&vc4->rcl, 1);
117 cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
118 cl_u8(&vc4->rcl,
119 VC4_LOADSTORE_TILE_BUFFER_COLOR |
120 (csurf->tiling <<
121 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
122 cl_u8(&vc4->rcl,
123 vc4_rt_format_is_565(csurf->base.format) ?
124 VC4_LOADSTORE_TILE_BUFFER_BGR565 :
125 VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
126 cl_reloc(vc4, &vc4->rcl, ctex->bo,
127 csurf->offset);
128
129 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
130 cl_u8(&vc4->rcl, x);
131 cl_u8(&vc4->rcl, y);
132 coords_emitted = true;
133 }
134
135 if (zsurf && (resolve_uncleared & (PIPE_CLEAR_DEPTH |
136 PIPE_CLEAR_STENCIL))) {
137 cl_start_reloc(&vc4->rcl, 1);
138 cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
139 cl_u8(&vc4->rcl,
140 VC4_LOADSTORE_TILE_BUFFER_ZS |
141 (zsurf->tiling <<
142 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
143 cl_u8(&vc4->rcl, 0);
144 cl_reloc(vc4, &vc4->rcl, ztex->bo,
145 zsurf->offset);
146
147 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
148 cl_u8(&vc4->rcl, x);
149 cl_u8(&vc4->rcl, y);
150 coords_emitted = true;
151 }
152
153 /* Clipping depends on tile coordinates having been
154 * emitted, so make sure it's happened even if
155 * everything was cleared to start.
156 */
157 if (!coords_emitted) {
158 cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
159 cl_u8(&vc4->rcl, x);
160 cl_u8(&vc4->rcl, y);
161 }
162
163 cl_start_reloc(&vc4->rcl, 1);
164 cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
165 cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc,
166 (y * xtiles + x) * 32);
167
168 if (zsurf && (vc4->resolve & (PIPE_CLEAR_DEPTH |
169 PIPE_CLEAR_STENCIL))) {
170 cl_start_reloc(&vc4->rcl, 1);
171 cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
172 cl_u8(&vc4->rcl,
173 VC4_LOADSTORE_TILE_BUFFER_Z |
174 (zsurf->tiling <<
175 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
176 cl_u8(&vc4->rcl,
177 VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
178 cl_reloc(vc4, &vc4->rcl, ztex->bo,
179 zsurf->offset |
180 ((end_of_frame &&
181 !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
182 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
183 }
184
185 if (vc4->resolve & PIPE_CLEAR_COLOR0) {
186 if (end_of_frame) {
187 cl_u8(&vc4->rcl,
188 VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
189 } else {
190 cl_u8(&vc4->rcl,
191 VC4_PACKET_STORE_MS_TILE_BUFFER);
192 }
193 }
194
195 /* One of the bits needs to have been set that would
196 * have triggered an EOFq
197 */
198 assert(vc4->resolve & (PIPE_CLEAR_COLOR0 |
199 PIPE_CLEAR_DEPTH |
200 PIPE_CLEAR_STENCIL));
201 }
202 }
203 }
204
205 void
206 vc4_flush(struct pipe_context *pctx)
207 {
208 struct vc4_context *vc4 = vc4_context(pctx);
209
210 if (!vc4->needs_flush)
211 return;
212
213 cl_u8(&vc4->bcl, VC4_PACKET_FLUSH_ALL);
214 cl_u8(&vc4->bcl, VC4_PACKET_NOP);
215 cl_u8(&vc4->bcl, VC4_PACKET_HALT);
216
217 vc4_setup_rcl(vc4);
218
219 struct drm_vc4_submit_cl submit;
220 memset(&submit, 0, sizeof(submit));
221
222 submit.bo_handles = vc4->bo_handles.base;
223 submit.bo_handle_count = (vc4->bo_handles.next -
224 vc4->bo_handles.base) / 4;
225 submit.bin_cl = vc4->bcl.base;
226 submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base;
227 submit.render_cl = vc4->rcl.base;
228 submit.render_cl_size = vc4->rcl.next - vc4->rcl.base;
229 submit.shader_rec = vc4->shader_rec.base;
230 submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base;
231 submit.shader_rec_count = vc4->shader_rec_count;
232 submit.uniforms = vc4->uniforms.base;
233 submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base;
234
235 if (!(vc4_debug & VC4_DEBUG_NORAST)) {
236 int ret;
237
238 #ifndef USE_VC4_SIMULATOR
239 ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
240 #else
241 ret = vc4_simulator_flush(vc4, &submit);
242 #endif
243 if (ret)
244 errx(1, "VC4 submit failed\n");
245 }
246
247 vc4_reset_cl(&vc4->bcl);
248 vc4_reset_cl(&vc4->rcl);
249 vc4_reset_cl(&vc4->shader_rec);
250 vc4_reset_cl(&vc4->uniforms);
251 vc4_reset_cl(&vc4->bo_handles);
252 struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
253 for (int i = 0; i < submit.bo_handle_count; i++)
254 vc4_bo_unreference(&referenced_bos[i]);
255 vc4_reset_cl(&vc4->bo_pointers);
256 vc4->shader_rec_count = 0;
257
258 vc4->needs_flush = false;
259 vc4->draw_call_queued = false;
260 vc4->dirty = ~0;
261 vc4->resolve = 0;
262 vc4->cleared = 0;
263 }
264
/**
 * pipe_context::flush hook: forwards to vc4_flush().
 *
 * The fence and flags arguments are not used; no fence is written back.
 */
static void
vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
               unsigned flags)
{
        (void)fence;
        (void)flags;

        vc4_flush(pctx);
}
271
272 /**
273 * Flushes the current command lists if they reference the given BO.
274 *
275 * This helps avoid flushing the command buffers when unnecessary.
276 */
277 void
278 vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo)
279 {
280 struct vc4_context *vc4 = vc4_context(pctx);
281
282 if (!vc4->needs_flush)
283 return;
284
285 /* Walk all the referenced BOs in the drawing command list to see if
286 * they match.
287 */
288 struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
289 for (int i = 0; i < (vc4->bo_handles.next -
290 vc4->bo_handles.base) / 4; i++) {
291 if (referenced_bos[i] == bo) {
292 vc4_flush(pctx);
293 return;
294 }
295 }
296
297 /* Also check for the Z/color buffers, since the references to those
298 * are only added immediately before submit.
299 */
300 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
301 if (csurf) {
302 struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
303 if (ctex->bo == bo) {
304 vc4_flush(pctx);
305 return;
306 }
307 }
308
309 struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
310 if (zsurf) {
311 struct vc4_resource *ztex =
312 vc4_resource(zsurf->base.texture);
313 if (ztex->bo == bo) {
314 vc4_flush(pctx);
315 return;
316 }
317 }
318 }
319
320 static void
321 vc4_context_destroy(struct pipe_context *pctx)
322 {
323 struct vc4_context *vc4 = vc4_context(pctx);
324
325 if (vc4->blitter)
326 util_blitter_destroy(vc4->blitter);
327
328 if (vc4->primconvert)
329 util_primconvert_destroy(vc4->primconvert);
330
331 util_slab_destroy(&vc4->transfer_pool);
332
333 free(vc4);
334 }
335
336 struct pipe_context *
337 vc4_context_create(struct pipe_screen *pscreen, void *priv)
338 {
339 struct vc4_screen *screen = vc4_screen(pscreen);
340 struct vc4_context *vc4;
341
342 /* Prevent dumping of the shaders built during context setup. */
343 uint32_t saved_shaderdb_flag = vc4_debug & VC4_DEBUG_SHADERDB;
344 vc4_debug &= ~VC4_DEBUG_SHADERDB;
345
346 vc4 = CALLOC_STRUCT(vc4_context);
347 if (vc4 == NULL)
348 return NULL;
349 struct pipe_context *pctx = &vc4->base;
350
351 vc4->screen = screen;
352
353 pctx->screen = pscreen;
354 pctx->priv = priv;
355 pctx->destroy = vc4_context_destroy;
356 pctx->flush = vc4_pipe_flush;
357
358 vc4_draw_init(pctx);
359 vc4_state_init(pctx);
360 vc4_program_init(pctx);
361 vc4_resource_context_init(pctx);
362
363 vc4_init_cl(vc4, &vc4->bcl);
364 vc4_init_cl(vc4, &vc4->rcl);
365 vc4_init_cl(vc4, &vc4->shader_rec);
366 vc4_init_cl(vc4, &vc4->bo_handles);
367
368 vc4->dirty = ~0;
369 vc4->fd = screen->fd;
370
371 util_slab_create(&vc4->transfer_pool, sizeof(struct vc4_transfer),
372 16, UTIL_SLAB_SINGLETHREADED);
373 vc4->blitter = util_blitter_create(pctx);
374 if (!vc4->blitter)
375 goto fail;
376
377 vc4->primconvert = util_primconvert_create(pctx,
378 (1 << PIPE_PRIM_QUADS) - 1);
379 if (!vc4->primconvert)
380 goto fail;
381
382 vc4_debug |= saved_shaderdb_flag;
383
384 return &vc4->base;
385
386 fail:
387 pctx->destroy(pctx);
388 return NULL;
389 }