freedreno/a4xx: add s8/z32/z32_s8x24 support
[mesa.git] / src / gallium / drivers / freedreno / a4xx / fd4_gmem.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "util/u_format.h"
34
35 #include "freedreno_draw.h"
36 #include "freedreno_state.h"
37 #include "freedreno_resource.h"
38
39 #include "fd4_gmem.h"
40 #include "fd4_context.h"
41 #include "fd4_draw.h"
42 #include "fd4_emit.h"
43 #include "fd4_program.h"
44 #include "fd4_format.h"
45 #include "fd4_zsa.h"
46
47 static void
48 emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
49 struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
50 {
51 enum a4xx_tile_mode tile_mode;
52 unsigned i;
53
54 if (bin_w) {
55 tile_mode = 2;
56 } else {
57 tile_mode = TILE4_LINEAR;
58 }
59
60 for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
61 enum a4xx_color_fmt format = 0;
62 enum a3xx_color_swap swap = WZYX;
63 struct fd_resource *rsc = NULL;
64 struct fd_resource_slice *slice = NULL;
65 uint32_t stride = 0;
66 uint32_t base = 0;
67 uint32_t offset = 0;
68
69 if ((i < nr_bufs) && bufs[i]) {
70 struct pipe_surface *psurf = bufs[i];
71 enum pipe_format pformat = 0;
72
73 rsc = fd_resource(psurf->texture);
74 pformat = psurf->format;
75
76 /* In case we're drawing to Z32F_S8, the "color" actually goes to
77 * the stencil
78 */
79 if (rsc->stencil) {
80 rsc = rsc->stencil;
81 pformat = rsc->base.b.format;
82 bases++;
83 }
84
85 slice = fd_resource_slice(rsc, psurf->u.tex.level);
86 format = fd4_pipe2color(pformat);
87 swap = fd4_pipe2swap(pformat);
88
89 debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
90
91 offset = fd_resource_offset(rsc, psurf->u.tex.level,
92 psurf->u.tex.first_layer);
93
94 if (bin_w) {
95 stride = bin_w * rsc->cpp;
96
97 if (bases) {
98 base = bases[i];
99 }
100 } else {
101 stride = slice->pitch * rsc->cpp;
102 }
103 } else if ((i < nr_bufs) && bases) {
104 base = bases[i];
105 }
106
107 OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
108 OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
109 A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
110 A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
111 A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
112 if (bin_w || (i >= nr_bufs) || !bufs[i]) {
113 OUT_RING(ring, base);
114 OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
115 } else {
116 OUT_RELOCW(ring, rsc->bo, offset, 0, 0);
117 /* RB_MRT[i].CONTROL3.STRIDE not emitted by c2d..
118 * not sure if we need to skip it for bypass or
119 * not.
120 */
121 OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(0));
122 }
123 }
124 }
125
126 /* transfer from gmem to system memory (ie. normal RAM) */
127
128 static void
129 emit_gmem2mem_surf(struct fd_context *ctx, bool stencil,
130 uint32_t base, struct pipe_surface *psurf)
131 {
132 struct fd_ringbuffer *ring = ctx->ring;
133 struct fd_resource *rsc = fd_resource(psurf->texture);
134 enum pipe_format pformat = psurf->format;
135 struct fd_resource_slice *slice;
136 uint32_t offset;
137
138 if (stencil) {
139 debug_assert(rsc->stencil);
140 rsc = rsc->stencil;
141 pformat = rsc->base.b.format;
142 }
143
144 slice = &rsc->slices[psurf->u.tex.level];
145 offset = fd_resource_offset(rsc, psurf->u.tex.level,
146 psurf->u.tex.first_layer);
147
148 debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
149
150 OUT_PKT0(ring, REG_A4XX_RB_COPY_CONTROL, 4);
151 OUT_RING(ring, A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
152 A4XX_RB_COPY_CONTROL_MODE(RB_COPY_RESOLVE) |
153 A4XX_RB_COPY_CONTROL_GMEM_BASE(base));
154 OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
155 OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
156 OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) |
157 A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(pformat)) |
158 A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
159 A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
160 A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(pformat)));
161
162 fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
163 DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
164 }
165
166 static void
167 fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
168 {
169 struct fd4_context *fd4_ctx = fd4_context(ctx);
170 struct fd_gmem_stateobj *gmem = &ctx->gmem;
171 struct fd_ringbuffer *ring = ctx->ring;
172 struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
173 struct fd4_emit emit = {
174 .vtx = &fd4_ctx->solid_vbuf_state,
175 .prog = &ctx->solid_prog,
176 .key = {
177 .half_precision = true,
178 },
179 };
180
181 OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
182 OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
183
184 OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
185 OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
186 A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
187 A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
188 A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
189 A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
190 A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
191 A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
192 A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
193 OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
194
195 OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
196 OUT_RING(ring, 0xff000000 |
197 A4XX_RB_STENCILREFMASK_STENCILREF(0) |
198 A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
199 A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
200 OUT_RING(ring, 0xff000000 |
201 A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
202 A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
203 A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
204
205 OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
206 OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
207
208 fd_wfi(ctx, ring);
209
210 OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
211 OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */
212
213 OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
214 OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)pfb->width/2.0));
215 OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)pfb->width/2.0));
216 OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)pfb->height/2.0));
217 OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)pfb->height/2.0));
218 OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
219 OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));
220
221 OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
222 OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
223 0xa); /* XXX */
224
225 OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
226 OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
227 A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
228 A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
229 A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));
230
231 OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
232 OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
233
234 OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
235 OUT_RING(ring, 0x00000002);
236
237 OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
238 OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
239 A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
240 OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
241 A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
242
243 OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
244 OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
245 OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
246
247 fd4_program_emit(ring, &emit, 0, NULL);
248 fd4_emit_vertex_bufs(ring, &emit);
249
250 if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
251 struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
252 if (!rsc->stencil || (ctx->resolve & FD_BUFFER_DEPTH))
253 emit_gmem2mem_surf(ctx, false, ctx->gmem.zsbuf_base[0], pfb->zsbuf);
254 if (rsc->stencil && (ctx->resolve & FD_BUFFER_STENCIL))
255 emit_gmem2mem_surf(ctx, true, ctx->gmem.zsbuf_base[1], pfb->zsbuf);
256 }
257
258 if (ctx->resolve & FD_BUFFER_COLOR) {
259 unsigned i;
260 for (i = 0; i < pfb->nr_cbufs; i++) {
261 if (!pfb->cbufs[i])
262 continue;
263 if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i)))
264 continue;
265 emit_gmem2mem_surf(ctx, false, gmem->cbuf_base[i], pfb->cbufs[i]);
266 }
267 }
268
269 OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
270 OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
271 A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
272 A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
273 A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
274 }
275
276 /* transfer from system memory to gmem */
277
278 static void
279 emit_mem2gmem_surf(struct fd_context *ctx, uint32_t *bases,
280 struct pipe_surface **bufs, uint32_t nr_bufs, uint32_t bin_w)
281 {
282 struct fd_ringbuffer *ring = ctx->ring;
283 struct pipe_surface *zsbufs[2];
284
285 emit_mrt(ring, nr_bufs, bufs, bases, bin_w);
286
287 if (bufs[0] && (bufs[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
288 /* The gmem_restore_tex logic will put the first buffer's stencil
289 * as color. Supply it with the proper information to make that
290 * happen.
291 */
292 zsbufs[0] = zsbufs[1] = bufs[0];
293 bufs = zsbufs;
294 nr_bufs = 2;
295 }
296
297 fd4_emit_gmem_restore_tex(ring, nr_bufs, bufs);
298
299 fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
300 DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
301 }
302
303 static void
304 fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
305 {
306 struct fd4_context *fd4_ctx = fd4_context(ctx);
307 struct fd_gmem_stateobj *gmem = &ctx->gmem;
308 struct fd_ringbuffer *ring = ctx->ring;
309 struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
310 struct fd4_emit emit = {
311 .vtx = &fd4_ctx->blit_vbuf_state,
312 /* NOTE: They all use the same VP, this is for vtx bufs. */
313 .prog = &ctx->blit_prog[0],
314 .key = {
315 .half_precision = fd_half_precision(pfb),
316 },
317 };
318 unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
319 float x0, y0, x1, y1;
320 unsigned bin_w = tile->bin_w;
321 unsigned bin_h = tile->bin_h;
322 unsigned i;
323
324 /* write texture coordinates to vertexbuf: */
325 x0 = ((float)tile->xoff) / ((float)pfb->width);
326 x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
327 y0 = ((float)tile->yoff) / ((float)pfb->height);
328 y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
329
330 OUT_PKT3(ring, CP_MEM_WRITE, 5);
331 OUT_RELOCW(ring, fd_resource(fd4_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
332 OUT_RING(ring, fui(x0));
333 OUT_RING(ring, fui(y0));
334 OUT_RING(ring, fui(x1));
335 OUT_RING(ring, fui(y1));
336
337 for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
338 mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
339
340 OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
341 OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
342 A4XX_RB_MRT_CONTROL_B11 |
343 A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
344
345 OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
346 OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
347 A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
348 A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
349 A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
350 A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
351 A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
352 }
353
354 OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
355 OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
356 A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
357 A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
358 A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
359 A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
360 A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
361 A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
362 A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
363
364 OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
365 OUT_RING(ring, 0x8); /* XXX RB_RENDER_CONTROL */
366
367 OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
368 OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
369
370 OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
371 OUT_RING(ring, 0x280000); /* XXX GRAS_CL_CLIP_CNTL */
372
373 OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
374 OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0) |
375 A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
376
377 OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
378 OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)bin_w/2.0));
379 OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)bin_w/2.0));
380 OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)bin_h/2.0));
381 OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)bin_h/2.0));
382 OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
383 OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));
384
385 OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
386 OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
387 A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
388 OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
389 A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
390
391 OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
392 OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
393 A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
394 OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
395 A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));
396
397 OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
398 OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
399 A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));
400
401 OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
402 OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
403 A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
404 A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
405 A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
406 A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
407 A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
408 A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
409 A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
410 OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
411
412 OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
413 OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
414 A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
415 A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
416 A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));
417
418 OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
419 OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST |
420 A4XX_PC_PRIM_VTX_CNTL_VAROUT(1));
421
422 OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
423 OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
424 OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
425
426 fd4_emit_vertex_bufs(ring, &emit);
427
428 /* for gmem pitch/base calculations, we need to use the non-
429 * truncated tile sizes:
430 */
431 bin_w = gmem->bin_w;
432 bin_h = gmem->bin_h;
433
434 if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) {
435 emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
436 emit.fp = NULL; /* frag shader changed so clear cache */
437 fd4_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
438 emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
439 }
440
441 if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
442 switch (pfb->zsbuf->format) {
443 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
444 case PIPE_FORMAT_Z32_FLOAT:
445 emit.prog = (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) ?
446 &ctx->blit_z : &ctx->blit_zs;
447 emit.key.half_precision = false;
448
449 OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
450 OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
451 A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
452 A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS) |
453 A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE);
454
455 OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
456 OUT_RING(ring, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE);
457
458 OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
459 OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */
460
461 break;
462 default:
463 /* Non-float can use a regular color write. It's split over 8-bit
464 * components, so half precision is always sufficient.
465 */
466 emit.prog = &ctx->blit_prog[0];
467 emit.key.half_precision = true;
468 break;
469 }
470 emit.fp = NULL; /* frag shader changed so clear cache */
471 fd4_program_emit(ring, &emit, 1, &pfb->zsbuf);
472 emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
473 }
474
475 OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
476 OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
477 A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
478 A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
479
480 OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
481 OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
482 A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
483 0x00010000); /* XXX */
484 }
485
486 static void
487 patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode)
488 {
489 unsigned i;
490 for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) {
491 struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i);
492 *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
493 }
494 util_dynarray_resize(&ctx->draw_patches, 0);
495 }
496
497 static void
498 patch_rbrc(struct fd_context *ctx, uint32_t val)
499 {
500 struct fd4_context *fd4_ctx = fd4_context(ctx);
501 unsigned i;
502 for (i = 0; i < fd_patch_num_elements(&fd4_ctx->rbrc_patches); i++) {
503 struct fd_cs_patch *patch = fd_patch_element(&fd4_ctx->rbrc_patches, i);
504 *patch->cs = patch->val | val;
505 }
506 util_dynarray_resize(&fd4_ctx->rbrc_patches, 0);
507 }
508
509 /* for rendering directly to system memory: */
510 static void
511 fd4_emit_sysmem_prep(struct fd_context *ctx)
512 {
513 struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
514 struct fd_ringbuffer *ring = ctx->ring;
515
516 fd4_emit_restore(ctx);
517
518 OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
519 OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
520 A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
521
522 emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
523
524 /* setup scissor/offset for current tile: */
525 OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
526 OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(0) |
527 A4XX_RB_BIN_OFFSET_Y(0));
528
529 OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
530 OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
531 A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
532 OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
533 A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
534
535 OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
536 OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(0) |
537 A4XX_RB_MODE_CONTROL_HEIGHT(0) |
538 0x00c00000); /* XXX */
539
540 patch_draws(ctx, IGNORE_VISIBILITY);
541 patch_rbrc(ctx, 0); // XXX
542 }
543
544 static void
545 update_vsc_pipe(struct fd_context *ctx)
546 {
547 struct fd4_context *fd4_ctx = fd4_context(ctx);
548 struct fd_ringbuffer *ring = ctx->ring;
549 int i;
550
551 OUT_PKT0(ring, REG_A4XX_VSC_SIZE_ADDRESS, 1);
552 OUT_RELOCW(ring, fd4_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
553
554 OUT_PKT0(ring, REG_A4XX_VSC_PIPE_CONFIG_REG(0), 8);
555 for (i = 0; i < 8; i++) {
556 struct fd_vsc_pipe *pipe = &ctx->pipe[i];
557 OUT_RING(ring, A4XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
558 A4XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
559 A4XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
560 A4XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
561 }
562
563 OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(0), 8);
564 for (i = 0; i < 8; i++) {
565 struct fd_vsc_pipe *pipe = &ctx->pipe[i];
566 if (!pipe->bo) {
567 pipe->bo = fd_bo_new(ctx->dev, 0x40000,
568 DRM_FREEDRENO_GEM_TYPE_KMEM);
569 }
570 OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i] */
571 }
572
573 OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(0), 8);
574 for (i = 0; i < 8; i++) {
575 struct fd_vsc_pipe *pipe = &ctx->pipe[i];
576 OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */
577 }
578 }
579
580 /* before first tile */
581 static void
582 fd4_emit_tile_init(struct fd_context *ctx)
583 {
584 struct fd_ringbuffer *ring = ctx->ring;
585 struct fd_gmem_stateobj *gmem = &ctx->gmem;
586 uint32_t rb_render_control;
587
588 fd4_emit_restore(ctx);
589
590 OUT_PKT0(ring, REG_A4XX_VSC_BIN_SIZE, 1);
591 OUT_RING(ring, A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
592 A4XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
593
594 OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
595 OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
596 A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
597 0x00010000); /* XXX */
598
599 update_vsc_pipe(ctx);
600 patch_draws(ctx, IGNORE_VISIBILITY);
601
602 rb_render_control = 0; // XXX or BINNING_PASS.. but maybe we can emit only from gmem
603 patch_rbrc(ctx, rb_render_control);
604 }
605
606 /* before mem2gmem */
607 static void
608 fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
609 {
610 struct fd_ringbuffer *ring = ctx->ring;
611 struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
612 struct fd_gmem_stateobj *gmem = &ctx->gmem;
613
614 if (pfb->zsbuf) {
615 struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
616 uint32_t cpp = rsc->cpp;
617
618 OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
619 OUT_RING(ring, A4XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]) |
620 A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format)));
621 OUT_RING(ring, A4XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
622 OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(cpp * gmem->bin_w));
623
624 OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
625 if (rsc->stencil) {
626 OUT_RING(ring, A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL |
627 A4XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
628 OUT_RING(ring, A4XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w));
629 } else {
630 OUT_RING(ring, 0x00000000);
631 OUT_RING(ring, 0x00000000);
632 }
633 } else {
634 OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
635 OUT_RING(ring, 0x00000000);
636 OUT_RING(ring, 0x00000000);
637 OUT_RING(ring, 0x00000000);
638
639 OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
640 OUT_RING(ring, 0); /* RB_STENCIL_INFO */
641 OUT_RING(ring, 0); /* RB_STENCIL_PITCH */
642 }
643
644 OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
645 if (pfb->zsbuf) {
646 OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(
647 fd4_pipe2depth(pfb->zsbuf->format)));
648 } else {
649 OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(DEPTH4_NONE));
650 }
651
652 if (ctx->needs_rb_fbd) {
653 fd_wfi(ctx, ring);
654 OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
655 OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
656 A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
657 ctx->needs_rb_fbd = false;
658 }
659 }
660
661 /* before IB to rendering cmds: */
662 static void
663 fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
664 {
665 struct fd_ringbuffer *ring = ctx->ring;
666 struct fd_gmem_stateobj *gmem = &ctx->gmem;
667 struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
668
669 uint32_t x1 = tile->xoff;
670 uint32_t y1 = tile->yoff;
671 uint32_t x2 = tile->xoff + tile->bin_w - 1;
672 uint32_t y2 = tile->yoff + tile->bin_h - 1;
673
674 OUT_PKT3(ring, CP_SET_BIN, 3);
675 OUT_RING(ring, 0x00000000);
676 OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
677 OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
678
679 emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w);
680
681 /* setup scissor/offset for current tile: */
682 OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
683 OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(tile->xoff) |
684 A4XX_RB_BIN_OFFSET_Y(tile->yoff));
685
686 OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
687 OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
688 A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
689 OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
690 A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
691 }
692
693 void
694 fd4_gmem_init(struct pipe_context *pctx)
695 {
696 struct fd_context *ctx = fd_context(pctx);
697
698 ctx->emit_sysmem_prep = fd4_emit_sysmem_prep;
699 ctx->emit_tile_init = fd4_emit_tile_init;
700 ctx->emit_tile_prep = fd4_emit_tile_prep;
701 ctx->emit_tile_mem2gmem = fd4_emit_tile_mem2gmem;
702 ctx->emit_tile_renderprep = fd4_emit_tile_renderprep;
703 ctx->emit_tile_gmem2mem = fd4_emit_tile_gmem2mem;
704 }