freedreno/a6xx: add some more tracepoints
[mesa.git] src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 * Copyright © 2018 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <stdio.h>

#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"

#include "freedreno_draw.h"
#include "freedreno_log.h"
#include "freedreno_state.h"
#include "freedreno_resource.h"

#include "fd6_blitter.h"
#include "fd6_gmem.h"
#include "fd6_context.h"
#include "fd6_draw.h"
#include "fd6_emit.h"
#include "fd6_program.h"
#include "fd6_format.h"
#include "fd6_resource.h"
#include "fd6_zsa.h"
#include "fd6_pack.h"

/**
 * Emits the flags registers, suitable for RB_MRT_FLAG_BUFFER,
 * RB_DEPTH_FLAG_BUFFER, SP_PS_2D_SRC_FLAGS, and RB_BLIT_FLAG_DST.
 */
void
fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc,
		int level, int layer)
{
	if (fd_resource_ubwc_enabled(rsc, level)) {
		OUT_RELOCW(ring, rsc->bo, fd_resource_ubwc_offset(rsc, level, layer), 0, 0);
		OUT_RING(ring,
				A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(rsc->layout.ubwc_slices[level].pitch) |
				A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2));
	} else {
		OUT_RING(ring, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
		OUT_RING(ring, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
		OUT_RING(ring, 0x00000000);
	}
}

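/* Emit per-MRT color buffer state: format/tiling/swap, pitch, base
 * address, gmem base, and UBWC flag buffer, plus the derived sRGB,
 * render-component, and layered-rendering state.  A non-NULL gmem
 * stateobj selects the per-buffer gmem base offsets (gmem rendering),
 * NULL means sysmem/bypass.
 */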
static void
emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb,
		const struct fd_gmem_stateobj *gmem)
{
	unsigned char mrt_comp[A6XX_MAX_RENDER_TARGETS] = {0};
	unsigned srgb_cntl = 0;
	unsigned i;

	bool layered = false;
	unsigned type = 0;

	for (i = 0; i < pfb->nr_cbufs; i++) {
		enum a6xx_format format = 0;
		enum a3xx_color_swap swap = WZYX;
		bool sint = false, uint = false;
		struct fd_resource *rsc = NULL;
		struct fdl_slice *slice = NULL;
		uint32_t stride = 0;
		uint32_t offset;
		uint32_t tile_mode;

		if (!pfb->cbufs[i])
			continue;

		mrt_comp[i] = 0xf;

		struct pipe_surface *psurf = pfb->cbufs[i];
		enum pipe_format pformat = psurf->format;
		rsc = fd_resource(psurf->texture);
		if (!rsc->bo)
			continue;

		uint32_t base = gmem ? gmem->cbuf_base[i] : 0;
		slice = fd_resource_slice(rsc, psurf->u.tex.level);
		format = fd6_pipe2color(pformat);
		sint = util_format_is_pure_sint(pformat);
		uint = util_format_is_pure_uint(pformat);

		if (util_format_is_srgb(pformat))
			srgb_cntl |= (1 << i);

		offset = fd_resource_offset(rsc, psurf->u.tex.level,
				psurf->u.tex.first_layer);

		stride = slice->pitch * rsc->layout.cpp;
		swap = fd6_resource_swap(rsc, pformat);

		tile_mode = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);

		if (psurf->u.tex.first_layer < psurf->u.tex.last_layer) {
			layered = true;
			if (psurf->texture->target == PIPE_TEXTURE_2D_ARRAY && psurf->texture->nr_samples > 0)
				type = LAYER_MULTISAMPLE_ARRAY;
			else if (psurf->texture->target == PIPE_TEXTURE_2D_ARRAY)
				type = LAYER_2D_ARRAY;
			else if (psurf->texture->target == PIPE_TEXTURE_CUBE)
				type = LAYER_CUBEMAP;
			else if (psurf->texture->target == PIPE_TEXTURE_3D)
				type = LAYER_3D;
		}

		debug_assert((offset + slice->size0) <= fd_bo_size(rsc->bo));

		OUT_REG(ring,
			A6XX_RB_MRT_BUF_INFO(i,
				.color_format = format,
				.color_tile_mode = tile_mode,
				.color_swap = swap),
			A6XX_RB_MRT_PITCH(i, .a6xx_rb_mrt_pitch = stride),
			A6XX_RB_MRT_ARRAY_PITCH(i, .a6xx_rb_mrt_array_pitch = slice->size0),
			A6XX_RB_MRT_BASE(i, .bo = rsc->bo, .bo_offset = offset),
			A6XX_RB_MRT_BASE_GMEM(i, .unknown = base));

		OUT_REG(ring,
			A6XX_SP_FS_MRT_REG(i, .color_format = format,
				.color_sint = sint, .color_uint = uint));

		OUT_PKT4(ring, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
		fd6_emit_flag_reference(ring, rsc,
				psurf->u.tex.level, psurf->u.tex.first_layer);
	}

	OUT_REG(ring, A6XX_RB_SRGB_CNTL(.dword = srgb_cntl));
	OUT_REG(ring, A6XX_SP_SRGB_CNTL(.dword = srgb_cntl));

	OUT_REG(ring, A6XX_RB_RENDER_COMPONENTS(
		.rt0 = mrt_comp[0],
		.rt1 = mrt_comp[1],
		.rt2 = mrt_comp[2],
		.rt3 = mrt_comp[3],
		.rt4 = mrt_comp[4],
		.rt5 = mrt_comp[5],
		.rt6 = mrt_comp[6],
		.rt7 = mrt_comp[7]));

	OUT_REG(ring, A6XX_SP_FS_RENDER_COMPONENTS(
		.rt0 = mrt_comp[0],
		.rt1 = mrt_comp[1],
		.rt2 = mrt_comp[2],
		.rt3 = mrt_comp[3],
		.rt4 = mrt_comp[4],
		.rt5 = mrt_comp[5],
		.rt6 = mrt_comp[6],
		.rt7 = mrt_comp[7]));

	OUT_REG(ring, A6XX_GRAS_LAYER_CNTL(.layered = layered, .type = type));
}

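/* Emit depth/stencil buffer state: depth buffer format/pitch/base (or
 * DEPTH6_NONE if there is no zsbuf), LRZ buffer state, and separate
 * stencil buffer state when the resource has one.  Like emit_mrt(), a
 * non-NULL gmem stateobj selects the gmem base offsets.
 */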
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
		const struct fd_gmem_stateobj *gmem)
{
	if (zsbuf) {
		struct fd_resource *rsc = fd_resource(zsbuf->texture);
		enum a6xx_depth_format fmt = fd6_pipe2depth(zsbuf->format);
		struct fdl_slice *slice = fd_resource_slice(rsc, 0);
		uint32_t stride = slice->pitch * rsc->layout.cpp;
		uint32_t size = slice->size0;
		uint32_t base = gmem ? gmem->zsbuf_base[0] : 0;
		uint32_t offset = fd_resource_offset(rsc, zsbuf->u.tex.level,
				zsbuf->u.tex.first_layer);

		OUT_REG(ring,
			A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = fmt),
			A6XX_RB_DEPTH_BUFFER_PITCH(.a6xx_rb_depth_buffer_pitch = stride),
			A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(.a6xx_rb_depth_buffer_array_pitch = size),
			A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset),
			A6XX_RB_DEPTH_BUFFER_BASE_GMEM(.dword = base));

		OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));

		OUT_PKT4(ring, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
		fd6_emit_flag_reference(ring, rsc,
				zsbuf->u.tex.level, zsbuf->u.tex.first_layer);

		if (rsc->lrz) {
			OUT_REG(ring,
				A6XX_GRAS_LRZ_BUFFER_BASE(.bo = rsc->lrz),
				A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = rsc->lrz_pitch),
				// XXX a6xx seems to use a different buffer here.. not sure what for..
				A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO(0),
				A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI(0));
		} else {
			OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5);
			OUT_RING(ring, 0x00000000);
			OUT_RING(ring, 0x00000000);
			OUT_RING(ring, 0x00000000);    /* GRAS_LRZ_BUFFER_PITCH */
			OUT_RING(ring, 0x00000000);    /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
			OUT_RING(ring, 0x00000000);
		}
		/* NOTE: blob emits GRAS_LRZ_CNTL plus GRAS_LRZ_BUFFER_BASE
		 * plus this CP_EVENT_WRITE at the end in its own IB..
		 */
		OUT_PKT7(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25));

		if (rsc->stencil) {
			struct fdl_slice *slice = fd_resource_slice(rsc->stencil, 0);
			stride = slice->pitch * rsc->stencil->layout.cpp;
			size = slice->size0;
			uint32_t base = gmem ? gmem->zsbuf_base[1] : 0;

			OUT_REG(ring,
				A6XX_RB_STENCIL_INFO(.separate_stencil = true),
				A6XX_RB_STENCIL_BUFFER_PITCH(.a6xx_rb_stencil_buffer_pitch = stride),
				A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(.a6xx_rb_stencil_buffer_array_pitch = size),
				A6XX_RB_STENCIL_BUFFER_BASE(.bo = rsc->stencil->bo),
				A6XX_RB_STENCIL_BUFFER_BASE_GMEM(.dword = base));
		} else {
			OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
		}
	} else {
		OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
		OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_PITCH */
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_ARRAY_PITCH */
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_BASE_LO */
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_BASE_HI */
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_BASE_GMEM */

		OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE));

		OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5);
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
		OUT_RING(ring, 0x00000000);    /* GRAS_LRZ_BUFFER_PITCH */
		OUT_RING(ring, 0x00000000);    /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
		OUT_RING(ring, 0x00000000);    /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */

		OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
	}
}

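/* Decide whether the hw binning pass is worth it for this batch: we
 * need more than one bin and at least one draw.
 */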
static bool
use_hw_binning(struct fd_batch *batch)
{
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;

	// TODO figure out hw limits for binning

	return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) >= 2) &&
			(batch->num_draws > 0);
}

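/* Patch previously emitted fb-read texture descriptors with the gmem
 * pitch (bin_w * cpp), which isn't known until the gmem layout is
 * decided.
 */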
static void
patch_fb_read(struct fd_batch *batch)
{
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;

	for (unsigned i = 0; i < fd_patch_num_elements(&batch->fb_read_patches); i++) {
		struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
		*patch->cs = patch->val | A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[0]);
	}
	util_dynarray_clear(&batch->fb_read_patches);
}

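/* Emit RB_RENDER_CNTL via CP_REG_WRITE, with the depth and per-MRT
 * UBWC flag-buffer enables, and the BINNING bit set during the
 * binning pass.
 */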
static void
update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb, bool binning)
{
	struct fd_ringbuffer *ring = batch->gmem;
	uint32_t cntl = 0;
	bool depth_ubwc_enable = false;
	uint32_t mrts_ubwc_enable = 0;
	int i;

	if (pfb->zsbuf) {
		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
		depth_ubwc_enable = fd_resource_ubwc_enabled(rsc, pfb->zsbuf->u.tex.level);
	}

	for (i = 0; i < pfb->nr_cbufs; i++) {
		if (!pfb->cbufs[i])
			continue;

		struct pipe_surface *psurf = pfb->cbufs[i];
		struct fd_resource *rsc = fd_resource(psurf->texture);
		if (!rsc->bo)
			continue;

		if (fd_resource_ubwc_enabled(rsc, psurf->u.tex.level))
			mrts_ubwc_enable |= 1 << i;
	}

	cntl |= A6XX_RB_RENDER_CNTL_UNK4;
	if (binning)
		cntl |= A6XX_RB_RENDER_CNTL_BINNING;

	OUT_PKT7(ring, CP_REG_WRITE, 3);
	OUT_RING(ring, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL));
	OUT_RING(ring, REG_A6XX_RB_RENDER_CNTL);
	OUT_RING(ring, cntl |
		COND(depth_ubwc_enable, A6XX_RB_RENDER_CNTL_FLAG_DEPTH) |
		A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable));
}

#define VSC_DATA_SIZE(pitch)  ((pitch) * 32 + 0x100)  /* extra size to store VSC_SIZE */
#define VSC_DATA2_SIZE(pitch) ((pitch) * 32)

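/* Set up the visibility stream (VSC) buffers and per-pipe config for
 * the binning pass, (re)allocating the buffers on first use or after
 * an overflow-triggered resize.
 */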
static void
update_vsc_pipe(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd6_context *fd6_ctx = fd6_context(ctx);
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
	struct fd_ringbuffer *ring = batch->gmem;
	int i;

	if (!fd6_ctx->vsc_data) {
		fd6_ctx->vsc_data = fd_bo_new(ctx->screen->dev,
				VSC_DATA_SIZE(fd6_ctx->vsc_data_pitch),
				DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_data");
	}

	if (!fd6_ctx->vsc_data2) {
		fd6_ctx->vsc_data2 = fd_bo_new(ctx->screen->dev,
				VSC_DATA2_SIZE(fd6_ctx->vsc_data2_pitch),
				DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_data2");
	}

	OUT_REG(ring,
		A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),
		A6XX_VSC_SIZE_ADDRESS(.bo = fd6_ctx->vsc_data, .bo_offset = 32 * fd6_ctx->vsc_data_pitch));

	OUT_REG(ring, A6XX_VSC_BIN_COUNT(.nx = gmem->nbins_x,
			.ny = gmem->nbins_y));

	OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
	for (i = 0; i < 32; i++) {
		const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
		OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
				A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
				A6XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
				A6XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
	}

	OUT_REG(ring,
		A6XX_VSC_PIPE_DATA2_ADDRESS(.bo = fd6_ctx->vsc_data2),
		A6XX_VSC_PIPE_DATA2_PITCH(.dword = fd6_ctx->vsc_data2_pitch),
		A6XX_VSC_PIPE_DATA2_ARRAY_PITCH(.dword = fd_bo_size(fd6_ctx->vsc_data2)));

	OUT_REG(ring,
		A6XX_VSC_PIPE_DATA_ADDRESS(.bo = fd6_ctx->vsc_data),
		A6XX_VSC_PIPE_DATA_PITCH(.dword = fd6_ctx->vsc_data_pitch),
		A6XX_VSC_PIPE_DATA_ARRAY_PITCH(.dword = fd_bo_size(fd6_ctx->vsc_data)));
}

/* TODO we probably have more than 8 scratch regs.. although the first
 * 8 are what the kernel dumps, and it is kinda useful to be able to see
 * the value in kernel traces
 */
#define OVERFLOW_FLAG_REG REG_A6XX_CP_SCRATCH_REG(0)

/*
 * If overflow is detected, either 0x1 (VSC_DATA overflow) or 0x3
 * (VSC_DATA2 overflow) plus the size of the overflowed buffer is
 * written to control->vsc_overflow.  This allows the CPU to detect
 * which buffer overflowed (and, since the current size is encoded
 * as well, prevents already-submitted but not yet executed batches
 * from fooling the CPU into increasing the size again unnecessarily).
 *
 * To conditionally use VSC data in the draw pass only if there is no
 * overflow, we use a scratch reg (OVERFLOW_FLAG_REG) to hold 1 if
 * there is no overflow, or 0 in case of overflow.  The value is
 * inverted to make the CP_COND_REG_EXEC stuff easier.
 */
static void
emit_vsc_overflow_test(struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->gmem;
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
	struct fd6_context *fd6_ctx = fd6_context(batch->ctx);

	debug_assert((fd6_ctx->vsc_data_pitch & 0x3) == 0);
	debug_assert((fd6_ctx->vsc_data2_pitch & 0x3) == 0);

	/* Clear vsc_scratch: */
	OUT_PKT7(ring, CP_MEM_WRITE, 3);
	OUT_RELOCW(ring, control_ptr(fd6_ctx, vsc_scratch));
	OUT_RING(ring, 0x0);

	/* Check for overflow, write vsc_scratch if detected: */
	for (int i = 0; i < gmem->num_vsc_pipes; i++) {
		OUT_PKT7(ring, CP_COND_WRITE5, 8);
		OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
				CP_COND_WRITE5_0_WRITE_MEMORY);
		OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_SIZE_REG(i)));
		OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
		OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_data_pitch));
		OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
		OUT_RELOCW(ring, control_ptr(fd6_ctx, vsc_scratch));  /* WRITE_ADDR_LO/HI */
		OUT_RING(ring, CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_data_pitch));

		OUT_PKT7(ring, CP_COND_WRITE5, 8);
		OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
				CP_COND_WRITE5_0_WRITE_MEMORY);
		OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_SIZE2_REG(i)));
		OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
		OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_data2_pitch));
		OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
		OUT_RELOCW(ring, control_ptr(fd6_ctx, vsc_scratch));  /* WRITE_ADDR_LO/HI */
		OUT_RING(ring, CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_data2_pitch));
	}

	OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

	OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

	OUT_PKT7(ring, CP_MEM_TO_REG, 3);
	OUT_RING(ring, CP_MEM_TO_REG_0_REG(OVERFLOW_FLAG_REG) |
			CP_MEM_TO_REG_0_CNT(0));
	OUT_RELOC(ring, control_ptr(fd6_ctx, vsc_scratch));  /* SRC_LO/HI */

	/*
	 * This is a bit awkward, we really want a way to invert the
	 * CP_REG_TEST/CP_COND_REG_EXEC logic, so that we can conditionally
	 * execute cmds to use hwbinning when a bit is *not* set.  This
	 * dance is to invert OVERFLOW_FLAG_REG.
	 *
	 * A CP_NOP packet is used to skip executing the 'else' clause
	 * if (b0 set)..
	 */

	BEGIN_RING(ring, 10);  /* ensure if/else doesn't get split */

	/* b0 will be set if VSC_DATA or VSC_DATA2 overflow: */
	OUT_PKT7(ring, CP_REG_TEST, 1);
	OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
			A6XX_CP_REG_TEST_0_BIT(0) |
			A6XX_CP_REG_TEST_0_WAIT_FOR_ME);

	OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
	OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
	OUT_RING(ring, CP_COND_REG_EXEC_1_DWORDS(7));

	/* if (b0 set) */ {
		/*
		 * On overflow, mirror the value to control->vsc_overflow
		 * which the CPU is checking to detect overflow (see
		 * check_vsc_overflow())
		 */
		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
		OUT_RING(ring, CP_REG_TO_MEM_0_REG(OVERFLOW_FLAG_REG) |
				CP_REG_TO_MEM_0_CNT(1 - 1));
		OUT_RELOCW(ring, control_ptr(fd6_ctx, vsc_overflow));

		OUT_PKT4(ring, OVERFLOW_FLAG_REG, 1);
		OUT_RING(ring, 0x0);

		OUT_PKT7(ring, CP_NOP, 2);  /* skip 'else' when 'if' is taken */
	} /* else */ {
		OUT_PKT4(ring, OVERFLOW_FLAG_REG, 1);
		OUT_RING(ring, 0x1);
	}
}

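/* CPU-side counterpart of emit_vsc_overflow_test(): read back the
 * overflow flag written by the CP, and on overflow throw away the
 * offending buffer and double its pitch for the next allocation.
 */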
static void
check_vsc_overflow(struct fd_context *ctx)
{
	struct fd6_context *fd6_ctx = fd6_context(ctx);
	struct fd6_control *control = fd_bo_map(fd6_ctx->control_mem);
	uint32_t vsc_overflow = control->vsc_overflow;

	if (!vsc_overflow)
		return;

	/* clear overflow flag: */
	control->vsc_overflow = 0;

	unsigned buffer = vsc_overflow & 0x3;
	unsigned size = vsc_overflow & ~0x3;

	if (buffer == 0x1) {
		/* VSC_PIPE_DATA overflow: */

		if (size < fd6_ctx->vsc_data_pitch) {
			/* we've already increased the size, this overflow is
			 * from a batch submitted before resize, but executed
			 * after
			 */
			return;
		}

		fd_bo_del(fd6_ctx->vsc_data);
		fd6_ctx->vsc_data = NULL;
		fd6_ctx->vsc_data_pitch *= 2;

		debug_printf("resized VSC_DATA_PITCH to: 0x%x\n", fd6_ctx->vsc_data_pitch);

	} else if (buffer == 0x3) {
		/* VSC_PIPE_DATA2 overflow: */

		if (size < fd6_ctx->vsc_data2_pitch) {
			/* we've already increased the size */
			return;
		}

		fd_bo_del(fd6_ctx->vsc_data2);
		fd6_ctx->vsc_data2 = NULL;
		fd6_ctx->vsc_data2_pitch *= 2;

		debug_printf("resized VSC_DATA2_PITCH to: 0x%x\n", fd6_ctx->vsc_data2_pitch);

	} else {
		/* NOTE: it's possible, for example, for overflow to corrupt the
		 * control page.  I mostly just see this hit if I set initial VSC
		 * buffer size extremely small.  Things still seem to recover,
		 * but maybe we should pre-emptively realloc vsc_data/vsc_data2
		 * and hope for different memory placement?
		 */
		DBG("invalid vsc_overflow value: 0x%08x", vsc_overflow);
	}
}

/*
 * Emit conditional CP_INDIRECT_BRANCH based on VSC_STATE[p], ie. the IB
 * is skipped for tiles that have no visible geometry.
 */
static void
emit_conditional_ib(struct fd_batch *batch, const struct fd_tile *tile,
		struct fd_ringbuffer *target)
{
	struct fd_ringbuffer *ring = batch->gmem;

	if (target->cur == target->start)
		return;

	emit_marker6(ring, 6);

	unsigned count = fd_ringbuffer_cmd_count(target);

	BEGIN_RING(ring, 5 + 4 * count);  /* ensure conditional doesn't get split */

	OUT_PKT7(ring, CP_REG_TEST, 1);
	OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(tile->p)) |
			A6XX_CP_REG_TEST_0_BIT(tile->n) |
			A6XX_CP_REG_TEST_0_WAIT_FOR_ME);

	OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
	OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
	OUT_RING(ring, CP_COND_REG_EXEC_1_DWORDS(4 * count));

	for (unsigned i = 0; i < count; i++) {
		uint32_t dwords;
		OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
		dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
		assert(dwords > 0);
		OUT_RING(ring, dwords);
	}

	emit_marker6(ring, 6);
}

static void
set_scissor(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2)
{
	OUT_REG(ring,
		A6XX_GRAS_SC_WINDOW_SCISSOR_TL(.x = x1, .y = y1),
		A6XX_GRAS_SC_WINDOW_SCISSOR_BR(.x = x2, .y = y2));

	OUT_REG(ring,
		A6XX_GRAS_RESOLVE_CNTL_1(.x = x1, .y = y1),
		A6XX_GRAS_RESOLVE_CNTL_2(.x = x2, .y = y2));
}

static void
set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag)
{
	OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
	OUT_REG(ring, A6XX_RB_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
	/* no flag for RB_BIN_CONTROL2... */
	OUT_REG(ring, A6XX_RB_BIN_CONTROL2(.binw = w, .binh = h));
}

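/* Run the binning pass over the full render area, so the hw fills the
 * per-pipe visibility streams, then test for VSC overflow before the
 * per-tile draw passes consume them.
 */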
static void
emit_binning_pass(struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->gmem;
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
	struct fd6_context *fd6_ctx = fd6_context(batch->ctx);

	uint32_t x1 = gmem->minx;
	uint32_t y1 = gmem->miny;
	uint32_t x2 = gmem->minx + gmem->width - 1;
	uint32_t y2 = gmem->miny + gmem->height - 1;

	debug_assert(!batch->tessellation);

	set_scissor(ring, x1, y1, x2, y2);

	emit_marker6(ring, 7);
	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
	emit_marker6(ring, 7);

	OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
	OUT_RING(ring, 0x1);

	OUT_PKT7(ring, CP_SET_MODE, 1);
	OUT_RING(ring, 0x1);

	OUT_WFI5(ring);

	OUT_REG(ring, A6XX_VFD_MODE_CNTL(.binning_pass = true));

	update_vsc_pipe(batch);

	OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1);
	OUT_RING(ring, fd6_ctx->magic.PC_UNKNOWN_9805);

	OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1);
	OUT_RING(ring, fd6_ctx->magic.SP_UNKNOWN_A0F8);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, UNK_2C);

	OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
	OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(0) |
			A6XX_RB_WINDOW_OFFSET_Y(0));

	OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
	OUT_RING(ring, A6XX_SP_TP_WINDOW_OFFSET_X(0) |
			A6XX_SP_TP_WINDOW_OFFSET_Y(0));

	/* emit IB to binning drawcmds: */
	fd_log(batch, "GMEM: START BINNING IB");
	fd6_emit_ib(ring, batch->draw);
	fd_log(batch, "GMEM: END BINNING IB");

	fd_reset_wfi(batch);

	OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
	OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
			CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
			CP_SET_DRAW_STATE__0_GROUP_ID(0));
	OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
	OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, UNK_2D);

	fd6_cache_inv(batch, ring);
	fd6_cache_flush(batch, ring);
	fd_wfi(batch, ring);

	OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

	fd_log(batch, "START VSC OVERFLOW TEST");
	emit_vsc_overflow_test(batch);
	fd_log(batch, "END VSC OVERFLOW TEST");

	OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
	OUT_RING(ring, 0x0);

	OUT_PKT7(ring, CP_SET_MODE, 1);
	OUT_RING(ring, 0x0);

	OUT_WFI5(ring);

	OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
	OUT_RING(ring, fd6_ctx->magic.RB_CCU_CNTL_gmem);
}

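/* Program the rasterizer and destination MSAA sample counts across
 * the SP_TP, GRAS, and RB blocks, disabling MSAA when single-sampled.
 */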
static void
emit_msaa(struct fd_ringbuffer *ring, unsigned nr)
{
	enum a3xx_msaa_samples samples = fd_msaa_samples(nr);

	OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
	OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
			COND(samples == MSAA_ONE, A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

	OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
	OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
			COND(samples == MSAA_ONE, A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));

	OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
	OUT_RING(ring, A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
			COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

	OUT_PKT4(ring, REG_A6XX_RB_MSAA_CNTL, 1);
	OUT_RING(ring, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
}

static void prepare_tile_setup_ib(struct fd_batch *batch);
static void prepare_tile_fini_ib(struct fd_batch *batch);

/* before first tile */
static void
fd6_emit_tile_init(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;

	fd6_emit_restore(batch, ring);

	fd6_emit_lrz_flush(ring);

	if (batch->lrz_clear) {
		fd_log(batch, "START LRZ CLEAR");
		fd6_emit_ib(ring, batch->lrz_clear);
		fd_log(batch, "END LRZ CLEAR");
	}

	fd6_cache_inv(batch, ring);

	prepare_tile_setup_ib(batch);
	prepare_tile_fini_ib(batch);

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x0);

	/* blob controls "local" in IB2, but I think that is not required */
	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x1);

	fd_wfi(batch, ring);
	OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
	OUT_RING(ring, fd6_context(ctx)->magic.RB_CCU_CNTL_gmem);

	emit_zs(ring, pfb->zsbuf, batch->gmem_state);
	emit_mrt(ring, pfb, batch->gmem_state);
	emit_msaa(ring, pfb->samples);
	patch_fb_read(batch);

	if (use_hw_binning(batch)) {
		/* enable stream-out during binning pass: */
		OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1);
		OUT_RING(ring, 0);

		set_bin_size(ring, gmem->bin_w, gmem->bin_h,
				A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000);
		update_render_cntl(batch, pfb, true);
		emit_binning_pass(batch);

		/* and disable stream-out for draw pass: */
		OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1);
		OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);

		/*
		 * NOTE: even if we detect VSC overflow and disable use of
		 * visibility stream in draw pass, it is still safe to execute
		 * the rest of these cmds:
		 */

		// NOTE a618 not setting .USE_VIZ .. from a quick check on a630, it
		// does not appear that this bit changes much (ie. it isn't actually
		// .USE_VIZ like previous gens)
		set_bin_size(ring, gmem->bin_w, gmem->bin_h,
				A6XX_RB_BIN_CONTROL_USE_VIZ | 0x6000000);

		OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
		OUT_RING(ring, 0x0);

		OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1);
		OUT_RING(ring, fd6_context(ctx)->magic.PC_UNKNOWN_9805);

		OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1);
		OUT_RING(ring, fd6_context(ctx)->magic.SP_UNKNOWN_A0F8);

		OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
		OUT_RING(ring, 0x1);
	} else {
		/* no binning pass, so enable stream-out for draw pass: */
		OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1);
		OUT_RING(ring, 0);

		set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
	}

	update_render_cntl(batch, pfb, false);
}

static void
set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1)
{
	OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
	OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(x1) |
			A6XX_RB_WINDOW_OFFSET_Y(y1));

	OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1);
	OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) |
			A6XX_RB_WINDOW_OFFSET2_Y(y1));

	OUT_PKT4(ring, REG_A6XX_SP_WINDOW_OFFSET, 1);
	OUT_RING(ring, A6XX_SP_WINDOW_OFFSET_X(x1) |
			A6XX_SP_WINDOW_OFFSET_Y(y1));

	OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
	OUT_RING(ring, A6XX_SP_TP_WINDOW_OFFSET_X(x1) |
			A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
}

/* before mem2gmem */
static void
fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
	struct fd_context *ctx = batch->ctx;
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
	struct fd6_context *fd6_ctx = fd6_context(ctx);
	struct fd_ringbuffer *ring = batch->gmem;

	emit_marker6(ring, 7);
	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
	emit_marker6(ring, 7);

	uint32_t x1 = tile->xoff;
	uint32_t y1 = tile->yoff;
	uint32_t x2 = tile->xoff + tile->bin_w - 1;
	uint32_t y2 = tile->yoff + tile->bin_h - 1;

	set_scissor(ring, x1, y1, x2, y2);

	if (use_hw_binning(batch)) {
		const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];

		OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

		OUT_PKT7(ring, CP_SET_MODE, 1);
		OUT_RING(ring, 0x0);

		/*
		 * Conditionally execute if no VSC overflow:
		 */

		BEGIN_RING(ring, 18);  /* ensure if/else doesn't get split */

		OUT_PKT7(ring, CP_REG_TEST, 1);
		OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
				A6XX_CP_REG_TEST_0_BIT(0) |
				A6XX_CP_REG_TEST_0_WAIT_FOR_ME);

		OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
		OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
		OUT_RING(ring, CP_COND_REG_EXEC_1_DWORDS(11));

		/* if (no overflow) */ {
			OUT_PKT7(ring, CP_SET_BIN_DATA5, 7);
			OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
					CP_SET_BIN_DATA5_0_VSC_N(tile->n));
			OUT_RELOC(ring, fd6_ctx->vsc_data,  /* VSC_PIPE[p].DATA_ADDRESS */
					(tile->p * fd6_ctx->vsc_data_pitch), 0, 0);
			OUT_RELOC(ring, fd6_ctx->vsc_data,  /* VSC_SIZE_ADDRESS + (p * 4) */
					(tile->p * 4) + (32 * fd6_ctx->vsc_data_pitch), 0, 0);
			OUT_RELOC(ring, fd6_ctx->vsc_data2,
					(tile->p * fd6_ctx->vsc_data2_pitch), 0, 0);

			OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
			OUT_RING(ring, 0x0);

			/* use a NOP packet to skip over the 'else' side: */
			OUT_PKT7(ring, CP_NOP, 2);
		} /* else */ {
			OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
			OUT_RING(ring, 0x1);
		}

		set_window_offset(ring, x1, y1);

		set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);

		OUT_PKT7(ring, CP_SET_MODE, 1);
		OUT_RING(ring, 0x0);

		OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8804, 1);
		OUT_RING(ring, 0x0);

		OUT_PKT4(ring, REG_A6XX_SP_TP_UNKNOWN_B304, 1);
		OUT_RING(ring, 0x0);

		OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_80A4, 1);
		OUT_RING(ring, 0x0);
	} else {
		set_window_offset(ring, x1, y1);

		OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
		OUT_RING(ring, 0x1);

		OUT_PKT7(ring, CP_SET_MODE, 1);
		OUT_RING(ring, 0x0);
	}
}

static void
set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
	struct pipe_scissor_state blit_scissor;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;

	blit_scissor.minx = 0;
	blit_scissor.miny = 0;
	blit_scissor.maxx = align(pfb->width, batch->ctx->screen->gmem_alignw);
	blit_scissor.maxy = align(pfb->height, batch->ctx->screen->gmem_alignh);

	OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
	OUT_RING(ring,
			A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) |
			A6XX_RB_BLIT_SCISSOR_TL_Y(blit_scissor.miny));
	OUT_RING(ring,
			A6XX_RB_BLIT_SCISSOR_BR_X(blit_scissor.maxx - 1) |
			A6XX_RB_BLIT_SCISSOR_BR_Y(blit_scissor.maxy - 1));
}

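/* Emit the common state for a gmem<->mem blit of a single-layer
 * surface: dst info/base/pitch, gmem base, and UBWC flags if enabled,
 * then trigger the blit.  The caller has already set up RB_BLIT_INFO
 * to select restore vs resolve behavior.
 */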
static void
emit_blit(struct fd_batch *batch,
		struct fd_ringbuffer *ring,
		uint32_t base,
		struct pipe_surface *psurf,
		bool stencil)
{
	struct fdl_slice *slice;
	struct fd_resource *rsc = fd_resource(psurf->texture);
	enum pipe_format pfmt = psurf->format;
	uint32_t offset;
	bool ubwc_enabled;

	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

	/* separate stencil case: */
	if (stencil) {
		rsc = rsc->stencil;
		pfmt = rsc->base.format;
	}

	slice = fd_resource_slice(rsc, psurf->u.tex.level);
	offset = fd_resource_offset(rsc, psurf->u.tex.level,
			psurf->u.tex.first_layer);
	ubwc_enabled = fd_resource_ubwc_enabled(rsc, psurf->u.tex.level);

	enum a6xx_format format = fd6_pipe2color(pfmt);
	uint32_t stride = slice->pitch * rsc->layout.cpp;
	uint32_t size = slice->size0;
	enum a3xx_color_swap swap = fd6_resource_swap(rsc, pfmt);
	enum a3xx_msaa_samples samples =
			fd_msaa_samples(rsc->base.nr_samples);
	uint32_t tile_mode = fd_resource_tile_mode(&rsc->base, psurf->u.tex.level);

	OUT_REG(ring,
		A6XX_RB_BLIT_DST_INFO(.tile_mode = tile_mode, .samples = samples,
			.color_format = format, .color_swap = swap, .flags = ubwc_enabled),
		A6XX_RB_BLIT_DST(.bo = rsc->bo, .bo_offset = offset),
		A6XX_RB_BLIT_DST_PITCH(.a6xx_rb_blit_dst_pitch = stride),
		A6XX_RB_BLIT_DST_ARRAY_PITCH(.a6xx_rb_blit_dst_array_pitch = size));

	OUT_REG(ring, A6XX_RB_BLIT_BASE_GMEM(.dword = base));

	if (ubwc_enabled) {
		OUT_PKT4(ring, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
		fd6_emit_flag_reference(ring, rsc,
				psurf->u.tex.level, psurf->u.tex.first_layer);
	}

	fd6_emit_blit(batch, ring);
}

static void
emit_restore_blit(struct fd_batch *batch,
		struct fd_ringbuffer *ring,
		uint32_t base,
		struct pipe_surface *psurf,
		unsigned buffer)
{
	bool stencil = (buffer == FD_BUFFER_STENCIL);

	OUT_REG(ring, A6XX_RB_BLIT_INFO(
		.gmem = true, .unk0 = true,
		.depth = (buffer == FD_BUFFER_DEPTH),
		.integer = util_format_is_pure_integer(psurf->format)));

	emit_blit(batch, ring, base, psurf, stencil);
}

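/* Emit blit-based fast clears into gmem for color, depth, and
 * (possibly separate) stencil, swizzling the clear color according to
 * the surface's swap mode (see the XXX below) before packing it.
 */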
static void
emit_clears(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
	enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);

	uint32_t buffers = batch->fast_cleared;

	if (buffers & PIPE_CLEAR_COLOR) {

		for (int i = 0; i < pfb->nr_cbufs; i++) {
			union pipe_color_union *color = &batch->clear_color[i];
			union util_color uc = {0};

			if (!pfb->cbufs[i])
				continue;

			if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
				continue;

			enum pipe_format pfmt = pfb->cbufs[i]->format;

			// XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
			union pipe_color_union swapped;
			switch (fd6_pipe2swap(pfmt)) {
			case WZYX:
				swapped.ui[0] = color->ui[0];
				swapped.ui[1] = color->ui[1];
				swapped.ui[2] = color->ui[2];
				swapped.ui[3] = color->ui[3];
				break;
			case WXYZ:
				swapped.ui[2] = color->ui[0];
				swapped.ui[1] = color->ui[1];
				swapped.ui[0] = color->ui[2];
				swapped.ui[3] = color->ui[3];
				break;
			case ZYXW:
				swapped.ui[3] = color->ui[0];
				swapped.ui[0] = color->ui[1];
				swapped.ui[1] = color->ui[2];
				swapped.ui[2] = color->ui[3];
				break;
			case XYZW:
				swapped.ui[3] = color->ui[0];
				swapped.ui[2] = color->ui[1];
				swapped.ui[1] = color->ui[2];
				swapped.ui[0] = color->ui[3];
				break;
			}

			util_pack_color_union(pfmt, &uc, &swapped);

			OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
			OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
					A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
					A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));

			OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
			OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
					A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));

			OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
			OUT_RING(ring, gmem->cbuf_base[i]);

			OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
			OUT_RING(ring, 0);

			OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
			OUT_RING(ring, uc.ui[0]);
			OUT_RING(ring, uc.ui[1]);
			OUT_RING(ring, uc.ui[2]);
			OUT_RING(ring, uc.ui[3]);

			fd6_emit_blit(batch, ring);
		}
	}

	const bool has_depth = pfb->zsbuf;
	const bool has_separate_stencil =
		has_depth && fd_resource(pfb->zsbuf->texture)->stencil;

	/* First clear depth or combined depth/stencil. */
	if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
			(!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
		enum pipe_format pfmt = pfb->zsbuf->format;
		uint32_t clear_value;
		uint32_t mask = 0;

		if (has_separate_stencil) {
			pfmt = util_format_get_depth_only(pfb->zsbuf->format);
			clear_value = util_pack_z(pfmt, batch->clear_depth);
		} else {
			pfmt = pfb->zsbuf->format;
			clear_value = util_pack_z_stencil(pfmt, batch->clear_depth,
					batch->clear_stencil);
		}

		if (buffers & PIPE_CLEAR_DEPTH)
			mask |= 0x1;

		if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))
			mask |= 0x2;

		OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
		OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
				A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
				A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));

		OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
		OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
				// XXX UNK0 for separate stencil ??
				A6XX_RB_BLIT_INFO_DEPTH |
				A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));

		OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
		OUT_RING(ring, gmem->zsbuf_base[0]);

		OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
		OUT_RING(ring, 0);

		OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
		OUT_RING(ring, clear_value);

		fd6_emit_blit(batch, ring);
	}

	/* Then clear the separate stencil buffer in case of 32 bit depth
	 * formats with separate stencil. */
	if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
		OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
		OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
				A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
				A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(FMT6_8_UINT));

		OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
		OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
				//A6XX_RB_BLIT_INFO_UNK0 |
				A6XX_RB_BLIT_INFO_DEPTH |
				A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1));

		OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
		OUT_RING(ring, gmem->zsbuf_base[1]);

		OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
		OUT_RING(ring, 0);

		OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
		OUT_RING(ring, batch->clear_stencil & 0xff);

		fd6_emit_blit(batch, ring);
	}
}

/*
 * transfer from system memory to gmem
 */
static void
emit_restore_blits(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;

	if (batch->restore & FD_BUFFER_COLOR) {
		unsigned i;
		for (i = 0; i < pfb->nr_cbufs; i++) {
			if (!pfb->cbufs[i])
				continue;
			if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
				continue;
			emit_restore_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
					FD_BUFFER_COLOR);
		}
	}

	if (batch->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

		if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) {
			emit_restore_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
					FD_BUFFER_DEPTH);
		}
		if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) {
			emit_restore_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
					FD_BUFFER_STENCIL);
		}
	}
}

static void
prepare_tile_setup_ib(struct fd_batch *batch)
{
	batch->tile_setup = fd_submit_new_ringbuffer(batch->submit, 0x1000,
			FD_RINGBUFFER_STREAMING);

	set_blit_scissor(batch, batch->tile_setup);

	emit_restore_blits(batch, batch->tile_setup);
	emit_clears(batch, batch->tile_setup);
}

/*
 * transfer from system memory to gmem
 */
static void
fd6_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
}

/* before IB to rendering cmds: */
static void
fd6_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
	fd_log(batch, "TILE: START CLEAR/RESTORE");
	if (batch->fast_cleared || !use_hw_binning(batch)) {
		fd6_emit_ib(batch->gmem, batch->tile_setup);
	} else {
		emit_conditional_ib(batch, tile, batch->tile_setup);
	}
	fd_log(batch, "TILE: END CLEAR/RESTORE");
}

static void
emit_resolve_blit(struct fd_batch *batch,
		struct fd_ringbuffer *ring,
		uint32_t base,
		struct pipe_surface *psurf,
		unsigned buffer)
{
	uint32_t info = 0;
	bool stencil = false;

	if (!fd_resource(psurf->texture)->valid)
		return;

	switch (buffer) {
	case FD_BUFFER_COLOR:
		break;
	case FD_BUFFER_STENCIL:
		info |= A6XX_RB_BLIT_INFO_UNK0;
		stencil = true;
		break;
	case FD_BUFFER_DEPTH:
		info |= A6XX_RB_BLIT_INFO_DEPTH;
		break;
	}

	if (util_format_is_pure_integer(psurf->format))
		info |= A6XX_RB_BLIT_INFO_INTEGER;

	OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
	OUT_RING(ring, info);

	emit_blit(batch, ring, base, psurf, stencil);
}

/*
 * transfer from gmem to system memory (ie. normal RAM)
 */

static void
prepare_tile_fini_ib(struct fd_batch *batch)
{
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	struct fd_ringbuffer *ring;

	batch->tile_fini = fd_submit_new_ringbuffer(batch->submit, 0x1000,
			FD_RINGBUFFER_STREAMING);
	ring = batch->tile_fini;

	set_blit_scissor(batch, ring);

	if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

		if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) {
			emit_resolve_blit(batch, ring,
					gmem->zsbuf_base[0], pfb->zsbuf,
					FD_BUFFER_DEPTH);
		}
		if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) {
			emit_resolve_blit(batch, ring,
					gmem->zsbuf_base[1], pfb->zsbuf,
					FD_BUFFER_STENCIL);
		}
	}

	if (batch->resolve & FD_BUFFER_COLOR) {
		unsigned i;
		for (i = 0; i < pfb->nr_cbufs; i++) {
			if (!pfb->cbufs[i])
				continue;
			if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
				continue;
			emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
					FD_BUFFER_COLOR);
		}
	}
}

static void
fd6_emit_tile(struct fd_batch *batch, const struct fd_tile *tile)
{
	if (!use_hw_binning(batch)) {
		fd6_emit_ib(batch->gmem, batch->draw);
	} else {
		emit_conditional_ib(batch, tile, batch->draw);
	}
}

static void
fd6_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
	struct fd_ringbuffer *ring = batch->gmem;

	if (use_hw_binning(batch)) {
		/* Conditionally execute if no VSC overflow: */

		BEGIN_RING(ring, 7);  /* ensure if/else doesn't get split */

		OUT_PKT7(ring, CP_REG_TEST, 1);
		OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
				A6XX_CP_REG_TEST_0_BIT(0) |
				A6XX_CP_REG_TEST_0_WAIT_FOR_ME);

		OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
		OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
		OUT_RING(ring, CP_COND_REG_EXEC_1_DWORDS(2));

		/* if (no overflow) */ {
			OUT_PKT7(ring, CP_SET_MARKER, 1);
			OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));
		}
	}

	OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
	OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
			CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
			CP_SET_DRAW_STATE__0_GROUP_ID(0));
	OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
	OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x0);

	emit_marker6(ring, 7);
	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
	emit_marker6(ring, 7);

	fd_log(batch, "TILE: START RESOLVE");
	if (batch->fast_cleared || !use_hw_binning(batch)) {
		fd6_emit_ib(batch->gmem, batch->tile_fini);
	} else {
		emit_conditional_ib(batch, tile, batch->tile_fini);
	}
	fd_log(batch, "TILE: END RESOLVE");
}

static void
fd6_emit_tile_fini(struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->gmem;

	OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
	OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_UNK3);

	fd6_emit_lrz_flush(ring);

	fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);

	if (use_hw_binning(batch)) {
		check_vsc_overflow(batch->ctx);
	}
}

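/* Clears for sysmem (bypass) rendering, done with fd6_clear_surface()
 * rather than the gmem blit events used in emit_clears().
 */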
static void
emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
	struct fd_context *ctx = batch->ctx;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;

	uint32_t buffers = batch->fast_cleared;

	if (buffers & PIPE_CLEAR_COLOR) {
		for (int i = 0; i < pfb->nr_cbufs; i++) {
			union pipe_color_union *color = &batch->clear_color[i];

			if (!pfb->cbufs[i])
				continue;

			if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
				continue;

			fd6_clear_surface(ctx, ring,
					pfb->cbufs[i], pfb->width, pfb->height, color);
		}
	}
	if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
		union pipe_color_union value = {};

		const bool has_depth = pfb->zsbuf;
		struct pipe_resource *separate_stencil =
			has_depth && fd_resource(pfb->zsbuf->texture)->stencil ?
			&fd_resource(pfb->zsbuf->texture)->stencil->base : NULL;

		if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
				(!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
			value.f[0] = batch->clear_depth;
			value.ui[1] = batch->clear_stencil;
			fd6_clear_surface(ctx, ring,
					pfb->zsbuf, pfb->width, pfb->height, &value);
		}

		if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
			value.ui[0] = batch->clear_stencil;

			struct pipe_surface stencil_surf = *pfb->zsbuf;
			stencil_surf.texture = separate_stencil;

			fd6_clear_surface(ctx, ring,
					&stencil_surf, pfb->width, pfb->height, &value);
		}
	}

	fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
}

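/* Allocate the tess factor/param buffers for this batch, point the hw
 * at the tess factor BO, and patch both buffer addresses into the
 * previously reserved tess_addrs_constobj IB.
 */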
static void
setup_tess_buffers(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
	struct fd_context *ctx = batch->ctx;

	batch->tessfactor_bo = fd_bo_new(ctx->screen->dev,
			batch->tessfactor_size,
			DRM_FREEDRENO_GEM_TYPE_KMEM, "tessfactor");

	batch->tessparam_bo = fd_bo_new(ctx->screen->dev,
			batch->tessparam_size,
			DRM_FREEDRENO_GEM_TYPE_KMEM, "tessparam");

	OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR_LO, 2);
	OUT_RELOCW(ring, batch->tessfactor_bo, 0, 0, 0);

	batch->tess_addrs_constobj->cur = batch->tess_addrs_constobj->start;
	OUT_RELOCW(batch->tess_addrs_constobj, batch->tessparam_bo, 0, 0, 0);
	OUT_RELOCW(batch->tess_addrs_constobj, batch->tessfactor_bo, 0, 0, 0);
}

static void
fd6_emit_sysmem_prep(struct fd_batch *batch)
{
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	struct fd_ringbuffer *ring = batch->gmem;

	fd6_emit_restore(batch, ring);

	if (pfb->width > 0 && pfb->height > 0)
		set_scissor(ring, 0, 0, pfb->width - 1, pfb->height - 1);
	else
		set_scissor(ring, 0, 0, 0, 0);

	set_window_offset(ring, 0, 0);

	set_bin_size(ring, 0, 0, 0xc00000);  /* 0xc00000 = BYPASS? */

	emit_sysmem_clears(batch, ring);

	fd6_emit_lrz_flush(ring);

	if (batch->lrz_clear)
		fd6_emit_ib(ring, batch->lrz_clear);

	emit_marker6(ring, 7);
	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
	emit_marker6(ring, 7);

	if (batch->tessellation)
		setup_tess_buffers(batch, ring);

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x0);

	/* blob controls "local" in IB2, but I think that is not required */
	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x1);

	fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
	fd6_cache_inv(batch, ring);

	fd_wfi(batch, ring);
	OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
	OUT_RING(ring, fd6_context(batch->ctx)->magic.RB_CCU_CNTL_bypass);

	/* enable stream-out, with sysmem there is only one pass: */
	OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1);
	OUT_RING(ring, 0);

	OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
	OUT_RING(ring, 0x1);

	emit_zs(ring, pfb->zsbuf, NULL);
	emit_mrt(ring, pfb, NULL);
	emit_msaa(ring, pfb->samples);

	update_render_cntl(batch, pfb, false);
}

static void
fd6_emit_sysmem_fini(struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->gmem;

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x0);

	fd6_emit_lrz_flush(ring);

	fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
}

void
fd6_gmem_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	ctx->emit_tile_init = fd6_emit_tile_init;
	ctx->emit_tile_prep = fd6_emit_tile_prep;
	ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem;
	ctx->emit_tile_renderprep = fd6_emit_tile_renderprep;
	ctx->emit_tile = fd6_emit_tile;
	ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem;
	ctx->emit_tile_fini = fd6_emit_tile_fini;
	ctx->emit_sysmem_prep = fd6_emit_sysmem_prep;
	ctx->emit_sysmem_fini = fd6_emit_sysmem_fini;
}