nir: allow specifying filter callback in lower_alu_to_scalar
[mesa.git] / src / gallium / drivers / freedreno / a2xx / fd2_gmem.c
1 /*
2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "pipe/p_state.h"
28 #include "util/u_string.h"
29 #include "util/u_memory.h"
30 #include "util/u_inlines.h"
31
32 #include "freedreno_draw.h"
33 #include "freedreno_state.h"
34 #include "freedreno_resource.h"
35
36 #include "fd2_gmem.h"
37 #include "fd2_context.h"
38 #include "fd2_emit.h"
39 #include "fd2_program.h"
40 #include "fd2_util.h"
41 #include "fd2_zsa.h"
42 #include "fd2_draw.h"
43 #include "instr-a2xx.h"
44
45 static uint32_t fmt2swap(enum pipe_format format)
46 {
47 switch (format) {
48 case PIPE_FORMAT_B8G8R8A8_UNORM:
49 case PIPE_FORMAT_B8G8R8X8_UNORM:
50 case PIPE_FORMAT_B5G6R5_UNORM:
51 case PIPE_FORMAT_B5G5R5A1_UNORM:
52 case PIPE_FORMAT_B5G5R5X1_UNORM:
53 case PIPE_FORMAT_B4G4R4A4_UNORM:
54 case PIPE_FORMAT_B4G4R4X4_UNORM:
55 /* TODO probably some more.. */
56 return 1;
57 default:
58 return 0;
59 }
60 }
61
62 static bool
63 use_hw_binning(struct fd_batch *batch)
64 {
65 struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
66
67 /* we hardcoded a limit of 8 "pipes", we can increase this limit
68 * at the cost of a slightly larger command stream
69 * however very few cases will need more than 8
70 * gmem->num_vsc_pipes == 0 means empty batch (TODO: does it still happen?)
71 */
72 if (gmem->num_vsc_pipes > 8 || !gmem->num_vsc_pipes)
73 return false;
74
75 /* only a20x hw binning is implement
76 * a22x is more like a3xx, but perhaps the a20x works? (TODO)
77 */
78 if (!is_a20x(batch->ctx->screen))
79 return false;
80
81 return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
82 }
83
84 /* transfer from gmem to system memory (ie. normal RAM) */
85
86 static void
87 emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
88 struct pipe_surface *psurf)
89 {
90 struct fd_ringbuffer *ring = batch->tile_fini;
91 struct fd_resource *rsc = fd_resource(psurf->texture);
92 uint32_t swap = fmt2swap(psurf->format);
93 struct fd_resource_slice *slice =
94 fd_resource_slice(rsc, psurf->u.tex.level);
95 uint32_t offset =
96 fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
97
98 assert((slice->pitch & 31) == 0);
99 assert((offset & 0xfff) == 0);
100
101 if (!rsc->valid)
102 return;
103
104 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
105 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
106 OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(swap) |
107 A2XX_RB_COLOR_INFO_BASE(base) |
108 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)));
109
110 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
111 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
112 OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */
113 OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
114 OUT_RING(ring, slice->pitch >> 5); /* RB_COPY_DEST_PITCH */
115 OUT_RING(ring, /* RB_COPY_DEST_INFO */
116 A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) |
117 COND(!rsc->tile_mode, A2XX_RB_COPY_DEST_INFO_LINEAR) |
118 A2XX_RB_COPY_DEST_INFO_SWAP(swap) |
119 A2XX_RB_COPY_DEST_INFO_WRITE_RED |
120 A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
121 A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
122 A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
123
124 if (!is_a20x(batch->ctx->screen)) {
125 OUT_WFI (ring);
126
127 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
128 OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
129 OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
130 OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
131 }
132
133 fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
134 DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
135 }
136
137 static void
138 prepare_tile_fini_ib(struct fd_batch *batch)
139 {
140 struct fd_context *ctx = batch->ctx;
141 struct fd2_context *fd2_ctx = fd2_context(ctx);
142 struct fd_gmem_stateobj *gmem = &ctx->gmem;
143 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
144 struct fd_ringbuffer *ring;
145
146 batch->tile_fini = fd_submit_new_ringbuffer(batch->submit, 0x1000,
147 FD_RINGBUFFER_STREAMING);
148 ring = batch->tile_fini;
149
150 fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
151 { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
152 }, 1);
153
154 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
155 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
156 OUT_RING(ring, 0x00000000); /* PA_SC_WINDOW_OFFSET */
157
158 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
159 OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
160 OUT_RING(ring, 0);
161
162 if (!is_a20x(ctx->screen)) {
163 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
164 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
165 OUT_RING(ring, 0x0000028f);
166 }
167
168 fd2_program_emit(ctx, ring, &ctx->solid_prog);
169
170 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
171 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
172 OUT_RING(ring, 0x0000ffff);
173
174 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
175 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
176 OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
177
178 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
179 OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
180 OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
181 A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
182 A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
183
184 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
185 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
186 OUT_RING(ring, xy2d(0, 0)); /* PA_SC_WINDOW_SCISSOR_TL */
187 OUT_RING(ring, xy2d(pfb->width, pfb->height)); /* PA_SC_WINDOW_SCISSOR_BR */
188
189 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
190 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
191 OUT_RING(ring, 0x00000000);
192
193 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
194 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
195 OUT_RING(ring, fui((float) gmem->bin_w / 2.0)); /* XSCALE */
196 OUT_RING(ring, fui((float) gmem->bin_w / 2.0)); /* XOFFSET */
197 OUT_RING(ring, fui((float) gmem->bin_h / 2.0)); /* YSCALE */
198 OUT_RING(ring, fui((float) gmem->bin_h / 2.0)); /* YOFFSET */
199
200 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
201 OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
202 OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));
203
204 if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
205 emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
206
207 if (batch->resolve & FD_BUFFER_COLOR)
208 emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
209
210 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
211 OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
212 OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
213
214 if (!is_a20x(ctx->screen)) {
215 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
216 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
217 OUT_RING(ring, 0x0000003b);
218 }
219 }
220
221 static void
222 fd2_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
223 {
224 fd2_emit_ib(batch->gmem, batch->tile_fini);
225 }
226
227 /* transfer from system memory to gmem */
228
229 static void
230 emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
231 struct pipe_surface *psurf)
232 {
233 struct fd_ringbuffer *ring = batch->gmem;
234 struct fd_resource *rsc = fd_resource(psurf->texture);
235 struct fd_resource_slice *slice =
236 fd_resource_slice(rsc, psurf->u.tex.level);
237 uint32_t offset =
238 fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
239 uint32_t swiz;
240
241 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
242 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
243 OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
244 A2XX_RB_COLOR_INFO_BASE(base) |
245 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)));
246
247 swiz = fd2_tex_swiz(psurf->format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
248 PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);
249
250 /* emit fb as a texture: */
251 OUT_PKT3(ring, CP_SET_CONSTANT, 7);
252 OUT_RING(ring, 0x00010000);
253 OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) |
254 A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) |
255 A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
256 A2XX_SQ_TEX_0_PITCH(slice->pitch));
257 OUT_RELOC(ring, rsc->bo, offset,
258 fd2_pipe2surface(psurf->format) |
259 A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL), 0);
260 OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
261 A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
262 OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) |
263 swiz |
264 A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
265 A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
266 OUT_RING(ring, 0x00000000);
267 OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D));
268
269 if (!is_a20x(batch->ctx->screen)) {
270 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
271 OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
272 OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
273 OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
274 }
275
276 fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
277 DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
278 }
279
280 static void
281 fd2_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile)
282 {
283 struct fd_context *ctx = batch->ctx;
284 struct fd2_context *fd2_ctx = fd2_context(ctx);
285 struct fd_gmem_stateobj *gmem = &ctx->gmem;
286 struct fd_ringbuffer *ring = batch->gmem;
287 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
288 unsigned bin_w = tile->bin_w;
289 unsigned bin_h = tile->bin_h;
290 float x0, y0, x1, y1;
291
292 fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
293 { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
294 { .prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36 },
295 }, 2);
296
297 /* write texture coordinates to vertexbuf: */
298 x0 = ((float)tile->xoff) / ((float)pfb->width);
299 x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
300 y0 = ((float)tile->yoff) / ((float)pfb->height);
301 y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
302 OUT_PKT3(ring, CP_MEM_WRITE, 7);
303 OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0);
304 OUT_RING(ring, fui(x0));
305 OUT_RING(ring, fui(y0));
306 OUT_RING(ring, fui(x1));
307 OUT_RING(ring, fui(y0));
308 OUT_RING(ring, fui(x0));
309 OUT_RING(ring, fui(y1));
310
311 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
312 OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
313 OUT_RING(ring, 0);
314
315 fd2_program_emit(ctx, ring, &ctx->blit_prog[0]);
316
317 OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
318 OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
319
320 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
321 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
322 OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
323
324 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
325 OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
326 OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
327 A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
328 A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
329
330 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
331 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
332 OUT_RING(ring, 0x0000ffff);
333
334 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
335 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
336 OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
337 A2XX_RB_COLORCONTROL_BLEND_DISABLE |
338 A2XX_RB_COLORCONTROL_ROP_CODE(12) |
339 A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
340 A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
341
342 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
343 OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
344 OUT_RING(ring, A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(FACTOR_ONE) |
345 A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(BLEND2_DST_PLUS_SRC) |
346 A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(FACTOR_ZERO) |
347 A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(FACTOR_ONE) |
348 A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(BLEND2_DST_PLUS_SRC) |
349 A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(FACTOR_ZERO));
350
351 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
352 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
353 OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_DISABLE |
354 xy2d(0,0)); /* PA_SC_WINDOW_SCISSOR_TL */
355 OUT_RING(ring, xy2d(bin_w, bin_h)); /* PA_SC_WINDOW_SCISSOR_BR */
356
357 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
358 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
359 OUT_RING(ring, fui((float)bin_w/2.0)); /* PA_CL_VPORT_XSCALE */
360 OUT_RING(ring, fui((float)bin_w/2.0)); /* PA_CL_VPORT_XOFFSET */
361 OUT_RING(ring, fui(-(float)bin_h/2.0)); /* PA_CL_VPORT_YSCALE */
362 OUT_RING(ring, fui((float)bin_h/2.0)); /* PA_CL_VPORT_YOFFSET */
363
364 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
365 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
366 OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT |
367 A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT | // XXX check this???
368 A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
369 A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
370 A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
371 A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
372
373 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
374 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
375 OUT_RING(ring, 0x00000000);
376
377 if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
378 emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
379
380 if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR))
381 emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
382
383 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
384 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
385 OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
386 A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
387 A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
388 A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
389 A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
390 A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
391 A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
392
393 /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
394 }
395
396 static void
397 patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
398 {
399 unsigned i;
400
401 if (!is_a20x(batch->ctx->screen)) {
402 /* identical to a3xx */
403 for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
404 struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
405 *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
406 }
407 util_dynarray_clear(&batch->draw_patches);
408 return;
409 }
410
411 if (vismode == USE_VISIBILITY)
412 return;
413
414 for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t*); i++) {
415 uint32_t *ptr = *util_dynarray_element(&batch->draw_patches, uint32_t*, i);
416 unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */
417
418 /* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX
419 * replace first two DWORDS with NOP and move the rest down
420 * (we don't want to have to move the idx buffer reloc)
421 */
422 ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8);
423 ptr[1] = 0x00000000;
424
425 ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */
426 ptr[2] = CP_TYPE3_PKT | ((cnt-2) << 16) | (CP_DRAW_INDX << 8);
427 ptr[3] = 0x00000000;
428 }
429 }
430
431 static void
432 fd2_emit_sysmem_prep(struct fd_batch *batch)
433 {
434 struct fd_context *ctx = batch->ctx;
435 struct fd_ringbuffer *ring = batch->gmem;
436 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
437 struct pipe_surface *psurf = pfb->cbufs[0];
438
439 if (!psurf)
440 return;
441
442 struct fd_resource *rsc = fd_resource(psurf->texture);
443 struct fd_resource_slice *slice =
444 fd_resource_slice(rsc, psurf->u.tex.level);
445 uint32_t offset =
446 fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
447
448 assert((slice->pitch & 31) == 0);
449 assert((offset & 0xfff) == 0);
450
451 fd2_emit_restore(ctx, ring);
452
453 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
454 OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
455 OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(slice->pitch));
456
457 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
458 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
459 OUT_RELOCW(ring, rsc->bo, offset,
460 COND(!rsc->tile_mode, A2XX_RB_COLOR_INFO_LINEAR) |
461 A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
462 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)), 0);
463
464 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
465 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
466 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE);
467 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) |
468 A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height));
469
470 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
471 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
472 OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(0) |
473 A2XX_PA_SC_WINDOW_OFFSET_Y(0));
474
475 patch_draws(batch, IGNORE_VISIBILITY);
476 util_dynarray_clear(&batch->draw_patches);
477 util_dynarray_clear(&batch->shader_patches);
478 }
479
480 /* before first tile */
481 static void
482 fd2_emit_tile_init(struct fd_batch *batch)
483 {
484 struct fd_context *ctx = batch->ctx;
485 struct fd_ringbuffer *ring = batch->gmem;
486 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
487 struct fd_gmem_stateobj *gmem = &ctx->gmem;
488 enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
489 uint32_t reg;
490
491 fd2_emit_restore(ctx, ring);
492
493 prepare_tile_fini_ib(batch);
494
495 OUT_PKT3(ring, CP_SET_CONSTANT, 4);
496 OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
497 OUT_RING(ring, gmem->bin_w); /* RB_SURFACE_INFO */
498 OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
499 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
500 reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
501 if (pfb->zsbuf)
502 reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
503 OUT_RING(ring, reg); /* RB_DEPTH_INFO */
504
505 /* fast clear patches */
506 int depth_size = -1;
507 int color_size = -1;
508
509 if (pfb->cbufs[0])
510 color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;
511
512 if (pfb->zsbuf)
513 depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;
514
515 for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
516 struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
517 uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
518 uint32_t size, lines;
519
520 /* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
521 switch (patch->val) {
522 case GMEM_PATCH_FASTCLEAR_COLOR:
523 size = align(gmem->bin_w * gmem->bin_h * color_size, 0x8000);
524 lines = size / 1024;
525 depth_base = size / 2;
526 break;
527 case GMEM_PATCH_FASTCLEAR_DEPTH:
528 size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x8000);
529 lines = size / 1024;
530 color_base = depth_base;
531 depth_base = depth_base + size / 2;
532 break;
533 case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
534 lines = align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x8000) / 1024;
535 break;
536 case GMEM_PATCH_RESTORE_INFO:
537 patch->cs[0] = gmem->bin_w;
538 patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
539 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
540 patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
541 if (pfb->zsbuf)
542 patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
543 continue;
544 default:
545 continue;
546 }
547
548 patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
549 A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
550 patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
551 A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
552 patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
553 A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
554 }
555 util_dynarray_clear(&batch->gmem_patches);
556
557 /* set to zero, for some reason hardware doesn't like certain values */
558 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
559 OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
560 OUT_RING(ring, 0);
561
562 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
563 OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
564 OUT_RING(ring, 0);
565
566 if (use_hw_binning(batch)) {
567 /* patch out unneeded memory exports by changing EXEC CF to EXEC_END
568 *
569 * in the shader compiler, we guarantee that the shader ends with
570 * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports
571 *
572 * the since patches point only to dwords and CFs are 1.5 dwords
573 * the patch is aligned and might point to a ALLOC CF
574 */
575 for (int i = 0; i < batch->shader_patches.size / sizeof(void*); i++) {
576 instr_cf_t *cf =
577 *util_dynarray_element(&batch->shader_patches, instr_cf_t*, i);
578 if (cf->opc == ALLOC)
579 cf++;
580 assert(cf->opc == EXEC);
581 assert(cf[ctx->screen->num_vsc_pipes*2-2].opc == EXEC_END);
582 cf[2*(gmem->num_vsc_pipes-1)].opc = EXEC_END;
583 }
584
585 patch_draws(batch, USE_VISIBILITY);
586
587 /* initialize shader constants for the binning memexport */
588 OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4);
589 OUT_RING(ring, 0x0000000C);
590
591 for (int i = 0; i < gmem->num_vsc_pipes; i++) {
592 struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
593
594 /* allocate in 64k increments to avoid reallocs */
595 uint32_t bo_size = align(batch->num_vertices, 0x10000);
596 if (!pipe->bo || fd_bo_size(pipe->bo) < bo_size) {
597 if (pipe->bo)
598 fd_bo_del(pipe->bo);
599 pipe->bo = fd_bo_new(ctx->dev, bo_size,
600 DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
601 assert(pipe->bo);
602 }
603
604 /* memory export address (export32):
605 * .x: (base_address >> 2) | 0x40000000 (?)
606 * .y: index (float) - set by shader
607 * .z: 0x4B00D000 (?)
608 * .w: 0x4B000000 (?) | max_index (?)
609 */
610 OUT_RELOCW(ring, pipe->bo, 0, 0x40000000, -2);
611 OUT_RING(ring, 0x00000000);
612 OUT_RING(ring, 0x4B00D000);
613 OUT_RING(ring, 0x4B000000 | bo_size);
614 }
615
616 OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8);
617 OUT_RING(ring, 0x0000018C);
618
619 for (int i = 0; i < gmem->num_vsc_pipes; i++) {
620 struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
621 float off_x, off_y, mul_x, mul_y;
622
623 /* const to tranform from [-1,1] to bin coordinates for this pipe
624 * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc
625 * 8 possible values on x/y axis,
626 * to clip at binning stage: only use center 6x6
627 * TODO: set the z parameters too so that hw binning
628 * can clip primitives in Z too
629 */
630
631 mul_x = 1.0f / (float) (gmem->bin_w * 8);
632 mul_y = 1.0f / (float) (gmem->bin_h * 8);
633 off_x = -pipe->x * (1.0/8.0f) + 0.125f - mul_x * gmem->minx;
634 off_y = -pipe->y * (1.0/8.0f) + 0.125f - mul_y * gmem->miny;
635
636 OUT_RING(ring, fui(off_x * (256.0f/255.0f)));
637 OUT_RING(ring, fui(off_y * (256.0f/255.0f)));
638 OUT_RING(ring, 0x3f000000);
639 OUT_RING(ring, fui(0.0f));
640
641 OUT_RING(ring, fui(mul_x * (256.0f/255.0f)));
642 OUT_RING(ring, fui(mul_y * (256.0f/255.0f)));
643 OUT_RING(ring, fui(0.0f));
644 OUT_RING(ring, fui(0.0f));
645 }
646
647 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
648 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
649 OUT_RING(ring, 0);
650
651 fd2_emit_ib(ring, batch->binning);
652
653 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
654 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
655 OUT_RING(ring, 0x00000002);
656 } else {
657 patch_draws(batch, IGNORE_VISIBILITY);
658 }
659
660 util_dynarray_clear(&batch->draw_patches);
661 util_dynarray_clear(&batch->shader_patches);
662 }
663
664 /* before mem2gmem */
665 static void
666 fd2_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
667 {
668 struct fd_ringbuffer *ring = batch->gmem;
669 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
670 enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
671
672 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
673 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
674 OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(1) | /* RB_COLOR_INFO */
675 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
676
677 /* setup screen scissor for current tile (same for mem2gmem): */
678 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
679 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
680 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_X(0) |
681 A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(0));
682 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
683 A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
684 }
685
686 /* before IB to rendering cmds: */
687 static void
688 fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
689 {
690 struct fd_context *ctx = batch->ctx;
691 struct fd2_context *fd2_ctx = fd2_context(ctx);
692 struct fd_ringbuffer *ring = batch->gmem;
693 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
694 enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
695
696 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
697 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
698 OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
699 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
700
701 /* setup window scissor and offset for current tile (different
702 * from mem2gmem):
703 */
704 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
705 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
706 OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
707 A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));
708
709 /* write SCISSOR_BR to memory so fast clear path can restore from it */
710 OUT_PKT3(ring, CP_MEM_WRITE, 2);
711 OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
712 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
713 A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
714
715 /* set the copy offset for gmem2mem */
716 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
717 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_OFFSET));
718 OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
719 A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff));
720
721 /* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
722 if (is_a20x(ctx->screen)) {
723 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
724 OUT_RING(ring, 0x00000580);
725 OUT_RING(ring, fui(tile->xoff));
726 OUT_RING(ring, fui(tile->yoff));
727 OUT_RING(ring, fui(0.0f));
728 OUT_RING(ring, fui(0.0f));
729 }
730
731 if (use_hw_binning(batch)) {
732 struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p];
733
734 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
735 OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
736 OUT_RING(ring, tile->n);
737
738 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
739 OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
740 OUT_RING(ring, tile->n);
741
742 /* TODO only emit this when tile->p changes */
743 OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
744 OUT_RELOC(ring, pipe->bo, 0, 0, 0);
745 }
746 }
747
748 void
749 fd2_gmem_init(struct pipe_context *pctx)
750 {
751 struct fd_context *ctx = fd_context(pctx);
752
753 ctx->emit_sysmem_prep = fd2_emit_sysmem_prep;
754 ctx->emit_tile_init = fd2_emit_tile_init;
755 ctx->emit_tile_prep = fd2_emit_tile_prep;
756 ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem;
757 ctx->emit_tile_renderprep = fd2_emit_tile_renderprep;
758 ctx->emit_tile_gmem2mem = fd2_emit_tile_gmem2mem;
759 }