freedreno: a2xx: minor solid_vertexbuf fixups
[mesa.git] / src / gallium / drivers / freedreno / a2xx / fd2_gmem.c
1 /*
2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "pipe/p_state.h"
28 #include "util/u_string.h"
29 #include "util/u_memory.h"
30 #include "util/u_inlines.h"
31
32 #include "freedreno_draw.h"
33 #include "freedreno_state.h"
34 #include "freedreno_resource.h"
35
36 #include "fd2_gmem.h"
37 #include "fd2_context.h"
38 #include "fd2_emit.h"
39 #include "fd2_program.h"
40 #include "fd2_util.h"
41 #include "fd2_zsa.h"
42 #include "fd2_draw.h"
43 #include "instr-a2xx.h"
44
45 static uint32_t fmt2swap(enum pipe_format format)
46 {
47 switch (format) {
48 case PIPE_FORMAT_B8G8R8A8_UNORM:
49 case PIPE_FORMAT_B8G8R8X8_UNORM:
50 case PIPE_FORMAT_B5G6R5_UNORM:
51 case PIPE_FORMAT_B5G5R5A1_UNORM:
52 case PIPE_FORMAT_B5G5R5X1_UNORM:
53 case PIPE_FORMAT_B4G4R4A4_UNORM:
54 case PIPE_FORMAT_B4G4R4X4_UNORM:
55 /* TODO probably some more.. */
56 return 1;
57 default:
58 return 0;
59 }
60 }
61
62 /* transfer from gmem to system memory (ie. normal RAM) */
63
64 static void
65 emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
66 struct pipe_surface *psurf)
67 {
68 struct fd_ringbuffer *ring = batch->gmem;
69 struct fd_resource *rsc = fd_resource(psurf->texture);
70 uint32_t swap = fmt2swap(psurf->format);
71 struct fd_resource_slice *slice =
72 fd_resource_slice(rsc, psurf->u.tex.level);
73 uint32_t offset =
74 fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
75
76 assert((slice->pitch & 31) == 0);
77 assert((offset & 0xfff) == 0);
78
79 if (!rsc->valid)
80 return;
81
82 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
83 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
84 OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(swap) |
85 A2XX_RB_COLOR_INFO_BASE(base) |
86 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)));
87
88 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
89 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
90 OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */
91 OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
92 OUT_RING(ring, slice->pitch >> 5); /* RB_COPY_DEST_PITCH */
93 OUT_RING(ring, /* RB_COPY_DEST_INFO */
94 A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) |
95 A2XX_RB_COPY_DEST_INFO_LINEAR |
96 A2XX_RB_COPY_DEST_INFO_SWAP(swap) |
97 A2XX_RB_COPY_DEST_INFO_WRITE_RED |
98 A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
99 A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
100 A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
101
102 if (!is_a20x(batch->ctx->screen)) {
103 OUT_WFI (ring);
104
105 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
106 OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
107 OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
108 OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
109 }
110
111 fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
112 DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
113 }
114
115 static void
116 fd2_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
117 {
118 struct fd_context *ctx = batch->ctx;
119 struct fd2_context *fd2_ctx = fd2_context(ctx);
120 struct fd_gmem_stateobj *gmem = &ctx->gmem;
121 struct fd_ringbuffer *ring = batch->gmem;
122 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
123
124 fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
125 { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
126 }, 1);
127
128 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
129 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
130 OUT_RING(ring, 0x00000000); /* PA_SC_WINDOW_OFFSET */
131
132 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
133 OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
134 OUT_RING(ring, 0);
135
136 if (!is_a20x(ctx->screen)) {
137 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
138 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
139 OUT_RING(ring, 0x0000028f);
140 }
141
142 fd2_program_emit(ctx, ring, &ctx->solid_prog);
143
144 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
145 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
146 OUT_RING(ring, 0x0000ffff);
147
148 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
149 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
150 OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
151
152 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
153 OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
154 OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
155 A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
156 A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
157
158 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
159 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
160 OUT_RING(ring, xy2d(0, 0)); /* PA_SC_WINDOW_SCISSOR_TL */
161 OUT_RING(ring, xy2d(pfb->width, pfb->height)); /* PA_SC_WINDOW_SCISSOR_BR */
162
163 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
164 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
165 OUT_RING(ring, 0x00000000);
166
167 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
168 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
169 OUT_RING(ring, fui((float) tile->bin_w / 2.0)); /* XSCALE */
170 OUT_RING(ring, fui((float) tile->bin_w / 2.0)); /* XOFFSET */
171 OUT_RING(ring, fui((float) tile->bin_h / 2.0)); /* YSCALE */
172 OUT_RING(ring, fui((float) tile->bin_h / 2.0)); /* YOFFSET */
173
174 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
175 OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
176 OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));
177
178 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
179 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_OFFSET));
180 OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
181 A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff));
182
183 if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
184 emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
185
186 if (batch->resolve & FD_BUFFER_COLOR)
187 emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
188
189 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
190 OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
191 OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
192
193 if (!is_a20x(ctx->screen)) {
194 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
195 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
196 OUT_RING(ring, 0x0000003b);
197 }
198 }
199
200 /* transfer from system memory to gmem */
201
202 static void
203 emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
204 struct pipe_surface *psurf)
205 {
206 struct fd_ringbuffer *ring = batch->gmem;
207 struct fd_resource *rsc = fd_resource(psurf->texture);
208 struct fd_resource_slice *slice =
209 fd_resource_slice(rsc, psurf->u.tex.level);
210 uint32_t offset =
211 fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
212 uint32_t swiz;
213
214 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
215 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
216 OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
217 A2XX_RB_COLOR_INFO_BASE(base) |
218 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)));
219
220 swiz = fd2_tex_swiz(psurf->format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
221 PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);
222
223 /* emit fb as a texture: */
224 OUT_PKT3(ring, CP_SET_CONSTANT, 7);
225 OUT_RING(ring, 0x00010000);
226 OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) |
227 A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) |
228 A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
229 A2XX_SQ_TEX_0_PITCH(slice->pitch));
230 OUT_RELOC(ring, rsc->bo, offset,
231 fd2_pipe2surface(psurf->format) |
232 A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL), 0);
233 OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
234 A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
235 OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) |
236 swiz |
237 A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
238 A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
239 OUT_RING(ring, 0x00000000);
240 OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D));
241
242 if (!is_a20x(batch->ctx->screen)) {
243 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
244 OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
245 OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
246 OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
247 }
248
249 fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
250 DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
251 }
252
253 static void
254 fd2_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile)
255 {
256 struct fd_context *ctx = batch->ctx;
257 struct fd2_context *fd2_ctx = fd2_context(ctx);
258 struct fd_gmem_stateobj *gmem = &ctx->gmem;
259 struct fd_ringbuffer *ring = batch->gmem;
260 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
261 unsigned bin_w = tile->bin_w;
262 unsigned bin_h = tile->bin_h;
263 float x0, y0, x1, y1;
264
265 fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
266 { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
267 { .prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36 },
268 }, 2);
269
270 /* write texture coordinates to vertexbuf: */
271 x0 = ((float)tile->xoff) / ((float)pfb->width);
272 x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
273 y0 = ((float)tile->yoff) / ((float)pfb->height);
274 y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
275 OUT_PKT3(ring, CP_MEM_WRITE, 9);
276 OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0);
277 OUT_RING(ring, fui(x0));
278 OUT_RING(ring, fui(y0));
279 OUT_RING(ring, fui(x1));
280 OUT_RING(ring, fui(y0));
281 OUT_RING(ring, fui(x0));
282 OUT_RING(ring, fui(y1));
283 OUT_RING(ring, fui(x1));
284 OUT_RING(ring, fui(y1));
285
286 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
287 OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
288 OUT_RING(ring, 0);
289
290 fd2_program_emit(ctx, ring, &ctx->blit_prog[0]);
291
292 OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
293 OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
294
295 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
296 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
297 OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
298
299 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
300 OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
301 OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
302 A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
303 A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
304
305 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
306 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
307 OUT_RING(ring, 0x0000ffff);
308
309 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
310 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
311 OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(PIPE_FUNC_ALWAYS) |
312 A2XX_RB_COLORCONTROL_BLEND_DISABLE |
313 A2XX_RB_COLORCONTROL_ROP_CODE(12) |
314 A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
315 A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
316
317 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
318 OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
319 OUT_RING(ring, A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(FACTOR_ONE) |
320 A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(BLEND2_DST_PLUS_SRC) |
321 A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(FACTOR_ZERO) |
322 A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(FACTOR_ONE) |
323 A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(BLEND2_DST_PLUS_SRC) |
324 A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(FACTOR_ZERO));
325
326 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
327 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
328 OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_DISABLE |
329 xy2d(0,0)); /* PA_SC_WINDOW_SCISSOR_TL */
330 OUT_RING(ring, xy2d(bin_w, bin_h)); /* PA_SC_WINDOW_SCISSOR_BR */
331
332 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
333 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
334 OUT_RING(ring, fui((float)bin_w/2.0)); /* PA_CL_VPORT_XSCALE */
335 OUT_RING(ring, fui((float)bin_w/2.0)); /* PA_CL_VPORT_XOFFSET */
336 OUT_RING(ring, fui(-(float)bin_h/2.0)); /* PA_CL_VPORT_YSCALE */
337 OUT_RING(ring, fui((float)bin_h/2.0)); /* PA_CL_VPORT_YOFFSET */
338
339 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
340 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
341 OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT |
342 A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT | // XXX check this???
343 A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
344 A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
345 A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
346 A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
347
348 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
349 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
350 OUT_RING(ring, 0x00000000);
351
352 if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
353 emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
354
355 if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR))
356 emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
357
358 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
359 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
360 OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
361 A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
362 A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
363 A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
364 A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
365 A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
366 A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
367
368 /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
369 }
370
371 static void
372 patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
373 {
374 unsigned i;
375
376 if (!is_a20x(batch->ctx->screen)) {
377 /* identical to a3xx */
378 for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
379 struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
380 *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
381 }
382 util_dynarray_resize(&batch->draw_patches, 0);
383 return;
384 }
385
386 if (vismode == USE_VISIBILITY)
387 return;
388
389 for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t*); i++) {
390 uint32_t *ptr = *util_dynarray_element(&batch->draw_patches, uint32_t*, i);
391 unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */
392
393 /* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX
394 * replace first two DWORDS with NOP and move the rest down
395 * (we don't want to have to move the idx buffer reloc)
396 */
397 ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8);
398 ptr[1] = 0x00000000;
399
400 ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */
401 ptr[2] = CP_TYPE3_PKT | ((cnt-2) << 16) | (CP_DRAW_INDX << 8);
402 ptr[3] = 0x00000000;
403 }
404 }
405
406 static void
407 fd2_emit_sysmem_prep(struct fd_batch *batch)
408 {
409 struct fd_context *ctx = batch->ctx;
410 struct fd_ringbuffer *ring = batch->gmem;
411 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
412 struct pipe_surface *psurf = pfb->cbufs[0];
413
414 if (!psurf)
415 return;
416
417 struct fd_resource *rsc = fd_resource(psurf->texture);
418 struct fd_resource_slice *slice =
419 fd_resource_slice(rsc, psurf->u.tex.level);
420 uint32_t offset =
421 fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
422
423 assert((slice->pitch & 31) == 0);
424 assert((offset & 0xfff) == 0);
425
426 fd2_emit_restore(ctx, ring);
427
428 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
429 OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
430 OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(slice->pitch));
431
432 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
433 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
434 OUT_RELOCW(ring, rsc->bo, offset, A2XX_RB_COLOR_INFO_LINEAR |
435 A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
436 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)), 0);
437
438 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
439 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
440 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE);
441 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) |
442 A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height));
443
444 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
445 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
446 OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(0) |
447 A2XX_PA_SC_WINDOW_OFFSET_Y(0));
448
449 patch_draws(batch, IGNORE_VISIBILITY);
450 util_dynarray_resize(&batch->draw_patches, 0);
451 util_dynarray_resize(&batch->shader_patches, 0);
452 }
453
454 /* before first tile */
455 static void
456 fd2_emit_tile_init(struct fd_batch *batch)
457 {
458 struct fd_context *ctx = batch->ctx;
459 struct fd_ringbuffer *ring = batch->gmem;
460 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
461 struct fd_gmem_stateobj *gmem = &ctx->gmem;
462 enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
463 uint32_t reg;
464
465 fd2_emit_restore(ctx, ring);
466
467 OUT_PKT3(ring, CP_SET_CONSTANT, 4);
468 OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
469 OUT_RING(ring, gmem->bin_w); /* RB_SURFACE_INFO */
470 OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
471 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
472 reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
473 if (pfb->zsbuf)
474 reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
475 OUT_RING(ring, reg); /* RB_DEPTH_INFO */
476
477 /* fast clear patches */
478 int depth_size = -1;
479 int color_size = -1;
480
481 if (pfb->cbufs[0])
482 color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;
483
484 if (pfb->zsbuf)
485 depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;
486
487 for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
488 struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
489 uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
490 uint32_t size, lines;
491
492 /* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
493 switch (patch->val) {
494 case GMEM_PATCH_FASTCLEAR_COLOR:
495 size = align(gmem->bin_w * gmem->bin_h * color_size, 0x4000);
496 lines = size / 1024;
497 depth_base = size / 2;
498 break;
499 case GMEM_PATCH_FASTCLEAR_DEPTH:
500 size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x4000);
501 lines = size / 1024;
502 color_base = depth_base;
503 depth_base = depth_base + size / 2;
504 break;
505 case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
506 lines = align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x4000) / 1024;
507 break;
508 case GMEM_PATCH_RESTORE_INFO:
509 patch->cs[0] = gmem->bin_w;
510 patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
511 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
512 patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
513 if (pfb->zsbuf)
514 patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
515 continue;
516 default:
517 continue;
518 }
519
520 patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
521 A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
522 patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
523 A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
524 patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
525 A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
526 }
527 util_dynarray_resize(&batch->gmem_patches, 0);
528
529 /* set to zero, for some reason hardware doesn't like certain values */
530 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
531 OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
532 OUT_RING(ring, 0);
533
534 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
535 OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
536 OUT_RING(ring, 0);
537
538 if (is_a20x(ctx->screen) && fd_binning_enabled && gmem->num_vsc_pipes) {
539 /* patch out unneeded memory exports by changing EXEC CF to EXEC_END
540 *
541 * in the shader compiler, we guarantee that the shader ends with
542 * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports
543 *
544 * the since patches point only to dwords and CFs are 1.5 dwords
545 * the patch is aligned and might point to a ALLOC CF
546 */
547 for (int i = 0; i < batch->shader_patches.size / sizeof(void*); i++) {
548 instr_cf_t *cf =
549 *util_dynarray_element(&batch->shader_patches, instr_cf_t*, i);
550 if (cf->opc == ALLOC)
551 cf++;
552 assert(cf->opc == EXEC);
553 assert(cf[ctx->screen->num_vsc_pipes*2-2].opc == EXEC_END);
554 cf[2*(gmem->num_vsc_pipes-1)].opc = EXEC_END;
555 }
556
557 patch_draws(batch, USE_VISIBILITY);
558
559 /* initialize shader constants for the binning memexport */
560 OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4);
561 OUT_RING(ring, 0x0000000C);
562
563 for (int i = 0; i < gmem->num_vsc_pipes; i++) {
564 struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
565
566 /* XXX we know how large this needs to be..
567 * should do some sort of realloc
568 * it should be ctx->batch->num_vertices bytes large
569 * with this size it will break with more than 256k vertices..
570 */
571 if (!pipe->bo) {
572 pipe->bo = fd_bo_new(ctx->dev, 0x40000,
573 DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
574 }
575
576 /* memory export address (export32):
577 * .x: (base_address >> 2) | 0x40000000 (?)
578 * .y: index (float) - set by shader
579 * .z: 0x4B00D000 (?)
580 * .w: 0x4B000000 (?) | max_index (?)
581 */
582 OUT_RELOCW(ring, pipe->bo, 0, 0x40000000, -2);
583 OUT_RING(ring, 0x00000000);
584 OUT_RING(ring, 0x4B00D000);
585 OUT_RING(ring, 0x4B000000 | 0x40000);
586 }
587
588 OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8);
589 OUT_RING(ring, 0x0000018C);
590
591 for (int i = 0; i < gmem->num_vsc_pipes; i++) {
592 struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
593 float off_x, off_y, mul_x, mul_y;
594
595 /* const to tranform from [-1,1] to bin coordinates for this pipe
596 * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc
597 * 8 possible values on x/y axis,
598 * to clip at binning stage: only use center 6x6
599 * TODO: set the z parameters too so that hw binning
600 * can clip primitives in Z too
601 */
602
603 mul_x = 1.0f / (float) (gmem->bin_w * 8);
604 mul_y = 1.0f / (float) (gmem->bin_h * 8);
605 off_x = -pipe->x * (1.0/8.0f) + 0.125f - mul_x * gmem->minx;
606 off_y = -pipe->y * (1.0/8.0f) + 0.125f - mul_y * gmem->miny;
607
608 OUT_RING(ring, fui(off_x * (256.0f/255.0f)));
609 OUT_RING(ring, fui(off_y * (256.0f/255.0f)));
610 OUT_RING(ring, 0x3f000000);
611 OUT_RING(ring, fui(0.0f));
612
613 OUT_RING(ring, fui(mul_x * (256.0f/255.0f)));
614 OUT_RING(ring, fui(mul_y * (256.0f/255.0f)));
615 OUT_RING(ring, fui(0.0f));
616 OUT_RING(ring, fui(0.0f));
617 }
618
619 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
620 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
621 OUT_RING(ring, 0);
622
623 ctx->emit_ib(ring, batch->binning);
624
625 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
626 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
627 OUT_RING(ring, 0x00000002);
628 } else {
629 patch_draws(batch, IGNORE_VISIBILITY);
630 }
631
632 util_dynarray_resize(&batch->draw_patches, 0);
633 util_dynarray_resize(&batch->shader_patches, 0);
634 }
635
636 /* before mem2gmem */
637 static void
638 fd2_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
639 {
640 struct fd_ringbuffer *ring = batch->gmem;
641 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
642 enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
643
644 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
645 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
646 OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(1) | /* RB_COLOR_INFO */
647 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
648
649 /* setup screen scissor for current tile (same for mem2gmem): */
650 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
651 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
652 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_X(0) |
653 A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(0));
654 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
655 A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
656 }
657
658 /* before IB to rendering cmds: */
659 static void
660 fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
661 {
662 struct fd_context *ctx = batch->ctx;
663 struct fd2_context *fd2_ctx = fd2_context(ctx);
664 struct fd_ringbuffer *ring = batch->gmem;
665 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
666 enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
667
668 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
669 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
670 OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
671 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
672
673 /* setup window scissor and offset for current tile (different
674 * from mem2gmem):
675 */
676 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
677 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
678 OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
679 A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));
680
681 /* write SCISSOR_BR to memory so fast clear path can restore from it */
682 OUT_PKT3(ring, CP_MEM_WRITE, 2);
683 OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
684 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
685 A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
686
687 /* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
688 if (is_a20x(ctx->screen)) {
689 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
690 OUT_RING(ring, 0x00000580);
691 OUT_RING(ring, fui(tile->xoff));
692 OUT_RING(ring, fui(tile->yoff));
693 OUT_RING(ring, fui(0.0f));
694 OUT_RING(ring, fui(0.0f));
695 }
696
697 if (is_a20x(ctx->screen) && fd_binning_enabled) {
698 struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p];
699
700 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
701 OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
702 OUT_RING(ring, tile->n);
703
704 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
705 OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
706 OUT_RING(ring, tile->n);
707
708 /* TODO only emit this when tile->p changes */
709 OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
710 OUT_RELOC(ring, pipe->bo, 0, 0, 0);
711 }
712 }
713
714 void
715 fd2_gmem_init(struct pipe_context *pctx)
716 {
717 struct fd_context *ctx = fd_context(pctx);
718
719 ctx->emit_sysmem_prep = fd2_emit_sysmem_prep;
720 ctx->emit_tile_init = fd2_emit_tile_init;
721 ctx->emit_tile_prep = fd2_emit_tile_prep;
722 ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem;
723 ctx->emit_tile_renderprep = fd2_emit_tile_renderprep;
724 ctx->emit_tile_gmem2mem = fd2_emit_tile_gmem2mem;
725 }