freedreno: a2xx: minor solid_vertexbuf fixups
[mesa.git] / src / gallium / drivers / freedreno / a2xx / fd2_draw.c
1 /*
2 * Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "pipe/p_state.h"
28 #include "util/u_string.h"
29 #include "util/u_memory.h"
30 #include "util/u_prim.h"
31
32 #include "freedreno_state.h"
33 #include "freedreno_resource.h"
34
35 #include "fd2_draw.h"
36 #include "fd2_context.h"
37 #include "fd2_emit.h"
38 #include "fd2_program.h"
39 #include "fd2_util.h"
40 #include "fd2_zsa.h"
41
42
43 static void
44 emit_cacheflush(struct fd_ringbuffer *ring)
45 {
46 unsigned i;
47
48 for (i = 0; i < 12; i++) {
49 OUT_PKT3(ring, CP_EVENT_WRITE, 1);
50 OUT_RING(ring, CACHE_FLUSH);
51 }
52 }
53
54 static void
55 emit_vertexbufs(struct fd_context *ctx)
56 {
57 struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;
58 struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;
59 struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];
60 unsigned i;
61
62 if (!vtx->num_elements)
63 return;
64
65 for (i = 0; i < vtx->num_elements; i++) {
66 struct pipe_vertex_element *elem = &vtx->pipe[i];
67 struct pipe_vertex_buffer *vb =
68 &vertexbuf->vb[elem->vertex_buffer_index];
69 bufs[i].offset = vb->buffer_offset;
70 bufs[i].size = fd_bo_size(fd_resource(vb->buffer.resource)->bo);
71 bufs[i].prsc = vb->buffer.resource;
72 }
73
74 // NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the
75 // CONST(20,0) (or CONST(26,0) in soliv_vp)
76
77 fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);
78 fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements);
79 }
80
81 static void
82 draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
83 struct fd_ringbuffer *ring, unsigned index_offset, bool binning)
84 {
85 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
86 OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
87 OUT_RING(ring, info->index_size ? 0 : info->start);
88
89 OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
90 OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
91
92 if (is_a20x(ctx->screen)) {
93 /* wait for DMA to finish and
94 * dummy draw one triangle with indexes 0,0,0.
95 * with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE.
96 *
97 * this workaround is for a HW bug related to DMA alignment:
98 * it is necessary for indexed draws and possibly also
99 * draws that read binning data
100 */
101 OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
102 OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */
103 OUT_RING(ring, 0x00000000);
104 OUT_RING(ring, 0x00001000); /* bit: 12: VGT_BUSY_NO_DMA */
105 OUT_RING(ring, 0x00000001);
106
107 OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6);
108 OUT_RING(ring, 0x00000000);
109 OUT_RING(ring, 0x0003c004);
110 OUT_RING(ring, 0x00000000);
111 OUT_RING(ring, 0x00000003);
112 OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 64, 0, 0);
113 OUT_RING(ring, 0x00000006);
114 } else {
115 OUT_WFI (ring);
116
117 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
118 OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
119 OUT_RING(ring, info->max_index); /* VGT_MAX_VTX_INDX */
120 OUT_RING(ring, info->min_index); /* VGT_MIN_VTX_INDX */
121 }
122
123 /* binning shader will take offset from C64 */
124 if (binning && is_a20x(ctx->screen)) {
125 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
126 OUT_RING(ring, 0x00000180);
127 OUT_RING(ring, fui(ctx->batch->num_vertices));
128 OUT_RING(ring, fui(0.0f));
129 OUT_RING(ring, fui(0.0f));
130 OUT_RING(ring, fui(0.0f));
131 }
132
133 enum pc_di_vis_cull_mode vismode = USE_VISIBILITY;
134 if (binning || info->mode == PIPE_PRIM_POINTS)
135 vismode = IGNORE_VISIBILITY;
136
137 fd_draw_emit(ctx->batch, ring, ctx->primtypes[info->mode],
138 vismode, info, index_offset);
139
140 if (is_a20x(ctx->screen)) {
141 /* not sure why this is required, but it fixes some hangs */
142 OUT_WFI(ring);
143 } else {
144 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
145 OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
146 OUT_RING(ring, 0x00000000);
147 }
148
149 emit_cacheflush(ring);
150 }
151
152
153 static bool
154 fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
155 unsigned index_offset)
156 {
157 if (!ctx->prog.fp || !ctx->prog.vp)
158 return false;
159
160 if (ctx->dirty & FD_DIRTY_VTXBUF)
161 emit_vertexbufs(ctx);
162
163 if (fd_binning_enabled)
164 fd2_emit_state_binning(ctx, ctx->dirty);
165
166 fd2_emit_state(ctx, ctx->dirty);
167
168 /* a2xx can draw only 65535 vertices at once
169 * on a22x the field in the draw command is 32bits but seems limited too
170 * using a limit of 32k because it fixes an unexplained hang
171 * 32766 works for all primitives (multiple of 2 and 3)
172 */
173 if (pinfo->count > 32766) {
174 static const uint16_t step_tbl[PIPE_PRIM_MAX] = {
175 [0 ... PIPE_PRIM_MAX - 1] = 32766,
176 [PIPE_PRIM_LINE_STRIP] = 32765,
177 [PIPE_PRIM_TRIANGLE_STRIP] = 32764,
178
179 /* needs more work */
180 [PIPE_PRIM_TRIANGLE_FAN] = 0,
181 [PIPE_PRIM_LINE_LOOP] = 0,
182 };
183
184 struct pipe_draw_info info = *pinfo;
185 unsigned count = info.count;
186 unsigned step = step_tbl[info.mode];
187 unsigned num_vertices = ctx->batch->num_vertices;
188
189 if (!step)
190 return false;
191
192 for (; count + step > 32766; count -= step) {
193 info.count = MIN2(count, 32766);
194 draw_impl(ctx, &info, ctx->batch->draw, index_offset, false);
195 draw_impl(ctx, &info, ctx->batch->binning, index_offset, true);
196 info.start += step;
197 ctx->batch->num_vertices += step;
198 }
199 /* changing this value is a hack, restore it */
200 ctx->batch->num_vertices = num_vertices;
201 } else {
202 draw_impl(ctx, pinfo, ctx->batch->draw, index_offset, false);
203 draw_impl(ctx, pinfo, ctx->batch->binning, index_offset, true);
204 }
205
206 fd_context_all_clean(ctx);
207
208 return true;
209 }
210
211 static void
212 clear_state(struct fd_batch *batch, struct fd_ringbuffer *ring,
213 unsigned buffers, bool fast_clear)
214 {
215 struct fd_context *ctx = batch->ctx;
216 struct fd2_context *fd2_ctx = fd2_context(ctx);
217 uint32_t reg;
218
219 fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
220 { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
221 }, 1);
222
223 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
224 OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
225 OUT_RING(ring, 0);
226
227 fd2_program_emit(ctx, ring, &ctx->solid_prog);
228
229 OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
230 OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
231
232 if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
233 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
234 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
235 reg = 0;
236 if (buffers & PIPE_CLEAR_DEPTH) {
237 reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |
238 A2XX_RB_DEPTHCONTROL_Z_ENABLE |
239 A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE |
240 A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
241 }
242 if (buffers & PIPE_CLEAR_STENCIL) {
243 reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |
244 A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
245 A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
246 }
247 OUT_RING(ring, reg);
248 }
249
250 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
251 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
252 OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
253 A2XX_RB_COLORCONTROL_BLEND_DISABLE |
254 A2XX_RB_COLORCONTROL_ROP_CODE(12) |
255 A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
256 A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
257
258 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
259 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
260 OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */
261 OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
262 A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
263 A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) |
264 (fast_clear ? A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0));
265
266 if (fast_clear) {
267 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
268 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
269 OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3));
270 }
271
272 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
273 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
274 OUT_RING(ring, 0x0000ffff);
275
276 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
277 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
278 if (buffers & PIPE_CLEAR_COLOR) {
279 OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
280 A2XX_RB_COLOR_MASK_WRITE_GREEN |
281 A2XX_RB_COLOR_MASK_WRITE_BLUE |
282 A2XX_RB_COLOR_MASK_WRITE_ALPHA);
283 } else {
284 OUT_RING(ring, 0x0);
285 }
286
287 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
288 OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
289 OUT_RING(ring, 0);
290
291 if (is_a20x(batch->ctx->screen))
292 return;
293
294 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
295 OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
296 OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
297 OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
298
299 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
300 OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
301 OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
302 OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
303
304 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
305 OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
306 OUT_RING(ring, 0x00000084);
307
308 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
309 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
310 OUT_RING(ring, 0x0000028f);
311 }
312
313 static void
314 clear_state_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
315 {
316 if (is_a20x(ctx->screen))
317 return;
318
319 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
320 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
321 OUT_RING(ring, 0x00000000);
322
323 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
324 OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
325 OUT_RING(ring, 0x00000000);
326
327 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
328 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
329 OUT_RING(ring, 0x0000003b);
330 }
331
332 static void
333 clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring,
334 uint32_t color_clear, uint32_t depth_clear, unsigned patch_type)
335 {
336 BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */
337
338 /* zero values are patched in */
339 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
340 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
341 OUT_RINGP(ring, patch_type, &batch->gmem_patches);
342 OUT_RING(ring, 0);
343
344 OUT_PKT3(ring, CP_SET_CONSTANT, 4);
345 OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
346 OUT_RING(ring, 0x8000 | 32);
347 OUT_RING(ring, 0);
348 OUT_RING(ring, 0);
349
350 /* set fill values */
351 if (!is_a20x(batch->ctx->screen)) {
352 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
353 OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
354 OUT_RING(ring, color_clear);
355
356 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
357 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
358 OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
359 A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf));
360
361 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
362 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
363 OUT_RING(ring, depth_clear);
364 } else {
365 const float sc = 1.0f / 255.0f;
366
367 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
368 OUT_RING(ring, 0x00000480);
369 OUT_RING(ring, fui((float) (color_clear >> 0 & 0xff) * sc));
370 OUT_RING(ring, fui((float) (color_clear >> 8 & 0xff) * sc));
371 OUT_RING(ring, fui((float) (color_clear >> 16 & 0xff) * sc));
372 OUT_RING(ring, fui((float) (color_clear >> 24 & 0xff) * sc));
373
374 // XXX if using float the rounding error breaks it..
375 float depth = ((double) (depth_clear >> 8)) * (1.0/(double) 0xffffff);
376 assert((unsigned) (((double) depth * (double) 0xffffff)) ==
377 (depth_clear >> 8));
378
379 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
380 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
381 OUT_RING(ring, fui(0.0f));
382 OUT_RING(ring, fui(depth));
383
384 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
385 OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
386 OUT_RING(ring, 0xff000000 |
387 A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) |
388 A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
389 OUT_RING(ring, 0xff000000 |
390 A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) |
391 A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
392 }
393
394 fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
395 DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
396 }
397
398 static bool
399 fd2_clear_fast(struct fd_context *ctx, unsigned buffers,
400 const union pipe_color_union *color, double depth, unsigned stencil)
401 {
402 /* using 4x MSAA allows clearing ~2x faster
403 * then we can use higher bpp clearing to clear lower bpp
404 * 1 "pixel" can clear 64 bits (rgba8+depth24+stencil8)
405 * note: its possible to clear with 32_32_32_32 format but its not faster
406 * note: fast clear doesn't work with sysmem rendering
407 * (sysmem rendering is disabled when clear is used)
408 *
409 * we only have 16-bit / 32-bit color formats
410 * and 16-bit / 32-bit depth formats
411 * so there are only a few possible combinations
412 *
413 * if the bpp of the color/depth doesn't match
414 * we clear with depth/color individually
415 */
416 struct fd2_context *fd2_ctx = fd2_context(ctx);
417 struct fd_batch *batch = ctx->batch;
418 struct fd_ringbuffer *ring = batch->draw;
419 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
420 uint32_t color_clear = 0, depth_clear = 0;
421 enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
422 int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */
423 int color_size = -1;
424
425 /* TODO: need to test performance on a22x */
426 if (!is_a20x(ctx->screen))
427 return false;
428
429 if (buffers & PIPE_CLEAR_COLOR)
430 color_size = util_format_get_blocksizebits(format) == 32;
431
432 if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
433 depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8;
434
435 assert(color_size >= 0 || depth_size >= 0);
436
437 /* when clearing 24_8, depth/stencil must be both cleared
438 * TODO: if buffer isn't attached we can clear it anyway
439 */
440 if (depth_size == 1 && !(buffers & PIPE_CLEAR_STENCIL) != !(buffers & PIPE_CLEAR_DEPTH))
441 return false;
442
443 if (color_size == 0) {
444 color_clear = pack_rgba(format, color->f);
445 color_clear = (color_clear << 16) | (color_clear & 0xffff);
446 } else if (color_size == 1) {
447 color_clear = pack_rgba(format, color->f);
448 }
449
450 if (depth_size == 0) {
451 depth_clear = (uint32_t)(0xffff * depth);
452 depth_clear |= depth_clear << 16;
453 } else if (depth_size == 1) {
454 depth_clear = (((uint32_t)(0xffffff * depth)) << 8);
455 depth_clear |= (stencil & 0xff);
456 }
457
458 /* disable "window" scissor.. */
459 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
460 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
461 OUT_RING(ring, xy2d(0, 0));
462 OUT_RING(ring, xy2d(0x7fff, 0x7fff));
463
464 /* make sure we fill all "pixels" (in SCREEN_SCISSOR) */
465 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
466 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
467 OUT_RING(ring, fui(4096.0));
468 OUT_RING(ring, fui(4096.0));
469 OUT_RING(ring, fui(4096.0));
470 OUT_RING(ring, fui(4096.0));
471
472 clear_state(batch, ring, ~0u, true);
473
474 if (color_size >= 0 && depth_size != color_size)
475 clear_fast(batch, ring, color_clear, color_clear, GMEM_PATCH_FASTCLEAR_COLOR);
476
477 if (depth_size >= 0 && depth_size != color_size)
478 clear_fast(batch, ring, depth_clear, depth_clear, GMEM_PATCH_FASTCLEAR_DEPTH);
479
480 if (depth_size == color_size)
481 clear_fast(batch, ring, color_clear, depth_clear, GMEM_PATCH_FASTCLEAR_COLOR_DEPTH);
482
483 clear_state_restore(ctx, ring);
484
485 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
486 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
487 OUT_RING(ring, 0);
488
489 /* can't patch in SCREEN_SCISSOR_BR as it can be different for each tile.
490 * MEM_WRITE the value in tile_renderprep, and use CP_LOAD_CONSTANT_CONTEXT
491 * the value is read from byte offset 60 in the given bo
492 */
493 OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3);
494 OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0);
495 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
496 OUT_RING(ring, 1);
497
498 OUT_PKT3(ring, CP_SET_CONSTANT, 4);
499 OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
500 OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches);
501 OUT_RING(ring, 0);
502 OUT_RING(ring, 0);
503 return true;
504 }
505
506 static bool
507 fd2_clear(struct fd_context *ctx, unsigned buffers,
508 const union pipe_color_union *color, double depth, unsigned stencil)
509 {
510 struct fd_ringbuffer *ring = ctx->batch->draw;
511 struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;
512
513 if (fd2_clear_fast(ctx, buffers, color, depth, stencil))
514 goto dirty;
515
516 /* set clear value */
517 if (is_a20x(ctx->screen)) {
518 if (buffers & PIPE_CLEAR_COLOR) {
519 /* C0 used by fragment shader */
520 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
521 OUT_RING(ring, 0x00000480);
522 OUT_RING(ring, color->ui[0]);
523 OUT_RING(ring, color->ui[1]);
524 OUT_RING(ring, color->ui[2]);
525 OUT_RING(ring, color->ui[3]);
526 }
527
528 if (buffers & PIPE_CLEAR_DEPTH) {
529 /* use viewport to set depth value */
530 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
531 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
532 OUT_RING(ring, fui(0.0f));
533 OUT_RING(ring, fui(depth));
534 }
535
536 if (buffers & PIPE_CLEAR_STENCIL) {
537 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
538 OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
539 OUT_RING(ring, 0xff000000 |
540 A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) |
541 A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
542 OUT_RING(ring, 0xff000000 |
543 A2XX_RB_STENCILREFMASK_STENCILREF(stencil) |
544 A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
545 }
546 } else {
547 if (buffers & PIPE_CLEAR_COLOR) {
548 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
549 OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
550 OUT_RING(ring, pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f));
551 }
552
553 if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
554 uint32_t clear_mask, depth_clear;
555 if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
556 switch (fd_pipe2depth(fb->zsbuf->format)) {
557 case DEPTHX_24_8:
558 clear_mask = ((buffers & PIPE_CLEAR_DEPTH) ? 0xe : 0) |
559 ((buffers & PIPE_CLEAR_STENCIL) ? 0x1 : 0);
560 depth_clear = (((uint32_t)(0xffffff * depth)) << 8) |
561 (stencil & 0xff);
562 break;
563 case DEPTHX_16:
564 clear_mask = 0xf;
565 depth_clear = (uint32_t)(0xffffffff * depth);
566 break;
567 default:
568 debug_assert(0);
569 break;
570 }
571 }
572
573 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
574 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
575 OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
576 A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask));
577
578 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
579 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
580 OUT_RING(ring, depth_clear);
581 }
582 }
583
584 /* scissor state */
585 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
586 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
587 OUT_RING(ring, xy2d(0, 0));
588 OUT_RING(ring, xy2d(fb->width, fb->height));
589
590 /* viewport state */
591 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
592 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
593 OUT_RING(ring, fui((float) fb->width / 2.0));
594 OUT_RING(ring, fui((float) fb->width / 2.0));
595 OUT_RING(ring, fui((float) fb->height / 2.0));
596 OUT_RING(ring, fui((float) fb->height / 2.0));
597
598 /* common state */
599 clear_state(ctx->batch, ring, buffers, false);
600
601 fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
602 DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
603
604 clear_state_restore(ctx, ring);
605
606 dirty:
607 ctx->dirty |= FD_DIRTY_ZSA |
608 FD_DIRTY_VIEWPORT |
609 FD_DIRTY_RASTERIZER |
610 FD_DIRTY_SAMPLE_MASK |
611 FD_DIRTY_PROG |
612 FD_DIRTY_CONST |
613 FD_DIRTY_BLEND |
614 FD_DIRTY_FRAMEBUFFER |
615 FD_DIRTY_SCISSOR;
616
617 ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG;
618 ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST;
619
620 return true;
621 }
622
623 void
624 fd2_draw_init(struct pipe_context *pctx)
625 {
626 struct fd_context *ctx = fd_context(pctx);
627 ctx->draw_vbo = fd2_draw_vbo;
628 ctx->clear = fd2_clear;
629 }