00381df503487fb1383600a0ba8c31d3125e0640
[mesa.git] / src / gallium / drivers / freedreno / a2xx / fd2_draw.c
1 /*
2 * Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "pipe/p_state.h"
28 #include "util/u_string.h"
29 #include "util/u_memory.h"
30 #include "util/u_prim.h"
31
32 #include "freedreno_state.h"
33 #include "freedreno_resource.h"
34
35 #include "fd2_draw.h"
36 #include "fd2_context.h"
37 #include "fd2_emit.h"
38 #include "fd2_program.h"
39 #include "fd2_util.h"
40 #include "fd2_zsa.h"
41
42
43 static void
44 emit_cacheflush(struct fd_ringbuffer *ring)
45 {
46 unsigned i;
47
48 for (i = 0; i < 12; i++) {
49 OUT_PKT3(ring, CP_EVENT_WRITE, 1);
50 OUT_RING(ring, CACHE_FLUSH);
51 }
52 }
53
54 static void
55 emit_vertexbufs(struct fd_context *ctx)
56 {
57 struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;
58 struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;
59 struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];
60 unsigned i;
61
62 if (!vtx->num_elements)
63 return;
64
65 for (i = 0; i < vtx->num_elements; i++) {
66 struct pipe_vertex_element *elem = &vtx->pipe[i];
67 struct pipe_vertex_buffer *vb =
68 &vertexbuf->vb[elem->vertex_buffer_index];
69 bufs[i].offset = vb->buffer_offset;
70 bufs[i].size = fd_bo_size(fd_resource(vb->buffer.resource)->bo);
71 bufs[i].prsc = vb->buffer.resource;
72 }
73
74 // NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the
75 // CONST(20,0) (or CONST(26,0) in soliv_vp)
76
77 fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);
78 }
79
80 static void
81 draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
82 struct fd_ringbuffer *ring, unsigned index_offset)
83 {
84 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
85 OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
86 OUT_RING(ring, info->index_size ? 0 : info->start);
87
88 OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
89 OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
90
91 if (is_a20x(ctx->screen)) {
92 /* wait for DMA to finish and
93 * dummy draw one triangle with indexes 0,0,0.
94 * with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE.
95 *
96 * this workaround is for a HW bug related to DMA alignment:
97 * it is necessary for indexed draws and possibly also
98 * draws that read binning data
99 */
100 OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
101 OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */
102 OUT_RING(ring, 0x00000000);
103 OUT_RING(ring, 0x00001000); /* bit: 12: VGT_BUSY_NO_DMA */
104 OUT_RING(ring, 0x00000001);
105
106 OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6);
107 OUT_RING(ring, 0x00000000);
108 OUT_RING(ring, 0x0003c004);
109 OUT_RING(ring, 0x00000000);
110 OUT_RING(ring, 0x00000003);
111 OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 0x80, 0, 0);
112 OUT_RING(ring, 0x00000006);
113 } else {
114 OUT_WFI (ring);
115
116 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
117 OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
118 OUT_RING(ring, info->max_index); /* VGT_MAX_VTX_INDX */
119 OUT_RING(ring, info->min_index); /* VGT_MIN_VTX_INDX */
120 }
121
122 fd_draw_emit(ctx->batch, ring, ctx->primtypes[info->mode],
123 IGNORE_VISIBILITY, info, index_offset);
124
125 if (is_a20x(ctx->screen)) {
126 /* not sure why this is required, but it fixes some hangs */
127 OUT_WFI(ring);
128 } else {
129 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
130 OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
131 OUT_RING(ring, 0x00000000);
132 }
133
134 emit_cacheflush(ring);
135 }
136
137
138 static bool
139 fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
140 unsigned index_offset)
141 {
142 if (!ctx->prog.fp || !ctx->prog.vp)
143 return false;
144
145 if (ctx->dirty & FD_DIRTY_VTXBUF)
146 emit_vertexbufs(ctx);
147
148 fd2_emit_state(ctx, ctx->dirty);
149
150 /* a2xx can draw only 65535 vertices at once
151 * on a22x the field in the draw command is 32bits but seems limited too
152 * using a limit of 32k because it fixes an unexplained hang
153 * 32766 works for all primitives (multiple of 2 and 3)
154 */
155 if (pinfo->count > 32766) {
156 static const uint16_t step_tbl[PIPE_PRIM_MAX] = {
157 [0 ... PIPE_PRIM_MAX - 1] = 32766,
158 [PIPE_PRIM_LINE_STRIP] = 32765,
159 [PIPE_PRIM_TRIANGLE_STRIP] = 32764,
160
161 /* needs more work */
162 [PIPE_PRIM_TRIANGLE_FAN] = 0,
163 [PIPE_PRIM_LINE_LOOP] = 0,
164 };
165
166 struct pipe_draw_info info = *pinfo;
167 unsigned count = info.count;
168 unsigned step = step_tbl[info.mode];
169
170 if (!step)
171 return false;
172
173 for (; count + step > 32766; count -= step) {
174 info.count = MIN2(count, 32766);
175 draw_impl(ctx, &info, ctx->batch->draw, index_offset);
176 info.start += step;
177 }
178 } else {
179 draw_impl(ctx, pinfo, ctx->batch->draw, index_offset);
180 }
181
182 fd_context_all_clean(ctx);
183
184 return true;
185 }
186
187
188 static bool
189 fd2_clear(struct fd_context *ctx, unsigned buffers,
190 const union pipe_color_union *color, double depth, unsigned stencil)
191 {
192 struct fd2_context *fd2_ctx = fd2_context(ctx);
193 struct fd_ringbuffer *ring = ctx->batch->draw;
194 struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;
195 uint32_t reg, colr = 0;
196
197 if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs)
198 colr = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f);
199
200 /* emit generic state now: */
201 fd2_emit_state(ctx, ctx->dirty &
202 (FD_DIRTY_BLEND | FD_DIRTY_VIEWPORT |
203 FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR));
204
205 fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
206 { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
207 }, 1);
208
209 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
210 OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
211 OUT_RING(ring, 0);
212
213 if (!is_a20x(ctx->screen)) {
214 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
215 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
216 OUT_RING(ring, 0x0000028f);
217 }
218
219 fd2_program_emit(ring, &ctx->solid_prog);
220
221 OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
222 OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
223
224 if (is_a20x(ctx->screen)) {
225 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
226 OUT_RING(ring, 0x00000480);
227 OUT_RING(ring, color->ui[0]);
228 OUT_RING(ring, color->ui[1]);
229 OUT_RING(ring, color->ui[2]);
230 OUT_RING(ring, color->ui[3]);
231 } else {
232 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
233 OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
234 OUT_RING(ring, colr);
235 }
236
237 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
238 OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
239 OUT_RING(ring, 0x00000084);
240
241 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
242 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
243 reg = 0;
244 if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
245 reg |= A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE;
246 switch (fd_pipe2depth(fb->zsbuf->format)) {
247 case DEPTHX_24_8:
248 if (buffers & PIPE_CLEAR_DEPTH)
249 reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xe);
250 if (buffers & PIPE_CLEAR_STENCIL)
251 reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0x1);
252 break;
253 case DEPTHX_16:
254 if (buffers & PIPE_CLEAR_DEPTH)
255 reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf);
256 break;
257 default:
258 debug_assert(0);
259 break;
260 }
261 }
262 OUT_RING(ring, reg);
263
264 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
265 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
266 reg = 0;
267 if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
268 switch (fd_pipe2depth(fb->zsbuf->format)) {
269 case DEPTHX_24_8:
270 reg = (((uint32_t)(0xffffff * depth)) << 8) |
271 (stencil & 0xff);
272 break;
273 case DEPTHX_16:
274 reg = (uint32_t)(0xffffffff * depth);
275 break;
276 default:
277 debug_assert(0);
278 break;
279 }
280 }
281 OUT_RING(ring, reg);
282
283 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
284 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
285 reg = 0;
286 if (buffers & PIPE_CLEAR_DEPTH) {
287 reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |
288 A2XX_RB_DEPTHCONTROL_Z_ENABLE |
289 A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE |
290 A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
291 }
292 if (buffers & PIPE_CLEAR_STENCIL) {
293 reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |
294 A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
295 A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
296 }
297 OUT_RING(ring, reg);
298
299 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
300 OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
301 OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
302 OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
303
304 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
305 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
306 OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
307 A2XX_RB_COLORCONTROL_BLEND_DISABLE |
308 A2XX_RB_COLORCONTROL_ROP_CODE(12) |
309 A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
310 A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
311
312 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
313 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
314 OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */
315 OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
316 A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
317 A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
318
319 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
320 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
321 OUT_RING(ring, 0x0000ffff);
322
323 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
324 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
325 OUT_RING(ring, xy2d(0,0)); /* PA_SC_WINDOW_SCISSOR_TL */
326 OUT_RING(ring, xy2d(fb->width, /* PA_SC_WINDOW_SCISSOR_BR */
327 fb->height));
328
329 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
330 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
331 if (buffers & PIPE_CLEAR_COLOR) {
332 OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
333 A2XX_RB_COLOR_MASK_WRITE_GREEN |
334 A2XX_RB_COLOR_MASK_WRITE_BLUE |
335 A2XX_RB_COLOR_MASK_WRITE_ALPHA);
336 } else {
337 OUT_RING(ring, 0x0);
338 }
339
340 if (!is_a20x(ctx->screen)) {
341 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
342 OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
343 OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
344 OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
345 }
346
347 fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
348 DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
349
350 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
351 OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
352 OUT_RING(ring, 0x00000000);
353
354 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
355 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
356 OUT_RING(ring, 0x00000000);
357
358 if (!is_a20x(ctx->screen)) {
359 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
360 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
361 OUT_RING(ring, 0x0000003b);
362 }
363
364 ctx->dirty |= FD_DIRTY_ZSA |
365 FD_DIRTY_VIEWPORT |
366 FD_DIRTY_RASTERIZER |
367 FD_DIRTY_SAMPLE_MASK |
368 FD_DIRTY_PROG |
369 FD_DIRTY_CONST |
370 FD_DIRTY_BLEND |
371 FD_DIRTY_FRAMEBUFFER;
372
373 ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG;
374 ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST;
375
376 return true;
377 }
378
379 void
380 fd2_draw_init(struct pipe_context *pctx)
381 {
382 struct fd_context *ctx = fd_context(pctx);
383 ctx->draw_vbo = fd2_draw_vbo;
384 ctx->clear = fd2_clear;
385 }