freedreno/a3xx: add support to emulate GL_CLAMP
[mesa.git] / src / gallium / drivers / freedreno / a3xx / fd3_draw.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_prim.h"
33 #include "util/u_format.h"
34
35 #include "freedreno_state.h"
36 #include "freedreno_resource.h"
37
38 #include "fd3_draw.h"
39 #include "fd3_context.h"
40 #include "fd3_emit.h"
41 #include "fd3_program.h"
42 #include "fd3_util.h"
43 #include "fd3_zsa.h"
44
45
46 static void
47 emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring,
48 struct ir3_shader_key key)
49 {
50 struct fd_vertex_stateobj *vtx = ctx->vtx;
51 struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf;
52 struct fd3_vertex_buf bufs[PIPE_MAX_ATTRIBS];
53 unsigned i;
54
55 if (!vtx->num_elements)
56 return;
57
58 for (i = 0; i < vtx->num_elements; i++) {
59 struct pipe_vertex_element *elem = &vtx->pipe[i];
60 struct pipe_vertex_buffer *vb =
61 &vertexbuf->vb[elem->vertex_buffer_index];
62 bufs[i].offset = vb->buffer_offset + elem->src_offset;
63 bufs[i].stride = vb->stride;
64 bufs[i].prsc = vb->buffer;
65 bufs[i].format = elem->src_format;
66 }
67
68 fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->prog.vp, key),
69 bufs, vtx->num_elements);
70 }
71
72 static void
73 draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
74 struct fd_ringbuffer *ring, unsigned dirty, struct ir3_shader_key key)
75 {
76 fd3_emit_state(ctx, ring, info, &ctx->prog, key, dirty);
77
78 if (dirty & FD_DIRTY_VTXBUF)
79 emit_vertexbufs(ctx, ring, key);
80
81 OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
82 OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
83
84 OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
85 OUT_RING(ring, info->min_index); /* VFD_INDEX_MIN */
86 OUT_RING(ring, info->max_index); /* VFD_INDEX_MAX */
87 OUT_RING(ring, info->start_instance); /* VFD_INSTANCEID_OFFSET */
88 OUT_RING(ring, info->start); /* VFD_INDEX_OFFSET */
89
90 OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
91 OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
92 info->restart_index : 0xffffffff);
93
94 fd_draw_emit(ctx, ring,
95 key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
96 info);
97 }
98
99 static void
100 fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
101 {
102 unsigned dirty = ctx->dirty;
103 struct fd3_context *fd3_ctx = fd3_context(ctx);
104 struct ir3_shader_key key = {
105 /* do binning pass first: */
106 .binning_pass = true,
107 .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
108 .alpha = util_format_is_alpha(pipe_surface_format(ctx->framebuffer.cbufs[0])),
109 // TODO set .half_precision based on render target format,
110 // ie. float16 and smaller use half, float32 use full..
111 .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
112 .vsaturate_s = fd3_ctx->vsaturate_s,
113 .vsaturate_t = fd3_ctx->vsaturate_t,
114 .vsaturate_r = fd3_ctx->vsaturate_r,
115 .fsaturate_s = fd3_ctx->fsaturate_s,
116 .fsaturate_t = fd3_ctx->fsaturate_t,
117 .fsaturate_r = fd3_ctx->fsaturate_r,
118 };
119
120 draw_impl(ctx, info, ctx->binning_ring,
121 dirty & ~(FD_DIRTY_BLEND), key);
122 /* and now regular (non-binning) pass: */
123 key.binning_pass = false;
124 draw_impl(ctx, info, ctx->ring, dirty, key);
125 }
126
127 /* binning pass cmds for a clear:
128 * NOTE: newer blob drivers don't use binning for clear, which is probably
129 * preferable since it is low vtx count. However that doesn't seem to
130 * actually work for me. Not sure if it is depending on support for
131 * clear pass (rather than using solid-fill shader), or something else
132 * that newer blob is doing differently. Once that is figured out, we
133 * can remove fd3_clear_binning().
134 */
135 static void
136 fd3_clear_binning(struct fd_context *ctx, unsigned dirty)
137 {
138 struct fd3_context *fd3_ctx = fd3_context(ctx);
139 struct fd_ringbuffer *ring = ctx->binning_ring;
140 struct ir3_shader_key key = {
141 .binning_pass = true,
142 .half_precision = true,
143 };
144
145 fd3_emit_state(ctx, ring, NULL, &ctx->solid_prog, key, dirty);
146
147 fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
148 (struct fd3_vertex_buf[]) {{
149 .prsc = fd3_ctx->solid_vbuf,
150 .stride = 12,
151 .format = PIPE_FORMAT_R32G32B32_FLOAT,
152 }}, 1);
153
154 OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
155 OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
156 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
157 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
158 A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
159 OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
160 OUT_RING(ring, 0); /* VFD_INDEX_MIN */
161 OUT_RING(ring, 2); /* VFD_INDEX_MAX */
162 OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
163 OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
164 OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
165 OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
166
167 fd_event_write(ctx, ring, PERFCOUNTER_STOP);
168
169 fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
170 DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
171 }
172
173 static void
174 fd3_clear(struct fd_context *ctx, unsigned buffers,
175 const union pipe_color_union *color, double depth, unsigned stencil)
176 {
177 struct fd3_context *fd3_ctx = fd3_context(ctx);
178 struct fd_ringbuffer *ring = ctx->ring;
179 unsigned dirty = ctx->dirty;
180 unsigned ce, i;
181 struct ir3_shader_key key = {
182 .half_precision = true,
183 };
184
185 dirty &= FD_DIRTY_VIEWPORT | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
186 dirty |= FD_DIRTY_PROG;
187
188 fd3_clear_binning(ctx, dirty);
189
190 /* emit generic state now: */
191 fd3_emit_state(ctx, ring, NULL, &ctx->solid_prog, key, dirty);
192
193 OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1);
194 OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
195 A3XX_RB_BLEND_ALPHA_FLOAT(1.0));
196
197 OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
198 OUT_RINGP(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER),
199 &fd3_ctx->rbrc_patches);
200
201 if (buffers & PIPE_CLEAR_DEPTH) {
202 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
203 OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
204 A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
205 A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS));
206
207 fd_wfi(ctx, ring);
208 OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_ZOFFSET, 2);
209 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
210 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(depth));
211 ctx->dirty |= FD_DIRTY_VIEWPORT;
212 } else {
213 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
214 OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
215 }
216
217 if (buffers & PIPE_CLEAR_STENCIL) {
218 OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
219 OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(stencil) |
220 A3XX_RB_STENCILREFMASK_STENCILMASK(stencil) |
221 A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
222 OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) |
223 A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
224 0xff000000 | // XXX ???
225 A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
226
227 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
228 OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
229 A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
230 A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
231 A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) |
232 A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
233 A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
234 A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
235 A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
236 A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
237 } else {
238 OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
239 OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) |
240 A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
241 A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0));
242 OUT_RING(ring, A3XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
243 A3XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
244 A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0));
245
246 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
247 OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
248 A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
249 A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
250 A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
251 A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
252 A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
253 A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
254 A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
255 }
256
257 if (buffers & PIPE_CLEAR_COLOR) {
258 ce = 0xf;
259 } else {
260 ce = 0x0;
261 }
262
263 for (i = 0; i < 4; i++) {
264 OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
265 OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
266 A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) |
267 A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));
268
269 OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
270 OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
271 A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
272 A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
273 A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
274 A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
275 A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO) |
276 A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE);
277 }
278
279 OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
280 OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
281
282 fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
283 (struct fd3_vertex_buf[]) {{
284 .prsc = fd3_ctx->solid_vbuf,
285 .stride = 12,
286 .format = PIPE_FORMAT_R32G32B32_FLOAT,
287 }}, 1);
288
289 fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
290
291 OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
292 OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
293 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
294 A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
295 A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
296 OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
297 OUT_RING(ring, 0); /* VFD_INDEX_MIN */
298 OUT_RING(ring, 2); /* VFD_INDEX_MAX */
299 OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
300 OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
301 OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
302 OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
303
304 fd_event_write(ctx, ring, PERFCOUNTER_STOP);
305
306 fd_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
307 DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
308 }
309
310 void
311 fd3_draw_init(struct pipe_context *pctx)
312 {
313 struct fd_context *ctx = fd_context(pctx);
314 ctx->draw = fd3_draw;
315 ctx->clear = fd3_clear;
316 }