freedreno/ir3: debug cleanup
[mesa.git] / src / gallium / drivers / freedreno / a2xx / fd2_program.c
1 /*
2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 * Jonathan Marek <jonathan@marek.ca>
26 */
27
28 #include "pipe/p_state.h"
29 #include "util/u_string.h"
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32 #include "util/u_format.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "tgsi/tgsi_parse.h"
35 #include "nir/tgsi_to_nir.h"
36
37 #include "freedreno_program.h"
38
39 #include "ir2.h"
40 #include "fd2_program.h"
41 #include "fd2_texture.h"
42 #include "fd2_util.h"
43 #include "instr-a2xx.h"
44
45 static struct fd2_shader_stateobj *
46 create_shader(struct pipe_context *pctx, gl_shader_stage type)
47 {
48 struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj);
49 if (!so)
50 return NULL;
51 so->type = type;
52 so->is_a20x = is_a20x(fd_context(pctx)->screen);
53 return so;
54 }
55
56 static void
57 delete_shader(struct fd2_shader_stateobj *so)
58 {
59 if (!so)
60 return;
61 ralloc_free(so->nir);
62 for (int i = 0; i < ARRAY_SIZE(so->variant); i++)
63 free(so->variant[i].info.dwords);
64 free(so);
65 }
66
67 static void
68 emit(struct fd_ringbuffer *ring, gl_shader_stage type,
69 struct ir2_shader_info *info, struct util_dynarray *patches)
70 {
71 unsigned i;
72
73 assert(info->sizedwords);
74
75 OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + info->sizedwords);
76 OUT_RING(ring, type == MESA_SHADER_FRAGMENT);
77 OUT_RING(ring, info->sizedwords);
78
79 if (patches)
80 util_dynarray_append(patches, uint32_t*, &ring->cur[info->mem_export_ptr]);
81
82 for (i = 0; i < info->sizedwords; i++)
83 OUT_RING(ring, info->dwords[i]);
84 }
85
/*
 * Type-size callback handed to nir_lower_io: reports the size of `type` as
 * returned by glsl_count_attribute_slots().  The `bindless` argument is not
 * used.
 */
static int
ir2_glsl_type_size(const struct glsl_type *type, bool bindless)
{
	const int slots = glsl_count_attribute_slots(type, false);

	return slots;
}
91
92 static void *
93 fd2_fp_state_create(struct pipe_context *pctx,
94 const struct pipe_shader_state *cso)
95 {
96 struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_FRAGMENT);
97 if (!so)
98 return NULL;
99
100 so->nir = (cso->type == PIPE_SHADER_IR_NIR) ? cso->ir.nir :
101 tgsi_to_nir(cso->tokens, pctx->screen);
102
103 NIR_PASS_V(so->nir, nir_lower_io, nir_var_all, ir2_glsl_type_size,
104 (nir_lower_io_options)0);
105
106 if (ir2_optimize_nir(so->nir, true))
107 goto fail;
108
109 so->first_immediate = so->nir->num_uniforms;
110
111 ir2_compile(so, 0, NULL);
112
113 ralloc_free(so->nir);
114 so->nir = NULL;
115 return so;
116
117 fail:
118 delete_shader(so);
119 return NULL;
120 }
121
/* pipe_context::delete_fs_state hook: frees the object via delete_shader(). */
static void
fd2_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader((struct fd2_shader_stateobj *)hwcso);
}
128
129 static void *
130 fd2_vp_state_create(struct pipe_context *pctx,
131 const struct pipe_shader_state *cso)
132 {
133 struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_VERTEX);
134 if (!so)
135 return NULL;
136
137 so->nir = (cso->type == PIPE_SHADER_IR_NIR) ? cso->ir.nir :
138 tgsi_to_nir(cso->tokens, pctx->screen);
139
140 NIR_PASS_V(so->nir, nir_lower_io, nir_var_all, ir2_glsl_type_size,
141 (nir_lower_io_options)0);
142
143 if (ir2_optimize_nir(so->nir, true))
144 goto fail;
145
146 so->first_immediate = so->nir->num_uniforms;
147
148 /* compile binning variant now */
149 ir2_compile(so, 0, NULL);
150
151 return so;
152
153 fail:
154 delete_shader(so);
155 return NULL;
156 }
157
/* pipe_context::delete_vs_state hook: frees the object via delete_shader(). */
static void
fd2_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader((struct fd2_shader_stateobj *)hwcso);
}
164
165 static void
166 patch_vtx_fetch(struct fd_context *ctx, struct pipe_vertex_element *elem,
167 instr_fetch_vtx_t *instr, uint16_t dst_swiz)
168 {
169 struct pipe_vertex_buffer *vb =
170 &ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index];
171 enum pipe_format format = elem->src_format;
172 const struct util_format_description *desc =
173 util_format_description(format);
174 unsigned j;
175
176 /* Find the first non-VOID channel. */
177 for (j = 0; j < 4; j++)
178 if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID)
179 break;
180
181 instr->format = fd2_pipe2surface(format);
182 instr->num_format_all = !desc->channel[j].normalized;
183 instr->format_comp_all = desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED;
184 instr->stride = vb->stride;
185 instr->offset = elem->src_offset;
186
187 unsigned swiz = 0;
188 for (int i = 0; i < 4; i++) {
189 unsigned s = dst_swiz >> i*3 & 7;
190 swiz |= (s >= 4 ? s : desc->swizzle[s]) << i*3;
191 }
192 instr->dst_swiz = swiz;
193 }
194
195 static void
196 patch_fetches(struct fd_context *ctx, struct ir2_shader_info *info,
197 struct fd_vertex_stateobj *vtx, struct fd_texture_stateobj *tex)
198 {
199 for (int i = 0; i < info->num_fetch_instrs; i++) {
200 struct ir2_fetch_info *fi = &info->fetch_info[i];
201
202 instr_fetch_t *instr = (instr_fetch_t*) &info->dwords[fi->offset];
203 if (instr->opc == VTX_FETCH) {
204 unsigned idx = (instr->vtx.const_index - 20) * 3 +
205 instr->vtx.const_index_sel;
206 patch_vtx_fetch(ctx, &vtx->pipe[idx], &instr->vtx, fi->vtx.dst_swiz);
207 continue;
208 }
209
210 assert(instr->opc == TEX_FETCH);
211 instr->tex.const_idx = fd2_get_const_idx(ctx, tex, fi->tex.samp_id);
212 instr->tex.src_swiz = fi->tex.src_swiz;
213 }
214 }
215
216 void
217 fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
218 struct fd_program_stateobj *prog)
219 {
220 struct fd2_shader_stateobj *fp = NULL, *vp;
221 struct ir2_shader_info *fpi, *vpi;
222 struct ir2_frag_linkage *f;
223 uint8_t vs_gprs, fs_gprs = 0, vs_export = 0;
224 enum a2xx_sq_ps_vtx_mode mode = POSITION_1_VECTOR;
225 bool binning = (ctx->batch && ring == ctx->batch->binning);
226 unsigned variant = 0;
227
228 vp = prog->vs;
229
230 /* find variant matching the linked fragment shader */
231 if (!binning) {
232 fp = prog->fs;
233 for (variant = 1; variant < ARRAY_SIZE(vp->variant); variant++) {
234 /* if checked all variants, compile a new variant */
235 if (!vp->variant[variant].info.sizedwords) {
236 ir2_compile(vp, variant, fp);
237 break;
238 }
239
240 /* check if fragment shader linkage matches */
241 if (!memcmp(&vp->variant[variant].f, &fp->variant[0].f,
242 sizeof(struct ir2_frag_linkage)))
243 break;
244 }
245 assert(variant < ARRAY_SIZE(vp->variant));
246 }
247
248 vpi = &vp->variant[variant].info;
249 fpi = &fp->variant[0].info;
250 f = &fp->variant[0].f;
251
252 /* clear/gmem2mem/mem2gmem need to be changed to remove this condition */
253 if (prog != &ctx->solid_prog && prog != &ctx->blit_prog[0]) {
254 patch_fetches(ctx, vpi, ctx->vtx.vtx, &ctx->tex[PIPE_SHADER_VERTEX]);
255 if (fp)
256 patch_fetches(ctx, fpi, NULL, &ctx->tex[PIPE_SHADER_FRAGMENT]);
257 }
258
259 emit(ring, MESA_SHADER_VERTEX, vpi,
260 binning ? &ctx->batch->shader_patches : NULL);
261
262 if (fp) {
263 emit(ring, MESA_SHADER_FRAGMENT, fpi, NULL);
264 fs_gprs = (fpi->max_reg < 0) ? 0x80 : fpi->max_reg;
265 vs_export = MAX2(1, f->inputs_count) - 1;
266 }
267
268 vs_gprs = (vpi->max_reg < 0) ? 0x80 : vpi->max_reg;
269
270 if (vp->writes_psize && !binning)
271 mode = POSITION_2_VECTORS_SPRITE;
272
273 /* set register to use for param (fragcoord/pointcoord/frontfacing) */
274 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
275 OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
276 OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY) |
277 COND(fp, A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(f->inputs_count)) |
278 /* we need SCREEN_XY for both fragcoord and frontfacing */
279 A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY);
280
281 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
282 OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));
283 OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(2) |
284 A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(mode) |
285 A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |
286 A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |
287 A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
288 A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
289 A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs) |
290 COND(fp && fp->need_param, A2XX_SQ_PROGRAM_CNTL_PARAM_GEN) |
291 COND(!fp, A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX));
292 }
293
294 void
295 fd2_prog_init(struct pipe_context *pctx)
296 {
297 struct fd_context *ctx = fd_context(pctx);
298 struct fd_program_stateobj *prog;
299 struct fd2_shader_stateobj *so;
300 struct ir2_shader_info *info;
301 instr_fetch_vtx_t *instr;
302
303 pctx->create_fs_state = fd2_fp_state_create;
304 pctx->delete_fs_state = fd2_fp_state_delete;
305
306 pctx->create_vs_state = fd2_vp_state_create;
307 pctx->delete_vs_state = fd2_vp_state_delete;
308
309 fd_prog_init(pctx);
310
311 /* XXX maybe its possible to reuse patch_vtx_fetch somehow? */
312
313 prog = &ctx->solid_prog;
314 so = prog->vs;
315 ir2_compile(prog->vs, 1, prog->fs);
316
317 #define IR2_FETCH_SWIZ_XY01 0xb08
318 #define IR2_FETCH_SWIZ_XYZ1 0xa88
319
320 info = &so->variant[1].info;
321
322 instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset];
323 instr->const_index = 26;
324 instr->const_index_sel = 0;
325 instr->format = FMT_32_32_32_FLOAT;
326 instr->format_comp_all = false;
327 instr->stride = 12;
328 instr->num_format_all = true;
329 instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
330
331 prog = &ctx->blit_prog[0];
332 so = prog->vs;
333 ir2_compile(prog->vs, 1, prog->fs);
334
335 info = &so->variant[1].info;
336
337 instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset];
338 instr->const_index = 26;
339 instr->const_index_sel = 1;
340 instr->format = FMT_32_32_FLOAT;
341 instr->format_comp_all = false;
342 instr->stride = 8;
343 instr->num_format_all = false;
344 instr->dst_swiz = IR2_FETCH_SWIZ_XY01;
345
346 instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[1].offset];
347 instr->const_index = 26;
348 instr->const_index_sel = 0;
349 instr->format = FMT_32_32_32_FLOAT;
350 instr->format_comp_all = false;
351 instr->stride = 12;
352 instr->num_format_all = false;
353 instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
354 }