tgsi_to_nir: Produce optimized NIR for a given pipe_screen.
[mesa.git] / src / gallium / drivers / freedreno / a2xx / fd2_program.c
1 /*
2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 * Jonathan Marek <jonathan@marek.ca>
26 */
27
28 #include "pipe/p_state.h"
29 #include "util/u_string.h"
30 #include "util/u_memory.h"
31 #include "util/u_inlines.h"
32 #include "util/u_format.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "tgsi/tgsi_parse.h"
35
36 #include "freedreno_program.h"
37
38 #include "ir2.h"
39 #include "fd2_program.h"
40 #include "fd2_texture.h"
41 #include "fd2_util.h"
42 #include "instr-a2xx.h"
43
44 static struct fd2_shader_stateobj *
45 create_shader(struct pipe_context *pctx, gl_shader_stage type)
46 {
47 struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj);
48 if (!so)
49 return NULL;
50 so->type = type;
51 so->is_a20x = is_a20x(fd_context(pctx)->screen);
52 return so;
53 }
54
55 static void
56 delete_shader(struct fd2_shader_stateobj *so)
57 {
58 if (!so)
59 return;
60 ralloc_free(so->nir);
61 for (int i = 0; i < ARRAY_SIZE(so->variant); i++)
62 free(so->variant[i].info.dwords);
63 free(so);
64 }
65
66 static void
67 emit(struct fd_ringbuffer *ring, gl_shader_stage type,
68 struct ir2_shader_info *info, struct util_dynarray *patches)
69 {
70 unsigned i;
71
72 assert(info->sizedwords);
73
74 OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + info->sizedwords);
75 OUT_RING(ring, type == MESA_SHADER_FRAGMENT);
76 OUT_RING(ring, info->sizedwords);
77
78 if (patches)
79 util_dynarray_append(patches, uint32_t*, &ring->cur[info->mem_export_ptr]);
80
81 for (i = 0; i < info->sizedwords; i++)
82 OUT_RING(ring, info->dwords[i]);
83 }
84
/*
 * Type-size callback handed to nir_lower_io: reports the size of `type`
 * as the attribute slot count returned by glsl_count_attribute_slots().
 */
static int
ir2_glsl_type_size(const struct glsl_type *type)
{
	int slots = glsl_count_attribute_slots(type, false);

	return slots;
}
90
91 static void *
92 fd2_fp_state_create(struct pipe_context *pctx,
93 const struct pipe_shader_state *cso)
94 {
95 struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_FRAGMENT);
96 if (!so)
97 return NULL;
98
99 if (cso->type == PIPE_SHADER_IR_NIR) {
100 so->nir = cso->ir.nir;
101 NIR_PASS_V(so->nir, nir_lower_io, nir_var_all, ir2_glsl_type_size,
102 (nir_lower_io_options)0);
103 } else {
104 assert(cso->type == PIPE_SHADER_IR_TGSI);
105 so->nir = ir2_tgsi_to_nir(cso->tokens, pctx->screen);
106 }
107
108 if (ir2_optimize_nir(so->nir, true))
109 goto fail;
110
111 so->first_immediate = so->nir->num_uniforms;
112
113 ir2_compile(so, 0, NULL);
114
115 ralloc_free(so->nir);
116 so->nir = NULL;
117 return so;
118
119 fail:
120 delete_shader(so);
121 return NULL;
122 }
123
/*
 * pipe_context::delete_fs_state hook: frees the state object produced by
 * fd2_fp_state_create().
 */
static void
fd2_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader((struct fd2_shader_stateobj *)hwcso);
}
130
131 static void *
132 fd2_vp_state_create(struct pipe_context *pctx,
133 const struct pipe_shader_state *cso)
134 {
135 struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_VERTEX);
136 if (!so)
137 return NULL;
138
139 if (cso->type == PIPE_SHADER_IR_NIR) {
140 so->nir = cso->ir.nir;
141 NIR_PASS_V(so->nir, nir_lower_io, nir_var_all, ir2_glsl_type_size,
142 (nir_lower_io_options)0);
143 } else {
144 assert(cso->type == PIPE_SHADER_IR_TGSI);
145 so->nir = ir2_tgsi_to_nir(cso->tokens, pctx->screen);
146 }
147
148 if (ir2_optimize_nir(so->nir, true))
149 goto fail;
150
151 so->first_immediate = so->nir->num_uniforms;
152
153 /* compile binning variant now */
154 ir2_compile(so, 0, NULL);
155
156 return so;
157
158 fail:
159 delete_shader(so);
160 return NULL;
161 }
162
/*
 * pipe_context::delete_vs_state hook: frees the state object produced by
 * fd2_vp_state_create().
 */
static void
fd2_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader((struct fd2_shader_stateobj *)hwcso);
}
169
170 static void
171 patch_vtx_fetch(struct fd_context *ctx, struct pipe_vertex_element *elem,
172 instr_fetch_vtx_t *instr, uint16_t dst_swiz)
173 {
174 struct pipe_vertex_buffer *vb =
175 &ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index];
176 enum pipe_format format = elem->src_format;
177 const struct util_format_description *desc =
178 util_format_description(format);
179 unsigned j;
180
181 /* Find the first non-VOID channel. */
182 for (j = 0; j < 4; j++)
183 if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID)
184 break;
185
186 instr->format = fd2_pipe2surface(format);
187 instr->num_format_all = !desc->channel[j].normalized;
188 instr->format_comp_all = desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED;
189 instr->stride = vb->stride;
190 instr->offset = elem->src_offset;
191
192 unsigned swiz = 0;
193 for (int i = 0; i < 4; i++) {
194 unsigned s = dst_swiz >> i*3 & 7;
195 swiz |= (s >= 4 ? s : desc->swizzle[s]) << i*3;
196 }
197 instr->dst_swiz = swiz;
198 }
199
200 static void
201 patch_fetches(struct fd_context *ctx, struct ir2_shader_info *info,
202 struct fd_vertex_stateobj *vtx, struct fd_texture_stateobj *tex)
203 {
204 for (int i = 0; i < info->num_fetch_instrs; i++) {
205 struct ir2_fetch_info *fi = &info->fetch_info[i];
206
207 instr_fetch_t *instr = (instr_fetch_t*) &info->dwords[fi->offset];
208 if (instr->opc == VTX_FETCH) {
209 unsigned idx = (instr->vtx.const_index - 20) * 3 +
210 instr->vtx.const_index_sel;
211 patch_vtx_fetch(ctx, &vtx->pipe[idx], &instr->vtx, fi->vtx.dst_swiz);
212 continue;
213 }
214
215 assert(instr->opc == TEX_FETCH);
216 instr->tex.const_idx = fd2_get_const_idx(ctx, tex, fi->tex.samp_id);
217 instr->tex.src_swiz = fi->tex.src_swiz;
218 if (fd2_texture_swap_xy(tex, fi->tex.samp_id)) {
219 unsigned x = instr->tex.src_swiz;
220 instr->tex.src_swiz = (x & 0x30) | (x & 3) << 2 | (x >> 2 & 3);
221 }
222 }
223 }
224
225 void
226 fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
227 struct fd_program_stateobj *prog)
228 {
229 struct fd2_shader_stateobj *fp = NULL, *vp;
230 struct ir2_shader_info *fpi, *vpi;
231 struct ir2_frag_linkage *f;
232 uint8_t vs_gprs, fs_gprs = 0, vs_export = 0;
233 enum a2xx_sq_ps_vtx_mode mode = POSITION_1_VECTOR;
234 bool binning = (ctx->batch && ring == ctx->batch->binning);
235 unsigned variant = 0;
236
237 vp = prog->vp;
238
239 /* find variant matching the linked fragment shader */
240 if (!binning) {
241 fp = prog->fp;
242 for (variant = 1; variant < ARRAY_SIZE(vp->variant); variant++) {
243 /* if checked all variants, compile a new variant */
244 if (!vp->variant[variant].info.sizedwords) {
245 ir2_compile(vp, variant, fp);
246 break;
247 }
248
249 /* check if fragment shader linkage matches */
250 if (!memcmp(&vp->variant[variant].f, &fp->variant[0].f,
251 sizeof(struct ir2_frag_linkage)))
252 break;
253 }
254 assert(variant < ARRAY_SIZE(vp->variant));
255 }
256
257 vpi = &vp->variant[variant].info;
258 fpi = &fp->variant[0].info;
259 f = &fp->variant[0].f;
260
261 /* clear/gmem2mem/mem2gmem need to be changed to remove this condition */
262 if (prog != &ctx->solid_prog && prog != &ctx->blit_prog[0]) {
263 patch_fetches(ctx, vpi, ctx->vtx.vtx, &ctx->tex[PIPE_SHADER_VERTEX]);
264 if (fp)
265 patch_fetches(ctx, fpi, NULL, &ctx->tex[PIPE_SHADER_FRAGMENT]);
266 }
267
268 emit(ring, MESA_SHADER_VERTEX, vpi,
269 binning ? &ctx->batch->shader_patches : NULL);
270
271 if (fp) {
272 emit(ring, MESA_SHADER_FRAGMENT, fpi, NULL);
273 fs_gprs = (fpi->max_reg < 0) ? 0x80 : fpi->max_reg;
274 vs_export = MAX2(1, f->inputs_count) - 1;
275 }
276
277 vs_gprs = (vpi->max_reg < 0) ? 0x80 : vpi->max_reg;
278
279 if (vp->writes_psize && !binning)
280 mode = POSITION_2_VECTORS_SPRITE;
281
282 /* set register to use for param (fragcoord/pointcoord/frontfacing) */
283 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
284 OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
285 OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY) |
286 COND(fp, A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(f->inputs_count)) |
287 /* we need SCREEN_XY for both fragcoord and frontfacing */
288 A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY);
289
290 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
291 OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));
292 OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(2) |
293 A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(mode) |
294 A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |
295 A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |
296 A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
297 A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
298 A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs) |
299 COND(fp && fp->need_param, A2XX_SQ_PROGRAM_CNTL_PARAM_GEN) |
300 COND(!fp, A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX));
301 }
302
303 void
304 fd2_prog_init(struct pipe_context *pctx)
305 {
306 struct fd_context *ctx = fd_context(pctx);
307 struct fd_program_stateobj *prog;
308 struct fd2_shader_stateobj *so;
309 struct ir2_shader_info *info;
310 instr_fetch_vtx_t *instr;
311
312 pctx->create_fs_state = fd2_fp_state_create;
313 pctx->delete_fs_state = fd2_fp_state_delete;
314
315 pctx->create_vs_state = fd2_vp_state_create;
316 pctx->delete_vs_state = fd2_vp_state_delete;
317
318 fd_prog_init(pctx);
319
320 /* XXX maybe its possible to reuse patch_vtx_fetch somehow? */
321
322 prog = &ctx->solid_prog;
323 so = prog->vp;
324 ir2_compile(prog->vp, 1, prog->fp);
325
326 #define IR2_FETCH_SWIZ_XY01 0xb08
327 #define IR2_FETCH_SWIZ_XYZ1 0xa88
328
329 info = &so->variant[1].info;
330
331 instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset];
332 instr->const_index = 26;
333 instr->const_index_sel = 0;
334 instr->format = FMT_32_32_32_FLOAT;
335 instr->format_comp_all = false;
336 instr->stride = 12;
337 instr->num_format_all = true;
338 instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
339
340 prog = &ctx->blit_prog[0];
341 so = prog->vp;
342 ir2_compile(prog->vp, 1, prog->fp);
343
344 info = &so->variant[1].info;
345
346 instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset];
347 instr->const_index = 26;
348 instr->const_index_sel = 1;
349 instr->format = FMT_32_32_FLOAT;
350 instr->format_comp_all = false;
351 instr->stride = 8;
352 instr->num_format_all = false;
353 instr->dst_swiz = IR2_FETCH_SWIZ_XY01;
354
355 instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[1].offset];
356 instr->const_index = 26;
357 instr->const_index_sel = 0;
358 instr->format = FMT_32_32_32_FLOAT;
359 instr->format_comp_all = false;
360 instr->stride = 12;
361 instr->num_format_all = false;
362 instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
363 }