gallivm: work around slow code generated for interleaving 128bit vectors
[mesa.git] / src / gallium / drivers / freedreno / freedreno_program.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "util/u_format.h"
34 #include "tgsi/tgsi_dump.h"
35 #include "tgsi/tgsi_parse.h"
36
37 #include "freedreno_program.h"
38 #include "freedreno_compiler.h"
39 #include "freedreno_vbo.h"
40 #include "freedreno_texture.h"
41 #include "freedreno_util.h"
42
43 static struct fd_shader_stateobj *
44 create_shader(enum shader_t type)
45 {
46 struct fd_shader_stateobj *so = CALLOC_STRUCT(fd_shader_stateobj);
47 if (!so)
48 return NULL;
49 so->type = type;
50 return so;
51 }
52
53 static void
54 delete_shader(struct fd_shader_stateobj *so)
55 {
56 ir2_shader_destroy(so->ir);
57 FREE(so->tokens);
58 FREE(so);
59 }
60
61 static struct fd_shader_stateobj *
62 assemble(struct fd_shader_stateobj *so)
63 {
64 free(so->bin);
65 so->bin = ir2_shader_assemble(so->ir, &so->info);
66 if (!so->bin)
67 goto fail;
68
69 if (fd_mesa_debug & FD_DBG_DISASM) {
70 DBG("disassemble: type=%d", so->type);
71 disasm(so->bin, so->info.sizedwords, 0, so->type);
72 }
73
74 return so;
75
76 fail:
77 debug_error("assemble failed!");
78 delete_shader(so);
79 return NULL;
80 }
81
82 static struct fd_shader_stateobj *
83 compile(struct fd_program_stateobj *prog, struct fd_shader_stateobj *so)
84 {
85 int ret;
86
87 if (fd_mesa_debug & FD_DBG_DISASM) {
88 DBG("dump tgsi: type=%d", so->type);
89 tgsi_dump(so->tokens, 0);
90 }
91
92 ret = fd_compile_shader(prog, so);
93 if (ret)
94 goto fail;
95
96 /* NOTE: we don't assemble yet because for VS we don't know the
97 * type information for vertex fetch yet.. so those need to be
98 * patched up later before assembling.
99 */
100
101 so->info.sizedwords = 0;
102
103 return so;
104
105 fail:
106 debug_error("compile failed!");
107 delete_shader(so);
108 return NULL;
109 }
110
111 static void
112 emit(struct fd_ringbuffer *ring, struct fd_shader_stateobj *so)
113 {
114 unsigned i;
115
116 if (so->info.sizedwords == 0)
117 assemble(so);
118
119 OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + so->info.sizedwords);
120 OUT_RING(ring, (so->type == SHADER_VERTEX) ? 0 : 1);
121 OUT_RING(ring, so->info.sizedwords);
122 for (i = 0; i < so->info.sizedwords; i++)
123 OUT_RING(ring, so->bin[i]);
124 }
125
126 static void *
127 fd_fp_state_create(struct pipe_context *pctx,
128 const struct pipe_shader_state *cso)
129 {
130 struct fd_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
131 if (!so)
132 return NULL;
133 so->tokens = tgsi_dup_tokens(cso->tokens);
134 return so;
135 }
136
/**
 * pipe_context::delete_fs_state hook: destroy a fragment shader state
 * object previously returned by fd_fp_state_create().
 */
static void
fd_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader((struct fd_shader_stateobj *)hwcso);
}
143
144 static void
145 fd_fp_state_bind(struct pipe_context *pctx, void *hwcso)
146 {
147 struct fd_context *ctx = fd_context(pctx);
148 ctx->prog.fp = hwcso;
149 ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
150 ctx->dirty |= FD_DIRTY_PROG;
151 }
152
153 static void *
154 fd_vp_state_create(struct pipe_context *pctx,
155 const struct pipe_shader_state *cso)
156 {
157 struct fd_shader_stateobj *so = create_shader(SHADER_VERTEX);
158 if (!so)
159 return NULL;
160 so->tokens = tgsi_dup_tokens(cso->tokens);
161 return so;
162 }
163
/**
 * pipe_context::delete_vs_state hook: destroy a vertex shader state object
 * previously returned by fd_vp_state_create().
 */
static void
fd_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader((struct fd_shader_stateobj *)hwcso);
}
170
171 static void
172 fd_vp_state_bind(struct pipe_context *pctx, void *hwcso)
173 {
174 struct fd_context *ctx = fd_context(pctx);
175 ctx->prog.vp = hwcso;
176 ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
177 ctx->dirty |= FD_DIRTY_PROG;
178 }
179
180 static void
181 patch_vtx_fetches(struct fd_context *ctx, struct fd_shader_stateobj *so,
182 struct fd_vertex_stateobj *vtx)
183 {
184 unsigned i;
185
186 assert(so->num_vfetch_instrs == vtx->num_elements);
187
188 /* update vtx fetch instructions: */
189 for (i = 0; i < so->num_vfetch_instrs; i++) {
190 struct ir2_instruction *instr = so->vfetch_instrs[i];
191 struct pipe_vertex_element *elem = &vtx->pipe[i];
192 struct pipe_vertex_buffer *vb =
193 &ctx->vertexbuf.vb[elem->vertex_buffer_index];
194 enum pipe_format format = elem->src_format;
195 const struct util_format_description *desc =
196 util_format_description(format);
197 unsigned j;
198
199 /* Find the first non-VOID channel. */
200 for (j = 0; j < 4; j++)
201 if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID)
202 break;
203
204 /* CI/CIS can probably be set in compiler instead: */
205 instr->fetch.const_idx = 20 + (i / 3);
206 instr->fetch.const_idx_sel = i % 3;
207
208 instr->fetch.fmt = fd_pipe2surface(format);
209 instr->fetch.is_normalized = desc->channel[j].normalized;
210 instr->fetch.is_signed =
211 desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED;
212 instr->fetch.stride = vb->stride ? : 1;
213 instr->fetch.offset = elem->src_offset;
214
215 for (j = 0; j < 4; j++)
216 instr->regs[0]->swizzle[j] = "xyzw01__"[desc->swizzle[j]];
217
218 assert(instr->fetch.fmt != FMT_INVALID);
219
220 DBG("vtx[%d]: %s (%d), ci=%d, cis=%d, id=%d, swizzle=%s, "
221 "stride=%d, offset=%d",
222 i, util_format_name(format),
223 instr->fetch.fmt,
224 instr->fetch.const_idx,
225 instr->fetch.const_idx_sel,
226 elem->instance_divisor,
227 instr->regs[0]->swizzle,
228 instr->fetch.stride,
229 instr->fetch.offset);
230 }
231
232 /* trigger re-assemble: */
233 so->info.sizedwords = 0;
234 }
235
236 static void
237 patch_tex_fetches(struct fd_context *ctx, struct fd_shader_stateobj *so,
238 struct fd_texture_stateobj *tex)
239 {
240 unsigned i;
241
242 /* update tex fetch instructions: */
243 for (i = 0; i < so->num_tfetch_instrs; i++) {
244 struct ir2_instruction *instr = so->tfetch_instrs[i].instr;
245 unsigned samp_id = so->tfetch_instrs[i].samp_id;
246 unsigned const_idx = fd_get_const_idx(ctx, tex, samp_id);
247
248 if (const_idx != instr->fetch.const_idx) {
249 instr->fetch.const_idx = const_idx;
250 /* trigger re-assemble: */
251 so->info.sizedwords = 0;
252 }
253 }
254 }
255
256 void
257 fd_program_validate(struct fd_context *ctx)
258 {
259 struct fd_program_stateobj *prog = &ctx->prog;
260
261 /* if vertex or frag shader is dirty, we may need to recompile. Compile
262 * frag shader first, as that assigns the register slots for exports
263 * from the vertex shader. And therefore if frag shader has changed we
264 * need to recompile both vert and frag shader.
265 */
266 if (prog->dirty & FD_SHADER_DIRTY_FP)
267 compile(prog, prog->fp);
268
269 if (prog->dirty & (FD_SHADER_DIRTY_FP | FD_SHADER_DIRTY_VP))
270 compile(prog, prog->vp);
271
272 if (prog->dirty)
273 ctx->dirty |= FD_DIRTY_PROG;
274
275 prog->dirty = 0;
276
277 /* if necessary, fix up vertex fetch instructions: */
278 if (ctx->dirty & (FD_DIRTY_VTXSTATE | FD_DIRTY_PROG))
279 patch_vtx_fetches(ctx, prog->vp, ctx->vtx);
280
281 /* if necessary, fix up texture fetch instructions: */
282 if (ctx->dirty & (FD_DIRTY_TEXSTATE | FD_DIRTY_PROG)) {
283 patch_tex_fetches(ctx, prog->vp, &ctx->verttex);
284 patch_tex_fetches(ctx, prog->fp, &ctx->fragtex);
285 }
286 }
287
288 void
289 fd_program_emit(struct fd_ringbuffer *ring,
290 struct fd_program_stateobj *prog)
291 {
292 struct ir2_shader_info *vsi = &prog->vp->info;
293 struct ir2_shader_info *fsi = &prog->fp->info;
294 uint8_t vs_gprs, fs_gprs, vs_export;
295
296 emit(ring, prog->vp);
297 emit(ring, prog->fp);
298
299 vs_gprs = (vsi->max_reg < 0) ? 0x80 : vsi->max_reg;
300 fs_gprs = (fsi->max_reg < 0) ? 0x80 : fsi->max_reg;
301 vs_export = MAX2(1, prog->num_exports) - 1;
302
303 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
304 OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));
305 OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(POSITION_2_VECTORS_SPRITE) |
306 A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |
307 A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |
308 A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
309 A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
310 A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs));
311 }
312
313 /* Creates shader:
314 * EXEC ADDR(0x2) CNT(0x1)
315 * (S)FETCH: SAMPLE R0.xyzw = R0.xyx CONST(0) LOCATION(CENTER)
316 * ALLOC PARAM/PIXEL SIZE(0x0)
317 * EXEC_END ADDR(0x3) CNT(0x1)
318 * ALU: MAXv export0 = R0, R0 ; gl_FragColor
319 * NOP
320 */
321 static struct fd_shader_stateobj *
322 create_blit_fp(void)
323 {
324 struct fd_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
325 struct ir2_cf *cf;
326 struct ir2_instruction *instr;
327
328 if (!so)
329 return NULL;
330
331 so->ir = ir2_shader_create();
332
333 cf = ir2_cf_create(so->ir, EXEC);
334
335 instr = ir2_instr_create_tex_fetch(cf, 0);
336 ir2_reg_create(instr, 0, "xyzw", 0);
337 ir2_reg_create(instr, 0, "xyx", 0);
338 instr->sync = true;
339
340 cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
341 cf = ir2_cf_create(so->ir, EXEC_END);
342
343 instr = ir2_instr_create_alu(cf, MAXv, ~0);
344 ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
345 ir2_reg_create(instr, 0, NULL, 0);
346 ir2_reg_create(instr, 0, NULL, 0);
347
348 return assemble(so);
349 }
350
351 /* Creates shader:
352 * EXEC ADDR(0x3) CNT(0x2)
353 * FETCH: VERTEX R1.xy01 = R0.x FMT_32_32_FLOAT UNSIGNED STRIDE(8) CONST(26, 1)
354 * FETCH: VERTEX R2.xyz1 = R0.x FMT_32_32_32_FLOAT UNSIGNED STRIDE(12) CONST(26, 0)
355 * ALLOC POSITION SIZE(0x0)
356 * EXEC ADDR(0x5) CNT(0x1)
357 * ALU: MAXv export62 = R2, R2 ; gl_Position
358 * ALLOC PARAM/PIXEL SIZE(0x0)
359 * EXEC_END ADDR(0x6) CNT(0x1)
360 * ALU: MAXv export0 = R1, R1
361 * NOP
362 */
363 static struct fd_shader_stateobj *
364 create_blit_vp(void)
365 {
366 struct fd_shader_stateobj *so = create_shader(SHADER_VERTEX);
367 struct ir2_cf *cf;
368 struct ir2_instruction *instr;
369
370 if (!so)
371 return NULL;
372
373 so->ir = ir2_shader_create();
374
375 cf = ir2_cf_create(so->ir, EXEC);
376
377 instr = ir2_instr_create_vtx_fetch(cf, 26, 1, FMT_32_32_FLOAT, false, 8);
378 instr->fetch.is_normalized = true;
379 ir2_reg_create(instr, 1, "xy01", 0);
380 ir2_reg_create(instr, 0, "x", 0);
381
382 instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12);
383 instr->fetch.is_normalized = true;
384 ir2_reg_create(instr, 2, "xyz1", 0);
385 ir2_reg_create(instr, 0, "x", 0);
386
387 cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0);
388 cf = ir2_cf_create(so->ir, EXEC);
389
390 instr = ir2_instr_create_alu(cf, MAXv, ~0);
391 ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT);
392 ir2_reg_create(instr, 2, NULL, 0);
393 ir2_reg_create(instr, 2, NULL, 0);
394
395 cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
396 cf = ir2_cf_create(so->ir, EXEC_END);
397
398 instr = ir2_instr_create_alu(cf, MAXv, ~0);
399 ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
400 ir2_reg_create(instr, 1, NULL, 0);
401 ir2_reg_create(instr, 1, NULL, 0);
402
403 return assemble(so);
404 }
405
406 /* Creates shader:
407 * ALLOC PARAM/PIXEL SIZE(0x0)
408 * EXEC_END ADDR(0x1) CNT(0x1)
409 * ALU: MAXv export0 = C0, C0 ; gl_FragColor
410 */
411 static struct fd_shader_stateobj *
412 create_solid_fp(void)
413 {
414 struct fd_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
415 struct ir2_cf *cf;
416 struct ir2_instruction *instr;
417
418 if (!so)
419 return NULL;
420
421 so->ir = ir2_shader_create();
422
423 cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
424 cf = ir2_cf_create(so->ir, EXEC_END);
425
426 instr = ir2_instr_create_alu(cf, MAXv, ~0);
427 ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
428 ir2_reg_create(instr, 0, NULL, IR2_REG_CONST);
429 ir2_reg_create(instr, 0, NULL, IR2_REG_CONST);
430
431 return assemble(so);
432 }
433
434 /* Creates shader:
435 * EXEC ADDR(0x3) CNT(0x1)
436 * (S)FETCH: VERTEX R1.xyz1 = R0.x FMT_32_32_32_FLOAT
437 * UNSIGNED STRIDE(12) CONST(26, 0)
438 * ALLOC POSITION SIZE(0x0)
439 * EXEC ADDR(0x4) CNT(0x1)
440 * ALU: MAXv export62 = R1, R1 ; gl_Position
441 * ALLOC PARAM/PIXEL SIZE(0x0)
442 * EXEC_END ADDR(0x5) CNT(0x0)
443 */
444 static struct fd_shader_stateobj *
445 create_solid_vp(void)
446 {
447 struct fd_shader_stateobj *so = create_shader(SHADER_VERTEX);
448 struct ir2_cf *cf;
449 struct ir2_instruction *instr;
450
451 if (!so)
452 return NULL;
453
454 so->ir = ir2_shader_create();
455
456 cf = ir2_cf_create(so->ir, EXEC);
457
458 instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12);
459 ir2_reg_create(instr, 1, "xyz1", 0);
460 ir2_reg_create(instr, 0, "x", 0);
461
462 cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0);
463 cf = ir2_cf_create(so->ir, EXEC);
464
465 instr = ir2_instr_create_alu(cf, MAXv, ~0);
466 ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT);
467 ir2_reg_create(instr, 1, NULL, 0);
468 ir2_reg_create(instr, 1, NULL, 0);
469
470 cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
471 cf = ir2_cf_create(so->ir, EXEC_END);
472
473 return assemble(so);
474 }
475
476 void
477 fd_prog_init(struct pipe_context *pctx)
478 {
479 struct fd_context *ctx = fd_context(pctx);
480
481 pctx->create_fs_state = fd_fp_state_create;
482 pctx->bind_fs_state = fd_fp_state_bind;
483 pctx->delete_fs_state = fd_fp_state_delete;
484
485 pctx->create_vs_state = fd_vp_state_create;
486 pctx->bind_vs_state = fd_vp_state_bind;
487 pctx->delete_vs_state = fd_vp_state_delete;
488
489 ctx->solid_prog.fp = create_solid_fp();
490 ctx->solid_prog.vp = create_solid_vp();
491 ctx->blit_prog.fp = create_blit_fp();
492 ctx->blit_prog.vp = create_blit_vp();
493 }
494
495 void
496 fd_prog_fini(struct pipe_context *pctx)
497 {
498 struct fd_context *ctx = fd_context(pctx);
499
500 delete_shader(ctx->solid_prog.vp);
501 delete_shader(ctx->solid_prog.fp);
502 delete_shader(ctx->blit_prog.vp);
503 delete_shader(ctx->blit_prog.fp);
504 }