freedreno: split out WFI helper
[mesa.git] / src / gallium / drivers / freedreno / a2xx / fd2_program.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "util/u_format.h"
34 #include "tgsi/tgsi_dump.h"
35 #include "tgsi/tgsi_parse.h"
36
37 #include "fd2_program.h"
38 #include "fd2_compiler.h"
39 #include "fd2_texture.h"
40 #include "fd2_util.h"
41
42 static struct fd2_shader_stateobj *
43 create_shader(enum shader_t type)
44 {
45 struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj);
46 if (!so)
47 return NULL;
48 so->type = type;
49 return so;
50 }
51
52 static void
53 delete_shader(struct fd2_shader_stateobj *so)
54 {
55 ir2_shader_destroy(so->ir);
56 free(so->tokens);
57 free(so->bin);
58 free(so);
59 }
60
61 static struct fd2_shader_stateobj *
62 assemble(struct fd2_shader_stateobj *so)
63 {
64 free(so->bin);
65 so->bin = ir2_shader_assemble(so->ir, &so->info);
66 if (!so->bin)
67 goto fail;
68
69 if (fd_mesa_debug & FD_DBG_DISASM) {
70 DBG("disassemble: type=%d", so->type);
71 disasm_a2xx(so->bin, so->info.sizedwords, 0, so->type);
72 }
73
74 return so;
75
76 fail:
77 debug_error("assemble failed!");
78 delete_shader(so);
79 return NULL;
80 }
81
82 static struct fd2_shader_stateobj *
83 compile(struct fd_program_stateobj *prog, struct fd2_shader_stateobj *so)
84 {
85 int ret;
86
87 if (fd_mesa_debug & FD_DBG_DISASM) {
88 DBG("dump tgsi: type=%d", so->type);
89 tgsi_dump(so->tokens, 0);
90 }
91
92 ret = fd2_compile_shader(prog, so);
93 if (ret)
94 goto fail;
95
96 /* NOTE: we don't assemble yet because for VS we don't know the
97 * type information for vertex fetch yet.. so those need to be
98 * patched up later before assembling.
99 */
100
101 so->info.sizedwords = 0;
102
103 return so;
104
105 fail:
106 debug_error("compile failed!");
107 delete_shader(so);
108 return NULL;
109 }
110
111 static void
112 emit(struct fd_ringbuffer *ring, struct fd2_shader_stateobj *so)
113 {
114 unsigned i;
115
116 if (so->info.sizedwords == 0)
117 assemble(so);
118
119 OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + so->info.sizedwords);
120 OUT_RING(ring, (so->type == SHADER_VERTEX) ? 0 : 1);
121 OUT_RING(ring, so->info.sizedwords);
122 for (i = 0; i < so->info.sizedwords; i++)
123 OUT_RING(ring, so->bin[i]);
124 }
125
126 static void *
127 fd2_fp_state_create(struct pipe_context *pctx,
128 const struct pipe_shader_state *cso)
129 {
130 struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
131 if (!so)
132 return NULL;
133 so->tokens = tgsi_dup_tokens(cso->tokens);
134 return so;
135 }
136
/**
 * pipe_context::delete_fs_state hook: destroy a fragment shader state
 * object previously returned by fd2_fp_state_create().
 */
static void
fd2_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader((struct fd2_shader_stateobj *)hwcso);
}
143
144 static void
145 fd2_fp_state_bind(struct pipe_context *pctx, void *hwcso)
146 {
147 struct fd_context *ctx = fd_context(pctx);
148 ctx->prog.fp = hwcso;
149 ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
150 ctx->dirty |= FD_DIRTY_PROG;
151 }
152
153 static void *
154 fd2_vp_state_create(struct pipe_context *pctx,
155 const struct pipe_shader_state *cso)
156 {
157 struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX);
158 if (!so)
159 return NULL;
160 so->tokens = tgsi_dup_tokens(cso->tokens);
161 return so;
162 }
163
/**
 * pipe_context::delete_vs_state hook: destroy a vertex shader state
 * object previously returned by fd2_vp_state_create().
 */
static void
fd2_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader((struct fd2_shader_stateobj *)hwcso);
}
170
171 static void
172 fd2_vp_state_bind(struct pipe_context *pctx, void *hwcso)
173 {
174 struct fd_context *ctx = fd_context(pctx);
175 ctx->prog.vp = hwcso;
176 ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
177 ctx->dirty |= FD_DIRTY_PROG;
178 }
179
180 static void
181 patch_vtx_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so,
182 struct fd_vertex_stateobj *vtx)
183 {
184 unsigned i;
185
186 assert(so->num_vfetch_instrs == vtx->num_elements);
187
188 /* update vtx fetch instructions: */
189 for (i = 0; i < so->num_vfetch_instrs; i++) {
190 struct ir2_instruction *instr = so->vfetch_instrs[i];
191 struct pipe_vertex_element *elem = &vtx->pipe[i];
192 struct pipe_vertex_buffer *vb =
193 &ctx->vertexbuf.vb[elem->vertex_buffer_index];
194 enum pipe_format format = elem->src_format;
195 const struct util_format_description *desc =
196 util_format_description(format);
197 unsigned j;
198
199 /* Find the first non-VOID channel. */
200 for (j = 0; j < 4; j++)
201 if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID)
202 break;
203
204 /* CI/CIS can probably be set in compiler instead: */
205 instr->fetch.const_idx = 20 + (i / 3);
206 instr->fetch.const_idx_sel = i % 3;
207
208 instr->fetch.fmt = fd2_pipe2surface(format);
209 instr->fetch.is_normalized = desc->channel[j].normalized;
210 instr->fetch.is_signed =
211 desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED;
212 instr->fetch.stride = vb->stride ? : 1;
213 instr->fetch.offset = elem->src_offset;
214
215 for (j = 0; j < 4; j++)
216 instr->regs[0]->swizzle[j] = "xyzw01__"[desc->swizzle[j]];
217
218 assert(instr->fetch.fmt != ~0);
219
220 DBG("vtx[%d]: %s (%d), ci=%d, cis=%d, id=%d, swizzle=%s, "
221 "stride=%d, offset=%d",
222 i, util_format_name(format),
223 instr->fetch.fmt,
224 instr->fetch.const_idx,
225 instr->fetch.const_idx_sel,
226 elem->instance_divisor,
227 instr->regs[0]->swizzle,
228 instr->fetch.stride,
229 instr->fetch.offset);
230 }
231
232 /* trigger re-assemble: */
233 so->info.sizedwords = 0;
234 }
235
236 static void
237 patch_tex_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so,
238 struct fd_texture_stateobj *tex)
239 {
240 unsigned i;
241
242 /* update tex fetch instructions: */
243 for (i = 0; i < so->num_tfetch_instrs; i++) {
244 struct ir2_instruction *instr = so->tfetch_instrs[i].instr;
245 unsigned samp_id = so->tfetch_instrs[i].samp_id;
246 unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id);
247
248 if (const_idx != instr->fetch.const_idx) {
249 instr->fetch.const_idx = const_idx;
250 /* trigger re-assemble: */
251 so->info.sizedwords = 0;
252 }
253 }
254 }
255
256 void
257 fd2_program_validate(struct fd_context *ctx)
258 {
259 struct fd_program_stateobj *prog = &ctx->prog;
260
261 /* if vertex or frag shader is dirty, we may need to recompile. Compile
262 * frag shader first, as that assigns the register slots for exports
263 * from the vertex shader. And therefore if frag shader has changed we
264 * need to recompile both vert and frag shader.
265 */
266 if (prog->dirty & FD_SHADER_DIRTY_FP)
267 compile(prog, prog->fp);
268
269 if (prog->dirty & (FD_SHADER_DIRTY_FP | FD_SHADER_DIRTY_VP))
270 compile(prog, prog->vp);
271
272 if (prog->dirty)
273 ctx->dirty |= FD_DIRTY_PROG;
274
275 /* if necessary, fix up vertex fetch instructions: */
276 if (ctx->dirty & (FD_DIRTY_VTXSTATE | FD_DIRTY_PROG))
277 patch_vtx_fetches(ctx, prog->vp, ctx->vtx);
278
279 /* if necessary, fix up texture fetch instructions: */
280 if (ctx->dirty & (FD_DIRTY_TEXSTATE | FD_DIRTY_PROG)) {
281 patch_tex_fetches(ctx, prog->vp, &ctx->verttex);
282 patch_tex_fetches(ctx, prog->fp, &ctx->fragtex);
283 }
284 }
285
286 void
287 fd2_program_emit(struct fd_ringbuffer *ring,
288 struct fd_program_stateobj *prog)
289 {
290 struct ir2_shader_info *vsi =
291 &((struct fd2_shader_stateobj *)prog->vp)->info;
292 struct ir2_shader_info *fsi =
293 &((struct fd2_shader_stateobj *)prog->fp)->info;
294 uint8_t vs_gprs, fs_gprs, vs_export;
295
296 emit(ring, prog->vp);
297 emit(ring, prog->fp);
298
299 vs_gprs = (vsi->max_reg < 0) ? 0x80 : vsi->max_reg;
300 fs_gprs = (fsi->max_reg < 0) ? 0x80 : fsi->max_reg;
301 vs_export = MAX2(1, prog->num_exports) - 1;
302
303 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
304 OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));
305 OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(POSITION_2_VECTORS_SPRITE) |
306 A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |
307 A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |
308 A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
309 A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
310 A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs));
311
312 prog->dirty = 0;
313 }
314
315 /* Creates shader:
316 * EXEC ADDR(0x2) CNT(0x1)
317 * (S)FETCH: SAMPLE R0.xyzw = R0.xyx CONST(0) LOCATION(CENTER)
318 * ALLOC PARAM/PIXEL SIZE(0x0)
319 * EXEC_END ADDR(0x3) CNT(0x1)
320 * ALU: MAXv export0 = R0, R0 ; gl_FragColor
321 * NOP
322 */
323 static struct fd2_shader_stateobj *
324 create_blit_fp(void)
325 {
326 struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
327 struct ir2_cf *cf;
328 struct ir2_instruction *instr;
329
330 if (!so)
331 return NULL;
332
333 so->ir = ir2_shader_create();
334
335 cf = ir2_cf_create(so->ir, EXEC);
336
337 instr = ir2_instr_create_tex_fetch(cf, 0);
338 ir2_reg_create(instr, 0, "xyzw", 0);
339 ir2_reg_create(instr, 0, "xyx", 0);
340 instr->sync = true;
341
342 cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
343 cf = ir2_cf_create(so->ir, EXEC_END);
344
345 instr = ir2_instr_create_alu(cf, MAXv, ~0);
346 ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
347 ir2_reg_create(instr, 0, NULL, 0);
348 ir2_reg_create(instr, 0, NULL, 0);
349
350 return assemble(so);
351 }
352
353 /* Creates shader:
354 * EXEC ADDR(0x3) CNT(0x2)
355 * FETCH: VERTEX R1.xy01 = R0.x FMT_32_32_FLOAT UNSIGNED STRIDE(8) CONST(26, 1)
356 * FETCH: VERTEX R2.xyz1 = R0.x FMT_32_32_32_FLOAT UNSIGNED STRIDE(12) CONST(26, 0)
357 * ALLOC POSITION SIZE(0x0)
358 * EXEC ADDR(0x5) CNT(0x1)
359 * ALU: MAXv export62 = R2, R2 ; gl_Position
360 * ALLOC PARAM/PIXEL SIZE(0x0)
361 * EXEC_END ADDR(0x6) CNT(0x1)
362 * ALU: MAXv export0 = R1, R1
363 * NOP
364 */
365 static struct fd2_shader_stateobj *
366 create_blit_vp(void)
367 {
368 struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX);
369 struct ir2_cf *cf;
370 struct ir2_instruction *instr;
371
372 if (!so)
373 return NULL;
374
375 so->ir = ir2_shader_create();
376
377 cf = ir2_cf_create(so->ir, EXEC);
378
379 instr = ir2_instr_create_vtx_fetch(cf, 26, 1, FMT_32_32_FLOAT, false, 8);
380 instr->fetch.is_normalized = true;
381 ir2_reg_create(instr, 1, "xy01", 0);
382 ir2_reg_create(instr, 0, "x", 0);
383
384 instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12);
385 instr->fetch.is_normalized = true;
386 ir2_reg_create(instr, 2, "xyz1", 0);
387 ir2_reg_create(instr, 0, "x", 0);
388
389 cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0);
390 cf = ir2_cf_create(so->ir, EXEC);
391
392 instr = ir2_instr_create_alu(cf, MAXv, ~0);
393 ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT);
394 ir2_reg_create(instr, 2, NULL, 0);
395 ir2_reg_create(instr, 2, NULL, 0);
396
397 cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
398 cf = ir2_cf_create(so->ir, EXEC_END);
399
400 instr = ir2_instr_create_alu(cf, MAXv, ~0);
401 ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
402 ir2_reg_create(instr, 1, NULL, 0);
403 ir2_reg_create(instr, 1, NULL, 0);
404
405 return assemble(so);
406 }
407
408 /* Creates shader:
409 * ALLOC PARAM/PIXEL SIZE(0x0)
410 * EXEC_END ADDR(0x1) CNT(0x1)
411 * ALU: MAXv export0 = C0, C0 ; gl_FragColor
412 */
413 static struct fd2_shader_stateobj *
414 create_solid_fp(void)
415 {
416 struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
417 struct ir2_cf *cf;
418 struct ir2_instruction *instr;
419
420 if (!so)
421 return NULL;
422
423 so->ir = ir2_shader_create();
424
425 cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
426 cf = ir2_cf_create(so->ir, EXEC_END);
427
428 instr = ir2_instr_create_alu(cf, MAXv, ~0);
429 ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
430 ir2_reg_create(instr, 0, NULL, IR2_REG_CONST);
431 ir2_reg_create(instr, 0, NULL, IR2_REG_CONST);
432
433 return assemble(so);
434 }
435
436 /* Creates shader:
437 * EXEC ADDR(0x3) CNT(0x1)
438 * (S)FETCH: VERTEX R1.xyz1 = R0.x FMT_32_32_32_FLOAT
439 * UNSIGNED STRIDE(12) CONST(26, 0)
440 * ALLOC POSITION SIZE(0x0)
441 * EXEC ADDR(0x4) CNT(0x1)
442 * ALU: MAXv export62 = R1, R1 ; gl_Position
443 * ALLOC PARAM/PIXEL SIZE(0x0)
444 * EXEC_END ADDR(0x5) CNT(0x0)
445 */
446 static struct fd2_shader_stateobj *
447 create_solid_vp(void)
448 {
449 struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX);
450 struct ir2_cf *cf;
451 struct ir2_instruction *instr;
452
453 if (!so)
454 return NULL;
455
456 so->ir = ir2_shader_create();
457
458 cf = ir2_cf_create(so->ir, EXEC);
459
460 instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12);
461 ir2_reg_create(instr, 1, "xyz1", 0);
462 ir2_reg_create(instr, 0, "x", 0);
463
464 cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0);
465 cf = ir2_cf_create(so->ir, EXEC);
466
467 instr = ir2_instr_create_alu(cf, MAXv, ~0);
468 ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT);
469 ir2_reg_create(instr, 1, NULL, 0);
470 ir2_reg_create(instr, 1, NULL, 0);
471
472 cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
473 cf = ir2_cf_create(so->ir, EXEC_END);
474
475 return assemble(so);
476 }
477
478 void
479 fd2_prog_init(struct pipe_context *pctx)
480 {
481 struct fd_context *ctx = fd_context(pctx);
482
483 pctx->create_fs_state = fd2_fp_state_create;
484 pctx->bind_fs_state = fd2_fp_state_bind;
485 pctx->delete_fs_state = fd2_fp_state_delete;
486
487 pctx->create_vs_state = fd2_vp_state_create;
488 pctx->bind_vs_state = fd2_vp_state_bind;
489 pctx->delete_vs_state = fd2_vp_state_delete;
490
491 ctx->solid_prog.fp = create_solid_fp();
492 ctx->solid_prog.vp = create_solid_vp();
493 ctx->blit_prog.fp = create_blit_fp();
494 ctx->blit_prog.vp = create_blit_vp();
495 }
496
497 void
498 fd2_prog_fini(struct pipe_context *pctx)
499 {
500 struct fd_context *ctx = fd_context(pctx);
501
502 delete_shader(ctx->solid_prog.vp);
503 delete_shader(ctx->solid_prog.fp);
504 delete_shader(ctx->blit_prog.vp);
505 delete_shader(ctx->blit_prog.fp);
506 }