freedreno: gallium driver for adreno
[mesa.git] / src / gallium / drivers / freedreno / freedreno_compiler.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "tgsi/tgsi_parse.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_strings.h"
37 #include "tgsi/tgsi_dump.h"
38
39 #include "freedreno_program.h"
40 #include "freedreno_compiler.h"
41 #include "freedreno_util.h"
42
43 #include "instr.h"
44 #include "ir.h"
45
/* Per-compile state carried through one TGSI -> a2xx shader translation. */
struct fd_compile_context {
	struct fd_program_stateobj *prog;
	struct fd_shader_stateobj *so;

	struct tgsi_parse_context parser;
	unsigned type;		/* TGSI_PROCESSOR_VERTEX or TGSI_PROCESSOR_FRAGMENT */

	/* predicate stack: saved ir->pred values for nested IF/ELSE/ENDIF */
	int pred_depth;
	enum ir_pred pred_stack[8];

	/* Internal-Temporary and Predicate register assignment:
	 *
	 * Some TGSI instructions which translate into multiple actual
	 * instructions need one or more temporary registers (which are not
	 * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY).
	 * Whenever possible, the dst register is used as the first temporary,
	 * but this is not possible when the dst register is in an export (ie.
	 * in TGSI_FILE_OUTPUT).
	 *
	 * The predicate register must be valid across multiple TGSI
	 * instructions, but internal temporary's do not.  For this reason,
	 * once the predicate register is requested, until it is no longer
	 * needed, it gets the first register slot after the TGSI
	 * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the
	 * internal temporaries get the register slots above this.
	 */

	int pred_reg;			/* -1 when no predicate register is live */
	int num_internal_temps;	/* scratch temps used by current TGSI instr */

	/* count of registers declared per TGSI file (INPUT/OUTPUT/TEMP/...) */
	uint8_t num_regs[TGSI_FILE_COUNT];

	/* maps input register idx to prog->export_linkage idx: */
	uint8_t input_export_idx[64];

	/* maps output register idx to prog->export_linkage idx: */
	uint8_t output_export_idx[64];

	/* idx/slot for last compiler generated immediate */
	unsigned immediate_idx;

	// TODO we can skip emit exports in the VS that the FS doesn't need..
	// and get rid perhaps of num_param..
	unsigned num_position, num_param;
	unsigned position, psize;	/* output reg indexes; ~0 when not declared */

	/* bitmask of GPRs with an outstanding fetch that must be sync'd on use */
	uint64_t need_sync;

	/* current exec CF instruction (NULL forces a new clause) */
	struct ir_cf *cf;
};
98
99 static int
100 semantic_idx(struct tgsi_declaration_semantic *semantic)
101 {
102 int idx = semantic->Name;
103 if (idx == TGSI_SEMANTIC_GENERIC)
104 idx = TGSI_SEMANTIC_COUNT + semantic->Index;
105 return idx;
106 }
107
108 /* assign/get the input/export register # for given semantic idx as
109 * returned by semantic_idx():
110 */
111 static int
112 export_linkage(struct fd_compile_context *ctx, int idx)
113 {
114 struct fd_program_stateobj *prog = ctx->prog;
115
116 /* if first time we've seen this export, assign the next available slot: */
117 if (prog->export_linkage[idx] == 0xff)
118 prog->export_linkage[idx] = prog->num_exports++;
119
120 return prog->export_linkage[idx];
121 }
122
123 static unsigned
124 compile_init(struct fd_compile_context *ctx, struct fd_program_stateobj *prog,
125 struct fd_shader_stateobj *so)
126 {
127 unsigned ret;
128
129 ctx->prog = prog;
130 ctx->so = so;
131 ctx->cf = NULL;
132 ctx->pred_depth = 0;
133
134 ret = tgsi_parse_init(&ctx->parser, so->tokens);
135 if (ret != TGSI_PARSE_OK)
136 return ret;
137
138 ctx->type = ctx->parser.FullHeader.Processor.Processor;
139 ctx->position = ~0;
140 ctx->psize = ~0;
141 ctx->num_position = 0;
142 ctx->num_param = 0;
143 ctx->need_sync = 0;
144 ctx->immediate_idx = 0;
145 ctx->pred_reg = -1;
146 ctx->num_internal_temps = 0;
147
148 memset(ctx->num_regs, 0, sizeof(ctx->num_regs));
149 memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx));
150 memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx));
151
152 /* do first pass to extract declarations: */
153 while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
154 tgsi_parse_token(&ctx->parser);
155
156 switch (ctx->parser.FullToken.Token.Type) {
157 case TGSI_TOKEN_TYPE_DECLARATION: {
158 struct tgsi_full_declaration *decl =
159 &ctx->parser.FullToken.FullDeclaration;
160 if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
161 unsigned name = decl->Semantic.Name;
162
163 assert(decl->Declaration.Semantic); // TODO is this ever not true?
164
165 ctx->output_export_idx[decl->Range.First] =
166 semantic_idx(&decl->Semantic);
167
168 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
169 switch (name) {
170 case TGSI_SEMANTIC_POSITION:
171 ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT];
172 ctx->num_position++;
173 break;
174 case TGSI_SEMANTIC_PSIZE:
175 ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT];
176 ctx->num_position++;
177 case TGSI_SEMANTIC_COLOR:
178 case TGSI_SEMANTIC_GENERIC:
179 ctx->num_param++;
180 break;
181 default:
182 DBG("unknown VS semantic name: %s",
183 tgsi_semantic_names[name]);
184 assert(0);
185 }
186 } else {
187 switch (name) {
188 case TGSI_SEMANTIC_COLOR:
189 case TGSI_SEMANTIC_GENERIC:
190 ctx->num_param++;
191 break;
192 default:
193 DBG("unknown PS semantic name: %s",
194 tgsi_semantic_names[name]);
195 assert(0);
196 }
197 }
198 } else if (decl->Declaration.File == TGSI_FILE_INPUT) {
199 ctx->input_export_idx[decl->Range.First] =
200 semantic_idx(&decl->Semantic);
201 }
202 ctx->num_regs[decl->Declaration.File] +=
203 1 + decl->Range.Last - decl->Range.First;
204 break;
205 }
206 case TGSI_TOKEN_TYPE_IMMEDIATE: {
207 struct tgsi_full_immediate *imm =
208 &ctx->parser.FullToken.FullImmediate;
209 unsigned n = ctx->so->num_immediates++;
210 memcpy(ctx->so->immediates[n].val, imm->u, 16);
211 break;
212 }
213 default:
214 break;
215 }
216 }
217
218 /* TGSI generated immediates are always entire vec4's, ones we
219 * generate internally are not:
220 */
221 ctx->immediate_idx = ctx->so->num_immediates * 4;
222
223 ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT];
224
225 tgsi_parse_free(&ctx->parser);
226
227 return tgsi_parse_init(&ctx->parser, so->tokens);
228 }
229
230 static void
231 compile_free(struct fd_compile_context *ctx)
232 {
233 tgsi_parse_free(&ctx->parser);
234 }
235
236 static struct ir_cf *
237 next_exec_cf(struct fd_compile_context *ctx)
238 {
239 struct ir_cf *cf = ctx->cf;
240 if (!cf || cf->exec.instrs_count >= ARRAY_SIZE(ctx->cf->exec.instrs))
241 ctx->cf = cf = ir_cf_create(ctx->so->ir, EXEC);
242 return cf;
243 }
244
245 static void
246 compile_vtx_fetch(struct fd_compile_context *ctx)
247 {
248 struct ir_instruction **vfetch_instrs = ctx->so->vfetch_instrs;
249 int i;
250 for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) {
251 struct ir_instruction *instr = ir_instr_create(
252 next_exec_cf(ctx), IR_FETCH);
253 instr->fetch.opc = VTX_FETCH;
254
255 ctx->need_sync |= 1 << (i+1);
256
257 ir_reg_create(instr, i+1, "xyzw", 0);
258 ir_reg_create(instr, 0, "x", 0);
259
260 if (i == 0)
261 instr->sync = true;
262
263 vfetch_instrs[i] = instr;
264 }
265 ctx->so->num_vfetch_instrs = i;
266 ctx->cf = NULL;
267 }
268
269 /*
270 * For vertex shaders (VS):
271 * --- ------ -------------
272 *
273 * Inputs: R1-R(num_input)
274 * Constants: C0-C(num_const-1)
275 * Immediates: C(num_const)-C(num_const+num_imm-1)
276 * Outputs: export0-export(n) and export62, export63
277 * n is # of outputs minus gl_Position (export62) and gl_PointSize (export63)
278 * Temps: R(num_input+1)-R(num_input+num_temps)
279 *
280 * R0 could be clobbered after the vertex fetch instructions.. so we
281 * could use it for one of the temporaries.
282 *
283 * TODO: maybe the vertex fetch part could fetch first input into R0 as
284 * the last vtx fetch instruction, which would let us use the same
285 * register layout in either case.. although this is not what the blob
286 * compiler does.
287 *
288 *
289 * For frag shaders (PS):
290 * --- ---- -------------
291 *
292 * Inputs: R0-R(num_input-1)
293 * Constants: same as VS
294 * Immediates: same as VS
295 * Outputs: export0-export(num_outputs)
296 * Temps: R(num_input)-R(num_input+num_temps-1)
297 *
 * In either case, immediates are appended after the constants
 * (uniforms).
300 *
301 */
302
303 static unsigned
304 get_temp_gpr(struct fd_compile_context *ctx, int idx)
305 {
306 unsigned num = idx + ctx->num_regs[TGSI_FILE_INPUT];
307 if (ctx->type == TGSI_PROCESSOR_VERTEX)
308 num++;
309 return num;
310 }
311
312 static struct ir_register *
313 add_dst_reg(struct fd_compile_context *ctx, struct ir_instruction *alu,
314 const struct tgsi_dst_register *dst)
315 {
316 unsigned flags = 0, num = 0;
317 char swiz[5];
318
319 switch (dst->File) {
320 case TGSI_FILE_OUTPUT:
321 flags |= IR_REG_EXPORT;
322 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
323 if (dst->Index == ctx->position) {
324 num = 62;
325 } else if (dst->Index == ctx->psize) {
326 num = 63;
327 } else {
328 num = export_linkage(ctx,
329 ctx->output_export_idx[dst->Index]);
330 }
331 } else {
332 num = dst->Index;
333 }
334 break;
335 case TGSI_FILE_TEMPORARY:
336 num = get_temp_gpr(ctx, dst->Index);
337 break;
338 default:
339 DBG("unsupported dst register file: %s",
340 tgsi_file_names[dst->File]);
341 assert(0);
342 break;
343 }
344
345 swiz[0] = (dst->WriteMask & TGSI_WRITEMASK_X) ? 'x' : '_';
346 swiz[1] = (dst->WriteMask & TGSI_WRITEMASK_Y) ? 'y' : '_';
347 swiz[2] = (dst->WriteMask & TGSI_WRITEMASK_Z) ? 'z' : '_';
348 swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 'w' : '_';
349 swiz[4] = '\0';
350
351 return ir_reg_create(alu, num, swiz, flags);
352 }
353
354 static struct ir_register *
355 add_src_reg(struct fd_compile_context *ctx, struct ir_instruction *alu,
356 const struct tgsi_src_register *src)
357 {
358 static const char swiz_vals[] = {
359 'x', 'y', 'z', 'w',
360 };
361 char swiz[5];
362 unsigned flags = 0, num = 0;
363
364 switch (src->File) {
365 case TGSI_FILE_CONSTANT:
366 num = src->Index;
367 flags |= IR_REG_CONST;
368 break;
369 case TGSI_FILE_INPUT:
370 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
371 num = src->Index + 1;
372 } else {
373 num = export_linkage(ctx,
374 ctx->input_export_idx[src->Index]);
375 }
376 break;
377 case TGSI_FILE_TEMPORARY:
378 num = get_temp_gpr(ctx, src->Index);
379 break;
380 case TGSI_FILE_IMMEDIATE:
381 num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT];
382 flags |= IR_REG_CONST;
383 break;
384 default:
385 DBG("unsupported src register file: %s",
386 tgsi_file_names[src->File]);
387 assert(0);
388 break;
389 }
390
391 if (src->Absolute)
392 flags |= IR_REG_ABS;
393 if (src->Negate)
394 flags |= IR_REG_NEGATE;
395
396 swiz[0] = swiz_vals[src->SwizzleX];
397 swiz[1] = swiz_vals[src->SwizzleY];
398 swiz[2] = swiz_vals[src->SwizzleZ];
399 swiz[3] = swiz_vals[src->SwizzleW];
400 swiz[4] = '\0';
401
402 if ((ctx->need_sync & (uint64_t)(1 << num)) &&
403 !(flags & IR_REG_CONST)) {
404 alu->sync = true;
405 ctx->need_sync &= ~(uint64_t)(1 << num);
406 }
407
408 return ir_reg_create(alu, num, swiz, flags);
409 }
410
411 static void
412 add_vector_clamp(struct tgsi_full_instruction *inst, struct ir_instruction *alu)
413 {
414 switch (inst->Instruction.Saturate) {
415 case TGSI_SAT_NONE:
416 break;
417 case TGSI_SAT_ZERO_ONE:
418 alu->alu.vector_clamp = true;
419 break;
420 case TGSI_SAT_MINUS_PLUS_ONE:
421 DBG("unsupported saturate");
422 assert(0);
423 break;
424 }
425 }
426
427 static void
428 add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir_instruction *alu)
429 {
430 switch (inst->Instruction.Saturate) {
431 case TGSI_SAT_NONE:
432 break;
433 case TGSI_SAT_ZERO_ONE:
434 alu->alu.scalar_clamp = true;
435 break;
436 case TGSI_SAT_MINUS_PLUS_ONE:
437 DBG("unsupported saturate");
438 assert(0);
439 break;
440 }
441 }
442
443 static void
444 add_regs_vector_1(struct fd_compile_context *ctx,
445 struct tgsi_full_instruction *inst, struct ir_instruction *alu)
446 {
447 assert(inst->Instruction.NumSrcRegs == 1);
448 assert(inst->Instruction.NumDstRegs == 1);
449
450 add_dst_reg(ctx, alu, &inst->Dst[0].Register);
451 add_src_reg(ctx, alu, &inst->Src[0].Register);
452 add_src_reg(ctx, alu, &inst->Src[0].Register);
453 add_vector_clamp(inst, alu);
454 }
455
456 static void
457 add_regs_vector_2(struct fd_compile_context *ctx,
458 struct tgsi_full_instruction *inst, struct ir_instruction *alu)
459 {
460 assert(inst->Instruction.NumSrcRegs == 2);
461 assert(inst->Instruction.NumDstRegs == 1);
462
463 add_dst_reg(ctx, alu, &inst->Dst[0].Register);
464 add_src_reg(ctx, alu, &inst->Src[0].Register);
465 add_src_reg(ctx, alu, &inst->Src[1].Register);
466 add_vector_clamp(inst, alu);
467 }
468
469 static void
470 add_regs_vector_3(struct fd_compile_context *ctx,
471 struct tgsi_full_instruction *inst, struct ir_instruction *alu)
472 {
473 assert(inst->Instruction.NumSrcRegs == 3);
474 assert(inst->Instruction.NumDstRegs == 1);
475
476 add_dst_reg(ctx, alu, &inst->Dst[0].Register);
477 /* maybe should re-arrange the syntax some day, but
478 * in assembler/disassembler and what ir.c expects
479 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
480 */
481 add_src_reg(ctx, alu, &inst->Src[2].Register);
482 add_src_reg(ctx, alu, &inst->Src[0].Register);
483 add_src_reg(ctx, alu, &inst->Src[1].Register);
484 add_vector_clamp(inst, alu);
485 }
486
487 static void
488 add_regs_dummy_vector(struct ir_instruction *alu)
489 {
490 /* create dummy, non-written vector dst/src regs
491 * for unused vector instr slot:
492 */
493 ir_reg_create(alu, 0, "____", 0); /* vector dst */
494 ir_reg_create(alu, 0, NULL, 0); /* vector src1 */
495 ir_reg_create(alu, 0, NULL, 0); /* vector src2 */
496 }
497
498 static void
499 add_regs_scalar_1(struct fd_compile_context *ctx,
500 struct tgsi_full_instruction *inst, struct ir_instruction *alu)
501 {
502 assert(inst->Instruction.NumSrcRegs == 1);
503 assert(inst->Instruction.NumDstRegs == 1);
504
505 add_regs_dummy_vector(alu);
506
507 add_dst_reg(ctx, alu, &inst->Dst[0].Register);
508 add_src_reg(ctx, alu, &inst->Src[0].Register);
509 add_scalar_clamp(inst, alu);
510 }
511
512 /*
513 * Helpers for TGSI instructions that don't map to a single shader instr:
514 */
515
516 /* Get internal-temp src/dst to use for a sequence of instructions
517 * generated by a single TGSI op.. if possible, use the final dst
518 * register as the temporary to avoid allocating a new register, but
519 * if necessary allocate one. If a single TGSI op needs multiple
520 * internal temps, pass NULL for orig_dst for all but the first one
521 * so that you don't end up using the same register for all your
522 * internal temps.
523 */
524 static bool
525 get_internal_temp(struct fd_compile_context *ctx,
526 struct tgsi_dst_register *orig_dst,
527 struct tgsi_dst_register *tmp_dst,
528 struct tgsi_src_register *tmp_src)
529 {
530 bool using_temp = false;
531
532 tmp_dst->File = TGSI_FILE_TEMPORARY;
533 tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
534 tmp_dst->Indirect = 0;
535 tmp_dst->Dimension = 0;
536
537 if (orig_dst && (orig_dst->File != TGSI_FILE_OUTPUT)) {
538 /* if possible, use orig dst register for the temporary: */
539 tmp_dst->Index = orig_dst->Index;
540 } else {
541 /* otherwise assign one: */
542 int n = ctx->num_internal_temps++;
543 if (ctx->pred_reg != -1)
544 n++;
545 tmp_dst->Index = get_temp_gpr(ctx,
546 ctx->num_regs[TGSI_FILE_TEMPORARY] + n);
547 using_temp = true;
548 }
549
550 tmp_src->File = tmp_dst->File;
551 tmp_src->Indirect = tmp_dst->Indirect;
552 tmp_src->Dimension = tmp_dst->Dimension;
553 tmp_src->Index = tmp_dst->Index;
554 tmp_src->Absolute = 0;
555 tmp_src->Negate = 0;
556 tmp_src->SwizzleX = TGSI_SWIZZLE_X;
557 tmp_src->SwizzleY = TGSI_SWIZZLE_Y;
558 tmp_src->SwizzleZ = TGSI_SWIZZLE_Z;
559 tmp_src->SwizzleW = TGSI_SWIZZLE_W;
560
561 return using_temp;
562 }
563
564 static void
565 get_predicate(struct fd_compile_context *ctx, struct tgsi_dst_register *dst,
566 struct tgsi_src_register *src)
567 {
568 assert(ctx->pred_reg != -1);
569
570 dst->File = TGSI_FILE_TEMPORARY;
571 dst->WriteMask = TGSI_WRITEMASK_W;
572 dst->Indirect = 0;
573 dst->Dimension = 0;
574 dst->Index = get_temp_gpr(ctx, ctx->pred_reg);
575
576 if (src) {
577 src->File = dst->File;
578 src->Indirect = dst->Indirect;
579 src->Dimension = dst->Dimension;
580 src->Index = dst->Index;
581 src->Absolute = 0;
582 src->Negate = 0;
583 src->SwizzleX = TGSI_SWIZZLE_W;
584 src->SwizzleY = TGSI_SWIZZLE_W;
585 src->SwizzleZ = TGSI_SWIZZLE_W;
586 src->SwizzleW = TGSI_SWIZZLE_W;
587 }
588 }
589
/* Begin a predicated region (TGSI IF): derive the predicate value for
 * the new nesting level and push the previous ir->pred state so
 * pop_predicate() can restore it.
 *
 * At depth 0 the predicate GPR is claimed (the slot just past the TGSI
 * temporaries, see the fd_compile_context comment) and PRED_SETNEs
 * computes the predicate from 'src'.  At deeper levels the new
 * condition is combined with the enclosing one by multiplying it into
 * the predicate register (assumes both are 0.0/1.0 values -- TODO
 * confirm PRED_SETNEs output range).
 */
static void
push_predicate(struct fd_compile_context *ctx, struct tgsi_src_register *src)
{
	struct ir_instruction *alu;
	struct tgsi_dst_register pred_dst;

	/* NOTE blob compiler seems to always put PRED_* instrs in a CF by
	 * themselves:
	 */
	ctx->cf = NULL;

	if (ctx->pred_depth == 0) {
		/* assign predicate register: */
		ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY];

		get_predicate(ctx, &pred_dst, NULL);

		alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SETNEs);
		add_regs_dummy_vector(alu);
		add_dst_reg(ctx, alu, &pred_dst);
		add_src_reg(ctx, alu, src);
	} else {
		struct tgsi_src_register pred_src;

		get_predicate(ctx, &pred_dst, &pred_src);

		/* combine with enclosing predicate: */
		alu = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
		add_dst_reg(ctx, alu, &pred_dst);
		add_src_reg(ctx, alu, &pred_src);
		add_src_reg(ctx, alu, src);

		// XXX need to make PRED_SETE_PUSHv IR_PRED_NONE.. but need to make
		// sure src reg is valid if it was calculated with a predicate
		// condition..
		alu->pred = IR_PRED_NONE;
	}

	/* save previous pred state to restore in pop_predicate(): */
	ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred;

	/* PRED_* lives alone in its CF -- force a new clause after, too: */
	ctx->cf = NULL;
}
632
/* End a predicated region (TGSI ENDIF): restore the enclosing
 * predicate state, and either recompute the outer predicate value
 * (still nested) or release the predicate register (back at depth 0).
 */
static void
pop_predicate(struct fd_compile_context *ctx)
{
	/* NOTE blob compiler seems to always put PRED_* instrs in a CF by
	 * themselves:
	 */
	ctx->cf = NULL;

	/* restore previous predicate state: */
	ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth];

	if (ctx->pred_depth != 0) {
		struct ir_instruction *alu;
		struct tgsi_dst_register pred_dst;
		struct tgsi_src_register pred_src;

		get_predicate(ctx, &pred_dst, &pred_src);

		/* recompute predicate for the enclosing level: */
		alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SET_POPs);
		add_regs_dummy_vector(alu);
		add_dst_reg(ctx, alu, &pred_dst);
		add_src_reg(ctx, alu, &pred_src);
		alu->pred = IR_PRED_NONE;
	} else {
		/* predicate register no longer needed: */
		ctx->pred_reg = -1;
	}

	/* PRED_* lives alone in its CF -- force a new clause after, too: */
	ctx->cf = NULL;
}
663
664 static void
665 get_immediate(struct fd_compile_context *ctx,
666 struct tgsi_src_register *reg, uint32_t val)
667 {
668 unsigned neg, swiz, idx, i;
669 /* actually maps 1:1 currently.. not sure if that is safe to rely on: */
670 static const unsigned swiz2tgsi[] = {
671 TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
672 };
673
674 for (i = 0; i < ctx->immediate_idx; i++) {
675 swiz = i % 4;
676 idx = i / 4;
677
678 if (ctx->so->immediates[idx].val[swiz] == val) {
679 neg = 0;
680 break;
681 }
682
683 if (ctx->so->immediates[idx].val[swiz] == -val) {
684 neg = 1;
685 break;
686 }
687 }
688
689 if (i == ctx->immediate_idx) {
690 /* need to generate a new immediate: */
691 swiz = i % 4;
692 idx = i / 4;
693 neg = 0;
694 ctx->so->immediates[idx].val[swiz] = val;
695 ctx->so->num_immediates = idx + 1;
696 ctx->immediate_idx++;
697 }
698
699 reg->File = TGSI_FILE_IMMEDIATE;
700 reg->Indirect = 0;
701 reg->Dimension = 0;
702 reg->Index = idx;
703 reg->Absolute = 0;
704 reg->Negate = neg;
705 reg->SwizzleX = swiz2tgsi[swiz];
706 reg->SwizzleY = swiz2tgsi[swiz];
707 reg->SwizzleZ = swiz2tgsi[swiz];
708 reg->SwizzleW = swiz2tgsi[swiz];
709 }
710
711 /* POW(a,b) = EXP2(b * LOG2(a)) */
712 static void
713 translate_pow(struct fd_compile_context *ctx,
714 struct tgsi_full_instruction *inst)
715 {
716 struct tgsi_dst_register tmp_dst;
717 struct tgsi_src_register tmp_src;
718 struct ir_instruction *alu;
719
720 get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src);
721
722 alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP);
723 add_regs_dummy_vector(alu);
724 add_dst_reg(ctx, alu, &tmp_dst);
725 add_src_reg(ctx, alu, &inst->Src[0].Register);
726
727 alu = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
728 add_dst_reg(ctx, alu, &tmp_dst);
729 add_src_reg(ctx, alu, &tmp_src);
730 add_src_reg(ctx, alu, &inst->Src[1].Register);
731
732 /* NOTE: some of the instructions, like EXP_IEEE, seem hard-
733 * coded to take their input from the w component.
734 */
735 switch(inst->Dst[0].Register.WriteMask) {
736 case TGSI_WRITEMASK_X:
737 tmp_src.SwizzleW = TGSI_SWIZZLE_X;
738 break;
739 case TGSI_WRITEMASK_Y:
740 tmp_src.SwizzleW = TGSI_SWIZZLE_Y;
741 break;
742 case TGSI_WRITEMASK_Z:
743 tmp_src.SwizzleW = TGSI_SWIZZLE_Z;
744 break;
745 case TGSI_WRITEMASK_W:
746 tmp_src.SwizzleW = TGSI_SWIZZLE_W;
747 break;
748 default:
749 DBG("invalid writemask!");
750 assert(0);
751 break;
752 }
753
754 alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, EXP_IEEE);
755 add_regs_dummy_vector(alu);
756 add_dst_reg(ctx, alu, &inst->Dst[0].Register);
757 add_src_reg(ctx, alu, &tmp_src);
758 add_scalar_clamp(inst, alu);
759 }
760
/* Translate TEX/TXP into a TEX_FETCH (plus helper ALU instructions):
 * for TXP the coordinate is first divided by its .w component; and
 * since a fetch cannot write directly to an export, an extra move is
 * appended when the TGSI dst is an OUTPUT register.
 *
 * The fetch is recorded in so->tfetch_instrs so the sampler const_idx
 * can be patched in later.
 */
static void
translate_tex(struct fd_compile_context *ctx,
		struct tgsi_full_instruction *inst, unsigned opc)
{
	struct ir_instruction *instr;
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register tmp_src;
	const struct tgsi_src_register *coord;
	bool using_temp;
	int idx;

	using_temp = get_internal_temp(ctx,
			&inst->Dst[0].Register, &tmp_dst, &tmp_src);

	if (opc == TGSI_OPCODE_TXP) {
		/* TXP - Projective Texture Lookup:
		 *
		 *  coord.x = src0.x / src.w
		 *  coord.y = src0.y / src.w
		 *  coord.z = src0.z / src.w
		 *  coord.w = src0.w
		 *  bias = 0.0
		 *
		 *  dst = texture_sample(unit, coord, bias)
		 */
		/* one co-issued instr: MAXv copies src0.w, RECIP_IEEE computes
		 * 1/src0.w:
		 */
		instr = ir_instr_create_alu(next_exec_cf(ctx), MAXv, RECIP_IEEE);

		/* MAXv: tmp.w = max(src0.w, src0.w), ie. tmp.w = src0.w */
		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w";
		add_src_reg(ctx, instr, &inst->Src[0].Register);
		add_src_reg(ctx, instr, &inst->Src[0].Register);

		/* RECIP_IEEE: tmp.x = 1 / src0.w */
		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___";
		add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = "wwww";

		/* tmp.xyz = src0.xyz * (1/src0.w): */
		instr = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_";
		add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx";
		add_src_reg(ctx, instr, &inst->Src[0].Register);

		coord = &tmp_src;
	} else {
		coord = &inst->Src[0].Register;
	}

	instr = ir_instr_create(next_exec_cf(ctx), IR_FETCH);
	instr->fetch.opc = TEX_FETCH;
	assert(inst->Texture.NumOffsets <= 1);  // TODO what to do in other cases?

	/* save off the tex fetch to be patched later with correct const_idx: */
	idx = ctx->so->num_tfetch_instrs++;
	ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index;
	ctx->so->tfetch_instrs[idx].instr = instr;

	add_dst_reg(ctx, instr, &tmp_dst);
	add_src_reg(ctx, instr, coord);

	/* dst register needs to be marked for sync: */
	ctx->need_sync |= 1 << instr->regs[0]->num;

	/* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */
	instr->sync = true;

	if (using_temp) {
		/* texture fetch can't write directly to export, so if tgsi
		 * is telling us the dst register is in output file, we load
		 * the texture to a temp and the use ALU instruction to move
		 * to output
		 */
		instr = ir_instr_create_alu(next_exec_cf(ctx), MAXv, ~0);

		add_dst_reg(ctx, instr, &inst->Dst[0].Register);
		add_src_reg(ctx, instr, &tmp_src);
		add_src_reg(ctx, instr, &tmp_src);
		add_vector_clamp(inst, instr);
	}
}
839
840 /* SGE(a,b) = GTE((b - a), 1.0, 0.0) */
841 /* SLT(a,b) = GTE((b - a), 0.0, 1.0) */
842 static void
843 translate_sge_slt(struct fd_compile_context *ctx,
844 struct tgsi_full_instruction *inst, unsigned opc)
845 {
846 struct ir_instruction *instr;
847 struct tgsi_dst_register tmp_dst;
848 struct tgsi_src_register tmp_src;
849 struct tgsi_src_register tmp_const;
850 float c0, c1;
851
852 switch (opc) {
853 default:
854 assert(0);
855 case TGSI_OPCODE_SGE:
856 c0 = 1.0;
857 c1 = 0.0;
858 break;
859 case TGSI_OPCODE_SLT:
860 c0 = 0.0;
861 c1 = 1.0;
862 break;
863 }
864
865 get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src);
866
867 instr = ir_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
868 add_dst_reg(ctx, instr, &tmp_dst);
869 add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR_REG_NEGATE;
870 add_src_reg(ctx, instr, &inst->Src[1].Register);
871
872 instr = ir_instr_create_alu(next_exec_cf(ctx), CNDGTEv, ~0);
873 add_dst_reg(ctx, instr, &inst->Dst[0].Register);
874 /* maybe should re-arrange the syntax some day, but
875 * in assembler/disassembler and what ir.c expects
876 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
877 */
878 get_immediate(ctx, &tmp_const, f2d(c0));
879 add_src_reg(ctx, instr, &tmp_const);
880 add_src_reg(ctx, instr, &tmp_src);
881 get_immediate(ctx, &tmp_const, f2d(c1));
882 add_src_reg(ctx, instr, &tmp_const);
883 }
884
885 /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
886 static void
887 translate_lrp(struct fd_compile_context *ctx,
888 struct tgsi_full_instruction *inst,
889 unsigned opc)
890 {
891 struct ir_instruction *instr;
892 struct tgsi_dst_register tmp_dst1, tmp_dst2;
893 struct tgsi_src_register tmp_src1, tmp_src2;
894 struct tgsi_src_register tmp_const;
895
896 get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst1, &tmp_src1);
897 get_internal_temp(ctx, NULL, &tmp_dst2, &tmp_src2);
898
899 get_immediate(ctx, &tmp_const, f2d(1.0));
900
901 /* tmp1 = (a * b) */
902 instr = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
903 add_dst_reg(ctx, instr, &tmp_dst1);
904 add_src_reg(ctx, instr, &inst->Src[0].Register);
905 add_src_reg(ctx, instr, &inst->Src[1].Register);
906
907 /* tmp2 = (1 - a) */
908 instr = ir_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
909 add_dst_reg(ctx, instr, &tmp_dst2);
910 add_src_reg(ctx, instr, &tmp_const);
911 add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR_REG_NEGATE;
912
913 /* tmp2 = tmp2 * c */
914 instr = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
915 add_dst_reg(ctx, instr, &tmp_dst2);
916 add_src_reg(ctx, instr, &tmp_src2);
917 add_src_reg(ctx, instr, &inst->Src[2].Register);
918
919 /* dst = tmp1 + tmp2 */
920 instr = ir_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
921 add_dst_reg(ctx, instr, &inst->Dst[0].Register);
922 add_src_reg(ctx, instr, &tmp_src1);
923 add_src_reg(ctx, instr, &tmp_src2);
924 }
925
926 static void
927 translate_trig(struct fd_compile_context *ctx,
928 struct tgsi_full_instruction *inst,
929 unsigned opc)
930 {
931 struct ir_instruction *instr;
932 struct tgsi_dst_register tmp_dst;
933 struct tgsi_src_register tmp_src;
934 struct tgsi_src_register tmp_const;
935 instr_scalar_opc_t op;
936
937 switch (opc) {
938 default:
939 assert(0);
940 case TGSI_OPCODE_SIN:
941 op = SIN;
942 break;
943 case TGSI_OPCODE_COS:
944 op = COS;
945 break;
946 }
947
948 get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src);
949
950 tmp_dst.WriteMask = TGSI_WRITEMASK_X;
951 tmp_src.SwizzleX = tmp_src.SwizzleY =
952 tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X;
953
954 /* maybe should re-arrange the syntax some day, but
955 * in assembler/disassembler and what ir.c expects
956 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
957 */
958 instr = ir_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
959 add_dst_reg(ctx, instr, &tmp_dst);
960 get_immediate(ctx, &tmp_const, f2d(0.5));
961 add_src_reg(ctx, instr, &tmp_const);
962 add_src_reg(ctx, instr, &inst->Src[0].Register);
963 get_immediate(ctx, &tmp_const, f2d(0.159155));
964 add_src_reg(ctx, instr, &tmp_const);
965
966 instr = ir_instr_create_alu(next_exec_cf(ctx), FRACv, ~0);
967 add_dst_reg(ctx, instr, &tmp_dst);
968 add_src_reg(ctx, instr, &tmp_src);
969 add_src_reg(ctx, instr, &tmp_src);
970
971 instr = ir_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
972 add_dst_reg(ctx, instr, &tmp_dst);
973 get_immediate(ctx, &tmp_const, f2d(-3.141593));
974 add_src_reg(ctx, instr, &tmp_const);
975 add_src_reg(ctx, instr, &tmp_src);
976 get_immediate(ctx, &tmp_const, f2d(6.283185));
977 add_src_reg(ctx, instr, &tmp_const);
978
979 instr = ir_instr_create_alu(next_exec_cf(ctx), ~0, op);
980 add_regs_dummy_vector(instr);
981 add_dst_reg(ctx, instr, &inst->Dst[0].Register);
982 add_src_reg(ctx, instr, &tmp_src);
983 }
984
985 /*
986 * Main part of compiler/translator:
987 */
988
989 static void
990 translate_instruction(struct fd_compile_context *ctx,
991 struct tgsi_full_instruction *inst)
992 {
993 unsigned opc = inst->Instruction.Opcode;
994 struct ir_instruction *instr;
995 static struct ir_cf *cf;
996
997 if (opc == TGSI_OPCODE_END)
998 return;
999
1000 if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
1001 unsigned num = inst->Dst[0].Register.Index;
1002 /* seems like we need to ensure that position vs param/pixel
1003 * exports don't end up in the same EXEC clause.. easy way
1004 * to do this is force a new EXEC clause on first appearance
1005 * of an position or param/pixel export.
1006 */
1007 if ((num == ctx->position) || (num == ctx->psize)) {
1008 if (ctx->num_position > 0) {
1009 ctx->cf = NULL;
1010 ir_cf_create_alloc(ctx->so->ir, SQ_POSITION,
1011 ctx->num_position - 1);
1012 ctx->num_position = 0;
1013 }
1014 } else {
1015 if (ctx->num_param > 0) {
1016 ctx->cf = NULL;
1017 ir_cf_create_alloc(ctx->so->ir, SQ_PARAMETER_PIXEL,
1018 ctx->num_param - 1);
1019 ctx->num_param = 0;
1020 }
1021 }
1022 }
1023
1024 cf = next_exec_cf(ctx);
1025
1026 /* TODO turn this into a table: */
1027 switch (opc) {
1028 case TGSI_OPCODE_MOV:
1029 instr = ir_instr_create_alu(cf, MAXv, ~0);
1030 add_regs_vector_1(ctx, inst, instr);
1031 break;
1032 case TGSI_OPCODE_RCP:
1033 instr = ir_instr_create_alu(cf, ~0, RECIP_IEEE);
1034 add_regs_scalar_1(ctx, inst, instr);
1035 break;
1036 case TGSI_OPCODE_RSQ:
1037 instr = ir_instr_create_alu(cf, ~0, RECIPSQ_IEEE);
1038 add_regs_scalar_1(ctx, inst, instr);
1039 break;
1040 case TGSI_OPCODE_MUL:
1041 instr = ir_instr_create_alu(cf, MULv, ~0);
1042 add_regs_vector_2(ctx, inst, instr);
1043 break;
1044 case TGSI_OPCODE_ADD:
1045 instr = ir_instr_create_alu(cf, ADDv, ~0);
1046 add_regs_vector_2(ctx, inst, instr);
1047 break;
1048 case TGSI_OPCODE_DP3:
1049 instr = ir_instr_create_alu(cf, DOT3v, ~0);
1050 add_regs_vector_2(ctx, inst, instr);
1051 break;
1052 case TGSI_OPCODE_DP4:
1053 instr = ir_instr_create_alu(cf, DOT4v, ~0);
1054 add_regs_vector_2(ctx, inst, instr);
1055 break;
1056 case TGSI_OPCODE_MIN:
1057 instr = ir_instr_create_alu(cf, MINv, ~0);
1058 add_regs_vector_2(ctx, inst, instr);
1059 break;
1060 case TGSI_OPCODE_MAX:
1061 instr = ir_instr_create_alu(cf, MAXv, ~0);
1062 add_regs_vector_2(ctx, inst, instr);
1063 break;
1064 case TGSI_OPCODE_SLT:
1065 case TGSI_OPCODE_SGE:
1066 translate_sge_slt(ctx, inst, opc);
1067 break;
1068 case TGSI_OPCODE_MAD:
1069 instr = ir_instr_create_alu(cf, MULADDv, ~0);
1070 add_regs_vector_3(ctx, inst, instr);
1071 break;
1072 case TGSI_OPCODE_LRP:
1073 translate_lrp(ctx, inst, opc);
1074 break;
1075 case TGSI_OPCODE_FRC:
1076 instr = ir_instr_create_alu(cf, FRACv, ~0);
1077 add_regs_vector_1(ctx, inst, instr);
1078 break;
1079 case TGSI_OPCODE_FLR:
1080 instr = ir_instr_create_alu(cf, FLOORv, ~0);
1081 add_regs_vector_1(ctx, inst, instr);
1082 break;
1083 case TGSI_OPCODE_EX2:
1084 instr = ir_instr_create_alu(cf, ~0, EXP_IEEE);
1085 add_regs_scalar_1(ctx, inst, instr);
1086 break;
1087 case TGSI_OPCODE_POW:
1088 translate_pow(ctx, inst);
1089 break;
1090 case TGSI_OPCODE_ABS:
1091 instr = ir_instr_create_alu(cf, MAXv, ~0);
1092 add_regs_vector_1(ctx, inst, instr);
1093 instr->regs[1]->flags |= IR_REG_NEGATE; /* src0 */
1094 break;
1095 case TGSI_OPCODE_COS:
1096 case TGSI_OPCODE_SIN:
1097 translate_trig(ctx, inst, opc);
1098 break;
1099 case TGSI_OPCODE_TEX:
1100 case TGSI_OPCODE_TXP:
1101 translate_tex(ctx, inst, opc);
1102 break;
1103 case TGSI_OPCODE_CMP:
1104 instr = ir_instr_create_alu(cf, CNDGTEv, ~0);
1105 add_regs_vector_3(ctx, inst, instr);
1106 // TODO this should be src0 if regs where in sane order..
1107 instr->regs[2]->flags ^= IR_REG_NEGATE; /* src1 */
1108 break;
1109 case TGSI_OPCODE_IF:
1110 push_predicate(ctx, &inst->Src[0].Register);
1111 ctx->so->ir->pred = IR_PRED_EQ;
1112 break;
1113 case TGSI_OPCODE_ELSE:
1114 ctx->so->ir->pred = IR_PRED_NE;
1115 /* not sure if this is required in all cases, but blob compiler
1116 * won't combine EQ and NE in same CF:
1117 */
1118 ctx->cf = NULL;
1119 break;
1120 case TGSI_OPCODE_ENDIF:
1121 pop_predicate(ctx);
1122 break;
1123 case TGSI_OPCODE_F2I:
1124 instr = ir_instr_create_alu(cf, TRUNCv, ~0);
1125 add_regs_vector_1(ctx, inst, instr);
1126 break;
1127 default:
1128 DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc));
1129 tgsi_dump(ctx->so->tokens, 0);
1130 assert(0);
1131 break;
1132 }
1133
1134 /* internal temporaries are only valid for the duration of a single
1135 * TGSI instruction:
1136 */
1137 ctx->num_internal_temps = 0;
1138 }
1139
1140 static void
1141 compile_instructions(struct fd_compile_context *ctx)
1142 {
1143 while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
1144 tgsi_parse_token(&ctx->parser);
1145
1146 switch (ctx->parser.FullToken.Token.Type) {
1147 case TGSI_TOKEN_TYPE_INSTRUCTION:
1148 translate_instruction(ctx,
1149 &ctx->parser.FullToken.FullInstruction);
1150 break;
1151 default:
1152 break;
1153 }
1154 }
1155
1156 ctx->cf->cf_type = EXEC_END;
1157 }
1158
1159 int
1160 fd_compile_shader(struct fd_program_stateobj *prog,
1161 struct fd_shader_stateobj *so)
1162 {
1163 struct fd_compile_context ctx;
1164
1165 ir_shader_destroy(so->ir);
1166 so->ir = ir_shader_create();
1167 so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0;
1168
1169 if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK)
1170 return -1;
1171
1172 if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1173 compile_vtx_fetch(&ctx);
1174 } else if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
1175 prog->num_exports = 0;
1176 memset(prog->export_linkage, 0xff,
1177 sizeof(prog->export_linkage));
1178 }
1179
1180 compile_instructions(&ctx);
1181
1182 compile_free(&ctx);
1183
1184 return 0;
1185 }
1186