1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
4 * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 * Rob Clark <robclark@freedesktop.org>
31 #include "pipe/p_state.h"
32 #include "util/u_string.h"
33 #include "util/u_memory.h"
34 #include "util/u_inlines.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "tgsi/tgsi_ureg.h"
37 #include "tgsi/tgsi_info.h"
38 #include "tgsi/tgsi_strings.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_scan.h"
42 #include "fd3_compiler.h"
43 #include "fd3_program.h"
46 #include "instr-a3xx.h"
/* Per-shader compilation state for the adreno a3xx TGSI -> ir3
 * translator.
 * NOTE(review): this extraction is missing several members that the
 * rest of the file references (e.g. 'type', the 'atomic' flag, the
 * output_updates[] array wrapper and its bound, and the closing
 * brace) -- confirm against the complete file before editing.
 */
50 struct fd3_compile_context
{
	/* TGSI token stream being compiled (also dumped on error) */
51 const struct tgsi_token
*tokens
;
	/* ir3 shader under construction */
52 struct ir3_shader
*ir
;
	/* shader state object: half_precision, immediates, first_immediate */
53 struct fd3_shader_stateobj
*so
;
	/* block currently being emitted into (see push_block()/pop_block()) */
55 struct ir3_block
*block
;
	/* last instruction allocated via instr_create()/instr_clone() */
56 struct ir3_instruction
*current_instr
;
58 /* we need to defer updates to block->outputs[] until the end
59 * of an instruction (so we don't see new value until *after*
60 * the src registers are processed)
	 */
	/* one pending write: *(instrp) = instr, applied in instr_finish() */
63 struct ir3_instruction
*instr
, **instrp
;
	/* number of pending output/temporary updates queued above */
65 unsigned num_output_updates
;
67 /* are we in a sequence of "atomic" instructions?
	 */
71 /* For fragment shaders, from the hw perspective the only
72 * actual input is r0.xy position register passed to bary.f.
73 * But TGSI doesn't know that, it still declares things as
74 * IN[] registers. So we do all the input tracking normally
75 * and fix things up after compile_instructions()
	 */
77 struct ir3_instruction
*frag_pos
;
79 struct tgsi_parse_context parser
;
82 struct tgsi_shader_info info
;
84 /* for calculating input/output positions/linkages: */
	/* count of internal temps handed out by get_internal_temp*() */
87 unsigned num_internal_temps
;
88 struct tgsi_src_register internal_temps
[6];
90 /* inputs start at r0, temporaries start after last input, and
91 * outputs start after last temporary.
93 * We could be more clever, because this is not a hw restriction,
94 * but probably best just to implement an optimizing pass to
95 * reduce the # of registers used and get rid of redundant mov's
96 * (to output register).
	 */
	/* per-register-file base offsets, indexed by TGSI_FILE_* */
98 unsigned base_reg
[TGSI_FILE_COUNT
];
100 /* idx/slot for last compiler generated immediate */
101 unsigned immediate_idx
;
103 /* stack of branch instructions that mark (potentially nested)
104 * branch if/else/loop/etc
	 */
106 struct ir3_instruction
*branch
[16];
107 unsigned int branch_count
;
109 /* used when dst is same as one of the src, to avoid overwriting a
110 * src element before the remaining scalar instructions that make
111 * up the vector operation
	 */
113 struct tgsi_dst_register tmp_dst
;
114 struct tgsi_src_register
*tmp_src
;
118 static void vectorize(struct fd3_compile_context
*ctx
,
119 struct ir3_instruction
*instr
, struct tgsi_dst_register
*dst
,
121 static void create_mov(struct fd3_compile_context
*ctx
,
122 struct tgsi_dst_register
*dst
, struct tgsi_src_register
*src
);
123 static type_t
get_ftype(struct fd3_compile_context
*ctx
);
126 compile_init(struct fd3_compile_context
*ctx
, struct fd3_shader_stateobj
*so
,
127 const struct tgsi_token
*tokens
)
129 unsigned ret
, base
= 0;
130 struct tgsi_shader_info
*info
= &ctx
->info
;
132 ctx
->tokens
= tokens
;
136 ctx
->num_internal_temps
= 0;
137 ctx
->branch_count
= 0;
139 ctx
->current_instr
= NULL
;
140 ctx
->num_output_updates
= 0;
143 memset(ctx
->base_reg
, 0, sizeof(ctx
->base_reg
));
145 tgsi_scan_shader(tokens
, &ctx
->info
);
147 #define FM(x) (1 << TGSI_FILE_##x)
148 /* optimize can't deal with relative addressing: */
149 if (info
->indirect_files
& (FM(TEMPORARY
) | FM(INPUT
) |
150 FM(OUTPUT
) | FM(IMMEDIATE
) | FM(CONSTANT
)))
151 return TGSI_PARSE_ERROR
;
153 /* Immediates go after constants: */
154 ctx
->base_reg
[TGSI_FILE_CONSTANT
] = 0;
155 ctx
->base_reg
[TGSI_FILE_IMMEDIATE
] =
156 info
->file_max
[TGSI_FILE_CONSTANT
] + 1;
158 /* if full precision and fragment shader, don't clobber
159 * r0.xy w/ bary fetch:
161 if ((so
->type
== SHADER_FRAGMENT
) && !so
->half_precision
)
164 /* Temporaries after outputs after inputs: */
165 ctx
->base_reg
[TGSI_FILE_INPUT
] = base
;
166 ctx
->base_reg
[TGSI_FILE_OUTPUT
] = base
+
167 info
->file_max
[TGSI_FILE_INPUT
] + 1;
168 ctx
->base_reg
[TGSI_FILE_TEMPORARY
] = base
+
169 info
->file_max
[TGSI_FILE_INPUT
] + 1 +
170 info
->file_max
[TGSI_FILE_OUTPUT
] + 1;
172 so
->first_immediate
= ctx
->base_reg
[TGSI_FILE_IMMEDIATE
];
173 ctx
->immediate_idx
= 4 * (ctx
->info
.file_max
[TGSI_FILE_IMMEDIATE
] + 1);
175 ret
= tgsi_parse_init(&ctx
->parser
, tokens
);
176 if (ret
!= TGSI_PARSE_OK
)
179 ctx
->type
= ctx
->parser
.FullHeader
.Processor
.Processor
;
/* Report a compile diagnostic: print the formatted message, then dump
 * the TGSI being compiled for context.
 * NOTE(review): lines are missing from this extraction (the va_list
 * declaration, va_end, and the function tail) -- edit only with the
 * full file in hand.
 */
185 compile_error(struct fd3_compile_context
*ctx
, const char *format
, ...)
	/* forward the varargs to the debug printer */
188 va_start(ap
, format
);
189 _debug_vprintf(format
, ap
);
	/* show what we were compiling when the error hit */
191 tgsi_dump(ctx
->tokens
, 0);
195 #define compile_assert(ctx, cond) do { \
196 if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
200 compile_free(struct fd3_compile_context
*ctx
)
202 tgsi_parse_free(&ctx
->parser
);
205 struct instr_translater
{
206 void (*fxn
)(const struct instr_translater
*t
,
207 struct fd3_compile_context
*ctx
,
208 struct tgsi_full_instruction
*inst
);
211 opc_t hopc
; /* opc to use for half_precision mode, if different */
/* Flush the deferred writes to block->outputs[]/temporaries[] queued
 * by ssa_dst(), so an instruction never observes its own result while
 * its src registers are still being processed.
 * NOTE(review): this extraction is missing the function header, the
 * loop-variable declaration, and (per the comments elsewhere) the
 * early-out taken inside atomic instruction groups -- confirm against
 * the full file.
 */
216 instr_finish(struct fd3_compile_context
*ctx
)
	/* apply each deferred update: *(instrp) = instr */
223 for (i
= 0; i
< ctx
->num_output_updates
; i
++)
224 *(ctx
->output_updates
[i
].instrp
) = ctx
->output_updates
[i
].instr
;
	/* queue is now empty */
226 ctx
->num_output_updates
= 0;
229 /* For "atomic" groups of instructions, for example the four scalar
230 * instructions to perform a vec4 operation. Basically this just
231 * blocks out handling of output_updates so the next scalar instruction
232 * still sees the result from before the start of the atomic group.
234 * NOTE: when used properly, this could probably replace get/put_dst()
238 instr_atomic_start(struct fd3_compile_context
*ctx
)
244 instr_atomic_end(struct fd3_compile_context
*ctx
)
250 static struct ir3_instruction
*
251 instr_create(struct fd3_compile_context
*ctx
, int category
, opc_t opc
)
254 return (ctx
->current_instr
= ir3_instr_create(ctx
->block
, category
, opc
));
257 static struct ir3_instruction
*
258 instr_clone(struct fd3_compile_context
*ctx
, struct ir3_instruction
*instr
)
261 return (ctx
->current_instr
= ir3_instr_clone(instr
));
264 static struct ir3_block
*
265 push_block(struct fd3_compile_context
*ctx
)
267 struct ir3_block
*block
;
268 unsigned ntmp
, nin
, nout
;
270 #define SCALAR_REGS(file) (4 * (ctx->info.file_max[TGSI_FILE_ ## file] + 1))
272 /* hmm, give ourselves room to create 4 extra temporaries (vec4):
274 ntmp
= SCALAR_REGS(TEMPORARY
);
277 /* for outermost block, 'inputs' are the actual shader INPUT
278 * register file. Reads from INPUT registers always go back to
279 * top block. For nested blocks, 'inputs' is used to track any
280 * TEMPORARY file register from one of the enclosing blocks that
281 * is ready in this block.
284 /* NOTE: fragment shaders actually have two inputs (r0.xy, the
287 nin
= SCALAR_REGS(INPUT
);
288 if (ctx
->type
== TGSI_PROCESSOR_FRAGMENT
)
294 nout
= SCALAR_REGS(OUTPUT
);
296 block
= ir3_block_create(ctx
->ir
, ntmp
, nin
, nout
);
298 block
->parent
= ctx
->block
;
305 pop_block(struct fd3_compile_context
*ctx
)
307 ctx
->block
= ctx
->block
->parent
;
308 compile_assert(ctx
, ctx
->block
);
312 ssa_dst(struct fd3_compile_context
*ctx
, struct ir3_instruction
*instr
,
313 const struct tgsi_dst_register
*dst
, unsigned chan
)
315 unsigned n
= regid(dst
->Index
, chan
);
316 unsigned idx
= ctx
->num_output_updates
;
318 compile_assert(ctx
, idx
< ARRAY_SIZE(ctx
->output_updates
));
320 /* NOTE: defer update of temporaries[idx] or output[idx]
321 * until instr_finish(), so that if the current instruction
322 * reads the same TEMP/OUT[] it gets the old value:
324 * bleh.. this might be a bit easier to just figure out
325 * in instr_finish(). But at that point we've already
326 * lost information about OUTPUT vs TEMPORARY register
331 case TGSI_FILE_OUTPUT
:
332 compile_assert(ctx
, n
< ctx
->block
->noutputs
);
333 ctx
->output_updates
[idx
].instrp
= &ctx
->block
->outputs
[n
];
334 ctx
->output_updates
[idx
].instr
= instr
;
335 ctx
->num_output_updates
++;
337 case TGSI_FILE_TEMPORARY
:
338 compile_assert(ctx
, n
< ctx
->block
->ntemporaries
);
339 ctx
->output_updates
[idx
].instrp
= &ctx
->block
->temporaries
[n
];
340 ctx
->output_updates
[idx
].instr
= instr
;
341 ctx
->num_output_updates
++;
346 static struct ir3_instruction
*
347 create_output(struct ir3_block
*block
, struct ir3_instruction
*instr
,
350 struct ir3_instruction
*out
;
352 out
= ir3_instr_create(block
, -1, OPC_META_OUTPUT
);
353 out
->inout
.block
= block
;
354 ir3_reg_create(out
, n
, 0);
356 ir3_reg_create(out
, 0, IR3_REG_SSA
)->instr
= instr
;
361 static struct ir3_instruction
*
362 create_input(struct ir3_block
*block
, struct ir3_instruction
*instr
,
365 struct ir3_instruction
*in
;
367 in
= ir3_instr_create(block
, -1, OPC_META_INPUT
);
368 in
->inout
.block
= block
;
369 ir3_reg_create(in
, n
, 0);
371 ir3_reg_create(in
, 0, IR3_REG_SSA
)->instr
= instr
;
376 static struct ir3_instruction
*
377 block_input(struct ir3_block
*block
, unsigned n
)
379 /* references to INPUT register file always go back up to
383 return block_input(block
->parent
, n
);
384 return block
->inputs
[n
];
387 /* return temporary in scope, creating if needed meta-input node
388 * to track block inputs
390 static struct ir3_instruction
*
391 block_temporary(struct ir3_block
*block
, unsigned n
)
393 /* references to TEMPORARY register file, find the nearest
394 * enclosing block which has already assigned this temporary,
395 * creating meta-input instructions along the way to keep
396 * track of block inputs
398 if (block
->parent
&& !block
->temporaries
[n
]) {
399 /* if already have input for this block, reuse: */
400 if (!block
->inputs
[n
])
401 block
->inputs
[n
] = block_temporary(block
->parent
, n
);
403 /* and create new input to return: */
404 return create_input(block
, block
->inputs
[n
], n
);
406 return block
->temporaries
[n
];
409 static struct ir3_instruction
*
410 create_immed(struct fd3_compile_context
*ctx
, float val
)
412 /* this can happen when registers (or components of a TGSI
413 * register) are used as src before they have been assigned
414 * (undefined contents). To avoid confusing the rest of the
415 * compiler, and to generally keep things peachy, substitute
416 * an instruction that sets the src to 0.0. Or to keep
417 * things undefined, I could plug in a random number? :-P
419 * NOTE: *don't* use instr_create() here!
421 struct ir3_instruction
*instr
;
422 instr
= ir3_instr_create(ctx
->block
, 1, 0);
423 instr
->cat1
.src_type
= get_ftype(ctx
);
424 instr
->cat1
.dst_type
= get_ftype(ctx
);
425 ir3_reg_create(instr
, 0, 0);
426 ir3_reg_create(instr
, 0, IR3_REG_IMMED
)->fim_val
= val
;
431 ssa_src(struct fd3_compile_context
*ctx
, struct ir3_register
*reg
,
432 const struct tgsi_src_register
*src
, unsigned chan
)
434 struct ir3_block
*block
= ctx
->block
;
435 unsigned n
= regid(src
->Index
, chan
);
438 case TGSI_FILE_INPUT
:
439 reg
->flags
|= IR3_REG_SSA
;
440 reg
->instr
= block_input(ctx
->block
, n
);
442 case TGSI_FILE_OUTPUT
:
443 /* really this should just happen in case of 'MOV_SAT OUT[n], ..',
444 * for the following clamp instructions:
446 reg
->flags
|= IR3_REG_SSA
;
447 reg
->instr
= block
->outputs
[n
];
448 /* we don't have to worry about read from an OUTPUT that was
449 * assigned outside of the current block, because the _SAT
450 * clamp instructions will always be in the same block as
451 * the original instruction which wrote the OUTPUT
453 compile_assert(ctx
, reg
->instr
);
455 case TGSI_FILE_TEMPORARY
:
456 reg
->flags
|= IR3_REG_SSA
;
457 reg
->instr
= block_temporary(ctx
->block
, n
);
461 if ((reg
->flags
& IR3_REG_SSA
) && !reg
->instr
) {
462 /* this can happen when registers (or components of a TGSI
463 * register) are used as src before they have been assigned
464 * (undefined contents). To avoid confusing the rest of the
465 * compiler, and to generally keep things peachy, substitute
466 * an instruction that sets the src to 0.0. Or to keep
467 * things undefined, I could plug in a random number? :-P
469 * NOTE: *don't* use instr_create() here!
471 reg
->instr
= create_immed(ctx
, 0.0);
475 static struct ir3_register
*
476 add_dst_reg_wrmask(struct fd3_compile_context
*ctx
,
477 struct ir3_instruction
*instr
, const struct tgsi_dst_register
*dst
,
478 unsigned chan
, unsigned wrmask
)
480 unsigned flags
= 0, num
= 0;
481 struct ir3_register
*reg
;
484 case TGSI_FILE_OUTPUT
:
485 case TGSI_FILE_TEMPORARY
:
486 num
= dst
->Index
+ ctx
->base_reg
[dst
->File
];
488 case TGSI_FILE_ADDRESS
:
492 compile_error(ctx
, "unsupported dst register file: %s\n",
493 tgsi_file_name(dst
->File
));
498 flags
|= IR3_REG_RELATIV
;
499 if (ctx
->so
->half_precision
)
500 flags
|= IR3_REG_HALF
;
502 reg
= ir3_reg_create(instr
, regid(num
, chan
), flags
);
504 /* NOTE: do not call ssa_dst() if atomic.. vectorize()
505 * itself will call ssa_dst(). This is to filter out
506 * the (initially bogus) .x component dst which is
507 * created (but not necessarily used, ie. if the net
508 * result of the vector operation does not write to
512 reg
->wrmask
= wrmask
;
516 ssa_dst(ctx
, instr
, dst
, chan
);
517 } else if ((dst
->File
== TGSI_FILE_TEMPORARY
) ||
518 (dst
->File
== TGSI_FILE_OUTPUT
)) {
521 /* if instruction writes multiple, we need to create
522 * some place-holder collect the registers:
524 for (i
= 0; i
< 4; i
++) {
525 if (wrmask
& (1 << i
)) {
526 struct ir3_instruction
*collect
=
527 ir3_instr_create(ctx
->block
, -1, OPC_META_FO
);
529 /* unused dst reg: */
530 ir3_reg_create(collect
, 0, 0);
531 /* and src reg used to hold original instr */
532 ir3_reg_create(collect
, 0, IR3_REG_SSA
)->instr
= instr
;
534 ssa_dst(ctx
, collect
, dst
, chan
+i
);
/* Single-component convenience wrapper around add_dst_reg_wrmask()
 * (writemask of just .x).
 */
static struct ir3_register *
add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
		const struct tgsi_dst_register *dst, unsigned chan)
{
	return add_dst_reg_wrmask(ctx, instr, dst, chan, 0x1);
}
549 static struct ir3_register
*
550 add_src_reg_wrmask(struct fd3_compile_context
*ctx
,
551 struct ir3_instruction
*instr
, const struct tgsi_src_register
*src
,
552 unsigned chan
, unsigned wrmask
)
554 unsigned flags
= 0, num
= 0;
555 struct ir3_register
*reg
;
557 /* TODO we need to use a mov to temp for const >= 64.. or maybe
558 * we could use relative addressing..
560 compile_assert(ctx
, src
->Index
< 64);
563 case TGSI_FILE_IMMEDIATE
:
564 /* TODO if possible, use actual immediate instead of const.. but
565 * TGSI has vec4 immediates, we can only embed scalar (of limited
566 * size, depending on instruction..)
568 case TGSI_FILE_CONSTANT
:
569 flags
|= IR3_REG_CONST
;
570 num
= src
->Index
+ ctx
->base_reg
[src
->File
];
572 case TGSI_FILE_OUTPUT
:
573 /* NOTE: we should only end up w/ OUTPUT file for things like
574 * clamp()'ing saturated dst instructions
576 case TGSI_FILE_INPUT
:
577 case TGSI_FILE_TEMPORARY
:
578 num
= src
->Index
+ ctx
->base_reg
[src
->File
];
581 compile_error(ctx
, "unsupported src register file: %s\n",
582 tgsi_file_name(src
->File
));
587 flags
|= IR3_REG_ABS
;
589 flags
|= IR3_REG_NEGATE
;
591 flags
|= IR3_REG_RELATIV
;
592 if (ctx
->so
->half_precision
)
593 flags
|= IR3_REG_HALF
;
595 reg
= ir3_reg_create(instr
, regid(num
, chan
), flags
);
597 reg
->wrmask
= wrmask
;
600 ssa_src(ctx
, reg
, src
, chan
);
601 } else if ((src
->File
== TGSI_FILE_TEMPORARY
) ||
602 (src
->File
== TGSI_FILE_OUTPUT
) ||
603 (src
->File
== TGSI_FILE_INPUT
)) {
604 struct ir3_instruction
*collect
;
607 /* if instruction reads multiple, we need to create
608 * some place-holder collect the registers:
610 collect
= ir3_instr_create(ctx
->block
, -1, OPC_META_FI
);
611 ir3_reg_create(collect
, 0, 0); /* unused dst reg */
613 for (i
= 0; i
< 4; i
++) {
614 if (wrmask
& (1 << i
)) {
615 /* and src reg used point to the original instr */
616 ssa_src(ctx
, ir3_reg_create(collect
, 0, IR3_REG_SSA
),
618 } else if (wrmask
& ~((i
<< i
) - 1)) {
619 /* if any remaining components, then dummy
620 * placeholder src reg to fill in the blanks:
622 ir3_reg_create(collect
, 0, 0);
626 reg
->flags
|= IR3_REG_SSA
;
627 reg
->instr
= collect
;
/* Single-component convenience wrapper around add_src_reg_wrmask()
 * (writemask of just .x).
 */
static struct ir3_register *
add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
		const struct tgsi_src_register *src, unsigned chan)
{
	return add_src_reg_wrmask(ctx, instr, src, chan, 0x1);
}
641 src_from_dst(struct tgsi_src_register
*src
, struct tgsi_dst_register
*dst
)
643 src
->File
= dst
->File
;
644 src
->Indirect
= dst
->Indirect
;
645 src
->Dimension
= dst
->Dimension
;
646 src
->Index
= dst
->Index
;
649 src
->SwizzleX
= TGSI_SWIZZLE_X
;
650 src
->SwizzleY
= TGSI_SWIZZLE_Y
;
651 src
->SwizzleZ
= TGSI_SWIZZLE_Z
;
652 src
->SwizzleW
= TGSI_SWIZZLE_W
;
655 /* Get internal-temp src/dst to use for a sequence of instructions
656 * generated by a single TGSI op.
658 static struct tgsi_src_register
*
659 get_internal_temp(struct fd3_compile_context
*ctx
,
660 struct tgsi_dst_register
*tmp_dst
)
662 struct tgsi_src_register
*tmp_src
;
665 tmp_dst
->File
= TGSI_FILE_TEMPORARY
;
666 tmp_dst
->WriteMask
= TGSI_WRITEMASK_XYZW
;
667 tmp_dst
->Indirect
= 0;
668 tmp_dst
->Dimension
= 0;
670 /* assign next temporary: */
671 n
= ctx
->num_internal_temps
++;
672 compile_assert(ctx
, n
< ARRAY_SIZE(ctx
->internal_temps
));
673 tmp_src
= &ctx
->internal_temps
[n
];
675 tmp_dst
->Index
= ctx
->info
.file_max
[TGSI_FILE_TEMPORARY
] + n
+ 1;
677 src_from_dst(tmp_src
, tmp_dst
);
682 /* Get internal half-precision temp src/dst to use for a sequence of
683 * instructions generated by a single TGSI op.
685 static struct tgsi_src_register
*
686 get_internal_temp_hr(struct fd3_compile_context
*ctx
,
687 struct tgsi_dst_register
*tmp_dst
)
689 struct tgsi_src_register
*tmp_src
;
692 if (ctx
->so
->half_precision
)
693 return get_internal_temp(ctx
, tmp_dst
);
695 tmp_dst
->File
= TGSI_FILE_TEMPORARY
;
696 tmp_dst
->WriteMask
= TGSI_WRITEMASK_XYZW
;
697 tmp_dst
->Indirect
= 0;
698 tmp_dst
->Dimension
= 0;
700 /* assign next temporary: */
701 n
= ctx
->num_internal_temps
++;
702 compile_assert(ctx
, n
< ARRAY_SIZE(ctx
->internal_temps
));
703 tmp_src
= &ctx
->internal_temps
[n
];
705 /* just use hr0 because no one else should be using half-
710 src_from_dst(tmp_src
, tmp_dst
);
716 is_const(struct tgsi_src_register
*src
)
718 return (src
->File
== TGSI_FILE_CONSTANT
) ||
719 (src
->File
== TGSI_FILE_IMMEDIATE
);
723 is_relative(struct tgsi_src_register
*src
)
725 return src
->Indirect
;
729 is_rel_or_const(struct tgsi_src_register
*src
)
731 return is_relative(src
) || is_const(src
);
735 get_ftype(struct fd3_compile_context
*ctx
)
737 return ctx
->so
->half_precision
? TYPE_F16
: TYPE_F32
;
741 get_utype(struct fd3_compile_context
*ctx
)
743 return ctx
->so
->half_precision
? TYPE_U16
: TYPE_U32
;
747 src_swiz(struct tgsi_src_register
*src
, int chan
)
750 case 0: return src
->SwizzleX
;
751 case 1: return src
->SwizzleY
;
752 case 2: return src
->SwizzleZ
;
753 case 3: return src
->SwizzleW
;
759 /* for instructions that cannot take a const register as src, if needed
760 * generate a move to temporary gpr:
762 static struct tgsi_src_register
*
763 get_unconst(struct fd3_compile_context
*ctx
, struct tgsi_src_register
*src
)
765 struct tgsi_dst_register tmp_dst
;
766 struct tgsi_src_register
*tmp_src
;
768 compile_assert(ctx
, is_rel_or_const(src
));
770 tmp_src
= get_internal_temp(ctx
, &tmp_dst
);
772 create_mov(ctx
, &tmp_dst
, src
);
778 get_immediate(struct fd3_compile_context
*ctx
,
779 struct tgsi_src_register
*reg
, uint32_t val
)
781 unsigned neg
, swiz
, idx
, i
;
782 /* actually maps 1:1 currently.. not sure if that is safe to rely on: */
783 static const unsigned swiz2tgsi
[] = {
784 TGSI_SWIZZLE_X
, TGSI_SWIZZLE_Y
, TGSI_SWIZZLE_Z
, TGSI_SWIZZLE_W
,
787 for (i
= 0; i
< ctx
->immediate_idx
; i
++) {
791 if (ctx
->so
->immediates
[idx
].val
[swiz
] == val
) {
796 if (ctx
->so
->immediates
[idx
].val
[swiz
] == -val
) {
802 if (i
== ctx
->immediate_idx
) {
803 /* need to generate a new immediate: */
807 ctx
->so
->immediates
[idx
].val
[swiz
] = val
;
808 ctx
->so
->immediates_count
= idx
+ 1;
809 ctx
->immediate_idx
++;
812 reg
->File
= TGSI_FILE_IMMEDIATE
;
818 reg
->SwizzleX
= swiz2tgsi
[swiz
];
819 reg
->SwizzleY
= swiz2tgsi
[swiz
];
820 reg
->SwizzleZ
= swiz2tgsi
[swiz
];
821 reg
->SwizzleW
= swiz2tgsi
[swiz
];
825 create_mov(struct fd3_compile_context
*ctx
, struct tgsi_dst_register
*dst
,
826 struct tgsi_src_register
*src
)
828 type_t type_mov
= get_ftype(ctx
);
831 for (i
= 0; i
< 4; i
++) {
832 /* move to destination: */
833 if (dst
->WriteMask
& (1 << i
)) {
834 struct ir3_instruction
*instr
;
836 if (src
->Absolute
|| src
->Negate
) {
837 /* can't have abs or neg on a mov instr, so use
838 * absneg.f instead to handle these cases:
840 instr
= instr_create(ctx
, 2, OPC_ABSNEG_F
);
842 instr
= instr_create(ctx
, 1, 0);
843 instr
->cat1
.src_type
= type_mov
;
844 instr
->cat1
.dst_type
= type_mov
;
847 add_dst_reg(ctx
, instr
, dst
, i
);
848 add_src_reg(ctx
, instr
, src
, src_swiz(src
, i
));
854 create_clamp(struct fd3_compile_context
*ctx
,
855 struct tgsi_dst_register
*dst
, struct tgsi_src_register
*val
,
856 struct tgsi_src_register
*minval
, struct tgsi_src_register
*maxval
)
858 struct ir3_instruction
*instr
;
860 instr
= instr_create(ctx
, 2, OPC_MAX_F
);
861 vectorize(ctx
, instr
, dst
, 2, val
, 0, minval
, 0);
863 instr
= instr_create(ctx
, 2, OPC_MIN_F
);
864 vectorize(ctx
, instr
, dst
, 2, val
, 0, maxval
, 0);
868 create_clamp_imm(struct fd3_compile_context
*ctx
,
869 struct tgsi_dst_register
*dst
,
870 uint32_t minval
, uint32_t maxval
)
872 struct tgsi_src_register minconst
, maxconst
;
873 struct tgsi_src_register src
;
875 src_from_dst(&src
, dst
);
877 get_immediate(ctx
, &minconst
, minval
);
878 get_immediate(ctx
, &maxconst
, maxval
);
880 create_clamp(ctx
, dst
, &src
, &minconst
, &maxconst
);
883 static struct tgsi_dst_register
*
884 get_dst(struct fd3_compile_context
*ctx
, struct tgsi_full_instruction
*inst
)
886 struct tgsi_dst_register
*dst
= &inst
->Dst
[0].Register
;
888 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
889 struct tgsi_src_register
*src
= &inst
->Src
[i
].Register
;
890 if ((src
->File
== dst
->File
) && (src
->Index
== dst
->Index
)) {
891 if ((dst
->WriteMask
== TGSI_WRITEMASK_XYZW
) &&
892 (src
->SwizzleX
== TGSI_SWIZZLE_X
) &&
893 (src
->SwizzleY
== TGSI_SWIZZLE_Y
) &&
894 (src
->SwizzleZ
== TGSI_SWIZZLE_Z
) &&
895 (src
->SwizzleW
== TGSI_SWIZZLE_W
))
897 ctx
->tmp_src
= get_internal_temp(ctx
, &ctx
->tmp_dst
);
898 ctx
->tmp_dst
.WriteMask
= dst
->WriteMask
;
907 put_dst(struct fd3_compile_context
*ctx
, struct tgsi_full_instruction
*inst
,
908 struct tgsi_dst_register
*dst
)
910 /* if necessary, add mov back into original dst: */
911 if (dst
!= &inst
->Dst
[0].Register
) {
912 create_mov(ctx
, &inst
->Dst
[0].Register
, ctx
->tmp_src
);
916 /* helper to generate the necessary repeat and/or additional instructions
917 * to turn a scalar instruction into a vector operation:
920 vectorize(struct fd3_compile_context
*ctx
, struct ir3_instruction
*instr
,
921 struct tgsi_dst_register
*dst
, int nsrcs
, ...)
926 instr_atomic_start(ctx
);
928 add_dst_reg(ctx
, instr
, dst
, TGSI_SWIZZLE_X
);
931 for (j
= 0; j
< nsrcs
; j
++) {
932 struct tgsi_src_register
*src
=
933 va_arg(ap
, struct tgsi_src_register
*);
934 unsigned flags
= va_arg(ap
, unsigned);
935 struct ir3_register
*reg
;
936 if (flags
& IR3_REG_IMMED
) {
937 reg
= ir3_reg_create(instr
, 0, IR3_REG_IMMED
);
938 /* this is an ugly cast.. should have put flags first! */
939 reg
->iim_val
= *(int *)&src
;
941 reg
= add_src_reg(ctx
, instr
, src
, TGSI_SWIZZLE_X
);
943 reg
->flags
|= flags
& ~IR3_REG_NEGATE
;
944 if (flags
& IR3_REG_NEGATE
)
945 reg
->flags
^= IR3_REG_NEGATE
;
949 for (i
= 0; i
< 4; i
++) {
950 if (dst
->WriteMask
& (1 << i
)) {
951 struct ir3_instruction
*cur
;
956 cur
= instr_clone(ctx
, instr
);
959 ssa_dst(ctx
, cur
, dst
, i
);
961 /* fix-up dst register component: */
962 cur
->regs
[0]->num
= regid(cur
->regs
[0]->num
>> 2, i
);
964 /* fix-up src register component: */
966 for (j
= 0; j
< nsrcs
; j
++) {
967 struct ir3_register
*reg
= cur
->regs
[j
+1];
968 struct tgsi_src_register
*src
=
969 va_arg(ap
, struct tgsi_src_register
*);
970 unsigned flags
= va_arg(ap
, unsigned);
971 if (reg
->flags
& IR3_REG_SSA
) {
972 ssa_src(ctx
, reg
, src
, src_swiz(src
, i
));
973 } else if (!(flags
& IR3_REG_IMMED
)) {
974 reg
->num
= regid(reg
->num
>> 2, src_swiz(src
, i
));
981 instr_atomic_end(ctx
);
985 * Handlers for TGSI instructions which do not have a 1:1 mapping to
986 * native instructions:
990 trans_clamp(const struct instr_translater
*t
,
991 struct fd3_compile_context
*ctx
,
992 struct tgsi_full_instruction
*inst
)
994 struct tgsi_dst_register
*dst
= get_dst(ctx
, inst
);
995 struct tgsi_src_register
*src0
= &inst
->Src
[0].Register
;
996 struct tgsi_src_register
*src1
= &inst
->Src
[1].Register
;
997 struct tgsi_src_register
*src2
= &inst
->Src
[2].Register
;
999 create_clamp(ctx
, dst
, src0
, src1
, src2
);
1001 put_dst(ctx
, inst
, dst
);
1004 /* ARL(x) = x, but mova from hrN.x to a0.. */
1006 trans_arl(const struct instr_translater
*t
,
1007 struct fd3_compile_context
*ctx
,
1008 struct tgsi_full_instruction
*inst
)
1010 struct ir3_instruction
*instr
;
1011 struct tgsi_dst_register tmp_dst
;
1012 struct tgsi_src_register
*tmp_src
;
1013 struct tgsi_dst_register
*dst
= &inst
->Dst
[0].Register
;
1014 struct tgsi_src_register
*src
= &inst
->Src
[0].Register
;
1015 unsigned chan
= src
->SwizzleX
;
1016 compile_assert(ctx
, dst
->File
== TGSI_FILE_ADDRESS
);
1018 tmp_src
= get_internal_temp_hr(ctx
, &tmp_dst
);
1020 /* cov.{f32,f16}s16 Rtmp, Rsrc */
1021 instr
= instr_create(ctx
, 1, 0);
1022 instr
->cat1
.src_type
= get_ftype(ctx
);
1023 instr
->cat1
.dst_type
= TYPE_S16
;
1024 add_dst_reg(ctx
, instr
, &tmp_dst
, chan
)->flags
|= IR3_REG_HALF
;
1025 add_src_reg(ctx
, instr
, src
, chan
);
1027 /* shl.b Rtmp, Rtmp, 2 */
1028 instr
= instr_create(ctx
, 2, OPC_SHL_B
);
1029 add_dst_reg(ctx
, instr
, &tmp_dst
, chan
)->flags
|= IR3_REG_HALF
;
1030 add_src_reg(ctx
, instr
, tmp_src
, chan
)->flags
|= IR3_REG_HALF
;
1031 ir3_reg_create(instr
, 0, IR3_REG_IMMED
)->iim_val
= 2;
1034 instr
= instr_create(ctx
, 1, 0);
1035 instr
->cat1
.src_type
= TYPE_S16
;
1036 instr
->cat1
.dst_type
= TYPE_S16
;
1037 add_dst_reg(ctx
, instr
, dst
, 0)->flags
|= IR3_REG_HALF
;
1038 add_src_reg(ctx
, instr
, tmp_src
, chan
)->flags
|= IR3_REG_HALF
;
1041 /* texture fetch/sample instructions: */
1043 trans_samp(const struct instr_translater
*t
,
1044 struct fd3_compile_context
*ctx
,
1045 struct tgsi_full_instruction
*inst
)
1047 struct ir3_instruction
*instr
;
1048 struct tgsi_src_register
*coord
= &inst
->Src
[0].Register
;
1049 struct tgsi_src_register
*samp
= &inst
->Src
[1].Register
;
1050 unsigned tex
= inst
->Texture
.Texture
;
1052 unsigned i
, flags
= 0, src_wrmask
;
1053 bool needs_mov
= false;
1056 case TGSI_OPCODE_TEX
:
1057 if (tex
== TGSI_TEXTURE_2D
) {
1058 order
= (int8_t[4]){ 0, 1, -1, -1 };
1059 src_wrmask
= TGSI_WRITEMASK_XY
;
1061 order
= (int8_t[4]){ 0, 1, 2, -1 };
1062 src_wrmask
= TGSI_WRITEMASK_XYZ
;
1065 case TGSI_OPCODE_TXP
:
1066 if (tex
== TGSI_TEXTURE_2D
) {
1067 order
= (int8_t[4]){ 0, 1, 3, -1 };
1068 src_wrmask
= TGSI_WRITEMASK_XYZ
;
1070 order
= (int8_t[4]){ 0, 1, 2, 3 };
1071 src_wrmask
= TGSI_WRITEMASK_XYZW
;
1073 flags
|= IR3_INSTR_P
;
1076 compile_assert(ctx
, 0);
1080 if ((tex
== TGSI_TEXTURE_3D
) || (tex
== TGSI_TEXTURE_CUBE
))
1081 flags
|= IR3_INSTR_3D
;
1083 /* cat5 instruction cannot seem to handle const or relative: */
1084 if (is_rel_or_const(coord
))
1087 /* The texture sample instructions need to coord in successive
1088 * registers/components (ie. src.xy but not src.yx). And TXP
1089 * needs the .w component in .z for 2D.. so in some cases we
1090 * might need to emit some mov instructions to shuffle things
1093 for (i
= 1; (i
< 4) && (order
[i
] >= 0) && !needs_mov
; i
++)
1094 if (src_swiz(coord
, i
) != (src_swiz(coord
, 0) + order
[i
]))
1098 struct tgsi_dst_register tmp_dst
;
1099 struct tgsi_src_register
*tmp_src
;
1102 type_t type_mov
= get_ftype(ctx
);
1104 /* need to move things around: */
1105 tmp_src
= get_internal_temp(ctx
, &tmp_dst
);
1107 for (j
= 0; (j
< 4) && (order
[j
] >= 0); j
++) {
1108 instr
= instr_create(ctx
, 1, 0);
1109 instr
->cat1
.src_type
= type_mov
;
1110 instr
->cat1
.dst_type
= type_mov
;
1111 add_dst_reg(ctx
, instr
, &tmp_dst
, j
);
1112 add_src_reg(ctx
, instr
, coord
,
1113 src_swiz(coord
, order
[j
]));
1119 instr
= instr_create(ctx
, 5, t
->opc
);
1120 instr
->cat5
.type
= get_ftype(ctx
);
1121 instr
->cat5
.samp
= samp
->Index
;
1122 instr
->cat5
.tex
= samp
->Index
;
1123 instr
->flags
|= flags
;
1125 add_dst_reg_wrmask(ctx
, instr
, &inst
->Dst
[0].Register
, 0,
1126 inst
->Dst
[0].Register
.WriteMask
);
1128 add_src_reg_wrmask(ctx
, instr
, coord
, coord
->SwizzleX
, src_wrmask
);
/*
 * Comparison / conditional-select instructions.  Each TGSI set-on-condition
 * op is lowered to a cmps.f into a temp followed by a cov (int->float) into
 * the real dst; CMP is lowered to cmps.f against {0.0} plus a sel:
 *
 * SEQ(a,b) = (a == b) ? 1.0 : 0.0
 *   cmps.f.eq tmp0, a, b
 *   cov.u16f16 dst, tmp0
 *
 * SNE(a,b) = (a != b) ? 1.0 : 0.0
 *   cmps.f.ne tmp0, a, b
 *   cov.u16f16 dst, tmp0
 *
 * SGE(a,b) = (a >= b) ? 1.0 : 0.0
 *   cmps.f.ge tmp0, a, b
 *   cov.u16f16 dst, tmp0
 *
 * SLE(a,b) = (a <= b) ? 1.0 : 0.0
 *   cmps.f.le tmp0, a, b
 *   cov.u16f16 dst, tmp0
 *
 * SGT(a,b) = (a > b) ? 1.0 : 0.0
 *   cmps.f.gt tmp0, a, b
 *   cov.u16f16 dst, tmp0
 *
 * SLT(a,b) = (a < b) ? 1.0 : 0.0
 *   cmps.f.lt tmp0, a, b
 *   cov.u16f16 dst, tmp0
 *
 * CMP(a,b,c) = (a < 0.0) ? b : c
 *   cmps.f.lt tmp0, a, {0.0}
 *   sel.b16 dst, b, tmp0, c
 */
static void
trans_cmp(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;
	struct tgsi_src_register constval0;
	/* final instruction for CMP() uses orig src1 and src2: */
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
	struct tgsi_src_register *a0, *a1, *a2;
	unsigned condition = 0;

	tmp_src = get_internal_temp(ctx, &tmp_dst);

	a0 = &inst->Src[0].Register;  /* a */
	a1 = &inst->Src[1].Register;  /* b */

	/* map TGSI opcode to the hw compare condition: */
	switch (t->tgsi_opc) {
	case TGSI_OPCODE_SEQ:
		condition = IR3_COND_EQ;
		break;
	case TGSI_OPCODE_SNE:
		condition = IR3_COND_NE;
		break;
	case TGSI_OPCODE_SGE:
		condition = IR3_COND_GE;
		break;
	case TGSI_OPCODE_SLT:
		condition = IR3_COND_LT;
		break;
	case TGSI_OPCODE_SLE:
		condition = IR3_COND_LE;
		break;
	case TGSI_OPCODE_SGT:
		condition = IR3_COND_GT;
		break;
	case TGSI_OPCODE_CMP:
		/* CMP compares src0 against an immediate zero: */
		get_immediate(ctx, &constval0, fui(0.0));
		a0 = &inst->Src[0].Register;  /* a */
		a1 = &constval0;              /* {0.0} */
		condition = IR3_COND_LT;
		break;
	default:
		compile_assert(ctx, 0);
		return;
	}

	/* hw can't take const for both srcs, move one to a temp: */
	if (is_const(a0) && is_const(a1))
		a0 = get_unconst(ctx, a0);

	/* cmps.f.<cond> tmp, a0, a1 */
	instr = instr_create(ctx, 2, OPC_CMPS_F);
	instr->cat2.condition = condition;
	vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);

	switch (t->tgsi_opc) {
	case TGSI_OPCODE_SEQ:
	case TGSI_OPCODE_SGE:
	case TGSI_OPCODE_SLE:
	case TGSI_OPCODE_SNE:
	case TGSI_OPCODE_SGT:
	case TGSI_OPCODE_SLT:
		/* cov.u16f16 dst, tmp0 -- convert the 0/~0 compare result
		 * to float 0.0/1.0:
		 */
		instr = instr_create(ctx, 1, 0);
		instr->cat1.src_type = get_utype(ctx);
		instr->cat1.dst_type = get_ftype(ctx);
		vectorize(ctx, instr, dst, 1, tmp_src, 0);
		break;
	case TGSI_OPCODE_CMP:
		a1 = &inst->Src[1].Register;
		a2 = &inst->Src[2].Register;
		/* sel.{b32,b16} dst, src2, tmp, src1 */
		instr = instr_create(ctx, 3,
				ctx->so->half_precision ? OPC_SEL_B16 : OPC_SEL_B32);
		vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0);
		break;
	}

	put_dst(ctx, inst, dst);
}
/*
 * Conditional / Flow control
 */

/* Push the meta:flow instruction for an open IF onto the branch stack,
 * so the matching ELSE/ENDIF can find it again.
 * NOTE(review): no bounds check against the fixed-size ctx->branch[]
 * array -- assumes TGSI nesting depth fits; confirm against array size.
 */
static void
push_branch(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
{
	ctx->branch[ctx->branch_count++] = instr;
}
/* Pop and return the most recently pushed branch (meta:flow) instruction.
 * Caller is expected to only call this with a non-empty branch stack.
 */
static struct ir3_instruction *
pop_branch(struct fd3_compile_context *ctx)
{
	return ctx->branch[--ctx->branch_count];
}
/* Translate TGSI IF: materialize the condition as a 0/-1 predicate,
 * emit a meta:flow instruction, push it on the branch stack, and open
 * a new block for the taken side of the branch.
 */
static void
trans_if(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;
	struct tgsi_src_register *src = &inst->Src[0].Register;
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;
	struct tgsi_src_register constval;

	get_immediate(ctx, &constval, fui(0.0));
	tmp_src = get_internal_temp(ctx, &tmp_dst);

	/* can't compare two consts -- move one through a temp: */
	if (is_const(src))
		src = get_unconst(ctx, src);

	/* cmps.f.eq tmp0, b, {0.0} */
	instr = instr_create(ctx, 2, OPC_CMPS_F);
	add_dst_reg(ctx, instr, &tmp_dst, 0);
	add_src_reg(ctx, instr, src, src->SwizzleX);
	add_src_reg(ctx, instr, &constval, constval.SwizzleX);
	instr->cat2.condition = IR3_COND_EQ;

	/* add.s tmp0, tmp0, -1 -- turn (cond==0)?1:0 into 0/-1: */
	instr = instr_create(ctx, 2, OPC_ADD_S);
	add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X);
	add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;

	/* meta:flow tmp0 -- placeholder consumed later by flatten/sched: */
	instr = instr_create(ctx, -1, OPC_META_FLOW);
	ir3_reg_create(instr, 0, 0);  /* dummy dst */
	add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);

	push_branch(ctx, instr);
	instr->flow.if_block = push_block(ctx);
}
/* Translate TGSI ELSE: close the if-block, re-push the same meta:flow
 * instruction, and open a fresh block for the else side.
 */
static void
trans_else(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;

	pop_block(ctx);

	instr = pop_branch(ctx);

	/* must be the meta:flow pushed by the matching IF: */
	compile_assert(ctx, (instr->category == -1) &&
			(instr->opc == OPC_META_FLOW));

	push_branch(ctx, instr);
	instr->flow.else_block = push_block(ctx);
}
1317 static struct ir3_instruction
*
1318 find_temporary(struct ir3_block
*block
, unsigned n
)
1320 if (block
->parent
&& !block
->temporaries
[n
])
1321 return find_temporary(block
->parent
, n
);
1322 return block
->temporaries
[n
];
1325 static struct ir3_instruction
*
1326 find_output(struct ir3_block
*block
, unsigned n
)
1328 if (block
->parent
&& !block
->outputs
[n
])
1329 return find_output(block
->parent
, n
);
1330 return block
->outputs
[n
];
/* Create a meta:PHI instruction selecting between 'a' (if-side value)
 * and 'b' (else-side value), predicated on 'cond' (the meta:flow instr).
 * Returns the new PHI instruction.
 */
static struct ir3_instruction *
create_phi(struct fd3_compile_context *ctx, struct ir3_instruction *cond,
		struct ir3_instruction *a, struct ir3_instruction *b)
{
	struct ir3_instruction *phi;

	compile_assert(ctx, cond);

	/* Either side of the condition could be null..  which
	 * indicates a variable written on only one side of the
	 * branch.  Normally this should only be variables not
	 * used outside of that side of the branch.  So we could
	 * just 'return a ? a : b;' in that case.  But for better
	 * defined undefined behavior we just stick in imm{0.0}.
	 * In the common case of a value only used within the
	 * one side of the branch, the PHI instruction will not
	 * get scheduled
	 */
	if (!a)
		a = create_immed(ctx, 0.0);
	if (!b)
		b = create_immed(ctx, 0.0);

	phi = instr_create(ctx, -1, OPC_META_PHI);
	ir3_reg_create(phi, 0, 0);  /* dummy dst */
	ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = cond;
	ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = a;
	ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = b;

	return phi;
}
/* Translate TGSI ENDIF: close the open block, then for every temporary
 * and every shader output written inside the if (and/or else) block,
 * promote the written values to block outputs and emit a PHI in the
 * parent block merging the two sides.  If there was no else block, the
 * parent block supplies the branch-not-taken value.
 */
static void
trans_endif(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;
	struct ir3_block *ifb, *elseb;
	struct ir3_instruction **ifout, **elseout;
	unsigned i, ifnout = 0, elsenout = 0;

	pop_block(ctx);

	instr = pop_branch(ctx);

	/* must be the meta:flow pushed by the matching IF/ELSE: */
	compile_assert(ctx, (instr->category == -1) &&
			(instr->opc == OPC_META_FLOW));

	ifb = instr->flow.if_block;
	elseb = instr->flow.else_block;
	/* if there is no else block, the parent block is used for the
	 * branch-not-taken src of the PHI instructions:
	 */
	if (!elseb)
		elseb = ifb->parent;

	/* count up number of outputs for each block: */
	for (i = 0; i < ifb->ntemporaries; i++) {
		if (ifb->temporaries[i])
			ifnout++;
		if (elseb->temporaries[i])
			elsenout++;
	}
	for (i = 0; i < ifb->noutputs; i++) {
		if (ifb->outputs[i])
			ifnout++;
		if (elseb->outputs[i])
			elsenout++;
	}

	/* allocate new outputs[] arrays sized by the counts above: */
	ifout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * ifnout);
	if (elseb != ifb->parent)
		elseout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * elsenout);

	/* counters are reused below as running indices: */
	ifnout = 0;
	elsenout = 0;

	/* generate PHI instructions for any temporaries written: */
	for (i = 0; i < ifb->ntemporaries; i++) {
		struct ir3_instruction *a = ifb->temporaries[i];
		struct ir3_instruction *b = elseb->temporaries[i];

		/* if temporary written in if-block, or if else block
		 * is present and temporary written in else-block:
		 */
		if (a || ((elseb != ifb->parent) && b)) {
			struct ir3_instruction *phi;

			/* if only written on one side, find the closest
			 * enclosing update on other side:
			 */
			if (!a)
				a = find_temporary(ifb, i);
			if (!b)
				b = find_temporary(elseb, i);

			ifout[ifnout] = a;
			a = create_output(ifb, a, ifnout++);

			if (elseb != ifb->parent) {
				elseout[elsenout] = b;
				b = create_output(elseb, b, elsenout++);
			}

			phi = create_phi(ctx, instr, a, b);
			ctx->block->temporaries[i] = phi;
		}
	}

	/* .. and any outputs written: */
	for (i = 0; i < ifb->noutputs; i++) {
		struct ir3_instruction *a = ifb->outputs[i];
		struct ir3_instruction *b = elseb->outputs[i];

		/* if output written in if-block, or if else block
		 * is present and output written in else-block:
		 */
		if (a || ((elseb != ifb->parent) && b)) {
			struct ir3_instruction *phi;

			/* if only written on one side, find the closest
			 * enclosing update on other side:
			 */
			if (!a)
				a = find_output(ifb, i);
			if (!b)
				b = find_output(elseb, i);

			ifout[ifnout] = a;
			a = create_output(ifb, a, ifnout++);

			if (elseb != ifb->parent) {
				elseout[elsenout] = b;
				b = create_output(elseb, b, elsenout++);
			}

			phi = create_phi(ctx, instr, a, b);
			ctx->block->outputs[i] = phi;
		}
	}

	/* install the compacted outputs arrays: */
	ifb->noutputs = ifnout;
	ifb->outputs = ifout;

	if (elseb != ifb->parent) {
		elseb->noutputs = elsenout;
		elseb->outputs = elseout;
	}

	// TODO maybe we want to compact block->inputs?
}
/*
 * Handlers for TGSI instructions which do have 1:1 mapping to native
 * instructions:
 */

/* cat0 (flow/end/kill): emit the native opcode directly, no operands
 * to translate.
 */
static void
instr_cat0(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	instr_create(ctx, 0, t->opc);
}
/* cat1 (mov): translate TGSI MOV.  A negate modifier on the src can't
 * be expressed by a mov, so it is lowered to 'add.f dst, -src, {0.0}'
 * instead.
 */
static void
instr_cat1(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
	struct tgsi_src_register *src = &inst->Src[0].Register;

	/* mov instructions can't handle a negate on src: */
	if (src->Negate) {
		struct tgsi_src_register constval;
		struct ir3_instruction *instr;

		/* since right now, we are using uniformly either TYPE_F16 or
		 * TYPE_F32, and we don't utilize the conversion possibilities
		 * of mov instructions, we can get away with substituting an
		 * add.f which can handle negate.  Might need to revisit this
		 * in the future if we start supporting widening/narrowing or
		 * conversion to/from integer..
		 */
		instr = instr_create(ctx, 2, OPC_ADD_F);
		get_immediate(ctx, &constval, fui(0.0));
		vectorize(ctx, instr, dst, 2, src, 0, &constval, 0);
	} else {
		create_mov(ctx, dst, src);
		/* create_mov() generates vector sequence, so no vectorize() */
	}
	put_dst(ctx, inst, dst);
}
/* cat2 (ALU): translate two-src (or, for a few opcodes, single-src)
 * ALU ops.  ABS/SUB are expressed via src modifier flags rather than
 * distinct opcodes.
 */
static void
instr_cat2(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
	struct ir3_instruction *instr;
	unsigned src0_flags = 0, src1_flags = 0;

	switch (t->tgsi_opc) {
	case TGSI_OPCODE_ABS:
		/* ABS = absneg.f with (abs) modifier on src0: */
		src0_flags = IR3_REG_ABS;
		break;
	case TGSI_OPCODE_SUB:
		/* SUB = add.f with (neg) modifier on src1: */
		src1_flags = IR3_REG_NEGATE;
		break;
	}

	/* NOTE(review): the single-src opcode list below was reconstructed;
	 * confirm the exact set of cat2 single-src opcodes against
	 * instr-a3xx.h.
	 */
	switch (t->opc) {
	case OPC_ABSNEG_F:
	case OPC_ABSNEG_S:
	case OPC_CLZ_B:
	case OPC_NOT_B:
	case OPC_NEG_B:
	case OPC_NEG_S:
	case OPC_NEG_U:
	case OPC_BFREV_B:
	case OPC_SETRM:
	case OPC_CBITS_B:
		/* these only have one src reg */
		instr = instr_create(ctx, 2, t->opc);
		vectorize(ctx, instr, dst, 1, src0, src0_flags);
		break;
	default:
		/* hw can't take const for both srcs, move one to a temp: */
		if (is_const(src0) && is_const(src1))
			src0 = get_unconst(ctx, src0);

		instr = instr_create(ctx, 2, t->opc);
		vectorize(ctx, instr, dst, 2, src0, src0_flags,
				src1, src1_flags);
		break;
	}

	put_dst(ctx, inst, dst);
}
/* cat3 (three-src ALU, e.g. mad): translate TGSI three-operand ops.
 * src1 cannot be const/relative for cat3; for commutative mad the first
 * two srcs are swapped, otherwise src1 is moved through a temp.
 */
static void
instr_cat3(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
	struct ir3_instruction *instr;

	/* in particular, can't handle const for src1 for cat3..
	 * for mad, we can swap first two src's if needed:
	 */
	if (is_rel_or_const(src1)) {
		if (is_mad(t->opc) && !is_rel_or_const(src0)) {
			struct tgsi_src_register *tmp;
			tmp = src0;
			src0 = src1;
			src1 = tmp;
		} else {
			src1 = get_unconst(ctx, src1);
		}
	}

	/* half-precision shaders use the f16 variant of the opcode: */
	instr = instr_create(ctx, 3,
			ctx->so->half_precision ? t->hopc : t->opc);
	vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
			&inst->Src[2].Register, 0);
	put_dst(ctx, inst, dst);
}
/* cat4 (sfu: rcp/rsq/sin/cos/...): these are scalar ops, so one
 * instruction is emitted per enabled dst component, each reading the
 * same (SwizzleX) src component.
 */
static void
instr_cat4(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
	struct tgsi_src_register *src = &inst->Src[0].Register;
	struct ir3_instruction *instr;
	unsigned i;

	/* seems like blob compiler avoids const as src.. */
	if (is_const(src))
		src = get_unconst(ctx, src);

	/* we need to replicate into each component: */
	for (i = 0; i < 4; i++) {
		if (dst->WriteMask & (1 << i)) {
			instr = instr_create(ctx, 4, t->opc);
			add_dst_reg(ctx, instr, dst, i);
			add_src_reg(ctx, instr, src, src->SwizzleX);
		}
	}

	put_dst(ctx, inst, dst);
}
/* Dispatch table mapping each supported TGSI opcode to its translater
 * function plus the native opcode(s) / extra argument it needs.
 * Unlisted opcodes have a NULL .fxn and are reported as errors by
 * compile_instructions().
 */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
	[TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ }

	INSTR(MOV,          instr_cat1),
	INSTR(RCP,          instr_cat4, .opc = OPC_RCP),
	INSTR(RSQ,          instr_cat4, .opc = OPC_RSQ),
	INSTR(SQRT,         instr_cat4, .opc = OPC_SQRT),
	INSTR(MUL,          instr_cat2, .opc = OPC_MUL_F),
	INSTR(ADD,          instr_cat2, .opc = OPC_ADD_F),
	INSTR(SUB,          instr_cat2, .opc = OPC_ADD_F),
	INSTR(MIN,          instr_cat2, .opc = OPC_MIN_F),
	INSTR(MAX,          instr_cat2, .opc = OPC_MAX_F),
	INSTR(MAD,          instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
	INSTR(TRUNC,        instr_cat2, .opc = OPC_TRUNC_F),
	INSTR(CLAMP,        trans_clamp),
	INSTR(FLR,          instr_cat2, .opc = OPC_FLOOR_F),
	INSTR(ROUND,        instr_cat2, .opc = OPC_RNDNE_F),
	INSTR(ARL,          trans_arl),
	INSTR(EX2,          instr_cat4, .opc = OPC_EXP2),
	INSTR(LG2,          instr_cat4, .opc = OPC_LOG2),
	INSTR(ABS,          instr_cat2, .opc = OPC_ABSNEG_F),
	INSTR(COS,          instr_cat4, .opc = OPC_COS),
	INSTR(SIN,          instr_cat4, .opc = OPC_SIN),
	INSTR(TEX,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
	INSTR(TXP,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
	INSTR(SGT,          trans_cmp),
	INSTR(SLT,          trans_cmp),
	INSTR(SGE,          trans_cmp),
	INSTR(SLE,          trans_cmp),
	INSTR(SNE,          trans_cmp),
	INSTR(SEQ,          trans_cmp),
	INSTR(CMP,          trans_cmp),
	INSTR(IF,           trans_if),
	INSTR(ELSE,         trans_else),
	INSTR(ENDIF,        trans_endif),
	INSTR(END,          instr_cat0, .opc = OPC_END),
	INSTR(KILL,         instr_cat0, .opc = OPC_KILL),
};
/* Pack a TGSI semantic (name, index) pair into the driver's compact
 * semantic encoding used in the shader state object.
 */
static unsigned
decl_semantic(const struct tgsi_declaration_semantic *sem)
{
	return fd3_semantic_name(sem->Name, sem->Index);
}
/* Handle a TGSI input declaration: record per-input metadata (semantic,
 * register, varying location) in the state object and create the
 * corresponding ir3 input instructions -- bary.f varying fetches for
 * fragment shaders, plain meta-inputs for vertex shaders.
 */
static void
decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
	struct fd3_shader_stateobj *so = ctx->so;
	unsigned base = ctx->base_reg[TGSI_FILE_INPUT];
	unsigned i, flags = 0;

	/* I don't think we should get frag shader input without
	 * semantic info?  Otherwise how do inputs get linked to
	 * vert outputs?
	 */
	compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) ||
			decl->Declaration.Semantic);

	if (ctx->so->half_precision)
		flags |= IR3_REG_HALF;

	for (i = decl->Range.First; i <= decl->Range.Last; i++) {
		unsigned n = so->inputs_count++;
		unsigned r = regid(i + base, 0);
		unsigned ncomp, j;

		/* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */
		ncomp = 4;

		DBG("decl in -> r%d", i + base);

		so->inputs[n].semantic = decl_semantic(&decl->Semantic);
		so->inputs[n].compmask = (1 << ncomp) - 1;
		so->inputs[n].regid = r;
		so->inputs[n].inloc = ctx->next_inloc;
		ctx->next_inloc += ncomp;

		so->total_in += ncomp;

		for (j = 0; j < ncomp; j++) {
			struct ir3_instruction *instr;

			if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
				struct ir3_register *src;

				/* varying fetch: bary.f rN.c, inloc, r0.xy */
				instr = instr_create(ctx, 2, OPC_BARY_F);

				/* dst register: */
				ir3_reg_create(instr, r + j, flags);

				/* input position: */
				ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val =
						so->inputs[n].inloc + j - 8;

				/* input base (always r0.xy): */
				src = ir3_reg_create(instr, regid(0,0), IR3_REG_SSA);
				src->wrmask = 0x3;
				src->instr = ctx->frag_pos;
			} else {
				instr = create_input(ctx->block, NULL, (i * 4) + j);
			}

			ctx->block->inputs[(i * 4) + j] = instr;
		}
	}
}
/* Handle a TGSI output declaration: validate the semantic for the
 * shader stage, record output register/semantic in the state object,
 * and seed each output component with imm{0.0} so unwritten outputs
 * are still defined.
 */
static void
decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
	struct fd3_shader_stateobj *so = ctx->so;
	unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
	unsigned comp = 0;
	unsigned name = decl->Semantic.Name;
	unsigned i;

	compile_assert(ctx, decl->Declaration.Semantic);  /* TODO is this ok? */

	DBG("decl out[%d] -> r%d", name, decl->Range.First + base);

	if (ctx->type == TGSI_PROCESSOR_VERTEX) {
		switch (name) {
		case TGSI_SEMANTIC_POSITION:
			so->writes_pos = true;
			break;
		case TGSI_SEMANTIC_PSIZE:
		case TGSI_SEMANTIC_COLOR:
		case TGSI_SEMANTIC_GENERIC:
		case TGSI_SEMANTIC_FOG:
		case TGSI_SEMANTIC_TEXCOORD:
			break;
		default:
			compile_error(ctx, "unknown VS semantic name: %s\n",
					tgsi_semantic_names[name]);
		}
	} else {
		switch (name) {
		case TGSI_SEMANTIC_POSITION:
			comp = 2;  /* tgsi will write to .z component */
			so->writes_pos = true;
			break;
		case TGSI_SEMANTIC_COLOR:
			break;
		default:
			compile_error(ctx, "unknown FS semantic name: %s\n",
					tgsi_semantic_names[name]);
		}
	}

	for (i = decl->Range.First; i <= decl->Range.Last; i++) {
		unsigned n = so->outputs_count++;
		unsigned ncomp, j;

		ncomp = 4;

		so->outputs[n].semantic = decl_semantic(&decl->Semantic);
		so->outputs[n].regid = regid(i + base, comp);

		/* avoid undefined outputs, stick a dummy mov from imm{0.0},
		 * which if the output is actually assigned will be over-
		 * written
		 */
		for (j = 0; j < ncomp; j++)
			ctx->block->outputs[(i * 4) + j] = create_immed(ctx, 0.0);
	}
}
/* Handle a TGSI sampler declaration: just bump the sampler count. */
static void
decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
	ctx->so->samplers_count++;
}
/* Main translation loop: walk the TGSI token stream, dispatching
 * declarations, immediates, and instructions (via the translaters[]
 * table), then fix up the fragment shader's implicit r0.xy input used
 * as the base for bary.f varying fetches.
 */
static void
compile_instructions(struct fd3_compile_context *ctx)
{
	push_block(ctx);

	/* for fragment shader, we have a single input register (r0.xy)
	 * which is used as the base for bary.f varying fetch instrs:
	 */
	if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
		struct ir3_instruction *instr;
		instr = ir3_instr_create(ctx->block, -1, OPC_META_FI);
		ir3_reg_create(instr, 0, 0);
		ir3_reg_create(instr, 0, IR3_REG_SSA);    /* r0.x */
		ir3_reg_create(instr, 0, IR3_REG_SSA);    /* r0.y */
		ctx->frag_pos = instr;
	}

	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
		tgsi_parse_token(&ctx->parser);

		switch (ctx->parser.FullToken.Token.Type) {
		case TGSI_TOKEN_TYPE_DECLARATION: {
			struct tgsi_full_declaration *decl =
					&ctx->parser.FullToken.FullDeclaration;
			if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
				decl_out(ctx, decl);
			} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
				decl_in(ctx, decl);
			} else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
				decl_samp(ctx, decl);
			}
			break;
		}
		case TGSI_TOKEN_TYPE_IMMEDIATE: {
			/* TODO: if we know the immediate is small enough, and only
			 * used with instructions that can embed an immediate, we
			 * can skip this:
			 */
			struct tgsi_full_immediate *imm =
					&ctx->parser.FullToken.FullImmediate;
			unsigned n = ctx->so->immediates_count++;
			/* 16 bytes = one vec4 immediate: */
			memcpy(ctx->so->immediates[n].val, imm->u, 16);
			break;
		}
		case TGSI_TOKEN_TYPE_INSTRUCTION: {
			struct tgsi_full_instruction *inst =
					&ctx->parser.FullToken.FullInstruction;
			unsigned opc = inst->Instruction.Opcode;
			const struct instr_translater *t = &translaters[opc];

			if (t->fxn) {
				t->fxn(t, ctx, inst);
				/* internal temps are per-instruction scratch: */
				ctx->num_internal_temps = 0;
			} else {
				compile_error(ctx, "unknown TGSI opc: %s\n",
						tgsi_get_opcode_name(opc));
			}

			/* apply saturate modifier as an extra clamp: */
			switch (inst->Instruction.Saturate) {
			case TGSI_SAT_ZERO_ONE:
				create_clamp_imm(ctx, &inst->Dst[0].Register,
						fui(0.0), fui(1.0));
				break;
			case TGSI_SAT_MINUS_PLUS_ONE:
				create_clamp_imm(ctx, &inst->Dst[0].Register,
						fui(-1.0), fui(1.0));
				break;
			}

			instr_finish(ctx);

			break;
		}
		default:
			break;
		}
	}

	/* fixup actual inputs for frag shader: */
	if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
		struct ir3_instruction *instr;

		ctx->block->ninputs = 2;

		/* r0.x */
		instr = create_input(ctx->block, NULL, 0);
		ctx->block->inputs[0] = instr;
		ctx->frag_pos->regs[1]->instr = instr;

		/* r0.y */
		instr = create_input(ctx->block, NULL, 1);
		ctx->block->inputs[1] = instr;
		ctx->frag_pos->regs[2]->instr = instr;
	}
}
/* Debug helper: dump the current shader IR as a graphviz .dot file
 * named "<vert|frag>-NNNN.dot" in the current directory; silently
 * returns if the file can't be opened.
 */
static void
compile_dump(struct fd3_compile_context *ctx)
{
	const char *name = (ctx->so->type == SHADER_VERTEX) ? "vert" : "frag";
	static unsigned n = 0;  /* sequence number across all dumps */
	char fname[16];
	FILE *f;
	snprintf(fname, sizeof(fname), "%s-%04u.dot", name, n++);
	f = fopen(fname, "w");
	if (!f)
		return;
	/* depth pass needed so the dump reflects scheduling depth: */
	ir3_block_depth(ctx->block);
	ir3_shader_dump(ctx->ir, name, ctx->block, f);
	fclose(f);
}
1927 fd3_compile_shader(struct fd3_shader_stateobj
*so
,
1928 const struct tgsi_token
*tokens
)
1930 struct fd3_compile_context ctx
;
1931 unsigned i
, actual_in
;
1936 so
->ir
= ir3_shader_create();
1940 if (compile_init(&ctx
, so
, tokens
) != TGSI_PARSE_OK
) {
1945 compile_instructions(&ctx
);
1947 if (fd_mesa_debug
& FD_DBG_OPTDUMP
)
1950 ret
= ir3_block_flatten(ctx
.block
);
1953 if ((ret
> 0) && (fd_mesa_debug
& FD_DBG_OPTDUMP
))
1956 ir3_block_cp(ctx
.block
);
1958 if (fd_mesa_debug
& FD_DBG_OPTDUMP
)
1961 ir3_block_depth(ctx
.block
);
1963 if (fd_mesa_debug
& FD_DBG_OPTMSGS
) {
1964 printf("AFTER DEPTH:\n");
1965 ir3_dump_instr_list(ctx
.block
->head
);
1968 ir3_block_sched(ctx
.block
);
1970 if (fd_mesa_debug
& FD_DBG_OPTMSGS
) {
1971 printf("AFTER SCHED:\n");
1972 ir3_dump_instr_list(ctx
.block
->head
);
1975 ret
= ir3_block_ra(ctx
.block
, so
->type
);
1979 if (fd_mesa_debug
& FD_DBG_OPTMSGS
) {
1980 printf("AFTER RA:\n");
1981 ir3_dump_instr_list(ctx
.block
->head
);
1984 /* fixup input/outputs: */
1985 for (i
= 0; i
< so
->outputs_count
; i
++) {
1986 so
->outputs
[i
].regid
= ctx
.block
->outputs
[i
*4]->regs
[0]->num
;
1987 /* preserve hack for depth output.. tgsi writes depth to .z,
1988 * but what we give the hw is the scalar register:
1990 if ((ctx
.type
== TGSI_PROCESSOR_FRAGMENT
) &&
1991 (sem2name(so
->outputs
[i
].semantic
) == TGSI_SEMANTIC_POSITION
))
1992 so
->outputs
[i
].regid
+= 2;
1994 /* Note that some or all channels of an input may be unused: */
1996 for (i
= 0; i
< so
->inputs_count
; i
++) {
1997 unsigned j
, regid
= ~0, compmask
= 0;
1998 for (j
= 0; j
< 4; j
++) {
1999 struct ir3_instruction
*in
= ctx
.block
->inputs
[(i
*4) + j
];
2001 compmask
|= (1 << j
);
2002 regid
= in
->regs
[0]->num
- j
;
2006 so
->inputs
[i
].regid
= regid
;
2007 so
->inputs
[i
].compmask
= compmask
;
2010 /* fragment shader always gets full vec4's even if it doesn't
2011 * fetch all components, but vertex shader we need to update
2012 * with the actual number of components fetch, otherwise thing
2013 * will hang due to mismaptch between VFD_DECODE's and
2016 if (so
->type
== SHADER_VERTEX
)
2017 so
->total_in
= actual_in
;
2021 ir3_shader_destroy(so
->ir
);