2 * Copyright (c) 2012-2015 Etnaviv Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 * Wladimir J. van der Laan <laanwj@gmail.com>
27 /* TGSI->Vivante shader ISA conversion */
29 /* What does the compiler return (see etna_shader_object)?
31 * 2) input-to-temporary mapping (fixed for ps)
32 * *) in case of ps, semantic -> varying id mapping
33 * *) for each varying: number of components used (r, rg, rgb, rgba)
34 * 3) temporary-to-output mapping (in case of vs, fixed for ps)
35 * 4) for each input/output: possible semantic (position, color, glpointcoord, ...)
36 * 5) immediates base offset, immediates data
37 * 6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
38 * configure the hw, but useful for error checking
39 * 7) enough information to add the z=(z+w)/2.0 necessary for older chips
40 * (output reg id is enough)
42 * Empty shaders are not allowed, should always at least generate a NOP. Also
43 * if there is a label at the end of the shader, an extra NOP should be
44 * generated as jump target.
47 * * Use an instruction scheduler
48 * * Indirect access to uniforms / temporaries using amode
51 #include "etnaviv_compiler.h"
53 #include "etnaviv_asm.h"
54 #include "etnaviv_context.h"
55 #include "etnaviv_debug.h"
56 #include "etnaviv_disasm.h"
57 #include "etnaviv_uniforms.h"
58 #include "etnaviv_util.h"
60 #include "pipe/p_shader_tokens.h"
61 #include "tgsi/tgsi_info.h"
62 #include "tgsi/tgsi_iterate.h"
63 #include "tgsi/tgsi_lowering.h"
64 #include "tgsi/tgsi_strings.h"
65 #include "tgsi/tgsi_util.h"
66 #include "util/u_math.h"
67 #include "util/u_memory.h"
72 #include <sys/types.h>
74 #define ETNA_MAX_INNER_TEMPS 2
76 static const float sincos_const
[2][4] = {
81 1. / (2. * M_PI
), 0.75, 0.5, 0.0,
85 /* Native register description structure */
86 struct etna_native_reg
{
88 unsigned is_tex
: 1; /* is texture unit, overrides rgroup */
93 /* Register description */
94 struct etna_reg_desc
{
95 enum tgsi_file_type file
; /* IN, OUT, TEMP, ... */
96 int idx
; /* index into file */
97 bool active
; /* used in program */
98 int first_use
; /* instruction id of first use (scope begin) */
99 int last_use
; /* instruction id of last use (scope end, inclusive) */
101 struct etna_native_reg native
; /* native register to map to */
102 unsigned usage_mask
: 4; /* usage, per channel */
103 bool has_semantic
; /* register has associated TGSI semantic */
104 struct tgsi_declaration_semantic semantic
; /* TGSI semantic */
105 struct tgsi_declaration_interp interp
; /* Interpolation type */
108 /* Label information structure */
109 struct etna_compile_label
{
110 int inst_idx
; /* Instruction id that label points to */
113 enum etna_compile_frame_type
{
114 ETNA_COMPILE_FRAME_IF
, /* IF/ELSE/ENDIF */
115 ETNA_COMPILE_FRAME_LOOP
,
118 /* nesting scope frame (LOOP, IF, ...) during compilation
120 struct etna_compile_frame
{
121 enum etna_compile_frame_type type
;
124 int lbl_loop_bgn_idx
;
125 int lbl_loop_end_idx
;
128 struct etna_compile_file
{
129 /* Number of registers in each TGSI file (max register+1) */
131 /* Register descriptions, per register index */
132 struct etna_reg_desc
*reg
;
135 #define array_insert(arr, val) \
137 if (arr##_count == arr##_sz) { \
138 arr##_sz = MAX2(2 * arr##_sz, 16); \
139 arr = realloc(arr, arr##_sz * sizeof(arr[0])); \
141 arr[arr##_count++] = val; \
145 /* scratch area for compiling shader, freed after compilation finishes */
146 struct etna_compile
{
147 const struct tgsi_token
*tokens
;
150 struct tgsi_shader_info info
;
152 /* Register descriptions, per TGSI file, per register index */
153 struct etna_compile_file file
[TGSI_FILE_COUNT
];
155 /* Keep track of TGSI register declarations */
156 struct etna_reg_desc decl
[ETNA_MAX_DECL
];
159 /* Bitmap of dead instructions which are removed in a separate pass */
160 bool dead_inst
[ETNA_MAX_TOKENS
];
163 enum etna_immediate_contents imm_contents
[ETNA_MAX_IMM
];
164 uint32_t imm_data
[ETNA_MAX_IMM
];
165 uint32_t imm_base
; /* base of immediates (in 32 bit units) */
166 uint32_t imm_size
; /* size of immediates (in 32 bit units) */
168 /* Next free native register, for register allocation */
169 uint32_t next_free_native
;
171 /* Temporary register for use within translated TGSI instruction,
172 * only allocated when needed.
174 int inner_temps
; /* number of inner temps used; only up to one available at
176 struct etna_native_reg inner_temp
[ETNA_MAX_INNER_TEMPS
];
178 /* Fields for handling nested conditionals */
179 struct etna_compile_frame frame_stack
[ETNA_MAX_DEPTH
];
181 int lbl_usage
[ETNA_MAX_INSTRUCTIONS
];
183 unsigned labels_count
, labels_sz
;
184 struct etna_compile_label
*labels
;
188 /* Code generation */
189 int inst_ptr
; /* current instruction pointer */
190 uint32_t code
[ETNA_MAX_INSTRUCTIONS
* ETNA_INST_SIZE
];
194 /* Number of varyings (PS only) */
197 /* GPU hardware specs */
198 const struct etna_specs
*specs
;
200 const struct etna_shader_key
*key
;
203 static struct etna_reg_desc
*
204 etna_get_dst_reg(struct etna_compile
*c
, struct tgsi_dst_register dst
)
206 return &c
->file
[dst
.File
].reg
[dst
.Index
];
209 static struct etna_reg_desc
*
210 etna_get_src_reg(struct etna_compile
*c
, struct tgsi_src_register src
)
212 return &c
->file
[src
.File
].reg
[src
.Index
];
215 static struct etna_native_reg
216 etna_native_temp(unsigned reg
)
218 return (struct etna_native_reg
) {
220 .rgroup
= INST_RGROUP_TEMP
,
225 static struct etna_native_reg
226 etna_native_internal(unsigned reg
)
228 return (struct etna_native_reg
) {
230 .rgroup
= INST_RGROUP_INTERNAL
,
235 /** Register allocation **/
236 enum reg_sort_order
{
243 /* Augmented register description for sorting */
245 struct etna_reg_desc
*ptr
;
250 sort_rec_compar(const struct sort_rec
*a
, const struct sort_rec
*b
)
261 /* create an index on a register set based on certain criteria. */
263 sort_registers(struct sort_rec
*sorted
, struct etna_compile_file
*file
,
264 enum reg_sort_order so
)
266 struct etna_reg_desc
*regs
= file
->reg
;
269 /* pre-populate keys from active registers */
270 for (int idx
= 0; idx
< file
->reg_size
; ++idx
) {
271 /* only interested in active registers now; will only assign inactive ones
272 * if no space in active ones */
273 if (regs
[idx
].active
) {
274 sorted
[ptr
].ptr
= ®s
[idx
];
278 sorted
[ptr
].key
= regs
[idx
].first_use
;
281 sorted
[ptr
].key
= regs
[idx
].last_use
;
284 sorted
[ptr
].key
= -regs
[idx
].first_use
;
287 sorted
[ptr
].key
= -regs
[idx
].last_use
;
294 /* sort index by key */
295 qsort(sorted
, ptr
, sizeof(struct sort_rec
),
296 (int (*)(const void *, const void *))sort_rec_compar
);
301 /* Allocate a new, unused, native temp register */
302 static struct etna_native_reg
303 alloc_new_native_reg(struct etna_compile
*c
)
305 assert(c
->next_free_native
< ETNA_MAX_TEMPS
);
306 return etna_native_temp(c
->next_free_native
++);
309 /* assign TEMPs to native registers */
311 assign_temporaries_to_native(struct etna_compile
*c
,
312 struct etna_compile_file
*file
)
314 struct etna_reg_desc
*temps
= file
->reg
;
316 for (int idx
= 0; idx
< file
->reg_size
; ++idx
)
317 temps
[idx
].native
= alloc_new_native_reg(c
);
320 /* assign inputs and outputs to temporaries
321 * Gallium assumes that the hardware has separate registers for taking input and
322 * output, however Vivante GPUs use temporaries both for passing in inputs and
323 * passing back outputs.
324 * Try to re-use temporary registers where possible. */
326 assign_inouts_to_temporaries(struct etna_compile
*c
, uint file
)
328 bool mode_inputs
= (file
== TGSI_FILE_INPUT
);
329 int inout_ptr
= 0, num_inouts
;
330 int temp_ptr
= 0, num_temps
;
331 struct sort_rec inout_order
[ETNA_MAX_TEMPS
];
332 struct sort_rec temps_order
[ETNA_MAX_TEMPS
];
333 num_inouts
= sort_registers(inout_order
, &c
->file
[file
],
334 mode_inputs
? LAST_USE_ASC
: FIRST_USE_ASC
);
335 num_temps
= sort_registers(temps_order
, &c
->file
[TGSI_FILE_TEMPORARY
],
336 mode_inputs
? FIRST_USE_ASC
: LAST_USE_ASC
);
338 while (inout_ptr
< num_inouts
&& temp_ptr
< num_temps
) {
339 struct etna_reg_desc
*inout
= inout_order
[inout_ptr
].ptr
;
340 struct etna_reg_desc
*temp
= temps_order
[temp_ptr
].ptr
;
342 if (!inout
->active
|| inout
->native
.valid
) { /* Skip if already a native register assigned */
347 /* last usage of this input is before or in same instruction of first use
349 if (mode_inputs
? (inout
->last_use
<= temp
->first_use
)
350 : (inout
->first_use
>= temp
->last_use
)) {
351 /* assign it and advance to next input */
352 inout
->native
= temp
->native
;
359 /* if we couldn't reuse current ones, allocate new temporaries */
360 for (inout_ptr
= 0; inout_ptr
< num_inouts
; ++inout_ptr
) {
361 struct etna_reg_desc
*inout
= inout_order
[inout_ptr
].ptr
;
363 if (inout
->active
&& !inout
->native
.valid
)
364 inout
->native
= alloc_new_native_reg(c
);
368 /* Allocate an immediate with a certain value and return the index. If
369 * there is already an immediate with that value, return that.
371 static struct etna_inst_src
372 alloc_imm(struct etna_compile
*c
, enum etna_immediate_contents contents
,
377 /* Could use a hash table to speed this up */
378 for (idx
= 0; idx
< c
->imm_size
; ++idx
) {
379 if (c
->imm_contents
[idx
] == contents
&& c
->imm_data
[idx
] == value
)
383 /* look if there is an unused slot */
384 if (idx
== c
->imm_size
) {
385 for (idx
= 0; idx
< c
->imm_size
; ++idx
) {
386 if (c
->imm_contents
[idx
] == ETNA_IMMEDIATE_UNUSED
)
391 /* allocate new immediate */
392 if (idx
== c
->imm_size
) {
393 assert(c
->imm_size
< ETNA_MAX_IMM
);
395 c
->imm_data
[idx
] = value
;
396 c
->imm_contents
[idx
] = contents
;
399 /* swizzle so that component with value is returned in all components */
401 struct etna_inst_src imm_src
= {
403 .rgroup
= INST_RGROUP_UNIFORM_0
,
405 .swiz
= INST_SWIZ_BROADCAST(idx
& 3)
411 static struct etna_inst_src
412 alloc_imm_u32(struct etna_compile
*c
, uint32_t value
)
414 return alloc_imm(c
, ETNA_IMMEDIATE_CONSTANT
, value
);
417 static struct etna_inst_src
418 alloc_imm_vec4u(struct etna_compile
*c
, enum etna_immediate_contents contents
,
419 const uint32_t *values
)
421 struct etna_inst_src imm_src
= { };
424 for (idx
= 0; idx
+ 3 < c
->imm_size
; idx
+= 4) {
425 /* What if we can use a uniform with a different swizzle? */
426 for (i
= 0; i
< 4; i
++)
427 if (c
->imm_contents
[idx
+ i
] != contents
|| c
->imm_data
[idx
+ i
] != values
[i
])
433 if (idx
+ 3 >= c
->imm_size
) {
434 idx
= align(c
->imm_size
, 4);
435 assert(idx
+ 4 <= ETNA_MAX_IMM
);
437 for (i
= 0; i
< 4; i
++) {
438 c
->imm_data
[idx
+ i
] = values
[i
];
439 c
->imm_contents
[idx
+ i
] = contents
;
442 c
->imm_size
= idx
+ 4;
445 assert((c
->imm_base
& 3) == 0);
448 imm_src
.rgroup
= INST_RGROUP_UNIFORM_0
;
449 imm_src
.reg
= idx
/ 4;
450 imm_src
.swiz
= INST_SWIZ_IDENTITY
;
456 get_imm_u32(struct etna_compile
*c
, const struct etna_inst_src
*imm
,
459 assert(imm
->use
== 1 && imm
->rgroup
== INST_RGROUP_UNIFORM_0
);
460 unsigned int idx
= imm
->reg
* 4 + ((imm
->swiz
>> (swiz_idx
* 2)) & 3);
462 return c
->imm_data
[idx
];
465 /* Allocate immediate with a certain float value. If there is already an
466 * immediate with that value, return that.
468 static struct etna_inst_src
469 alloc_imm_f32(struct etna_compile
*c
, float value
)
471 return alloc_imm_u32(c
, fui(value
));
474 static struct etna_inst_src
475 etna_imm_vec4f(struct etna_compile
*c
, const float *vec4
)
479 for (int i
= 0; i
< 4; i
++)
480 val
[i
] = fui(vec4
[i
]);
482 return alloc_imm_vec4u(c
, ETNA_IMMEDIATE_CONSTANT
, val
);
485 /* Pass -- check register file declarations and immediates */
487 etna_compile_parse_declarations(struct etna_compile
*c
)
489 struct tgsi_parse_context ctx
= { };
490 ASSERTED
unsigned status
= tgsi_parse_init(&ctx
, c
->tokens
);
491 assert(status
== TGSI_PARSE_OK
);
493 while (!tgsi_parse_end_of_tokens(&ctx
)) {
494 tgsi_parse_token(&ctx
);
496 switch (ctx
.FullToken
.Token
.Type
) {
497 case TGSI_TOKEN_TYPE_IMMEDIATE
: {
498 /* immediates are handled differently from other files; they are
499 * not declared explicitly, and always add four components */
500 const struct tgsi_full_immediate
*imm
= &ctx
.FullToken
.FullImmediate
;
501 assert(c
->imm_size
<= (ETNA_MAX_IMM
- 4));
503 for (int i
= 0; i
< 4; ++i
) {
504 unsigned idx
= c
->imm_size
++;
506 c
->imm_data
[idx
] = imm
->u
[i
].Uint
;
507 c
->imm_contents
[idx
] = ETNA_IMMEDIATE_CONSTANT
;
514 tgsi_parse_free(&ctx
);
517 /* Allocate register declarations for the registers in all register files */
519 etna_allocate_decls(struct etna_compile
*c
)
523 for (int x
= 0; x
< TGSI_FILE_COUNT
; ++x
) {
524 c
->file
[x
].reg
= &c
->decl
[idx
];
525 c
->file
[x
].reg_size
= c
->info
.file_max
[x
] + 1;
527 for (int sub
= 0; sub
< c
->file
[x
].reg_size
; ++sub
) {
528 c
->decl
[idx
].file
= x
;
529 c
->decl
[idx
].idx
= sub
;
534 c
->total_decls
= idx
;
537 /* Pass -- check and record usage of temporaries, inputs, outputs */
539 etna_compile_pass_check_usage(struct etna_compile
*c
)
541 struct tgsi_parse_context ctx
= { };
542 ASSERTED
unsigned status
= tgsi_parse_init(&ctx
, c
->tokens
);
543 assert(status
== TGSI_PARSE_OK
);
545 for (int idx
= 0; idx
< c
->total_decls
; ++idx
) {
546 c
->decl
[idx
].active
= false;
547 c
->decl
[idx
].first_use
= c
->decl
[idx
].last_use
= -1;
551 while (!tgsi_parse_end_of_tokens(&ctx
)) {
552 tgsi_parse_token(&ctx
);
553 /* find out max register #s used
554 * For every register mark first and last instruction index where it's
555 * used this allows finding ranges where the temporary can be borrowed
556 * as input and/or output register
558 * XXX in the case of loops this needs special care, or even be completely
560 * the last usage of a register inside a loop means it can still be used
562 * iteration (execution is no longer * chronological). The register can
564 * declared "free" after the loop finishes.
566 * Same for inputs: the first usage of a register inside a loop doesn't
567 * mean that the register
568 * won't have been overwritten in previous iteration. The register can
569 * only be declared free before the loop
571 * The proper way would be to do full dominator / post-dominator analysis
572 * (especially with more complicated
573 * control flow such as direct branch instructions) but not for now...
575 switch (ctx
.FullToken
.Token
.Type
) {
576 case TGSI_TOKEN_TYPE_DECLARATION
: {
577 /* Declaration: fill in file details */
578 const struct tgsi_full_declaration
*decl
= &ctx
.FullToken
.FullDeclaration
;
579 struct etna_compile_file
*file
= &c
->file
[decl
->Declaration
.File
];
581 for (int idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; ++idx
) {
582 file
->reg
[idx
].usage_mask
= 0; // we'll compute this ourselves
583 file
->reg
[idx
].has_semantic
= decl
->Declaration
.Semantic
;
584 file
->reg
[idx
].semantic
= decl
->Semantic
;
585 file
->reg
[idx
].interp
= decl
->Interp
;
588 case TGSI_TOKEN_TYPE_INSTRUCTION
: {
589 /* Instruction: iterate over operands of instruction */
590 const struct tgsi_full_instruction
*inst
= &ctx
.FullToken
.FullInstruction
;
592 /* iterate over destination registers */
593 for (int idx
= 0; idx
< inst
->Instruction
.NumDstRegs
; ++idx
) {
594 struct etna_reg_desc
*reg_desc
= &c
->file
[inst
->Dst
[idx
].Register
.File
].reg
[inst
->Dst
[idx
].Register
.Index
];
596 if (reg_desc
->first_use
== -1)
597 reg_desc
->first_use
= inst_idx
;
599 reg_desc
->last_use
= inst_idx
;
600 reg_desc
->active
= true;
603 /* iterate over source registers */
604 for (int idx
= 0; idx
< inst
->Instruction
.NumSrcRegs
; ++idx
) {
605 struct etna_reg_desc
*reg_desc
= &c
->file
[inst
->Src
[idx
].Register
.File
].reg
[inst
->Src
[idx
].Register
.Index
];
607 if (reg_desc
->first_use
== -1)
608 reg_desc
->first_use
= inst_idx
;
610 reg_desc
->last_use
= inst_idx
;
611 reg_desc
->active
= true;
612 /* accumulate usage mask for register, this is used to determine how
613 * many slots for varyings
614 * should be allocated */
615 reg_desc
->usage_mask
|= tgsi_util_get_inst_usage_mask(inst
, idx
);
624 tgsi_parse_free(&ctx
);
627 /* assign inputs that need to be assigned to specific registers */
629 assign_special_inputs(struct etna_compile
*c
)
631 if (c
->info
.processor
== PIPE_SHADER_FRAGMENT
) {
632 /* never assign t0 as it is the position output, start assigning at t1 */
633 c
->next_free_native
= 1;
635 for (int idx
= 0; idx
< c
->total_decls
; ++idx
) {
636 struct etna_reg_desc
*reg
= &c
->decl
[idx
];
641 /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
642 if (reg
->semantic
.Name
== TGSI_SEMANTIC_POSITION
)
643 reg
->native
= etna_native_temp(0);
645 /* hardwire TGSI_SEMANTIC_FACE to i0 */
646 if (reg
->semantic
.Name
== TGSI_SEMANTIC_FACE
)
647 reg
->native
= etna_native_internal(0);
652 /* Check that a move instruction does not swizzle any of the components
656 etna_mov_check_no_swizzle(const struct tgsi_dst_register dst
,
657 const struct tgsi_src_register src
)
659 return (!(dst
.WriteMask
& TGSI_WRITEMASK_X
) || src
.SwizzleX
== TGSI_SWIZZLE_X
) &&
660 (!(dst
.WriteMask
& TGSI_WRITEMASK_Y
) || src
.SwizzleY
== TGSI_SWIZZLE_Y
) &&
661 (!(dst
.WriteMask
& TGSI_WRITEMASK_Z
) || src
.SwizzleZ
== TGSI_SWIZZLE_Z
) &&
662 (!(dst
.WriteMask
& TGSI_WRITEMASK_W
) || src
.SwizzleW
== TGSI_SWIZZLE_W
);
665 /* Pass -- optimize outputs
666 * Mesa tends to generate code like this at the end if their shaders
667 * MOV OUT[1], TEMP[2]
668 * MOV OUT[0], TEMP[0]
669 * MOV OUT[2], TEMP[1]
671 * a) there is only a single assignment to an output register and
672 * b) the temporary is not used after that
673 * Also recognize direct assignment of IN to OUT (passthrough)
676 etna_compile_pass_optimize_outputs(struct etna_compile
*c
)
678 struct tgsi_parse_context ctx
= { };
680 ASSERTED
unsigned status
= tgsi_parse_init(&ctx
, c
->tokens
);
681 assert(status
== TGSI_PARSE_OK
);
683 while (!tgsi_parse_end_of_tokens(&ctx
)) {
684 tgsi_parse_token(&ctx
);
686 switch (ctx
.FullToken
.Token
.Type
) {
687 case TGSI_TOKEN_TYPE_INSTRUCTION
: {
688 const struct tgsi_full_instruction
*inst
= &ctx
.FullToken
.FullInstruction
;
690 /* iterate over operands */
691 switch (inst
->Instruction
.Opcode
) {
692 case TGSI_OPCODE_MOV
: {
693 /* We are only interested in eliminating MOVs which write to
694 * the shader outputs. Test for this early. */
695 if (inst
->Dst
[0].Register
.File
!= TGSI_FILE_OUTPUT
)
697 /* Elimination of a MOV must have no visible effect on the
698 * resulting shader: this means the MOV must not swizzle or
699 * saturate, and its source must not have the negate or
700 * absolute modifiers. */
701 if (!etna_mov_check_no_swizzle(inst
->Dst
[0].Register
, inst
->Src
[0].Register
) ||
702 inst
->Instruction
.Saturate
|| inst
->Src
[0].Register
.Negate
||
703 inst
->Src
[0].Register
.Absolute
)
706 uint out_idx
= inst
->Dst
[0].Register
.Index
;
707 uint in_idx
= inst
->Src
[0].Register
.Index
;
708 /* assignment of temporary to output --
709 * and the output doesn't yet have a native register assigned
710 * and the last use of the temporary is this instruction
711 * and the MOV does not do a swizzle
713 if (inst
->Src
[0].Register
.File
== TGSI_FILE_TEMPORARY
&&
714 !c
->file
[TGSI_FILE_OUTPUT
].reg
[out_idx
].native
.valid
&&
715 c
->file
[TGSI_FILE_TEMPORARY
].reg
[in_idx
].last_use
== inst_idx
) {
716 c
->file
[TGSI_FILE_OUTPUT
].reg
[out_idx
].native
=
717 c
->file
[TGSI_FILE_TEMPORARY
].reg
[in_idx
].native
;
718 /* prevent temp from being re-used for the rest of the shader */
719 c
->file
[TGSI_FILE_TEMPORARY
].reg
[in_idx
].last_use
= ETNA_MAX_TOKENS
;
720 /* mark this MOV instruction as a no-op */
721 c
->dead_inst
[inst_idx
] = true;
723 /* direct assignment of input to output --
724 * and the input or output doesn't yet have a native register
726 * and the output is only used in this instruction,
727 * allocate a new register, and associate both input and output to
729 * and the MOV does not do a swizzle
731 if (inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
&&
732 !c
->file
[TGSI_FILE_INPUT
].reg
[in_idx
].native
.valid
&&
733 !c
->file
[TGSI_FILE_OUTPUT
].reg
[out_idx
].native
.valid
&&
734 c
->file
[TGSI_FILE_OUTPUT
].reg
[out_idx
].last_use
== inst_idx
&&
735 c
->file
[TGSI_FILE_OUTPUT
].reg
[out_idx
].first_use
== inst_idx
) {
736 c
->file
[TGSI_FILE_OUTPUT
].reg
[out_idx
].native
=
737 c
->file
[TGSI_FILE_INPUT
].reg
[in_idx
].native
=
738 alloc_new_native_reg(c
);
739 /* mark this MOV instruction as a no-op */
740 c
->dead_inst
[inst_idx
] = true;
750 tgsi_parse_free(&ctx
);
753 /* Get a temporary to be used within one TGSI instruction.
754 * The first time that this function is called the temporary will be allocated.
755 * Each call to this function will return the same temporary.
757 static struct etna_native_reg
758 etna_compile_get_inner_temp(struct etna_compile
*c
)
760 int inner_temp
= c
->inner_temps
;
762 if (inner_temp
< ETNA_MAX_INNER_TEMPS
) {
763 if (!c
->inner_temp
[inner_temp
].valid
)
764 c
->inner_temp
[inner_temp
] = alloc_new_native_reg(c
);
766 /* alloc_new_native_reg() handles lack of registers */
769 BUG("Too many inner temporaries (%i) requested in one instruction",
773 return c
->inner_temp
[inner_temp
];
776 static struct etna_inst_dst
777 etna_native_to_dst(struct etna_native_reg native
, unsigned comps
)
779 /* Can only assign to temporaries */
780 assert(native
.valid
&& !native
.is_tex
&& native
.rgroup
== INST_RGROUP_TEMP
);
782 struct etna_inst_dst rv
= {
791 static struct etna_inst_src
792 etna_native_to_src(struct etna_native_reg native
, uint32_t swizzle
)
794 assert(native
.valid
&& !native
.is_tex
);
796 struct etna_inst_src rv
= {
799 .rgroup
= native
.rgroup
,
801 .amode
= INST_AMODE_DIRECT
,
807 static inline struct etna_inst_src
808 negate(struct etna_inst_src src
)
815 static inline struct etna_inst_src
816 absolute(struct etna_inst_src src
)
823 static inline struct etna_inst_src
824 swizzle(struct etna_inst_src src
, unsigned swizzle
)
826 src
.swiz
= inst_swiz_compose(src
.swiz
, swizzle
);
831 /* Emit instruction and append it to program */
833 emit_inst(struct etna_compile
*c
, struct etna_inst
*inst
)
835 assert(c
->inst_ptr
<= ETNA_MAX_INSTRUCTIONS
);
837 /* Check for uniform conflicts (each instruction can only access one
839 * if detected, use an intermediate temporary */
840 unsigned uni_rgroup
= -1;
841 unsigned uni_reg
= -1;
843 for (int src
= 0; src
< ETNA_NUM_SRC
; ++src
) {
844 if (inst
->src
[src
].rgroup
== INST_RGROUP_INTERNAL
&&
845 c
->info
.processor
== PIPE_SHADER_FRAGMENT
&&
847 struct etna_native_reg inner_temp
= etna_compile_get_inner_temp(c
);
850 * Set temporary register to 0.0 or 1.0 based on the gl_FrontFacing
851 * configuration (CW or CCW).
853 etna_assemble(&c
->code
[c
->inst_ptr
* 4], &(struct etna_inst
) {
854 .opcode
= INST_OPCODE_SET
,
855 .cond
= INST_CONDITION_NE
,
856 .dst
= etna_native_to_dst(inner_temp
, INST_COMPS_X
| INST_COMPS_Y
|
857 INST_COMPS_Z
| INST_COMPS_W
),
858 .src
[0] = inst
->src
[src
],
859 .src
[1] = alloc_imm_f32(c
, 1.0f
)
863 /* Modify instruction to use temp register instead of uniform */
864 inst
->src
[src
].use
= 1;
865 inst
->src
[src
].rgroup
= INST_RGROUP_TEMP
;
866 inst
->src
[src
].reg
= inner_temp
.id
;
867 inst
->src
[src
].swiz
= INST_SWIZ_IDENTITY
; /* swizzling happens on MOV */
868 inst
->src
[src
].neg
= 0; /* negation happens on MOV */
869 inst
->src
[src
].abs
= 0; /* abs happens on MOV */
870 inst
->src
[src
].amode
= 0; /* amode effects happen on MOV */
871 } else if (etna_rgroup_is_uniform(inst
->src
[src
].rgroup
)) {
872 if (uni_reg
== -1) { /* first unique uniform used */
873 uni_rgroup
= inst
->src
[src
].rgroup
;
874 uni_reg
= inst
->src
[src
].reg
;
875 } else { /* second or later; check that it is a re-use */
876 if (uni_rgroup
!= inst
->src
[src
].rgroup
||
877 uni_reg
!= inst
->src
[src
].reg
) {
878 DBG_F(ETNA_DBG_COMPILER_MSGS
, "perf warning: instruction that "
879 "accesses different uniforms, "
880 "need to generate extra MOV");
881 struct etna_native_reg inner_temp
= etna_compile_get_inner_temp(c
);
883 /* Generate move instruction to temporary */
884 etna_assemble(&c
->code
[c
->inst_ptr
* 4], &(struct etna_inst
) {
885 .opcode
= INST_OPCODE_MOV
,
886 .dst
= etna_native_to_dst(inner_temp
, INST_COMPS_X
| INST_COMPS_Y
|
887 INST_COMPS_Z
| INST_COMPS_W
),
888 .src
[2] = inst
->src
[src
]
893 /* Modify instruction to use temp register instead of uniform */
894 inst
->src
[src
].use
= 1;
895 inst
->src
[src
].rgroup
= INST_RGROUP_TEMP
;
896 inst
->src
[src
].reg
= inner_temp
.id
;
897 inst
->src
[src
].swiz
= INST_SWIZ_IDENTITY
; /* swizzling happens on MOV */
898 inst
->src
[src
].neg
= 0; /* negation happens on MOV */
899 inst
->src
[src
].abs
= 0; /* abs happens on MOV */
900 inst
->src
[src
].amode
= 0; /* amode effects happen on MOV */
906 /* Finally assemble the actual instruction */
907 etna_assemble(&c
->code
[c
->inst_ptr
* 4], inst
);
912 etna_amode(struct tgsi_ind_register indirect
)
914 assert(indirect
.File
== TGSI_FILE_ADDRESS
);
915 assert(indirect
.Index
== 0);
917 switch (indirect
.Swizzle
) {
919 return INST_AMODE_ADD_A_X
;
921 return INST_AMODE_ADD_A_Y
;
923 return INST_AMODE_ADD_A_Z
;
925 return INST_AMODE_ADD_A_W
;
927 assert(!"Invalid swizzle");
930 unreachable("bad swizzle");
933 /* convert destination operand */
934 static struct etna_inst_dst
935 convert_dst(struct etna_compile
*c
, const struct tgsi_full_dst_register
*in
)
937 struct etna_inst_dst rv
= {
939 .write_mask
= in
->Register
.WriteMask
,
942 if (in
->Register
.File
== TGSI_FILE_ADDRESS
) {
943 assert(in
->Register
.Index
== 0);
944 rv
.reg
= in
->Register
.Index
;
947 rv
= etna_native_to_dst(etna_get_dst_reg(c
, in
->Register
)->native
,
948 in
->Register
.WriteMask
);
951 if (in
->Register
.Indirect
)
952 rv
.amode
= etna_amode(in
->Indirect
);
957 /* convert texture operand */
958 static struct etna_inst_tex
959 convert_tex(struct etna_compile
*c
, const struct tgsi_full_src_register
*in
,
960 const struct tgsi_instruction_texture
*tex
)
962 struct etna_native_reg native_reg
= etna_get_src_reg(c
, in
->Register
)->native
;
963 struct etna_inst_tex rv
= {
964 // XXX .amode (to allow for an array of samplers?)
965 .swiz
= INST_SWIZ_IDENTITY
968 assert(native_reg
.is_tex
&& native_reg
.valid
);
969 rv
.id
= native_reg
.id
;
974 /* convert source operand */
975 static struct etna_inst_src
976 etna_create_src(const struct tgsi_full_src_register
*tgsi
,
977 const struct etna_native_reg
*native
)
979 const struct tgsi_src_register
*reg
= &tgsi
->Register
;
980 struct etna_inst_src rv
= {
982 .swiz
= INST_SWIZ(reg
->SwizzleX
, reg
->SwizzleY
, reg
->SwizzleZ
, reg
->SwizzleW
),
984 .abs
= reg
->Absolute
,
985 .rgroup
= native
->rgroup
,
987 .amode
= INST_AMODE_DIRECT
,
990 assert(native
->valid
&& !native
->is_tex
);
993 rv
.amode
= etna_amode(tgsi
->Indirect
);
998 static struct etna_inst_src
999 etna_mov_src_to_temp(struct etna_compile
*c
, struct etna_inst_src src
,
1000 struct etna_native_reg temp
)
1002 struct etna_inst mov
= { };
1004 mov
.opcode
= INST_OPCODE_MOV
;
1006 mov
.dst
= etna_native_to_dst(temp
, INST_COMPS_X
| INST_COMPS_Y
|
1007 INST_COMPS_Z
| INST_COMPS_W
);
1011 src
.swiz
= INST_SWIZ_IDENTITY
;
1012 src
.neg
= src
.abs
= 0;
1013 src
.rgroup
= temp
.rgroup
;
1019 static struct etna_inst_src
1020 etna_mov_src(struct etna_compile
*c
, struct etna_inst_src src
)
1022 struct etna_native_reg temp
= etna_compile_get_inner_temp(c
);
1024 return etna_mov_src_to_temp(c
, src
, temp
);
1028 etna_src_uniforms_conflict(struct etna_inst_src a
, struct etna_inst_src b
)
1030 return etna_rgroup_is_uniform(a
.rgroup
) &&
1031 etna_rgroup_is_uniform(b
.rgroup
) &&
1032 (a
.rgroup
!= b
.rgroup
|| a
.reg
!= b
.reg
);
1035 /* create a new label */
1037 alloc_new_label(struct etna_compile
*c
)
1039 struct etna_compile_label label
= {
1040 .inst_idx
= -1, /* start by point to no specific instruction */
1043 array_insert(c
->labels
, label
);
1045 return c
->labels_count
- 1;
1048 /* place label at current instruction pointer */
1050 label_place(struct etna_compile
*c
, struct etna_compile_label
*label
)
1052 label
->inst_idx
= c
->inst_ptr
;
1055 /* mark label use at current instruction.
1056 * target of the label will be filled in in the marked instruction's src2.imm
1058 * as the value becomes known.
1061 label_mark_use(struct etna_compile
*c
, int lbl_idx
)
1063 assert(c
->inst_ptr
< ETNA_MAX_INSTRUCTIONS
);
1064 c
->lbl_usage
[c
->inst_ptr
] = lbl_idx
;
1067 /* walk the frame stack and return first frame with matching type */
1068 static struct etna_compile_frame
*
1069 find_frame(struct etna_compile
*c
, enum etna_compile_frame_type type
)
1071 for (int sp
= c
->frame_sp
; sp
>= 0; sp
--)
1072 if (c
->frame_stack
[sp
].type
== type
)
1073 return &c
->frame_stack
[sp
];
1079 struct instr_translater
{
1080 void (*fxn
)(const struct instr_translater
*t
, struct etna_compile
*c
,
1081 const struct tgsi_full_instruction
*inst
,
1082 struct etna_inst_src
*src
);
1086 /* tgsi src -> etna src swizzle */
1093 trans_instr(const struct instr_translater
*t
, struct etna_compile
*c
,
1094 const struct tgsi_full_instruction
*inst
, struct etna_inst_src
*src
)
1096 const struct tgsi_opcode_info
*info
= tgsi_get_opcode_info(inst
->Instruction
.Opcode
);
1097 struct etna_inst instr
= { };
1099 instr
.opcode
= t
->opc
;
1100 instr
.cond
= t
->cond
;
1101 instr
.sat
= inst
->Instruction
.Saturate
;
1103 assert(info
->num_dst
<= 1);
1105 instr
.dst
= convert_dst(c
, &inst
->Dst
[0]);
1107 assert(info
->num_src
<= ETNA_NUM_SRC
);
1109 for (unsigned i
= 0; i
< info
->num_src
; i
++) {
1110 int swizzle
= t
->src
[i
];
1112 assert(swizzle
!= -1);
1113 instr
.src
[swizzle
] = src
[i
];
1116 emit_inst(c
, &instr
);
1120 trans_min_max(const struct instr_translater
*t
, struct etna_compile
*c
,
1121 const struct tgsi_full_instruction
*inst
,
1122 struct etna_inst_src
*src
)
1124 emit_inst(c
, &(struct etna_inst
) {
1125 .opcode
= INST_OPCODE_SELECT
,
1127 .sat
= inst
->Instruction
.Saturate
,
1128 .dst
= convert_dst(c
, &inst
->Dst
[0]),
1136 trans_if(const struct instr_translater
*t
, struct etna_compile
*c
,
1137 const struct tgsi_full_instruction
*inst
, struct etna_inst_src
*src
)
1139 struct etna_compile_frame
*f
= &c
->frame_stack
[c
->frame_sp
++];
1140 struct etna_inst_src imm_0
= alloc_imm_f32(c
, 0.0f
);
1142 /* push IF to stack */
1143 f
->type
= ETNA_COMPILE_FRAME_IF
;
1144 /* create "else" label */
1145 f
->lbl_else_idx
= alloc_new_label(c
);
1146 f
->lbl_endif_idx
= -1;
1148 /* We need to avoid the emit_inst() below becoming two instructions */
1149 if (etna_src_uniforms_conflict(src
[0], imm_0
))
1150 src
[0] = etna_mov_src(c
, src
[0]);
1152 /* mark position in instruction stream of label reference so that it can be
1153 * filled in in next pass */
1154 label_mark_use(c
, f
->lbl_else_idx
);
1156 /* create conditional branch to label if src0 EQ 0 */
1157 emit_inst(c
, &(struct etna_inst
){
1158 .opcode
= INST_OPCODE_BRANCH
,
1159 .cond
= INST_CONDITION_EQ
,
1162 /* imm is filled in later */
1167 trans_else(const struct instr_translater
*t
, struct etna_compile
*c
,
1168 const struct tgsi_full_instruction
*inst
, struct etna_inst_src
*src
)
1170 assert(c
->frame_sp
> 0);
1171 struct etna_compile_frame
*f
= &c
->frame_stack
[c
->frame_sp
- 1];
1172 assert(f
->type
== ETNA_COMPILE_FRAME_IF
);
1174 /* create "endif" label, and branch to endif label */
1175 f
->lbl_endif_idx
= alloc_new_label(c
);
1176 label_mark_use(c
, f
->lbl_endif_idx
);
1177 emit_inst(c
, &(struct etna_inst
) {
1178 .opcode
= INST_OPCODE_BRANCH
,
1179 .cond
= INST_CONDITION_TRUE
,
1180 /* imm is filled in later */
1183 /* mark "else" label at this position in instruction stream */
1184 label_place(c
, &c
->labels
[f
->lbl_else_idx
]);
1188 trans_endif(const struct instr_translater
*t
, struct etna_compile
*c
,
1189 const struct tgsi_full_instruction
*inst
, struct etna_inst_src
*src
)
1191 assert(c
->frame_sp
> 0);
1192 struct etna_compile_frame
*f
= &c
->frame_stack
[--c
->frame_sp
];
1193 assert(f
->type
== ETNA_COMPILE_FRAME_IF
);
1195 /* assign "endif" or "else" (if no ELSE) label to current position in
1196 * instruction stream, pop IF */
1197 if (f
->lbl_endif_idx
!= -1)
1198 label_place(c
, &c
->labels
[f
->lbl_endif_idx
]);
1200 label_place(c
, &c
->labels
[f
->lbl_else_idx
]);
1204 trans_loop_bgn(const struct instr_translater
*t
, struct etna_compile
*c
,
1205 const struct tgsi_full_instruction
*inst
,
1206 struct etna_inst_src
*src
)
1208 struct etna_compile_frame
*f
= &c
->frame_stack
[c
->frame_sp
++];
1210 /* push LOOP to stack */
1211 f
->type
= ETNA_COMPILE_FRAME_LOOP
;
1212 f
->lbl_loop_bgn_idx
= alloc_new_label(c
);
1213 f
->lbl_loop_end_idx
= alloc_new_label(c
);
1215 label_place(c
, &c
->labels
[f
->lbl_loop_bgn_idx
]);
1221 trans_loop_end(const struct instr_translater
*t
, struct etna_compile
*c
,
1222 const struct tgsi_full_instruction
*inst
,
1223 struct etna_inst_src
*src
)
1225 assert(c
->frame_sp
> 0);
1226 struct etna_compile_frame
*f
= &c
->frame_stack
[--c
->frame_sp
];
1227 assert(f
->type
== ETNA_COMPILE_FRAME_LOOP
);
1229 /* mark position in instruction stream of label reference so that it can be
1230 * filled in in next pass */
1231 label_mark_use(c
, f
->lbl_loop_bgn_idx
);
1233 /* create branch to loop_bgn label */
1234 emit_inst(c
, &(struct etna_inst
) {
1235 .opcode
= INST_OPCODE_BRANCH
,
1236 .cond
= INST_CONDITION_TRUE
,
1238 /* imm is filled in later */
1241 label_place(c
, &c
->labels
[f
->lbl_loop_end_idx
]);
1245 trans_brk(const struct instr_translater
*t
, struct etna_compile
*c
,
1246 const struct tgsi_full_instruction
*inst
, struct etna_inst_src
*src
)
1248 assert(c
->frame_sp
> 0);
1249 struct etna_compile_frame
*f
= find_frame(c
, ETNA_COMPILE_FRAME_LOOP
);
1251 /* mark position in instruction stream of label reference so that it can be
1252 * filled in in next pass */
1253 label_mark_use(c
, f
->lbl_loop_end_idx
);
1255 /* create branch to loop_end label */
1256 emit_inst(c
, &(struct etna_inst
) {
1257 .opcode
= INST_OPCODE_BRANCH
,
1258 .cond
= INST_CONDITION_TRUE
,
1260 /* imm is filled in later */
1265 trans_cont(const struct instr_translater
*t
, struct etna_compile
*c
,
1266 const struct tgsi_full_instruction
*inst
, struct etna_inst_src
*src
)
1268 assert(c
->frame_sp
> 0);
1269 struct etna_compile_frame
*f
= find_frame(c
, ETNA_COMPILE_FRAME_LOOP
);
1271 /* mark position in instruction stream of label reference so that it can be
1272 * filled in in next pass */
1273 label_mark_use(c
, f
->lbl_loop_bgn_idx
);
1275 /* create branch to loop_end label */
1276 emit_inst(c
, &(struct etna_inst
) {
1277 .opcode
= INST_OPCODE_BRANCH
,
1278 .cond
= INST_CONDITION_TRUE
,
1280 /* imm is filled in later */
1285 trans_deriv(const struct instr_translater
*t
, struct etna_compile
*c
,
1286 const struct tgsi_full_instruction
*inst
, struct etna_inst_src
*src
)
1288 emit_inst(c
, &(struct etna_inst
) {
1290 .sat
= inst
->Instruction
.Saturate
,
1291 .dst
= convert_dst(c
, &inst
->Dst
[0]),
1298 trans_arl(const struct instr_translater
*t
, struct etna_compile
*c
,
1299 const struct tgsi_full_instruction
*inst
, struct etna_inst_src
*src
)
1301 struct etna_native_reg temp
= etna_compile_get_inner_temp(c
);
1302 struct etna_inst arl
= { };
1303 struct etna_inst_dst dst
;
1305 dst
= etna_native_to_dst(temp
, INST_COMPS_X
| INST_COMPS_Y
| INST_COMPS_Z
|
1308 if (c
->specs
->has_sign_floor_ceil
) {
1309 struct etna_inst floor
= { };
1311 floor
.opcode
= INST_OPCODE_FLOOR
;
1312 floor
.src
[2] = src
[0];
1315 emit_inst(c
, &floor
);
1317 struct etna_inst floor
[2] = { };
1319 floor
[0].opcode
= INST_OPCODE_FRC
;
1320 floor
[0].sat
= inst
->Instruction
.Saturate
;
1322 floor
[0].src
[2] = src
[0];
1324 floor
[1].opcode
= INST_OPCODE_ADD
;
1325 floor
[1].sat
= inst
->Instruction
.Saturate
;
1327 floor
[1].src
[0] = src
[0];
1328 floor
[1].src
[2].use
= 1;
1329 floor
[1].src
[2].swiz
= INST_SWIZ_IDENTITY
;
1330 floor
[1].src
[2].neg
= 1;
1331 floor
[1].src
[2].rgroup
= temp
.rgroup
;
1332 floor
[1].src
[2].reg
= temp
.id
;
1334 emit_inst(c
, &floor
[0]);
1335 emit_inst(c
, &floor
[1]);
1338 arl
.opcode
= INST_OPCODE_MOVAR
;
1339 arl
.sat
= inst
->Instruction
.Saturate
;
1340 arl
.dst
= convert_dst(c
, &inst
->Dst
[0]);
1341 arl
.src
[2] = etna_native_to_src(temp
, INST_SWIZ_IDENTITY
);
1347 trans_lrp(const struct instr_translater
*t
, struct etna_compile
*c
,
1348 const struct tgsi_full_instruction
*inst
, struct etna_inst_src
*src
)
1350 /* dst = src0 * src1 + (1 - src0) * src2
1351 * => src0 * src1 - (src0 - 1) * src2
1352 * => src0 * src1 - (src0 * src2 - src2)
1353 * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
1354 * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
1356 struct etna_native_reg temp
= etna_compile_get_inner_temp(c
);
1357 if (etna_src_uniforms_conflict(src
[0], src
[1]) ||
1358 etna_src_uniforms_conflict(src
[0], src
[2])) {
1359 src
[0] = etna_mov_src(c
, src
[0]);
1362 struct etna_inst mad
[2] = { };
1363 mad
[0].opcode
= INST_OPCODE_MAD
;
1365 mad
[0].dst
= etna_native_to_dst(temp
, INST_COMPS_X
| INST_COMPS_Y
|
1366 INST_COMPS_Z
| INST_COMPS_W
);
1367 mad
[0].src
[0] = src
[0];
1368 mad
[0].src
[1] = src
[2];
1369 mad
[0].src
[2] = negate(src
[2]);
1370 mad
[1].opcode
= INST_OPCODE_MAD
;
1371 mad
[1].sat
= inst
->Instruction
.Saturate
;
1372 mad
[1].dst
= convert_dst(c
, &inst
->Dst
[0]), mad
[1].src
[0] = src
[0];
1373 mad
[1].src
[1] = src
[1];
1374 mad
[1].src
[2] = negate(etna_native_to_src(temp
, INST_SWIZ_IDENTITY
));
1376 emit_inst(c
, &mad
[0]);
1377 emit_inst(c
, &mad
[1]);
1381 trans_lit(const struct instr_translater
*t
, struct etna_compile
*c
,
1382 const struct tgsi_full_instruction
*inst
, struct etna_inst_src
*src
)
1384 /* SELECT.LT tmp._y__, 0, src.yyyy, 0
1385 * - can be eliminated if src.y is a uniform and >= 0
1386 * SELECT.GT tmp.___w, 128, src.wwww, 128
1387 * SELECT.LT tmp.___w, -128, tmp.wwww, -128
1388 * - can be eliminated if src.w is a uniform and fits clamp
1389 * LOG tmp.x, void, void, tmp.yyyy
1390 * MUL tmp.x, tmp.xxxx, tmp.wwww, void
1391 * LITP dst, undef, src.xxxx, tmp.xxxx
1393 struct etna_native_reg inner_temp
= etna_compile_get_inner_temp(c
);
1394 struct etna_inst_src src_y
= { };
1396 if (!etna_rgroup_is_uniform(src
[0].rgroup
)) {
1397 src_y
= etna_native_to_src(inner_temp
, SWIZZLE(Y
, Y
, Y
, Y
));
1399 struct etna_inst ins
= { };
1400 ins
.opcode
= INST_OPCODE_SELECT
;
1401 ins
.cond
= INST_CONDITION_LT
;
1402 ins
.dst
= etna_native_to_dst(inner_temp
, INST_COMPS_Y
);
1403 ins
.src
[0] = ins
.src
[2] = alloc_imm_f32(c
, 0.0);
1404 ins
.src
[1] = swizzle(src
[0], SWIZZLE(Y
, Y
, Y
, Y
));
1406 } else if (uif(get_imm_u32(c
, &src
[0], 1)) < 0)
1407 src_y
= alloc_imm_f32(c
, 0.0);
1409 src_y
= swizzle(src
[0], SWIZZLE(Y
, Y
, Y
, Y
));
1411 struct etna_inst_src src_w
= { };
1413 if (!etna_rgroup_is_uniform(src
[0].rgroup
)) {
1414 src_w
= etna_native_to_src(inner_temp
, SWIZZLE(W
, W
, W
, W
));
1416 struct etna_inst ins
= { };
1417 ins
.opcode
= INST_OPCODE_SELECT
;
1418 ins
.cond
= INST_CONDITION_GT
;
1419 ins
.dst
= etna_native_to_dst(inner_temp
, INST_COMPS_W
);
1420 ins
.src
[0] = ins
.src
[2] = alloc_imm_f32(c
, 128.);
1421 ins
.src
[1] = swizzle(src
[0], SWIZZLE(W
, W
, W
, W
));
1423 ins
.cond
= INST_CONDITION_LT
;
1424 ins
.src
[0].neg
= !ins
.src
[0].neg
;
1425 ins
.src
[2].neg
= !ins
.src
[2].neg
;
1428 } else if (uif(get_imm_u32(c
, &src
[0], 3)) < -128.)
1429 src_w
= alloc_imm_f32(c
, -128.);
1430 else if (uif(get_imm_u32(c
, &src
[0], 3)) > 128.)
1431 src_w
= alloc_imm_f32(c
, 128.);
1433 src_w
= swizzle(src
[0], SWIZZLE(W
, W
, W
, W
));
1435 if (c
->specs
->has_new_transcendentals
) { /* Alternative LOG sequence */
1436 emit_inst(c
, &(struct etna_inst
) {
1437 .opcode
= INST_OPCODE_LOG
,
1438 .dst
= etna_native_to_dst(inner_temp
, INST_COMPS_X
| INST_COMPS_Y
),
1440 .tex
= { .amode
=1 }, /* Unknown bit needs to be set */
1442 emit_inst(c
, &(struct etna_inst
) {
1443 .opcode
= INST_OPCODE_MUL
,
1444 .dst
= etna_native_to_dst(inner_temp
, INST_COMPS_X
),
1445 .src
[0] = etna_native_to_src(inner_temp
, SWIZZLE(X
, X
, X
, X
)),
1446 .src
[1] = etna_native_to_src(inner_temp
, SWIZZLE(Y
, Y
, Y
, Y
)),
1449 struct etna_inst ins
[3] = { };
1450 ins
[0].opcode
= INST_OPCODE_LOG
;
1451 ins
[0].dst
= etna_native_to_dst(inner_temp
, INST_COMPS_X
);
1452 ins
[0].src
[2] = src_y
;
1454 emit_inst(c
, &ins
[0]);
1456 emit_inst(c
, &(struct etna_inst
) {
1457 .opcode
= INST_OPCODE_MUL
,
1459 .dst
= etna_native_to_dst(inner_temp
, INST_COMPS_X
),
1460 .src
[0] = etna_native_to_src(inner_temp
, SWIZZLE(X
, X
, X
, X
)),
1463 emit_inst(c
, &(struct etna_inst
) {
1464 .opcode
= INST_OPCODE_LITP
,
1466 .dst
= convert_dst(c
, &inst
->Dst
[0]),
1467 .src
[0] = swizzle(src
[0], SWIZZLE(X
, X
, X
, X
)),
1468 .src
[1] = swizzle(src
[0], SWIZZLE(X
, X
, X
, X
)),
1469 .src
[2] = etna_native_to_src(inner_temp
, SWIZZLE(X
, X
, X
, X
)),
1474 trans_ssg(const struct instr_translater
*t
, struct etna_compile
*c
,
1475 const struct tgsi_full_instruction
*inst
, struct etna_inst_src
*src
)
1477 if (c
->specs
->has_sign_floor_ceil
) {
1478 emit_inst(c
, &(struct etna_inst
){
1479 .opcode
= INST_OPCODE_SIGN
,
1480 .sat
= inst
->Instruction
.Saturate
,
1481 .dst
= convert_dst(c
, &inst
->Dst
[0]),
1485 struct etna_native_reg temp
= etna_compile_get_inner_temp(c
);
1486 struct etna_inst ins
[2] = { };
1488 ins
[0].opcode
= INST_OPCODE_SET
;
1489 ins
[0].cond
= INST_CONDITION_NZ
;
1490 ins
[0].dst
= etna_native_to_dst(temp
, INST_COMPS_X
| INST_COMPS_Y
|
1491 INST_COMPS_Z
| INST_COMPS_W
);
1492 ins
[0].src
[0] = src
[0];
1494 ins
[1].opcode
= INST_OPCODE_SELECT
;
1495 ins
[1].cond
= INST_CONDITION_LZ
;
1496 ins
[1].sat
= inst
->Instruction
.Saturate
;
1497 ins
[1].dst
= convert_dst(c
, &inst
->Dst
[0]);
1498 ins
[1].src
[0] = src
[0];
1499 ins
[1].src
[2] = etna_native_to_src(temp
, INST_SWIZ_IDENTITY
);
1500 ins
[1].src
[1] = negate(ins
[1].src
[2]);
1502 emit_inst(c
, &ins
[0]);
1503 emit_inst(c
, &ins
[1]);
1508 trans_trig(const struct instr_translater
*t
, struct etna_compile
*c
,
1509 const struct tgsi_full_instruction
*inst
, struct etna_inst_src
*src
)
1511 if (c
->specs
->has_new_transcendentals
) { /* Alternative SIN/COS */
1512 /* On newer chips alternative SIN/COS instructions are implemented,
1514 * - Need their input scaled by 1/pi instead of 2/pi
1515 * - Output an x and y component, which need to be multiplied to
1518 struct etna_native_reg temp
= etna_compile_get_inner_temp(c
); /* only using .xyz */
1519 emit_inst(c
, &(struct etna_inst
) {
1520 .opcode
= INST_OPCODE_MUL
,
1522 .dst
= etna_native_to_dst(temp
, INST_COMPS_Z
),
1523 .src
[0] = src
[0], /* any swizzling happens here */
1524 .src
[1] = alloc_imm_f32(c
, 1.0f
/ M_PI
),
1526 emit_inst(c
, &(struct etna_inst
) {
1527 .opcode
= inst
->Instruction
.Opcode
== TGSI_OPCODE_COS
1531 .dst
= etna_native_to_dst(temp
, INST_COMPS_X
| INST_COMPS_Y
),
1532 .src
[2] = etna_native_to_src(temp
, SWIZZLE(Z
, Z
, Z
, Z
)),
1533 .tex
= { .amode
=1 }, /* Unknown bit needs to be set */
1535 emit_inst(c
, &(struct etna_inst
) {
1536 .opcode
= INST_OPCODE_MUL
,
1537 .sat
= inst
->Instruction
.Saturate
,
1538 .dst
= convert_dst(c
, &inst
->Dst
[0]),
1539 .src
[0] = etna_native_to_src(temp
, SWIZZLE(X
, X
, X
, X
)),
1540 .src
[1] = etna_native_to_src(temp
, SWIZZLE(Y
, Y
, Y
, Y
)),
1543 } else if (c
->specs
->has_sin_cos_sqrt
) {
1544 struct etna_native_reg temp
= etna_compile_get_inner_temp(c
);
1545 /* add divide by PI/2, using a temp register. GC2000
1546 * fails with src==dst for the trig instruction. */
1547 emit_inst(c
, &(struct etna_inst
) {
1548 .opcode
= INST_OPCODE_MUL
,
1550 .dst
= etna_native_to_dst(temp
, INST_COMPS_X
| INST_COMPS_Y
|
1551 INST_COMPS_Z
| INST_COMPS_W
),
1552 .src
[0] = src
[0], /* any swizzling happens here */
1553 .src
[1] = alloc_imm_f32(c
, 2.0f
/ M_PI
),
1555 emit_inst(c
, &(struct etna_inst
) {
1556 .opcode
= inst
->Instruction
.Opcode
== TGSI_OPCODE_COS
1559 .sat
= inst
->Instruction
.Saturate
,
1560 .dst
= convert_dst(c
, &inst
->Dst
[0]),
1561 .src
[2] = etna_native_to_src(temp
, INST_SWIZ_IDENTITY
),
1564 /* Implement Nick's fast sine/cosine. Taken from:
1565 * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
1566 * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
1567 * MAD t.x_zw, src.xxxx, A, B
1568 * FRC t.x_z_, void, void, t.xwzw
1569 * MAD t.x_z_, t.xwzw, 2, -1
1570 * MUL t._y__, t.wzww, |t.wzww|, void (for sin/scs)
1571 * DP3 t.x_z_, t.zyww, C, void (for sin)
1572 * DP3 t.__z_, t.zyww, C, void (for scs)
1573 * MUL t._y__, t.wxww, |t.wxww|, void (for cos/scs)
1574 * DP3 t.x_z_, t.xyww, C, void (for cos)
1575 * DP3 t.x___, t.xyww, C, void (for scs)
1576 * MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
1577 * MAD dst, t.ywyw, .2225, t.xzxz
1579 struct etna_inst
*p
, ins
[9] = { };
1580 struct etna_native_reg t0
= etna_compile_get_inner_temp(c
);
1581 struct etna_inst_src t0s
= etna_native_to_src(t0
, INST_SWIZ_IDENTITY
);
1582 struct etna_inst_src sincos
[3], in
= src
[0];
1583 sincos
[0] = etna_imm_vec4f(c
, sincos_const
[0]);
1584 sincos
[1] = etna_imm_vec4f(c
, sincos_const
[1]);
1586 /* A uniform source will cause the inner temp limit to
1587 * be exceeded. Explicitly deal with that scenario.
1589 if (etna_rgroup_is_uniform(src
[0].rgroup
)) {
1590 struct etna_inst ins
= { };
1591 ins
.opcode
= INST_OPCODE_MOV
;
1592 ins
.dst
= etna_native_to_dst(t0
, INST_COMPS_X
);
1598 ins
[0].opcode
= INST_OPCODE_MAD
;
1599 ins
[0].dst
= etna_native_to_dst(t0
, INST_COMPS_X
| INST_COMPS_Z
| INST_COMPS_W
);
1600 ins
[0].src
[0] = swizzle(in
, SWIZZLE(X
, X
, X
, X
));
1601 ins
[0].src
[1] = swizzle(sincos
[1], SWIZZLE(X
, W
, X
, W
)); /* 1/2*PI */
1602 ins
[0].src
[2] = swizzle(sincos
[1], SWIZZLE(Y
, W
, Z
, W
)); /* 0.75, 0, 0.5, 0 */
1604 ins
[1].opcode
= INST_OPCODE_FRC
;
1605 ins
[1].dst
= etna_native_to_dst(t0
, INST_COMPS_X
| INST_COMPS_Z
);
1606 ins
[1].src
[2] = swizzle(t0s
, SWIZZLE(X
, W
, Z
, W
));
1608 ins
[2].opcode
= INST_OPCODE_MAD
;
1609 ins
[2].dst
= etna_native_to_dst(t0
, INST_COMPS_X
| INST_COMPS_Z
);
1610 ins
[2].src
[0] = swizzle(t0s
, SWIZZLE(X
, W
, Z
, W
));
1611 ins
[2].src
[1] = swizzle(sincos
[0], SWIZZLE(X
, X
, X
, X
)); /* 2 */
1612 ins
[2].src
[2] = swizzle(sincos
[0], SWIZZLE(Y
, Y
, Y
, Y
)); /* -1 */
1614 unsigned mul_swiz
, dp3_swiz
;
1615 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_SIN
) {
1616 mul_swiz
= SWIZZLE(W
, Z
, W
, W
);
1617 dp3_swiz
= SWIZZLE(Z
, Y
, W
, W
);
1619 mul_swiz
= SWIZZLE(W
, X
, W
, W
);
1620 dp3_swiz
= SWIZZLE(X
, Y
, W
, W
);
1623 ins
[3].opcode
= INST_OPCODE_MUL
;
1624 ins
[3].dst
= etna_native_to_dst(t0
, INST_COMPS_Y
);
1625 ins
[3].src
[0] = swizzle(t0s
, mul_swiz
);
1626 ins
[3].src
[1] = absolute(ins
[3].src
[0]);
1628 ins
[4].opcode
= INST_OPCODE_DP3
;
1629 ins
[4].dst
= etna_native_to_dst(t0
, INST_COMPS_X
| INST_COMPS_Z
);
1630 ins
[4].src
[0] = swizzle(t0s
, dp3_swiz
);
1631 ins
[4].src
[1] = swizzle(sincos
[0], SWIZZLE(Z
, W
, W
, W
));
1634 p
->opcode
= INST_OPCODE_MAD
;
1635 p
->dst
= etna_native_to_dst(t0
, INST_COMPS_Y
| INST_COMPS_W
);
1636 p
->src
[0] = swizzle(t0s
, SWIZZLE(X
, X
, Z
, Z
));
1637 p
->src
[1] = absolute(p
->src
[0]);
1638 p
->src
[2] = negate(p
->src
[0]);
1641 p
->opcode
= INST_OPCODE_MAD
;
1642 p
->sat
= inst
->Instruction
.Saturate
;
1643 p
->dst
= convert_dst(c
, &inst
->Dst
[0]),
1644 p
->src
[0] = swizzle(t0s
, SWIZZLE(Y
, W
, Y
, W
));
1645 p
->src
[1] = alloc_imm_f32(c
, 0.2225);
1646 p
->src
[2] = swizzle(t0s
, SWIZZLE(X
, Z
, X
, Z
));
1648 for (int i
= 0; &ins
[i
] <= p
; i
++)
1649 emit_inst(c
, &ins
[i
]);
1654 trans_lg2(const struct instr_translater
*t
, struct etna_compile
*c
,
1655 const struct tgsi_full_instruction
*inst
, struct etna_inst_src
*src
)
1657 if (c
->specs
->has_new_transcendentals
) {
1658 /* On newer chips alternative LOG instruction is implemented,
1659 * which outputs an x and y component, which need to be multiplied to
1662 struct etna_native_reg temp
= etna_compile_get_inner_temp(c
); /* only using .xy */
1663 emit_inst(c
, &(struct etna_inst
) {
1664 .opcode
= INST_OPCODE_LOG
,
1666 .dst
= etna_native_to_dst(temp
, INST_COMPS_X
| INST_COMPS_Y
),
1668 .tex
= { .amode
=1 }, /* Unknown bit needs to be set */
1670 emit_inst(c
, &(struct etna_inst
) {
1671 .opcode
= INST_OPCODE_MUL
,
1672 .sat
= inst
->Instruction
.Saturate
,
1673 .dst
= convert_dst(c
, &inst
->Dst
[0]),
1674 .src
[0] = etna_native_to_src(temp
, SWIZZLE(X
, X
, X
, X
)),
1675 .src
[1] = etna_native_to_src(temp
, SWIZZLE(Y
, Y
, Y
, Y
)),
1678 emit_inst(c
, &(struct etna_inst
) {
1679 .opcode
= INST_OPCODE_LOG
,
1680 .sat
= inst
->Instruction
.Saturate
,
1681 .dst
= convert_dst(c
, &inst
->Dst
[0]),
1688 trans_sampler(const struct instr_translater
*t
, struct etna_compile
*c
,
1689 const struct tgsi_full_instruction
*inst
,
1690 struct etna_inst_src
*src
)
1692 /* There is no native support for GL texture rectangle coordinates, so
1693 * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
1694 if (inst
->Texture
.Texture
== TGSI_TEXTURE_RECT
) {
1695 uint32_t unit
= inst
->Src
[1].Register
.Index
;
1696 struct etna_inst ins
[2] = { };
1697 struct etna_native_reg temp
= etna_compile_get_inner_temp(c
);
1699 ins
[0].opcode
= INST_OPCODE_MUL
;
1700 ins
[0].dst
= etna_native_to_dst(temp
, INST_COMPS_X
);
1701 ins
[0].src
[0] = src
[0];
1702 ins
[0].src
[1] = alloc_imm(c
, ETNA_IMMEDIATE_TEXRECT_SCALE_X
, unit
);
1704 ins
[1].opcode
= INST_OPCODE_MUL
;
1705 ins
[1].dst
= etna_native_to_dst(temp
, INST_COMPS_Y
);
1706 ins
[1].src
[0] = src
[0];
1707 ins
[1].src
[1] = alloc_imm(c
, ETNA_IMMEDIATE_TEXRECT_SCALE_Y
, unit
);
1709 emit_inst(c
, &ins
[0]);
1710 emit_inst(c
, &ins
[1]);
1712 src
[0] = etna_native_to_src(temp
, INST_SWIZ_IDENTITY
); /* temp.xyzw */
1715 switch (inst
->Instruction
.Opcode
) {
1716 case TGSI_OPCODE_TEX
:
1717 emit_inst(c
, &(struct etna_inst
) {
1718 .opcode
= INST_OPCODE_TEXLD
,
1720 .dst
= convert_dst(c
, &inst
->Dst
[0]),
1721 .tex
= convert_tex(c
, &inst
->Src
[1], &inst
->Texture
),
1726 case TGSI_OPCODE_TXB
:
1727 emit_inst(c
, &(struct etna_inst
) {
1728 .opcode
= INST_OPCODE_TEXLDB
,
1730 .dst
= convert_dst(c
, &inst
->Dst
[0]),
1731 .tex
= convert_tex(c
, &inst
->Src
[1], &inst
->Texture
),
1736 case TGSI_OPCODE_TXL
:
1737 emit_inst(c
, &(struct etna_inst
) {
1738 .opcode
= INST_OPCODE_TEXLDL
,
1740 .dst
= convert_dst(c
, &inst
->Dst
[0]),
1741 .tex
= convert_tex(c
, &inst
->Src
[1], &inst
->Texture
),
1746 case TGSI_OPCODE_TXP
: { /* divide src.xyz by src.w */
1747 struct etna_native_reg temp
= etna_compile_get_inner_temp(c
);
1749 emit_inst(c
, &(struct etna_inst
) {
1750 .opcode
= INST_OPCODE_RCP
,
1752 .dst
= etna_native_to_dst(temp
, INST_COMPS_W
), /* tmp.w */
1753 .src
[2] = swizzle(src
[0], SWIZZLE(W
, W
, W
, W
)),
1755 emit_inst(c
, &(struct etna_inst
) {
1756 .opcode
= INST_OPCODE_MUL
,
1758 .dst
= etna_native_to_dst(temp
, INST_COMPS_X
| INST_COMPS_Y
|
1759 INST_COMPS_Z
), /* tmp.xyz */
1760 .src
[0] = etna_native_to_src(temp
, SWIZZLE(W
, W
, W
, W
)),
1761 .src
[1] = src
[0], /* src.xyzw */
1763 emit_inst(c
, &(struct etna_inst
) {
1764 .opcode
= INST_OPCODE_TEXLD
,
1766 .dst
= convert_dst(c
, &inst
->Dst
[0]),
1767 .tex
= convert_tex(c
, &inst
->Src
[1], &inst
->Texture
),
1768 .src
[0] = etna_native_to_src(temp
, INST_SWIZ_IDENTITY
), /* tmp.xyzw */
1773 BUG("Unhandled instruction %s",
1774 tgsi_get_opcode_name(inst
->Instruction
.Opcode
));
/* No-op translation for TGSI opcodes that need no generated code
 * (NOP, END). */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* nothing to do */
}
1787 static const struct instr_translater translaters
[TGSI_OPCODE_LAST
] = {
1788 #define INSTR(n, f, ...) \
1789 [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}
1791 INSTR(MOV
, trans_instr
, .opc
= INST_OPCODE_MOV
, .src
= {2, -1, -1}),
1792 INSTR(RCP
, trans_instr
, .opc
= INST_OPCODE_RCP
, .src
= {2, -1, -1}),
1793 INSTR(RSQ
, trans_instr
, .opc
= INST_OPCODE_RSQ
, .src
= {2, -1, -1}),
1794 INSTR(MUL
, trans_instr
, .opc
= INST_OPCODE_MUL
, .src
= {0, 1, -1}),
1795 INSTR(ADD
, trans_instr
, .opc
= INST_OPCODE_ADD
, .src
= {0, 2, -1}),
1796 INSTR(DP2
, trans_instr
, .opc
= INST_OPCODE_DP2
, .src
= {0, 1, -1}),
1797 INSTR(DP3
, trans_instr
, .opc
= INST_OPCODE_DP3
, .src
= {0, 1, -1}),
1798 INSTR(DP4
, trans_instr
, .opc
= INST_OPCODE_DP4
, .src
= {0, 1, -1}),
1799 INSTR(DST
, trans_instr
, .opc
= INST_OPCODE_DST
, .src
= {0, 1, -1}),
1800 INSTR(MAD
, trans_instr
, .opc
= INST_OPCODE_MAD
, .src
= {0, 1, 2}),
1801 INSTR(EX2
, trans_instr
, .opc
= INST_OPCODE_EXP
, .src
= {2, -1, -1}),
1802 INSTR(LG2
, trans_lg2
),
1803 INSTR(SQRT
, trans_instr
, .opc
= INST_OPCODE_SQRT
, .src
= {2, -1, -1}),
1804 INSTR(FRC
, trans_instr
, .opc
= INST_OPCODE_FRC
, .src
= {2, -1, -1}),
1805 INSTR(CEIL
, trans_instr
, .opc
= INST_OPCODE_CEIL
, .src
= {2, -1, -1}),
1806 INSTR(FLR
, trans_instr
, .opc
= INST_OPCODE_FLOOR
, .src
= {2, -1, -1}),
1807 INSTR(CMP
, trans_instr
, .opc
= INST_OPCODE_SELECT
, .src
= {0, 1, 2}, .cond
= INST_CONDITION_LZ
),
1809 INSTR(KILL
, trans_instr
, .opc
= INST_OPCODE_TEXKILL
),
1810 INSTR(KILL_IF
, trans_instr
, .opc
= INST_OPCODE_TEXKILL
, .src
= {0, -1, -1}, .cond
= INST_CONDITION_LZ
),
1812 INSTR(DDX
, trans_deriv
, .opc
= INST_OPCODE_DSX
),
1813 INSTR(DDY
, trans_deriv
, .opc
= INST_OPCODE_DSY
),
1815 INSTR(IF
, trans_if
),
1816 INSTR(ELSE
, trans_else
),
1817 INSTR(ENDIF
, trans_endif
),
1819 INSTR(BGNLOOP
, trans_loop_bgn
),
1820 INSTR(ENDLOOP
, trans_loop_end
),
1821 INSTR(BRK
, trans_brk
),
1822 INSTR(CONT
, trans_cont
),
1824 INSTR(MIN
, trans_min_max
, .opc
= INST_OPCODE_SELECT
, .cond
= INST_CONDITION_GT
),
1825 INSTR(MAX
, trans_min_max
, .opc
= INST_OPCODE_SELECT
, .cond
= INST_CONDITION_LT
),
1827 INSTR(ARL
, trans_arl
),
1828 INSTR(LRP
, trans_lrp
),
1829 INSTR(LIT
, trans_lit
),
1830 INSTR(SSG
, trans_ssg
),
1832 INSTR(SIN
, trans_trig
),
1833 INSTR(COS
, trans_trig
),
1835 INSTR(SLT
, trans_instr
, .opc
= INST_OPCODE_SET
, .src
= {0, 1, -1}, .cond
= INST_CONDITION_LT
),
1836 INSTR(SGE
, trans_instr
, .opc
= INST_OPCODE_SET
, .src
= {0, 1, -1}, .cond
= INST_CONDITION_GE
),
1837 INSTR(SEQ
, trans_instr
, .opc
= INST_OPCODE_SET
, .src
= {0, 1, -1}, .cond
= INST_CONDITION_EQ
),
1838 INSTR(SGT
, trans_instr
, .opc
= INST_OPCODE_SET
, .src
= {0, 1, -1}, .cond
= INST_CONDITION_GT
),
1839 INSTR(SLE
, trans_instr
, .opc
= INST_OPCODE_SET
, .src
= {0, 1, -1}, .cond
= INST_CONDITION_LE
),
1840 INSTR(SNE
, trans_instr
, .opc
= INST_OPCODE_SET
, .src
= {0, 1, -1}, .cond
= INST_CONDITION_NE
),
1842 INSTR(TEX
, trans_sampler
),
1843 INSTR(TXB
, trans_sampler
),
1844 INSTR(TXL
, trans_sampler
),
1845 INSTR(TXP
, trans_sampler
),
1847 INSTR(NOP
, trans_dummy
),
1848 INSTR(END
, trans_dummy
),
1851 /* Pass -- compile instructions */
1853 etna_compile_pass_generate_code(struct etna_compile
*c
)
1855 struct tgsi_parse_context ctx
= { };
1856 ASSERTED
unsigned status
= tgsi_parse_init(&ctx
, c
->tokens
);
1857 assert(status
== TGSI_PARSE_OK
);
1860 while (!tgsi_parse_end_of_tokens(&ctx
)) {
1861 const struct tgsi_full_instruction
*inst
= 0;
1863 /* No inner temps used yet for this instruction, clear counter */
1866 tgsi_parse_token(&ctx
);
1868 switch (ctx
.FullToken
.Token
.Type
) {
1869 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1870 /* iterate over operands */
1871 inst
= &ctx
.FullToken
.FullInstruction
;
1872 if (c
->dead_inst
[inst_idx
]) { /* skip dead instructions */
1877 /* Lookup the TGSI information and generate the source arguments */
1878 struct etna_inst_src src
[ETNA_NUM_SRC
];
1879 memset(src
, 0, sizeof(src
));
1881 const struct tgsi_opcode_info
*tgsi
= tgsi_get_opcode_info(inst
->Instruction
.Opcode
);
1883 for (int i
= 0; i
< tgsi
->num_src
&& i
< ETNA_NUM_SRC
; i
++) {
1884 const struct tgsi_full_src_register
*reg
= &inst
->Src
[i
];
1885 const struct etna_native_reg
*n
= &etna_get_src_reg(c
, reg
->Register
)->native
;
1887 if (!n
->valid
|| n
->is_tex
)
1890 src
[i
] = etna_create_src(reg
, n
);
1893 const unsigned opc
= inst
->Instruction
.Opcode
;
1894 const struct instr_translater
*t
= &translaters
[opc
];
1897 t
->fxn(t
, c
, inst
, src
);
1901 BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc
));
1907 tgsi_parse_free(&ctx
);
1910 /* Look up register by semantic */
1911 static struct etna_reg_desc
*
1912 find_decl_by_semantic(struct etna_compile
*c
, uint file
, uint name
, uint index
)
1914 for (int idx
= 0; idx
< c
->file
[file
].reg_size
; ++idx
) {
1915 struct etna_reg_desc
*reg
= &c
->file
[file
].reg
[idx
];
1917 if (reg
->semantic
.Name
== name
&& reg
->semantic
.Index
== index
)
1921 return NULL
; /* not found */
1924 /** Add ADD and MUL instruction to bring Z/W to 0..1 if -1..1 if needed:
1925 * - this is a vertex shader
1926 * - and this is an older GPU
1929 etna_compile_add_z_div_if_needed(struct etna_compile
*c
)
1931 if (c
->info
.processor
== PIPE_SHADER_VERTEX
&& c
->specs
->vs_need_z_div
) {
1932 /* find position out */
1933 struct etna_reg_desc
*pos_reg
=
1934 find_decl_by_semantic(c
, TGSI_FILE_OUTPUT
, TGSI_SEMANTIC_POSITION
, 0);
1936 if (pos_reg
!= NULL
) {
1938 * ADD tX.__z_, tX.zzzz, void, tX.wwww
1939 * MUL tX.__z_, tX.zzzz, 0.5, void
1941 emit_inst(c
, &(struct etna_inst
) {
1942 .opcode
= INST_OPCODE_ADD
,
1943 .dst
= etna_native_to_dst(pos_reg
->native
, INST_COMPS_Z
),
1944 .src
[0] = etna_native_to_src(pos_reg
->native
, SWIZZLE(Z
, Z
, Z
, Z
)),
1945 .src
[2] = etna_native_to_src(pos_reg
->native
, SWIZZLE(W
, W
, W
, W
)),
1947 emit_inst(c
, &(struct etna_inst
) {
1948 .opcode
= INST_OPCODE_MUL
,
1949 .dst
= etna_native_to_dst(pos_reg
->native
, INST_COMPS_Z
),
1950 .src
[0] = etna_native_to_src(pos_reg
->native
, SWIZZLE(Z
, Z
, Z
, Z
)),
1951 .src
[1] = alloc_imm_f32(c
, 0.5f
),
1958 etna_compile_frag_rb_swap(struct etna_compile
*c
)
1960 if (c
->info
.processor
== PIPE_SHADER_FRAGMENT
&& c
->key
->frag_rb_swap
) {
1961 /* find color out */
1962 struct etna_reg_desc
*color_reg
=
1963 find_decl_by_semantic(c
, TGSI_FILE_OUTPUT
, TGSI_SEMANTIC_COLOR
, 0);
1965 emit_inst(c
, &(struct etna_inst
) {
1966 .opcode
= INST_OPCODE_MOV
,
1967 .dst
= etna_native_to_dst(color_reg
->native
, INST_COMPS_X
| INST_COMPS_Y
| INST_COMPS_Z
| INST_COMPS_W
),
1968 .src
[2] = etna_native_to_src(color_reg
->native
, SWIZZLE(Z
, Y
, X
, W
)),
1973 /** add a NOP to the shader if
1974 * a) the shader is empty
1976 * b) there is a label at the end of the shader
1979 etna_compile_add_nop_if_needed(struct etna_compile
*c
)
1981 bool label_at_last_inst
= false;
1983 for (int idx
= 0; idx
< c
->labels_count
; ++idx
) {
1984 if (c
->labels
[idx
].inst_idx
== c
->inst_ptr
)
1985 label_at_last_inst
= true;
1989 if (c
->inst_ptr
== 0 || label_at_last_inst
)
1990 emit_inst(c
, &(struct etna_inst
){.opcode
= INST_OPCODE_NOP
});
1994 assign_uniforms(struct etna_compile_file
*file
, unsigned base
)
1996 for (int idx
= 0; idx
< file
->reg_size
; ++idx
) {
1997 file
->reg
[idx
].native
.valid
= 1;
1998 file
->reg
[idx
].native
.rgroup
= INST_RGROUP_UNIFORM_0
;
1999 file
->reg
[idx
].native
.id
= base
+ idx
;
2003 /* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
2004 * CONST must be consecutive as const buffers are supposed to be consecutive,
2005 * and before IMM, as this is
2006 * more convenient because is possible for the compilation process itself to
2008 * immediates for constants such as pi, one, zero.
2011 assign_constants_and_immediates(struct etna_compile
*c
)
2013 assign_uniforms(&c
->file
[TGSI_FILE_CONSTANT
], 0);
2014 /* immediates start after the constants */
2015 c
->imm_base
= c
->file
[TGSI_FILE_CONSTANT
].reg_size
* 4;
2016 assign_uniforms(&c
->file
[TGSI_FILE_IMMEDIATE
], c
->imm_base
/ 4);
2017 DBG_F(ETNA_DBG_COMPILER_MSGS
, "imm base: %i size: %i", c
->imm_base
,
2021 /* Assign declared samplers to native texture units */
2023 assign_texture_units(struct etna_compile
*c
)
2027 if (c
->info
.processor
== PIPE_SHADER_VERTEX
)
2028 tex_base
= c
->specs
->vertex_sampler_offset
;
2030 for (int idx
= 0; idx
< c
->file
[TGSI_FILE_SAMPLER
].reg_size
; ++idx
) {
2031 c
->file
[TGSI_FILE_SAMPLER
].reg
[idx
].native
.valid
= 1;
2032 c
->file
[TGSI_FILE_SAMPLER
].reg
[idx
].native
.is_tex
= 1; // overrides rgroup
2033 c
->file
[TGSI_FILE_SAMPLER
].reg
[idx
].native
.id
= tex_base
+ idx
;
2037 /* Additional pass to fill in branch targets. This pass should be last
2038 * as no instruction reordering or removing/addition can be done anymore
2039 * once the branch targets are computed.
2042 etna_compile_fill_in_labels(struct etna_compile
*c
)
2044 for (int idx
= 0; idx
< c
->inst_ptr
; ++idx
) {
2045 if (c
->lbl_usage
[idx
] != -1)
2046 etna_assemble_set_imm(&c
->code
[idx
* 4],
2047 c
->labels
[c
->lbl_usage
[idx
]].inst_idx
);
2051 /* compare two etna_native_reg structures, return true if equal */
2053 cmp_etna_native_reg(const struct etna_native_reg to
,
2054 const struct etna_native_reg from
)
2056 return to
.valid
== from
.valid
&& to
.is_tex
== from
.is_tex
&&
2057 to
.rgroup
== from
.rgroup
&& to
.id
== from
.id
;
2060 /* go through all declarations and swap native registers *to* and *from* */
2062 swap_native_registers(struct etna_compile
*c
, const struct etna_native_reg to
,
2063 const struct etna_native_reg from
)
2065 if (cmp_etna_native_reg(from
, to
))
2066 return; /* Nothing to do */
2068 for (int idx
= 0; idx
< c
->total_decls
; ++idx
) {
2069 if (cmp_etna_native_reg(c
->decl
[idx
].native
, from
)) {
2070 c
->decl
[idx
].native
= to
;
2071 } else if (cmp_etna_native_reg(c
->decl
[idx
].native
, to
)) {
2072 c
->decl
[idx
].native
= from
;
2077 /* For PS we need to permute so that inputs are always in temporary 0..N-1.
2078 * Semantic POS is always t0. If that semantic is not used, avoid t0.
2081 permute_ps_inputs(struct etna_compile
*c
)
2084 * gl_FragCoord VARYING_SLOT_POS TGSI_SEMANTIC_POSITION
2085 * gl_FrontFacing VARYING_SLOT_FACE TGSI_SEMANTIC_FACE
2086 * gl_PointCoord VARYING_SLOT_PNTC TGSI_SEMANTIC_PCOORD
2088 uint native_idx
= 1;
2090 for (int idx
= 0; idx
< c
->file
[TGSI_FILE_INPUT
].reg_size
; ++idx
) {
2091 struct etna_reg_desc
*reg
= &c
->file
[TGSI_FILE_INPUT
].reg
[idx
];
2093 assert(reg
->has_semantic
);
2096 reg
->semantic
.Name
== TGSI_SEMANTIC_POSITION
||
2097 reg
->semantic
.Name
== TGSI_SEMANTIC_FACE
)
2100 input_id
= native_idx
++;
2101 swap_native_registers(c
, etna_native_temp(input_id
),
2102 c
->file
[TGSI_FILE_INPUT
].reg
[idx
].native
);
2105 c
->num_varyings
= native_idx
- 1;
2107 if (native_idx
> c
->next_free_native
)
2108 c
->next_free_native
= native_idx
;
2111 /* fill in ps inputs into shader object */
2113 fill_in_ps_inputs(struct etna_shader_variant
*sobj
, struct etna_compile
*c
)
2115 struct etna_shader_io_file
*sf
= &sobj
->infile
;
2119 for (int idx
= 0; idx
< c
->file
[TGSI_FILE_INPUT
].reg_size
; ++idx
) {
2120 struct etna_reg_desc
*reg
= &c
->file
[TGSI_FILE_INPUT
].reg
[idx
];
2122 if (reg
->native
.id
> 0) {
2123 assert(sf
->num_reg
< ETNA_NUM_INPUTS
);
2124 sf
->reg
[sf
->num_reg
].reg
= reg
->native
.id
;
2125 sf
->reg
[sf
->num_reg
].semantic
= reg
->semantic
;
2126 /* convert usage mask to number of components (*=wildcard)
2127 * .r (0..1) -> 1 component
2128 * .*g (2..3) -> 2 component
2129 * .**b (4..7) -> 3 components
2130 * .***a (8..15) -> 4 components
2132 sf
->reg
[sf
->num_reg
].num_components
= util_last_bit(reg
->usage_mask
);
2137 assert(sf
->num_reg
== c
->num_varyings
);
2138 sobj
->input_count_unk8
= 31; /* XXX what is this */
2141 /* fill in output mapping for ps into shader object */
2143 fill_in_ps_outputs(struct etna_shader_variant
*sobj
, struct etna_compile
*c
)
2145 sobj
->outfile
.num_reg
= 0;
2147 for (int idx
= 0; idx
< c
->file
[TGSI_FILE_OUTPUT
].reg_size
; ++idx
) {
2148 struct etna_reg_desc
*reg
= &c
->file
[TGSI_FILE_OUTPUT
].reg
[idx
];
2150 switch (reg
->semantic
.Name
) {
2151 case TGSI_SEMANTIC_COLOR
: /* FRAG_RESULT_COLOR */
2152 sobj
->ps_color_out_reg
= reg
->native
.id
;
2154 case TGSI_SEMANTIC_POSITION
: /* FRAG_RESULT_DEPTH */
2155 sobj
->ps_depth_out_reg
= reg
->native
.id
; /* =always native reg 0, only z component should be assigned */
2158 assert(0); /* only outputs supported are COLOR and POSITION at the moment */
2163 /* fill in inputs for vs into shader object */
2165 fill_in_vs_inputs(struct etna_shader_variant
*sobj
, struct etna_compile
*c
)
2167 struct etna_shader_io_file
*sf
= &sobj
->infile
;
2170 for (int idx
= 0; idx
< c
->file
[TGSI_FILE_INPUT
].reg_size
; ++idx
) {
2171 struct etna_reg_desc
*reg
= &c
->file
[TGSI_FILE_INPUT
].reg
[idx
];
2172 assert(sf
->num_reg
< ETNA_NUM_INPUTS
);
2174 if (!reg
->native
.valid
)
2177 /* XXX exclude inputs with special semantics such as gl_frontFacing */
2178 sf
->reg
[sf
->num_reg
].reg
= reg
->native
.id
;
2179 sf
->reg
[sf
->num_reg
].semantic
= reg
->semantic
;
2180 sf
->reg
[sf
->num_reg
].num_components
= util_last_bit(reg
->usage_mask
);
2184 sobj
->input_count_unk8
= (sf
->num_reg
+ 19) / 16; /* XXX what is this */
2187 /* build two-level output index [Semantic][Index] for fast linking */
2189 build_output_index(struct etna_shader_variant
*sobj
)
2194 for (int name
= 0; name
< TGSI_SEMANTIC_COUNT
; ++name
)
2195 total
+= sobj
->output_count_per_semantic
[name
];
2197 sobj
->output_per_semantic_list
= CALLOC(total
, sizeof(struct etna_shader_inout
*));
2199 for (int name
= 0; name
< TGSI_SEMANTIC_COUNT
; ++name
) {
2200 sobj
->output_per_semantic
[name
] = &sobj
->output_per_semantic_list
[offset
];
2201 offset
+= sobj
->output_count_per_semantic
[name
];
2204 for (int idx
= 0; idx
< sobj
->outfile
.num_reg
; ++idx
) {
2205 sobj
->output_per_semantic
[sobj
->outfile
.reg
[idx
].semantic
.Name
]
2206 [sobj
->outfile
.reg
[idx
].semantic
.Index
] =
2207 &sobj
->outfile
.reg
[idx
];
2211 /* fill in outputs for vs into shader object */
2213 fill_in_vs_outputs(struct etna_shader_variant
*sobj
, struct etna_compile
*c
)
2215 struct etna_shader_io_file
*sf
= &sobj
->outfile
;
2218 for (int idx
= 0; idx
< c
->file
[TGSI_FILE_OUTPUT
].reg_size
; ++idx
) {
2219 struct etna_reg_desc
*reg
= &c
->file
[TGSI_FILE_OUTPUT
].reg
[idx
];
2220 assert(sf
->num_reg
< ETNA_NUM_INPUTS
);
2222 switch (reg
->semantic
.Name
) {
2223 case TGSI_SEMANTIC_POSITION
:
2224 sobj
->vs_pos_out_reg
= reg
->native
.id
;
2226 case TGSI_SEMANTIC_PSIZE
:
2227 sobj
->vs_pointsize_out_reg
= reg
->native
.id
;
2230 sf
->reg
[sf
->num_reg
].reg
= reg
->native
.id
;
2231 sf
->reg
[sf
->num_reg
].semantic
= reg
->semantic
;
2232 sf
->reg
[sf
->num_reg
].num_components
= 4; // XXX reg->num_components;
2234 sobj
->output_count_per_semantic
[reg
->semantic
.Name
] =
2235 MAX2(reg
->semantic
.Index
+ 1,
2236 sobj
->output_count_per_semantic
[reg
->semantic
.Name
]);
2240 /* build two-level index for linking */
2241 build_output_index(sobj
);
2243 /* fill in "mystery meat" load balancing value. This value determines how
2244 * work is scheduled between VS and PS
2245 * in the unified shader architecture. More precisely, it is determined from
2246 * the number of VS outputs, as well as chip-specific
2247 * vertex output buffer size, vertex cache size, and the number of shader
2250 * XXX this is a conservative estimate, the "optimal" value is only known for
2251 * sure at link time because some
2252 * outputs may be unused and thus unmapped. Then again, in the general use
2253 * case with GLSL the vertex and fragment
2254 * shaders are linked already before submitting to Gallium, thus all outputs
2257 int half_out
= (c
->file
[TGSI_FILE_OUTPUT
].reg_size
+ 1) / 2;
2260 uint32_t b
= ((20480 / (c
->specs
->vertex_output_buffer_size
-
2261 2 * half_out
* c
->specs
->vertex_cache_size
)) +
2264 uint32_t a
= (b
+ 256 / (c
->specs
->shader_core_count
* half_out
)) / 2;
2265 sobj
->vs_load_balancing
= VIVS_VS_LOAD_BALANCING_A(MIN2(a
, 255)) |
2266 VIVS_VS_LOAD_BALANCING_B(MIN2(b
, 255)) |
2267 VIVS_VS_LOAD_BALANCING_C(0x3f) |
2268 VIVS_VS_LOAD_BALANCING_D(0x0f);
2272 etna_compile_check_limits(struct etna_compile
*c
)
2274 int max_uniforms
= (c
->info
.processor
== PIPE_SHADER_VERTEX
)
2275 ? c
->specs
->max_vs_uniforms
2276 : c
->specs
->max_ps_uniforms
;
2277 /* round up number of uniforms, including immediates, in units of four */
2278 int num_uniforms
= c
->imm_base
/ 4 + (c
->imm_size
+ 3) / 4;
2280 if (!c
->specs
->has_icache
&& c
->inst_ptr
> c
->specs
->max_instructions
) {
2281 DBG("Number of instructions (%d) exceeds maximum %d", c
->inst_ptr
,
2282 c
->specs
->max_instructions
);
2286 if (c
->next_free_native
> c
->specs
->max_registers
) {
2287 DBG("Number of registers (%d) exceeds maximum %d", c
->next_free_native
,
2288 c
->specs
->max_registers
);
2292 if (num_uniforms
> max_uniforms
) {
2293 DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms
,
2298 if (c
->num_varyings
> c
->specs
->max_varyings
) {
2299 DBG("Number of varyings (%d) exceeds maximum %d", c
->num_varyings
,
2300 c
->specs
->max_varyings
);
2304 if (c
->imm_base
> c
->specs
->num_constants
) {
2305 DBG("Number of constants (%d) exceeds maximum %d", c
->imm_base
,
2306 c
->specs
->num_constants
);
2313 copy_uniform_state_to_shader(struct etna_compile
*c
, struct etna_shader_variant
*sobj
)
2315 uint32_t count
= c
->imm_base
+ c
->imm_size
;
2316 struct etna_shader_uniform_info
*uinfo
= &sobj
->uniforms
;
2318 uinfo
->imm_count
= count
;
2320 uinfo
->imm_data
= malloc(count
* sizeof(*c
->imm_data
));
2321 for (unsigned i
= 0; i
< c
->imm_base
; i
++)
2322 uinfo
->imm_data
[i
] = i
;
2323 memcpy(&uinfo
->imm_data
[c
->imm_base
], c
->imm_data
, c
->imm_size
* sizeof(*c
->imm_data
));
2325 uinfo
->imm_contents
= malloc(count
* sizeof(*c
->imm_contents
));
2326 for (unsigned i
= 0; i
< c
->imm_base
; i
++)
2327 uinfo
->imm_contents
[i
] = ETNA_IMMEDIATE_UNIFORM
;
2328 memcpy(&uinfo
->imm_contents
[c
->imm_base
], c
->imm_contents
, c
->imm_size
* sizeof(*c
->imm_contents
));
2330 etna_set_shader_uniforms_dirty_flags(sobj
);
2334 etna_compile_shader(struct etna_shader_variant
*v
)
2336 if (DBG_ENABLED(ETNA_DBG_NIR
))
2337 return etna_compile_shader_nir(v
);
2339 /* Create scratch space that may be too large to fit on stack
2342 struct etna_compile
*c
;
2347 const struct etna_specs
*specs
= v
->shader
->specs
;
2349 struct tgsi_lowering_config lconfig
= {
2350 .lower_FLR
= !specs
->has_sign_floor_ceil
,
2351 .lower_CEIL
= !specs
->has_sign_floor_ceil
,
2355 .lower_DP2
= !specs
->has_halti2_instructions
,
2356 .lower_TRUNC
= true,
2359 c
= CALLOC_STRUCT(etna_compile
);
2363 memset(&c
->lbl_usage
, -1, sizeof(c
->lbl_usage
));
2365 const struct tgsi_token
*tokens
= v
->shader
->tokens
;
2369 c
->tokens
= tgsi_transform_lowering(&lconfig
, tokens
, &c
->info
);
2370 c
->free_tokens
= !!c
->tokens
;
2376 /* Build a map from gallium register to native registers for files
2377 * CONST, SAMP, IMM, OUT, IN, TEMP.
2378 * SAMP will map as-is for fragment shaders, there will be a +8 offset for
2381 /* Pass one -- check register file declarations and immediates */
2382 etna_compile_parse_declarations(c
);
2384 etna_allocate_decls(c
);
2386 /* Pass two -- check usage of temporaries, inputs, outputs */
2387 etna_compile_pass_check_usage(c
);
2389 assign_special_inputs(c
);
2391 /* Assign native temp register to TEMPs */
2392 assign_temporaries_to_native(c
, &c
->file
[TGSI_FILE_TEMPORARY
]);
2394 /* optimize outputs */
2395 etna_compile_pass_optimize_outputs(c
);
2397 /* assign inputs: last usage of input should be <= first usage of temp */
2398 /* potential optimization case:
2399 * if single MOV TEMP[y], IN[x] before which temp y is not used, and
2401 * is not read, temp[y] can be used as input register as-is
2403 /* sort temporaries by first use
2404 * sort inputs by last usage
2405 * iterate over inputs, temporaries
2406 * if last usage of input <= first usage of temp:
2407 * assign input to temp
2408 * advance input, temporary pointer
2410 * advance temporary pointer
2412 * potential problem: instruction with multiple inputs of which one is the
2413 * temp and the other is the input;
2414 * however, as the temp is not used before this, how would this make
2415 * sense? uninitialized temporaries have an undefined
2416 * value, so this would be ok
2418 assign_inouts_to_temporaries(c
, TGSI_FILE_INPUT
);
2420 /* assign outputs: first usage of output should be >= last usage of temp */
2421 /* potential optimization case:
2422 * if single MOV OUT[x], TEMP[y] (with full write mask, or at least
2423 * writing all components that are used in
2424 * the shader) after which temp y is no longer used temp[y] can be
2425 * used as output register as-is
2427 * potential problem: instruction with multiple outputs of which one is the
2428 * temp and the other is the output;
2429 * however, as the temp is not used after this, how would this make
2430 * sense? could just discard the output value
2432 /* sort temporaries by last use
2433 * sort outputs by first usage
2434 * iterate over outputs, temporaries
2435 * if first usage of output >= last usage of temp:
2436 * assign output to temp
2437 * advance output, temporary pointer
2439 * advance temporary pointer
2441 assign_inouts_to_temporaries(c
, TGSI_FILE_OUTPUT
);
2443 assign_constants_and_immediates(c
);
2444 assign_texture_units(c
);
2446 /* list declarations */
2447 for (int x
= 0; x
< c
->total_decls
; ++x
) {
2448 DBG_F(ETNA_DBG_COMPILER_MSGS
, "%i: %s,%d active=%i first_use=%i "
2449 "last_use=%i native=%i usage_mask=%x "
2451 x
, tgsi_file_name(c
->decl
[x
].file
), c
->decl
[x
].idx
,
2452 c
->decl
[x
].active
, c
->decl
[x
].first_use
, c
->decl
[x
].last_use
,
2453 c
->decl
[x
].native
.valid
? c
->decl
[x
].native
.id
: -1,
2454 c
->decl
[x
].usage_mask
, c
->decl
[x
].has_semantic
);
2455 if (c
->decl
[x
].has_semantic
)
2456 DBG_F(ETNA_DBG_COMPILER_MSGS
, " semantic_name=%s semantic_idx=%i",
2457 tgsi_semantic_names
[c
->decl
[x
].semantic
.Name
],
2458 c
->decl
[x
].semantic
.Index
);
2460 /* XXX for PS we need to permute so that inputs are always in temporary
2462 * There is no "switchboard" for varyings (AFAIK!). The output color,
2463 * however, can be routed
2464 * from an arbitrary temporary.
2466 if (c
->info
.processor
== PIPE_SHADER_FRAGMENT
)
2467 permute_ps_inputs(c
);
2470 /* list declarations */
2471 for (int x
= 0; x
< c
->total_decls
; ++x
) {
2472 DBG_F(ETNA_DBG_COMPILER_MSGS
, "%i: %s,%d active=%i first_use=%i "
2473 "last_use=%i native=%i usage_mask=%x "
2475 x
, tgsi_file_name(c
->decl
[x
].file
), c
->decl
[x
].idx
,
2476 c
->decl
[x
].active
, c
->decl
[x
].first_use
, c
->decl
[x
].last_use
,
2477 c
->decl
[x
].native
.valid
? c
->decl
[x
].native
.id
: -1,
2478 c
->decl
[x
].usage_mask
, c
->decl
[x
].has_semantic
);
2479 if (c
->decl
[x
].has_semantic
)
2480 DBG_F(ETNA_DBG_COMPILER_MSGS
, " semantic_name=%s semantic_idx=%i",
2481 tgsi_semantic_names
[c
->decl
[x
].semantic
.Name
],
2482 c
->decl
[x
].semantic
.Index
);
2485 /* pass 3: generate instructions */
2486 etna_compile_pass_generate_code(c
);
2487 etna_compile_add_z_div_if_needed(c
);
2488 etna_compile_frag_rb_swap(c
);
2489 etna_compile_add_nop_if_needed(c
);
2491 ret
= etna_compile_check_limits(c
);
2495 etna_compile_fill_in_labels(c
);
2497 /* fill in output structure */
2498 v
->stage
= c
->info
.processor
== PIPE_SHADER_FRAGMENT
? MESA_SHADER_FRAGMENT
: MESA_SHADER_VERTEX
;
2499 v
->code_size
= c
->inst_ptr
* 4;
2500 v
->code
= mem_dup(c
->code
, c
->inst_ptr
* 16);
2501 v
->num_loops
= c
->num_loops
;
2502 v
->num_temps
= c
->next_free_native
;
2503 v
->vs_id_in_reg
= -1;
2504 v
->vs_pos_out_reg
= -1;
2505 v
->vs_pointsize_out_reg
= -1;
2506 v
->ps_color_out_reg
= -1;
2507 v
->ps_depth_out_reg
= -1;
2508 v
->needs_icache
= c
->inst_ptr
> c
->specs
->max_instructions
;
2509 copy_uniform_state_to_shader(c
, v
);
2511 if (c
->info
.processor
== PIPE_SHADER_VERTEX
) {
2512 fill_in_vs_inputs(v
, c
);
2513 fill_in_vs_outputs(v
, c
);
2514 } else if (c
->info
.processor
== PIPE_SHADER_FRAGMENT
) {
2515 fill_in_ps_inputs(v
, c
);
2516 fill_in_ps_outputs(v
, c
);
2521 FREE((void *)c
->tokens
);
2529 extern const char *tgsi_swizzle_names
[];
2531 etna_dump_shader(const struct etna_shader_variant
*shader
)
2533 if (shader
->stage
== MESA_SHADER_VERTEX
)
2539 etna_disasm(shader
->code
, shader
->code_size
, PRINT_RAW
);
2541 printf("num loops: %i\n", shader
->num_loops
);
2542 printf("num temps: %i\n", shader
->num_temps
);
2543 printf("immediates:\n");
2544 for (int idx
= 0; idx
< shader
->uniforms
.imm_count
; ++idx
) {
2545 printf(" [%i].%s = %f (0x%08x) (%d)\n",
2547 tgsi_swizzle_names
[idx
% 4],
2548 *((float *)&shader
->uniforms
.imm_data
[idx
]),
2549 shader
->uniforms
.imm_data
[idx
],
2550 shader
->uniforms
.imm_contents
[idx
]);
2552 printf("inputs:\n");
2553 for (int idx
= 0; idx
< shader
->infile
.num_reg
; ++idx
) {
2554 printf(" [%i] name=%s index=%i comps=%i\n", shader
->infile
.reg
[idx
].reg
,
2555 tgsi_semantic_names
[shader
->infile
.reg
[idx
].semantic
.Name
],
2556 shader
->infile
.reg
[idx
].semantic
.Index
,
2557 shader
->infile
.reg
[idx
].num_components
);
2559 printf("outputs:\n");
2560 for (int idx
= 0; idx
< shader
->outfile
.num_reg
; ++idx
) {
2561 printf(" [%i] name=%s index=%i comps=%i\n", shader
->outfile
.reg
[idx
].reg
,
2562 tgsi_semantic_names
[shader
->outfile
.reg
[idx
].semantic
.Name
],
2563 shader
->outfile
.reg
[idx
].semantic
.Index
,
2564 shader
->outfile
.reg
[idx
].num_components
);
2566 printf("special:\n");
2567 if (shader
->stage
== MESA_SHADER_VERTEX
) {
2568 printf(" vs_pos_out_reg=%i\n", shader
->vs_pos_out_reg
);
2569 printf(" vs_pointsize_out_reg=%i\n", shader
->vs_pointsize_out_reg
);
2570 printf(" vs_load_balancing=0x%08x\n", shader
->vs_load_balancing
);
2572 printf(" ps_color_out_reg=%i\n", shader
->ps_color_out_reg
);
2573 printf(" ps_depth_out_reg=%i\n", shader
->ps_depth_out_reg
);
2575 printf(" input_count_unk8=0x%08x\n", shader
->input_count_unk8
);
2579 etna_destroy_shader(struct etna_shader_variant
*shader
)
2584 FREE(shader
->uniforms
.imm_data
);
2585 FREE(shader
->uniforms
.imm_contents
);
2586 FREE(shader
->output_per_semantic_list
);
2590 static const struct etna_shader_inout
*
2591 etna_shader_vs_lookup(const struct etna_shader_variant
*sobj
,
2592 const struct etna_shader_inout
*in
)
2594 if (in
->semantic
.Index
< sobj
->output_count_per_semantic
[in
->semantic
.Name
])
2595 return sobj
->output_per_semantic
[in
->semantic
.Name
][in
->semantic
.Index
];
2601 etna_link_shader(struct etna_shader_link_info
*info
,
2602 const struct etna_shader_variant
*vs
, const struct etna_shader_variant
*fs
)
2605 /* For each fragment input we need to find the associated vertex shader
2606 * output, which can be found by matching on semantic name and index. A
2607 * binary search could be used because the vs outputs are sorted by their
2608 * semantic index and grouped by semantic type by fill_in_vs_outputs.
2610 assert(fs
->infile
.num_reg
< ETNA_NUM_INPUTS
);
2611 info
->pcoord_varying_comp_ofs
= -1;
2613 for (int idx
= 0; idx
< fs
->infile
.num_reg
; ++idx
) {
2614 const struct etna_shader_inout
*fsio
= &fs
->infile
.reg
[idx
];
2615 const struct etna_shader_inout
*vsio
= etna_shader_vs_lookup(vs
, fsio
);
2616 struct etna_varying
*varying
;
2617 bool interpolate_always
= fsio
->semantic
.Name
!= TGSI_SEMANTIC_COLOR
;
2619 assert(fsio
->reg
> 0 && fsio
->reg
<= ARRAY_SIZE(info
->varyings
));
2621 if (fsio
->reg
> info
->num_varyings
)
2622 info
->num_varyings
= fsio
->reg
;
2624 varying
= &info
->varyings
[fsio
->reg
- 1];
2625 varying
->num_components
= fsio
->num_components
;
2627 if (!interpolate_always
) /* colors affected by flat shading */
2628 varying
->pa_attributes
= 0x200;
2629 else /* texture coord or other bypasses flat shading */
2630 varying
->pa_attributes
= 0x2f1;
2632 varying
->use
[0] = VARYING_COMPONENT_USE_UNUSED
;
2633 varying
->use
[1] = VARYING_COMPONENT_USE_UNUSED
;
2634 varying
->use
[2] = VARYING_COMPONENT_USE_UNUSED
;
2635 varying
->use
[3] = VARYING_COMPONENT_USE_UNUSED
;
2637 /* point coord is an input to the PS without matching VS output,
2638 * so it gets a varying slot without being assigned a VS register.
2640 if (fsio
->semantic
.Name
== TGSI_SEMANTIC_PCOORD
) {
2641 varying
->use
[0] = VARYING_COMPONENT_USE_POINTCOORD_X
;
2642 varying
->use
[1] = VARYING_COMPONENT_USE_POINTCOORD_Y
;
2644 info
->pcoord_varying_comp_ofs
= comp_ofs
;
2646 if (vsio
== NULL
) { /* not found -- link error */
2647 BUG("Semantic %d value %d not found in vertex shader outputs\n", fsio
->semantic
.Name
, fsio
->semantic
.Index
);
2651 varying
->reg
= vsio
->reg
;
2654 comp_ofs
+= varying
->num_components
;
2657 assert(info
->num_varyings
== fs
->infile
.num_reg
);