1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
4 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 * Rob Clark <robclark@freedesktop.org>
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "tgsi/tgsi_parse.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_strings.h"
37 #include "tgsi/tgsi_dump.h"
39 #include "freedreno_program.h"
40 #include "freedreno_compiler.h"
41 #include "freedreno_util.h"
43 #include "instr-a2xx.h"
46 struct fd_compile_context
{
47 struct fd_program_stateobj
*prog
;
48 struct fd_shader_stateobj
*so
;
50 struct tgsi_parse_context parser
;
53 /* predicate stack: */
55 enum ir2_pred pred_stack
[8];
57 /* Internal-Temporary and Predicate register assignment:
59 * Some TGSI instructions which translate into multiple actual
60 * instructions need one or more temporary registers, which are not
61 * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY).
62 * And some instructions (texture fetch) cannot write directly to
63 * output registers. We could be more clever and re-use dst or a
64 * src register in some cases. But for now don't try to be clever.
65 * Eventually we should implement an optimization pass that re-
66 * juggles the register usage and gets rid of unneeded temporaries.
68 * The predicate register must be valid across multiple TGSI
69 * instructions, but internal temporaries need not be. For this reason,
70 * once the predicate register is requested, until it is no longer
71 * needed, it gets the first register slot after the TGSI
72 * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the
73 * internal temporaries get the register slots above this.
77 int num_internal_temps
;
79 uint8_t num_regs
[TGSI_FILE_COUNT
];
81 /* maps input register idx to prog->export_linkage idx: */
82 uint8_t input_export_idx
[64];
84 /* maps output register idx to prog->export_linkage idx: */
85 uint8_t output_export_idx
[64];
87 /* idx/slot for last compiler generated immediate */
88 unsigned immediate_idx
;
90 // TODO we can skip emit exports in the VS that the FS doesn't need..
91 // and get rid perhaps of num_param..
92 unsigned num_position
, num_param
;
93 unsigned position
, psize
;
97 /* current exec CF instruction */
102 semantic_idx(struct tgsi_declaration_semantic
*semantic
)
104 int idx
= semantic
->Name
;
105 if (idx
== TGSI_SEMANTIC_GENERIC
)
106 idx
= TGSI_SEMANTIC_COUNT
+ semantic
->Index
;
110 /* assign/get the input/export register # for given semantic idx as
111 * returned by semantic_idx():
114 export_linkage(struct fd_compile_context
*ctx
, int idx
)
116 struct fd_program_stateobj
*prog
= ctx
->prog
;
118 /* if first time we've seen this export, assign the next available slot: */
119 if (prog
->export_linkage
[idx
] == 0xff)
120 prog
->export_linkage
[idx
] = prog
->num_exports
++;
122 return prog
->export_linkage
[idx
];
126 compile_init(struct fd_compile_context
*ctx
, struct fd_program_stateobj
*prog
,
127 struct fd_shader_stateobj
*so
)
136 ret
= tgsi_parse_init(&ctx
->parser
, so
->tokens
);
137 if (ret
!= TGSI_PARSE_OK
)
140 ctx
->type
= ctx
->parser
.FullHeader
.Processor
.Processor
;
143 ctx
->num_position
= 0;
146 ctx
->immediate_idx
= 0;
148 ctx
->num_internal_temps
= 0;
150 memset(ctx
->num_regs
, 0, sizeof(ctx
->num_regs
));
151 memset(ctx
->input_export_idx
, 0, sizeof(ctx
->input_export_idx
));
152 memset(ctx
->output_export_idx
, 0, sizeof(ctx
->output_export_idx
));
154 /* do first pass to extract declarations: */
155 while (!tgsi_parse_end_of_tokens(&ctx
->parser
)) {
156 tgsi_parse_token(&ctx
->parser
);
158 switch (ctx
->parser
.FullToken
.Token
.Type
) {
159 case TGSI_TOKEN_TYPE_DECLARATION
: {
160 struct tgsi_full_declaration
*decl
=
161 &ctx
->parser
.FullToken
.FullDeclaration
;
162 if (decl
->Declaration
.File
== TGSI_FILE_OUTPUT
) {
163 unsigned name
= decl
->Semantic
.Name
;
165 assert(decl
->Declaration
.Semantic
); // TODO is this ever not true?
167 ctx
->output_export_idx
[decl
->Range
.First
] =
168 semantic_idx(&decl
->Semantic
);
170 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
172 case TGSI_SEMANTIC_POSITION
:
173 ctx
->position
= ctx
->num_regs
[TGSI_FILE_OUTPUT
];
176 case TGSI_SEMANTIC_PSIZE
:
177 ctx
->psize
= ctx
->num_regs
[TGSI_FILE_OUTPUT
];
179 case TGSI_SEMANTIC_COLOR
:
180 case TGSI_SEMANTIC_GENERIC
:
184 DBG("unknown VS semantic name: %s",
185 tgsi_semantic_names
[name
]);
190 case TGSI_SEMANTIC_COLOR
:
191 case TGSI_SEMANTIC_GENERIC
:
195 DBG("unknown PS semantic name: %s",
196 tgsi_semantic_names
[name
]);
200 } else if (decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
201 ctx
->input_export_idx
[decl
->Range
.First
] =
202 semantic_idx(&decl
->Semantic
);
204 ctx
->num_regs
[decl
->Declaration
.File
] =
205 MAX2(ctx
->num_regs
[decl
->Declaration
.File
], decl
->Range
.Last
+ 1);
208 case TGSI_TOKEN_TYPE_IMMEDIATE
: {
209 struct tgsi_full_immediate
*imm
=
210 &ctx
->parser
.FullToken
.FullImmediate
;
211 unsigned n
= ctx
->so
->num_immediates
++;
212 memcpy(ctx
->so
->immediates
[n
].val
, imm
->u
, 16);
220 /* TGSI generated immediates are always entire vec4's, ones we
221 * generate internally are not:
223 ctx
->immediate_idx
= ctx
->so
->num_immediates
* 4;
225 ctx
->so
->first_immediate
= ctx
->num_regs
[TGSI_FILE_CONSTANT
];
227 tgsi_parse_free(&ctx
->parser
);
229 return tgsi_parse_init(&ctx
->parser
, so
->tokens
);
233 compile_free(struct fd_compile_context
*ctx
)
235 tgsi_parse_free(&ctx
->parser
);
238 static struct ir2_cf
*
239 next_exec_cf(struct fd_compile_context
*ctx
)
241 struct ir2_cf
*cf
= ctx
->cf
;
242 if (!cf
|| cf
->exec
.instrs_count
>= ARRAY_SIZE(ctx
->cf
->exec
.instrs
))
243 ctx
->cf
= cf
= ir2_cf_create(ctx
->so
->ir
, EXEC
);
248 compile_vtx_fetch(struct fd_compile_context
*ctx
)
250 struct ir2_instruction
**vfetch_instrs
= ctx
->so
->vfetch_instrs
;
252 for (i
= 0; i
< ctx
->num_regs
[TGSI_FILE_INPUT
]; i
++) {
253 struct ir2_instruction
*instr
= ir2_instr_create(
254 next_exec_cf(ctx
), IR2_FETCH
);
255 instr
->fetch
.opc
= VTX_FETCH
;
257 ctx
->need_sync
|= 1 << (i
+1);
259 ir2_reg_create(instr
, i
+1, "xyzw", 0);
260 ir2_reg_create(instr
, 0, "x", 0);
265 vfetch_instrs
[i
] = instr
;
267 ctx
->so
->num_vfetch_instrs
= i
;
272 * For vertex shaders (VS):
273 * --- ------ -------------
275 * Inputs: R1-R(num_input)
276 * Constants: C0-C(num_const-1)
277 * Immediates: C(num_const)-C(num_const+num_imm-1)
278 * Outputs: export0-export(n) and export62, export63
279 * n is # of outputs minus gl_Position (export62) and gl_PointSize (export63)
280 * Temps: R(num_input+1)-R(num_input+num_temps)
282 * R0 could be clobbered after the vertex fetch instructions.. so we
283 * could use it for one of the temporaries.
285 * TODO: maybe the vertex fetch part could fetch first input into R0 as
286 * the last vtx fetch instruction, which would let us use the same
287 * register layout in either case.. although this is not what the blob
291 * For frag shaders (PS):
292 * --- ---- -------------
294 * Inputs: R0-R(num_input-1)
295 * Constants: same as VS
296 * Immediates: same as VS
297 * Outputs: export0-export(num_outputs)
298 * Temps: R(num_input)-R(num_input+num_temps-1)
300 * In either case, immediates are appended to the constants
306 get_temp_gpr(struct fd_compile_context
*ctx
, int idx
)
308 unsigned num
= idx
+ ctx
->num_regs
[TGSI_FILE_INPUT
];
309 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
)
314 static struct ir2_register
*
315 add_dst_reg(struct fd_compile_context
*ctx
, struct ir2_instruction
*alu
,
316 const struct tgsi_dst_register
*dst
)
318 unsigned flags
= 0, num
= 0;
322 case TGSI_FILE_OUTPUT
:
323 flags
|= IR2_REG_EXPORT
;
324 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
325 if (dst
->Index
== ctx
->position
) {
327 } else if (dst
->Index
== ctx
->psize
) {
330 num
= export_linkage(ctx
,
331 ctx
->output_export_idx
[dst
->Index
]);
337 case TGSI_FILE_TEMPORARY
:
338 num
= get_temp_gpr(ctx
, dst
->Index
);
341 DBG("unsupported dst register file: %s",
342 tgsi_file_name(dst
->File
));
347 swiz
[0] = (dst
->WriteMask
& TGSI_WRITEMASK_X
) ? 'x' : '_';
348 swiz
[1] = (dst
->WriteMask
& TGSI_WRITEMASK_Y
) ? 'y' : '_';
349 swiz
[2] = (dst
->WriteMask
& TGSI_WRITEMASK_Z
) ? 'z' : '_';
350 swiz
[3] = (dst
->WriteMask
& TGSI_WRITEMASK_W
) ? 'w' : '_';
353 return ir2_reg_create(alu
, num
, swiz
, flags
);
356 static struct ir2_register
*
357 add_src_reg(struct fd_compile_context
*ctx
, struct ir2_instruction
*alu
,
358 const struct tgsi_src_register
*src
)
360 static const char swiz_vals
[] = {
364 unsigned flags
= 0, num
= 0;
367 case TGSI_FILE_CONSTANT
:
369 flags
|= IR2_REG_CONST
;
371 case TGSI_FILE_INPUT
:
372 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
373 num
= src
->Index
+ 1;
375 num
= export_linkage(ctx
,
376 ctx
->input_export_idx
[src
->Index
]);
379 case TGSI_FILE_TEMPORARY
:
380 num
= get_temp_gpr(ctx
, src
->Index
);
382 case TGSI_FILE_IMMEDIATE
:
383 num
= src
->Index
+ ctx
->num_regs
[TGSI_FILE_CONSTANT
];
384 flags
|= IR2_REG_CONST
;
387 DBG("unsupported src register file: %s",
388 tgsi_file_name(src
->File
));
394 flags
|= IR2_REG_ABS
;
396 flags
|= IR2_REG_NEGATE
;
398 swiz
[0] = swiz_vals
[src
->SwizzleX
];
399 swiz
[1] = swiz_vals
[src
->SwizzleY
];
400 swiz
[2] = swiz_vals
[src
->SwizzleZ
];
401 swiz
[3] = swiz_vals
[src
->SwizzleW
];
404 if ((ctx
->need_sync
& (uint64_t)(1 << num
)) &&
405 !(flags
& IR2_REG_CONST
)) {
407 ctx
->need_sync
&= ~(uint64_t)(1 << num
);
410 return ir2_reg_create(alu
, num
, swiz
, flags
);
414 add_vector_clamp(struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
416 switch (inst
->Instruction
.Saturate
) {
419 case TGSI_SAT_ZERO_ONE
:
420 alu
->alu
.vector_clamp
= true;
422 case TGSI_SAT_MINUS_PLUS_ONE
:
423 DBG("unsupported saturate");
430 add_scalar_clamp(struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
432 switch (inst
->Instruction
.Saturate
) {
435 case TGSI_SAT_ZERO_ONE
:
436 alu
->alu
.scalar_clamp
= true;
438 case TGSI_SAT_MINUS_PLUS_ONE
:
439 DBG("unsupported saturate");
446 add_regs_vector_1(struct fd_compile_context
*ctx
,
447 struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
449 assert(inst
->Instruction
.NumSrcRegs
== 1);
450 assert(inst
->Instruction
.NumDstRegs
== 1);
452 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
453 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
454 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
455 add_vector_clamp(inst
, alu
);
459 add_regs_vector_2(struct fd_compile_context
*ctx
,
460 struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
462 assert(inst
->Instruction
.NumSrcRegs
== 2);
463 assert(inst
->Instruction
.NumDstRegs
== 1);
465 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
466 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
467 add_src_reg(ctx
, alu
, &inst
->Src
[1].Register
);
468 add_vector_clamp(inst
, alu
);
472 add_regs_vector_3(struct fd_compile_context
*ctx
,
473 struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
475 assert(inst
->Instruction
.NumSrcRegs
== 3);
476 assert(inst
->Instruction
.NumDstRegs
== 1);
478 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
479 /* maybe should re-arrange the syntax some day, but
480 * in assembler/disassembler and what ir.c expects
481 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rsrc1
483 add_src_reg(ctx
, alu
, &inst
->Src
[2].Register
);
484 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
485 add_src_reg(ctx
, alu
, &inst
->Src
[1].Register
);
486 add_vector_clamp(inst
, alu
);
490 add_regs_dummy_vector(struct ir2_instruction
*alu
)
492 /* create dummy, non-written vector dst/src regs
493 * for unused vector instr slot:
495 ir2_reg_create(alu
, 0, "____", 0); /* vector dst */
496 ir2_reg_create(alu
, 0, NULL
, 0); /* vector src1 */
497 ir2_reg_create(alu
, 0, NULL
, 0); /* vector src2 */
501 add_regs_scalar_1(struct fd_compile_context
*ctx
,
502 struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
504 assert(inst
->Instruction
.NumSrcRegs
== 1);
505 assert(inst
->Instruction
.NumDstRegs
== 1);
507 add_regs_dummy_vector(alu
);
509 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
510 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
511 add_scalar_clamp(inst
, alu
);
515 * Helpers for TGSI instructions that don't map to a single shader instr:
519 src_from_dst(struct tgsi_src_register
*src
, struct tgsi_dst_register
*dst
)
521 src
->File
= dst
->File
;
522 src
->Indirect
= dst
->Indirect
;
523 src
->Dimension
= dst
->Dimension
;
524 src
->Index
= dst
->Index
;
527 src
->SwizzleX
= TGSI_SWIZZLE_X
;
528 src
->SwizzleY
= TGSI_SWIZZLE_Y
;
529 src
->SwizzleZ
= TGSI_SWIZZLE_Z
;
530 src
->SwizzleW
= TGSI_SWIZZLE_W
;
533 /* Get internal-temp src/dst to use for a sequence of instructions
534 * generated by a single TGSI op.. if possible, use the final dst
535 * register as the temporary to avoid allocating a new register, but
536 * if necessary allocate one. If a single TGSI op needs multiple
537 * internal temps, pass NULL for orig_dst for all but the first one
538 * so that you don't end up using the same register for all your
542 get_internal_temp(struct fd_compile_context
*ctx
,
543 struct tgsi_dst_register
*tmp_dst
,
544 struct tgsi_src_register
*tmp_src
)
548 tmp_dst
->File
= TGSI_FILE_TEMPORARY
;
549 tmp_dst
->WriteMask
= TGSI_WRITEMASK_XYZW
;
550 tmp_dst
->Indirect
= 0;
551 tmp_dst
->Dimension
= 0;
553 /* assign next temporary: */
554 n
= ctx
->num_internal_temps
++;
555 if (ctx
->pred_reg
!= -1)
558 tmp_dst
->Index
= ctx
->num_regs
[TGSI_FILE_TEMPORARY
] + n
;
560 src_from_dst(tmp_src
, tmp_dst
);
564 get_predicate(struct fd_compile_context
*ctx
, struct tgsi_dst_register
*dst
,
565 struct tgsi_src_register
*src
)
567 assert(ctx
->pred_reg
!= -1);
569 dst
->File
= TGSI_FILE_TEMPORARY
;
570 dst
->WriteMask
= TGSI_WRITEMASK_W
;
573 dst
->Index
= get_temp_gpr(ctx
, ctx
->pred_reg
);
576 src_from_dst(src
, dst
);
577 src
->SwizzleX
= TGSI_SWIZZLE_W
;
578 src
->SwizzleY
= TGSI_SWIZZLE_W
;
579 src
->SwizzleZ
= TGSI_SWIZZLE_W
;
580 src
->SwizzleW
= TGSI_SWIZZLE_W
;
585 push_predicate(struct fd_compile_context
*ctx
, struct tgsi_src_register
*src
)
587 struct ir2_instruction
*alu
;
588 struct tgsi_dst_register pred_dst
;
590 /* NOTE blob compiler seems to always put PRED_* instrs in a CF by
595 if (ctx
->pred_depth
== 0) {
596 /* assign predicate register: */
597 ctx
->pred_reg
= ctx
->num_regs
[TGSI_FILE_TEMPORARY
];
599 get_predicate(ctx
, &pred_dst
, NULL
);
601 alu
= ir2_instr_create_alu(next_exec_cf(ctx
), ~0, PRED_SETNEs
);
602 add_regs_dummy_vector(alu
);
603 add_dst_reg(ctx
, alu
, &pred_dst
);
604 add_src_reg(ctx
, alu
, src
);
606 struct tgsi_src_register pred_src
;
608 get_predicate(ctx
, &pred_dst
, &pred_src
);
610 alu
= ir2_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
611 add_dst_reg(ctx
, alu
, &pred_dst
);
612 add_src_reg(ctx
, alu
, &pred_src
);
613 add_src_reg(ctx
, alu
, src
);
615 // XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make
616 // sure src reg is valid if it was calculated with a predicate
618 alu
->pred
= IR2_PRED_NONE
;
621 /* save previous pred state to restore in pop_predicate(): */
622 ctx
->pred_stack
[ctx
->pred_depth
++] = ctx
->so
->ir
->pred
;
628 pop_predicate(struct fd_compile_context
*ctx
)
630 /* NOTE blob compiler seems to always put PRED_* instrs in a CF by
635 /* restore previous predicate state: */
636 ctx
->so
->ir
->pred
= ctx
->pred_stack
[--ctx
->pred_depth
];
638 if (ctx
->pred_depth
!= 0) {
639 struct ir2_instruction
*alu
;
640 struct tgsi_dst_register pred_dst
;
641 struct tgsi_src_register pred_src
;
643 get_predicate(ctx
, &pred_dst
, &pred_src
);
645 alu
= ir2_instr_create_alu(next_exec_cf(ctx
), ~0, PRED_SET_POPs
);
646 add_regs_dummy_vector(alu
);
647 add_dst_reg(ctx
, alu
, &pred_dst
);
648 add_src_reg(ctx
, alu
, &pred_src
);
649 alu
->pred
= IR2_PRED_NONE
;
651 /* predicate register no longer needed: */
659 get_immediate(struct fd_compile_context
*ctx
,
660 struct tgsi_src_register
*reg
, uint32_t val
)
662 unsigned neg
, swiz
, idx
, i
;
663 /* actually maps 1:1 currently.. not sure if that is safe to rely on: */
664 static const unsigned swiz2tgsi
[] = {
665 TGSI_SWIZZLE_X
, TGSI_SWIZZLE_Y
, TGSI_SWIZZLE_Z
, TGSI_SWIZZLE_W
,
668 for (i
= 0; i
< ctx
->immediate_idx
; i
++) {
672 if (ctx
->so
->immediates
[idx
].val
[swiz
] == val
) {
677 if (ctx
->so
->immediates
[idx
].val
[swiz
] == -val
) {
683 if (i
== ctx
->immediate_idx
) {
684 /* need to generate a new immediate: */
688 ctx
->so
->immediates
[idx
].val
[swiz
] = val
;
689 ctx
->so
->num_immediates
= idx
+ 1;
690 ctx
->immediate_idx
++;
693 reg
->File
= TGSI_FILE_IMMEDIATE
;
699 reg
->SwizzleX
= swiz2tgsi
[swiz
];
700 reg
->SwizzleY
= swiz2tgsi
[swiz
];
701 reg
->SwizzleZ
= swiz2tgsi
[swiz
];
702 reg
->SwizzleW
= swiz2tgsi
[swiz
];
705 /* POW(a,b) = EXP2(b * LOG2(a)) */
707 translate_pow(struct fd_compile_context
*ctx
,
708 struct tgsi_full_instruction
*inst
)
710 struct tgsi_dst_register tmp_dst
;
711 struct tgsi_src_register tmp_src
;
712 struct ir2_instruction
*alu
;
714 get_internal_temp(ctx
, &tmp_dst
, &tmp_src
);
716 alu
= ir2_instr_create_alu(next_exec_cf(ctx
), ~0, LOG_CLAMP
);
717 add_regs_dummy_vector(alu
);
718 add_dst_reg(ctx
, alu
, &tmp_dst
);
719 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
721 alu
= ir2_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
722 add_dst_reg(ctx
, alu
, &tmp_dst
);
723 add_src_reg(ctx
, alu
, &tmp_src
);
724 add_src_reg(ctx
, alu
, &inst
->Src
[1].Register
);
726 /* NOTE: some of the instructions, like EXP_IEEE, seem hard-
727 * coded to take their input from the w component.
729 switch(inst
->Dst
[0].Register
.WriteMask
) {
730 case TGSI_WRITEMASK_X
:
731 tmp_src
.SwizzleW
= TGSI_SWIZZLE_X
;
733 case TGSI_WRITEMASK_Y
:
734 tmp_src
.SwizzleW
= TGSI_SWIZZLE_Y
;
736 case TGSI_WRITEMASK_Z
:
737 tmp_src
.SwizzleW
= TGSI_SWIZZLE_Z
;
739 case TGSI_WRITEMASK_W
:
740 tmp_src
.SwizzleW
= TGSI_SWIZZLE_W
;
743 DBG("invalid writemask!");
748 alu
= ir2_instr_create_alu(next_exec_cf(ctx
), ~0, EXP_IEEE
);
749 add_regs_dummy_vector(alu
);
750 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
751 add_src_reg(ctx
, alu
, &tmp_src
);
752 add_scalar_clamp(inst
, alu
);
756 translate_tex(struct fd_compile_context
*ctx
,
757 struct tgsi_full_instruction
*inst
, unsigned opc
)
759 struct ir2_instruction
*instr
;
760 struct ir2_register
*reg
;
761 struct tgsi_dst_register tmp_dst
;
762 struct tgsi_src_register tmp_src
;
763 const struct tgsi_src_register
*coord
;
764 bool using_temp
= (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) ||
765 (inst
->Instruction
.Saturate
!= TGSI_SAT_NONE
);
768 if (using_temp
|| (opc
== TGSI_OPCODE_TXP
))
769 get_internal_temp(ctx
, &tmp_dst
, &tmp_src
);
771 if (opc
== TGSI_OPCODE_TXP
) {
772 static const char *swiz
[] = {
773 [TGSI_SWIZZLE_X
] = "xxxx",
774 [TGSI_SWIZZLE_Y
] = "yyyy",
775 [TGSI_SWIZZLE_Z
] = "zzzz",
776 [TGSI_SWIZZLE_W
] = "wwww",
779 /* TXP - Projective Texture Lookup:
781 * coord.x = src0.x / src0.w
782 * coord.y = src0.y / src0.w
783 * coord.z = src0.z / src0.w
787 * dst = texture_sample(unit, coord, bias)
789 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MAXv
, RECIP_IEEE
);
792 add_dst_reg(ctx
, instr
, &tmp_dst
)->swizzle
= "___w";
793 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
794 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
797 add_dst_reg(ctx
, instr
, &tmp_dst
)->swizzle
= "x___";
798 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
)->swizzle
=
799 swiz
[inst
->Src
[0].Register
.SwizzleW
];
801 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
802 add_dst_reg(ctx
, instr
, &tmp_dst
)->swizzle
= "xyz_";
803 add_src_reg(ctx
, instr
, &tmp_src
)->swizzle
= "xxxx";
804 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
808 coord
= &inst
->Src
[0].Register
;
811 instr
= ir2_instr_create(next_exec_cf(ctx
), IR2_FETCH
);
812 instr
->fetch
.opc
= TEX_FETCH
;
813 instr
->fetch
.is_cube
= (inst
->Texture
.Texture
== TGSI_TEXTURE_3D
);
814 assert(inst
->Texture
.NumOffsets
<= 1); // TODO what to do in other cases?
816 /* save off the tex fetch to be patched later with correct const_idx: */
817 idx
= ctx
->so
->num_tfetch_instrs
++;
818 ctx
->so
->tfetch_instrs
[idx
].samp_id
= inst
->Src
[1].Register
.Index
;
819 ctx
->so
->tfetch_instrs
[idx
].instr
= instr
;
821 add_dst_reg(ctx
, instr
, using_temp
? &tmp_dst
: &inst
->Dst
[0].Register
);
822 reg
= add_src_reg(ctx
, instr
, coord
);
824 /* blob compiler always sets 3rd component to same as 1st for 2d: */
825 if (inst
->Texture
.Texture
== TGSI_TEXTURE_2D
)
826 reg
->swizzle
[2] = reg
->swizzle
[0];
828 /* dst register needs to be marked for sync: */
829 ctx
->need_sync
|= 1 << instr
->regs
[0]->num
;
831 /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */
835 /* texture fetch can't write directly to export, so if tgsi
836 * is telling us the dst register is in output file, we load
837 * the texture to a temp and then use an ALU instruction to move
840 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MAXv
, ~0);
842 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
843 add_src_reg(ctx
, instr
, &tmp_src
);
844 add_src_reg(ctx
, instr
, &tmp_src
);
845 add_vector_clamp(inst
, instr
);
849 /* SGE(a,b) = GTE((b - a), 1.0, 0.0) */
850 /* SLT(a,b) = GTE((b - a), 0.0, 1.0) */
852 translate_sge_slt(struct fd_compile_context
*ctx
,
853 struct tgsi_full_instruction
*inst
, unsigned opc
)
855 struct ir2_instruction
*instr
;
856 struct tgsi_dst_register tmp_dst
;
857 struct tgsi_src_register tmp_src
;
858 struct tgsi_src_register tmp_const
;
864 case TGSI_OPCODE_SGE
:
868 case TGSI_OPCODE_SLT
:
874 get_internal_temp(ctx
, &tmp_dst
, &tmp_src
);
876 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), ADDv
, ~0);
877 add_dst_reg(ctx
, instr
, &tmp_dst
);
878 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
)->flags
|= IR2_REG_NEGATE
;
879 add_src_reg(ctx
, instr
, &inst
->Src
[1].Register
);
881 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), CNDGTEv
, ~0);
882 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
883 /* maybe should re-arrange the syntax some day, but
884 * in assembler/disassembler and what ir.c expects
885 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rsrc1
887 get_immediate(ctx
, &tmp_const
, fui(c0
));
888 add_src_reg(ctx
, instr
, &tmp_const
);
889 add_src_reg(ctx
, instr
, &tmp_src
);
890 get_immediate(ctx
, &tmp_const
, fui(c1
));
891 add_src_reg(ctx
, instr
, &tmp_const
);
894 /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
896 translate_lrp(struct fd_compile_context
*ctx
,
897 struct tgsi_full_instruction
*inst
,
900 struct ir2_instruction
*instr
;
901 struct tgsi_dst_register tmp_dst1
, tmp_dst2
;
902 struct tgsi_src_register tmp_src1
, tmp_src2
;
903 struct tgsi_src_register tmp_const
;
905 get_internal_temp(ctx
, &tmp_dst1
, &tmp_src1
);
906 get_internal_temp(ctx
, &tmp_dst2
, &tmp_src2
);
908 get_immediate(ctx
, &tmp_const
, fui(1.0));
911 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
912 add_dst_reg(ctx
, instr
, &tmp_dst1
);
913 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
914 add_src_reg(ctx
, instr
, &inst
->Src
[1].Register
);
917 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), ADDv
, ~0);
918 add_dst_reg(ctx
, instr
, &tmp_dst2
);
919 add_src_reg(ctx
, instr
, &tmp_const
);
920 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
)->flags
|= IR2_REG_NEGATE
;
922 /* tmp2 = tmp2 * c */
923 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
924 add_dst_reg(ctx
, instr
, &tmp_dst2
);
925 add_src_reg(ctx
, instr
, &tmp_src2
);
926 add_src_reg(ctx
, instr
, &inst
->Src
[2].Register
);
928 /* dst = tmp1 + tmp2 */
929 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), ADDv
, ~0);
930 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
931 add_src_reg(ctx
, instr
, &tmp_src1
);
932 add_src_reg(ctx
, instr
, &tmp_src2
);
936 translate_trig(struct fd_compile_context
*ctx
,
937 struct tgsi_full_instruction
*inst
,
940 struct ir2_instruction
*instr
;
941 struct tgsi_dst_register tmp_dst
;
942 struct tgsi_src_register tmp_src
;
943 struct tgsi_src_register tmp_const
;
944 instr_scalar_opc_t op
;
949 case TGSI_OPCODE_SIN
:
952 case TGSI_OPCODE_COS
:
957 get_internal_temp(ctx
, &tmp_dst
, &tmp_src
);
959 tmp_dst
.WriteMask
= TGSI_WRITEMASK_X
;
960 tmp_src
.SwizzleX
= tmp_src
.SwizzleY
=
961 tmp_src
.SwizzleZ
= tmp_src
.SwizzleW
= TGSI_SWIZZLE_X
;
963 /* maybe should re-arrange the syntax some day, but
964 * in assembler/disassembler and what ir.c expects
965 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rsrc1
967 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MULADDv
, ~0);
968 add_dst_reg(ctx
, instr
, &tmp_dst
);
969 get_immediate(ctx
, &tmp_const
, fui(0.5));
970 add_src_reg(ctx
, instr
, &tmp_const
);
971 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
972 get_immediate(ctx
, &tmp_const
, fui(0.159155));
973 add_src_reg(ctx
, instr
, &tmp_const
);
975 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), FRACv
, ~0);
976 add_dst_reg(ctx
, instr
, &tmp_dst
);
977 add_src_reg(ctx
, instr
, &tmp_src
);
978 add_src_reg(ctx
, instr
, &tmp_src
);
980 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MULADDv
, ~0);
981 add_dst_reg(ctx
, instr
, &tmp_dst
);
982 get_immediate(ctx
, &tmp_const
, fui(-3.141593));
983 add_src_reg(ctx
, instr
, &tmp_const
);
984 add_src_reg(ctx
, instr
, &tmp_src
);
985 get_immediate(ctx
, &tmp_const
, fui(6.283185));
986 add_src_reg(ctx
, instr
, &tmp_const
);
988 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), ~0, op
);
989 add_regs_dummy_vector(instr
);
990 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
991 add_src_reg(ctx
, instr
, &tmp_src
);
995 * Main part of compiler/translator:
999 translate_instruction(struct fd_compile_context
*ctx
,
1000 struct tgsi_full_instruction
*inst
)
1002 unsigned opc
= inst
->Instruction
.Opcode
;
1003 struct ir2_instruction
*instr
;
1004 static struct ir2_cf
*cf
;
1006 if (opc
== TGSI_OPCODE_END
)
1009 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) {
1010 unsigned num
= inst
->Dst
[0].Register
.Index
;
1011 /* seems like we need to ensure that position vs param/pixel
1012 * exports don't end up in the same EXEC clause.. easy way
1013 * to do this is force a new EXEC clause on first appearance
1014 * of a position or param/pixel export.
1016 if ((num
== ctx
->position
) || (num
== ctx
->psize
)) {
1017 if (ctx
->num_position
> 0) {
1019 ir2_cf_create_alloc(ctx
->so
->ir
, SQ_POSITION
,
1020 ctx
->num_position
- 1);
1021 ctx
->num_position
= 0;
1024 if (ctx
->num_param
> 0) {
1026 ir2_cf_create_alloc(ctx
->so
->ir
, SQ_PARAMETER_PIXEL
,
1027 ctx
->num_param
- 1);
1033 cf
= next_exec_cf(ctx
);
1035 /* TODO turn this into a table: */
1037 case TGSI_OPCODE_MOV
:
1038 instr
= ir2_instr_create_alu(cf
, MAXv
, ~0);
1039 add_regs_vector_1(ctx
, inst
, instr
);
1041 case TGSI_OPCODE_RCP
:
1042 instr
= ir2_instr_create_alu(cf
, ~0, RECIP_IEEE
);
1043 add_regs_scalar_1(ctx
, inst
, instr
);
1045 case TGSI_OPCODE_RSQ
:
1046 instr
= ir2_instr_create_alu(cf
, ~0, RECIPSQ_IEEE
);
1047 add_regs_scalar_1(ctx
, inst
, instr
);
1049 case TGSI_OPCODE_MUL
:
1050 instr
= ir2_instr_create_alu(cf
, MULv
, ~0);
1051 add_regs_vector_2(ctx
, inst
, instr
);
1053 case TGSI_OPCODE_ADD
:
1054 instr
= ir2_instr_create_alu(cf
, ADDv
, ~0);
1055 add_regs_vector_2(ctx
, inst
, instr
);
1057 case TGSI_OPCODE_DP3
:
1058 instr
= ir2_instr_create_alu(cf
, DOT3v
, ~0);
1059 add_regs_vector_2(ctx
, inst
, instr
);
1061 case TGSI_OPCODE_DP4
:
1062 instr
= ir2_instr_create_alu(cf
, DOT4v
, ~0);
1063 add_regs_vector_2(ctx
, inst
, instr
);
1065 case TGSI_OPCODE_MIN
:
1066 instr
= ir2_instr_create_alu(cf
, MINv
, ~0);
1067 add_regs_vector_2(ctx
, inst
, instr
);
1069 case TGSI_OPCODE_MAX
:
1070 instr
= ir2_instr_create_alu(cf
, MAXv
, ~0);
1071 add_regs_vector_2(ctx
, inst
, instr
);
1073 case TGSI_OPCODE_SLT
:
1074 case TGSI_OPCODE_SGE
:
1075 translate_sge_slt(ctx
, inst
, opc
);
1077 case TGSI_OPCODE_MAD
:
1078 instr
= ir2_instr_create_alu(cf
, MULADDv
, ~0);
1079 add_regs_vector_3(ctx
, inst
, instr
);
1081 case TGSI_OPCODE_LRP
:
1082 translate_lrp(ctx
, inst
, opc
);
1084 case TGSI_OPCODE_FRC
:
1085 instr
= ir2_instr_create_alu(cf
, FRACv
, ~0);
1086 add_regs_vector_1(ctx
, inst
, instr
);
1088 case TGSI_OPCODE_FLR
:
1089 instr
= ir2_instr_create_alu(cf
, FLOORv
, ~0);
1090 add_regs_vector_1(ctx
, inst
, instr
);
1092 case TGSI_OPCODE_EX2
:
1093 instr
= ir2_instr_create_alu(cf
, ~0, EXP_IEEE
);
1094 add_regs_scalar_1(ctx
, inst
, instr
);
1096 case TGSI_OPCODE_POW
:
1097 translate_pow(ctx
, inst
);
1099 case TGSI_OPCODE_ABS
:
1100 instr
= ir2_instr_create_alu(cf
, MAXv
, ~0);
1101 add_regs_vector_1(ctx
, inst
, instr
);
1102 instr
->regs
[1]->flags
|= IR2_REG_NEGATE
; /* src0 */
1104 case TGSI_OPCODE_COS
:
1105 case TGSI_OPCODE_SIN
:
1106 translate_trig(ctx
, inst
, opc
);
1108 case TGSI_OPCODE_TEX
:
1109 case TGSI_OPCODE_TXP
:
1110 translate_tex(ctx
, inst
, opc
);
1112 case TGSI_OPCODE_CMP
:
1113 instr
= ir2_instr_create_alu(cf
, CNDGTEv
, ~0);
1114 add_regs_vector_3(ctx
, inst
, instr
);
1115 // TODO this should be src0 if regs where in sane order..
1116 instr
->regs
[2]->flags
^= IR2_REG_NEGATE
; /* src1 */
1118 case TGSI_OPCODE_IF
:
1119 push_predicate(ctx
, &inst
->Src
[0].Register
);
1120 ctx
->so
->ir
->pred
= IR2_PRED_EQ
;
1122 case TGSI_OPCODE_ELSE
:
1123 ctx
->so
->ir
->pred
= IR2_PRED_NE
;
1124 /* not sure if this is required in all cases, but blob compiler
1125 * won't combine EQ and NE in same CF:
1129 case TGSI_OPCODE_ENDIF
:
1132 case TGSI_OPCODE_F2I
:
1133 instr
= ir2_instr_create_alu(cf
, TRUNCv
, ~0);
1134 add_regs_vector_1(ctx
, inst
, instr
);
1137 DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc
));
1138 tgsi_dump(ctx
->so
->tokens
, 0);
1143 /* internal temporaries are only valid for the duration of a single
1146 ctx
->num_internal_temps
= 0;
1150 compile_instructions(struct fd_compile_context
*ctx
)
1152 while (!tgsi_parse_end_of_tokens(&ctx
->parser
)) {
1153 tgsi_parse_token(&ctx
->parser
);
1155 switch (ctx
->parser
.FullToken
.Token
.Type
) {
1156 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1157 translate_instruction(ctx
,
1158 &ctx
->parser
.FullToken
.FullInstruction
);
1165 ctx
->cf
->cf_type
= EXEC_END
;
1169 fd_compile_shader(struct fd_program_stateobj
*prog
,
1170 struct fd_shader_stateobj
*so
)
1172 struct fd_compile_context ctx
;
1174 ir2_shader_destroy(so
->ir
);
1175 so
->ir
= ir2_shader_create();
1176 so
->num_vfetch_instrs
= so
->num_tfetch_instrs
= so
->num_immediates
= 0;
1178 if (compile_init(&ctx
, prog
, so
) != TGSI_PARSE_OK
)
1181 if (ctx
.type
== TGSI_PROCESSOR_VERTEX
) {
1182 compile_vtx_fetch(&ctx
);
1183 } else if (ctx
.type
== TGSI_PROCESSOR_FRAGMENT
) {
1184 prog
->num_exports
= 0;
1185 memset(prog
->export_linkage
, 0xff,
1186 sizeof(prog
->export_linkage
));
1189 compile_instructions(&ctx
);