1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
4 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 * Rob Clark <robclark@freedesktop.org>
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "tgsi/tgsi_parse.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_strings.h"
37 #include "tgsi/tgsi_dump.h"
39 #include "freedreno_program.h"
40 #include "freedreno_compiler.h"
41 #include "freedreno_util.h"
46 struct fd_compile_context
{
47 struct fd_program_stateobj
*prog
;
48 struct fd_shader_stateobj
*so
;
50 struct tgsi_parse_context parser
;
53 /* predicate stack: */
55 enum ir_pred pred_stack
[8];
57 /* Internal-Temporary and Predicate register assignment:
59 * Some TGSI instructions which translate into multiple actual
60 * instructions need one or more temporary registers (which are not
61 * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY).
62 * Whenever possible, the dst register is used as the first temporary,
63 * but this is not possible when the dst register is in an export (ie.
64 * in TGSI_FILE_OUTPUT).
66 * The predicate register must be valid across multiple TGSI
67 * instructions, but internal temporary's do not. For this reason,
68 * once the predicate register is requested, until it is no longer
69 * needed, it gets the first register slot after after the TGSI
70 * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the
71 * internal temporaries get the register slots above this.
75 int num_internal_temps
;
77 uint8_t num_regs
[TGSI_FILE_COUNT
];
79 /* maps input register idx to prog->export_linkage idx: */
80 uint8_t input_export_idx
[64];
82 /* maps output register idx to prog->export_linkage idx: */
83 uint8_t output_export_idx
[64];
85 /* idx/slot for last compiler generated immediate */
86 unsigned immediate_idx
;
88 // TODO we can skip emit exports in the VS that the FS doesn't need..
89 // and get rid perhaps of num_param..
90 unsigned num_position
, num_param
;
91 unsigned position
, psize
;
95 /* current exec CF instruction */
100 semantic_idx(struct tgsi_declaration_semantic
*semantic
)
102 int idx
= semantic
->Name
;
103 if (idx
== TGSI_SEMANTIC_GENERIC
)
104 idx
= TGSI_SEMANTIC_COUNT
+ semantic
->Index
;
108 /* assign/get the input/export register # for given semantic idx as
109 * returned by semantic_idx():
112 export_linkage(struct fd_compile_context
*ctx
, int idx
)
114 struct fd_program_stateobj
*prog
= ctx
->prog
;
116 /* if first time we've seen this export, assign the next available slot: */
117 if (prog
->export_linkage
[idx
] == 0xff)
118 prog
->export_linkage
[idx
] = prog
->num_exports
++;
120 return prog
->export_linkage
[idx
];
124 compile_init(struct fd_compile_context
*ctx
, struct fd_program_stateobj
*prog
,
125 struct fd_shader_stateobj
*so
)
134 ret
= tgsi_parse_init(&ctx
->parser
, so
->tokens
);
135 if (ret
!= TGSI_PARSE_OK
)
138 ctx
->type
= ctx
->parser
.FullHeader
.Processor
.Processor
;
141 ctx
->num_position
= 0;
144 ctx
->immediate_idx
= 0;
146 ctx
->num_internal_temps
= 0;
148 memset(ctx
->num_regs
, 0, sizeof(ctx
->num_regs
));
149 memset(ctx
->input_export_idx
, 0, sizeof(ctx
->input_export_idx
));
150 memset(ctx
->output_export_idx
, 0, sizeof(ctx
->output_export_idx
));
152 /* do first pass to extract declarations: */
153 while (!tgsi_parse_end_of_tokens(&ctx
->parser
)) {
154 tgsi_parse_token(&ctx
->parser
);
156 switch (ctx
->parser
.FullToken
.Token
.Type
) {
157 case TGSI_TOKEN_TYPE_DECLARATION
: {
158 struct tgsi_full_declaration
*decl
=
159 &ctx
->parser
.FullToken
.FullDeclaration
;
160 if (decl
->Declaration
.File
== TGSI_FILE_OUTPUT
) {
161 unsigned name
= decl
->Semantic
.Name
;
163 assert(decl
->Declaration
.Semantic
); // TODO is this ever not true?
165 ctx
->output_export_idx
[decl
->Range
.First
] =
166 semantic_idx(&decl
->Semantic
);
168 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
170 case TGSI_SEMANTIC_POSITION
:
171 ctx
->position
= ctx
->num_regs
[TGSI_FILE_OUTPUT
];
174 case TGSI_SEMANTIC_PSIZE
:
175 ctx
->psize
= ctx
->num_regs
[TGSI_FILE_OUTPUT
];
177 case TGSI_SEMANTIC_COLOR
:
178 case TGSI_SEMANTIC_GENERIC
:
182 DBG("unknown VS semantic name: %s",
183 tgsi_semantic_names
[name
]);
188 case TGSI_SEMANTIC_COLOR
:
189 case TGSI_SEMANTIC_GENERIC
:
193 DBG("unknown PS semantic name: %s",
194 tgsi_semantic_names
[name
]);
198 } else if (decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
199 ctx
->input_export_idx
[decl
->Range
.First
] =
200 semantic_idx(&decl
->Semantic
);
202 ctx
->num_regs
[decl
->Declaration
.File
] +=
203 1 + decl
->Range
.Last
- decl
->Range
.First
;
206 case TGSI_TOKEN_TYPE_IMMEDIATE
: {
207 struct tgsi_full_immediate
*imm
=
208 &ctx
->parser
.FullToken
.FullImmediate
;
209 unsigned n
= ctx
->so
->num_immediates
++;
210 memcpy(ctx
->so
->immediates
[n
].val
, imm
->u
, 16);
218 /* TGSI generated immediates are always entire vec4's, ones we
219 * generate internally are not:
221 ctx
->immediate_idx
= ctx
->so
->num_immediates
* 4;
223 ctx
->so
->first_immediate
= ctx
->num_regs
[TGSI_FILE_CONSTANT
];
225 tgsi_parse_free(&ctx
->parser
);
227 return tgsi_parse_init(&ctx
->parser
, so
->tokens
);
231 compile_free(struct fd_compile_context
*ctx
)
233 tgsi_parse_free(&ctx
->parser
);
236 static struct ir_cf
*
237 next_exec_cf(struct fd_compile_context
*ctx
)
239 struct ir_cf
*cf
= ctx
->cf
;
240 if (!cf
|| cf
->exec
.instrs_count
>= ARRAY_SIZE(ctx
->cf
->exec
.instrs
))
241 ctx
->cf
= cf
= ir_cf_create(ctx
->so
->ir
, EXEC
);
246 compile_vtx_fetch(struct fd_compile_context
*ctx
)
248 struct ir_instruction
**vfetch_instrs
= ctx
->so
->vfetch_instrs
;
250 for (i
= 0; i
< ctx
->num_regs
[TGSI_FILE_INPUT
]; i
++) {
251 struct ir_instruction
*instr
= ir_instr_create(
252 next_exec_cf(ctx
), IR_FETCH
);
253 instr
->fetch
.opc
= VTX_FETCH
;
255 ctx
->need_sync
|= 1 << (i
+1);
257 ir_reg_create(instr
, i
+1, "xyzw", 0);
258 ir_reg_create(instr
, 0, "x", 0);
263 vfetch_instrs
[i
] = instr
;
265 ctx
->so
->num_vfetch_instrs
= i
;
270 * For vertex shaders (VS):
271 * --- ------ -------------
273 * Inputs: R1-R(num_input)
274 * Constants: C0-C(num_const-1)
275 * Immediates: C(num_const)-C(num_const+num_imm-1)
276 * Outputs: export0-export(n) and export62, export63
277 * n is # of outputs minus gl_Position (export62) and gl_PointSize (export63)
278 * Temps: R(num_input+1)-R(num_input+num_temps)
280 * R0 could be clobbered after the vertex fetch instructions.. so we
281 * could use it for one of the temporaries.
283 * TODO: maybe the vertex fetch part could fetch first input into R0 as
284 * the last vtx fetch instruction, which would let us use the same
285 * register layout in either case.. although this is not what the blob
289 * For frag shaders (PS):
290 * --- ---- -------------
292 * Inputs: R0-R(num_input-1)
293 * Constants: same as VS
294 * Immediates: same as VS
295 * Outputs: export0-export(num_outputs)
296 * Temps: R(num_input)-R(num_input+num_temps-1)
 * In either case, immediates are appended to the constants
304 get_temp_gpr(struct fd_compile_context
*ctx
, int idx
)
306 unsigned num
= idx
+ ctx
->num_regs
[TGSI_FILE_INPUT
];
307 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
)
312 static struct ir_register
*
313 add_dst_reg(struct fd_compile_context
*ctx
, struct ir_instruction
*alu
,
314 const struct tgsi_dst_register
*dst
)
316 unsigned flags
= 0, num
= 0;
320 case TGSI_FILE_OUTPUT
:
321 flags
|= IR_REG_EXPORT
;
322 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
323 if (dst
->Index
== ctx
->position
) {
325 } else if (dst
->Index
== ctx
->psize
) {
328 num
= export_linkage(ctx
,
329 ctx
->output_export_idx
[dst
->Index
]);
335 case TGSI_FILE_TEMPORARY
:
336 num
= get_temp_gpr(ctx
, dst
->Index
);
339 DBG("unsupported dst register file: %s",
340 tgsi_file_names
[dst
->File
]);
345 swiz
[0] = (dst
->WriteMask
& TGSI_WRITEMASK_X
) ? 'x' : '_';
346 swiz
[1] = (dst
->WriteMask
& TGSI_WRITEMASK_Y
) ? 'y' : '_';
347 swiz
[2] = (dst
->WriteMask
& TGSI_WRITEMASK_Z
) ? 'z' : '_';
348 swiz
[3] = (dst
->WriteMask
& TGSI_WRITEMASK_W
) ? 'w' : '_';
351 return ir_reg_create(alu
, num
, swiz
, flags
);
354 static struct ir_register
*
355 add_src_reg(struct fd_compile_context
*ctx
, struct ir_instruction
*alu
,
356 const struct tgsi_src_register
*src
)
358 static const char swiz_vals
[] = {
362 unsigned flags
= 0, num
= 0;
365 case TGSI_FILE_CONSTANT
:
367 flags
|= IR_REG_CONST
;
369 case TGSI_FILE_INPUT
:
370 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
371 num
= src
->Index
+ 1;
373 num
= export_linkage(ctx
,
374 ctx
->input_export_idx
[src
->Index
]);
377 case TGSI_FILE_TEMPORARY
:
378 num
= get_temp_gpr(ctx
, src
->Index
);
380 case TGSI_FILE_IMMEDIATE
:
381 num
= src
->Index
+ ctx
->num_regs
[TGSI_FILE_CONSTANT
];
382 flags
|= IR_REG_CONST
;
385 DBG("unsupported src register file: %s",
386 tgsi_file_names
[src
->File
]);
394 flags
|= IR_REG_NEGATE
;
396 swiz
[0] = swiz_vals
[src
->SwizzleX
];
397 swiz
[1] = swiz_vals
[src
->SwizzleY
];
398 swiz
[2] = swiz_vals
[src
->SwizzleZ
];
399 swiz
[3] = swiz_vals
[src
->SwizzleW
];
402 if ((ctx
->need_sync
& (uint64_t)(1 << num
)) &&
403 !(flags
& IR_REG_CONST
)) {
405 ctx
->need_sync
&= ~(uint64_t)(1 << num
);
408 return ir_reg_create(alu
, num
, swiz
, flags
);
412 add_vector_clamp(struct tgsi_full_instruction
*inst
, struct ir_instruction
*alu
)
414 switch (inst
->Instruction
.Saturate
) {
417 case TGSI_SAT_ZERO_ONE
:
418 alu
->alu
.vector_clamp
= true;
420 case TGSI_SAT_MINUS_PLUS_ONE
:
421 DBG("unsupported saturate");
428 add_scalar_clamp(struct tgsi_full_instruction
*inst
, struct ir_instruction
*alu
)
430 switch (inst
->Instruction
.Saturate
) {
433 case TGSI_SAT_ZERO_ONE
:
434 alu
->alu
.scalar_clamp
= true;
436 case TGSI_SAT_MINUS_PLUS_ONE
:
437 DBG("unsupported saturate");
444 add_regs_vector_1(struct fd_compile_context
*ctx
,
445 struct tgsi_full_instruction
*inst
, struct ir_instruction
*alu
)
447 assert(inst
->Instruction
.NumSrcRegs
== 1);
448 assert(inst
->Instruction
.NumDstRegs
== 1);
450 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
451 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
452 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
453 add_vector_clamp(inst
, alu
);
457 add_regs_vector_2(struct fd_compile_context
*ctx
,
458 struct tgsi_full_instruction
*inst
, struct ir_instruction
*alu
)
460 assert(inst
->Instruction
.NumSrcRegs
== 2);
461 assert(inst
->Instruction
.NumDstRegs
== 1);
463 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
464 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
465 add_src_reg(ctx
, alu
, &inst
->Src
[1].Register
);
466 add_vector_clamp(inst
, alu
);
470 add_regs_vector_3(struct fd_compile_context
*ctx
,
471 struct tgsi_full_instruction
*inst
, struct ir_instruction
*alu
)
473 assert(inst
->Instruction
.NumSrcRegs
== 3);
474 assert(inst
->Instruction
.NumDstRegs
== 1);
476 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
477 /* maybe should re-arrange the syntax some day, but
478 * in assembler/disassembler and what ir.c expects
479 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
481 add_src_reg(ctx
, alu
, &inst
->Src
[2].Register
);
482 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
483 add_src_reg(ctx
, alu
, &inst
->Src
[1].Register
);
484 add_vector_clamp(inst
, alu
);
/* Fill the vector half of a co-issued ALU instruction with a non-written
 * dst and unused srcs, so a scalar-only op occupies a valid encoding.
 */
static void
add_regs_dummy_vector(struct ir_instruction *alu)
{
	/* create dummy, non-written vector dst/src regs
	 * for unused vector instr slot:
	 */
	ir_reg_create(alu, 0, "____", 0); /* vector dst */
	ir_reg_create(alu, 0, NULL, 0);   /* vector src1 */
	ir_reg_create(alu, 0, NULL, 0);   /* vector src2 */
}
499 add_regs_scalar_1(struct fd_compile_context
*ctx
,
500 struct tgsi_full_instruction
*inst
, struct ir_instruction
*alu
)
502 assert(inst
->Instruction
.NumSrcRegs
== 1);
503 assert(inst
->Instruction
.NumDstRegs
== 1);
505 add_regs_dummy_vector(alu
);
507 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
508 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
509 add_scalar_clamp(inst
, alu
);
513 * Helpers for TGSI instructions that don't map to a single shader instr:
516 /* Get internal-temp src/dst to use for a sequence of instructions
517 * generated by a single TGSI op.. if possible, use the final dst
518 * register as the temporary to avoid allocating a new register, but
519 * if necessary allocate one. If a single TGSI op needs multiple
520 * internal temps, pass NULL for orig_dst for all but the first one
521 * so that you don't end up using the same register for all your
525 get_internal_temp(struct fd_compile_context
*ctx
,
526 struct tgsi_dst_register
*orig_dst
,
527 struct tgsi_dst_register
*tmp_dst
,
528 struct tgsi_src_register
*tmp_src
)
530 bool using_temp
= false;
532 tmp_dst
->File
= TGSI_FILE_TEMPORARY
;
533 tmp_dst
->WriteMask
= TGSI_WRITEMASK_XYZW
;
534 tmp_dst
->Indirect
= 0;
535 tmp_dst
->Dimension
= 0;
537 if (orig_dst
&& (orig_dst
->File
!= TGSI_FILE_OUTPUT
)) {
538 /* if possible, use orig dst register for the temporary: */
539 tmp_dst
->Index
= orig_dst
->Index
;
541 /* otherwise assign one: */
542 int n
= ctx
->num_internal_temps
++;
543 if (ctx
->pred_reg
!= -1)
545 tmp_dst
->Index
= get_temp_gpr(ctx
,
546 ctx
->num_regs
[TGSI_FILE_TEMPORARY
] + n
);
550 tmp_src
->File
= tmp_dst
->File
;
551 tmp_src
->Indirect
= tmp_dst
->Indirect
;
552 tmp_src
->Dimension
= tmp_dst
->Dimension
;
553 tmp_src
->Index
= tmp_dst
->Index
;
554 tmp_src
->Absolute
= 0;
556 tmp_src
->SwizzleX
= TGSI_SWIZZLE_X
;
557 tmp_src
->SwizzleY
= TGSI_SWIZZLE_Y
;
558 tmp_src
->SwizzleZ
= TGSI_SWIZZLE_Z
;
559 tmp_src
->SwizzleW
= TGSI_SWIZZLE_W
;
565 get_predicate(struct fd_compile_context
*ctx
, struct tgsi_dst_register
*dst
,
566 struct tgsi_src_register
*src
)
568 assert(ctx
->pred_reg
!= -1);
570 dst
->File
= TGSI_FILE_TEMPORARY
;
571 dst
->WriteMask
= TGSI_WRITEMASK_W
;
574 dst
->Index
= get_temp_gpr(ctx
, ctx
->pred_reg
);
577 src
->File
= dst
->File
;
578 src
->Indirect
= dst
->Indirect
;
579 src
->Dimension
= dst
->Dimension
;
580 src
->Index
= dst
->Index
;
583 src
->SwizzleX
= TGSI_SWIZZLE_W
;
584 src
->SwizzleY
= TGSI_SWIZZLE_W
;
585 src
->SwizzleZ
= TGSI_SWIZZLE_W
;
586 src
->SwizzleW
= TGSI_SWIZZLE_W
;
591 push_predicate(struct fd_compile_context
*ctx
, struct tgsi_src_register
*src
)
593 struct ir_instruction
*alu
;
594 struct tgsi_dst_register pred_dst
;
596 /* NOTE blob compiler seems to always puts PRED_* instrs in a CF by
601 if (ctx
->pred_depth
== 0) {
602 /* assign predicate register: */
603 ctx
->pred_reg
= ctx
->num_regs
[TGSI_FILE_TEMPORARY
];
605 get_predicate(ctx
, &pred_dst
, NULL
);
607 alu
= ir_instr_create_alu(next_exec_cf(ctx
), ~0, PRED_SETNEs
);
608 add_regs_dummy_vector(alu
);
609 add_dst_reg(ctx
, alu
, &pred_dst
);
610 add_src_reg(ctx
, alu
, src
);
612 struct tgsi_src_register pred_src
;
614 get_predicate(ctx
, &pred_dst
, &pred_src
);
616 alu
= ir_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
617 add_dst_reg(ctx
, alu
, &pred_dst
);
618 add_src_reg(ctx
, alu
, &pred_src
);
619 add_src_reg(ctx
, alu
, src
);
621 // XXX need to make PRED_SETE_PUSHv IR_PRED_NONE.. but need to make
622 // sure src reg is valid if it was calculated with a predicate
624 alu
->pred
= IR_PRED_NONE
;
627 /* save previous pred state to restore in pop_predicate(): */
628 ctx
->pred_stack
[ctx
->pred_depth
++] = ctx
->so
->ir
->pred
;
634 pop_predicate(struct fd_compile_context
*ctx
)
636 /* NOTE blob compiler seems to always puts PRED_* instrs in a CF by
641 /* restore previous predicate state: */
642 ctx
->so
->ir
->pred
= ctx
->pred_stack
[--ctx
->pred_depth
];
644 if (ctx
->pred_depth
!= 0) {
645 struct ir_instruction
*alu
;
646 struct tgsi_dst_register pred_dst
;
647 struct tgsi_src_register pred_src
;
649 get_predicate(ctx
, &pred_dst
, &pred_src
);
651 alu
= ir_instr_create_alu(next_exec_cf(ctx
), ~0, PRED_SET_POPs
);
652 add_regs_dummy_vector(alu
);
653 add_dst_reg(ctx
, alu
, &pred_dst
);
654 add_src_reg(ctx
, alu
, &pred_src
);
655 alu
->pred
= IR_PRED_NONE
;
657 /* predicate register no longer needed: */
665 get_immediate(struct fd_compile_context
*ctx
,
666 struct tgsi_src_register
*reg
, uint32_t val
)
668 unsigned neg
, swiz
, idx
, i
;
669 /* actually maps 1:1 currently.. not sure if that is safe to rely on: */
670 static const unsigned swiz2tgsi
[] = {
671 TGSI_SWIZZLE_X
, TGSI_SWIZZLE_Y
, TGSI_SWIZZLE_Z
, TGSI_SWIZZLE_W
,
674 for (i
= 0; i
< ctx
->immediate_idx
; i
++) {
678 if (ctx
->so
->immediates
[idx
].val
[swiz
] == val
) {
683 if (ctx
->so
->immediates
[idx
].val
[swiz
] == -val
) {
689 if (i
== ctx
->immediate_idx
) {
690 /* need to generate a new immediate: */
694 ctx
->so
->immediates
[idx
].val
[swiz
] = val
;
695 ctx
->so
->num_immediates
= idx
+ 1;
696 ctx
->immediate_idx
++;
699 reg
->File
= TGSI_FILE_IMMEDIATE
;
705 reg
->SwizzleX
= swiz2tgsi
[swiz
];
706 reg
->SwizzleY
= swiz2tgsi
[swiz
];
707 reg
->SwizzleZ
= swiz2tgsi
[swiz
];
708 reg
->SwizzleW
= swiz2tgsi
[swiz
];
711 /* POW(a,b) = EXP2(b * LOG2(a)) */
713 translate_pow(struct fd_compile_context
*ctx
,
714 struct tgsi_full_instruction
*inst
)
716 struct tgsi_dst_register tmp_dst
;
717 struct tgsi_src_register tmp_src
;
718 struct ir_instruction
*alu
;
720 get_internal_temp(ctx
, &inst
->Dst
[0].Register
, &tmp_dst
, &tmp_src
);
722 alu
= ir_instr_create_alu(next_exec_cf(ctx
), ~0, LOG_CLAMP
);
723 add_regs_dummy_vector(alu
);
724 add_dst_reg(ctx
, alu
, &tmp_dst
);
725 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
727 alu
= ir_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
728 add_dst_reg(ctx
, alu
, &tmp_dst
);
729 add_src_reg(ctx
, alu
, &tmp_src
);
730 add_src_reg(ctx
, alu
, &inst
->Src
[1].Register
);
732 /* NOTE: some of the instructions, like EXP_IEEE, seem hard-
733 * coded to take their input from the w component.
735 switch(inst
->Dst
[0].Register
.WriteMask
) {
736 case TGSI_WRITEMASK_X
:
737 tmp_src
.SwizzleW
= TGSI_SWIZZLE_X
;
739 case TGSI_WRITEMASK_Y
:
740 tmp_src
.SwizzleW
= TGSI_SWIZZLE_Y
;
742 case TGSI_WRITEMASK_Z
:
743 tmp_src
.SwizzleW
= TGSI_SWIZZLE_Z
;
745 case TGSI_WRITEMASK_W
:
746 tmp_src
.SwizzleW
= TGSI_SWIZZLE_W
;
749 DBG("invalid writemask!");
754 alu
= ir_instr_create_alu(next_exec_cf(ctx
), ~0, EXP_IEEE
);
755 add_regs_dummy_vector(alu
);
756 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
757 add_src_reg(ctx
, alu
, &tmp_src
);
758 add_scalar_clamp(inst
, alu
);
762 translate_tex(struct fd_compile_context
*ctx
,
763 struct tgsi_full_instruction
*inst
, unsigned opc
)
765 struct ir_instruction
*instr
;
766 struct tgsi_dst_register tmp_dst
;
767 struct tgsi_src_register tmp_src
;
768 const struct tgsi_src_register
*coord
;
772 using_temp
= get_internal_temp(ctx
,
773 &inst
->Dst
[0].Register
, &tmp_dst
, &tmp_src
);
775 if (opc
== TGSI_OPCODE_TXP
) {
776 /* TXP - Projective Texture Lookup:
778 * coord.x = src0.x / src.w
779 * coord.y = src0.y / src.w
780 * coord.z = src0.z / src.w
784 * dst = texture_sample(unit, coord, bias)
786 instr
= ir_instr_create_alu(next_exec_cf(ctx
), MAXv
, RECIP_IEEE
);
789 add_dst_reg(ctx
, instr
, &tmp_dst
)->swizzle
= "___w";
790 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
791 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
794 add_dst_reg(ctx
, instr
, &tmp_dst
)->swizzle
= "x___";
795 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
)->swizzle
= "wwww";
797 instr
= ir_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
798 add_dst_reg(ctx
, instr
, &tmp_dst
)->swizzle
= "xyz_";
799 add_src_reg(ctx
, instr
, &tmp_src
)->swizzle
= "xxxx";
800 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
804 coord
= &inst
->Src
[0].Register
;
807 instr
= ir_instr_create(next_exec_cf(ctx
), IR_FETCH
);
808 instr
->fetch
.opc
= TEX_FETCH
;
809 assert(inst
->Texture
.NumOffsets
<= 1); // TODO what to do in other cases?
811 /* save off the tex fetch to be patched later with correct const_idx: */
812 idx
= ctx
->so
->num_tfetch_instrs
++;
813 ctx
->so
->tfetch_instrs
[idx
].samp_id
= inst
->Src
[1].Register
.Index
;
814 ctx
->so
->tfetch_instrs
[idx
].instr
= instr
;
816 add_dst_reg(ctx
, instr
, &tmp_dst
);
817 add_src_reg(ctx
, instr
, coord
);
819 /* dst register needs to be marked for sync: */
820 ctx
->need_sync
|= 1 << instr
->regs
[0]->num
;
822 /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */
826 /* texture fetch can't write directly to export, so if tgsi
827 * is telling us the dst register is in output file, we load
828 * the texture to a temp and the use ALU instruction to move
831 instr
= ir_instr_create_alu(next_exec_cf(ctx
), MAXv
, ~0);
833 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
834 add_src_reg(ctx
, instr
, &tmp_src
);
835 add_src_reg(ctx
, instr
, &tmp_src
);
836 add_vector_clamp(inst
, instr
);
840 /* SGE(a,b) = GTE((b - a), 1.0, 0.0) */
841 /* SLT(a,b) = GTE((b - a), 0.0, 1.0) */
843 translate_sge_slt(struct fd_compile_context
*ctx
,
844 struct tgsi_full_instruction
*inst
, unsigned opc
)
846 struct ir_instruction
*instr
;
847 struct tgsi_dst_register tmp_dst
;
848 struct tgsi_src_register tmp_src
;
849 struct tgsi_src_register tmp_const
;
855 case TGSI_OPCODE_SGE
:
859 case TGSI_OPCODE_SLT
:
865 get_internal_temp(ctx
, &inst
->Dst
[0].Register
, &tmp_dst
, &tmp_src
);
867 instr
= ir_instr_create_alu(next_exec_cf(ctx
), ADDv
, ~0);
868 add_dst_reg(ctx
, instr
, &tmp_dst
);
869 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
)->flags
|= IR_REG_NEGATE
;
870 add_src_reg(ctx
, instr
, &inst
->Src
[1].Register
);
872 instr
= ir_instr_create_alu(next_exec_cf(ctx
), CNDGTEv
, ~0);
873 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
874 /* maybe should re-arrange the syntax some day, but
875 * in assembler/disassembler and what ir.c expects
876 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
878 get_immediate(ctx
, &tmp_const
, f2d(c0
));
879 add_src_reg(ctx
, instr
, &tmp_const
);
880 add_src_reg(ctx
, instr
, &tmp_src
);
881 get_immediate(ctx
, &tmp_const
, f2d(c1
));
882 add_src_reg(ctx
, instr
, &tmp_const
);
885 /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
887 translate_lrp(struct fd_compile_context
*ctx
,
888 struct tgsi_full_instruction
*inst
,
891 struct ir_instruction
*instr
;
892 struct tgsi_dst_register tmp_dst1
, tmp_dst2
;
893 struct tgsi_src_register tmp_src1
, tmp_src2
;
894 struct tgsi_src_register tmp_const
;
896 get_internal_temp(ctx
, &inst
->Dst
[0].Register
, &tmp_dst1
, &tmp_src1
);
897 get_internal_temp(ctx
, NULL
, &tmp_dst2
, &tmp_src2
);
899 get_immediate(ctx
, &tmp_const
, f2d(1.0));
902 instr
= ir_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
903 add_dst_reg(ctx
, instr
, &tmp_dst1
);
904 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
905 add_src_reg(ctx
, instr
, &inst
->Src
[1].Register
);
908 instr
= ir_instr_create_alu(next_exec_cf(ctx
), ADDv
, ~0);
909 add_dst_reg(ctx
, instr
, &tmp_dst2
);
910 add_src_reg(ctx
, instr
, &tmp_const
);
911 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
)->flags
|= IR_REG_NEGATE
;
913 /* tmp2 = tmp2 * c */
914 instr
= ir_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
915 add_dst_reg(ctx
, instr
, &tmp_dst2
);
916 add_src_reg(ctx
, instr
, &tmp_src2
);
917 add_src_reg(ctx
, instr
, &inst
->Src
[2].Register
);
919 /* dst = tmp1 + tmp2 */
920 instr
= ir_instr_create_alu(next_exec_cf(ctx
), ADDv
, ~0);
921 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
922 add_src_reg(ctx
, instr
, &tmp_src1
);
923 add_src_reg(ctx
, instr
, &tmp_src2
);
927 translate_trig(struct fd_compile_context
*ctx
,
928 struct tgsi_full_instruction
*inst
,
931 struct ir_instruction
*instr
;
932 struct tgsi_dst_register tmp_dst
;
933 struct tgsi_src_register tmp_src
;
934 struct tgsi_src_register tmp_const
;
935 instr_scalar_opc_t op
;
940 case TGSI_OPCODE_SIN
:
943 case TGSI_OPCODE_COS
:
948 get_internal_temp(ctx
, &inst
->Dst
[0].Register
, &tmp_dst
, &tmp_src
);
950 tmp_dst
.WriteMask
= TGSI_WRITEMASK_X
;
951 tmp_src
.SwizzleX
= tmp_src
.SwizzleY
=
952 tmp_src
.SwizzleZ
= tmp_src
.SwizzleW
= TGSI_SWIZZLE_X
;
954 /* maybe should re-arrange the syntax some day, but
955 * in assembler/disassembler and what ir.c expects
956 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
958 instr
= ir_instr_create_alu(next_exec_cf(ctx
), MULADDv
, ~0);
959 add_dst_reg(ctx
, instr
, &tmp_dst
);
960 get_immediate(ctx
, &tmp_const
, f2d(0.5));
961 add_src_reg(ctx
, instr
, &tmp_const
);
962 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
963 get_immediate(ctx
, &tmp_const
, f2d(0.159155));
964 add_src_reg(ctx
, instr
, &tmp_const
);
966 instr
= ir_instr_create_alu(next_exec_cf(ctx
), FRACv
, ~0);
967 add_dst_reg(ctx
, instr
, &tmp_dst
);
968 add_src_reg(ctx
, instr
, &tmp_src
);
969 add_src_reg(ctx
, instr
, &tmp_src
);
971 instr
= ir_instr_create_alu(next_exec_cf(ctx
), MULADDv
, ~0);
972 add_dst_reg(ctx
, instr
, &tmp_dst
);
973 get_immediate(ctx
, &tmp_const
, f2d(-3.141593));
974 add_src_reg(ctx
, instr
, &tmp_const
);
975 add_src_reg(ctx
, instr
, &tmp_src
);
976 get_immediate(ctx
, &tmp_const
, f2d(6.283185));
977 add_src_reg(ctx
, instr
, &tmp_const
);
979 instr
= ir_instr_create_alu(next_exec_cf(ctx
), ~0, op
);
980 add_regs_dummy_vector(instr
);
981 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
982 add_src_reg(ctx
, instr
, &tmp_src
);
986 * Main part of compiler/translator:
990 translate_instruction(struct fd_compile_context
*ctx
,
991 struct tgsi_full_instruction
*inst
)
993 unsigned opc
= inst
->Instruction
.Opcode
;
994 struct ir_instruction
*instr
;
995 static struct ir_cf
*cf
;
997 if (opc
== TGSI_OPCODE_END
)
1000 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) {
1001 unsigned num
= inst
->Dst
[0].Register
.Index
;
1002 /* seems like we need to ensure that position vs param/pixel
1003 * exports don't end up in the same EXEC clause.. easy way
1004 * to do this is force a new EXEC clause on first appearance
1005 * of an position or param/pixel export.
1007 if ((num
== ctx
->position
) || (num
== ctx
->psize
)) {
1008 if (ctx
->num_position
> 0) {
1010 ir_cf_create_alloc(ctx
->so
->ir
, SQ_POSITION
,
1011 ctx
->num_position
- 1);
1012 ctx
->num_position
= 0;
1015 if (ctx
->num_param
> 0) {
1017 ir_cf_create_alloc(ctx
->so
->ir
, SQ_PARAMETER_PIXEL
,
1018 ctx
->num_param
- 1);
1024 cf
= next_exec_cf(ctx
);
1026 /* TODO turn this into a table: */
1028 case TGSI_OPCODE_MOV
:
1029 instr
= ir_instr_create_alu(cf
, MAXv
, ~0);
1030 add_regs_vector_1(ctx
, inst
, instr
);
1032 case TGSI_OPCODE_RCP
:
1033 instr
= ir_instr_create_alu(cf
, ~0, RECIP_IEEE
);
1034 add_regs_scalar_1(ctx
, inst
, instr
);
1036 case TGSI_OPCODE_RSQ
:
1037 instr
= ir_instr_create_alu(cf
, ~0, RECIPSQ_IEEE
);
1038 add_regs_scalar_1(ctx
, inst
, instr
);
1040 case TGSI_OPCODE_MUL
:
1041 instr
= ir_instr_create_alu(cf
, MULv
, ~0);
1042 add_regs_vector_2(ctx
, inst
, instr
);
1044 case TGSI_OPCODE_ADD
:
1045 instr
= ir_instr_create_alu(cf
, ADDv
, ~0);
1046 add_regs_vector_2(ctx
, inst
, instr
);
1048 case TGSI_OPCODE_DP3
:
1049 instr
= ir_instr_create_alu(cf
, DOT3v
, ~0);
1050 add_regs_vector_2(ctx
, inst
, instr
);
1052 case TGSI_OPCODE_DP4
:
1053 instr
= ir_instr_create_alu(cf
, DOT4v
, ~0);
1054 add_regs_vector_2(ctx
, inst
, instr
);
1056 case TGSI_OPCODE_MIN
:
1057 instr
= ir_instr_create_alu(cf
, MINv
, ~0);
1058 add_regs_vector_2(ctx
, inst
, instr
);
1060 case TGSI_OPCODE_MAX
:
1061 instr
= ir_instr_create_alu(cf
, MAXv
, ~0);
1062 add_regs_vector_2(ctx
, inst
, instr
);
1064 case TGSI_OPCODE_SLT
:
1065 case TGSI_OPCODE_SGE
:
1066 translate_sge_slt(ctx
, inst
, opc
);
1068 case TGSI_OPCODE_MAD
:
1069 instr
= ir_instr_create_alu(cf
, MULADDv
, ~0);
1070 add_regs_vector_3(ctx
, inst
, instr
);
1072 case TGSI_OPCODE_LRP
:
1073 translate_lrp(ctx
, inst
, opc
);
1075 case TGSI_OPCODE_FRC
:
1076 instr
= ir_instr_create_alu(cf
, FRACv
, ~0);
1077 add_regs_vector_1(ctx
, inst
, instr
);
1079 case TGSI_OPCODE_FLR
:
1080 instr
= ir_instr_create_alu(cf
, FLOORv
, ~0);
1081 add_regs_vector_1(ctx
, inst
, instr
);
1083 case TGSI_OPCODE_EX2
:
1084 instr
= ir_instr_create_alu(cf
, ~0, EXP_IEEE
);
1085 add_regs_scalar_1(ctx
, inst
, instr
);
1087 case TGSI_OPCODE_POW
:
1088 translate_pow(ctx
, inst
);
1090 case TGSI_OPCODE_ABS
:
1091 instr
= ir_instr_create_alu(cf
, MAXv
, ~0);
1092 add_regs_vector_1(ctx
, inst
, instr
);
1093 instr
->regs
[1]->flags
|= IR_REG_NEGATE
; /* src0 */
1095 case TGSI_OPCODE_COS
:
1096 case TGSI_OPCODE_SIN
:
1097 translate_trig(ctx
, inst
, opc
);
1099 case TGSI_OPCODE_TEX
:
1100 case TGSI_OPCODE_TXP
:
1101 translate_tex(ctx
, inst
, opc
);
1103 case TGSI_OPCODE_CMP
:
1104 instr
= ir_instr_create_alu(cf
, CNDGTEv
, ~0);
1105 add_regs_vector_3(ctx
, inst
, instr
);
1106 // TODO this should be src0 if regs where in sane order..
1107 instr
->regs
[2]->flags
^= IR_REG_NEGATE
; /* src1 */
1109 case TGSI_OPCODE_IF
:
1110 push_predicate(ctx
, &inst
->Src
[0].Register
);
1111 ctx
->so
->ir
->pred
= IR_PRED_EQ
;
1113 case TGSI_OPCODE_ELSE
:
1114 ctx
->so
->ir
->pred
= IR_PRED_NE
;
1115 /* not sure if this is required in all cases, but blob compiler
1116 * won't combine EQ and NE in same CF:
1120 case TGSI_OPCODE_ENDIF
:
1123 case TGSI_OPCODE_F2I
:
1124 instr
= ir_instr_create_alu(cf
, TRUNCv
, ~0);
1125 add_regs_vector_1(ctx
, inst
, instr
);
1128 DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc
));
1129 tgsi_dump(ctx
->so
->tokens
, 0);
1134 /* internal temporaries are only valid for the duration of a single
1137 ctx
->num_internal_temps
= 0;
1141 compile_instructions(struct fd_compile_context
*ctx
)
1143 while (!tgsi_parse_end_of_tokens(&ctx
->parser
)) {
1144 tgsi_parse_token(&ctx
->parser
);
1146 switch (ctx
->parser
.FullToken
.Token
.Type
) {
1147 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1148 translate_instruction(ctx
,
1149 &ctx
->parser
.FullToken
.FullInstruction
);
1156 ctx
->cf
->cf_type
= EXEC_END
;
1160 fd_compile_shader(struct fd_program_stateobj
*prog
,
1161 struct fd_shader_stateobj
*so
)
1163 struct fd_compile_context ctx
;
1165 ir_shader_destroy(so
->ir
);
1166 so
->ir
= ir_shader_create();
1167 so
->num_vfetch_instrs
= so
->num_tfetch_instrs
= so
->num_immediates
= 0;
1169 if (compile_init(&ctx
, prog
, so
) != TGSI_PARSE_OK
)
1172 if (ctx
.type
== TGSI_PROCESSOR_VERTEX
) {
1173 compile_vtx_fetch(&ctx
);
1174 } else if (ctx
.type
== TGSI_PROCESSOR_FRAGMENT
) {
1175 prog
->num_exports
= 0;
1176 memset(prog
->export_linkage
, 0xff,
1177 sizeof(prog
->export_linkage
));
1180 compile_instructions(&ctx
);