1 /* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
4 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 * Rob Clark <robclark@freedesktop.org>
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "tgsi/tgsi_parse.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_strings.h"
37 #include "tgsi/tgsi_dump.h"
39 #include "fd2_compiler.h"
40 #include "fd2_program.h"
43 #include "instr-a2xx.h"
46 struct fd2_compile_context
{
47 struct fd_program_stateobj
*prog
;
48 struct fd2_shader_stateobj
*so
;
50 struct tgsi_parse_context parser
;
53 /* predicate stack: */
55 enum ir2_pred pred_stack
[8];
57 /* Internal-Temporary and Predicate register assignment:
59 * Some TGSI instructions which translate into multiple actual
60 * instructions need one or more temporary registers, which are not
61 * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY).
62 * And some instructions (texture fetch) cannot write directly to
63 * output registers. We could be more clever and re-use dst or a
64 * src register in some cases. But for now don't try to be clever.
65 * Eventually we should implement an optimization pass that re-
66 * juggles the register usage and gets rid of unneeded temporaries.
68 * The predicate register must be valid across multiple TGSI
69 * instructions, but internal temporaries need not be. For this reason,
70 * once the predicate register is requested, until it is no longer
71 * needed, it gets the first register slot after the TGSI
72 * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the
73 * internal temporaries get the register slots above this.
77 int num_internal_temps
;
79 uint8_t num_regs
[TGSI_FILE_COUNT
];
81 /* maps input register idx to prog->export_linkage idx: */
82 uint8_t input_export_idx
[64];
84 /* maps output register idx to prog->export_linkage idx: */
85 uint8_t output_export_idx
[64];
87 /* idx/slot for last compiler generated immediate */
88 unsigned immediate_idx
;
90 // TODO we can skip emit exports in the VS that the FS doesn't need..
91 // and get rid perhaps of num_param..
92 unsigned num_position
, num_param
;
93 unsigned position
, psize
;
97 /* current exec CF instruction */
102 semantic_idx(struct tgsi_declaration_semantic
*semantic
)
104 int idx
= semantic
->Name
;
105 if (idx
== TGSI_SEMANTIC_GENERIC
)
106 idx
= TGSI_SEMANTIC_COUNT
+ semantic
->Index
;
110 /* assign/get the input/export register # for given semantic idx as
111 * returned by semantic_idx():
114 export_linkage(struct fd2_compile_context
*ctx
, int idx
)
116 struct fd_program_stateobj
*prog
= ctx
->prog
;
118 /* if first time we've seen this export, assign the next available slot: */
119 if (prog
->export_linkage
[idx
] == 0xff)
120 prog
->export_linkage
[idx
] = prog
->num_exports
++;
122 return prog
->export_linkage
[idx
];
126 compile_init(struct fd2_compile_context
*ctx
, struct fd_program_stateobj
*prog
,
127 struct fd2_shader_stateobj
*so
)
136 ret
= tgsi_parse_init(&ctx
->parser
, so
->tokens
);
137 if (ret
!= TGSI_PARSE_OK
)
140 ctx
->type
= ctx
->parser
.FullHeader
.Processor
.Processor
;
143 ctx
->num_position
= 0;
146 ctx
->immediate_idx
= 0;
148 ctx
->num_internal_temps
= 0;
150 memset(ctx
->num_regs
, 0, sizeof(ctx
->num_regs
));
151 memset(ctx
->input_export_idx
, 0, sizeof(ctx
->input_export_idx
));
152 memset(ctx
->output_export_idx
, 0, sizeof(ctx
->output_export_idx
));
154 /* do first pass to extract declarations: */
155 while (!tgsi_parse_end_of_tokens(&ctx
->parser
)) {
156 tgsi_parse_token(&ctx
->parser
);
158 switch (ctx
->parser
.FullToken
.Token
.Type
) {
159 case TGSI_TOKEN_TYPE_DECLARATION
: {
160 struct tgsi_full_declaration
*decl
=
161 &ctx
->parser
.FullToken
.FullDeclaration
;
162 if (decl
->Declaration
.File
== TGSI_FILE_OUTPUT
) {
163 unsigned name
= decl
->Semantic
.Name
;
165 assert(decl
->Declaration
.Semantic
); // TODO is this ever not true?
167 ctx
->output_export_idx
[decl
->Range
.First
] =
168 semantic_idx(&decl
->Semantic
);
170 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
172 case TGSI_SEMANTIC_POSITION
:
173 ctx
->position
= ctx
->num_regs
[TGSI_FILE_OUTPUT
];
176 case TGSI_SEMANTIC_PSIZE
:
177 ctx
->psize
= ctx
->num_regs
[TGSI_FILE_OUTPUT
];
180 case TGSI_SEMANTIC_COLOR
:
181 case TGSI_SEMANTIC_GENERIC
:
185 DBG("unknown VS semantic name: %s",
186 tgsi_semantic_names
[name
]);
191 case TGSI_SEMANTIC_COLOR
:
192 case TGSI_SEMANTIC_GENERIC
:
196 DBG("unknown PS semantic name: %s",
197 tgsi_semantic_names
[name
]);
201 } else if (decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
202 ctx
->input_export_idx
[decl
->Range
.First
] =
203 semantic_idx(&decl
->Semantic
);
205 ctx
->num_regs
[decl
->Declaration
.File
] =
206 MAX2(ctx
->num_regs
[decl
->Declaration
.File
], decl
->Range
.Last
+ 1);
209 case TGSI_TOKEN_TYPE_IMMEDIATE
: {
210 struct tgsi_full_immediate
*imm
=
211 &ctx
->parser
.FullToken
.FullImmediate
;
212 unsigned n
= ctx
->so
->num_immediates
++;
213 memcpy(ctx
->so
->immediates
[n
].val
, imm
->u
, 16);
221 /* TGSI generated immediates are always entire vec4's, ones we
222 * generate internally are not:
224 ctx
->immediate_idx
= ctx
->so
->num_immediates
* 4;
226 ctx
->so
->first_immediate
= ctx
->num_regs
[TGSI_FILE_CONSTANT
];
228 tgsi_parse_free(&ctx
->parser
);
230 return tgsi_parse_init(&ctx
->parser
, so
->tokens
);
234 compile_free(struct fd2_compile_context
*ctx
)
236 tgsi_parse_free(&ctx
->parser
);
239 static struct ir2_cf
*
240 next_exec_cf(struct fd2_compile_context
*ctx
)
242 struct ir2_cf
*cf
= ctx
->cf
;
243 if (!cf
|| cf
->exec
.instrs_count
>= ARRAY_SIZE(ctx
->cf
->exec
.instrs
))
244 ctx
->cf
= cf
= ir2_cf_create(ctx
->so
->ir
, EXEC
);
249 compile_vtx_fetch(struct fd2_compile_context
*ctx
)
251 struct ir2_instruction
**vfetch_instrs
= ctx
->so
->vfetch_instrs
;
253 for (i
= 0; i
< ctx
->num_regs
[TGSI_FILE_INPUT
]; i
++) {
254 struct ir2_instruction
*instr
= ir2_instr_create(
255 next_exec_cf(ctx
), IR2_FETCH
);
256 instr
->fetch
.opc
= VTX_FETCH
;
258 ctx
->need_sync
|= 1 << (i
+1);
260 ir2_reg_create(instr
, i
+1, "xyzw", 0);
261 ir2_reg_create(instr
, 0, "x", 0);
266 vfetch_instrs
[i
] = instr
;
268 ctx
->so
->num_vfetch_instrs
= i
;
273 * For vertex shaders (VS):
274 * --- ------ -------------
276 * Inputs: R1-R(num_input)
277 * Constants: C0-C(num_const-1)
278 * Immediates: C(num_const)-C(num_const+num_imm-1)
279 * Outputs: export0-export(n) and export62, export63
280 * n is # of outputs minus gl_Position (export62) and gl_PointSize (export63)
281 * Temps: R(num_input+1)-R(num_input+num_temps)
283 * R0 could be clobbered after the vertex fetch instructions.. so we
284 * could use it for one of the temporaries.
286 * TODO: maybe the vertex fetch part could fetch first input into R0 as
287 * the last vtx fetch instruction, which would let us use the same
288 * register layout in either case.. although this is not what the blob
292 * For frag shaders (PS):
293 * --- ---- -------------
295 * Inputs: R0-R(num_input-1)
296 * Constants: same as VS
297 * Immediates: same as VS
298 * Outputs: export0-export(num_outputs)
299 * Temps: R(num_input)-R(num_input+num_temps-1)
301 * In either case, immediates are appended to the constants
307 get_temp_gpr(struct fd2_compile_context
*ctx
, int idx
)
309 unsigned num
= idx
+ ctx
->num_regs
[TGSI_FILE_INPUT
];
310 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
)
315 static struct ir2_register
*
316 add_dst_reg(struct fd2_compile_context
*ctx
, struct ir2_instruction
*alu
,
317 const struct tgsi_dst_register
*dst
)
319 unsigned flags
= 0, num
= 0;
323 case TGSI_FILE_OUTPUT
:
324 flags
|= IR2_REG_EXPORT
;
325 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
326 if (dst
->Index
== ctx
->position
) {
328 } else if (dst
->Index
== ctx
->psize
) {
331 num
= export_linkage(ctx
,
332 ctx
->output_export_idx
[dst
->Index
]);
338 case TGSI_FILE_TEMPORARY
:
339 num
= get_temp_gpr(ctx
, dst
->Index
);
342 DBG("unsupported dst register file: %s",
343 tgsi_file_name(dst
->File
));
348 swiz
[0] = (dst
->WriteMask
& TGSI_WRITEMASK_X
) ? 'x' : '_';
349 swiz
[1] = (dst
->WriteMask
& TGSI_WRITEMASK_Y
) ? 'y' : '_';
350 swiz
[2] = (dst
->WriteMask
& TGSI_WRITEMASK_Z
) ? 'z' : '_';
351 swiz
[3] = (dst
->WriteMask
& TGSI_WRITEMASK_W
) ? 'w' : '_';
354 return ir2_reg_create(alu
, num
, swiz
, flags
);
357 static struct ir2_register
*
358 add_src_reg(struct fd2_compile_context
*ctx
, struct ir2_instruction
*alu
,
359 const struct tgsi_src_register
*src
)
361 static const char swiz_vals
[] = {
365 unsigned flags
= 0, num
= 0;
368 case TGSI_FILE_CONSTANT
:
370 flags
|= IR2_REG_CONST
;
372 case TGSI_FILE_INPUT
:
373 if (ctx
->type
== TGSI_PROCESSOR_VERTEX
) {
374 num
= src
->Index
+ 1;
376 num
= export_linkage(ctx
,
377 ctx
->input_export_idx
[src
->Index
]);
380 case TGSI_FILE_TEMPORARY
:
381 num
= get_temp_gpr(ctx
, src
->Index
);
383 case TGSI_FILE_IMMEDIATE
:
384 num
= src
->Index
+ ctx
->num_regs
[TGSI_FILE_CONSTANT
];
385 flags
|= IR2_REG_CONST
;
388 DBG("unsupported src register file: %s",
389 tgsi_file_name(src
->File
));
395 flags
|= IR2_REG_ABS
;
397 flags
|= IR2_REG_NEGATE
;
399 swiz
[0] = swiz_vals
[src
->SwizzleX
];
400 swiz
[1] = swiz_vals
[src
->SwizzleY
];
401 swiz
[2] = swiz_vals
[src
->SwizzleZ
];
402 swiz
[3] = swiz_vals
[src
->SwizzleW
];
405 if ((ctx
->need_sync
& (uint64_t)(1 << num
)) &&
406 !(flags
& IR2_REG_CONST
)) {
408 ctx
->need_sync
&= ~(uint64_t)(1 << num
);
411 return ir2_reg_create(alu
, num
, swiz
, flags
);
415 add_vector_clamp(struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
417 switch (inst
->Instruction
.Saturate
) {
420 case TGSI_SAT_ZERO_ONE
:
421 alu
->alu
.vector_clamp
= true;
423 case TGSI_SAT_MINUS_PLUS_ONE
:
424 DBG("unsupported saturate");
431 add_scalar_clamp(struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
433 switch (inst
->Instruction
.Saturate
) {
436 case TGSI_SAT_ZERO_ONE
:
437 alu
->alu
.scalar_clamp
= true;
439 case TGSI_SAT_MINUS_PLUS_ONE
:
440 DBG("unsupported saturate");
447 add_regs_vector_1(struct fd2_compile_context
*ctx
,
448 struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
450 assert(inst
->Instruction
.NumSrcRegs
== 1);
451 assert(inst
->Instruction
.NumDstRegs
== 1);
453 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
454 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
455 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
456 add_vector_clamp(inst
, alu
);
460 add_regs_vector_2(struct fd2_compile_context
*ctx
,
461 struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
463 assert(inst
->Instruction
.NumSrcRegs
== 2);
464 assert(inst
->Instruction
.NumDstRegs
== 1);
466 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
467 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
468 add_src_reg(ctx
, alu
, &inst
->Src
[1].Register
);
469 add_vector_clamp(inst
, alu
);
473 add_regs_vector_3(struct fd2_compile_context
*ctx
,
474 struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
476 assert(inst
->Instruction
.NumSrcRegs
== 3);
477 assert(inst
->Instruction
.NumDstRegs
== 1);
479 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
480 /* maybe should re-arrange the syntax some day, but
481 * in assembler/disassembler and what ir.c expects
482 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rsrc1
484 add_src_reg(ctx
, alu
, &inst
->Src
[2].Register
);
485 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
486 add_src_reg(ctx
, alu
, &inst
->Src
[1].Register
);
487 add_vector_clamp(inst
, alu
);
491 add_regs_dummy_vector(struct ir2_instruction
*alu
)
493 /* create dummy, non-written vector dst/src regs
494 * for unused vector instr slot:
496 ir2_reg_create(alu
, 0, "____", 0); /* vector dst */
497 ir2_reg_create(alu
, 0, NULL
, 0); /* vector src1 */
498 ir2_reg_create(alu
, 0, NULL
, 0); /* vector src2 */
502 add_regs_scalar_1(struct fd2_compile_context
*ctx
,
503 struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
505 assert(inst
->Instruction
.NumSrcRegs
== 1);
506 assert(inst
->Instruction
.NumDstRegs
== 1);
508 add_regs_dummy_vector(alu
);
510 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
511 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
512 add_scalar_clamp(inst
, alu
);
516 * Helpers for TGSI instructions that don't map to a single shader instr:
520 src_from_dst(struct tgsi_src_register
*src
, struct tgsi_dst_register
*dst
)
522 src
->File
= dst
->File
;
523 src
->Indirect
= dst
->Indirect
;
524 src
->Dimension
= dst
->Dimension
;
525 src
->Index
= dst
->Index
;
528 src
->SwizzleX
= TGSI_SWIZZLE_X
;
529 src
->SwizzleY
= TGSI_SWIZZLE_Y
;
530 src
->SwizzleZ
= TGSI_SWIZZLE_Z
;
531 src
->SwizzleW
= TGSI_SWIZZLE_W
;
534 /* Get internal-temp src/dst to use for a sequence of instructions
535 * generated by a single TGSI op.
538 get_internal_temp(struct fd2_compile_context
*ctx
,
539 struct tgsi_dst_register
*tmp_dst
,
540 struct tgsi_src_register
*tmp_src
)
544 tmp_dst
->File
= TGSI_FILE_TEMPORARY
;
545 tmp_dst
->WriteMask
= TGSI_WRITEMASK_XYZW
;
546 tmp_dst
->Indirect
= 0;
547 tmp_dst
->Dimension
= 0;
549 /* assign next temporary: */
550 n
= ctx
->num_internal_temps
++;
551 if (ctx
->pred_reg
!= -1)
554 tmp_dst
->Index
= ctx
->num_regs
[TGSI_FILE_TEMPORARY
] + n
;
556 src_from_dst(tmp_src
, tmp_dst
);
560 get_predicate(struct fd2_compile_context
*ctx
, struct tgsi_dst_register
*dst
,
561 struct tgsi_src_register
*src
)
563 assert(ctx
->pred_reg
!= -1);
565 dst
->File
= TGSI_FILE_TEMPORARY
;
566 dst
->WriteMask
= TGSI_WRITEMASK_W
;
569 dst
->Index
= get_temp_gpr(ctx
, ctx
->pred_reg
);
572 src_from_dst(src
, dst
);
573 src
->SwizzleX
= TGSI_SWIZZLE_W
;
574 src
->SwizzleY
= TGSI_SWIZZLE_W
;
575 src
->SwizzleZ
= TGSI_SWIZZLE_W
;
576 src
->SwizzleW
= TGSI_SWIZZLE_W
;
581 push_predicate(struct fd2_compile_context
*ctx
, struct tgsi_src_register
*src
)
583 struct ir2_instruction
*alu
;
584 struct tgsi_dst_register pred_dst
;
586 /* NOTE blob compiler seems to always put PRED_* instrs in a CF by
591 if (ctx
->pred_depth
== 0) {
592 /* assign predicate register: */
593 ctx
->pred_reg
= ctx
->num_regs
[TGSI_FILE_TEMPORARY
];
595 get_predicate(ctx
, &pred_dst
, NULL
);
597 alu
= ir2_instr_create_alu(next_exec_cf(ctx
), ~0, PRED_SETNEs
);
598 add_regs_dummy_vector(alu
);
599 add_dst_reg(ctx
, alu
, &pred_dst
);
600 add_src_reg(ctx
, alu
, src
);
602 struct tgsi_src_register pred_src
;
604 get_predicate(ctx
, &pred_dst
, &pred_src
);
606 alu
= ir2_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
607 add_dst_reg(ctx
, alu
, &pred_dst
);
608 add_src_reg(ctx
, alu
, &pred_src
);
609 add_src_reg(ctx
, alu
, src
);
611 // XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make
612 // sure src reg is valid if it was calculated with a predicate
614 alu
->pred
= IR2_PRED_NONE
;
617 /* save previous pred state to restore in pop_predicate(): */
618 ctx
->pred_stack
[ctx
->pred_depth
++] = ctx
->so
->ir
->pred
;
624 pop_predicate(struct fd2_compile_context
*ctx
)
626 /* NOTE blob compiler seems to always put PRED_* instrs in a CF by
631 /* restore previous predicate state: */
632 ctx
->so
->ir
->pred
= ctx
->pred_stack
[--ctx
->pred_depth
];
634 if (ctx
->pred_depth
!= 0) {
635 struct ir2_instruction
*alu
;
636 struct tgsi_dst_register pred_dst
;
637 struct tgsi_src_register pred_src
;
639 get_predicate(ctx
, &pred_dst
, &pred_src
);
641 alu
= ir2_instr_create_alu(next_exec_cf(ctx
), ~0, PRED_SET_POPs
);
642 add_regs_dummy_vector(alu
);
643 add_dst_reg(ctx
, alu
, &pred_dst
);
644 add_src_reg(ctx
, alu
, &pred_src
);
645 alu
->pred
= IR2_PRED_NONE
;
647 /* predicate register no longer needed: */
655 get_immediate(struct fd2_compile_context
*ctx
,
656 struct tgsi_src_register
*reg
, uint32_t val
)
658 unsigned neg
, swiz
, idx
, i
;
659 /* actually maps 1:1 currently.. not sure if that is safe to rely on: */
660 static const unsigned swiz2tgsi
[] = {
661 TGSI_SWIZZLE_X
, TGSI_SWIZZLE_Y
, TGSI_SWIZZLE_Z
, TGSI_SWIZZLE_W
,
664 for (i
= 0; i
< ctx
->immediate_idx
; i
++) {
668 if (ctx
->so
->immediates
[idx
].val
[swiz
] == val
) {
673 if (ctx
->so
->immediates
[idx
].val
[swiz
] == -val
) {
679 if (i
== ctx
->immediate_idx
) {
680 /* need to generate a new immediate: */
684 ctx
->so
->immediates
[idx
].val
[swiz
] = val
;
685 ctx
->so
->num_immediates
= idx
+ 1;
686 ctx
->immediate_idx
++;
689 reg
->File
= TGSI_FILE_IMMEDIATE
;
695 reg
->SwizzleX
= swiz2tgsi
[swiz
];
696 reg
->SwizzleY
= swiz2tgsi
[swiz
];
697 reg
->SwizzleZ
= swiz2tgsi
[swiz
];
698 reg
->SwizzleW
= swiz2tgsi
[swiz
];
701 /* POW(a,b) = EXP2(b * LOG2(a)) */
703 translate_pow(struct fd2_compile_context
*ctx
,
704 struct tgsi_full_instruction
*inst
)
706 struct tgsi_dst_register tmp_dst
;
707 struct tgsi_src_register tmp_src
;
708 struct ir2_instruction
*alu
;
710 get_internal_temp(ctx
, &tmp_dst
, &tmp_src
);
712 alu
= ir2_instr_create_alu(next_exec_cf(ctx
), ~0, LOG_CLAMP
);
713 add_regs_dummy_vector(alu
);
714 add_dst_reg(ctx
, alu
, &tmp_dst
);
715 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
717 alu
= ir2_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
718 add_dst_reg(ctx
, alu
, &tmp_dst
);
719 add_src_reg(ctx
, alu
, &tmp_src
);
720 add_src_reg(ctx
, alu
, &inst
->Src
[1].Register
);
722 /* NOTE: some of the instructions, like EXP_IEEE, seem hard-
723 * coded to take their input from the w component.
725 switch(inst
->Dst
[0].Register
.WriteMask
) {
726 case TGSI_WRITEMASK_X
:
727 tmp_src
.SwizzleW
= TGSI_SWIZZLE_X
;
729 case TGSI_WRITEMASK_Y
:
730 tmp_src
.SwizzleW
= TGSI_SWIZZLE_Y
;
732 case TGSI_WRITEMASK_Z
:
733 tmp_src
.SwizzleW
= TGSI_SWIZZLE_Z
;
735 case TGSI_WRITEMASK_W
:
736 tmp_src
.SwizzleW
= TGSI_SWIZZLE_W
;
739 DBG("invalid writemask!");
744 alu
= ir2_instr_create_alu(next_exec_cf(ctx
), ~0, EXP_IEEE
);
745 add_regs_dummy_vector(alu
);
746 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
747 add_src_reg(ctx
, alu
, &tmp_src
);
748 add_scalar_clamp(inst
, alu
);
752 translate_tex(struct fd2_compile_context
*ctx
,
753 struct tgsi_full_instruction
*inst
, unsigned opc
)
755 struct ir2_instruction
*instr
;
756 struct ir2_register
*reg
;
757 struct tgsi_dst_register tmp_dst
;
758 struct tgsi_src_register tmp_src
;
759 const struct tgsi_src_register
*coord
;
760 bool using_temp
= (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) ||
761 (inst
->Instruction
.Saturate
!= TGSI_SAT_NONE
);
764 if (using_temp
|| (opc
== TGSI_OPCODE_TXP
))
765 get_internal_temp(ctx
, &tmp_dst
, &tmp_src
);
767 if (opc
== TGSI_OPCODE_TXP
) {
768 static const char *swiz
[] = {
769 [TGSI_SWIZZLE_X
] = "xxxx",
770 [TGSI_SWIZZLE_Y
] = "yyyy",
771 [TGSI_SWIZZLE_Z
] = "zzzz",
772 [TGSI_SWIZZLE_W
] = "wwww",
775 /* TXP - Projective Texture Lookup:
777 * coord.x = src0.x / src0.w
778 * coord.y = src0.y / src0.w
779 * coord.z = src0.z / src0.w
783 * dst = texture_sample(unit, coord, bias)
785 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MAXv
, RECIP_IEEE
);
788 add_dst_reg(ctx
, instr
, &tmp_dst
)->swizzle
= "___w";
789 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
790 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
793 add_dst_reg(ctx
, instr
, &tmp_dst
)->swizzle
= "x___";
794 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
)->swizzle
=
795 swiz
[inst
->Src
[0].Register
.SwizzleW
];
797 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
798 add_dst_reg(ctx
, instr
, &tmp_dst
)->swizzle
= "xyz_";
799 add_src_reg(ctx
, instr
, &tmp_src
)->swizzle
= "xxxx";
800 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
804 coord
= &inst
->Src
[0].Register
;
807 instr
= ir2_instr_create(next_exec_cf(ctx
), IR2_FETCH
);
808 instr
->fetch
.opc
= TEX_FETCH
;
809 instr
->fetch
.is_cube
= (inst
->Texture
.Texture
== TGSI_TEXTURE_3D
);
810 assert(inst
->Texture
.NumOffsets
<= 1); // TODO what to do in other cases?
812 /* save off the tex fetch to be patched later with correct const_idx: */
813 idx
= ctx
->so
->num_tfetch_instrs
++;
814 ctx
->so
->tfetch_instrs
[idx
].samp_id
= inst
->Src
[1].Register
.Index
;
815 ctx
->so
->tfetch_instrs
[idx
].instr
= instr
;
817 add_dst_reg(ctx
, instr
, using_temp
? &tmp_dst
: &inst
->Dst
[0].Register
);
818 reg
= add_src_reg(ctx
, instr
, coord
);
820 /* blob compiler always sets 3rd component to same as 1st for 2d: */
821 if (inst
->Texture
.Texture
== TGSI_TEXTURE_2D
)
822 reg
->swizzle
[2] = reg
->swizzle
[0];
824 /* dst register needs to be marked for sync: */
825 ctx
->need_sync
|= 1 << instr
->regs
[0]->num
;
827 /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */
831 /* texture fetch can't write directly to export, so if tgsi
832 * is telling us the dst register is in output file, we load
833 * the texture to a temp and then use an ALU instruction to move
836 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MAXv
, ~0);
838 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
839 add_src_reg(ctx
, instr
, &tmp_src
);
840 add_src_reg(ctx
, instr
, &tmp_src
);
841 add_vector_clamp(inst
, instr
);
845 /* SGE(a,b) = GTE((b - a), 1.0, 0.0) */
846 /* SLT(a,b) = GTE((b - a), 0.0, 1.0) */
848 translate_sge_slt(struct fd2_compile_context
*ctx
,
849 struct tgsi_full_instruction
*inst
, unsigned opc
)
851 struct ir2_instruction
*instr
;
852 struct tgsi_dst_register tmp_dst
;
853 struct tgsi_src_register tmp_src
;
854 struct tgsi_src_register tmp_const
;
860 case TGSI_OPCODE_SGE
:
864 case TGSI_OPCODE_SLT
:
870 get_internal_temp(ctx
, &tmp_dst
, &tmp_src
);
872 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), ADDv
, ~0);
873 add_dst_reg(ctx
, instr
, &tmp_dst
);
874 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
)->flags
|= IR2_REG_NEGATE
;
875 add_src_reg(ctx
, instr
, &inst
->Src
[1].Register
);
877 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), CNDGTEv
, ~0);
878 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
879 /* maybe should re-arrange the syntax some day, but
880 * in assembler/disassembler and what ir.c expects
881 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rsrc1
883 get_immediate(ctx
, &tmp_const
, fui(c0
));
884 add_src_reg(ctx
, instr
, &tmp_const
);
885 add_src_reg(ctx
, instr
, &tmp_src
);
886 get_immediate(ctx
, &tmp_const
, fui(c1
));
887 add_src_reg(ctx
, instr
, &tmp_const
);
890 /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
892 translate_lrp(struct fd2_compile_context
*ctx
,
893 struct tgsi_full_instruction
*inst
,
896 struct ir2_instruction
*instr
;
897 struct tgsi_dst_register tmp_dst1
, tmp_dst2
;
898 struct tgsi_src_register tmp_src1
, tmp_src2
;
899 struct tgsi_src_register tmp_const
;
901 get_internal_temp(ctx
, &tmp_dst1
, &tmp_src1
);
902 get_internal_temp(ctx
, &tmp_dst2
, &tmp_src2
);
904 get_immediate(ctx
, &tmp_const
, fui(1.0));
907 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
908 add_dst_reg(ctx
, instr
, &tmp_dst1
);
909 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
910 add_src_reg(ctx
, instr
, &inst
->Src
[1].Register
);
913 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), ADDv
, ~0);
914 add_dst_reg(ctx
, instr
, &tmp_dst2
);
915 add_src_reg(ctx
, instr
, &tmp_const
);
916 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
)->flags
|= IR2_REG_NEGATE
;
918 /* tmp2 = tmp2 * c */
919 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MULv
, ~0);
920 add_dst_reg(ctx
, instr
, &tmp_dst2
);
921 add_src_reg(ctx
, instr
, &tmp_src2
);
922 add_src_reg(ctx
, instr
, &inst
->Src
[2].Register
);
924 /* dst = tmp1 + tmp2 */
925 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), ADDv
, ~0);
926 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
927 add_src_reg(ctx
, instr
, &tmp_src1
);
928 add_src_reg(ctx
, instr
, &tmp_src2
);
932 translate_trig(struct fd2_compile_context
*ctx
,
933 struct tgsi_full_instruction
*inst
,
936 struct ir2_instruction
*instr
;
937 struct tgsi_dst_register tmp_dst
;
938 struct tgsi_src_register tmp_src
;
939 struct tgsi_src_register tmp_const
;
940 instr_scalar_opc_t op
;
945 case TGSI_OPCODE_SIN
:
948 case TGSI_OPCODE_COS
:
953 get_internal_temp(ctx
, &tmp_dst
, &tmp_src
);
955 tmp_dst
.WriteMask
= TGSI_WRITEMASK_X
;
956 tmp_src
.SwizzleX
= tmp_src
.SwizzleY
=
957 tmp_src
.SwizzleZ
= tmp_src
.SwizzleW
= TGSI_SWIZZLE_X
;
959 /* maybe should re-arrange the syntax some day, but
960 * in assembler/disassembler and what ir.c expects
961 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rsrc1
963 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MULADDv
, ~0);
964 add_dst_reg(ctx
, instr
, &tmp_dst
);
965 get_immediate(ctx
, &tmp_const
, fui(0.5));
966 add_src_reg(ctx
, instr
, &tmp_const
);
967 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
968 get_immediate(ctx
, &tmp_const
, fui(0.159155));
969 add_src_reg(ctx
, instr
, &tmp_const
);
971 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), FRACv
, ~0);
972 add_dst_reg(ctx
, instr
, &tmp_dst
);
973 add_src_reg(ctx
, instr
, &tmp_src
);
974 add_src_reg(ctx
, instr
, &tmp_src
);
976 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), MULADDv
, ~0);
977 add_dst_reg(ctx
, instr
, &tmp_dst
);
978 get_immediate(ctx
, &tmp_const
, fui(-3.141593));
979 add_src_reg(ctx
, instr
, &tmp_const
);
980 add_src_reg(ctx
, instr
, &tmp_src
);
981 get_immediate(ctx
, &tmp_const
, fui(6.283185));
982 add_src_reg(ctx
, instr
, &tmp_const
);
984 instr
= ir2_instr_create_alu(next_exec_cf(ctx
), ~0, op
);
985 add_regs_dummy_vector(instr
);
986 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
987 add_src_reg(ctx
, instr
, &tmp_src
);
991 * Main part of compiler/translator:
995 translate_instruction(struct fd2_compile_context
*ctx
,
996 struct tgsi_full_instruction
*inst
)
998 unsigned opc
= inst
->Instruction
.Opcode
;
999 struct ir2_instruction
*instr
;
1000 static struct ir2_cf
*cf
;
1002 if (opc
== TGSI_OPCODE_END
)
1005 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) {
1006 unsigned num
= inst
->Dst
[0].Register
.Index
;
1007 /* seems like we need to ensure that position vs param/pixel
1008 * exports don't end up in the same EXEC clause.. easy way
1009 * to do this is force a new EXEC clause on first appearance
1010 * of a position or param/pixel export.
1012 if ((num
== ctx
->position
) || (num
== ctx
->psize
)) {
1013 if (ctx
->num_position
> 0) {
1015 ir2_cf_create_alloc(ctx
->so
->ir
, SQ_POSITION
,
1016 ctx
->num_position
- 1);
1017 ctx
->num_position
= 0;
1020 if (ctx
->num_param
> 0) {
1022 ir2_cf_create_alloc(ctx
->so
->ir
, SQ_PARAMETER_PIXEL
,
1023 ctx
->num_param
- 1);
1029 cf
= next_exec_cf(ctx
);
1031 /* TODO turn this into a table: */
1033 case TGSI_OPCODE_MOV
:
1034 instr
= ir2_instr_create_alu(cf
, MAXv
, ~0);
1035 add_regs_vector_1(ctx
, inst
, instr
);
1037 case TGSI_OPCODE_RCP
:
1038 instr
= ir2_instr_create_alu(cf
, ~0, RECIP_IEEE
);
1039 add_regs_scalar_1(ctx
, inst
, instr
);
1041 case TGSI_OPCODE_RSQ
:
1042 instr
= ir2_instr_create_alu(cf
, ~0, RECIPSQ_IEEE
);
1043 add_regs_scalar_1(ctx
, inst
, instr
);
1045 case TGSI_OPCODE_SQRT
:
1046 instr
= ir2_instr_create_alu(cf
, ~0, SQRT_IEEE
);
1047 add_regs_scalar_1(ctx
, inst
, instr
);
1049 case TGSI_OPCODE_MUL
:
1050 instr
= ir2_instr_create_alu(cf
, MULv
, ~0);
1051 add_regs_vector_2(ctx
, inst
, instr
);
1053 case TGSI_OPCODE_ADD
:
1054 instr
= ir2_instr_create_alu(cf
, ADDv
, ~0);
1055 add_regs_vector_2(ctx
, inst
, instr
);
1057 case TGSI_OPCODE_DP3
:
1058 instr
= ir2_instr_create_alu(cf
, DOT3v
, ~0);
1059 add_regs_vector_2(ctx
, inst
, instr
);
1061 case TGSI_OPCODE_DP4
:
1062 instr
= ir2_instr_create_alu(cf
, DOT4v
, ~0);
1063 add_regs_vector_2(ctx
, inst
, instr
);
1065 case TGSI_OPCODE_MIN
:
1066 instr
= ir2_instr_create_alu(cf
, MINv
, ~0);
1067 add_regs_vector_2(ctx
, inst
, instr
);
1069 case TGSI_OPCODE_MAX
:
1070 instr
= ir2_instr_create_alu(cf
, MAXv
, ~0);
1071 add_regs_vector_2(ctx
, inst
, instr
);
1073 case TGSI_OPCODE_SLT
:
1074 case TGSI_OPCODE_SGE
:
1075 translate_sge_slt(ctx
, inst
, opc
);
1077 case TGSI_OPCODE_MAD
:
1078 instr
= ir2_instr_create_alu(cf
, MULADDv
, ~0);
1079 add_regs_vector_3(ctx
, inst
, instr
);
1081 case TGSI_OPCODE_LRP
:
1082 translate_lrp(ctx
, inst
, opc
);
1084 case TGSI_OPCODE_FRC
:
1085 instr
= ir2_instr_create_alu(cf
, FRACv
, ~0);
1086 add_regs_vector_1(ctx
, inst
, instr
);
1088 case TGSI_OPCODE_FLR
:
1089 instr
= ir2_instr_create_alu(cf
, FLOORv
, ~0);
1090 add_regs_vector_1(ctx
, inst
, instr
);
1092 case TGSI_OPCODE_EX2
:
1093 instr
= ir2_instr_create_alu(cf
, ~0, EXP_IEEE
);
1094 add_regs_scalar_1(ctx
, inst
, instr
);
1096 case TGSI_OPCODE_POW
:
1097 translate_pow(ctx
, inst
);
1099 case TGSI_OPCODE_ABS
:
1100 instr
= ir2_instr_create_alu(cf
, MAXv
, ~0);
1101 add_regs_vector_1(ctx
, inst
, instr
);
1102 instr
->regs
[1]->flags
|= IR2_REG_NEGATE
; /* src0 */
1104 case TGSI_OPCODE_COS
:
1105 case TGSI_OPCODE_SIN
:
1106 translate_trig(ctx
, inst
, opc
);
1108 case TGSI_OPCODE_TEX
:
1109 case TGSI_OPCODE_TXP
:
1110 translate_tex(ctx
, inst
, opc
);
1112 case TGSI_OPCODE_CMP
:
1113 instr
= ir2_instr_create_alu(cf
, CNDGTEv
, ~0);
1114 add_regs_vector_3(ctx
, inst
, instr
);
1115 // TODO this should be src0 if regs were in sane order..
1116 instr
->regs
[2]->flags
^= IR2_REG_NEGATE
; /* src1 */
1118 case TGSI_OPCODE_IF
:
1119 push_predicate(ctx
, &inst
->Src
[0].Register
);
1120 ctx
->so
->ir
->pred
= IR2_PRED_EQ
;
1122 case TGSI_OPCODE_ELSE
:
1123 ctx
->so
->ir
->pred
= IR2_PRED_NE
;
1124 /* not sure if this is required in all cases, but blob compiler
1125 * won't combine EQ and NE in same CF:
1129 case TGSI_OPCODE_ENDIF
:
1132 case TGSI_OPCODE_F2I
:
1133 instr
= ir2_instr_create_alu(cf
, TRUNCv
, ~0);
1134 add_regs_vector_1(ctx
, inst
, instr
);
1137 DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc
));
1138 tgsi_dump(ctx
->so
->tokens
, 0);
1143 /* internal temporaries are only valid for the duration of a single
1146 ctx
->num_internal_temps
= 0;
1150 compile_instructions(struct fd2_compile_context
*ctx
)
1152 while (!tgsi_parse_end_of_tokens(&ctx
->parser
)) {
1153 tgsi_parse_token(&ctx
->parser
);
1155 switch (ctx
->parser
.FullToken
.Token
.Type
) {
1156 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1157 translate_instruction(ctx
,
1158 &ctx
->parser
.FullToken
.FullInstruction
);
1165 ctx
->cf
->cf_type
= EXEC_END
;
1169 fd2_compile_shader(struct fd_program_stateobj
*prog
,
1170 struct fd2_shader_stateobj
*so
)
1172 struct fd2_compile_context ctx
;
1174 ir2_shader_destroy(so
->ir
);
1175 so
->ir
= ir2_shader_create();
1176 so
->num_vfetch_instrs
= so
->num_tfetch_instrs
= so
->num_immediates
= 0;
1178 if (compile_init(&ctx
, prog
, so
) != TGSI_PARSE_OK
)
1181 if (ctx
.type
== TGSI_PROCESSOR_VERTEX
) {
1182 compile_vtx_fetch(&ctx
);
1183 } else if (ctx
.type
== TGSI_PROCESSOR_FRAGMENT
) {
1184 prog
->num_exports
= 0;
1185 memset(prog
->export_linkage
, 0xff,
1186 sizeof(prog
->export_linkage
));
1189 compile_instructions(&ctx
);