1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
4 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 * Rob Clark <robclark@freedesktop.org>
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "tgsi/tgsi_parse.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_strings.h"
37 #include "tgsi/tgsi_dump.h"
39 #include "fd2_compiler.h"
40 #include "fd2_program.h"
43 #include "instr-a2xx.h"
46 struct fd2_compile_context
{
47 struct fd_program_stateobj
*prog
;
48 struct fd2_shader_stateobj
*so
;
50 struct tgsi_parse_context parser
;
53 /* predicate stack: */
55 enum ir2_pred pred_stack
[8];
57 /* Internal-Temporary and Predicate register assignment:
59 * Some TGSI instructions which translate into multiple actual
60 * instructions need one or more temporary registers, which are not
61 * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY).
62 * And some instructions (texture fetch) cannot write directly to
63 * output registers. We could be more clever and re-use dst or a
64 * src register in some cases. But for now don't try to be clever.
65 * Eventually we should implement an optimization pass that re-
66 * juggles the register usage and gets rid of unneeded temporaries.
68 * The predicate register must be valid across multiple TGSI
69 * instructions, but internal temporary's do not. For this reason,
70 * once the predicate register is requested, until it is no longer
71 * needed, it gets the first register slot after after the TGSI
72 * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the
73 * internal temporaries get the register slots above this.
77 int num_internal_temps
;
79 uint8_t num_regs
[TGSI_FILE_COUNT
];
81 /* maps input register idx to prog->export_linkage idx: */
82 uint8_t input_export_idx
[64];
84 /* maps output register idx to prog->export_linkage idx: */
85 uint8_t output_export_idx
[64];
87 /* idx/slot for last compiler generated immediate */
88 unsigned immediate_idx
;
90 // TODO we can skip emit exports in the VS that the FS doesn't need..
91 // and get rid perhaps of num_param..
92 unsigned num_position
, num_param
;
93 unsigned position
, psize
;
99 semantic_idx(struct tgsi_declaration_semantic
*semantic
)
101 int idx
= semantic
->Name
;
102 if (idx
== TGSI_SEMANTIC_GENERIC
)
103 idx
= TGSI_SEMANTIC_COUNT
+ semantic
->Index
;
107 /* assign/get the input/export register # for given semantic idx as
108 * returned by semantic_idx():
111 export_linkage(struct fd2_compile_context
*ctx
, int idx
)
113 struct fd_program_stateobj
*prog
= ctx
->prog
;
115 /* if first time we've seen this export, assign the next available slot: */
116 if (prog
->export_linkage
[idx
] == 0xff)
117 prog
->export_linkage
[idx
] = prog
->num_exports
++;
119 return prog
->export_linkage
[idx
];
123 compile_init(struct fd2_compile_context
*ctx
, struct fd_program_stateobj
*prog
,
124 struct fd2_shader_stateobj
*so
)
132 ret
= tgsi_parse_init(&ctx
->parser
, so
->tokens
);
133 if (ret
!= TGSI_PARSE_OK
)
136 ctx
->type
= ctx
->parser
.FullHeader
.Processor
.Processor
;
139 ctx
->num_position
= 0;
142 ctx
->immediate_idx
= 0;
144 ctx
->num_internal_temps
= 0;
146 memset(ctx
->num_regs
, 0, sizeof(ctx
->num_regs
));
147 memset(ctx
->input_export_idx
, 0, sizeof(ctx
->input_export_idx
));
148 memset(ctx
->output_export_idx
, 0, sizeof(ctx
->output_export_idx
));
150 /* do first pass to extract declarations: */
151 while (!tgsi_parse_end_of_tokens(&ctx
->parser
)) {
152 tgsi_parse_token(&ctx
->parser
);
154 switch (ctx
->parser
.FullToken
.Token
.Type
) {
155 case TGSI_TOKEN_TYPE_DECLARATION
: {
156 struct tgsi_full_declaration
*decl
=
157 &ctx
->parser
.FullToken
.FullDeclaration
;
158 if (decl
->Declaration
.File
== TGSI_FILE_OUTPUT
) {
159 unsigned name
= decl
->Semantic
.Name
;
161 assert(decl
->Declaration
.Semantic
); // TODO is this ever not true?
163 ctx
->output_export_idx
[decl
->Range
.First
] =
164 semantic_idx(&decl
->Semantic
);
166 if (ctx
->type
== PIPE_SHADER_VERTEX
) {
168 case TGSI_SEMANTIC_POSITION
:
169 ctx
->position
= ctx
->num_regs
[TGSI_FILE_OUTPUT
];
172 case TGSI_SEMANTIC_PSIZE
:
173 ctx
->psize
= ctx
->num_regs
[TGSI_FILE_OUTPUT
];
176 case TGSI_SEMANTIC_COLOR
:
177 case TGSI_SEMANTIC_GENERIC
:
181 DBG("unknown VS semantic name: %s",
182 tgsi_semantic_names
[name
]);
187 case TGSI_SEMANTIC_COLOR
:
188 case TGSI_SEMANTIC_GENERIC
:
192 DBG("unknown PS semantic name: %s",
193 tgsi_semantic_names
[name
]);
197 } else if (decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
198 ctx
->input_export_idx
[decl
->Range
.First
] =
199 semantic_idx(&decl
->Semantic
);
201 ctx
->num_regs
[decl
->Declaration
.File
] =
202 MAX2(ctx
->num_regs
[decl
->Declaration
.File
], decl
->Range
.Last
+ 1);
205 case TGSI_TOKEN_TYPE_IMMEDIATE
: {
206 struct tgsi_full_immediate
*imm
=
207 &ctx
->parser
.FullToken
.FullImmediate
;
208 unsigned n
= ctx
->so
->num_immediates
++;
209 memcpy(ctx
->so
->immediates
[n
].val
, imm
->u
, 16);
217 /* TGSI generated immediates are always entire vec4's, ones we
218 * generate internally are not:
220 ctx
->immediate_idx
= ctx
->so
->num_immediates
* 4;
222 ctx
->so
->first_immediate
= ctx
->num_regs
[TGSI_FILE_CONSTANT
];
224 tgsi_parse_free(&ctx
->parser
);
226 return tgsi_parse_init(&ctx
->parser
, so
->tokens
);
230 compile_free(struct fd2_compile_context
*ctx
)
232 tgsi_parse_free(&ctx
->parser
);
236 compile_vtx_fetch(struct fd2_compile_context
*ctx
)
238 struct ir2_instruction
**vfetch_instrs
= ctx
->so
->vfetch_instrs
;
240 for (i
= 0; i
< ctx
->num_regs
[TGSI_FILE_INPUT
]; i
++) {
241 struct ir2_instruction
*instr
= ir2_instr_create(
242 ctx
->so
->ir
, IR2_FETCH
);
243 instr
->fetch
.opc
= VTX_FETCH
;
245 ctx
->need_sync
|= 1 << (i
+1);
247 ir2_dst_create(instr
, i
+1, "xyzw", 0);
248 ir2_reg_create(instr
, 0, "x", IR2_REG_INPUT
);
253 vfetch_instrs
[i
] = instr
;
255 ctx
->so
->num_vfetch_instrs
= i
;
/*
 * For vertex shaders (VS):
 * --- ------ -------------
 *
 *   Inputs:     R1-R(num_input)
 *   Constants:  C0-C(num_const-1)
 *   Immediates: C(num_const)-C(num_const+num_imm-1)
 *   Outputs:    export0-export(n) and export62, export63
 *      n is # of outputs minus gl_Position (export62) and gl_PointSize (export63)
 *   Temps:      R(num_input+1)-R(num_input+num_temps)
 *
 * R0 could be clobbered after the vertex fetch instructions.. so we
 * could use it for one of the temporaries.
 *
 * TODO: maybe the vertex fetch part could fetch first input into R0 as
 * the last vtx fetch instruction, which would let us use the same
 * register layout in either case.. although this is not what the blob
 * compiler does.
 *
 *
 * For frag shaders (PS):
 * --- ---- -------------
 *
 *   Inputs:     R0-R(num_input-1)
 *   Constants:  same as VS
 *   Immediates: same as VS
 *   Outputs:    export0-export(num_outputs)
 *   Temps:      R(num_input)-R(num_input+num_temps-1)
 *
 * In either case, immediates are appended to the constants
 * (uniforms).
 */
293 get_temp_gpr(struct fd2_compile_context
*ctx
, int idx
)
295 unsigned num
= idx
+ ctx
->num_regs
[TGSI_FILE_INPUT
];
296 if (ctx
->type
== PIPE_SHADER_VERTEX
)
301 static struct ir2_dst_register
*
302 add_dst_reg(struct fd2_compile_context
*ctx
, struct ir2_instruction
*alu
,
303 const struct tgsi_dst_register
*dst
)
305 unsigned flags
= 0, num
= 0;
309 case TGSI_FILE_OUTPUT
:
310 flags
|= IR2_REG_EXPORT
;
311 if (ctx
->type
== PIPE_SHADER_VERTEX
) {
312 if (dst
->Index
== ctx
->position
) {
314 } else if (dst
->Index
== ctx
->psize
) {
317 num
= export_linkage(ctx
,
318 ctx
->output_export_idx
[dst
->Index
]);
324 case TGSI_FILE_TEMPORARY
:
325 num
= get_temp_gpr(ctx
, dst
->Index
);
328 DBG("unsupported dst register file: %s",
329 tgsi_file_name(dst
->File
));
334 swiz
[0] = (dst
->WriteMask
& TGSI_WRITEMASK_X
) ? 'x' : '_';
335 swiz
[1] = (dst
->WriteMask
& TGSI_WRITEMASK_Y
) ? 'y' : '_';
336 swiz
[2] = (dst
->WriteMask
& TGSI_WRITEMASK_Z
) ? 'z' : '_';
337 swiz
[3] = (dst
->WriteMask
& TGSI_WRITEMASK_W
) ? 'w' : '_';
340 return ir2_dst_create(alu
, num
, swiz
, flags
);
343 static struct ir2_src_register
*
344 add_src_reg(struct fd2_compile_context
*ctx
, struct ir2_instruction
*alu
,
345 const struct tgsi_src_register
*src
)
347 static const char swiz_vals
[] = {
351 unsigned flags
= 0, num
= 0;
354 case TGSI_FILE_CONSTANT
:
356 flags
|= IR2_REG_CONST
;
358 case TGSI_FILE_INPUT
:
359 if (ctx
->type
== PIPE_SHADER_VERTEX
) {
360 num
= src
->Index
+ 1;
362 flags
|= IR2_REG_INPUT
;
363 num
= export_linkage(ctx
,
364 ctx
->input_export_idx
[src
->Index
]);
367 case TGSI_FILE_TEMPORARY
:
368 num
= get_temp_gpr(ctx
, src
->Index
);
370 case TGSI_FILE_IMMEDIATE
:
371 num
= src
->Index
+ ctx
->num_regs
[TGSI_FILE_CONSTANT
];
372 flags
|= IR2_REG_CONST
;
375 DBG("unsupported src register file: %s",
376 tgsi_file_name(src
->File
));
382 flags
|= IR2_REG_ABS
;
384 flags
|= IR2_REG_NEGATE
;
386 swiz
[0] = swiz_vals
[src
->SwizzleX
];
387 swiz
[1] = swiz_vals
[src
->SwizzleY
];
388 swiz
[2] = swiz_vals
[src
->SwizzleZ
];
389 swiz
[3] = swiz_vals
[src
->SwizzleW
];
392 if ((ctx
->need_sync
& ((uint64_t)1 << num
)) &&
393 !(flags
& IR2_REG_CONST
)) {
395 ctx
->need_sync
&= ~((uint64_t)1 << num
);
398 return ir2_reg_create(alu
, num
, swiz
, flags
);
402 add_vector_clamp(struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
404 if (inst
->Instruction
.Saturate
) {
405 alu
->alu_vector
.clamp
= true;
410 add_scalar_clamp(struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
412 if (inst
->Instruction
.Saturate
) {
413 alu
->alu_scalar
.clamp
= true;
418 add_regs_vector_1(struct fd2_compile_context
*ctx
,
419 struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
421 assert(inst
->Instruction
.NumSrcRegs
== 1);
422 assert(inst
->Instruction
.NumDstRegs
== 1);
424 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
425 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
426 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
427 add_vector_clamp(inst
, alu
);
431 add_regs_vector_2(struct fd2_compile_context
*ctx
,
432 struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
434 assert(inst
->Instruction
.NumSrcRegs
== 2);
435 assert(inst
->Instruction
.NumDstRegs
== 1);
437 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
438 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
439 add_src_reg(ctx
, alu
, &inst
->Src
[1].Register
);
440 add_vector_clamp(inst
, alu
);
444 add_regs_vector_3(struct fd2_compile_context
*ctx
,
445 struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
447 assert(inst
->Instruction
.NumSrcRegs
== 3);
448 assert(inst
->Instruction
.NumDstRegs
== 1);
450 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
451 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
452 add_src_reg(ctx
, alu
, &inst
->Src
[1].Register
);
453 add_src_reg(ctx
, alu
, &inst
->Src
[2].Register
);
454 add_vector_clamp(inst
, alu
);
458 add_regs_scalar_1(struct fd2_compile_context
*ctx
,
459 struct tgsi_full_instruction
*inst
, struct ir2_instruction
*alu
)
461 assert(inst
->Instruction
.NumSrcRegs
== 1);
462 assert(inst
->Instruction
.NumDstRegs
== 1);
464 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
465 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
466 add_scalar_clamp(inst
, alu
);
470 * Helpers for TGSI instructions that don't map to a single shader instr:
474 src_from_dst(struct tgsi_src_register
*src
, struct tgsi_dst_register
*dst
)
476 src
->File
= dst
->File
;
477 src
->Indirect
= dst
->Indirect
;
478 src
->Dimension
= dst
->Dimension
;
479 src
->Index
= dst
->Index
;
482 src
->SwizzleX
= TGSI_SWIZZLE_X
;
483 src
->SwizzleY
= TGSI_SWIZZLE_Y
;
484 src
->SwizzleZ
= TGSI_SWIZZLE_Z
;
485 src
->SwizzleW
= TGSI_SWIZZLE_W
;
488 /* Get internal-temp src/dst to use for a sequence of instructions
489 * generated by a single TGSI op.
492 get_internal_temp(struct fd2_compile_context
*ctx
,
493 struct tgsi_dst_register
*tmp_dst
,
494 struct tgsi_src_register
*tmp_src
)
498 tmp_dst
->File
= TGSI_FILE_TEMPORARY
;
499 tmp_dst
->WriteMask
= TGSI_WRITEMASK_XYZW
;
500 tmp_dst
->Indirect
= 0;
501 tmp_dst
->Dimension
= 0;
503 /* assign next temporary: */
504 n
= ctx
->num_internal_temps
++;
505 if (ctx
->pred_reg
!= -1)
508 tmp_dst
->Index
= ctx
->num_regs
[TGSI_FILE_TEMPORARY
] + n
;
510 src_from_dst(tmp_src
, tmp_dst
);
514 get_predicate(struct fd2_compile_context
*ctx
, struct tgsi_dst_register
*dst
,
515 struct tgsi_src_register
*src
)
517 assert(ctx
->pred_reg
!= -1);
519 dst
->File
= TGSI_FILE_TEMPORARY
;
520 dst
->WriteMask
= TGSI_WRITEMASK_W
;
523 dst
->Index
= get_temp_gpr(ctx
, ctx
->pred_reg
);
526 src_from_dst(src
, dst
);
527 src
->SwizzleX
= TGSI_SWIZZLE_W
;
528 src
->SwizzleY
= TGSI_SWIZZLE_W
;
529 src
->SwizzleZ
= TGSI_SWIZZLE_W
;
530 src
->SwizzleW
= TGSI_SWIZZLE_W
;
535 push_predicate(struct fd2_compile_context
*ctx
, struct tgsi_src_register
*src
)
537 struct ir2_instruction
*alu
;
538 struct tgsi_dst_register pred_dst
;
540 if (ctx
->pred_depth
== 0) {
541 /* assign predicate register: */
542 ctx
->pred_reg
= ctx
->num_regs
[TGSI_FILE_TEMPORARY
];
544 get_predicate(ctx
, &pred_dst
, NULL
);
546 alu
= ir2_instr_create_alu_s(ctx
->so
->ir
, PRED_SETNEs
);
547 add_dst_reg(ctx
, alu
, &pred_dst
);
548 add_src_reg(ctx
, alu
, src
);
550 struct tgsi_src_register pred_src
;
552 get_predicate(ctx
, &pred_dst
, &pred_src
);
554 alu
= ir2_instr_create_alu_v(ctx
->so
->ir
, MULv
);
555 add_dst_reg(ctx
, alu
, &pred_dst
);
556 add_src_reg(ctx
, alu
, &pred_src
);
557 add_src_reg(ctx
, alu
, src
);
559 // XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make
560 // sure src reg is valid if it was calculated with a predicate
562 alu
->pred
= IR2_PRED_NONE
;
565 /* save previous pred state to restore in pop_predicate(): */
566 ctx
->pred_stack
[ctx
->pred_depth
++] = ctx
->so
->ir
->pred
;
570 pop_predicate(struct fd2_compile_context
*ctx
)
572 /* restore previous predicate state: */
573 ctx
->so
->ir
->pred
= ctx
->pred_stack
[--ctx
->pred_depth
];
575 if (ctx
->pred_depth
!= 0) {
576 struct ir2_instruction
*alu
;
577 struct tgsi_dst_register pred_dst
;
578 struct tgsi_src_register pred_src
;
580 get_predicate(ctx
, &pred_dst
, &pred_src
);
582 alu
= ir2_instr_create_alu_s(ctx
->so
->ir
, PRED_SET_POPs
);
583 add_dst_reg(ctx
, alu
, &pred_dst
);
584 add_src_reg(ctx
, alu
, &pred_src
);
585 alu
->pred
= IR2_PRED_NONE
;
587 /* predicate register no longer needed: */
593 get_immediate(struct fd2_compile_context
*ctx
,
594 struct tgsi_src_register
*reg
, uint32_t val
)
596 unsigned neg
, swiz
, idx
, i
;
597 /* actually maps 1:1 currently.. not sure if that is safe to rely on: */
598 static const unsigned swiz2tgsi
[] = {
599 TGSI_SWIZZLE_X
, TGSI_SWIZZLE_Y
, TGSI_SWIZZLE_Z
, TGSI_SWIZZLE_W
,
602 for (i
= 0; i
< ctx
->immediate_idx
; i
++) {
606 if (ctx
->so
->immediates
[idx
].val
[swiz
] == val
) {
611 if (ctx
->so
->immediates
[idx
].val
[swiz
] == -val
) {
617 if (i
== ctx
->immediate_idx
) {
618 /* need to generate a new immediate: */
622 ctx
->so
->immediates
[idx
].val
[swiz
] = val
;
623 ctx
->so
->num_immediates
= idx
+ 1;
624 ctx
->immediate_idx
++;
627 reg
->File
= TGSI_FILE_IMMEDIATE
;
633 reg
->SwizzleX
= swiz2tgsi
[swiz
];
634 reg
->SwizzleY
= swiz2tgsi
[swiz
];
635 reg
->SwizzleZ
= swiz2tgsi
[swiz
];
636 reg
->SwizzleW
= swiz2tgsi
[swiz
];
639 /* POW(a,b) = EXP2(b * LOG2(a)) */
641 translate_pow(struct fd2_compile_context
*ctx
,
642 struct tgsi_full_instruction
*inst
)
644 struct tgsi_dst_register tmp_dst
;
645 struct tgsi_src_register tmp_src
;
646 struct ir2_instruction
*alu
;
648 get_internal_temp(ctx
, &tmp_dst
, &tmp_src
);
650 alu
= ir2_instr_create_alu_s(ctx
->so
->ir
, LOG_CLAMP
);
651 add_dst_reg(ctx
, alu
, &tmp_dst
);
652 add_src_reg(ctx
, alu
, &inst
->Src
[0].Register
);
654 alu
= ir2_instr_create_alu_v(ctx
->so
->ir
, MULv
);
655 add_dst_reg(ctx
, alu
, &tmp_dst
);
656 add_src_reg(ctx
, alu
, &tmp_src
);
657 add_src_reg(ctx
, alu
, &inst
->Src
[1].Register
);
659 /* NOTE: some of the instructions, like EXP_IEEE, seem hard-
660 * coded to take their input from the w component.
662 switch(inst
->Dst
[0].Register
.WriteMask
) {
663 case TGSI_WRITEMASK_X
:
664 tmp_src
.SwizzleW
= TGSI_SWIZZLE_X
;
666 case TGSI_WRITEMASK_Y
:
667 tmp_src
.SwizzleW
= TGSI_SWIZZLE_Y
;
669 case TGSI_WRITEMASK_Z
:
670 tmp_src
.SwizzleW
= TGSI_SWIZZLE_Z
;
672 case TGSI_WRITEMASK_W
:
673 tmp_src
.SwizzleW
= TGSI_SWIZZLE_W
;
676 DBG("invalid writemask!");
681 alu
= ir2_instr_create_alu_s(ctx
->so
->ir
, EXP_IEEE
);
682 add_dst_reg(ctx
, alu
, &inst
->Dst
[0].Register
);
683 add_src_reg(ctx
, alu
, &tmp_src
);
684 add_scalar_clamp(inst
, alu
);
688 translate_tex(struct fd2_compile_context
*ctx
,
689 struct tgsi_full_instruction
*inst
, unsigned opc
)
691 struct ir2_instruction
*instr
;
692 struct ir2_src_register
*reg
;
693 struct tgsi_dst_register tmp_dst
;
694 struct tgsi_src_register tmp_src
;
695 const struct tgsi_src_register
*coord
;
696 bool using_temp
= (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) ||
697 inst
->Instruction
.Saturate
;
700 if (using_temp
|| (opc
== TGSI_OPCODE_TXP
))
701 get_internal_temp(ctx
, &tmp_dst
, &tmp_src
);
703 if (opc
== TGSI_OPCODE_TXP
) {
704 static const char *swiz
[] = {
705 [TGSI_SWIZZLE_X
] = "xxxx",
706 [TGSI_SWIZZLE_Y
] = "yyyy",
707 [TGSI_SWIZZLE_Z
] = "zzzz",
708 [TGSI_SWIZZLE_W
] = "wwww",
711 /* TXP - Projective Texture Lookup:
713 * coord.x = src0.x / src.w
714 * coord.y = src0.y / src.w
715 * coord.z = src0.z / src.w
719 * dst = texture_sample(unit, coord, bias)
722 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, MAXv
);
723 add_dst_reg(ctx
, instr
, &tmp_dst
)->swizzle
= "___w";
724 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
725 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
727 instr
= ir2_instr_create_alu_s(ctx
->so
->ir
, RECIP_IEEE
);
728 add_dst_reg(ctx
, instr
, &tmp_dst
)->swizzle
= "x___";
729 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
)->swizzle
=
730 swiz
[inst
->Src
[0].Register
.SwizzleW
];
732 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, MULv
);
733 add_dst_reg(ctx
, instr
, &tmp_dst
)->swizzle
= "xyz_";
734 add_src_reg(ctx
, instr
, &tmp_src
)->swizzle
= "xxxx";
735 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
739 coord
= &inst
->Src
[0].Register
;
742 instr
= ir2_instr_create(ctx
->so
->ir
, IR2_FETCH
);
743 instr
->fetch
.opc
= TEX_FETCH
;
744 instr
->fetch
.is_cube
= (inst
->Texture
.Texture
== TGSI_TEXTURE_3D
);
745 instr
->fetch
.is_rect
= (inst
->Texture
.Texture
== TGSI_TEXTURE_RECT
);
746 assert(inst
->Texture
.NumOffsets
<= 1); // TODO what to do in other cases?
748 /* save off the tex fetch to be patched later with correct const_idx: */
749 idx
= ctx
->so
->num_tfetch_instrs
++;
750 ctx
->so
->tfetch_instrs
[idx
].samp_id
= inst
->Src
[1].Register
.Index
;
751 ctx
->so
->tfetch_instrs
[idx
].instr
= instr
;
753 add_dst_reg(ctx
, instr
, using_temp
? &tmp_dst
: &inst
->Dst
[0].Register
);
754 reg
= add_src_reg(ctx
, instr
, coord
);
756 /* blob compiler always sets 3rd component to same as 1st for 2d: */
757 if (inst
->Texture
.Texture
== TGSI_TEXTURE_2D
|| inst
->Texture
.Texture
== TGSI_TEXTURE_RECT
)
758 reg
->swizzle
[2] = reg
->swizzle
[0];
760 /* dst register needs to be marked for sync: */
761 ctx
->need_sync
|= 1 << instr
->dst_reg
.num
;
763 /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */
767 /* texture fetch can't write directly to export, so if tgsi
768 * is telling us the dst register is in output file, we load
769 * the texture to a temp and the use ALU instruction to move
772 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, MAXv
);
774 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
775 add_src_reg(ctx
, instr
, &tmp_src
);
776 add_src_reg(ctx
, instr
, &tmp_src
);
777 add_vector_clamp(inst
, instr
);
781 /* SGE(a,b) = GTE((b - a), 1.0, 0.0) */
782 /* SLT(a,b) = GTE((b - a), 0.0, 1.0) */
783 /* SEQ(a,b) = EQU((b - a), 1.0, 0.0) */
784 /* SNE(a,b) = EQU((b - a), 0.0, 1.0) */
786 translate_sge_slt_seq_sne(struct fd2_compile_context
*ctx
,
787 struct tgsi_full_instruction
*inst
, unsigned opc
)
789 struct ir2_instruction
*instr
;
790 struct tgsi_dst_register tmp_dst
;
791 struct tgsi_src_register tmp_src
;
792 struct tgsi_src_register tmp_const
;
794 instr_vector_opc_t vopc
;
799 case TGSI_OPCODE_SGE
:
804 case TGSI_OPCODE_SLT
:
809 case TGSI_OPCODE_SEQ
:
814 case TGSI_OPCODE_SNE
:
821 get_internal_temp(ctx
, &tmp_dst
, &tmp_src
);
823 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, ADDv
);
824 add_dst_reg(ctx
, instr
, &tmp_dst
);
825 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
)->flags
|= IR2_REG_NEGATE
;
826 add_src_reg(ctx
, instr
, &inst
->Src
[1].Register
);
828 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, vopc
);
829 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
830 add_src_reg(ctx
, instr
, &tmp_src
);
831 get_immediate(ctx
, &tmp_const
, fui(c1
));
832 add_src_reg(ctx
, instr
, &tmp_const
);
833 get_immediate(ctx
, &tmp_const
, fui(c0
));
834 add_src_reg(ctx
, instr
, &tmp_const
);
837 /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
839 translate_lrp(struct fd2_compile_context
*ctx
,
840 struct tgsi_full_instruction
*inst
,
843 struct ir2_instruction
*instr
;
844 struct tgsi_dst_register tmp_dst1
, tmp_dst2
;
845 struct tgsi_src_register tmp_src1
, tmp_src2
;
846 struct tgsi_src_register tmp_const
;
848 get_internal_temp(ctx
, &tmp_dst1
, &tmp_src1
);
849 get_internal_temp(ctx
, &tmp_dst2
, &tmp_src2
);
851 get_immediate(ctx
, &tmp_const
, fui(1.0));
854 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, MULv
);
855 add_dst_reg(ctx
, instr
, &tmp_dst1
);
856 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
857 add_src_reg(ctx
, instr
, &inst
->Src
[1].Register
);
860 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, ADDv
);
861 add_dst_reg(ctx
, instr
, &tmp_dst2
);
862 add_src_reg(ctx
, instr
, &tmp_const
);
863 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
)->flags
|= IR2_REG_NEGATE
;
865 /* tmp2 = tmp2 * c */
866 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, MULv
);
867 add_dst_reg(ctx
, instr
, &tmp_dst2
);
868 add_src_reg(ctx
, instr
, &tmp_src2
);
869 add_src_reg(ctx
, instr
, &inst
->Src
[2].Register
);
871 /* dst = tmp1 + tmp2 */
872 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, ADDv
);
873 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
874 add_src_reg(ctx
, instr
, &tmp_src1
);
875 add_src_reg(ctx
, instr
, &tmp_src2
);
879 translate_trig(struct fd2_compile_context
*ctx
,
880 struct tgsi_full_instruction
*inst
,
883 struct ir2_instruction
*instr
;
884 struct tgsi_dst_register tmp_dst
;
885 struct tgsi_src_register tmp_src
;
886 struct tgsi_src_register tmp_const
;
887 instr_scalar_opc_t op
;
892 case TGSI_OPCODE_SIN
:
895 case TGSI_OPCODE_COS
:
900 get_internal_temp(ctx
, &tmp_dst
, &tmp_src
);
902 tmp_dst
.WriteMask
= TGSI_WRITEMASK_X
;
903 tmp_src
.SwizzleX
= tmp_src
.SwizzleY
=
904 tmp_src
.SwizzleZ
= tmp_src
.SwizzleW
= TGSI_SWIZZLE_X
;
906 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, MULADDv
);
907 add_dst_reg(ctx
, instr
, &tmp_dst
);
908 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
909 get_immediate(ctx
, &tmp_const
, fui(0.159155));
910 add_src_reg(ctx
, instr
, &tmp_const
);
911 get_immediate(ctx
, &tmp_const
, fui(0.5));
912 add_src_reg(ctx
, instr
, &tmp_const
);
914 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, FRACv
);
915 add_dst_reg(ctx
, instr
, &tmp_dst
);
916 add_src_reg(ctx
, instr
, &tmp_src
);
917 add_src_reg(ctx
, instr
, &tmp_src
);
919 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, MULADDv
);
920 add_dst_reg(ctx
, instr
, &tmp_dst
);
921 add_src_reg(ctx
, instr
, &tmp_src
);
922 get_immediate(ctx
, &tmp_const
, fui(6.283185));
923 add_src_reg(ctx
, instr
, &tmp_const
);
924 get_immediate(ctx
, &tmp_const
, fui(-3.141593));
925 add_src_reg(ctx
, instr
, &tmp_const
);
927 instr
= ir2_instr_create_alu_s(ctx
->so
->ir
, op
);
928 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
929 add_src_reg(ctx
, instr
, &tmp_src
);
933 translate_dp2(struct fd2_compile_context
*ctx
,
934 struct tgsi_full_instruction
*inst
,
937 struct tgsi_src_register tmp_const
;
938 struct ir2_instruction
*instr
;
939 /* DP2ADD c,a,b -> dot2(a,b) + c */
940 /* for c we use the constant 0.0 */
941 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, DOT2ADDv
);
942 add_dst_reg(ctx
, instr
, &inst
->Dst
[0].Register
);
943 add_src_reg(ctx
, instr
, &inst
->Src
[0].Register
);
944 add_src_reg(ctx
, instr
, &inst
->Src
[1].Register
);
945 get_immediate(ctx
, &tmp_const
, fui(0.0f
));
946 add_src_reg(ctx
, instr
, &tmp_const
);
947 add_vector_clamp(inst
, instr
);
951 * Main part of compiler/translator:
955 translate_instruction(struct fd2_compile_context
*ctx
,
956 struct tgsi_full_instruction
*inst
)
958 unsigned opc
= inst
->Instruction
.Opcode
;
959 struct ir2_instruction
*instr
;
961 if (opc
== TGSI_OPCODE_END
)
964 /* TODO turn this into a table: */
966 case TGSI_OPCODE_MOV
:
967 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, MAXv
);
968 add_regs_vector_1(ctx
, inst
, instr
);
970 case TGSI_OPCODE_RCP
:
971 instr
= ir2_instr_create_alu_s(ctx
->so
->ir
, RECIP_IEEE
);
972 add_regs_scalar_1(ctx
, inst
, instr
);
974 case TGSI_OPCODE_RSQ
:
975 instr
= ir2_instr_create_alu_s(ctx
->so
->ir
, RECIPSQ_IEEE
);
976 add_regs_scalar_1(ctx
, inst
, instr
);
978 case TGSI_OPCODE_SQRT
:
979 instr
= ir2_instr_create_alu_s(ctx
->so
->ir
, SQRT_IEEE
);
980 add_regs_scalar_1(ctx
, inst
, instr
);
982 case TGSI_OPCODE_MUL
:
983 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, MULv
);
984 add_regs_vector_2(ctx
, inst
, instr
);
986 case TGSI_OPCODE_ADD
:
987 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, ADDv
);
988 add_regs_vector_2(ctx
, inst
, instr
);
990 case TGSI_OPCODE_DP2
:
991 translate_dp2(ctx
, inst
, opc
);
993 case TGSI_OPCODE_DP3
:
994 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, DOT3v
);
995 add_regs_vector_2(ctx
, inst
, instr
);
997 case TGSI_OPCODE_DP4
:
998 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, DOT4v
);
999 add_regs_vector_2(ctx
, inst
, instr
);
1001 case TGSI_OPCODE_MIN
:
1002 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, MINv
);
1003 add_regs_vector_2(ctx
, inst
, instr
);
1005 case TGSI_OPCODE_MAX
:
1006 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, MAXv
);
1007 add_regs_vector_2(ctx
, inst
, instr
);
1009 case TGSI_OPCODE_SLT
:
1010 case TGSI_OPCODE_SGE
:
1011 case TGSI_OPCODE_SEQ
:
1012 case TGSI_OPCODE_SNE
:
1013 translate_sge_slt_seq_sne(ctx
, inst
, opc
);
1015 case TGSI_OPCODE_MAD
:
1016 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, MULADDv
);
1017 add_regs_vector_3(ctx
, inst
, instr
);
1019 case TGSI_OPCODE_LRP
:
1020 translate_lrp(ctx
, inst
, opc
);
1022 case TGSI_OPCODE_FRC
:
1023 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, FRACv
);
1024 add_regs_vector_1(ctx
, inst
, instr
);
1026 case TGSI_OPCODE_FLR
:
1027 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, FLOORv
);
1028 add_regs_vector_1(ctx
, inst
, instr
);
1030 case TGSI_OPCODE_EX2
:
1031 instr
= ir2_instr_create_alu_s(ctx
->so
->ir
, EXP_IEEE
);
1032 add_regs_scalar_1(ctx
, inst
, instr
);
1034 case TGSI_OPCODE_POW
:
1035 translate_pow(ctx
, inst
);
1037 case TGSI_OPCODE_COS
:
1038 case TGSI_OPCODE_SIN
:
1039 translate_trig(ctx
, inst
, opc
);
1041 case TGSI_OPCODE_TEX
:
1042 case TGSI_OPCODE_TXP
:
1043 translate_tex(ctx
, inst
, opc
);
1045 case TGSI_OPCODE_CMP
:
1046 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, CNDGTEv
);
1047 add_regs_vector_3(ctx
, inst
, instr
);
1048 instr
->src_reg
[0].flags
^= IR2_REG_NEGATE
; /* src1 */
1050 case TGSI_OPCODE_IF
:
1051 push_predicate(ctx
, &inst
->Src
[0].Register
);
1052 ctx
->so
->ir
->pred
= IR2_PRED_EQ
;
1054 case TGSI_OPCODE_ELSE
:
1055 ctx
->so
->ir
->pred
= IR2_PRED_NE
;
1057 case TGSI_OPCODE_ENDIF
:
1060 case TGSI_OPCODE_F2I
:
1061 instr
= ir2_instr_create_alu_v(ctx
->so
->ir
, TRUNCv
);
1062 add_regs_vector_1(ctx
, inst
, instr
);
1065 DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc
));
1066 tgsi_dump(ctx
->so
->tokens
, 0);
1071 /* internal temporaries are only valid for the duration of a single
1074 ctx
->num_internal_temps
= 0;
1078 compile_instructions(struct fd2_compile_context
*ctx
)
1080 while (!tgsi_parse_end_of_tokens(&ctx
->parser
)) {
1081 tgsi_parse_token(&ctx
->parser
);
1083 switch (ctx
->parser
.FullToken
.Token
.Type
) {
1084 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1085 translate_instruction(ctx
,
1086 &ctx
->parser
.FullToken
.FullInstruction
);
1095 fd2_compile_shader(struct fd_program_stateobj
*prog
,
1096 struct fd2_shader_stateobj
*so
)
1098 struct fd2_compile_context ctx
;
1100 ir2_shader_destroy(so
->ir
);
1101 so
->ir
= ir2_shader_create();
1102 so
->num_vfetch_instrs
= so
->num_tfetch_instrs
= so
->num_immediates
= 0;
1104 if (compile_init(&ctx
, prog
, so
) != TGSI_PARSE_OK
)
1107 if (ctx
.type
== PIPE_SHADER_VERTEX
) {
1108 compile_vtx_fetch(&ctx
);
1109 } else if (ctx
.type
== PIPE_SHADER_FRAGMENT
) {
1110 prog
->num_exports
= 0;
1111 memset(prog
->export_linkage
, 0xff,
1112 sizeof(prog
->export_linkage
));
1115 compile_instructions(&ctx
);