1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 **********************************************************/
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "util/u_memory.h"
31 #include "util/u_math.h"
33 #include "svga_tgsi_emit.h"
34 #include "svga_context.h"
37 static boolean
emit_vs_postamble( struct svga_shader_emitter
*emit
);
38 static boolean
emit_ps_postamble( struct svga_shader_emitter
*emit
);
46 case TGSI_OPCODE_ABS
: return SVGA3DOP_ABS
;
47 case TGSI_OPCODE_ADD
: return SVGA3DOP_ADD
;
48 case TGSI_OPCODE_BREAKC
: return SVGA3DOP_BREAKC
;
49 case TGSI_OPCODE_DP2A
: return SVGA3DOP_DP2ADD
;
50 case TGSI_OPCODE_DP3
: return SVGA3DOP_DP3
;
51 case TGSI_OPCODE_DP4
: return SVGA3DOP_DP4
;
52 case TGSI_OPCODE_FRC
: return SVGA3DOP_FRC
;
53 case TGSI_OPCODE_MAD
: return SVGA3DOP_MAD
;
54 case TGSI_OPCODE_MAX
: return SVGA3DOP_MAX
;
55 case TGSI_OPCODE_MIN
: return SVGA3DOP_MIN
;
56 case TGSI_OPCODE_MOV
: return SVGA3DOP_MOV
;
57 case TGSI_OPCODE_MUL
: return SVGA3DOP_MUL
;
58 case TGSI_OPCODE_NOP
: return SVGA3DOP_NOP
;
59 case TGSI_OPCODE_NRM4
: return SVGA3DOP_NRM
;
61 debug_printf("Unkown opcode %u\n", opcode
);
63 return SVGA3DOP_LAST_INST
;
68 static unsigned translate_file( unsigned file
)
71 case TGSI_FILE_TEMPORARY
: return SVGA3DREG_TEMP
;
72 case TGSI_FILE_INPUT
: return SVGA3DREG_INPUT
;
73 case TGSI_FILE_OUTPUT
: return SVGA3DREG_OUTPUT
; /* VS3.0+ only */
74 case TGSI_FILE_IMMEDIATE
: return SVGA3DREG_CONST
;
75 case TGSI_FILE_CONSTANT
: return SVGA3DREG_CONST
;
76 case TGSI_FILE_SAMPLER
: return SVGA3DREG_SAMPLER
;
77 case TGSI_FILE_ADDRESS
: return SVGA3DREG_ADDR
;
80 return SVGA3DREG_TEMP
;
85 static SVGA3dShaderDestToken
86 translate_dst_register( struct svga_shader_emitter
*emit
,
87 const struct tgsi_full_instruction
*insn
,
90 const struct tgsi_full_dst_register
*reg
= &insn
->Dst
[idx
];
91 SVGA3dShaderDestToken dest
;
93 switch (reg
->Register
.File
) {
94 case TGSI_FILE_OUTPUT
:
95 /* Output registers encode semantic information in their name.
96 * Need to lookup a table built at decl time:
98 dest
= emit
->output_map
[reg
->Register
.Index
];
103 unsigned index
= reg
->Register
.Index
;
104 assert(index
< SVGA3D_TEMPREG_MAX
);
105 index
= MIN2(index
, SVGA3D_TEMPREG_MAX
- 1);
106 dest
= dst_register(translate_file(reg
->Register
.File
), index
);
111 dest
.mask
= reg
->Register
.WriteMask
;
114 if (insn
->Instruction
.Saturate
)
115 dest
.dstMod
= SVGA3DDSTMOD_SATURATE
;
121 static struct src_register
122 swizzle( struct src_register src
,
128 x
= (src
.base
.swizzle
>> (x
* 2)) & 0x3;
129 y
= (src
.base
.swizzle
>> (y
* 2)) & 0x3;
130 z
= (src
.base
.swizzle
>> (z
* 2)) & 0x3;
131 w
= (src
.base
.swizzle
>> (w
* 2)) & 0x3;
133 src
.base
.swizzle
= TRANSLATE_SWIZZLE(x
,y
,z
,w
);
138 static struct src_register
139 scalar( struct src_register src
,
142 return swizzle( src
, comp
, comp
, comp
, comp
);
145 static INLINE boolean
146 svga_arl_needs_adjustment( const struct svga_shader_emitter
*emit
)
150 for (i
= 0; i
< emit
->num_arl_consts
; ++i
) {
151 if (emit
->arl_consts
[i
].arl_num
== emit
->current_arl
)
158 svga_arl_adjustment( const struct svga_shader_emitter
*emit
)
162 for (i
= 0; i
< emit
->num_arl_consts
; ++i
) {
163 if (emit
->arl_consts
[i
].arl_num
== emit
->current_arl
)
164 return emit
->arl_consts
[i
].number
;
169 static struct src_register
170 translate_src_register( const struct svga_shader_emitter
*emit
,
171 const struct tgsi_full_src_register
*reg
)
173 struct src_register src
;
175 switch (reg
->Register
.File
) {
176 case TGSI_FILE_INPUT
:
177 /* Input registers are referred to by their semantic name rather
178 * than by index. Use the mapping built up from the decls:
180 src
= emit
->input_map
[reg
->Register
.Index
];
183 case TGSI_FILE_IMMEDIATE
:
184 /* Immediates are appended after TGSI constants in the D3D
187 src
= src_register( translate_file( reg
->Register
.File
),
188 reg
->Register
.Index
+ emit
->imm_start
);
192 src
= src_register( translate_file( reg
->Register
.File
),
193 reg
->Register
.Index
);
198 /* Indirect addressing.
200 if (reg
->Register
.Indirect
) {
201 if (emit
->unit
== PIPE_SHADER_FRAGMENT
) {
202 /* Pixel shaders have only loop registers for relative
203 * addressing into inputs. Ignore the redundant address
204 * register, the contents of aL should be in sync with it.
206 if (reg
->Register
.File
== TGSI_FILE_INPUT
) {
207 src
.base
.relAddr
= 1;
208 src
.indirect
= src_token(SVGA3DREG_LOOP
, 0);
212 /* Constant buffers only.
214 if (reg
->Register
.File
== TGSI_FILE_CONSTANT
) {
215 /* we shift the offset towards the minimum */
216 if (svga_arl_needs_adjustment( emit
)) {
217 src
.base
.num
-= svga_arl_adjustment( emit
);
219 src
.base
.relAddr
= 1;
221 /* Not really sure what should go in the second token:
223 src
.indirect
= src_token( SVGA3DREG_ADDR
,
224 reg
->Indirect
.Index
);
226 src
.indirect
.swizzle
= SWIZZLE_XXXX
;
232 reg
->Register
.SwizzleX
,
233 reg
->Register
.SwizzleY
,
234 reg
->Register
.SwizzleZ
,
235 reg
->Register
.SwizzleW
);
237 /* src.mod isn't a bitfield, unfortunately:
238 * See tgsi_util_get_full_src_register_sign_mode for implementation details.
240 if (reg
->Register
.Absolute
) {
241 if (reg
->Register
.Negate
)
242 src
.base
.srcMod
= SVGA3DSRCMOD_ABSNEG
;
244 src
.base
.srcMod
= SVGA3DSRCMOD_ABS
;
247 if (reg
->Register
.Negate
)
248 src
.base
.srcMod
= SVGA3DSRCMOD_NEG
;
250 src
.base
.srcMod
= SVGA3DSRCMOD_NONE
;
258 * Get a temporary register.
259 * Note: if we exceed the temporary register limit we just use
260 * register SVGA3D_TEMPREG_MAX - 1.
262 static INLINE SVGA3dShaderDestToken
263 get_temp( struct svga_shader_emitter
*emit
)
265 int i
= emit
->nr_hw_temp
+ emit
->internal_temp_count
++;
266 assert(i
< SVGA3D_TEMPREG_MAX
);
267 i
= MIN2(i
, SVGA3D_TEMPREG_MAX
- 1);
268 return dst_register( SVGA3DREG_TEMP
, i
);
271 /* Release a single temp. Currently only effective if it was the last
272 * allocated temp, otherwise release will be delayed until the next
273 * call to reset_temp_regs().
276 release_temp( struct svga_shader_emitter
*emit
,
277 SVGA3dShaderDestToken temp
)
279 if (temp
.num
== emit
->internal_temp_count
- 1)
280 emit
->internal_temp_count
--;
283 static void reset_temp_regs( struct svga_shader_emitter
*emit
)
285 emit
->internal_temp_count
= 0;
289 /* Replace the src with the temporary specified in the dst, but copying
290 * only the necessary channels, and preserving the original swizzle (which is
291 * important given that several opcodes have constraints in the allowed
294 static boolean
emit_repl( struct svga_shader_emitter
*emit
,
295 SVGA3dShaderDestToken dst
,
296 struct src_register
*src0
)
298 unsigned src0_swizzle
;
301 assert(SVGA3dShaderGetRegType(dst
.value
) == SVGA3DREG_TEMP
);
303 src0_swizzle
= src0
->base
.swizzle
;
306 for (chan
= 0; chan
< 4; ++chan
) {
307 unsigned swizzle
= (src0_swizzle
>> (chan
*2)) & 0x3;
308 dst
.mask
|= 1 << swizzle
;
312 src0
->base
.swizzle
= SVGA3DSWIZZLE_NONE
;
314 if (!emit_op1( emit
, inst_token( SVGA3DOP_MOV
), dst
, *src0
))
318 src0
->base
.swizzle
= src0_swizzle
;
324 static boolean
submit_op0( struct svga_shader_emitter
*emit
,
325 SVGA3dShaderInstToken inst
,
326 SVGA3dShaderDestToken dest
)
328 return (emit_instruction( emit
, inst
) &&
329 emit_dst( emit
, dest
));
332 static boolean
submit_op1( struct svga_shader_emitter
*emit
,
333 SVGA3dShaderInstToken inst
,
334 SVGA3dShaderDestToken dest
,
335 struct src_register src0
)
337 return emit_op1( emit
, inst
, dest
, src0
);
341 /* SVGA shaders may not refer to >1 constant register in a single
342 * instruction. This function checks for that usage and inserts a
343 * move to temporary if detected.
345 * The same applies to input registers -- at most a single input
346 * register may be read by any instruction.
348 static boolean
submit_op2( struct svga_shader_emitter
*emit
,
349 SVGA3dShaderInstToken inst
,
350 SVGA3dShaderDestToken dest
,
351 struct src_register src0
,
352 struct src_register src1
)
354 SVGA3dShaderDestToken temp
;
355 SVGA3dShaderRegType type0
, type1
;
356 boolean need_temp
= FALSE
;
359 type0
= SVGA3dShaderGetRegType( src0
.base
.value
);
360 type1
= SVGA3dShaderGetRegType( src1
.base
.value
);
362 if (type0
== SVGA3DREG_CONST
&&
363 type1
== SVGA3DREG_CONST
&&
364 src0
.base
.num
!= src1
.base
.num
)
367 if (type0
== SVGA3DREG_INPUT
&&
368 type1
== SVGA3DREG_INPUT
&&
369 src0
.base
.num
!= src1
.base
.num
)
373 temp
= get_temp( emit
);
375 if (!emit_repl( emit
, temp
, &src0
))
379 if (!emit_op2( emit
, inst
, dest
, src0
, src1
))
383 release_temp( emit
, temp
);
389 /* SVGA shaders may not refer to >1 constant register in a single
390 * instruction. This function checks for that usage and inserts a
391 * move to temporary if detected.
393 static boolean
submit_op3( struct svga_shader_emitter
*emit
,
394 SVGA3dShaderInstToken inst
,
395 SVGA3dShaderDestToken dest
,
396 struct src_register src0
,
397 struct src_register src1
,
398 struct src_register src2
)
400 SVGA3dShaderDestToken temp0
;
401 SVGA3dShaderDestToken temp1
;
402 boolean need_temp0
= FALSE
;
403 boolean need_temp1
= FALSE
;
404 SVGA3dShaderRegType type0
, type1
, type2
;
408 type0
= SVGA3dShaderGetRegType( src0
.base
.value
);
409 type1
= SVGA3dShaderGetRegType( src1
.base
.value
);
410 type2
= SVGA3dShaderGetRegType( src2
.base
.value
);
412 if (inst
.op
!= SVGA3DOP_SINCOS
) {
413 if (type0
== SVGA3DREG_CONST
&&
414 ((type1
== SVGA3DREG_CONST
&& src0
.base
.num
!= src1
.base
.num
) ||
415 (type2
== SVGA3DREG_CONST
&& src0
.base
.num
!= src2
.base
.num
)))
418 if (type1
== SVGA3DREG_CONST
&&
419 (type2
== SVGA3DREG_CONST
&& src1
.base
.num
!= src2
.base
.num
))
423 if (type0
== SVGA3DREG_INPUT
&&
424 ((type1
== SVGA3DREG_INPUT
&& src0
.base
.num
!= src1
.base
.num
) ||
425 (type2
== SVGA3DREG_INPUT
&& src0
.base
.num
!= src2
.base
.num
)))
428 if (type1
== SVGA3DREG_INPUT
&&
429 (type2
== SVGA3DREG_INPUT
&& src1
.base
.num
!= src2
.base
.num
))
433 temp0
= get_temp( emit
);
435 if (!emit_repl( emit
, temp0
, &src0
))
440 temp1
= get_temp( emit
);
442 if (!emit_repl( emit
, temp1
, &src1
))
446 if (!emit_op3( emit
, inst
, dest
, src0
, src1
, src2
))
450 release_temp( emit
, temp1
);
452 release_temp( emit
, temp0
);
459 /* SVGA shaders may not refer to >1 constant register in a single
460 * instruction. This function checks for that usage and inserts a
461 * move to temporary if detected.
463 static boolean
submit_op4( struct svga_shader_emitter
*emit
,
464 SVGA3dShaderInstToken inst
,
465 SVGA3dShaderDestToken dest
,
466 struct src_register src0
,
467 struct src_register src1
,
468 struct src_register src2
,
469 struct src_register src3
)
471 SVGA3dShaderDestToken temp0
;
472 SVGA3dShaderDestToken temp3
;
473 boolean need_temp0
= FALSE
;
474 boolean need_temp3
= FALSE
;
475 SVGA3dShaderRegType type0
, type1
, type2
, type3
;
479 type0
= SVGA3dShaderGetRegType( src0
.base
.value
);
480 type1
= SVGA3dShaderGetRegType( src1
.base
.value
);
481 type2
= SVGA3dShaderGetRegType( src2
.base
.value
);
482 type3
= SVGA3dShaderGetRegType( src2
.base
.value
);
484 /* Make life a little easier - this is only used by the TXD
485 * instruction which is guaranteed not to have a constant/input reg
486 * in one slot at least:
488 assert(type1
== SVGA3DREG_SAMPLER
);
490 if (type0
== SVGA3DREG_CONST
&&
491 ((type3
== SVGA3DREG_CONST
&& src0
.base
.num
!= src3
.base
.num
) ||
492 (type2
== SVGA3DREG_CONST
&& src0
.base
.num
!= src2
.base
.num
)))
495 if (type3
== SVGA3DREG_CONST
&&
496 (type2
== SVGA3DREG_CONST
&& src3
.base
.num
!= src2
.base
.num
))
499 if (type0
== SVGA3DREG_INPUT
&&
500 ((type3
== SVGA3DREG_INPUT
&& src0
.base
.num
!= src3
.base
.num
) ||
501 (type2
== SVGA3DREG_INPUT
&& src0
.base
.num
!= src2
.base
.num
)))
504 if (type3
== SVGA3DREG_INPUT
&&
505 (type2
== SVGA3DREG_INPUT
&& src3
.base
.num
!= src2
.base
.num
))
509 temp0
= get_temp( emit
);
511 if (!emit_repl( emit
, temp0
, &src0
))
516 temp3
= get_temp( emit
);
518 if (!emit_repl( emit
, temp3
, &src3
))
522 if (!emit_op4( emit
, inst
, dest
, src0
, src1
, src2
, src3
))
526 release_temp( emit
, temp3
);
528 release_temp( emit
, temp0
);
533 static boolean
alias_src_dst( struct src_register src
,
534 SVGA3dShaderDestToken dst
)
536 if (src
.base
.num
!= dst
.num
)
539 if (SVGA3dShaderGetRegType(dst
.value
) !=
540 SVGA3dShaderGetRegType(src
.base
.value
))
547 static boolean
submit_lrp(struct svga_shader_emitter
*emit
,
548 SVGA3dShaderDestToken dst
,
549 struct src_register src0
,
550 struct src_register src1
,
551 struct src_register src2
)
553 SVGA3dShaderDestToken tmp
;
554 boolean need_dst_tmp
= FALSE
;
556 /* The dst reg must be a temporary, and not be the same as src0 or src2 */
557 if (SVGA3dShaderGetRegType(dst
.value
) != SVGA3DREG_TEMP
||
558 alias_src_dst(src0
, dst
) ||
559 alias_src_dst(src2
, dst
))
563 tmp
= get_temp( emit
);
570 if (!submit_op3(emit
, inst_token( SVGA3DOP_LRP
), tmp
, src0
, src1
, src2
))
574 if (!submit_op1(emit
, inst_token( SVGA3DOP_MOV
), dst
, src( tmp
)))
582 static boolean
emit_def_const( struct svga_shader_emitter
*emit
,
583 SVGA3dShaderConstType type
,
591 SVGA3dShaderInstToken opcode
;
594 case SVGA3D_CONST_TYPE_FLOAT
:
595 opcode
= inst_token( SVGA3DOP_DEF
);
596 def
.dst
= dst_register( SVGA3DREG_CONST
, idx
);
597 def
.constValues
[0] = a
;
598 def
.constValues
[1] = b
;
599 def
.constValues
[2] = c
;
600 def
.constValues
[3] = d
;
602 case SVGA3D_CONST_TYPE_INT
:
603 opcode
= inst_token( SVGA3DOP_DEFI
);
604 def
.dst
= dst_register( SVGA3DREG_CONSTINT
, idx
);
605 def
.constIValues
[0] = (int)a
;
606 def
.constIValues
[1] = (int)b
;
607 def
.constIValues
[2] = (int)c
;
608 def
.constIValues
[3] = (int)d
;
612 opcode
= inst_token( SVGA3DOP_NOP
);
616 if (!emit_instruction(emit
, opcode
) ||
617 !svga_shader_emit_dwords( emit
, def
.values
, Elements(def
.values
)))
623 static INLINE boolean
624 create_zero_immediate( struct svga_shader_emitter
*emit
)
626 unsigned idx
= emit
->nr_hw_float_const
++;
628 /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate
629 * other useful vectors.
631 if (!emit_def_const( emit
, SVGA3D_CONST_TYPE_FLOAT
,
632 idx
, 0, 0.5, -1, 1 ))
635 emit
->zero_immediate_idx
= idx
;
636 emit
->created_zero_immediate
= TRUE
;
641 static INLINE boolean
642 create_loop_const( struct svga_shader_emitter
*emit
)
644 unsigned idx
= emit
->nr_hw_int_const
++;
646 if (!emit_def_const( emit
, SVGA3D_CONST_TYPE_INT
, idx
,
647 255, /* iteration count */
648 0, /* initial value */
650 0 /* not used, must be 0 */))
653 emit
->loop_const_idx
= idx
;
654 emit
->created_loop_const
= TRUE
;
659 static INLINE boolean
660 create_arl_consts( struct svga_shader_emitter
*emit
)
664 for (i
= 0; i
< emit
->num_arl_consts
; i
+= 4) {
666 unsigned idx
= emit
->nr_hw_float_const
++;
668 for (j
= 0; j
< 4 && (j
+ i
) < emit
->num_arl_consts
; ++j
) {
669 vals
[j
] = emit
->arl_consts
[i
+ j
].number
;
670 emit
->arl_consts
[i
+ j
].idx
= idx
;
673 emit
->arl_consts
[i
+ 0].swizzle
= TGSI_SWIZZLE_X
;
676 emit
->arl_consts
[i
+ 0].swizzle
= TGSI_SWIZZLE_Y
;
679 emit
->arl_consts
[i
+ 0].swizzle
= TGSI_SWIZZLE_Z
;
682 emit
->arl_consts
[i
+ 0].swizzle
= TGSI_SWIZZLE_W
;
689 if (!emit_def_const( emit
, SVGA3D_CONST_TYPE_FLOAT
, idx
,
698 static INLINE
struct src_register
699 get_vface( struct svga_shader_emitter
*emit
)
701 assert(emit
->emitted_vface
);
702 return src_register(SVGA3DREG_MISCTYPE
, SVGA3DMISCREG_FACE
);
705 /* returns {0, 0, 0, 1} immediate */
706 static INLINE
struct src_register
707 get_zero_immediate( struct svga_shader_emitter
*emit
)
709 assert(emit
->created_zero_immediate
);
710 assert(emit
->zero_immediate_idx
>= 0);
711 return swizzle(src_register( SVGA3DREG_CONST
,
712 emit
->zero_immediate_idx
),
716 /* returns {1, 1, 1, -1} immediate */
717 static INLINE
struct src_register
718 get_pos_neg_one_immediate( struct svga_shader_emitter
*emit
)
720 assert(emit
->created_zero_immediate
);
721 assert(emit
->zero_immediate_idx
>= 0);
722 return swizzle(src_register( SVGA3DREG_CONST
,
723 emit
->zero_immediate_idx
),
727 /* returns {0.5, 0.5, 0.5, 0.5} immediate */
728 static INLINE
struct src_register
729 get_half_immediate( struct svga_shader_emitter
*emit
)
731 assert(emit
->created_zero_immediate
);
732 assert(emit
->zero_immediate_idx
>= 0);
733 return swizzle(src_register(SVGA3DREG_CONST
, emit
->zero_immediate_idx
),
737 /* returns the loop const */
738 static INLINE
struct src_register
739 get_loop_const( struct svga_shader_emitter
*emit
)
741 assert(emit
->created_loop_const
);
742 assert(emit
->loop_const_idx
>= 0);
743 return src_register( SVGA3DREG_CONSTINT
,
744 emit
->loop_const_idx
);
747 static INLINE
struct src_register
748 get_fake_arl_const( struct svga_shader_emitter
*emit
)
750 struct src_register reg
;
751 int idx
= 0, swizzle
= 0, i
;
753 for (i
= 0; i
< emit
->num_arl_consts
; ++ i
) {
754 if (emit
->arl_consts
[i
].arl_num
== emit
->current_arl
) {
755 idx
= emit
->arl_consts
[i
].idx
;
756 swizzle
= emit
->arl_consts
[i
].swizzle
;
760 reg
= src_register( SVGA3DREG_CONST
, idx
);
761 return scalar(reg
, swizzle
);
764 static INLINE
struct src_register
765 get_tex_dimensions( struct svga_shader_emitter
*emit
, int sampler_num
)
768 struct src_register reg
;
770 /* the width/height indexes start right after constants */
771 idx
= emit
->key
.fkey
.tex
[sampler_num
].width_height_idx
+
772 emit
->info
.file_max
[TGSI_FILE_CONSTANT
] + 1;
774 reg
= src_register( SVGA3DREG_CONST
, idx
);
778 static boolean
emit_fake_arl(struct svga_shader_emitter
*emit
,
779 const struct tgsi_full_instruction
*insn
)
781 const struct src_register src0
= translate_src_register(
782 emit
, &insn
->Src
[0] );
783 struct src_register src1
= get_fake_arl_const( emit
);
784 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
785 SVGA3dShaderDestToken tmp
= get_temp( emit
);
787 if (!submit_op1(emit
, inst_token( SVGA3DOP_MOV
), tmp
, src0
))
790 if (!submit_op2( emit
, inst_token( SVGA3DOP_ADD
), tmp
, src( tmp
),
794 /* replicate the original swizzle */
796 src1
.base
.swizzle
= src0
.base
.swizzle
;
798 return submit_op1( emit
, inst_token( SVGA3DOP_MOVA
),
802 static boolean
emit_if(struct svga_shader_emitter
*emit
,
803 const struct tgsi_full_instruction
*insn
)
805 struct src_register src0
= translate_src_register(
806 emit
, &insn
->Src
[0] );
807 struct src_register zero
= get_zero_immediate( emit
);
808 SVGA3dShaderInstToken if_token
= inst_token( SVGA3DOP_IFC
);
810 if_token
.control
= SVGA3DOPCOMPC_NE
;
811 zero
= scalar(zero
, TGSI_SWIZZLE_X
);
813 if (SVGA3dShaderGetRegType(src0
.base
.value
) == SVGA3DREG_CONST
) {
815 * Max different constant registers readable per IFC instruction is 1.
817 SVGA3dShaderDestToken tmp
= get_temp( emit
);
819 if (!submit_op1(emit
, inst_token( SVGA3DOP_MOV
), tmp
, src0
))
822 src0
= scalar(src( tmp
), TGSI_SWIZZLE_X
);
825 emit
->dynamic_branching_level
++;
827 return (emit_instruction( emit
, if_token
) &&
828 emit_src( emit
, src0
) &&
829 emit_src( emit
, zero
) );
832 static boolean
emit_endif(struct svga_shader_emitter
*emit
,
833 const struct tgsi_full_instruction
*insn
)
835 emit
->dynamic_branching_level
--;
837 return emit_instruction(emit
, inst_token(SVGA3DOP_ENDIF
));
840 static boolean
emit_else(struct svga_shader_emitter
*emit
,
841 const struct tgsi_full_instruction
*insn
)
843 return emit_instruction(emit
, inst_token(SVGA3DOP_ELSE
));
846 /* Translate the following TGSI FLR instruction.
848 * To the following SVGA3D instruction sequence.
852 static boolean
emit_floor(struct svga_shader_emitter
*emit
,
853 const struct tgsi_full_instruction
*insn
)
855 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
856 const struct src_register src0
= translate_src_register(
857 emit
, &insn
->Src
[0] );
858 SVGA3dShaderDestToken temp
= get_temp( emit
);
861 if (!submit_op1( emit
, inst_token( SVGA3DOP_FRC
), temp
, src0
))
864 /* SUB DST, SRC, TMP */
865 if (!submit_op2( emit
, inst_token( SVGA3DOP_ADD
), dst
, src0
,
866 negate( src( temp
) ) ))
873 /* Translate the following TGSI CEIL instruction.
875 * To the following SVGA3D instruction sequence.
879 static boolean
emit_ceil(struct svga_shader_emitter
*emit
,
880 const struct tgsi_full_instruction
*insn
)
882 SVGA3dShaderDestToken dst
= translate_dst_register(emit
, insn
, 0);
883 const struct src_register src0
= translate_src_register(emit
, &insn
->Src
[0]);
884 SVGA3dShaderDestToken temp
= get_temp(emit
);
887 if (!submit_op1(emit
, inst_token(SVGA3DOP_FRC
), temp
, negate(src0
)))
890 /* ADD DST, SRC, TMP */
891 if (!submit_op2(emit
, inst_token(SVGA3DOP_ADD
), dst
, src0
, src(temp
)))
898 /* Translate the following TGSI DIV instruction.
899 * DIV DST.xy, SRC0, SRC1
900 * To the following SVGA3D instruction sequence.
901 * RCP TMP.x, SRC1.xxxx
902 * RCP TMP.y, SRC1.yyyy
903 * MUL DST.xy, SRC0, TMP
905 static boolean
emit_div(struct svga_shader_emitter
*emit
,
906 const struct tgsi_full_instruction
*insn
)
908 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
909 const struct src_register src0
= translate_src_register(
910 emit
, &insn
->Src
[0] );
911 const struct src_register src1
= translate_src_register(
912 emit
, &insn
->Src
[1] );
913 SVGA3dShaderDestToken temp
= get_temp( emit
);
916 /* For each enabled element, perform a RCP instruction. Note that
917 * RCP is scalar in SVGA3D:
919 for (i
= 0; i
< 4; i
++) {
920 unsigned channel
= 1 << i
;
921 if (dst
.mask
& channel
) {
922 /* RCP TMP.?, SRC1.???? */
923 if (!submit_op1( emit
, inst_token( SVGA3DOP_RCP
),
924 writemask(temp
, channel
),
933 if (!submit_op2( emit
, inst_token( SVGA3DOP_MUL
), dst
, src0
,
940 /* Translate the following TGSI DP2 instruction.
941 * DP2 DST, SRC1, SRC2
942 * To the following SVGA3D instruction sequence.
943 * MUL TMP, SRC1, SRC2
944 * ADD DST, TMP.xxxx, TMP.yyyy
946 static boolean
emit_dp2(struct svga_shader_emitter
*emit
,
947 const struct tgsi_full_instruction
*insn
)
949 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
950 const struct src_register src0
= translate_src_register(
951 emit
, &insn
->Src
[0] );
952 const struct src_register src1
= translate_src_register(
953 emit
, &insn
->Src
[1] );
954 SVGA3dShaderDestToken temp
= get_temp( emit
);
955 struct src_register temp_src0
, temp_src1
;
957 /* MUL TMP, SRC1, SRC2 */
958 if (!submit_op2( emit
, inst_token( SVGA3DOP_MUL
), temp
, src0
, src1
))
961 temp_src0
= scalar(src( temp
), TGSI_SWIZZLE_X
);
962 temp_src1
= scalar(src( temp
), TGSI_SWIZZLE_Y
);
964 /* ADD DST, TMP.xxxx, TMP.yyyy */
965 if (!submit_op2( emit
, inst_token( SVGA3DOP_ADD
), dst
,
966 temp_src0
, temp_src1
))
973 /* Translate the following TGSI DPH instruction.
974 * DPH DST, SRC1, SRC2
975 * To the following SVGA3D instruction sequence.
976 * DP3 TMP, SRC1, SRC2
977 * ADD DST, TMP, SRC2.wwww
979 static boolean
emit_dph(struct svga_shader_emitter
*emit
,
980 const struct tgsi_full_instruction
*insn
)
982 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
983 const struct src_register src0
= translate_src_register(
984 emit
, &insn
->Src
[0] );
985 struct src_register src1
= translate_src_register(
986 emit
, &insn
->Src
[1] );
987 SVGA3dShaderDestToken temp
= get_temp( emit
);
989 /* DP3 TMP, SRC1, SRC2 */
990 if (!submit_op2( emit
, inst_token( SVGA3DOP_DP3
), temp
, src0
, src1
))
993 src1
= scalar(src1
, TGSI_SWIZZLE_W
);
995 /* ADD DST, TMP, SRC2.wwww */
996 if (!submit_op2( emit
, inst_token( SVGA3DOP_ADD
), dst
,
1003 /* Translate the following TGSI NRM instruction.
1005 * To the following SVGA3D instruction sequence.
1010 static boolean
emit_nrm(struct svga_shader_emitter
*emit
,
1011 const struct tgsi_full_instruction
*insn
)
1013 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
1014 const struct src_register src0
= translate_src_register(
1015 emit
, &insn
->Src
[0] );
1016 SVGA3dShaderDestToken temp
= get_temp( emit
);
1018 /* DP3 TMP, SRC, SRC */
1019 if (!submit_op2( emit
, inst_token( SVGA3DOP_DP3
), temp
, src0
, src0
))
1023 if (!submit_op1( emit
, inst_token( SVGA3DOP_RSQ
), temp
, src( temp
)))
1026 /* MUL DST, SRC, TMP */
1027 if (!submit_op2( emit
, inst_token( SVGA3DOP_MUL
), dst
,
1035 static boolean
do_emit_sincos(struct svga_shader_emitter
*emit
,
1036 SVGA3dShaderDestToken dst
,
1037 struct src_register src0
)
1039 src0
= scalar(src0
, TGSI_SWIZZLE_X
);
1040 return submit_op1(emit
, inst_token(SVGA3DOP_SINCOS
), dst
, src0
);
1043 static boolean
emit_sincos(struct svga_shader_emitter
*emit
,
1044 const struct tgsi_full_instruction
*insn
)
1046 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
1047 struct src_register src0
= translate_src_register(
1048 emit
, &insn
->Src
[0] );
1049 SVGA3dShaderDestToken temp
= get_temp( emit
);
1052 if (!do_emit_sincos(emit
, writemask(temp
, TGSI_WRITEMASK_XY
), src0
))
1056 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
), dst
, src( temp
) ))
1066 static boolean
emit_sin(struct svga_shader_emitter
*emit
,
1067 const struct tgsi_full_instruction
*insn
)
1069 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
1070 struct src_register src0
= translate_src_register(
1071 emit
, &insn
->Src
[0] );
1072 SVGA3dShaderDestToken temp
= get_temp( emit
);
1075 if (!do_emit_sincos(emit
, writemask(temp
, TGSI_WRITEMASK_Y
), src0
))
1078 src0
= scalar(src( temp
), TGSI_SWIZZLE_Y
);
1080 /* MOV DST TMP.yyyy */
1081 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
), dst
, src0
))
1091 static boolean
emit_cos(struct svga_shader_emitter
*emit
,
1092 const struct tgsi_full_instruction
*insn
)
1094 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
1095 struct src_register src0
= translate_src_register(
1096 emit
, &insn
->Src
[0] );
1097 SVGA3dShaderDestToken temp
= get_temp( emit
);
1100 if (!do_emit_sincos( emit
, writemask(temp
, TGSI_WRITEMASK_X
), src0
))
1103 src0
= scalar(src( temp
), TGSI_SWIZZLE_X
);
1105 /* MOV DST TMP.xxxx */
1106 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
), dst
, src0
))
1112 static boolean
emit_ssg(struct svga_shader_emitter
*emit
,
1113 const struct tgsi_full_instruction
*insn
)
1115 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
1116 struct src_register src0
= translate_src_register(
1117 emit
, &insn
->Src
[0] );
1118 SVGA3dShaderDestToken temp0
= get_temp( emit
);
1119 SVGA3dShaderDestToken temp1
= get_temp( emit
);
1120 struct src_register zero
, one
;
1122 if (emit
->unit
== PIPE_SHADER_VERTEX
) {
1123 /* SGN DST, SRC0, TMP0, TMP1 */
1124 return submit_op3( emit
, inst_token( SVGA3DOP_SGN
), dst
, src0
,
1125 src( temp0
), src( temp1
) );
1128 zero
= get_zero_immediate( emit
);
1129 one
= scalar( zero
, TGSI_SWIZZLE_W
);
1130 zero
= scalar( zero
, TGSI_SWIZZLE_X
);
1132 /* CMP TMP0, SRC0, one, zero */
1133 if (!submit_op3( emit
, inst_token( SVGA3DOP_CMP
),
1134 writemask( temp0
, dst
.mask
), src0
, one
, zero
))
1137 /* CMP TMP1, negate(SRC0), negate(one), zero */
1138 if (!submit_op3( emit
, inst_token( SVGA3DOP_CMP
),
1139 writemask( temp1
, dst
.mask
), negate( src0
), negate( one
),
1143 /* ADD DST, TMP0, TMP1 */
1144 return submit_op2( emit
, inst_token( SVGA3DOP_ADD
), dst
, src( temp0
),
1149 * ADD DST SRC0, negate(SRC1)
1151 static boolean
emit_sub(struct svga_shader_emitter
*emit
,
1152 const struct tgsi_full_instruction
*insn
)
1154 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
1155 struct src_register src0
= translate_src_register(
1156 emit
, &insn
->Src
[0] );
1157 struct src_register src1
= translate_src_register(
1158 emit
, &insn
->Src
[1] );
1160 src1
= negate(src1
);
1162 if (!submit_op2( emit
, inst_token( SVGA3DOP_ADD
), dst
,
1170 static boolean
emit_kil(struct svga_shader_emitter
*emit
,
1171 const struct tgsi_full_instruction
*insn
)
1173 const struct tgsi_full_src_register
*reg
= &insn
->Src
[0];
1174 struct src_register src0
, srcIn
;
1175 /* is the W component tested in another position? */
1176 const boolean w_tested
= (reg
->Register
.SwizzleW
== reg
->Register
.SwizzleX
||
1177 reg
->Register
.SwizzleW
== reg
->Register
.SwizzleY
||
1178 reg
->Register
.SwizzleW
== reg
->Register
.SwizzleZ
);
1179 const boolean special
= (reg
->Register
.Absolute
||
1180 reg
->Register
.Negate
||
1181 reg
->Register
.Indirect
||
1182 reg
->Register
.SwizzleX
!= 0 ||
1183 reg
->Register
.SwizzleY
!= 1 ||
1184 reg
->Register
.SwizzleZ
!= 2 ||
1185 reg
->Register
.File
!= TGSI_FILE_TEMPORARY
);
1186 SVGA3dShaderDestToken temp
;
1188 src0
= srcIn
= translate_src_register( emit
, reg
);
1190 if (special
|| !w_tested
) {
1191 /* need a temp reg */
1192 temp
= get_temp( emit
);
1196 /* move the source into a temp register */
1197 submit_op1( emit
, inst_token( SVGA3DOP_MOV
),
1198 writemask( temp
, TGSI_WRITEMASK_XYZ
),
1204 /* do the texkill (on the xyz components) */
1205 if (!submit_op0( emit
, inst_token( SVGA3DOP_TEXKILL
), dst(src0
) ))
1209 /* need to emit a second texkill to test the W component */
1210 /* put src.wwww into temp register */
1211 if (!submit_op1(emit
,
1212 inst_token( SVGA3DOP_MOV
),
1213 writemask( temp
, TGSI_WRITEMASK_XYZ
),
1214 scalar(srcIn
, TGSI_SWIZZLE_W
)))
1217 /* second texkill */
1218 if (!submit_op0( emit
, inst_token( SVGA3DOP_TEXKILL
), temp
))
1226 /* mesa state tracker always emits kilp as an unconditional
1228 static boolean
emit_kilp(struct svga_shader_emitter
*emit
,
1229 const struct tgsi_full_instruction
*insn
)
1231 SVGA3dShaderInstToken inst
;
1232 SVGA3dShaderDestToken temp
;
1233 struct src_register one
= scalar( get_zero_immediate( emit
),
1236 inst
= inst_token( SVGA3DOP_TEXKILL
);
1238 /* texkill doesn't allow negation on the operand so let's move
1239 * negation of {1} to a temp register */
1240 temp
= get_temp( emit
);
1241 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
), temp
,
1245 return submit_op0( emit
, inst
, temp
);
1250 * Test if r1 and r2 are the same register.
1253 same_register(struct src_register r1
, struct src_register r2
)
1255 return (r1
.base
.num
== r2
.base
.num
&&
1256 r1
.base
.type_upper
== r2
.base
.type_upper
&&
1257 r1
.base
.type_lower
== r2
.base
.type_lower
);
1262 /* Implement conditionals by initializing destination reg to 'fail',
1263 * then set predicate reg with UFOP_SETP, then move 'pass' to dest
1264 * based on predicate reg.
1266 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
1271 emit_conditional(struct svga_shader_emitter
*emit
,
1272 unsigned compare_func
,
1273 SVGA3dShaderDestToken dst
,
1274 struct src_register src0
,
1275 struct src_register src1
,
1276 struct src_register pass
,
1277 struct src_register fail
)
1279 SVGA3dShaderDestToken pred_reg
= dst_register( SVGA3DREG_PREDICATE
, 0 );
1280 SVGA3dShaderInstToken setp_token
, mov_token
;
1281 setp_token
= inst_token( SVGA3DOP_SETP
);
1283 switch (compare_func
) {
1284 case PIPE_FUNC_NEVER
:
1285 return submit_op1( emit
, inst_token( SVGA3DOP_MOV
),
1288 case PIPE_FUNC_LESS
:
1289 setp_token
.control
= SVGA3DOPCOMP_LT
;
1291 case PIPE_FUNC_EQUAL
:
1292 setp_token
.control
= SVGA3DOPCOMP_EQ
;
1294 case PIPE_FUNC_LEQUAL
:
1295 setp_token
.control
= SVGA3DOPCOMP_LE
;
1297 case PIPE_FUNC_GREATER
:
1298 setp_token
.control
= SVGA3DOPCOMP_GT
;
1300 case PIPE_FUNC_NOTEQUAL
:
1301 setp_token
.control
= SVGA3DOPCOMPC_NE
;
1303 case PIPE_FUNC_GEQUAL
:
1304 setp_token
.control
= SVGA3DOPCOMP_GE
;
1306 case PIPE_FUNC_ALWAYS
:
1307 return submit_op1( emit
, inst_token( SVGA3DOP_MOV
),
1312 if (same_register(src(dst
), pass
)) {
1313 /* We'll get bad results if the dst and pass registers are the same
1314 * so use a temp register containing pass.
1316 SVGA3dShaderDestToken temp
= get_temp(emit
);
1317 if (!submit_op1(emit
, inst_token(SVGA3DOP_MOV
), temp
, pass
))
1322 /* SETP src0, COMPOP, src1 */
1323 if (!submit_op2( emit
, setp_token
, pred_reg
,
1327 mov_token
= inst_token( SVGA3DOP_MOV
);
1330 if (!submit_op1( emit
, mov_token
, dst
,
1334 /* MOV dst, pass (predicated)
1336 * Note that the predicate reg (and possible modifiers) is passed
1337 * as the first source argument.
1339 mov_token
.predicated
= 1;
1340 if (!submit_op2( emit
, mov_token
, dst
,
1341 src( pred_reg
), pass
))
1349 emit_select(struct svga_shader_emitter
*emit
,
1350 unsigned compare_func
,
1351 SVGA3dShaderDestToken dst
,
1352 struct src_register src0
,
1353 struct src_register src1
)
1355 /* There are some SVGA instructions which implement some selects
1356 * directly, but they are only available in the vertex shader.
1358 if (emit
->unit
== PIPE_SHADER_VERTEX
) {
1359 switch (compare_func
) {
1360 case PIPE_FUNC_GEQUAL
:
1361 return submit_op2( emit
, inst_token( SVGA3DOP_SGE
), dst
, src0
, src1
);
1362 case PIPE_FUNC_LEQUAL
:
1363 return submit_op2( emit
, inst_token( SVGA3DOP_SGE
), dst
, src1
, src0
);
1364 case PIPE_FUNC_GREATER
:
1365 return submit_op2( emit
, inst_token( SVGA3DOP_SLT
), dst
, src1
, src0
);
1366 case PIPE_FUNC_LESS
:
1367 return submit_op2( emit
, inst_token( SVGA3DOP_SLT
), dst
, src0
, src1
);
1374 /* Otherwise, need to use the setp approach:
1377 struct src_register one
, zero
;
1378 /* zero immediate is 0,0,0,1 */
1379 zero
= get_zero_immediate( emit
);
1380 one
= scalar( zero
, TGSI_SWIZZLE_W
);
1381 zero
= scalar( zero
, TGSI_SWIZZLE_X
);
1383 return emit_conditional(
1394 static boolean
emit_select_op(struct svga_shader_emitter
*emit
,
1396 const struct tgsi_full_instruction
*insn
)
1398 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
1399 struct src_register src0
= translate_src_register(
1400 emit
, &insn
->Src
[0] );
1401 struct src_register src1
= translate_src_register(
1402 emit
, &insn
->Src
[1] );
1404 return emit_select( emit
, compare
, dst
, src0
, src1
);
1409 * Translate TGSI CMP instruction.
1412 emit_cmp(struct svga_shader_emitter
*emit
,
1413 const struct tgsi_full_instruction
*insn
)
1415 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
1416 const struct src_register src0
=
1417 translate_src_register(emit
, &insn
->Src
[0] );
1418 const struct src_register src1
=
1419 translate_src_register(emit
, &insn
->Src
[1] );
1420 const struct src_register src2
=
1421 translate_src_register(emit
, &insn
->Src
[2] );
1423 if (emit
->unit
== PIPE_SHADER_VERTEX
) {
1424 struct src_register zero
=
1425 scalar(get_zero_immediate(emit
), TGSI_SWIZZLE_X
);
1426 /* We used to simulate CMP with SLT+LRP. But that didn't work when
1427 * src1 or src2 was Inf/NaN. In particular, GLSL sqrt(0) failed
1428 * because it involves a CMP to handle the 0 case.
1429 * Use a conditional expression instead.
1431 return emit_conditional(emit
, PIPE_FUNC_LESS
, dst
,
1432 src0
, zero
, src1
, src2
);
1435 assert(emit
->unit
== PIPE_SHADER_FRAGMENT
);
1437 /* CMP DST, SRC0, SRC2, SRC1 */
1438 return submit_op3( emit
, inst_token( SVGA3DOP_CMP
), dst
,
1444 /* Translate texture instructions to SVGA3D representation.
1446 static boolean
emit_tex2(struct svga_shader_emitter
*emit
,
1447 const struct tgsi_full_instruction
*insn
,
1448 SVGA3dShaderDestToken dst
)
1450 SVGA3dShaderInstToken inst
;
1451 struct src_register texcoord
;
1452 struct src_register sampler
;
1453 SVGA3dShaderDestToken tmp
;
1457 switch (insn
->Instruction
.Opcode
) {
1458 case TGSI_OPCODE_TEX
:
1459 inst
.op
= SVGA3DOP_TEX
;
1461 case TGSI_OPCODE_TXP
:
1462 inst
.op
= SVGA3DOP_TEX
;
1463 inst
.control
= SVGA3DOPCONT_PROJECT
;
1465 case TGSI_OPCODE_TXB
:
1466 inst
.op
= SVGA3DOP_TEX
;
1467 inst
.control
= SVGA3DOPCONT_BIAS
;
1469 case TGSI_OPCODE_TXL
:
1470 inst
.op
= SVGA3DOP_TEXLDL
;
1477 texcoord
= translate_src_register( emit
, &insn
->Src
[0] );
1478 sampler
= translate_src_register( emit
, &insn
->Src
[1] );
1480 if (emit
->key
.fkey
.tex
[sampler
.base
.num
].unnormalized
||
1481 emit
->dynamic_branching_level
> 0)
1482 tmp
= get_temp( emit
);
1484 /* Can't do mipmapping inside dynamic branch constructs. Force LOD
1485 * zero in that case.
1487 if (emit
->dynamic_branching_level
> 0 &&
1488 inst
.op
== SVGA3DOP_TEX
&&
1489 SVGA3dShaderGetRegType(texcoord
.base
.value
) == SVGA3DREG_TEMP
) {
1490 struct src_register zero
= get_zero_immediate( emit
);
1492 /* MOV tmp, texcoord */
1493 if (!submit_op1( emit
,
1494 inst_token( SVGA3DOP_MOV
),
1499 /* MOV tmp.w, zero */
1500 if (!submit_op1( emit
,
1501 inst_token( SVGA3DOP_MOV
),
1502 writemask( tmp
, TGSI_WRITEMASK_W
),
1503 scalar( zero
, TGSI_SWIZZLE_X
)))
1506 texcoord
= src( tmp
);
1507 inst
.op
= SVGA3DOP_TEXLDL
;
1510 /* Explicit normalization of texcoords:
1512 if (emit
->key
.fkey
.tex
[sampler
.base
.num
].unnormalized
) {
1513 struct src_register wh
= get_tex_dimensions( emit
, sampler
.base
.num
);
1515 /* MUL tmp, SRC0, WH */
1516 if (!submit_op2( emit
, inst_token( SVGA3DOP_MUL
),
1517 tmp
, texcoord
, wh
))
1520 texcoord
= src( tmp
);
1523 return submit_op2( emit
, inst
, dst
, texcoord
, sampler
);
1529 /* Translate texture instructions to SVGA3D representation.
1531 static boolean
emit_tex4(struct svga_shader_emitter
*emit
,
1532 const struct tgsi_full_instruction
*insn
,
1533 SVGA3dShaderDestToken dst
)
1535 SVGA3dShaderInstToken inst
;
1536 struct src_register texcoord
;
1537 struct src_register ddx
;
1538 struct src_register ddy
;
1539 struct src_register sampler
;
1541 texcoord
= translate_src_register( emit
, &insn
->Src
[0] );
1542 ddx
= translate_src_register( emit
, &insn
->Src
[1] );
1543 ddy
= translate_src_register( emit
, &insn
->Src
[2] );
1544 sampler
= translate_src_register( emit
, &insn
->Src
[3] );
1548 switch (insn
->Instruction
.Opcode
) {
1549 case TGSI_OPCODE_TXD
:
1550 inst
.op
= SVGA3DOP_TEXLDD
; /* 4 args! */
1557 return submit_op4( emit
, inst
, dst
, texcoord
, sampler
, ddx
, ddy
);
1562 * Emit texture swizzle code.
1564 static boolean
emit_tex_swizzle( struct svga_shader_emitter
*emit
,
1565 SVGA3dShaderDestToken dst
,
1566 struct src_register src
,
1572 const unsigned swizzleIn
[4] = {swizzle_x
, swizzle_y
, swizzle_z
, swizzle_w
};
1573 unsigned srcSwizzle
[4];
1574 unsigned srcWritemask
= 0x0, zeroWritemask
= 0x0, oneWritemask
= 0x0;
1577 /* build writemasks and srcSwizzle terms */
1578 for (i
= 0; i
< 4; i
++) {
1579 if (swizzleIn
[i
] == PIPE_SWIZZLE_ZERO
) {
1580 srcSwizzle
[i
] = TGSI_SWIZZLE_X
+ i
;
1581 zeroWritemask
|= (1 << i
);
1583 else if (swizzleIn
[i
] == PIPE_SWIZZLE_ONE
) {
1584 srcSwizzle
[i
] = TGSI_SWIZZLE_X
+ i
;
1585 oneWritemask
|= (1 << i
);
1588 srcSwizzle
[i
] = swizzleIn
[i
];
1589 srcWritemask
|= (1 << i
);
1593 /* write x/y/z/w comps */
1594 if (dst
.mask
& srcWritemask
) {
1595 if (!submit_op1(emit
,
1596 inst_token(SVGA3DOP_MOV
),
1597 writemask(dst
, srcWritemask
),
1607 if (dst
.mask
& zeroWritemask
) {
1608 if (!submit_op1(emit
,
1609 inst_token(SVGA3DOP_MOV
),
1610 writemask(dst
, zeroWritemask
),
1611 scalar(get_zero_immediate(emit
), TGSI_SWIZZLE_X
)))
1616 if (dst
.mask
& oneWritemask
) {
1617 if (!submit_op1(emit
,
1618 inst_token(SVGA3DOP_MOV
),
1619 writemask(dst
, oneWritemask
),
1620 scalar(get_zero_immediate(emit
), TGSI_SWIZZLE_W
)))
1628 static boolean
emit_tex(struct svga_shader_emitter
*emit
,
1629 const struct tgsi_full_instruction
*insn
)
1631 SVGA3dShaderDestToken dst
=
1632 translate_dst_register( emit
, insn
, 0 );
1633 struct src_register src0
=
1634 translate_src_register( emit
, &insn
->Src
[0] );
1635 struct src_register src1
=
1636 translate_src_register( emit
, &insn
->Src
[1] );
1638 SVGA3dShaderDestToken tex_result
;
1639 const unsigned unit
= src1
.base
.num
;
1641 /* check for shadow samplers */
1642 boolean compare
= (emit
->key
.fkey
.tex
[unit
].compare_mode
==
1643 PIPE_TEX_COMPARE_R_TO_TEXTURE
);
1645 /* texture swizzle */
1646 boolean swizzle
= (emit
->key
.fkey
.tex
[unit
].swizzle_r
!= PIPE_SWIZZLE_RED
||
1647 emit
->key
.fkey
.tex
[unit
].swizzle_g
!= PIPE_SWIZZLE_GREEN
||
1648 emit
->key
.fkey
.tex
[unit
].swizzle_b
!= PIPE_SWIZZLE_BLUE
||
1649 emit
->key
.fkey
.tex
[unit
].swizzle_a
!= PIPE_SWIZZLE_ALPHA
);
1651 boolean saturate
= insn
->Instruction
.Saturate
!= TGSI_SAT_NONE
;
1653 /* If doing compare processing or tex swizzle or saturation, we need to put
1654 * the fetched color into a temporary so it can be used as a source later on.
1656 if (compare
|| swizzle
|| saturate
) {
1657 tex_result
= get_temp( emit
);
1663 switch(insn
->Instruction
.Opcode
) {
1664 case TGSI_OPCODE_TEX
:
1665 case TGSI_OPCODE_TXB
:
1666 case TGSI_OPCODE_TXP
:
1667 case TGSI_OPCODE_TXL
:
1668 if (!emit_tex2( emit
, insn
, tex_result
))
1671 case TGSI_OPCODE_TXD
:
1672 if (!emit_tex4( emit
, insn
, tex_result
))
1680 SVGA3dShaderDestToken dst2
;
1682 if (swizzle
|| saturate
)
1687 if (dst
.mask
& TGSI_WRITEMASK_XYZ
) {
1688 SVGA3dShaderDestToken src0_zdivw
= get_temp( emit
);
1689 /* When sampling a depth texture, the result of the comparison is in
1692 struct src_register tex_src_x
= scalar(src(tex_result
), TGSI_SWIZZLE_Y
);
1693 struct src_register r_coord
;
1695 if (insn
->Instruction
.Opcode
== TGSI_OPCODE_TXP
) {
1696 /* Divide texcoord R by Q */
1697 if (!submit_op1( emit
, inst_token( SVGA3DOP_RCP
),
1698 writemask(src0_zdivw
, TGSI_WRITEMASK_X
),
1699 scalar(src0
, TGSI_SWIZZLE_W
) ))
1702 if (!submit_op2( emit
, inst_token( SVGA3DOP_MUL
),
1703 writemask(src0_zdivw
, TGSI_WRITEMASK_X
),
1704 scalar(src0
, TGSI_SWIZZLE_Z
),
1705 scalar(src(src0_zdivw
), TGSI_SWIZZLE_X
) ))
1708 r_coord
= scalar(src(src0_zdivw
), TGSI_SWIZZLE_X
);
1711 r_coord
= scalar(src0
, TGSI_SWIZZLE_Z
);
1714 /* Compare texture sample value against R component of texcoord */
1715 if (!emit_select(emit
,
1716 emit
->key
.fkey
.tex
[unit
].compare_func
,
1717 writemask( dst2
, TGSI_WRITEMASK_XYZ
),
1723 if (dst
.mask
& TGSI_WRITEMASK_W
) {
1724 struct src_register one
=
1725 scalar( get_zero_immediate( emit
), TGSI_SWIZZLE_W
);
1727 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
),
1728 writemask( dst2
, TGSI_WRITEMASK_W
),
1734 if (saturate
&& !swizzle
) {
1735 /* MOV_SAT real_dst, dst */
1736 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
), dst
, src(tex_result
) ))
1740 /* swizzle from tex_result to dst (handles saturation too, if any) */
1741 emit_tex_swizzle(emit
,
1742 dst
, src(tex_result
),
1743 emit
->key
.fkey
.tex
[unit
].swizzle_r
,
1744 emit
->key
.fkey
.tex
[unit
].swizzle_g
,
1745 emit
->key
.fkey
.tex
[unit
].swizzle_b
,
1746 emit
->key
.fkey
.tex
[unit
].swizzle_a
);
1752 static boolean
emit_bgnloop2( struct svga_shader_emitter
*emit
,
1753 const struct tgsi_full_instruction
*insn
)
1755 SVGA3dShaderInstToken inst
= inst_token( SVGA3DOP_LOOP
);
1756 struct src_register loop_reg
= src_register( SVGA3DREG_LOOP
, 0 );
1757 struct src_register const_int
= get_loop_const( emit
);
1759 emit
->dynamic_branching_level
++;
1761 return (emit_instruction( emit
, inst
) &&
1762 emit_src( emit
, loop_reg
) &&
1763 emit_src( emit
, const_int
) );
1766 static boolean
emit_endloop2( struct svga_shader_emitter
*emit
,
1767 const struct tgsi_full_instruction
*insn
)
1769 SVGA3dShaderInstToken inst
= inst_token( SVGA3DOP_ENDLOOP
);
1771 emit
->dynamic_branching_level
--;
1773 return emit_instruction( emit
, inst
);
1776 static boolean
emit_brk( struct svga_shader_emitter
*emit
,
1777 const struct tgsi_full_instruction
*insn
)
1779 SVGA3dShaderInstToken inst
= inst_token( SVGA3DOP_BREAK
);
1780 return emit_instruction( emit
, inst
);
1783 static boolean
emit_scalar_op1( struct svga_shader_emitter
*emit
,
1785 const struct tgsi_full_instruction
*insn
)
1787 SVGA3dShaderInstToken inst
;
1788 SVGA3dShaderDestToken dst
;
1789 struct src_register src
;
1791 inst
= inst_token( opcode
);
1792 dst
= translate_dst_register( emit
, insn
, 0 );
1793 src
= translate_src_register( emit
, &insn
->Src
[0] );
1794 src
= scalar( src
, TGSI_SWIZZLE_X
);
1796 return submit_op1( emit
, inst
, dst
, src
);
1800 static boolean
emit_simple_instruction(struct svga_shader_emitter
*emit
,
1802 const struct tgsi_full_instruction
*insn
)
1804 const struct tgsi_full_src_register
*src
= insn
->Src
;
1805 SVGA3dShaderInstToken inst
;
1806 SVGA3dShaderDestToken dst
;
1808 inst
= inst_token( opcode
);
1809 dst
= translate_dst_register( emit
, insn
, 0 );
1811 switch (insn
->Instruction
.NumSrcRegs
) {
1813 return submit_op0( emit
, inst
, dst
);
1815 return submit_op1( emit
, inst
, dst
,
1816 translate_src_register( emit
, &src
[0] ));
1818 return submit_op2( emit
, inst
, dst
,
1819 translate_src_register( emit
, &src
[0] ),
1820 translate_src_register( emit
, &src
[1] ) );
1822 return submit_op3( emit
, inst
, dst
,
1823 translate_src_register( emit
, &src
[0] ),
1824 translate_src_register( emit
, &src
[1] ),
1825 translate_src_register( emit
, &src
[2] ) );
1833 static boolean
emit_deriv(struct svga_shader_emitter
*emit
,
1834 const struct tgsi_full_instruction
*insn
)
1836 if (emit
->dynamic_branching_level
> 0 &&
1837 insn
->Src
[0].Register
.File
== TGSI_FILE_TEMPORARY
)
1839 struct src_register zero
= get_zero_immediate( emit
);
1840 SVGA3dShaderDestToken dst
=
1841 translate_dst_register( emit
, insn
, 0 );
1843 /* Deriv opcodes not valid inside dynamic branching, workaround
1844 * by zeroing out the destination.
1846 if (!submit_op1(emit
,
1847 inst_token( SVGA3DOP_MOV
),
1849 scalar(zero
, TGSI_SWIZZLE_X
)))
1856 const struct tgsi_full_src_register
*reg
= &insn
->Src
[0];
1857 SVGA3dShaderInstToken inst
;
1858 SVGA3dShaderDestToken dst
;
1859 struct src_register src0
;
1861 switch (insn
->Instruction
.Opcode
) {
1862 case TGSI_OPCODE_DDX
:
1863 opcode
= SVGA3DOP_DSX
;
1865 case TGSI_OPCODE_DDY
:
1866 opcode
= SVGA3DOP_DSY
;
1872 inst
= inst_token( opcode
);
1873 dst
= translate_dst_register( emit
, insn
, 0 );
1874 src0
= translate_src_register( emit
, reg
);
1876 /* We cannot use negate or abs on source to dsx/dsy instruction.
1878 if (reg
->Register
.Absolute
||
1879 reg
->Register
.Negate
) {
1880 SVGA3dShaderDestToken temp
= get_temp( emit
);
1882 if (!emit_repl( emit
, temp
, &src0
))
1886 return submit_op1( emit
, inst
, dst
, src0
);
1890 static boolean
emit_arl(struct svga_shader_emitter
*emit
,
1891 const struct tgsi_full_instruction
*insn
)
1893 ++emit
->current_arl
;
1894 if (emit
->unit
== PIPE_SHADER_FRAGMENT
) {
1895 /* MOVA not present in pixel shader instruction set.
1896 * Ignore this instruction altogether since it is
1897 * only used for loop counters -- and for that
1898 * we reference aL directly.
1902 if (svga_arl_needs_adjustment( emit
)) {
1903 return emit_fake_arl( emit
, insn
);
1905 /* no need to adjust, just emit straight arl */
1906 return emit_simple_instruction(emit
, SVGA3DOP_MOVA
, insn
);
1910 static boolean
emit_pow(struct svga_shader_emitter
*emit
,
1911 const struct tgsi_full_instruction
*insn
)
1913 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
1914 struct src_register src0
= translate_src_register(
1915 emit
, &insn
->Src
[0] );
1916 struct src_register src1
= translate_src_register(
1917 emit
, &insn
->Src
[1] );
1918 boolean need_tmp
= FALSE
;
1920 /* POW can only output to a temporary */
1921 if (insn
->Dst
[0].Register
.File
!= TGSI_FILE_TEMPORARY
)
1924 /* POW src1 must not be the same register as dst */
1925 if (alias_src_dst( src1
, dst
))
1928 /* it's a scalar op */
1929 src0
= scalar( src0
, TGSI_SWIZZLE_X
);
1930 src1
= scalar( src1
, TGSI_SWIZZLE_X
);
1933 SVGA3dShaderDestToken tmp
= writemask(get_temp( emit
), TGSI_WRITEMASK_X
);
1935 if (!submit_op2(emit
, inst_token( SVGA3DOP_POW
), tmp
, src0
, src1
))
1938 return submit_op1(emit
, inst_token( SVGA3DOP_MOV
), dst
, scalar(src(tmp
), 0) );
1941 return submit_op2(emit
, inst_token( SVGA3DOP_POW
), dst
, src0
, src1
);
1945 static boolean
emit_xpd(struct svga_shader_emitter
*emit
,
1946 const struct tgsi_full_instruction
*insn
)
1948 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
1949 const struct src_register src0
= translate_src_register(
1950 emit
, &insn
->Src
[0] );
1951 const struct src_register src1
= translate_src_register(
1952 emit
, &insn
->Src
[1] );
1953 boolean need_dst_tmp
= FALSE
;
1955 /* XPD can only output to a temporary */
1956 if (SVGA3dShaderGetRegType(dst
.value
) != SVGA3DREG_TEMP
)
1957 need_dst_tmp
= TRUE
;
1959 /* The dst reg must not be the same as src0 or src1*/
1960 if (alias_src_dst(src0
, dst
) ||
1961 alias_src_dst(src1
, dst
))
1962 need_dst_tmp
= TRUE
;
1965 SVGA3dShaderDestToken tmp
= get_temp( emit
);
1967 /* Obey DX9 restrictions on mask:
1969 tmp
.mask
= dst
.mask
& TGSI_WRITEMASK_XYZ
;
1971 if (!submit_op2(emit
, inst_token( SVGA3DOP_CRS
), tmp
, src0
, src1
))
1974 if (!submit_op1(emit
, inst_token( SVGA3DOP_MOV
), dst
, src( tmp
)))
1978 if (!submit_op2(emit
, inst_token( SVGA3DOP_CRS
), dst
, src0
, src1
))
1982 /* Need to emit 1.0 to dst.w?
1984 if (dst
.mask
& TGSI_WRITEMASK_W
) {
1985 struct src_register zero
= get_zero_immediate( emit
);
1987 if (!submit_op1(emit
,
1988 inst_token( SVGA3DOP_MOV
),
1989 writemask(dst
, TGSI_WRITEMASK_W
),
1998 static boolean
emit_lrp(struct svga_shader_emitter
*emit
,
1999 const struct tgsi_full_instruction
*insn
)
2001 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
2002 const struct src_register src0
= translate_src_register(
2003 emit
, &insn
->Src
[0] );
2004 const struct src_register src1
= translate_src_register(
2005 emit
, &insn
->Src
[1] );
2006 const struct src_register src2
= translate_src_register(
2007 emit
, &insn
->Src
[2] );
2009 return submit_lrp(emit
, dst
, src0
, src1
, src2
);
2013 static boolean
emit_dst_insn(struct svga_shader_emitter
*emit
,
2014 const struct tgsi_full_instruction
*insn
)
2016 if (emit
->unit
== PIPE_SHADER_VERTEX
) {
2017 /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
2019 return emit_simple_instruction(emit
, SVGA3DOP_DST
, insn
);
2023 /* result[0] = 1 * 1;
2024 * result[1] = a[1] * b[1];
2025 * result[2] = a[2] * 1;
2026 * result[3] = 1 * b[3];
2029 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
2030 SVGA3dShaderDestToken tmp
;
2031 const struct src_register src0
= translate_src_register(
2032 emit
, &insn
->Src
[0] );
2033 const struct src_register src1
= translate_src_register(
2034 emit
, &insn
->Src
[1] );
2035 struct src_register zero
= get_zero_immediate( emit
);
2036 boolean need_tmp
= FALSE
;
2038 if (SVGA3dShaderGetRegType(dst
.value
) != SVGA3DREG_TEMP
||
2039 alias_src_dst(src0
, dst
) ||
2040 alias_src_dst(src1
, dst
))
2044 tmp
= get_temp( emit
);
2052 if (tmp
.mask
& TGSI_WRITEMASK_XW
) {
2053 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
),
2054 writemask(tmp
, TGSI_WRITEMASK_XW
),
2061 if (tmp
.mask
& TGSI_WRITEMASK_YZ
) {
2062 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
),
2063 writemask(tmp
, TGSI_WRITEMASK_YZ
),
2068 /* tmp.yw = tmp * src1
2070 if (tmp
.mask
& TGSI_WRITEMASK_YW
) {
2071 if (!submit_op2( emit
, inst_token( SVGA3DOP_MUL
),
2072 writemask(tmp
, TGSI_WRITEMASK_YW
),
2081 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
),
2092 static boolean
emit_exp(struct svga_shader_emitter
*emit
,
2093 const struct tgsi_full_instruction
*insn
)
2095 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
2096 struct src_register src0
=
2097 translate_src_register( emit
, &insn
->Src
[0] );
2098 struct src_register zero
= get_zero_immediate( emit
);
2099 SVGA3dShaderDestToken fraction
;
2101 if (dst
.mask
& TGSI_WRITEMASK_Y
)
2103 else if (dst
.mask
& TGSI_WRITEMASK_X
)
2104 fraction
= get_temp( emit
);
2108 /* If y is being written, fill it with src0 - floor(src0).
2110 if (dst
.mask
& TGSI_WRITEMASK_XY
) {
2111 if (!submit_op1( emit
, inst_token( SVGA3DOP_FRC
),
2112 writemask( fraction
, TGSI_WRITEMASK_Y
),
2117 /* If x is being written, fill it with 2 ^ floor(src0).
2119 if (dst
.mask
& TGSI_WRITEMASK_X
) {
2120 if (!submit_op2( emit
, inst_token( SVGA3DOP_ADD
),
2121 writemask( dst
, TGSI_WRITEMASK_X
),
2123 scalar( negate( src( fraction
) ), TGSI_SWIZZLE_Y
) ) )
2126 if (!submit_op1( emit
, inst_token( SVGA3DOP_EXP
),
2127 writemask( dst
, TGSI_WRITEMASK_X
),
2128 scalar( src( dst
), TGSI_SWIZZLE_X
) ) )
2131 if (!(dst
.mask
& TGSI_WRITEMASK_Y
))
2132 release_temp( emit
, fraction
);
2135 /* If z is being written, fill it with 2 ^ src0 (partial precision).
2137 if (dst
.mask
& TGSI_WRITEMASK_Z
) {
2138 if (!submit_op1( emit
, inst_token( SVGA3DOP_EXPP
),
2139 writemask( dst
, TGSI_WRITEMASK_Z
),
2144 /* If w is being written, fill it with one.
2146 if (dst
.mask
& TGSI_WRITEMASK_W
) {
2147 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
),
2148 writemask(dst
, TGSI_WRITEMASK_W
),
2149 scalar( zero
, TGSI_SWIZZLE_W
) ))
2156 static boolean
emit_lit(struct svga_shader_emitter
*emit
,
2157 const struct tgsi_full_instruction
*insn
)
2159 if (emit
->unit
== PIPE_SHADER_VERTEX
) {
2160 /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
2162 return emit_simple_instruction(emit
, SVGA3DOP_LIT
, insn
);
2165 /* D3D vs. GL semantics can be fairly easily accommodated by
2166 * variations on this sequence.
2170 * tmp.z = pow(src.y,src.w)
2171 * p0 = src0.xxxx > 0
2172 * result = zero.wxxw
2173 * (p0) result.yz = tmp
2177 * tmp.z = pow(src.y,src.w)
2178 * p0 = src0.xxyy > 0
2179 * result = zero.wxxw
2180 * (p0) result.yz = tmp
2182 * Will implement the GL version for now.
2184 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
2185 SVGA3dShaderDestToken tmp
= get_temp( emit
);
2186 const struct src_register src0
= translate_src_register(
2187 emit
, &insn
->Src
[0] );
2188 struct src_register zero
= get_zero_immediate( emit
);
2190 /* tmp = pow(src.y, src.w)
2192 if (dst
.mask
& TGSI_WRITEMASK_Z
) {
2193 if (!submit_op2(emit
, inst_token( SVGA3DOP_POW
),
2202 if (dst
.mask
& TGSI_WRITEMASK_Y
) {
2203 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
),
2204 writemask(tmp
, TGSI_WRITEMASK_Y
),
2209 /* Can't quite do this with emit conditional due to the extra
2210 * writemask on the predicated mov:
2213 SVGA3dShaderDestToken pred_reg
= dst_register( SVGA3DREG_PREDICATE
, 0 );
2214 SVGA3dShaderInstToken setp_token
, mov_token
;
2215 struct src_register predsrc
;
2217 setp_token
= inst_token( SVGA3DOP_SETP
);
2218 mov_token
= inst_token( SVGA3DOP_MOV
);
2220 setp_token
.control
= SVGA3DOPCOMP_GT
;
2222 /* D3D vs GL semantics:
2225 predsrc
= swizzle(src0
, 0, 0, 1, 1); /* D3D */
2227 predsrc
= swizzle(src0
, 0, 0, 0, 0); /* GL */
2229 /* SETP src0.xxyy, GT, {0}.x */
2230 if (!submit_op2( emit
, setp_token
, pred_reg
,
2232 swizzle(zero
, 0, 0, 0, 0) ))
2236 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
), dst
,
2237 swizzle(zero
, 3, 0, 0, 3 )))
2240 /* MOV dst.yz, tmp (predicated)
2242 * Note that the predicate reg (and possible modifiers) is passed
2243 * as the first source argument.
2245 if (dst
.mask
& TGSI_WRITEMASK_YZ
) {
2246 mov_token
.predicated
= 1;
2247 if (!submit_op2( emit
, mov_token
,
2248 writemask(dst
, TGSI_WRITEMASK_YZ
),
2249 src( pred_reg
), src( tmp
) ))
2259 static boolean
emit_ex2( struct svga_shader_emitter
*emit
,
2260 const struct tgsi_full_instruction
*insn
)
2262 SVGA3dShaderInstToken inst
;
2263 SVGA3dShaderDestToken dst
;
2264 struct src_register src0
;
2266 inst
= inst_token( SVGA3DOP_EXP
);
2267 dst
= translate_dst_register( emit
, insn
, 0 );
2268 src0
= translate_src_register( emit
, &insn
->Src
[0] );
2269 src0
= scalar( src0
, TGSI_SWIZZLE_X
);
2271 if (dst
.mask
!= TGSI_WRITEMASK_XYZW
) {
2272 SVGA3dShaderDestToken tmp
= get_temp( emit
);
2274 if (!submit_op1( emit
, inst
, tmp
, src0
))
2277 return submit_op1( emit
, inst_token( SVGA3DOP_MOV
),
2279 scalar( src( tmp
), TGSI_SWIZZLE_X
) );
2282 return submit_op1( emit
, inst
, dst
, src0
);
2286 static boolean
emit_log(struct svga_shader_emitter
*emit
,
2287 const struct tgsi_full_instruction
*insn
)
2289 SVGA3dShaderDestToken dst
= translate_dst_register( emit
, insn
, 0 );
2290 struct src_register src0
=
2291 translate_src_register( emit
, &insn
->Src
[0] );
2292 struct src_register zero
= get_zero_immediate( emit
);
2293 SVGA3dShaderDestToken abs_tmp
;
2294 struct src_register abs_src0
;
2295 SVGA3dShaderDestToken log2_abs
;
2299 if (dst
.mask
& TGSI_WRITEMASK_Z
)
2301 else if (dst
.mask
& TGSI_WRITEMASK_XY
)
2302 log2_abs
= get_temp( emit
);
2306 /* If z is being written, fill it with log2( abs( src0 ) ).
2308 if (dst
.mask
& TGSI_WRITEMASK_XYZ
) {
2309 if (!src0
.base
.srcMod
|| src0
.base
.srcMod
== SVGA3DSRCMOD_ABS
)
2312 abs_tmp
= get_temp( emit
);
2314 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
),
2319 abs_src0
= src( abs_tmp
);
2322 abs_src0
= absolute( scalar( abs_src0
, TGSI_SWIZZLE_X
) );
2324 if (!submit_op1( emit
, inst_token( SVGA3DOP_LOG
),
2325 writemask( log2_abs
, TGSI_WRITEMASK_Z
),
2330 if (dst
.mask
& TGSI_WRITEMASK_XY
) {
2331 SVGA3dShaderDestToken floor_log2
;
2333 if (dst
.mask
& TGSI_WRITEMASK_X
)
2336 floor_log2
= get_temp( emit
);
2338 /* If x is being written, fill it with floor( log2( abs( src0 ) ) ).
2340 if (!submit_op1( emit
, inst_token( SVGA3DOP_FRC
),
2341 writemask( floor_log2
, TGSI_WRITEMASK_X
),
2342 scalar( src( log2_abs
), TGSI_SWIZZLE_Z
) ) )
2345 if (!submit_op2( emit
, inst_token( SVGA3DOP_ADD
),
2346 writemask( floor_log2
, TGSI_WRITEMASK_X
),
2347 scalar( src( log2_abs
), TGSI_SWIZZLE_Z
),
2348 negate( src( floor_log2
) ) ) )
2351 /* If y is being written, fill it with
2352 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2354 if (dst
.mask
& TGSI_WRITEMASK_Y
) {
2355 if (!submit_op1( emit
, inst_token( SVGA3DOP_EXP
),
2356 writemask( dst
, TGSI_WRITEMASK_Y
),
2357 negate( scalar( src( floor_log2
),
2358 TGSI_SWIZZLE_X
) ) ) )
2361 if (!submit_op2( emit
, inst_token( SVGA3DOP_MUL
),
2362 writemask( dst
, TGSI_WRITEMASK_Y
),
2368 if (!(dst
.mask
& TGSI_WRITEMASK_X
))
2369 release_temp( emit
, floor_log2
);
2371 if (!(dst
.mask
& TGSI_WRITEMASK_Z
))
2372 release_temp( emit
, log2_abs
);
2375 if (dst
.mask
& TGSI_WRITEMASK_XYZ
&& src0
.base
.srcMod
&&
2376 src0
.base
.srcMod
!= SVGA3DSRCMOD_ABS
)
2377 release_temp( emit
, abs_tmp
);
2379 /* If w is being written, fill it with one.
2381 if (dst
.mask
& TGSI_WRITEMASK_W
) {
2382 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
),
2383 writemask(dst
, TGSI_WRITEMASK_W
),
2384 scalar( zero
, TGSI_SWIZZLE_W
) ))
2393 * Translate TGSI TRUNC or ROUND instruction.
2394 * We need to truncate toward zero. Ex: trunc(-1.9) = -1
2395 * Different approaches are needed for VS versus PS.
2398 emit_trunc_round(struct svga_shader_emitter
*emit
,
2399 const struct tgsi_full_instruction
*insn
,
2402 SVGA3dShaderDestToken dst
= translate_dst_register(emit
, insn
, 0);
2403 const struct src_register src0
=
2404 translate_src_register(emit
, &insn
->Src
[0] );
2405 SVGA3dShaderDestToken t1
= get_temp(emit
);
2408 SVGA3dShaderDestToken t0
= get_temp(emit
);
2409 struct src_register half
= get_half_immediate(emit
);
2411 /* t0 = abs(src0) + 0.5 */
2412 if (!submit_op2(emit
, inst_token(SVGA3DOP_ADD
), t0
,
2413 absolute(src0
), half
))
2416 /* t1 = fract(t0) */
2417 if (!submit_op1(emit
, inst_token(SVGA3DOP_FRC
), t1
, src(t0
)))
2421 if (!submit_op2(emit
, inst_token(SVGA3DOP_ADD
), t1
, src(t0
),
2428 /* t1 = fract(abs(src0)) */
2429 if (!submit_op1(emit
, inst_token(SVGA3DOP_FRC
), t1
, absolute(src0
)))
2432 /* t1 = abs(src0) - t1 */
2433 if (!submit_op2(emit
, inst_token(SVGA3DOP_ADD
), t1
, absolute(src0
),
2439 * Now we need to multiply t1 by the sign of the original value.
2441 if (emit
->unit
== PIPE_SHADER_VERTEX
) {
2442 /* For VS: use SGN instruction */
2443 /* Need two extra/dummy registers: */
2444 SVGA3dShaderDestToken t2
= get_temp(emit
), t3
= get_temp(emit
),
2445 t4
= get_temp(emit
);
2447 /* t2 = sign(src0) */
2448 if (!submit_op3(emit
, inst_token(SVGA3DOP_SGN
), t2
, src0
,
2453 if (!submit_op2(emit
, inst_token(SVGA3DOP_MUL
), dst
, src(t1
), src(t2
)))
2457 /* For FS: Use CMP instruction */
2458 return submit_op3(emit
, inst_token( SVGA3DOP_CMP
), dst
,
2459 src0
, src(t1
), negate(src(t1
)));
2466 static boolean
emit_bgnsub( struct svga_shader_emitter
*emit
,
2468 const struct tgsi_full_instruction
*insn
)
2472 /* Note that we've finished the main function and are now emitting
2473 * subroutines. This affects how we terminate the generated
2476 emit
->in_main_func
= FALSE
;
2478 for (i
= 0; i
< emit
->nr_labels
; i
++) {
2479 if (emit
->label
[i
] == position
) {
2480 return (emit_instruction( emit
, inst_token( SVGA3DOP_RET
) ) &&
2481 emit_instruction( emit
, inst_token( SVGA3DOP_LABEL
) ) &&
2482 emit_src( emit
, src_register( SVGA3DREG_LABEL
, i
)));
2490 static boolean
emit_call( struct svga_shader_emitter
*emit
,
2491 const struct tgsi_full_instruction
*insn
)
2493 unsigned position
= insn
->Label
.Label
;
2496 for (i
= 0; i
< emit
->nr_labels
; i
++) {
2497 if (emit
->label
[i
] == position
)
2501 if (emit
->nr_labels
== Elements(emit
->label
))
2504 if (i
== emit
->nr_labels
) {
2505 emit
->label
[i
] = position
;
2509 return (emit_instruction( emit
, inst_token( SVGA3DOP_CALL
) ) &&
2510 emit_src( emit
, src_register( SVGA3DREG_LABEL
, i
)));
2514 static boolean
emit_end( struct svga_shader_emitter
*emit
)
2516 if (emit
->unit
== PIPE_SHADER_VERTEX
) {
2517 return emit_vs_postamble( emit
);
2520 return emit_ps_postamble( emit
);
2526 static boolean
svga_emit_instruction( struct svga_shader_emitter
*emit
,
2528 const struct tgsi_full_instruction
*insn
)
2530 switch (insn
->Instruction
.Opcode
) {
2532 case TGSI_OPCODE_ARL
:
2533 return emit_arl( emit
, insn
);
2535 case TGSI_OPCODE_TEX
:
2536 case TGSI_OPCODE_TXB
:
2537 case TGSI_OPCODE_TXP
:
2538 case TGSI_OPCODE_TXL
:
2539 case TGSI_OPCODE_TXD
:
2540 return emit_tex( emit
, insn
);
2542 case TGSI_OPCODE_DDX
:
2543 case TGSI_OPCODE_DDY
:
2544 return emit_deriv( emit
, insn
);
2546 case TGSI_OPCODE_BGNSUB
:
2547 return emit_bgnsub( emit
, position
, insn
);
2549 case TGSI_OPCODE_ENDSUB
:
2552 case TGSI_OPCODE_CAL
:
2553 return emit_call( emit
, insn
);
2555 case TGSI_OPCODE_FLR
:
2556 return emit_floor( emit
, insn
);
2558 case TGSI_OPCODE_TRUNC
:
2559 return emit_trunc_round( emit
, insn
, FALSE
);
2561 case TGSI_OPCODE_ROUND
:
2562 return emit_trunc_round( emit
, insn
, TRUE
);
2564 case TGSI_OPCODE_CEIL
:
2565 return emit_ceil( emit
, insn
);
2567 case TGSI_OPCODE_CMP
:
2568 return emit_cmp( emit
, insn
);
2570 case TGSI_OPCODE_DIV
:
2571 return emit_div( emit
, insn
);
2573 case TGSI_OPCODE_DP2
:
2574 return emit_dp2( emit
, insn
);
2576 case TGSI_OPCODE_DPH
:
2577 return emit_dph( emit
, insn
);
2579 case TGSI_OPCODE_NRM
:
2580 return emit_nrm( emit
, insn
);
2582 case TGSI_OPCODE_COS
:
2583 return emit_cos( emit
, insn
);
2585 case TGSI_OPCODE_SIN
:
2586 return emit_sin( emit
, insn
);
2588 case TGSI_OPCODE_SCS
:
2589 return emit_sincos( emit
, insn
);
2591 case TGSI_OPCODE_END
:
2592 /* TGSI always finishes the main func with an END */
2593 return emit_end( emit
);
2595 case TGSI_OPCODE_KIL
:
2596 return emit_kil( emit
, insn
);
2598 /* Selection opcodes. The underlying language is fairly
2599 * non-orthogonal about these.
2601 case TGSI_OPCODE_SEQ
:
2602 return emit_select_op( emit
, PIPE_FUNC_EQUAL
, insn
);
2604 case TGSI_OPCODE_SNE
:
2605 return emit_select_op( emit
, PIPE_FUNC_NOTEQUAL
, insn
);
2607 case TGSI_OPCODE_SGT
:
2608 return emit_select_op( emit
, PIPE_FUNC_GREATER
, insn
);
2610 case TGSI_OPCODE_SGE
:
2611 return emit_select_op( emit
, PIPE_FUNC_GEQUAL
, insn
);
2613 case TGSI_OPCODE_SLT
:
2614 return emit_select_op( emit
, PIPE_FUNC_LESS
, insn
);
2616 case TGSI_OPCODE_SLE
:
2617 return emit_select_op( emit
, PIPE_FUNC_LEQUAL
, insn
);
2619 case TGSI_OPCODE_SUB
:
2620 return emit_sub( emit
, insn
);
2622 case TGSI_OPCODE_POW
:
2623 return emit_pow( emit
, insn
);
2625 case TGSI_OPCODE_EX2
:
2626 return emit_ex2( emit
, insn
);
2628 case TGSI_OPCODE_EXP
:
2629 return emit_exp( emit
, insn
);
2631 case TGSI_OPCODE_LOG
:
2632 return emit_log( emit
, insn
);
2634 case TGSI_OPCODE_LG2
:
2635 return emit_scalar_op1( emit
, SVGA3DOP_LOG
, insn
);
2637 case TGSI_OPCODE_RSQ
:
2638 return emit_scalar_op1( emit
, SVGA3DOP_RSQ
, insn
);
2640 case TGSI_OPCODE_RCP
:
2641 return emit_scalar_op1( emit
, SVGA3DOP_RCP
, insn
);
2643 case TGSI_OPCODE_CONT
:
2644 case TGSI_OPCODE_RET
:
2645 /* This is a noop -- we tell mesa that we can't support RET
2646 * within a function (early return), so this will always be
2647 * followed by an ENDSUB.
2651 /* These aren't actually used by any of the frontends we care
2654 case TGSI_OPCODE_CLAMP
:
2655 case TGSI_OPCODE_AND
:
2656 case TGSI_OPCODE_OR
:
2657 case TGSI_OPCODE_I2F
:
2658 case TGSI_OPCODE_NOT
:
2659 case TGSI_OPCODE_SHL
:
2660 case TGSI_OPCODE_ISHR
:
2661 case TGSI_OPCODE_XOR
:
2664 case TGSI_OPCODE_IF
:
2665 return emit_if( emit
, insn
);
2666 case TGSI_OPCODE_ELSE
:
2667 return emit_else( emit
, insn
);
2668 case TGSI_OPCODE_ENDIF
:
2669 return emit_endif( emit
, insn
);
2671 case TGSI_OPCODE_BGNLOOP
:
2672 return emit_bgnloop2( emit
, insn
);
2673 case TGSI_OPCODE_ENDLOOP
:
2674 return emit_endloop2( emit
, insn
);
2675 case TGSI_OPCODE_BRK
:
2676 return emit_brk( emit
, insn
);
2678 case TGSI_OPCODE_XPD
:
2679 return emit_xpd( emit
, insn
);
2681 case TGSI_OPCODE_KILP
:
2682 return emit_kilp( emit
, insn
);
2684 case TGSI_OPCODE_DST
:
2685 return emit_dst_insn( emit
, insn
);
2687 case TGSI_OPCODE_LIT
:
2688 return emit_lit( emit
, insn
);
2690 case TGSI_OPCODE_LRP
:
2691 return emit_lrp( emit
, insn
);
2693 case TGSI_OPCODE_SSG
:
2694 return emit_ssg( emit
, insn
);
2697 unsigned opcode
= translate_opcode(insn
->Instruction
.Opcode
);
2699 if (opcode
== SVGA3DOP_LAST_INST
)
2702 if (!emit_simple_instruction( emit
, opcode
, insn
))
2711 static boolean
svga_emit_immediate( struct svga_shader_emitter
*emit
,
2712 struct tgsi_full_immediate
*imm
)
2714 static const float id
[4] = {0,0,0,1};
2718 assert(1 <= imm
->Immediate
.NrTokens
&& imm
->Immediate
.NrTokens
<= 5);
2719 for (i
= 0; i
< imm
->Immediate
.NrTokens
- 1; i
++)
2720 value
[i
] = imm
->u
[i
].Float
;
2722 for ( ; i
< 4; i
++ )
2725 return emit_def_const( emit
, SVGA3D_CONST_TYPE_FLOAT
,
2726 emit
->imm_start
+ emit
->internal_imm_count
++,
2727 value
[0], value
[1], value
[2], value
[3]);
2730 static boolean
make_immediate( struct svga_shader_emitter
*emit
,
2735 struct src_register
*out
)
2737 unsigned idx
= emit
->nr_hw_float_const
++;
2739 if (!emit_def_const( emit
, SVGA3D_CONST_TYPE_FLOAT
,
2743 *out
= src_register( SVGA3DREG_CONST
, idx
);
2748 static boolean
emit_vs_preamble( struct svga_shader_emitter
*emit
)
2750 if (!emit
->key
.vkey
.need_prescale
) {
2751 if (!make_immediate( emit
, 0, 0, .5, .5,
2759 static boolean
emit_ps_preamble( struct svga_shader_emitter
*emit
)
2761 if (emit
->ps_reads_pos
&& emit
->info
.reads_z
) {
2763 * Assemble the position from various bits of inputs. Depth and W are
2764 * passed in a texcoord this is due to D3D's vPos not hold Z or W.
2765 * Also fixup the perspective interpolation.
2767 * temp_pos.xy = vPos.xy
2768 * temp_pos.w = rcp(texcoord1.w);
2769 * temp_pos.z = texcoord1.z * temp_pos.w;
2771 if (!submit_op1( emit
,
2772 inst_token(SVGA3DOP_MOV
),
2773 writemask( emit
->ps_temp_pos
, TGSI_WRITEMASK_XY
),
2774 emit
->ps_true_pos
))
2777 if (!submit_op1( emit
,
2778 inst_token(SVGA3DOP_RCP
),
2779 writemask( emit
->ps_temp_pos
, TGSI_WRITEMASK_W
),
2780 scalar( emit
->ps_depth_pos
, TGSI_SWIZZLE_W
) ))
2783 if (!submit_op2( emit
,
2784 inst_token(SVGA3DOP_MUL
),
2785 writemask( emit
->ps_temp_pos
, TGSI_WRITEMASK_Z
),
2786 scalar( emit
->ps_depth_pos
, TGSI_SWIZZLE_Z
),
2787 scalar( src(emit
->ps_temp_pos
), TGSI_SWIZZLE_W
) ))
2794 static boolean
emit_ps_postamble( struct svga_shader_emitter
*emit
)
2798 /* PS oDepth is incredibly fragile and it's very hard to catch the
2799 * types of usage that break it during shader emit. Easier just to
2800 * redirect the main program to a temporary and then only touch
2801 * oDepth with a hand-crafted MOV below.
2803 if (SVGA3dShaderGetRegType(emit
->true_pos
.value
) != 0) {
2805 if (!submit_op1( emit
,
2806 inst_token(SVGA3DOP_MOV
),
2808 scalar(src(emit
->temp_pos
), TGSI_SWIZZLE_Z
) ))
2812 for (i
= 0; i
< PIPE_MAX_COLOR_BUFS
; i
++) {
2813 if (SVGA3dShaderGetRegType(emit
->true_col
[i
].value
) != 0) {
2815 /* Potentially override output colors with white for XOR
2816 * logicop workaround.
2818 if (emit
->unit
== PIPE_SHADER_FRAGMENT
&&
2819 emit
->key
.fkey
.white_fragments
) {
2821 struct src_register one
= scalar( get_zero_immediate( emit
),
2824 if (!submit_op1( emit
,
2825 inst_token(SVGA3DOP_MOV
),
2831 if (!submit_op1( emit
,
2832 inst_token(SVGA3DOP_MOV
),
2834 src(emit
->temp_col
[i
]) ))
2843 static boolean
emit_vs_postamble( struct svga_shader_emitter
*emit
)
2845 /* PSIZ output is incredibly fragile and it's very hard to catch
2846 * the types of usage that break it during shader emit. Easier
2847 * just to redirect the main program to a temporary and then only
2848 * touch PSIZ with a hand-crafted MOV below.
2850 if (SVGA3dShaderGetRegType(emit
->true_psiz
.value
) != 0) {
2851 if (!submit_op1( emit
,
2852 inst_token(SVGA3DOP_MOV
),
2854 scalar(src(emit
->temp_psiz
), TGSI_SWIZZLE_X
) ))
2858 /* Need to perform various manipulations on vertex position to cope
2859 * with the different GL and D3D clip spaces.
2861 if (emit
->key
.vkey
.need_prescale
) {
2862 SVGA3dShaderDestToken temp_pos
= emit
->temp_pos
;
2863 SVGA3dShaderDestToken depth
= emit
->depth_pos
;
2864 SVGA3dShaderDestToken pos
= emit
->true_pos
;
2865 unsigned offset
= emit
->info
.file_max
[TGSI_FILE_CONSTANT
] + 1;
2866 struct src_register prescale_scale
= src_register( SVGA3DREG_CONST
,
2868 struct src_register prescale_trans
= src_register( SVGA3DREG_CONST
,
2871 if (!submit_op1( emit
,
2872 inst_token(SVGA3DOP_MOV
),
2873 writemask(depth
, TGSI_WRITEMASK_W
),
2874 scalar(src(temp_pos
), TGSI_SWIZZLE_W
) ))
2877 /* MUL temp_pos.xyz, temp_pos, prescale.scale
2878 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
2879 * --> Note that prescale.trans.w == 0
2881 if (!submit_op2( emit
,
2882 inst_token(SVGA3DOP_MUL
),
2883 writemask(temp_pos
, TGSI_WRITEMASK_XYZ
),
2888 if (!submit_op3( emit
,
2889 inst_token(SVGA3DOP_MAD
),
2891 swizzle(src(temp_pos
), 3, 3, 3, 3),
2896 /* Also write to depth value */
2897 if (!submit_op3( emit
,
2898 inst_token(SVGA3DOP_MAD
),
2899 writemask(depth
, TGSI_WRITEMASK_Z
),
2900 swizzle(src(temp_pos
), 3, 3, 3, 3),
2906 SVGA3dShaderDestToken temp_pos
= emit
->temp_pos
;
2907 SVGA3dShaderDestToken depth
= emit
->depth_pos
;
2908 SVGA3dShaderDestToken pos
= emit
->true_pos
;
2909 struct src_register imm_0055
= emit
->imm_0055
;
2911 /* Adjust GL clipping coordinate space to hardware (D3D-style):
2913 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
2914 * MOV result.position, temp_pos
2916 if (!submit_op2( emit
,
2917 inst_token(SVGA3DOP_DP4
),
2918 writemask(temp_pos
, TGSI_WRITEMASK_Z
),
2923 if (!submit_op1( emit
,
2924 inst_token(SVGA3DOP_MOV
),
2929 /* Move the manipulated depth into the extra texcoord reg */
2930 if (!submit_op1( emit
,
2931 inst_token(SVGA3DOP_MOV
),
2932 writemask(depth
, TGSI_WRITEMASK_ZW
),
2942 1: COLOR = FrontColor;
2944 3: COLOR = BackColor;
2947 static boolean
emit_light_twoside( struct svga_shader_emitter
*emit
)
2949 struct src_register vface
, zero
;
2950 struct src_register front
[2];
2951 struct src_register back
[2];
2952 SVGA3dShaderDestToken color
[2];
2953 int count
= emit
->internal_color_count
;
2955 SVGA3dShaderInstToken if_token
;
2960 vface
= get_vface( emit
);
2961 zero
= get_zero_immediate( emit
);
2963 /* Can't use get_temp() to allocate the color reg as such
2964 * temporaries will be reclaimed after each instruction by the call
2965 * to reset_temp_regs().
2967 for (i
= 0; i
< count
; i
++) {
2968 color
[i
] = dst_register( SVGA3DREG_TEMP
, emit
->nr_hw_temp
++ );
2969 front
[i
] = emit
->input_map
[emit
->internal_color_idx
[i
]];
2971 /* Back is always the next input:
2974 back
[i
].base
.num
= front
[i
].base
.num
+ 1;
2976 /* Reassign the input_map to the actual front-face color:
2978 emit
->input_map
[emit
->internal_color_idx
[i
]] = src(color
[i
]);
2981 if_token
= inst_token( SVGA3DOP_IFC
);
2983 if (emit
->key
.fkey
.front_ccw
)
2984 if_token
.control
= SVGA3DOPCOMP_LT
;
2986 if_token
.control
= SVGA3DOPCOMP_GT
;
2988 zero
= scalar(zero
, TGSI_SWIZZLE_X
);
2990 if (!(emit_instruction( emit
, if_token
) &&
2991 emit_src( emit
, vface
) &&
2992 emit_src( emit
, zero
) ))
2995 for (i
= 0; i
< count
; i
++) {
2996 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
), color
[i
], front
[i
] ))
3000 if (!(emit_instruction( emit
, inst_token( SVGA3DOP_ELSE
))))
3003 for (i
= 0; i
< count
; i
++) {
3004 if (!submit_op1( emit
, inst_token( SVGA3DOP_MOV
), color
[i
], back
[i
] ))
3008 if (!emit_instruction( emit
, inst_token( SVGA3DOP_ENDIF
) ))
3015 0: SETP_GT TEMP, VFACE, 0
3016 where TEMP is a fake frontface register
3018 static boolean
emit_frontface( struct svga_shader_emitter
*emit
)
3020 struct src_register vface
, zero
;
3021 SVGA3dShaderDestToken temp
;
3022 struct src_register pass
, fail
;
3024 vface
= get_vface( emit
);
3025 zero
= get_zero_immediate( emit
);
3027 /* Can't use get_temp() to allocate the fake frontface reg as such
3028 * temporaries will be reclaimed after each instruction by the call
3029 * to reset_temp_regs().
3031 temp
= dst_register( SVGA3DREG_TEMP
,
3032 emit
->nr_hw_temp
++ );
3034 if (emit
->key
.fkey
.front_ccw
) {
3035 pass
= scalar( zero
, TGSI_SWIZZLE_X
);
3036 fail
= scalar( zero
, TGSI_SWIZZLE_W
);
3038 pass
= scalar( zero
, TGSI_SWIZZLE_W
);
3039 fail
= scalar( zero
, TGSI_SWIZZLE_X
);
3042 if (!emit_conditional(emit
, PIPE_FUNC_GREATER
,
3043 temp
, vface
, scalar( zero
, TGSI_SWIZZLE_X
),
3047 /* Reassign the input_map to the actual front-face color:
3049 emit
->input_map
[emit
->internal_frontface_idx
] = src(temp
);
3056 * Emit code to invert the T component of the incoming texture coordinate.
3057 * This is used for drawing point sprites when
3058 * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT.
3060 static boolean
emit_inverted_texcoords( struct svga_shader_emitter
*emit
)
3062 struct src_register zero
= get_zero_immediate(emit
);
3063 struct src_register pos_neg_one
= get_pos_neg_one_immediate( emit
);
3064 unsigned inverted_texcoords
= emit
->inverted_texcoords
;
3066 while (inverted_texcoords
) {
3067 const unsigned unit
= ffs(inverted_texcoords
) - 1;
3069 assert(emit
->inverted_texcoords
& (1 << unit
));
3071 assert(unit
< Elements(emit
->ps_true_texcoord
));
3073 assert(unit
< Elements(emit
->ps_inverted_texcoord_input
));
3075 assert(emit
->ps_inverted_texcoord_input
[unit
]
3076 < Elements(emit
->input_map
));
3078 /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
3079 if (!submit_op3(emit
,
3080 inst_token(SVGA3DOP_MAD
),
3081 dst(emit
->ps_inverted_texcoord
[unit
]),
3082 emit
->ps_true_texcoord
[unit
],
3083 swizzle(pos_neg_one
, 0, 3, 0, 0), /* (1, -1, 1, 1) */
3084 swizzle(zero
, 0, 3, 0, 0))) /* (0, 1, 0, 0) */
3087 /* Reassign the input_map entry to the new texcoord register */
3088 emit
->input_map
[emit
->ps_inverted_texcoord_input
[unit
]] =
3089 emit
->ps_inverted_texcoord
[unit
];
3091 inverted_texcoords
&= ~(1 << unit
);
3098 static INLINE boolean
3099 needs_to_create_zero( struct svga_shader_emitter
*emit
)
3103 if (emit
->unit
== PIPE_SHADER_FRAGMENT
) {
3104 if (emit
->key
.fkey
.light_twoside
)
3107 if (emit
->key
.fkey
.white_fragments
)
3110 if (emit
->emit_frontface
)
3113 if (emit
->info
.opcode_count
[TGSI_OPCODE_DST
] >= 1 ||
3114 emit
->info
.opcode_count
[TGSI_OPCODE_SSG
] >= 1 ||
3115 emit
->info
.opcode_count
[TGSI_OPCODE_LIT
] >= 1)
3118 if (emit
->inverted_texcoords
)
3121 /* look for any PIPE_SWIZZLE_ZERO/ONE terms */
3122 for (i
= 0; i
< emit
->key
.fkey
.num_textures
; i
++) {
3123 if (emit
->key
.fkey
.tex
[i
].swizzle_r
> PIPE_SWIZZLE_ALPHA
||
3124 emit
->key
.fkey
.tex
[i
].swizzle_g
> PIPE_SWIZZLE_ALPHA
||
3125 emit
->key
.fkey
.tex
[i
].swizzle_b
> PIPE_SWIZZLE_ALPHA
||
3126 emit
->key
.fkey
.tex
[i
].swizzle_a
> PIPE_SWIZZLE_ALPHA
)
3130 for (i
= 0; i
< emit
->key
.fkey
.num_textures
; i
++) {
3131 if (emit
->key
.fkey
.tex
[i
].compare_mode
== PIPE_TEX_COMPARE_R_TO_TEXTURE
)
3136 if (emit
->unit
== PIPE_SHADER_VERTEX
) {
3137 if (emit
->info
.opcode_count
[TGSI_OPCODE_CMP
] >= 1)
3141 if (emit
->info
.opcode_count
[TGSI_OPCODE_IF
] >= 1 ||
3142 emit
->info
.opcode_count
[TGSI_OPCODE_BGNLOOP
] >= 1 ||
3143 emit
->info
.opcode_count
[TGSI_OPCODE_DDX
] >= 1 ||
3144 emit
->info
.opcode_count
[TGSI_OPCODE_DDY
] >= 1 ||
3145 emit
->info
.opcode_count
[TGSI_OPCODE_ROUND
] >= 1 ||
3146 emit
->info
.opcode_count
[TGSI_OPCODE_SGE
] >= 1 ||
3147 emit
->info
.opcode_count
[TGSI_OPCODE_SGT
] >= 1 ||
3148 emit
->info
.opcode_count
[TGSI_OPCODE_SLE
] >= 1 ||
3149 emit
->info
.opcode_count
[TGSI_OPCODE_SLT
] >= 1 ||
3150 emit
->info
.opcode_count
[TGSI_OPCODE_SNE
] >= 1 ||
3151 emit
->info
.opcode_count
[TGSI_OPCODE_SEQ
] >= 1 ||
3152 emit
->info
.opcode_count
[TGSI_OPCODE_EXP
] >= 1 ||
3153 emit
->info
.opcode_count
[TGSI_OPCODE_LOG
] >= 1 ||
3154 emit
->info
.opcode_count
[TGSI_OPCODE_XPD
] >= 1 ||
3155 emit
->info
.opcode_count
[TGSI_OPCODE_KILP
] >= 1)
3161 static INLINE boolean
3162 needs_to_create_loop_const( struct svga_shader_emitter
*emit
)
3164 return (emit
->info
.opcode_count
[TGSI_OPCODE_BGNLOOP
] >= 1);
3167 static INLINE boolean
3168 needs_to_create_arl_consts( struct svga_shader_emitter
*emit
)
3170 return (emit
->num_arl_consts
> 0);
3173 static INLINE boolean
3174 pre_parse_add_indirect( struct svga_shader_emitter
*emit
,
3175 int num
, int current_arl
)
3180 for (i
= 0; i
< emit
->num_arl_consts
; ++i
) {
3181 if (emit
->arl_consts
[i
].arl_num
== current_arl
)
3185 if (emit
->num_arl_consts
== i
) {
3186 ++emit
->num_arl_consts
;
3188 emit
->arl_consts
[i
].number
= (emit
->arl_consts
[i
].number
> num
) ?
3190 emit
->arl_consts
[i
].number
;
3191 emit
->arl_consts
[i
].arl_num
= current_arl
;
3196 pre_parse_instruction( struct svga_shader_emitter
*emit
,
3197 const struct tgsi_full_instruction
*insn
,
3200 if (insn
->Src
[0].Register
.Indirect
&&
3201 insn
->Src
[0].Indirect
.File
== TGSI_FILE_ADDRESS
) {
3202 const struct tgsi_full_src_register
*reg
= &insn
->Src
[0];
3203 if (reg
->Register
.Index
< 0) {
3204 pre_parse_add_indirect(emit
, reg
->Register
.Index
, current_arl
);
3208 if (insn
->Src
[1].Register
.Indirect
&&
3209 insn
->Src
[1].Indirect
.File
== TGSI_FILE_ADDRESS
) {
3210 const struct tgsi_full_src_register
*reg
= &insn
->Src
[1];
3211 if (reg
->Register
.Index
< 0) {
3212 pre_parse_add_indirect(emit
, reg
->Register
.Index
, current_arl
);
3216 if (insn
->Src
[2].Register
.Indirect
&&
3217 insn
->Src
[2].Indirect
.File
== TGSI_FILE_ADDRESS
) {
3218 const struct tgsi_full_src_register
*reg
= &insn
->Src
[2];
3219 if (reg
->Register
.Index
< 0) {
3220 pre_parse_add_indirect(emit
, reg
->Register
.Index
, current_arl
);
3228 pre_parse_tokens( struct svga_shader_emitter
*emit
,
3229 const struct tgsi_token
*tokens
)
3231 struct tgsi_parse_context parse
;
3232 int current_arl
= 0;
3234 tgsi_parse_init( &parse
, tokens
);
3236 while (!tgsi_parse_end_of_tokens( &parse
)) {
3237 tgsi_parse_token( &parse
);
3238 switch (parse
.FullToken
.Token
.Type
) {
3239 case TGSI_TOKEN_TYPE_IMMEDIATE
:
3240 case TGSI_TOKEN_TYPE_DECLARATION
:
3242 case TGSI_TOKEN_TYPE_INSTRUCTION
:
3243 if (parse
.FullToken
.FullInstruction
.Instruction
.Opcode
==
3247 if (!pre_parse_instruction( emit
, &parse
.FullToken
.FullInstruction
,
3259 static boolean
svga_shader_emit_helpers( struct svga_shader_emitter
*emit
)
3262 if (needs_to_create_zero( emit
)) {
3263 create_zero_immediate( emit
);
3265 if (needs_to_create_loop_const( emit
)) {
3266 create_loop_const( emit
);
3268 if (needs_to_create_arl_consts( emit
)) {
3269 create_arl_consts( emit
);
3272 if (emit
->unit
== PIPE_SHADER_FRAGMENT
) {
3273 if (!emit_ps_preamble( emit
))
3276 if (emit
->key
.fkey
.light_twoside
) {
3277 if (!emit_light_twoside( emit
))
3280 if (emit
->emit_frontface
) {
3281 if (!emit_frontface( emit
))
3284 if (emit
->inverted_texcoords
) {
3285 if (!emit_inverted_texcoords( emit
))
3293 boolean
svga_shader_emit_instructions( struct svga_shader_emitter
*emit
,
3294 const struct tgsi_token
*tokens
)
3296 struct tgsi_parse_context parse
;
3298 boolean helpers_emitted
= FALSE
;
3299 unsigned line_nr
= 0;
3301 tgsi_parse_init( &parse
, tokens
);
3302 emit
->internal_imm_count
= 0;
3304 if (emit
->unit
== PIPE_SHADER_VERTEX
) {
3305 ret
= emit_vs_preamble( emit
);
3310 pre_parse_tokens(emit
, tokens
);
3312 while (!tgsi_parse_end_of_tokens( &parse
)) {
3313 tgsi_parse_token( &parse
);
3315 switch (parse
.FullToken
.Token
.Type
) {
3316 case TGSI_TOKEN_TYPE_IMMEDIATE
:
3317 ret
= svga_emit_immediate( emit
, &parse
.FullToken
.FullImmediate
);
3322 case TGSI_TOKEN_TYPE_DECLARATION
:
3323 ret
= svga_translate_decl_sm30( emit
, &parse
.FullToken
.FullDeclaration
);
3328 case TGSI_TOKEN_TYPE_INSTRUCTION
:
3329 if (!helpers_emitted
) {
3330 if (!svga_shader_emit_helpers( emit
))
3332 helpers_emitted
= TRUE
;
3334 ret
= svga_emit_instruction( emit
,
3336 &parse
.FullToken
.FullInstruction
);
3344 reset_temp_regs( emit
);
3347 /* Need to terminate the current subroutine. Note that the
3348 * hardware doesn't tolerate shaders without sub-routines
3349 * terminating with RET+END.
3351 if (!emit
->in_main_func
) {
3352 ret
= emit_instruction( emit
, inst_token( SVGA3DOP_RET
) );
3357 assert(emit
->dynamic_branching_level
== 0);
3359 /* Need to terminate the whole shader:
3361 ret
= emit_instruction( emit
, inst_token( SVGA3DOP_END
) );
3366 tgsi_parse_free( &parse
);