1 /**************************************************************************
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
30 * TGSI interpreter/executor.
32 * Flow control information:
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
 * The ExecMask is computed from several other masks (CondMask, LoopMask and
 * ContMask, together with the switch mask and FuncMask; see
 * UPDATE_EXEC_MASK) which are controlled by the flow control instructions
 * (namely: IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
68 micro_iabs(union tgsi_exec_channel
*dst
,
69 const union tgsi_exec_channel
*src
)
71 dst
->i
[0] = src
->i
[0] >= 0 ? src
->i
[0] : -src
->i
[0];
72 dst
->i
[1] = src
->i
[1] >= 0 ? src
->i
[1] : -src
->i
[1];
73 dst
->i
[2] = src
->i
[2] >= 0 ? src
->i
[2] : -src
->i
[2];
74 dst
->i
[3] = src
->i
[3] >= 0 ? src
->i
[3] : -src
->i
[3];
78 micro_ineg(union tgsi_exec_channel
*dst
,
79 const union tgsi_exec_channel
*src
)
81 dst
->i
[0] = -src
->i
[0];
82 dst
->i
[1] = -src
->i
[1];
83 dst
->i
[2] = -src
->i
[2];
84 dst
->i
[3] = -src
->i
[3];
88 micro_mov(union tgsi_exec_channel
*dst
,
89 const union tgsi_exec_channel
*src
)
91 dst
->u
[0] = src
->u
[0];
92 dst
->u
[1] = src
->u
[1];
93 dst
->u
[2] = src
->u
[2];
94 dst
->u
[3] = src
->u
[3];
97 #define TILE_TOP_LEFT 0
98 #define TILE_TOP_RIGHT 1
99 #define TILE_BOTTOM_LEFT 2
100 #define TILE_BOTTOM_RIGHT 3
107 enum tgsi_exec_datatype
{
108 TGSI_EXEC_DATA_FLOAT
,
114 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
116 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
117 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
118 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
119 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
120 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
121 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
122 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
123 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
124 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
125 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
126 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
127 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
128 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
129 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
130 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
131 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
132 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
133 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
134 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
135 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
136 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
137 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
138 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
139 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
140 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
141 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
142 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
143 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
144 #define TEMP_R0 TGSI_EXEC_TEMP_R0
145 #define TEMP_P0 TGSI_EXEC_TEMP_P0
147 #define IS_CHANNEL_ENABLED(INST, CHAN)\
148 ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))
150 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
151 ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))
153 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
154 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
155 if (IS_CHANNEL_ENABLED( INST, CHAN ))
157 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
158 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
159 if (IS_CHANNEL_ENABLED2( INST, CHAN ))
162 /** The execution mask depends on the conditional mask and the loop mask */
163 #define UPDATE_EXEC_MASK(MACH) \
164 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask
167 static const union tgsi_exec_channel ZeroVec
=
168 { { 0.0, 0.0, 0.0, 0.0 } };
171 #define CHECK_INF_OR_NAN(chan) do {\
172 assert(!util_is_inf_or_nan((chan)->f[0]));\
173 assert(!util_is_inf_or_nan((chan)->f[1]));\
174 assert(!util_is_inf_or_nan((chan)->f[2]));\
175 assert(!util_is_inf_or_nan((chan)->f[3]));\
181 print_chan(const char *msg
, const union tgsi_exec_channel
*chan
)
183 debug_printf("%s = {%f, %f, %f, %f}\n",
184 msg
, chan
->f
[0], chan
->f
[1], chan
->f
[2], chan
->f
[3]);
191 print_temp(const struct tgsi_exec_machine
*mach
, uint index
)
193 const struct tgsi_exec_vector
*tmp
= &mach
->Temps
[index
];
195 debug_printf("Temp[%u] =\n", index
);
196 for (i
= 0; i
< 4; i
++) {
197 debug_printf(" %c: { %f, %f, %f, %f }\n",
209 * Check if there's a potential src/dst register data dependency when
210 * using SOA execution.
213 * This would expand into:
218 * The second instruction will have the wrong value for t0 if executed as-is.
221 tgsi_check_soa_dependencies(const struct tgsi_full_instruction
*inst
)
225 uint writemask
= inst
->Dst
[0].Register
.WriteMask
;
226 if (writemask
== TGSI_WRITEMASK_X
||
227 writemask
== TGSI_WRITEMASK_Y
||
228 writemask
== TGSI_WRITEMASK_Z
||
229 writemask
== TGSI_WRITEMASK_W
||
230 writemask
== TGSI_WRITEMASK_NONE
) {
231 /* no chance of data dependency */
235 /* loop over src regs */
236 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
237 if ((inst
->Src
[i
].Register
.File
==
238 inst
->Dst
[0].Register
.File
) &&
239 (inst
->Src
[i
].Register
.Index
==
240 inst
->Dst
[0].Register
.Index
)) {
241 /* loop over dest channels */
242 uint channelsWritten
= 0x0;
243 FOR_EACH_ENABLED_CHANNEL(*inst
, chan
) {
244 /* check if we're reading a channel that's been written */
245 uint swizzle
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[i
], chan
);
246 if (channelsWritten
& (1 << swizzle
)) {
250 channelsWritten
|= (1 << chan
);
259 * Initialize machine state by expanding tokens to full instructions,
260 * allocating temporary storage, setting up constants, etc.
261 * After this, we can call tgsi_exec_machine_run() many times.
264 tgsi_exec_machine_bind_shader(
265 struct tgsi_exec_machine
*mach
,
266 const struct tgsi_token
*tokens
,
268 struct tgsi_sampler
**samplers
)
271 struct tgsi_parse_context parse
;
272 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
273 struct tgsi_full_instruction
*instructions
;
274 struct tgsi_full_declaration
*declarations
;
275 uint maxInstructions
= 10, numInstructions
= 0;
276 uint maxDeclarations
= 10, numDeclarations
= 0;
280 tgsi_dump(tokens
, 0);
285 mach
->Tokens
= tokens
;
286 mach
->Samplers
= samplers
;
288 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
289 if (k
!= TGSI_PARSE_OK
) {
290 debug_printf( "Problem parsing!\n" );
294 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
298 declarations
= (struct tgsi_full_declaration
*)
299 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
305 instructions
= (struct tgsi_full_instruction
*)
306 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
309 FREE( declarations
);
313 while( !tgsi_parse_end_of_tokens( &parse
) ) {
314 uint pointer
= parse
.Position
;
317 tgsi_parse_token( &parse
);
318 switch( parse
.FullToken
.Token
.Type
) {
319 case TGSI_TOKEN_TYPE_DECLARATION
:
320 /* save expanded declaration */
321 if (numDeclarations
== maxDeclarations
) {
322 declarations
= REALLOC(declarations
,
324 * sizeof(struct tgsi_full_declaration
),
325 (maxDeclarations
+ 10)
326 * sizeof(struct tgsi_full_declaration
));
327 maxDeclarations
+= 10;
329 if (parse
.FullToken
.FullDeclaration
.Declaration
.File
== TGSI_FILE_OUTPUT
) {
331 for (reg
= parse
.FullToken
.FullDeclaration
.Range
.First
;
332 reg
<= parse
.FullToken
.FullDeclaration
.Range
.Last
;
337 memcpy(declarations
+ numDeclarations
,
338 &parse
.FullToken
.FullDeclaration
,
339 sizeof(declarations
[0]));
343 case TGSI_TOKEN_TYPE_IMMEDIATE
:
345 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
347 assert( mach
->ImmLimit
+ 1 <= TGSI_EXEC_NUM_IMMEDIATES
);
349 for( i
= 0; i
< size
; i
++ ) {
350 mach
->Imms
[mach
->ImmLimit
][i
] =
351 parse
.FullToken
.FullImmediate
.u
[i
].Float
;
357 case TGSI_TOKEN_TYPE_INSTRUCTION
:
358 assert( labels
->count
< MAX_LABELS
);
360 labels
->labels
[labels
->count
][0] = instno
;
361 labels
->labels
[labels
->count
][1] = pointer
;
364 /* save expanded instruction */
365 if (numInstructions
== maxInstructions
) {
366 instructions
= REALLOC(instructions
,
368 * sizeof(struct tgsi_full_instruction
),
369 (maxInstructions
+ 10)
370 * sizeof(struct tgsi_full_instruction
));
371 maxInstructions
+= 10;
374 memcpy(instructions
+ numInstructions
,
375 &parse
.FullToken
.FullInstruction
,
376 sizeof(instructions
[0]));
381 case TGSI_TOKEN_TYPE_PROPERTY
:
388 tgsi_parse_free (&parse
);
390 if (mach
->Declarations
) {
391 FREE( mach
->Declarations
);
393 mach
->Declarations
= declarations
;
394 mach
->NumDeclarations
= numDeclarations
;
396 if (mach
->Instructions
) {
397 FREE( mach
->Instructions
);
399 mach
->Instructions
= instructions
;
400 mach
->NumInstructions
= numInstructions
;
404 struct tgsi_exec_machine
*
405 tgsi_exec_machine_create( void )
407 struct tgsi_exec_machine
*mach
;
410 mach
= align_malloc( sizeof *mach
, 16 );
414 memset(mach
, 0, sizeof(*mach
));
416 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_TEMP_ADDR
];
417 mach
->MaxGeometryShaderOutputs
= TGSI_MAX_TOTAL_VERTICES
;
418 mach
->Predicates
= &mach
->Temps
[TGSI_EXEC_TEMP_P0
];
420 /* Setup constants. */
421 for( i
= 0; i
< 4; i
++ ) {
422 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
423 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
424 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
425 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
426 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
427 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
428 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
429 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
430 mach
->Temps
[TEMP_3_I
].xyzw
[TEMP_3_C
].f
[i
] = 3.0f
;
431 mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
].f
[i
] = 0.5f
;
435 /* silence warnings */
449 tgsi_exec_machine_destroy(struct tgsi_exec_machine
*mach
)
452 FREE(mach
->Instructions
);
453 FREE(mach
->Declarations
);
462 union tgsi_exec_channel
*dst
,
463 const union tgsi_exec_channel
*src
)
465 dst
->f
[0] = fabsf( src
->f
[0] );
466 dst
->f
[1] = fabsf( src
->f
[1] );
467 dst
->f
[2] = fabsf( src
->f
[2] );
468 dst
->f
[3] = fabsf( src
->f
[3] );
473 union tgsi_exec_channel
*dst
,
474 const union tgsi_exec_channel
*src0
,
475 const union tgsi_exec_channel
*src1
)
477 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
478 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
479 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
480 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
485 union tgsi_exec_channel
*dst
,
486 const union tgsi_exec_channel
*src
)
488 dst
->f
[0] = ceilf( src
->f
[0] );
489 dst
->f
[1] = ceilf( src
->f
[1] );
490 dst
->f
[2] = ceilf( src
->f
[2] );
491 dst
->f
[3] = ceilf( src
->f
[3] );
496 union tgsi_exec_channel
*dst
,
497 const union tgsi_exec_channel
*src
)
499 dst
->f
[0] = cosf( src
->f
[0] );
500 dst
->f
[1] = cosf( src
->f
[1] );
501 dst
->f
[2] = cosf( src
->f
[2] );
502 dst
->f
[3] = cosf( src
->f
[3] );
507 union tgsi_exec_channel
*dst
,
508 const union tgsi_exec_channel
*src
)
513 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
518 union tgsi_exec_channel
*dst
,
519 const union tgsi_exec_channel
*src
)
524 dst
->f
[3] = src
->f
[TILE_BOTTOM_LEFT
] - src
->f
[TILE_TOP_LEFT
];
529 union tgsi_exec_channel
*dst
,
530 const union tgsi_exec_channel
*src0
,
531 const union tgsi_exec_channel
*src1
)
533 if (src1
->f
[0] != 0) {
534 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
536 if (src1
->f
[1] != 0) {
537 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
539 if (src1
->f
[2] != 0) {
540 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
542 if (src1
->f
[3] != 0) {
543 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
549 union tgsi_exec_channel
*dst
,
550 const union tgsi_exec_channel
*src0
,
551 const union tgsi_exec_channel
*src1
,
552 const union tgsi_exec_channel
*src2
,
553 const union tgsi_exec_channel
*src3
)
555 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
556 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
557 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
558 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
563 union tgsi_exec_channel
*dst
,
564 const union tgsi_exec_channel
*src
)
567 dst
->f
[0] = util_fast_exp2( src
->f
[0] );
568 dst
->f
[1] = util_fast_exp2( src
->f
[1] );
569 dst
->f
[2] = util_fast_exp2( src
->f
[2] );
570 dst
->f
[3] = util_fast_exp2( src
->f
[3] );
574 /* Inf is okay for this instruction, so clamp it to silence assertions. */
576 union tgsi_exec_channel clamped
;
578 for (i
= 0; i
< 4; i
++) {
579 if (src
->f
[i
] > 127.99999f
) {
580 clamped
.f
[i
] = 127.99999f
;
581 } else if (src
->f
[i
] < -126.99999f
) {
582 clamped
.f
[i
] = -126.99999f
;
584 clamped
.f
[i
] = src
->f
[i
];
590 dst
->f
[0] = powf( 2.0f
, src
->f
[0] );
591 dst
->f
[1] = powf( 2.0f
, src
->f
[1] );
592 dst
->f
[2] = powf( 2.0f
, src
->f
[2] );
593 dst
->f
[3] = powf( 2.0f
, src
->f
[3] );
598 micro_float_clamp(union tgsi_exec_channel
*dst
,
599 const union tgsi_exec_channel
*src
)
603 for (i
= 0; i
< 4; i
++) {
604 if (src
->f
[i
] > 0.0f
) {
605 if (src
->f
[i
] > 1.884467e+019f
)
606 dst
->f
[i
] = 1.884467e+019f
;
607 else if (src
->f
[i
] < 5.42101e-020f
)
608 dst
->f
[i
] = 5.42101e-020f
;
610 dst
->f
[i
] = src
->f
[i
];
613 if (src
->f
[i
] < -1.884467e+019f
)
614 dst
->f
[i
] = -1.884467e+019f
;
615 else if (src
->f
[i
] > -5.42101e-020f
)
616 dst
->f
[i
] = -5.42101e-020f
;
618 dst
->f
[i
] = src
->f
[i
];
625 union tgsi_exec_channel
*dst
,
626 const union tgsi_exec_channel
*src
)
628 dst
->f
[0] = floorf( src
->f
[0] );
629 dst
->f
[1] = floorf( src
->f
[1] );
630 dst
->f
[2] = floorf( src
->f
[2] );
631 dst
->f
[3] = floorf( src
->f
[3] );
636 union tgsi_exec_channel
*dst
,
637 const union tgsi_exec_channel
*src
)
639 dst
->f
[0] = src
->f
[0] - floorf( src
->f
[0] );
640 dst
->f
[1] = src
->f
[1] - floorf( src
->f
[1] );
641 dst
->f
[2] = src
->f
[2] - floorf( src
->f
[2] );
642 dst
->f
[3] = src
->f
[3] - floorf( src
->f
[3] );
647 union tgsi_exec_channel
*dst
,
648 const union tgsi_exec_channel
*src
)
651 dst
->f
[0] = util_fast_log2( src
->f
[0] );
652 dst
->f
[1] = util_fast_log2( src
->f
[1] );
653 dst
->f
[2] = util_fast_log2( src
->f
[2] );
654 dst
->f
[3] = util_fast_log2( src
->f
[3] );
656 dst
->f
[0] = logf( src
->f
[0] ) * 1.442695f
;
657 dst
->f
[1] = logf( src
->f
[1] ) * 1.442695f
;
658 dst
->f
[2] = logf( src
->f
[2] ) * 1.442695f
;
659 dst
->f
[3] = logf( src
->f
[3] ) * 1.442695f
;
665 union tgsi_exec_channel
*dst
,
666 const union tgsi_exec_channel
*src0
,
667 const union tgsi_exec_channel
*src1
,
668 const union tgsi_exec_channel
*src2
,
669 const union tgsi_exec_channel
*src3
)
671 dst
->f
[0] = src0
->f
[0] <= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
672 dst
->f
[1] = src0
->f
[1] <= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
673 dst
->f
[2] = src0
->f
[2] <= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
674 dst
->f
[3] = src0
->f
[3] <= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
679 union tgsi_exec_channel
*dst
,
680 const union tgsi_exec_channel
*src0
,
681 const union tgsi_exec_channel
*src1
,
682 const union tgsi_exec_channel
*src2
,
683 const union tgsi_exec_channel
*src3
)
685 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
686 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
687 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
688 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
693 union tgsi_exec_channel
*dst
,
694 const union tgsi_exec_channel
*src0
,
695 const union tgsi_exec_channel
*src1
)
697 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
698 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
699 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
700 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
705 union tgsi_exec_channel
*dst
,
706 const union tgsi_exec_channel
*src0
,
707 const union tgsi_exec_channel
*src1
)
709 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
710 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
711 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
712 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
718 union tgsi_exec_channel
*dst
,
719 const union tgsi_exec_channel
*src0
,
720 const union tgsi_exec_channel
*src1
)
722 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
723 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
724 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
725 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
731 union tgsi_exec_channel
*dst
,
732 const union tgsi_exec_channel
*src0
,
733 const union tgsi_exec_channel
*src1
)
735 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
736 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
737 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
738 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
744 union tgsi_exec_channel
*dst0
,
745 union tgsi_exec_channel
*dst1
,
746 const union tgsi_exec_channel
*src0
,
747 const union tgsi_exec_channel
*src1
)
749 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
750 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
751 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
752 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
763 union tgsi_exec_channel
*dst0
,
764 union tgsi_exec_channel
*dst1
,
765 const union tgsi_exec_channel
*src0
,
766 const union tgsi_exec_channel
*src1
)
768 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
769 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
770 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
771 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
783 union tgsi_exec_channel
*dst
,
784 const union tgsi_exec_channel
*src0
,
785 const union tgsi_exec_channel
*src1
,
786 const union tgsi_exec_channel
*src2
)
788 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
789 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
790 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
791 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
797 union tgsi_exec_channel
*dst
,
798 const union tgsi_exec_channel
*src
)
800 dst
->f
[0] = -src
->f
[0];
801 dst
->f
[1] = -src
->f
[1];
802 dst
->f
[2] = -src
->f
[2];
803 dst
->f
[3] = -src
->f
[3];
808 union tgsi_exec_channel
*dst
,
809 const union tgsi_exec_channel
*src0
,
810 const union tgsi_exec_channel
*src1
)
813 dst
->f
[0] = util_fast_pow( src0
->f
[0], src1
->f
[0] );
814 dst
->f
[1] = util_fast_pow( src0
->f
[1], src1
->f
[1] );
815 dst
->f
[2] = util_fast_pow( src0
->f
[2], src1
->f
[2] );
816 dst
->f
[3] = util_fast_pow( src0
->f
[3], src1
->f
[3] );
818 dst
->f
[0] = powf( src0
->f
[0], src1
->f
[0] );
819 dst
->f
[1] = powf( src0
->f
[1], src1
->f
[1] );
820 dst
->f
[2] = powf( src0
->f
[2], src1
->f
[2] );
821 dst
->f
[3] = powf( src0
->f
[3], src1
->f
[3] );
827 union tgsi_exec_channel
*dst
,
828 const union tgsi_exec_channel
*src
)
830 dst
->f
[0] = floorf( src
->f
[0] + 0.5f
);
831 dst
->f
[1] = floorf( src
->f
[1] + 0.5f
);
832 dst
->f
[2] = floorf( src
->f
[2] + 0.5f
);
833 dst
->f
[3] = floorf( src
->f
[3] + 0.5f
);
838 union tgsi_exec_channel
*dst
,
839 const union tgsi_exec_channel
*src
)
841 dst
->f
[0] = src
->f
[0] < 0.0f
? -1.0f
: src
->f
[0] > 0.0f
? 1.0f
: 0.0f
;
842 dst
->f
[1] = src
->f
[1] < 0.0f
? -1.0f
: src
->f
[1] > 0.0f
? 1.0f
: 0.0f
;
843 dst
->f
[2] = src
->f
[2] < 0.0f
? -1.0f
: src
->f
[2] > 0.0f
? 1.0f
: 0.0f
;
844 dst
->f
[3] = src
->f
[3] < 0.0f
? -1.0f
: src
->f
[3] > 0.0f
? 1.0f
: 0.0f
;
849 union tgsi_exec_channel
*dst
,
850 const union tgsi_exec_channel
*src0
)
852 dst
->f
[0] = (float) (int) src0
->f
[0];
853 dst
->f
[1] = (float) (int) src0
->f
[1];
854 dst
->f
[2] = (float) (int) src0
->f
[2];
855 dst
->f
[3] = (float) (int) src0
->f
[3];
860 union tgsi_exec_channel
*dst
,
861 const union tgsi_exec_channel
*src
)
863 dst
->f
[0] = sinf( src
->f
[0] );
864 dst
->f
[1] = sinf( src
->f
[1] );
865 dst
->f
[2] = sinf( src
->f
[2] );
866 dst
->f
[3] = sinf( src
->f
[3] );
870 micro_sqrt( union tgsi_exec_channel
*dst
,
871 const union tgsi_exec_channel
*src
)
873 dst
->f
[0] = sqrtf( src
->f
[0] );
874 dst
->f
[1] = sqrtf( src
->f
[1] );
875 dst
->f
[2] = sqrtf( src
->f
[2] );
876 dst
->f
[3] = sqrtf( src
->f
[3] );
881 union tgsi_exec_channel
*dst
,
882 const union tgsi_exec_channel
*src0
,
883 const union tgsi_exec_channel
*src1
)
885 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
886 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
887 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
888 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
892 fetch_src_file_channel(
893 const struct tgsi_exec_machine
*mach
,
896 const union tgsi_exec_channel
*index
,
897 union tgsi_exec_channel
*chan
)
905 case TGSI_FILE_CONSTANT
:
906 assert(mach
->Consts
);
910 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
914 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
918 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
922 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
925 case TGSI_FILE_INPUT
:
926 case TGSI_FILE_SYSTEM_VALUE
:
927 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
928 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
929 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
930 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
933 case TGSI_FILE_TEMPORARY
:
934 assert(index
->i
[0] < TGSI_EXEC_NUM_TEMPS
);
935 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
936 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
937 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
938 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
941 case TGSI_FILE_IMMEDIATE
:
942 assert( index
->i
[0] < (int) mach
->ImmLimit
);
943 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
944 assert( index
->i
[1] < (int) mach
->ImmLimit
);
945 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
946 assert( index
->i
[2] < (int) mach
->ImmLimit
);
947 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
948 assert( index
->i
[3] < (int) mach
->ImmLimit
);
949 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
952 case TGSI_FILE_ADDRESS
:
953 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
954 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
955 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
956 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
959 case TGSI_FILE_PREDICATE
:
960 assert(index
->i
[0] < TGSI_EXEC_NUM_PREDS
);
961 assert(index
->i
[1] < TGSI_EXEC_NUM_PREDS
);
962 assert(index
->i
[2] < TGSI_EXEC_NUM_PREDS
);
963 assert(index
->i
[3] < TGSI_EXEC_NUM_PREDS
);
964 chan
->u
[0] = mach
->Predicates
[0].xyzw
[swizzle
].u
[0];
965 chan
->u
[1] = mach
->Predicates
[0].xyzw
[swizzle
].u
[1];
966 chan
->u
[2] = mach
->Predicates
[0].xyzw
[swizzle
].u
[2];
967 chan
->u
[3] = mach
->Predicates
[0].xyzw
[swizzle
].u
[3];
970 case TGSI_FILE_OUTPUT
:
971 /* vertex/fragment output vars can be read too */
972 chan
->u
[0] = mach
->Outputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
973 chan
->u
[1] = mach
->Outputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
974 chan
->u
[2] = mach
->Outputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
975 chan
->u
[3] = mach
->Outputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
989 fetch_source(const struct tgsi_exec_machine
*mach
,
990 union tgsi_exec_channel
*chan
,
991 const struct tgsi_full_src_register
*reg
,
992 const uint chan_index
,
993 enum tgsi_exec_datatype src_datatype
)
995 union tgsi_exec_channel index
;
998 /* We start with a direct index into a register file.
1002 * file = Register.File
1003 * [1] = Register.Index
1008 index
.i
[3] = reg
->Register
.Index
;
1010 /* There is an extra source register that indirectly subscripts
1011 * a register file. The direct index now becomes an offset
1012 * that is being added to the indirect register.
1016 * ind = Indirect.File
1017 * [2] = Indirect.Index
1018 * .x = Indirect.SwizzleX
1020 if (reg
->Register
.Indirect
) {
1021 union tgsi_exec_channel index2
;
1022 union tgsi_exec_channel indir_index
;
1023 const uint execmask
= mach
->ExecMask
;
1026 /* which address register (always zero now) */
1030 index2
.i
[3] = reg
->Indirect
.Index
;
1032 /* get current value of address register[swizzle] */
1033 swizzle
= tgsi_util_get_src_register_swizzle( ®
->Indirect
, CHAN_X
);
1034 fetch_src_file_channel(
1041 /* add value of address register to the offset */
1042 index
.i
[0] += (int) indir_index
.f
[0];
1043 index
.i
[1] += (int) indir_index
.f
[1];
1044 index
.i
[2] += (int) indir_index
.f
[2];
1045 index
.i
[3] += (int) indir_index
.f
[3];
1047 /* for disabled execution channels, zero-out the index to
1048 * avoid using a potential garbage value.
1050 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1051 if ((execmask
& (1 << i
)) == 0)
1056 /* There is an extra source register that is a second
1057 * subscript to a register file. Effectively it means that
1058 * the register file is actually a 2D array of registers.
1060 * file[1][3] == file[1*sizeof(file[1])+3],
1062 * [3] = Dimension.Index
1064 if (reg
->Register
.Dimension
) {
1065 /* The size of the first-order array depends on the register file type.
1066 * We need to multiply the index to the first array to get an effective,
1067 * "flat" index that points to the beginning of the second-order array.
1069 switch (reg
->Register
.File
) {
1070 case TGSI_FILE_INPUT
:
1071 case TGSI_FILE_SYSTEM_VALUE
:
1072 index
.i
[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1073 index
.i
[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1074 index
.i
[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1075 index
.i
[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1077 case TGSI_FILE_CONSTANT
:
1078 index
.i
[0] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1079 index
.i
[1] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1080 index
.i
[2] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1081 index
.i
[3] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1087 index
.i
[0] += reg
->Dimension
.Index
;
1088 index
.i
[1] += reg
->Dimension
.Index
;
1089 index
.i
[2] += reg
->Dimension
.Index
;
1090 index
.i
[3] += reg
->Dimension
.Index
;
1092 /* Again, the second subscript index can be addressed indirectly
1093 * identically to the first one.
1094 * Nothing stops us from indirectly addressing the indirect register,
1095 * but there is no need for that, so we won't exercise it.
1097 * file[1][ind[4].y+3],
1099 * ind = DimIndirect.File
1100 * [4] = DimIndirect.Index
1101 * .y = DimIndirect.SwizzleX
1103 if (reg
->Dimension
.Indirect
) {
1104 union tgsi_exec_channel index2
;
1105 union tgsi_exec_channel indir_index
;
1106 const uint execmask
= mach
->ExecMask
;
1112 index2
.i
[3] = reg
->DimIndirect
.Index
;
1114 swizzle
= tgsi_util_get_src_register_swizzle( ®
->DimIndirect
, CHAN_X
);
1115 fetch_src_file_channel(
1117 reg
->DimIndirect
.File
,
1122 index
.i
[0] += (int) indir_index
.f
[0];
1123 index
.i
[1] += (int) indir_index
.f
[1];
1124 index
.i
[2] += (int) indir_index
.f
[2];
1125 index
.i
[3] += (int) indir_index
.f
[3];
1127 /* for disabled execution channels, zero-out the index to
1128 * avoid using a potential garbage value.
1130 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1131 if ((execmask
& (1 << i
)) == 0)
1136 /* If by any chance there was a need for a 3D array of register
1137 * files, we would have to check whether Dimension is followed
1138 * by a dimension register and continue the saga.
1142 swizzle
= tgsi_util_get_full_src_register_swizzle( reg
, chan_index
);
1143 fetch_src_file_channel(
1150 if (reg
->Register
.Absolute
) {
1151 if (src_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1152 micro_abs(chan
, chan
);
1154 micro_iabs(chan
, chan
);
1158 if (reg
->Register
.Negate
) {
1159 if (src_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1160 micro_neg(chan
, chan
);
1162 micro_ineg(chan
, chan
);
1168 store_dest(struct tgsi_exec_machine
*mach
,
1169 const union tgsi_exec_channel
*chan
,
1170 const struct tgsi_full_dst_register
*reg
,
1171 const struct tgsi_full_instruction
*inst
,
1173 enum tgsi_exec_datatype dst_datatype
)
1176 union tgsi_exec_channel null
;
1177 union tgsi_exec_channel
*dst
;
1178 uint execmask
= mach
->ExecMask
;
1179 int offset
= 0; /* indirection offset */
1182 if (dst_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1183 CHECK_INF_OR_NAN(chan
);
1186 /* There is an extra source register that indirectly subscripts
1187 * a register file. The direct index now becomes an offset
1188 * that is being added to the indirect register.
1192 * ind = Indirect.File
1193 * [2] = Indirect.Index
1194 * .x = Indirect.SwizzleX
1196 if (reg
->Register
.Indirect
) {
1197 union tgsi_exec_channel index
;
1198 union tgsi_exec_channel indir_index
;
1201 /* which address register (always zero for now) */
1205 index
.i
[3] = reg
->Indirect
.Index
;
1207 /* get current value of address register[swizzle] */
1208 swizzle
= tgsi_util_get_src_register_swizzle( ®
->Indirect
, CHAN_X
);
1210 /* fetch values from the address/indirection register */
1211 fetch_src_file_channel(
1218 /* save indirection offset */
1219 offset
= (int) indir_index
.f
[0];
1222 switch (reg
->Register
.File
) {
1223 case TGSI_FILE_NULL
:
1227 case TGSI_FILE_OUTPUT
:
1228 index
= mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1229 + reg
->Register
.Index
;
1230 dst
= &mach
->Outputs
[offset
+ index
].xyzw
[chan_index
];
1232 if (TGSI_PROCESSOR_GEOMETRY
== mach
->Processor
) {
1233 fprintf(stderr
, "STORING OUT[%d] mask(%d), = (", offset
+ index
, execmask
);
1234 for (i
= 0; i
< QUAD_SIZE
; i
++)
1235 if (execmask
& (1 << i
))
1236 fprintf(stderr
, "%f, ", chan
->f
[i
]);
1237 fprintf(stderr
, ")\n");
1242 case TGSI_FILE_TEMPORARY
:
1243 index
= reg
->Register
.Index
;
1244 assert( index
< TGSI_EXEC_NUM_TEMPS
);
1245 dst
= &mach
->Temps
[offset
+ index
].xyzw
[chan_index
];
1248 case TGSI_FILE_ADDRESS
:
1249 index
= reg
->Register
.Index
;
1250 dst
= &mach
->Addrs
[index
].xyzw
[chan_index
];
1253 case TGSI_FILE_LOOP
:
1254 assert(reg
->Register
.Index
== 0);
1255 assert(mach
->LoopCounterStackTop
> 0);
1256 assert(chan_index
== CHAN_X
);
1257 dst
= &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[chan_index
];
1260 case TGSI_FILE_PREDICATE
:
1261 index
= reg
->Register
.Index
;
1262 assert(index
< TGSI_EXEC_NUM_PREDS
);
1263 dst
= &mach
->Predicates
[index
].xyzw
[chan_index
];
1271 if (inst
->Instruction
.Predicate
) {
1273 union tgsi_exec_channel
*pred
;
1275 switch (chan_index
) {
1277 swizzle
= inst
->Predicate
.SwizzleX
;
1280 swizzle
= inst
->Predicate
.SwizzleY
;
1283 swizzle
= inst
->Predicate
.SwizzleZ
;
1286 swizzle
= inst
->Predicate
.SwizzleW
;
1293 assert(inst
->Predicate
.Index
== 0);
1295 pred
= &mach
->Predicates
[inst
->Predicate
.Index
].xyzw
[swizzle
];
1297 if (inst
->Predicate
.Negate
) {
1298 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1300 execmask
&= ~(1 << i
);
1304 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1306 execmask
&= ~(1 << i
);
1312 switch (inst
->Instruction
.Saturate
) {
1314 for (i
= 0; i
< QUAD_SIZE
; i
++)
1315 if (execmask
& (1 << i
))
1316 dst
->i
[i
] = chan
->i
[i
];
1319 case TGSI_SAT_ZERO_ONE
:
1320 for (i
= 0; i
< QUAD_SIZE
; i
++)
1321 if (execmask
& (1 << i
)) {
1322 if (chan
->f
[i
] < 0.0f
)
1324 else if (chan
->f
[i
] > 1.0f
)
1327 dst
->i
[i
] = chan
->i
[i
];
1331 case TGSI_SAT_MINUS_PLUS_ONE
:
1332 for (i
= 0; i
< QUAD_SIZE
; i
++)
1333 if (execmask
& (1 << i
)) {
1334 if (chan
->f
[i
] < -1.0f
)
1336 else if (chan
->f
[i
] > 1.0f
)
1339 dst
->i
[i
] = chan
->i
[i
];
/*
 * Convenience wrappers used by the opcode handlers below.  Both expect
 * `mach` and `inst` to be in scope at the point of use and always operate
 * on float-typed data.
 *
 * FETCH: read channel CHAN of source operand INDEX into *VAL.
 * STORE: write *VAL to channel CHAN of destination operand INDEX.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_FLOAT)

#define STORE(VAL, INDEX, CHAN) \
   store_dest(mach, (VAL), &inst->Dst[(INDEX)], inst, (CHAN), TGSI_EXEC_DATA_FLOAT)
1356 * Execute ARB-style KIL which is predicated by a src register.
1357 * Kill fragment if any of the four values is less than zero.
/*
 * exec_kil: build a per-pixel kill mask from source operand 0 and OR it
 * into the machine's kill-mask temporary.
 *
 * Each of the four destination channels is mapped back to the source
 * component it reads (its swizzle); `uniquemask` records components that
 * were already examined.  For every component, the four quad lanes are
 * compared against 0.0f.
 *
 * NOTE(review): the statements that skip an already-tested component and
 * that set bits in `kilmask` are not visible in this excerpt -- presumably
 * a `continue` and a per-lane `kilmask |= 1 << i`; confirm.
 * NOTE(review): no visible statement restricts `kilmask` to the lanes in
 * mach->ExecMask -- confirm whether that is intended.
 */
1360 exec_kil(struct tgsi_exec_machine
*mach
,
1361 const struct tgsi_full_instruction
*inst
)
1365 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1366 union tgsi_exec_channel r
[1];
1368 /* This mask stores component bits that were already tested. */
1371 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1376 /* unswizzle channel */
1377 swizzle
= tgsi_util_get_full_src_register_swizzle (
1381 /* check if the component has not been already tested */
1382 if (uniquemask
& (1 << swizzle
))
1384 uniquemask
|= 1 << swizzle
;
1386 FETCH(&r
[0], 0, chan_index
);
1387 for (i
= 0; i
< 4; i
++)
1388 if (r
[0].f
[i
] < 0.0f
)
/* Merge this instruction's kills into the accumulated kill mask. */
1392 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1396 * Execute NVIDIA-style KIL which is predicated by a condition code.
1397 * Kill fragment if the condition code is TRUE.
1400 exec_kilp(struct tgsi_exec_machine
*mach
,
1401 const struct tgsi_full_instruction
*inst
)
1403 uint kilmask
; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1405 /* "unconditional" kil */
1406 kilmask
= mach
->ExecMask
;
1407 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1411 emit_vertex(struct tgsi_exec_machine
*mach
)
1413 /* FIXME: check for exec mask correctly
1415 for (i = 0; i < QUAD_SIZE; ++i) {
1416 if ((mach->ExecMask & (1 << i)))
1418 if (mach
->ExecMask
) {
1419 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += mach
->NumOutputs
;
1420 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
/*
 * emit_primitive: start a new primitive.
 *
 * When any lane is executing, the vertex count stored in
 * mach->Primitives[*prim_count] is reset to zero, after a debug-only check
 * that (*prim_count * NumOutputs) stays below MaxGeometryShaderOutputs.
 *
 * NOTE(review): the statement between the `if` and the debug_assert is not
 * visible in this excerpt -- presumably it advances *prim_count; confirm.
 */
1425 emit_primitive(struct tgsi_exec_machine
*mach
)
/* prim_count aliases the temporary that holds the current primitive index. */
1427 unsigned *prim_count
= &mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0];
1428 /* FIXME: check for exec mask correctly
1430 for (i = 0; i < QUAD_SIZE; ++i) {
1431 if ((mach->ExecMask & (1 << i)))
1433 if (mach
->ExecMask
) {
1435 debug_assert((*prim_count
* mach
->NumOutputs
) < mach
->MaxGeometryShaderOutputs
);
1436 mach
->Primitives
[*prim_count
] = 0;
1441 * Fetch a four texture samples using STR texture coordinates.
1444 fetch_texel( struct tgsi_sampler
*sampler
,
1445 const union tgsi_exec_channel
*s
,
1446 const union tgsi_exec_channel
*t
,
1447 const union tgsi_exec_channel
*p
,
1448 float lodbias
, /* XXX should be float[4] */
1449 union tgsi_exec_channel
*r
,
1450 union tgsi_exec_channel
*g
,
1451 union tgsi_exec_channel
*b
,
1452 union tgsi_exec_channel
*a
)
1455 float rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1457 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, lodbias
, rgba
);
1459 for (j
= 0; j
< 4; j
++) {
1460 r
->f
[j
] = rgba
[0][j
];
1461 g
->f
[j
] = rgba
[1][j
];
1462 b
->f
[j
] = rgba
[2][j
];
1463 a
->f
[j
] = rgba
[3][j
];
/*
 * exec_tex: sample a texture and store the RGBA result in destination 0.
 *
 * The sampler unit comes from inst->Src[1].Register.Index and the
 * coordinates from source operand 0.  inst->Texture.Texture selects how
 * many coordinate channels are fetched: X only for the 1D targets, X/Y/Z
 * for the 2D, RECT, 3D and CUBE targets.
 *
 * NOTE(review): the remaining parameters and the conditions guarding the
 * divide-by-W and LOD-bias statements are not visible in this excerpt --
 * presumably optional projection / bias paths; confirm against the full
 * function.
 */
1469 exec_tex(struct tgsi_exec_machine
*mach
,
1470 const struct tgsi_full_instruction
*inst
,
1474 const uint unit
= inst
->Src
[1].Register
.Index
;
1475 union tgsi_exec_channel r
[4];
1479 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1481 switch (inst
->Texture
.Texture
) {
1482 case TGSI_TEXTURE_1D
:
1483 case TGSI_TEXTURE_SHADOW1D
:
1485 FETCH(&r
[0], 0, CHAN_X
);
1488 FETCH(&r
[1], 0, CHAN_W
);
1489 micro_div( &r
[0], &r
[0], &r
[1] );
1493 FETCH(&r
[1], 0, CHAN_W
);
/* NOTE(review): W was just fetched into r[1], yet the bias is read from
 * r[2], which no visible statement has written at this point -- this looks
 * like it should be r[1]; confirm.
 */
1494 lodBias
= r
[2].f
[0];
1499 fetch_texel(mach
->Samplers
[unit
],
1500 &r
[0], &ZeroVec
, &ZeroVec
, lodBias
, /* S, T, P, BIAS */
1501 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1504 case TGSI_TEXTURE_2D
:
1505 case TGSI_TEXTURE_RECT
:
1506 case TGSI_TEXTURE_SHADOW2D
:
1507 case TGSI_TEXTURE_SHADOWRECT
:
1509 FETCH(&r
[0], 0, CHAN_X
);
1510 FETCH(&r
[1], 0, CHAN_Y
);
1511 FETCH(&r
[2], 0, CHAN_Z
);
1514 FETCH(&r
[3], 0, CHAN_W
);
1515 micro_div( &r
[0], &r
[0], &r
[3] );
1516 micro_div( &r
[1], &r
[1], &r
[3] );
1517 micro_div( &r
[2], &r
[2], &r
[3] );
1521 FETCH(&r
[3], 0, CHAN_W
);
1522 lodBias
= r
[3].f
[0];
1527 fetch_texel(mach
->Samplers
[unit
],
1528 &r
[0], &r
[1], &r
[2], lodBias
, /* inputs */
1529 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1532 case TGSI_TEXTURE_3D
:
1533 case TGSI_TEXTURE_CUBE
:
1535 FETCH(&r
[0], 0, CHAN_X
);
1536 FETCH(&r
[1], 0, CHAN_Y
);
1537 FETCH(&r
[2], 0, CHAN_Z
);
1540 FETCH(&r
[3], 0, CHAN_W
);
1541 micro_div( &r
[0], &r
[0], &r
[3] );
1542 micro_div( &r
[1], &r
[1], &r
[3] );
1543 micro_div( &r
[2], &r
[2], &r
[3] );
1547 FETCH(&r
[3], 0, CHAN_W
);
1548 lodBias
= r
[3].f
[0];
1553 fetch_texel(mach
->Samplers
[unit
],
1554 &r
[0], &r
[1], &r
[2], lodBias
,
1555 &r
[0], &r
[1], &r
[2], &r
[3]);
/* Write r[0..3] (R, G, B, A) to each enabled destination channel. */
1562 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1563 STORE( &r
[chan_index
], 0, chan_index
);
/*
 * exec_txd: texture fetch with explicit derivatives (placeholder version).
 *
 * The sampler unit comes from inst->Src[3].Register.Index and the
 * coordinates from source operand 0.  Every path samples with a LOD bias
 * of 0.0f; no derivative operand is fetched in the visible code.
 *
 * NOTE(review): the end of the switch is not visible in this excerpt, so
 * handling of other texture targets cannot be confirmed from here.
 */
1568 exec_txd(struct tgsi_exec_machine
*mach
,
1569 const struct tgsi_full_instruction
*inst
)
1571 const uint unit
= inst
->Src
[3].Register
.Index
;
1572 union tgsi_exec_channel r
[4];
1576 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1579 switch (inst
->Texture
.Texture
) {
1580 case TGSI_TEXTURE_1D
:
1581 case TGSI_TEXTURE_SHADOW1D
:
1583 FETCH(&r
[0], 0, CHAN_X
);
1585 fetch_texel(mach
->Samplers
[unit
],
1586 &r
[0], &ZeroVec
, &ZeroVec
, 0.0f
, /* S, T, P, BIAS */
1587 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1590 case TGSI_TEXTURE_2D
:
1591 case TGSI_TEXTURE_RECT
:
1592 case TGSI_TEXTURE_SHADOW2D
:
1593 case TGSI_TEXTURE_SHADOWRECT
:
1595 FETCH(&r
[0], 0, CHAN_X
);
1596 FETCH(&r
[1], 0, CHAN_Y
);
1597 FETCH(&r
[2], 0, CHAN_Z
);
1599 fetch_texel(mach
->Samplers
[unit
],
1600 &r
[0], &r
[1], &r
[2], 0.0f
, /* inputs */
1601 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1604 case TGSI_TEXTURE_3D
:
1605 case TGSI_TEXTURE_CUBE
:
1607 FETCH(&r
[0], 0, CHAN_X
);
1608 FETCH(&r
[1], 0, CHAN_Y
);
1609 FETCH(&r
[2], 0, CHAN_Z
);
1611 fetch_texel(mach
->Samplers
[unit
],
1612 &r
[0], &r
[1], &r
[2], 0.0f
,
1613 &r
[0], &r
[1], &r
[2], &r
[3]);
/* Write r[0..3] (R, G, B, A) to each enabled destination channel. */
1620 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
1621 STORE(&r
[chan_index
], 0, chan_index
);
1627 * Evaluate a constant-valued coefficient at the position of the
1632 struct tgsi_exec_machine
*mach
,
/* Constant interpolation: every lane of input [attrib].[chan] receives the
 * same a0 coefficient; no position-dependent term is applied.
 */
1638 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1639 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1644 * Evaluate a linear-valued coefficient at the position of the
1649 struct tgsi_exec_machine
*mach
,
/* x, y: position taken from lane 0 of QuadPos channels 0 and 1. */
1653 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1654 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1655 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1656 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
/* Plane equation evaluated at lane 0's position. */
1657 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
/* The other three lanes are reached by stepping from lane 0:
 * lane 1 = +dadx, lane 2 = +dady, lane 3 = +dadx +dady.
 */
1658 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
1659 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
1660 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
1661 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
1665 * Evaluate a perspective-valued coefficient at the position of the
/*
 * eval_perspective_coef: like the linear evaluation (plane equation at
 * lane 0, stepped by dadx / dady for the other lanes), but each lane's
 * value is then divided by that lane's W from QuadPos channel 3.
 *
 * NOTE(review): no visible guard against a W of zero -- confirm callers
 * guarantee non-zero W.
 */
1669 eval_perspective_coef(
1670 struct tgsi_exec_machine
*mach
,
1674 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1675 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1676 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1677 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1678 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1679 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
1680 /* divide by W here */
1681 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
1682 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
1683 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
1684 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
/* Pointer to a coefficient-evaluation routine; exec_declaration() assigns
 * eval_constant_coef, eval_linear_coef or eval_perspective_coef to a
 * variable of this type.
 * NOTE(review): the parameters after `mach` are not visible in this excerpt.
 */
1688 typedef void (* eval_coef_func
)(
1689 struct tgsi_exec_machine
*mach
,
/*
 * exec_declaration: set up fragment-shader inputs for one declaration.
 *
 * Only acts when the machine runs a fragment shader and the declaration is
 * for the INPUT or SYSTEM_VALUE file.  Three cases:
 *  - POSITION semantic: the input register is a copy of mach->QuadPos;
 *  - FACE semantic: mach->Face is written to every lane of channel 0;
 *  - anything else: an evaluation routine is picked from the declared
 *    interpolation mode, then the declared register range and usage mask
 *    are walked.
 *
 * NOTE(review): the statement inside the innermost loop (presumably the
 * call through `eval`) and any default case of the switch are not visible
 * in this excerpt -- confirm.
 */
1694 exec_declaration(struct tgsi_exec_machine
*mach
,
1695 const struct tgsi_full_declaration
*decl
)
1697 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1698 if (decl
->Declaration
.File
== TGSI_FILE_INPUT
||
1699 decl
->Declaration
.File
== TGSI_FILE_SYSTEM_VALUE
) {
1700 uint first
, last
, mask
;
1702 first
= decl
->Range
.First
;
1703 last
= decl
->Range
.Last
;
1704 mask
= decl
->Declaration
.UsageMask
;
1706 if (decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
) {
1707 assert(decl
->Semantic
.Index
== 0);
1708 assert(first
== last
);
1709 assert(mask
== TGSI_WRITEMASK_XYZW
);
1711 mach
->Inputs
[first
] = mach
->QuadPos
;
1712 } else if (decl
->Semantic
.Name
== TGSI_SEMANTIC_FACE
) {
1715 assert(decl
->Semantic
.Index
== 0);
1716 assert(first
== last
);
1718 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1719 mach
->Inputs
[first
].xyzw
[0].f
[i
] = mach
->Face
;
1722 eval_coef_func eval
;
1725 switch (decl
->Declaration
.Interpolate
) {
1726 case TGSI_INTERPOLATE_CONSTANT
:
1727 eval
= eval_constant_coef
;
1730 case TGSI_INTERPOLATE_LINEAR
:
1731 eval
= eval_linear_coef
;
1734 case TGSI_INTERPOLATE_PERSPECTIVE
:
1735 eval
= eval_perspective_coef
;
/* Visit every channel enabled in the usage mask, for every register in
 * the declared range [first, last].
 */
1743 for (j
= 0; j
< NUM_CHANNELS
; j
++) {
1744 if (mask
& (1 << j
)) {
1745 for (i
= first
; i
<= last
; i
++) {
/**
 * Signature shared by the per-channel micro operations.
 *
 * \param dst  channel that receives the result
 * \param src  first source channel; multi-operand operations index it as an
 *             array (src[0], src[1], ...)
 */
typedef void (*micro_op)(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src);
1759 exec_vector_unary(struct tgsi_exec_machine
*mach
,
1760 const struct tgsi_full_instruction
*inst
,
1762 enum tgsi_exec_datatype dst_datatype
,
1763 enum tgsi_exec_datatype src_datatype
)
1766 struct tgsi_exec_vector dst
;
1768 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1769 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1770 union tgsi_exec_channel src
;
1772 fetch_source(mach
, &src
, &inst
->Src
[0], chan
, src_datatype
);
1773 op(&dst
.xyzw
[chan
], &src
);
1776 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1777 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1778 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
1784 exec_vector_binary(struct tgsi_exec_machine
*mach
,
1785 const struct tgsi_full_instruction
*inst
,
1787 enum tgsi_exec_datatype dst_datatype
,
1788 enum tgsi_exec_datatype src_datatype
)
1791 struct tgsi_exec_vector dst
;
1793 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1794 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1795 union tgsi_exec_channel src
[2];
1797 fetch_source(mach
, &src
[0], &inst
->Src
[0], chan
, src_datatype
);
1798 fetch_source(mach
, &src
[1], &inst
->Src
[1], chan
, src_datatype
);
1799 op(&dst
.xyzw
[chan
], src
);
1802 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1803 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1804 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
1810 exec_vector_trinary(struct tgsi_exec_machine
*mach
,
1811 const struct tgsi_full_instruction
*inst
,
1813 enum tgsi_exec_datatype dst_datatype
,
1814 enum tgsi_exec_datatype src_datatype
)
1817 struct tgsi_exec_vector dst
;
1819 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1820 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1821 union tgsi_exec_channel src
[3];
1823 fetch_source(mach
, &src
[0], &inst
->Src
[0], chan
, src_datatype
);
1824 fetch_source(mach
, &src
[1], &inst
->Src
[1], chan
, src_datatype
);
1825 fetch_source(mach
, &src
[2], &inst
->Src
[2], chan
, src_datatype
);
1826 op(&dst
.xyzw
[chan
], src
);
1829 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1830 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1831 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
1837 exec_break(struct tgsi_exec_machine
*mach
)
1839 if (mach
->BreakType
== TGSI_EXEC_BREAK_INSIDE_LOOP
) {
1840 /* turn off loop channels for each enabled exec channel */
1841 mach
->LoopMask
&= ~mach
->ExecMask
;
1842 /* Todo: if mach->LoopMask == 0, jump to end of loop */
1843 UPDATE_EXEC_MASK(mach
);
1845 assert(mach
->BreakType
== TGSI_EXEC_BREAK_INSIDE_SWITCH
);
1847 mach
->Switch
.mask
= 0x0;
1849 UPDATE_EXEC_MASK(mach
);
1854 exec_switch(struct tgsi_exec_machine
*mach
,
1855 const struct tgsi_full_instruction
*inst
)
1857 assert(mach
->SwitchStackTop
< TGSI_EXEC_MAX_SWITCH_NESTING
);
1858 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
1860 mach
->SwitchStack
[mach
->SwitchStackTop
++] = mach
->Switch
;
1861 fetch_source(mach
, &mach
->Switch
.selector
, &inst
->Src
[0], CHAN_X
, TGSI_EXEC_DATA_UINT
);
1862 mach
->Switch
.mask
= 0x0;
1863 mach
->Switch
.defaultMask
= 0x0;
1865 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
1866 mach
->BreakType
= TGSI_EXEC_BREAK_INSIDE_SWITCH
;
1868 UPDATE_EXEC_MASK(mach
);
/*
 * exec_case: execute CASE inside the current switch.
 *
 * The case value is fetched from Src[0].x as unsigned integers and compared
 * lane by lane with the switch selector.  The resulting `mask` is added to
 * Switch.defaultMask, and the part of it that is also set in the enclosing
 * switch's mask (prevMask) is added to Switch.mask; then the exec mask is
 * recomputed.
 *
 * NOTE(review): the declaration of `mask` and the bodies of the four
 * comparisons are not visible in this excerpt -- presumably each sets the
 * bit for its lane; confirm.
 */
1872 exec_case(struct tgsi_exec_machine
*mach
,
1873 const struct tgsi_full_instruction
*inst
)
/* Mask of the switch scope that encloses the current one. */
1875 uint prevMask
= mach
->SwitchStack
[mach
->SwitchStackTop
- 1].mask
;
1876 union tgsi_exec_channel src
;
1879 fetch_source(mach
, &src
, &inst
->Src
[0], CHAN_X
, TGSI_EXEC_DATA_UINT
);
1881 if (mach
->Switch
.selector
.u
[0] == src
.u
[0]) {
1884 if (mach
->Switch
.selector
.u
[1] == src
.u
[1]) {
1887 if (mach
->Switch
.selector
.u
[2] == src
.u
[2]) {
1890 if (mach
->Switch
.selector
.u
[3] == src
.u
[3]) {
1894 mach
->Switch
.defaultMask
|= mask
;
1896 mach
->Switch
.mask
|= mask
& prevMask
;
1898 UPDATE_EXEC_MASK(mach
);
1902 exec_default(struct tgsi_exec_machine
*mach
)
1904 uint prevMask
= mach
->SwitchStack
[mach
->SwitchStackTop
- 1].mask
;
1906 mach
->Switch
.mask
|= ~mach
->Switch
.defaultMask
& prevMask
;
1908 UPDATE_EXEC_MASK(mach
);
1912 exec_endswitch(struct tgsi_exec_machine
*mach
)
1914 mach
->Switch
= mach
->SwitchStack
[--mach
->SwitchStackTop
];
1915 mach
->BreakType
= mach
->BreakStack
[--mach
->BreakStackTop
];
1917 UPDATE_EXEC_MASK(mach
);
1921 micro_i2f(union tgsi_exec_channel
*dst
,
1922 const union tgsi_exec_channel
*src
)
1924 dst
->f
[0] = (float)src
->i
[0];
1925 dst
->f
[1] = (float)src
->i
[1];
1926 dst
->f
[2] = (float)src
->i
[2];
1927 dst
->f
[3] = (float)src
->i
[3];
1931 micro_not(union tgsi_exec_channel
*dst
,
1932 const union tgsi_exec_channel
*src
)
1934 dst
->u
[0] = ~src
->u
[0];
1935 dst
->u
[1] = ~src
->u
[1];
1936 dst
->u
[2] = ~src
->u
[2];
1937 dst
->u
[3] = ~src
->u
[3];
1941 micro_shl(union tgsi_exec_channel
*dst
,
1942 const union tgsi_exec_channel
*src
)
1944 dst
->u
[0] = src
[0].u
[0] << src
[1].u
[0];
1945 dst
->u
[1] = src
[0].u
[1] << src
[1].u
[1];
1946 dst
->u
[2] = src
[0].u
[2] << src
[1].u
[2];
1947 dst
->u
[3] = src
[0].u
[3] << src
[1].u
[3];
1951 micro_and(union tgsi_exec_channel
*dst
,
1952 const union tgsi_exec_channel
*src
)
1954 dst
->u
[0] = src
[0].u
[0] & src
[1].u
[0];
1955 dst
->u
[1] = src
[0].u
[1] & src
[1].u
[1];
1956 dst
->u
[2] = src
[0].u
[2] & src
[1].u
[2];
1957 dst
->u
[3] = src
[0].u
[3] & src
[1].u
[3];
1961 micro_or(union tgsi_exec_channel
*dst
,
1962 const union tgsi_exec_channel
*src
)
1964 dst
->u
[0] = src
[0].u
[0] | src
[1].u
[0];
1965 dst
->u
[1] = src
[0].u
[1] | src
[1].u
[1];
1966 dst
->u
[2] = src
[0].u
[2] | src
[1].u
[2];
1967 dst
->u
[3] = src
[0].u
[3] | src
[1].u
[3];
1971 micro_xor(union tgsi_exec_channel
*dst
,
1972 const union tgsi_exec_channel
*src
)
1974 dst
->u
[0] = src
[0].u
[0] ^ src
[1].u
[0];
1975 dst
->u
[1] = src
[0].u
[1] ^ src
[1].u
[1];
1976 dst
->u
[2] = src
[0].u
[2] ^ src
[1].u
[2];
1977 dst
->u
[3] = src
[0].u
[3] ^ src
[1].u
[3];
1981 micro_f2i(union tgsi_exec_channel
*dst
,
1982 const union tgsi_exec_channel
*src
)
1984 dst
->i
[0] = (int)src
->f
[0];
1985 dst
->i
[1] = (int)src
->f
[1];
1986 dst
->i
[2] = (int)src
->f
[2];
1987 dst
->i
[3] = (int)src
->f
[3];
1991 micro_idiv(union tgsi_exec_channel
*dst
,
1992 const union tgsi_exec_channel
*src
)
1994 dst
->i
[0] = src
[0].i
[0] / src
[1].i
[0];
1995 dst
->i
[1] = src
[0].i
[1] / src
[1].i
[1];
1996 dst
->i
[2] = src
[0].i
[2] / src
[1].i
[2];
1997 dst
->i
[3] = src
[0].i
[3] / src
[1].i
[3];
2001 micro_imax(union tgsi_exec_channel
*dst
,
2002 const union tgsi_exec_channel
*src
)
2004 dst
->i
[0] = src
[0].i
[0] > src
[1].i
[0] ? src
[0].i
[0] : src
[1].i
[0];
2005 dst
->i
[1] = src
[0].i
[1] > src
[1].i
[1] ? src
[0].i
[1] : src
[1].i
[1];
2006 dst
->i
[2] = src
[0].i
[2] > src
[1].i
[2] ? src
[0].i
[2] : src
[1].i
[2];
2007 dst
->i
[3] = src
[0].i
[3] > src
[1].i
[3] ? src
[0].i
[3] : src
[1].i
[3];
2011 micro_imin(union tgsi_exec_channel
*dst
,
2012 const union tgsi_exec_channel
*src
)
2014 dst
->i
[0] = src
[0].i
[0] < src
[1].i
[0] ? src
[0].i
[0] : src
[1].i
[0];
2015 dst
->i
[1] = src
[0].i
[1] < src
[1].i
[1] ? src
[0].i
[1] : src
[1].i
[1];
2016 dst
->i
[2] = src
[0].i
[2] < src
[1].i
[2] ? src
[0].i
[2] : src
[1].i
[2];
2017 dst
->i
[3] = src
[0].i
[3] < src
[1].i
[3] ? src
[0].i
[3] : src
[1].i
[3];
2021 micro_isge(union tgsi_exec_channel
*dst
,
2022 const union tgsi_exec_channel
*src
)
2024 dst
->i
[0] = src
[0].i
[0] >= src
[1].i
[0] ? -1 : 0;
2025 dst
->i
[1] = src
[0].i
[1] >= src
[1].i
[1] ? -1 : 0;
2026 dst
->i
[2] = src
[0].i
[2] >= src
[1].i
[2] ? -1 : 0;
2027 dst
->i
[3] = src
[0].i
[3] >= src
[1].i
[3] ? -1 : 0;
2031 micro_ishr(union tgsi_exec_channel
*dst
,
2032 const union tgsi_exec_channel
*src
)
2034 dst
->i
[0] = src
[0].i
[0] >> src
[1].i
[0];
2035 dst
->i
[1] = src
[0].i
[1] >> src
[1].i
[1];
2036 dst
->i
[2] = src
[0].i
[2] >> src
[1].i
[2];
2037 dst
->i
[3] = src
[0].i
[3] >> src
[1].i
[3];
2041 micro_islt(union tgsi_exec_channel
*dst
,
2042 const union tgsi_exec_channel
*src
)
2044 dst
->i
[0] = src
[0].i
[0] < src
[1].i
[0] ? -1 : 0;
2045 dst
->i
[1] = src
[0].i
[1] < src
[1].i
[1] ? -1 : 0;
2046 dst
->i
[2] = src
[0].i
[2] < src
[1].i
[2] ? -1 : 0;
2047 dst
->i
[3] = src
[0].i
[3] < src
[1].i
[3] ? -1 : 0;
2051 micro_f2u(union tgsi_exec_channel
*dst
,
2052 const union tgsi_exec_channel
*src
)
2054 dst
->u
[0] = (uint
)src
->f
[0];
2055 dst
->u
[1] = (uint
)src
->f
[1];
2056 dst
->u
[2] = (uint
)src
->f
[2];
2057 dst
->u
[3] = (uint
)src
->f
[3];
2061 micro_u2f(union tgsi_exec_channel
*dst
,
2062 const union tgsi_exec_channel
*src
)
2064 dst
->f
[0] = (float)src
->u
[0];
2065 dst
->f
[1] = (float)src
->u
[1];
2066 dst
->f
[2] = (float)src
->u
[2];
2067 dst
->f
[3] = (float)src
->u
[3];
2071 micro_uadd(union tgsi_exec_channel
*dst
,
2072 const union tgsi_exec_channel
*src
)
2074 dst
->u
[0] = src
[0].u
[0] + src
[1].u
[0];
2075 dst
->u
[1] = src
[0].u
[1] + src
[1].u
[1];
2076 dst
->u
[2] = src
[0].u
[2] + src
[1].u
[2];
2077 dst
->u
[3] = src
[0].u
[3] + src
[1].u
[3];
2081 micro_udiv(union tgsi_exec_channel
*dst
,
2082 const union tgsi_exec_channel
*src
)
2084 dst
->u
[0] = src
[0].u
[0] / src
[1].u
[0];
2085 dst
->u
[1] = src
[0].u
[1] / src
[1].u
[1];
2086 dst
->u
[2] = src
[0].u
[2] / src
[1].u
[2];
2087 dst
->u
[3] = src
[0].u
[3] / src
[1].u
[3];
2091 micro_umad(union tgsi_exec_channel
*dst
,
2092 const union tgsi_exec_channel
*src
)
2094 dst
->u
[0] = src
[0].u
[0] * src
[1].u
[0] + src
[2].u
[0];
2095 dst
->u
[1] = src
[0].u
[1] * src
[1].u
[1] + src
[2].u
[1];
2096 dst
->u
[2] = src
[0].u
[2] * src
[1].u
[2] + src
[2].u
[2];
2097 dst
->u
[3] = src
[0].u
[3] * src
[1].u
[3] + src
[2].u
[3];
2101 micro_umax(union tgsi_exec_channel
*dst
,
2102 const union tgsi_exec_channel
*src
)
2104 dst
->u
[0] = src
[0].u
[0] > src
[1].u
[0] ? src
[0].u
[0] : src
[1].u
[0];
2105 dst
->u
[1] = src
[0].u
[1] > src
[1].u
[1] ? src
[0].u
[1] : src
[1].u
[1];
2106 dst
->u
[2] = src
[0].u
[2] > src
[1].u
[2] ? src
[0].u
[2] : src
[1].u
[2];
2107 dst
->u
[3] = src
[0].u
[3] > src
[1].u
[3] ? src
[0].u
[3] : src
[1].u
[3];
2111 micro_umin(union tgsi_exec_channel
*dst
,
2112 const union tgsi_exec_channel
*src
)
2114 dst
->u
[0] = src
[0].u
[0] < src
[1].u
[0] ? src
[0].u
[0] : src
[1].u
[0];
2115 dst
->u
[1] = src
[0].u
[1] < src
[1].u
[1] ? src
[0].u
[1] : src
[1].u
[1];
2116 dst
->u
[2] = src
[0].u
[2] < src
[1].u
[2] ? src
[0].u
[2] : src
[1].u
[2];
2117 dst
->u
[3] = src
[0].u
[3] < src
[1].u
[3] ? src
[0].u
[3] : src
[1].u
[3];
2121 micro_umod(union tgsi_exec_channel
*dst
,
2122 const union tgsi_exec_channel
*src
)
2124 dst
->u
[0] = src
[0].u
[0] % src
[1].u
[0];
2125 dst
->u
[1] = src
[0].u
[1] % src
[1].u
[1];
2126 dst
->u
[2] = src
[0].u
[2] % src
[1].u
[2];
2127 dst
->u
[3] = src
[0].u
[3] % src
[1].u
[3];
2131 micro_umul(union tgsi_exec_channel
*dst
,
2132 const union tgsi_exec_channel
*src
)
2134 dst
->u
[0] = src
[0].u
[0] * src
[1].u
[0];
2135 dst
->u
[1] = src
[0].u
[1] * src
[1].u
[1];
2136 dst
->u
[2] = src
[0].u
[2] * src
[1].u
[2];
2137 dst
->u
[3] = src
[0].u
[3] * src
[1].u
[3];
2141 micro_useq(union tgsi_exec_channel
*dst
,
2142 const union tgsi_exec_channel
*src
)
2144 dst
->u
[0] = src
[0].u
[0] == src
[1].u
[0] ? ~0 : 0;
2145 dst
->u
[1] = src
[0].u
[1] == src
[1].u
[1] ? ~0 : 0;
2146 dst
->u
[2] = src
[0].u
[2] == src
[1].u
[2] ? ~0 : 0;
2147 dst
->u
[3] = src
[0].u
[3] == src
[1].u
[3] ? ~0 : 0;
2151 micro_usge(union tgsi_exec_channel
*dst
,
2152 const union tgsi_exec_channel
*src
)
2154 dst
->u
[0] = src
[0].u
[0] >= src
[1].u
[0] ? ~0 : 0;
2155 dst
->u
[1] = src
[0].u
[1] >= src
[1].u
[1] ? ~0 : 0;
2156 dst
->u
[2] = src
[0].u
[2] >= src
[1].u
[2] ? ~0 : 0;
2157 dst
->u
[3] = src
[0].u
[3] >= src
[1].u
[3] ? ~0 : 0;
2161 micro_ushr(union tgsi_exec_channel
*dst
,
2162 const union tgsi_exec_channel
*src
)
2164 dst
->u
[0] = src
[0].u
[0] >> src
[1].u
[0];
2165 dst
->u
[1] = src
[0].u
[1] >> src
[1].u
[1];
2166 dst
->u
[2] = src
[0].u
[2] >> src
[1].u
[2];
2167 dst
->u
[3] = src
[0].u
[3] >> src
[1].u
[3];
2171 micro_uslt(union tgsi_exec_channel
*dst
,
2172 const union tgsi_exec_channel
*src
)
2174 dst
->u
[0] = src
[0].u
[0] < src
[1].u
[0] ? ~0 : 0;
2175 dst
->u
[1] = src
[0].u
[1] < src
[1].u
[1] ? ~0 : 0;
2176 dst
->u
[2] = src
[0].u
[2] < src
[1].u
[2] ? ~0 : 0;
2177 dst
->u
[3] = src
[0].u
[3] < src
[1].u
[3] ? ~0 : 0;
2181 micro_usne(union tgsi_exec_channel
*dst
,
2182 const union tgsi_exec_channel
*src
)
2184 dst
->u
[0] = src
[0].u
[0] != src
[1].u
[0] ? ~0 : 0;
2185 dst
->u
[1] = src
[0].u
[1] != src
[1].u
[1] ? ~0 : 0;
2186 dst
->u
[2] = src
[0].u
[2] != src
[1].u
[2] ? ~0 : 0;
2187 dst
->u
[3] = src
[0].u
[3] != src
[1].u
[3] ? ~0 : 0;
2192 struct tgsi_exec_machine
*mach
,
2193 const struct tgsi_full_instruction
*inst
,
2197 union tgsi_exec_channel r
[10];
2198 union tgsi_exec_channel d
[8];
2202 switch (inst
->Instruction
.Opcode
) {
2203 case TGSI_OPCODE_ARL
:
2204 case TGSI_OPCODE_FLR
:
2205 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2206 FETCH( &r
[0], 0, chan_index
);
2207 micro_flr(&d
[chan_index
], &r
[0]);
2209 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2210 STORE(&d
[chan_index
], 0, chan_index
);
2214 case TGSI_OPCODE_MOV
:
2215 exec_vector_unary(mach
, inst
, micro_mov
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
2218 case TGSI_OPCODE_LIT
:
2219 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2220 FETCH( &r
[0], 0, CHAN_X
);
2221 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2222 micro_max(&d
[CHAN_Y
], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2225 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2226 FETCH( &r
[1], 0, CHAN_Y
);
2227 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2229 FETCH( &r
[2], 0, CHAN_W
);
2230 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
2231 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
2232 micro_pow( &r
[1], &r
[1], &r
[2] );
2233 micro_lt(&d
[CHAN_Z
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2236 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2237 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2239 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2240 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2243 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2244 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
2246 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2247 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2251 case TGSI_OPCODE_RCP
:
2252 /* TGSI_OPCODE_RECIP */
2253 FETCH( &r
[0], 0, CHAN_X
);
2254 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
2255 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2256 STORE( &r
[0], 0, chan_index
);
2260 case TGSI_OPCODE_RSQ
:
2261 /* TGSI_OPCODE_RECIPSQRT */
2262 FETCH( &r
[0], 0, CHAN_X
);
2263 micro_abs( &r
[0], &r
[0] );
2264 micro_sqrt( &r
[0], &r
[0] );
2265 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
2266 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2267 STORE( &r
[0], 0, chan_index
);
2271 case TGSI_OPCODE_EXP
:
2272 FETCH( &r
[0], 0, CHAN_X
);
2273 micro_flr( &r
[1], &r
[0] ); /* r1 = floor(r0) */
2274 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2275 micro_exp2( &r
[2], &r
[1] ); /* r2 = 2 ^ r1 */
2276 STORE( &r
[2], 0, CHAN_X
); /* store r2 */
2278 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2279 micro_sub( &r
[2], &r
[0], &r
[1] ); /* r2 = r0 - r1 */
2280 STORE( &r
[2], 0, CHAN_Y
); /* store r2 */
2282 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2283 micro_exp2( &r
[2], &r
[0] ); /* r2 = 2 ^ r0 */
2284 STORE( &r
[2], 0, CHAN_Z
); /* store r2 */
2286 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2287 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2291 case TGSI_OPCODE_LOG
:
2292 FETCH( &r
[0], 0, CHAN_X
);
2293 micro_abs( &r
[2], &r
[0] ); /* r2 = abs(r0) */
2294 micro_lg2( &r
[1], &r
[2] ); /* r1 = lg2(r2) */
2295 micro_flr( &r
[0], &r
[1] ); /* r0 = floor(r1) */
2296 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2297 STORE( &r
[0], 0, CHAN_X
);
2299 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2300 micro_exp2( &r
[0], &r
[0] ); /* r0 = 2 ^ r0 */
2301 micro_div( &r
[0], &r
[2], &r
[0] ); /* r0 = r2 / r0 */
2302 STORE( &r
[0], 0, CHAN_Y
);
2304 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2305 STORE( &r
[1], 0, CHAN_Z
);
2307 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2308 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2312 case TGSI_OPCODE_MUL
:
2313 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2314 FETCH(&r
[0], 0, chan_index
);
2315 FETCH(&r
[1], 1, chan_index
);
2316 micro_mul(&d
[chan_index
], &r
[0], &r
[1]);
2318 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2319 STORE(&d
[chan_index
], 0, chan_index
);
2323 case TGSI_OPCODE_ADD
:
2324 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2325 FETCH( &r
[0], 0, chan_index
);
2326 FETCH( &r
[1], 1, chan_index
);
2327 micro_add(&d
[chan_index
], &r
[0], &r
[1]);
2329 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2330 STORE(&d
[chan_index
], 0, chan_index
);
2334 case TGSI_OPCODE_DP3
:
2335 /* TGSI_OPCODE_DOT3 */
2336 FETCH( &r
[0], 0, CHAN_X
);
2337 FETCH( &r
[1], 1, CHAN_X
);
2338 micro_mul( &r
[0], &r
[0], &r
[1] );
2340 FETCH( &r
[1], 0, CHAN_Y
);
2341 FETCH( &r
[2], 1, CHAN_Y
);
2342 micro_mul( &r
[1], &r
[1], &r
[2] );
2343 micro_add( &r
[0], &r
[0], &r
[1] );
2345 FETCH( &r
[1], 0, CHAN_Z
);
2346 FETCH( &r
[2], 1, CHAN_Z
);
2347 micro_mul( &r
[1], &r
[1], &r
[2] );
2348 micro_add( &r
[0], &r
[0], &r
[1] );
2350 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2351 STORE( &r
[0], 0, chan_index
);
2355 case TGSI_OPCODE_DP4
:
2356 /* TGSI_OPCODE_DOT4 */
2357 FETCH(&r
[0], 0, CHAN_X
);
2358 FETCH(&r
[1], 1, CHAN_X
);
2360 micro_mul( &r
[0], &r
[0], &r
[1] );
2362 FETCH(&r
[1], 0, CHAN_Y
);
2363 FETCH(&r
[2], 1, CHAN_Y
);
2365 micro_mul( &r
[1], &r
[1], &r
[2] );
2366 micro_add( &r
[0], &r
[0], &r
[1] );
2368 FETCH(&r
[1], 0, CHAN_Z
);
2369 FETCH(&r
[2], 1, CHAN_Z
);
2371 micro_mul( &r
[1], &r
[1], &r
[2] );
2372 micro_add( &r
[0], &r
[0], &r
[1] );
2374 FETCH(&r
[1], 0, CHAN_W
);
2375 FETCH(&r
[2], 1, CHAN_W
);
2377 micro_mul( &r
[1], &r
[1], &r
[2] );
2378 micro_add( &r
[0], &r
[0], &r
[1] );
2380 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2381 STORE( &r
[0], 0, chan_index
);
2385 case TGSI_OPCODE_DST
:
2386 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2387 FETCH( &r
[0], 0, CHAN_Y
);
2388 FETCH( &r
[1], 1, CHAN_Y
);
2389 micro_mul(&d
[CHAN_Y
], &r
[0], &r
[1]);
2391 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2392 FETCH(&d
[CHAN_Z
], 0, CHAN_Z
);
2394 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2395 FETCH(&d
[CHAN_W
], 1, CHAN_W
);
2398 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2399 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
2401 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2402 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2404 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2405 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2407 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2408 STORE(&d
[CHAN_W
], 0, CHAN_W
);
2412 case TGSI_OPCODE_MIN
:
2413 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2414 FETCH(&r
[0], 0, chan_index
);
2415 FETCH(&r
[1], 1, chan_index
);
2417 /* XXX use micro_min()?? */
2418 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &r
[0], &r
[1]);
2420 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2421 STORE(&d
[chan_index
], 0, chan_index
);
2425 case TGSI_OPCODE_MAX
:
2426 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2427 FETCH(&r
[0], 0, chan_index
);
2428 FETCH(&r
[1], 1, chan_index
);
2430 /* XXX use micro_max()?? */
2431 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &r
[1], &r
[0] );
2433 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2434 STORE(&d
[chan_index
], 0, chan_index
);
2438 case TGSI_OPCODE_SLT
:
2439 /* TGSI_OPCODE_SETLT */
2440 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2441 FETCH( &r
[0], 0, chan_index
);
2442 FETCH( &r
[1], 1, chan_index
);
2443 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2445 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2446 STORE(&d
[chan_index
], 0, chan_index
);
2450 case TGSI_OPCODE_SGE
:
2451 /* TGSI_OPCODE_SETGE */
2452 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2453 FETCH( &r
[0], 0, chan_index
);
2454 FETCH( &r
[1], 1, chan_index
);
2455 micro_le(&d
[chan_index
], &r
[1], &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2457 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2458 STORE(&d
[chan_index
], 0, chan_index
);
2462 case TGSI_OPCODE_MAD
:
2463 /* TGSI_OPCODE_MADD */
2464 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2465 FETCH( &r
[0], 0, chan_index
);
2466 FETCH( &r
[1], 1, chan_index
);
2467 micro_mul( &r
[0], &r
[0], &r
[1] );
2468 FETCH( &r
[1], 2, chan_index
);
2469 micro_add(&d
[chan_index
], &r
[0], &r
[1]);
2471 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2472 STORE(&d
[chan_index
], 0, chan_index
);
2476 case TGSI_OPCODE_SUB
:
2477 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2478 FETCH(&r
[0], 0, chan_index
);
2479 FETCH(&r
[1], 1, chan_index
);
2480 micro_sub(&d
[chan_index
], &r
[0], &r
[1]);
2482 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2483 STORE(&d
[chan_index
], 0, chan_index
);
2487 case TGSI_OPCODE_LRP
:
2488 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2489 FETCH(&r
[0], 0, chan_index
);
2490 FETCH(&r
[1], 1, chan_index
);
2491 FETCH(&r
[2], 2, chan_index
);
2492 micro_sub( &r
[1], &r
[1], &r
[2] );
2493 micro_mul( &r
[0], &r
[0], &r
[1] );
2494 micro_add(&d
[chan_index
], &r
[0], &r
[2]);
2496 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2497 STORE(&d
[chan_index
], 0, chan_index
);
2501 case TGSI_OPCODE_CND
:
2502 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2503 FETCH(&r
[0], 0, chan_index
);
2504 FETCH(&r
[1], 1, chan_index
);
2505 FETCH(&r
[2], 2, chan_index
);
2506 micro_lt(&d
[chan_index
], &mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
], &r
[2], &r
[0], &r
[1]);
2508 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2509 STORE(&d
[chan_index
], 0, chan_index
);
2513 case TGSI_OPCODE_DP2A
:
2514 FETCH( &r
[0], 0, CHAN_X
);
2515 FETCH( &r
[1], 1, CHAN_X
);
2516 micro_mul( &r
[0], &r
[0], &r
[1] );
2518 FETCH( &r
[1], 0, CHAN_Y
);
2519 FETCH( &r
[2], 1, CHAN_Y
);
2520 micro_mul( &r
[1], &r
[1], &r
[2] );
2521 micro_add( &r
[0], &r
[0], &r
[1] );
2523 FETCH( &r
[2], 2, CHAN_X
);
2524 micro_add( &r
[0], &r
[0], &r
[2] );
2526 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2527 STORE( &r
[0], 0, chan_index
);
2531 case TGSI_OPCODE_FRC
:
2532 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2533 FETCH( &r
[0], 0, chan_index
);
2534 micro_frc(&d
[chan_index
], &r
[0]);
2536 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2537 STORE(&d
[chan_index
], 0, chan_index
);
2541 case TGSI_OPCODE_CLAMP
:
2542 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2543 FETCH(&r
[0], 0, chan_index
);
2544 FETCH(&r
[1], 1, chan_index
);
2545 micro_max(&r
[0], &r
[0], &r
[1]);
2546 FETCH(&r
[1], 2, chan_index
);
2547 micro_min(&d
[chan_index
], &r
[0], &r
[1]);
2549 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2550 STORE(&d
[chan_index
], 0, chan_index
);
2554 case TGSI_OPCODE_ROUND
:
2555 case TGSI_OPCODE_ARR
:
2556 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2557 FETCH( &r
[0], 0, chan_index
);
2558 micro_rnd(&d
[chan_index
], &r
[0]);
2560 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2561 STORE(&d
[chan_index
], 0, chan_index
);
2565 case TGSI_OPCODE_EX2
:
2566 FETCH(&r
[0], 0, CHAN_X
);
2568 micro_exp2( &r
[0], &r
[0] );
2570 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2571 STORE( &r
[0], 0, chan_index
);
2575 case TGSI_OPCODE_LG2
:
2576 FETCH( &r
[0], 0, CHAN_X
);
2577 micro_lg2( &r
[0], &r
[0] );
2578 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2579 STORE( &r
[0], 0, chan_index
);
2583 case TGSI_OPCODE_POW
:
2584 FETCH(&r
[0], 0, CHAN_X
);
2585 FETCH(&r
[1], 1, CHAN_X
);
2587 micro_pow( &r
[0], &r
[0], &r
[1] );
2589 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2590 STORE( &r
[0], 0, chan_index
);
2594 case TGSI_OPCODE_XPD
:
2595 FETCH(&r
[0], 0, CHAN_Y
);
2596 FETCH(&r
[1], 1, CHAN_Z
);
2598 micro_mul( &r
[2], &r
[0], &r
[1] );
2600 FETCH(&r
[3], 0, CHAN_Z
);
2601 FETCH(&r
[4], 1, CHAN_Y
);
2603 micro_mul( &r
[5], &r
[3], &r
[4] );
2604 micro_sub(&d
[CHAN_X
], &r
[2], &r
[5]);
2606 FETCH(&r
[2], 1, CHAN_X
);
2608 micro_mul( &r
[3], &r
[3], &r
[2] );
2610 FETCH(&r
[5], 0, CHAN_X
);
2612 micro_mul( &r
[1], &r
[1], &r
[5] );
2613 micro_sub(&d
[CHAN_Y
], &r
[3], &r
[1]);
2615 micro_mul( &r
[5], &r
[5], &r
[4] );
2616 micro_mul( &r
[0], &r
[0], &r
[2] );
2617 micro_sub(&d
[CHAN_Z
], &r
[5], &r
[0]);
2619 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2620 STORE(&d
[CHAN_X
], 0, CHAN_X
);
2622 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2623 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2625 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2626 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2628 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2629 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2633 case TGSI_OPCODE_ABS
:
2634 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2635 FETCH(&r
[0], 0, chan_index
);
2636 micro_abs(&d
[chan_index
], &r
[0]);
2638 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2639 STORE(&d
[chan_index
], 0, chan_index
);
2643 case TGSI_OPCODE_RCC
:
2644 FETCH(&r
[0], 0, CHAN_X
);
2645 micro_div(&r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0]);
2646 micro_float_clamp(&r
[0], &r
[0]);
2647 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2648 STORE(&r
[0], 0, chan_index
);
2652 case TGSI_OPCODE_DPH
:
2653 FETCH(&r
[0], 0, CHAN_X
);
2654 FETCH(&r
[1], 1, CHAN_X
);
2656 micro_mul( &r
[0], &r
[0], &r
[1] );
2658 FETCH(&r
[1], 0, CHAN_Y
);
2659 FETCH(&r
[2], 1, CHAN_Y
);
2661 micro_mul( &r
[1], &r
[1], &r
[2] );
2662 micro_add( &r
[0], &r
[0], &r
[1] );
2664 FETCH(&r
[1], 0, CHAN_Z
);
2665 FETCH(&r
[2], 1, CHAN_Z
);
2667 micro_mul( &r
[1], &r
[1], &r
[2] );
2668 micro_add( &r
[0], &r
[0], &r
[1] );
2670 FETCH(&r
[1], 1, CHAN_W
);
2672 micro_add( &r
[0], &r
[0], &r
[1] );
2674 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2675 STORE( &r
[0], 0, chan_index
);
2679 case TGSI_OPCODE_COS
:
2680 FETCH(&r
[0], 0, CHAN_X
);
2682 micro_cos( &r
[0], &r
[0] );
2684 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2685 STORE( &r
[0], 0, chan_index
);
2689 case TGSI_OPCODE_DDX
:
2690 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2691 FETCH( &r
[0], 0, chan_index
);
2692 micro_ddx(&d
[chan_index
], &r
[0]);
2694 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2695 STORE(&d
[chan_index
], 0, chan_index
);
2699 case TGSI_OPCODE_DDY
:
2700 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2701 FETCH( &r
[0], 0, chan_index
);
2702 micro_ddy(&d
[chan_index
], &r
[0]);
2704 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2705 STORE(&d
[chan_index
], 0, chan_index
);
2709 case TGSI_OPCODE_KILP
:
2710 exec_kilp (mach
, inst
);
2713 case TGSI_OPCODE_KIL
:
2714 exec_kil (mach
, inst
);
2717 case TGSI_OPCODE_PK2H
:
2721 case TGSI_OPCODE_PK2US
:
2725 case TGSI_OPCODE_PK4B
:
2729 case TGSI_OPCODE_PK4UB
:
2733 case TGSI_OPCODE_RFL
:
2734 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2735 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2736 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2737 /* r0 = dp3(src0, src0) */
2738 FETCH(&r
[2], 0, CHAN_X
);
2739 micro_mul(&r
[0], &r
[2], &r
[2]);
2740 FETCH(&r
[4], 0, CHAN_Y
);
2741 micro_mul(&r
[8], &r
[4], &r
[4]);
2742 micro_add(&r
[0], &r
[0], &r
[8]);
2743 FETCH(&r
[6], 0, CHAN_Z
);
2744 micro_mul(&r
[8], &r
[6], &r
[6]);
2745 micro_add(&r
[0], &r
[0], &r
[8]);
2747 /* r1 = dp3(src0, src1) */
2748 FETCH(&r
[3], 1, CHAN_X
);
2749 micro_mul(&r
[1], &r
[2], &r
[3]);
2750 FETCH(&r
[5], 1, CHAN_Y
);
2751 micro_mul(&r
[8], &r
[4], &r
[5]);
2752 micro_add(&r
[1], &r
[1], &r
[8]);
2753 FETCH(&r
[7], 1, CHAN_Z
);
2754 micro_mul(&r
[8], &r
[6], &r
[7]);
2755 micro_add(&r
[1], &r
[1], &r
[8]);
2757 /* r1 = 2 * r1 / r0 */
2758 micro_add(&r
[1], &r
[1], &r
[1]);
2759 micro_div(&r
[1], &r
[1], &r
[0]);
2761 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2762 micro_mul(&r
[2], &r
[2], &r
[1]);
2763 micro_sub(&r
[2], &r
[2], &r
[3]);
2764 STORE(&r
[2], 0, CHAN_X
);
2766 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2767 micro_mul(&r
[4], &r
[4], &r
[1]);
2768 micro_sub(&r
[4], &r
[4], &r
[5]);
2769 STORE(&r
[4], 0, CHAN_Y
);
2771 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2772 micro_mul(&r
[6], &r
[6], &r
[1]);
2773 micro_sub(&r
[6], &r
[6], &r
[7]);
2774 STORE(&r
[6], 0, CHAN_Z
);
2777 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2778 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2782 case TGSI_OPCODE_SEQ
:
2783 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2784 FETCH( &r
[0], 0, chan_index
);
2785 FETCH( &r
[1], 1, chan_index
);
2786 micro_eq(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2788 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2789 STORE(&d
[chan_index
], 0, chan_index
);
2793 case TGSI_OPCODE_SFL
:
2794 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2795 STORE(&mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, chan_index
);
2799 case TGSI_OPCODE_SGT
:
2800 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2801 FETCH( &r
[0], 0, chan_index
);
2802 FETCH( &r
[1], 1, chan_index
);
2803 micro_le(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
2805 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2806 STORE(&d
[chan_index
], 0, chan_index
);
2810 case TGSI_OPCODE_SIN
:
2811 FETCH( &r
[0], 0, CHAN_X
);
2812 micro_sin( &r
[0], &r
[0] );
2813 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2814 STORE( &r
[0], 0, chan_index
);
2818 case TGSI_OPCODE_SLE
:
2819 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2820 FETCH( &r
[0], 0, chan_index
);
2821 FETCH( &r
[1], 1, chan_index
);
2822 micro_le(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2824 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2825 STORE(&d
[chan_index
], 0, chan_index
);
2829 case TGSI_OPCODE_SNE
:
2830 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2831 FETCH( &r
[0], 0, chan_index
);
2832 FETCH( &r
[1], 1, chan_index
);
2833 micro_eq(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
2835 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2836 STORE(&d
[chan_index
], 0, chan_index
);
2840 case TGSI_OPCODE_STR
:
2841 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2842 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, chan_index
);
2846 case TGSI_OPCODE_TEX
:
2847 /* simple texture lookup */
2848 /* src[0] = texcoord */
2849 /* src[1] = sampler unit */
2850 exec_tex(mach
, inst
, FALSE
, FALSE
);
2853 case TGSI_OPCODE_TXB
:
2854 /* Texture lookup with lod bias */
2855 /* src[0] = texcoord (src[0].w = LOD bias) */
2856 /* src[1] = sampler unit */
2857 exec_tex(mach
, inst
, TRUE
, FALSE
);
2860 case TGSI_OPCODE_TXD
:
2861 /* Texture lookup with explicit partial derivatives */
2862 /* src[0] = texcoord */
2863 /* src[1] = d[strq]/dx */
2864 /* src[2] = d[strq]/dy */
2865 /* src[3] = sampler unit */
2866 exec_txd(mach
, inst
);
2869 case TGSI_OPCODE_TXL
:
2870 /* Texture lookup with explicit LOD */
2871 /* src[0] = texcoord (src[0].w = LOD) */
2872 /* src[1] = sampler unit */
2873 exec_tex(mach
, inst
, TRUE
, FALSE
);
2876 case TGSI_OPCODE_TXP
:
2877 /* Texture lookup with projection */
2878 /* src[0] = texcoord (src[0].w = projection) */
2879 /* src[1] = sampler unit */
2880 exec_tex(mach
, inst
, FALSE
, TRUE
);
2883 case TGSI_OPCODE_UP2H
:
2887 case TGSI_OPCODE_UP2US
:
2891 case TGSI_OPCODE_UP4B
:
2895 case TGSI_OPCODE_UP4UB
:
2899 case TGSI_OPCODE_X2D
:
2900 FETCH(&r
[0], 1, CHAN_X
);
2901 FETCH(&r
[1], 1, CHAN_Y
);
2902 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2903 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2904 FETCH(&r
[2], 2, CHAN_X
);
2905 micro_mul(&r
[2], &r
[2], &r
[0]);
2906 FETCH(&r
[3], 2, CHAN_Y
);
2907 micro_mul(&r
[3], &r
[3], &r
[1]);
2908 micro_add(&r
[2], &r
[2], &r
[3]);
2909 FETCH(&r
[3], 0, CHAN_X
);
2910 micro_add(&d
[CHAN_X
], &r
[2], &r
[3]);
2913 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2914 IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2915 FETCH(&r
[2], 2, CHAN_Z
);
2916 micro_mul(&r
[2], &r
[2], &r
[0]);
2917 FETCH(&r
[3], 2, CHAN_W
);
2918 micro_mul(&r
[3], &r
[3], &r
[1]);
2919 micro_add(&r
[2], &r
[2], &r
[3]);
2920 FETCH(&r
[3], 0, CHAN_Y
);
2921 micro_add(&d
[CHAN_Y
], &r
[2], &r
[3]);
2924 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2925 STORE(&d
[CHAN_X
], 0, CHAN_X
);
2927 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2928 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2930 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2931 STORE(&d
[CHAN_X
], 0, CHAN_Z
);
2933 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2934 STORE(&d
[CHAN_Y
], 0, CHAN_W
);
2938 case TGSI_OPCODE_ARA
:
2942 case TGSI_OPCODE_BRA
:
2946 case TGSI_OPCODE_CAL
:
2947 /* skip the call if no execution channels are enabled */
2948 if (mach
->ExecMask
) {
2951 /* First, record the depths of the execution stacks.
2952 * This is important for deeply nested/looped return statements.
2953 * We have to unwind the stacks by the correct amount. For a
2954 * real code generator, we could determine the number of entries
2955 * to pop off each stack with simple static analysis and avoid
2956 * implementing this data structure at run time.
2958 mach
->CallStack
[mach
->CallStackTop
].CondStackTop
= mach
->CondStackTop
;
2959 mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
= mach
->LoopStackTop
;
2960 mach
->CallStack
[mach
->CallStackTop
].ContStackTop
= mach
->ContStackTop
;
2961 mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
= mach
->SwitchStackTop
;
2962 mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
= mach
->BreakStackTop
;
2963 /* note that PC was already incremented above */
2964 mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
= *pc
;
2966 mach
->CallStackTop
++;
2968 /* Second, push the Cond, Loop, Cont, Switch, Break and Func stacks */
2969 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2970 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2971 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2972 assert(mach
->SwitchStackTop
< TGSI_EXEC_MAX_SWITCH_NESTING
);
2973 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
2974 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
2976 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2977 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2978 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2979 mach
->SwitchStack
[mach
->SwitchStackTop
++] = mach
->Switch
;
2980 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
2981 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
2983 /* Finally, jump to the subroutine */
2984 *pc
= inst
->Label
.Label
;
2988 case TGSI_OPCODE_RET
:
2989 mach
->FuncMask
&= ~mach
->ExecMask
;
2990 UPDATE_EXEC_MASK(mach
);
2992 if (mach
->FuncMask
== 0x0) {
2993 /* really return now (otherwise, keep executing) */
2995 if (mach
->CallStackTop
== 0) {
2996 /* returning from main() */
3001 assert(mach
->CallStackTop
> 0);
3002 mach
->CallStackTop
--;
3004 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
3005 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
3007 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
3008 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
3010 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
3011 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
3013 mach
->SwitchStackTop
= mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
;
3014 mach
->Switch
= mach
->SwitchStack
[mach
->SwitchStackTop
];
3016 mach
->BreakStackTop
= mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
;
3017 mach
->BreakType
= mach
->BreakStack
[mach
->BreakStackTop
];
3019 assert(mach
->FuncStackTop
> 0);
3020 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
3022 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
3024 UPDATE_EXEC_MASK(mach
);
3028 case TGSI_OPCODE_SSG
:
3029 /* TGSI_OPCODE_SGN */
3030 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3031 FETCH( &r
[0], 0, chan_index
);
3032 micro_sgn(&d
[chan_index
], &r
[0]);
3034 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3035 STORE(&d
[chan_index
], 0, chan_index
);
3039 case TGSI_OPCODE_CMP
:
3040 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3041 FETCH(&r
[0], 0, chan_index
);
3042 FETCH(&r
[1], 1, chan_index
);
3043 FETCH(&r
[2], 2, chan_index
);
3044 micro_lt(&d
[chan_index
], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2]);
3046 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3047 STORE(&d
[chan_index
], 0, chan_index
);
3051 case TGSI_OPCODE_SCS
:
3052 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
3053 FETCH( &r
[0], 0, CHAN_X
);
3054 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
3055 micro_cos(&r
[1], &r
[0]);
3056 STORE(&r
[1], 0, CHAN_X
);
3058 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
3059 micro_sin(&r
[1], &r
[0]);
3060 STORE(&r
[1], 0, CHAN_Y
);
3063 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
3064 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
3066 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
3067 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
3071 case TGSI_OPCODE_NRM
:
3072 /* 3-component vector normalize */
3073 if(IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
3074 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
3075 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
3076 /* r3 = sqrt(dp3(src0, src0)) */
3077 FETCH(&r
[0], 0, CHAN_X
);
3078 micro_mul(&r
[3], &r
[0], &r
[0]);
3079 FETCH(&r
[1], 0, CHAN_Y
);
3080 micro_mul(&r
[4], &r
[1], &r
[1]);
3081 micro_add(&r
[3], &r
[3], &r
[4]);
3082 FETCH(&r
[2], 0, CHAN_Z
);
3083 micro_mul(&r
[4], &r
[2], &r
[2]);
3084 micro_add(&r
[3], &r
[3], &r
[4]);
3085 micro_sqrt(&r
[3], &r
[3]);
3087 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
3088 micro_div(&r
[0], &r
[0], &r
[3]);
3089 STORE(&r
[0], 0, CHAN_X
);
3091 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
3092 micro_div(&r
[1], &r
[1], &r
[3]);
3093 STORE(&r
[1], 0, CHAN_Y
);
3095 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
3096 micro_div(&r
[2], &r
[2], &r
[3]);
3097 STORE(&r
[2], 0, CHAN_Z
);
3100 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
3101 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
3105 case TGSI_OPCODE_NRM4
:
3106 /* 4-component vector normalize */
3108 union tgsi_exec_channel tmp
, dot
;
3110 /* tmp = dp4(src0, src0): */
3111 FETCH( &r
[0], 0, CHAN_X
);
3112 micro_mul( &tmp
, &r
[0], &r
[0] );
3114 FETCH( &r
[1], 0, CHAN_Y
);
3115 micro_mul( &dot
, &r
[1], &r
[1] );
3116 micro_add( &tmp
, &tmp
, &dot
);
3118 FETCH( &r
[2], 0, CHAN_Z
);
3119 micro_mul( &dot
, &r
[2], &r
[2] );
3120 micro_add( &tmp
, &tmp
, &dot
);
3122 FETCH( &r
[3], 0, CHAN_W
);
3123 micro_mul( &dot
, &r
[3], &r
[3] );
3124 micro_add( &tmp
, &tmp
, &dot
);
3126 /* tmp = 1 / sqrt(tmp) */
3127 micro_sqrt( &tmp
, &tmp
);
3128 micro_div( &tmp
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &tmp
);
3130 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3131 /* chan = chan * tmp */
3132 micro_mul( &r
[chan_index
], &tmp
, &r
[chan_index
] );
3133 STORE( &r
[chan_index
], 0, chan_index
);
3138 case TGSI_OPCODE_DIV
:
3142 case TGSI_OPCODE_DP2
:
3143 FETCH( &r
[0], 0, CHAN_X
);
3144 FETCH( &r
[1], 1, CHAN_X
);
3145 micro_mul( &r
[0], &r
[0], &r
[1] );
3147 FETCH( &r
[1], 0, CHAN_Y
);
3148 FETCH( &r
[2], 1, CHAN_Y
);
3149 micro_mul( &r
[1], &r
[1], &r
[2] );
3150 micro_add( &r
[0], &r
[0], &r
[1] );
3152 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3153 STORE( &r
[0], 0, chan_index
);
3157 case TGSI_OPCODE_IF
:
3159 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
3160 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
3161 FETCH( &r
[0], 0, CHAN_X
);
3162 /* update CondMask */
3164 mach
->CondMask
&= ~0x1;
3167 mach
->CondMask
&= ~0x2;
3170 mach
->CondMask
&= ~0x4;
3173 mach
->CondMask
&= ~0x8;
3175 UPDATE_EXEC_MASK(mach
);
3176 /* Todo: If CondMask==0, jump to ELSE */
3179 case TGSI_OPCODE_ELSE
:
3180 /* invert CondMask wrt previous mask */
3183 assert(mach
->CondStackTop
> 0);
3184 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
3185 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
3186 UPDATE_EXEC_MASK(mach
);
3187 /* Todo: If CondMask==0, jump to ENDIF */
3191 case TGSI_OPCODE_ENDIF
:
3193 assert(mach
->CondStackTop
> 0);
3194 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
3195 UPDATE_EXEC_MASK(mach
);
3198 case TGSI_OPCODE_END
:
3199 /* halt execution */
3203 case TGSI_OPCODE_REP
:
3207 case TGSI_OPCODE_ENDREP
:
3211 case TGSI_OPCODE_PUSHA
:
3215 case TGSI_OPCODE_POPA
:
3219 case TGSI_OPCODE_CEIL
:
3220 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3221 FETCH( &r
[0], 0, chan_index
);
3222 micro_ceil(&d
[chan_index
], &r
[0]);
3224 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3225 STORE(&d
[chan_index
], 0, chan_index
);
3229 case TGSI_OPCODE_I2F
:
3230 exec_vector_unary(mach
, inst
, micro_i2f
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_INT
);
3233 case TGSI_OPCODE_NOT
:
3234 exec_vector_unary(mach
, inst
, micro_not
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3237 case TGSI_OPCODE_TRUNC
:
3238 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3239 FETCH( &r
[0], 0, chan_index
);
3240 micro_trunc(&d
[chan_index
], &r
[0]);
3242 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3243 STORE(&d
[chan_index
], 0, chan_index
);
3247 case TGSI_OPCODE_SHL
:
3248 exec_vector_binary(mach
, inst
, micro_shl
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3251 case TGSI_OPCODE_AND
:
3252 exec_vector_binary(mach
, inst
, micro_and
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3255 case TGSI_OPCODE_OR
:
3256 exec_vector_binary(mach
, inst
, micro_or
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3259 case TGSI_OPCODE_MOD
:
3263 case TGSI_OPCODE_XOR
:
3264 exec_vector_binary(mach
, inst
, micro_xor
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3267 case TGSI_OPCODE_SAD
:
3271 case TGSI_OPCODE_TXF
:
3275 case TGSI_OPCODE_TXQ
:
3279 case TGSI_OPCODE_EMIT
:
3283 case TGSI_OPCODE_ENDPRIM
:
3284 emit_primitive(mach
);
3287 case TGSI_OPCODE_BGNFOR
:
3288 assert(mach
->LoopCounterStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3289 for (chan_index
= 0; chan_index
< 3; chan_index
++) {
3290 FETCH( &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
].xyzw
[chan_index
], 0, chan_index
);
3292 ++mach
->LoopCounterStackTop
;
3293 STORE(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
], 0, CHAN_X
);
3294 /* update LoopMask */
3295 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[0] <= 0.0f
) {
3296 mach
->LoopMask
&= ~0x1;
3298 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[1] <= 0.0f
) {
3299 mach
->LoopMask
&= ~0x2;
3301 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[2] <= 0.0f
) {
3302 mach
->LoopMask
&= ~0x4;
3304 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[3] <= 0.0f
) {
3305 mach
->LoopMask
&= ~0x8;
3307 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3308 UPDATE_EXEC_MASK(mach
);
3309 /* fall-through (for now) */
3310 case TGSI_OPCODE_BGNLOOP
:
3311 /* push LoopMask and ContMasks */
3312 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3313 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3314 assert(mach
->LoopLabelStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3315 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
3317 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
3318 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
3319 mach
->LoopLabelStack
[mach
->LoopLabelStackTop
++] = *pc
- 1;
3320 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
3321 mach
->BreakType
= TGSI_EXEC_BREAK_INSIDE_LOOP
;
3324 case TGSI_OPCODE_ENDFOR
:
3325 assert(mach
->LoopCounterStackTop
> 0);
3326 micro_sub(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
],
3327 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
],
3328 &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
3329 /* update LoopMask */
3330 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[0] <= 0.0f
) {
3331 mach
->LoopMask
&= ~0x1;
3333 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[1] <= 0.0f
) {
3334 mach
->LoopMask
&= ~0x2;
3336 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[2] <= 0.0f
) {
3337 mach
->LoopMask
&= ~0x4;
3339 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[3] <= 0.0f
) {
3340 mach
->LoopMask
&= ~0x8;
3342 micro_add(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
],
3343 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
],
3344 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Z
]);
3345 assert(mach
->LoopLabelStackTop
> 0);
3346 inst
= mach
->Instructions
+ mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1];
3347 STORE(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
].xyzw
[CHAN_X
], 0, CHAN_X
);
3348 /* Restore ContMask, but don't pop */
3349 assert(mach
->ContStackTop
> 0);
3350 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3351 UPDATE_EXEC_MASK(mach
);
3352 if (mach
->ExecMask
) {
3353 /* repeat loop: jump to instruction just past BGNLOOP */
3354 assert(mach
->LoopLabelStackTop
> 0);
3355 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
3358 /* exit loop: pop LoopMask */
3359 assert(mach
->LoopStackTop
> 0);
3360 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3362 assert(mach
->ContStackTop
> 0);
3363 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3364 assert(mach
->LoopLabelStackTop
> 0);
3365 --mach
->LoopLabelStackTop
;
3366 assert(mach
->LoopCounterStackTop
> 0);
3367 --mach
->LoopCounterStackTop
;
3369 mach
->BreakType
= mach
->BreakStack
[--mach
->BreakStackTop
];
3371 UPDATE_EXEC_MASK(mach
);
3374 case TGSI_OPCODE_ENDLOOP
:
3375 /* Restore ContMask, but don't pop */
3376 assert(mach
->ContStackTop
> 0);
3377 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3378 UPDATE_EXEC_MASK(mach
);
3379 if (mach
->ExecMask
) {
3380 /* repeat loop: jump to instruction just past BGNLOOP */
3381 assert(mach
->LoopLabelStackTop
> 0);
3382 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
3385 /* exit loop: pop LoopMask */
3386 assert(mach
->LoopStackTop
> 0);
3387 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3389 assert(mach
->ContStackTop
> 0);
3390 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3391 assert(mach
->LoopLabelStackTop
> 0);
3392 --mach
->LoopLabelStackTop
;
3394 mach
->BreakType
= mach
->BreakStack
[--mach
->BreakStackTop
];
3396 UPDATE_EXEC_MASK(mach
);
3399 case TGSI_OPCODE_BRK
:
3403 case TGSI_OPCODE_CONT
:
3404 /* turn off cont channels for each enabled exec channel */
3405 mach
->ContMask
&= ~mach
->ExecMask
;
3406 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3407 UPDATE_EXEC_MASK(mach
);
3410 case TGSI_OPCODE_BGNSUB
:
3414 case TGSI_OPCODE_ENDSUB
:
3416 * XXX: This really should be a no-op. We should never reach this opcode.
3419 assert(mach
->CallStackTop
> 0);
3420 mach
->CallStackTop
--;
3422 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
3423 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
3425 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
3426 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
3428 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
3429 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
3431 mach
->SwitchStackTop
= mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
;
3432 mach
->Switch
= mach
->SwitchStack
[mach
->SwitchStackTop
];
3434 mach
->BreakStackTop
= mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
;
3435 mach
->BreakType
= mach
->BreakStack
[mach
->BreakStackTop
];
3437 assert(mach
->FuncStackTop
> 0);
3438 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
3440 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
3442 UPDATE_EXEC_MASK(mach
);
3445 case TGSI_OPCODE_NOP
:
3448 case TGSI_OPCODE_BREAKC
:
3449 FETCH(&r
[0], 0, CHAN_X
);
3450 /* update LoopMask */
3451 if (r
[0].u
[0] && (mach
->ExecMask
& 0x1)) {
3452 mach
->LoopMask
&= ~0x1;
3454 if (r
[0].u
[1] && (mach
->ExecMask
& 0x2)) {
3455 mach
->LoopMask
&= ~0x2;
3457 if (r
[0].u
[2] && (mach
->ExecMask
& 0x4)) {
3458 mach
->LoopMask
&= ~0x4;
3460 if (r
[0].u
[3] && (mach
->ExecMask
& 0x8)) {
3461 mach
->LoopMask
&= ~0x8;
3463 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3464 UPDATE_EXEC_MASK(mach
);
3467 case TGSI_OPCODE_F2I
:
3468 exec_vector_unary(mach
, inst
, micro_f2i
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_FLOAT
);
3471 case TGSI_OPCODE_IDIV
:
3472 exec_vector_binary(mach
, inst
, micro_idiv
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3475 case TGSI_OPCODE_IMAX
:
3476 exec_vector_binary(mach
, inst
, micro_imax
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3479 case TGSI_OPCODE_IMIN
:
3480 exec_vector_binary(mach
, inst
, micro_imin
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3483 case TGSI_OPCODE_INEG
:
3484 exec_vector_unary(mach
, inst
, micro_ineg
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3487 case TGSI_OPCODE_ISGE
:
3488 exec_vector_binary(mach
, inst
, micro_isge
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3491 case TGSI_OPCODE_ISHR
:
3492 exec_vector_binary(mach
, inst
, micro_ishr
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3495 case TGSI_OPCODE_ISLT
:
3496 exec_vector_binary(mach
, inst
, micro_islt
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3499 case TGSI_OPCODE_F2U
:
3500 exec_vector_unary(mach
, inst
, micro_f2u
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_FLOAT
);
3503 case TGSI_OPCODE_U2F
:
3504 exec_vector_unary(mach
, inst
, micro_u2f
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_UINT
);
3507 case TGSI_OPCODE_UADD
:
3508 exec_vector_binary(mach
, inst
, micro_uadd
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3511 case TGSI_OPCODE_UDIV
:
3512 exec_vector_binary(mach
, inst
, micro_udiv
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3515 case TGSI_OPCODE_UMAD
:
3516 exec_vector_trinary(mach
, inst
, micro_umad
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3519 case TGSI_OPCODE_UMAX
:
3520 exec_vector_binary(mach
, inst
, micro_umax
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3523 case TGSI_OPCODE_UMIN
:
3524 exec_vector_binary(mach
, inst
, micro_umin
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3527 case TGSI_OPCODE_UMOD
:
3528 exec_vector_binary(mach
, inst
, micro_umod
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3531 case TGSI_OPCODE_UMUL
:
3532 exec_vector_binary(mach
, inst
, micro_umul
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3535 case TGSI_OPCODE_USEQ
:
3536 exec_vector_binary(mach
, inst
, micro_useq
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3539 case TGSI_OPCODE_USGE
:
3540 exec_vector_binary(mach
, inst
, micro_usge
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3543 case TGSI_OPCODE_USHR
:
3544 exec_vector_binary(mach
, inst
, micro_ushr
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3547 case TGSI_OPCODE_USLT
:
3548 exec_vector_binary(mach
, inst
, micro_uslt
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3551 case TGSI_OPCODE_USNE
:
3552 exec_vector_binary(mach
, inst
, micro_usne
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3555 case TGSI_OPCODE_SWITCH
:
3556 exec_switch(mach
, inst
);
3559 case TGSI_OPCODE_CASE
:
3560 exec_case(mach
, inst
);
3563 case TGSI_OPCODE_DEFAULT
:
3567 case TGSI_OPCODE_ENDSWITCH
:
3568 exec_endswitch(mach
);
/* Compile-time debug switch, disabled (0) here.
 * NOTE(review): the #if/#ifdef sites that test this macro are not visible in
 * this excerpt; presumably it gates the per-instruction TEMP/OUT dump in
 * tgsi_exec_machine_run() -- confirm before relying on it.
 */
3577 #define DEBUG_EXECUTION 0
3581 * Run TGSI interpreter.
3582 * \return bitmask of "alive" quad components
3585 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
3590 mach
->CondMask
= 0xf;
3591 mach
->LoopMask
= 0xf;
3592 mach
->ContMask
= 0xf;
3593 mach
->FuncMask
= 0xf;
3594 mach
->ExecMask
= 0xf;
3596 mach
->Switch
.mask
= 0xf;
3598 assert(mach
->CondStackTop
== 0);
3599 assert(mach
->LoopStackTop
== 0);
3600 assert(mach
->ContStackTop
== 0);
3601 assert(mach
->SwitchStackTop
== 0);
3602 assert(mach
->BreakStackTop
== 0);
3603 assert(mach
->CallStackTop
== 0);
3605 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
3606 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
3608 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
3609 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
3610 mach
->Primitives
[0] = 0;
3613 for (i
= 0; i
< QUAD_SIZE
; i
++) {
3614 mach
->Temps
[TEMP_CC_I
].xyzw
[TEMP_CC_C
].u
[i
] =
3615 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_X_SHIFT
) |
3616 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Y_SHIFT
) |
3617 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Z_SHIFT
) |
3618 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_W_SHIFT
);
3621 /* execute declarations (interpolants) */
3622 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
3623 exec_declaration( mach
, mach
->Declarations
+i
);
3628 struct tgsi_exec_vector temps
[TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
];
3629 struct tgsi_exec_vector outputs
[PIPE_MAX_ATTRIBS
];
3632 memcpy(temps
, mach
->Temps
, sizeof(temps
));
3633 memcpy(outputs
, mach
->Outputs
, sizeof(outputs
));
3636 /* execute instructions, until pc is set to -1 */
3642 tgsi_dump_instruction(&mach
->Instructions
[pc
], inst
++);
3645 assert(pc
< (int) mach
->NumInstructions
);
3646 exec_instruction(mach
, mach
->Instructions
+ pc
, &pc
);
3649 for (i
= 0; i
< TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
; i
++) {
3650 if (memcmp(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]))) {
3653 memcpy(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]));
3654 debug_printf("TEMP[%2u] = ", i
);
3655 for (j
= 0; j
< 4; j
++) {
3659 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3660 temps
[i
].xyzw
[0].f
[j
], temps
[i
].xyzw
[0].u
[j
],
3661 temps
[i
].xyzw
[1].f
[j
], temps
[i
].xyzw
[1].u
[j
],
3662 temps
[i
].xyzw
[2].f
[j
], temps
[i
].xyzw
[2].u
[j
],
3663 temps
[i
].xyzw
[3].f
[j
], temps
[i
].xyzw
[3].u
[j
]);
3667 for (i
= 0; i
< PIPE_MAX_ATTRIBS
; i
++) {
3668 if (memcmp(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]))) {
3671 memcpy(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]));
3672 debug_printf("OUT[%2u] = ", i
);
3673 for (j
= 0; j
< 4; j
++) {
3677 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3678 outputs
[i
].xyzw
[0].f
[j
], outputs
[i
].xyzw
[0].u
[j
],
3679 outputs
[i
].xyzw
[1].f
[j
], outputs
[i
].xyzw
[1].u
[j
],
3680 outputs
[i
].xyzw
[2].f
[j
], outputs
[i
].xyzw
[2].u
[j
],
3681 outputs
[i
].xyzw
[3].f
[j
], outputs
[i
].xyzw
[3].u
[j
]);
3690 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3691 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
3693 * Scale back depth component.
3695 for (i
= 0; i
< 4; i
++)
3696 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
3700 assert(mach
->CondStackTop
== 0);
3701 assert(mach
->LoopStackTop
== 0);
3702 assert(mach
->ContStackTop
== 0);
3703 assert(mach
->SwitchStackTop
== 0);
3704 assert(mach
->BreakStackTop
== 0);
3705 assert(mach
->CallStackTop
== 0);
3707 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];