1 /**************************************************************************
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * TGSI interpreter/executor.
31 * Flow control information:
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
 * The ExecMask is computed by ANDing together CondMask, LoopMask, ContMask
 * and FuncMask (see UPDATE_EXEC_MASK). The first three are controlled by
 * the flow control instructions (namely IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_dump.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
60 #include "util/u_memory.h"
61 #include "util/u_math.h"
/*
 * Element indices of the four members of a quad inside an exec channel's
 * per-element arrays, e.g. src->f[TILE_BOTTOM_RIGHT].
 */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3
/*
 * Short aliases for the TGSI_EXEC_TEMP_* utility-register locations.
 *
 * A name ending in _I is a register index and the matching name ending in
 * _C is the channel within that register; the pair is used together, as in
 * mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
#define TEMP_P0            TGSI_EXEC_TEMP_P0
/**
 * Non-zero when channel CHAN is set in the write mask of destination
 * register number DST of instruction INST (INST is a struct, not a pointer).
 */
#define IS_DST_CHANNEL_ENABLED(INST, DST, CHAN) \
   ((INST).Dst[DST].DstRegister.WriteMask & (1 << (CHAN)))

/** Write-mask test for the first destination register, Dst[0]. */
#define IS_CHANNEL_ENABLED(INST, CHAN) \
   IS_DST_CHANNEL_ENABLED(INST, 0, CHAN)

/** Write-mask test for the second destination register, Dst[1]. */
#define IS_CHANNEL_ENABLED2(INST, CHAN) \
   IS_DST_CHANNEL_ENABLED(INST, 1, CHAN)

/**
 * Loop header that runs the statement following it once for every channel
 * in [0, NUM_CHANNELS) that is enabled in the write mask of Dst[DST].
 * CHAN must be an assignable integer variable.  The expansion ends in a
 * bare 'if', so the statement that follows should be a braced block.
 */
#define FOR_EACH_ENABLED_DST_CHANNEL(INST, DST, CHAN) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) \
      if (IS_DST_CHANNEL_ENABLED(INST, DST, CHAN))

/** Iterate over the channels enabled in the write mask of Dst[0]. */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN) \
   FOR_EACH_ENABLED_DST_CHANNEL(INST, 0, CHAN)

/** Iterate over the channels enabled in the write mask of Dst[1]. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN) \
   FOR_EACH_ENABLED_DST_CHANNEL(INST, 1, CHAN)
/**
 * Recompute the execution mask of the machine pointed to by MACH.
 *
 * ExecMask becomes the bitwise AND of CondMask, LoopMask, ContMask and
 * FuncMask.  MACH is expanded several times, so it must not have side
 * effects.  The argument and the whole expansion are parenthesized so that
 * any pointer expression (e.g. &machine) can be passed and the macro can be
 * used wherever an expression is allowed.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
129 static const union tgsi_exec_channel ZeroVec
=
130 { { 0.0, 0.0, 0.0, 0.0 } };
135 check_inf_or_nan(const union tgsi_exec_channel
*chan
)
137 assert(!util_is_inf_or_nan(chan
->f
[0]));
138 assert(!util_is_inf_or_nan(chan
->f
[1]));
139 assert(!util_is_inf_or_nan(chan
->f
[2]));
140 assert(!util_is_inf_or_nan(chan
->f
[3]));
147 print_chan(const char *msg
, const union tgsi_exec_channel
*chan
)
149 debug_printf("%s = {%f, %f, %f, %f}\n",
150 msg
, chan
->f
[0], chan
->f
[1], chan
->f
[2], chan
->f
[3]);
157 print_temp(const struct tgsi_exec_machine
*mach
, uint index
)
159 const struct tgsi_exec_vector
*tmp
= &mach
->Temps
[index
];
161 debug_printf("Temp[%u] =\n", index
);
162 for (i
= 0; i
< 4; i
++) {
163 debug_printf(" %c: { %f, %f, %f, %f }\n",
175 * Check if there's a potential src/dst register data dependency when
176 * using SOA execution.
179 * This would expand into:
184 * The second instruction will have the wrong value for t0 if executed as-is.
187 tgsi_check_soa_dependencies(const struct tgsi_full_instruction
*inst
)
191 uint writemask
= inst
->Dst
[0].DstRegister
.WriteMask
;
192 if (writemask
== TGSI_WRITEMASK_X
||
193 writemask
== TGSI_WRITEMASK_Y
||
194 writemask
== TGSI_WRITEMASK_Z
||
195 writemask
== TGSI_WRITEMASK_W
||
196 writemask
== TGSI_WRITEMASK_NONE
) {
197 /* no chance of data dependency */
201 /* loop over src regs */
202 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
203 if ((inst
->Src
[i
].SrcRegister
.File
==
204 inst
->Dst
[0].DstRegister
.File
) &&
205 (inst
->Src
[i
].SrcRegister
.Index
==
206 inst
->Dst
[0].DstRegister
.Index
)) {
207 /* loop over dest channels */
208 uint channelsWritten
= 0x0;
209 FOR_EACH_ENABLED_CHANNEL(*inst
, chan
) {
210 /* check if we're reading a channel that's been written */
211 uint swizzle
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[i
], chan
);
212 if (channelsWritten
& (1 << swizzle
)) {
216 channelsWritten
|= (1 << chan
);
225 * Initialize machine state by expanding tokens to full instructions,
226 * allocating temporary storage, setting up constants, etc.
227 * After this, we can call tgsi_exec_machine_run() many times.
230 tgsi_exec_machine_bind_shader(
231 struct tgsi_exec_machine
*mach
,
232 const struct tgsi_token
*tokens
,
234 struct tgsi_sampler
**samplers
)
237 struct tgsi_parse_context parse
;
238 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
239 struct tgsi_full_instruction
*instructions
;
240 struct tgsi_full_declaration
*declarations
;
241 uint maxInstructions
= 10, numInstructions
= 0;
242 uint maxDeclarations
= 10, numDeclarations
= 0;
246 tgsi_dump(tokens
, 0);
251 mach
->Tokens
= tokens
;
252 mach
->Samplers
= samplers
;
254 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
255 if (k
!= TGSI_PARSE_OK
) {
256 debug_printf( "Problem parsing!\n" );
260 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
264 declarations
= (struct tgsi_full_declaration
*)
265 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
271 instructions
= (struct tgsi_full_instruction
*)
272 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
275 FREE( declarations
);
279 while( !tgsi_parse_end_of_tokens( &parse
) ) {
280 uint pointer
= parse
.Position
;
283 tgsi_parse_token( &parse
);
284 switch( parse
.FullToken
.Token
.Type
) {
285 case TGSI_TOKEN_TYPE_DECLARATION
:
286 /* save expanded declaration */
287 if (numDeclarations
== maxDeclarations
) {
288 declarations
= REALLOC(declarations
,
290 * sizeof(struct tgsi_full_declaration
),
291 (maxDeclarations
+ 10)
292 * sizeof(struct tgsi_full_declaration
));
293 maxDeclarations
+= 10;
295 memcpy(declarations
+ numDeclarations
,
296 &parse
.FullToken
.FullDeclaration
,
297 sizeof(declarations
[0]));
301 case TGSI_TOKEN_TYPE_IMMEDIATE
:
303 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
305 assert( mach
->ImmLimit
+ 1 <= TGSI_EXEC_NUM_IMMEDIATES
);
307 for( i
= 0; i
< size
; i
++ ) {
308 mach
->Imms
[mach
->ImmLimit
][i
] =
309 parse
.FullToken
.FullImmediate
.u
[i
].Float
;
315 case TGSI_TOKEN_TYPE_INSTRUCTION
:
316 assert( labels
->count
< MAX_LABELS
);
318 labels
->labels
[labels
->count
][0] = instno
;
319 labels
->labels
[labels
->count
][1] = pointer
;
322 /* save expanded instruction */
323 if (numInstructions
== maxInstructions
) {
324 instructions
= REALLOC(instructions
,
326 * sizeof(struct tgsi_full_instruction
),
327 (maxInstructions
+ 10)
328 * sizeof(struct tgsi_full_instruction
));
329 maxInstructions
+= 10;
332 memcpy(instructions
+ numInstructions
,
333 &parse
.FullToken
.FullInstruction
,
334 sizeof(instructions
[0]));
343 tgsi_parse_free (&parse
);
345 if (mach
->Declarations
) {
346 FREE( mach
->Declarations
);
348 mach
->Declarations
= declarations
;
349 mach
->NumDeclarations
= numDeclarations
;
351 if (mach
->Instructions
) {
352 FREE( mach
->Instructions
);
354 mach
->Instructions
= instructions
;
355 mach
->NumInstructions
= numInstructions
;
359 struct tgsi_exec_machine
*
360 tgsi_exec_machine_create( void )
362 struct tgsi_exec_machine
*mach
;
365 mach
= align_malloc( sizeof *mach
, 16 );
369 memset(mach
, 0, sizeof(*mach
));
371 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_TEMP_ADDR
];
372 mach
->Predicates
= &mach
->Temps
[TGSI_EXEC_TEMP_P0
];
374 /* Setup constants. */
375 for( i
= 0; i
< 4; i
++ ) {
376 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
377 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
378 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
379 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
380 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
381 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
382 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
383 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
384 mach
->Temps
[TEMP_3_I
].xyzw
[TEMP_3_C
].f
[i
] = 3.0f
;
385 mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
].f
[i
] = 0.5f
;
389 /* silence warnings */
403 tgsi_exec_machine_destroy(struct tgsi_exec_machine
*mach
)
406 FREE(mach
->Instructions
);
407 FREE(mach
->Declarations
);
416 union tgsi_exec_channel
*dst
,
417 const union tgsi_exec_channel
*src
)
419 dst
->f
[0] = fabsf( src
->f
[0] );
420 dst
->f
[1] = fabsf( src
->f
[1] );
421 dst
->f
[2] = fabsf( src
->f
[2] );
422 dst
->f
[3] = fabsf( src
->f
[3] );
427 union tgsi_exec_channel
*dst
,
428 const union tgsi_exec_channel
*src0
,
429 const union tgsi_exec_channel
*src1
)
431 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
432 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
433 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
434 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
440 union tgsi_exec_channel
*dst
,
441 const union tgsi_exec_channel
*src0
,
442 const union tgsi_exec_channel
*src1
)
444 dst
->i
[0] = src0
->i
[0] + src1
->i
[0];
445 dst
->i
[1] = src0
->i
[1] + src1
->i
[1];
446 dst
->i
[2] = src0
->i
[2] + src1
->i
[2];
447 dst
->i
[3] = src0
->i
[3] + src1
->i
[3];
453 union tgsi_exec_channel
*dst
,
454 const union tgsi_exec_channel
*src0
,
455 const union tgsi_exec_channel
*src1
)
457 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
458 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
459 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
460 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
465 union tgsi_exec_channel
*dst
,
466 const union tgsi_exec_channel
*src
)
468 dst
->f
[0] = ceilf( src
->f
[0] );
469 dst
->f
[1] = ceilf( src
->f
[1] );
470 dst
->f
[2] = ceilf( src
->f
[2] );
471 dst
->f
[3] = ceilf( src
->f
[3] );
476 union tgsi_exec_channel
*dst
,
477 const union tgsi_exec_channel
*src
)
479 dst
->f
[0] = cosf( src
->f
[0] );
480 dst
->f
[1] = cosf( src
->f
[1] );
481 dst
->f
[2] = cosf( src
->f
[2] );
482 dst
->f
[3] = cosf( src
->f
[3] );
487 union tgsi_exec_channel
*dst
,
488 const union tgsi_exec_channel
*src
)
493 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
498 union tgsi_exec_channel
*dst
,
499 const union tgsi_exec_channel
*src
)
504 dst
->f
[3] = src
->f
[TILE_TOP_LEFT
] - src
->f
[TILE_BOTTOM_LEFT
];
509 union tgsi_exec_channel
*dst
,
510 const union tgsi_exec_channel
*src0
,
511 const union tgsi_exec_channel
*src1
)
513 if (src1
->f
[0] != 0) {
514 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
516 if (src1
->f
[1] != 0) {
517 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
519 if (src1
->f
[2] != 0) {
520 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
522 if (src1
->f
[3] != 0) {
523 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
530 union tgsi_exec_channel
*dst
,
531 const union tgsi_exec_channel
*src0
,
532 const union tgsi_exec_channel
*src1
)
534 dst
->u
[0] = src0
->u
[0] / src1
->u
[0];
535 dst
->u
[1] = src0
->u
[1] / src1
->u
[1];
536 dst
->u
[2] = src0
->u
[2] / src1
->u
[2];
537 dst
->u
[3] = src0
->u
[3] / src1
->u
[3];
543 union tgsi_exec_channel
*dst
,
544 const union tgsi_exec_channel
*src0
,
545 const union tgsi_exec_channel
*src1
,
546 const union tgsi_exec_channel
*src2
,
547 const union tgsi_exec_channel
*src3
)
549 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
550 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
551 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
552 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
558 union tgsi_exec_channel
*dst
,
559 const union tgsi_exec_channel
*src0
,
560 const union tgsi_exec_channel
*src1
,
561 const union tgsi_exec_channel
*src2
,
562 const union tgsi_exec_channel
*src3
)
564 dst
->i
[0] = src0
->i
[0] == src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
565 dst
->i
[1] = src0
->i
[1] == src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
566 dst
->i
[2] = src0
->i
[2] == src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
567 dst
->i
[3] = src0
->i
[3] == src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
573 union tgsi_exec_channel
*dst
,
574 const union tgsi_exec_channel
*src
)
577 dst
->f
[0] = util_fast_exp2( src
->f
[0] );
578 dst
->f
[1] = util_fast_exp2( src
->f
[1] );
579 dst
->f
[2] = util_fast_exp2( src
->f
[2] );
580 dst
->f
[3] = util_fast_exp2( src
->f
[3] );
584 /* Inf is okay for this instruction, so clamp it to silence assertions. */
586 union tgsi_exec_channel clamped
;
588 for (i
= 0; i
< 4; i
++) {
589 if (src
->f
[i
] > 127.99999f
) {
590 clamped
.f
[i
] = 127.99999f
;
591 } else if (src
->f
[i
] < -126.99999f
) {
592 clamped
.f
[i
] = -126.99999f
;
594 clamped
.f
[i
] = src
->f
[i
];
600 dst
->f
[0] = powf( 2.0f
, src
->f
[0] );
601 dst
->f
[1] = powf( 2.0f
, src
->f
[1] );
602 dst
->f
[2] = powf( 2.0f
, src
->f
[2] );
603 dst
->f
[3] = powf( 2.0f
, src
->f
[3] );
610 union tgsi_exec_channel
*dst
,
611 const union tgsi_exec_channel
*src
)
613 dst
->u
[0] = (uint
) src
->f
[0];
614 dst
->u
[1] = (uint
) src
->f
[1];
615 dst
->u
[2] = (uint
) src
->f
[2];
616 dst
->u
[3] = (uint
) src
->f
[3];
621 micro_float_clamp(union tgsi_exec_channel
*dst
,
622 const union tgsi_exec_channel
*src
)
626 for (i
= 0; i
< 4; i
++) {
627 if (src
->f
[i
] > 0.0f
) {
628 if (src
->f
[i
] > 1.884467e+019f
)
629 dst
->f
[i
] = 1.884467e+019f
;
630 else if (src
->f
[i
] < 5.42101e-020f
)
631 dst
->f
[i
] = 5.42101e-020f
;
633 dst
->f
[i
] = src
->f
[i
];
636 if (src
->f
[i
] < -1.884467e+019f
)
637 dst
->f
[i
] = -1.884467e+019f
;
638 else if (src
->f
[i
] > -5.42101e-020f
)
639 dst
->f
[i
] = -5.42101e-020f
;
641 dst
->f
[i
] = src
->f
[i
];
648 union tgsi_exec_channel
*dst
,
649 const union tgsi_exec_channel
*src
)
651 dst
->f
[0] = floorf( src
->f
[0] );
652 dst
->f
[1] = floorf( src
->f
[1] );
653 dst
->f
[2] = floorf( src
->f
[2] );
654 dst
->f
[3] = floorf( src
->f
[3] );
659 union tgsi_exec_channel
*dst
,
660 const union tgsi_exec_channel
*src
)
662 dst
->f
[0] = src
->f
[0] - floorf( src
->f
[0] );
663 dst
->f
[1] = src
->f
[1] - floorf( src
->f
[1] );
664 dst
->f
[2] = src
->f
[2] - floorf( src
->f
[2] );
665 dst
->f
[3] = src
->f
[3] - floorf( src
->f
[3] );
670 union tgsi_exec_channel
*dst
,
671 const union tgsi_exec_channel
*src
)
673 dst
->f
[0] = (float) src
->i
[0];
674 dst
->f
[1] = (float) src
->i
[1];
675 dst
->f
[2] = (float) src
->i
[2];
676 dst
->f
[3] = (float) src
->i
[3];
681 union tgsi_exec_channel
*dst
,
682 const union tgsi_exec_channel
*src
)
685 dst
->f
[0] = util_fast_log2( src
->f
[0] );
686 dst
->f
[1] = util_fast_log2( src
->f
[1] );
687 dst
->f
[2] = util_fast_log2( src
->f
[2] );
688 dst
->f
[3] = util_fast_log2( src
->f
[3] );
690 dst
->f
[0] = logf( src
->f
[0] ) * 1.442695f
;
691 dst
->f
[1] = logf( src
->f
[1] ) * 1.442695f
;
692 dst
->f
[2] = logf( src
->f
[2] ) * 1.442695f
;
693 dst
->f
[3] = logf( src
->f
[3] ) * 1.442695f
;
699 union tgsi_exec_channel
*dst
,
700 const union tgsi_exec_channel
*src0
,
701 const union tgsi_exec_channel
*src1
,
702 const union tgsi_exec_channel
*src2
,
703 const union tgsi_exec_channel
*src3
)
705 dst
->f
[0] = src0
->f
[0] <= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
706 dst
->f
[1] = src0
->f
[1] <= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
707 dst
->f
[2] = src0
->f
[2] <= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
708 dst
->f
[3] = src0
->f
[3] <= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
713 union tgsi_exec_channel
*dst
,
714 const union tgsi_exec_channel
*src0
,
715 const union tgsi_exec_channel
*src1
,
716 const union tgsi_exec_channel
*src2
,
717 const union tgsi_exec_channel
*src3
)
719 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
720 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
721 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
722 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
728 union tgsi_exec_channel
*dst
,
729 const union tgsi_exec_channel
*src0
,
730 const union tgsi_exec_channel
*src1
,
731 const union tgsi_exec_channel
*src2
,
732 const union tgsi_exec_channel
*src3
)
734 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
735 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
736 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
737 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
744 union tgsi_exec_channel
*dst
,
745 const union tgsi_exec_channel
*src0
,
746 const union tgsi_exec_channel
*src1
,
747 const union tgsi_exec_channel
*src2
,
748 const union tgsi_exec_channel
*src3
)
750 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src2
->u
[0] : src3
->u
[0];
751 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src2
->u
[1] : src3
->u
[1];
752 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src2
->u
[2] : src3
->u
[2];
753 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src2
->u
[3] : src3
->u
[3];
759 union tgsi_exec_channel
*dst
,
760 const union tgsi_exec_channel
*src0
,
761 const union tgsi_exec_channel
*src1
)
763 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
764 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
765 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
766 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
772 union tgsi_exec_channel
*dst
,
773 const union tgsi_exec_channel
*src0
,
774 const union tgsi_exec_channel
*src1
)
776 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
777 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
778 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
779 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
786 union tgsi_exec_channel
*dst
,
787 const union tgsi_exec_channel
*src0
,
788 const union tgsi_exec_channel
*src1
)
790 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
791 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
792 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
793 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
799 union tgsi_exec_channel
*dst
,
800 const union tgsi_exec_channel
*src0
,
801 const union tgsi_exec_channel
*src1
)
803 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
804 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
805 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
806 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
812 union tgsi_exec_channel
*dst
,
813 const union tgsi_exec_channel
*src0
,
814 const union tgsi_exec_channel
*src1
)
816 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
817 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
818 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
819 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
826 union tgsi_exec_channel
*dst
,
827 const union tgsi_exec_channel
*src0
,
828 const union tgsi_exec_channel
*src1
)
830 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
831 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
832 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
833 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
840 union tgsi_exec_channel
*dst
,
841 const union tgsi_exec_channel
*src0
,
842 const union tgsi_exec_channel
*src1
)
844 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
845 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
846 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
847 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
853 union tgsi_exec_channel
*dst
,
854 const union tgsi_exec_channel
*src0
,
855 const union tgsi_exec_channel
*src1
)
857 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
858 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
859 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
860 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
866 union tgsi_exec_channel
*dst
,
867 const union tgsi_exec_channel
*src0
,
868 const union tgsi_exec_channel
*src1
)
870 dst
->i
[0] = src0
->i
[0] * src1
->i
[0];
871 dst
->i
[1] = src0
->i
[1] * src1
->i
[1];
872 dst
->i
[2] = src0
->i
[2] * src1
->i
[2];
873 dst
->i
[3] = src0
->i
[3] * src1
->i
[3];
880 union tgsi_exec_channel
*dst0
,
881 union tgsi_exec_channel
*dst1
,
882 const union tgsi_exec_channel
*src0
,
883 const union tgsi_exec_channel
*src1
)
885 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
886 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
887 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
888 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
899 union tgsi_exec_channel
*dst0
,
900 union tgsi_exec_channel
*dst1
,
901 const union tgsi_exec_channel
*src0
,
902 const union tgsi_exec_channel
*src1
)
904 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
905 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
906 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
907 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
919 union tgsi_exec_channel
*dst
,
920 const union tgsi_exec_channel
*src0
,
921 const union tgsi_exec_channel
*src1
,
922 const union tgsi_exec_channel
*src2
)
924 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
925 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
926 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
927 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
933 union tgsi_exec_channel
*dst
,
934 const union tgsi_exec_channel
*src
)
936 dst
->f
[0] = -src
->f
[0];
937 dst
->f
[1] = -src
->f
[1];
938 dst
->f
[2] = -src
->f
[2];
939 dst
->f
[3] = -src
->f
[3];
945 union tgsi_exec_channel
*dst
,
946 const union tgsi_exec_channel
*src
)
948 dst
->i
[0] = -src
->i
[0];
949 dst
->i
[1] = -src
->i
[1];
950 dst
->i
[2] = -src
->i
[2];
951 dst
->i
[3] = -src
->i
[3];
957 union tgsi_exec_channel
*dst
,
958 const union tgsi_exec_channel
*src
)
960 dst
->u
[0] = ~src
->u
[0];
961 dst
->u
[1] = ~src
->u
[1];
962 dst
->u
[2] = ~src
->u
[2];
963 dst
->u
[3] = ~src
->u
[3];
968 union tgsi_exec_channel
*dst
,
969 const union tgsi_exec_channel
*src0
,
970 const union tgsi_exec_channel
*src1
)
972 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
973 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
974 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
975 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
980 union tgsi_exec_channel
*dst
,
981 const union tgsi_exec_channel
*src0
,
982 const union tgsi_exec_channel
*src1
)
985 dst
->f
[0] = util_fast_pow( src0
->f
[0], src1
->f
[0] );
986 dst
->f
[1] = util_fast_pow( src0
->f
[1], src1
->f
[1] );
987 dst
->f
[2] = util_fast_pow( src0
->f
[2], src1
->f
[2] );
988 dst
->f
[3] = util_fast_pow( src0
->f
[3], src1
->f
[3] );
990 dst
->f
[0] = powf( src0
->f
[0], src1
->f
[0] );
991 dst
->f
[1] = powf( src0
->f
[1], src1
->f
[1] );
992 dst
->f
[2] = powf( src0
->f
[2], src1
->f
[2] );
993 dst
->f
[3] = powf( src0
->f
[3], src1
->f
[3] );
999 union tgsi_exec_channel
*dst
,
1000 const union tgsi_exec_channel
*src
)
1002 dst
->f
[0] = floorf( src
->f
[0] + 0.5f
);
1003 dst
->f
[1] = floorf( src
->f
[1] + 0.5f
);
1004 dst
->f
[2] = floorf( src
->f
[2] + 0.5f
);
1005 dst
->f
[3] = floorf( src
->f
[3] + 0.5f
);
1010 union tgsi_exec_channel
*dst
,
1011 const union tgsi_exec_channel
*src
)
1013 dst
->f
[0] = src
->f
[0] < 0.0f
? -1.0f
: src
->f
[0] > 0.0f
? 1.0f
: 0.0f
;
1014 dst
->f
[1] = src
->f
[1] < 0.0f
? -1.0f
: src
->f
[1] > 0.0f
? 1.0f
: 0.0f
;
1015 dst
->f
[2] = src
->f
[2] < 0.0f
? -1.0f
: src
->f
[2] > 0.0f
? 1.0f
: 0.0f
;
1016 dst
->f
[3] = src
->f
[3] < 0.0f
? -1.0f
: src
->f
[3] > 0.0f
? 1.0f
: 0.0f
;
1021 union tgsi_exec_channel
*dst
,
1022 const union tgsi_exec_channel
*src0
,
1023 const union tgsi_exec_channel
*src1
)
1025 dst
->i
[0] = src0
->i
[0] << src1
->i
[0];
1026 dst
->i
[1] = src0
->i
[1] << src1
->i
[1];
1027 dst
->i
[2] = src0
->i
[2] << src1
->i
[2];
1028 dst
->i
[3] = src0
->i
[3] << src1
->i
[3];
1033 union tgsi_exec_channel
*dst
,
1034 const union tgsi_exec_channel
*src0
,
1035 const union tgsi_exec_channel
*src1
)
1037 dst
->i
[0] = src0
->i
[0] >> src1
->i
[0];
1038 dst
->i
[1] = src0
->i
[1] >> src1
->i
[1];
1039 dst
->i
[2] = src0
->i
[2] >> src1
->i
[2];
1040 dst
->i
[3] = src0
->i
[3] >> src1
->i
[3];
1045 union tgsi_exec_channel
*dst
,
1046 const union tgsi_exec_channel
*src0
)
1048 dst
->f
[0] = (float) (int) src0
->f
[0];
1049 dst
->f
[1] = (float) (int) src0
->f
[1];
1050 dst
->f
[2] = (float) (int) src0
->f
[2];
1051 dst
->f
[3] = (float) (int) src0
->f
[3];
1057 union tgsi_exec_channel
*dst
,
1058 const union tgsi_exec_channel
*src0
,
1059 const union tgsi_exec_channel
*src1
)
1061 dst
->u
[0] = src0
->u
[0] >> src1
->u
[0];
1062 dst
->u
[1] = src0
->u
[1] >> src1
->u
[1];
1063 dst
->u
[2] = src0
->u
[2] >> src1
->u
[2];
1064 dst
->u
[3] = src0
->u
[3] >> src1
->u
[3];
1070 union tgsi_exec_channel
*dst
,
1071 const union tgsi_exec_channel
*src
)
1073 dst
->f
[0] = sinf( src
->f
[0] );
1074 dst
->f
[1] = sinf( src
->f
[1] );
1075 dst
->f
[2] = sinf( src
->f
[2] );
1076 dst
->f
[3] = sinf( src
->f
[3] );
1080 micro_sqrt( union tgsi_exec_channel
*dst
,
1081 const union tgsi_exec_channel
*src
)
1083 dst
->f
[0] = sqrtf( src
->f
[0] );
1084 dst
->f
[1] = sqrtf( src
->f
[1] );
1085 dst
->f
[2] = sqrtf( src
->f
[2] );
1086 dst
->f
[3] = sqrtf( src
->f
[3] );
1091 union tgsi_exec_channel
*dst
,
1092 const union tgsi_exec_channel
*src0
,
1093 const union tgsi_exec_channel
*src1
)
1095 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
1096 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
1097 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
1098 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
1104 union tgsi_exec_channel
*dst
,
1105 const union tgsi_exec_channel
*src
)
1107 dst
->f
[0] = (float) src
->u
[0];
1108 dst
->f
[1] = (float) src
->u
[1];
1109 dst
->f
[2] = (float) src
->u
[2];
1110 dst
->f
[3] = (float) src
->u
[3];
1116 union tgsi_exec_channel
*dst
,
1117 const union tgsi_exec_channel
*src0
,
1118 const union tgsi_exec_channel
*src1
)
1120 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
1121 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
1122 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
1123 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
1127 fetch_src_file_channel(
1128 const struct tgsi_exec_machine
*mach
,
1131 const union tgsi_exec_channel
*index
,
1132 union tgsi_exec_channel
*chan
)
1135 case TGSI_SWIZZLE_X
:
1136 case TGSI_SWIZZLE_Y
:
1137 case TGSI_SWIZZLE_Z
:
1138 case TGSI_SWIZZLE_W
:
1140 case TGSI_FILE_CONSTANT
:
1141 assert(mach
->Consts
);
1142 if (index
->i
[0] < 0)
1145 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
1146 if (index
->i
[1] < 0)
1149 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
1150 if (index
->i
[2] < 0)
1153 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
1154 if (index
->i
[3] < 0)
1157 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
1160 case TGSI_FILE_INPUT
:
1161 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1162 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1163 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1164 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1167 case TGSI_FILE_TEMPORARY
:
1168 assert(index
->i
[0] < TGSI_EXEC_NUM_TEMPS
);
1169 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1170 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1171 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1172 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1175 case TGSI_FILE_IMMEDIATE
:
1176 assert( index
->i
[0] < (int) mach
->ImmLimit
);
1177 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
1178 assert( index
->i
[1] < (int) mach
->ImmLimit
);
1179 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
1180 assert( index
->i
[2] < (int) mach
->ImmLimit
);
1181 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
1182 assert( index
->i
[3] < (int) mach
->ImmLimit
);
1183 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
1186 case TGSI_FILE_ADDRESS
:
1187 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1188 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1189 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1190 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1193 case TGSI_FILE_PREDICATE
:
1194 assert(index
->i
[0] < TGSI_EXEC_NUM_PREDS
);
1195 assert(index
->i
[1] < TGSI_EXEC_NUM_PREDS
);
1196 assert(index
->i
[2] < TGSI_EXEC_NUM_PREDS
);
1197 assert(index
->i
[3] < TGSI_EXEC_NUM_PREDS
);
1198 chan
->u
[0] = mach
->Predicates
[0].xyzw
[swizzle
].u
[0];
1199 chan
->u
[1] = mach
->Predicates
[0].xyzw
[swizzle
].u
[1];
1200 chan
->u
[2] = mach
->Predicates
[0].xyzw
[swizzle
].u
[2];
1201 chan
->u
[3] = mach
->Predicates
[0].xyzw
[swizzle
].u
[3];
1204 case TGSI_FILE_OUTPUT
:
1205 /* vertex/fragment output vars can be read too */
1206 chan
->u
[0] = mach
->Outputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1207 chan
->u
[1] = mach
->Outputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1208 chan
->u
[2] = mach
->Outputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1209 chan
->u
[3] = mach
->Outputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1224 const struct tgsi_exec_machine
*mach
,
1225 union tgsi_exec_channel
*chan
,
1226 const struct tgsi_full_src_register
*reg
,
1227 const uint chan_index
)
1229 union tgsi_exec_channel index
;
1232 /* We start with a direct index into a register file.
1236 * file = SrcRegister.File
1237 * [1] = SrcRegister.Index
1242 index
.i
[3] = reg
->SrcRegister
.Index
;
1244 /* There is an extra source register that indirectly subscripts
1245 * a register file. The direct index now becomes an offset
1246 * that is being added to the indirect register.
1250 * ind = SrcRegisterInd.File
1251 * [2] = SrcRegisterInd.Index
1252 * .x = SrcRegisterInd.SwizzleX
1254 if (reg
->SrcRegister
.Indirect
) {
1255 union tgsi_exec_channel index2
;
1256 union tgsi_exec_channel indir_index
;
1257 const uint execmask
= mach
->ExecMask
;
1260 /* which address register (always zero now) */
1264 index2
.i
[3] = reg
->SrcRegisterInd
.Index
;
1266 /* get current value of address register[swizzle] */
1267 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterInd
, CHAN_X
);
1268 fetch_src_file_channel(
1270 reg
->SrcRegisterInd
.File
,
1275 /* add value of address register to the offset */
1276 index
.i
[0] += (int) indir_index
.f
[0];
1277 index
.i
[1] += (int) indir_index
.f
[1];
1278 index
.i
[2] += (int) indir_index
.f
[2];
1279 index
.i
[3] += (int) indir_index
.f
[3];
1281 /* for disabled execution channels, zero-out the index to
1282 * avoid using a potential garbage value.
1284 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1285 if ((execmask
& (1 << i
)) == 0)
1290 /* There is an extra source register that is a second
1291 * subscript to a register file. Effectively it means that
1292 * the register file is actually a 2D array of registers.
1294 * file[1][3] == file[1*sizeof(file[1])+3],
1296 * [3] = SrcRegisterDim.Index
1298 if (reg
->SrcRegister
.Dimension
) {
1299 /* The size of the first-order array depends on the register file type.
1300 * We need to multiply the index to the first array to get an effective,
1301 * "flat" index that points to the beginning of the second-order array.
1303 switch (reg
->SrcRegister
.File
) {
1304 case TGSI_FILE_INPUT
:
1305 index
.i
[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1306 index
.i
[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1307 index
.i
[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1308 index
.i
[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1310 case TGSI_FILE_CONSTANT
:
1311 index
.i
[0] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1312 index
.i
[1] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1313 index
.i
[2] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1314 index
.i
[3] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1320 index
.i
[0] += reg
->SrcRegisterDim
.Index
;
1321 index
.i
[1] += reg
->SrcRegisterDim
.Index
;
1322 index
.i
[2] += reg
->SrcRegisterDim
.Index
;
1323 index
.i
[3] += reg
->SrcRegisterDim
.Index
;
1325 /* Again, the second subscript index can be addressed indirectly
1326 * identically to the first one.
1327 * Nothing stops us from indirectly addressing the indirect register,
1328 * but there is no need for that, so we won't exercise it.
1330 * file[1][ind[4].y+3],
1332 * ind = SrcRegisterDimInd.File
1333 * [4] = SrcRegisterDimInd.Index
1334 * .y = SrcRegisterDimInd.SwizzleX
1336 if (reg
->SrcRegisterDim
.Indirect
) {
1337 union tgsi_exec_channel index2
;
1338 union tgsi_exec_channel indir_index
;
1339 const uint execmask
= mach
->ExecMask
;
1345 index2
.i
[3] = reg
->SrcRegisterDimInd
.Index
;
1347 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterDimInd
, CHAN_X
);
1348 fetch_src_file_channel(
1350 reg
->SrcRegisterDimInd
.File
,
1355 index
.i
[0] += (int) indir_index
.f
[0];
1356 index
.i
[1] += (int) indir_index
.f
[1];
1357 index
.i
[2] += (int) indir_index
.f
[2];
1358 index
.i
[3] += (int) indir_index
.f
[3];
1360 /* for disabled execution channels, zero-out the index to
1361 * avoid using a potential garbage value.
1363 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1364 if ((execmask
& (1 << i
)) == 0)
1369 /* If by any chance there was a need for a 3D array of register
1370 * files, we would have to check whether SrcRegisterDim is followed
1371 * by a dimension register and continue the saga.
1375 swizzle
= tgsi_util_get_full_src_register_swizzle( reg
, chan_index
);
1376 fetch_src_file_channel(
1378 reg
->SrcRegister
.File
,
1383 switch (tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
)) {
1384 case TGSI_UTIL_SIGN_CLEAR
:
1385 micro_abs( chan
, chan
);
1388 case TGSI_UTIL_SIGN_SET
:
1389 micro_abs( chan
, chan
);
1390 micro_neg( chan
, chan
);
1393 case TGSI_UTIL_SIGN_TOGGLE
:
1394 micro_neg( chan
, chan
);
1397 case TGSI_UTIL_SIGN_KEEP
:
1404 struct tgsi_exec_machine
*mach
,
1405 const union tgsi_exec_channel
*chan
,
1406 const struct tgsi_full_dst_register
*reg
,
1407 const struct tgsi_full_instruction
*inst
,
1411 union tgsi_exec_channel null
;
1412 union tgsi_exec_channel
*dst
;
1413 uint execmask
= mach
->ExecMask
;
1414 int offset
= 0; /* indirection offset */
1418 check_inf_or_nan(chan
);
1421 /* There is an extra source register that indirectly subscripts
1422 * a register file. The direct index now becomes an offset
1423 * that is being added to the indirect register.
1427 * ind = DstRegisterInd.File
1428 * [2] = DstRegisterInd.Index
1429 * .x = DstRegisterInd.SwizzleX
1431 if (reg
->DstRegister
.Indirect
) {
1432 union tgsi_exec_channel index
;
1433 union tgsi_exec_channel indir_index
;
1436 /* which address register (always zero for now) */
1440 index
.i
[3] = reg
->DstRegisterInd
.Index
;
1442 /* get current value of address register[swizzle] */
1443 swizzle
= tgsi_util_get_src_register_swizzle( ®
->DstRegisterInd
, CHAN_X
);
1445 /* fetch values from the address/indirection register */
1446 fetch_src_file_channel(
1448 reg
->DstRegisterInd
.File
,
1453 /* save indirection offset */
1454 offset
= (int) indir_index
.f
[0];
1457 switch (reg
->DstRegister
.File
) {
1458 case TGSI_FILE_NULL
:
1462 case TGSI_FILE_OUTPUT
:
1463 index
= mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1464 + reg
->DstRegister
.Index
;
1465 dst
= &mach
->Outputs
[offset
+ index
].xyzw
[chan_index
];
1468 case TGSI_FILE_TEMPORARY
:
1469 index
= reg
->DstRegister
.Index
;
1470 assert( index
< TGSI_EXEC_NUM_TEMPS
);
1471 dst
= &mach
->Temps
[offset
+ index
].xyzw
[chan_index
];
1474 case TGSI_FILE_ADDRESS
:
1475 index
= reg
->DstRegister
.Index
;
1476 dst
= &mach
->Addrs
[index
].xyzw
[chan_index
];
1479 case TGSI_FILE_LOOP
:
1480 assert(reg
->DstRegister
.Index
== 0);
1481 assert(mach
->LoopCounterStackTop
> 0);
1482 assert(chan_index
== CHAN_X
);
1483 dst
= &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[chan_index
];
1486 case TGSI_FILE_PREDICATE
:
1487 index
= reg
->DstRegister
.Index
;
1488 assert(index
< TGSI_EXEC_NUM_PREDS
);
1489 dst
= &mach
->Predicates
[index
].xyzw
[chan_index
];
1497 if (inst
->Instruction
.Predicate
) {
1499 union tgsi_exec_channel
*pred
;
1501 switch (chan_index
) {
1503 swizzle
= inst
->Predicate
.SwizzleX
;
1506 swizzle
= inst
->Predicate
.SwizzleY
;
1509 swizzle
= inst
->Predicate
.SwizzleZ
;
1512 swizzle
= inst
->Predicate
.SwizzleW
;
1519 assert(inst
->Predicate
.Index
== 0);
1521 pred
= &mach
->Predicates
[inst
->Predicate
.Index
].xyzw
[swizzle
];
1523 if (inst
->Predicate
.Negate
) {
1524 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1526 execmask
&= ~(1 << i
);
1530 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1532 execmask
&= ~(1 << i
);
1538 switch (inst
->Instruction
.Saturate
) {
1540 for (i
= 0; i
< QUAD_SIZE
; i
++)
1541 if (execmask
& (1 << i
))
1542 dst
->i
[i
] = chan
->i
[i
];
1545 case TGSI_SAT_ZERO_ONE
:
1546 for (i
= 0; i
< QUAD_SIZE
; i
++)
1547 if (execmask
& (1 << i
)) {
1548 if (chan
->f
[i
] < 0.0f
)
1550 else if (chan
->f
[i
] > 1.0f
)
1553 dst
->i
[i
] = chan
->i
[i
];
1557 case TGSI_SAT_MINUS_PLUS_ONE
:
1558 for (i
= 0; i
< QUAD_SIZE
; i
++)
1559 if (execmask
& (1 << i
)) {
1560 if (chan
->f
[i
] < -1.0f
)
1562 else if (chan
->f
[i
] > 1.0f
)
1565 dst
->i
[i
] = chan
->i
[i
];
/* Fetch channel CHAN of source operand INDEX of the current instruction.
 * Expects `mach` and `inst` to be in scope at the point of use.
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source (mach, VAL, &inst->Src[INDEX], CHAN)

/* Store VAL into channel CHAN of destination operand INDEX of the current
 * instruction.  Expects `mach` and `inst` to be in scope at the point of use.
 */
#define STORE(VAL,INDEX,CHAN)\
    store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN )
1582 * Execute ARB-style KIL which is predicated by a src register.
1583 * Kill fragment if any of the four values is less than zero.
1586 exec_kil(struct tgsi_exec_machine
*mach
,
1587 const struct tgsi_full_instruction
*inst
)
1591 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1592 union tgsi_exec_channel r
[1];
1594 /* This mask stores component bits that were already tested. */
1597 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1602 /* unswizzle channel */
1603 swizzle
= tgsi_util_get_full_src_register_swizzle (
1607 /* check if the component has not been already tested */
1608 if (uniquemask
& (1 << swizzle
))
1610 uniquemask
|= 1 << swizzle
;
1612 FETCH(&r
[0], 0, chan_index
);
1613 for (i
= 0; i
< 4; i
++)
1614 if (r
[0].f
[i
] < 0.0f
)
1618 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1622 * Execute NVIDIA-style KIL which is predicated by a condition code.
1623 * Kill fragment if the condition code is TRUE.
1626 exec_kilp(struct tgsi_exec_machine
*mach
,
1627 const struct tgsi_full_instruction
*inst
)
1629 uint kilmask
; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1631 /* "unconditional" kil */
1632 kilmask
= mach
->ExecMask
;
1633 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1638 * Fetch a four texture samples using STR texture coordinates.
1641 fetch_texel( struct tgsi_sampler
*sampler
,
1642 const union tgsi_exec_channel
*s
,
1643 const union tgsi_exec_channel
*t
,
1644 const union tgsi_exec_channel
*p
,
1645 float lodbias
, /* XXX should be float[4] */
1646 union tgsi_exec_channel
*r
,
1647 union tgsi_exec_channel
*g
,
1648 union tgsi_exec_channel
*b
,
1649 union tgsi_exec_channel
*a
)
1652 float rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1654 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, lodbias
, rgba
);
1656 for (j
= 0; j
< 4; j
++) {
1657 r
->f
[j
] = rgba
[0][j
];
1658 g
->f
[j
] = rgba
[1][j
];
1659 b
->f
[j
] = rgba
[2][j
];
1660 a
->f
[j
] = rgba
[3][j
];
1666 exec_tex(struct tgsi_exec_machine
*mach
,
1667 const struct tgsi_full_instruction
*inst
,
1671 const uint unit
= inst
->Src
[1].SrcRegister
.Index
;
1672 union tgsi_exec_channel r
[4];
1676 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1678 switch (inst
->Texture
.Texture
) {
1679 case TGSI_TEXTURE_1D
:
1680 case TGSI_TEXTURE_SHADOW1D
:
1682 FETCH(&r
[0], 0, CHAN_X
);
1685 FETCH(&r
[1], 0, CHAN_W
);
1686 micro_div( &r
[0], &r
[0], &r
[1] );
1690 FETCH(&r
[1], 0, CHAN_W
);
1691 lodBias
= r
[2].f
[0];
1696 fetch_texel(mach
->Samplers
[unit
],
1697 &r
[0], &ZeroVec
, &ZeroVec
, lodBias
, /* S, T, P, BIAS */
1698 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1701 case TGSI_TEXTURE_2D
:
1702 case TGSI_TEXTURE_RECT
:
1703 case TGSI_TEXTURE_SHADOW2D
:
1704 case TGSI_TEXTURE_SHADOWRECT
:
1706 FETCH(&r
[0], 0, CHAN_X
);
1707 FETCH(&r
[1], 0, CHAN_Y
);
1708 FETCH(&r
[2], 0, CHAN_Z
);
1711 FETCH(&r
[3], 0, CHAN_W
);
1712 micro_div( &r
[0], &r
[0], &r
[3] );
1713 micro_div( &r
[1], &r
[1], &r
[3] );
1714 micro_div( &r
[2], &r
[2], &r
[3] );
1718 FETCH(&r
[3], 0, CHAN_W
);
1719 lodBias
= r
[3].f
[0];
1724 fetch_texel(mach
->Samplers
[unit
],
1725 &r
[0], &r
[1], &r
[2], lodBias
, /* inputs */
1726 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1729 case TGSI_TEXTURE_3D
:
1730 case TGSI_TEXTURE_CUBE
:
1732 FETCH(&r
[0], 0, CHAN_X
);
1733 FETCH(&r
[1], 0, CHAN_Y
);
1734 FETCH(&r
[2], 0, CHAN_Z
);
1737 FETCH(&r
[3], 0, CHAN_W
);
1738 micro_div( &r
[0], &r
[0], &r
[3] );
1739 micro_div( &r
[1], &r
[1], &r
[3] );
1740 micro_div( &r
[2], &r
[2], &r
[3] );
1744 FETCH(&r
[3], 0, CHAN_W
);
1745 lodBias
= r
[3].f
[0];
1750 fetch_texel(mach
->Samplers
[unit
],
1751 &r
[0], &r
[1], &r
[2], lodBias
,
1752 &r
[0], &r
[1], &r
[2], &r
[3]);
1759 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1760 STORE( &r
[chan_index
], 0, chan_index
);
1765 exec_txd(struct tgsi_exec_machine
*mach
,
1766 const struct tgsi_full_instruction
*inst
)
1768 const uint unit
= inst
->Src
[3].SrcRegister
.Index
;
1769 union tgsi_exec_channel r
[4];
1773 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1776 switch (inst
->Texture
.Texture
) {
1777 case TGSI_TEXTURE_1D
:
1778 case TGSI_TEXTURE_SHADOW1D
:
1780 FETCH(&r
[0], 0, CHAN_X
);
1782 fetch_texel(mach
->Samplers
[unit
],
1783 &r
[0], &ZeroVec
, &ZeroVec
, 0.0f
, /* S, T, P, BIAS */
1784 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1787 case TGSI_TEXTURE_2D
:
1788 case TGSI_TEXTURE_RECT
:
1789 case TGSI_TEXTURE_SHADOW2D
:
1790 case TGSI_TEXTURE_SHADOWRECT
:
1792 FETCH(&r
[0], 0, CHAN_X
);
1793 FETCH(&r
[1], 0, CHAN_Y
);
1794 FETCH(&r
[2], 0, CHAN_Z
);
1796 fetch_texel(mach
->Samplers
[unit
],
1797 &r
[0], &r
[1], &r
[2], 0.0f
, /* inputs */
1798 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1801 case TGSI_TEXTURE_3D
:
1802 case TGSI_TEXTURE_CUBE
:
1804 FETCH(&r
[0], 0, CHAN_X
);
1805 FETCH(&r
[1], 0, CHAN_Y
);
1806 FETCH(&r
[2], 0, CHAN_Z
);
1808 fetch_texel(mach
->Samplers
[unit
],
1809 &r
[0], &r
[1], &r
[2], 0.0f
,
1810 &r
[0], &r
[1], &r
[2], &r
[3]);
1817 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
1818 STORE(&r
[chan_index
], 0, chan_index
);
1824 * Evaluate a constant-valued coefficient at the position of the
1829 struct tgsi_exec_machine
*mach
,
1835 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1836 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1841 * Evaluate a linear-valued coefficient at the position of the
1846 struct tgsi_exec_machine
*mach
,
1850 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1851 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1852 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1853 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1854 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1855 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
1856 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
1857 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
1858 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
1862 * Evaluate a perspective-valued coefficient at the position of the
1866 eval_perspective_coef(
1867 struct tgsi_exec_machine
*mach
,
1871 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1872 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1873 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1874 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1875 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1876 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
1877 /* divide by W here */
1878 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
1879 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
1880 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
1881 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
/* Signature shared by the eval_*_coef() interpolation routines so that
 * exec_declaration() can select one per input declaration.
 *
 * NOTE(review): the `attrib` / `chan` parameters (assumed `unsigned`) were
 * not legible in the reviewed text -- confirm.
 */
typedef void (* eval_coef_func)(
   struct tgsi_exec_machine *mach,
   unsigned attrib,
   unsigned chan );
1891 exec_declaration(struct tgsi_exec_machine
*mach
,
1892 const struct tgsi_full_declaration
*decl
)
1894 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1895 if (decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1896 uint first
, last
, mask
;
1898 first
= decl
->Range
.First
;
1899 last
= decl
->Range
.Last
;
1900 mask
= decl
->Declaration
.UsageMask
;
1902 if (decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
) {
1903 assert(decl
->Semantic
.Index
== 0);
1904 assert(first
== last
);
1905 assert(mask
= TGSI_WRITEMASK_XYZW
);
1907 mach
->Inputs
[first
] = mach
->QuadPos
;
1908 } else if (decl
->Semantic
.Name
== TGSI_SEMANTIC_FACE
) {
1911 assert(decl
->Semantic
.Index
== 0);
1912 assert(first
== last
);
1914 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1915 mach
->Inputs
[first
].xyzw
[0].f
[i
] = mach
->Face
;
1918 eval_coef_func eval
;
1921 switch (decl
->Declaration
.Interpolate
) {
1922 case TGSI_INTERPOLATE_CONSTANT
:
1923 eval
= eval_constant_coef
;
1926 case TGSI_INTERPOLATE_LINEAR
:
1927 eval
= eval_linear_coef
;
1930 case TGSI_INTERPOLATE_PERSPECTIVE
:
1931 eval
= eval_perspective_coef
;
1939 for (j
= 0; j
< NUM_CHANNELS
; j
++) {
1940 if (mask
& (1 << j
)) {
1941 for (i
= first
; i
<= last
; i
++) {
1953 struct tgsi_exec_machine
*mach
,
1954 const struct tgsi_full_instruction
*inst
,
1958 union tgsi_exec_channel r
[10];
1959 union tgsi_exec_channel d
[8];
1963 switch (inst
->Instruction
.Opcode
) {
1964 case TGSI_OPCODE_ARL
:
1965 case TGSI_OPCODE_FLR
:
1966 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1967 FETCH( &r
[0], 0, chan_index
);
1968 micro_flr(&d
[chan_index
], &r
[0]);
1970 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
1971 STORE(&d
[chan_index
], 0, chan_index
);
1975 case TGSI_OPCODE_MOV
:
1976 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1977 FETCH(&d
[chan_index
], 0, chan_index
);
1979 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1980 STORE(&d
[chan_index
], 0, chan_index
);
1984 case TGSI_OPCODE_LIT
:
1985 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1986 FETCH( &r
[0], 0, CHAN_X
);
1987 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1988 micro_max(&d
[CHAN_Y
], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
1991 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1992 FETCH( &r
[1], 0, CHAN_Y
);
1993 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1995 FETCH( &r
[2], 0, CHAN_W
);
1996 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
1997 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
1998 micro_pow( &r
[1], &r
[1], &r
[2] );
1999 micro_lt(&d
[CHAN_Z
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2002 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2003 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2005 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2006 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2009 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2010 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
2012 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2013 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2017 case TGSI_OPCODE_RCP
:
2018 /* TGSI_OPCODE_RECIP */
2019 FETCH( &r
[0], 0, CHAN_X
);
2020 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
2021 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2022 STORE( &r
[0], 0, chan_index
);
2026 case TGSI_OPCODE_RSQ
:
2027 /* TGSI_OPCODE_RECIPSQRT */
2028 FETCH( &r
[0], 0, CHAN_X
);
2029 micro_abs( &r
[0], &r
[0] );
2030 micro_sqrt( &r
[0], &r
[0] );
2031 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
2032 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2033 STORE( &r
[0], 0, chan_index
);
2037 case TGSI_OPCODE_EXP
:
2038 FETCH( &r
[0], 0, CHAN_X
);
2039 micro_flr( &r
[1], &r
[0] ); /* r1 = floor(r0) */
2040 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2041 micro_exp2( &r
[2], &r
[1] ); /* r2 = 2 ^ r1 */
2042 STORE( &r
[2], 0, CHAN_X
); /* store r2 */
2044 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2045 micro_sub( &r
[2], &r
[0], &r
[1] ); /* r2 = r0 - r1 */
2046 STORE( &r
[2], 0, CHAN_Y
); /* store r2 */
2048 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2049 micro_exp2( &r
[2], &r
[0] ); /* r2 = 2 ^ r0 */
2050 STORE( &r
[2], 0, CHAN_Z
); /* store r2 */
2052 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2053 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2057 case TGSI_OPCODE_LOG
:
2058 FETCH( &r
[0], 0, CHAN_X
);
2059 micro_abs( &r
[2], &r
[0] ); /* r2 = abs(r0) */
2060 micro_lg2( &r
[1], &r
[2] ); /* r1 = lg2(r2) */
2061 micro_flr( &r
[0], &r
[1] ); /* r0 = floor(r1) */
2062 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2063 STORE( &r
[0], 0, CHAN_X
);
2065 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2066 micro_exp2( &r
[0], &r
[0] ); /* r0 = 2 ^ r0 */
2067 micro_div( &r
[0], &r
[2], &r
[0] ); /* r0 = r2 / r0 */
2068 STORE( &r
[0], 0, CHAN_Y
);
2070 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2071 STORE( &r
[1], 0, CHAN_Z
);
2073 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2074 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2078 case TGSI_OPCODE_MUL
:
2079 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2080 FETCH(&r
[0], 0, chan_index
);
2081 FETCH(&r
[1], 1, chan_index
);
2082 micro_mul(&d
[chan_index
], &r
[0], &r
[1]);
2084 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2085 STORE(&d
[chan_index
], 0, chan_index
);
2089 case TGSI_OPCODE_ADD
:
2090 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2091 FETCH( &r
[0], 0, chan_index
);
2092 FETCH( &r
[1], 1, chan_index
);
2093 micro_add(&d
[chan_index
], &r
[0], &r
[1]);
2095 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2096 STORE(&d
[chan_index
], 0, chan_index
);
2100 case TGSI_OPCODE_DP3
:
2101 /* TGSI_OPCODE_DOT3 */
2102 FETCH( &r
[0], 0, CHAN_X
);
2103 FETCH( &r
[1], 1, CHAN_X
);
2104 micro_mul( &r
[0], &r
[0], &r
[1] );
2106 FETCH( &r
[1], 0, CHAN_Y
);
2107 FETCH( &r
[2], 1, CHAN_Y
);
2108 micro_mul( &r
[1], &r
[1], &r
[2] );
2109 micro_add( &r
[0], &r
[0], &r
[1] );
2111 FETCH( &r
[1], 0, CHAN_Z
);
2112 FETCH( &r
[2], 1, CHAN_Z
);
2113 micro_mul( &r
[1], &r
[1], &r
[2] );
2114 micro_add( &r
[0], &r
[0], &r
[1] );
2116 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2117 STORE( &r
[0], 0, chan_index
);
2121 case TGSI_OPCODE_DP4
:
2122 /* TGSI_OPCODE_DOT4 */
2123 FETCH(&r
[0], 0, CHAN_X
);
2124 FETCH(&r
[1], 1, CHAN_X
);
2126 micro_mul( &r
[0], &r
[0], &r
[1] );
2128 FETCH(&r
[1], 0, CHAN_Y
);
2129 FETCH(&r
[2], 1, CHAN_Y
);
2131 micro_mul( &r
[1], &r
[1], &r
[2] );
2132 micro_add( &r
[0], &r
[0], &r
[1] );
2134 FETCH(&r
[1], 0, CHAN_Z
);
2135 FETCH(&r
[2], 1, CHAN_Z
);
2137 micro_mul( &r
[1], &r
[1], &r
[2] );
2138 micro_add( &r
[0], &r
[0], &r
[1] );
2140 FETCH(&r
[1], 0, CHAN_W
);
2141 FETCH(&r
[2], 1, CHAN_W
);
2143 micro_mul( &r
[1], &r
[1], &r
[2] );
2144 micro_add( &r
[0], &r
[0], &r
[1] );
2146 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2147 STORE( &r
[0], 0, chan_index
);
2151 case TGSI_OPCODE_DST
:
2152 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2153 FETCH( &r
[0], 0, CHAN_Y
);
2154 FETCH( &r
[1], 1, CHAN_Y
);
2155 micro_mul(&d
[CHAN_Y
], &r
[0], &r
[1]);
2157 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2158 FETCH(&d
[CHAN_Z
], 0, CHAN_Z
);
2160 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2161 FETCH(&d
[CHAN_W
], 1, CHAN_W
);
2164 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2165 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
2167 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2168 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2170 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2171 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2173 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2174 STORE(&d
[CHAN_W
], 0, CHAN_W
);
2178 case TGSI_OPCODE_MIN
:
2179 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2180 FETCH(&r
[0], 0, chan_index
);
2181 FETCH(&r
[1], 1, chan_index
);
2183 /* XXX use micro_min()?? */
2184 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &r
[0], &r
[1]);
2186 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2187 STORE(&d
[chan_index
], 0, chan_index
);
2191 case TGSI_OPCODE_MAX
:
2192 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2193 FETCH(&r
[0], 0, chan_index
);
2194 FETCH(&r
[1], 1, chan_index
);
2196 /* XXX use micro_max()?? */
2197 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &r
[1], &r
[0] );
2199 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2200 STORE(&d
[chan_index
], 0, chan_index
);
2204 case TGSI_OPCODE_SLT
:
2205 /* TGSI_OPCODE_SETLT */
2206 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2207 FETCH( &r
[0], 0, chan_index
);
2208 FETCH( &r
[1], 1, chan_index
);
2209 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2211 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2212 STORE(&d
[chan_index
], 0, chan_index
);
2216 case TGSI_OPCODE_SGE
:
2217 /* TGSI_OPCODE_SETGE */
2218 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2219 FETCH( &r
[0], 0, chan_index
);
2220 FETCH( &r
[1], 1, chan_index
);
2221 micro_le(&d
[chan_index
], &r
[1], &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2223 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2224 STORE(&d
[chan_index
], 0, chan_index
);
2228 case TGSI_OPCODE_MAD
:
2229 /* TGSI_OPCODE_MADD */
2230 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2231 FETCH( &r
[0], 0, chan_index
);
2232 FETCH( &r
[1], 1, chan_index
);
2233 micro_mul( &r
[0], &r
[0], &r
[1] );
2234 FETCH( &r
[1], 2, chan_index
);
2235 micro_add(&d
[chan_index
], &r
[0], &r
[1]);
2237 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2238 STORE(&d
[chan_index
], 0, chan_index
);
2242 case TGSI_OPCODE_SUB
:
2243 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2244 FETCH(&r
[0], 0, chan_index
);
2245 FETCH(&r
[1], 1, chan_index
);
2246 micro_sub(&d
[chan_index
], &r
[0], &r
[1]);
2248 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2249 STORE(&d
[chan_index
], 0, chan_index
);
2253 case TGSI_OPCODE_LRP
:
2254 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2255 FETCH(&r
[0], 0, chan_index
);
2256 FETCH(&r
[1], 1, chan_index
);
2257 FETCH(&r
[2], 2, chan_index
);
2258 micro_sub( &r
[1], &r
[1], &r
[2] );
2259 micro_mul( &r
[0], &r
[0], &r
[1] );
2260 micro_add(&d
[chan_index
], &r
[0], &r
[2]);
2262 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2263 STORE(&d
[chan_index
], 0, chan_index
);
2267 case TGSI_OPCODE_CND
:
2268 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2269 FETCH(&r
[0], 0, chan_index
);
2270 FETCH(&r
[1], 1, chan_index
);
2271 FETCH(&r
[2], 2, chan_index
);
2272 micro_lt(&d
[chan_index
], &mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
], &r
[2], &r
[0], &r
[1]);
2274 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2275 STORE(&d
[chan_index
], 0, chan_index
);
2279 case TGSI_OPCODE_DP2A
:
2280 FETCH( &r
[0], 0, CHAN_X
);
2281 FETCH( &r
[1], 1, CHAN_X
);
2282 micro_mul( &r
[0], &r
[0], &r
[1] );
2284 FETCH( &r
[1], 0, CHAN_Y
);
2285 FETCH( &r
[2], 1, CHAN_Y
);
2286 micro_mul( &r
[1], &r
[1], &r
[2] );
2287 micro_add( &r
[0], &r
[0], &r
[1] );
2289 FETCH( &r
[2], 2, CHAN_X
);
2290 micro_add( &r
[0], &r
[0], &r
[2] );
2292 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2293 STORE( &r
[0], 0, chan_index
);
2297 case TGSI_OPCODE_FRC
:
2298 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2299 FETCH( &r
[0], 0, chan_index
);
2300 micro_frc(&d
[chan_index
], &r
[0]);
2302 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2303 STORE(&d
[chan_index
], 0, chan_index
);
2307 case TGSI_OPCODE_CLAMP
:
2308 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2309 FETCH(&r
[0], 0, chan_index
);
2310 FETCH(&r
[1], 1, chan_index
);
2311 micro_max(&r
[0], &r
[0], &r
[1]);
2312 FETCH(&r
[1], 2, chan_index
);
2313 micro_min(&d
[chan_index
], &r
[0], &r
[1]);
2315 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2316 STORE(&d
[chan_index
], 0, chan_index
);
2320 case TGSI_OPCODE_ROUND
:
2321 case TGSI_OPCODE_ARR
:
2322 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2323 FETCH( &r
[0], 0, chan_index
);
2324 micro_rnd(&d
[chan_index
], &r
[0]);
2326 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2327 STORE(&d
[chan_index
], 0, chan_index
);
2331 case TGSI_OPCODE_EX2
:
2332 FETCH(&r
[0], 0, CHAN_X
);
2334 micro_exp2( &r
[0], &r
[0] );
2336 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2337 STORE( &r
[0], 0, chan_index
);
2341 case TGSI_OPCODE_LG2
:
2342 FETCH( &r
[0], 0, CHAN_X
);
2343 micro_lg2( &r
[0], &r
[0] );
2344 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2345 STORE( &r
[0], 0, chan_index
);
2349 case TGSI_OPCODE_POW
:
2350 FETCH(&r
[0], 0, CHAN_X
);
2351 FETCH(&r
[1], 1, CHAN_X
);
2353 micro_pow( &r
[0], &r
[0], &r
[1] );
2355 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2356 STORE( &r
[0], 0, chan_index
);
2360 case TGSI_OPCODE_XPD
:
2361 FETCH(&r
[0], 0, CHAN_Y
);
2362 FETCH(&r
[1], 1, CHAN_Z
);
2364 micro_mul( &r
[2], &r
[0], &r
[1] );
2366 FETCH(&r
[3], 0, CHAN_Z
);
2367 FETCH(&r
[4], 1, CHAN_Y
);
2369 micro_mul( &r
[5], &r
[3], &r
[4] );
2370 micro_sub(&d
[CHAN_X
], &r
[2], &r
[5]);
2372 FETCH(&r
[2], 1, CHAN_X
);
2374 micro_mul( &r
[3], &r
[3], &r
[2] );
2376 FETCH(&r
[5], 0, CHAN_X
);
2378 micro_mul( &r
[1], &r
[1], &r
[5] );
2379 micro_sub(&d
[CHAN_Y
], &r
[3], &r
[1]);
2381 micro_mul( &r
[5], &r
[5], &r
[4] );
2382 micro_mul( &r
[0], &r
[0], &r
[2] );
2383 micro_sub(&d
[CHAN_Z
], &r
[5], &r
[0]);
2385 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2386 STORE(&d
[CHAN_X
], 0, CHAN_X
);
2388 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2389 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2391 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2392 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2394 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2395 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2399 case TGSI_OPCODE_ABS
:
2400 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2401 FETCH(&r
[0], 0, chan_index
);
2402 micro_abs(&d
[chan_index
], &r
[0]);
2404 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2405 STORE(&d
[chan_index
], 0, chan_index
);
2409 case TGSI_OPCODE_RCC
:
2410 FETCH(&r
[0], 0, CHAN_X
);
2411 micro_div(&r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0]);
2412 micro_float_clamp(&r
[0], &r
[0]);
2413 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2414 STORE(&r
[0], 0, chan_index
);
2418 case TGSI_OPCODE_DPH
:
2419 FETCH(&r
[0], 0, CHAN_X
);
2420 FETCH(&r
[1], 1, CHAN_X
);
2422 micro_mul( &r
[0], &r
[0], &r
[1] );
2424 FETCH(&r
[1], 0, CHAN_Y
);
2425 FETCH(&r
[2], 1, CHAN_Y
);
2427 micro_mul( &r
[1], &r
[1], &r
[2] );
2428 micro_add( &r
[0], &r
[0], &r
[1] );
2430 FETCH(&r
[1], 0, CHAN_Z
);
2431 FETCH(&r
[2], 1, CHAN_Z
);
2433 micro_mul( &r
[1], &r
[1], &r
[2] );
2434 micro_add( &r
[0], &r
[0], &r
[1] );
2436 FETCH(&r
[1], 1, CHAN_W
);
2438 micro_add( &r
[0], &r
[0], &r
[1] );
2440 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2441 STORE( &r
[0], 0, chan_index
);
2445 case TGSI_OPCODE_COS
:
2446 FETCH(&r
[0], 0, CHAN_X
);
2448 micro_cos( &r
[0], &r
[0] );
2450 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2451 STORE( &r
[0], 0, chan_index
);
2455 case TGSI_OPCODE_DDX
:
2456 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2457 FETCH( &r
[0], 0, chan_index
);
2458 micro_ddx(&d
[chan_index
], &r
[0]);
2460 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2461 STORE(&d
[chan_index
], 0, chan_index
);
2465 case TGSI_OPCODE_DDY
:
2466 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2467 FETCH( &r
[0], 0, chan_index
);
2468 micro_ddy(&d
[chan_index
], &r
[0]);
2470 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2471 STORE(&d
[chan_index
], 0, chan_index
);
2475 case TGSI_OPCODE_KILP
:
2476 exec_kilp (mach
, inst
);
2479 case TGSI_OPCODE_KIL
:
2480 exec_kil (mach
, inst
);
2483 case TGSI_OPCODE_PK2H
:
2487 case TGSI_OPCODE_PK2US
:
2491 case TGSI_OPCODE_PK4B
:
2495 case TGSI_OPCODE_PK4UB
:
2499 case TGSI_OPCODE_RFL
:
2500 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2501 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2502 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2503 /* r0 = dp3(src0, src0) */
2504 FETCH(&r
[2], 0, CHAN_X
);
2505 micro_mul(&r
[0], &r
[2], &r
[2]);
2506 FETCH(&r
[4], 0, CHAN_Y
);
2507 micro_mul(&r
[8], &r
[4], &r
[4]);
2508 micro_add(&r
[0], &r
[0], &r
[8]);
2509 FETCH(&r
[6], 0, CHAN_Z
);
2510 micro_mul(&r
[8], &r
[6], &r
[6]);
2511 micro_add(&r
[0], &r
[0], &r
[8]);
2513 /* r1 = dp3(src0, src1) */
2514 FETCH(&r
[3], 1, CHAN_X
);
2515 micro_mul(&r
[1], &r
[2], &r
[3]);
2516 FETCH(&r
[5], 1, CHAN_Y
);
2517 micro_mul(&r
[8], &r
[4], &r
[5]);
2518 micro_add(&r
[1], &r
[1], &r
[8]);
2519 FETCH(&r
[7], 1, CHAN_Z
);
2520 micro_mul(&r
[8], &r
[6], &r
[7]);
2521 micro_add(&r
[1], &r
[1], &r
[8]);
2523 /* r1 = 2 * r1 / r0 */
2524 micro_add(&r
[1], &r
[1], &r
[1]);
2525 micro_div(&r
[1], &r
[1], &r
[0]);
2527 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2528 micro_mul(&r
[2], &r
[2], &r
[1]);
2529 micro_sub(&r
[2], &r
[2], &r
[3]);
2530 STORE(&r
[2], 0, CHAN_X
);
2532 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2533 micro_mul(&r
[4], &r
[4], &r
[1]);
2534 micro_sub(&r
[4], &r
[4], &r
[5]);
2535 STORE(&r
[4], 0, CHAN_Y
);
2537 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2538 micro_mul(&r
[6], &r
[6], &r
[1]);
2539 micro_sub(&r
[6], &r
[6], &r
[7]);
2540 STORE(&r
[6], 0, CHAN_Z
);
2543 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2544 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2548 case TGSI_OPCODE_SEQ
:
2549 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2550 FETCH( &r
[0], 0, chan_index
);
2551 FETCH( &r
[1], 1, chan_index
);
2552 micro_eq(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2554 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2555 STORE(&d
[chan_index
], 0, chan_index
);
2559 case TGSI_OPCODE_SFL
:
2560 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2561 STORE(&mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, chan_index
);
2565 case TGSI_OPCODE_SGT
:
2566 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2567 FETCH( &r
[0], 0, chan_index
);
2568 FETCH( &r
[1], 1, chan_index
);
2569 micro_le(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
2571 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2572 STORE(&d
[chan_index
], 0, chan_index
);
2576 case TGSI_OPCODE_SIN
:
2577 FETCH( &r
[0], 0, CHAN_X
);
2578 micro_sin( &r
[0], &r
[0] );
2579 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2580 STORE( &r
[0], 0, chan_index
);
2584 case TGSI_OPCODE_SLE
:
2585 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2586 FETCH( &r
[0], 0, chan_index
);
2587 FETCH( &r
[1], 1, chan_index
);
2588 micro_le(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2590 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2591 STORE(&d
[chan_index
], 0, chan_index
);
2595 case TGSI_OPCODE_SNE
:
2596 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2597 FETCH( &r
[0], 0, chan_index
);
2598 FETCH( &r
[1], 1, chan_index
);
2599 micro_eq(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
2601 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2602 STORE(&d
[chan_index
], 0, chan_index
);
2606 case TGSI_OPCODE_STR
:
2607 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2608 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, chan_index
);
2612 case TGSI_OPCODE_TEX
:
2613 /* simple texture lookup */
2614 /* src[0] = texcoord */
2615 /* src[1] = sampler unit */
2616 exec_tex(mach
, inst
, FALSE
, FALSE
);
2619 case TGSI_OPCODE_TXB
:
2620 /* Texture lookup with lod bias */
2621 /* src[0] = texcoord (src[0].w = LOD bias) */
2622 /* src[1] = sampler unit */
2623 exec_tex(mach
, inst
, TRUE
, FALSE
);
2626 case TGSI_OPCODE_TXD
:
2627 /* Texture lookup with explicit partial derivatives */
2628 /* src[0] = texcoord */
2629 /* src[1] = d[strq]/dx */
2630 /* src[2] = d[strq]/dy */
2631 /* src[3] = sampler unit */
2632 exec_txd(mach
, inst
);
2635 case TGSI_OPCODE_TXL
:
2636 /* Texture lookup with explicit LOD */
2637 /* src[0] = texcoord (src[0].w = LOD) */
2638 /* src[1] = sampler unit */
2639 exec_tex(mach
, inst
, TRUE
, FALSE
);
2642 case TGSI_OPCODE_TXP
:
2643 /* Texture lookup with projection */
2644 /* src[0] = texcoord (src[0].w = projection) */
2645 /* src[1] = sampler unit */
2646 exec_tex(mach
, inst
, FALSE
, TRUE
);
2649 case TGSI_OPCODE_UP2H
:
2653 case TGSI_OPCODE_UP2US
:
2657 case TGSI_OPCODE_UP4B
:
2661 case TGSI_OPCODE_UP4UB
:
2665 case TGSI_OPCODE_X2D
:
2666 FETCH(&r
[0], 1, CHAN_X
);
2667 FETCH(&r
[1], 1, CHAN_Y
);
2668 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2669 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2670 FETCH(&r
[2], 2, CHAN_X
);
2671 micro_mul(&r
[2], &r
[2], &r
[0]);
2672 FETCH(&r
[3], 2, CHAN_Y
);
2673 micro_mul(&r
[3], &r
[3], &r
[1]);
2674 micro_add(&r
[2], &r
[2], &r
[3]);
2675 FETCH(&r
[3], 0, CHAN_X
);
2676 micro_add(&d
[CHAN_X
], &r
[2], &r
[3]);
2679 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2680 IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2681 FETCH(&r
[2], 2, CHAN_Z
);
2682 micro_mul(&r
[2], &r
[2], &r
[0]);
2683 FETCH(&r
[3], 2, CHAN_W
);
2684 micro_mul(&r
[3], &r
[3], &r
[1]);
2685 micro_add(&r
[2], &r
[2], &r
[3]);
2686 FETCH(&r
[3], 0, CHAN_Y
);
2687 micro_add(&d
[CHAN_Y
], &r
[2], &r
[3]);
2690 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2691 STORE(&d
[CHAN_X
], 0, CHAN_X
);
2693 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2694 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2696 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2697 STORE(&d
[CHAN_X
], 0, CHAN_Z
);
2699 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2700 STORE(&d
[CHAN_Y
], 0, CHAN_W
);
2704 case TGSI_OPCODE_ARA
:
2708 case TGSI_OPCODE_BRA
:
2712 case TGSI_OPCODE_CAL
:
2713 /* skip the call if no execution channels are enabled */
2714 if (mach
->ExecMask
) {
2717 /* First, record the depths of the execution stacks.
2718 * This is important for deeply nested/looped return statements.
2719 * We have to unwind the stacks by the correct amount. For a
2720 * real code generator, we could determine the number of entries
2721 * to pop off each stack with simple static analysis and avoid
2722 * implementing this data structure at run time.
2724 mach
->CallStack
[mach
->CallStackTop
].CondStackTop
= mach
->CondStackTop
;
2725 mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
= mach
->LoopStackTop
;
2726 mach
->CallStack
[mach
->CallStackTop
].ContStackTop
= mach
->ContStackTop
;
2727 /* note that PC was already incremented above */
2728 mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
= *pc
;
2730 mach
->CallStackTop
++;
2732 /* Second, push the Cond, Loop, Cont, Func stacks */
2733 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2734 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2735 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2736 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2737 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2738 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2739 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
2740 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
2742 /* Finally, jump to the subroutine */
2743 *pc
= inst
->Label
.Label
;
2747 case TGSI_OPCODE_RET
:
2748 mach
->FuncMask
&= ~mach
->ExecMask
;
2749 UPDATE_EXEC_MASK(mach
);
2751 if (mach
->FuncMask
== 0x0) {
2752 /* really return now (otherwise, keep executing) */
2754 if (mach
->CallStackTop
== 0) {
2755 /* returning from main() */
2760 assert(mach
->CallStackTop
> 0);
2761 mach
->CallStackTop
--;
2763 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
2764 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
2766 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
2767 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
2769 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
2770 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
2772 assert(mach
->FuncStackTop
> 0);
2773 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
2775 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
2777 UPDATE_EXEC_MASK(mach
);
2781 case TGSI_OPCODE_SSG
:
2782 /* TGSI_OPCODE_SGN */
2783 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2784 FETCH( &r
[0], 0, chan_index
);
2785 micro_sgn(&d
[chan_index
], &r
[0]);
2787 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2788 STORE(&d
[chan_index
], 0, chan_index
);
2792 case TGSI_OPCODE_CMP
:
2793 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2794 FETCH(&r
[0], 0, chan_index
);
2795 FETCH(&r
[1], 1, chan_index
);
2796 FETCH(&r
[2], 2, chan_index
);
2797 micro_lt(&d
[chan_index
], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2]);
2799 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2800 STORE(&d
[chan_index
], 0, chan_index
);
2804 case TGSI_OPCODE_SCS
:
2805 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2806 FETCH( &r
[0], 0, CHAN_X
);
2807 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2808 micro_cos(&r
[1], &r
[0]);
2809 STORE(&r
[1], 0, CHAN_X
);
2811 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2812 micro_sin(&r
[1], &r
[0]);
2813 STORE(&r
[1], 0, CHAN_Y
);
2816 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
2817 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
2819 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
2820 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2824 case TGSI_OPCODE_NRM
:
2825 /* 3-component vector normalize */
2826 if(IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2827 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2828 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2829 /* r3 = sqrt(dp3(src0, src0)) */
2830 FETCH(&r
[0], 0, CHAN_X
);
2831 micro_mul(&r
[3], &r
[0], &r
[0]);
2832 FETCH(&r
[1], 0, CHAN_Y
);
2833 micro_mul(&r
[4], &r
[1], &r
[1]);
2834 micro_add(&r
[3], &r
[3], &r
[4]);
2835 FETCH(&r
[2], 0, CHAN_Z
);
2836 micro_mul(&r
[4], &r
[2], &r
[2]);
2837 micro_add(&r
[3], &r
[3], &r
[4]);
2838 micro_sqrt(&r
[3], &r
[3]);
2840 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2841 micro_div(&r
[0], &r
[0], &r
[3]);
2842 STORE(&r
[0], 0, CHAN_X
);
2844 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2845 micro_div(&r
[1], &r
[1], &r
[3]);
2846 STORE(&r
[1], 0, CHAN_Y
);
2848 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2849 micro_div(&r
[2], &r
[2], &r
[3]);
2850 STORE(&r
[2], 0, CHAN_Z
);
2853 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2854 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2858 case TGSI_OPCODE_NRM4
:
2859 /* 4-component vector normalize */
2861 union tgsi_exec_channel tmp
, dot
;
2863 /* tmp = dp4(src0, src0): */
2864 FETCH( &r
[0], 0, CHAN_X
);
2865 micro_mul( &tmp
, &r
[0], &r
[0] );
2867 FETCH( &r
[1], 0, CHAN_Y
);
2868 micro_mul( &dot
, &r
[1], &r
[1] );
2869 micro_add( &tmp
, &tmp
, &dot
);
2871 FETCH( &r
[2], 0, CHAN_Z
);
2872 micro_mul( &dot
, &r
[2], &r
[2] );
2873 micro_add( &tmp
, &tmp
, &dot
);
2875 FETCH( &r
[3], 0, CHAN_W
);
2876 micro_mul( &dot
, &r
[3], &r
[3] );
2877 micro_add( &tmp
, &tmp
, &dot
);
2879 /* tmp = 1 / sqrt(tmp) */
2880 micro_sqrt( &tmp
, &tmp
);
2881 micro_div( &tmp
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &tmp
);
2883 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2884 /* chan = chan * tmp */
2885 micro_mul( &r
[chan_index
], &tmp
, &r
[chan_index
] );
2886 STORE( &r
[chan_index
], 0, chan_index
);
2891 case TGSI_OPCODE_DIV
:
2895 case TGSI_OPCODE_DP2
:
2896 FETCH( &r
[0], 0, CHAN_X
);
2897 FETCH( &r
[1], 1, CHAN_X
);
2898 micro_mul( &r
[0], &r
[0], &r
[1] );
2900 FETCH( &r
[1], 0, CHAN_Y
);
2901 FETCH( &r
[2], 1, CHAN_Y
);
2902 micro_mul( &r
[1], &r
[1], &r
[2] );
2903 micro_add( &r
[0], &r
[0], &r
[1] );
2905 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2906 STORE( &r
[0], 0, chan_index
);
2910 case TGSI_OPCODE_IF
:
2912 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2913 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2914 FETCH( &r
[0], 0, CHAN_X
);
2915 /* update CondMask */
2917 mach
->CondMask
&= ~0x1;
2920 mach
->CondMask
&= ~0x2;
2923 mach
->CondMask
&= ~0x4;
2926 mach
->CondMask
&= ~0x8;
2928 UPDATE_EXEC_MASK(mach
);
2929 /* Todo: If CondMask==0, jump to ELSE */
2932 case TGSI_OPCODE_ELSE
:
2933 /* invert CondMask wrt previous mask */
2936 assert(mach
->CondStackTop
> 0);
2937 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
2938 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
2939 UPDATE_EXEC_MASK(mach
);
2940 /* Todo: If CondMask==0, jump to ENDIF */
2944 case TGSI_OPCODE_ENDIF
:
2946 assert(mach
->CondStackTop
> 0);
2947 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2948 UPDATE_EXEC_MASK(mach
);
2951 case TGSI_OPCODE_END
:
2952 /* halt execution */
2956 case TGSI_OPCODE_REP
:
2960 case TGSI_OPCODE_ENDREP
:
2964 case TGSI_OPCODE_PUSHA
:
2968 case TGSI_OPCODE_POPA
:
2972 case TGSI_OPCODE_CEIL
:
2973 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2974 FETCH( &r
[0], 0, chan_index
);
2975 micro_ceil(&d
[chan_index
], &r
[0]);
2977 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2978 STORE(&d
[chan_index
], 0, chan_index
);
2982 case TGSI_OPCODE_I2F
:
2983 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2984 FETCH( &r
[0], 0, chan_index
);
2985 micro_i2f(&d
[chan_index
], &r
[0]);
2987 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2988 STORE(&d
[chan_index
], 0, chan_index
);
2992 case TGSI_OPCODE_NOT
:
2993 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2994 FETCH( &r
[0], 0, chan_index
);
2995 micro_not(&d
[chan_index
], &r
[0]);
2997 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2998 STORE(&d
[chan_index
], 0, chan_index
);
3002 case TGSI_OPCODE_TRUNC
:
3003 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3004 FETCH( &r
[0], 0, chan_index
);
3005 micro_trunc(&d
[chan_index
], &r
[0]);
3007 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3008 STORE(&d
[chan_index
], 0, chan_index
);
3012 case TGSI_OPCODE_SHL
:
3013 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3014 FETCH( &r
[0], 0, chan_index
);
3015 FETCH( &r
[1], 1, chan_index
);
3016 micro_shl(&d
[chan_index
], &r
[0], &r
[1]);
3018 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3019 STORE(&d
[chan_index
], 0, chan_index
);
3023 case TGSI_OPCODE_SHR
:
3024 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3025 FETCH( &r
[0], 0, chan_index
);
3026 FETCH( &r
[1], 1, chan_index
);
3027 micro_ishr(&d
[chan_index
], &r
[0], &r
[1]);
3029 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3030 STORE(&d
[chan_index
], 0, chan_index
);
3034 case TGSI_OPCODE_AND
:
3035 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3036 FETCH( &r
[0], 0, chan_index
);
3037 FETCH( &r
[1], 1, chan_index
);
3038 micro_and(&d
[chan_index
], &r
[0], &r
[1]);
3040 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3041 STORE(&d
[chan_index
], 0, chan_index
);
3045 case TGSI_OPCODE_OR
:
3046 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3047 FETCH( &r
[0], 0, chan_index
);
3048 FETCH( &r
[1], 1, chan_index
);
3049 micro_or(&d
[chan_index
], &r
[0], &r
[1]);
3051 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3052 STORE(&d
[chan_index
], 0, chan_index
);
3056 case TGSI_OPCODE_MOD
:
3060 case TGSI_OPCODE_XOR
:
3061 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3062 FETCH( &r
[0], 0, chan_index
);
3063 FETCH( &r
[1], 1, chan_index
);
3064 micro_xor(&d
[chan_index
], &r
[0], &r
[1]);
3066 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3067 STORE(&d
[chan_index
], 0, chan_index
);
3071 case TGSI_OPCODE_SAD
:
3075 case TGSI_OPCODE_TXF
:
3079 case TGSI_OPCODE_TXQ
:
3083 case TGSI_OPCODE_EMIT
:
3084 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += 16;
3085 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
3088 case TGSI_OPCODE_ENDPRIM
:
3089 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]++;
3090 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]] = 0;
3093 case TGSI_OPCODE_BGNFOR
:
3094 assert(mach
->LoopCounterStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3095 for (chan_index
= 0; chan_index
< 3; chan_index
++) {
3096 FETCH( &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
].xyzw
[chan_index
], 0, chan_index
);
3098 ++mach
->LoopCounterStackTop
;
3099 STORE(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
], 0, CHAN_X
);
3100 /* update LoopMask */
3101 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[0] <= 0.0f
) {
3102 mach
->LoopMask
&= ~0x1;
3104 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[1] <= 0.0f
) {
3105 mach
->LoopMask
&= ~0x2;
3107 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[2] <= 0.0f
) {
3108 mach
->LoopMask
&= ~0x4;
3110 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[3] <= 0.0f
) {
3111 mach
->LoopMask
&= ~0x8;
3113 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3114 UPDATE_EXEC_MASK(mach
);
3115 /* fall-through (for now) */
3116 case TGSI_OPCODE_BGNLOOP
:
3117 /* push LoopMask and ContMasks */
3118 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3119 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
3120 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3121 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
3122 assert(mach
->LoopLabelStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3123 mach
->LoopLabelStack
[mach
->LoopLabelStackTop
++] = *pc
- 1;
3126 case TGSI_OPCODE_ENDFOR
:
3127 assert(mach
->LoopCounterStackTop
> 0);
3128 micro_sub(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
],
3129 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
],
3130 &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
3131 /* update LoopMask */
3132 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[0] <= 0.0f
) {
3133 mach
->LoopMask
&= ~0x1;
3135 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[1] <= 0.0f
) {
3136 mach
->LoopMask
&= ~0x2;
3138 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[2] <= 0.0f
) {
3139 mach
->LoopMask
&= ~0x4;
3141 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[3] <= 0.0f
) {
3142 mach
->LoopMask
&= ~0x8;
3144 micro_add(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
],
3145 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
],
3146 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Z
]);
3147 assert(mach
->LoopLabelStackTop
> 0);
3148 inst
= mach
->Instructions
+ mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1];
3149 STORE(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
].xyzw
[CHAN_X
], 0, CHAN_X
);
3150 /* Restore ContMask, but don't pop */
3151 assert(mach
->ContStackTop
> 0);
3152 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3153 UPDATE_EXEC_MASK(mach
);
3154 if (mach
->ExecMask
) {
3155 /* repeat loop: jump to instruction just past BGNLOOP */
3156 assert(mach
->LoopLabelStackTop
> 0);
3157 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
3160 /* exit loop: pop LoopMask */
3161 assert(mach
->LoopStackTop
> 0);
3162 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3164 assert(mach
->ContStackTop
> 0);
3165 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3166 assert(mach
->LoopLabelStackTop
> 0);
3167 --mach
->LoopLabelStackTop
;
3168 assert(mach
->LoopCounterStackTop
> 0);
3169 --mach
->LoopCounterStackTop
;
3171 UPDATE_EXEC_MASK(mach
);
3174 case TGSI_OPCODE_ENDLOOP
:
3175 /* Restore ContMask, but don't pop */
3176 assert(mach
->ContStackTop
> 0);
3177 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3178 UPDATE_EXEC_MASK(mach
);
3179 if (mach
->ExecMask
) {
3180 /* repeat loop: jump to instruction just past BGNLOOP */
3181 assert(mach
->LoopLabelStackTop
> 0);
3182 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
3185 /* exit loop: pop LoopMask */
3186 assert(mach
->LoopStackTop
> 0);
3187 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3189 assert(mach
->ContStackTop
> 0);
3190 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3191 assert(mach
->LoopLabelStackTop
> 0);
3192 --mach
->LoopLabelStackTop
;
3194 UPDATE_EXEC_MASK(mach
);
3197 case TGSI_OPCODE_BRK
:
3198 /* turn off loop channels for each enabled exec channel */
3199 mach
->LoopMask
&= ~mach
->ExecMask
;
3200 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3201 UPDATE_EXEC_MASK(mach
);
3204 case TGSI_OPCODE_CONT
:
3205 /* turn off cont channels for each enabled exec channel */
3206 mach
->ContMask
&= ~mach
->ExecMask
;
3207 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3208 UPDATE_EXEC_MASK(mach
);
3211 case TGSI_OPCODE_BGNSUB
:
3215 case TGSI_OPCODE_ENDSUB
:
3219 case TGSI_OPCODE_NOP
:
3227 #define DEBUG_EXECUTION 0
3231 * Run TGSI interpreter.
3232 * \return bitmask of "alive" quad components
3235 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
3240 mach
->CondMask
= 0xf;
3241 mach
->LoopMask
= 0xf;
3242 mach
->ContMask
= 0xf;
3243 mach
->FuncMask
= 0xf;
3244 mach
->ExecMask
= 0xf;
3246 assert(mach
->CondStackTop
== 0);
3247 assert(mach
->LoopStackTop
== 0);
3248 assert(mach
->ContStackTop
== 0);
3249 assert(mach
->CallStackTop
== 0);
3251 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
3252 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
3254 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
3255 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
3256 mach
->Primitives
[0] = 0;
3259 for (i
= 0; i
< QUAD_SIZE
; i
++) {
3260 mach
->Temps
[TEMP_CC_I
].xyzw
[TEMP_CC_C
].u
[i
] =
3261 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_X_SHIFT
) |
3262 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Y_SHIFT
) |
3263 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Z_SHIFT
) |
3264 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_W_SHIFT
);
3267 /* execute declarations (interpolants) */
3268 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
3269 exec_declaration( mach
, mach
->Declarations
+i
);
3274 struct tgsi_exec_vector temps
[TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
];
3275 struct tgsi_exec_vector outputs
[PIPE_MAX_ATTRIBS
];
3278 memcpy(temps
, mach
->Temps
, sizeof(temps
));
3279 memcpy(outputs
, mach
->Outputs
, sizeof(outputs
));
3282 /* execute instructions, until pc is set to -1 */
3288 tgsi_dump_instruction(&mach
->Instructions
[pc
], inst
++);
3291 assert(pc
< (int) mach
->NumInstructions
);
3292 exec_instruction(mach
, mach
->Instructions
+ pc
, &pc
);
3295 for (i
= 0; i
< TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
; i
++) {
3296 if (memcmp(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]))) {
3299 memcpy(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]));
3300 debug_printf("TEMP[%2u] = ", i
);
3301 for (j
= 0; j
< 4; j
++) {
3305 debug_printf("(%6f, %6f, %6f, %6f)\n",
3306 temps
[i
].xyzw
[0].f
[j
],
3307 temps
[i
].xyzw
[1].f
[j
],
3308 temps
[i
].xyzw
[2].f
[j
],
3309 temps
[i
].xyzw
[3].f
[j
]);
3313 for (i
= 0; i
< PIPE_MAX_ATTRIBS
; i
++) {
3314 if (memcmp(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]))) {
3317 memcpy(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]));
3318 debug_printf("OUT[%2u] = ", i
);
3319 for (j
= 0; j
< 4; j
++) {
3323 debug_printf("{%6f, %6f, %6f, %6f}\n",
3324 outputs
[i
].xyzw
[0].f
[j
],
3325 outputs
[i
].xyzw
[1].f
[j
],
3326 outputs
[i
].xyzw
[2].f
[j
],
3327 outputs
[i
].xyzw
[3].f
[j
]);
3336 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3337 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
3339 * Scale back depth component.
3341 for (i
= 0; i
< 4; i
++)
3342 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
3346 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];