1 /**************************************************************************
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
/**
 * TGSI interpreter/executor.
 *
 * Flow control information:
 *
 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel),
 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
 * care, since a condition may be true for some quad components but false
 * for other components.
 *
 * We basically execute all statements (even if they're in the part of
 * an IF/ELSE clause that's "not taken") and use a special mask to
 * control writing to destination registers.  This is the ExecMask.
 *
 * The ExecMask is computed by ANDing four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK), which are controlled by the
 * flow control instructions (for example IF/ELSE/ENDIF, LOOP/ENDLOOP and
 * CONT).
 */
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_dump.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
60 #include "util/u_memory.h"
61 #include "util/u_math.h"
/*
 * Position of each of the four pixels inside a quad.  These values are used
 * as indices into the per-pixel arrays of a channel, e.g.
 * src->f[TILE_BOTTOM_RIGHT].
 */
#define TILE_TOP_LEFT      0
#define TILE_TOP_RIGHT     1
#define TILE_BOTTOM_LEFT   2
#define TILE_BOTTOM_RIGHT  3
/*
 * Shorthand locations of various utility registers inside the machine's
 * Temps[] array (_I = register Index, _C = Channel within that register).
 * A slot is addressed as mach->Temps[TEMP_x_I].xyzw[TEMP_x_C].
 */

/* Bit-pattern constants (stored through the .u view). */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C

/* Floating-point constants (stored through the .f view). */
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C

/* Interpreter state slots. */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C

/* More floating-point constants. */
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C

/* Whole-register shorthands (no channel part). */
#define TEMP_R0            TGSI_EXEC_TEMP_R0
#define TEMP_P0            TGSI_EXEC_TEMP_P0
/**
 * Non-zero when channel CHAN is set in the write mask of the first
 * destination register (Dst[0]) of instruction INST.
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))

/** Same test as IS_CHANNEL_ENABLED, applied to the second destination (Dst[1]). */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))

/**
 * Loop header: executes the statement that follows once for every channel in
 * [0, NUM_CHANNELS) that is enabled in Dst[0]'s write mask, with CHAN holding
 * the channel number.  CHAN must be a modifiable integer lvalue; it is
 * parenthesized so that expressions such as *ptr are incremented correctly.
 *
 * The macro expands to "for (...) if (...)", so the body must not be followed
 * by a bare "else".
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same loop as FOR_EACH_ENABLED_CHANNEL, driven by Dst[1]'s write mask. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
/**
 * Recompute the execution mask of machine MACH (a pointer expression).
 * A bit stays set in ExecMask only when it is set in all four of CondMask,
 * LoopMask, ContMask and FuncMask.  MACH is parenthesized so that any pointer
 * expression (e.g. &machine) may be passed.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
129 static const union tgsi_exec_channel ZeroVec
=
130 { { 0.0, 0.0, 0.0, 0.0 } };
135 check_inf_or_nan(const union tgsi_exec_channel
*chan
)
137 assert(!util_is_inf_or_nan(chan
->f
[0]));
138 assert(!util_is_inf_or_nan(chan
->f
[1]));
139 assert(!util_is_inf_or_nan(chan
->f
[2]));
140 assert(!util_is_inf_or_nan(chan
->f
[3]));
147 print_chan(const char *msg
, const union tgsi_exec_channel
*chan
)
149 debug_printf("%s = {%f, %f, %f, %f}\n",
150 msg
, chan
->f
[0], chan
->f
[1], chan
->f
[2], chan
->f
[3]);
157 print_temp(const struct tgsi_exec_machine
*mach
, uint index
)
159 const struct tgsi_exec_vector
*tmp
= &mach
->Temps
[index
];
161 debug_printf("Temp[%u] =\n", index
);
162 for (i
= 0; i
< 4; i
++) {
163 debug_printf(" %c: { %f, %f, %f, %f }\n",
/**
 * Check if there's a potential src/dst register data dependency when
 * using SOA execution.
 * Example:
 *   MOV T, T.yxwz;
 * This would expand into:
 *   MOV t0, t1;
 *   MOV t1, t0;
 *   MOV t2, t3;
 *   MOV t3, t2;
 * The second instruction will have the wrong value for t0 if executed as-is.
 */
187 tgsi_check_soa_dependencies(const struct tgsi_full_instruction
*inst
)
191 uint writemask
= inst
->Dst
[0].Register
.WriteMask
;
192 if (writemask
== TGSI_WRITEMASK_X
||
193 writemask
== TGSI_WRITEMASK_Y
||
194 writemask
== TGSI_WRITEMASK_Z
||
195 writemask
== TGSI_WRITEMASK_W
||
196 writemask
== TGSI_WRITEMASK_NONE
) {
197 /* no chance of data dependency */
201 /* loop over src regs */
202 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
203 if ((inst
->Src
[i
].Register
.File
==
204 inst
->Dst
[0].Register
.File
) &&
205 (inst
->Src
[i
].Register
.Index
==
206 inst
->Dst
[0].Register
.Index
)) {
207 /* loop over dest channels */
208 uint channelsWritten
= 0x0;
209 FOR_EACH_ENABLED_CHANNEL(*inst
, chan
) {
210 /* check if we're reading a channel that's been written */
211 uint swizzle
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[i
], chan
);
212 if (channelsWritten
& (1 << swizzle
)) {
216 channelsWritten
|= (1 << chan
);
225 * Initialize machine state by expanding tokens to full instructions,
226 * allocating temporary storage, setting up constants, etc.
227 * After this, we can call tgsi_exec_machine_run() many times.
230 tgsi_exec_machine_bind_shader(
231 struct tgsi_exec_machine
*mach
,
232 const struct tgsi_token
*tokens
,
234 struct tgsi_sampler
**samplers
)
237 struct tgsi_parse_context parse
;
238 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
239 struct tgsi_full_instruction
*instructions
;
240 struct tgsi_full_declaration
*declarations
;
241 uint maxInstructions
= 10, numInstructions
= 0;
242 uint maxDeclarations
= 10, numDeclarations
= 0;
246 tgsi_dump(tokens
, 0);
251 mach
->Tokens
= tokens
;
252 mach
->Samplers
= samplers
;
254 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
255 if (k
!= TGSI_PARSE_OK
) {
256 debug_printf( "Problem parsing!\n" );
260 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
264 declarations
= (struct tgsi_full_declaration
*)
265 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
271 instructions
= (struct tgsi_full_instruction
*)
272 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
275 FREE( declarations
);
279 while( !tgsi_parse_end_of_tokens( &parse
) ) {
280 uint pointer
= parse
.Position
;
283 tgsi_parse_token( &parse
);
284 switch( parse
.FullToken
.Token
.Type
) {
285 case TGSI_TOKEN_TYPE_DECLARATION
:
286 /* save expanded declaration */
287 if (numDeclarations
== maxDeclarations
) {
288 declarations
= REALLOC(declarations
,
290 * sizeof(struct tgsi_full_declaration
),
291 (maxDeclarations
+ 10)
292 * sizeof(struct tgsi_full_declaration
));
293 maxDeclarations
+= 10;
295 memcpy(declarations
+ numDeclarations
,
296 &parse
.FullToken
.FullDeclaration
,
297 sizeof(declarations
[0]));
301 case TGSI_TOKEN_TYPE_IMMEDIATE
:
303 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
305 assert( mach
->ImmLimit
+ 1 <= TGSI_EXEC_NUM_IMMEDIATES
);
307 for( i
= 0; i
< size
; i
++ ) {
308 mach
->Imms
[mach
->ImmLimit
][i
] =
309 parse
.FullToken
.FullImmediate
.u
[i
].Float
;
315 case TGSI_TOKEN_TYPE_INSTRUCTION
:
316 assert( labels
->count
< MAX_LABELS
);
318 labels
->labels
[labels
->count
][0] = instno
;
319 labels
->labels
[labels
->count
][1] = pointer
;
322 /* save expanded instruction */
323 if (numInstructions
== maxInstructions
) {
324 instructions
= REALLOC(instructions
,
326 * sizeof(struct tgsi_full_instruction
),
327 (maxInstructions
+ 10)
328 * sizeof(struct tgsi_full_instruction
));
329 maxInstructions
+= 10;
332 memcpy(instructions
+ numInstructions
,
333 &parse
.FullToken
.FullInstruction
,
334 sizeof(instructions
[0]));
339 case TGSI_TOKEN_TYPE_PROPERTY
:
346 tgsi_parse_free (&parse
);
348 if (mach
->Declarations
) {
349 FREE( mach
->Declarations
);
351 mach
->Declarations
= declarations
;
352 mach
->NumDeclarations
= numDeclarations
;
354 if (mach
->Instructions
) {
355 FREE( mach
->Instructions
);
357 mach
->Instructions
= instructions
;
358 mach
->NumInstructions
= numInstructions
;
362 struct tgsi_exec_machine
*
363 tgsi_exec_machine_create( void )
365 struct tgsi_exec_machine
*mach
;
368 mach
= align_malloc( sizeof *mach
, 16 );
372 memset(mach
, 0, sizeof(*mach
));
374 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_TEMP_ADDR
];
375 mach
->Predicates
= &mach
->Temps
[TGSI_EXEC_TEMP_P0
];
377 /* Setup constants. */
378 for( i
= 0; i
< 4; i
++ ) {
379 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
380 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
381 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
382 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
383 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
384 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
385 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
386 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
387 mach
->Temps
[TEMP_3_I
].xyzw
[TEMP_3_C
].f
[i
] = 3.0f
;
388 mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
].f
[i
] = 0.5f
;
392 /* silence warnings */
/*
 * Tear down an interpreter machine.  The statements visible here hand the
 * machine's Instructions and Declarations arrays to FREE().
 * NOTE(review): this listing seems to be missing lines around these calls
 * (e.g. any NULL guard and the release of 'mach' itself) -- confirm against
 * the full file before relying on this description.
 */
406 tgsi_exec_machine_destroy(struct tgsi_exec_machine
*mach
)
409 FREE(mach
->Instructions
);
410 FREE(mach
->Declarations
);
419 union tgsi_exec_channel
*dst
,
420 const union tgsi_exec_channel
*src
)
422 dst
->f
[0] = fabsf( src
->f
[0] );
423 dst
->f
[1] = fabsf( src
->f
[1] );
424 dst
->f
[2] = fabsf( src
->f
[2] );
425 dst
->f
[3] = fabsf( src
->f
[3] );
430 union tgsi_exec_channel
*dst
,
431 const union tgsi_exec_channel
*src0
,
432 const union tgsi_exec_channel
*src1
)
434 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
435 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
436 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
437 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
443 union tgsi_exec_channel
*dst
,
444 const union tgsi_exec_channel
*src0
,
445 const union tgsi_exec_channel
*src1
)
447 dst
->i
[0] = src0
->i
[0] + src1
->i
[0];
448 dst
->i
[1] = src0
->i
[1] + src1
->i
[1];
449 dst
->i
[2] = src0
->i
[2] + src1
->i
[2];
450 dst
->i
[3] = src0
->i
[3] + src1
->i
[3];
456 union tgsi_exec_channel
*dst
,
457 const union tgsi_exec_channel
*src0
,
458 const union tgsi_exec_channel
*src1
)
460 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
461 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
462 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
463 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
468 union tgsi_exec_channel
*dst
,
469 const union tgsi_exec_channel
*src
)
471 dst
->f
[0] = ceilf( src
->f
[0] );
472 dst
->f
[1] = ceilf( src
->f
[1] );
473 dst
->f
[2] = ceilf( src
->f
[2] );
474 dst
->f
[3] = ceilf( src
->f
[3] );
479 union tgsi_exec_channel
*dst
,
480 const union tgsi_exec_channel
*src
)
482 dst
->f
[0] = cosf( src
->f
[0] );
483 dst
->f
[1] = cosf( src
->f
[1] );
484 dst
->f
[2] = cosf( src
->f
[2] );
485 dst
->f
[3] = cosf( src
->f
[3] );
490 union tgsi_exec_channel
*dst
,
491 const union tgsi_exec_channel
*src
)
496 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
501 union tgsi_exec_channel
*dst
,
502 const union tgsi_exec_channel
*src
)
507 dst
->f
[3] = src
->f
[TILE_BOTTOM_LEFT
] - src
->f
[TILE_TOP_LEFT
];
512 union tgsi_exec_channel
*dst
,
513 const union tgsi_exec_channel
*src0
,
514 const union tgsi_exec_channel
*src1
)
516 if (src1
->f
[0] != 0) {
517 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
519 if (src1
->f
[1] != 0) {
520 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
522 if (src1
->f
[2] != 0) {
523 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
525 if (src1
->f
[3] != 0) {
526 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
533 union tgsi_exec_channel
*dst
,
534 const union tgsi_exec_channel
*src0
,
535 const union tgsi_exec_channel
*src1
)
537 dst
->u
[0] = src0
->u
[0] / src1
->u
[0];
538 dst
->u
[1] = src0
->u
[1] / src1
->u
[1];
539 dst
->u
[2] = src0
->u
[2] / src1
->u
[2];
540 dst
->u
[3] = src0
->u
[3] / src1
->u
[3];
546 union tgsi_exec_channel
*dst
,
547 const union tgsi_exec_channel
*src0
,
548 const union tgsi_exec_channel
*src1
,
549 const union tgsi_exec_channel
*src2
,
550 const union tgsi_exec_channel
*src3
)
552 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
553 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
554 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
555 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
561 union tgsi_exec_channel
*dst
,
562 const union tgsi_exec_channel
*src0
,
563 const union tgsi_exec_channel
*src1
,
564 const union tgsi_exec_channel
*src2
,
565 const union tgsi_exec_channel
*src3
)
567 dst
->i
[0] = src0
->i
[0] == src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
568 dst
->i
[1] = src0
->i
[1] == src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
569 dst
->i
[2] = src0
->i
[2] == src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
570 dst
->i
[3] = src0
->i
[3] == src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
576 union tgsi_exec_channel
*dst
,
577 const union tgsi_exec_channel
*src
)
580 dst
->f
[0] = util_fast_exp2( src
->f
[0] );
581 dst
->f
[1] = util_fast_exp2( src
->f
[1] );
582 dst
->f
[2] = util_fast_exp2( src
->f
[2] );
583 dst
->f
[3] = util_fast_exp2( src
->f
[3] );
587 /* Inf is okay for this instruction, so clamp it to silence assertions. */
589 union tgsi_exec_channel clamped
;
591 for (i
= 0; i
< 4; i
++) {
592 if (src
->f
[i
] > 127.99999f
) {
593 clamped
.f
[i
] = 127.99999f
;
594 } else if (src
->f
[i
] < -126.99999f
) {
595 clamped
.f
[i
] = -126.99999f
;
597 clamped
.f
[i
] = src
->f
[i
];
603 dst
->f
[0] = powf( 2.0f
, src
->f
[0] );
604 dst
->f
[1] = powf( 2.0f
, src
->f
[1] );
605 dst
->f
[2] = powf( 2.0f
, src
->f
[2] );
606 dst
->f
[3] = powf( 2.0f
, src
->f
[3] );
613 union tgsi_exec_channel
*dst
,
614 const union tgsi_exec_channel
*src
)
616 dst
->u
[0] = (uint
) src
->f
[0];
617 dst
->u
[1] = (uint
) src
->f
[1];
618 dst
->u
[2] = (uint
) src
->f
[2];
619 dst
->u
[3] = (uint
) src
->f
[3];
624 micro_float_clamp(union tgsi_exec_channel
*dst
,
625 const union tgsi_exec_channel
*src
)
629 for (i
= 0; i
< 4; i
++) {
630 if (src
->f
[i
] > 0.0f
) {
631 if (src
->f
[i
] > 1.884467e+019f
)
632 dst
->f
[i
] = 1.884467e+019f
;
633 else if (src
->f
[i
] < 5.42101e-020f
)
634 dst
->f
[i
] = 5.42101e-020f
;
636 dst
->f
[i
] = src
->f
[i
];
639 if (src
->f
[i
] < -1.884467e+019f
)
640 dst
->f
[i
] = -1.884467e+019f
;
641 else if (src
->f
[i
] > -5.42101e-020f
)
642 dst
->f
[i
] = -5.42101e-020f
;
644 dst
->f
[i
] = src
->f
[i
];
651 union tgsi_exec_channel
*dst
,
652 const union tgsi_exec_channel
*src
)
654 dst
->f
[0] = floorf( src
->f
[0] );
655 dst
->f
[1] = floorf( src
->f
[1] );
656 dst
->f
[2] = floorf( src
->f
[2] );
657 dst
->f
[3] = floorf( src
->f
[3] );
662 union tgsi_exec_channel
*dst
,
663 const union tgsi_exec_channel
*src
)
665 dst
->f
[0] = src
->f
[0] - floorf( src
->f
[0] );
666 dst
->f
[1] = src
->f
[1] - floorf( src
->f
[1] );
667 dst
->f
[2] = src
->f
[2] - floorf( src
->f
[2] );
668 dst
->f
[3] = src
->f
[3] - floorf( src
->f
[3] );
673 union tgsi_exec_channel
*dst
,
674 const union tgsi_exec_channel
*src
)
676 dst
->f
[0] = (float) src
->i
[0];
677 dst
->f
[1] = (float) src
->i
[1];
678 dst
->f
[2] = (float) src
->i
[2];
679 dst
->f
[3] = (float) src
->i
[3];
684 union tgsi_exec_channel
*dst
,
685 const union tgsi_exec_channel
*src
)
688 dst
->f
[0] = util_fast_log2( src
->f
[0] );
689 dst
->f
[1] = util_fast_log2( src
->f
[1] );
690 dst
->f
[2] = util_fast_log2( src
->f
[2] );
691 dst
->f
[3] = util_fast_log2( src
->f
[3] );
693 dst
->f
[0] = logf( src
->f
[0] ) * 1.442695f
;
694 dst
->f
[1] = logf( src
->f
[1] ) * 1.442695f
;
695 dst
->f
[2] = logf( src
->f
[2] ) * 1.442695f
;
696 dst
->f
[3] = logf( src
->f
[3] ) * 1.442695f
;
702 union tgsi_exec_channel
*dst
,
703 const union tgsi_exec_channel
*src0
,
704 const union tgsi_exec_channel
*src1
,
705 const union tgsi_exec_channel
*src2
,
706 const union tgsi_exec_channel
*src3
)
708 dst
->f
[0] = src0
->f
[0] <= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
709 dst
->f
[1] = src0
->f
[1] <= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
710 dst
->f
[2] = src0
->f
[2] <= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
711 dst
->f
[3] = src0
->f
[3] <= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
716 union tgsi_exec_channel
*dst
,
717 const union tgsi_exec_channel
*src0
,
718 const union tgsi_exec_channel
*src1
,
719 const union tgsi_exec_channel
*src2
,
720 const union tgsi_exec_channel
*src3
)
722 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
723 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
724 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
725 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
731 union tgsi_exec_channel
*dst
,
732 const union tgsi_exec_channel
*src0
,
733 const union tgsi_exec_channel
*src1
,
734 const union tgsi_exec_channel
*src2
,
735 const union tgsi_exec_channel
*src3
)
737 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
738 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
739 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
740 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
747 union tgsi_exec_channel
*dst
,
748 const union tgsi_exec_channel
*src0
,
749 const union tgsi_exec_channel
*src1
,
750 const union tgsi_exec_channel
*src2
,
751 const union tgsi_exec_channel
*src3
)
753 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src2
->u
[0] : src3
->u
[0];
754 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src2
->u
[1] : src3
->u
[1];
755 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src2
->u
[2] : src3
->u
[2];
756 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src2
->u
[3] : src3
->u
[3];
762 union tgsi_exec_channel
*dst
,
763 const union tgsi_exec_channel
*src0
,
764 const union tgsi_exec_channel
*src1
)
766 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
767 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
768 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
769 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
775 union tgsi_exec_channel
*dst
,
776 const union tgsi_exec_channel
*src0
,
777 const union tgsi_exec_channel
*src1
)
779 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
780 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
781 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
782 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
789 union tgsi_exec_channel
*dst
,
790 const union tgsi_exec_channel
*src0
,
791 const union tgsi_exec_channel
*src1
)
793 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
794 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
795 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
796 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
802 union tgsi_exec_channel
*dst
,
803 const union tgsi_exec_channel
*src0
,
804 const union tgsi_exec_channel
*src1
)
806 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
807 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
808 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
809 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
815 union tgsi_exec_channel
*dst
,
816 const union tgsi_exec_channel
*src0
,
817 const union tgsi_exec_channel
*src1
)
819 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
820 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
821 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
822 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
829 union tgsi_exec_channel
*dst
,
830 const union tgsi_exec_channel
*src0
,
831 const union tgsi_exec_channel
*src1
)
833 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
834 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
835 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
836 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
843 union tgsi_exec_channel
*dst
,
844 const union tgsi_exec_channel
*src0
,
845 const union tgsi_exec_channel
*src1
)
847 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
848 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
849 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
850 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
856 union tgsi_exec_channel
*dst
,
857 const union tgsi_exec_channel
*src0
,
858 const union tgsi_exec_channel
*src1
)
860 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
861 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
862 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
863 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
869 union tgsi_exec_channel
*dst
,
870 const union tgsi_exec_channel
*src0
,
871 const union tgsi_exec_channel
*src1
)
873 dst
->i
[0] = src0
->i
[0] * src1
->i
[0];
874 dst
->i
[1] = src0
->i
[1] * src1
->i
[1];
875 dst
->i
[2] = src0
->i
[2] * src1
->i
[2];
876 dst
->i
[3] = src0
->i
[3] * src1
->i
[3];
883 union tgsi_exec_channel
*dst0
,
884 union tgsi_exec_channel
*dst1
,
885 const union tgsi_exec_channel
*src0
,
886 const union tgsi_exec_channel
*src1
)
888 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
889 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
890 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
891 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
902 union tgsi_exec_channel
*dst0
,
903 union tgsi_exec_channel
*dst1
,
904 const union tgsi_exec_channel
*src0
,
905 const union tgsi_exec_channel
*src1
)
907 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
908 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
909 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
910 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
922 union tgsi_exec_channel
*dst
,
923 const union tgsi_exec_channel
*src0
,
924 const union tgsi_exec_channel
*src1
,
925 const union tgsi_exec_channel
*src2
)
927 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
928 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
929 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
930 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
936 union tgsi_exec_channel
*dst
,
937 const union tgsi_exec_channel
*src
)
939 dst
->f
[0] = -src
->f
[0];
940 dst
->f
[1] = -src
->f
[1];
941 dst
->f
[2] = -src
->f
[2];
942 dst
->f
[3] = -src
->f
[3];
948 union tgsi_exec_channel
*dst
,
949 const union tgsi_exec_channel
*src
)
951 dst
->i
[0] = -src
->i
[0];
952 dst
->i
[1] = -src
->i
[1];
953 dst
->i
[2] = -src
->i
[2];
954 dst
->i
[3] = -src
->i
[3];
960 union tgsi_exec_channel
*dst
,
961 const union tgsi_exec_channel
*src
)
963 dst
->u
[0] = ~src
->u
[0];
964 dst
->u
[1] = ~src
->u
[1];
965 dst
->u
[2] = ~src
->u
[2];
966 dst
->u
[3] = ~src
->u
[3];
971 union tgsi_exec_channel
*dst
,
972 const union tgsi_exec_channel
*src0
,
973 const union tgsi_exec_channel
*src1
)
975 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
976 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
977 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
978 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
983 union tgsi_exec_channel
*dst
,
984 const union tgsi_exec_channel
*src0
,
985 const union tgsi_exec_channel
*src1
)
988 dst
->f
[0] = util_fast_pow( src0
->f
[0], src1
->f
[0] );
989 dst
->f
[1] = util_fast_pow( src0
->f
[1], src1
->f
[1] );
990 dst
->f
[2] = util_fast_pow( src0
->f
[2], src1
->f
[2] );
991 dst
->f
[3] = util_fast_pow( src0
->f
[3], src1
->f
[3] );
993 dst
->f
[0] = powf( src0
->f
[0], src1
->f
[0] );
994 dst
->f
[1] = powf( src0
->f
[1], src1
->f
[1] );
995 dst
->f
[2] = powf( src0
->f
[2], src1
->f
[2] );
996 dst
->f
[3] = powf( src0
->f
[3], src1
->f
[3] );
1002 union tgsi_exec_channel
*dst
,
1003 const union tgsi_exec_channel
*src
)
1005 dst
->f
[0] = floorf( src
->f
[0] + 0.5f
);
1006 dst
->f
[1] = floorf( src
->f
[1] + 0.5f
);
1007 dst
->f
[2] = floorf( src
->f
[2] + 0.5f
);
1008 dst
->f
[3] = floorf( src
->f
[3] + 0.5f
);
1013 union tgsi_exec_channel
*dst
,
1014 const union tgsi_exec_channel
*src
)
1016 dst
->f
[0] = src
->f
[0] < 0.0f
? -1.0f
: src
->f
[0] > 0.0f
? 1.0f
: 0.0f
;
1017 dst
->f
[1] = src
->f
[1] < 0.0f
? -1.0f
: src
->f
[1] > 0.0f
? 1.0f
: 0.0f
;
1018 dst
->f
[2] = src
->f
[2] < 0.0f
? -1.0f
: src
->f
[2] > 0.0f
? 1.0f
: 0.0f
;
1019 dst
->f
[3] = src
->f
[3] < 0.0f
? -1.0f
: src
->f
[3] > 0.0f
? 1.0f
: 0.0f
;
1024 union tgsi_exec_channel
*dst
,
1025 const union tgsi_exec_channel
*src0
,
1026 const union tgsi_exec_channel
*src1
)
1028 dst
->i
[0] = src0
->i
[0] << src1
->i
[0];
1029 dst
->i
[1] = src0
->i
[1] << src1
->i
[1];
1030 dst
->i
[2] = src0
->i
[2] << src1
->i
[2];
1031 dst
->i
[3] = src0
->i
[3] << src1
->i
[3];
1036 union tgsi_exec_channel
*dst
,
1037 const union tgsi_exec_channel
*src0
,
1038 const union tgsi_exec_channel
*src1
)
1040 dst
->i
[0] = src0
->i
[0] >> src1
->i
[0];
1041 dst
->i
[1] = src0
->i
[1] >> src1
->i
[1];
1042 dst
->i
[2] = src0
->i
[2] >> src1
->i
[2];
1043 dst
->i
[3] = src0
->i
[3] >> src1
->i
[3];
1048 union tgsi_exec_channel
*dst
,
1049 const union tgsi_exec_channel
*src0
)
1051 dst
->f
[0] = (float) (int) src0
->f
[0];
1052 dst
->f
[1] = (float) (int) src0
->f
[1];
1053 dst
->f
[2] = (float) (int) src0
->f
[2];
1054 dst
->f
[3] = (float) (int) src0
->f
[3];
1060 union tgsi_exec_channel
*dst
,
1061 const union tgsi_exec_channel
*src0
,
1062 const union tgsi_exec_channel
*src1
)
1064 dst
->u
[0] = src0
->u
[0] >> src1
->u
[0];
1065 dst
->u
[1] = src0
->u
[1] >> src1
->u
[1];
1066 dst
->u
[2] = src0
->u
[2] >> src1
->u
[2];
1067 dst
->u
[3] = src0
->u
[3] >> src1
->u
[3];
1073 union tgsi_exec_channel
*dst
,
1074 const union tgsi_exec_channel
*src
)
1076 dst
->f
[0] = sinf( src
->f
[0] );
1077 dst
->f
[1] = sinf( src
->f
[1] );
1078 dst
->f
[2] = sinf( src
->f
[2] );
1079 dst
->f
[3] = sinf( src
->f
[3] );
1083 micro_sqrt( union tgsi_exec_channel
*dst
,
1084 const union tgsi_exec_channel
*src
)
1086 dst
->f
[0] = sqrtf( src
->f
[0] );
1087 dst
->f
[1] = sqrtf( src
->f
[1] );
1088 dst
->f
[2] = sqrtf( src
->f
[2] );
1089 dst
->f
[3] = sqrtf( src
->f
[3] );
1094 union tgsi_exec_channel
*dst
,
1095 const union tgsi_exec_channel
*src0
,
1096 const union tgsi_exec_channel
*src1
)
1098 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
1099 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
1100 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
1101 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
1107 union tgsi_exec_channel
*dst
,
1108 const union tgsi_exec_channel
*src
)
1110 dst
->f
[0] = (float) src
->u
[0];
1111 dst
->f
[1] = (float) src
->u
[1];
1112 dst
->f
[2] = (float) src
->u
[2];
1113 dst
->f
[3] = (float) src
->u
[3];
1119 union tgsi_exec_channel
*dst
,
1120 const union tgsi_exec_channel
*src0
,
1121 const union tgsi_exec_channel
*src1
)
1123 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
1124 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
1125 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
1126 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
1130 fetch_src_file_channel(
1131 const struct tgsi_exec_machine
*mach
,
1134 const union tgsi_exec_channel
*index
,
1135 union tgsi_exec_channel
*chan
)
1138 case TGSI_SWIZZLE_X
:
1139 case TGSI_SWIZZLE_Y
:
1140 case TGSI_SWIZZLE_Z
:
1141 case TGSI_SWIZZLE_W
:
1143 case TGSI_FILE_CONSTANT
:
1144 assert(mach
->Consts
);
1145 if (index
->i
[0] < 0)
1148 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
1149 if (index
->i
[1] < 0)
1152 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
1153 if (index
->i
[2] < 0)
1156 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
1157 if (index
->i
[3] < 0)
1160 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
1163 case TGSI_FILE_INPUT
:
1164 case TGSI_FILE_SYSTEM_VALUE
:
1165 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1166 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1167 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1168 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1171 case TGSI_FILE_TEMPORARY
:
1172 assert(index
->i
[0] < TGSI_EXEC_NUM_TEMPS
);
1173 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1174 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1175 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1176 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1179 case TGSI_FILE_IMMEDIATE
:
1180 assert( index
->i
[0] < (int) mach
->ImmLimit
);
1181 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
1182 assert( index
->i
[1] < (int) mach
->ImmLimit
);
1183 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
1184 assert( index
->i
[2] < (int) mach
->ImmLimit
);
1185 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
1186 assert( index
->i
[3] < (int) mach
->ImmLimit
);
1187 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
1190 case TGSI_FILE_ADDRESS
:
1191 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1192 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1193 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1194 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1197 case TGSI_FILE_PREDICATE
:
1198 assert(index
->i
[0] < TGSI_EXEC_NUM_PREDS
);
1199 assert(index
->i
[1] < TGSI_EXEC_NUM_PREDS
);
1200 assert(index
->i
[2] < TGSI_EXEC_NUM_PREDS
);
1201 assert(index
->i
[3] < TGSI_EXEC_NUM_PREDS
);
1202 chan
->u
[0] = mach
->Predicates
[0].xyzw
[swizzle
].u
[0];
1203 chan
->u
[1] = mach
->Predicates
[0].xyzw
[swizzle
].u
[1];
1204 chan
->u
[2] = mach
->Predicates
[0].xyzw
[swizzle
].u
[2];
1205 chan
->u
[3] = mach
->Predicates
[0].xyzw
[swizzle
].u
[3];
1208 case TGSI_FILE_OUTPUT
:
1209 /* vertex/fragment output vars can be read too */
1210 chan
->u
[0] = mach
->Outputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1211 chan
->u
[1] = mach
->Outputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1212 chan
->u
[2] = mach
->Outputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1213 chan
->u
[3] = mach
->Outputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1228 const struct tgsi_exec_machine
*mach
,
1229 union tgsi_exec_channel
*chan
,
1230 const struct tgsi_full_src_register
*reg
,
1231 const uint chan_index
)
1233 union tgsi_exec_channel index
;
1236 /* We start with a direct index into a register file.
1240 * file = Register.File
1241 * [1] = Register.Index
1246 index
.i
[3] = reg
->Register
.Index
;
1248 /* There is an extra source register that indirectly subscripts
1249 * a register file. The direct index now becomes an offset
1250 * that is being added to the indirect register.
1254 * ind = Indirect.File
1255 * [2] = Indirect.Index
1256 * .x = Indirect.SwizzleX
1258 if (reg
->Register
.Indirect
) {
1259 union tgsi_exec_channel index2
;
1260 union tgsi_exec_channel indir_index
;
1261 const uint execmask
= mach
->ExecMask
;
1264 /* which address register (always zero now) */
1268 index2
.i
[3] = reg
->Indirect
.Index
;
1270 /* get current value of address register[swizzle] */
1271 swizzle
= tgsi_util_get_src_register_swizzle( ®
->Indirect
, CHAN_X
);
1272 fetch_src_file_channel(
1279 /* add value of address register to the offset */
1280 index
.i
[0] += (int) indir_index
.f
[0];
1281 index
.i
[1] += (int) indir_index
.f
[1];
1282 index
.i
[2] += (int) indir_index
.f
[2];
1283 index
.i
[3] += (int) indir_index
.f
[3];
1285 /* for disabled execution channels, zero-out the index to
1286 * avoid using a potential garbage value.
1288 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1289 if ((execmask
& (1 << i
)) == 0)
1294 /* There is an extra source register that is a second
1295 * subscript to a register file. Effectively it means that
1296 * the register file is actually a 2D array of registers.
1298 * file[1][3] == file[1*sizeof(file[1])+3],
1300 * [3] = Dimension.Index
1302 if (reg
->Register
.Dimension
) {
1303 /* The size of the first-order array depends on the register file type.
1304 * We need to multiply the index to the first array to get an effective,
1305 * "flat" index that points to the beginning of the second-order array.
1307 switch (reg
->Register
.File
) {
1308 case TGSI_FILE_INPUT
:
1309 case TGSI_FILE_SYSTEM_VALUE
:
1310 index
.i
[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1311 index
.i
[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1312 index
.i
[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1313 index
.i
[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1315 case TGSI_FILE_CONSTANT
:
1316 index
.i
[0] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1317 index
.i
[1] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1318 index
.i
[2] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1319 index
.i
[3] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1325 index
.i
[0] += reg
->Dimension
.Index
;
1326 index
.i
[1] += reg
->Dimension
.Index
;
1327 index
.i
[2] += reg
->Dimension
.Index
;
1328 index
.i
[3] += reg
->Dimension
.Index
;
1330 /* Again, the second subscript index can be addressed indirectly
1331 * identically to the first one.
1332 * Nothing stops us from indirectly addressing the indirect register,
1333 * but there is no need for that, so we won't exercise it.
1335 * file[1][ind[4].y+3],
1337 * ind = DimIndirect.File
1338 * [4] = DimIndirect.Index
1339 * .y = DimIndirect.SwizzleX
1341 if (reg
->Dimension
.Indirect
) {
1342 union tgsi_exec_channel index2
;
1343 union tgsi_exec_channel indir_index
;
1344 const uint execmask
= mach
->ExecMask
;
1350 index2
.i
[3] = reg
->DimIndirect
.Index
;
1352 swizzle
= tgsi_util_get_src_register_swizzle( ®
->DimIndirect
, CHAN_X
);
1353 fetch_src_file_channel(
1355 reg
->DimIndirect
.File
,
1360 index
.i
[0] += (int) indir_index
.f
[0];
1361 index
.i
[1] += (int) indir_index
.f
[1];
1362 index
.i
[2] += (int) indir_index
.f
[2];
1363 index
.i
[3] += (int) indir_index
.f
[3];
1365 /* for disabled execution channels, zero-out the index to
1366 * avoid using a potential garbage value.
1368 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1369 if ((execmask
& (1 << i
)) == 0)
1374 /* If by any chance there was a need for a 3D array of register
1375 * files, we would have to check whether Dimension is followed
1376 * by a dimension register and continue the saga.
1380 swizzle
= tgsi_util_get_full_src_register_swizzle( reg
, chan_index
);
1381 fetch_src_file_channel(
1388 switch (tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
)) {
1389 case TGSI_UTIL_SIGN_CLEAR
:
1390 micro_abs( chan
, chan
);
1393 case TGSI_UTIL_SIGN_SET
:
1394 micro_abs( chan
, chan
);
1395 micro_neg( chan
, chan
);
1398 case TGSI_UTIL_SIGN_TOGGLE
:
1399 micro_neg( chan
, chan
);
1402 case TGSI_UTIL_SIGN_KEEP
:
1409 struct tgsi_exec_machine
*mach
,
1410 const union tgsi_exec_channel
*chan
,
1411 const struct tgsi_full_dst_register
*reg
,
1412 const struct tgsi_full_instruction
*inst
,
1416 union tgsi_exec_channel null
;
1417 union tgsi_exec_channel
*dst
;
1418 uint execmask
= mach
->ExecMask
;
1419 int offset
= 0; /* indirection offset */
1423 check_inf_or_nan(chan
);
1426 /* There is an extra source register that indirectly subscripts
1427 * a register file. The direct index now becomes an offset
1428 * that is being added to the indirect register.
1432 * ind = Indirect.File
1433 * [2] = Indirect.Index
1434 * .x = Indirect.SwizzleX
1436 if (reg
->Register
.Indirect
) {
1437 union tgsi_exec_channel index
;
1438 union tgsi_exec_channel indir_index
;
1441 /* which address register (always zero for now) */
1445 index
.i
[3] = reg
->Indirect
.Index
;
1447 /* get current value of address register[swizzle] */
1448 swizzle
= tgsi_util_get_src_register_swizzle( ®
->Indirect
, CHAN_X
);
1450 /* fetch values from the address/indirection register */
1451 fetch_src_file_channel(
1458 /* save indirection offset */
1459 offset
= (int) indir_index
.f
[0];
1462 switch (reg
->Register
.File
) {
1463 case TGSI_FILE_NULL
:
1467 case TGSI_FILE_OUTPUT
:
1468 index
= mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1469 + reg
->Register
.Index
;
1470 dst
= &mach
->Outputs
[offset
+ index
].xyzw
[chan_index
];
1473 case TGSI_FILE_TEMPORARY
:
1474 index
= reg
->Register
.Index
;
1475 assert( index
< TGSI_EXEC_NUM_TEMPS
);
1476 dst
= &mach
->Temps
[offset
+ index
].xyzw
[chan_index
];
1479 case TGSI_FILE_ADDRESS
:
1480 index
= reg
->Register
.Index
;
1481 dst
= &mach
->Addrs
[index
].xyzw
[chan_index
];
1484 case TGSI_FILE_LOOP
:
1485 assert(reg
->Register
.Index
== 0);
1486 assert(mach
->LoopCounterStackTop
> 0);
1487 assert(chan_index
== CHAN_X
);
1488 dst
= &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[chan_index
];
1491 case TGSI_FILE_PREDICATE
:
1492 index
= reg
->Register
.Index
;
1493 assert(index
< TGSI_EXEC_NUM_PREDS
);
1494 dst
= &mach
->Predicates
[index
].xyzw
[chan_index
];
1502 if (inst
->Instruction
.Predicate
) {
1504 union tgsi_exec_channel
*pred
;
1506 switch (chan_index
) {
1508 swizzle
= inst
->Predicate
.SwizzleX
;
1511 swizzle
= inst
->Predicate
.SwizzleY
;
1514 swizzle
= inst
->Predicate
.SwizzleZ
;
1517 swizzle
= inst
->Predicate
.SwizzleW
;
1524 assert(inst
->Predicate
.Index
== 0);
1526 pred
= &mach
->Predicates
[inst
->Predicate
.Index
].xyzw
[swizzle
];
1528 if (inst
->Predicate
.Negate
) {
1529 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1531 execmask
&= ~(1 << i
);
1535 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1537 execmask
&= ~(1 << i
);
1543 switch (inst
->Instruction
.Saturate
) {
1545 for (i
= 0; i
< QUAD_SIZE
; i
++)
1546 if (execmask
& (1 << i
))
1547 dst
->i
[i
] = chan
->i
[i
];
1550 case TGSI_SAT_ZERO_ONE
:
1551 for (i
= 0; i
< QUAD_SIZE
; i
++)
1552 if (execmask
& (1 << i
)) {
1553 if (chan
->f
[i
] < 0.0f
)
1555 else if (chan
->f
[i
] > 1.0f
)
1558 dst
->i
[i
] = chan
->i
[i
];
1562 case TGSI_SAT_MINUS_PLUS_ONE
:
1563 for (i
= 0; i
< QUAD_SIZE
; i
++)
1564 if (execmask
& (1 << i
)) {
1565 if (chan
->f
[i
] < -1.0f
)
1567 else if (chan
->f
[i
] > 1.0f
)
1570 dst
->i
[i
] = chan
->i
[i
];
/*
 * Shorthands for operand access inside instruction handlers.  Both expand
 * to code that uses the identifiers `mach` and `inst`, so they can only be
 * used where those names are in scope.
 *
 * FETCH: read channel CHAN of source operand INDEX into *VAL.
 * STORE: write *VAL to channel CHAN of destination operand INDEX.
 */
#define FETCH(VAL, INDEX, CHAN) \
    fetch_source(mach, VAL, &inst->Src[INDEX], CHAN)

#define STORE(VAL, INDEX, CHAN) \
    store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN)
1587 * Execute ARB-style KIL which is predicated by a src register.
1588 * Kill fragment if any of the four values is less than zero.
1591 exec_kil(struct tgsi_exec_machine
*mach
,
1592 const struct tgsi_full_instruction
*inst
)
1596 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1597 union tgsi_exec_channel r
[1];
1599 /* This mask stores component bits that were already tested. */
1602 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1607 /* unswizzle channel */
1608 swizzle
= tgsi_util_get_full_src_register_swizzle (
1612 /* check if the component has not been already tested */
1613 if (uniquemask
& (1 << swizzle
))
1615 uniquemask
|= 1 << swizzle
;
1617 FETCH(&r
[0], 0, chan_index
);
1618 for (i
= 0; i
< 4; i
++)
1619 if (r
[0].f
[i
] < 0.0f
)
1623 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1627 * Execute NVIDIA-style KIL which is predicated by a condition code.
1628 * Kill fragment if the condition code is TRUE.
1631 exec_kilp(struct tgsi_exec_machine
*mach
,
1632 const struct tgsi_full_instruction
*inst
)
1634 uint kilmask
; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1636 /* "unconditional" kil */
1637 kilmask
= mach
->ExecMask
;
1638 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1643 * Fetch a four texture samples using STR texture coordinates.
1646 fetch_texel( struct tgsi_sampler
*sampler
,
1647 const union tgsi_exec_channel
*s
,
1648 const union tgsi_exec_channel
*t
,
1649 const union tgsi_exec_channel
*p
,
1650 float lodbias
, /* XXX should be float[4] */
1651 union tgsi_exec_channel
*r
,
1652 union tgsi_exec_channel
*g
,
1653 union tgsi_exec_channel
*b
,
1654 union tgsi_exec_channel
*a
)
1657 float rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1659 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, lodbias
, rgba
);
1661 for (j
= 0; j
< 4; j
++) {
1662 r
->f
[j
] = rgba
[0][j
];
1663 g
->f
[j
] = rgba
[1][j
];
1664 b
->f
[j
] = rgba
[2][j
];
1665 a
->f
[j
] = rgba
[3][j
];
1671 exec_tex(struct tgsi_exec_machine
*mach
,
1672 const struct tgsi_full_instruction
*inst
,
1676 const uint unit
= inst
->Src
[1].Register
.Index
;
1677 union tgsi_exec_channel r
[4];
1681 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1683 switch (inst
->Texture
.Texture
) {
1684 case TGSI_TEXTURE_1D
:
1685 case TGSI_TEXTURE_SHADOW1D
:
1687 FETCH(&r
[0], 0, CHAN_X
);
1690 FETCH(&r
[1], 0, CHAN_W
);
1691 micro_div( &r
[0], &r
[0], &r
[1] );
1695 FETCH(&r
[1], 0, CHAN_W
);
1696 lodBias
= r
[2].f
[0];
1701 fetch_texel(mach
->Samplers
[unit
],
1702 &r
[0], &ZeroVec
, &ZeroVec
, lodBias
, /* S, T, P, BIAS */
1703 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1706 case TGSI_TEXTURE_2D
:
1707 case TGSI_TEXTURE_RECT
:
1708 case TGSI_TEXTURE_SHADOW2D
:
1709 case TGSI_TEXTURE_SHADOWRECT
:
1711 FETCH(&r
[0], 0, CHAN_X
);
1712 FETCH(&r
[1], 0, CHAN_Y
);
1713 FETCH(&r
[2], 0, CHAN_Z
);
1716 FETCH(&r
[3], 0, CHAN_W
);
1717 micro_div( &r
[0], &r
[0], &r
[3] );
1718 micro_div( &r
[1], &r
[1], &r
[3] );
1719 micro_div( &r
[2], &r
[2], &r
[3] );
1723 FETCH(&r
[3], 0, CHAN_W
);
1724 lodBias
= r
[3].f
[0];
1729 fetch_texel(mach
->Samplers
[unit
],
1730 &r
[0], &r
[1], &r
[2], lodBias
, /* inputs */
1731 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1734 case TGSI_TEXTURE_3D
:
1735 case TGSI_TEXTURE_CUBE
:
1737 FETCH(&r
[0], 0, CHAN_X
);
1738 FETCH(&r
[1], 0, CHAN_Y
);
1739 FETCH(&r
[2], 0, CHAN_Z
);
1742 FETCH(&r
[3], 0, CHAN_W
);
1743 micro_div( &r
[0], &r
[0], &r
[3] );
1744 micro_div( &r
[1], &r
[1], &r
[3] );
1745 micro_div( &r
[2], &r
[2], &r
[3] );
1749 FETCH(&r
[3], 0, CHAN_W
);
1750 lodBias
= r
[3].f
[0];
1755 fetch_texel(mach
->Samplers
[unit
],
1756 &r
[0], &r
[1], &r
[2], lodBias
,
1757 &r
[0], &r
[1], &r
[2], &r
[3]);
1764 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1765 STORE( &r
[chan_index
], 0, chan_index
);
1770 exec_txd(struct tgsi_exec_machine
*mach
,
1771 const struct tgsi_full_instruction
*inst
)
1773 const uint unit
= inst
->Src
[3].Register
.Index
;
1774 union tgsi_exec_channel r
[4];
1778 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1781 switch (inst
->Texture
.Texture
) {
1782 case TGSI_TEXTURE_1D
:
1783 case TGSI_TEXTURE_SHADOW1D
:
1785 FETCH(&r
[0], 0, CHAN_X
);
1787 fetch_texel(mach
->Samplers
[unit
],
1788 &r
[0], &ZeroVec
, &ZeroVec
, 0.0f
, /* S, T, P, BIAS */
1789 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1792 case TGSI_TEXTURE_2D
:
1793 case TGSI_TEXTURE_RECT
:
1794 case TGSI_TEXTURE_SHADOW2D
:
1795 case TGSI_TEXTURE_SHADOWRECT
:
1797 FETCH(&r
[0], 0, CHAN_X
);
1798 FETCH(&r
[1], 0, CHAN_Y
);
1799 FETCH(&r
[2], 0, CHAN_Z
);
1801 fetch_texel(mach
->Samplers
[unit
],
1802 &r
[0], &r
[1], &r
[2], 0.0f
, /* inputs */
1803 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1806 case TGSI_TEXTURE_3D
:
1807 case TGSI_TEXTURE_CUBE
:
1809 FETCH(&r
[0], 0, CHAN_X
);
1810 FETCH(&r
[1], 0, CHAN_Y
);
1811 FETCH(&r
[2], 0, CHAN_Z
);
1813 fetch_texel(mach
->Samplers
[unit
],
1814 &r
[0], &r
[1], &r
[2], 0.0f
,
1815 &r
[0], &r
[1], &r
[2], &r
[3]);
1822 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
1823 STORE(&r
[chan_index
], 0, chan_index
);
1829 * Evaluate a constant-valued coefficient at the position of the
1834 struct tgsi_exec_machine
*mach
,
1840 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1841 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1846 * Evaluate a linear-valued coefficient at the position of the
1851 struct tgsi_exec_machine
*mach
,
1855 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1856 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1857 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1858 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1859 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1860 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
1861 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
1862 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
1863 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
1867 * Evaluate a perspective-valued coefficient at the position of the
1871 eval_perspective_coef(
1872 struct tgsi_exec_machine
*mach
,
1876 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1877 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1878 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1879 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1880 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1881 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
1882 /* divide by W here */
1883 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
1884 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
1885 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
1886 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel `chan`
 * of input attribute `attrib` for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
1896 exec_declaration(struct tgsi_exec_machine
*mach
,
1897 const struct tgsi_full_declaration
*decl
)
1899 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1900 if (decl
->Declaration
.File
== TGSI_FILE_INPUT
||
1901 decl
->Declaration
.File
== TGSI_FILE_SYSTEM_VALUE
) {
1902 uint first
, last
, mask
;
1904 first
= decl
->Range
.First
;
1905 last
= decl
->Range
.Last
;
1906 mask
= decl
->Declaration
.UsageMask
;
1908 if (decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
) {
1909 assert(decl
->Semantic
.Index
== 0);
1910 assert(first
== last
);
1911 assert(mask
= TGSI_WRITEMASK_XYZW
);
1913 mach
->Inputs
[first
] = mach
->QuadPos
;
1914 } else if (decl
->Semantic
.Name
== TGSI_SEMANTIC_FACE
) {
1917 assert(decl
->Semantic
.Index
== 0);
1918 assert(first
== last
);
1920 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1921 mach
->Inputs
[first
].xyzw
[0].f
[i
] = mach
->Face
;
1924 eval_coef_func eval
;
1927 switch (decl
->Declaration
.Interpolate
) {
1928 case TGSI_INTERPOLATE_CONSTANT
:
1929 eval
= eval_constant_coef
;
1932 case TGSI_INTERPOLATE_LINEAR
:
1933 eval
= eval_linear_coef
;
1936 case TGSI_INTERPOLATE_PERSPECTIVE
:
1937 eval
= eval_perspective_coef
;
1945 for (j
= 0; j
< NUM_CHANNELS
; j
++) {
1946 if (mask
& (1 << j
)) {
1947 for (i
= first
; i
<= last
; i
++) {
1959 struct tgsi_exec_machine
*mach
,
1960 const struct tgsi_full_instruction
*inst
,
1964 union tgsi_exec_channel r
[10];
1965 union tgsi_exec_channel d
[8];
1969 switch (inst
->Instruction
.Opcode
) {
1970 case TGSI_OPCODE_ARL
:
1971 case TGSI_OPCODE_FLR
:
1972 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1973 FETCH( &r
[0], 0, chan_index
);
1974 micro_flr(&d
[chan_index
], &r
[0]);
1976 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
1977 STORE(&d
[chan_index
], 0, chan_index
);
1981 case TGSI_OPCODE_MOV
:
1982 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1983 FETCH(&d
[chan_index
], 0, chan_index
);
1985 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1986 STORE(&d
[chan_index
], 0, chan_index
);
1990 case TGSI_OPCODE_LIT
:
1991 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1992 FETCH( &r
[0], 0, CHAN_X
);
1993 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1994 micro_max(&d
[CHAN_Y
], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
1997 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1998 FETCH( &r
[1], 0, CHAN_Y
);
1999 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2001 FETCH( &r
[2], 0, CHAN_W
);
2002 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
2003 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
2004 micro_pow( &r
[1], &r
[1], &r
[2] );
2005 micro_lt(&d
[CHAN_Z
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2008 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2009 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2011 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2012 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2015 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2016 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
2018 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2019 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2023 case TGSI_OPCODE_RCP
:
2024 /* TGSI_OPCODE_RECIP */
2025 FETCH( &r
[0], 0, CHAN_X
);
2026 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
2027 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2028 STORE( &r
[0], 0, chan_index
);
2032 case TGSI_OPCODE_RSQ
:
2033 /* TGSI_OPCODE_RECIPSQRT */
2034 FETCH( &r
[0], 0, CHAN_X
);
2035 micro_abs( &r
[0], &r
[0] );
2036 micro_sqrt( &r
[0], &r
[0] );
2037 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
2038 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2039 STORE( &r
[0], 0, chan_index
);
2043 case TGSI_OPCODE_EXP
:
2044 FETCH( &r
[0], 0, CHAN_X
);
2045 micro_flr( &r
[1], &r
[0] ); /* r1 = floor(r0) */
2046 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2047 micro_exp2( &r
[2], &r
[1] ); /* r2 = 2 ^ r1 */
2048 STORE( &r
[2], 0, CHAN_X
); /* store r2 */
2050 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2051 micro_sub( &r
[2], &r
[0], &r
[1] ); /* r2 = r0 - r1 */
2052 STORE( &r
[2], 0, CHAN_Y
); /* store r2 */
2054 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2055 micro_exp2( &r
[2], &r
[0] ); /* r2 = 2 ^ r0 */
2056 STORE( &r
[2], 0, CHAN_Z
); /* store r2 */
2058 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2059 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2063 case TGSI_OPCODE_LOG
:
2064 FETCH( &r
[0], 0, CHAN_X
);
2065 micro_abs( &r
[2], &r
[0] ); /* r2 = abs(r0) */
2066 micro_lg2( &r
[1], &r
[2] ); /* r1 = lg2(r2) */
2067 micro_flr( &r
[0], &r
[1] ); /* r0 = floor(r1) */
2068 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2069 STORE( &r
[0], 0, CHAN_X
);
2071 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2072 micro_exp2( &r
[0], &r
[0] ); /* r0 = 2 ^ r0 */
2073 micro_div( &r
[0], &r
[2], &r
[0] ); /* r0 = r2 / r0 */
2074 STORE( &r
[0], 0, CHAN_Y
);
2076 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2077 STORE( &r
[1], 0, CHAN_Z
);
2079 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2080 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2084 case TGSI_OPCODE_MUL
:
2085 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2086 FETCH(&r
[0], 0, chan_index
);
2087 FETCH(&r
[1], 1, chan_index
);
2088 micro_mul(&d
[chan_index
], &r
[0], &r
[1]);
2090 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2091 STORE(&d
[chan_index
], 0, chan_index
);
2095 case TGSI_OPCODE_ADD
:
2096 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2097 FETCH( &r
[0], 0, chan_index
);
2098 FETCH( &r
[1], 1, chan_index
);
2099 micro_add(&d
[chan_index
], &r
[0], &r
[1]);
2101 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2102 STORE(&d
[chan_index
], 0, chan_index
);
2106 case TGSI_OPCODE_DP3
:
2107 /* TGSI_OPCODE_DOT3 */
2108 FETCH( &r
[0], 0, CHAN_X
);
2109 FETCH( &r
[1], 1, CHAN_X
);
2110 micro_mul( &r
[0], &r
[0], &r
[1] );
2112 FETCH( &r
[1], 0, CHAN_Y
);
2113 FETCH( &r
[2], 1, CHAN_Y
);
2114 micro_mul( &r
[1], &r
[1], &r
[2] );
2115 micro_add( &r
[0], &r
[0], &r
[1] );
2117 FETCH( &r
[1], 0, CHAN_Z
);
2118 FETCH( &r
[2], 1, CHAN_Z
);
2119 micro_mul( &r
[1], &r
[1], &r
[2] );
2120 micro_add( &r
[0], &r
[0], &r
[1] );
2122 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2123 STORE( &r
[0], 0, chan_index
);
2127 case TGSI_OPCODE_DP4
:
2128 /* TGSI_OPCODE_DOT4 */
2129 FETCH(&r
[0], 0, CHAN_X
);
2130 FETCH(&r
[1], 1, CHAN_X
);
2132 micro_mul( &r
[0], &r
[0], &r
[1] );
2134 FETCH(&r
[1], 0, CHAN_Y
);
2135 FETCH(&r
[2], 1, CHAN_Y
);
2137 micro_mul( &r
[1], &r
[1], &r
[2] );
2138 micro_add( &r
[0], &r
[0], &r
[1] );
2140 FETCH(&r
[1], 0, CHAN_Z
);
2141 FETCH(&r
[2], 1, CHAN_Z
);
2143 micro_mul( &r
[1], &r
[1], &r
[2] );
2144 micro_add( &r
[0], &r
[0], &r
[1] );
2146 FETCH(&r
[1], 0, CHAN_W
);
2147 FETCH(&r
[2], 1, CHAN_W
);
2149 micro_mul( &r
[1], &r
[1], &r
[2] );
2150 micro_add( &r
[0], &r
[0], &r
[1] );
2152 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2153 STORE( &r
[0], 0, chan_index
);
2157 case TGSI_OPCODE_DST
:
2158 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2159 FETCH( &r
[0], 0, CHAN_Y
);
2160 FETCH( &r
[1], 1, CHAN_Y
);
2161 micro_mul(&d
[CHAN_Y
], &r
[0], &r
[1]);
2163 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2164 FETCH(&d
[CHAN_Z
], 0, CHAN_Z
);
2166 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2167 FETCH(&d
[CHAN_W
], 1, CHAN_W
);
2170 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2171 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
2173 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2174 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2176 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2177 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2179 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2180 STORE(&d
[CHAN_W
], 0, CHAN_W
);
2184 case TGSI_OPCODE_MIN
:
2185 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2186 FETCH(&r
[0], 0, chan_index
);
2187 FETCH(&r
[1], 1, chan_index
);
2189 /* XXX use micro_min()?? */
2190 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &r
[0], &r
[1]);
2192 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2193 STORE(&d
[chan_index
], 0, chan_index
);
2197 case TGSI_OPCODE_MAX
:
2198 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2199 FETCH(&r
[0], 0, chan_index
);
2200 FETCH(&r
[1], 1, chan_index
);
2202 /* XXX use micro_max()?? */
2203 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &r
[1], &r
[0] );
2205 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2206 STORE(&d
[chan_index
], 0, chan_index
);
2210 case TGSI_OPCODE_SLT
:
2211 /* TGSI_OPCODE_SETLT */
2212 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2213 FETCH( &r
[0], 0, chan_index
);
2214 FETCH( &r
[1], 1, chan_index
);
2215 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2217 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2218 STORE(&d
[chan_index
], 0, chan_index
);
2222 case TGSI_OPCODE_SGE
:
2223 /* TGSI_OPCODE_SETGE */
2224 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2225 FETCH( &r
[0], 0, chan_index
);
2226 FETCH( &r
[1], 1, chan_index
);
2227 micro_le(&d
[chan_index
], &r
[1], &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2229 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2230 STORE(&d
[chan_index
], 0, chan_index
);
2234 case TGSI_OPCODE_MAD
:
2235 /* TGSI_OPCODE_MADD */
2236 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2237 FETCH( &r
[0], 0, chan_index
);
2238 FETCH( &r
[1], 1, chan_index
);
2239 micro_mul( &r
[0], &r
[0], &r
[1] );
2240 FETCH( &r
[1], 2, chan_index
);
2241 micro_add(&d
[chan_index
], &r
[0], &r
[1]);
2243 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2244 STORE(&d
[chan_index
], 0, chan_index
);
2248 case TGSI_OPCODE_SUB
:
2249 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2250 FETCH(&r
[0], 0, chan_index
);
2251 FETCH(&r
[1], 1, chan_index
);
2252 micro_sub(&d
[chan_index
], &r
[0], &r
[1]);
2254 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2255 STORE(&d
[chan_index
], 0, chan_index
);
2259 case TGSI_OPCODE_LRP
:
2260 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2261 FETCH(&r
[0], 0, chan_index
);
2262 FETCH(&r
[1], 1, chan_index
);
2263 FETCH(&r
[2], 2, chan_index
);
2264 micro_sub( &r
[1], &r
[1], &r
[2] );
2265 micro_mul( &r
[0], &r
[0], &r
[1] );
2266 micro_add(&d
[chan_index
], &r
[0], &r
[2]);
2268 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2269 STORE(&d
[chan_index
], 0, chan_index
);
2273 case TGSI_OPCODE_CND
:
2274 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2275 FETCH(&r
[0], 0, chan_index
);
2276 FETCH(&r
[1], 1, chan_index
);
2277 FETCH(&r
[2], 2, chan_index
);
2278 micro_lt(&d
[chan_index
], &mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
], &r
[2], &r
[0], &r
[1]);
2280 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2281 STORE(&d
[chan_index
], 0, chan_index
);
2285 case TGSI_OPCODE_DP2A
:
2286 FETCH( &r
[0], 0, CHAN_X
);
2287 FETCH( &r
[1], 1, CHAN_X
);
2288 micro_mul( &r
[0], &r
[0], &r
[1] );
2290 FETCH( &r
[1], 0, CHAN_Y
);
2291 FETCH( &r
[2], 1, CHAN_Y
);
2292 micro_mul( &r
[1], &r
[1], &r
[2] );
2293 micro_add( &r
[0], &r
[0], &r
[1] );
2295 FETCH( &r
[2], 2, CHAN_X
);
2296 micro_add( &r
[0], &r
[0], &r
[2] );
2298 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2299 STORE( &r
[0], 0, chan_index
);
2303 case TGSI_OPCODE_FRC
:
2304 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2305 FETCH( &r
[0], 0, chan_index
);
2306 micro_frc(&d
[chan_index
], &r
[0]);
2308 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2309 STORE(&d
[chan_index
], 0, chan_index
);
2313 case TGSI_OPCODE_CLAMP
:
2314 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2315 FETCH(&r
[0], 0, chan_index
);
2316 FETCH(&r
[1], 1, chan_index
);
2317 micro_max(&r
[0], &r
[0], &r
[1]);
2318 FETCH(&r
[1], 2, chan_index
);
2319 micro_min(&d
[chan_index
], &r
[0], &r
[1]);
2321 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2322 STORE(&d
[chan_index
], 0, chan_index
);
2326 case TGSI_OPCODE_ROUND
:
2327 case TGSI_OPCODE_ARR
:
2328 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2329 FETCH( &r
[0], 0, chan_index
);
2330 micro_rnd(&d
[chan_index
], &r
[0]);
2332 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2333 STORE(&d
[chan_index
], 0, chan_index
);
2337 case TGSI_OPCODE_EX2
:
2338 FETCH(&r
[0], 0, CHAN_X
);
2340 micro_exp2( &r
[0], &r
[0] );
2342 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2343 STORE( &r
[0], 0, chan_index
);
2347 case TGSI_OPCODE_LG2
:
2348 FETCH( &r
[0], 0, CHAN_X
);
2349 micro_lg2( &r
[0], &r
[0] );
2350 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2351 STORE( &r
[0], 0, chan_index
);
2355 case TGSI_OPCODE_POW
:
2356 FETCH(&r
[0], 0, CHAN_X
);
2357 FETCH(&r
[1], 1, CHAN_X
);
2359 micro_pow( &r
[0], &r
[0], &r
[1] );
2361 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2362 STORE( &r
[0], 0, chan_index
);
2366 case TGSI_OPCODE_XPD
:
2367 FETCH(&r
[0], 0, CHAN_Y
);
2368 FETCH(&r
[1], 1, CHAN_Z
);
2370 micro_mul( &r
[2], &r
[0], &r
[1] );
2372 FETCH(&r
[3], 0, CHAN_Z
);
2373 FETCH(&r
[4], 1, CHAN_Y
);
2375 micro_mul( &r
[5], &r
[3], &r
[4] );
2376 micro_sub(&d
[CHAN_X
], &r
[2], &r
[5]);
2378 FETCH(&r
[2], 1, CHAN_X
);
2380 micro_mul( &r
[3], &r
[3], &r
[2] );
2382 FETCH(&r
[5], 0, CHAN_X
);
2384 micro_mul( &r
[1], &r
[1], &r
[5] );
2385 micro_sub(&d
[CHAN_Y
], &r
[3], &r
[1]);
2387 micro_mul( &r
[5], &r
[5], &r
[4] );
2388 micro_mul( &r
[0], &r
[0], &r
[2] );
2389 micro_sub(&d
[CHAN_Z
], &r
[5], &r
[0]);
2391 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2392 STORE(&d
[CHAN_X
], 0, CHAN_X
);
2394 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2395 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2397 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2398 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2400 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2401 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2405 case TGSI_OPCODE_ABS
:
2406 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2407 FETCH(&r
[0], 0, chan_index
);
2408 micro_abs(&d
[chan_index
], &r
[0]);
2410 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2411 STORE(&d
[chan_index
], 0, chan_index
);
2415 case TGSI_OPCODE_RCC
:
2416 FETCH(&r
[0], 0, CHAN_X
);
2417 micro_div(&r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0]);
2418 micro_float_clamp(&r
[0], &r
[0]);
2419 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2420 STORE(&r
[0], 0, chan_index
);
2424 case TGSI_OPCODE_DPH
:
2425 FETCH(&r
[0], 0, CHAN_X
);
2426 FETCH(&r
[1], 1, CHAN_X
);
2428 micro_mul( &r
[0], &r
[0], &r
[1] );
2430 FETCH(&r
[1], 0, CHAN_Y
);
2431 FETCH(&r
[2], 1, CHAN_Y
);
2433 micro_mul( &r
[1], &r
[1], &r
[2] );
2434 micro_add( &r
[0], &r
[0], &r
[1] );
2436 FETCH(&r
[1], 0, CHAN_Z
);
2437 FETCH(&r
[2], 1, CHAN_Z
);
2439 micro_mul( &r
[1], &r
[1], &r
[2] );
2440 micro_add( &r
[0], &r
[0], &r
[1] );
2442 FETCH(&r
[1], 1, CHAN_W
);
2444 micro_add( &r
[0], &r
[0], &r
[1] );
2446 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2447 STORE( &r
[0], 0, chan_index
);
2451 case TGSI_OPCODE_COS
:
2452 FETCH(&r
[0], 0, CHAN_X
);
2454 micro_cos( &r
[0], &r
[0] );
2456 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2457 STORE( &r
[0], 0, chan_index
);
2461 case TGSI_OPCODE_DDX
:
2462 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2463 FETCH( &r
[0], 0, chan_index
);
2464 micro_ddx(&d
[chan_index
], &r
[0]);
2466 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2467 STORE(&d
[chan_index
], 0, chan_index
);
2471 case TGSI_OPCODE_DDY
:
2472 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2473 FETCH( &r
[0], 0, chan_index
);
2474 micro_ddy(&d
[chan_index
], &r
[0]);
2476 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2477 STORE(&d
[chan_index
], 0, chan_index
);
2481 case TGSI_OPCODE_KILP
:
2482 exec_kilp (mach
, inst
);
2485 case TGSI_OPCODE_KIL
:
2486 exec_kil (mach
, inst
);
2489 case TGSI_OPCODE_PK2H
:
2493 case TGSI_OPCODE_PK2US
:
2497 case TGSI_OPCODE_PK4B
:
2501 case TGSI_OPCODE_PK4UB
:
2505 case TGSI_OPCODE_RFL
:
2506 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2507 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2508 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2509 /* r0 = dp3(src0, src0) */
2510 FETCH(&r
[2], 0, CHAN_X
);
2511 micro_mul(&r
[0], &r
[2], &r
[2]);
2512 FETCH(&r
[4], 0, CHAN_Y
);
2513 micro_mul(&r
[8], &r
[4], &r
[4]);
2514 micro_add(&r
[0], &r
[0], &r
[8]);
2515 FETCH(&r
[6], 0, CHAN_Z
);
2516 micro_mul(&r
[8], &r
[6], &r
[6]);
2517 micro_add(&r
[0], &r
[0], &r
[8]);
2519 /* r1 = dp3(src0, src1) */
2520 FETCH(&r
[3], 1, CHAN_X
);
2521 micro_mul(&r
[1], &r
[2], &r
[3]);
2522 FETCH(&r
[5], 1, CHAN_Y
);
2523 micro_mul(&r
[8], &r
[4], &r
[5]);
2524 micro_add(&r
[1], &r
[1], &r
[8]);
2525 FETCH(&r
[7], 1, CHAN_Z
);
2526 micro_mul(&r
[8], &r
[6], &r
[7]);
2527 micro_add(&r
[1], &r
[1], &r
[8]);
2529 /* r1 = 2 * r1 / r0 */
2530 micro_add(&r
[1], &r
[1], &r
[1]);
2531 micro_div(&r
[1], &r
[1], &r
[0]);
2533 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2534 micro_mul(&r
[2], &r
[2], &r
[1]);
2535 micro_sub(&r
[2], &r
[2], &r
[3]);
2536 STORE(&r
[2], 0, CHAN_X
);
2538 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2539 micro_mul(&r
[4], &r
[4], &r
[1]);
2540 micro_sub(&r
[4], &r
[4], &r
[5]);
2541 STORE(&r
[4], 0, CHAN_Y
);
2543 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2544 micro_mul(&r
[6], &r
[6], &r
[1]);
2545 micro_sub(&r
[6], &r
[6], &r
[7]);
2546 STORE(&r
[6], 0, CHAN_Z
);
2549 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2550 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2554 case TGSI_OPCODE_SEQ
:
2555 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2556 FETCH( &r
[0], 0, chan_index
);
2557 FETCH( &r
[1], 1, chan_index
);
2558 micro_eq(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2560 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2561 STORE(&d
[chan_index
], 0, chan_index
);
2565 case TGSI_OPCODE_SFL
:
2566 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2567 STORE(&mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, chan_index
);
2571 case TGSI_OPCODE_SGT
:
2572 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2573 FETCH( &r
[0], 0, chan_index
);
2574 FETCH( &r
[1], 1, chan_index
);
2575 micro_le(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
2577 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2578 STORE(&d
[chan_index
], 0, chan_index
);
2582 case TGSI_OPCODE_SIN
:
2583 FETCH( &r
[0], 0, CHAN_X
);
2584 micro_sin( &r
[0], &r
[0] );
2585 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2586 STORE( &r
[0], 0, chan_index
);
2590 case TGSI_OPCODE_SLE
:
2591 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2592 FETCH( &r
[0], 0, chan_index
);
2593 FETCH( &r
[1], 1, chan_index
);
2594 micro_le(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2596 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2597 STORE(&d
[chan_index
], 0, chan_index
);
2601 case TGSI_OPCODE_SNE
:
2602 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2603 FETCH( &r
[0], 0, chan_index
);
2604 FETCH( &r
[1], 1, chan_index
);
2605 micro_eq(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
2607 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2608 STORE(&d
[chan_index
], 0, chan_index
);
2612 case TGSI_OPCODE_STR
:
2613 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2614 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, chan_index
);
2618 case TGSI_OPCODE_TEX
:
2619 /* simple texture lookup */
2620 /* src[0] = texcoord */
2621 /* src[1] = sampler unit */
2622 exec_tex(mach
, inst
, FALSE
, FALSE
);
2625 case TGSI_OPCODE_TXB
:
2626 /* Texture lookup with lod bias */
2627 /* src[0] = texcoord (src[0].w = LOD bias) */
2628 /* src[1] = sampler unit */
2629 exec_tex(mach
, inst
, TRUE
, FALSE
);
2632 case TGSI_OPCODE_TXD
:
2633 /* Texture lookup with explicit partial derivatives */
2634 /* src[0] = texcoord */
2635 /* src[1] = d[strq]/dx */
2636 /* src[2] = d[strq]/dy */
2637 /* src[3] = sampler unit */
2638 exec_txd(mach
, inst
);
2641 case TGSI_OPCODE_TXL
:
2642 /* Texture lookup with explicit LOD */
2643 /* src[0] = texcoord (src[0].w = LOD) */
2644 /* src[1] = sampler unit */
2645 exec_tex(mach
, inst
, TRUE
, FALSE
);
2648 case TGSI_OPCODE_TXP
:
2649 /* Texture lookup with projection */
2650 /* src[0] = texcoord (src[0].w = projection) */
2651 /* src[1] = sampler unit */
2652 exec_tex(mach
, inst
, FALSE
, TRUE
);
2655 case TGSI_OPCODE_UP2H
:
2659 case TGSI_OPCODE_UP2US
:
2663 case TGSI_OPCODE_UP4B
:
2667 case TGSI_OPCODE_UP4UB
:
2671 case TGSI_OPCODE_X2D
:
2672 FETCH(&r
[0], 1, CHAN_X
);
2673 FETCH(&r
[1], 1, CHAN_Y
);
2674 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2675 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2676 FETCH(&r
[2], 2, CHAN_X
);
2677 micro_mul(&r
[2], &r
[2], &r
[0]);
2678 FETCH(&r
[3], 2, CHAN_Y
);
2679 micro_mul(&r
[3], &r
[3], &r
[1]);
2680 micro_add(&r
[2], &r
[2], &r
[3]);
2681 FETCH(&r
[3], 0, CHAN_X
);
2682 micro_add(&d
[CHAN_X
], &r
[2], &r
[3]);
2685 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2686 IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2687 FETCH(&r
[2], 2, CHAN_Z
);
2688 micro_mul(&r
[2], &r
[2], &r
[0]);
2689 FETCH(&r
[3], 2, CHAN_W
);
2690 micro_mul(&r
[3], &r
[3], &r
[1]);
2691 micro_add(&r
[2], &r
[2], &r
[3]);
2692 FETCH(&r
[3], 0, CHAN_Y
);
2693 micro_add(&d
[CHAN_Y
], &r
[2], &r
[3]);
2696 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2697 STORE(&d
[CHAN_X
], 0, CHAN_X
);
2699 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2700 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2702 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2703 STORE(&d
[CHAN_X
], 0, CHAN_Z
);
2705 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2706 STORE(&d
[CHAN_Y
], 0, CHAN_W
);
2710 case TGSI_OPCODE_ARA
:
2714 case TGSI_OPCODE_BRA
:
2718 case TGSI_OPCODE_CAL
:
2719 /* skip the call if no execution channels are enabled */
2720 if (mach
->ExecMask
) {
2723 /* First, record the depths of the execution stacks.
2724 * This is important for deeply nested/looped return statements.
2725 * We have to unwind the stacks by the correct amount. For a
2726 * real code generator, we could determine the number of entries
2727 * to pop off each stack with simple static analysis and avoid
2728 * implementing this data structure at run time.
2730 mach
->CallStack
[mach
->CallStackTop
].CondStackTop
= mach
->CondStackTop
;
2731 mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
= mach
->LoopStackTop
;
2732 mach
->CallStack
[mach
->CallStackTop
].ContStackTop
= mach
->ContStackTop
;
2733 /* note that PC was already incremented above */
2734 mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
= *pc
;
2736 mach
->CallStackTop
++;
2738 /* Second, push the Cond, Loop, Cont, Func stacks */
2739 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2740 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2741 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2742 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2743 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2744 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2745 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
2746 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
2748 /* Finally, jump to the subroutine */
2749 *pc
= inst
->Label
.Label
;
2753 case TGSI_OPCODE_RET
:
2754 mach
->FuncMask
&= ~mach
->ExecMask
;
2755 UPDATE_EXEC_MASK(mach
);
2757 if (mach
->FuncMask
== 0x0) {
2758 /* really return now (otherwise, keep executing) */
2760 if (mach
->CallStackTop
== 0) {
2761 /* returning from main() */
2766 assert(mach
->CallStackTop
> 0);
2767 mach
->CallStackTop
--;
2769 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
2770 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
2772 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
2773 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
2775 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
2776 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
2778 assert(mach
->FuncStackTop
> 0);
2779 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
2781 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
2783 UPDATE_EXEC_MASK(mach
);
2787 case TGSI_OPCODE_SSG
:
2788 /* TGSI_OPCODE_SGN */
2789 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2790 FETCH( &r
[0], 0, chan_index
);
2791 micro_sgn(&d
[chan_index
], &r
[0]);
2793 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2794 STORE(&d
[chan_index
], 0, chan_index
);
2798 case TGSI_OPCODE_CMP
:
2799 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2800 FETCH(&r
[0], 0, chan_index
);
2801 FETCH(&r
[1], 1, chan_index
);
2802 FETCH(&r
[2], 2, chan_index
);
2803 micro_lt(&d
[chan_index
], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2]);
2805 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2806 STORE(&d
[chan_index
], 0, chan_index
);
2810 case TGSI_OPCODE_SCS
:
2811 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2812 FETCH( &r
[0], 0, CHAN_X
);
2813 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2814 micro_cos(&r
[1], &r
[0]);
2815 STORE(&r
[1], 0, CHAN_X
);
2817 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2818 micro_sin(&r
[1], &r
[0]);
2819 STORE(&r
[1], 0, CHAN_Y
);
2822 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
2823 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
2825 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
2826 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2830 case TGSI_OPCODE_NRM
:
2831 /* 3-component vector normalize */
2832 if(IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2833 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2834 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2835 /* r3 = sqrt(dp3(src0, src0)) */
2836 FETCH(&r
[0], 0, CHAN_X
);
2837 micro_mul(&r
[3], &r
[0], &r
[0]);
2838 FETCH(&r
[1], 0, CHAN_Y
);
2839 micro_mul(&r
[4], &r
[1], &r
[1]);
2840 micro_add(&r
[3], &r
[3], &r
[4]);
2841 FETCH(&r
[2], 0, CHAN_Z
);
2842 micro_mul(&r
[4], &r
[2], &r
[2]);
2843 micro_add(&r
[3], &r
[3], &r
[4]);
2844 micro_sqrt(&r
[3], &r
[3]);
2846 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2847 micro_div(&r
[0], &r
[0], &r
[3]);
2848 STORE(&r
[0], 0, CHAN_X
);
2850 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2851 micro_div(&r
[1], &r
[1], &r
[3]);
2852 STORE(&r
[1], 0, CHAN_Y
);
2854 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2855 micro_div(&r
[2], &r
[2], &r
[3]);
2856 STORE(&r
[2], 0, CHAN_Z
);
2859 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2860 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2864 case TGSI_OPCODE_NRM4
:
2865 /* 4-component vector normalize */
2867 union tgsi_exec_channel tmp
, dot
;
2869 /* tmp = dp4(src0, src0): */
2870 FETCH( &r
[0], 0, CHAN_X
);
2871 micro_mul( &tmp
, &r
[0], &r
[0] );
2873 FETCH( &r
[1], 0, CHAN_Y
);
2874 micro_mul( &dot
, &r
[1], &r
[1] );
2875 micro_add( &tmp
, &tmp
, &dot
);
2877 FETCH( &r
[2], 0, CHAN_Z
);
2878 micro_mul( &dot
, &r
[2], &r
[2] );
2879 micro_add( &tmp
, &tmp
, &dot
);
2881 FETCH( &r
[3], 0, CHAN_W
);
2882 micro_mul( &dot
, &r
[3], &r
[3] );
2883 micro_add( &tmp
, &tmp
, &dot
);
2885 /* tmp = 1 / sqrt(tmp) */
2886 micro_sqrt( &tmp
, &tmp
);
2887 micro_div( &tmp
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &tmp
);
2889 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2890 /* chan = chan * tmp */
2891 micro_mul( &r
[chan_index
], &tmp
, &r
[chan_index
] );
2892 STORE( &r
[chan_index
], 0, chan_index
);
2897 case TGSI_OPCODE_DIV
:
2901 case TGSI_OPCODE_DP2
:
2902 FETCH( &r
[0], 0, CHAN_X
);
2903 FETCH( &r
[1], 1, CHAN_X
);
2904 micro_mul( &r
[0], &r
[0], &r
[1] );
2906 FETCH( &r
[1], 0, CHAN_Y
);
2907 FETCH( &r
[2], 1, CHAN_Y
);
2908 micro_mul( &r
[1], &r
[1], &r
[2] );
2909 micro_add( &r
[0], &r
[0], &r
[1] );
2911 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2912 STORE( &r
[0], 0, chan_index
);
2916 case TGSI_OPCODE_IF
:
2918 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2919 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2920 FETCH( &r
[0], 0, CHAN_X
);
2921 /* update CondMask */
2923 mach
->CondMask
&= ~0x1;
2926 mach
->CondMask
&= ~0x2;
2929 mach
->CondMask
&= ~0x4;
2932 mach
->CondMask
&= ~0x8;
2934 UPDATE_EXEC_MASK(mach
);
2935 /* Todo: If CondMask==0, jump to ELSE */
2938 case TGSI_OPCODE_ELSE
:
2939 /* invert CondMask wrt previous mask */
2942 assert(mach
->CondStackTop
> 0);
2943 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
2944 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
2945 UPDATE_EXEC_MASK(mach
);
2946 /* Todo: If CondMask==0, jump to ENDIF */
2950 case TGSI_OPCODE_ENDIF
:
2952 assert(mach
->CondStackTop
> 0);
2953 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2954 UPDATE_EXEC_MASK(mach
);
2957 case TGSI_OPCODE_END
:
2958 /* halt execution */
2962 case TGSI_OPCODE_REP
:
2966 case TGSI_OPCODE_ENDREP
:
2970 case TGSI_OPCODE_PUSHA
:
2974 case TGSI_OPCODE_POPA
:
2978 case TGSI_OPCODE_CEIL
:
2979 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2980 FETCH( &r
[0], 0, chan_index
);
2981 micro_ceil(&d
[chan_index
], &r
[0]);
2983 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2984 STORE(&d
[chan_index
], 0, chan_index
);
2988 case TGSI_OPCODE_I2F
:
2989 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2990 FETCH( &r
[0], 0, chan_index
);
2991 micro_i2f(&d
[chan_index
], &r
[0]);
2993 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2994 STORE(&d
[chan_index
], 0, chan_index
);
2998 case TGSI_OPCODE_NOT
:
2999 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3000 FETCH( &r
[0], 0, chan_index
);
3001 micro_not(&d
[chan_index
], &r
[0]);
3003 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3004 STORE(&d
[chan_index
], 0, chan_index
);
3008 case TGSI_OPCODE_TRUNC
:
3009 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3010 FETCH( &r
[0], 0, chan_index
);
3011 micro_trunc(&d
[chan_index
], &r
[0]);
3013 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3014 STORE(&d
[chan_index
], 0, chan_index
);
3018 case TGSI_OPCODE_SHL
:
3019 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3020 FETCH( &r
[0], 0, chan_index
);
3021 FETCH( &r
[1], 1, chan_index
);
3022 micro_shl(&d
[chan_index
], &r
[0], &r
[1]);
3024 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3025 STORE(&d
[chan_index
], 0, chan_index
);
3029 case TGSI_OPCODE_SHR
:
3030 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3031 FETCH( &r
[0], 0, chan_index
);
3032 FETCH( &r
[1], 1, chan_index
);
3033 micro_ishr(&d
[chan_index
], &r
[0], &r
[1]);
3035 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3036 STORE(&d
[chan_index
], 0, chan_index
);
3040 case TGSI_OPCODE_AND
:
3041 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3042 FETCH( &r
[0], 0, chan_index
);
3043 FETCH( &r
[1], 1, chan_index
);
3044 micro_and(&d
[chan_index
], &r
[0], &r
[1]);
3046 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3047 STORE(&d
[chan_index
], 0, chan_index
);
3051 case TGSI_OPCODE_OR
:
3052 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3053 FETCH( &r
[0], 0, chan_index
);
3054 FETCH( &r
[1], 1, chan_index
);
3055 micro_or(&d
[chan_index
], &r
[0], &r
[1]);
3057 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3058 STORE(&d
[chan_index
], 0, chan_index
);
3062 case TGSI_OPCODE_MOD
:
3066 case TGSI_OPCODE_XOR
:
3067 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3068 FETCH( &r
[0], 0, chan_index
);
3069 FETCH( &r
[1], 1, chan_index
);
3070 micro_xor(&d
[chan_index
], &r
[0], &r
[1]);
3072 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3073 STORE(&d
[chan_index
], 0, chan_index
);
3077 case TGSI_OPCODE_SAD
:
3081 case TGSI_OPCODE_TXF
:
3085 case TGSI_OPCODE_TXQ
:
3089 case TGSI_OPCODE_EMIT
:
3090 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += 16;
3091 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
3094 case TGSI_OPCODE_ENDPRIM
:
3095 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]++;
3096 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]] = 0;
3099 case TGSI_OPCODE_BGNFOR
:
3100 assert(mach
->LoopCounterStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3101 for (chan_index
= 0; chan_index
< 3; chan_index
++) {
3102 FETCH( &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
].xyzw
[chan_index
], 0, chan_index
);
3104 ++mach
->LoopCounterStackTop
;
3105 STORE(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
], 0, CHAN_X
);
3106 /* update LoopMask */
3107 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[0] <= 0.0f
) {
3108 mach
->LoopMask
&= ~0x1;
3110 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[1] <= 0.0f
) {
3111 mach
->LoopMask
&= ~0x2;
3113 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[2] <= 0.0f
) {
3114 mach
->LoopMask
&= ~0x4;
3116 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[3] <= 0.0f
) {
3117 mach
->LoopMask
&= ~0x8;
3119 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3120 UPDATE_EXEC_MASK(mach
);
3121 /* fall-through (for now) */
3122 case TGSI_OPCODE_BGNLOOP
:
3123 /* push LoopMask and ContMasks */
3124 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3125 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
3126 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3127 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
3128 assert(mach
->LoopLabelStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3129 mach
->LoopLabelStack
[mach
->LoopLabelStackTop
++] = *pc
- 1;
3132 case TGSI_OPCODE_ENDFOR
:
3133 assert(mach
->LoopCounterStackTop
> 0);
3134 micro_sub(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
],
3135 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
],
3136 &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
3137 /* update LoopMask */
3138 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[0] <= 0.0f
) {
3139 mach
->LoopMask
&= ~0x1;
3141 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[1] <= 0.0f
) {
3142 mach
->LoopMask
&= ~0x2;
3144 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[2] <= 0.0f
) {
3145 mach
->LoopMask
&= ~0x4;
3147 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[3] <= 0.0f
) {
3148 mach
->LoopMask
&= ~0x8;
3150 micro_add(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
],
3151 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
],
3152 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Z
]);
3153 assert(mach
->LoopLabelStackTop
> 0);
3154 inst
= mach
->Instructions
+ mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1];
3155 STORE(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
].xyzw
[CHAN_X
], 0, CHAN_X
);
3156 /* Restore ContMask, but don't pop */
3157 assert(mach
->ContStackTop
> 0);
3158 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3159 UPDATE_EXEC_MASK(mach
);
3160 if (mach
->ExecMask
) {
3161 /* repeat loop: jump to instruction just past BGNLOOP */
3162 assert(mach
->LoopLabelStackTop
> 0);
3163 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
3166 /* exit loop: pop LoopMask */
3167 assert(mach
->LoopStackTop
> 0);
3168 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3170 assert(mach
->ContStackTop
> 0);
3171 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3172 assert(mach
->LoopLabelStackTop
> 0);
3173 --mach
->LoopLabelStackTop
;
3174 assert(mach
->LoopCounterStackTop
> 0);
3175 --mach
->LoopCounterStackTop
;
3177 UPDATE_EXEC_MASK(mach
);
3180 case TGSI_OPCODE_ENDLOOP
:
3181 /* Restore ContMask, but don't pop */
3182 assert(mach
->ContStackTop
> 0);
3183 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3184 UPDATE_EXEC_MASK(mach
);
3185 if (mach
->ExecMask
) {
3186 /* repeat loop: jump to instruction just past BGNLOOP */
3187 assert(mach
->LoopLabelStackTop
> 0);
3188 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
3191 /* exit loop: pop LoopMask */
3192 assert(mach
->LoopStackTop
> 0);
3193 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3195 assert(mach
->ContStackTop
> 0);
3196 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3197 assert(mach
->LoopLabelStackTop
> 0);
3198 --mach
->LoopLabelStackTop
;
3200 UPDATE_EXEC_MASK(mach
);
3203 case TGSI_OPCODE_BRK
:
3204 /* turn off loop channels for each enabled exec channel */
3205 mach
->LoopMask
&= ~mach
->ExecMask
;
3206 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3207 UPDATE_EXEC_MASK(mach
);
3210 case TGSI_OPCODE_CONT
:
3211 /* turn off cont channels for each enabled exec channel */
3212 mach
->ContMask
&= ~mach
->ExecMask
;
3213 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3214 UPDATE_EXEC_MASK(mach
);
3217 case TGSI_OPCODE_BGNSUB
:
3221 case TGSI_OPCODE_ENDSUB
:
3223 * XXX: This really should be a no-op. We should never reach this opcode.
3226 assert(mach
->CallStackTop
> 0);
3227 mach
->CallStackTop
--;
3229 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
3230 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
3232 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
3233 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
3235 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
3236 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
3238 assert(mach
->FuncStackTop
> 0);
3239 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
3241 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
3243 UPDATE_EXEC_MASK(mach
);
3246 case TGSI_OPCODE_NOP
:
3249 case TGSI_OPCODE_BREAKC
:
3250 FETCH(&r
[0], 0, CHAN_X
);
3251 /* update LoopMask */
3252 if (r
[0].u
[0] && (mach
->ExecMask
& 0x1)) {
3253 mach
->LoopMask
&= ~0x1;
3255 if (r
[0].u
[1] && (mach
->ExecMask
& 0x2)) {
3256 mach
->LoopMask
&= ~0x2;
3258 if (r
[0].u
[2] && (mach
->ExecMask
& 0x4)) {
3259 mach
->LoopMask
&= ~0x4;
3261 if (r
[0].u
[3] && (mach
->ExecMask
& 0x8)) {
3262 mach
->LoopMask
&= ~0x8;
3264 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3265 UPDATE_EXEC_MASK(mach
);
3273 #define DEBUG_EXECUTION 0
3277 * Run TGSI interpreter.
3278 * \return bitmask of "alive" quad components
3281 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
3286 mach
->CondMask
= 0xf;
3287 mach
->LoopMask
= 0xf;
3288 mach
->ContMask
= 0xf;
3289 mach
->FuncMask
= 0xf;
3290 mach
->ExecMask
= 0xf;
3292 assert(mach
->CondStackTop
== 0);
3293 assert(mach
->LoopStackTop
== 0);
3294 assert(mach
->ContStackTop
== 0);
3295 assert(mach
->CallStackTop
== 0);
3297 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
3298 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
3300 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
3301 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
3302 mach
->Primitives
[0] = 0;
3305 for (i
= 0; i
< QUAD_SIZE
; i
++) {
3306 mach
->Temps
[TEMP_CC_I
].xyzw
[TEMP_CC_C
].u
[i
] =
3307 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_X_SHIFT
) |
3308 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Y_SHIFT
) |
3309 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Z_SHIFT
) |
3310 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_W_SHIFT
);
3313 /* execute declarations (interpolants) */
3314 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
3315 exec_declaration( mach
, mach
->Declarations
+i
);
3320 struct tgsi_exec_vector temps
[TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
];
3321 struct tgsi_exec_vector outputs
[PIPE_MAX_ATTRIBS
];
3324 memcpy(temps
, mach
->Temps
, sizeof(temps
));
3325 memcpy(outputs
, mach
->Outputs
, sizeof(outputs
));
3328 /* execute instructions, until pc is set to -1 */
3334 tgsi_dump_instruction(&mach
->Instructions
[pc
], inst
++);
3337 assert(pc
< (int) mach
->NumInstructions
);
3338 exec_instruction(mach
, mach
->Instructions
+ pc
, &pc
);
3341 for (i
= 0; i
< TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
; i
++) {
3342 if (memcmp(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]))) {
3345 memcpy(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]));
3346 debug_printf("TEMP[%2u] = ", i
);
3347 for (j
= 0; j
< 4; j
++) {
3351 debug_printf("(%6f, %6f, %6f, %6f)\n",
3352 temps
[i
].xyzw
[0].f
[j
],
3353 temps
[i
].xyzw
[1].f
[j
],
3354 temps
[i
].xyzw
[2].f
[j
],
3355 temps
[i
].xyzw
[3].f
[j
]);
3359 for (i
= 0; i
< PIPE_MAX_ATTRIBS
; i
++) {
3360 if (memcmp(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]))) {
3363 memcpy(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]));
3364 debug_printf("OUT[%2u] = ", i
);
3365 for (j
= 0; j
< 4; j
++) {
3369 debug_printf("{%6f, %6f, %6f, %6f}\n",
3370 outputs
[i
].xyzw
[0].f
[j
],
3371 outputs
[i
].xyzw
[1].f
[j
],
3372 outputs
[i
].xyzw
[2].f
[j
],
3373 outputs
[i
].xyzw
[3].f
[j
]);
3382 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3383 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
3385 * Scale back depth component.
3387 for (i
= 0; i
< 4; i
++)
3388 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
3392 assert(mach
->CondStackTop
== 0);
3393 assert(mach
->LoopStackTop
== 0);
3394 assert(mach
->ContStackTop
== 0);
3395 assert(mach
->CallStackTop
== 0);
3397 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];