1 /**************************************************************************
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * TGSI interpreter/executor.
31 * Flow control information:
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
 * The ExecMask is computed from four other masks: CondMask, LoopMask and
 * ContMask, which are controlled by the flow control instructions (namely
 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT), plus FuncMask, which
 * UPDATE_EXEC_MASK ANDs in as well.
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_dump.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
60 #include "util/u_memory.h"
61 #include "util/u_math.h"
/* Lane indices within a quad, named after the position each lane occupies. */
#define TILE_TOP_LEFT      0
#define TILE_TOP_RIGHT     1
#define TILE_BOTTOM_LEFT   2
#define TILE_BOTTOM_RIGHT  3
/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel).
 * Each pair simply aliases the corresponding TGSI_EXEC_TEMP_* name.
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
#define TEMP_P0            TGSI_EXEC_TEMP_P0
/* Non-zero when channel CHAN is set in the write mask of destination 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN) \
   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))

/* Same test against the write mask of destination 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN) \
   ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))

/*
 * Loop header: runs the following statement once for every channel enabled
 * in destination 0's write mask.  Expands to a for + if pair, so the body
 * must not be followed by a bare `else`.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) \
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/* As above, but driven by destination 1's write mask. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) \
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
/**
 * Recompute the execution mask as the AND of the conditional, loop,
 * continue and function masks.
 *
 * MACH is parenthesized so that any pointer expression (e.g. `&machine`)
 * may be passed, and the whole assignment is wrapped so the macro behaves
 * as a single expression.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
129 static const union tgsi_exec_channel ZeroVec
=
130 { { 0.0, 0.0, 0.0, 0.0 } };
135 check_inf_or_nan(const union tgsi_exec_channel
*chan
)
137 assert(!util_is_inf_or_nan(chan
->f
[0]));
138 assert(!util_is_inf_or_nan(chan
->f
[1]));
139 assert(!util_is_inf_or_nan(chan
->f
[2]));
140 assert(!util_is_inf_or_nan(chan
->f
[3]));
147 print_chan(const char *msg
, const union tgsi_exec_channel
*chan
)
149 debug_printf("%s = {%f, %f, %f, %f}\n",
150 msg
, chan
->f
[0], chan
->f
[1], chan
->f
[2], chan
->f
[3]);
157 print_temp(const struct tgsi_exec_machine
*mach
, uint index
)
159 const struct tgsi_exec_vector
*tmp
= &mach
->Temps
[index
];
161 debug_printf("Temp[%u] =\n", index
);
162 for (i
= 0; i
< 4; i
++) {
163 debug_printf(" %c: { %f, %f, %f, %f }\n",
175 * Check if there's a potential src/dst register data dependency when
176 * using SOA execution.
179 * This would expand into:
184 * The second instruction will have the wrong value for t0 if executed as-is.
187 tgsi_check_soa_dependencies(const struct tgsi_full_instruction
*inst
)
191 uint writemask
= inst
->Dst
[0].Register
.WriteMask
;
192 if (writemask
== TGSI_WRITEMASK_X
||
193 writemask
== TGSI_WRITEMASK_Y
||
194 writemask
== TGSI_WRITEMASK_Z
||
195 writemask
== TGSI_WRITEMASK_W
||
196 writemask
== TGSI_WRITEMASK_NONE
) {
197 /* no chance of data dependency */
201 /* loop over src regs */
202 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
203 if ((inst
->Src
[i
].Register
.File
==
204 inst
->Dst
[0].Register
.File
) &&
205 (inst
->Src
[i
].Register
.Index
==
206 inst
->Dst
[0].Register
.Index
)) {
207 /* loop over dest channels */
208 uint channelsWritten
= 0x0;
209 FOR_EACH_ENABLED_CHANNEL(*inst
, chan
) {
210 /* check if we're reading a channel that's been written */
211 uint swizzle
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[i
], chan
);
212 if (channelsWritten
& (1 << swizzle
)) {
216 channelsWritten
|= (1 << chan
);
225 * Initialize machine state by expanding tokens to full instructions,
226 * allocating temporary storage, setting up constants, etc.
227 * After this, we can call tgsi_exec_machine_run() many times.
230 tgsi_exec_machine_bind_shader(
231 struct tgsi_exec_machine
*mach
,
232 const struct tgsi_token
*tokens
,
234 struct tgsi_sampler
**samplers
)
237 struct tgsi_parse_context parse
;
238 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
239 struct tgsi_full_instruction
*instructions
;
240 struct tgsi_full_declaration
*declarations
;
241 uint maxInstructions
= 10, numInstructions
= 0;
242 uint maxDeclarations
= 10, numDeclarations
= 0;
246 tgsi_dump(tokens
, 0);
251 mach
->Tokens
= tokens
;
252 mach
->Samplers
= samplers
;
254 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
255 if (k
!= TGSI_PARSE_OK
) {
256 debug_printf( "Problem parsing!\n" );
260 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
264 declarations
= (struct tgsi_full_declaration
*)
265 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
271 instructions
= (struct tgsi_full_instruction
*)
272 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
275 FREE( declarations
);
279 while( !tgsi_parse_end_of_tokens( &parse
) ) {
280 uint pointer
= parse
.Position
;
283 tgsi_parse_token( &parse
);
284 switch( parse
.FullToken
.Token
.Type
) {
285 case TGSI_TOKEN_TYPE_DECLARATION
:
286 /* save expanded declaration */
287 if (numDeclarations
== maxDeclarations
) {
288 declarations
= REALLOC(declarations
,
290 * sizeof(struct tgsi_full_declaration
),
291 (maxDeclarations
+ 10)
292 * sizeof(struct tgsi_full_declaration
));
293 maxDeclarations
+= 10;
295 if (parse
.FullToken
.FullDeclaration
.Declaration
.File
== TGSI_FILE_OUTPUT
) {
297 for (reg
= parse
.FullToken
.FullDeclaration
.Range
.First
;
298 reg
<= parse
.FullToken
.FullDeclaration
.Range
.Last
;
303 memcpy(declarations
+ numDeclarations
,
304 &parse
.FullToken
.FullDeclaration
,
305 sizeof(declarations
[0]));
309 case TGSI_TOKEN_TYPE_IMMEDIATE
:
311 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
313 assert( mach
->ImmLimit
+ 1 <= TGSI_EXEC_NUM_IMMEDIATES
);
315 for( i
= 0; i
< size
; i
++ ) {
316 mach
->Imms
[mach
->ImmLimit
][i
] =
317 parse
.FullToken
.FullImmediate
.u
[i
].Float
;
323 case TGSI_TOKEN_TYPE_INSTRUCTION
:
324 assert( labels
->count
< MAX_LABELS
);
326 labels
->labels
[labels
->count
][0] = instno
;
327 labels
->labels
[labels
->count
][1] = pointer
;
330 /* save expanded instruction */
331 if (numInstructions
== maxInstructions
) {
332 instructions
= REALLOC(instructions
,
334 * sizeof(struct tgsi_full_instruction
),
335 (maxInstructions
+ 10)
336 * sizeof(struct tgsi_full_instruction
));
337 maxInstructions
+= 10;
340 memcpy(instructions
+ numInstructions
,
341 &parse
.FullToken
.FullInstruction
,
342 sizeof(instructions
[0]));
347 case TGSI_TOKEN_TYPE_PROPERTY
:
354 tgsi_parse_free (&parse
);
356 if (mach
->Declarations
) {
357 FREE( mach
->Declarations
);
359 mach
->Declarations
= declarations
;
360 mach
->NumDeclarations
= numDeclarations
;
362 if (mach
->Instructions
) {
363 FREE( mach
->Instructions
);
365 mach
->Instructions
= instructions
;
366 mach
->NumInstructions
= numInstructions
;
370 struct tgsi_exec_machine
*
371 tgsi_exec_machine_create( void )
373 struct tgsi_exec_machine
*mach
;
376 mach
= align_malloc( sizeof *mach
, 16 );
380 memset(mach
, 0, sizeof(*mach
));
382 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_TEMP_ADDR
];
383 mach
->MaxGeometryShaderOutputs
= TGSI_MAX_TOTAL_VERTICES
;
384 mach
->Predicates
= &mach
->Temps
[TGSI_EXEC_TEMP_P0
];
386 /* Setup constants. */
387 for( i
= 0; i
< 4; i
++ ) {
388 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
389 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
390 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
391 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
392 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
393 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
394 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
395 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
396 mach
->Temps
[TEMP_3_I
].xyzw
[TEMP_3_C
].f
[i
] = 3.0f
;
397 mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
].f
[i
] = 0.5f
;
401 /* silence warnings */
415 tgsi_exec_machine_destroy(struct tgsi_exec_machine
*mach
)
418 FREE(mach
->Instructions
);
419 FREE(mach
->Declarations
);
428 union tgsi_exec_channel
*dst
,
429 const union tgsi_exec_channel
*src
)
431 dst
->f
[0] = fabsf( src
->f
[0] );
432 dst
->f
[1] = fabsf( src
->f
[1] );
433 dst
->f
[2] = fabsf( src
->f
[2] );
434 dst
->f
[3] = fabsf( src
->f
[3] );
439 union tgsi_exec_channel
*dst
,
440 const union tgsi_exec_channel
*src0
,
441 const union tgsi_exec_channel
*src1
)
443 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
444 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
445 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
446 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
452 union tgsi_exec_channel
*dst
,
453 const union tgsi_exec_channel
*src0
,
454 const union tgsi_exec_channel
*src1
)
456 dst
->i
[0] = src0
->i
[0] + src1
->i
[0];
457 dst
->i
[1] = src0
->i
[1] + src1
->i
[1];
458 dst
->i
[2] = src0
->i
[2] + src1
->i
[2];
459 dst
->i
[3] = src0
->i
[3] + src1
->i
[3];
465 union tgsi_exec_channel
*dst
,
466 const union tgsi_exec_channel
*src0
,
467 const union tgsi_exec_channel
*src1
)
469 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
470 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
471 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
472 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
477 union tgsi_exec_channel
*dst
,
478 const union tgsi_exec_channel
*src
)
480 dst
->f
[0] = ceilf( src
->f
[0] );
481 dst
->f
[1] = ceilf( src
->f
[1] );
482 dst
->f
[2] = ceilf( src
->f
[2] );
483 dst
->f
[3] = ceilf( src
->f
[3] );
488 union tgsi_exec_channel
*dst
,
489 const union tgsi_exec_channel
*src
)
491 dst
->f
[0] = cosf( src
->f
[0] );
492 dst
->f
[1] = cosf( src
->f
[1] );
493 dst
->f
[2] = cosf( src
->f
[2] );
494 dst
->f
[3] = cosf( src
->f
[3] );
499 union tgsi_exec_channel
*dst
,
500 const union tgsi_exec_channel
*src
)
505 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
510 union tgsi_exec_channel
*dst
,
511 const union tgsi_exec_channel
*src
)
516 dst
->f
[3] = src
->f
[TILE_BOTTOM_LEFT
] - src
->f
[TILE_TOP_LEFT
];
521 union tgsi_exec_channel
*dst
,
522 const union tgsi_exec_channel
*src0
,
523 const union tgsi_exec_channel
*src1
)
525 if (src1
->f
[0] != 0) {
526 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
528 if (src1
->f
[1] != 0) {
529 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
531 if (src1
->f
[2] != 0) {
532 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
534 if (src1
->f
[3] != 0) {
535 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
542 union tgsi_exec_channel
*dst
,
543 const union tgsi_exec_channel
*src0
,
544 const union tgsi_exec_channel
*src1
)
546 dst
->u
[0] = src0
->u
[0] / src1
->u
[0];
547 dst
->u
[1] = src0
->u
[1] / src1
->u
[1];
548 dst
->u
[2] = src0
->u
[2] / src1
->u
[2];
549 dst
->u
[3] = src0
->u
[3] / src1
->u
[3];
555 union tgsi_exec_channel
*dst
,
556 const union tgsi_exec_channel
*src0
,
557 const union tgsi_exec_channel
*src1
,
558 const union tgsi_exec_channel
*src2
,
559 const union tgsi_exec_channel
*src3
)
561 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
562 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
563 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
564 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
570 union tgsi_exec_channel
*dst
,
571 const union tgsi_exec_channel
*src0
,
572 const union tgsi_exec_channel
*src1
,
573 const union tgsi_exec_channel
*src2
,
574 const union tgsi_exec_channel
*src3
)
576 dst
->i
[0] = src0
->i
[0] == src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
577 dst
->i
[1] = src0
->i
[1] == src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
578 dst
->i
[2] = src0
->i
[2] == src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
579 dst
->i
[3] = src0
->i
[3] == src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
585 union tgsi_exec_channel
*dst
,
586 const union tgsi_exec_channel
*src
)
589 dst
->f
[0] = util_fast_exp2( src
->f
[0] );
590 dst
->f
[1] = util_fast_exp2( src
->f
[1] );
591 dst
->f
[2] = util_fast_exp2( src
->f
[2] );
592 dst
->f
[3] = util_fast_exp2( src
->f
[3] );
596 /* Inf is okay for this instruction, so clamp it to silence assertions. */
598 union tgsi_exec_channel clamped
;
600 for (i
= 0; i
< 4; i
++) {
601 if (src
->f
[i
] > 127.99999f
) {
602 clamped
.f
[i
] = 127.99999f
;
603 } else if (src
->f
[i
] < -126.99999f
) {
604 clamped
.f
[i
] = -126.99999f
;
606 clamped
.f
[i
] = src
->f
[i
];
612 dst
->f
[0] = powf( 2.0f
, src
->f
[0] );
613 dst
->f
[1] = powf( 2.0f
, src
->f
[1] );
614 dst
->f
[2] = powf( 2.0f
, src
->f
[2] );
615 dst
->f
[3] = powf( 2.0f
, src
->f
[3] );
622 union tgsi_exec_channel
*dst
,
623 const union tgsi_exec_channel
*src
)
625 dst
->u
[0] = (uint
) src
->f
[0];
626 dst
->u
[1] = (uint
) src
->f
[1];
627 dst
->u
[2] = (uint
) src
->f
[2];
628 dst
->u
[3] = (uint
) src
->f
[3];
633 micro_float_clamp(union tgsi_exec_channel
*dst
,
634 const union tgsi_exec_channel
*src
)
638 for (i
= 0; i
< 4; i
++) {
639 if (src
->f
[i
] > 0.0f
) {
640 if (src
->f
[i
] > 1.884467e+019f
)
641 dst
->f
[i
] = 1.884467e+019f
;
642 else if (src
->f
[i
] < 5.42101e-020f
)
643 dst
->f
[i
] = 5.42101e-020f
;
645 dst
->f
[i
] = src
->f
[i
];
648 if (src
->f
[i
] < -1.884467e+019f
)
649 dst
->f
[i
] = -1.884467e+019f
;
650 else if (src
->f
[i
] > -5.42101e-020f
)
651 dst
->f
[i
] = -5.42101e-020f
;
653 dst
->f
[i
] = src
->f
[i
];
660 union tgsi_exec_channel
*dst
,
661 const union tgsi_exec_channel
*src
)
663 dst
->f
[0] = floorf( src
->f
[0] );
664 dst
->f
[1] = floorf( src
->f
[1] );
665 dst
->f
[2] = floorf( src
->f
[2] );
666 dst
->f
[3] = floorf( src
->f
[3] );
671 union tgsi_exec_channel
*dst
,
672 const union tgsi_exec_channel
*src
)
674 dst
->f
[0] = src
->f
[0] - floorf( src
->f
[0] );
675 dst
->f
[1] = src
->f
[1] - floorf( src
->f
[1] );
676 dst
->f
[2] = src
->f
[2] - floorf( src
->f
[2] );
677 dst
->f
[3] = src
->f
[3] - floorf( src
->f
[3] );
682 union tgsi_exec_channel
*dst
,
683 const union tgsi_exec_channel
*src
)
685 dst
->f
[0] = (float) src
->i
[0];
686 dst
->f
[1] = (float) src
->i
[1];
687 dst
->f
[2] = (float) src
->i
[2];
688 dst
->f
[3] = (float) src
->i
[3];
693 union tgsi_exec_channel
*dst
,
694 const union tgsi_exec_channel
*src
)
697 dst
->f
[0] = util_fast_log2( src
->f
[0] );
698 dst
->f
[1] = util_fast_log2( src
->f
[1] );
699 dst
->f
[2] = util_fast_log2( src
->f
[2] );
700 dst
->f
[3] = util_fast_log2( src
->f
[3] );
702 dst
->f
[0] = logf( src
->f
[0] ) * 1.442695f
;
703 dst
->f
[1] = logf( src
->f
[1] ) * 1.442695f
;
704 dst
->f
[2] = logf( src
->f
[2] ) * 1.442695f
;
705 dst
->f
[3] = logf( src
->f
[3] ) * 1.442695f
;
711 union tgsi_exec_channel
*dst
,
712 const union tgsi_exec_channel
*src0
,
713 const union tgsi_exec_channel
*src1
,
714 const union tgsi_exec_channel
*src2
,
715 const union tgsi_exec_channel
*src3
)
717 dst
->f
[0] = src0
->f
[0] <= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
718 dst
->f
[1] = src0
->f
[1] <= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
719 dst
->f
[2] = src0
->f
[2] <= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
720 dst
->f
[3] = src0
->f
[3] <= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
725 union tgsi_exec_channel
*dst
,
726 const union tgsi_exec_channel
*src0
,
727 const union tgsi_exec_channel
*src1
,
728 const union tgsi_exec_channel
*src2
,
729 const union tgsi_exec_channel
*src3
)
731 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
732 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
733 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
734 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
740 union tgsi_exec_channel
*dst
,
741 const union tgsi_exec_channel
*src0
,
742 const union tgsi_exec_channel
*src1
,
743 const union tgsi_exec_channel
*src2
,
744 const union tgsi_exec_channel
*src3
)
746 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
747 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
748 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
749 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
756 union tgsi_exec_channel
*dst
,
757 const union tgsi_exec_channel
*src0
,
758 const union tgsi_exec_channel
*src1
,
759 const union tgsi_exec_channel
*src2
,
760 const union tgsi_exec_channel
*src3
)
762 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src2
->u
[0] : src3
->u
[0];
763 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src2
->u
[1] : src3
->u
[1];
764 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src2
->u
[2] : src3
->u
[2];
765 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src2
->u
[3] : src3
->u
[3];
771 union tgsi_exec_channel
*dst
,
772 const union tgsi_exec_channel
*src0
,
773 const union tgsi_exec_channel
*src1
)
775 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
776 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
777 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
778 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
784 union tgsi_exec_channel
*dst
,
785 const union tgsi_exec_channel
*src0
,
786 const union tgsi_exec_channel
*src1
)
788 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
789 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
790 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
791 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
798 union tgsi_exec_channel
*dst
,
799 const union tgsi_exec_channel
*src0
,
800 const union tgsi_exec_channel
*src1
)
802 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
803 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
804 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
805 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
811 union tgsi_exec_channel
*dst
,
812 const union tgsi_exec_channel
*src0
,
813 const union tgsi_exec_channel
*src1
)
815 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
816 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
817 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
818 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
824 union tgsi_exec_channel
*dst
,
825 const union tgsi_exec_channel
*src0
,
826 const union tgsi_exec_channel
*src1
)
828 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
829 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
830 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
831 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
838 union tgsi_exec_channel
*dst
,
839 const union tgsi_exec_channel
*src0
,
840 const union tgsi_exec_channel
*src1
)
842 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
843 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
844 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
845 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
852 union tgsi_exec_channel
*dst
,
853 const union tgsi_exec_channel
*src0
,
854 const union tgsi_exec_channel
*src1
)
856 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
857 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
858 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
859 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
865 union tgsi_exec_channel
*dst
,
866 const union tgsi_exec_channel
*src0
,
867 const union tgsi_exec_channel
*src1
)
869 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
870 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
871 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
872 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
878 union tgsi_exec_channel
*dst
,
879 const union tgsi_exec_channel
*src0
,
880 const union tgsi_exec_channel
*src1
)
882 dst
->i
[0] = src0
->i
[0] * src1
->i
[0];
883 dst
->i
[1] = src0
->i
[1] * src1
->i
[1];
884 dst
->i
[2] = src0
->i
[2] * src1
->i
[2];
885 dst
->i
[3] = src0
->i
[3] * src1
->i
[3];
892 union tgsi_exec_channel
*dst0
,
893 union tgsi_exec_channel
*dst1
,
894 const union tgsi_exec_channel
*src0
,
895 const union tgsi_exec_channel
*src1
)
897 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
898 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
899 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
900 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
911 union tgsi_exec_channel
*dst0
,
912 union tgsi_exec_channel
*dst1
,
913 const union tgsi_exec_channel
*src0
,
914 const union tgsi_exec_channel
*src1
)
916 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
917 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
918 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
919 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
931 union tgsi_exec_channel
*dst
,
932 const union tgsi_exec_channel
*src0
,
933 const union tgsi_exec_channel
*src1
,
934 const union tgsi_exec_channel
*src2
)
936 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
937 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
938 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
939 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
945 union tgsi_exec_channel
*dst
,
946 const union tgsi_exec_channel
*src
)
948 dst
->f
[0] = -src
->f
[0];
949 dst
->f
[1] = -src
->f
[1];
950 dst
->f
[2] = -src
->f
[2];
951 dst
->f
[3] = -src
->f
[3];
957 union tgsi_exec_channel
*dst
,
958 const union tgsi_exec_channel
*src
)
960 dst
->i
[0] = -src
->i
[0];
961 dst
->i
[1] = -src
->i
[1];
962 dst
->i
[2] = -src
->i
[2];
963 dst
->i
[3] = -src
->i
[3];
969 union tgsi_exec_channel
*dst
,
970 const union tgsi_exec_channel
*src
)
972 dst
->u
[0] = ~src
->u
[0];
973 dst
->u
[1] = ~src
->u
[1];
974 dst
->u
[2] = ~src
->u
[2];
975 dst
->u
[3] = ~src
->u
[3];
980 union tgsi_exec_channel
*dst
,
981 const union tgsi_exec_channel
*src0
,
982 const union tgsi_exec_channel
*src1
)
984 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
985 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
986 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
987 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
992 union tgsi_exec_channel
*dst
,
993 const union tgsi_exec_channel
*src0
,
994 const union tgsi_exec_channel
*src1
)
997 dst
->f
[0] = util_fast_pow( src0
->f
[0], src1
->f
[0] );
998 dst
->f
[1] = util_fast_pow( src0
->f
[1], src1
->f
[1] );
999 dst
->f
[2] = util_fast_pow( src0
->f
[2], src1
->f
[2] );
1000 dst
->f
[3] = util_fast_pow( src0
->f
[3], src1
->f
[3] );
1002 dst
->f
[0] = powf( src0
->f
[0], src1
->f
[0] );
1003 dst
->f
[1] = powf( src0
->f
[1], src1
->f
[1] );
1004 dst
->f
[2] = powf( src0
->f
[2], src1
->f
[2] );
1005 dst
->f
[3] = powf( src0
->f
[3], src1
->f
[3] );
1011 union tgsi_exec_channel
*dst
,
1012 const union tgsi_exec_channel
*src
)
1014 dst
->f
[0] = floorf( src
->f
[0] + 0.5f
);
1015 dst
->f
[1] = floorf( src
->f
[1] + 0.5f
);
1016 dst
->f
[2] = floorf( src
->f
[2] + 0.5f
);
1017 dst
->f
[3] = floorf( src
->f
[3] + 0.5f
);
1022 union tgsi_exec_channel
*dst
,
1023 const union tgsi_exec_channel
*src
)
1025 dst
->f
[0] = src
->f
[0] < 0.0f
? -1.0f
: src
->f
[0] > 0.0f
? 1.0f
: 0.0f
;
1026 dst
->f
[1] = src
->f
[1] < 0.0f
? -1.0f
: src
->f
[1] > 0.0f
? 1.0f
: 0.0f
;
1027 dst
->f
[2] = src
->f
[2] < 0.0f
? -1.0f
: src
->f
[2] > 0.0f
? 1.0f
: 0.0f
;
1028 dst
->f
[3] = src
->f
[3] < 0.0f
? -1.0f
: src
->f
[3] > 0.0f
? 1.0f
: 0.0f
;
1033 union tgsi_exec_channel
*dst
,
1034 const union tgsi_exec_channel
*src0
,
1035 const union tgsi_exec_channel
*src1
)
1037 dst
->i
[0] = src0
->i
[0] << src1
->i
[0];
1038 dst
->i
[1] = src0
->i
[1] << src1
->i
[1];
1039 dst
->i
[2] = src0
->i
[2] << src1
->i
[2];
1040 dst
->i
[3] = src0
->i
[3] << src1
->i
[3];
1045 union tgsi_exec_channel
*dst
,
1046 const union tgsi_exec_channel
*src0
,
1047 const union tgsi_exec_channel
*src1
)
1049 dst
->i
[0] = src0
->i
[0] >> src1
->i
[0];
1050 dst
->i
[1] = src0
->i
[1] >> src1
->i
[1];
1051 dst
->i
[2] = src0
->i
[2] >> src1
->i
[2];
1052 dst
->i
[3] = src0
->i
[3] >> src1
->i
[3];
1057 union tgsi_exec_channel
*dst
,
1058 const union tgsi_exec_channel
*src0
)
1060 dst
->f
[0] = (float) (int) src0
->f
[0];
1061 dst
->f
[1] = (float) (int) src0
->f
[1];
1062 dst
->f
[2] = (float) (int) src0
->f
[2];
1063 dst
->f
[3] = (float) (int) src0
->f
[3];
1069 union tgsi_exec_channel
*dst
,
1070 const union tgsi_exec_channel
*src0
,
1071 const union tgsi_exec_channel
*src1
)
1073 dst
->u
[0] = src0
->u
[0] >> src1
->u
[0];
1074 dst
->u
[1] = src0
->u
[1] >> src1
->u
[1];
1075 dst
->u
[2] = src0
->u
[2] >> src1
->u
[2];
1076 dst
->u
[3] = src0
->u
[3] >> src1
->u
[3];
1082 union tgsi_exec_channel
*dst
,
1083 const union tgsi_exec_channel
*src
)
1085 dst
->f
[0] = sinf( src
->f
[0] );
1086 dst
->f
[1] = sinf( src
->f
[1] );
1087 dst
->f
[2] = sinf( src
->f
[2] );
1088 dst
->f
[3] = sinf( src
->f
[3] );
1092 micro_sqrt( union tgsi_exec_channel
*dst
,
1093 const union tgsi_exec_channel
*src
)
1095 dst
->f
[0] = sqrtf( src
->f
[0] );
1096 dst
->f
[1] = sqrtf( src
->f
[1] );
1097 dst
->f
[2] = sqrtf( src
->f
[2] );
1098 dst
->f
[3] = sqrtf( src
->f
[3] );
1103 union tgsi_exec_channel
*dst
,
1104 const union tgsi_exec_channel
*src0
,
1105 const union tgsi_exec_channel
*src1
)
1107 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
1108 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
1109 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
1110 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
1116 union tgsi_exec_channel
*dst
,
1117 const union tgsi_exec_channel
*src
)
1119 dst
->f
[0] = (float) src
->u
[0];
1120 dst
->f
[1] = (float) src
->u
[1];
1121 dst
->f
[2] = (float) src
->u
[2];
1122 dst
->f
[3] = (float) src
->u
[3];
1128 union tgsi_exec_channel
*dst
,
1129 const union tgsi_exec_channel
*src0
,
1130 const union tgsi_exec_channel
*src1
)
1132 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
1133 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
1134 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
1135 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
1139 fetch_src_file_channel(
1140 const struct tgsi_exec_machine
*mach
,
1143 const union tgsi_exec_channel
*index
,
1144 union tgsi_exec_channel
*chan
)
1147 case TGSI_SWIZZLE_X
:
1148 case TGSI_SWIZZLE_Y
:
1149 case TGSI_SWIZZLE_Z
:
1150 case TGSI_SWIZZLE_W
:
1152 case TGSI_FILE_CONSTANT
:
1153 assert(mach
->Consts
);
1154 if (index
->i
[0] < 0)
1157 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
1158 if (index
->i
[1] < 0)
1161 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
1162 if (index
->i
[2] < 0)
1165 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
1166 if (index
->i
[3] < 0)
1169 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
1172 case TGSI_FILE_INPUT
:
1173 case TGSI_FILE_SYSTEM_VALUE
:
1174 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1175 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1176 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1177 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1180 case TGSI_FILE_TEMPORARY
:
1181 assert(index
->i
[0] < TGSI_EXEC_NUM_TEMPS
);
1182 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1183 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1184 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1185 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1188 case TGSI_FILE_IMMEDIATE
:
1189 assert( index
->i
[0] < (int) mach
->ImmLimit
);
1190 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
1191 assert( index
->i
[1] < (int) mach
->ImmLimit
);
1192 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
1193 assert( index
->i
[2] < (int) mach
->ImmLimit
);
1194 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
1195 assert( index
->i
[3] < (int) mach
->ImmLimit
);
1196 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
1199 case TGSI_FILE_ADDRESS
:
1200 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1201 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1202 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1203 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1206 case TGSI_FILE_PREDICATE
:
1207 assert(index
->i
[0] < TGSI_EXEC_NUM_PREDS
);
1208 assert(index
->i
[1] < TGSI_EXEC_NUM_PREDS
);
1209 assert(index
->i
[2] < TGSI_EXEC_NUM_PREDS
);
1210 assert(index
->i
[3] < TGSI_EXEC_NUM_PREDS
);
1211 chan
->u
[0] = mach
->Predicates
[0].xyzw
[swizzle
].u
[0];
1212 chan
->u
[1] = mach
->Predicates
[0].xyzw
[swizzle
].u
[1];
1213 chan
->u
[2] = mach
->Predicates
[0].xyzw
[swizzle
].u
[2];
1214 chan
->u
[3] = mach
->Predicates
[0].xyzw
[swizzle
].u
[3];
1217 case TGSI_FILE_OUTPUT
:
1218 /* vertex/fragment output vars can be read too */
1219 chan
->u
[0] = mach
->Outputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1220 chan
->u
[1] = mach
->Outputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1221 chan
->u
[2] = mach
->Outputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1222 chan
->u
[3] = mach
->Outputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1237 const struct tgsi_exec_machine
*mach
,
1238 union tgsi_exec_channel
*chan
,
1239 const struct tgsi_full_src_register
*reg
,
1240 const uint chan_index
)
1242 union tgsi_exec_channel index
;
1245 /* We start with a direct index into a register file.
1249 * file = Register.File
1250 * [1] = Register.Index
1255 index
.i
[3] = reg
->Register
.Index
;
1257 /* There is an extra source register that indirectly subscripts
1258 * a register file. The direct index now becomes an offset
1259 * that is being added to the indirect register.
1263 * ind = Indirect.File
1264 * [2] = Indirect.Index
1265 * .x = Indirect.SwizzleX
1267 if (reg
->Register
.Indirect
) {
1268 union tgsi_exec_channel index2
;
1269 union tgsi_exec_channel indir_index
;
1270 const uint execmask
= mach
->ExecMask
;
1273 /* which address register (always zero now) */
1277 index2
.i
[3] = reg
->Indirect
.Index
;
1279 /* get current value of address register[swizzle] */
1280 swizzle
= tgsi_util_get_src_register_swizzle( ®
->Indirect
, CHAN_X
);
1281 fetch_src_file_channel(
1288 /* add value of address register to the offset */
1289 index
.i
[0] += (int) indir_index
.f
[0];
1290 index
.i
[1] += (int) indir_index
.f
[1];
1291 index
.i
[2] += (int) indir_index
.f
[2];
1292 index
.i
[3] += (int) indir_index
.f
[3];
1294 /* for disabled execution channels, zero-out the index to
1295 * avoid using a potential garbage value.
1297 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1298 if ((execmask
& (1 << i
)) == 0)
1303 /* There is an extra source register that is a second
1304 * subscript to a register file. Effectively it means that
1305 * the register file is actually a 2D array of registers.
1307 * file[1][3] == file[1*sizeof(file[1])+3],
1309 * [3] = Dimension.Index
1311 if (reg
->Register
.Dimension
) {
1312 /* The size of the first-order array depends on the register file type.
1313 * We need to multiply the index to the first array to get an effective,
1314 * "flat" index that points to the beginning of the second-order array.
1316 switch (reg
->Register
.File
) {
1317 case TGSI_FILE_INPUT
:
1318 case TGSI_FILE_SYSTEM_VALUE
:
1319 index
.i
[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1320 index
.i
[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1321 index
.i
[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1322 index
.i
[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1324 case TGSI_FILE_CONSTANT
:
1325 index
.i
[0] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1326 index
.i
[1] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1327 index
.i
[2] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1328 index
.i
[3] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1334 index
.i
[0] += reg
->Dimension
.Index
;
1335 index
.i
[1] += reg
->Dimension
.Index
;
1336 index
.i
[2] += reg
->Dimension
.Index
;
1337 index
.i
[3] += reg
->Dimension
.Index
;
1339 /* Again, the second subscript index can be addressed indirectly
1340 * identically to the first one.
1341 * Nothing stops us from indirectly addressing the indirect register,
1342 * but there is no need for that, so we won't exercise it.
1344 * file[1][ind[4].y+3],
1346 * ind = DimIndirect.File
1347 * [4] = DimIndirect.Index
1348 * .y = DimIndirect.SwizzleX
1350 if (reg
->Dimension
.Indirect
) {
1351 union tgsi_exec_channel index2
;
1352 union tgsi_exec_channel indir_index
;
1353 const uint execmask
= mach
->ExecMask
;
1359 index2
.i
[3] = reg
->DimIndirect
.Index
;
1361 swizzle
= tgsi_util_get_src_register_swizzle( ®
->DimIndirect
, CHAN_X
);
1362 fetch_src_file_channel(
1364 reg
->DimIndirect
.File
,
1369 index
.i
[0] += (int) indir_index
.f
[0];
1370 index
.i
[1] += (int) indir_index
.f
[1];
1371 index
.i
[2] += (int) indir_index
.f
[2];
1372 index
.i
[3] += (int) indir_index
.f
[3];
1374 /* for disabled execution channels, zero-out the index to
1375 * avoid using a potential garbage value.
1377 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1378 if ((execmask
& (1 << i
)) == 0)
1383 /* If by any chance there was a need for a 3D array of register
1384 * files, we would have to check whether Dimension is followed
1385 * by a dimension register and continue the saga.
1389 swizzle
= tgsi_util_get_full_src_register_swizzle( reg
, chan_index
);
1390 fetch_src_file_channel(
1397 switch (tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
)) {
1398 case TGSI_UTIL_SIGN_CLEAR
:
1399 micro_abs( chan
, chan
);
1402 case TGSI_UTIL_SIGN_SET
:
1403 micro_abs( chan
, chan
);
1404 micro_neg( chan
, chan
);
1407 case TGSI_UTIL_SIGN_TOGGLE
:
1408 micro_neg( chan
, chan
);
1411 case TGSI_UTIL_SIGN_KEEP
:
1418 struct tgsi_exec_machine
*mach
,
1419 const union tgsi_exec_channel
*chan
,
1420 const struct tgsi_full_dst_register
*reg
,
1421 const struct tgsi_full_instruction
*inst
,
1425 union tgsi_exec_channel null
;
1426 union tgsi_exec_channel
*dst
;
1427 uint execmask
= mach
->ExecMask
;
1428 int offset
= 0; /* indirection offset */
1432 check_inf_or_nan(chan
);
1435 /* There is an extra source register that indirectly subscripts
1436 * a register file. The direct index now becomes an offset
1437 * that is being added to the indirect register.
1441 * ind = Indirect.File
1442 * [2] = Indirect.Index
1443 * .x = Indirect.SwizzleX
1445 if (reg
->Register
.Indirect
) {
1446 union tgsi_exec_channel index
;
1447 union tgsi_exec_channel indir_index
;
1450 /* which address register (always zero for now) */
1454 index
.i
[3] = reg
->Indirect
.Index
;
1456 /* get current value of address register[swizzle] */
1457 swizzle
= tgsi_util_get_src_register_swizzle( ®
->Indirect
, CHAN_X
);
1459 /* fetch values from the address/indirection register */
1460 fetch_src_file_channel(
1467 /* save indirection offset */
1468 offset
= (int) indir_index
.f
[0];
1471 switch (reg
->Register
.File
) {
1472 case TGSI_FILE_NULL
:
1476 case TGSI_FILE_OUTPUT
:
1477 index
= mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1478 + reg
->Register
.Index
;
1479 dst
= &mach
->Outputs
[offset
+ index
].xyzw
[chan_index
];
1481 if (TGSI_PROCESSOR_GEOMETRY
== mach
->Processor
) {
1482 fprintf(stderr
, "STORING OUT[%d] mask(%d), = (", offset
+ index
, execmask
);
1483 for (i
= 0; i
< QUAD_SIZE
; i
++)
1484 if (execmask
& (1 << i
))
1485 fprintf(stderr
, "%f, ", chan
->f
[i
]);
1486 fprintf(stderr
, ")\n");
1491 case TGSI_FILE_TEMPORARY
:
1492 index
= reg
->Register
.Index
;
1493 assert( index
< TGSI_EXEC_NUM_TEMPS
);
1494 dst
= &mach
->Temps
[offset
+ index
].xyzw
[chan_index
];
1497 case TGSI_FILE_ADDRESS
:
1498 index
= reg
->Register
.Index
;
1499 dst
= &mach
->Addrs
[index
].xyzw
[chan_index
];
1502 case TGSI_FILE_LOOP
:
1503 assert(reg
->Register
.Index
== 0);
1504 assert(mach
->LoopCounterStackTop
> 0);
1505 assert(chan_index
== CHAN_X
);
1506 dst
= &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[chan_index
];
1509 case TGSI_FILE_PREDICATE
:
1510 index
= reg
->Register
.Index
;
1511 assert(index
< TGSI_EXEC_NUM_PREDS
);
1512 dst
= &mach
->Predicates
[index
].xyzw
[chan_index
];
1520 if (inst
->Instruction
.Predicate
) {
1522 union tgsi_exec_channel
*pred
;
1524 switch (chan_index
) {
1526 swizzle
= inst
->Predicate
.SwizzleX
;
1529 swizzle
= inst
->Predicate
.SwizzleY
;
1532 swizzle
= inst
->Predicate
.SwizzleZ
;
1535 swizzle
= inst
->Predicate
.SwizzleW
;
1542 assert(inst
->Predicate
.Index
== 0);
1544 pred
= &mach
->Predicates
[inst
->Predicate
.Index
].xyzw
[swizzle
];
1546 if (inst
->Predicate
.Negate
) {
1547 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1549 execmask
&= ~(1 << i
);
1553 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1555 execmask
&= ~(1 << i
);
1561 switch (inst
->Instruction
.Saturate
) {
1563 for (i
= 0; i
< QUAD_SIZE
; i
++)
1564 if (execmask
& (1 << i
))
1565 dst
->i
[i
] = chan
->i
[i
];
1568 case TGSI_SAT_ZERO_ONE
:
1569 for (i
= 0; i
< QUAD_SIZE
; i
++)
1570 if (execmask
& (1 << i
)) {
1571 if (chan
->f
[i
] < 0.0f
)
1573 else if (chan
->f
[i
] > 1.0f
)
1576 dst
->i
[i
] = chan
->i
[i
];
1580 case TGSI_SAT_MINUS_PLUS_ONE
:
1581 for (i
= 0; i
< QUAD_SIZE
; i
++)
1582 if (execmask
& (1 << i
)) {
1583 if (chan
->f
[i
] < -1.0f
)
1585 else if (chan
->f
[i
] > 1.0f
)
1588 dst
->i
[i
] = chan
->i
[i
];
/*
 * Shorthands used by the instruction handlers.  Both expect variables
 * named `mach` and `inst` to be in scope at the point of use.
 *
 * FETCH reads component CHAN of source operand INDEX into VAL.
 * STORE writes VAL to component CHAN of destination operand INDEX.
 */
#define FETCH(VAL,INDEX,CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN)

#define STORE(VAL,INDEX,CHAN) \
   store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN)
1605 * Execute ARB-style KIL which is predicated by a src register.
1606 * Kill fragment if any of the four values is less than zero.
1609 exec_kil(struct tgsi_exec_machine
*mach
,
1610 const struct tgsi_full_instruction
*inst
)
1614 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1615 union tgsi_exec_channel r
[1];
1617 /* This mask stores component bits that were already tested. */
1620 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1625 /* unswizzle channel */
1626 swizzle
= tgsi_util_get_full_src_register_swizzle (
1630 /* check if the component has not been already tested */
1631 if (uniquemask
& (1 << swizzle
))
1633 uniquemask
|= 1 << swizzle
;
1635 FETCH(&r
[0], 0, chan_index
);
1636 for (i
= 0; i
< 4; i
++)
1637 if (r
[0].f
[i
] < 0.0f
)
1641 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1645 * Execute NVIDIA-style KIL which is predicated by a condition code.
1646 * Kill fragment if the condition code is TRUE.
1649 exec_kilp(struct tgsi_exec_machine
*mach
,
1650 const struct tgsi_full_instruction
*inst
)
1652 uint kilmask
; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1654 /* "unconditional" kil */
1655 kilmask
= mach
->ExecMask
;
1656 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1660 emit_vertex(struct tgsi_exec_machine
*mach
)
1662 /* FIXME: check for exec mask correctly
1664 for (i = 0; i < QUAD_SIZE; ++i) {
1665 if ((mach->ExecMask & (1 << i)))
1667 if (mach
->ExecMask
) {
1668 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += mach
->NumOutputs
;
1669 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
1674 emit_primitive(struct tgsi_exec_machine
*mach
)
1676 unsigned *prim_count
= &mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0];
1677 /* FIXME: check for exec mask correctly
1679 for (i = 0; i < QUAD_SIZE; ++i) {
1680 if ((mach->ExecMask & (1 << i)))
1682 if (mach
->ExecMask
) {
1684 debug_assert((*prim_count
* mach
->NumOutputs
) < mach
->MaxGeometryShaderOutputs
);
1685 mach
->Primitives
[*prim_count
] = 0;
1690 * Fetch a four texture samples using STR texture coordinates.
1693 fetch_texel( struct tgsi_sampler
*sampler
,
1694 const union tgsi_exec_channel
*s
,
1695 const union tgsi_exec_channel
*t
,
1696 const union tgsi_exec_channel
*p
,
1697 float lodbias
, /* XXX should be float[4] */
1698 union tgsi_exec_channel
*r
,
1699 union tgsi_exec_channel
*g
,
1700 union tgsi_exec_channel
*b
,
1701 union tgsi_exec_channel
*a
)
1704 float rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1706 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, lodbias
, rgba
);
1708 for (j
= 0; j
< 4; j
++) {
1709 r
->f
[j
] = rgba
[0][j
];
1710 g
->f
[j
] = rgba
[1][j
];
1711 b
->f
[j
] = rgba
[2][j
];
1712 a
->f
[j
] = rgba
[3][j
];
1718 exec_tex(struct tgsi_exec_machine
*mach
,
1719 const struct tgsi_full_instruction
*inst
,
1723 const uint unit
= inst
->Src
[1].Register
.Index
;
1724 union tgsi_exec_channel r
[4];
1728 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1730 switch (inst
->Texture
.Texture
) {
1731 case TGSI_TEXTURE_1D
:
1732 case TGSI_TEXTURE_SHADOW1D
:
1734 FETCH(&r
[0], 0, CHAN_X
);
1737 FETCH(&r
[1], 0, CHAN_W
);
1738 micro_div( &r
[0], &r
[0], &r
[1] );
1742 FETCH(&r
[1], 0, CHAN_W
);
1743 lodBias
= r
[2].f
[0];
1748 fetch_texel(mach
->Samplers
[unit
],
1749 &r
[0], &ZeroVec
, &ZeroVec
, lodBias
, /* S, T, P, BIAS */
1750 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1753 case TGSI_TEXTURE_2D
:
1754 case TGSI_TEXTURE_RECT
:
1755 case TGSI_TEXTURE_SHADOW2D
:
1756 case TGSI_TEXTURE_SHADOWRECT
:
1758 FETCH(&r
[0], 0, CHAN_X
);
1759 FETCH(&r
[1], 0, CHAN_Y
);
1760 FETCH(&r
[2], 0, CHAN_Z
);
1763 FETCH(&r
[3], 0, CHAN_W
);
1764 micro_div( &r
[0], &r
[0], &r
[3] );
1765 micro_div( &r
[1], &r
[1], &r
[3] );
1766 micro_div( &r
[2], &r
[2], &r
[3] );
1770 FETCH(&r
[3], 0, CHAN_W
);
1771 lodBias
= r
[3].f
[0];
1776 fetch_texel(mach
->Samplers
[unit
],
1777 &r
[0], &r
[1], &r
[2], lodBias
, /* inputs */
1778 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1781 case TGSI_TEXTURE_3D
:
1782 case TGSI_TEXTURE_CUBE
:
1784 FETCH(&r
[0], 0, CHAN_X
);
1785 FETCH(&r
[1], 0, CHAN_Y
);
1786 FETCH(&r
[2], 0, CHAN_Z
);
1789 FETCH(&r
[3], 0, CHAN_W
);
1790 micro_div( &r
[0], &r
[0], &r
[3] );
1791 micro_div( &r
[1], &r
[1], &r
[3] );
1792 micro_div( &r
[2], &r
[2], &r
[3] );
1796 FETCH(&r
[3], 0, CHAN_W
);
1797 lodBias
= r
[3].f
[0];
1802 fetch_texel(mach
->Samplers
[unit
],
1803 &r
[0], &r
[1], &r
[2], lodBias
,
1804 &r
[0], &r
[1], &r
[2], &r
[3]);
1811 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1812 STORE( &r
[chan_index
], 0, chan_index
);
1817 exec_txd(struct tgsi_exec_machine
*mach
,
1818 const struct tgsi_full_instruction
*inst
)
1820 const uint unit
= inst
->Src
[3].Register
.Index
;
1821 union tgsi_exec_channel r
[4];
1825 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1828 switch (inst
->Texture
.Texture
) {
1829 case TGSI_TEXTURE_1D
:
1830 case TGSI_TEXTURE_SHADOW1D
:
1832 FETCH(&r
[0], 0, CHAN_X
);
1834 fetch_texel(mach
->Samplers
[unit
],
1835 &r
[0], &ZeroVec
, &ZeroVec
, 0.0f
, /* S, T, P, BIAS */
1836 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1839 case TGSI_TEXTURE_2D
:
1840 case TGSI_TEXTURE_RECT
:
1841 case TGSI_TEXTURE_SHADOW2D
:
1842 case TGSI_TEXTURE_SHADOWRECT
:
1844 FETCH(&r
[0], 0, CHAN_X
);
1845 FETCH(&r
[1], 0, CHAN_Y
);
1846 FETCH(&r
[2], 0, CHAN_Z
);
1848 fetch_texel(mach
->Samplers
[unit
],
1849 &r
[0], &r
[1], &r
[2], 0.0f
, /* inputs */
1850 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1853 case TGSI_TEXTURE_3D
:
1854 case TGSI_TEXTURE_CUBE
:
1856 FETCH(&r
[0], 0, CHAN_X
);
1857 FETCH(&r
[1], 0, CHAN_Y
);
1858 FETCH(&r
[2], 0, CHAN_Z
);
1860 fetch_texel(mach
->Samplers
[unit
],
1861 &r
[0], &r
[1], &r
[2], 0.0f
,
1862 &r
[0], &r
[1], &r
[2], &r
[3]);
1869 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
1870 STORE(&r
[chan_index
], 0, chan_index
);
1876 * Evaluate a constant-valued coefficient at the position of the
1881 struct tgsi_exec_machine
*mach
,
1887 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1888 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1893 * Evaluate a linear-valued coefficient at the position of the
1898 struct tgsi_exec_machine
*mach
,
1902 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1903 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1904 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1905 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1906 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1907 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
1908 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
1909 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
1910 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
1914 * Evaluate a perspective-valued coefficient at the position of the
1918 eval_perspective_coef(
1919 struct tgsi_exec_machine
*mach
,
1923 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1924 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1925 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1926 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1927 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1928 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
1929 /* divide by W here */
1930 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
1931 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
1932 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
1933 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
/**
 * Common signature of the eval_*_coef() functions, so that the one
 * matching a declaration's interpolation mode can be picked at run time.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
1943 exec_declaration(struct tgsi_exec_machine
*mach
,
1944 const struct tgsi_full_declaration
*decl
)
1946 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1947 if (decl
->Declaration
.File
== TGSI_FILE_INPUT
||
1948 decl
->Declaration
.File
== TGSI_FILE_SYSTEM_VALUE
) {
1949 uint first
, last
, mask
;
1951 first
= decl
->Range
.First
;
1952 last
= decl
->Range
.Last
;
1953 mask
= decl
->Declaration
.UsageMask
;
1955 if (decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
) {
1956 assert(decl
->Semantic
.Index
== 0);
1957 assert(first
== last
);
1958 assert(mask
= TGSI_WRITEMASK_XYZW
);
1960 mach
->Inputs
[first
] = mach
->QuadPos
;
1961 } else if (decl
->Semantic
.Name
== TGSI_SEMANTIC_FACE
) {
1964 assert(decl
->Semantic
.Index
== 0);
1965 assert(first
== last
);
1967 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1968 mach
->Inputs
[first
].xyzw
[0].f
[i
] = mach
->Face
;
1971 eval_coef_func eval
;
1974 switch (decl
->Declaration
.Interpolate
) {
1975 case TGSI_INTERPOLATE_CONSTANT
:
1976 eval
= eval_constant_coef
;
1979 case TGSI_INTERPOLATE_LINEAR
:
1980 eval
= eval_linear_coef
;
1983 case TGSI_INTERPOLATE_PERSPECTIVE
:
1984 eval
= eval_perspective_coef
;
1992 for (j
= 0; j
< NUM_CHANNELS
; j
++) {
1993 if (mask
& (1 << j
)) {
1994 for (i
= first
; i
<= last
; i
++) {
2006 struct tgsi_exec_machine
*mach
,
2007 const struct tgsi_full_instruction
*inst
,
2011 union tgsi_exec_channel r
[10];
2012 union tgsi_exec_channel d
[8];
2016 switch (inst
->Instruction
.Opcode
) {
2017 case TGSI_OPCODE_ARL
:
2018 case TGSI_OPCODE_FLR
:
2019 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2020 FETCH( &r
[0], 0, chan_index
);
2021 micro_flr(&d
[chan_index
], &r
[0]);
2023 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2024 STORE(&d
[chan_index
], 0, chan_index
);
2028 case TGSI_OPCODE_MOV
:
2029 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2030 FETCH(&d
[chan_index
], 0, chan_index
);
2032 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2033 STORE(&d
[chan_index
], 0, chan_index
);
2037 case TGSI_OPCODE_LIT
:
2038 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2039 FETCH( &r
[0], 0, CHAN_X
);
2040 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2041 micro_max(&d
[CHAN_Y
], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2044 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2045 FETCH( &r
[1], 0, CHAN_Y
);
2046 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2048 FETCH( &r
[2], 0, CHAN_W
);
2049 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
2050 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
2051 micro_pow( &r
[1], &r
[1], &r
[2] );
2052 micro_lt(&d
[CHAN_Z
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2055 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2056 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2058 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2059 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2062 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2063 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
2065 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2066 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2070 case TGSI_OPCODE_RCP
:
2071 /* TGSI_OPCODE_RECIP */
2072 FETCH( &r
[0], 0, CHAN_X
);
2073 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
2074 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2075 STORE( &r
[0], 0, chan_index
);
2079 case TGSI_OPCODE_RSQ
:
2080 /* TGSI_OPCODE_RECIPSQRT */
2081 FETCH( &r
[0], 0, CHAN_X
);
2082 micro_abs( &r
[0], &r
[0] );
2083 micro_sqrt( &r
[0], &r
[0] );
2084 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
2085 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2086 STORE( &r
[0], 0, chan_index
);
2090 case TGSI_OPCODE_EXP
:
2091 FETCH( &r
[0], 0, CHAN_X
);
2092 micro_flr( &r
[1], &r
[0] ); /* r1 = floor(r0) */
2093 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2094 micro_exp2( &r
[2], &r
[1] ); /* r2 = 2 ^ r1 */
2095 STORE( &r
[2], 0, CHAN_X
); /* store r2 */
2097 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2098 micro_sub( &r
[2], &r
[0], &r
[1] ); /* r2 = r0 - r1 */
2099 STORE( &r
[2], 0, CHAN_Y
); /* store r2 */
2101 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2102 micro_exp2( &r
[2], &r
[0] ); /* r2 = 2 ^ r0 */
2103 STORE( &r
[2], 0, CHAN_Z
); /* store r2 */
2105 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2106 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2110 case TGSI_OPCODE_LOG
:
2111 FETCH( &r
[0], 0, CHAN_X
);
2112 micro_abs( &r
[2], &r
[0] ); /* r2 = abs(r0) */
2113 micro_lg2( &r
[1], &r
[2] ); /* r1 = lg2(r2) */
2114 micro_flr( &r
[0], &r
[1] ); /* r0 = floor(r1) */
2115 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2116 STORE( &r
[0], 0, CHAN_X
);
2118 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2119 micro_exp2( &r
[0], &r
[0] ); /* r0 = 2 ^ r0 */
2120 micro_div( &r
[0], &r
[2], &r
[0] ); /* r0 = r2 / r0 */
2121 STORE( &r
[0], 0, CHAN_Y
);
2123 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2124 STORE( &r
[1], 0, CHAN_Z
);
2126 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2127 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2131 case TGSI_OPCODE_MUL
:
2132 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2133 FETCH(&r
[0], 0, chan_index
);
2134 FETCH(&r
[1], 1, chan_index
);
2135 micro_mul(&d
[chan_index
], &r
[0], &r
[1]);
2137 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2138 STORE(&d
[chan_index
], 0, chan_index
);
2142 case TGSI_OPCODE_ADD
:
2143 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2144 FETCH( &r
[0], 0, chan_index
);
2145 FETCH( &r
[1], 1, chan_index
);
2146 micro_add(&d
[chan_index
], &r
[0], &r
[1]);
2148 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2149 STORE(&d
[chan_index
], 0, chan_index
);
2153 case TGSI_OPCODE_DP3
:
2154 /* TGSI_OPCODE_DOT3 */
2155 FETCH( &r
[0], 0, CHAN_X
);
2156 FETCH( &r
[1], 1, CHAN_X
);
2157 micro_mul( &r
[0], &r
[0], &r
[1] );
2159 FETCH( &r
[1], 0, CHAN_Y
);
2160 FETCH( &r
[2], 1, CHAN_Y
);
2161 micro_mul( &r
[1], &r
[1], &r
[2] );
2162 micro_add( &r
[0], &r
[0], &r
[1] );
2164 FETCH( &r
[1], 0, CHAN_Z
);
2165 FETCH( &r
[2], 1, CHAN_Z
);
2166 micro_mul( &r
[1], &r
[1], &r
[2] );
2167 micro_add( &r
[0], &r
[0], &r
[1] );
2169 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2170 STORE( &r
[0], 0, chan_index
);
2174 case TGSI_OPCODE_DP4
:
2175 /* TGSI_OPCODE_DOT4 */
2176 FETCH(&r
[0], 0, CHAN_X
);
2177 FETCH(&r
[1], 1, CHAN_X
);
2179 micro_mul( &r
[0], &r
[0], &r
[1] );
2181 FETCH(&r
[1], 0, CHAN_Y
);
2182 FETCH(&r
[2], 1, CHAN_Y
);
2184 micro_mul( &r
[1], &r
[1], &r
[2] );
2185 micro_add( &r
[0], &r
[0], &r
[1] );
2187 FETCH(&r
[1], 0, CHAN_Z
);
2188 FETCH(&r
[2], 1, CHAN_Z
);
2190 micro_mul( &r
[1], &r
[1], &r
[2] );
2191 micro_add( &r
[0], &r
[0], &r
[1] );
2193 FETCH(&r
[1], 0, CHAN_W
);
2194 FETCH(&r
[2], 1, CHAN_W
);
2196 micro_mul( &r
[1], &r
[1], &r
[2] );
2197 micro_add( &r
[0], &r
[0], &r
[1] );
2199 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2200 STORE( &r
[0], 0, chan_index
);
2204 case TGSI_OPCODE_DST
:
2205 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2206 FETCH( &r
[0], 0, CHAN_Y
);
2207 FETCH( &r
[1], 1, CHAN_Y
);
2208 micro_mul(&d
[CHAN_Y
], &r
[0], &r
[1]);
2210 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2211 FETCH(&d
[CHAN_Z
], 0, CHAN_Z
);
2213 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2214 FETCH(&d
[CHAN_W
], 1, CHAN_W
);
2217 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2218 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
2220 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2221 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2223 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2224 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2226 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2227 STORE(&d
[CHAN_W
], 0, CHAN_W
);
2231 case TGSI_OPCODE_MIN
:
2232 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2233 FETCH(&r
[0], 0, chan_index
);
2234 FETCH(&r
[1], 1, chan_index
);
2236 /* XXX use micro_min()?? */
2237 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &r
[0], &r
[1]);
2239 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2240 STORE(&d
[chan_index
], 0, chan_index
);
2244 case TGSI_OPCODE_MAX
:
2245 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2246 FETCH(&r
[0], 0, chan_index
);
2247 FETCH(&r
[1], 1, chan_index
);
2249 /* XXX use micro_max()?? */
2250 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &r
[1], &r
[0] );
2252 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2253 STORE(&d
[chan_index
], 0, chan_index
);
2257 case TGSI_OPCODE_SLT
:
2258 /* TGSI_OPCODE_SETLT */
2259 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2260 FETCH( &r
[0], 0, chan_index
);
2261 FETCH( &r
[1], 1, chan_index
);
2262 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2264 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2265 STORE(&d
[chan_index
], 0, chan_index
);
2269 case TGSI_OPCODE_SGE
:
2270 /* TGSI_OPCODE_SETGE */
2271 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2272 FETCH( &r
[0], 0, chan_index
);
2273 FETCH( &r
[1], 1, chan_index
);
2274 micro_le(&d
[chan_index
], &r
[1], &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2276 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2277 STORE(&d
[chan_index
], 0, chan_index
);
2281 case TGSI_OPCODE_MAD
:
2282 /* TGSI_OPCODE_MADD */
2283 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2284 FETCH( &r
[0], 0, chan_index
);
2285 FETCH( &r
[1], 1, chan_index
);
2286 micro_mul( &r
[0], &r
[0], &r
[1] );
2287 FETCH( &r
[1], 2, chan_index
);
2288 micro_add(&d
[chan_index
], &r
[0], &r
[1]);
2290 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2291 STORE(&d
[chan_index
], 0, chan_index
);
2295 case TGSI_OPCODE_SUB
:
2296 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2297 FETCH(&r
[0], 0, chan_index
);
2298 FETCH(&r
[1], 1, chan_index
);
2299 micro_sub(&d
[chan_index
], &r
[0], &r
[1]);
2301 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2302 STORE(&d
[chan_index
], 0, chan_index
);
2306 case TGSI_OPCODE_LRP
:
2307 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2308 FETCH(&r
[0], 0, chan_index
);
2309 FETCH(&r
[1], 1, chan_index
);
2310 FETCH(&r
[2], 2, chan_index
);
2311 micro_sub( &r
[1], &r
[1], &r
[2] );
2312 micro_mul( &r
[0], &r
[0], &r
[1] );
2313 micro_add(&d
[chan_index
], &r
[0], &r
[2]);
2315 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2316 STORE(&d
[chan_index
], 0, chan_index
);
2320 case TGSI_OPCODE_CND
:
2321 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2322 FETCH(&r
[0], 0, chan_index
);
2323 FETCH(&r
[1], 1, chan_index
);
2324 FETCH(&r
[2], 2, chan_index
);
2325 micro_lt(&d
[chan_index
], &mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
], &r
[2], &r
[0], &r
[1]);
2327 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2328 STORE(&d
[chan_index
], 0, chan_index
);
2332 case TGSI_OPCODE_DP2A
:
2333 FETCH( &r
[0], 0, CHAN_X
);
2334 FETCH( &r
[1], 1, CHAN_X
);
2335 micro_mul( &r
[0], &r
[0], &r
[1] );
2337 FETCH( &r
[1], 0, CHAN_Y
);
2338 FETCH( &r
[2], 1, CHAN_Y
);
2339 micro_mul( &r
[1], &r
[1], &r
[2] );
2340 micro_add( &r
[0], &r
[0], &r
[1] );
2342 FETCH( &r
[2], 2, CHAN_X
);
2343 micro_add( &r
[0], &r
[0], &r
[2] );
2345 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2346 STORE( &r
[0], 0, chan_index
);
2350 case TGSI_OPCODE_FRC
:
2351 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2352 FETCH( &r
[0], 0, chan_index
);
2353 micro_frc(&d
[chan_index
], &r
[0]);
2355 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2356 STORE(&d
[chan_index
], 0, chan_index
);
2360 case TGSI_OPCODE_CLAMP
:
2361 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2362 FETCH(&r
[0], 0, chan_index
);
2363 FETCH(&r
[1], 1, chan_index
);
2364 micro_max(&r
[0], &r
[0], &r
[1]);
2365 FETCH(&r
[1], 2, chan_index
);
2366 micro_min(&d
[chan_index
], &r
[0], &r
[1]);
2368 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2369 STORE(&d
[chan_index
], 0, chan_index
);
2373 case TGSI_OPCODE_ROUND
:
2374 case TGSI_OPCODE_ARR
:
2375 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2376 FETCH( &r
[0], 0, chan_index
);
2377 micro_rnd(&d
[chan_index
], &r
[0]);
2379 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2380 STORE(&d
[chan_index
], 0, chan_index
);
2384 case TGSI_OPCODE_EX2
:
2385 FETCH(&r
[0], 0, CHAN_X
);
2387 micro_exp2( &r
[0], &r
[0] );
2389 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2390 STORE( &r
[0], 0, chan_index
);
2394 case TGSI_OPCODE_LG2
:
2395 FETCH( &r
[0], 0, CHAN_X
);
2396 micro_lg2( &r
[0], &r
[0] );
2397 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2398 STORE( &r
[0], 0, chan_index
);
2402 case TGSI_OPCODE_POW
:
2403 FETCH(&r
[0], 0, CHAN_X
);
2404 FETCH(&r
[1], 1, CHAN_X
);
2406 micro_pow( &r
[0], &r
[0], &r
[1] );
2408 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2409 STORE( &r
[0], 0, chan_index
);
2413 case TGSI_OPCODE_XPD
:
2414 FETCH(&r
[0], 0, CHAN_Y
);
2415 FETCH(&r
[1], 1, CHAN_Z
);
2417 micro_mul( &r
[2], &r
[0], &r
[1] );
2419 FETCH(&r
[3], 0, CHAN_Z
);
2420 FETCH(&r
[4], 1, CHAN_Y
);
2422 micro_mul( &r
[5], &r
[3], &r
[4] );
2423 micro_sub(&d
[CHAN_X
], &r
[2], &r
[5]);
2425 FETCH(&r
[2], 1, CHAN_X
);
2427 micro_mul( &r
[3], &r
[3], &r
[2] );
2429 FETCH(&r
[5], 0, CHAN_X
);
2431 micro_mul( &r
[1], &r
[1], &r
[5] );
2432 micro_sub(&d
[CHAN_Y
], &r
[3], &r
[1]);
2434 micro_mul( &r
[5], &r
[5], &r
[4] );
2435 micro_mul( &r
[0], &r
[0], &r
[2] );
2436 micro_sub(&d
[CHAN_Z
], &r
[5], &r
[0]);
2438 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2439 STORE(&d
[CHAN_X
], 0, CHAN_X
);
2441 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2442 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2444 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2445 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2447 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2448 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2452 case TGSI_OPCODE_ABS
:
2453 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2454 FETCH(&r
[0], 0, chan_index
);
2455 micro_abs(&d
[chan_index
], &r
[0]);
2457 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2458 STORE(&d
[chan_index
], 0, chan_index
);
2462 case TGSI_OPCODE_RCC
:
2463 FETCH(&r
[0], 0, CHAN_X
);
2464 micro_div(&r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0]);
2465 micro_float_clamp(&r
[0], &r
[0]);
2466 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2467 STORE(&r
[0], 0, chan_index
);
2471 case TGSI_OPCODE_DPH
:
2472 FETCH(&r
[0], 0, CHAN_X
);
2473 FETCH(&r
[1], 1, CHAN_X
);
2475 micro_mul( &r
[0], &r
[0], &r
[1] );
2477 FETCH(&r
[1], 0, CHAN_Y
);
2478 FETCH(&r
[2], 1, CHAN_Y
);
2480 micro_mul( &r
[1], &r
[1], &r
[2] );
2481 micro_add( &r
[0], &r
[0], &r
[1] );
2483 FETCH(&r
[1], 0, CHAN_Z
);
2484 FETCH(&r
[2], 1, CHAN_Z
);
2486 micro_mul( &r
[1], &r
[1], &r
[2] );
2487 micro_add( &r
[0], &r
[0], &r
[1] );
2489 FETCH(&r
[1], 1, CHAN_W
);
2491 micro_add( &r
[0], &r
[0], &r
[1] );
2493 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2494 STORE( &r
[0], 0, chan_index
);
2498 case TGSI_OPCODE_COS
:
2499 FETCH(&r
[0], 0, CHAN_X
);
2501 micro_cos( &r
[0], &r
[0] );
2503 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2504 STORE( &r
[0], 0, chan_index
);
2508 case TGSI_OPCODE_DDX
:
2509 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2510 FETCH( &r
[0], 0, chan_index
);
2511 micro_ddx(&d
[chan_index
], &r
[0]);
2513 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2514 STORE(&d
[chan_index
], 0, chan_index
);
2518 case TGSI_OPCODE_DDY
:
2519 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2520 FETCH( &r
[0], 0, chan_index
);
2521 micro_ddy(&d
[chan_index
], &r
[0]);
2523 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2524 STORE(&d
[chan_index
], 0, chan_index
);
2528 case TGSI_OPCODE_KILP
:
2529 exec_kilp (mach
, inst
);
2532 case TGSI_OPCODE_KIL
:
2533 exec_kil (mach
, inst
);
2536 case TGSI_OPCODE_PK2H
:
2540 case TGSI_OPCODE_PK2US
:
2544 case TGSI_OPCODE_PK4B
:
2548 case TGSI_OPCODE_PK4UB
:
2552 case TGSI_OPCODE_RFL
:
2553 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2554 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2555 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2556 /* r0 = dp3(src0, src0) */
2557 FETCH(&r
[2], 0, CHAN_X
);
2558 micro_mul(&r
[0], &r
[2], &r
[2]);
2559 FETCH(&r
[4], 0, CHAN_Y
);
2560 micro_mul(&r
[8], &r
[4], &r
[4]);
2561 micro_add(&r
[0], &r
[0], &r
[8]);
2562 FETCH(&r
[6], 0, CHAN_Z
);
2563 micro_mul(&r
[8], &r
[6], &r
[6]);
2564 micro_add(&r
[0], &r
[0], &r
[8]);
2566 /* r1 = dp3(src0, src1) */
2567 FETCH(&r
[3], 1, CHAN_X
);
2568 micro_mul(&r
[1], &r
[2], &r
[3]);
2569 FETCH(&r
[5], 1, CHAN_Y
);
2570 micro_mul(&r
[8], &r
[4], &r
[5]);
2571 micro_add(&r
[1], &r
[1], &r
[8]);
2572 FETCH(&r
[7], 1, CHAN_Z
);
2573 micro_mul(&r
[8], &r
[6], &r
[7]);
2574 micro_add(&r
[1], &r
[1], &r
[8]);
2576 /* r1 = 2 * r1 / r0 */
2577 micro_add(&r
[1], &r
[1], &r
[1]);
2578 micro_div(&r
[1], &r
[1], &r
[0]);
2580 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2581 micro_mul(&r
[2], &r
[2], &r
[1]);
2582 micro_sub(&r
[2], &r
[2], &r
[3]);
2583 STORE(&r
[2], 0, CHAN_X
);
2585 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2586 micro_mul(&r
[4], &r
[4], &r
[1]);
2587 micro_sub(&r
[4], &r
[4], &r
[5]);
2588 STORE(&r
[4], 0, CHAN_Y
);
2590 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2591 micro_mul(&r
[6], &r
[6], &r
[1]);
2592 micro_sub(&r
[6], &r
[6], &r
[7]);
2593 STORE(&r
[6], 0, CHAN_Z
);
2596 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2597 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2601 case TGSI_OPCODE_SEQ
:
2602 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2603 FETCH( &r
[0], 0, chan_index
);
2604 FETCH( &r
[1], 1, chan_index
);
2605 micro_eq(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2607 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2608 STORE(&d
[chan_index
], 0, chan_index
);
2612 case TGSI_OPCODE_SFL
:
2613 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2614 STORE(&mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, chan_index
);
2618 case TGSI_OPCODE_SGT
:
2619 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2620 FETCH( &r
[0], 0, chan_index
);
2621 FETCH( &r
[1], 1, chan_index
);
2622 micro_le(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
2624 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2625 STORE(&d
[chan_index
], 0, chan_index
);
2629 case TGSI_OPCODE_SIN
:
2630 FETCH( &r
[0], 0, CHAN_X
);
2631 micro_sin( &r
[0], &r
[0] );
2632 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2633 STORE( &r
[0], 0, chan_index
);
2637 case TGSI_OPCODE_SLE
:
2638 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2639 FETCH( &r
[0], 0, chan_index
);
2640 FETCH( &r
[1], 1, chan_index
);
2641 micro_le(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2643 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2644 STORE(&d
[chan_index
], 0, chan_index
);
2648 case TGSI_OPCODE_SNE
:
2649 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2650 FETCH( &r
[0], 0, chan_index
);
2651 FETCH( &r
[1], 1, chan_index
);
2652 micro_eq(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
2654 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2655 STORE(&d
[chan_index
], 0, chan_index
);
2659 case TGSI_OPCODE_STR
:
2660 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2661 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, chan_index
);
2665 case TGSI_OPCODE_TEX
:
2666 /* simple texture lookup */
2667 /* src[0] = texcoord */
2668 /* src[1] = sampler unit */
2669 exec_tex(mach
, inst
, FALSE
, FALSE
);
2672 case TGSI_OPCODE_TXB
:
2673 /* Texture lookup with lod bias */
2674 /* src[0] = texcoord (src[0].w = LOD bias) */
2675 /* src[1] = sampler unit */
2676 exec_tex(mach
, inst
, TRUE
, FALSE
);
2679 case TGSI_OPCODE_TXD
:
2680 /* Texture lookup with explicit partial derivatives */
2681 /* src[0] = texcoord */
2682 /* src[1] = d[strq]/dx */
2683 /* src[2] = d[strq]/dy */
2684 /* src[3] = sampler unit */
2685 exec_txd(mach
, inst
);
2688 case TGSI_OPCODE_TXL
:
2689 /* Texture lookup with explicit LOD */
2690 /* src[0] = texcoord (src[0].w = LOD) */
2691 /* src[1] = sampler unit */
2692 exec_tex(mach
, inst
, TRUE
, FALSE
);
2695 case TGSI_OPCODE_TXP
:
2696 /* Texture lookup with projection */
2697 /* src[0] = texcoord (src[0].w = projection) */
2698 /* src[1] = sampler unit */
2699 exec_tex(mach
, inst
, FALSE
, TRUE
);
2702 case TGSI_OPCODE_UP2H
:
2706 case TGSI_OPCODE_UP2US
:
2710 case TGSI_OPCODE_UP4B
:
2714 case TGSI_OPCODE_UP4UB
:
2718 case TGSI_OPCODE_X2D
:
2719 FETCH(&r
[0], 1, CHAN_X
);
2720 FETCH(&r
[1], 1, CHAN_Y
);
2721 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2722 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2723 FETCH(&r
[2], 2, CHAN_X
);
2724 micro_mul(&r
[2], &r
[2], &r
[0]);
2725 FETCH(&r
[3], 2, CHAN_Y
);
2726 micro_mul(&r
[3], &r
[3], &r
[1]);
2727 micro_add(&r
[2], &r
[2], &r
[3]);
2728 FETCH(&r
[3], 0, CHAN_X
);
2729 micro_add(&d
[CHAN_X
], &r
[2], &r
[3]);
2732 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2733 IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2734 FETCH(&r
[2], 2, CHAN_Z
);
2735 micro_mul(&r
[2], &r
[2], &r
[0]);
2736 FETCH(&r
[3], 2, CHAN_W
);
2737 micro_mul(&r
[3], &r
[3], &r
[1]);
2738 micro_add(&r
[2], &r
[2], &r
[3]);
2739 FETCH(&r
[3], 0, CHAN_Y
);
2740 micro_add(&d
[CHAN_Y
], &r
[2], &r
[3]);
2743 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2744 STORE(&d
[CHAN_X
], 0, CHAN_X
);
2746 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2747 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2749 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2750 STORE(&d
[CHAN_X
], 0, CHAN_Z
);
2752 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2753 STORE(&d
[CHAN_Y
], 0, CHAN_W
);
2757 case TGSI_OPCODE_ARA
:
2761 case TGSI_OPCODE_BRA
:
2765 case TGSI_OPCODE_CAL
:
2766 /* skip the call if no execution channels are enabled */
2767 if (mach
->ExecMask
) {
2770 /* First, record the depths of the execution stacks.
2771 * This is important for deeply nested/looped return statements.
2772 * We have to unwind the stacks by the correct amount. For a
2773 * real code generator, we could determine the number of entries
2774 * to pop off each stack with simple static analysis and avoid
2775 * implementing this data structure at run time.
2777 mach
->CallStack
[mach
->CallStackTop
].CondStackTop
= mach
->CondStackTop
;
2778 mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
= mach
->LoopStackTop
;
2779 mach
->CallStack
[mach
->CallStackTop
].ContStackTop
= mach
->ContStackTop
;
2780 /* note that PC was already incremented above */
2781 mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
= *pc
;
2783 mach
->CallStackTop
++;
2785 /* Second, push the Cond, Loop, Cont, Func stacks */
2786 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2787 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2788 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2789 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2790 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2791 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2792 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
2793 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
2795 /* Finally, jump to the subroutine */
2796 *pc
= inst
->Label
.Label
;
2800 case TGSI_OPCODE_RET
:
2801 mach
->FuncMask
&= ~mach
->ExecMask
;
2802 UPDATE_EXEC_MASK(mach
);
2804 if (mach
->FuncMask
== 0x0) {
2805 /* really return now (otherwise, keep executing) */
2807 if (mach
->CallStackTop
== 0) {
2808 /* returning from main() */
2813 assert(mach
->CallStackTop
> 0);
2814 mach
->CallStackTop
--;
2816 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
2817 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
2819 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
2820 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
2822 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
2823 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
2825 assert(mach
->FuncStackTop
> 0);
2826 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
2828 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
2830 UPDATE_EXEC_MASK(mach
);
2834 case TGSI_OPCODE_SSG
:
2835 /* TGSI_OPCODE_SGN */
2836 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2837 FETCH( &r
[0], 0, chan_index
);
2838 micro_sgn(&d
[chan_index
], &r
[0]);
2840 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2841 STORE(&d
[chan_index
], 0, chan_index
);
2845 case TGSI_OPCODE_CMP
:
2846 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2847 FETCH(&r
[0], 0, chan_index
);
2848 FETCH(&r
[1], 1, chan_index
);
2849 FETCH(&r
[2], 2, chan_index
);
2850 micro_lt(&d
[chan_index
], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2]);
2852 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2853 STORE(&d
[chan_index
], 0, chan_index
);
2857 case TGSI_OPCODE_SCS
:
2858 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2859 FETCH( &r
[0], 0, CHAN_X
);
2860 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2861 micro_cos(&r
[1], &r
[0]);
2862 STORE(&r
[1], 0, CHAN_X
);
2864 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2865 micro_sin(&r
[1], &r
[0]);
2866 STORE(&r
[1], 0, CHAN_Y
);
2869 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
2870 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
2872 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
2873 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2877 case TGSI_OPCODE_NRM
:
2878 /* 3-component vector normalize */
2879 if(IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2880 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2881 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2882 /* r3 = sqrt(dp3(src0, src0)) */
2883 FETCH(&r
[0], 0, CHAN_X
);
2884 micro_mul(&r
[3], &r
[0], &r
[0]);
2885 FETCH(&r
[1], 0, CHAN_Y
);
2886 micro_mul(&r
[4], &r
[1], &r
[1]);
2887 micro_add(&r
[3], &r
[3], &r
[4]);
2888 FETCH(&r
[2], 0, CHAN_Z
);
2889 micro_mul(&r
[4], &r
[2], &r
[2]);
2890 micro_add(&r
[3], &r
[3], &r
[4]);
2891 micro_sqrt(&r
[3], &r
[3]);
2893 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2894 micro_div(&r
[0], &r
[0], &r
[3]);
2895 STORE(&r
[0], 0, CHAN_X
);
2897 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2898 micro_div(&r
[1], &r
[1], &r
[3]);
2899 STORE(&r
[1], 0, CHAN_Y
);
2901 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2902 micro_div(&r
[2], &r
[2], &r
[3]);
2903 STORE(&r
[2], 0, CHAN_Z
);
2906 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2907 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2911 case TGSI_OPCODE_NRM4
:
2912 /* 4-component vector normalize */
2914 union tgsi_exec_channel tmp
, dot
;
2916 /* tmp = dp4(src0, src0): */
2917 FETCH( &r
[0], 0, CHAN_X
);
2918 micro_mul( &tmp
, &r
[0], &r
[0] );
2920 FETCH( &r
[1], 0, CHAN_Y
);
2921 micro_mul( &dot
, &r
[1], &r
[1] );
2922 micro_add( &tmp
, &tmp
, &dot
);
2924 FETCH( &r
[2], 0, CHAN_Z
);
2925 micro_mul( &dot
, &r
[2], &r
[2] );
2926 micro_add( &tmp
, &tmp
, &dot
);
2928 FETCH( &r
[3], 0, CHAN_W
);
2929 micro_mul( &dot
, &r
[3], &r
[3] );
2930 micro_add( &tmp
, &tmp
, &dot
);
2932 /* tmp = 1 / sqrt(tmp) */
2933 micro_sqrt( &tmp
, &tmp
);
2934 micro_div( &tmp
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &tmp
);
2936 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2937 /* chan = chan * tmp */
2938 micro_mul( &r
[chan_index
], &tmp
, &r
[chan_index
] );
2939 STORE( &r
[chan_index
], 0, chan_index
);
2944 case TGSI_OPCODE_DIV
:
2948 case TGSI_OPCODE_DP2
:
2949 FETCH( &r
[0], 0, CHAN_X
);
2950 FETCH( &r
[1], 1, CHAN_X
);
2951 micro_mul( &r
[0], &r
[0], &r
[1] );
2953 FETCH( &r
[1], 0, CHAN_Y
);
2954 FETCH( &r
[2], 1, CHAN_Y
);
2955 micro_mul( &r
[1], &r
[1], &r
[2] );
2956 micro_add( &r
[0], &r
[0], &r
[1] );
2958 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2959 STORE( &r
[0], 0, chan_index
);
2963 case TGSI_OPCODE_IF
:
2965 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2966 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2967 FETCH( &r
[0], 0, CHAN_X
);
2968 /* update CondMask */
2970 mach
->CondMask
&= ~0x1;
2973 mach
->CondMask
&= ~0x2;
2976 mach
->CondMask
&= ~0x4;
2979 mach
->CondMask
&= ~0x8;
2981 UPDATE_EXEC_MASK(mach
);
2982 /* Todo: If CondMask==0, jump to ELSE */
2985 case TGSI_OPCODE_ELSE
:
2986 /* invert CondMask wrt previous mask */
2989 assert(mach
->CondStackTop
> 0);
2990 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
2991 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
2992 UPDATE_EXEC_MASK(mach
);
2993 /* Todo: If CondMask==0, jump to ENDIF */
2997 case TGSI_OPCODE_ENDIF
:
2999 assert(mach
->CondStackTop
> 0);
3000 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
3001 UPDATE_EXEC_MASK(mach
);
3004 case TGSI_OPCODE_END
:
3005 /* halt execution */
3009 case TGSI_OPCODE_REP
:
3013 case TGSI_OPCODE_ENDREP
:
3017 case TGSI_OPCODE_PUSHA
:
3021 case TGSI_OPCODE_POPA
:
3025 case TGSI_OPCODE_CEIL
:
3026 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3027 FETCH( &r
[0], 0, chan_index
);
3028 micro_ceil(&d
[chan_index
], &r
[0]);
3030 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3031 STORE(&d
[chan_index
], 0, chan_index
);
3035 case TGSI_OPCODE_I2F
:
3036 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3037 FETCH( &r
[0], 0, chan_index
);
3038 micro_i2f(&d
[chan_index
], &r
[0]);
3040 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3041 STORE(&d
[chan_index
], 0, chan_index
);
3045 case TGSI_OPCODE_NOT
:
3046 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3047 FETCH( &r
[0], 0, chan_index
);
3048 micro_not(&d
[chan_index
], &r
[0]);
3050 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3051 STORE(&d
[chan_index
], 0, chan_index
);
3055 case TGSI_OPCODE_TRUNC
:
3056 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3057 FETCH( &r
[0], 0, chan_index
);
3058 micro_trunc(&d
[chan_index
], &r
[0]);
3060 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3061 STORE(&d
[chan_index
], 0, chan_index
);
3065 case TGSI_OPCODE_SHL
:
3066 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3067 FETCH( &r
[0], 0, chan_index
);
3068 FETCH( &r
[1], 1, chan_index
);
3069 micro_shl(&d
[chan_index
], &r
[0], &r
[1]);
3071 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3072 STORE(&d
[chan_index
], 0, chan_index
);
3076 case TGSI_OPCODE_SHR
:
3077 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3078 FETCH( &r
[0], 0, chan_index
);
3079 FETCH( &r
[1], 1, chan_index
);
3080 micro_ishr(&d
[chan_index
], &r
[0], &r
[1]);
3082 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3083 STORE(&d
[chan_index
], 0, chan_index
);
3087 case TGSI_OPCODE_AND
:
3088 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3089 FETCH( &r
[0], 0, chan_index
);
3090 FETCH( &r
[1], 1, chan_index
);
3091 micro_and(&d
[chan_index
], &r
[0], &r
[1]);
3093 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3094 STORE(&d
[chan_index
], 0, chan_index
);
3098 case TGSI_OPCODE_OR
:
3099 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3100 FETCH( &r
[0], 0, chan_index
);
3101 FETCH( &r
[1], 1, chan_index
);
3102 micro_or(&d
[chan_index
], &r
[0], &r
[1]);
3104 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3105 STORE(&d
[chan_index
], 0, chan_index
);
3109 case TGSI_OPCODE_MOD
:
3113 case TGSI_OPCODE_XOR
:
3114 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3115 FETCH( &r
[0], 0, chan_index
);
3116 FETCH( &r
[1], 1, chan_index
);
3117 micro_xor(&d
[chan_index
], &r
[0], &r
[1]);
3119 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3120 STORE(&d
[chan_index
], 0, chan_index
);
3124 case TGSI_OPCODE_SAD
:
3128 case TGSI_OPCODE_TXF
:
3132 case TGSI_OPCODE_TXQ
:
3136 case TGSI_OPCODE_EMIT
:
3140 case TGSI_OPCODE_ENDPRIM
:
3141 emit_primitive(mach
);
3144 case TGSI_OPCODE_BGNFOR
:
3145 assert(mach
->LoopCounterStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3146 for (chan_index
= 0; chan_index
< 3; chan_index
++) {
3147 FETCH( &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
].xyzw
[chan_index
], 0, chan_index
);
3149 ++mach
->LoopCounterStackTop
;
3150 STORE(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
], 0, CHAN_X
);
3151 /* update LoopMask */
3152 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[0] <= 0.0f
) {
3153 mach
->LoopMask
&= ~0x1;
3155 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[1] <= 0.0f
) {
3156 mach
->LoopMask
&= ~0x2;
3158 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[2] <= 0.0f
) {
3159 mach
->LoopMask
&= ~0x4;
3161 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[3] <= 0.0f
) {
3162 mach
->LoopMask
&= ~0x8;
3164 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3165 UPDATE_EXEC_MASK(mach
);
3166 /* fall-through (for now) */
3167 case TGSI_OPCODE_BGNLOOP
:
3168 /* push LoopMask and ContMasks */
3169 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3170 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
3171 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3172 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
3173 assert(mach
->LoopLabelStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3174 mach
->LoopLabelStack
[mach
->LoopLabelStackTop
++] = *pc
- 1;
3177 case TGSI_OPCODE_ENDFOR
:
3178 assert(mach
->LoopCounterStackTop
> 0);
3179 micro_sub(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
],
3180 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
],
3181 &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
3182 /* update LoopMask */
3183 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[0] <= 0.0f
) {
3184 mach
->LoopMask
&= ~0x1;
3186 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[1] <= 0.0f
) {
3187 mach
->LoopMask
&= ~0x2;
3189 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[2] <= 0.0f
) {
3190 mach
->LoopMask
&= ~0x4;
3192 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[3] <= 0.0f
) {
3193 mach
->LoopMask
&= ~0x8;
3195 micro_add(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
],
3196 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
],
3197 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Z
]);
3198 assert(mach
->LoopLabelStackTop
> 0);
3199 inst
= mach
->Instructions
+ mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1];
3200 STORE(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
].xyzw
[CHAN_X
], 0, CHAN_X
);
3201 /* Restore ContMask, but don't pop */
3202 assert(mach
->ContStackTop
> 0);
3203 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3204 UPDATE_EXEC_MASK(mach
);
3205 if (mach
->ExecMask
) {
3206 /* repeat loop: jump to instruction just past BGNLOOP */
3207 assert(mach
->LoopLabelStackTop
> 0);
3208 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
3211 /* exit loop: pop LoopMask */
3212 assert(mach
->LoopStackTop
> 0);
3213 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3215 assert(mach
->ContStackTop
> 0);
3216 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3217 assert(mach
->LoopLabelStackTop
> 0);
3218 --mach
->LoopLabelStackTop
;
3219 assert(mach
->LoopCounterStackTop
> 0);
3220 --mach
->LoopCounterStackTop
;
3222 UPDATE_EXEC_MASK(mach
);
3225 case TGSI_OPCODE_ENDLOOP
:
3226 /* Restore ContMask, but don't pop */
3227 assert(mach
->ContStackTop
> 0);
3228 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3229 UPDATE_EXEC_MASK(mach
);
3230 if (mach
->ExecMask
) {
3231 /* repeat loop: jump to instruction just past BGNLOOP */
3232 assert(mach
->LoopLabelStackTop
> 0);
3233 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
3236 /* exit loop: pop LoopMask */
3237 assert(mach
->LoopStackTop
> 0);
3238 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3240 assert(mach
->ContStackTop
> 0);
3241 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3242 assert(mach
->LoopLabelStackTop
> 0);
3243 --mach
->LoopLabelStackTop
;
3245 UPDATE_EXEC_MASK(mach
);
3248 case TGSI_OPCODE_BRK
:
3249 /* turn off loop channels for each enabled exec channel */
3250 mach
->LoopMask
&= ~mach
->ExecMask
;
3251 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3252 UPDATE_EXEC_MASK(mach
);
3255 case TGSI_OPCODE_CONT
:
3256 /* turn off cont channels for each enabled exec channel */
3257 mach
->ContMask
&= ~mach
->ExecMask
;
3258 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3259 UPDATE_EXEC_MASK(mach
);
3262 case TGSI_OPCODE_BGNSUB
:
3266 case TGSI_OPCODE_ENDSUB
:
3268 * XXX: This really should be a no-op. We should never reach this opcode.
3271 assert(mach
->CallStackTop
> 0);
3272 mach
->CallStackTop
--;
3274 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
3275 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
3277 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
3278 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
3280 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
3281 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
3283 assert(mach
->FuncStackTop
> 0);
3284 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
3286 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
3288 UPDATE_EXEC_MASK(mach
);
3291 case TGSI_OPCODE_NOP
:
3294 case TGSI_OPCODE_BREAKC
:
3295 FETCH(&r
[0], 0, CHAN_X
);
3296 /* update LoopMask */
3297 if (r
[0].u
[0] && (mach
->ExecMask
& 0x1)) {
3298 mach
->LoopMask
&= ~0x1;
3300 if (r
[0].u
[1] && (mach
->ExecMask
& 0x2)) {
3301 mach
->LoopMask
&= ~0x2;
3303 if (r
[0].u
[2] && (mach
->ExecMask
& 0x4)) {
3304 mach
->LoopMask
&= ~0x4;
3306 if (r
[0].u
[3] && (mach
->ExecMask
& 0x8)) {
3307 mach
->LoopMask
&= ~0x8;
3309 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3310 UPDATE_EXEC_MASK(mach
);
3318 #define DEBUG_EXECUTION 0
3322 * Run TGSI interpreter.
3323 * \return bitmask of "alive" quad components
3326 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
3331 mach
->CondMask
= 0xf;
3332 mach
->LoopMask
= 0xf;
3333 mach
->ContMask
= 0xf;
3334 mach
->FuncMask
= 0xf;
3335 mach
->ExecMask
= 0xf;
3337 assert(mach
->CondStackTop
== 0);
3338 assert(mach
->LoopStackTop
== 0);
3339 assert(mach
->ContStackTop
== 0);
3340 assert(mach
->CallStackTop
== 0);
3342 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
3343 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
3345 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
3346 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
3347 mach
->Primitives
[0] = 0;
3350 for (i
= 0; i
< QUAD_SIZE
; i
++) {
3351 mach
->Temps
[TEMP_CC_I
].xyzw
[TEMP_CC_C
].u
[i
] =
3352 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_X_SHIFT
) |
3353 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Y_SHIFT
) |
3354 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Z_SHIFT
) |
3355 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_W_SHIFT
);
3358 /* execute declarations (interpolants) */
3359 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
3360 exec_declaration( mach
, mach
->Declarations
+i
);
3365 struct tgsi_exec_vector temps
[TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
];
3366 struct tgsi_exec_vector outputs
[PIPE_MAX_ATTRIBS
];
3369 memcpy(temps
, mach
->Temps
, sizeof(temps
));
3370 memcpy(outputs
, mach
->Outputs
, sizeof(outputs
));
3373 /* execute instructions, until pc is set to -1 */
3379 tgsi_dump_instruction(&mach
->Instructions
[pc
], inst
++);
3382 assert(pc
< (int) mach
->NumInstructions
);
3383 exec_instruction(mach
, mach
->Instructions
+ pc
, &pc
);
3386 for (i
= 0; i
< TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
; i
++) {
3387 if (memcmp(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]))) {
3390 memcpy(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]));
3391 debug_printf("TEMP[%2u] = ", i
);
3392 for (j
= 0; j
< 4; j
++) {
3396 debug_printf("(%6f, %6f, %6f, %6f)\n",
3397 temps
[i
].xyzw
[0].f
[j
],
3398 temps
[i
].xyzw
[1].f
[j
],
3399 temps
[i
].xyzw
[2].f
[j
],
3400 temps
[i
].xyzw
[3].f
[j
]);
3404 for (i
= 0; i
< PIPE_MAX_ATTRIBS
; i
++) {
3405 if (memcmp(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]))) {
3408 memcpy(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]));
3409 debug_printf("OUT[%2u] = ", i
);
3410 for (j
= 0; j
< 4; j
++) {
3414 debug_printf("{%6f, %6f, %6f, %6f}\n",
3415 outputs
[i
].xyzw
[0].f
[j
],
3416 outputs
[i
].xyzw
[1].f
[j
],
3417 outputs
[i
].xyzw
[2].f
[j
],
3418 outputs
[i
].xyzw
[3].f
[j
]);
3427 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3428 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
3430 * Scale back depth component.
3432 for (i
= 0; i
< 4; i
++)
3433 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
3437 assert(mach
->CondStackTop
== 0);
3438 assert(mach
->LoopStackTop
== 0);
3439 assert(mach
->ContStackTop
== 0);
3440 assert(mach
->CallStackTop
== 0);
3442 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];