1 /**************************************************************************
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * TGSI interpreter/executor.
31 * Flow control information:
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
43 * The ExecMask is computed from four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask). The first three are controlled by the flow
45 * control instructions (namely: IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_parse.h"
57 #include "tgsi/tgsi_util.h"
58 #include "tgsi_exec.h"
59 #include "util/u_memory.h"
60 #include "util/u_math.h"
/*
 * Element indices within a quad channel.  They are used to subscript the
 * four-element arrays of a tgsi_exec_channel (e.g. src->f[TILE_BOTTOM_LEFT]).
 */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3
/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
 *
 * Each name is a plain alias for the corresponding TGSI_EXEC_TEMP_* macro;
 * the values themselves are defined outside this file.
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
/**
 * Non-zero if bit CHAN is set in the WriteMask of INST's first destination
 * register (FullDstRegisters[0]).
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test, against INST's second destination register (FullDstRegisters[1]). */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Run the following statement once for every channel enabled in the first
 * destination's write mask.  CHAN must be an assignable integer variable.
 *
 * NOTE: this expands to a brace-less "for (...) if (...)", so an "else"
 * written right after the controlled statement binds to the hidden "if".
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** As FOR_EACH_ENABLED_CHANNEL, but for the second destination register. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
/**
 * Recompute MACH->ExecMask as the bitwise AND of CondMask, LoopMask,
 * ContMask and FuncMask.  MACH is a pointer expression; it is parenthesized
 * so that arguments such as "p + 1" expand correctly.  Note that MACH is
 * evaluated several times, so it must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & (MACH)->ContMask & (MACH)->FuncMask)
127 static const union tgsi_exec_channel ZeroVec
=
128 { { 0.0, 0.0, 0.0, 0.0 } };
133 check_inf_or_nan(const union tgsi_exec_channel
*chan
)
135 assert(!util_is_inf_or_nan(chan
->f
[0]));
136 assert(!util_is_inf_or_nan(chan
->f
[1]));
137 assert(!util_is_inf_or_nan(chan
->f
[2]));
138 assert(!util_is_inf_or_nan(chan
->f
[3]));
145 print_chan(const char *msg
, const union tgsi_exec_channel
*chan
)
147 debug_printf("%s = {%f, %f, %f, %f}\n",
148 msg
, chan
->f
[0], chan
->f
[1], chan
->f
[2], chan
->f
[3]);
155 print_temp(const struct tgsi_exec_machine
*mach
, uint index
)
157 const struct tgsi_exec_vector
*tmp
= &mach
->Temps
[index
];
159 debug_printf("Temp[%u] =\n", index
);
160 for (i
= 0; i
< 4; i
++) {
161 debug_printf(" %c: { %f, %f, %f, %f }\n",
174 * Initialize machine state by expanding tokens to full instructions,
175 * allocating temporary storage, setting up constants, etc.
176 * After this, we can call tgsi_exec_machine_run() many times.
179 tgsi_exec_machine_bind_shader(
180 struct tgsi_exec_machine
*mach
,
181 const struct tgsi_token
*tokens
,
183 struct tgsi_sampler
**samplers
)
186 struct tgsi_parse_context parse
;
187 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
188 struct tgsi_full_instruction
*instructions
;
189 struct tgsi_full_declaration
*declarations
;
190 uint maxInstructions
= 10, numInstructions
= 0;
191 uint maxDeclarations
= 10, numDeclarations
= 0;
195 tgsi_dump(tokens
, 0);
200 mach
->Tokens
= tokens
;
201 mach
->Samplers
= samplers
;
203 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
204 if (k
!= TGSI_PARSE_OK
) {
205 debug_printf( "Problem parsing!\n" );
209 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
213 declarations
= (struct tgsi_full_declaration
*)
214 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
220 instructions
= (struct tgsi_full_instruction
*)
221 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
224 FREE( declarations
);
228 while( !tgsi_parse_end_of_tokens( &parse
) ) {
229 uint pointer
= parse
.Position
;
232 tgsi_parse_token( &parse
);
233 switch( parse
.FullToken
.Token
.Type
) {
234 case TGSI_TOKEN_TYPE_DECLARATION
:
235 /* save expanded declaration */
236 if (numDeclarations
== maxDeclarations
) {
237 declarations
= REALLOC(declarations
,
239 * sizeof(struct tgsi_full_declaration
),
240 (maxDeclarations
+ 10)
241 * sizeof(struct tgsi_full_declaration
));
242 maxDeclarations
+= 10;
244 memcpy(declarations
+ numDeclarations
,
245 &parse
.FullToken
.FullDeclaration
,
246 sizeof(declarations
[0]));
250 case TGSI_TOKEN_TYPE_IMMEDIATE
:
252 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
253 assert( size
% 4 == 0 );
254 assert( mach
->ImmLimit
+ size
/ 4 <= TGSI_EXEC_NUM_IMMEDIATES
);
256 for( i
= 0; i
< size
; i
++ ) {
257 mach
->Imms
[mach
->ImmLimit
+ i
/ 4][i
% 4] =
258 parse
.FullToken
.FullImmediate
.u
.ImmediateFloat32
[i
].Float
;
260 mach
->ImmLimit
+= size
/ 4;
264 case TGSI_TOKEN_TYPE_INSTRUCTION
:
265 assert( labels
->count
< MAX_LABELS
);
267 labels
->labels
[labels
->count
][0] = instno
;
268 labels
->labels
[labels
->count
][1] = pointer
;
271 /* save expanded instruction */
272 if (numInstructions
== maxInstructions
) {
273 instructions
= REALLOC(instructions
,
275 * sizeof(struct tgsi_full_instruction
),
276 (maxInstructions
+ 10)
277 * sizeof(struct tgsi_full_instruction
));
278 maxInstructions
+= 10;
280 memcpy(instructions
+ numInstructions
,
281 &parse
.FullToken
.FullInstruction
,
282 sizeof(instructions
[0]));
290 tgsi_parse_free (&parse
);
292 if (mach
->Declarations
) {
293 FREE( mach
->Declarations
);
295 mach
->Declarations
= declarations
;
296 mach
->NumDeclarations
= numDeclarations
;
298 if (mach
->Instructions
) {
299 FREE( mach
->Instructions
);
301 mach
->Instructions
= instructions
;
302 mach
->NumInstructions
= numInstructions
;
307 tgsi_exec_machine_init(
308 struct tgsi_exec_machine
*mach
)
312 mach
->Temps
= (struct tgsi_exec_vector
*) tgsi_align_128bit( mach
->_Temps
);
313 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_TEMP_ADDR
];
315 /* Setup constants. */
316 for( i
= 0; i
< 4; i
++ ) {
317 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
318 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
319 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
320 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
321 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
322 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
323 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
324 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
325 mach
->Temps
[TEMP_3_I
].xyzw
[TEMP_3_C
].f
[i
] = 3.0f
;
326 mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
].f
[i
] = 0.5f
;
330 /* silence warnings */
338 tgsi_exec_machine_free_data(struct tgsi_exec_machine
*mach
)
340 if (mach
->Instructions
) {
341 FREE(mach
->Instructions
);
342 mach
->Instructions
= NULL
;
343 mach
->NumInstructions
= 0;
345 if (mach
->Declarations
) {
346 FREE(mach
->Declarations
);
347 mach
->Declarations
= NULL
;
348 mach
->NumDeclarations
= 0;
355 union tgsi_exec_channel
*dst
,
356 const union tgsi_exec_channel
*src
)
358 dst
->f
[0] = fabsf( src
->f
[0] );
359 dst
->f
[1] = fabsf( src
->f
[1] );
360 dst
->f
[2] = fabsf( src
->f
[2] );
361 dst
->f
[3] = fabsf( src
->f
[3] );
366 union tgsi_exec_channel
*dst
,
367 const union tgsi_exec_channel
*src0
,
368 const union tgsi_exec_channel
*src1
)
370 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
371 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
372 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
373 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
379 union tgsi_exec_channel
*dst
,
380 const union tgsi_exec_channel
*src0
,
381 const union tgsi_exec_channel
*src1
)
383 dst
->i
[0] = src0
->i
[0] + src1
->i
[0];
384 dst
->i
[1] = src0
->i
[1] + src1
->i
[1];
385 dst
->i
[2] = src0
->i
[2] + src1
->i
[2];
386 dst
->i
[3] = src0
->i
[3] + src1
->i
[3];
392 union tgsi_exec_channel
*dst
,
393 const union tgsi_exec_channel
*src0
,
394 const union tgsi_exec_channel
*src1
)
396 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
397 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
398 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
399 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
404 union tgsi_exec_channel
*dst
,
405 const union tgsi_exec_channel
*src
)
407 dst
->f
[0] = ceilf( src
->f
[0] );
408 dst
->f
[1] = ceilf( src
->f
[1] );
409 dst
->f
[2] = ceilf( src
->f
[2] );
410 dst
->f
[3] = ceilf( src
->f
[3] );
415 union tgsi_exec_channel
*dst
,
416 const union tgsi_exec_channel
*src
)
418 dst
->f
[0] = cosf( src
->f
[0] );
419 dst
->f
[1] = cosf( src
->f
[1] );
420 dst
->f
[2] = cosf( src
->f
[2] );
421 dst
->f
[3] = cosf( src
->f
[3] );
426 union tgsi_exec_channel
*dst
,
427 const union tgsi_exec_channel
*src
)
432 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
437 union tgsi_exec_channel
*dst
,
438 const union tgsi_exec_channel
*src
)
443 dst
->f
[3] = src
->f
[TILE_TOP_LEFT
] - src
->f
[TILE_BOTTOM_LEFT
];
448 union tgsi_exec_channel
*dst
,
449 const union tgsi_exec_channel
*src0
,
450 const union tgsi_exec_channel
*src1
)
452 if (src1
->f
[0] != 0) {
453 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
455 if (src1
->f
[1] != 0) {
456 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
458 if (src1
->f
[2] != 0) {
459 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
461 if (src1
->f
[3] != 0) {
462 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
469 union tgsi_exec_channel
*dst
,
470 const union tgsi_exec_channel
*src0
,
471 const union tgsi_exec_channel
*src1
)
473 dst
->u
[0] = src0
->u
[0] / src1
->u
[0];
474 dst
->u
[1] = src0
->u
[1] / src1
->u
[1];
475 dst
->u
[2] = src0
->u
[2] / src1
->u
[2];
476 dst
->u
[3] = src0
->u
[3] / src1
->u
[3];
482 union tgsi_exec_channel
*dst
,
483 const union tgsi_exec_channel
*src0
,
484 const union tgsi_exec_channel
*src1
,
485 const union tgsi_exec_channel
*src2
,
486 const union tgsi_exec_channel
*src3
)
488 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
489 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
490 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
491 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
497 union tgsi_exec_channel
*dst
,
498 const union tgsi_exec_channel
*src0
,
499 const union tgsi_exec_channel
*src1
,
500 const union tgsi_exec_channel
*src2
,
501 const union tgsi_exec_channel
*src3
)
503 dst
->i
[0] = src0
->i
[0] == src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
504 dst
->i
[1] = src0
->i
[1] == src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
505 dst
->i
[2] = src0
->i
[2] == src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
506 dst
->i
[3] = src0
->i
[3] == src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
512 union tgsi_exec_channel
*dst
,
513 const union tgsi_exec_channel
*src
)
516 dst
->f
[0] = util_fast_exp2( src
->f
[0] );
517 dst
->f
[1] = util_fast_exp2( src
->f
[1] );
518 dst
->f
[2] = util_fast_exp2( src
->f
[2] );
519 dst
->f
[3] = util_fast_exp2( src
->f
[3] );
521 dst
->f
[0] = powf( 2.0f
, src
->f
[0] );
522 dst
->f
[1] = powf( 2.0f
, src
->f
[1] );
523 dst
->f
[2] = powf( 2.0f
, src
->f
[2] );
524 dst
->f
[3] = powf( 2.0f
, src
->f
[3] );
531 union tgsi_exec_channel
*dst
,
532 const union tgsi_exec_channel
*src
)
534 dst
->u
[0] = (uint
) src
->f
[0];
535 dst
->u
[1] = (uint
) src
->f
[1];
536 dst
->u
[2] = (uint
) src
->f
[2];
537 dst
->u
[3] = (uint
) src
->f
[3];
542 micro_float_clamp(union tgsi_exec_channel
*dst
,
543 const union tgsi_exec_channel
*src
)
547 for (i
= 0; i
< 4; i
++) {
548 if (src
->f
[i
] > 0.0f
) {
549 if (src
->f
[i
] > 1.884467e+019f
)
550 dst
->f
[i
] = 1.884467e+019f
;
551 else if (src
->f
[i
] < 5.42101e-020f
)
552 dst
->f
[i
] = 5.42101e-020f
;
554 dst
->f
[i
] = src
->f
[i
];
557 if (src
->f
[i
] < -1.884467e+019f
)
558 dst
->f
[i
] = -1.884467e+019f
;
559 else if (src
->f
[i
] > -5.42101e-020f
)
560 dst
->f
[i
] = -5.42101e-020f
;
562 dst
->f
[i
] = src
->f
[i
];
569 union tgsi_exec_channel
*dst
,
570 const union tgsi_exec_channel
*src
)
572 dst
->f
[0] = floorf( src
->f
[0] );
573 dst
->f
[1] = floorf( src
->f
[1] );
574 dst
->f
[2] = floorf( src
->f
[2] );
575 dst
->f
[3] = floorf( src
->f
[3] );
580 union tgsi_exec_channel
*dst
,
581 const union tgsi_exec_channel
*src
)
583 dst
->f
[0] = src
->f
[0] - floorf( src
->f
[0] );
584 dst
->f
[1] = src
->f
[1] - floorf( src
->f
[1] );
585 dst
->f
[2] = src
->f
[2] - floorf( src
->f
[2] );
586 dst
->f
[3] = src
->f
[3] - floorf( src
->f
[3] );
591 union tgsi_exec_channel
*dst
,
592 const union tgsi_exec_channel
*src
)
594 dst
->f
[0] = (float) src
->i
[0];
595 dst
->f
[1] = (float) src
->i
[1];
596 dst
->f
[2] = (float) src
->i
[2];
597 dst
->f
[3] = (float) src
->i
[3];
602 union tgsi_exec_channel
*dst
,
603 const union tgsi_exec_channel
*src
)
606 dst
->f
[0] = util_fast_log2( src
->f
[0] );
607 dst
->f
[1] = util_fast_log2( src
->f
[1] );
608 dst
->f
[2] = util_fast_log2( src
->f
[2] );
609 dst
->f
[3] = util_fast_log2( src
->f
[3] );
611 dst
->f
[0] = logf( src
->f
[0] ) * 1.442695f
;
612 dst
->f
[1] = logf( src
->f
[1] ) * 1.442695f
;
613 dst
->f
[2] = logf( src
->f
[2] ) * 1.442695f
;
614 dst
->f
[3] = logf( src
->f
[3] ) * 1.442695f
;
620 union tgsi_exec_channel
*dst
,
621 const union tgsi_exec_channel
*src0
,
622 const union tgsi_exec_channel
*src1
,
623 const union tgsi_exec_channel
*src2
,
624 const union tgsi_exec_channel
*src3
)
626 dst
->f
[0] = src0
->f
[0] <= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
627 dst
->f
[1] = src0
->f
[1] <= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
628 dst
->f
[2] = src0
->f
[2] <= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
629 dst
->f
[3] = src0
->f
[3] <= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
634 union tgsi_exec_channel
*dst
,
635 const union tgsi_exec_channel
*src0
,
636 const union tgsi_exec_channel
*src1
,
637 const union tgsi_exec_channel
*src2
,
638 const union tgsi_exec_channel
*src3
)
640 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
641 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
642 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
643 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
649 union tgsi_exec_channel
*dst
,
650 const union tgsi_exec_channel
*src0
,
651 const union tgsi_exec_channel
*src1
,
652 const union tgsi_exec_channel
*src2
,
653 const union tgsi_exec_channel
*src3
)
655 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
656 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
657 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
658 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
665 union tgsi_exec_channel
*dst
,
666 const union tgsi_exec_channel
*src0
,
667 const union tgsi_exec_channel
*src1
,
668 const union tgsi_exec_channel
*src2
,
669 const union tgsi_exec_channel
*src3
)
671 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src2
->u
[0] : src3
->u
[0];
672 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src2
->u
[1] : src3
->u
[1];
673 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src2
->u
[2] : src3
->u
[2];
674 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src2
->u
[3] : src3
->u
[3];
680 union tgsi_exec_channel
*dst
,
681 const union tgsi_exec_channel
*src0
,
682 const union tgsi_exec_channel
*src1
)
684 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
685 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
686 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
687 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
693 union tgsi_exec_channel
*dst
,
694 const union tgsi_exec_channel
*src0
,
695 const union tgsi_exec_channel
*src1
)
697 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
698 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
699 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
700 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
707 union tgsi_exec_channel
*dst
,
708 const union tgsi_exec_channel
*src0
,
709 const union tgsi_exec_channel
*src1
)
711 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
712 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
713 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
714 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
720 union tgsi_exec_channel
*dst
,
721 const union tgsi_exec_channel
*src0
,
722 const union tgsi_exec_channel
*src1
)
724 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
725 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
726 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
727 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
733 union tgsi_exec_channel
*dst
,
734 const union tgsi_exec_channel
*src0
,
735 const union tgsi_exec_channel
*src1
)
737 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
738 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
739 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
740 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
747 union tgsi_exec_channel
*dst
,
748 const union tgsi_exec_channel
*src0
,
749 const union tgsi_exec_channel
*src1
)
751 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
752 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
753 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
754 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
761 union tgsi_exec_channel
*dst
,
762 const union tgsi_exec_channel
*src0
,
763 const union tgsi_exec_channel
*src1
)
765 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
766 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
767 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
768 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
774 union tgsi_exec_channel
*dst
,
775 const union tgsi_exec_channel
*src0
,
776 const union tgsi_exec_channel
*src1
)
778 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
779 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
780 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
781 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
787 union tgsi_exec_channel
*dst
,
788 const union tgsi_exec_channel
*src0
,
789 const union tgsi_exec_channel
*src1
)
791 dst
->i
[0] = src0
->i
[0] * src1
->i
[0];
792 dst
->i
[1] = src0
->i
[1] * src1
->i
[1];
793 dst
->i
[2] = src0
->i
[2] * src1
->i
[2];
794 dst
->i
[3] = src0
->i
[3] * src1
->i
[3];
801 union tgsi_exec_channel
*dst0
,
802 union tgsi_exec_channel
*dst1
,
803 const union tgsi_exec_channel
*src0
,
804 const union tgsi_exec_channel
*src1
)
806 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
807 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
808 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
809 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
820 union tgsi_exec_channel
*dst0
,
821 union tgsi_exec_channel
*dst1
,
822 const union tgsi_exec_channel
*src0
,
823 const union tgsi_exec_channel
*src1
)
825 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
826 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
827 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
828 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
840 union tgsi_exec_channel
*dst
,
841 const union tgsi_exec_channel
*src0
,
842 const union tgsi_exec_channel
*src1
,
843 const union tgsi_exec_channel
*src2
)
845 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
846 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
847 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
848 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
854 union tgsi_exec_channel
*dst
,
855 const union tgsi_exec_channel
*src
)
857 dst
->f
[0] = -src
->f
[0];
858 dst
->f
[1] = -src
->f
[1];
859 dst
->f
[2] = -src
->f
[2];
860 dst
->f
[3] = -src
->f
[3];
866 union tgsi_exec_channel
*dst
,
867 const union tgsi_exec_channel
*src
)
869 dst
->i
[0] = -src
->i
[0];
870 dst
->i
[1] = -src
->i
[1];
871 dst
->i
[2] = -src
->i
[2];
872 dst
->i
[3] = -src
->i
[3];
878 union tgsi_exec_channel
*dst
,
879 const union tgsi_exec_channel
*src
)
881 dst
->u
[0] = ~src
->u
[0];
882 dst
->u
[1] = ~src
->u
[1];
883 dst
->u
[2] = ~src
->u
[2];
884 dst
->u
[3] = ~src
->u
[3];
889 union tgsi_exec_channel
*dst
,
890 const union tgsi_exec_channel
*src0
,
891 const union tgsi_exec_channel
*src1
)
893 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
894 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
895 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
896 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
901 union tgsi_exec_channel
*dst
,
902 const union tgsi_exec_channel
*src0
,
903 const union tgsi_exec_channel
*src1
)
906 dst
->f
[0] = util_fast_pow( src0
->f
[0], src1
->f
[0] );
907 dst
->f
[1] = util_fast_pow( src0
->f
[1], src1
->f
[1] );
908 dst
->f
[2] = util_fast_pow( src0
->f
[2], src1
->f
[2] );
909 dst
->f
[3] = util_fast_pow( src0
->f
[3], src1
->f
[3] );
911 dst
->f
[0] = powf( src0
->f
[0], src1
->f
[0] );
912 dst
->f
[1] = powf( src0
->f
[1], src1
->f
[1] );
913 dst
->f
[2] = powf( src0
->f
[2], src1
->f
[2] );
914 dst
->f
[3] = powf( src0
->f
[3], src1
->f
[3] );
920 union tgsi_exec_channel
*dst
,
921 const union tgsi_exec_channel
*src
)
923 dst
->f
[0] = floorf( src
->f
[0] + 0.5f
);
924 dst
->f
[1] = floorf( src
->f
[1] + 0.5f
);
925 dst
->f
[2] = floorf( src
->f
[2] + 0.5f
);
926 dst
->f
[3] = floorf( src
->f
[3] + 0.5f
);
931 union tgsi_exec_channel
*dst
,
932 const union tgsi_exec_channel
*src
)
934 dst
->f
[0] = src
->f
[0] < 0.0f
? -1.0f
: src
->f
[0] > 0.0f
? 1.0f
: 0.0f
;
935 dst
->f
[1] = src
->f
[1] < 0.0f
? -1.0f
: src
->f
[1] > 0.0f
? 1.0f
: 0.0f
;
936 dst
->f
[2] = src
->f
[2] < 0.0f
? -1.0f
: src
->f
[2] > 0.0f
? 1.0f
: 0.0f
;
937 dst
->f
[3] = src
->f
[3] < 0.0f
? -1.0f
: src
->f
[3] > 0.0f
? 1.0f
: 0.0f
;
942 union tgsi_exec_channel
*dst
,
943 const union tgsi_exec_channel
*src0
,
944 const union tgsi_exec_channel
*src1
)
946 dst
->i
[0] = src0
->i
[0] << src1
->i
[0];
947 dst
->i
[1] = src0
->i
[1] << src1
->i
[1];
948 dst
->i
[2] = src0
->i
[2] << src1
->i
[2];
949 dst
->i
[3] = src0
->i
[3] << src1
->i
[3];
954 union tgsi_exec_channel
*dst
,
955 const union tgsi_exec_channel
*src0
,
956 const union tgsi_exec_channel
*src1
)
958 dst
->i
[0] = src0
->i
[0] >> src1
->i
[0];
959 dst
->i
[1] = src0
->i
[1] >> src1
->i
[1];
960 dst
->i
[2] = src0
->i
[2] >> src1
->i
[2];
961 dst
->i
[3] = src0
->i
[3] >> src1
->i
[3];
966 union tgsi_exec_channel
*dst
,
967 const union tgsi_exec_channel
*src0
)
969 dst
->f
[0] = (float) (int) src0
->f
[0];
970 dst
->f
[1] = (float) (int) src0
->f
[1];
971 dst
->f
[2] = (float) (int) src0
->f
[2];
972 dst
->f
[3] = (float) (int) src0
->f
[3];
978 union tgsi_exec_channel
*dst
,
979 const union tgsi_exec_channel
*src0
,
980 const union tgsi_exec_channel
*src1
)
982 dst
->u
[0] = src0
->u
[0] >> src1
->u
[0];
983 dst
->u
[1] = src0
->u
[1] >> src1
->u
[1];
984 dst
->u
[2] = src0
->u
[2] >> src1
->u
[2];
985 dst
->u
[3] = src0
->u
[3] >> src1
->u
[3];
991 union tgsi_exec_channel
*dst
,
992 const union tgsi_exec_channel
*src
)
994 dst
->f
[0] = sinf( src
->f
[0] );
995 dst
->f
[1] = sinf( src
->f
[1] );
996 dst
->f
[2] = sinf( src
->f
[2] );
997 dst
->f
[3] = sinf( src
->f
[3] );
1001 micro_sqrt( union tgsi_exec_channel
*dst
,
1002 const union tgsi_exec_channel
*src
)
1004 dst
->f
[0] = sqrtf( src
->f
[0] );
1005 dst
->f
[1] = sqrtf( src
->f
[1] );
1006 dst
->f
[2] = sqrtf( src
->f
[2] );
1007 dst
->f
[3] = sqrtf( src
->f
[3] );
1012 union tgsi_exec_channel
*dst
,
1013 const union tgsi_exec_channel
*src0
,
1014 const union tgsi_exec_channel
*src1
)
1016 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
1017 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
1018 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
1019 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
1025 union tgsi_exec_channel
*dst
,
1026 const union tgsi_exec_channel
*src
)
1028 dst
->f
[0] = (float) src
->u
[0];
1029 dst
->f
[1] = (float) src
->u
[1];
1030 dst
->f
[2] = (float) src
->u
[2];
1031 dst
->f
[3] = (float) src
->u
[3];
1037 union tgsi_exec_channel
*dst
,
1038 const union tgsi_exec_channel
*src0
,
1039 const union tgsi_exec_channel
*src1
)
1041 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
1042 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
1043 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
1044 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
1048 fetch_src_file_channel(
1049 const struct tgsi_exec_machine
*mach
,
1052 const union tgsi_exec_channel
*index
,
1053 union tgsi_exec_channel
*chan
)
1056 case TGSI_EXTSWIZZLE_X
:
1057 case TGSI_EXTSWIZZLE_Y
:
1058 case TGSI_EXTSWIZZLE_Z
:
1059 case TGSI_EXTSWIZZLE_W
:
1061 case TGSI_FILE_CONSTANT
:
1062 assert(mach
->Consts
);
1063 if (index
->i
[0] < 0)
1066 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
1067 if (index
->i
[1] < 0)
1070 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
1071 if (index
->i
[2] < 0)
1074 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
1075 if (index
->i
[3] < 0)
1078 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
1081 case TGSI_FILE_INPUT
:
1082 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1083 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1084 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1085 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1088 case TGSI_FILE_TEMPORARY
:
1089 assert(index
->i
[0] < TGSI_EXEC_NUM_TEMPS
);
1090 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1091 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1092 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1093 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1096 case TGSI_FILE_IMMEDIATE
:
1097 assert( index
->i
[0] < (int) mach
->ImmLimit
);
1098 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
1099 assert( index
->i
[1] < (int) mach
->ImmLimit
);
1100 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
1101 assert( index
->i
[2] < (int) mach
->ImmLimit
);
1102 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
1103 assert( index
->i
[3] < (int) mach
->ImmLimit
);
1104 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
1107 case TGSI_FILE_ADDRESS
:
1108 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1109 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1110 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1111 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1114 case TGSI_FILE_OUTPUT
:
1115 /* vertex/fragment output vars can be read too */
1116 chan
->u
[0] = mach
->Outputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
1117 chan
->u
[1] = mach
->Outputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
1118 chan
->u
[2] = mach
->Outputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
1119 chan
->u
[3] = mach
->Outputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
1127 case TGSI_EXTSWIZZLE_ZERO
:
1128 *chan
= mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
];
1131 case TGSI_EXTSWIZZLE_ONE
:
1132 *chan
= mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
];
1142 const struct tgsi_exec_machine
*mach
,
1143 union tgsi_exec_channel
*chan
,
1144 const struct tgsi_full_src_register
*reg
,
1145 const uint chan_index
)
1147 union tgsi_exec_channel index
;
1150 /* We start with a direct index into a register file.
1154 * file = SrcRegister.File
1155 * [1] = SrcRegister.Index
1160 index
.i
[3] = reg
->SrcRegister
.Index
;
1162 /* There is an extra source register that indirectly subscripts
1163 * a register file. The direct index now becomes an offset
1164 * that is being added to the indirect register.
1168 * ind = SrcRegisterInd.File
1169 * [2] = SrcRegisterInd.Index
1170 * .x = SrcRegisterInd.SwizzleX
1172 if (reg
->SrcRegister
.Indirect
) {
1173 union tgsi_exec_channel index2
;
1174 union tgsi_exec_channel indir_index
;
1175 const uint execmask
= mach
->ExecMask
;
1178 /* which address register (always zero now) */
1182 index2
.i
[3] = reg
->SrcRegisterInd
.Index
;
1184 /* get current value of address register[swizzle] */
1185 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterInd
, CHAN_X
);
1186 fetch_src_file_channel(
1188 reg
->SrcRegisterInd
.File
,
1193 /* add value of address register to the offset */
1194 index
.i
[0] += (int) indir_index
.f
[0];
1195 index
.i
[1] += (int) indir_index
.f
[1];
1196 index
.i
[2] += (int) indir_index
.f
[2];
1197 index
.i
[3] += (int) indir_index
.f
[3];
1199 /* for disabled execution channels, zero-out the index to
1200 * avoid using a potential garbage value.
1202 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1203 if ((execmask
& (1 << i
)) == 0)
1208 /* There is an extra source register that is a second
1209 * subscript to a register file. Effectively it means that
1210 * the register file is actually a 2D array of registers.
1212 * file[1][3] == file[1*sizeof(file[1])+3],
1214 * [3] = SrcRegisterDim.Index
1216 if (reg
->SrcRegister
.Dimension
) {
1217 /* The size of the first-order array depends on the register file type.
1218 * We need to multiply the index to the first array to get an effective,
1219 * "flat" index that points to the beginning of the second-order array.
1221 switch (reg
->SrcRegister
.File
) {
1222 case TGSI_FILE_INPUT
:
1223 index
.i
[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1224 index
.i
[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1225 index
.i
[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1226 index
.i
[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1228 case TGSI_FILE_CONSTANT
:
1229 index
.i
[0] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1230 index
.i
[1] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1231 index
.i
[2] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1232 index
.i
[3] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1238 index
.i
[0] += reg
->SrcRegisterDim
.Index
;
1239 index
.i
[1] += reg
->SrcRegisterDim
.Index
;
1240 index
.i
[2] += reg
->SrcRegisterDim
.Index
;
1241 index
.i
[3] += reg
->SrcRegisterDim
.Index
;
1243 /* Again, the second subscript index can be addressed indirectly
1244 * identically to the first one.
1245 * Nothing stops us from indirectly addressing the indirect register,
1246 * but there is no need for that, so we won't exercise it.
1248 * file[1][ind[4].y+3],
1250 * ind = SrcRegisterDimInd.File
1251 * [4] = SrcRegisterDimInd.Index
1252 * .y = SrcRegisterDimInd.SwizzleX
1254 if (reg
->SrcRegisterDim
.Indirect
) {
1255 union tgsi_exec_channel index2
;
1256 union tgsi_exec_channel indir_index
;
1257 const uint execmask
= mach
->ExecMask
;
1263 index2
.i
[3] = reg
->SrcRegisterDimInd
.Index
;
1265 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterDimInd
, CHAN_X
);
1266 fetch_src_file_channel(
1268 reg
->SrcRegisterDimInd
.File
,
1273 index
.i
[0] += (int) indir_index
.f
[0];
1274 index
.i
[1] += (int) indir_index
.f
[1];
1275 index
.i
[2] += (int) indir_index
.f
[2];
1276 index
.i
[3] += (int) indir_index
.f
[3];
1278 /* for disabled execution channels, zero-out the index to
1279 * avoid using a potential garbage value.
1281 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1282 if ((execmask
& (1 << i
)) == 0)
1287 /* If by any chance there was a need for a 3D array of register
1288 * files, we would have to check whether SrcRegisterDim is followed
1289 * by a dimension register and continue the saga.
1293 swizzle
= tgsi_util_get_full_src_register_extswizzle( reg
, chan_index
);
1294 fetch_src_file_channel(
1296 reg
->SrcRegister
.File
,
1301 switch (tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
)) {
1302 case TGSI_UTIL_SIGN_CLEAR
:
1303 micro_abs( chan
, chan
);
1306 case TGSI_UTIL_SIGN_SET
:
1307 micro_abs( chan
, chan
);
1308 micro_neg( chan
, chan
);
1311 case TGSI_UTIL_SIGN_TOGGLE
:
1312 micro_neg( chan
, chan
);
1315 case TGSI_UTIL_SIGN_KEEP
:
1319 if (reg
->SrcRegisterExtMod
.Complement
) {
1320 micro_sub( chan
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], chan
);
1326 struct tgsi_exec_machine
*mach
,
1327 const union tgsi_exec_channel
*chan
,
1328 const struct tgsi_full_dst_register
*reg
,
1329 const struct tgsi_full_instruction
*inst
,
1333 union tgsi_exec_channel null
;
1334 union tgsi_exec_channel
*dst
;
1335 uint execmask
= mach
->ExecMask
;
1338 check_inf_or_nan(chan
);
1341 switch (reg
->DstRegister
.File
) {
1342 case TGSI_FILE_NULL
:
1346 case TGSI_FILE_OUTPUT
:
1347 dst
= &mach
->Outputs
[mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1348 + reg
->DstRegister
.Index
].xyzw
[chan_index
];
1351 case TGSI_FILE_TEMPORARY
:
1352 assert( reg
->DstRegister
.Index
< TGSI_EXEC_NUM_TEMPS
);
1353 dst
= &mach
->Temps
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1356 case TGSI_FILE_ADDRESS
:
1357 dst
= &mach
->Addrs
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1365 if (inst
->InstructionExtNv
.CondFlowEnable
) {
1366 union tgsi_exec_channel
*cc
= &mach
->Temps
[TEMP_CC_I
].xyzw
[TEMP_CC_C
];
1372 /* Only CC0 supported.
1374 assert( inst
->InstructionExtNv
.CondFlowIndex
< 1 );
1376 switch (chan_index
) {
1378 swizzle
= inst
->InstructionExtNv
.CondSwizzleX
;
1381 swizzle
= inst
->InstructionExtNv
.CondSwizzleY
;
1384 swizzle
= inst
->InstructionExtNv
.CondSwizzleZ
;
1387 swizzle
= inst
->InstructionExtNv
.CondSwizzleW
;
1395 case TGSI_SWIZZLE_X
:
1396 shift
= TGSI_EXEC_CC_X_SHIFT
;
1397 mask
= TGSI_EXEC_CC_X_MASK
;
1399 case TGSI_SWIZZLE_Y
:
1400 shift
= TGSI_EXEC_CC_Y_SHIFT
;
1401 mask
= TGSI_EXEC_CC_Y_MASK
;
1403 case TGSI_SWIZZLE_Z
:
1404 shift
= TGSI_EXEC_CC_Z_SHIFT
;
1405 mask
= TGSI_EXEC_CC_Z_MASK
;
1407 case TGSI_SWIZZLE_W
:
1408 shift
= TGSI_EXEC_CC_W_SHIFT
;
1409 mask
= TGSI_EXEC_CC_W_MASK
;
1416 switch (inst
->InstructionExtNv
.CondMask
) {
1418 test
= ~(TGSI_EXEC_CC_GT
<< shift
) & mask
;
1419 for (i
= 0; i
< QUAD_SIZE
; i
++)
1420 if (cc
->u
[i
] & test
)
1421 execmask
&= ~(1 << i
);
1425 test
= ~(TGSI_EXEC_CC_EQ
<< shift
) & mask
;
1426 for (i
= 0; i
< QUAD_SIZE
; i
++)
1427 if (cc
->u
[i
] & test
)
1428 execmask
&= ~(1 << i
);
1432 test
= ~(TGSI_EXEC_CC_LT
<< shift
) & mask
;
1433 for (i
= 0; i
< QUAD_SIZE
; i
++)
1434 if (cc
->u
[i
] & test
)
1435 execmask
&= ~(1 << i
);
1439 test
= ~((TGSI_EXEC_CC_GT
| TGSI_EXEC_CC_EQ
) << shift
) & mask
;
1440 for (i
= 0; i
< QUAD_SIZE
; i
++)
1441 if (cc
->u
[i
] & test
)
1442 execmask
&= ~(1 << i
);
1446 test
= ~((TGSI_EXEC_CC_LT
| TGSI_EXEC_CC_EQ
) << shift
) & mask
;
1447 for (i
= 0; i
< QUAD_SIZE
; i
++)
1448 if (cc
->u
[i
] & test
)
1449 execmask
&= ~(1 << i
);
1453 test
= ~((TGSI_EXEC_CC_GT
| TGSI_EXEC_CC_LT
| TGSI_EXEC_CC_UN
) << shift
) & mask
;
1454 for (i
= 0; i
< QUAD_SIZE
; i
++)
1455 if (cc
->u
[i
] & test
)
1456 execmask
&= ~(1 << i
);
1463 for (i
= 0; i
< QUAD_SIZE
; i
++)
1464 execmask
&= ~(1 << i
);
1473 switch (inst
->Instruction
.Saturate
) {
1475 for (i
= 0; i
< QUAD_SIZE
; i
++)
1476 if (execmask
& (1 << i
))
1477 dst
->i
[i
] = chan
->i
[i
];
1480 case TGSI_SAT_ZERO_ONE
:
1481 for (i
= 0; i
< QUAD_SIZE
; i
++)
1482 if (execmask
& (1 << i
)) {
1483 if (chan
->f
[i
] < 0.0f
)
1485 else if (chan
->f
[i
] > 1.0f
)
1488 dst
->i
[i
] = chan
->i
[i
];
1492 case TGSI_SAT_MINUS_PLUS_ONE
:
1493 for (i
= 0; i
< QUAD_SIZE
; i
++)
1494 if (execmask
& (1 << i
)) {
1495 if (chan
->f
[i
] < -1.0f
)
1497 else if (chan
->f
[i
] > 1.0f
)
1500 dst
->i
[i
] = chan
->i
[i
];
1508 if (inst
->InstructionExtNv
.CondDstUpdate
) {
1509 union tgsi_exec_channel
*cc
= &mach
->Temps
[TEMP_CC_I
].xyzw
[TEMP_CC_C
];
1513 /* Only CC0 supported.
1515 assert( inst
->InstructionExtNv
.CondDstIndex
< 1 );
1517 switch (chan_index
) {
1519 shift
= TGSI_EXEC_CC_X_SHIFT
;
1520 mask
= ~TGSI_EXEC_CC_X_MASK
;
1523 shift
= TGSI_EXEC_CC_Y_SHIFT
;
1524 mask
= ~TGSI_EXEC_CC_Y_MASK
;
1527 shift
= TGSI_EXEC_CC_Z_SHIFT
;
1528 mask
= ~TGSI_EXEC_CC_Z_MASK
;
1531 shift
= TGSI_EXEC_CC_W_SHIFT
;
1532 mask
= ~TGSI_EXEC_CC_W_MASK
;
1539 for (i
= 0; i
< QUAD_SIZE
; i
++)
1540 if (execmask
& (1 << i
)) {
1542 if (dst
->f
[i
] < 0.0f
)
1543 cc
->u
[i
] |= TGSI_EXEC_CC_LT
<< shift
;
1544 else if (dst
->f
[i
] > 0.0f
)
1545 cc
->u
[i
] |= TGSI_EXEC_CC_GT
<< shift
;
1546 else if (dst
->f
[i
] == 0.0f
)
1547 cc
->u
[i
] |= TGSI_EXEC_CC_EQ
<< shift
;
1549 cc
->u
[i
] |= TGSI_EXEC_CC_UN
<< shift
;
1554 #define FETCH(VAL,INDEX,CHAN)\
1555 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
1557 #define STORE(VAL,INDEX,CHAN)\
1558 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1562 * Execute ARB-style KIL which is predicated by a src register.
1563 * Kill fragment if any of the four values is less than zero.
1566 exec_kil(struct tgsi_exec_machine
*mach
,
1567 const struct tgsi_full_instruction
*inst
)
1571 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1572 union tgsi_exec_channel r
[1];
1574 /* This mask stores component bits that were already tested. Note that
1575 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1577 uniquemask
= (1 << TGSI_EXTSWIZZLE_ZERO
) | (1 << TGSI_EXTSWIZZLE_ONE
);
1579 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1584 /* unswizzle channel */
1585 swizzle
= tgsi_util_get_full_src_register_extswizzle (
1586 &inst
->FullSrcRegisters
[0],
1589 /* check if the component has not been already tested */
1590 if (uniquemask
& (1 << swizzle
))
1592 uniquemask
|= 1 << swizzle
;
1594 FETCH(&r
[0], 0, chan_index
);
1595 for (i
= 0; i
< 4; i
++)
1596 if (r
[0].f
[i
] < 0.0f
)
1600 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1604 * Execute NVIDIA-style KIL which is predicated by a condition code.
1605 * Kill fragment if the condition code is TRUE.
1608 exec_kilp(struct tgsi_exec_machine
*mach
,
1609 const struct tgsi_full_instruction
*inst
)
1611 uint kilmask
; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1613 if (inst
->InstructionExtNv
.CondFlowEnable
) {
1619 swizzle
[0] = inst
->InstructionExtNv
.CondSwizzleX
;
1620 swizzle
[1] = inst
->InstructionExtNv
.CondSwizzleY
;
1621 swizzle
[2] = inst
->InstructionExtNv
.CondSwizzleZ
;
1622 swizzle
[3] = inst
->InstructionExtNv
.CondSwizzleW
;
1624 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1628 for (i
= 0; i
< 4; i
++) {
1629 /* TODO: evaluate the condition code */
1636 /* "unconditional" kil */
1637 kilmask
= mach
->ExecMask
;
1639 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1644 * Fetch a four texture samples using STR texture coordinates.
1647 fetch_texel( struct tgsi_sampler
*sampler
,
1648 const union tgsi_exec_channel
*s
,
1649 const union tgsi_exec_channel
*t
,
1650 const union tgsi_exec_channel
*p
,
1651 float lodbias
, /* XXX should be float[4] */
1652 union tgsi_exec_channel
*r
,
1653 union tgsi_exec_channel
*g
,
1654 union tgsi_exec_channel
*b
,
1655 union tgsi_exec_channel
*a
)
1658 float rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1660 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, lodbias
, rgba
);
1662 for (j
= 0; j
< 4; j
++) {
1663 r
->f
[j
] = rgba
[0][j
];
1664 g
->f
[j
] = rgba
[1][j
];
1665 b
->f
[j
] = rgba
[2][j
];
1666 a
->f
[j
] = rgba
[3][j
];
1672 exec_tex(struct tgsi_exec_machine
*mach
,
1673 const struct tgsi_full_instruction
*inst
,
1677 const uint unit
= inst
->FullSrcRegisters
[1].SrcRegister
.Index
;
1678 union tgsi_exec_channel r
[4];
1682 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1684 switch (inst
->InstructionExtTexture
.Texture
) {
1685 case TGSI_TEXTURE_1D
:
1686 case TGSI_TEXTURE_SHADOW1D
:
1688 FETCH(&r
[0], 0, CHAN_X
);
1691 FETCH(&r
[1], 0, CHAN_W
);
1692 micro_div( &r
[0], &r
[0], &r
[1] );
1696 FETCH(&r
[1], 0, CHAN_W
);
1697 lodBias
= r
[2].f
[0];
1702 fetch_texel(mach
->Samplers
[unit
],
1703 &r
[0], &ZeroVec
, &ZeroVec
, lodBias
, /* S, T, P, BIAS */
1704 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1707 case TGSI_TEXTURE_2D
:
1708 case TGSI_TEXTURE_RECT
:
1709 case TGSI_TEXTURE_SHADOW2D
:
1710 case TGSI_TEXTURE_SHADOWRECT
:
1712 FETCH(&r
[0], 0, CHAN_X
);
1713 FETCH(&r
[1], 0, CHAN_Y
);
1714 FETCH(&r
[2], 0, CHAN_Z
);
1717 FETCH(&r
[3], 0, CHAN_W
);
1718 micro_div( &r
[0], &r
[0], &r
[3] );
1719 micro_div( &r
[1], &r
[1], &r
[3] );
1720 micro_div( &r
[2], &r
[2], &r
[3] );
1724 FETCH(&r
[3], 0, CHAN_W
);
1725 lodBias
= r
[3].f
[0];
1730 fetch_texel(mach
->Samplers
[unit
],
1731 &r
[0], &r
[1], &r
[2], lodBias
, /* inputs */
1732 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1735 case TGSI_TEXTURE_3D
:
1736 case TGSI_TEXTURE_CUBE
:
1738 FETCH(&r
[0], 0, CHAN_X
);
1739 FETCH(&r
[1], 0, CHAN_Y
);
1740 FETCH(&r
[2], 0, CHAN_Z
);
1743 FETCH(&r
[3], 0, CHAN_W
);
1744 micro_div( &r
[0], &r
[0], &r
[3] );
1745 micro_div( &r
[1], &r
[1], &r
[3] );
1746 micro_div( &r
[2], &r
[2], &r
[3] );
1750 FETCH(&r
[3], 0, CHAN_W
);
1751 lodBias
= r
[3].f
[0];
1756 fetch_texel(mach
->Samplers
[unit
],
1757 &r
[0], &r
[1], &r
[2], lodBias
,
1758 &r
[0], &r
[1], &r
[2], &r
[3]);
1765 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1766 STORE( &r
[chan_index
], 0, chan_index
);
1772 * Evaluate a constant-valued coefficient at the position of the
1777 struct tgsi_exec_machine
*mach
,
1783 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1784 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1789 * Evaluate a linear-valued coefficient at the position of the
1794 struct tgsi_exec_machine
*mach
,
1798 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1799 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1800 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1801 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1802 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1803 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
1804 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
1805 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
1806 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
1810 * Evaluate a perspective-valued coefficient at the position of the
1814 eval_perspective_coef(
1815 struct tgsi_exec_machine
*mach
,
1819 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1820 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1821 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1822 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1823 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1824 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
1825 /* divide by W here */
1826 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
1827 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
1828 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
1829 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
1833 typedef void (* eval_coef_func
)(
1834 struct tgsi_exec_machine
*mach
,
1840 struct tgsi_exec_machine
*mach
,
1841 const struct tgsi_full_declaration
*decl
)
1843 if( mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1844 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1845 unsigned first
, last
, mask
;
1846 eval_coef_func eval
;
1848 first
= decl
->DeclarationRange
.First
;
1849 last
= decl
->DeclarationRange
.Last
;
1850 mask
= decl
->Declaration
.UsageMask
;
1852 switch( decl
->Declaration
.Interpolate
) {
1853 case TGSI_INTERPOLATE_CONSTANT
:
1854 eval
= eval_constant_coef
;
1857 case TGSI_INTERPOLATE_LINEAR
:
1858 eval
= eval_linear_coef
;
1861 case TGSI_INTERPOLATE_PERSPECTIVE
:
1862 eval
= eval_perspective_coef
;
1870 if( mask
== TGSI_WRITEMASK_XYZW
) {
1873 for( i
= first
; i
<= last
; i
++ ) {
1874 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1882 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1883 if( mask
& (1 << j
) ) {
1884 for( i
= first
; i
<= last
; i
++ ) {
1896 struct tgsi_exec_machine
*mach
,
1897 const struct tgsi_full_instruction
*inst
,
1901 union tgsi_exec_channel r
[10];
1905 switch (inst
->Instruction
.Opcode
) {
1906 case TGSI_OPCODE_ARL
:
1907 case TGSI_OPCODE_FLOOR
:
1908 /* TGSI_OPCODE_FLR */
1909 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1910 FETCH( &r
[0], 0, chan_index
);
1911 micro_flr( &r
[0], &r
[0] );
1912 STORE( &r
[0], 0, chan_index
);
1916 case TGSI_OPCODE_MOV
:
1917 case TGSI_OPCODE_SWZ
:
1918 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1919 FETCH( &r
[0], 0, chan_index
);
1920 STORE( &r
[0], 0, chan_index
);
1924 case TGSI_OPCODE_LIT
:
1925 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1926 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1929 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1930 FETCH( &r
[0], 0, CHAN_X
);
1931 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1932 micro_max( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1933 STORE( &r
[0], 0, CHAN_Y
);
1936 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1937 FETCH( &r
[1], 0, CHAN_Y
);
1938 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1940 FETCH( &r
[2], 0, CHAN_W
);
1941 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
1942 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
1943 micro_pow( &r
[1], &r
[1], &r
[2] );
1944 micro_lt( &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1945 STORE( &r
[0], 0, CHAN_Z
);
1949 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1950 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1954 case TGSI_OPCODE_RCP
:
1955 /* TGSI_OPCODE_RECIP */
1956 FETCH( &r
[0], 0, CHAN_X
);
1957 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1958 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1959 STORE( &r
[0], 0, chan_index
);
1963 case TGSI_OPCODE_RSQ
:
1964 /* TGSI_OPCODE_RECIPSQRT */
1965 FETCH( &r
[0], 0, CHAN_X
);
1966 micro_abs( &r
[0], &r
[0] );
1967 micro_sqrt( &r
[0], &r
[0] );
1968 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1969 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1970 STORE( &r
[0], 0, chan_index
);
1974 case TGSI_OPCODE_EXP
:
1975 FETCH( &r
[0], 0, CHAN_X
);
1976 micro_flr( &r
[1], &r
[0] ); /* r1 = floor(r0) */
1977 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1978 micro_exp2( &r
[2], &r
[1] ); /* r2 = 2 ^ r1 */
1979 STORE( &r
[2], 0, CHAN_X
); /* store r2 */
1981 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1982 micro_sub( &r
[2], &r
[0], &r
[1] ); /* r2 = r0 - r1 */
1983 STORE( &r
[2], 0, CHAN_Y
); /* store r2 */
1985 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1986 micro_exp2( &r
[2], &r
[0] ); /* r2 = 2 ^ r0 */
1987 STORE( &r
[2], 0, CHAN_Z
); /* store r2 */
1989 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1990 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1994 case TGSI_OPCODE_LOG
:
1995 FETCH( &r
[0], 0, CHAN_X
);
1996 micro_abs( &r
[2], &r
[0] ); /* r2 = abs(r0) */
1997 micro_lg2( &r
[1], &r
[2] ); /* r1 = lg2(r2) */
1998 micro_flr( &r
[0], &r
[1] ); /* r0 = floor(r1) */
1999 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2000 STORE( &r
[0], 0, CHAN_X
);
2002 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2003 micro_exp2( &r
[0], &r
[0] ); /* r0 = 2 ^ r0 */
2004 micro_div( &r
[0], &r
[2], &r
[0] ); /* r0 = r2 / r0 */
2005 STORE( &r
[0], 0, CHAN_Y
);
2007 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2008 STORE( &r
[1], 0, CHAN_Z
);
2010 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2011 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2015 case TGSI_OPCODE_MUL
:
2016 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
)
2018 FETCH(&r
[0], 0, chan_index
);
2019 FETCH(&r
[1], 1, chan_index
);
2021 micro_mul( &r
[0], &r
[0], &r
[1] );
2023 STORE(&r
[0], 0, chan_index
);
2027 case TGSI_OPCODE_ADD
:
2028 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2029 FETCH( &r
[0], 0, chan_index
);
2030 FETCH( &r
[1], 1, chan_index
);
2031 micro_add( &r
[0], &r
[0], &r
[1] );
2032 STORE( &r
[0], 0, chan_index
);
2036 case TGSI_OPCODE_DP3
:
2037 /* TGSI_OPCODE_DOT3 */
2038 FETCH( &r
[0], 0, CHAN_X
);
2039 FETCH( &r
[1], 1, CHAN_X
);
2040 micro_mul( &r
[0], &r
[0], &r
[1] );
2042 FETCH( &r
[1], 0, CHAN_Y
);
2043 FETCH( &r
[2], 1, CHAN_Y
);
2044 micro_mul( &r
[1], &r
[1], &r
[2] );
2045 micro_add( &r
[0], &r
[0], &r
[1] );
2047 FETCH( &r
[1], 0, CHAN_Z
);
2048 FETCH( &r
[2], 1, CHAN_Z
);
2049 micro_mul( &r
[1], &r
[1], &r
[2] );
2050 micro_add( &r
[0], &r
[0], &r
[1] );
2052 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2053 STORE( &r
[0], 0, chan_index
);
2057 case TGSI_OPCODE_DP4
:
2058 /* TGSI_OPCODE_DOT4 */
2059 FETCH(&r
[0], 0, CHAN_X
);
2060 FETCH(&r
[1], 1, CHAN_X
);
2062 micro_mul( &r
[0], &r
[0], &r
[1] );
2064 FETCH(&r
[1], 0, CHAN_Y
);
2065 FETCH(&r
[2], 1, CHAN_Y
);
2067 micro_mul( &r
[1], &r
[1], &r
[2] );
2068 micro_add( &r
[0], &r
[0], &r
[1] );
2070 FETCH(&r
[1], 0, CHAN_Z
);
2071 FETCH(&r
[2], 1, CHAN_Z
);
2073 micro_mul( &r
[1], &r
[1], &r
[2] );
2074 micro_add( &r
[0], &r
[0], &r
[1] );
2076 FETCH(&r
[1], 0, CHAN_W
);
2077 FETCH(&r
[2], 1, CHAN_W
);
2079 micro_mul( &r
[1], &r
[1], &r
[2] );
2080 micro_add( &r
[0], &r
[0], &r
[1] );
2082 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2083 STORE( &r
[0], 0, chan_index
);
2087 case TGSI_OPCODE_DST
:
2088 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2089 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
2092 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2093 FETCH( &r
[0], 0, CHAN_Y
);
2094 FETCH( &r
[1], 1, CHAN_Y
);
2095 micro_mul( &r
[0], &r
[0], &r
[1] );
2096 STORE( &r
[0], 0, CHAN_Y
);
2099 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2100 FETCH( &r
[0], 0, CHAN_Z
);
2101 STORE( &r
[0], 0, CHAN_Z
);
2104 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2105 FETCH( &r
[0], 1, CHAN_W
);
2106 STORE( &r
[0], 0, CHAN_W
);
2110 case TGSI_OPCODE_MIN
:
2111 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2112 FETCH(&r
[0], 0, chan_index
);
2113 FETCH(&r
[1], 1, chan_index
);
2115 /* XXX use micro_min()?? */
2116 micro_lt( &r
[0], &r
[0], &r
[1], &r
[0], &r
[1] );
2118 STORE(&r
[0], 0, chan_index
);
2122 case TGSI_OPCODE_MAX
:
2123 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2124 FETCH(&r
[0], 0, chan_index
);
2125 FETCH(&r
[1], 1, chan_index
);
2127 /* XXX use micro_max()?? */
2128 micro_lt( &r
[0], &r
[0], &r
[1], &r
[1], &r
[0] );
2130 STORE(&r
[0], 0, chan_index
);
2134 case TGSI_OPCODE_SLT
:
2135 /* TGSI_OPCODE_SETLT */
2136 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2137 FETCH( &r
[0], 0, chan_index
);
2138 FETCH( &r
[1], 1, chan_index
);
2139 micro_lt( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2140 STORE( &r
[0], 0, chan_index
);
2144 case TGSI_OPCODE_SGE
:
2145 /* TGSI_OPCODE_SETGE */
2146 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2147 FETCH( &r
[0], 0, chan_index
);
2148 FETCH( &r
[1], 1, chan_index
);
2149 micro_le( &r
[0], &r
[1], &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2150 STORE( &r
[0], 0, chan_index
);
2154 case TGSI_OPCODE_MAD
:
2155 /* TGSI_OPCODE_MADD */
2156 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2157 FETCH( &r
[0], 0, chan_index
);
2158 FETCH( &r
[1], 1, chan_index
);
2159 micro_mul( &r
[0], &r
[0], &r
[1] );
2160 FETCH( &r
[1], 2, chan_index
);
2161 micro_add( &r
[0], &r
[0], &r
[1] );
2162 STORE( &r
[0], 0, chan_index
);
2166 case TGSI_OPCODE_SUB
:
2167 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2168 FETCH(&r
[0], 0, chan_index
);
2169 FETCH(&r
[1], 1, chan_index
);
2171 micro_sub( &r
[0], &r
[0], &r
[1] );
2173 STORE(&r
[0], 0, chan_index
);
2177 case TGSI_OPCODE_LERP
:
2178 /* TGSI_OPCODE_LRP */
2179 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2180 FETCH(&r
[0], 0, chan_index
);
2181 FETCH(&r
[1], 1, chan_index
);
2182 FETCH(&r
[2], 2, chan_index
);
2184 micro_sub( &r
[1], &r
[1], &r
[2] );
2185 micro_mul( &r
[0], &r
[0], &r
[1] );
2186 micro_add( &r
[0], &r
[0], &r
[2] );
2188 STORE(&r
[0], 0, chan_index
);
2192 case TGSI_OPCODE_CND
:
2193 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2194 FETCH(&r
[0], 0, chan_index
);
2195 FETCH(&r
[1], 1, chan_index
);
2196 FETCH(&r
[2], 2, chan_index
);
2197 micro_lt(&r
[0], &mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
], &r
[2], &r
[0], &r
[1]);
2198 STORE(&r
[0], 0, chan_index
);
2202 case TGSI_OPCODE_CND0
:
2203 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2204 FETCH(&r
[0], 0, chan_index
);
2205 FETCH(&r
[1], 1, chan_index
);
2206 FETCH(&r
[2], 2, chan_index
);
2207 micro_le(&r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[2], &r
[0], &r
[1]);
2208 STORE(&r
[0], 0, chan_index
);
2212 case TGSI_OPCODE_DOT2ADD
:
2213 /* TGSI_OPCODE_DP2A */
2214 FETCH( &r
[0], 0, CHAN_X
);
2215 FETCH( &r
[1], 1, CHAN_X
);
2216 micro_mul( &r
[0], &r
[0], &r
[1] );
2218 FETCH( &r
[1], 0, CHAN_Y
);
2219 FETCH( &r
[2], 1, CHAN_Y
);
2220 micro_mul( &r
[1], &r
[1], &r
[2] );
2221 micro_add( &r
[0], &r
[0], &r
[1] );
2223 FETCH( &r
[2], 2, CHAN_X
);
2224 micro_add( &r
[0], &r
[0], &r
[2] );
2226 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2227 STORE( &r
[0], 0, chan_index
);
2231 case TGSI_OPCODE_INDEX
:
2232 /* XXX: considered for removal */
2236 case TGSI_OPCODE_NEGATE
:
2237 /* XXX: considered for removal */
2241 case TGSI_OPCODE_FRAC
:
2242 /* TGSI_OPCODE_FRC */
2243 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2244 FETCH( &r
[0], 0, chan_index
);
2245 micro_frc( &r
[0], &r
[0] );
2246 STORE( &r
[0], 0, chan_index
);
2250 case TGSI_OPCODE_CLAMP
:
2251 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2252 FETCH(&r
[0], 0, chan_index
);
2253 FETCH(&r
[1], 1, chan_index
);
2254 micro_max(&r
[0], &r
[0], &r
[1]);
2255 FETCH(&r
[1], 2, chan_index
);
2256 micro_min(&r
[0], &r
[0], &r
[1]);
2257 STORE(&r
[0], 0, chan_index
);
2261 case TGSI_OPCODE_ROUND
:
2262 case TGSI_OPCODE_ARR
:
2263 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2264 FETCH( &r
[0], 0, chan_index
);
2265 micro_rnd( &r
[0], &r
[0] );
2266 STORE( &r
[0], 0, chan_index
);
2270 case TGSI_OPCODE_EXPBASE2
:
2271 /* TGSI_OPCODE_EX2 */
2272 FETCH(&r
[0], 0, CHAN_X
);
2275 micro_exp2( &r
[0], &r
[0] );
2277 micro_pow( &r
[0], &mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
], &r
[0] );
2280 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2281 STORE( &r
[0], 0, chan_index
);
2285 case TGSI_OPCODE_LOGBASE2
:
2286 /* TGSI_OPCODE_LG2 */
2287 FETCH( &r
[0], 0, CHAN_X
);
2288 micro_lg2( &r
[0], &r
[0] );
2289 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2290 STORE( &r
[0], 0, chan_index
);
2294 case TGSI_OPCODE_POWER
:
2295 /* TGSI_OPCODE_POW */
2296 FETCH(&r
[0], 0, CHAN_X
);
2297 FETCH(&r
[1], 1, CHAN_X
);
2299 micro_pow( &r
[0], &r
[0], &r
[1] );
2301 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2302 STORE( &r
[0], 0, chan_index
);
2306 case TGSI_OPCODE_CROSSPRODUCT
:
2307 /* TGSI_OPCODE_XPD */
2308 FETCH(&r
[0], 0, CHAN_Y
);
2309 FETCH(&r
[1], 1, CHAN_Z
);
2311 micro_mul( &r
[2], &r
[0], &r
[1] );
2313 FETCH(&r
[3], 0, CHAN_Z
);
2314 FETCH(&r
[4], 1, CHAN_Y
);
2316 micro_mul( &r
[5], &r
[3], &r
[4] );
2317 micro_sub( &r
[2], &r
[2], &r
[5] );
2319 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2320 STORE( &r
[2], 0, CHAN_X
);
2323 FETCH(&r
[2], 1, CHAN_X
);
2325 micro_mul( &r
[3], &r
[3], &r
[2] );
2327 FETCH(&r
[5], 0, CHAN_X
);
2329 micro_mul( &r
[1], &r
[1], &r
[5] );
2330 micro_sub( &r
[3], &r
[3], &r
[1] );
2332 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2333 STORE( &r
[3], 0, CHAN_Y
);
2336 micro_mul( &r
[5], &r
[5], &r
[4] );
2337 micro_mul( &r
[0], &r
[0], &r
[2] );
2338 micro_sub( &r
[5], &r
[5], &r
[0] );
2340 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2341 STORE( &r
[5], 0, CHAN_Z
);
2344 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2345 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2349 case TGSI_OPCODE_MULTIPLYMATRIX
:
2350 /* XXX: considered for removal */
2354 case TGSI_OPCODE_ABS
:
2355 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2356 FETCH(&r
[0], 0, chan_index
);
2358 micro_abs( &r
[0], &r
[0] );
2360 STORE(&r
[0], 0, chan_index
);
2364 case TGSI_OPCODE_RCC
:
2365 FETCH(&r
[0], 0, CHAN_X
);
2366 micro_div(&r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0]);
2367 micro_float_clamp(&r
[0], &r
[0]);
2368 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2369 STORE(&r
[0], 0, chan_index
);
2373 case TGSI_OPCODE_DPH
:
2374 FETCH(&r
[0], 0, CHAN_X
);
2375 FETCH(&r
[1], 1, CHAN_X
);
2377 micro_mul( &r
[0], &r
[0], &r
[1] );
2379 FETCH(&r
[1], 0, CHAN_Y
);
2380 FETCH(&r
[2], 1, CHAN_Y
);
2382 micro_mul( &r
[1], &r
[1], &r
[2] );
2383 micro_add( &r
[0], &r
[0], &r
[1] );
2385 FETCH(&r
[1], 0, CHAN_Z
);
2386 FETCH(&r
[2], 1, CHAN_Z
);
2388 micro_mul( &r
[1], &r
[1], &r
[2] );
2389 micro_add( &r
[0], &r
[0], &r
[1] );
2391 FETCH(&r
[1], 1, CHAN_W
);
2393 micro_add( &r
[0], &r
[0], &r
[1] );
2395 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2396 STORE( &r
[0], 0, chan_index
);
2400 case TGSI_OPCODE_COS
:
2401 FETCH(&r
[0], 0, CHAN_X
);
2403 micro_cos( &r
[0], &r
[0] );
2405 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2406 STORE( &r
[0], 0, chan_index
);
2410 case TGSI_OPCODE_DDX
:
2411 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2412 FETCH( &r
[0], 0, chan_index
);
2413 micro_ddx( &r
[0], &r
[0] );
2414 STORE( &r
[0], 0, chan_index
);
2418 case TGSI_OPCODE_DDY
:
2419 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2420 FETCH( &r
[0], 0, chan_index
);
2421 micro_ddy( &r
[0], &r
[0] );
2422 STORE( &r
[0], 0, chan_index
);
2426 case TGSI_OPCODE_KILP
:
2427 exec_kilp (mach
, inst
);
2430 case TGSI_OPCODE_KIL
:
2431 exec_kil (mach
, inst
);
2434 case TGSI_OPCODE_PK2H
:
2438 case TGSI_OPCODE_PK2US
:
2442 case TGSI_OPCODE_PK4B
:
2446 case TGSI_OPCODE_PK4UB
:
2450 case TGSI_OPCODE_RFL
:
2451 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2452 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2453 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2454 /* r0 = dp3(src0, src0) */
2455 FETCH(&r
[2], 0, CHAN_X
);
2456 micro_mul(&r
[0], &r
[2], &r
[2]);
2457 FETCH(&r
[4], 0, CHAN_Y
);
2458 micro_mul(&r
[8], &r
[4], &r
[4]);
2459 micro_add(&r
[0], &r
[0], &r
[8]);
2460 FETCH(&r
[6], 0, CHAN_Z
);
2461 micro_mul(&r
[8], &r
[6], &r
[6]);
2462 micro_add(&r
[0], &r
[0], &r
[8]);
2464 /* r1 = dp3(src0, src1) */
2465 FETCH(&r
[3], 1, CHAN_X
);
2466 micro_mul(&r
[1], &r
[2], &r
[3]);
2467 FETCH(&r
[5], 1, CHAN_Y
);
2468 micro_mul(&r
[8], &r
[4], &r
[5]);
2469 micro_add(&r
[1], &r
[1], &r
[8]);
2470 FETCH(&r
[7], 1, CHAN_Z
);
2471 micro_mul(&r
[8], &r
[6], &r
[7]);
2472 micro_add(&r
[1], &r
[1], &r
[8]);
2474 /* r1 = 2 * r1 / r0 */
2475 micro_add(&r
[1], &r
[1], &r
[1]);
2476 micro_div(&r
[1], &r
[1], &r
[0]);
2478 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2479 micro_mul(&r
[2], &r
[2], &r
[1]);
2480 micro_sub(&r
[2], &r
[2], &r
[3]);
2481 STORE(&r
[2], 0, CHAN_X
);
2483 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2484 micro_mul(&r
[4], &r
[4], &r
[1]);
2485 micro_sub(&r
[4], &r
[4], &r
[5]);
2486 STORE(&r
[4], 0, CHAN_Y
);
2488 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2489 micro_mul(&r
[6], &r
[6], &r
[1]);
2490 micro_sub(&r
[6], &r
[6], &r
[7]);
2491 STORE(&r
[6], 0, CHAN_Z
);
2494 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2495 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2499 case TGSI_OPCODE_SEQ
:
2500 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2501 FETCH( &r
[0], 0, chan_index
);
2502 FETCH( &r
[1], 1, chan_index
);
2503 micro_eq( &r
[0], &r
[0], &r
[1],
2504 &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
],
2505 &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2506 STORE( &r
[0], 0, chan_index
);
2510 case TGSI_OPCODE_SFL
:
2511 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2512 STORE(&mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, chan_index
);
2516 case TGSI_OPCODE_SGT
:
2517 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2518 FETCH( &r
[0], 0, chan_index
);
2519 FETCH( &r
[1], 1, chan_index
);
2520 micro_le( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
2521 STORE( &r
[0], 0, chan_index
);
2525 case TGSI_OPCODE_SIN
:
2526 FETCH( &r
[0], 0, CHAN_X
);
2527 micro_sin( &r
[0], &r
[0] );
2528 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2529 STORE( &r
[0], 0, chan_index
);
2533 case TGSI_OPCODE_SLE
:
2534 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2535 FETCH( &r
[0], 0, chan_index
);
2536 FETCH( &r
[1], 1, chan_index
);
2537 micro_le( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2538 STORE( &r
[0], 0, chan_index
);
2542 case TGSI_OPCODE_SNE
:
2543 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2544 FETCH( &r
[0], 0, chan_index
);
2545 FETCH( &r
[1], 1, chan_index
);
2546 micro_eq( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
2547 STORE( &r
[0], 0, chan_index
);
2551 case TGSI_OPCODE_STR
:
2552 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2553 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, chan_index
);
2557 case TGSI_OPCODE_TEX
:
2558 /* simple texture lookup */
2559 /* src[0] = texcoord */
2560 /* src[1] = sampler unit */
2561 exec_tex(mach
, inst
, FALSE
, FALSE
);
2564 case TGSI_OPCODE_TXB
:
2565 /* Texture lookup with lod bias */
2566 /* src[0] = texcoord (src[0].w = LOD bias) */
2567 /* src[1] = sampler unit */
2568 exec_tex(mach
, inst
, TRUE
, FALSE
);
2571 case TGSI_OPCODE_TXD
:
2572 /* Texture lookup with explicit partial derivatives */
2573 /* src[0] = texcoord */
2574 /* src[1] = d[strq]/dx */
2575 /* src[2] = d[strq]/dy */
2576 /* src[3] = sampler unit */
2580 case TGSI_OPCODE_TXL
:
2581 /* Texture lookup with explicit LOD */
2582 /* src[0] = texcoord (src[0].w = LOD) */
2583 /* src[1] = sampler unit */
2584 exec_tex(mach
, inst
, TRUE
, FALSE
);
2587 case TGSI_OPCODE_TXP
:
2588 /* Texture lookup with projection */
2589 /* src[0] = texcoord (src[0].w = projection) */
2590 /* src[1] = sampler unit */
2591 exec_tex(mach
, inst
, FALSE
, TRUE
);
2594 case TGSI_OPCODE_UP2H
:
2598 case TGSI_OPCODE_UP2US
:
2602 case TGSI_OPCODE_UP4B
:
2606 case TGSI_OPCODE_UP4UB
:
2610 case TGSI_OPCODE_X2D
:
2611 FETCH(&r
[0], 1, CHAN_X
);
2612 FETCH(&r
[1], 1, CHAN_Y
);
2613 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2614 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2615 FETCH(&r
[2], 2, CHAN_X
);
2616 micro_mul(&r
[2], &r
[2], &r
[0]);
2617 FETCH(&r
[3], 2, CHAN_Y
);
2618 micro_mul(&r
[3], &r
[3], &r
[1]);
2619 micro_add(&r
[2], &r
[2], &r
[3]);
2620 FETCH(&r
[3], 0, CHAN_X
);
2621 micro_add(&r
[2], &r
[2], &r
[3]);
2622 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2623 STORE(&r
[2], 0, CHAN_X
);
2625 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2626 STORE(&r
[2], 0, CHAN_Z
);
2629 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2630 IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2631 FETCH(&r
[2], 2, CHAN_Z
);
2632 micro_mul(&r
[2], &r
[2], &r
[0]);
2633 FETCH(&r
[3], 2, CHAN_W
);
2634 micro_mul(&r
[3], &r
[3], &r
[1]);
2635 micro_add(&r
[2], &r
[2], &r
[3]);
2636 FETCH(&r
[3], 0, CHAN_Y
);
2637 micro_add(&r
[2], &r
[2], &r
[3]);
2638 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2639 STORE(&r
[2], 0, CHAN_Y
);
2641 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2642 STORE(&r
[2], 0, CHAN_W
);
2647 case TGSI_OPCODE_ARA
:
2651 case TGSI_OPCODE_BRA
:
2655 case TGSI_OPCODE_CAL
:
2656 /* skip the call if no execution channels are enabled */
2657 if (mach
->ExecMask
) {
2660 /* push the Cond, Loop, Cont stacks */
2661 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2662 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2663 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2664 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2665 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2666 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2668 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
2669 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
2671 /* note that PC was already incremented above */
2672 mach
->CallStack
[mach
->CallStackTop
++] = *pc
;
2673 *pc
= inst
->InstructionExtLabel
.Label
;
2677 case TGSI_OPCODE_RET
:
2678 mach
->FuncMask
&= ~mach
->ExecMask
;
2679 UPDATE_EXEC_MASK(mach
);
2681 if (mach
->FuncMask
== 0x0) {
2682 /* really return now (otherwise, keep executing) */
2684 if (mach
->CallStackTop
== 0) {
2685 /* returning from main() */
2689 *pc
= mach
->CallStack
[--mach
->CallStackTop
];
2691 /* pop the Cond, Loop, Cont stacks */
2692 assert(mach
->CondStackTop
> 0);
2693 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2694 assert(mach
->LoopStackTop
> 0);
2695 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2696 assert(mach
->ContStackTop
> 0);
2697 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2698 assert(mach
->FuncStackTop
> 0);
2699 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
2701 UPDATE_EXEC_MASK(mach
);
2705 case TGSI_OPCODE_SSG
:
2706 /* TGSI_OPCODE_SGN */
2707 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2708 FETCH( &r
[0], 0, chan_index
);
2709 micro_sgn( &r
[0], &r
[0] );
2710 STORE( &r
[0], 0, chan_index
);
2714 case TGSI_OPCODE_CMP
:
2715 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2716 FETCH(&r
[0], 0, chan_index
);
2717 FETCH(&r
[1], 1, chan_index
);
2718 FETCH(&r
[2], 2, chan_index
);
2720 micro_lt( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2] );
2722 STORE(&r
[0], 0, chan_index
);
2726 case TGSI_OPCODE_SCS
:
2727 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2728 FETCH( &r
[0], 0, CHAN_X
);
2729 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2730 micro_cos(&r
[1], &r
[0]);
2731 STORE(&r
[1], 0, CHAN_X
);
2733 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2734 micro_sin(&r
[1], &r
[0]);
2735 STORE(&r
[1], 0, CHAN_Y
);
2738 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
2739 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
2741 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
2742 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2746 case TGSI_OPCODE_NRM
:
2747 /* 3-component vector normalize */
2748 if(IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2749 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2750 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2751 /* r3 = sqrt(dp3(src0, src0)) */
2752 FETCH(&r
[0], 0, CHAN_X
);
2753 micro_mul(&r
[3], &r
[0], &r
[0]);
2754 FETCH(&r
[1], 0, CHAN_Y
);
2755 micro_mul(&r
[4], &r
[1], &r
[1]);
2756 micro_add(&r
[3], &r
[3], &r
[4]);
2757 FETCH(&r
[2], 0, CHAN_Z
);
2758 micro_mul(&r
[4], &r
[2], &r
[2]);
2759 micro_add(&r
[3], &r
[3], &r
[4]);
2760 micro_sqrt(&r
[3], &r
[3]);
2762 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2763 micro_div(&r
[0], &r
[0], &r
[3]);
2764 STORE(&r
[0], 0, CHAN_X
);
2766 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2767 micro_div(&r
[1], &r
[1], &r
[3]);
2768 STORE(&r
[1], 0, CHAN_Y
);
2770 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2771 micro_div(&r
[2], &r
[2], &r
[3]);
2772 STORE(&r
[2], 0, CHAN_Z
);
2775 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2776 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2780 case TGSI_OPCODE_NRM4
:
2781 /* 4-component vector normalize */
2783 union tgsi_exec_channel tmp
, dot
;
2785 /* tmp = dp4(src0, src0): */
2786 FETCH( &r
[0], 0, CHAN_X
);
2787 micro_mul( &tmp
, &r
[0], &r
[0] );
2789 FETCH( &r
[1], 0, CHAN_Y
);
2790 micro_mul( &dot
, &r
[1], &r
[1] );
2791 micro_add( &tmp
, &tmp
, &dot
);
2793 FETCH( &r
[2], 0, CHAN_Z
);
2794 micro_mul( &dot
, &r
[2], &r
[2] );
2795 micro_add( &tmp
, &tmp
, &dot
);
2797 FETCH( &r
[3], 0, CHAN_W
);
2798 micro_mul( &dot
, &r
[3], &r
[3] );
2799 micro_add( &tmp
, &tmp
, &dot
);
2801 /* tmp = 1 / sqrt(tmp) */
2802 micro_sqrt( &tmp
, &tmp
);
2803 micro_div( &tmp
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &tmp
);
2805 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2806 /* chan = chan * tmp */
2807 micro_mul( &r
[chan_index
], &tmp
, &r
[chan_index
] );
2808 STORE( &r
[chan_index
], 0, chan_index
);
2813 case TGSI_OPCODE_DIV
:
2817 case TGSI_OPCODE_DP2
:
2818 FETCH( &r
[0], 0, CHAN_X
);
2819 FETCH( &r
[1], 1, CHAN_X
);
2820 micro_mul( &r
[0], &r
[0], &r
[1] );
2822 FETCH( &r
[1], 0, CHAN_Y
);
2823 FETCH( &r
[2], 1, CHAN_Y
);
2824 micro_mul( &r
[1], &r
[1], &r
[2] );
2825 micro_add( &r
[0], &r
[0], &r
[1] );
2827 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2828 STORE( &r
[0], 0, chan_index
);
2832 case TGSI_OPCODE_IF
:
2834 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2835 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2836 FETCH( &r
[0], 0, CHAN_X
);
2837 /* update CondMask */
2839 mach
->CondMask
&= ~0x1;
2842 mach
->CondMask
&= ~0x2;
2845 mach
->CondMask
&= ~0x4;
2848 mach
->CondMask
&= ~0x8;
2850 UPDATE_EXEC_MASK(mach
);
2851 /* Todo: If CondMask==0, jump to ELSE */
2854 case TGSI_OPCODE_ELSE
:
2855 /* invert CondMask wrt previous mask */
2858 assert(mach
->CondStackTop
> 0);
2859 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
2860 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
2861 UPDATE_EXEC_MASK(mach
);
2862 /* Todo: If CondMask==0, jump to ENDIF */
2866 case TGSI_OPCODE_ENDIF
:
2868 assert(mach
->CondStackTop
> 0);
2869 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2870 UPDATE_EXEC_MASK(mach
);
2873 case TGSI_OPCODE_END
:
2874 /* halt execution */
2878 case TGSI_OPCODE_REP
:
2882 case TGSI_OPCODE_ENDREP
:
2886 case TGSI_OPCODE_PUSHA
:
2890 case TGSI_OPCODE_POPA
:
2894 case TGSI_OPCODE_CEIL
:
2895 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2896 FETCH( &r
[0], 0, chan_index
);
2897 micro_ceil( &r
[0], &r
[0] );
2898 STORE( &r
[0], 0, chan_index
);
2902 case TGSI_OPCODE_I2F
:
2903 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2904 FETCH( &r
[0], 0, chan_index
);
2905 micro_i2f( &r
[0], &r
[0] );
2906 STORE( &r
[0], 0, chan_index
);
2910 case TGSI_OPCODE_NOT
:
2911 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2912 FETCH( &r
[0], 0, chan_index
);
2913 micro_not( &r
[0], &r
[0] );
2914 STORE( &r
[0], 0, chan_index
);
2918 case TGSI_OPCODE_TRUNC
:
2919 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2920 FETCH( &r
[0], 0, chan_index
);
2921 micro_trunc( &r
[0], &r
[0] );
2922 STORE( &r
[0], 0, chan_index
);
2926 case TGSI_OPCODE_SHL
:
2927 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2928 FETCH( &r
[0], 0, chan_index
);
2929 FETCH( &r
[1], 1, chan_index
);
2930 micro_shl( &r
[0], &r
[0], &r
[1] );
2931 STORE( &r
[0], 0, chan_index
);
2935 case TGSI_OPCODE_SHR
:
2936 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2937 FETCH( &r
[0], 0, chan_index
);
2938 FETCH( &r
[1], 1, chan_index
);
2939 micro_ishr( &r
[0], &r
[0], &r
[1] );
2940 STORE( &r
[0], 0, chan_index
);
2944 case TGSI_OPCODE_AND
:
2945 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2946 FETCH( &r
[0], 0, chan_index
);
2947 FETCH( &r
[1], 1, chan_index
);
2948 micro_and( &r
[0], &r
[0], &r
[1] );
2949 STORE( &r
[0], 0, chan_index
);
2953 case TGSI_OPCODE_OR
:
2954 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2955 FETCH( &r
[0], 0, chan_index
);
2956 FETCH( &r
[1], 1, chan_index
);
2957 micro_or( &r
[0], &r
[0], &r
[1] );
2958 STORE( &r
[0], 0, chan_index
);
2962 case TGSI_OPCODE_MOD
:
2966 case TGSI_OPCODE_XOR
:
2967 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2968 FETCH( &r
[0], 0, chan_index
);
2969 FETCH( &r
[1], 1, chan_index
);
2970 micro_xor( &r
[0], &r
[0], &r
[1] );
2971 STORE( &r
[0], 0, chan_index
);
2975 case TGSI_OPCODE_SAD
:
2979 case TGSI_OPCODE_TXF
:
2983 case TGSI_OPCODE_TXQ
:
2987 case TGSI_OPCODE_EMIT
:
2988 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += 16;
2989 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
2992 case TGSI_OPCODE_ENDPRIM
:
2993 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]++;
2994 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]] = 0;
2997 case TGSI_OPCODE_LOOP
:
2998 /* fall-through (for now) */
2999 case TGSI_OPCODE_BGNLOOP2
:
3000 /* push LoopMask and ContMasks */
3001 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3002 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
3003 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3004 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
3007 case TGSI_OPCODE_ENDLOOP
:
3008 /* fall-through (for now at least) */
3009 case TGSI_OPCODE_ENDLOOP2
:
3010 /* Restore ContMask, but don't pop */
3011 assert(mach
->ContStackTop
> 0);
3012 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3013 UPDATE_EXEC_MASK(mach
);
3014 if (mach
->ExecMask
) {
3015 /* repeat loop: jump to instruction just past BGNLOOP */
3016 *pc
= inst
->InstructionExtLabel
.Label
+ 1;
3019 /* exit loop: pop LoopMask */
3020 assert(mach
->LoopStackTop
> 0);
3021 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3023 assert(mach
->ContStackTop
> 0);
3024 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3026 UPDATE_EXEC_MASK(mach
);
3029 case TGSI_OPCODE_BRK
:
3030 /* turn off loop channels for each enabled exec channel */
3031 mach
->LoopMask
&= ~mach
->ExecMask
;
3032 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3033 UPDATE_EXEC_MASK(mach
);
3036 case TGSI_OPCODE_CONT
:
3037 /* turn off cont channels for each enabled exec channel */
3038 mach
->ContMask
&= ~mach
->ExecMask
;
3039 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3040 UPDATE_EXEC_MASK(mach
);
3043 case TGSI_OPCODE_BGNSUB
:
3047 case TGSI_OPCODE_ENDSUB
:
3051 case TGSI_OPCODE_NOISE1
:
3055 case TGSI_OPCODE_NOISE2
:
3059 case TGSI_OPCODE_NOISE3
:
3063 case TGSI_OPCODE_NOISE4
:
3067 case TGSI_OPCODE_NOP
:
3077 * Run TGSI interpreter.
3078 * \return bitmask of "alive" quad components
/*
 * Entry point of the interpreter for one quad: resets the machine's
 * execution masks and bookkeeping temporaries, runs every declaration,
 * then runs instructions through exec_instruction(), and finally returns
 * the bitwise complement of the kill-mask temporary.
 */
3081 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
/* All five masks start as 0xf: one bit set per quad component. */
3086 mach
->CondMask
= 0xf;
3087 mach
->LoopMask
= 0xf;
3088 mach
->ContMask
= 0xf;
3089 mach
->FuncMask
= 0xf;
3090 mach
->ExecMask
= 0xf;
/*
 * The flow-control stacks are expected to be empty on entry.
 * CondStackTop is force-cleared first, so its assert below cannot fire.
 * NOTE(review): FuncStackTop is pushed by CAL and popped by RET but is
 * neither reset nor asserted here -- confirm that is intentional.
 */
3092 mach
->CondStackTop
= 0; /* temporarily subvert this assertion */
3093 assert(mach
->CondStackTop
== 0);
3094 assert(mach
->LoopStackTop
== 0);
3095 assert(mach
->ContStackTop
== 0);
3096 assert(mach
->CallStackTop
== 0);
/*
 * Clear the kill mask (its complement is this function's return value)
 * and the TEMP_OUTPUT value that the EMIT opcode advances.
 */
3098 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
3099 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
/*
 * Geometry programs only: start at primitive index 0 with a zero count.
 * EMIT increments Primitives[index]; ENDPRIM advances the index and
 * zeroes the new slot.
 */
3101 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
3102 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
3103 mach
->Primitives
[0] = 0;
/*
 * For each of the QUAD_SIZE components, set the X, Y, Z and W fields of
 * the condition-code temporary to TGSI_EXEC_CC_EQ.
 */
3106 for (i
= 0; i
< QUAD_SIZE
; i
++) {
3107 mach
->Temps
[TEMP_CC_I
].xyzw
[TEMP_CC_C
].u
[i
] =
3108 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_X_SHIFT
) |
3109 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Y_SHIFT
) |
3110 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Z_SHIFT
) |
3111 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_W_SHIFT
);
3114 /* execute declarations (interpolants) */
3115 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
3116 exec_declaration( mach
, mach
->Declarations
+i
);
3119 /* execute instructions, until pc is set to -1 */
/* pc is passed by address so exec_instruction() can redirect control flow. */
3121 assert(pc
< (int) mach
->NumInstructions
);
3122 exec_instruction( mach
, mach
->Instructions
+ pc
, &pc
);
/*
 * NOTE(review): `ctx` is not a parameter of this function (only `mach`
 * is); confirm whether this depth-scaling block is actually compiled in
 * or is disabled by the preprocessor.
 */
3126 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3127 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
3129 * Scale back depth component.
3131 for (i
= 0; i
< 4; i
++)
3132 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
/* Invert the kill mask so that set bits denote components still alive. */
3136 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];