1 /**************************************************************************
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * TGSI interpreter/executor.
31 * Flow control information:
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
43 * The ExecMask is computed from three other masks (CondMask, LoopMask and
44 * ContMask) which are controlled by the flow control instructions (namely:
45 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
53 #include "tgsi_platform.h"
54 #include "tgsi_core.h"
55 #include "pipe/p_state.h"
59 #include "main/context.h"
60 #include "main/macros.h"
/*
 * Positions of the four quad elements inside a channel's value array.
 * Code in this file uses them to index src->f[] when subtracting one
 * quad element from another.
 */
#define TILE_BOTTOM_LEFT  0
#define TILE_BOTTOM_RIGHT 1
#define TILE_TOP_LEFT     2
#define TILE_TOP_RIGHT    3
/*
 * Short aliases for the TGSI_EXEC_TEMP_* constants.
 *
 * For each pair, the _I name is used as an index into mach->Temps[] and the
 * matching _C name selects the channel inside that register's xyzw[] array,
 * e.g. mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
/* Temporary that accumulates the kill mask (see exec_kil / exec_kilp). */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
/* Temporary whose u[0] is added to the register index when storing to outputs. */
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
/**
 * Iterate CHAN over the four channel indices 0..3.
 * CHAN must be a modifiable integer lvalue; it is parenthesized so that
 * expressions such as *ptr expand correctly.
 */
#define FOR_EACH_CHANNEL(CHAN)\
   for ((CHAN) = 0; (CHAN) < 4; (CHAN)++)

/** Non-zero if bit CHAN is set in the write mask of INST's destination register 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same as IS_CHANNEL_ENABLED, but for INST's destination register 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Run the following statement once for every channel enabled in the write
 * mask of destination register 0.
 * NOTE: expands to "for (...) if (...)", so an 'else' written directly after
 * the body would bind to the hidden 'if'.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same as FOR_EACH_ENABLED_CHANNEL, but for destination register 1. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
/**
 * Recompute the execution mask: it is the bitwise AND of the conditional
 * mask, the loop mask and the continue mask.
 * MACH is a pointer expression; it is evaluated several times, so it must
 * not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & (MACH)->ContMask)
123 tgsi_exec_prepare( struct tgsi_exec_machine
*mach
)
125 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
126 struct tgsi_parse_context parse
;
127 struct tgsi_full_instruction
*instructions
;
128 struct tgsi_full_declaration
*declarations
;
129 uint maxInstructions
= 10, numInstructions
= 0;
130 uint maxDeclarations
= 10, numDeclarations
= 0;
137 declarations
= (struct tgsi_full_declaration
*)
138 malloc(maxDeclarations
* sizeof(struct tgsi_full_declaration
));
140 instructions
= (struct tgsi_full_instruction
*)
141 malloc(maxInstructions
* sizeof(struct tgsi_full_instruction
));
143 k
= tgsi_parse_init( &parse
, mach
->Tokens
);
144 if (k
!= TGSI_PARSE_OK
) {
145 fprintf(stderr
, "Problem parsing!\n");
149 while( !tgsi_parse_end_of_tokens( &parse
) ) {
150 GLuint pointer
= parse
.Position
;
153 tgsi_parse_token( &parse
);
154 switch( parse
.FullToken
.Token
.Type
) {
155 case TGSI_TOKEN_TYPE_DECLARATION
:
156 /* save expanded declaration */
157 if (numDeclarations
== maxDeclarations
) {
158 maxDeclarations
+= 10;
159 declarations
= realloc(declarations
,
161 * sizeof(struct tgsi_full_instruction
));
163 memcpy(declarations
+ numDeclarations
,
164 &parse
.FullToken
.FullInstruction
,
165 sizeof(declarations
[0]));
169 case TGSI_TOKEN_TYPE_IMMEDIATE
:
170 assert( (parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1) % 4 == 0 );
171 assert( mach
->ImmLimit
+ (parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1) / 4 <= 256 );
173 for( i
= 0; i
< parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1; i
++ ) {
174 mach
->Imms
[mach
->ImmLimit
+ i
/ 4][i
% 4] = parse
.FullToken
.FullImmediate
.u
.ImmediateFloat32
[i
].Float
;
176 mach
->ImmLimit
+= (parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1) / 4;
179 case TGSI_TOKEN_TYPE_INSTRUCTION
:
180 assert( labels
->count
< 128 );
182 labels
->labels
[labels
->count
][0] = instno
;
183 labels
->labels
[labels
->count
][1] = pointer
;
186 /* save expanded instruction */
187 if (numInstructions
== maxInstructions
) {
188 maxInstructions
+= 10;
189 instructions
= realloc(instructions
,
191 * sizeof(struct tgsi_full_instruction
));
193 memcpy(instructions
+ numInstructions
,
194 &parse
.FullToken
.FullInstruction
,
195 sizeof(instructions
[0]));
203 tgsi_parse_free (&parse
);
205 if (mach
->Declarations
) {
206 free(mach
->Declarations
);
208 mach
->Declarations
= declarations
;
209 mach
->NumDeclarations
= numDeclarations
;
211 if (mach
->Instructions
) {
212 free(mach
->Instructions
);
214 mach
->Instructions
= instructions
;
215 mach
->NumInstructions
= numInstructions
;
220 * Initialize machine state by expanding tokens to full instructions,
221 * allocating temporary storage, setting up constants, etc.
222 * After this, we can call tgsi_exec_machine_run() many times.
225 tgsi_exec_machine_init(
226 struct tgsi_exec_machine
*mach
,
227 const struct tgsi_token
*tokens
,
229 struct tgsi_sampler
*samplers
)
232 struct tgsi_parse_context parse
;
235 tgsi_dump(tokens
, 0);
238 mach
->Tokens
= tokens
;
240 mach
->Samplers
= samplers
;
242 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
243 if (k
!= TGSI_PARSE_OK
) {
244 printf("Problem parsing!\n");
248 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
249 tgsi_parse_free (&parse
);
251 mach
->Temps
= (struct tgsi_exec_vector
*) tgsi_align_128bit( mach
->_Temps
);
252 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_NUM_TEMPS
];
254 /* Setup constants. */
255 for( i
= 0; i
< 4; i
++ ) {
256 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
257 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
258 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
259 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
260 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
261 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
262 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
263 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
266 tgsi_exec_prepare( mach
);
272 union tgsi_exec_channel
*dst
,
273 const union tgsi_exec_channel
*src
)
275 dst
->f
[0] = (GLfloat
) fabs( (GLdouble
) src
->f
[0] );
276 dst
->f
[1] = (GLfloat
) fabs( (GLdouble
) src
->f
[1] );
277 dst
->f
[2] = (GLfloat
) fabs( (GLdouble
) src
->f
[2] );
278 dst
->f
[3] = (GLfloat
) fabs( (GLdouble
) src
->f
[3] );
283 union tgsi_exec_channel
*dst
,
284 const union tgsi_exec_channel
*src0
,
285 const union tgsi_exec_channel
*src1
)
287 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
288 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
289 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
290 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
295 union tgsi_exec_channel
*dst
,
296 const union tgsi_exec_channel
*src0
,
297 const union tgsi_exec_channel
*src1
)
299 dst
->i
[0] = src0
->i
[0] + src1
->i
[0];
300 dst
->i
[1] = src0
->i
[1] + src1
->i
[1];
301 dst
->i
[2] = src0
->i
[2] + src1
->i
[2];
302 dst
->i
[3] = src0
->i
[3] + src1
->i
[3];
307 union tgsi_exec_channel
*dst
,
308 const union tgsi_exec_channel
*src0
,
309 const union tgsi_exec_channel
*src1
)
311 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
312 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
313 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
314 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
319 union tgsi_exec_channel
*dst
,
320 const union tgsi_exec_channel
*src
)
322 dst
->f
[0] = (GLfloat
) ceil( (GLdouble
) src
->f
[0] );
323 dst
->f
[1] = (GLfloat
) ceil( (GLdouble
) src
->f
[1] );
324 dst
->f
[2] = (GLfloat
) ceil( (GLdouble
) src
->f
[2] );
325 dst
->f
[3] = (GLfloat
) ceil( (GLdouble
) src
->f
[3] );
330 union tgsi_exec_channel
*dst
,
331 const union tgsi_exec_channel
*src
)
333 dst
->f
[0] = (GLfloat
) cos( (GLdouble
) src
->f
[0] );
334 dst
->f
[1] = (GLfloat
) cos( (GLdouble
) src
->f
[1] );
335 dst
->f
[2] = (GLfloat
) cos( (GLdouble
) src
->f
[2] );
336 dst
->f
[3] = (GLfloat
) cos( (GLdouble
) src
->f
[3] );
341 union tgsi_exec_channel
*dst
,
342 const union tgsi_exec_channel
*src
)
347 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
352 union tgsi_exec_channel
*dst
,
353 const union tgsi_exec_channel
*src
)
358 dst
->f
[3] = src
->f
[TILE_TOP_LEFT
] - src
->f
[TILE_BOTTOM_LEFT
];
363 union tgsi_exec_channel
*dst
,
364 const union tgsi_exec_channel
*src0
,
365 const union tgsi_exec_channel
*src1
)
367 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
368 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
369 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
370 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
375 union tgsi_exec_channel
*dst
,
376 const union tgsi_exec_channel
*src0
,
377 const union tgsi_exec_channel
*src1
)
379 dst
->u
[0] = src0
->u
[0] / src1
->u
[0];
380 dst
->u
[1] = src0
->u
[1] / src1
->u
[1];
381 dst
->u
[2] = src0
->u
[2] / src1
->u
[2];
382 dst
->u
[3] = src0
->u
[3] / src1
->u
[3];
387 union tgsi_exec_channel
*dst
,
388 const union tgsi_exec_channel
*src0
,
389 const union tgsi_exec_channel
*src1
,
390 const union tgsi_exec_channel
*src2
,
391 const union tgsi_exec_channel
*src3
)
393 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
394 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
395 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
396 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
401 union tgsi_exec_channel
*dst
,
402 const union tgsi_exec_channel
*src0
,
403 const union tgsi_exec_channel
*src1
,
404 const union tgsi_exec_channel
*src2
,
405 const union tgsi_exec_channel
*src3
)
407 dst
->i
[0] = src0
->i
[0] == src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
408 dst
->i
[1] = src0
->i
[1] == src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
409 dst
->i
[2] = src0
->i
[2] == src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
410 dst
->i
[3] = src0
->i
[3] == src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
415 union tgsi_exec_channel
*dst
,
416 const union tgsi_exec_channel
*src
)
418 dst
->f
[0] = (GLfloat
) pow( 2.0, (GLdouble
) src
->f
[0] );
419 dst
->f
[1] = (GLfloat
) pow( 2.0, (GLdouble
) src
->f
[1] );
420 dst
->f
[2] = (GLfloat
) pow( 2.0, (GLdouble
) src
->f
[2] );
421 dst
->f
[3] = (GLfloat
) pow( 2.0, (GLdouble
) src
->f
[3] );
426 union tgsi_exec_channel
*dst
,
427 const union tgsi_exec_channel
*src
)
429 dst
->i
[0] = (GLint
) src
->f
[0];
430 dst
->i
[1] = (GLint
) src
->f
[1];
431 dst
->i
[2] = (GLint
) src
->f
[2];
432 dst
->i
[3] = (GLint
) src
->f
[3];
437 union tgsi_exec_channel
*dst
,
438 const union tgsi_exec_channel
*src
)
440 dst
->u
[0] = (GLuint
) src
->f
[0];
441 dst
->u
[1] = (GLuint
) src
->f
[1];
442 dst
->u
[2] = (GLuint
) src
->f
[2];
443 dst
->u
[3] = (GLuint
) src
->f
[3];
448 union tgsi_exec_channel
*dst
,
449 const union tgsi_exec_channel
*src
)
451 dst
->f
[0] = (GLfloat
) floor( (GLdouble
) src
->f
[0] );
452 dst
->f
[1] = (GLfloat
) floor( (GLdouble
) src
->f
[1] );
453 dst
->f
[2] = (GLfloat
) floor( (GLdouble
) src
->f
[2] );
454 dst
->f
[3] = (GLfloat
) floor( (GLdouble
) src
->f
[3] );
459 union tgsi_exec_channel
*dst
,
460 const union tgsi_exec_channel
*src
)
462 dst
->f
[0] = src
->f
[0] - (GLfloat
) floor( (GLdouble
) src
->f
[0] );
463 dst
->f
[1] = src
->f
[1] - (GLfloat
) floor( (GLdouble
) src
->f
[1] );
464 dst
->f
[2] = src
->f
[2] - (GLfloat
) floor( (GLdouble
) src
->f
[2] );
465 dst
->f
[3] = src
->f
[3] - (GLfloat
) floor( (GLdouble
) src
->f
[3] );
470 union tgsi_exec_channel
*dst
,
471 const union tgsi_exec_channel
*src0
,
472 const union tgsi_exec_channel
*src1
,
473 const union tgsi_exec_channel
*src2
,
474 const union tgsi_exec_channel
*src3
)
476 dst
->f
[0] = src0
->f
[0] >= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
477 dst
->f
[1] = src0
->f
[1] >= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
478 dst
->f
[2] = src0
->f
[2] >= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
479 dst
->f
[3] = src0
->f
[3] >= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
484 union tgsi_exec_channel
*dst
,
485 const union tgsi_exec_channel
*src
)
487 dst
->f
[0] = (GLfloat
) src
->i
[0];
488 dst
->f
[1] = (GLfloat
) src
->i
[1];
489 dst
->f
[2] = (GLfloat
) src
->i
[2];
490 dst
->f
[3] = (GLfloat
) src
->i
[3];
495 union tgsi_exec_channel
*dst
,
496 const union tgsi_exec_channel
*src
)
498 dst
->f
[0] = (GLfloat
) log( (GLdouble
) src
->f
[0] ) * 1.442695f
;
499 dst
->f
[1] = (GLfloat
) log( (GLdouble
) src
->f
[1] ) * 1.442695f
;
500 dst
->f
[2] = (GLfloat
) log( (GLdouble
) src
->f
[2] ) * 1.442695f
;
501 dst
->f
[3] = (GLfloat
) log( (GLdouble
) src
->f
[3] ) * 1.442695f
;
506 union tgsi_exec_channel
*dst
,
507 const union tgsi_exec_channel
*src0
,
508 const union tgsi_exec_channel
*src1
,
509 const union tgsi_exec_channel
*src2
,
510 const union tgsi_exec_channel
*src3
)
512 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
513 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
514 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
515 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
520 union tgsi_exec_channel
*dst
,
521 const union tgsi_exec_channel
*src0
,
522 const union tgsi_exec_channel
*src1
,
523 const union tgsi_exec_channel
*src2
,
524 const union tgsi_exec_channel
*src3
)
526 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
527 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
528 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
529 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
534 union tgsi_exec_channel
*dst
,
535 const union tgsi_exec_channel
*src0
,
536 const union tgsi_exec_channel
*src1
,
537 const union tgsi_exec_channel
*src2
,
538 const union tgsi_exec_channel
*src3
)
540 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src2
->u
[0] : src3
->u
[0];
541 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src2
->u
[1] : src3
->u
[1];
542 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src2
->u
[2] : src3
->u
[2];
543 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src2
->u
[3] : src3
->u
[3];
548 union tgsi_exec_channel
*dst
,
549 const union tgsi_exec_channel
*src0
,
550 const union tgsi_exec_channel
*src1
)
552 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
553 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
554 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
555 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
560 union tgsi_exec_channel
*dst
,
561 const union tgsi_exec_channel
*src0
,
562 const union tgsi_exec_channel
*src1
)
564 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
565 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
566 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
567 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
572 union tgsi_exec_channel
*dst
,
573 const union tgsi_exec_channel
*src0
,
574 const union tgsi_exec_channel
*src1
)
576 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
577 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
578 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
579 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
584 union tgsi_exec_channel
*dst
,
585 const union tgsi_exec_channel
*src0
,
586 const union tgsi_exec_channel
*src1
)
588 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
589 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
590 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
591 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
596 union tgsi_exec_channel
*dst
,
597 const union tgsi_exec_channel
*src0
,
598 const union tgsi_exec_channel
*src1
)
600 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
601 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
602 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
603 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
608 union tgsi_exec_channel
*dst
,
609 const union tgsi_exec_channel
*src0
,
610 const union tgsi_exec_channel
*src1
)
612 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
613 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
614 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
615 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
620 union tgsi_exec_channel
*dst
,
621 const union tgsi_exec_channel
*src0
,
622 const union tgsi_exec_channel
*src1
)
624 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
625 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
626 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
627 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
632 union tgsi_exec_channel
*dst
,
633 const union tgsi_exec_channel
*src0
,
634 const union tgsi_exec_channel
*src1
)
636 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
637 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
638 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
639 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
644 union tgsi_exec_channel
*dst
,
645 const union tgsi_exec_channel
*src0
,
646 const union tgsi_exec_channel
*src1
)
648 dst
->i
[0] = src0
->i
[0] * src1
->i
[0];
649 dst
->i
[1] = src0
->i
[1] * src1
->i
[1];
650 dst
->i
[2] = src0
->i
[2] * src1
->i
[2];
651 dst
->i
[3] = src0
->i
[3] * src1
->i
[3];
656 union tgsi_exec_channel
*dst0
,
657 union tgsi_exec_channel
*dst1
,
658 const union tgsi_exec_channel
*src0
,
659 const union tgsi_exec_channel
*src1
)
661 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
662 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
663 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
664 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
673 union tgsi_exec_channel
*dst0
,
674 union tgsi_exec_channel
*dst1
,
675 const union tgsi_exec_channel
*src0
,
676 const union tgsi_exec_channel
*src1
)
678 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
679 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
680 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
681 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
690 union tgsi_exec_channel
*dst
,
691 const union tgsi_exec_channel
*src0
,
692 const union tgsi_exec_channel
*src1
,
693 const union tgsi_exec_channel
*src2
)
695 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
696 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
697 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
698 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
703 union tgsi_exec_channel
*dst
,
704 const union tgsi_exec_channel
*src0
,
705 const union tgsi_exec_channel
*src1
,
706 const union tgsi_exec_channel
*src2
,
707 const union tgsi_exec_channel
*src3
)
709 dst
->f
[0] = src0
->f
[0] != src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
710 dst
->f
[1] = src0
->f
[1] != src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
711 dst
->f
[2] = src0
->f
[2] != src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
712 dst
->f
[3] = src0
->f
[3] != src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
717 union tgsi_exec_channel
*dst
,
718 const union tgsi_exec_channel
*src
)
720 dst
->f
[0] = -src
->f
[0];
721 dst
->f
[1] = -src
->f
[1];
722 dst
->f
[2] = -src
->f
[2];
723 dst
->f
[3] = -src
->f
[3];
728 union tgsi_exec_channel
*dst
,
729 const union tgsi_exec_channel
*src
)
731 dst
->i
[0] = -src
->i
[0];
732 dst
->i
[1] = -src
->i
[1];
733 dst
->i
[2] = -src
->i
[2];
734 dst
->i
[3] = -src
->i
[3];
739 union tgsi_exec_channel
*dst
,
740 const union tgsi_exec_channel
*src
)
742 dst
->u
[0] = ~src
->u
[0];
743 dst
->u
[1] = ~src
->u
[1];
744 dst
->u
[2] = ~src
->u
[2];
745 dst
->u
[3] = ~src
->u
[3];
750 union tgsi_exec_channel
*dst
,
751 const union tgsi_exec_channel
*src0
,
752 const union tgsi_exec_channel
*src1
)
754 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
755 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
756 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
757 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
762 union tgsi_exec_channel
*dst
,
763 const union tgsi_exec_channel
*src0
,
764 const union tgsi_exec_channel
*src1
)
766 dst
->f
[0] = (GLfloat
) pow( (GLdouble
) src0
->f
[0], (GLdouble
) src1
->f
[0] );
767 dst
->f
[1] = (GLfloat
) pow( (GLdouble
) src0
->f
[1], (GLdouble
) src1
->f
[1] );
768 dst
->f
[2] = (GLfloat
) pow( (GLdouble
) src0
->f
[2], (GLdouble
) src1
->f
[2] );
769 dst
->f
[3] = (GLfloat
) pow( (GLdouble
) src0
->f
[3], (GLdouble
) src1
->f
[3] );
774 union tgsi_exec_channel
*dst
,
775 const union tgsi_exec_channel
*src
)
777 dst
->f
[0] = (GLfloat
) floor( (GLdouble
) (src
->f
[0] + 0.5f
) );
778 dst
->f
[1] = (GLfloat
) floor( (GLdouble
) (src
->f
[1] + 0.5f
) );
779 dst
->f
[2] = (GLfloat
) floor( (GLdouble
) (src
->f
[2] + 0.5f
) );
780 dst
->f
[3] = (GLfloat
) floor( (GLdouble
) (src
->f
[3] + 0.5f
) );
785 union tgsi_exec_channel
*dst
,
786 const union tgsi_exec_channel
*src0
,
787 const union tgsi_exec_channel
*src1
)
789 dst
->i
[0] = src0
->i
[0] << src1
->i
[0];
790 dst
->i
[1] = src0
->i
[1] << src1
->i
[1];
791 dst
->i
[2] = src0
->i
[2] << src1
->i
[2];
792 dst
->i
[3] = src0
->i
[3] << src1
->i
[3];
797 union tgsi_exec_channel
*dst
,
798 const union tgsi_exec_channel
*src0
,
799 const union tgsi_exec_channel
*src1
)
801 dst
->i
[0] = src0
->i
[0] >> src1
->i
[0];
802 dst
->i
[1] = src0
->i
[1] >> src1
->i
[1];
803 dst
->i
[2] = src0
->i
[2] >> src1
->i
[2];
804 dst
->i
[3] = src0
->i
[3] >> src1
->i
[3];
809 union tgsi_exec_channel
*dst
,
810 const union tgsi_exec_channel
*src0
)
812 dst
->f
[0] = (float) (int) src0
->u
[0];
813 dst
->f
[1] = (float) (int) src0
->u
[1];
814 dst
->f
[2] = (float) (int) src0
->u
[2];
815 dst
->f
[3] = (float) (int) src0
->u
[3];
820 union tgsi_exec_channel
*dst
,
821 const union tgsi_exec_channel
*src0
,
822 const union tgsi_exec_channel
*src1
)
824 dst
->u
[0] = src0
->u
[0] >> src1
->u
[0];
825 dst
->u
[1] = src0
->u
[1] >> src1
->u
[1];
826 dst
->u
[2] = src0
->u
[2] >> src1
->u
[2];
827 dst
->u
[3] = src0
->u
[3] >> src1
->u
[3];
832 union tgsi_exec_channel
*dst
,
833 const union tgsi_exec_channel
*src
)
835 dst
->f
[0] = (GLfloat
) sin( (GLdouble
) src
->f
[0] );
836 dst
->f
[1] = (GLfloat
) sin( (GLdouble
) src
->f
[1] );
837 dst
->f
[2] = (GLfloat
) sin( (GLdouble
) src
->f
[2] );
838 dst
->f
[3] = (GLfloat
) sin( (GLdouble
) src
->f
[3] );
842 micro_sqrt( union tgsi_exec_channel
*dst
,
843 const union tgsi_exec_channel
*src
)
845 dst
->f
[0] = (GLfloat
) sqrt( (GLdouble
) src
->f
[0] );
846 dst
->f
[1] = (GLfloat
) sqrt( (GLdouble
) src
->f
[1] );
847 dst
->f
[2] = (GLfloat
) sqrt( (GLdouble
) src
->f
[2] );
848 dst
->f
[3] = (GLfloat
) sqrt( (GLdouble
) src
->f
[3] );
853 union tgsi_exec_channel
*dst
,
854 const union tgsi_exec_channel
*src0
,
855 const union tgsi_exec_channel
*src1
)
857 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
858 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
859 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
860 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
865 union tgsi_exec_channel
*dst
,
866 const union tgsi_exec_channel
*src
)
868 dst
->f
[0] = (GLfloat
) src
->u
[0];
869 dst
->f
[1] = (GLfloat
) src
->u
[1];
870 dst
->f
[2] = (GLfloat
) src
->u
[2];
871 dst
->f
[3] = (GLfloat
) src
->u
[3];
876 union tgsi_exec_channel
*dst
,
877 const union tgsi_exec_channel
*src0
,
878 const union tgsi_exec_channel
*src1
)
880 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
881 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
882 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
883 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
887 fetch_src_file_channel(
888 const struct tgsi_exec_machine
*mach
,
890 const GLuint swizzle
,
891 const union tgsi_exec_channel
*index
,
892 union tgsi_exec_channel
*chan
)
895 case TGSI_EXTSWIZZLE_X
:
896 case TGSI_EXTSWIZZLE_Y
:
897 case TGSI_EXTSWIZZLE_Z
:
898 case TGSI_EXTSWIZZLE_W
:
900 case TGSI_FILE_CONSTANT
:
901 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
902 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
903 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
904 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
907 case TGSI_FILE_INPUT
:
908 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
909 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
910 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
911 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
914 case TGSI_FILE_TEMPORARY
:
915 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
916 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
917 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
918 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
921 case TGSI_FILE_IMMEDIATE
:
922 assert( index
->i
[0] < (GLint
) mach
->ImmLimit
);
923 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
924 assert( index
->i
[1] < (GLint
) mach
->ImmLimit
);
925 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
926 assert( index
->i
[2] < (GLint
) mach
->ImmLimit
);
927 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
928 assert( index
->i
[3] < (GLint
) mach
->ImmLimit
);
929 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
932 case TGSI_FILE_ADDRESS
:
933 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
934 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
935 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
936 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
939 case TGSI_FILE_OUTPUT
:
940 /* vertex varying/output vars can be read too */
941 chan
->u
[0] = mach
->Outputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
942 chan
->u
[1] = mach
->Outputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
943 chan
->u
[2] = mach
->Outputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
944 chan
->u
[3] = mach
->Outputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
952 case TGSI_EXTSWIZZLE_ZERO
:
953 *chan
= mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
];
956 case TGSI_EXTSWIZZLE_ONE
:
957 *chan
= mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
];
967 const struct tgsi_exec_machine
*mach
,
968 union tgsi_exec_channel
*chan
,
969 const struct tgsi_full_src_register
*reg
,
970 const GLuint chan_index
)
972 union tgsi_exec_channel index
;
978 index
.i
[3] = reg
->SrcRegister
.Index
;
980 if (reg
->SrcRegister
.Indirect
) {
981 union tgsi_exec_channel index2
;
982 union tgsi_exec_channel indir_index
;
987 index2
.i
[3] = reg
->SrcRegisterInd
.Index
;
989 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterInd
, CHAN_X
);
990 fetch_src_file_channel(
992 reg
->SrcRegisterInd
.File
,
997 index
.i
[0] += indir_index
.i
[0];
998 index
.i
[1] += indir_index
.i
[1];
999 index
.i
[2] += indir_index
.i
[2];
1000 index
.i
[3] += indir_index
.i
[3];
1003 if( reg
->SrcRegister
.Dimension
) {
1004 switch( reg
->SrcRegister
.File
) {
1005 case TGSI_FILE_INPUT
:
1011 case TGSI_FILE_CONSTANT
:
1021 index
.i
[0] += reg
->SrcRegisterDim
.Index
;
1022 index
.i
[1] += reg
->SrcRegisterDim
.Index
;
1023 index
.i
[2] += reg
->SrcRegisterDim
.Index
;
1024 index
.i
[3] += reg
->SrcRegisterDim
.Index
;
1026 if (reg
->SrcRegisterDim
.Indirect
) {
1027 union tgsi_exec_channel index2
;
1028 union tgsi_exec_channel indir_index
;
1033 index2
.i
[3] = reg
->SrcRegisterDimInd
.Index
;
1035 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterDimInd
, CHAN_X
);
1036 fetch_src_file_channel(
1038 reg
->SrcRegisterDimInd
.File
,
1043 index
.i
[0] += indir_index
.i
[0];
1044 index
.i
[1] += indir_index
.i
[1];
1045 index
.i
[2] += indir_index
.i
[2];
1046 index
.i
[3] += indir_index
.i
[3];
1050 swizzle
= tgsi_util_get_full_src_register_extswizzle( reg
, chan_index
);
1051 fetch_src_file_channel(
1053 reg
->SrcRegister
.File
,
1058 switch (tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
)) {
1059 case TGSI_UTIL_SIGN_CLEAR
:
1060 micro_abs( chan
, chan
);
1063 case TGSI_UTIL_SIGN_SET
:
1064 micro_abs( chan
, chan
);
1065 micro_neg( chan
, chan
);
1068 case TGSI_UTIL_SIGN_TOGGLE
:
1069 micro_neg( chan
, chan
);
1072 case TGSI_UTIL_SIGN_KEEP
:
1079 struct tgsi_exec_machine
*mach
,
1080 const union tgsi_exec_channel
*chan
,
1081 const struct tgsi_full_dst_register
*reg
,
1082 const struct tgsi_full_instruction
*inst
,
1085 union tgsi_exec_channel
*dst
;
1087 switch( reg
->DstRegister
.File
) {
1088 case TGSI_FILE_NULL
:
1091 case TGSI_FILE_OUTPUT
:
1092 dst
= &mach
->Outputs
[mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] + reg
->DstRegister
.Index
].xyzw
[chan_index
];
1095 case TGSI_FILE_TEMPORARY
:
1096 dst
= &mach
->Temps
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1099 case TGSI_FILE_ADDRESS
:
1100 dst
= &mach
->Addrs
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1108 switch (inst
->Instruction
.Saturate
)
1114 if (mach
->ExecMask
& 0x1)
1115 dst
->i
[0] = chan
->i
[0];
1116 if (mach
->ExecMask
& 0x2)
1117 dst
->i
[1] = chan
->i
[1];
1118 if (mach
->ExecMask
& 0x4)
1119 dst
->i
[2] = chan
->i
[2];
1120 if (mach
->ExecMask
& 0x8)
1121 dst
->i
[3] = chan
->i
[3];
1125 case TGSI_SAT_ZERO_ONE
:
1126 micro_lt( dst
, chan
, &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], chan
);
1127 micro_lt( dst
, chan
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], chan
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1130 case TGSI_SAT_MINUS_PLUS_ONE
:
/*
 * Convenience wrappers around fetch_source() / store_dest().
 * Both expand to code that refers to variables named 'mach' and 'inst',
 * so they can only be used where those names are in scope.
 *
 * FETCH: read channel CHAN of source operand INDEX of 'inst' into VAL.
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)

/* STORE: write VAL to channel CHAN of destination operand INDEX of 'inst'. */
#define STORE(VAL,INDEX,CHAN)\
    store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1147 * Execute ARB-style KIL which is predicated by a src register.
1148 * Kill fragment if any of the four values is less than zero.
1151 exec_kilp(struct tgsi_exec_machine
*mach
,
1152 const struct tgsi_full_instruction
*inst
)
1157 union tgsi_exec_channel r
[1];
1159 /* This mask stores component bits that were already tested. Note that
1160 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1162 uniquemask
= (1 << TGSI_EXTSWIZZLE_ZERO
) | (1 << TGSI_EXTSWIZZLE_ONE
);
1164 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1169 /* unswizzle channel */
1170 swizzle
= tgsi_util_get_full_src_register_extswizzle (
1171 &inst
->FullSrcRegisters
[0],
1174 /* check if the component has not been already tested */
1175 if (uniquemask
& (1 << swizzle
))
1177 uniquemask
|= 1 << swizzle
;
1179 FETCH(&r
[0], 0, chan_index
);
1180 for (i
= 0; i
< 4; i
++)
1181 if (r
[0].f
[i
] < 0.0f
)
1182 kilmask
|= 1 << (i
* 4);
1185 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1190 exec_kil(struct tgsi_exec_machine
*mach
,
1191 const struct tgsi_full_instruction
*inst
)
1193 /* for enabled ExecMask bits, set the killed bit */
1194 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= mach
->ExecMask
;
1200 * Fetch a texel using STR texture coordinates.
1203 fetch_texel( struct tgsi_sampler
*sampler
,
1204 const union tgsi_exec_channel
*s
,
1205 const union tgsi_exec_channel
*t
,
1206 const union tgsi_exec_channel
*p
,
1208 union tgsi_exec_channel
*r
,
1209 union tgsi_exec_channel
*g
,
1210 union tgsi_exec_channel
*b
,
1211 union tgsi_exec_channel
*a
)
1214 GLfloat rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1216 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, lodbias
, rgba
);
1218 for (j
= 0; j
< 4; j
++) {
1219 r
->f
[j
] = rgba
[0][j
];
1220 g
->f
[j
] = rgba
[1][j
];
1221 b
->f
[j
] = rgba
[2][j
];
1222 a
->f
[j
] = rgba
[3][j
];
1227 constant_interpolation(
1228 struct tgsi_exec_machine
*mach
,
1234 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1235 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1240 linear_interpolation(
1241 struct tgsi_exec_machine
*mach
,
1247 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1248 const float x
= mach
->Inputs
[0].xyzw
[0].f
[i
];
1249 const float y
= mach
->Inputs
[0].xyzw
[1].f
[i
];
1251 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] =
1252 mach
->InterpCoefs
[attrib
].a0
[chan
] +
1253 mach
->InterpCoefs
[attrib
].dadx
[chan
] * x
+
1254 mach
->InterpCoefs
[attrib
].dady
[chan
] * y
;
1259 perspective_interpolation(
1260 struct tgsi_exec_machine
*mach
,
1266 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1267 const float x
= mach
->Inputs
[0].xyzw
[0].f
[i
];
1268 const float y
= mach
->Inputs
[0].xyzw
[1].f
[i
];
1269 // WPOS.w here is really 1/w
1270 const float w
= 1.0f
/ mach
->Inputs
[0].xyzw
[3].f
[i
];
1271 assert(mach
->Inputs
[0].xyzw
[3].f
[i
] != 0.0);
1273 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] =
1274 (mach
->InterpCoefs
[attrib
].a0
[chan
] +
1275 mach
->InterpCoefs
[attrib
].dadx
[chan
] * x
+
1276 mach
->InterpCoefs
[attrib
].dady
[chan
] * y
) * w
;
/* Common signature of constant_interpolation(), linear_interpolation() and
 * perspective_interpolation(): fill one channel of one input attribute.
 */
typedef void (* interpolation_func)(
   struct tgsi_exec_machine *mach,
   unsigned attrib,
   unsigned chan );
1288 struct tgsi_exec_machine
*mach
,
1289 const struct tgsi_full_declaration
*decl
)
1291 if( mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1292 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1293 unsigned first
, last
, mask
;
1294 interpolation_func interp
;
1296 assert( decl
->Declaration
.Declare
== TGSI_DECLARE_RANGE
);
1298 first
= decl
->u
.DeclarationRange
.First
;
1299 last
= decl
->u
.DeclarationRange
.Last
;
1300 mask
= decl
->Declaration
.UsageMask
;
1302 /* Do not touch WPOS.xy */
1304 mask
&= ~TGSI_WRITEMASK_XY
;
1305 if( mask
== TGSI_WRITEMASK_NONE
) {
1307 if( first
> last
) {
1313 switch( decl
->Interpolation
.Interpolate
) {
1314 case TGSI_INTERPOLATE_CONSTANT
:
1315 interp
= constant_interpolation
;
1318 case TGSI_INTERPOLATE_LINEAR
:
1319 interp
= linear_interpolation
;
1322 case TGSI_INTERPOLATE_PERSPECTIVE
:
1323 interp
= perspective_interpolation
;
1330 if( mask
== TGSI_WRITEMASK_XYZW
) {
1333 for( i
= first
; i
<= last
; i
++ ) {
1334 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1335 interp( mach
, i
, j
);
1342 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1343 if( mask
& (1 << j
) ) {
1344 for( i
= first
; i
<= last
; i
++ ) {
1345 interp( mach
, i
, j
);
1356 struct tgsi_exec_machine
*mach
,
1357 const struct tgsi_full_instruction
*inst
,
1361 union tgsi_exec_channel r
[8];
1365 switch (inst
->Instruction
.Opcode
) {
1366 case TGSI_OPCODE_ARL
:
1367 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1368 FETCH( &r
[0], 0, chan_index
);
1369 micro_f2it( &r
[0], &r
[0] );
1370 STORE( &r
[0], 0, chan_index
);
1374 case TGSI_OPCODE_MOV
:
1375 /* TGSI_OPCODE_SWZ */
1376 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1377 FETCH( &r
[0], 0, chan_index
);
1378 STORE( &r
[0], 0, chan_index
);
1382 case TGSI_OPCODE_LIT
:
1383 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1384 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1387 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1388 FETCH( &r
[0], 0, CHAN_X
);
1389 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1390 micro_max( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1391 STORE( &r
[0], 0, CHAN_Y
);
1394 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1395 FETCH( &r
[1], 0, CHAN_Y
);
1396 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1398 FETCH( &r
[2], 0, CHAN_W
);
1399 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
1400 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
1401 micro_pow( &r
[1], &r
[1], &r
[2] );
1402 micro_lt( &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1403 STORE( &r
[0], 0, CHAN_Z
);
1407 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1408 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1412 case TGSI_OPCODE_RCP
:
1413 /* TGSI_OPCODE_RECIP */
1414 FETCH( &r
[0], 0, CHAN_X
);
1415 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1416 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1417 STORE( &r
[0], 0, chan_index
);
1421 case TGSI_OPCODE_RSQ
:
1422 /* TGSI_OPCODE_RECIPSQRT */
1423 FETCH( &r
[0], 0, CHAN_X
);
1424 micro_sqrt( &r
[0], &r
[0] );
1425 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1426 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1427 STORE( &r
[0], 0, chan_index
);
1431 case TGSI_OPCODE_EXP
:
1435 case TGSI_OPCODE_LOG
:
1439 case TGSI_OPCODE_MUL
:
1440 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
)
1442 FETCH(&r
[0], 0, chan_index
);
1443 FETCH(&r
[1], 1, chan_index
);
1445 micro_mul( &r
[0], &r
[0], &r
[1] );
1447 STORE(&r
[0], 0, chan_index
);
1451 case TGSI_OPCODE_ADD
:
1452 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1453 FETCH( &r
[0], 0, chan_index
);
1454 FETCH( &r
[1], 1, chan_index
);
1455 micro_add( &r
[0], &r
[0], &r
[1] );
1456 STORE( &r
[0], 0, chan_index
);
1460 case TGSI_OPCODE_DP3
:
1461 /* TGSI_OPCODE_DOT3 */
1462 FETCH( &r
[0], 0, CHAN_X
);
1463 FETCH( &r
[1], 1, CHAN_X
);
1464 micro_mul( &r
[0], &r
[0], &r
[1] );
1466 FETCH( &r
[1], 0, CHAN_Y
);
1467 FETCH( &r
[2], 1, CHAN_Y
);
1468 micro_mul( &r
[1], &r
[1], &r
[2] );
1469 micro_add( &r
[0], &r
[0], &r
[1] );
1471 FETCH( &r
[1], 0, CHAN_Z
);
1472 FETCH( &r
[2], 1, CHAN_Z
);
1473 micro_mul( &r
[1], &r
[1], &r
[2] );
1474 micro_add( &r
[0], &r
[0], &r
[1] );
1476 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1477 STORE( &r
[0], 0, chan_index
);
1481 case TGSI_OPCODE_DP4
:
1482 /* TGSI_OPCODE_DOT4 */
1483 FETCH(&r
[0], 0, CHAN_X
);
1484 FETCH(&r
[1], 1, CHAN_X
);
1486 micro_mul( &r
[0], &r
[0], &r
[1] );
1488 FETCH(&r
[1], 0, CHAN_Y
);
1489 FETCH(&r
[2], 1, CHAN_Y
);
1491 micro_mul( &r
[1], &r
[1], &r
[2] );
1492 micro_add( &r
[0], &r
[0], &r
[1] );
1494 FETCH(&r
[1], 0, CHAN_Z
);
1495 FETCH(&r
[2], 1, CHAN_Z
);
1497 micro_mul( &r
[1], &r
[1], &r
[2] );
1498 micro_add( &r
[0], &r
[0], &r
[1] );
1500 FETCH(&r
[1], 0, CHAN_W
);
1501 FETCH(&r
[2], 1, CHAN_W
);
1503 micro_mul( &r
[1], &r
[1], &r
[2] );
1504 micro_add( &r
[0], &r
[0], &r
[1] );
1506 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1507 STORE( &r
[0], 0, chan_index
);
1511 case TGSI_OPCODE_DST
:
1512 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1513 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1516 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1517 FETCH( &r
[0], 0, CHAN_Y
);
1518 FETCH( &r
[1], 1, CHAN_Y
);
1519 micro_mul( &r
[0], &r
[0], &r
[1] );
1520 STORE( &r
[0], 0, CHAN_Y
);
1523 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1524 FETCH( &r
[0], 0, CHAN_Z
);
1525 STORE( &r
[0], 0, CHAN_Z
);
1528 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1529 FETCH( &r
[0], 1, CHAN_W
);
1530 STORE( &r
[0], 0, CHAN_W
);
1534 case TGSI_OPCODE_MIN
:
1535 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1536 FETCH(&r
[0], 0, chan_index
);
1537 FETCH(&r
[1], 1, chan_index
);
1539 micro_lt( &r
[0], &r
[0], &r
[1], &r
[0], &r
[1] );
1541 STORE(&r
[0], 0, chan_index
);
1545 case TGSI_OPCODE_MAX
:
1546 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1547 FETCH(&r
[0], 0, chan_index
);
1548 FETCH(&r
[1], 1, chan_index
);
1550 micro_lt( &r
[0], &r
[0], &r
[1], &r
[1], &r
[0] );
1552 STORE(&r
[0], 0, chan_index
);
1556 case TGSI_OPCODE_SLT
:
1557 /* TGSI_OPCODE_SETLT */
1558 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1559 FETCH( &r
[0], 0, chan_index
);
1560 FETCH( &r
[1], 1, chan_index
);
1561 micro_lt( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1562 STORE( &r
[0], 0, chan_index
);
1566 case TGSI_OPCODE_SGE
:
1567 /* TGSI_OPCODE_SETGE */
1568 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1569 FETCH( &r
[0], 0, chan_index
);
1570 FETCH( &r
[1], 1, chan_index
);
1571 micro_ge( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1572 STORE( &r
[0], 0, chan_index
);
1576 case TGSI_OPCODE_MAD
:
1577 /* TGSI_OPCODE_MADD */
1578 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1579 FETCH( &r
[0], 0, chan_index
);
1580 FETCH( &r
[1], 1, chan_index
);
1581 micro_mul( &r
[0], &r
[0], &r
[1] );
1582 FETCH( &r
[1], 2, chan_index
);
1583 micro_add( &r
[0], &r
[0], &r
[1] );
1584 STORE( &r
[0], 0, chan_index
);
1588 case TGSI_OPCODE_SUB
:
1589 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1590 FETCH(&r
[0], 0, chan_index
);
1591 FETCH(&r
[1], 1, chan_index
);
1593 micro_sub( &r
[0], &r
[0], &r
[1] );
1595 STORE(&r
[0], 0, chan_index
);
1599 case TGSI_OPCODE_LERP
:
1600 /* TGSI_OPCODE_LRP */
1601 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1602 FETCH(&r
[0], 0, chan_index
);
1603 FETCH(&r
[1], 1, chan_index
);
1604 FETCH(&r
[2], 2, chan_index
);
1606 micro_sub( &r
[1], &r
[1], &r
[2] );
1607 micro_mul( &r
[0], &r
[0], &r
[1] );
1608 micro_add( &r
[0], &r
[0], &r
[2] );
1610 STORE(&r
[0], 0, chan_index
);
1614 case TGSI_OPCODE_CND
:
1618 case TGSI_OPCODE_CND0
:
1622 case TGSI_OPCODE_DOT2ADD
:
1623 /* TGSI_OPCODE_DP2A */
1627 case TGSI_OPCODE_INDEX
:
1631 case TGSI_OPCODE_NEGATE
:
1635 case TGSI_OPCODE_FRAC
:
1636 /* TGSI_OPCODE_FRC */
1637 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1638 FETCH( &r
[0], 0, chan_index
);
1639 micro_frc( &r
[0], &r
[0] );
1640 STORE( &r
[0], 0, chan_index
);
1644 case TGSI_OPCODE_CLAMP
:
1648 case TGSI_OPCODE_FLOOR
:
1649 /* TGSI_OPCODE_FLR */
1650 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1651 FETCH( &r
[0], 0, chan_index
);
1652 micro_flr( &r
[0], &r
[0] );
1653 STORE( &r
[0], 0, chan_index
);
1657 case TGSI_OPCODE_ROUND
:
1658 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1659 FETCH( &r
[0], 0, chan_index
);
1660 micro_rnd( &r
[0], &r
[0] );
1661 STORE( &r
[0], 0, chan_index
);
1665 case TGSI_OPCODE_EXPBASE2
:
1666 /* TGSI_OPCODE_EX2 */
1667 FETCH(&r
[0], 0, CHAN_X
);
1669 micro_pow( &r
[0], &mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
], &r
[0] );
1671 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1672 STORE( &r
[0], 0, chan_index
);
1676 case TGSI_OPCODE_LOGBASE2
:
1677 /* TGSI_OPCODE_LG2 */
1678 FETCH( &r
[0], 0, CHAN_X
);
1679 micro_lg2( &r
[0], &r
[0] );
1680 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1681 STORE( &r
[0], 0, chan_index
);
1685 case TGSI_OPCODE_POWER
:
1686 /* TGSI_OPCODE_POW */
1687 FETCH(&r
[0], 0, CHAN_X
);
1688 FETCH(&r
[1], 1, CHAN_X
);
1690 micro_pow( &r
[0], &r
[0], &r
[1] );
1692 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1693 STORE( &r
[0], 0, chan_index
);
1697 case TGSI_OPCODE_CROSSPRODUCT
:
1698 /* TGSI_OPCODE_XPD */
1699 FETCH(&r
[0], 0, CHAN_Y
);
1700 FETCH(&r
[1], 1, CHAN_Z
);
1702 micro_mul( &r
[2], &r
[0], &r
[1] );
1704 FETCH(&r
[3], 0, CHAN_Z
);
1705 FETCH(&r
[4], 1, CHAN_Y
);
1707 micro_mul( &r
[5], &r
[3], &r
[4] );
1708 micro_sub( &r
[2], &r
[2], &r
[5] );
1710 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1711 STORE( &r
[2], 0, CHAN_X
);
1714 FETCH(&r
[2], 1, CHAN_X
);
1716 micro_mul( &r
[3], &r
[3], &r
[2] );
1718 FETCH(&r
[5], 0, CHAN_X
);
1720 micro_mul( &r
[1], &r
[1], &r
[5] );
1721 micro_sub( &r
[3], &r
[3], &r
[1] );
1723 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1724 STORE( &r
[3], 0, CHAN_Y
);
1727 micro_mul( &r
[5], &r
[5], &r
[4] );
1728 micro_mul( &r
[0], &r
[0], &r
[2] );
1729 micro_sub( &r
[5], &r
[5], &r
[0] );
1731 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1732 STORE( &r
[5], 0, CHAN_Z
);
1735 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1736 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1740 case TGSI_OPCODE_MULTIPLYMATRIX
:
1744 case TGSI_OPCODE_ABS
:
1745 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1746 FETCH(&r
[0], 0, chan_index
);
1748 micro_abs( &r
[0], &r
[0] );
1750 STORE(&r
[0], 0, chan_index
);
1754 case TGSI_OPCODE_RCC
:
1758 case TGSI_OPCODE_DPH
:
1759 FETCH(&r
[0], 0, CHAN_X
);
1760 FETCH(&r
[1], 1, CHAN_X
);
1762 micro_mul( &r
[0], &r
[0], &r
[1] );
1764 FETCH(&r
[1], 0, CHAN_Y
);
1765 FETCH(&r
[2], 1, CHAN_Y
);
1767 micro_mul( &r
[1], &r
[1], &r
[2] );
1768 micro_add( &r
[0], &r
[0], &r
[1] );
1770 FETCH(&r
[1], 0, CHAN_Z
);
1771 FETCH(&r
[2], 1, CHAN_Z
);
1773 micro_mul( &r
[1], &r
[1], &r
[2] );
1774 micro_add( &r
[0], &r
[0], &r
[1] );
1776 FETCH(&r
[1], 1, CHAN_W
);
1778 micro_add( &r
[0], &r
[0], &r
[1] );
1780 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1781 STORE( &r
[0], 0, chan_index
);
1785 case TGSI_OPCODE_COS
:
1786 FETCH(&r
[0], 0, CHAN_X
);
1788 micro_cos( &r
[0], &r
[0] );
1790 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1791 STORE( &r
[0], 0, chan_index
);
1795 case TGSI_OPCODE_DDX
:
1796 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1797 FETCH( &r
[0], 0, chan_index
);
1798 micro_ddx( &r
[0], &r
[0] );
1799 STORE( &r
[0], 0, chan_index
);
1803 case TGSI_OPCODE_DDY
:
1804 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1805 FETCH( &r
[0], 0, chan_index
);
1806 micro_ddy( &r
[0], &r
[0] );
1807 STORE( &r
[0], 0, chan_index
);
1811 case TGSI_OPCODE_KILP
:
1812 exec_kilp (mach
, inst
);
1815 case TGSI_OPCODE_KIL
:
1816 exec_kil (mach
, inst
);
1819 case TGSI_OPCODE_PK2H
:
1823 case TGSI_OPCODE_PK2US
:
1827 case TGSI_OPCODE_PK4B
:
1831 case TGSI_OPCODE_PK4UB
:
1835 case TGSI_OPCODE_RFL
:
1839 case TGSI_OPCODE_SEQ
:
1840 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1841 FETCH( &r
[0], 0, chan_index
);
1842 FETCH( &r
[1], 1, chan_index
);
1843 micro_eq( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1844 STORE( &r
[0], 0, chan_index
);
1848 case TGSI_OPCODE_SFL
:
1852 case TGSI_OPCODE_SGT
:
1853 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1854 FETCH( &r
[0], 0, chan_index
);
1855 FETCH( &r
[1], 1, chan_index
);
1856 micro_lt( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1857 STORE( &r
[0], 0, chan_index
);
1861 case TGSI_OPCODE_SIN
:
1862 FETCH( &r
[0], 0, CHAN_X
);
1863 micro_sin( &r
[0], &r
[0] );
1864 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1865 STORE( &r
[0], 0, chan_index
);
1869 case TGSI_OPCODE_SLE
:
1870 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1871 FETCH( &r
[0], 0, chan_index
);
1872 FETCH( &r
[1], 1, chan_index
);
1873 micro_ge( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1874 STORE( &r
[0], 0, chan_index
);
1878 case TGSI_OPCODE_SNE
:
1879 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1880 FETCH( &r
[0], 0, chan_index
);
1881 FETCH( &r
[1], 1, chan_index
);
1882 micro_ne( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1883 STORE( &r
[0], 0, chan_index
);
1887 case TGSI_OPCODE_STR
:
1891 case TGSI_OPCODE_TEX
:
1893 const GLuint unit
= inst
->FullSrcRegisters
[1].SrcRegister
.Index
;
1894 switch (inst
->InstructionExtTexture
.Texture
) {
1895 case TGSI_TEXTURE_1D
:
1897 FETCH(&r
[0], 0, CHAN_X
);
1899 switch (inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtDivide
) {
1900 case TGSI_EXTSWIZZLE_W
:
1901 FETCH(&r
[1], 0, CHAN_W
);
1902 micro_div( &r
[0], &r
[0], &r
[1] );
1905 case TGSI_EXTSWIZZLE_ONE
:
1912 fetch_texel(&mach
->Samplers
[unit
],
1913 &r
[0], NULL
, NULL
, 0.0,
1914 &r
[0], &r
[1], &r
[2], &r
[3]);
1917 case TGSI_TEXTURE_2D
:
1918 case TGSI_TEXTURE_RECT
:
1920 FETCH(&r
[0], 0, CHAN_X
);
1921 FETCH(&r
[1], 0, CHAN_Y
);
1923 switch (inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtDivide
) {
1924 case TGSI_EXTSWIZZLE_W
:
1925 FETCH(&r
[2], 0, CHAN_W
);
1926 micro_div( &r
[0], &r
[0], &r
[2] );
1927 micro_div( &r
[1], &r
[1], &r
[2] );
1930 case TGSI_EXTSWIZZLE_ONE
:
1937 fetch_texel(&mach
->Samplers
[unit
],
1938 &r
[0], &r
[1], NULL
, 0.0,
1939 &r
[0], &r
[1], &r
[2], &r
[3]);
1942 case TGSI_TEXTURE_3D
:
1943 case TGSI_TEXTURE_CUBE
:
1945 FETCH(&r
[0], 0, CHAN_X
);
1946 FETCH(&r
[1], 0, CHAN_Y
);
1947 FETCH(&r
[2], 0, CHAN_Z
);
1949 switch (inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtDivide
) {
1950 case TGSI_EXTSWIZZLE_W
:
1951 FETCH(&r
[3], 0, CHAN_W
);
1952 micro_div( &r
[0], &r
[0], &r
[3] );
1953 micro_div( &r
[1], &r
[1], &r
[3] );
1954 micro_div( &r
[2], &r
[2], &r
[3] );
1957 case TGSI_EXTSWIZZLE_ONE
:
1964 fetch_texel(&mach
->Samplers
[unit
],
1965 &r
[0], &r
[1], &r
[2], 0.0,
1966 &r
[0], &r
[1], &r
[2], &r
[3]);
1973 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1974 STORE( &r
[chan_index
], 0, chan_index
);
1979 case TGSI_OPCODE_TXD
:
1983 case TGSI_OPCODE_UP2H
:
1987 case TGSI_OPCODE_UP2US
:
1991 case TGSI_OPCODE_UP4B
:
1995 case TGSI_OPCODE_UP4UB
:
1999 case TGSI_OPCODE_X2D
:
2003 case TGSI_OPCODE_ARA
:
2007 case TGSI_OPCODE_ARR
:
2011 case TGSI_OPCODE_BRA
:
2015 case TGSI_OPCODE_CAL
:
2016 /* skip the call if no execution channels are enabled */
2017 if (mach
->ExecMask
) {
2020 /* push Cond, Loop, Cont stacks */
2021 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2022 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2023 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2024 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2025 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2026 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2028 /* note that PC was already incremented above */
2029 mach
->CallStack
[mach
->CallStackTop
++] = *pc
;
2030 *pc
= inst
->InstructionExtLabel
.Label
;
2034 case TGSI_OPCODE_RET
:
2035 /* XXX examine ExecMask to determine if we should _really_ return */
2036 /* pop Cond, Loop, Cont stacks */
2037 assert(mach
->CondStackTop
> 0);
2038 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2039 assert(mach
->LoopStackTop
> 0);
2040 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2041 assert(mach
->ContStackTop
> 0);
2042 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2044 assert(mach
->CallStackTop
>= 0);
2045 if (mach
->CallStackTop
== 0) {
2049 *pc
= mach
->CallStack
[--mach
->CallStackTop
];
2052 case TGSI_OPCODE_SSG
:
2056 case TGSI_OPCODE_CMP
:
2057 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2058 FETCH(&r
[0], 0, chan_index
);
2059 FETCH(&r
[1], 1, chan_index
);
2060 FETCH(&r
[2], 2, chan_index
);
2062 micro_lt( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2] );
2064 STORE(&r
[0], 0, chan_index
);
2068 case TGSI_OPCODE_SCS
:
2069 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2070 FETCH( &r
[0], 0, CHAN_X
);
2072 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) ) {
2073 micro_cos( &r
[1], &r
[0] );
2074 STORE( &r
[1], 0, CHAN_X
);
2076 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2077 micro_sin( &r
[1], &r
[0] );
2078 STORE( &r
[1], 0, CHAN_Y
);
2080 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
2081 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
2083 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
2084 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2088 case TGSI_OPCODE_TXB
:
2092 case TGSI_OPCODE_NRM
:
2096 case TGSI_OPCODE_DIV
:
2100 case TGSI_OPCODE_DP2
:
2101 FETCH( &r
[0], 0, CHAN_X
);
2102 FETCH( &r
[1], 1, CHAN_X
);
2103 micro_mul( &r
[0], &r
[0], &r
[1] );
2105 FETCH( &r
[1], 0, CHAN_Y
);
2106 FETCH( &r
[2], 1, CHAN_Y
);
2107 micro_mul( &r
[1], &r
[1], &r
[2] );
2108 micro_add( &r
[0], &r
[0], &r
[1] );
2110 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2111 STORE( &r
[0], 0, chan_index
);
2115 case TGSI_OPCODE_TXL
:
2119 case TGSI_OPCODE_IF
:
2121 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2122 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2123 FETCH( &r
[0], 0, CHAN_X
);
2124 /* update CondMask */
2126 mach
->CondMask
&= ~0x1;
2129 mach
->CondMask
&= ~0x2;
2132 mach
->CondMask
&= ~0x4;
2135 mach
->CondMask
&= ~0x8;
2137 UPDATE_EXEC_MASK(mach
);
2138 /* Todo: If CondMask==0, jump to ELSE */
2141 case TGSI_OPCODE_ELSE
:
2142 /* invert CondMask wrt previous mask */
2145 assert(mach
->CondStackTop
> 0);
2146 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
2147 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
2148 UPDATE_EXEC_MASK(mach
);
2149 /* Todo: If CondMask==0, jump to ENDIF */
2153 case TGSI_OPCODE_ENDIF
:
2155 assert(mach
->CondStackTop
> 0);
2156 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2157 UPDATE_EXEC_MASK(mach
);
2160 case TGSI_OPCODE_END
:
2161 /* halt execution */
2165 case TGSI_OPCODE_REP
:
2169 case TGSI_OPCODE_ENDREP
:
2173 case TGSI_OPCODE_PUSHA
:
2177 case TGSI_OPCODE_POPA
:
2181 case TGSI_OPCODE_CEIL
:
2182 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2183 FETCH( &r
[0], 0, chan_index
);
2184 micro_ceil( &r
[0], &r
[0] );
2185 STORE( &r
[0], 0, chan_index
);
2189 case TGSI_OPCODE_I2F
:
2190 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2191 FETCH( &r
[0], 0, chan_index
);
2192 micro_i2f( &r
[0], &r
[0] );
2193 STORE( &r
[0], 0, chan_index
);
2197 case TGSI_OPCODE_NOT
:
2198 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2199 FETCH( &r
[0], 0, chan_index
);
2200 micro_not( &r
[0], &r
[0] );
2201 STORE( &r
[0], 0, chan_index
);
2205 case TGSI_OPCODE_TRUNC
:
2206 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2207 FETCH( &r
[0], 0, chan_index
);
2208 micro_trunc( &r
[0], &r
[0] );
2209 STORE( &r
[0], 0, chan_index
);
2213 case TGSI_OPCODE_SHL
:
2214 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2215 FETCH( &r
[0], 0, chan_index
);
2216 FETCH( &r
[1], 1, chan_index
);
2217 micro_shl( &r
[0], &r
[0], &r
[1] );
2218 STORE( &r
[0], 0, chan_index
);
2222 case TGSI_OPCODE_SHR
:
2223 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2224 FETCH( &r
[0], 0, chan_index
);
2225 FETCH( &r
[1], 1, chan_index
);
2226 micro_ishr( &r
[0], &r
[0], &r
[1] );
2227 STORE( &r
[0], 0, chan_index
);
2231 case TGSI_OPCODE_AND
:
2232 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2233 FETCH( &r
[0], 0, chan_index
);
2234 FETCH( &r
[1], 1, chan_index
);
2235 micro_and( &r
[0], &r
[0], &r
[1] );
2236 STORE( &r
[0], 0, chan_index
);
2240 case TGSI_OPCODE_OR
:
2241 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2242 FETCH( &r
[0], 0, chan_index
);
2243 FETCH( &r
[1], 1, chan_index
);
2244 micro_or( &r
[0], &r
[0], &r
[1] );
2245 STORE( &r
[0], 0, chan_index
);
2249 case TGSI_OPCODE_MOD
:
2253 case TGSI_OPCODE_XOR
:
2254 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2255 FETCH( &r
[0], 0, chan_index
);
2256 FETCH( &r
[1], 1, chan_index
);
2257 micro_xor( &r
[0], &r
[0], &r
[1] );
2258 STORE( &r
[0], 0, chan_index
);
2262 case TGSI_OPCODE_SAD
:
2266 case TGSI_OPCODE_TXF
:
2270 case TGSI_OPCODE_TXQ
:
2274 case TGSI_OPCODE_EMIT
:
2275 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += 16;
2276 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
2279 case TGSI_OPCODE_ENDPRIM
:
2280 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]++;
2281 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]] = 0;
2284 case TGSI_OPCODE_LOOP
:
2285 /* fall-through (for now) */
2286 case TGSI_OPCODE_BGNLOOP2
:
2287 /* push LoopMask and ContMasks */
2288 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2289 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2290 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2291 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2294 case TGSI_OPCODE_ENDLOOP
:
2295 /* fall-through (for now at least) */
2296 case TGSI_OPCODE_ENDLOOP2
:
2297 /* Restore ContMask, but don't pop */
2298 assert(mach
->ContStackTop
> 0);
2299 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
2300 if (mach
->LoopMask
) {
2301 /* repeat loop: jump to instruction just past BGNLOOP */
2302 *pc
= inst
->InstructionExtLabel
.Label
+ 1;
2305 /* exit loop: pop LoopMask */
2306 assert(mach
->LoopStackTop
> 0);
2307 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2309 assert(mach
->ContStackTop
> 0);
2310 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2312 UPDATE_EXEC_MASK(mach
);
2315 case TGSI_OPCODE_BRK
:
2316 /* turn off loop channels for each enabled exec channel */
2317 mach
->LoopMask
&= ~mach
->ExecMask
;
2318 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2319 UPDATE_EXEC_MASK(mach
);
2322 case TGSI_OPCODE_CONT
:
2323 /* turn off cont channels for each enabled exec channel */
2324 mach
->ContMask
&= ~mach
->ExecMask
;
2325 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2326 UPDATE_EXEC_MASK(mach
);
2329 case TGSI_OPCODE_BGNSUB
:
2333 case TGSI_OPCODE_ENDSUB
:
2337 case TGSI_OPCODE_NOISE1
:
2341 case TGSI_OPCODE_NOISE2
:
2345 case TGSI_OPCODE_NOISE3
:
2349 case TGSI_OPCODE_NOISE4
:
2353 case TGSI_OPCODE_NOP
:
2363 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
2369 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
2372 mach
->CondMask
= 0xf;
2373 mach
->LoopMask
= 0xf;
2374 mach
->ContMask
= 0xf;
2375 mach
->ExecMask
= 0xf;
2377 assert(mach
->CondStackTop
== 0);
2378 assert(mach
->LoopStackTop
== 0);
2379 assert(mach
->ContStackTop
== 0);
2380 assert(mach
->CallStackTop
== 0);
2382 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
2383 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
2385 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
2386 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
2387 mach
->Primitives
[0] = 0;
2391 /* execute declarations (interpolants) */
2392 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
2393 exec_declaration( mach
, mach
->Declarations
+i
);
2396 /* execute instructions, until pc is set to -1 */
2398 assert(pc
< mach
->NumInstructions
);
2399 exec_instruction( mach
, mach
->Instructions
+ pc
, &pc
);
2405 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2406 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
2408 * Scale back depth component.
2410 for (i
= 0; i
< 4; i
++)
2411 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;