1 /**************************************************************************
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * TGSI interpreter/executor.
31 * Flow control information:
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
43 * The ExecMask is computed from three other masks (CondMask, LoopMask and
44 * ContMask) which are controlled by the flow control instructions (namely:
45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_util.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
61 #define TILE_TOP_LEFT 0
62 #define TILE_TOP_RIGHT 1
63 #define TILE_BOTTOM_LEFT 2
64 #define TILE_BOTTOM_RIGHT 3
72 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
74 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
75 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
76 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
77 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
78 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
79 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
80 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
81 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
82 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
83 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
84 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
85 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
86 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
87 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
88 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
89 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
90 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
91 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
92 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
93 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
94 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
95 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
96 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
97 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
98 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
99 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
100 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
101 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
102 #define TEMP_R0 TGSI_EXEC_TEMP_R0
104 #define IS_CHANNEL_ENABLED(INST, CHAN)\
105 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
107 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
108 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
110 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
111 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
112 if (IS_CHANNEL_ENABLED( INST, CHAN ))
114 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
115 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
116 if (IS_CHANNEL_ENABLED2( INST, CHAN ))
119 /** The execution mask depends on the conditional mask and the loop mask */
120 #define UPDATE_EXEC_MASK(MACH) \
121 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
124 * Initialize machine state by expanding tokens to full instructions,
125 * allocating temporary storage, setting up constants, etc.
126 * After this, we can call tgsi_exec_machine_run() many times.
129 tgsi_exec_machine_bind_shader(
130 struct tgsi_exec_machine
*mach
,
131 const struct tgsi_token
*tokens
,
133 struct tgsi_sampler
*samplers
)
136 struct tgsi_parse_context parse
;
137 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
138 struct tgsi_full_instruction
*instructions
;
139 struct tgsi_full_declaration
*declarations
;
140 uint maxInstructions
= 10, numInstructions
= 0;
141 uint maxDeclarations
= 10, numDeclarations
= 0;
145 tgsi_dump(tokens
, 0);
148 mach
->Tokens
= tokens
;
149 mach
->Samplers
= samplers
;
151 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
152 if (k
!= TGSI_PARSE_OK
) {
153 debug_printf( "Problem parsing!\n" );
157 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
161 declarations
= (struct tgsi_full_declaration
*)
162 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
168 instructions
= (struct tgsi_full_instruction
*)
169 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
172 FREE( declarations
);
176 while( !tgsi_parse_end_of_tokens( &parse
) ) {
177 uint pointer
= parse
.Position
;
180 tgsi_parse_token( &parse
);
181 switch( parse
.FullToken
.Token
.Type
) {
182 case TGSI_TOKEN_TYPE_DECLARATION
:
183 /* save expanded declaration */
184 if (numDeclarations
== maxDeclarations
) {
185 declarations
= REALLOC(declarations
,
187 * sizeof(struct tgsi_full_declaration
),
188 (maxDeclarations
+ 10)
189 * sizeof(struct tgsi_full_declaration
));
190 maxDeclarations
+= 10;
192 memcpy(declarations
+ numDeclarations
,
193 &parse
.FullToken
.FullDeclaration
,
194 sizeof(declarations
[0]));
198 case TGSI_TOKEN_TYPE_IMMEDIATE
:
200 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1;
201 assert( size
% 4 == 0 );
202 assert( mach
->ImmLimit
+ size
/ 4 <= TGSI_EXEC_NUM_IMMEDIATES
);
204 for( i
= 0; i
< size
; i
++ ) {
205 mach
->Imms
[mach
->ImmLimit
+ i
/ 4][i
% 4] =
206 parse
.FullToken
.FullImmediate
.u
.ImmediateFloat32
[i
].Float
;
208 mach
->ImmLimit
+= size
/ 4;
212 case TGSI_TOKEN_TYPE_INSTRUCTION
:
213 assert( labels
->count
< MAX_LABELS
);
215 labels
->labels
[labels
->count
][0] = instno
;
216 labels
->labels
[labels
->count
][1] = pointer
;
219 /* save expanded instruction */
220 if (numInstructions
== maxInstructions
) {
221 instructions
= REALLOC(instructions
,
223 * sizeof(struct tgsi_full_instruction
),
224 (maxInstructions
+ 10)
225 * sizeof(struct tgsi_full_instruction
));
226 maxInstructions
+= 10;
228 memcpy(instructions
+ numInstructions
,
229 &parse
.FullToken
.FullInstruction
,
230 sizeof(instructions
[0]));
238 tgsi_parse_free (&parse
);
240 if (mach
->Declarations
) {
241 FREE( mach
->Declarations
);
243 mach
->Declarations
= declarations
;
244 mach
->NumDeclarations
= numDeclarations
;
246 if (mach
->Instructions
) {
247 FREE( mach
->Instructions
);
249 mach
->Instructions
= instructions
;
250 mach
->NumInstructions
= numInstructions
;
255 tgsi_exec_machine_init(
256 struct tgsi_exec_machine
*mach
)
260 mach
->Temps
= (struct tgsi_exec_vector
*) tgsi_align_128bit( mach
->_Temps
);
261 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_TEMP_ADDR
];
263 /* Setup constants. */
264 for( i
= 0; i
< 4; i
++ ) {
265 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
266 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
267 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
268 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
269 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
270 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
271 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
272 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
273 mach
->Temps
[TEMP_3_I
].xyzw
[TEMP_3_C
].f
[i
] = 3.0f
;
274 mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
].f
[i
] = 0.5f
;
280 tgsi_exec_machine_free_data(struct tgsi_exec_machine
*mach
)
282 if (mach
->Instructions
) {
283 FREE(mach
->Instructions
);
284 mach
->Instructions
= NULL
;
285 mach
->NumInstructions
= 0;
287 if (mach
->Declarations
) {
288 FREE(mach
->Declarations
);
289 mach
->Declarations
= NULL
;
290 mach
->NumDeclarations
= 0;
297 union tgsi_exec_channel
*dst
,
298 const union tgsi_exec_channel
*src
)
300 dst
->f
[0] = fabsf( src
->f
[0] );
301 dst
->f
[1] = fabsf( src
->f
[1] );
302 dst
->f
[2] = fabsf( src
->f
[2] );
303 dst
->f
[3] = fabsf( src
->f
[3] );
308 union tgsi_exec_channel
*dst
,
309 const union tgsi_exec_channel
*src0
,
310 const union tgsi_exec_channel
*src1
)
312 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
313 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
314 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
315 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
320 union tgsi_exec_channel
*dst
,
321 const union tgsi_exec_channel
*src0
,
322 const union tgsi_exec_channel
*src1
)
324 dst
->i
[0] = src0
->i
[0] + src1
->i
[0];
325 dst
->i
[1] = src0
->i
[1] + src1
->i
[1];
326 dst
->i
[2] = src0
->i
[2] + src1
->i
[2];
327 dst
->i
[3] = src0
->i
[3] + src1
->i
[3];
332 union tgsi_exec_channel
*dst
,
333 const union tgsi_exec_channel
*src0
,
334 const union tgsi_exec_channel
*src1
)
336 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
337 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
338 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
339 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
344 union tgsi_exec_channel
*dst
,
345 const union tgsi_exec_channel
*src
)
347 dst
->f
[0] = ceilf( src
->f
[0] );
348 dst
->f
[1] = ceilf( src
->f
[1] );
349 dst
->f
[2] = ceilf( src
->f
[2] );
350 dst
->f
[3] = ceilf( src
->f
[3] );
355 union tgsi_exec_channel
*dst
,
356 const union tgsi_exec_channel
*src
)
358 dst
->f
[0] = cosf( src
->f
[0] );
359 dst
->f
[1] = cosf( src
->f
[1] );
360 dst
->f
[2] = cosf( src
->f
[2] );
361 dst
->f
[3] = cosf( src
->f
[3] );
366 union tgsi_exec_channel
*dst
,
367 const union tgsi_exec_channel
*src
)
372 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
377 union tgsi_exec_channel
*dst
,
378 const union tgsi_exec_channel
*src
)
383 dst
->f
[3] = src
->f
[TILE_TOP_LEFT
] - src
->f
[TILE_BOTTOM_LEFT
];
388 union tgsi_exec_channel
*dst
,
389 const union tgsi_exec_channel
*src0
,
390 const union tgsi_exec_channel
*src1
)
392 if (src1
->f
[0] != 0) {
393 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
395 if (src1
->f
[1] != 0) {
396 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
398 if (src1
->f
[2] != 0) {
399 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
401 if (src1
->f
[3] != 0) {
402 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
408 union tgsi_exec_channel
*dst
,
409 const union tgsi_exec_channel
*src0
,
410 const union tgsi_exec_channel
*src1
)
412 dst
->u
[0] = src0
->u
[0] / src1
->u
[0];
413 dst
->u
[1] = src0
->u
[1] / src1
->u
[1];
414 dst
->u
[2] = src0
->u
[2] / src1
->u
[2];
415 dst
->u
[3] = src0
->u
[3] / src1
->u
[3];
420 union tgsi_exec_channel
*dst
,
421 const union tgsi_exec_channel
*src0
,
422 const union tgsi_exec_channel
*src1
,
423 const union tgsi_exec_channel
*src2
,
424 const union tgsi_exec_channel
*src3
)
426 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
427 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
428 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
429 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
434 union tgsi_exec_channel
*dst
,
435 const union tgsi_exec_channel
*src0
,
436 const union tgsi_exec_channel
*src1
,
437 const union tgsi_exec_channel
*src2
,
438 const union tgsi_exec_channel
*src3
)
440 dst
->i
[0] = src0
->i
[0] == src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
441 dst
->i
[1] = src0
->i
[1] == src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
442 dst
->i
[2] = src0
->i
[2] == src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
443 dst
->i
[3] = src0
->i
[3] == src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
448 union tgsi_exec_channel
*dst
,
449 const union tgsi_exec_channel
*src
)
451 dst
->f
[0] = powf( 2.0f
, src
->f
[0] );
452 dst
->f
[1] = powf( 2.0f
, src
->f
[1] );
453 dst
->f
[2] = powf( 2.0f
, src
->f
[2] );
454 dst
->f
[3] = powf( 2.0f
, src
->f
[3] );
459 union tgsi_exec_channel
*dst
,
460 const union tgsi_exec_channel
*src
)
462 dst
->i
[0] = (int) src
->f
[0];
463 dst
->i
[1] = (int) src
->f
[1];
464 dst
->i
[2] = (int) src
->f
[2];
465 dst
->i
[3] = (int) src
->f
[3];
470 union tgsi_exec_channel
*dst
,
471 const union tgsi_exec_channel
*src
)
473 dst
->u
[0] = (uint
) src
->f
[0];
474 dst
->u
[1] = (uint
) src
->f
[1];
475 dst
->u
[2] = (uint
) src
->f
[2];
476 dst
->u
[3] = (uint
) src
->f
[3];
481 union tgsi_exec_channel
*dst
,
482 const union tgsi_exec_channel
*src
)
484 dst
->f
[0] = floorf( src
->f
[0] );
485 dst
->f
[1] = floorf( src
->f
[1] );
486 dst
->f
[2] = floorf( src
->f
[2] );
487 dst
->f
[3] = floorf( src
->f
[3] );
492 union tgsi_exec_channel
*dst
,
493 const union tgsi_exec_channel
*src
)
495 dst
->f
[0] = src
->f
[0] - floorf( src
->f
[0] );
496 dst
->f
[1] = src
->f
[1] - floorf( src
->f
[1] );
497 dst
->f
[2] = src
->f
[2] - floorf( src
->f
[2] );
498 dst
->f
[3] = src
->f
[3] - floorf( src
->f
[3] );
503 union tgsi_exec_channel
*dst
,
504 const union tgsi_exec_channel
*src0
,
505 const union tgsi_exec_channel
*src1
,
506 const union tgsi_exec_channel
*src2
,
507 const union tgsi_exec_channel
*src3
)
509 dst
->f
[0] = src0
->f
[0] >= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
510 dst
->f
[1] = src0
->f
[1] >= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
511 dst
->f
[2] = src0
->f
[2] >= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
512 dst
->f
[3] = src0
->f
[3] >= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
517 union tgsi_exec_channel
*dst
,
518 const union tgsi_exec_channel
*src
)
520 dst
->f
[0] = (float) src
->i
[0];
521 dst
->f
[1] = (float) src
->i
[1];
522 dst
->f
[2] = (float) src
->i
[2];
523 dst
->f
[3] = (float) src
->i
[3];
528 union tgsi_exec_channel
*dst
,
529 const union tgsi_exec_channel
*src
)
531 dst
->f
[0] = logf( src
->f
[0] ) * 1.442695f
;
532 dst
->f
[1] = logf( src
->f
[1] ) * 1.442695f
;
533 dst
->f
[2] = logf( src
->f
[2] ) * 1.442695f
;
534 dst
->f
[3] = logf( src
->f
[3] ) * 1.442695f
;
539 union tgsi_exec_channel
*dst
,
540 const union tgsi_exec_channel
*src0
,
541 const union tgsi_exec_channel
*src1
,
542 const union tgsi_exec_channel
*src2
,
543 const union tgsi_exec_channel
*src3
)
545 dst
->f
[0] = src0
->f
[0] <= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
546 dst
->f
[1] = src0
->f
[1] <= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
547 dst
->f
[2] = src0
->f
[2] <= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
548 dst
->f
[3] = src0
->f
[3] <= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
553 union tgsi_exec_channel
*dst
,
554 const union tgsi_exec_channel
*src0
,
555 const union tgsi_exec_channel
*src1
,
556 const union tgsi_exec_channel
*src2
,
557 const union tgsi_exec_channel
*src3
)
559 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
560 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
561 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
562 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
567 union tgsi_exec_channel
*dst
,
568 const union tgsi_exec_channel
*src0
,
569 const union tgsi_exec_channel
*src1
,
570 const union tgsi_exec_channel
*src2
,
571 const union tgsi_exec_channel
*src3
)
573 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
574 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
575 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
576 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
581 union tgsi_exec_channel
*dst
,
582 const union tgsi_exec_channel
*src0
,
583 const union tgsi_exec_channel
*src1
,
584 const union tgsi_exec_channel
*src2
,
585 const union tgsi_exec_channel
*src3
)
587 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src2
->u
[0] : src3
->u
[0];
588 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src2
->u
[1] : src3
->u
[1];
589 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src2
->u
[2] : src3
->u
[2];
590 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src2
->u
[3] : src3
->u
[3];
595 union tgsi_exec_channel
*dst
,
596 const union tgsi_exec_channel
*src0
,
597 const union tgsi_exec_channel
*src1
)
599 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
600 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
601 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
602 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
607 union tgsi_exec_channel
*dst
,
608 const union tgsi_exec_channel
*src0
,
609 const union tgsi_exec_channel
*src1
)
611 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
612 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
613 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
614 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
619 union tgsi_exec_channel
*dst
,
620 const union tgsi_exec_channel
*src0
,
621 const union tgsi_exec_channel
*src1
)
623 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
624 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
625 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
626 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
631 union tgsi_exec_channel
*dst
,
632 const union tgsi_exec_channel
*src0
,
633 const union tgsi_exec_channel
*src1
)
635 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
636 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
637 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
638 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
643 union tgsi_exec_channel
*dst
,
644 const union tgsi_exec_channel
*src0
,
645 const union tgsi_exec_channel
*src1
)
647 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
648 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
649 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
650 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
655 union tgsi_exec_channel
*dst
,
656 const union tgsi_exec_channel
*src0
,
657 const union tgsi_exec_channel
*src1
)
659 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
660 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
661 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
662 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
667 union tgsi_exec_channel
*dst
,
668 const union tgsi_exec_channel
*src0
,
669 const union tgsi_exec_channel
*src1
)
671 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
672 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
673 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
674 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
679 union tgsi_exec_channel
*dst
,
680 const union tgsi_exec_channel
*src0
,
681 const union tgsi_exec_channel
*src1
)
683 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
684 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
685 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
686 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
691 union tgsi_exec_channel
*dst
,
692 const union tgsi_exec_channel
*src0
,
693 const union tgsi_exec_channel
*src1
)
695 dst
->i
[0] = src0
->i
[0] * src1
->i
[0];
696 dst
->i
[1] = src0
->i
[1] * src1
->i
[1];
697 dst
->i
[2] = src0
->i
[2] * src1
->i
[2];
698 dst
->i
[3] = src0
->i
[3] * src1
->i
[3];
703 union tgsi_exec_channel
*dst0
,
704 union tgsi_exec_channel
*dst1
,
705 const union tgsi_exec_channel
*src0
,
706 const union tgsi_exec_channel
*src1
)
708 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
709 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
710 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
711 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
720 union tgsi_exec_channel
*dst0
,
721 union tgsi_exec_channel
*dst1
,
722 const union tgsi_exec_channel
*src0
,
723 const union tgsi_exec_channel
*src1
)
725 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
726 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
727 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
728 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
737 union tgsi_exec_channel
*dst
,
738 const union tgsi_exec_channel
*src0
,
739 const union tgsi_exec_channel
*src1
,
740 const union tgsi_exec_channel
*src2
)
742 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
743 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
744 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
745 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
750 union tgsi_exec_channel
*dst
,
751 const union tgsi_exec_channel
*src
)
753 dst
->f
[0] = -src
->f
[0];
754 dst
->f
[1] = -src
->f
[1];
755 dst
->f
[2] = -src
->f
[2];
756 dst
->f
[3] = -src
->f
[3];
761 union tgsi_exec_channel
*dst
,
762 const union tgsi_exec_channel
*src
)
764 dst
->i
[0] = -src
->i
[0];
765 dst
->i
[1] = -src
->i
[1];
766 dst
->i
[2] = -src
->i
[2];
767 dst
->i
[3] = -src
->i
[3];
772 union tgsi_exec_channel
*dst
,
773 const union tgsi_exec_channel
*src
)
775 dst
->u
[0] = ~src
->u
[0];
776 dst
->u
[1] = ~src
->u
[1];
777 dst
->u
[2] = ~src
->u
[2];
778 dst
->u
[3] = ~src
->u
[3];
783 union tgsi_exec_channel
*dst
,
784 const union tgsi_exec_channel
*src0
,
785 const union tgsi_exec_channel
*src1
)
787 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
788 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
789 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
790 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
795 union tgsi_exec_channel
*dst
,
796 const union tgsi_exec_channel
*src0
,
797 const union tgsi_exec_channel
*src1
)
799 dst
->f
[0] = powf( src0
->f
[0], src1
->f
[0] );
800 dst
->f
[1] = powf( src0
->f
[1], src1
->f
[1] );
801 dst
->f
[2] = powf( src0
->f
[2], src1
->f
[2] );
802 dst
->f
[3] = powf( src0
->f
[3], src1
->f
[3] );
807 union tgsi_exec_channel
*dst
,
808 const union tgsi_exec_channel
*src
)
810 dst
->f
[0] = floorf( src
->f
[0] + 0.5f
);
811 dst
->f
[1] = floorf( src
->f
[1] + 0.5f
);
812 dst
->f
[2] = floorf( src
->f
[2] + 0.5f
);
813 dst
->f
[3] = floorf( src
->f
[3] + 0.5f
);
818 union tgsi_exec_channel
*dst
,
819 const union tgsi_exec_channel
*src0
,
820 const union tgsi_exec_channel
*src1
)
822 dst
->i
[0] = src0
->i
[0] << src1
->i
[0];
823 dst
->i
[1] = src0
->i
[1] << src1
->i
[1];
824 dst
->i
[2] = src0
->i
[2] << src1
->i
[2];
825 dst
->i
[3] = src0
->i
[3] << src1
->i
[3];
830 union tgsi_exec_channel
*dst
,
831 const union tgsi_exec_channel
*src0
,
832 const union tgsi_exec_channel
*src1
)
834 dst
->i
[0] = src0
->i
[0] >> src1
->i
[0];
835 dst
->i
[1] = src0
->i
[1] >> src1
->i
[1];
836 dst
->i
[2] = src0
->i
[2] >> src1
->i
[2];
837 dst
->i
[3] = src0
->i
[3] >> src1
->i
[3];
842 union tgsi_exec_channel
*dst
,
843 const union tgsi_exec_channel
*src0
)
845 dst
->f
[0] = (float) (int) src0
->f
[0];
846 dst
->f
[1] = (float) (int) src0
->f
[1];
847 dst
->f
[2] = (float) (int) src0
->f
[2];
848 dst
->f
[3] = (float) (int) src0
->f
[3];
853 union tgsi_exec_channel
*dst
,
854 const union tgsi_exec_channel
*src0
,
855 const union tgsi_exec_channel
*src1
)
857 dst
->u
[0] = src0
->u
[0] >> src1
->u
[0];
858 dst
->u
[1] = src0
->u
[1] >> src1
->u
[1];
859 dst
->u
[2] = src0
->u
[2] >> src1
->u
[2];
860 dst
->u
[3] = src0
->u
[3] >> src1
->u
[3];
865 union tgsi_exec_channel
*dst
,
866 const union tgsi_exec_channel
*src
)
868 dst
->f
[0] = sinf( src
->f
[0] );
869 dst
->f
[1] = sinf( src
->f
[1] );
870 dst
->f
[2] = sinf( src
->f
[2] );
871 dst
->f
[3] = sinf( src
->f
[3] );
875 micro_sqrt( union tgsi_exec_channel
*dst
,
876 const union tgsi_exec_channel
*src
)
878 dst
->f
[0] = sqrtf( src
->f
[0] );
879 dst
->f
[1] = sqrtf( src
->f
[1] );
880 dst
->f
[2] = sqrtf( src
->f
[2] );
881 dst
->f
[3] = sqrtf( src
->f
[3] );
886 union tgsi_exec_channel
*dst
,
887 const union tgsi_exec_channel
*src0
,
888 const union tgsi_exec_channel
*src1
)
890 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
891 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
892 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
893 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
898 union tgsi_exec_channel
*dst
,
899 const union tgsi_exec_channel
*src
)
901 dst
->f
[0] = (float) src
->u
[0];
902 dst
->f
[1] = (float) src
->u
[1];
903 dst
->f
[2] = (float) src
->u
[2];
904 dst
->f
[3] = (float) src
->u
[3];
909 union tgsi_exec_channel
*dst
,
910 const union tgsi_exec_channel
*src0
,
911 const union tgsi_exec_channel
*src1
)
913 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
914 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
915 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
916 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
920 fetch_src_file_channel(
921 const struct tgsi_exec_machine
*mach
,
924 const union tgsi_exec_channel
*index
,
925 union tgsi_exec_channel
*chan
)
928 case TGSI_EXTSWIZZLE_X
:
929 case TGSI_EXTSWIZZLE_Y
:
930 case TGSI_EXTSWIZZLE_Z
:
931 case TGSI_EXTSWIZZLE_W
:
933 case TGSI_FILE_CONSTANT
:
934 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
935 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
936 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
937 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
940 case TGSI_FILE_INPUT
:
941 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
942 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
943 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
944 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
947 case TGSI_FILE_TEMPORARY
:
948 assert(index
->i
[0] < TGSI_EXEC_NUM_TEMPS
);
949 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
950 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
951 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
952 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
955 case TGSI_FILE_IMMEDIATE
:
956 assert( index
->i
[0] < (int) mach
->ImmLimit
);
957 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
958 assert( index
->i
[1] < (int) mach
->ImmLimit
);
959 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
960 assert( index
->i
[2] < (int) mach
->ImmLimit
);
961 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
962 assert( index
->i
[3] < (int) mach
->ImmLimit
);
963 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
966 case TGSI_FILE_ADDRESS
:
967 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
968 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
969 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
970 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
973 case TGSI_FILE_OUTPUT
:
974 /* vertex/fragment output vars can be read too */
975 chan
->u
[0] = mach
->Outputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
976 chan
->u
[1] = mach
->Outputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
977 chan
->u
[2] = mach
->Outputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
978 chan
->u
[3] = mach
->Outputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
986 case TGSI_EXTSWIZZLE_ZERO
:
987 *chan
= mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
];
990 case TGSI_EXTSWIZZLE_ONE
:
991 *chan
= mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
];
1001 const struct tgsi_exec_machine
*mach
,
1002 union tgsi_exec_channel
*chan
,
1003 const struct tgsi_full_src_register
*reg
,
1004 const uint chan_index
)
1006 union tgsi_exec_channel index
;
1012 index
.i
[3] = reg
->SrcRegister
.Index
;
1014 if (reg
->SrcRegister
.Indirect
) {
1015 union tgsi_exec_channel index2
;
1016 union tgsi_exec_channel indir_index
;
1021 index2
.i
[3] = reg
->SrcRegisterInd
.Index
;
1023 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterInd
, CHAN_X
);
1024 fetch_src_file_channel(
1026 reg
->SrcRegisterInd
.File
,
1031 index
.i
[0] += indir_index
.i
[0];
1032 index
.i
[1] += indir_index
.i
[1];
1033 index
.i
[2] += indir_index
.i
[2];
1034 index
.i
[3] += indir_index
.i
[3];
1037 if( reg
->SrcRegister
.Dimension
) {
1038 switch( reg
->SrcRegister
.File
) {
1039 case TGSI_FILE_INPUT
:
1045 case TGSI_FILE_CONSTANT
:
1055 index
.i
[0] += reg
->SrcRegisterDim
.Index
;
1056 index
.i
[1] += reg
->SrcRegisterDim
.Index
;
1057 index
.i
[2] += reg
->SrcRegisterDim
.Index
;
1058 index
.i
[3] += reg
->SrcRegisterDim
.Index
;
1060 if (reg
->SrcRegisterDim
.Indirect
) {
1061 union tgsi_exec_channel index2
;
1062 union tgsi_exec_channel indir_index
;
1067 index2
.i
[3] = reg
->SrcRegisterDimInd
.Index
;
1069 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterDimInd
, CHAN_X
);
1070 fetch_src_file_channel(
1072 reg
->SrcRegisterDimInd
.File
,
1077 index
.i
[0] += indir_index
.i
[0];
1078 index
.i
[1] += indir_index
.i
[1];
1079 index
.i
[2] += indir_index
.i
[2];
1080 index
.i
[3] += indir_index
.i
[3];
1084 swizzle
= tgsi_util_get_full_src_register_extswizzle( reg
, chan_index
);
1085 fetch_src_file_channel(
1087 reg
->SrcRegister
.File
,
1092 switch (tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
)) {
1093 case TGSI_UTIL_SIGN_CLEAR
:
1094 micro_abs( chan
, chan
);
1097 case TGSI_UTIL_SIGN_SET
:
1098 micro_abs( chan
, chan
);
1099 micro_neg( chan
, chan
);
1102 case TGSI_UTIL_SIGN_TOGGLE
:
1103 micro_neg( chan
, chan
);
1106 case TGSI_UTIL_SIGN_KEEP
:
1110 if (reg
->SrcRegisterExtMod
.Complement
) {
1111 micro_sub( chan
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], chan
);
1117 struct tgsi_exec_machine
*mach
,
1118 const union tgsi_exec_channel
*chan
,
1119 const struct tgsi_full_dst_register
*reg
,
1120 const struct tgsi_full_instruction
*inst
,
1124 union tgsi_exec_channel null
;
1125 union tgsi_exec_channel
*dst
;
1126 uint execmask
= mach
->ExecMask
;
1128 switch (reg
->DstRegister
.File
) {
1129 case TGSI_FILE_NULL
:
1133 case TGSI_FILE_OUTPUT
:
1134 dst
= &mach
->Outputs
[mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1135 + reg
->DstRegister
.Index
].xyzw
[chan_index
];
1138 case TGSI_FILE_TEMPORARY
:
1139 assert( reg
->DstRegister
.Index
< TGSI_EXEC_NUM_TEMPS
);
1140 dst
= &mach
->Temps
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1143 case TGSI_FILE_ADDRESS
:
1144 dst
= &mach
->Addrs
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1152 if (inst
->InstructionExtNv
.CondFlowEnable
) {
1153 union tgsi_exec_channel
*cc
= &mach
->Temps
[TEMP_CC_I
].xyzw
[TEMP_CC_C
];
1159 /* Only CC0 supported.
1161 assert( inst
->InstructionExtNv
.CondFlowIndex
< 1 );
1163 switch (chan_index
) {
1165 swizzle
= inst
->InstructionExtNv
.CondSwizzleX
;
1168 swizzle
= inst
->InstructionExtNv
.CondSwizzleY
;
1171 swizzle
= inst
->InstructionExtNv
.CondSwizzleZ
;
1174 swizzle
= inst
->InstructionExtNv
.CondSwizzleW
;
1182 case TGSI_SWIZZLE_X
:
1183 shift
= TGSI_EXEC_CC_X_SHIFT
;
1184 mask
= TGSI_EXEC_CC_X_MASK
;
1186 case TGSI_SWIZZLE_Y
:
1187 shift
= TGSI_EXEC_CC_Y_SHIFT
;
1188 mask
= TGSI_EXEC_CC_Y_MASK
;
1190 case TGSI_SWIZZLE_Z
:
1191 shift
= TGSI_EXEC_CC_Z_SHIFT
;
1192 mask
= TGSI_EXEC_CC_Z_MASK
;
1194 case TGSI_SWIZZLE_W
:
1195 shift
= TGSI_EXEC_CC_W_SHIFT
;
1196 mask
= TGSI_EXEC_CC_W_MASK
;
1203 switch (inst
->InstructionExtNv
.CondMask
) {
1205 test
= ~(TGSI_EXEC_CC_GT
<< shift
) & mask
;
1206 for (i
= 0; i
< QUAD_SIZE
; i
++)
1207 if (cc
->u
[i
] & test
)
1208 execmask
&= ~(1 << i
);
1212 test
= ~(TGSI_EXEC_CC_EQ
<< shift
) & mask
;
1213 for (i
= 0; i
< QUAD_SIZE
; i
++)
1214 if (cc
->u
[i
] & test
)
1215 execmask
&= ~(1 << i
);
1219 test
= ~(TGSI_EXEC_CC_LT
<< shift
) & mask
;
1220 for (i
= 0; i
< QUAD_SIZE
; i
++)
1221 if (cc
->u
[i
] & test
)
1222 execmask
&= ~(1 << i
);
1226 test
= ~((TGSI_EXEC_CC_GT
| TGSI_EXEC_CC_EQ
) << shift
) & mask
;
1227 for (i
= 0; i
< QUAD_SIZE
; i
++)
1228 if (cc
->u
[i
] & test
)
1229 execmask
&= ~(1 << i
);
1233 test
= ~((TGSI_EXEC_CC_LT
| TGSI_EXEC_CC_EQ
) << shift
) & mask
;
1234 for (i
= 0; i
< QUAD_SIZE
; i
++)
1235 if (cc
->u
[i
] & test
)
1236 execmask
&= ~(1 << i
);
1240 test
= ~((TGSI_EXEC_CC_GT
| TGSI_EXEC_CC_LT
| TGSI_EXEC_CC_UN
) << shift
) & mask
;
1241 for (i
= 0; i
< QUAD_SIZE
; i
++)
1242 if (cc
->u
[i
] & test
)
1243 execmask
&= ~(1 << i
);
1250 for (i
= 0; i
< QUAD_SIZE
; i
++)
1251 execmask
&= ~(1 << i
);
1260 switch (inst
->Instruction
.Saturate
) {
1262 for (i
= 0; i
< QUAD_SIZE
; i
++)
1263 if (execmask
& (1 << i
))
1264 dst
->i
[i
] = chan
->i
[i
];
1267 case TGSI_SAT_ZERO_ONE
:
1268 for (i
= 0; i
< QUAD_SIZE
; i
++)
1269 if (execmask
& (1 << i
)) {
1270 if (chan
->f
[i
] < 0.0f
)
1272 else if (chan
->f
[i
] > 1.0f
)
1275 dst
->i
[i
] = chan
->i
[i
];
1279 case TGSI_SAT_MINUS_PLUS_ONE
:
1280 for (i
= 0; i
< QUAD_SIZE
; i
++)
1281 if (execmask
& (1 << i
)) {
1282 if (chan
->f
[i
] < -1.0f
)
1284 else if (chan
->f
[i
] > 1.0f
)
1287 dst
->i
[i
] = chan
->i
[i
];
1295 if (inst
->InstructionExtNv
.CondDstUpdate
) {
1296 union tgsi_exec_channel
*cc
= &mach
->Temps
[TEMP_CC_I
].xyzw
[TEMP_CC_C
];
1300 /* Only CC0 supported. */
1302 assert( inst
->InstructionExtNv
.CondDstIndex
< 1 );
1304 switch (chan_index
) {
1306 shift
= TGSI_EXEC_CC_X_SHIFT
;
1307 mask
= ~TGSI_EXEC_CC_X_MASK
;
1310 shift
= TGSI_EXEC_CC_Y_SHIFT
;
1311 mask
= ~TGSI_EXEC_CC_Y_MASK
;
1314 shift
= TGSI_EXEC_CC_Z_SHIFT
;
1315 mask
= ~TGSI_EXEC_CC_Z_MASK
;
1318 shift
= TGSI_EXEC_CC_W_SHIFT
;
1319 mask
= ~TGSI_EXEC_CC_W_MASK
;
1326 for (i
= 0; i
< QUAD_SIZE
; i
++)
1327 if (execmask
& (1 << i
)) {
1329 if (dst
->f
[i
] < 0.0f
)
1330 cc
->u
[i
] |= TGSI_EXEC_CC_LT
<< shift
;
1331 else if (dst
->f
[i
] > 0.0f
)
1332 cc
->u
[i
] |= TGSI_EXEC_CC_GT
<< shift
;
1333 else if (dst
->f
[i
] == 0.0f
)
1334 cc
->u
[i
] |= TGSI_EXEC_CC_EQ
<< shift
;
1336 cc
->u
[i
] |= TGSI_EXEC_CC_UN
<< shift
;
/*
 * Shorthand used by the opcode handlers.  Both macros expand in a scope that
 * must provide `mach` (the executing machine) and `inst` (the instruction
 * being executed).
 *
 * FETCH(VAL, INDEX, CHAN): fetch_source() for channel CHAN of source
 *                          operand INDEX, result placed in *VAL.
 * STORE(VAL, INDEX, CHAN): store_dest() of *VAL to channel CHAN of
 *                          destination operand INDEX.
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)

#define STORE(VAL,INDEX,CHAN)\
    store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1349 * Execute ARB-style KIL which is predicated by a src register.
1350 * Kill fragment if any of the four values is less than zero.
1353 exec_kil(struct tgsi_exec_machine
*mach
,
1354 const struct tgsi_full_instruction
*inst
)
1358 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1359 union tgsi_exec_channel r
[1];
1361 /* This mask stores component bits that were already tested. Note that
1362 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1364 uniquemask
= (1 << TGSI_EXTSWIZZLE_ZERO
) | (1 << TGSI_EXTSWIZZLE_ONE
);
1366 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1371 /* unswizzle channel */
1372 swizzle
= tgsi_util_get_full_src_register_extswizzle (
1373 &inst
->FullSrcRegisters
[0],
1376 /* check if the component has not been already tested */
1377 if (uniquemask
& (1 << swizzle
))
1379 uniquemask
|= 1 << swizzle
;
1381 FETCH(&r
[0], 0, chan_index
);
1382 for (i
= 0; i
< 4; i
++)
1383 if (r
[0].f
[i
] < 0.0f
)
1387 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1391 * Execute NVIDIA-style KIL which is predicated by a condition code.
1392 * Kill fragment if the condition code is TRUE.
1395 exec_kilp(struct tgsi_exec_machine
*mach
,
1396 const struct tgsi_full_instruction
*inst
)
1398 uint kilmask
; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1400 if (inst
->InstructionExtNv
.CondFlowEnable
) {
1406 swizzle
[0] = inst
->InstructionExtNv
.CondSwizzleX
;
1407 swizzle
[1] = inst
->InstructionExtNv
.CondSwizzleY
;
1408 swizzle
[2] = inst
->InstructionExtNv
.CondSwizzleZ
;
1409 swizzle
[3] = inst
->InstructionExtNv
.CondSwizzleW
;
1411 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1415 for (i
= 0; i
< 4; i
++) {
1416 /* TODO: evaluate the condition code */
1423 /* "unconditional" kil */
1424 kilmask
= mach
->ExecMask
;
1426 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1431 * Fetch a texel using STR texture coordinates.
1434 fetch_texel( struct tgsi_sampler
*sampler
,
1435 const union tgsi_exec_channel
*s
,
1436 const union tgsi_exec_channel
*t
,
1437 const union tgsi_exec_channel
*p
,
1438 float lodbias
, /* XXX should be float[4] */
1439 union tgsi_exec_channel
*r
,
1440 union tgsi_exec_channel
*g
,
1441 union tgsi_exec_channel
*b
,
1442 union tgsi_exec_channel
*a
)
1445 float rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1447 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, lodbias
, rgba
);
1449 for (j
= 0; j
< 4; j
++) {
1450 r
->f
[j
] = rgba
[0][j
];
1451 g
->f
[j
] = rgba
[1][j
];
1452 b
->f
[j
] = rgba
[2][j
];
1453 a
->f
[j
] = rgba
[3][j
];
1459 exec_tex(struct tgsi_exec_machine
*mach
,
1460 const struct tgsi_full_instruction
*inst
,
1464 const uint unit
= inst
->FullSrcRegisters
[1].SrcRegister
.Index
;
1465 union tgsi_exec_channel r
[8];
1469 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1471 switch (inst
->InstructionExtTexture
.Texture
) {
1472 case TGSI_TEXTURE_1D
:
1474 FETCH(&r
[0], 0, CHAN_X
);
1477 FETCH(&r
[1], 0, CHAN_W
);
1478 micro_div( &r
[0], &r
[0], &r
[1] );
1482 FETCH(&r
[1], 0, CHAN_W
);
1483 lodBias
= r
[2].f
[0];
1488 fetch_texel(&mach
->Samplers
[unit
],
1489 &r
[0], NULL
, NULL
, lodBias
, /* S, T, P, BIAS */
1490 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1493 case TGSI_TEXTURE_2D
:
1494 case TGSI_TEXTURE_RECT
:
1496 FETCH(&r
[0], 0, CHAN_X
);
1497 FETCH(&r
[1], 0, CHAN_Y
);
1498 FETCH(&r
[2], 0, CHAN_Z
);
1501 FETCH(&r
[3], 0, CHAN_W
);
1502 micro_div( &r
[0], &r
[0], &r
[3] );
1503 micro_div( &r
[1], &r
[1], &r
[3] );
1504 micro_div( &r
[2], &r
[2], &r
[3] );
1508 FETCH(&r
[3], 0, CHAN_W
);
1509 lodBias
= r
[3].f
[0];
1514 fetch_texel(&mach
->Samplers
[unit
],
1515 &r
[0], &r
[1], &r
[2], lodBias
, /* inputs */
1516 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1519 case TGSI_TEXTURE_3D
:
1520 case TGSI_TEXTURE_CUBE
:
1522 FETCH(&r
[0], 0, CHAN_X
);
1523 FETCH(&r
[1], 0, CHAN_Y
);
1524 FETCH(&r
[2], 0, CHAN_Z
);
1527 FETCH(&r
[3], 0, CHAN_W
);
1528 micro_div( &r
[0], &r
[0], &r
[3] );
1529 micro_div( &r
[1], &r
[1], &r
[3] );
1530 micro_div( &r
[2], &r
[2], &r
[3] );
1534 FETCH(&r
[3], 0, CHAN_W
);
1535 lodBias
= r
[3].f
[0];
1540 fetch_texel(&mach
->Samplers
[unit
],
1541 &r
[0], &r
[1], &r
[2], lodBias
,
1542 &r
[0], &r
[1], &r
[2], &r
[3]);
1549 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1550 STORE( &r
[chan_index
], 0, chan_index
);
1556 * Evaluate a constant-valued coefficient at the position of the
1561 struct tgsi_exec_machine
*mach
,
1567 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1568 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1573 * Evaluate a linear-valued coefficient at the position of the
1578 struct tgsi_exec_machine
*mach
,
1582 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1583 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1584 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1585 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1586 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1587 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
1588 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
1589 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
1590 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
1594 * Evaluate a perspective-valued coefficient at the position of the
1598 eval_perspective_coef(
1599 struct tgsi_exec_machine
*mach
,
1603 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1604 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1605 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1606 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1607 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1608 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
1609 /* divide by W here */
1610 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
1611 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
1612 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
1613 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
/** Common signature of the eval_*_coef() attribute interpolation helpers. */
typedef void (* eval_coef_func)(
   struct tgsi_exec_machine *mach,
   unsigned attrib,
   unsigned chan );
1624 struct tgsi_exec_machine
*mach
,
1625 const struct tgsi_full_declaration
*decl
)
1627 if( mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1628 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1629 unsigned first
, last
, mask
;
1630 eval_coef_func eval
;
1632 first
= decl
->DeclarationRange
.First
;
1633 last
= decl
->DeclarationRange
.Last
;
1634 mask
= decl
->Declaration
.UsageMask
;
1636 switch( decl
->Declaration
.Interpolate
) {
1637 case TGSI_INTERPOLATE_CONSTANT
:
1638 eval
= eval_constant_coef
;
1641 case TGSI_INTERPOLATE_LINEAR
:
1642 eval
= eval_linear_coef
;
1645 case TGSI_INTERPOLATE_PERSPECTIVE
:
1646 eval
= eval_perspective_coef
;
1653 if( mask
== TGSI_WRITEMASK_XYZW
) {
1656 for( i
= first
; i
<= last
; i
++ ) {
1657 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1665 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1666 if( mask
& (1 << j
) ) {
1667 for( i
= first
; i
<= last
; i
++ ) {
1679 struct tgsi_exec_machine
*mach
,
1680 const struct tgsi_full_instruction
*inst
,
1684 union tgsi_exec_channel r
[8];
1688 switch (inst
->Instruction
.Opcode
) {
1689 case TGSI_OPCODE_ARL
:
1690 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1691 FETCH( &r
[0], 0, chan_index
);
1692 micro_f2it( &r
[0], &r
[0] );
1693 STORE( &r
[0], 0, chan_index
);
1697 case TGSI_OPCODE_MOV
:
1698 case TGSI_OPCODE_SWZ
:
1699 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1700 FETCH( &r
[0], 0, chan_index
);
1701 STORE( &r
[0], 0, chan_index
);
1705 case TGSI_OPCODE_LIT
:
1706 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1707 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1710 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1711 FETCH( &r
[0], 0, CHAN_X
);
1712 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1713 micro_max( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1714 STORE( &r
[0], 0, CHAN_Y
);
1717 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1718 FETCH( &r
[1], 0, CHAN_Y
);
1719 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1721 FETCH( &r
[2], 0, CHAN_W
);
1722 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
1723 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
1724 micro_pow( &r
[1], &r
[1], &r
[2] );
1725 micro_lt( &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1726 STORE( &r
[0], 0, CHAN_Z
);
1730 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1731 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1735 case TGSI_OPCODE_RCP
:
1736 /* TGSI_OPCODE_RECIP */
1737 FETCH( &r
[0], 0, CHAN_X
);
1738 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1739 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1740 STORE( &r
[0], 0, chan_index
);
1744 case TGSI_OPCODE_RSQ
:
1745 /* TGSI_OPCODE_RECIPSQRT */
1746 FETCH( &r
[0], 0, CHAN_X
);
1747 micro_sqrt( &r
[0], &r
[0] );
1748 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1749 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1750 STORE( &r
[0], 0, chan_index
);
1754 case TGSI_OPCODE_EXP
:
1755 FETCH( &r
[0], 0, CHAN_X
);
1756 micro_flr( &r
[1], &r
[0] ); /* r1 = floor(r0) */
1757 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1758 micro_exp2( &r
[2], &r
[1] ); /* r2 = 2 ^ r1 */
1759 STORE( &r
[2], 0, CHAN_X
); /* store r2 */
1761 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1762 micro_sub( &r
[2], &r
[0], &r
[1] ); /* r2 = r0 - r1 */
1763 STORE( &r
[2], 0, CHAN_Y
); /* store r2 */
1765 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1766 micro_exp2( &r
[2], &r
[0] ); /* r2 = 2 ^ r0 */
1767 STORE( &r
[2], 0, CHAN_Z
); /* store r2 */
1769 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1770 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1774 case TGSI_OPCODE_LOG
:
1775 FETCH( &r
[0], 0, CHAN_X
);
1776 micro_abs( &r
[2], &r
[0] ); /* r2 = abs(r0) */
1777 micro_lg2( &r
[1], &r
[2] ); /* r1 = lg2(r2) */
1778 micro_flr( &r
[0], &r
[1] ); /* r0 = floor(r1) */
1779 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1780 STORE( &r
[0], 0, CHAN_X
);
1782 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1783 micro_exp2( &r
[0], &r
[0] ); /* r0 = 2 ^ r0 */
1784 micro_div( &r
[0], &r
[2], &r
[0] ); /* r0 = r2 / r0 */
1785 STORE( &r
[0], 0, CHAN_Y
);
1787 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1788 STORE( &r
[1], 0, CHAN_Z
);
1790 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1791 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1795 case TGSI_OPCODE_MUL
:
1796 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
)
1798 FETCH(&r
[0], 0, chan_index
);
1799 FETCH(&r
[1], 1, chan_index
);
1801 micro_mul( &r
[0], &r
[0], &r
[1] );
1803 STORE(&r
[0], 0, chan_index
);
1807 case TGSI_OPCODE_ADD
:
1808 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1809 FETCH( &r
[0], 0, chan_index
);
1810 FETCH( &r
[1], 1, chan_index
);
1811 micro_add( &r
[0], &r
[0], &r
[1] );
1812 STORE( &r
[0], 0, chan_index
);
1816 case TGSI_OPCODE_DP3
:
1817 /* TGSI_OPCODE_DOT3 */
1818 FETCH( &r
[0], 0, CHAN_X
);
1819 FETCH( &r
[1], 1, CHAN_X
);
1820 micro_mul( &r
[0], &r
[0], &r
[1] );
1822 FETCH( &r
[1], 0, CHAN_Y
);
1823 FETCH( &r
[2], 1, CHAN_Y
);
1824 micro_mul( &r
[1], &r
[1], &r
[2] );
1825 micro_add( &r
[0], &r
[0], &r
[1] );
1827 FETCH( &r
[1], 0, CHAN_Z
);
1828 FETCH( &r
[2], 1, CHAN_Z
);
1829 micro_mul( &r
[1], &r
[1], &r
[2] );
1830 micro_add( &r
[0], &r
[0], &r
[1] );
1832 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1833 STORE( &r
[0], 0, chan_index
);
1837 case TGSI_OPCODE_DP4
:
1838 /* TGSI_OPCODE_DOT4 */
1839 FETCH(&r
[0], 0, CHAN_X
);
1840 FETCH(&r
[1], 1, CHAN_X
);
1842 micro_mul( &r
[0], &r
[0], &r
[1] );
1844 FETCH(&r
[1], 0, CHAN_Y
);
1845 FETCH(&r
[2], 1, CHAN_Y
);
1847 micro_mul( &r
[1], &r
[1], &r
[2] );
1848 micro_add( &r
[0], &r
[0], &r
[1] );
1850 FETCH(&r
[1], 0, CHAN_Z
);
1851 FETCH(&r
[2], 1, CHAN_Z
);
1853 micro_mul( &r
[1], &r
[1], &r
[2] );
1854 micro_add( &r
[0], &r
[0], &r
[1] );
1856 FETCH(&r
[1], 0, CHAN_W
);
1857 FETCH(&r
[2], 1, CHAN_W
);
1859 micro_mul( &r
[1], &r
[1], &r
[2] );
1860 micro_add( &r
[0], &r
[0], &r
[1] );
1862 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1863 STORE( &r
[0], 0, chan_index
);
1867 case TGSI_OPCODE_DST
:
1868 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1869 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1872 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1873 FETCH( &r
[0], 0, CHAN_Y
);
1874 FETCH( &r
[1], 1, CHAN_Y
);
1875 micro_mul( &r
[0], &r
[0], &r
[1] );
1876 STORE( &r
[0], 0, CHAN_Y
);
1879 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1880 FETCH( &r
[0], 0, CHAN_Z
);
1881 STORE( &r
[0], 0, CHAN_Z
);
1884 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1885 FETCH( &r
[0], 1, CHAN_W
);
1886 STORE( &r
[0], 0, CHAN_W
);
1890 case TGSI_OPCODE_MIN
:
1891 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1892 FETCH(&r
[0], 0, chan_index
);
1893 FETCH(&r
[1], 1, chan_index
);
1895 /* XXX use micro_min()?? */
1896 micro_lt( &r
[0], &r
[0], &r
[1], &r
[0], &r
[1] );
1898 STORE(&r
[0], 0, chan_index
);
1902 case TGSI_OPCODE_MAX
:
1903 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1904 FETCH(&r
[0], 0, chan_index
);
1905 FETCH(&r
[1], 1, chan_index
);
1907 /* XXX use micro_max()?? */
1908 micro_lt( &r
[0], &r
[0], &r
[1], &r
[1], &r
[0] );
1910 STORE(&r
[0], 0, chan_index
);
1914 case TGSI_OPCODE_SLT
:
1915 /* TGSI_OPCODE_SETLT */
1916 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1917 FETCH( &r
[0], 0, chan_index
);
1918 FETCH( &r
[1], 1, chan_index
);
1919 micro_lt( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1920 STORE( &r
[0], 0, chan_index
);
1924 case TGSI_OPCODE_SGE
:
1925 /* TGSI_OPCODE_SETGE */
1926 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1927 FETCH( &r
[0], 0, chan_index
);
1928 FETCH( &r
[1], 1, chan_index
);
1929 micro_ge( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1930 STORE( &r
[0], 0, chan_index
);
1934 case TGSI_OPCODE_MAD
:
1935 /* TGSI_OPCODE_MADD */
1936 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1937 FETCH( &r
[0], 0, chan_index
);
1938 FETCH( &r
[1], 1, chan_index
);
1939 micro_mul( &r
[0], &r
[0], &r
[1] );
1940 FETCH( &r
[1], 2, chan_index
);
1941 micro_add( &r
[0], &r
[0], &r
[1] );
1942 STORE( &r
[0], 0, chan_index
);
1946 case TGSI_OPCODE_SUB
:
1947 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1948 FETCH(&r
[0], 0, chan_index
);
1949 FETCH(&r
[1], 1, chan_index
);
1951 micro_sub( &r
[0], &r
[0], &r
[1] );
1953 STORE(&r
[0], 0, chan_index
);
1957 case TGSI_OPCODE_LERP
:
1958 /* TGSI_OPCODE_LRP */
1959 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1960 FETCH(&r
[0], 0, chan_index
);
1961 FETCH(&r
[1], 1, chan_index
);
1962 FETCH(&r
[2], 2, chan_index
);
1964 micro_sub( &r
[1], &r
[1], &r
[2] );
1965 micro_mul( &r
[0], &r
[0], &r
[1] );
1966 micro_add( &r
[0], &r
[0], &r
[2] );
1968 STORE(&r
[0], 0, chan_index
);
1972 case TGSI_OPCODE_CND
:
1976 case TGSI_OPCODE_CND0
:
1980 case TGSI_OPCODE_DOT2ADD
:
1981 /* TGSI_OPCODE_DP2A */
1985 case TGSI_OPCODE_INDEX
:
1989 case TGSI_OPCODE_NEGATE
:
1993 case TGSI_OPCODE_FRAC
:
1994 /* TGSI_OPCODE_FRC */
1995 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1996 FETCH( &r
[0], 0, chan_index
);
1997 micro_frc( &r
[0], &r
[0] );
1998 STORE( &r
[0], 0, chan_index
);
2002 case TGSI_OPCODE_CLAMP
:
2006 case TGSI_OPCODE_FLOOR
:
2007 /* TGSI_OPCODE_FLR */
2008 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2009 FETCH( &r
[0], 0, chan_index
);
2010 micro_flr( &r
[0], &r
[0] );
2011 STORE( &r
[0], 0, chan_index
);
2015 case TGSI_OPCODE_ROUND
:
2016 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2017 FETCH( &r
[0], 0, chan_index
);
2018 micro_rnd( &r
[0], &r
[0] );
2019 STORE( &r
[0], 0, chan_index
);
2023 case TGSI_OPCODE_EXPBASE2
:
2024 /* TGSI_OPCODE_EX2 */
2025 FETCH(&r
[0], 0, CHAN_X
);
2027 micro_pow( &r
[0], &mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
], &r
[0] );
2029 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2030 STORE( &r
[0], 0, chan_index
);
2034 case TGSI_OPCODE_LOGBASE2
:
2035 /* TGSI_OPCODE_LG2 */
2036 FETCH( &r
[0], 0, CHAN_X
);
2037 micro_lg2( &r
[0], &r
[0] );
2038 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2039 STORE( &r
[0], 0, chan_index
);
2043 case TGSI_OPCODE_POWER
:
2044 /* TGSI_OPCODE_POW */
2045 FETCH(&r
[0], 0, CHAN_X
);
2046 FETCH(&r
[1], 1, CHAN_X
);
2048 micro_pow( &r
[0], &r
[0], &r
[1] );
2050 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2051 STORE( &r
[0], 0, chan_index
);
2055 case TGSI_OPCODE_CROSSPRODUCT
:
2056 /* TGSI_OPCODE_XPD */
2057 FETCH(&r
[0], 0, CHAN_Y
);
2058 FETCH(&r
[1], 1, CHAN_Z
);
2060 micro_mul( &r
[2], &r
[0], &r
[1] );
2062 FETCH(&r
[3], 0, CHAN_Z
);
2063 FETCH(&r
[4], 1, CHAN_Y
);
2065 micro_mul( &r
[5], &r
[3], &r
[4] );
2066 micro_sub( &r
[2], &r
[2], &r
[5] );
2068 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2069 STORE( &r
[2], 0, CHAN_X
);
2072 FETCH(&r
[2], 1, CHAN_X
);
2074 micro_mul( &r
[3], &r
[3], &r
[2] );
2076 FETCH(&r
[5], 0, CHAN_X
);
2078 micro_mul( &r
[1], &r
[1], &r
[5] );
2079 micro_sub( &r
[3], &r
[3], &r
[1] );
2081 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2082 STORE( &r
[3], 0, CHAN_Y
);
2085 micro_mul( &r
[5], &r
[5], &r
[4] );
2086 micro_mul( &r
[0], &r
[0], &r
[2] );
2087 micro_sub( &r
[5], &r
[5], &r
[0] );
2089 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2090 STORE( &r
[5], 0, CHAN_Z
);
2093 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2094 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2098 case TGSI_OPCODE_MULTIPLYMATRIX
:
2102 case TGSI_OPCODE_ABS
:
2103 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2104 FETCH(&r
[0], 0, chan_index
);
2106 micro_abs( &r
[0], &r
[0] );
2108 STORE(&r
[0], 0, chan_index
);
2112 case TGSI_OPCODE_RCC
:
2116 case TGSI_OPCODE_DPH
:
2117 FETCH(&r
[0], 0, CHAN_X
);
2118 FETCH(&r
[1], 1, CHAN_X
);
2120 micro_mul( &r
[0], &r
[0], &r
[1] );
2122 FETCH(&r
[1], 0, CHAN_Y
);
2123 FETCH(&r
[2], 1, CHAN_Y
);
2125 micro_mul( &r
[1], &r
[1], &r
[2] );
2126 micro_add( &r
[0], &r
[0], &r
[1] );
2128 FETCH(&r
[1], 0, CHAN_Z
);
2129 FETCH(&r
[2], 1, CHAN_Z
);
2131 micro_mul( &r
[1], &r
[1], &r
[2] );
2132 micro_add( &r
[0], &r
[0], &r
[1] );
2134 FETCH(&r
[1], 1, CHAN_W
);
2136 micro_add( &r
[0], &r
[0], &r
[1] );
2138 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2139 STORE( &r
[0], 0, chan_index
);
2143 case TGSI_OPCODE_COS
:
2144 FETCH(&r
[0], 0, CHAN_X
);
2146 micro_cos( &r
[0], &r
[0] );
2148 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2149 STORE( &r
[0], 0, chan_index
);
2153 case TGSI_OPCODE_DDX
:
2154 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2155 FETCH( &r
[0], 0, chan_index
);
2156 micro_ddx( &r
[0], &r
[0] );
2157 STORE( &r
[0], 0, chan_index
);
2161 case TGSI_OPCODE_DDY
:
2162 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2163 FETCH( &r
[0], 0, chan_index
);
2164 micro_ddy( &r
[0], &r
[0] );
2165 STORE( &r
[0], 0, chan_index
);
2169 case TGSI_OPCODE_KILP
:
2170 exec_kilp (mach
, inst
);
2173 case TGSI_OPCODE_KIL
:
2174 exec_kil (mach
, inst
);
2177 case TGSI_OPCODE_PK2H
:
2181 case TGSI_OPCODE_PK2US
:
2185 case TGSI_OPCODE_PK4B
:
2189 case TGSI_OPCODE_PK4UB
:
2193 case TGSI_OPCODE_RFL
:
2197 case TGSI_OPCODE_SEQ
:
2198 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2199 FETCH( &r
[0], 0, chan_index
);
2200 FETCH( &r
[1], 1, chan_index
);
2201 micro_eq( &r
[0], &r
[0], &r
[1],
2202 &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
],
2203 &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2204 STORE( &r
[0], 0, chan_index
);
2208 case TGSI_OPCODE_SFL
:
2212 case TGSI_OPCODE_SGT
:
2213 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2214 FETCH( &r
[0], 0, chan_index
);
2215 FETCH( &r
[1], 1, chan_index
);
2216 micro_le( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
2217 STORE( &r
[0], 0, chan_index
);
2221 case TGSI_OPCODE_SIN
:
2222 FETCH( &r
[0], 0, CHAN_X
);
2223 micro_sin( &r
[0], &r
[0] );
2224 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2225 STORE( &r
[0], 0, chan_index
);
2229 case TGSI_OPCODE_SLE
:
2230 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2231 FETCH( &r
[0], 0, chan_index
);
2232 FETCH( &r
[1], 1, chan_index
);
2233 micro_le( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2234 STORE( &r
[0], 0, chan_index
);
2238 case TGSI_OPCODE_SNE
:
2239 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2240 FETCH( &r
[0], 0, chan_index
);
2241 FETCH( &r
[1], 1, chan_index
);
2242 micro_eq( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
2243 STORE( &r
[0], 0, chan_index
);
2247 case TGSI_OPCODE_STR
:
2251 case TGSI_OPCODE_TEX
:
2252 /* simple texture lookup */
2253 /* src[0] = texcoord */
2254 /* src[1] = sampler unit */
2255 exec_tex(mach
, inst
, FALSE
, FALSE
);
2258 case TGSI_OPCODE_TXB
:
2259 /* Texture lookup with lod bias */
2260 /* src[0] = texcoord (src[0].w = LOD bias) */
2261 /* src[1] = sampler unit */
2262 exec_tex(mach
, inst
, TRUE
, FALSE
);
2265 case TGSI_OPCODE_TXD
:
2266 /* Texture lookup with explicit partial derivatives */
2267 /* src[0] = texcoord */
2268 /* src[1] = d[strq]/dx */
2269 /* src[2] = d[strq]/dy */
2270 /* src[3] = sampler unit */
2274 case TGSI_OPCODE_TXL
:
2275 /* Texture lookup with explicit LOD */
2276 /* src[0] = texcoord (src[0].w = LOD) */
2277 /* src[1] = sampler unit */
2278 exec_tex(mach
, inst
, TRUE
, FALSE
);
2281 case TGSI_OPCODE_TXP
:
2282 /* Texture lookup with projection */
2283 /* src[0] = texcoord (src[0].w = projection) */
2284 /* src[1] = sampler unit */
2285 exec_tex(mach
, inst
, FALSE
, TRUE
);
2288 case TGSI_OPCODE_UP2H
:
2292 case TGSI_OPCODE_UP2US
:
2296 case TGSI_OPCODE_UP4B
:
2300 case TGSI_OPCODE_UP4UB
:
2304 case TGSI_OPCODE_X2D
:
2308 case TGSI_OPCODE_ARA
:
2312 case TGSI_OPCODE_ARR
:
2316 case TGSI_OPCODE_BRA
:
2320 case TGSI_OPCODE_CAL
:
2321 /* skip the call if no execution channels are enabled */
2322 if (mach
->ExecMask
) {
2325 /* push the Cond, Loop, Cont stacks */
2326 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2327 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2328 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2329 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2330 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2331 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2333 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
2334 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
2336 /* note that PC was already incremented above */
2337 mach
->CallStack
[mach
->CallStackTop
++] = *pc
;
2338 *pc
= inst
->InstructionExtLabel
.Label
;
2342 case TGSI_OPCODE_RET
:
2343 mach
->FuncMask
&= ~mach
->ExecMask
;
2344 UPDATE_EXEC_MASK(mach
);
2346 if (mach
->FuncMask
== 0x0) {
2347 /* really return now (otherwise, keep executing) */
2349 if (mach
->CallStackTop
== 0) {
2350 /* returning from main() */
2354 *pc
= mach
->CallStack
[--mach
->CallStackTop
];
2356 /* pop the Cond, Loop, Cont stacks */
2357 assert(mach
->CondStackTop
> 0);
2358 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2359 assert(mach
->LoopStackTop
> 0);
2360 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2361 assert(mach
->ContStackTop
> 0);
2362 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2363 assert(mach
->FuncStackTop
> 0);
2364 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
2366 UPDATE_EXEC_MASK(mach
);
2370 case TGSI_OPCODE_SSG
:
2374 case TGSI_OPCODE_CMP
:
2375 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2376 FETCH(&r
[0], 0, chan_index
);
2377 FETCH(&r
[1], 1, chan_index
);
2378 FETCH(&r
[2], 2, chan_index
);
2380 micro_lt( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2] );
2382 STORE(&r
[0], 0, chan_index
);
2386 case TGSI_OPCODE_SCS
:
2387 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2388 FETCH( &r
[0], 0, CHAN_X
);
2390 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) ) {
2391 micro_cos( &r
[1], &r
[0] );
2392 STORE( &r
[1], 0, CHAN_X
);
2394 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2395 micro_sin( &r
[1], &r
[0] );
2396 STORE( &r
[1], 0, CHAN_Y
);
2398 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
2399 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
2401 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
2402 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2406 case TGSI_OPCODE_NRM
:
2410 case TGSI_OPCODE_DIV
:
2414 case TGSI_OPCODE_DP2
:
2415 FETCH( &r
[0], 0, CHAN_X
);
2416 FETCH( &r
[1], 1, CHAN_X
);
2417 micro_mul( &r
[0], &r
[0], &r
[1] );
2419 FETCH( &r
[1], 0, CHAN_Y
);
2420 FETCH( &r
[2], 1, CHAN_Y
);
2421 micro_mul( &r
[1], &r
[1], &r
[2] );
2422 micro_add( &r
[0], &r
[0], &r
[1] );
2424 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2425 STORE( &r
[0], 0, chan_index
);
2429 case TGSI_OPCODE_IF
:
2431 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2432 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2433 FETCH( &r
[0], 0, CHAN_X
);
2434 /* update CondMask */
2436 mach
->CondMask
&= ~0x1;
2439 mach
->CondMask
&= ~0x2;
2442 mach
->CondMask
&= ~0x4;
2445 mach
->CondMask
&= ~0x8;
2447 UPDATE_EXEC_MASK(mach
);
2448 /* Todo: If CondMask==0, jump to ELSE */
2451 case TGSI_OPCODE_ELSE
:
2452 /* invert CondMask wrt previous mask */
2455 assert(mach
->CondStackTop
> 0);
2456 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
2457 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
2458 UPDATE_EXEC_MASK(mach
);
2459 /* Todo: If CondMask==0, jump to ENDIF */
2463 case TGSI_OPCODE_ENDIF
:
2465 assert(mach
->CondStackTop
> 0);
2466 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2467 UPDATE_EXEC_MASK(mach
);
2470 case TGSI_OPCODE_END
:
2471 /* halt execution */
2475 case TGSI_OPCODE_REP
:
2479 case TGSI_OPCODE_ENDREP
:
2483 case TGSI_OPCODE_PUSHA
:
2487 case TGSI_OPCODE_POPA
:
2491 case TGSI_OPCODE_CEIL
:
2492 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2493 FETCH( &r
[0], 0, chan_index
);
2494 micro_ceil( &r
[0], &r
[0] );
2495 STORE( &r
[0], 0, chan_index
);
2499 case TGSI_OPCODE_I2F
:
2500 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2501 FETCH( &r
[0], 0, chan_index
);
2502 micro_i2f( &r
[0], &r
[0] );
2503 STORE( &r
[0], 0, chan_index
);
2507 case TGSI_OPCODE_NOT
:
2508 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2509 FETCH( &r
[0], 0, chan_index
);
2510 micro_not( &r
[0], &r
[0] );
2511 STORE( &r
[0], 0, chan_index
);
2515 case TGSI_OPCODE_TRUNC
:
2516 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2517 FETCH( &r
[0], 0, chan_index
);
2518 micro_trunc( &r
[0], &r
[0] );
2519 STORE( &r
[0], 0, chan_index
);
2523 case TGSI_OPCODE_SHL
:
2524 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2525 FETCH( &r
[0], 0, chan_index
);
2526 FETCH( &r
[1], 1, chan_index
);
2527 micro_shl( &r
[0], &r
[0], &r
[1] );
2528 STORE( &r
[0], 0, chan_index
);
2532 case TGSI_OPCODE_SHR
:
2533 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2534 FETCH( &r
[0], 0, chan_index
);
2535 FETCH( &r
[1], 1, chan_index
);
2536 micro_ishr( &r
[0], &r
[0], &r
[1] );
2537 STORE( &r
[0], 0, chan_index
);
2541 case TGSI_OPCODE_AND
:
2542 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2543 FETCH( &r
[0], 0, chan_index
);
2544 FETCH( &r
[1], 1, chan_index
);
2545 micro_and( &r
[0], &r
[0], &r
[1] );
2546 STORE( &r
[0], 0, chan_index
);
2550 case TGSI_OPCODE_OR
:
2551 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2552 FETCH( &r
[0], 0, chan_index
);
2553 FETCH( &r
[1], 1, chan_index
);
2554 micro_or( &r
[0], &r
[0], &r
[1] );
2555 STORE( &r
[0], 0, chan_index
);
2559 case TGSI_OPCODE_MOD
:
2563 case TGSI_OPCODE_XOR
:
2564 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2565 FETCH( &r
[0], 0, chan_index
);
2566 FETCH( &r
[1], 1, chan_index
);
2567 micro_xor( &r
[0], &r
[0], &r
[1] );
2568 STORE( &r
[0], 0, chan_index
);
2572 case TGSI_OPCODE_SAD
:
2576 case TGSI_OPCODE_TXF
:
2580 case TGSI_OPCODE_TXQ
:
2584 case TGSI_OPCODE_EMIT
:
2585 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += 16;
2586 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
2589 case TGSI_OPCODE_ENDPRIM
:
2590 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]++;
2591 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]] = 0;
2594 case TGSI_OPCODE_LOOP
:
2595 /* fall-through (for now) */
2596 case TGSI_OPCODE_BGNLOOP2
:
2597 /* push LoopMask and ContMasks */
2598 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2599 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2600 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2601 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2604 case TGSI_OPCODE_ENDLOOP
:
2605 /* fall-through (for now at least) */
2606 case TGSI_OPCODE_ENDLOOP2
:
2607 /* Restore ContMask, but don't pop */
2608 assert(mach
->ContStackTop
> 0);
2609 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
2610 UPDATE_EXEC_MASK(mach
);
2611 if (mach
->ExecMask
) {
2612 /* repeat loop: jump to instruction just past BGNLOOP */
2613 *pc
= inst
->InstructionExtLabel
.Label
+ 1;
2616 /* exit loop: pop LoopMask */
2617 assert(mach
->LoopStackTop
> 0);
2618 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2620 assert(mach
->ContStackTop
> 0);
2621 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2623 UPDATE_EXEC_MASK(mach
);
2626 case TGSI_OPCODE_BRK
:
2627 /* turn off loop channels for each enabled exec channel */
2628 mach
->LoopMask
&= ~mach
->ExecMask
;
2629 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2630 UPDATE_EXEC_MASK(mach
);
2633 case TGSI_OPCODE_CONT
:
2634 /* turn off cont channels for each enabled exec channel */
2635 mach
->ContMask
&= ~mach
->ExecMask
;
2636 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2637 UPDATE_EXEC_MASK(mach
);
2640 case TGSI_OPCODE_BGNSUB
:
2644 case TGSI_OPCODE_ENDSUB
:
2648 case TGSI_OPCODE_NOISE1
:
2652 case TGSI_OPCODE_NOISE2
:
2656 case TGSI_OPCODE_NOISE3
:
2660 case TGSI_OPCODE_NOISE4
:
2664 case TGSI_OPCODE_NOP
:
2674 * Run TGSI interpreter.
2675 * \return bitmask of "alive" quad components
2678 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
2683 mach
->CondMask
= 0xf;
2684 mach
->LoopMask
= 0xf;
2685 mach
->ContMask
= 0xf;
2686 mach
->FuncMask
= 0xf;
2687 mach
->ExecMask
= 0xf;
2689 mach
->CondStackTop
= 0; /* temporarily subvert this assertion */
2690 assert(mach
->CondStackTop
== 0);
2691 assert(mach
->LoopStackTop
== 0);
2692 assert(mach
->ContStackTop
== 0);
2693 assert(mach
->CallStackTop
== 0);
2695 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
2696 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
2698 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
2699 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
2700 mach
->Primitives
[0] = 0;
2703 for (i
= 0; i
< QUAD_SIZE
; i
++) {
2704 mach
->Temps
[TEMP_CC_I
].xyzw
[TEMP_CC_C
].u
[i
] =
2705 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_X_SHIFT
) |
2706 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Y_SHIFT
) |
2707 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Z_SHIFT
) |
2708 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_W_SHIFT
);
2711 /* execute declarations (interpolants) */
2712 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
2713 exec_declaration( mach
, mach
->Declarations
+i
);
2716 /* execute instructions, until pc is set to -1 */
2718 assert(pc
< (int) mach
->NumInstructions
);
2719 exec_instruction( mach
, mach
->Instructions
+ pc
, &pc
);
2723 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2724 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
2726 * Scale back depth component.
2728 for (i
= 0; i
< 4; i
++)
2729 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
2733 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];