1 /**************************************************************************
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * TGSI interpreter/executor.
31 * Flow control information:
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel),
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
43 * The ExecMask is the bitwise AND of four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask; see UPDATE_EXEC_MASK). The first three are controlled
45 * by the flow control instructions IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT.
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_util.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
/* Indices of the four elements inside a channel's per-quad value array.
 * They are used further down as src->f[TILE_*] when one element is
 * subtracted from another. */
61 #define TILE_TOP_LEFT 0
62 #define TILE_TOP_RIGHT 1
63 #define TILE_BOTTOM_LEFT 2
64 #define TILE_BOTTOM_RIGHT 3
72 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
/* Each X_I / X_C pair aliases the matching TGSI_EXEC_TEMP_*_I / _C constants.
 * The two halves are used together as mach->Temps[X_I].xyzw[X_C] to address
 * a single utility channel. TEMP_R0 is defined without a channel counterpart. */
74 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
75 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
76 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
77 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
78 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
79 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
80 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
81 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
82 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
83 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
84 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
85 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
86 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
87 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
88 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
89 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
90 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
91 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
92 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
93 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
94 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
95 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
96 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
97 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
98 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
99 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
100 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
101 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
102 #define TEMP_R0 TGSI_EXEC_TEMP_R0
/* Nonzero when bit CHAN is set in the write mask of the instruction's first
 * destination register (FullDstRegisters[0]). INST is the instruction itself,
 * not a pointer to it. */
104 #define IS_CHANNEL_ENABLED(INST, CHAN)\
105 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
/* Same test as IS_CHANNEL_ENABLED, but against the write mask of the second
 * destination register (FullDstRegisters[1]). */
107 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
108 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
/* Loop header: steps CHAN over 0 .. NUM_CHANNELS-1 and runs the statement that
 * follows only for channels enabled in destination register 0's write mask.
 * CHAN must be an assignable variable supplied by the caller.
 * The expansion is a bare for + if, so an `else` written after the body would
 * bind to the macro's hidden `if`. */
110 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
111 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
112 if (IS_CHANNEL_ENABLED( INST, CHAN ))
/* Loop header like FOR_EACH_ENABLED_CHANNEL, but filtered by the write mask of
 * the second destination register (via IS_CHANNEL_ENABLED2). The same caveat
 * applies: the expansion ends in a bare `if`, so a trailing `else` would bind
 * to it. */
114 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
115 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
116 if (IS_CHANNEL_ENABLED2( INST, CHAN ))
/**
 * Recompute the execution mask as the bitwise AND of the machine's CondMask,
 * LoopMask, ContMask and FuncMask.
 *
 * MACH is a pointer to the machine. It is parenthesized in the expansion so
 * that any pointer expression (e.g. &machine) works, but it is evaluated more
 * than once, so pass an expression without side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->FuncMask)
124 * Initialize machine state by expanding tokens to full instructions,
125 * allocating temporary storage, setting up constants, etc.
126 * After this, we can call tgsi_exec_machine_run() many times.
129 tgsi_exec_machine_bind_shader(
130 struct tgsi_exec_machine
*mach
,
131 const struct tgsi_token
*tokens
,
133 struct tgsi_sampler
*samplers
)
136 struct tgsi_parse_context parse
;
137 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
138 struct tgsi_full_instruction
*instructions
;
139 struct tgsi_full_declaration
*declarations
;
140 uint maxInstructions
= 10, numInstructions
= 0;
141 uint maxDeclarations
= 10, numDeclarations
= 0;
145 tgsi_dump(tokens
, 0);
148 mach
->Tokens
= tokens
;
149 mach
->Samplers
= samplers
;
151 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
152 if (k
!= TGSI_PARSE_OK
) {
153 debug_printf( "Problem parsing!\n" );
157 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
161 declarations
= (struct tgsi_full_declaration
*)
162 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
168 instructions
= (struct tgsi_full_instruction
*)
169 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
172 FREE( declarations
);
176 while( !tgsi_parse_end_of_tokens( &parse
) ) {
177 uint pointer
= parse
.Position
;
180 tgsi_parse_token( &parse
);
181 switch( parse
.FullToken
.Token
.Type
) {
182 case TGSI_TOKEN_TYPE_DECLARATION
:
183 /* save expanded declaration */
184 if (numDeclarations
== maxDeclarations
) {
185 declarations
= REALLOC(declarations
,
187 * sizeof(struct tgsi_full_declaration
),
188 (maxDeclarations
+ 10)
189 * sizeof(struct tgsi_full_declaration
));
190 maxDeclarations
+= 10;
192 memcpy(declarations
+ numDeclarations
,
193 &parse
.FullToken
.FullDeclaration
,
194 sizeof(declarations
[0]));
198 case TGSI_TOKEN_TYPE_IMMEDIATE
:
200 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1;
201 assert( size
% 4 == 0 );
202 assert( mach
->ImmLimit
+ size
/ 4 <= TGSI_EXEC_NUM_IMMEDIATES
);
204 for( i
= 0; i
< size
; i
++ ) {
205 mach
->Imms
[mach
->ImmLimit
+ i
/ 4][i
% 4] =
206 parse
.FullToken
.FullImmediate
.u
.ImmediateFloat32
[i
].Float
;
208 mach
->ImmLimit
+= size
/ 4;
212 case TGSI_TOKEN_TYPE_INSTRUCTION
:
213 assert( labels
->count
< MAX_LABELS
);
215 labels
->labels
[labels
->count
][0] = instno
;
216 labels
->labels
[labels
->count
][1] = pointer
;
219 /* save expanded instruction */
220 if (numInstructions
== maxInstructions
) {
221 instructions
= REALLOC(instructions
,
223 * sizeof(struct tgsi_full_instruction
),
224 (maxInstructions
+ 10)
225 * sizeof(struct tgsi_full_instruction
));
226 maxInstructions
+= 10;
228 memcpy(instructions
+ numInstructions
,
229 &parse
.FullToken
.FullInstruction
,
230 sizeof(instructions
[0]));
238 tgsi_parse_free (&parse
);
240 if (mach
->Declarations
) {
241 FREE( mach
->Declarations
);
243 mach
->Declarations
= declarations
;
244 mach
->NumDeclarations
= numDeclarations
;
246 if (mach
->Instructions
) {
247 FREE( mach
->Instructions
);
249 mach
->Instructions
= instructions
;
250 mach
->NumInstructions
= numInstructions
;
255 tgsi_exec_machine_init(
256 struct tgsi_exec_machine
*mach
)
260 mach
->Temps
= (struct tgsi_exec_vector
*) tgsi_align_128bit( mach
->_Temps
);
261 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_TEMP_ADDR
];
263 /* Setup constants. */
264 for( i
= 0; i
< 4; i
++ ) {
265 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
266 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
267 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
268 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
269 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
270 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
271 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
272 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
273 mach
->Temps
[TEMP_3_I
].xyzw
[TEMP_3_C
].f
[i
] = 3.0f
;
274 mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
].f
[i
] = 0.5f
;
280 tgsi_exec_machine_free_data(struct tgsi_exec_machine
*mach
)
282 if (mach
->Instructions
) {
283 FREE(mach
->Instructions
);
284 mach
->Instructions
= NULL
;
285 mach
->NumInstructions
= 0;
287 if (mach
->Declarations
) {
288 FREE(mach
->Declarations
);
289 mach
->Declarations
= NULL
;
290 mach
->NumDeclarations
= 0;
297 union tgsi_exec_channel
*dst
,
298 const union tgsi_exec_channel
*src
)
300 dst
->f
[0] = fabsf( src
->f
[0] );
301 dst
->f
[1] = fabsf( src
->f
[1] );
302 dst
->f
[2] = fabsf( src
->f
[2] );
303 dst
->f
[3] = fabsf( src
->f
[3] );
308 union tgsi_exec_channel
*dst
,
309 const union tgsi_exec_channel
*src0
,
310 const union tgsi_exec_channel
*src1
)
312 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
313 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
314 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
315 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
320 union tgsi_exec_channel
*dst
,
321 const union tgsi_exec_channel
*src0
,
322 const union tgsi_exec_channel
*src1
)
324 dst
->i
[0] = src0
->i
[0] + src1
->i
[0];
325 dst
->i
[1] = src0
->i
[1] + src1
->i
[1];
326 dst
->i
[2] = src0
->i
[2] + src1
->i
[2];
327 dst
->i
[3] = src0
->i
[3] + src1
->i
[3];
332 union tgsi_exec_channel
*dst
,
333 const union tgsi_exec_channel
*src0
,
334 const union tgsi_exec_channel
*src1
)
336 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
337 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
338 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
339 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
344 union tgsi_exec_channel
*dst
,
345 const union tgsi_exec_channel
*src
)
347 dst
->f
[0] = ceilf( src
->f
[0] );
348 dst
->f
[1] = ceilf( src
->f
[1] );
349 dst
->f
[2] = ceilf( src
->f
[2] );
350 dst
->f
[3] = ceilf( src
->f
[3] );
355 union tgsi_exec_channel
*dst
,
356 const union tgsi_exec_channel
*src
)
358 dst
->f
[0] = cosf( src
->f
[0] );
359 dst
->f
[1] = cosf( src
->f
[1] );
360 dst
->f
[2] = cosf( src
->f
[2] );
361 dst
->f
[3] = cosf( src
->f
[3] );
366 union tgsi_exec_channel
*dst
,
367 const union tgsi_exec_channel
*src
)
372 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
377 union tgsi_exec_channel
*dst
,
378 const union tgsi_exec_channel
*src
)
383 dst
->f
[3] = src
->f
[TILE_TOP_LEFT
] - src
->f
[TILE_BOTTOM_LEFT
];
388 union tgsi_exec_channel
*dst
,
389 const union tgsi_exec_channel
*src0
,
390 const union tgsi_exec_channel
*src1
)
392 if (src1
->f
[0] != 0) {
393 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
395 if (src1
->f
[1] != 0) {
396 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
398 if (src1
->f
[2] != 0) {
399 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
401 if (src1
->f
[3] != 0) {
402 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
408 union tgsi_exec_channel
*dst
,
409 const union tgsi_exec_channel
*src0
,
410 const union tgsi_exec_channel
*src1
)
412 dst
->u
[0] = src0
->u
[0] / src1
->u
[0];
413 dst
->u
[1] = src0
->u
[1] / src1
->u
[1];
414 dst
->u
[2] = src0
->u
[2] / src1
->u
[2];
415 dst
->u
[3] = src0
->u
[3] / src1
->u
[3];
420 union tgsi_exec_channel
*dst
,
421 const union tgsi_exec_channel
*src0
,
422 const union tgsi_exec_channel
*src1
,
423 const union tgsi_exec_channel
*src2
,
424 const union tgsi_exec_channel
*src3
)
426 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
427 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
428 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
429 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
434 union tgsi_exec_channel
*dst
,
435 const union tgsi_exec_channel
*src0
,
436 const union tgsi_exec_channel
*src1
,
437 const union tgsi_exec_channel
*src2
,
438 const union tgsi_exec_channel
*src3
)
440 dst
->i
[0] = src0
->i
[0] == src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
441 dst
->i
[1] = src0
->i
[1] == src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
442 dst
->i
[2] = src0
->i
[2] == src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
443 dst
->i
[3] = src0
->i
[3] == src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
448 union tgsi_exec_channel
*dst
,
449 const union tgsi_exec_channel
*src
)
451 dst
->f
[0] = powf( 2.0f
, src
->f
[0] );
452 dst
->f
[1] = powf( 2.0f
, src
->f
[1] );
453 dst
->f
[2] = powf( 2.0f
, src
->f
[2] );
454 dst
->f
[3] = powf( 2.0f
, src
->f
[3] );
459 union tgsi_exec_channel
*dst
,
460 const union tgsi_exec_channel
*src
)
462 dst
->i
[0] = (int) src
->f
[0];
463 dst
->i
[1] = (int) src
->f
[1];
464 dst
->i
[2] = (int) src
->f
[2];
465 dst
->i
[3] = (int) src
->f
[3];
470 union tgsi_exec_channel
*dst
,
471 const union tgsi_exec_channel
*src
)
473 dst
->u
[0] = (uint
) src
->f
[0];
474 dst
->u
[1] = (uint
) src
->f
[1];
475 dst
->u
[2] = (uint
) src
->f
[2];
476 dst
->u
[3] = (uint
) src
->f
[3];
481 union tgsi_exec_channel
*dst
,
482 const union tgsi_exec_channel
*src
)
484 dst
->f
[0] = floorf( src
->f
[0] );
485 dst
->f
[1] = floorf( src
->f
[1] );
486 dst
->f
[2] = floorf( src
->f
[2] );
487 dst
->f
[3] = floorf( src
->f
[3] );
492 union tgsi_exec_channel
*dst
,
493 const union tgsi_exec_channel
*src
)
495 dst
->f
[0] = src
->f
[0] - floorf( src
->f
[0] );
496 dst
->f
[1] = src
->f
[1] - floorf( src
->f
[1] );
497 dst
->f
[2] = src
->f
[2] - floorf( src
->f
[2] );
498 dst
->f
[3] = src
->f
[3] - floorf( src
->f
[3] );
503 union tgsi_exec_channel
*dst
,
504 const union tgsi_exec_channel
*src0
,
505 const union tgsi_exec_channel
*src1
,
506 const union tgsi_exec_channel
*src2
,
507 const union tgsi_exec_channel
*src3
)
509 dst
->f
[0] = src0
->f
[0] >= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
510 dst
->f
[1] = src0
->f
[1] >= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
511 dst
->f
[2] = src0
->f
[2] >= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
512 dst
->f
[3] = src0
->f
[3] >= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
517 union tgsi_exec_channel
*dst
,
518 const union tgsi_exec_channel
*src
)
520 dst
->f
[0] = (float) src
->i
[0];
521 dst
->f
[1] = (float) src
->i
[1];
522 dst
->f
[2] = (float) src
->i
[2];
523 dst
->f
[3] = (float) src
->i
[3];
528 union tgsi_exec_channel
*dst
,
529 const union tgsi_exec_channel
*src
)
531 dst
->f
[0] = logf( src
->f
[0] ) * 1.442695f
;
532 dst
->f
[1] = logf( src
->f
[1] ) * 1.442695f
;
533 dst
->f
[2] = logf( src
->f
[2] ) * 1.442695f
;
534 dst
->f
[3] = logf( src
->f
[3] ) * 1.442695f
;
539 union tgsi_exec_channel
*dst
,
540 const union tgsi_exec_channel
*src0
,
541 const union tgsi_exec_channel
*src1
,
542 const union tgsi_exec_channel
*src2
,
543 const union tgsi_exec_channel
*src3
)
545 dst
->f
[0] = src0
->f
[0] <= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
546 dst
->f
[1] = src0
->f
[1] <= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
547 dst
->f
[2] = src0
->f
[2] <= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
548 dst
->f
[3] = src0
->f
[3] <= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
553 union tgsi_exec_channel
*dst
,
554 const union tgsi_exec_channel
*src0
,
555 const union tgsi_exec_channel
*src1
,
556 const union tgsi_exec_channel
*src2
,
557 const union tgsi_exec_channel
*src3
)
559 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
560 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
561 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
562 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
567 union tgsi_exec_channel
*dst
,
568 const union tgsi_exec_channel
*src0
,
569 const union tgsi_exec_channel
*src1
,
570 const union tgsi_exec_channel
*src2
,
571 const union tgsi_exec_channel
*src3
)
573 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
574 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
575 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
576 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
581 union tgsi_exec_channel
*dst
,
582 const union tgsi_exec_channel
*src0
,
583 const union tgsi_exec_channel
*src1
,
584 const union tgsi_exec_channel
*src2
,
585 const union tgsi_exec_channel
*src3
)
587 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src2
->u
[0] : src3
->u
[0];
588 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src2
->u
[1] : src3
->u
[1];
589 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src2
->u
[2] : src3
->u
[2];
590 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src2
->u
[3] : src3
->u
[3];
595 union tgsi_exec_channel
*dst
,
596 const union tgsi_exec_channel
*src0
,
597 const union tgsi_exec_channel
*src1
)
599 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
600 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
601 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
602 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
607 union tgsi_exec_channel
*dst
,
608 const union tgsi_exec_channel
*src0
,
609 const union tgsi_exec_channel
*src1
)
611 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
612 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
613 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
614 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
619 union tgsi_exec_channel
*dst
,
620 const union tgsi_exec_channel
*src0
,
621 const union tgsi_exec_channel
*src1
)
623 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
624 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
625 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
626 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
631 union tgsi_exec_channel
*dst
,
632 const union tgsi_exec_channel
*src0
,
633 const union tgsi_exec_channel
*src1
)
635 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
636 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
637 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
638 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
643 union tgsi_exec_channel
*dst
,
644 const union tgsi_exec_channel
*src0
,
645 const union tgsi_exec_channel
*src1
)
647 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
648 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
649 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
650 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
655 union tgsi_exec_channel
*dst
,
656 const union tgsi_exec_channel
*src0
,
657 const union tgsi_exec_channel
*src1
)
659 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
660 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
661 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
662 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
667 union tgsi_exec_channel
*dst
,
668 const union tgsi_exec_channel
*src0
,
669 const union tgsi_exec_channel
*src1
)
671 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
672 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
673 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
674 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
679 union tgsi_exec_channel
*dst
,
680 const union tgsi_exec_channel
*src0
,
681 const union tgsi_exec_channel
*src1
)
683 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
684 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
685 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
686 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
691 union tgsi_exec_channel
*dst
,
692 const union tgsi_exec_channel
*src0
,
693 const union tgsi_exec_channel
*src1
)
695 dst
->i
[0] = src0
->i
[0] * src1
->i
[0];
696 dst
->i
[1] = src0
->i
[1] * src1
->i
[1];
697 dst
->i
[2] = src0
->i
[2] * src1
->i
[2];
698 dst
->i
[3] = src0
->i
[3] * src1
->i
[3];
703 union tgsi_exec_channel
*dst0
,
704 union tgsi_exec_channel
*dst1
,
705 const union tgsi_exec_channel
*src0
,
706 const union tgsi_exec_channel
*src1
)
708 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
709 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
710 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
711 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
720 union tgsi_exec_channel
*dst0
,
721 union tgsi_exec_channel
*dst1
,
722 const union tgsi_exec_channel
*src0
,
723 const union tgsi_exec_channel
*src1
)
725 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
726 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
727 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
728 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
737 union tgsi_exec_channel
*dst
,
738 const union tgsi_exec_channel
*src0
,
739 const union tgsi_exec_channel
*src1
,
740 const union tgsi_exec_channel
*src2
)
742 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
743 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
744 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
745 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
750 union tgsi_exec_channel
*dst
,
751 const union tgsi_exec_channel
*src
)
753 dst
->f
[0] = -src
->f
[0];
754 dst
->f
[1] = -src
->f
[1];
755 dst
->f
[2] = -src
->f
[2];
756 dst
->f
[3] = -src
->f
[3];
761 union tgsi_exec_channel
*dst
,
762 const union tgsi_exec_channel
*src
)
764 dst
->i
[0] = -src
->i
[0];
765 dst
->i
[1] = -src
->i
[1];
766 dst
->i
[2] = -src
->i
[2];
767 dst
->i
[3] = -src
->i
[3];
772 union tgsi_exec_channel
*dst
,
773 const union tgsi_exec_channel
*src
)
775 dst
->u
[0] = ~src
->u
[0];
776 dst
->u
[1] = ~src
->u
[1];
777 dst
->u
[2] = ~src
->u
[2];
778 dst
->u
[3] = ~src
->u
[3];
783 union tgsi_exec_channel
*dst
,
784 const union tgsi_exec_channel
*src0
,
785 const union tgsi_exec_channel
*src1
)
787 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
788 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
789 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
790 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
795 union tgsi_exec_channel
*dst
,
796 const union tgsi_exec_channel
*src0
,
797 const union tgsi_exec_channel
*src1
)
799 dst
->f
[0] = powf( src0
->f
[0], src1
->f
[0] );
800 dst
->f
[1] = powf( src0
->f
[1], src1
->f
[1] );
801 dst
->f
[2] = powf( src0
->f
[2], src1
->f
[2] );
802 dst
->f
[3] = powf( src0
->f
[3], src1
->f
[3] );
807 union tgsi_exec_channel
*dst
,
808 const union tgsi_exec_channel
*src
)
810 dst
->f
[0] = floorf( src
->f
[0] + 0.5f
);
811 dst
->f
[1] = floorf( src
->f
[1] + 0.5f
);
812 dst
->f
[2] = floorf( src
->f
[2] + 0.5f
);
813 dst
->f
[3] = floorf( src
->f
[3] + 0.5f
);
818 union tgsi_exec_channel
*dst
,
819 const union tgsi_exec_channel
*src0
,
820 const union tgsi_exec_channel
*src1
)
822 dst
->i
[0] = src0
->i
[0] << src1
->i
[0];
823 dst
->i
[1] = src0
->i
[1] << src1
->i
[1];
824 dst
->i
[2] = src0
->i
[2] << src1
->i
[2];
825 dst
->i
[3] = src0
->i
[3] << src1
->i
[3];
830 union tgsi_exec_channel
*dst
,
831 const union tgsi_exec_channel
*src0
,
832 const union tgsi_exec_channel
*src1
)
834 dst
->i
[0] = src0
->i
[0] >> src1
->i
[0];
835 dst
->i
[1] = src0
->i
[1] >> src1
->i
[1];
836 dst
->i
[2] = src0
->i
[2] >> src1
->i
[2];
837 dst
->i
[3] = src0
->i
[3] >> src1
->i
[3];
842 union tgsi_exec_channel
*dst
,
843 const union tgsi_exec_channel
*src0
)
845 dst
->f
[0] = (float) (int) src0
->f
[0];
846 dst
->f
[1] = (float) (int) src0
->f
[1];
847 dst
->f
[2] = (float) (int) src0
->f
[2];
848 dst
->f
[3] = (float) (int) src0
->f
[3];
853 union tgsi_exec_channel
*dst
,
854 const union tgsi_exec_channel
*src0
,
855 const union tgsi_exec_channel
*src1
)
857 dst
->u
[0] = src0
->u
[0] >> src1
->u
[0];
858 dst
->u
[1] = src0
->u
[1] >> src1
->u
[1];
859 dst
->u
[2] = src0
->u
[2] >> src1
->u
[2];
860 dst
->u
[3] = src0
->u
[3] >> src1
->u
[3];
865 union tgsi_exec_channel
*dst
,
866 const union tgsi_exec_channel
*src
)
868 dst
->f
[0] = sinf( src
->f
[0] );
869 dst
->f
[1] = sinf( src
->f
[1] );
870 dst
->f
[2] = sinf( src
->f
[2] );
871 dst
->f
[3] = sinf( src
->f
[3] );
875 micro_sqrt( union tgsi_exec_channel
*dst
,
876 const union tgsi_exec_channel
*src
)
878 dst
->f
[0] = sqrtf( src
->f
[0] );
879 dst
->f
[1] = sqrtf( src
->f
[1] );
880 dst
->f
[2] = sqrtf( src
->f
[2] );
881 dst
->f
[3] = sqrtf( src
->f
[3] );
886 union tgsi_exec_channel
*dst
,
887 const union tgsi_exec_channel
*src0
,
888 const union tgsi_exec_channel
*src1
)
890 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
891 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
892 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
893 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
898 union tgsi_exec_channel
*dst
,
899 const union tgsi_exec_channel
*src
)
901 dst
->f
[0] = (float) src
->u
[0];
902 dst
->f
[1] = (float) src
->u
[1];
903 dst
->f
[2] = (float) src
->u
[2];
904 dst
->f
[3] = (float) src
->u
[3];
909 union tgsi_exec_channel
*dst
,
910 const union tgsi_exec_channel
*src0
,
911 const union tgsi_exec_channel
*src1
)
913 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
914 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
915 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
916 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
920 fetch_src_file_channel(
921 const struct tgsi_exec_machine
*mach
,
924 const union tgsi_exec_channel
*index
,
925 union tgsi_exec_channel
*chan
)
928 case TGSI_EXTSWIZZLE_X
:
929 case TGSI_EXTSWIZZLE_Y
:
930 case TGSI_EXTSWIZZLE_Z
:
931 case TGSI_EXTSWIZZLE_W
:
933 case TGSI_FILE_CONSTANT
:
934 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
935 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
936 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
937 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
940 case TGSI_FILE_INPUT
:
941 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
942 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
943 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
944 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
947 case TGSI_FILE_TEMPORARY
:
948 assert(index
->i
[0] < TGSI_EXEC_NUM_TEMPS
);
949 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
950 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
951 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
952 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
955 case TGSI_FILE_IMMEDIATE
:
956 assert( index
->i
[0] < (int) mach
->ImmLimit
);
957 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
958 assert( index
->i
[1] < (int) mach
->ImmLimit
);
959 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
960 assert( index
->i
[2] < (int) mach
->ImmLimit
);
961 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
962 assert( index
->i
[3] < (int) mach
->ImmLimit
);
963 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
966 case TGSI_FILE_ADDRESS
:
967 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
968 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
969 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
970 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
973 case TGSI_FILE_OUTPUT
:
974 /* vertex/fragment output vars can be read too */
975 chan
->u
[0] = mach
->Outputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
976 chan
->u
[1] = mach
->Outputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
977 chan
->u
[2] = mach
->Outputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
978 chan
->u
[3] = mach
->Outputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
986 case TGSI_EXTSWIZZLE_ZERO
:
987 *chan
= mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
];
990 case TGSI_EXTSWIZZLE_ONE
:
991 *chan
= mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
];
1001 const struct tgsi_exec_machine
*mach
,
1002 union tgsi_exec_channel
*chan
,
1003 const struct tgsi_full_src_register
*reg
,
1004 const uint chan_index
)
1006 union tgsi_exec_channel index
;
1012 index
.i
[3] = reg
->SrcRegister
.Index
;
1014 if (reg
->SrcRegister
.Indirect
) {
1015 union tgsi_exec_channel index2
;
1016 union tgsi_exec_channel indir_index
;
1021 index2
.i
[3] = reg
->SrcRegisterInd
.Index
;
1023 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterInd
, CHAN_X
);
1024 fetch_src_file_channel(
1026 reg
->SrcRegisterInd
.File
,
1031 index
.i
[0] += indir_index
.i
[0];
1032 index
.i
[1] += indir_index
.i
[1];
1033 index
.i
[2] += indir_index
.i
[2];
1034 index
.i
[3] += indir_index
.i
[3];
1037 if( reg
->SrcRegister
.Dimension
) {
1038 switch( reg
->SrcRegister
.File
) {
1039 case TGSI_FILE_INPUT
:
1045 case TGSI_FILE_CONSTANT
:
1055 index
.i
[0] += reg
->SrcRegisterDim
.Index
;
1056 index
.i
[1] += reg
->SrcRegisterDim
.Index
;
1057 index
.i
[2] += reg
->SrcRegisterDim
.Index
;
1058 index
.i
[3] += reg
->SrcRegisterDim
.Index
;
1060 if (reg
->SrcRegisterDim
.Indirect
) {
1061 union tgsi_exec_channel index2
;
1062 union tgsi_exec_channel indir_index
;
1067 index2
.i
[3] = reg
->SrcRegisterDimInd
.Index
;
1069 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterDimInd
, CHAN_X
);
1070 fetch_src_file_channel(
1072 reg
->SrcRegisterDimInd
.File
,
1077 index
.i
[0] += indir_index
.i
[0];
1078 index
.i
[1] += indir_index
.i
[1];
1079 index
.i
[2] += indir_index
.i
[2];
1080 index
.i
[3] += indir_index
.i
[3];
1084 swizzle
= tgsi_util_get_full_src_register_extswizzle( reg
, chan_index
);
1085 fetch_src_file_channel(
1087 reg
->SrcRegister
.File
,
1092 switch (tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
)) {
1093 case TGSI_UTIL_SIGN_CLEAR
:
1094 micro_abs( chan
, chan
);
1097 case TGSI_UTIL_SIGN_SET
:
1098 micro_abs( chan
, chan
);
1099 micro_neg( chan
, chan
);
1102 case TGSI_UTIL_SIGN_TOGGLE
:
1103 micro_neg( chan
, chan
);
1106 case TGSI_UTIL_SIGN_KEEP
:
1110 if (reg
->SrcRegisterExtMod
.Complement
) {
1111 micro_sub( chan
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], chan
);
1117 struct tgsi_exec_machine
*mach
,
1118 const union tgsi_exec_channel
*chan
,
1119 const struct tgsi_full_dst_register
*reg
,
1120 const struct tgsi_full_instruction
*inst
,
1123 union tgsi_exec_channel
*dst
;
1125 switch (reg
->DstRegister
.File
) {
1126 case TGSI_FILE_NULL
:
1129 case TGSI_FILE_OUTPUT
:
1130 dst
= &mach
->Outputs
[mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1131 + reg
->DstRegister
.Index
].xyzw
[chan_index
];
1134 case TGSI_FILE_TEMPORARY
:
1135 assert( reg
->DstRegister
.Index
< TGSI_EXEC_NUM_TEMPS
);
1136 dst
= &mach
->Temps
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1139 case TGSI_FILE_ADDRESS
:
1140 dst
= &mach
->Addrs
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1148 switch (inst
->Instruction
.Saturate
) {
1150 if (mach
->ExecMask
& 0x1)
1151 dst
->i
[0] = chan
->i
[0];
1152 if (mach
->ExecMask
& 0x2)
1153 dst
->i
[1] = chan
->i
[1];
1154 if (mach
->ExecMask
& 0x4)
1155 dst
->i
[2] = chan
->i
[2];
1156 if (mach
->ExecMask
& 0x8)
1157 dst
->i
[3] = chan
->i
[3];
1160 case TGSI_SAT_ZERO_ONE
:
1161 /* XXX need to obey ExecMask here */
1162 micro_max( dst
, chan
, &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1163 micro_min( dst
, dst
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1166 case TGSI_SAT_MINUS_PLUS_ONE
:
/* Shorthand for calling fetch_source() on source operand INDEX, channel CHAN,
 * with VAL as the channel pointer argument. The expansion refers to variables
 * named `mach` and `inst`, which must be in scope at the point of use. */
1175 #define FETCH(VAL,INDEX,CHAN)\
1176 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
/* Shorthand for calling store_dest() on destination operand INDEX, channel
 * CHAN, with VAL as the channel pointer argument. The expansion refers to
 * variables named `mach` and `inst`, which must be in scope at the point of
 * use. */
1178 #define STORE(VAL,INDEX,CHAN)\
1179 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1183 * Execute ARB-style KIL which is predicated by a src register.
1184 * Kill fragment if any of the four values is less than zero.
1187 exec_kil(struct tgsi_exec_machine
*mach
,
1188 const struct tgsi_full_instruction
*inst
)
1192 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1193 union tgsi_exec_channel r
[1];
1195 /* This mask stores component bits that were already tested. Note that
1196 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1198 uniquemask
= (1 << TGSI_EXTSWIZZLE_ZERO
) | (1 << TGSI_EXTSWIZZLE_ONE
);
1200 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1205 /* unswizzle channel */
1206 swizzle
= tgsi_util_get_full_src_register_extswizzle (
1207 &inst
->FullSrcRegisters
[0],
1210 /* check if the component has not been already tested */
1211 if (uniquemask
& (1 << swizzle
))
1213 uniquemask
|= 1 << swizzle
;
1215 FETCH(&r
[0], 0, chan_index
);
1216 for (i
= 0; i
< 4; i
++)
1217 if (r
[0].f
[i
] < 0.0f
)
1221 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1225 * Execute NVIDIA-style KIL which is predicated by a condition code.
1226 * Kill fragment if the condition code is TRUE.
1229 exec_kilp(struct tgsi_exec_machine
*mach
,
1230 const struct tgsi_full_instruction
*inst
)
1232 uint kilmask
; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1234 if (inst
->InstructionExtNv
.CondFlowEnable
) {
1240 swizzle
[0] = inst
->InstructionExtNv
.CondSwizzleX
;
1241 swizzle
[1] = inst
->InstructionExtNv
.CondSwizzleY
;
1242 swizzle
[2] = inst
->InstructionExtNv
.CondSwizzleZ
;
1243 swizzle
[3] = inst
->InstructionExtNv
.CondSwizzleW
;
1245 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1249 for (i
= 0; i
< 4; i
++) {
1250 /* TODO: evaluate the condition code */
1257 /* "unconditional" kil */
1258 kilmask
= mach
->ExecMask
;
1260 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1265 * Fetch a texel using STR texture coordinates.
1268 fetch_texel( struct tgsi_sampler
*sampler
,
1269 const union tgsi_exec_channel
*s
,
1270 const union tgsi_exec_channel
*t
,
1271 const union tgsi_exec_channel
*p
,
1272 float lodbias
, /* XXX should be float[4] */
1273 union tgsi_exec_channel
*r
,
1274 union tgsi_exec_channel
*g
,
1275 union tgsi_exec_channel
*b
,
1276 union tgsi_exec_channel
*a
)
1279 float rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1281 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, lodbias
, rgba
);
1283 for (j
= 0; j
< 4; j
++) {
1284 r
->f
[j
] = rgba
[0][j
];
1285 g
->f
[j
] = rgba
[1][j
];
1286 b
->f
[j
] = rgba
[2][j
];
1287 a
->f
[j
] = rgba
[3][j
];
1293 exec_tex(struct tgsi_exec_machine
*mach
,
1294 const struct tgsi_full_instruction
*inst
,
1298 const uint unit
= inst
->FullSrcRegisters
[1].SrcRegister
.Index
;
1299 union tgsi_exec_channel r
[8];
1303 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1305 switch (inst
->InstructionExtTexture
.Texture
) {
1306 case TGSI_TEXTURE_1D
:
1308 FETCH(&r
[0], 0, CHAN_X
);
1311 FETCH(&r
[1], 0, CHAN_W
);
1312 micro_div( &r
[0], &r
[0], &r
[1] );
1316 FETCH(&r
[1], 0, CHAN_W
);
1317 lodBias
= r
[2].f
[0];
1322 fetch_texel(&mach
->Samplers
[unit
],
1323 &r
[0], NULL
, NULL
, lodBias
, /* S, T, P, BIAS */
1324 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1327 case TGSI_TEXTURE_2D
:
1328 case TGSI_TEXTURE_RECT
:
1330 FETCH(&r
[0], 0, CHAN_X
);
1331 FETCH(&r
[1], 0, CHAN_Y
);
1332 FETCH(&r
[2], 0, CHAN_Z
);
1335 FETCH(&r
[3], 0, CHAN_W
);
1336 micro_div( &r
[0], &r
[0], &r
[3] );
1337 micro_div( &r
[1], &r
[1], &r
[3] );
1338 micro_div( &r
[2], &r
[2], &r
[3] );
1342 FETCH(&r
[3], 0, CHAN_W
);
1343 lodBias
= r
[3].f
[0];
1348 fetch_texel(&mach
->Samplers
[unit
],
1349 &r
[0], &r
[1], &r
[2], lodBias
, /* inputs */
1350 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1353 case TGSI_TEXTURE_3D
:
1354 case TGSI_TEXTURE_CUBE
:
1356 FETCH(&r
[0], 0, CHAN_X
);
1357 FETCH(&r
[1], 0, CHAN_Y
);
1358 FETCH(&r
[2], 0, CHAN_Z
);
1361 FETCH(&r
[3], 0, CHAN_W
);
1362 micro_div( &r
[0], &r
[0], &r
[3] );
1363 micro_div( &r
[1], &r
[1], &r
[3] );
1364 micro_div( &r
[2], &r
[2], &r
[3] );
1368 FETCH(&r
[3], 0, CHAN_W
);
1369 lodBias
= r
[3].f
[0];
1374 fetch_texel(&mach
->Samplers
[unit
],
1375 &r
[0], &r
[1], &r
[2], lodBias
,
1376 &r
[0], &r
[1], &r
[2], &r
[3]);
1383 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1384 STORE( &r
[chan_index
], 0, chan_index
);
1390 * Evaluate a constant-valued coefficient at the position of the
1395 struct tgsi_exec_machine
*mach
,
1401 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1402 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1407 * Evaluate a linear-valued coefficient at the position of the
1412 struct tgsi_exec_machine
*mach
,
1416 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1417 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1418 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1419 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1420 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1421 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
1422 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
1423 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
1424 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
1428 * Evaluate a perspective-valued coefficient at the position of the
1432 eval_perspective_coef(
1433 struct tgsi_exec_machine
*mach
,
1437 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1438 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1439 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1440 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1441 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1442 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
1443 /* divide by W here */
1444 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
1445 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
1446 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
1447 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
/* Signature shared by the per-attribute interpolation routines. */
typedef void (* eval_coef_func)(
   struct tgsi_exec_machine *mach,
   unsigned attrib,
   unsigned chan );

1458 struct tgsi_exec_machine
*mach
,
1459 const struct tgsi_full_declaration
*decl
)
1461 if( mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1462 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1463 unsigned first
, last
, mask
;
1464 eval_coef_func eval
;
1466 first
= decl
->DeclarationRange
.First
;
1467 last
= decl
->DeclarationRange
.Last
;
1468 mask
= decl
->Declaration
.UsageMask
;
1470 switch( decl
->Declaration
.Interpolate
) {
1471 case TGSI_INTERPOLATE_CONSTANT
:
1472 eval
= eval_constant_coef
;
1475 case TGSI_INTERPOLATE_LINEAR
:
1476 eval
= eval_linear_coef
;
1479 case TGSI_INTERPOLATE_PERSPECTIVE
:
1480 eval
= eval_perspective_coef
;
1487 if( mask
== TGSI_WRITEMASK_XYZW
) {
1490 for( i
= first
; i
<= last
; i
++ ) {
1491 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1499 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1500 if( mask
& (1 << j
) ) {
1501 for( i
= first
; i
<= last
; i
++ ) {
1513 struct tgsi_exec_machine
*mach
,
1514 const struct tgsi_full_instruction
*inst
,
1518 union tgsi_exec_channel r
[8];
1522 switch (inst
->Instruction
.Opcode
) {
1523 case TGSI_OPCODE_ARL
:
1524 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1525 FETCH( &r
[0], 0, chan_index
);
1526 micro_f2it( &r
[0], &r
[0] );
1527 STORE( &r
[0], 0, chan_index
);
1531 case TGSI_OPCODE_MOV
:
1532 case TGSI_OPCODE_SWZ
:
1533 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1534 FETCH( &r
[0], 0, chan_index
);
1535 STORE( &r
[0], 0, chan_index
);
1539 case TGSI_OPCODE_LIT
:
1540 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1541 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1544 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1545 FETCH( &r
[0], 0, CHAN_X
);
1546 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1547 micro_max( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1548 STORE( &r
[0], 0, CHAN_Y
);
1551 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1552 FETCH( &r
[1], 0, CHAN_Y
);
1553 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1555 FETCH( &r
[2], 0, CHAN_W
);
1556 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
1557 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
1558 micro_pow( &r
[1], &r
[1], &r
[2] );
1559 micro_lt( &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1560 STORE( &r
[0], 0, CHAN_Z
);
1564 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1565 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1569 case TGSI_OPCODE_RCP
:
1570 /* TGSI_OPCODE_RECIP */
1571 FETCH( &r
[0], 0, CHAN_X
);
1572 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1573 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1574 STORE( &r
[0], 0, chan_index
);
1578 case TGSI_OPCODE_RSQ
:
1579 /* TGSI_OPCODE_RECIPSQRT */
1580 FETCH( &r
[0], 0, CHAN_X
);
1581 micro_sqrt( &r
[0], &r
[0] );
1582 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1583 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1584 STORE( &r
[0], 0, chan_index
);
1588 case TGSI_OPCODE_EXP
:
1589 FETCH( &r
[0], 0, CHAN_X
);
1590 micro_flr( &r
[1], &r
[0] ); /* r1 = floor(r0) */
1591 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1592 micro_exp2( &r
[2], &r
[1] ); /* r2 = 2 ^ r1 */
1593 STORE( &r
[2], 0, CHAN_X
); /* store r2 */
1595 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1596 micro_sub( &r
[2], &r
[0], &r
[1] ); /* r2 = r0 - r1 */
1597 STORE( &r
[2], 0, CHAN_Y
); /* store r2 */
1599 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1600 micro_exp2( &r
[2], &r
[0] ); /* r2 = 2 ^ r0 */
1601 STORE( &r
[2], 0, CHAN_Z
); /* store r2 */
1603 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1604 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1608 case TGSI_OPCODE_LOG
:
1609 FETCH( &r
[0], 0, CHAN_X
);
1610 micro_abs( &r
[2], &r
[0] ); /* r2 = abs(r0) */
1611 micro_lg2( &r
[1], &r
[2] ); /* r1 = lg2(r2) */
1612 micro_flr( &r
[0], &r
[1] ); /* r0 = floor(r1) */
1613 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1614 STORE( &r
[0], 0, CHAN_X
);
1616 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1617 micro_exp2( &r
[0], &r
[0] ); /* r0 = 2 ^ r0 */
1618 micro_div( &r
[0], &r
[2], &r
[0] ); /* r0 = r2 / r0 */
1619 STORE( &r
[0], 0, CHAN_Y
);
1621 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1622 STORE( &r
[1], 0, CHAN_Z
);
1624 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1625 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1629 case TGSI_OPCODE_MUL
:
1630 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
)
1632 FETCH(&r
[0], 0, chan_index
);
1633 FETCH(&r
[1], 1, chan_index
);
1635 micro_mul( &r
[0], &r
[0], &r
[1] );
1637 STORE(&r
[0], 0, chan_index
);
1641 case TGSI_OPCODE_ADD
:
1642 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1643 FETCH( &r
[0], 0, chan_index
);
1644 FETCH( &r
[1], 1, chan_index
);
1645 micro_add( &r
[0], &r
[0], &r
[1] );
1646 STORE( &r
[0], 0, chan_index
);
1650 case TGSI_OPCODE_DP3
:
1651 /* TGSI_OPCODE_DOT3 */
1652 FETCH( &r
[0], 0, CHAN_X
);
1653 FETCH( &r
[1], 1, CHAN_X
);
1654 micro_mul( &r
[0], &r
[0], &r
[1] );
1656 FETCH( &r
[1], 0, CHAN_Y
);
1657 FETCH( &r
[2], 1, CHAN_Y
);
1658 micro_mul( &r
[1], &r
[1], &r
[2] );
1659 micro_add( &r
[0], &r
[0], &r
[1] );
1661 FETCH( &r
[1], 0, CHAN_Z
);
1662 FETCH( &r
[2], 1, CHAN_Z
);
1663 micro_mul( &r
[1], &r
[1], &r
[2] );
1664 micro_add( &r
[0], &r
[0], &r
[1] );
1666 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1667 STORE( &r
[0], 0, chan_index
);
1671 case TGSI_OPCODE_DP4
:
1672 /* TGSI_OPCODE_DOT4 */
1673 FETCH(&r
[0], 0, CHAN_X
);
1674 FETCH(&r
[1], 1, CHAN_X
);
1676 micro_mul( &r
[0], &r
[0], &r
[1] );
1678 FETCH(&r
[1], 0, CHAN_Y
);
1679 FETCH(&r
[2], 1, CHAN_Y
);
1681 micro_mul( &r
[1], &r
[1], &r
[2] );
1682 micro_add( &r
[0], &r
[0], &r
[1] );
1684 FETCH(&r
[1], 0, CHAN_Z
);
1685 FETCH(&r
[2], 1, CHAN_Z
);
1687 micro_mul( &r
[1], &r
[1], &r
[2] );
1688 micro_add( &r
[0], &r
[0], &r
[1] );
1690 FETCH(&r
[1], 0, CHAN_W
);
1691 FETCH(&r
[2], 1, CHAN_W
);
1693 micro_mul( &r
[1], &r
[1], &r
[2] );
1694 micro_add( &r
[0], &r
[0], &r
[1] );
1696 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1697 STORE( &r
[0], 0, chan_index
);
1701 case TGSI_OPCODE_DST
:
1702 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1703 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1706 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1707 FETCH( &r
[0], 0, CHAN_Y
);
1708 FETCH( &r
[1], 1, CHAN_Y
);
1709 micro_mul( &r
[0], &r
[0], &r
[1] );
1710 STORE( &r
[0], 0, CHAN_Y
);
1713 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1714 FETCH( &r
[0], 0, CHAN_Z
);
1715 STORE( &r
[0], 0, CHAN_Z
);
1718 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1719 FETCH( &r
[0], 1, CHAN_W
);
1720 STORE( &r
[0], 0, CHAN_W
);
1724 case TGSI_OPCODE_MIN
:
1725 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1726 FETCH(&r
[0], 0, chan_index
);
1727 FETCH(&r
[1], 1, chan_index
);
1729 /* XXX use micro_min()?? */
1730 micro_lt( &r
[0], &r
[0], &r
[1], &r
[0], &r
[1] );
1732 STORE(&r
[0], 0, chan_index
);
1736 case TGSI_OPCODE_MAX
:
1737 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1738 FETCH(&r
[0], 0, chan_index
);
1739 FETCH(&r
[1], 1, chan_index
);
1741 /* XXX use micro_max()?? */
1742 micro_lt( &r
[0], &r
[0], &r
[1], &r
[1], &r
[0] );
1744 STORE(&r
[0], 0, chan_index
);
1748 case TGSI_OPCODE_SLT
:
1749 /* TGSI_OPCODE_SETLT */
1750 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1751 FETCH( &r
[0], 0, chan_index
);
1752 FETCH( &r
[1], 1, chan_index
);
1753 micro_lt( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1754 STORE( &r
[0], 0, chan_index
);
1758 case TGSI_OPCODE_SGE
:
1759 /* TGSI_OPCODE_SETGE */
1760 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1761 FETCH( &r
[0], 0, chan_index
);
1762 FETCH( &r
[1], 1, chan_index
);
1763 micro_ge( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1764 STORE( &r
[0], 0, chan_index
);
1768 case TGSI_OPCODE_MAD
:
1769 /* TGSI_OPCODE_MADD */
1770 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1771 FETCH( &r
[0], 0, chan_index
);
1772 FETCH( &r
[1], 1, chan_index
);
1773 micro_mul( &r
[0], &r
[0], &r
[1] );
1774 FETCH( &r
[1], 2, chan_index
);
1775 micro_add( &r
[0], &r
[0], &r
[1] );
1776 STORE( &r
[0], 0, chan_index
);
1780 case TGSI_OPCODE_SUB
:
1781 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1782 FETCH(&r
[0], 0, chan_index
);
1783 FETCH(&r
[1], 1, chan_index
);
1785 micro_sub( &r
[0], &r
[0], &r
[1] );
1787 STORE(&r
[0], 0, chan_index
);
1791 case TGSI_OPCODE_LERP
:
1792 /* TGSI_OPCODE_LRP */
1793 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1794 FETCH(&r
[0], 0, chan_index
);
1795 FETCH(&r
[1], 1, chan_index
);
1796 FETCH(&r
[2], 2, chan_index
);
1798 micro_sub( &r
[1], &r
[1], &r
[2] );
1799 micro_mul( &r
[0], &r
[0], &r
[1] );
1800 micro_add( &r
[0], &r
[0], &r
[2] );
1802 STORE(&r
[0], 0, chan_index
);
1806 case TGSI_OPCODE_CND
:
1810 case TGSI_OPCODE_CND0
:
1814 case TGSI_OPCODE_DOT2ADD
:
1815 /* TGSI_OPCODE_DP2A */
1819 case TGSI_OPCODE_INDEX
:
1823 case TGSI_OPCODE_NEGATE
:
1827 case TGSI_OPCODE_FRAC
:
1828 /* TGSI_OPCODE_FRC */
1829 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1830 FETCH( &r
[0], 0, chan_index
);
1831 micro_frc( &r
[0], &r
[0] );
1832 STORE( &r
[0], 0, chan_index
);
1836 case TGSI_OPCODE_CLAMP
:
1840 case TGSI_OPCODE_FLOOR
:
1841 /* TGSI_OPCODE_FLR */
1842 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1843 FETCH( &r
[0], 0, chan_index
);
1844 micro_flr( &r
[0], &r
[0] );
1845 STORE( &r
[0], 0, chan_index
);
1849 case TGSI_OPCODE_ROUND
:
1850 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1851 FETCH( &r
[0], 0, chan_index
);
1852 micro_rnd( &r
[0], &r
[0] );
1853 STORE( &r
[0], 0, chan_index
);
1857 case TGSI_OPCODE_EXPBASE2
:
1858 /* TGSI_OPCODE_EX2 */
1859 FETCH(&r
[0], 0, CHAN_X
);
1861 micro_pow( &r
[0], &mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
], &r
[0] );
1863 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1864 STORE( &r
[0], 0, chan_index
);
1868 case TGSI_OPCODE_LOGBASE2
:
1869 /* TGSI_OPCODE_LG2 */
1870 FETCH( &r
[0], 0, CHAN_X
);
1871 micro_lg2( &r
[0], &r
[0] );
1872 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1873 STORE( &r
[0], 0, chan_index
);
1877 case TGSI_OPCODE_POWER
:
1878 /* TGSI_OPCODE_POW */
1879 FETCH(&r
[0], 0, CHAN_X
);
1880 FETCH(&r
[1], 1, CHAN_X
);
1882 micro_pow( &r
[0], &r
[0], &r
[1] );
1884 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1885 STORE( &r
[0], 0, chan_index
);
1889 case TGSI_OPCODE_CROSSPRODUCT
:
1890 /* TGSI_OPCODE_XPD */
1891 FETCH(&r
[0], 0, CHAN_Y
);
1892 FETCH(&r
[1], 1, CHAN_Z
);
1894 micro_mul( &r
[2], &r
[0], &r
[1] );
1896 FETCH(&r
[3], 0, CHAN_Z
);
1897 FETCH(&r
[4], 1, CHAN_Y
);
1899 micro_mul( &r
[5], &r
[3], &r
[4] );
1900 micro_sub( &r
[2], &r
[2], &r
[5] );
1902 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1903 STORE( &r
[2], 0, CHAN_X
);
1906 FETCH(&r
[2], 1, CHAN_X
);
1908 micro_mul( &r
[3], &r
[3], &r
[2] );
1910 FETCH(&r
[5], 0, CHAN_X
);
1912 micro_mul( &r
[1], &r
[1], &r
[5] );
1913 micro_sub( &r
[3], &r
[3], &r
[1] );
1915 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1916 STORE( &r
[3], 0, CHAN_Y
);
1919 micro_mul( &r
[5], &r
[5], &r
[4] );
1920 micro_mul( &r
[0], &r
[0], &r
[2] );
1921 micro_sub( &r
[5], &r
[5], &r
[0] );
1923 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1924 STORE( &r
[5], 0, CHAN_Z
);
1927 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1928 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1932 case TGSI_OPCODE_MULTIPLYMATRIX
:
1936 case TGSI_OPCODE_ABS
:
1937 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1938 FETCH(&r
[0], 0, chan_index
);
1940 micro_abs( &r
[0], &r
[0] );
1942 STORE(&r
[0], 0, chan_index
);
1946 case TGSI_OPCODE_RCC
:
1950 case TGSI_OPCODE_DPH
:
1951 FETCH(&r
[0], 0, CHAN_X
);
1952 FETCH(&r
[1], 1, CHAN_X
);
1954 micro_mul( &r
[0], &r
[0], &r
[1] );
1956 FETCH(&r
[1], 0, CHAN_Y
);
1957 FETCH(&r
[2], 1, CHAN_Y
);
1959 micro_mul( &r
[1], &r
[1], &r
[2] );
1960 micro_add( &r
[0], &r
[0], &r
[1] );
1962 FETCH(&r
[1], 0, CHAN_Z
);
1963 FETCH(&r
[2], 1, CHAN_Z
);
1965 micro_mul( &r
[1], &r
[1], &r
[2] );
1966 micro_add( &r
[0], &r
[0], &r
[1] );
1968 FETCH(&r
[1], 1, CHAN_W
);
1970 micro_add( &r
[0], &r
[0], &r
[1] );
1972 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1973 STORE( &r
[0], 0, chan_index
);
1977 case TGSI_OPCODE_COS
:
1978 FETCH(&r
[0], 0, CHAN_X
);
1980 micro_cos( &r
[0], &r
[0] );
1982 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1983 STORE( &r
[0], 0, chan_index
);
1987 case TGSI_OPCODE_DDX
:
1988 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1989 FETCH( &r
[0], 0, chan_index
);
1990 micro_ddx( &r
[0], &r
[0] );
1991 STORE( &r
[0], 0, chan_index
);
1995 case TGSI_OPCODE_DDY
:
1996 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1997 FETCH( &r
[0], 0, chan_index
);
1998 micro_ddy( &r
[0], &r
[0] );
1999 STORE( &r
[0], 0, chan_index
);
2003 case TGSI_OPCODE_KILP
:
2004 exec_kilp (mach
, inst
);
2007 case TGSI_OPCODE_KIL
:
2008 exec_kil (mach
, inst
);
2011 case TGSI_OPCODE_PK2H
:
2015 case TGSI_OPCODE_PK2US
:
2019 case TGSI_OPCODE_PK4B
:
2023 case TGSI_OPCODE_PK4UB
:
2027 case TGSI_OPCODE_RFL
:
2031 case TGSI_OPCODE_SEQ
:
2032 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2033 FETCH( &r
[0], 0, chan_index
);
2034 FETCH( &r
[1], 1, chan_index
);
2035 micro_eq( &r
[0], &r
[0], &r
[1],
2036 &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
],
2037 &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2038 STORE( &r
[0], 0, chan_index
);
2042 case TGSI_OPCODE_SFL
:
2046 case TGSI_OPCODE_SGT
:
2047 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2048 FETCH( &r
[0], 0, chan_index
);
2049 FETCH( &r
[1], 1, chan_index
);
2050 micro_le( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
2051 STORE( &r
[0], 0, chan_index
);
2055 case TGSI_OPCODE_SIN
:
2056 FETCH( &r
[0], 0, CHAN_X
);
2057 micro_sin( &r
[0], &r
[0] );
2058 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2059 STORE( &r
[0], 0, chan_index
);
2063 case TGSI_OPCODE_SLE
:
2064 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2065 FETCH( &r
[0], 0, chan_index
);
2066 FETCH( &r
[1], 1, chan_index
);
2067 micro_le( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2068 STORE( &r
[0], 0, chan_index
);
2072 case TGSI_OPCODE_SNE
:
2073 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2074 FETCH( &r
[0], 0, chan_index
);
2075 FETCH( &r
[1], 1, chan_index
);
2076 micro_eq( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
2077 STORE( &r
[0], 0, chan_index
);
2081 case TGSI_OPCODE_STR
:
2085 case TGSI_OPCODE_TEX
:
2086 /* simple texture lookup */
2087 /* src[0] = texcoord */
2088 /* src[1] = sampler unit */
2089 exec_tex(mach
, inst
, FALSE
, FALSE
);
2092 case TGSI_OPCODE_TXB
:
2093 /* Texture lookup with lod bias */
2094 /* src[0] = texcoord (src[0].w = LOD bias) */
2095 /* src[1] = sampler unit */
2096 exec_tex(mach
, inst
, TRUE
, FALSE
);
2099 case TGSI_OPCODE_TXD
:
2100 /* Texture lookup with explict partial derivatives */
2101 /* src[0] = texcoord */
2102 /* src[1] = d[strq]/dx */
2103 /* src[2] = d[strq]/dy */
2104 /* src[3] = sampler unit */
2108 case TGSI_OPCODE_TXL
:
2109 /* Texture lookup with explit LOD */
2110 /* src[0] = texcoord (src[0].w = LOD) */
2111 /* src[1] = sampler unit */
2112 exec_tex(mach
, inst
, TRUE
, FALSE
);
2115 case TGSI_OPCODE_TXP
:
2116 /* Texture lookup with projection */
2117 /* src[0] = texcoord (src[0].w = projection) */
2118 /* src[1] = sampler unit */
2119 exec_tex(mach
, inst
, FALSE
, TRUE
);
2122 case TGSI_OPCODE_UP2H
:
2126 case TGSI_OPCODE_UP2US
:
2130 case TGSI_OPCODE_UP4B
:
2134 case TGSI_OPCODE_UP4UB
:
2138 case TGSI_OPCODE_X2D
:
2142 case TGSI_OPCODE_ARA
:
2146 case TGSI_OPCODE_ARR
:
2150 case TGSI_OPCODE_BRA
:
2154 case TGSI_OPCODE_CAL
:
2155 /* skip the call if no execution channels are enabled */
2156 if (mach
->ExecMask
) {
2159 /* push the Cond, Loop, Cont stacks */
2160 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2161 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2162 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2163 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2164 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2165 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2167 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
2168 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
2170 /* note that PC was already incremented above */
2171 mach
->CallStack
[mach
->CallStackTop
++] = *pc
;
2172 *pc
= inst
->InstructionExtLabel
.Label
;
2176 case TGSI_OPCODE_RET
:
2177 mach
->FuncMask
&= ~mach
->ExecMask
;
2178 UPDATE_EXEC_MASK(mach
);
2180 if (mach
->FuncMask
== 0x0) {
2181 /* really return now (otherwise, keep executing */
2183 if (mach
->CallStackTop
== 0) {
2184 /* returning from main() */
2188 *pc
= mach
->CallStack
[--mach
->CallStackTop
];
2190 /* pop the Cond, Loop, Cont stacks */
2191 assert(mach
->CondStackTop
> 0);
2192 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2193 assert(mach
->LoopStackTop
> 0);
2194 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2195 assert(mach
->ContStackTop
> 0);
2196 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2197 assert(mach
->FuncStackTop
> 0);
2198 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
2200 UPDATE_EXEC_MASK(mach
);
2204 case TGSI_OPCODE_SSG
:
2208 case TGSI_OPCODE_CMP
:
2209 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2210 FETCH(&r
[0], 0, chan_index
);
2211 FETCH(&r
[1], 1, chan_index
);
2212 FETCH(&r
[2], 2, chan_index
);
2214 micro_lt( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2] );
2216 STORE(&r
[0], 0, chan_index
);
2220 case TGSI_OPCODE_SCS
:
2221 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2222 FETCH( &r
[0], 0, CHAN_X
);
2224 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) ) {
2225 micro_cos( &r
[1], &r
[0] );
2226 STORE( &r
[1], 0, CHAN_X
);
2228 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2229 micro_sin( &r
[1], &r
[0] );
2230 STORE( &r
[1], 0, CHAN_Y
);
2232 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
2233 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
2235 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
2236 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2240 case TGSI_OPCODE_NRM
:
2244 case TGSI_OPCODE_DIV
:
2248 case TGSI_OPCODE_DP2
:
2249 FETCH( &r
[0], 0, CHAN_X
);
2250 FETCH( &r
[1], 1, CHAN_X
);
2251 micro_mul( &r
[0], &r
[0], &r
[1] );
2253 FETCH( &r
[1], 0, CHAN_Y
);
2254 FETCH( &r
[2], 1, CHAN_Y
);
2255 micro_mul( &r
[1], &r
[1], &r
[2] );
2256 micro_add( &r
[0], &r
[0], &r
[1] );
2258 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2259 STORE( &r
[0], 0, chan_index
);
2263 case TGSI_OPCODE_IF
:
2265 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2266 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2267 FETCH( &r
[0], 0, CHAN_X
);
2268 /* update CondMask */
2270 mach
->CondMask
&= ~0x1;
2273 mach
->CondMask
&= ~0x2;
2276 mach
->CondMask
&= ~0x4;
2279 mach
->CondMask
&= ~0x8;
2281 UPDATE_EXEC_MASK(mach
);
2282 /* Todo: If CondMask==0, jump to ELSE */
2285 case TGSI_OPCODE_ELSE
:
2286 /* invert CondMask wrt previous mask */
2289 assert(mach
->CondStackTop
> 0);
2290 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
2291 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
2292 UPDATE_EXEC_MASK(mach
);
2293 /* Todo: If CondMask==0, jump to ENDIF */
2297 case TGSI_OPCODE_ENDIF
:
2299 assert(mach
->CondStackTop
> 0);
2300 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2301 UPDATE_EXEC_MASK(mach
);
2304 case TGSI_OPCODE_END
:
2305 /* halt execution */
2309 case TGSI_OPCODE_REP
:
2313 case TGSI_OPCODE_ENDREP
:
2317 case TGSI_OPCODE_PUSHA
:
2321 case TGSI_OPCODE_POPA
:
2325 case TGSI_OPCODE_CEIL
:
2326 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2327 FETCH( &r
[0], 0, chan_index
);
2328 micro_ceil( &r
[0], &r
[0] );
2329 STORE( &r
[0], 0, chan_index
);
2333 case TGSI_OPCODE_I2F
:
2334 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2335 FETCH( &r
[0], 0, chan_index
);
2336 micro_i2f( &r
[0], &r
[0] );
2337 STORE( &r
[0], 0, chan_index
);
2341 case TGSI_OPCODE_NOT
:
2342 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2343 FETCH( &r
[0], 0, chan_index
);
2344 micro_not( &r
[0], &r
[0] );
2345 STORE( &r
[0], 0, chan_index
);
2349 case TGSI_OPCODE_TRUNC
:
2350 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2351 FETCH( &r
[0], 0, chan_index
);
2352 micro_trunc( &r
[0], &r
[0] );
2353 STORE( &r
[0], 0, chan_index
);
2357 case TGSI_OPCODE_SHL
:
2358 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2359 FETCH( &r
[0], 0, chan_index
);
2360 FETCH( &r
[1], 1, chan_index
);
2361 micro_shl( &r
[0], &r
[0], &r
[1] );
2362 STORE( &r
[0], 0, chan_index
);
2366 case TGSI_OPCODE_SHR
:
2367 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2368 FETCH( &r
[0], 0, chan_index
);
2369 FETCH( &r
[1], 1, chan_index
);
2370 micro_ishr( &r
[0], &r
[0], &r
[1] );
2371 STORE( &r
[0], 0, chan_index
);
2375 case TGSI_OPCODE_AND
:
2376 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2377 FETCH( &r
[0], 0, chan_index
);
2378 FETCH( &r
[1], 1, chan_index
);
2379 micro_and( &r
[0], &r
[0], &r
[1] );
2380 STORE( &r
[0], 0, chan_index
);
2384 case TGSI_OPCODE_OR
:
2385 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2386 FETCH( &r
[0], 0, chan_index
);
2387 FETCH( &r
[1], 1, chan_index
);
2388 micro_or( &r
[0], &r
[0], &r
[1] );
2389 STORE( &r
[0], 0, chan_index
);
2393 case TGSI_OPCODE_MOD
:
2397 case TGSI_OPCODE_XOR
:
2398 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2399 FETCH( &r
[0], 0, chan_index
);
2400 FETCH( &r
[1], 1, chan_index
);
2401 micro_xor( &r
[0], &r
[0], &r
[1] );
2402 STORE( &r
[0], 0, chan_index
);
2406 case TGSI_OPCODE_SAD
:
2410 case TGSI_OPCODE_TXF
:
2414 case TGSI_OPCODE_TXQ
:
2418 case TGSI_OPCODE_EMIT
:
2419 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += 16;
2420 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
2423 case TGSI_OPCODE_ENDPRIM
:
2424 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]++;
2425 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]] = 0;
2428 case TGSI_OPCODE_LOOP
:
2429 /* fall-through (for now) */
2430 case TGSI_OPCODE_BGNLOOP2
:
2431 /* push LoopMask and ContMasks */
2432 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2433 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2434 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2435 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2438 case TGSI_OPCODE_ENDLOOP
:
2439 /* fall-through (for now at least) */
2440 case TGSI_OPCODE_ENDLOOP2
:
2441 /* Restore ContMask, but don't pop */
2442 assert(mach
->ContStackTop
> 0);
2443 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
2444 UPDATE_EXEC_MASK(mach
);
2445 if (mach
->ExecMask
) {
2446 /* repeat loop: jump to instruction just past BGNLOOP */
2447 *pc
= inst
->InstructionExtLabel
.Label
+ 1;
2450 /* exit loop: pop LoopMask */
2451 assert(mach
->LoopStackTop
> 0);
2452 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2454 assert(mach
->ContStackTop
> 0);
2455 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2457 UPDATE_EXEC_MASK(mach
);
2460 case TGSI_OPCODE_BRK
:
2461 /* turn off loop channels for each enabled exec channel */
2462 mach
->LoopMask
&= ~mach
->ExecMask
;
2463 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2464 UPDATE_EXEC_MASK(mach
);
2467 case TGSI_OPCODE_CONT
:
2468 /* turn off cont channels for each enabled exec channel */
2469 mach
->ContMask
&= ~mach
->ExecMask
;
2470 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2471 UPDATE_EXEC_MASK(mach
);
2474 case TGSI_OPCODE_BGNSUB
:
2478 case TGSI_OPCODE_ENDSUB
:
2482 case TGSI_OPCODE_NOISE1
:
2486 case TGSI_OPCODE_NOISE2
:
2490 case TGSI_OPCODE_NOISE3
:
2494 case TGSI_OPCODE_NOISE4
:
2498 case TGSI_OPCODE_NOP
:
2508 * Run TGSI interpreter.
2509 * \return bitmask of "alive" quad components
2512 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
2517 mach
->CondMask
= 0xf;
2518 mach
->LoopMask
= 0xf;
2519 mach
->ContMask
= 0xf;
2520 mach
->FuncMask
= 0xf;
2521 mach
->ExecMask
= 0xf;
2523 mach
->CondStackTop
= 0; /* temporarily subvert this assertion */
2524 assert(mach
->CondStackTop
== 0);
2525 assert(mach
->LoopStackTop
== 0);
2526 assert(mach
->ContStackTop
== 0);
2527 assert(mach
->CallStackTop
== 0);
2529 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
2530 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
2532 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
2533 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
2534 mach
->Primitives
[0] = 0;
2537 for (i
= 0; i
< QUAD_SIZE
; i
++) {
2538 mach
->Temps
[TEMP_CC_I
].xyzw
[TEMP_CC_C
].u
[i
] =
2539 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_X_SHIFT
) |
2540 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Y_SHIFT
) |
2541 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Z_SHIFT
) |
2542 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_W_SHIFT
);
2545 /* execute declarations (interpolants) */
2546 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
2547 exec_declaration( mach
, mach
->Declarations
+i
);
2550 /* execute instructions, until pc is set to -1 */
2552 assert(pc
< (int) mach
->NumInstructions
);
2553 exec_instruction( mach
, mach
->Instructions
+ pc
, &pc
);
2557 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2558 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
2560 * Scale back depth component.
2562 for (i
= 0; i
< 4; i
++)
2563 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
2567 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];