1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * TGSI interpreter/executor.
31 * Flow control information:
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
43 * The ExecMask is computed from three other masks (CondMask, LoopMask and
44 * ContMask) which are controlled by the flow control instructions (namely:
45 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). UPDATE_EXEC_MASK also ANDs in FuncMask.
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_util.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/util/tgsi_parse.h"
58 #include "tgsi/util/tgsi_util.h"
59 #include "tgsi_exec.h"
/* Positions of the four lanes of a quad; used as indices into a channel's
 * four per-lane values (see the derivative helpers). */
61 #define TILE_TOP_LEFT 0
62 #define TILE_TOP_RIGHT 1
63 #define TILE_BOTTOM_LEFT 2
64 #define TILE_BOTTOM_RIGHT 3
67 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
69 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
70 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
71 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
72 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
73 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
74 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
75 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
76 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
77 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
78 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
79 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
80 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
81 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
82 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
83 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
84 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
85 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
86 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
87 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
88 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
89 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
90 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
91 #define TEMP_R0 TGSI_EXEC_TEMP_R0
/* Loop header iterating CHAN over the four channel indices 0..3. */
93 #define FOR_EACH_CHANNEL(CHAN)\
94 for (CHAN = 0; CHAN < 4; CHAN++)
/* Non-zero when bit CHAN is set in the write mask of destination register 0. */
96 #define IS_CHANNEL_ENABLED(INST, CHAN)\
97 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
/* Same test against destination register 1. */
99 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
100 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
/* Loop over the channels enabled in destination 0. The expansion ends in a
 * bare `if`, so an `else` written after the macro body would bind to it. */
102 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
103 FOR_EACH_CHANNEL( CHAN )\
104 if (IS_CHANNEL_ENABLED( INST, CHAN ))
/* Loop over the channels enabled in destination 1 (same caveat as above). */
106 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
107 FOR_EACH_CHANNEL( CHAN )\
108 if (IS_CHANNEL_ENABLED2( INST, CHAN ))
111 /** The execution mask is the AND of the condition, loop, continue and function masks */
112 #define UPDATE_EXEC_MASK(MACH) \
113 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
124 * Initialize machine state by expanding tokens to full instructions,
125 * allocating temporary storage, setting up constants, etc.
126 * After this, we can call tgsi_exec_machine_run() many times.
/**
 * Attach a token stream and a sampler array to the machine, then walk the
 * tokens once: every declaration and instruction is copied into a growable
 * array, and every immediate is copied into mach->Imms. When the walk is
 * done, arrays previously held in mach->Declarations / mach->Instructions
 * are freed and replaced by the new ones, together with their counts.
 *
 * NOTE(review): several original lines are absent from this listing (return
 * type, braces, the declarations of k / i / instno, break statements, the
 * counter increments), so exact control flow cannot be confirmed from here.
 * NOTE(review): no NULL check of the MALLOC/REALLOC results is visible, and
 * REALLOC overwrites the only copy of each pointer -- confirm the
 * out-of-memory policy.
 */
129 tgsi_exec_machine_bind_shader(
130 struct tgsi_exec_machine
*mach
,
131 const struct tgsi_token
*tokens
,
133 struct tgsi_sampler
*samplers
)
136 struct tgsi_parse_context parse
;
137 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
138 struct tgsi_full_instruction
*instructions
;
139 struct tgsi_full_declaration
*declarations
;
140 uint maxInstructions
= 10, numInstructions
= 0;
141 uint maxDeclarations
= 10, numDeclarations
= 0;
145 tgsi_dump(tokens
, 0);
148 mach
->Tokens
= tokens
;
149 mach
->Samplers
= samplers
;
151 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
152 if (k
!= TGSI_PARSE_OK
) {
153 debug_printf( "Problem parsing!\n" );
157 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
/* Both arrays start with room for 10 entries and grow in steps of 10. */
161 declarations
= (struct tgsi_full_declaration
*)
162 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
164 instructions
= (struct tgsi_full_instruction
*)
165 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
/* Walk the token stream; `pointer` is the parse position before the token. */
168 while( !tgsi_parse_end_of_tokens( &parse
) ) {
169 uint pointer
= parse
.Position
;
172 tgsi_parse_token( &parse
);
173 switch( parse
.FullToken
.Token
.Type
) {
174 case TGSI_TOKEN_TYPE_DECLARATION
:
175 /* save expanded declaration */
176 if (numDeclarations
== maxDeclarations
) {
177 declarations
= REALLOC(declarations
,
179 * sizeof(struct tgsi_full_declaration
),
180 (maxDeclarations
+ 10)
181 * sizeof(struct tgsi_full_declaration
));
182 maxDeclarations
+= 10;
184 memcpy(declarations
+ numDeclarations
,
185 &parse
.FullToken
.FullDeclaration
,
186 sizeof(declarations
[0]));
190 case TGSI_TOKEN_TYPE_IMMEDIATE
:
/* Immediates are packed four floats per mach->Imms row; the asserts require
 * a multiple of four values and room below TGSI_EXEC_NUM_IMMEDIATES. */
192 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1;
193 assert( size
% 4 == 0 );
194 assert( mach
->ImmLimit
+ size
/ 4 <= TGSI_EXEC_NUM_IMMEDIATES
);
196 for( i
= 0; i
< size
; i
++ ) {
197 mach
->Imms
[mach
->ImmLimit
+ i
/ 4][i
% 4] =
198 parse
.FullToken
.FullImmediate
.u
.ImmediateFloat32
[i
].Float
;
200 mach
->ImmLimit
+= size
/ 4;
204 case TGSI_TOKEN_TYPE_INSTRUCTION
:
/* Label table entry: [0] = instruction number, [1] = token position.
 * NOTE(review): the condition guarding this and the update of instno and
 * labels->count are not visible in this listing. */
205 assert( labels
->count
< 128 );
207 labels
->labels
[labels
->count
][0] = instno
;
208 labels
->labels
[labels
->count
][1] = pointer
;
211 /* save expanded instruction */
212 if (numInstructions
== maxInstructions
) {
213 instructions
= REALLOC(instructions
,
215 * sizeof(struct tgsi_full_instruction
),
216 (maxInstructions
+ 10)
217 * sizeof(struct tgsi_full_instruction
));
218 maxInstructions
+= 10;
220 memcpy(instructions
+ numInstructions
,
221 &parse
.FullToken
.FullInstruction
,
222 sizeof(instructions
[0]));
230 tgsi_parse_free (&parse
);
/* Replace any arrays left over from a previously bound shader. */
232 if (mach
->Declarations
) {
233 FREE( mach
->Declarations
);
235 mach
->Declarations
= declarations
;
236 mach
->NumDeclarations
= numDeclarations
;
238 if (mach
->Instructions
) {
239 FREE( mach
->Instructions
);
241 mach
->Instructions
= instructions
;
242 mach
->NumInstructions
= numInstructions
;
/**
 * Set up the machine's register pointers and constant registers.
 * mach->Temps is the result of tgsi_align_128bit() applied to mach->_Temps,
 * and mach->Addrs points at the entry just past the first
 * TGSI_EXEC_NUM_TEMPS temporaries. All four lanes of the utility channels
 * are then filled with the bit patterns 0, 0x7FFFFFFF, 0x80000000 and
 * 0xFFFFFFFF and the floats 1, 2, 128 and -128.
 * NOTE(review): the return type, braces and the declaration of i are absent
 * from this listing.
 */
247 tgsi_exec_machine_init(
248 struct tgsi_exec_machine
*mach
)
252 mach
->Temps
= (struct tgsi_exec_vector
*) tgsi_align_128bit( mach
->_Temps
);
253 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_NUM_TEMPS
];
255 /* Setup constants. */
256 for( i
= 0; i
< 4; i
++ ) {
257 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
258 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
259 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
260 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
261 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
262 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
263 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
264 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
/**
 * Release the instruction and declaration arrays held by the machine.
 * Each non-NULL array is passed to FREE(), its pointer is reset to NULL and
 * its count to 0, so calling this again afterwards does nothing.
 * NOTE(review): the return type and braces are absent from this listing.
 */
270 tgsi_exec_machine_free_data(struct tgsi_exec_machine
*mach
)
272 if (mach
->Instructions
) {
273 FREE(mach
->Instructions
);
274 mach
->Instructions
= NULL
;
275 mach
->NumInstructions
= 0;
277 if (mach
->Declarations
) {
278 FREE(mach
->Declarations
);
279 mach
->Declarations
= NULL
;
280 mach
->NumDeclarations
= 0;
/**
 * Lane-wise float absolute value: dst->f[i] = fabsf(src->f[i]), i = 0..3.
 * The function-name line is absent from this listing; presumably micro_abs,
 * which the sign-mode handling later in the file calls with (dst, src).
 */
287 union tgsi_exec_channel
*dst
,
288 const union tgsi_exec_channel
*src
)
290 dst
->f
[0] = fabsf( src
->f
[0] );
291 dst
->f
[1] = fabsf( src
->f
[1] );
292 dst
->f
[2] = fabsf( src
->f
[2] );
293 dst
->f
[3] = fabsf( src
->f
[3] );
/**
 * Lane-wise float addition: dst->f[i] = src0->f[i] + src1->f[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_add).
 */
298 union tgsi_exec_channel
*dst
,
299 const union tgsi_exec_channel
*src0
,
300 const union tgsi_exec_channel
*src1
)
302 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
303 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
304 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
305 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
/**
 * Lane-wise signed-integer addition on the .i view:
 * dst->i[i] = src0->i[i] + src1->i[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_iadd).
 */
310 union tgsi_exec_channel
*dst
,
311 const union tgsi_exec_channel
*src0
,
312 const union tgsi_exec_channel
*src1
)
314 dst
->i
[0] = src0
->i
[0] + src1
->i
[0];
315 dst
->i
[1] = src0
->i
[1] + src1
->i
[1];
316 dst
->i
[2] = src0
->i
[2] + src1
->i
[2];
317 dst
->i
[3] = src0
->i
[3] + src1
->i
[3];
/**
 * Lane-wise bitwise AND on the .u view:
 * dst->u[i] = src0->u[i] & src1->u[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_and).
 */
322 union tgsi_exec_channel
*dst
,
323 const union tgsi_exec_channel
*src0
,
324 const union tgsi_exec_channel
*src1
)
326 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
327 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
328 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
329 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
/**
 * Lane-wise ceiling: dst->f[i] = ceilf(src->f[i]), i = 0..3.
 * The function-name line is absent from this listing (presumably micro_ceil).
 */
334 union tgsi_exec_channel
*dst
,
335 const union tgsi_exec_channel
*src
)
337 dst
->f
[0] = ceilf( src
->f
[0] );
338 dst
->f
[1] = ceilf( src
->f
[1] );
339 dst
->f
[2] = ceilf( src
->f
[2] );
340 dst
->f
[3] = ceilf( src
->f
[3] );
/**
 * Lane-wise cosine: dst->f[i] = cosf(src->f[i]), i = 0..3.
 * The function-name line is absent from this listing (presumably micro_cos).
 */
345 union tgsi_exec_channel
*dst
,
346 const union tgsi_exec_channel
*src
)
348 dst
->f
[0] = cosf( src
->f
[0] );
349 dst
->f
[1] = cosf( src
->f
[1] );
350 dst
->f
[2] = cosf( src
->f
[2] );
351 dst
->f
[3] = cosf( src
->f
[3] );
/**
 * Horizontal difference across the quad: the bottom-right lane minus the
 * bottom-left lane of src, stored to dst->f[3].
 * NOTE(review): the lines between the signature and this statement are
 * absent from the listing; presumably lanes 0..2 receive the same value via
 * a chained assignment -- confirm against the full file. The function-name
 * line is absent too (presumably micro_ddx).
 */
356 union tgsi_exec_channel
*dst
,
357 const union tgsi_exec_channel
*src
)
362 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
/**
 * Vertical difference across the quad: the top-left lane minus the
 * bottom-left lane of src, stored to dst->f[3].
 * NOTE(review): the lines between the signature and this statement are
 * absent from the listing; presumably lanes 0..2 receive the same value via
 * a chained assignment -- confirm against the full file. The function-name
 * line is absent too (presumably micro_ddy).
 */
367 union tgsi_exec_channel
*dst
,
368 const union tgsi_exec_channel
*src
)
373 dst
->f
[3] = src
->f
[TILE_TOP_LEFT
] - src
->f
[TILE_BOTTOM_LEFT
];
/**
 * Lane-wise float division: dst->f[i] = src0->f[i] / src1->f[i], i = 0..3.
 * No guard against a zero divisor is visible. The function-name line is
 * absent from this listing; presumably micro_div, which the texture code
 * later in the file calls with (dst, src0, src1).
 */
378 union tgsi_exec_channel
*dst
,
379 const union tgsi_exec_channel
*src0
,
380 const union tgsi_exec_channel
*src1
)
382 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
383 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
384 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
385 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
/**
 * Lane-wise unsigned integer division on the .u view:
 * dst->u[i] = src0->u[i] / src1->u[i], i = 0..3.
 * NOTE(review): no guard against a zero divisor is visible; integer division
 * by zero is undefined in C. The function-name line is absent from this
 * listing (presumably micro_udiv).
 */
390 union tgsi_exec_channel
*dst
,
391 const union tgsi_exec_channel
*src0
,
392 const union tgsi_exec_channel
*src1
)
394 dst
->u
[0] = src0
->u
[0] / src1
->u
[0];
395 dst
->u
[1] = src0
->u
[1] / src1
->u
[1];
396 dst
->u
[2] = src0
->u
[2] / src1
->u
[2];
397 dst
->u
[3] = src0
->u
[3] / src1
->u
[3];
/**
 * Lane-wise float select on equality:
 * dst->f[i] = (src0->f[i] == src1->f[i]) ? src2->f[i] : src3->f[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_eq).
 */
402 union tgsi_exec_channel
*dst
,
403 const union tgsi_exec_channel
*src0
,
404 const union tgsi_exec_channel
*src1
,
405 const union tgsi_exec_channel
*src2
,
406 const union tgsi_exec_channel
*src3
)
408 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
409 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
410 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
411 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
/**
 * Lane-wise signed-integer select on equality (.i view):
 * dst->i[i] = (src0->i[i] == src1->i[i]) ? src2->i[i] : src3->i[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_ieq).
 */
416 union tgsi_exec_channel
*dst
,
417 const union tgsi_exec_channel
*src0
,
418 const union tgsi_exec_channel
*src1
,
419 const union tgsi_exec_channel
*src2
,
420 const union tgsi_exec_channel
*src3
)
422 dst
->i
[0] = src0
->i
[0] == src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
423 dst
->i
[1] = src0
->i
[1] == src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
424 dst
->i
[2] = src0
->i
[2] == src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
425 dst
->i
[3] = src0
->i
[3] == src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
/**
 * Lane-wise base-2 exponential: dst->f[i] = powf(2.0f, src->f[i]), i = 0..3.
 * The function-name line is absent from this listing (presumably micro_exp2).
 */
430 union tgsi_exec_channel
*dst
,
431 const union tgsi_exec_channel
*src
)
433 dst
->f
[0] = powf( 2.0f
, src
->f
[0] );
434 dst
->f
[1] = powf( 2.0f
, src
->f
[1] );
435 dst
->f
[2] = powf( 2.0f
, src
->f
[2] );
436 dst
->f
[3] = powf( 2.0f
, src
->f
[3] );
/**
 * Lane-wise float to signed-integer conversion using a C cast (which
 * truncates toward zero): dst->i[i] = (int) src->f[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_f2it).
 */
441 union tgsi_exec_channel
*dst
,
442 const union tgsi_exec_channel
*src
)
444 dst
->i
[0] = (int) src
->f
[0];
445 dst
->i
[1] = (int) src
->f
[1];
446 dst
->i
[2] = (int) src
->f
[2];
447 dst
->i
[3] = (int) src
->f
[3];
/**
 * Lane-wise float to unsigned-integer conversion using a C cast:
 * dst->u[i] = (uint) src->f[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_f2ut).
 */
452 union tgsi_exec_channel
*dst
,
453 const union tgsi_exec_channel
*src
)
455 dst
->u
[0] = (uint
) src
->f
[0];
456 dst
->u
[1] = (uint
) src
->f
[1];
457 dst
->u
[2] = (uint
) src
->f
[2];
458 dst
->u
[3] = (uint
) src
->f
[3];
/**
 * Lane-wise floor: dst->f[i] = floorf(src->f[i]), i = 0..3.
 * The function-name line is absent from this listing (presumably micro_flr).
 */
463 union tgsi_exec_channel
*dst
,
464 const union tgsi_exec_channel
*src
)
466 dst
->f
[0] = floorf( src
->f
[0] );
467 dst
->f
[1] = floorf( src
->f
[1] );
468 dst
->f
[2] = floorf( src
->f
[2] );
469 dst
->f
[3] = floorf( src
->f
[3] );
/**
 * Lane-wise fractional part: dst->f[i] = src->f[i] - floorf(src->f[i]),
 * i = 0..3.
 * The function-name line is absent from this listing (presumably micro_frc).
 */
474 union tgsi_exec_channel
*dst
,
475 const union tgsi_exec_channel
*src
)
477 dst
->f
[0] = src
->f
[0] - floorf( src
->f
[0] );
478 dst
->f
[1] = src
->f
[1] - floorf( src
->f
[1] );
479 dst
->f
[2] = src
->f
[2] - floorf( src
->f
[2] );
480 dst
->f
[3] = src
->f
[3] - floorf( src
->f
[3] );
/**
 * Lane-wise float select on greater-or-equal:
 * dst->f[i] = (src0->f[i] >= src1->f[i]) ? src2->f[i] : src3->f[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_ge).
 */
485 union tgsi_exec_channel
*dst
,
486 const union tgsi_exec_channel
*src0
,
487 const union tgsi_exec_channel
*src1
,
488 const union tgsi_exec_channel
*src2
,
489 const union tgsi_exec_channel
*src3
)
491 dst
->f
[0] = src0
->f
[0] >= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
492 dst
->f
[1] = src0
->f
[1] >= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
493 dst
->f
[2] = src0
->f
[2] >= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
494 dst
->f
[3] = src0
->f
[3] >= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
/**
 * Lane-wise signed-integer to float conversion:
 * dst->f[i] = (float) src->i[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_i2f).
 */
499 union tgsi_exec_channel
*dst
,
500 const union tgsi_exec_channel
*src
)
502 dst
->f
[0] = (float) src
->i
[0];
503 dst
->f
[1] = (float) src
->i
[1];
504 dst
->f
[2] = (float) src
->i
[2];
505 dst
->f
[3] = (float) src
->i
[3];
/**
 * Lane-wise base-2 logarithm computed from the natural logarithm:
 * dst->f[i] = logf(src->f[i]) * 1.442695f, i = 0..3. The constant is
 * approximately 1/ln(2).
 * The function-name line is absent from this listing (presumably micro_lg2).
 */
510 union tgsi_exec_channel
*dst
,
511 const union tgsi_exec_channel
*src
)
513 dst
->f
[0] = logf( src
->f
[0] ) * 1.442695f
;
514 dst
->f
[1] = logf( src
->f
[1] ) * 1.442695f
;
515 dst
->f
[2] = logf( src
->f
[2] ) * 1.442695f
;
516 dst
->f
[3] = logf( src
->f
[3] ) * 1.442695f
;
/**
 * Lane-wise float select on less-or-equal:
 * dst->f[i] = (src0->f[i] <= src1->f[i]) ? src2->f[i] : src3->f[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_le).
 */
521 union tgsi_exec_channel
*dst
,
522 const union tgsi_exec_channel
*src0
,
523 const union tgsi_exec_channel
*src1
,
524 const union tgsi_exec_channel
*src2
,
525 const union tgsi_exec_channel
*src3
)
527 dst
->f
[0] = src0
->f
[0] <= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
528 dst
->f
[1] = src0
->f
[1] <= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
529 dst
->f
[2] = src0
->f
[2] <= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
530 dst
->f
[3] = src0
->f
[3] <= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
/**
 * Lane-wise float select on less-than:
 * dst->f[i] = (src0->f[i] < src1->f[i]) ? src2->f[i] : src3->f[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_lt).
 */
535 union tgsi_exec_channel
*dst
,
536 const union tgsi_exec_channel
*src0
,
537 const union tgsi_exec_channel
*src1
,
538 const union tgsi_exec_channel
*src2
,
539 const union tgsi_exec_channel
*src3
)
541 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
542 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
543 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
544 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
/**
 * Lane-wise signed-integer select on less-than (.i view):
 * dst->i[i] = (src0->i[i] < src1->i[i]) ? src2->i[i] : src3->i[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_ilt).
 */
549 union tgsi_exec_channel
*dst
,
550 const union tgsi_exec_channel
*src0
,
551 const union tgsi_exec_channel
*src1
,
552 const union tgsi_exec_channel
*src2
,
553 const union tgsi_exec_channel
*src3
)
555 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
556 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
557 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
558 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
/**
 * Lane-wise unsigned-integer select on less-than (.u view):
 * dst->u[i] = (src0->u[i] < src1->u[i]) ? src2->u[i] : src3->u[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_ult).
 */
563 union tgsi_exec_channel
*dst
,
564 const union tgsi_exec_channel
*src0
,
565 const union tgsi_exec_channel
*src1
,
566 const union tgsi_exec_channel
*src2
,
567 const union tgsi_exec_channel
*src3
)
569 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src2
->u
[0] : src3
->u
[0];
570 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src2
->u
[1] : src3
->u
[1];
571 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src2
->u
[2] : src3
->u
[2];
572 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src2
->u
[3] : src3
->u
[3];
/**
 * Lane-wise float maximum: dst->f[i] is src0->f[i] when it is greater than
 * src1->f[i], otherwise src1->f[i], i = 0..3.
 * The function-name line is absent from this listing; presumably micro_max,
 * which the saturate handling later in the file calls with (dst, a, b).
 */
577 union tgsi_exec_channel
*dst
,
578 const union tgsi_exec_channel
*src0
,
579 const union tgsi_exec_channel
*src1
)
581 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
582 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
583 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
584 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
/**
 * Lane-wise signed-integer maximum on the .i view: dst->i[i] is src0->i[i]
 * when it is greater than src1->i[i], otherwise src1->i[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_imax).
 */
589 union tgsi_exec_channel
*dst
,
590 const union tgsi_exec_channel
*src0
,
591 const union tgsi_exec_channel
*src1
)
593 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
594 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
595 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
596 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
/**
 * Lane-wise unsigned-integer maximum on the .u view: dst->u[i] is src0->u[i]
 * when it is greater than src1->u[i], otherwise src1->u[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_umax).
 */
601 union tgsi_exec_channel
*dst
,
602 const union tgsi_exec_channel
*src0
,
603 const union tgsi_exec_channel
*src1
)
605 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
606 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
607 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
608 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
/**
 * Lane-wise float minimum: dst->f[i] is src0->f[i] when it is less than
 * src1->f[i], otherwise src1->f[i], i = 0..3.
 * The function-name line is absent from this listing; presumably micro_min,
 * which the saturate handling later in the file calls with (dst, a, b).
 */
613 union tgsi_exec_channel
*dst
,
614 const union tgsi_exec_channel
*src0
,
615 const union tgsi_exec_channel
*src1
)
617 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
618 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
619 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
620 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
/**
 * Lane-wise signed-integer minimum on the .i view: dst->i[i] is src0->i[i]
 * when it is less than src1->i[i], otherwise src1->i[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_imin).
 */
625 union tgsi_exec_channel
*dst
,
626 const union tgsi_exec_channel
*src0
,
627 const union tgsi_exec_channel
*src1
)
629 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
630 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
631 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
632 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
/**
 * Lane-wise unsigned-integer minimum on the .u view: dst->u[i] is src0->u[i]
 * when it is less than src1->u[i], otherwise src1->u[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_umin).
 */
637 union tgsi_exec_channel
*dst
,
638 const union tgsi_exec_channel
*src0
,
639 const union tgsi_exec_channel
*src1
)
641 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
642 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
643 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
644 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
/**
 * Lane-wise unsigned remainder on the .u view:
 * dst->u[i] = src0->u[i] % src1->u[i], i = 0..3.
 * NOTE(review): no guard against a zero divisor is visible; integer
 * remainder by zero is undefined in C. The function-name line is absent
 * from this listing (presumably micro_umod).
 */
649 union tgsi_exec_channel
*dst
,
650 const union tgsi_exec_channel
*src0
,
651 const union tgsi_exec_channel
*src1
)
653 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
654 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
655 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
656 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
/**
 * Lane-wise float multiplication:
 * dst->f[i] = src0->f[i] * src1->f[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_mul).
 */
661 union tgsi_exec_channel
*dst
,
662 const union tgsi_exec_channel
*src0
,
663 const union tgsi_exec_channel
*src1
)
665 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
666 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
667 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
668 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
/**
 * Lane-wise signed-integer multiplication on the .i view:
 * dst->i[i] = src0->i[i] * src1->i[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_imul).
 */
673 union tgsi_exec_channel
*dst
,
674 const union tgsi_exec_channel
*src0
,
675 const union tgsi_exec_channel
*src1
)
677 dst
->i
[0] = src0
->i
[0] * src1
->i
[0];
678 dst
->i
[1] = src0
->i
[1] * src1
->i
[1];
679 dst
->i
[2] = src0
->i
[2] * src1
->i
[2];
680 dst
->i
[3] = src0
->i
[3] * src1
->i
[3];
/**
 * Two-destination signed multiply. The visible statements store the plain
 * int product src0->i[i] * src1->i[i] into dst1->i[i], i = 0..3.
 * NOTE(review): nothing visible writes dst0; the lines following these
 * statements are absent from this listing, so how dst0 is filled cannot be
 * confirmed here. The function-name line is absent too (presumably
 * micro_imul64).
 */
685 union tgsi_exec_channel
*dst0
,
686 union tgsi_exec_channel
*dst1
,
687 const union tgsi_exec_channel
*src0
,
688 const union tgsi_exec_channel
*src1
)
690 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
691 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
692 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
693 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
/**
 * Two-destination unsigned multiply. The visible statements store the plain
 * unsigned product src0->u[i] * src1->u[i] into dst1->u[i], i = 0..3.
 * NOTE(review): nothing visible writes dst0; the lines following these
 * statements are absent from this listing, so how dst0 is filled cannot be
 * confirmed here. The function-name line is absent too (presumably
 * micro_umul64).
 */
702 union tgsi_exec_channel
*dst0
,
703 union tgsi_exec_channel
*dst1
,
704 const union tgsi_exec_channel
*src0
,
705 const union tgsi_exec_channel
*src1
)
707 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
708 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
709 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
710 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
/**
 * Lane-wise conditional move on the .u view: dst->u[i] is src1->u[i] when
 * src0->u[i] is non-zero, otherwise src2->u[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_movc).
 */
719 union tgsi_exec_channel
*dst
,
720 const union tgsi_exec_channel
*src0
,
721 const union tgsi_exec_channel
*src1
,
722 const union tgsi_exec_channel
*src2
)
724 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
725 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
726 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
727 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
/**
 * Lane-wise float negation: dst->f[i] = -src->f[i], i = 0..3.
 * The function-name line is absent from this listing; presumably micro_neg,
 * which the sign-mode handling later in the file calls with (dst, src).
 */
732 union tgsi_exec_channel
*dst
,
733 const union tgsi_exec_channel
*src
)
735 dst
->f
[0] = -src
->f
[0];
736 dst
->f
[1] = -src
->f
[1];
737 dst
->f
[2] = -src
->f
[2];
738 dst
->f
[3] = -src
->f
[3];
/**
 * Lane-wise signed-integer negation on the .i view:
 * dst->i[i] = -src->i[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_ineg).
 */
743 union tgsi_exec_channel
*dst
,
744 const union tgsi_exec_channel
*src
)
746 dst
->i
[0] = -src
->i
[0];
747 dst
->i
[1] = -src
->i
[1];
748 dst
->i
[2] = -src
->i
[2];
749 dst
->i
[3] = -src
->i
[3];
/**
 * Lane-wise bitwise complement on the .u view:
 * dst->u[i] = ~src->u[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_not).
 */
754 union tgsi_exec_channel
*dst
,
755 const union tgsi_exec_channel
*src
)
757 dst
->u
[0] = ~src
->u
[0];
758 dst
->u
[1] = ~src
->u
[1];
759 dst
->u
[2] = ~src
->u
[2];
760 dst
->u
[3] = ~src
->u
[3];
/**
 * Lane-wise bitwise OR on the .u view:
 * dst->u[i] = src0->u[i] | src1->u[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_or).
 */
765 union tgsi_exec_channel
*dst
,
766 const union tgsi_exec_channel
*src0
,
767 const union tgsi_exec_channel
*src1
)
769 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
770 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
771 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
772 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
/**
 * Lane-wise power: dst->f[i] = powf(src0->f[i], src1->f[i]), i = 0..3.
 * The function-name line is absent from this listing (presumably micro_pow).
 */
777 union tgsi_exec_channel
*dst
,
778 const union tgsi_exec_channel
*src0
,
779 const union tgsi_exec_channel
*src1
)
781 dst
->f
[0] = powf( src0
->f
[0], src1
->f
[0] );
782 dst
->f
[1] = powf( src0
->f
[1], src1
->f
[1] );
783 dst
->f
[2] = powf( src0
->f
[2], src1
->f
[2] );
784 dst
->f
[3] = powf( src0
->f
[3], src1
->f
[3] );
/**
 * Lane-wise rounding implemented as floorf(x + 0.5f):
 * dst->f[i] = floorf(src->f[i] + 0.5f), i = 0..3.
 * The function-name line is absent from this listing (presumably micro_rnd).
 */
789 union tgsi_exec_channel
*dst
,
790 const union tgsi_exec_channel
*src
)
792 dst
->f
[0] = floorf( src
->f
[0] + 0.5f
);
793 dst
->f
[1] = floorf( src
->f
[1] + 0.5f
);
794 dst
->f
[2] = floorf( src
->f
[2] + 0.5f
);
795 dst
->f
[3] = floorf( src
->f
[3] + 0.5f
);
/**
 * Lane-wise left shift on the signed .i view:
 * dst->i[i] = src0->i[i] << src1->i[i], i = 0..3.
 * NOTE(review): the shift count is not range-checked and the shifted value
 * is signed; confirm callers never pass negative values or counts >= 32.
 * The function-name line is absent from this listing (presumably micro_shl).
 */
800 union tgsi_exec_channel
*dst
,
801 const union tgsi_exec_channel
*src0
,
802 const union tgsi_exec_channel
*src1
)
804 dst
->i
[0] = src0
->i
[0] << src1
->i
[0];
805 dst
->i
[1] = src0
->i
[1] << src1
->i
[1];
806 dst
->i
[2] = src0
->i
[2] << src1
->i
[2];
807 dst
->i
[3] = src0
->i
[3] << src1
->i
[3];
/**
 * Lane-wise right shift on the signed .i view:
 * dst->i[i] = src0->i[i] >> src1->i[i], i = 0..3.
 * NOTE(review): the shift count is not range-checked. The function-name
 * line is absent from this listing (presumably micro_ishr).
 */
812 union tgsi_exec_channel
*dst
,
813 const union tgsi_exec_channel
*src0
,
814 const union tgsi_exec_channel
*src1
)
816 dst
->i
[0] = src0
->i
[0] >> src1
->i
[0];
817 dst
->i
[1] = src0
->i
[1] >> src1
->i
[1];
818 dst
->i
[2] = src0
->i
[2] >> src1
->i
[2];
819 dst
->i
[3] = src0
->i
[3] >> src1
->i
[3];
/**
 * Lane-wise truncation toward zero via an int round-trip:
 * dst->f[i] = (float) (int) src0->f[i], i = 0..3.
 * The function-name line is absent from this listing (presumably
 * micro_trunc).
 */
824 union tgsi_exec_channel
*dst
,
825 const union tgsi_exec_channel
*src0
)
827 dst
->f
[0] = (float) (int) src0
->f
[0];
828 dst
->f
[1] = (float) (int) src0
->f
[1];
829 dst
->f
[2] = (float) (int) src0
->f
[2];
830 dst
->f
[3] = (float) (int) src0
->f
[3];
/**
 * Lane-wise right shift on the unsigned .u view:
 * dst->u[i] = src0->u[i] >> src1->u[i], i = 0..3.
 * NOTE(review): the shift count is not range-checked. The function-name
 * line is absent from this listing (presumably micro_ushr).
 */
835 union tgsi_exec_channel
*dst
,
836 const union tgsi_exec_channel
*src0
,
837 const union tgsi_exec_channel
*src1
)
839 dst
->u
[0] = src0
->u
[0] >> src1
->u
[0];
840 dst
->u
[1] = src0
->u
[1] >> src1
->u
[1];
841 dst
->u
[2] = src0
->u
[2] >> src1
->u
[2];
842 dst
->u
[3] = src0
->u
[3] >> src1
->u
[3];
/**
 * Lane-wise sine: dst->f[i] = sinf(src->f[i]), i = 0..3.
 * The function-name line is absent from this listing (presumably micro_sin).
 */
847 union tgsi_exec_channel
*dst
,
848 const union tgsi_exec_channel
*src
)
850 dst
->f
[0] = sinf( src
->f
[0] );
851 dst
->f
[1] = sinf( src
->f
[1] );
852 dst
->f
[2] = sinf( src
->f
[2] );
853 dst
->f
[3] = sinf( src
->f
[3] );
/**
 * Lane-wise square root: dst->f[i] = sqrtf(src->f[i]), i = 0..3.
 * NOTE(review): the return type and braces are absent from this listing.
 */
857 micro_sqrt( union tgsi_exec_channel
*dst
,
858 const union tgsi_exec_channel
*src
)
860 dst
->f
[0] = sqrtf( src
->f
[0] );
861 dst
->f
[1] = sqrtf( src
->f
[1] );
862 dst
->f
[2] = sqrtf( src
->f
[2] );
863 dst
->f
[3] = sqrtf( src
->f
[3] );
/**
 * Lane-wise float subtraction:
 * dst->f[i] = src0->f[i] - src1->f[i], i = 0..3.
 * The function-name line is absent from this listing; presumably micro_sub,
 * which the complement handling later in the file calls with (dst, a, b).
 */
868 union tgsi_exec_channel
*dst
,
869 const union tgsi_exec_channel
*src0
,
870 const union tgsi_exec_channel
*src1
)
872 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
873 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
874 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
875 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
/**
 * Lane-wise unsigned-integer to float conversion:
 * dst->f[i] = (float) src->u[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_u2f).
 */
880 union tgsi_exec_channel
*dst
,
881 const union tgsi_exec_channel
*src
)
883 dst
->f
[0] = (float) src
->u
[0];
884 dst
->f
[1] = (float) src
->u
[1];
885 dst
->f
[2] = (float) src
->u
[2];
886 dst
->f
[3] = (float) src
->u
[3];
/**
 * Lane-wise bitwise XOR on the .u view:
 * dst->u[i] = src0->u[i] ^ src1->u[i], i = 0..3.
 * The function-name line is absent from this listing (presumably micro_xor).
 */
891 union tgsi_exec_channel
*dst
,
892 const union tgsi_exec_channel
*src0
,
893 const union tgsi_exec_channel
*src1
)
895 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
896 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
897 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
898 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
/**
 * Read one channel (four lanes) of a source register into *chan.
 *
 * For the X/Y/Z/W swizzles the value comes from the selected register file,
 * with each lane using its own register index index->i[lane]:
 *   - CONSTANT and IMMEDIATE read floats from mach->Consts / mach->Imms;
 *   - INPUT, TEMPORARY, ADDRESS and OUTPUT copy the raw .u bits of the
 *     matching lane from the corresponding register array.
 * The ZERO and ONE swizzles copy the machine's constant 0 / 1.0 channels.
 *
 * NOTE(review): the `file` and `swizzle` parameter lines, both switch
 * headers, the break statements and any default cases are absent from this
 * listing. Only the IMMEDIATE path asserts an upper bound on the index; no
 * other bounds checks are visible.
 */
902 fetch_src_file_channel(
903 const struct tgsi_exec_machine
*mach
,
906 const union tgsi_exec_channel
*index
,
907 union tgsi_exec_channel
*chan
)
910 case TGSI_EXTSWIZZLE_X
:
911 case TGSI_EXTSWIZZLE_Y
:
912 case TGSI_EXTSWIZZLE_Z
:
913 case TGSI_EXTSWIZZLE_W
:
915 case TGSI_FILE_CONSTANT
:
916 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
917 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
918 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
919 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
922 case TGSI_FILE_INPUT
:
923 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
924 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
925 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
926 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
929 case TGSI_FILE_TEMPORARY
:
930 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
931 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
932 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
933 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
936 case TGSI_FILE_IMMEDIATE
:
937 assert( index
->i
[0] < (int) mach
->ImmLimit
);
938 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
939 assert( index
->i
[1] < (int) mach
->ImmLimit
);
940 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
941 assert( index
->i
[2] < (int) mach
->ImmLimit
);
942 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
943 assert( index
->i
[3] < (int) mach
->ImmLimit
);
944 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
947 case TGSI_FILE_ADDRESS
:
948 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
949 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
950 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
951 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
954 case TGSI_FILE_OUTPUT
:
955 /* vertex/fragment output vars can be read too */
956 chan
->u
[0] = mach
->Outputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
957 chan
->u
[1] = mach
->Outputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
958 chan
->u
[2] = mach
->Outputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
959 chan
->u
[3] = mach
->Outputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
967 case TGSI_EXTSWIZZLE_ZERO
:
968 *chan
= mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
];
971 case TGSI_EXTSWIZZLE_ONE
:
972 *chan
= mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
];
/**
 * Fetch one channel (four lanes) of a full source register into *chan,
 * applying addressing and source modifiers. Invoked through the FETCH()
 * macro as fetch_source(mach, chan, reg, chan_index).
 *
 * Steps visible in this listing:
 *   1. Start from reg->SrcRegister.Index as the per-lane register index.
 *   2. If the register is Indirect, fetch the X-swizzled channel of the
 *      SrcRegisterInd register and add it lane by lane to the index.
 *   3. If the register has a Dimension (INPUT / CONSTANT cases), add
 *      SrcRegisterDim.Index and, when that is Indirect too, the fetched
 *      SrcRegisterDimInd channel.
 *   4. Fetch the requested channel through its extended swizzle.
 *   5. Apply the sign mode: CLEAR -> abs, SET -> abs then negate,
 *      TOGGLE -> negate, KEEP -> unchanged.
 *   6. If Complement is set, replace the value by (1.0 - value).
 *
 * Fix: both swizzle lookups had lost their `&reg` argument -- it had been
 * mis-decoded into the single character U+00AE -- and it is restored here.
 *
 * NOTE(review): the function-name line, braces, the `swizzle` declaration,
 * the lane 0..2 index initialisations, most fetch_src_file_channel()
 * arguments and the break statements are absent from this listing.
 */
982 const struct tgsi_exec_machine
*mach
,
983 union tgsi_exec_channel
*chan
,
984 const struct tgsi_full_src_register
*reg
,
985 const uint chan_index
)
987 union tgsi_exec_channel index
;
993 index
.i
[3] = reg
->SrcRegister
.Index
;
995 if (reg
->SrcRegister
.Indirect
) {
996 union tgsi_exec_channel index2
;
997 union tgsi_exec_channel indir_index
;
1002 index2
.i
[3] = reg
->SrcRegisterInd
.Index
;
1004 swizzle
= tgsi_util_get_src_register_swizzle( &reg
->SrcRegisterInd
, CHAN_X
);
1005 fetch_src_file_channel(
1007 reg
->SrcRegisterInd
.File
,
/* Relative addressing: offset every lane by its own indirect value. */
1012 index
.i
[0] += indir_index
.i
[0];
1013 index
.i
[1] += indir_index
.i
[1];
1014 index
.i
[2] += indir_index
.i
[2];
1015 index
.i
[3] += indir_index
.i
[3];
1018 if( reg
->SrcRegister
.Dimension
) {
1019 switch( reg
->SrcRegister
.File
) {
1020 case TGSI_FILE_INPUT
:
1026 case TGSI_FILE_CONSTANT
:
1036 index
.i
[0] += reg
->SrcRegisterDim
.Index
;
1037 index
.i
[1] += reg
->SrcRegisterDim
.Index
;
1038 index
.i
[2] += reg
->SrcRegisterDim
.Index
;
1039 index
.i
[3] += reg
->SrcRegisterDim
.Index
;
1041 if (reg
->SrcRegisterDim
.Indirect
) {
1042 union tgsi_exec_channel index2
;
1043 union tgsi_exec_channel indir_index
;
1048 index2
.i
[3] = reg
->SrcRegisterDimInd
.Index
;
1050 swizzle
= tgsi_util_get_src_register_swizzle( &reg
->SrcRegisterDimInd
, CHAN_X
);
1051 fetch_src_file_channel(
1053 reg
->SrcRegisterDimInd
.File
,
1058 index
.i
[0] += indir_index
.i
[0];
1059 index
.i
[1] += indir_index
.i
[1];
1060 index
.i
[2] += indir_index
.i
[2];
1061 index
.i
[3] += indir_index
.i
[3];
1065 swizzle
= tgsi_util_get_full_src_register_extswizzle( reg
, chan_index
);
1066 fetch_src_file_channel(
1068 reg
->SrcRegister
.File
,
/* Source modifiers are applied in place on the fetched channel. */
1073 switch (tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
)) {
1074 case TGSI_UTIL_SIGN_CLEAR
:
1075 micro_abs( chan
, chan
);
1078 case TGSI_UTIL_SIGN_SET
:
1079 micro_abs( chan
, chan
);
1080 micro_neg( chan
, chan
);
1083 case TGSI_UTIL_SIGN_TOGGLE
:
1084 micro_neg( chan
, chan
);
1087 case TGSI_UTIL_SIGN_KEEP
:
1091 if (reg
->SrcRegisterExtMod
.Complement
) {
1092 micro_sub( chan
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], chan
);
/**
 * Write one channel (four lanes) to a destination register. Invoked through
 * the STORE() macro as store_dest(mach, chan, reg, inst, chan_index).
 *
 * The destination is selected by reg->DstRegister.File:
 *   - OUTPUT: mach->Outputs, indexed by the register index plus the value in
 *     lane 0 of the TEMP_OUTPUT utility channel;
 *   - TEMPORARY / ADDRESS: mach->Temps / mach->Addrs at the register index.
 * The write then depends on inst->Instruction.Saturate:
 *   - the first visible path copies only the lanes whose bit is set in
 *     mach->ExecMask (0x1, 0x2, 0x4, 0x8 for lanes 0..3);
 *   - TGSI_SAT_ZERO_ONE clamps to [0, 1] with max/min and, as the XXX note
 *     says, currently writes all lanes regardless of ExecMask.
 *
 * NOTE(review): the function-name line, the chan_index parameter line,
 * braces, breaks, the TGSI_FILE_NULL body, the first saturate case label and
 * the TGSI_SAT_MINUS_PLUS_ONE body are absent from this listing.
 */
1098 struct tgsi_exec_machine
*mach
,
1099 const union tgsi_exec_channel
*chan
,
1100 const struct tgsi_full_dst_register
*reg
,
1101 const struct tgsi_full_instruction
*inst
,
1104 union tgsi_exec_channel
*dst
;
1106 switch( reg
->DstRegister
.File
) {
1107 case TGSI_FILE_NULL
:
1110 case TGSI_FILE_OUTPUT
:
1111 dst
= &mach
->Outputs
[mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1112 + reg
->DstRegister
.Index
].xyzw
[chan_index
];
1115 case TGSI_FILE_TEMPORARY
:
1116 dst
= &mach
->Temps
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1119 case TGSI_FILE_ADDRESS
:
1120 dst
= &mach
->Addrs
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1128 switch (inst
->Instruction
.Saturate
)
/* Masked copy: one ExecMask bit per lane. */
1131 if (mach
->ExecMask
& 0x1)
1132 dst
->i
[0] = chan
->i
[0];
1133 if (mach
->ExecMask
& 0x2)
1134 dst
->i
[1] = chan
->i
[1];
1135 if (mach
->ExecMask
& 0x4)
1136 dst
->i
[2] = chan
->i
[2];
1137 if (mach
->ExecMask
& 0x8)
1138 dst
->i
[3] = chan
->i
[3];
1141 case TGSI_SAT_ZERO_ONE
:
1142 /* XXX need to obey ExecMask here */
1143 micro_max(dst
, chan
, &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
1144 micro_min(dst
, dst
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
1147 case TGSI_SAT_MINUS_PLUS_ONE
:
/* Fetch channel CHAN of source operand INDEX of the current instruction into
 * VAL. Relies on variables named `mach` and `inst` being in scope. */
1156 #define FETCH(VAL,INDEX,CHAN)\
1157 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
/* Store VAL to channel CHAN of destination operand INDEX of the current
 * instruction. Relies on variables named `mach` and `inst` being in scope. */
1159 #define STORE(VAL,INDEX,CHAN)\
1160 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1164 * Execute ARB-style KIL which is predicated by a src register.
1165 * Kill fragment if any of the four values is less than zero.
/**
 * Conditional kill: for each of the four channels of source operand 0, look
 * up its extended swizzle, skip it if that swizzle was already examined
 * (ZERO and ONE are pre-marked as examined), otherwise fetch the channel and
 * test its four lanes against 0.0f. The resulting kilmask (one bit per
 * pixel) is OR-ed into lane 0 of the TEMP_KILMASK utility channel.
 *
 * NOTE(review): the return type, braces, the declarations of uniquemask /
 * chan_index / swizzle / i, the `continue` after the uniqueness test and the
 * statement that sets the kilmask bit are absent from this listing.
 */
1168 exec_kilp(struct tgsi_exec_machine
*mach
,
1169 const struct tgsi_full_instruction
*inst
)
1173 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1174 union tgsi_exec_channel r
[1];
1176 /* This mask stores component bits that were already tested. Note that
1177 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1179 uniquemask
= (1 << TGSI_EXTSWIZZLE_ZERO
) | (1 << TGSI_EXTSWIZZLE_ONE
);
1181 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1186 /* unswizzle channel */
1187 swizzle
= tgsi_util_get_full_src_register_extswizzle (
1188 &inst
->FullSrcRegisters
[0],
1191 /* check if the component has not been already tested */
1192 if (uniquemask
& (1 << swizzle
))
1194 uniquemask
|= 1 << swizzle
;
1196 FETCH(&r
[0], 0, chan_index
);
1197 for (i
= 0; i
< 4; i
++)
1198 if (r
[0].f
[i
] < 0.0f
)
1202 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1207 * Fetch a texel using STR texture coordinates.
1210 fetch_texel( struct tgsi_sampler
*sampler
,
1211 const union tgsi_exec_channel
*s
,
1212 const union tgsi_exec_channel
*t
,
1213 const union tgsi_exec_channel
*p
,
1214 float lodbias
, /* XXX should be float[4] */
1215 union tgsi_exec_channel
*r
,
1216 union tgsi_exec_channel
*g
,
1217 union tgsi_exec_channel
*b
,
1218 union tgsi_exec_channel
*a
)
1221 float rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1223 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, lodbias
, rgba
);
1225 for (j
= 0; j
< 4; j
++) {
1226 r
->f
[j
] = rgba
[0][j
];
1227 g
->f
[j
] = rgba
[1][j
];
1228 b
->f
[j
] = rgba
[2][j
];
1229 a
->f
[j
] = rgba
[3][j
];
1235 exec_tex(struct tgsi_exec_machine
*mach
,
1236 const struct tgsi_full_instruction
*inst
,
1240 const uint unit
= inst
->FullSrcRegisters
[1].SrcRegister
.Index
;
1241 union tgsi_exec_channel r
[8];
1245 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1247 switch (inst
->InstructionExtTexture
.Texture
) {
1248 case TGSI_TEXTURE_1D
:
1250 FETCH(&r
[0], 0, CHAN_X
);
1253 FETCH(&r
[1], 0, CHAN_W
);
1254 micro_div( &r
[0], &r
[0], &r
[1] );
1258 FETCH(&r
[1], 0, CHAN_W
);
1259 lodBias
= r
[2].f
[0];
1264 fetch_texel(&mach
->Samplers
[unit
],
1265 &r
[0], NULL
, NULL
, lodBias
, /* S, T, P, BIAS */
1266 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1269 case TGSI_TEXTURE_2D
:
1270 case TGSI_TEXTURE_RECT
:
1272 FETCH(&r
[0], 0, CHAN_X
);
1273 FETCH(&r
[1], 0, CHAN_Y
);
1274 FETCH(&r
[2], 0, CHAN_Z
);
1277 FETCH(&r
[3], 0, CHAN_W
);
1278 micro_div( &r
[0], &r
[0], &r
[3] );
1279 micro_div( &r
[1], &r
[1], &r
[3] );
1280 micro_div( &r
[2], &r
[2], &r
[3] );
1284 FETCH(&r
[3], 0, CHAN_W
);
1285 lodBias
= r
[3].f
[0];
1290 fetch_texel(&mach
->Samplers
[unit
],
1291 &r
[0], &r
[1], &r
[2], lodBias
, /* inputs */
1292 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1295 case TGSI_TEXTURE_3D
:
1296 case TGSI_TEXTURE_CUBE
:
1298 FETCH(&r
[0], 0, CHAN_X
);
1299 FETCH(&r
[1], 0, CHAN_Y
);
1300 FETCH(&r
[2], 0, CHAN_Z
);
1303 FETCH(&r
[3], 0, CHAN_W
);
1304 micro_div( &r
[0], &r
[0], &r
[3] );
1305 micro_div( &r
[1], &r
[1], &r
[3] );
1306 micro_div( &r
[2], &r
[2], &r
[3] );
1310 FETCH(&r
[3], 0, CHAN_W
);
1311 lodBias
= r
[3].f
[0];
1316 fetch_texel(&mach
->Samplers
[unit
],
1317 &r
[0], &r
[1], &r
[2], lodBias
,
1318 &r
[0], &r
[1], &r
[2], &r
[3]);
1325 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1326 STORE( &r
[chan_index
], 0, chan_index
);
1332 * Evaluate a constant-valued coefficient at the position of the
1337 struct tgsi_exec_machine
*mach
,
1343 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1344 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1349 * Evaluate a linear-valued coefficient at the position of the
1354 struct tgsi_exec_machine
*mach
,
1358 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1359 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1360 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1361 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1362 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1363 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
1364 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
1365 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
1366 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
1370 * Evaluate a perspective-valued coefficient at the position of the
1374 eval_perspective_coef(
1375 struct tgsi_exec_machine
*mach
,
1379 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1380 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1381 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1382 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1383 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1384 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
1385 /* divide by W here */
1386 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
1387 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
1388 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
1389 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
1393 typedef void (* eval_coef_func
)(
1394 struct tgsi_exec_machine
*mach
,
1400 struct tgsi_exec_machine
*mach
,
1401 const struct tgsi_full_declaration
*decl
)
1403 if( mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1404 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1405 unsigned first
, last
, mask
;
1406 eval_coef_func eval
;
1408 assert( decl
->Declaration
.Declare
== TGSI_DECLARE_RANGE
);
1410 first
= decl
->u
.DeclarationRange
.First
;
1411 last
= decl
->u
.DeclarationRange
.Last
;
1412 mask
= decl
->Declaration
.UsageMask
;
1414 switch( decl
->Interpolation
.Interpolate
) {
1415 case TGSI_INTERPOLATE_CONSTANT
:
1416 eval
= eval_constant_coef
;
1419 case TGSI_INTERPOLATE_LINEAR
:
1420 eval
= eval_linear_coef
;
1423 case TGSI_INTERPOLATE_PERSPECTIVE
:
1424 eval
= eval_perspective_coef
;
1431 if( mask
== TGSI_WRITEMASK_XYZW
) {
1434 for( i
= first
; i
<= last
; i
++ ) {
1435 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1443 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1444 if( mask
& (1 << j
) ) {
1445 for( i
= first
; i
<= last
; i
++ ) {
1457 struct tgsi_exec_machine
*mach
,
1458 const struct tgsi_full_instruction
*inst
,
1462 union tgsi_exec_channel r
[8];
1466 switch (inst
->Instruction
.Opcode
) {
1467 case TGSI_OPCODE_ARL
:
1468 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1469 FETCH( &r
[0], 0, chan_index
);
1470 micro_f2it( &r
[0], &r
[0] );
1471 STORE( &r
[0], 0, chan_index
);
1475 case TGSI_OPCODE_MOV
:
1476 /* TGSI_OPCODE_SWZ */
1477 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1478 FETCH( &r
[0], 0, chan_index
);
1479 STORE( &r
[0], 0, chan_index
);
1483 case TGSI_OPCODE_LIT
:
1484 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1485 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1488 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1489 FETCH( &r
[0], 0, CHAN_X
);
1490 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1491 micro_max( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1492 STORE( &r
[0], 0, CHAN_Y
);
1495 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1496 FETCH( &r
[1], 0, CHAN_Y
);
1497 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1499 FETCH( &r
[2], 0, CHAN_W
);
1500 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
1501 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
1502 micro_pow( &r
[1], &r
[1], &r
[2] );
1503 micro_lt( &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1504 STORE( &r
[0], 0, CHAN_Z
);
1508 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1509 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1513 case TGSI_OPCODE_RCP
:
1514 /* TGSI_OPCODE_RECIP */
1515 FETCH( &r
[0], 0, CHAN_X
);
1516 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1517 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1518 STORE( &r
[0], 0, chan_index
);
1522 case TGSI_OPCODE_RSQ
:
1523 /* TGSI_OPCODE_RECIPSQRT */
1524 FETCH( &r
[0], 0, CHAN_X
);
1525 micro_sqrt( &r
[0], &r
[0] );
1526 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1527 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1528 STORE( &r
[0], 0, chan_index
);
1532 case TGSI_OPCODE_EXP
:
1533 FETCH( &r
[0], 0, CHAN_X
);
1534 micro_flr( &r
[1], &r
[0] ); /* r1 = floor(r0) */
1535 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1536 micro_exp2( &r
[2], &r
[1] ); /* r2 = 2 ^ r1 */
1537 STORE( &r
[2], 0, CHAN_X
); /* store r2 */
1539 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1540 micro_sub( &r
[2], &r
[0], &r
[1] ); /* r2 = r0 - r1 */
1541 STORE( &r
[2], 0, CHAN_Y
); /* store r2 */
1543 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1544 micro_exp2( &r
[2], &r
[0] ); /* r2 = 2 ^ r0 */
1545 STORE( &r
[2], 0, CHAN_Z
); /* store r2 */
1547 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1548 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1552 case TGSI_OPCODE_LOG
:
1553 FETCH( &r
[0], 0, CHAN_X
);
1554 micro_abs( &r
[2], &r
[0] ); /* r2 = abs(r0) */
1555 micro_lg2( &r
[1], &r
[2] ); /* r1 = lg2(r2) */
1556 micro_flr( &r
[0], &r
[1] ); /* r0 = floor(r1) */
1557 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1558 STORE( &r
[0], 0, CHAN_X
);
1560 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1561 micro_exp2( &r
[0], &r
[0] ); /* r0 = 2 ^ r0 */
1562 micro_div( &r
[0], &r
[2], &r
[0] ); /* r0 = r2 / r0 */
1563 STORE( &r
[0], 0, CHAN_Y
);
1565 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1566 STORE( &r
[1], 0, CHAN_Z
);
1568 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1569 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1573 case TGSI_OPCODE_MUL
:
1574 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
)
1576 FETCH(&r
[0], 0, chan_index
);
1577 FETCH(&r
[1], 1, chan_index
);
1579 micro_mul( &r
[0], &r
[0], &r
[1] );
1581 STORE(&r
[0], 0, chan_index
);
1585 case TGSI_OPCODE_ADD
:
1586 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1587 FETCH( &r
[0], 0, chan_index
);
1588 FETCH( &r
[1], 1, chan_index
);
1589 micro_add( &r
[0], &r
[0], &r
[1] );
1590 STORE( &r
[0], 0, chan_index
);
1594 case TGSI_OPCODE_DP3
:
1595 /* TGSI_OPCODE_DOT3 */
1596 FETCH( &r
[0], 0, CHAN_X
);
1597 FETCH( &r
[1], 1, CHAN_X
);
1598 micro_mul( &r
[0], &r
[0], &r
[1] );
1600 FETCH( &r
[1], 0, CHAN_Y
);
1601 FETCH( &r
[2], 1, CHAN_Y
);
1602 micro_mul( &r
[1], &r
[1], &r
[2] );
1603 micro_add( &r
[0], &r
[0], &r
[1] );
1605 FETCH( &r
[1], 0, CHAN_Z
);
1606 FETCH( &r
[2], 1, CHAN_Z
);
1607 micro_mul( &r
[1], &r
[1], &r
[2] );
1608 micro_add( &r
[0], &r
[0], &r
[1] );
1610 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1611 STORE( &r
[0], 0, chan_index
);
1615 case TGSI_OPCODE_DP4
:
1616 /* TGSI_OPCODE_DOT4 */
1617 FETCH(&r
[0], 0, CHAN_X
);
1618 FETCH(&r
[1], 1, CHAN_X
);
1620 micro_mul( &r
[0], &r
[0], &r
[1] );
1622 FETCH(&r
[1], 0, CHAN_Y
);
1623 FETCH(&r
[2], 1, CHAN_Y
);
1625 micro_mul( &r
[1], &r
[1], &r
[2] );
1626 micro_add( &r
[0], &r
[0], &r
[1] );
1628 FETCH(&r
[1], 0, CHAN_Z
);
1629 FETCH(&r
[2], 1, CHAN_Z
);
1631 micro_mul( &r
[1], &r
[1], &r
[2] );
1632 micro_add( &r
[0], &r
[0], &r
[1] );
1634 FETCH(&r
[1], 0, CHAN_W
);
1635 FETCH(&r
[2], 1, CHAN_W
);
1637 micro_mul( &r
[1], &r
[1], &r
[2] );
1638 micro_add( &r
[0], &r
[0], &r
[1] );
1640 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1641 STORE( &r
[0], 0, chan_index
);
1645 case TGSI_OPCODE_DST
:
1646 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1647 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1650 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1651 FETCH( &r
[0], 0, CHAN_Y
);
1652 FETCH( &r
[1], 1, CHAN_Y
);
1653 micro_mul( &r
[0], &r
[0], &r
[1] );
1654 STORE( &r
[0], 0, CHAN_Y
);
1657 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1658 FETCH( &r
[0], 0, CHAN_Z
);
1659 STORE( &r
[0], 0, CHAN_Z
);
1662 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1663 FETCH( &r
[0], 1, CHAN_W
);
1664 STORE( &r
[0], 0, CHAN_W
);
1668 case TGSI_OPCODE_MIN
:
1669 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1670 FETCH(&r
[0], 0, chan_index
);
1671 FETCH(&r
[1], 1, chan_index
);
1673 /* XXX use micro_min()?? */
1674 micro_lt( &r
[0], &r
[0], &r
[1], &r
[0], &r
[1] );
1676 STORE(&r
[0], 0, chan_index
);
1680 case TGSI_OPCODE_MAX
:
1681 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1682 FETCH(&r
[0], 0, chan_index
);
1683 FETCH(&r
[1], 1, chan_index
);
1685 /* XXX use micro_max()?? */
1686 micro_lt( &r
[0], &r
[0], &r
[1], &r
[1], &r
[0] );
1688 STORE(&r
[0], 0, chan_index
);
1692 case TGSI_OPCODE_SLT
:
1693 /* TGSI_OPCODE_SETLT */
1694 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1695 FETCH( &r
[0], 0, chan_index
);
1696 FETCH( &r
[1], 1, chan_index
);
1697 micro_lt( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1698 STORE( &r
[0], 0, chan_index
);
1702 case TGSI_OPCODE_SGE
:
1703 /* TGSI_OPCODE_SETGE */
1704 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1705 FETCH( &r
[0], 0, chan_index
);
1706 FETCH( &r
[1], 1, chan_index
);
1707 micro_ge( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1708 STORE( &r
[0], 0, chan_index
);
1712 case TGSI_OPCODE_MAD
:
1713 /* TGSI_OPCODE_MADD */
1714 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1715 FETCH( &r
[0], 0, chan_index
);
1716 FETCH( &r
[1], 1, chan_index
);
1717 micro_mul( &r
[0], &r
[0], &r
[1] );
1718 FETCH( &r
[1], 2, chan_index
);
1719 micro_add( &r
[0], &r
[0], &r
[1] );
1720 STORE( &r
[0], 0, chan_index
);
1724 case TGSI_OPCODE_SUB
:
1725 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1726 FETCH(&r
[0], 0, chan_index
);
1727 FETCH(&r
[1], 1, chan_index
);
1729 micro_sub( &r
[0], &r
[0], &r
[1] );
1731 STORE(&r
[0], 0, chan_index
);
1735 case TGSI_OPCODE_LERP
:
1736 /* TGSI_OPCODE_LRP */
1737 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1738 FETCH(&r
[0], 0, chan_index
);
1739 FETCH(&r
[1], 1, chan_index
);
1740 FETCH(&r
[2], 2, chan_index
);
1742 micro_sub( &r
[1], &r
[1], &r
[2] );
1743 micro_mul( &r
[0], &r
[0], &r
[1] );
1744 micro_add( &r
[0], &r
[0], &r
[2] );
1746 STORE(&r
[0], 0, chan_index
);
1750 case TGSI_OPCODE_CND
:
1754 case TGSI_OPCODE_CND0
:
1758 case TGSI_OPCODE_DOT2ADD
:
1759 /* TGSI_OPCODE_DP2A */
1763 case TGSI_OPCODE_INDEX
:
1767 case TGSI_OPCODE_NEGATE
:
1771 case TGSI_OPCODE_FRAC
:
1772 /* TGSI_OPCODE_FRC */
1773 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1774 FETCH( &r
[0], 0, chan_index
);
1775 micro_frc( &r
[0], &r
[0] );
1776 STORE( &r
[0], 0, chan_index
);
1780 case TGSI_OPCODE_CLAMP
:
1784 case TGSI_OPCODE_FLOOR
:
1785 /* TGSI_OPCODE_FLR */
1786 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1787 FETCH( &r
[0], 0, chan_index
);
1788 micro_flr( &r
[0], &r
[0] );
1789 STORE( &r
[0], 0, chan_index
);
1793 case TGSI_OPCODE_ROUND
:
1794 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1795 FETCH( &r
[0], 0, chan_index
);
1796 micro_rnd( &r
[0], &r
[0] );
1797 STORE( &r
[0], 0, chan_index
);
1801 case TGSI_OPCODE_EXPBASE2
:
1802 /* TGSI_OPCODE_EX2 */
1803 FETCH(&r
[0], 0, CHAN_X
);
1805 micro_pow( &r
[0], &mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
], &r
[0] );
1807 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1808 STORE( &r
[0], 0, chan_index
);
1812 case TGSI_OPCODE_LOGBASE2
:
1813 /* TGSI_OPCODE_LG2 */
1814 FETCH( &r
[0], 0, CHAN_X
);
1815 micro_lg2( &r
[0], &r
[0] );
1816 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1817 STORE( &r
[0], 0, chan_index
);
1821 case TGSI_OPCODE_POWER
:
1822 /* TGSI_OPCODE_POW */
1823 FETCH(&r
[0], 0, CHAN_X
);
1824 FETCH(&r
[1], 1, CHAN_X
);
1826 micro_pow( &r
[0], &r
[0], &r
[1] );
1828 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1829 STORE( &r
[0], 0, chan_index
);
1833 case TGSI_OPCODE_CROSSPRODUCT
:
1834 /* TGSI_OPCODE_XPD */
1835 FETCH(&r
[0], 0, CHAN_Y
);
1836 FETCH(&r
[1], 1, CHAN_Z
);
1838 micro_mul( &r
[2], &r
[0], &r
[1] );
1840 FETCH(&r
[3], 0, CHAN_Z
);
1841 FETCH(&r
[4], 1, CHAN_Y
);
1843 micro_mul( &r
[5], &r
[3], &r
[4] );
1844 micro_sub( &r
[2], &r
[2], &r
[5] );
1846 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1847 STORE( &r
[2], 0, CHAN_X
);
1850 FETCH(&r
[2], 1, CHAN_X
);
1852 micro_mul( &r
[3], &r
[3], &r
[2] );
1854 FETCH(&r
[5], 0, CHAN_X
);
1856 micro_mul( &r
[1], &r
[1], &r
[5] );
1857 micro_sub( &r
[3], &r
[3], &r
[1] );
1859 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1860 STORE( &r
[3], 0, CHAN_Y
);
1863 micro_mul( &r
[5], &r
[5], &r
[4] );
1864 micro_mul( &r
[0], &r
[0], &r
[2] );
1865 micro_sub( &r
[5], &r
[5], &r
[0] );
1867 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1868 STORE( &r
[5], 0, CHAN_Z
);
1871 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1872 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1876 case TGSI_OPCODE_MULTIPLYMATRIX
:
1880 case TGSI_OPCODE_ABS
:
1881 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1882 FETCH(&r
[0], 0, chan_index
);
1884 micro_abs( &r
[0], &r
[0] );
1886 STORE(&r
[0], 0, chan_index
);
1890 case TGSI_OPCODE_RCC
:
1894 case TGSI_OPCODE_DPH
:
1895 FETCH(&r
[0], 0, CHAN_X
);
1896 FETCH(&r
[1], 1, CHAN_X
);
1898 micro_mul( &r
[0], &r
[0], &r
[1] );
1900 FETCH(&r
[1], 0, CHAN_Y
);
1901 FETCH(&r
[2], 1, CHAN_Y
);
1903 micro_mul( &r
[1], &r
[1], &r
[2] );
1904 micro_add( &r
[0], &r
[0], &r
[1] );
1906 FETCH(&r
[1], 0, CHAN_Z
);
1907 FETCH(&r
[2], 1, CHAN_Z
);
1909 micro_mul( &r
[1], &r
[1], &r
[2] );
1910 micro_add( &r
[0], &r
[0], &r
[1] );
1912 FETCH(&r
[1], 1, CHAN_W
);
1914 micro_add( &r
[0], &r
[0], &r
[1] );
1916 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1917 STORE( &r
[0], 0, chan_index
);
1921 case TGSI_OPCODE_COS
:
1922 FETCH(&r
[0], 0, CHAN_X
);
1924 micro_cos( &r
[0], &r
[0] );
1926 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1927 STORE( &r
[0], 0, chan_index
);
1931 case TGSI_OPCODE_DDX
:
1932 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1933 FETCH( &r
[0], 0, chan_index
);
1934 micro_ddx( &r
[0], &r
[0] );
1935 STORE( &r
[0], 0, chan_index
);
1939 case TGSI_OPCODE_DDY
:
1940 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1941 FETCH( &r
[0], 0, chan_index
);
1942 micro_ddy( &r
[0], &r
[0] );
1943 STORE( &r
[0], 0, chan_index
);
1947 case TGSI_OPCODE_KILP
:
1948 exec_kilp (mach
, inst
);
1951 case TGSI_OPCODE_KIL
:
1952 /* for enabled ExecMask bits, set the killed bit */
1953 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= mach
->ExecMask
;
1956 case TGSI_OPCODE_PK2H
:
1960 case TGSI_OPCODE_PK2US
:
1964 case TGSI_OPCODE_PK4B
:
1968 case TGSI_OPCODE_PK4UB
:
1972 case TGSI_OPCODE_RFL
:
1976 case TGSI_OPCODE_SEQ
:
1977 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1978 FETCH( &r
[0], 0, chan_index
);
1979 FETCH( &r
[1], 1, chan_index
);
1980 micro_eq( &r
[0], &r
[0], &r
[1],
1981 &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
],
1982 &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1983 STORE( &r
[0], 0, chan_index
);
1987 case TGSI_OPCODE_SFL
:
1991 case TGSI_OPCODE_SGT
:
1992 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1993 FETCH( &r
[0], 0, chan_index
);
1994 FETCH( &r
[1], 1, chan_index
);
1995 micro_le( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1996 STORE( &r
[0], 0, chan_index
);
2000 case TGSI_OPCODE_SIN
:
2001 FETCH( &r
[0], 0, CHAN_X
);
2002 micro_sin( &r
[0], &r
[0] );
2003 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2004 STORE( &r
[0], 0, chan_index
);
2008 case TGSI_OPCODE_SLE
:
2009 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2010 FETCH( &r
[0], 0, chan_index
);
2011 FETCH( &r
[1], 1, chan_index
);
2012 micro_le( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2013 STORE( &r
[0], 0, chan_index
);
2017 case TGSI_OPCODE_SNE
:
2018 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2019 FETCH( &r
[0], 0, chan_index
);
2020 FETCH( &r
[1], 1, chan_index
);
2021 micro_eq( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
2022 STORE( &r
[0], 0, chan_index
);
2026 case TGSI_OPCODE_STR
:
2030 case TGSI_OPCODE_TEX
:
2031 /* simple texture lookup */
2032 /* src[0] = texcoord */
2033 /* src[1] = sampler unit */
2034 exec_tex(mach
, inst
, FALSE
, FALSE
);
2037 case TGSI_OPCODE_TXB
:
2038 /* Texture lookup with lod bias */
2039 /* src[0] = texcoord (src[0].w = LOD bias) */
2040 /* src[1] = sampler unit */
2041 exec_tex(mach
, inst
, TRUE
, FALSE
);
2044 case TGSI_OPCODE_TXD
:
2045 /* Texture lookup with explict partial derivatives */
2046 /* src[0] = texcoord */
2047 /* src[1] = d[strq]/dx */
2048 /* src[2] = d[strq]/dy */
2049 /* src[3] = sampler unit */
2053 case TGSI_OPCODE_TXL
:
2054 /* Texture lookup with explit LOD */
2055 /* src[0] = texcoord (src[0].w = LOD) */
2056 /* src[1] = sampler unit */
2057 exec_tex(mach
, inst
, TRUE
, FALSE
);
2060 case TGSI_OPCODE_TXP
:
2061 /* Texture lookup with projection */
2062 /* src[0] = texcoord (src[0].w = projection) */
2063 /* src[1] = sampler unit */
2064 exec_tex(mach
, inst
, FALSE
, TRUE
);
2067 case TGSI_OPCODE_UP2H
:
2071 case TGSI_OPCODE_UP2US
:
2075 case TGSI_OPCODE_UP4B
:
2079 case TGSI_OPCODE_UP4UB
:
2083 case TGSI_OPCODE_X2D
:
2087 case TGSI_OPCODE_ARA
:
2091 case TGSI_OPCODE_ARR
:
2095 case TGSI_OPCODE_BRA
:
2099 case TGSI_OPCODE_CAL
:
2100 /* skip the call if no execution channels are enabled */
2101 if (mach
->ExecMask
) {
2104 /* push the Cond, Loop, Cont stacks */
2105 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2106 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2107 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2108 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2109 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2110 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2112 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
2113 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
2115 /* note that PC was already incremented above */
2116 mach
->CallStack
[mach
->CallStackTop
++] = *pc
;
2117 *pc
= inst
->InstructionExtLabel
.Label
;
2121 case TGSI_OPCODE_RET
:
2122 mach
->FuncMask
&= ~mach
->ExecMask
;
2123 UPDATE_EXEC_MASK(mach
);
2125 if (mach
->ExecMask
== 0x0) {
2126 /* really return now (otherwise, keep executing */
2128 if (mach
->CallStackTop
== 0) {
2129 /* returning from main() */
2133 *pc
= mach
->CallStack
[--mach
->CallStackTop
];
2135 /* pop the Cond, Loop, Cont stacks */
2136 assert(mach
->CondStackTop
> 0);
2137 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2138 assert(mach
->LoopStackTop
> 0);
2139 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2140 assert(mach
->ContStackTop
> 0);
2141 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2142 assert(mach
->FuncStackTop
> 0);
2143 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
2145 UPDATE_EXEC_MASK(mach
);
2149 case TGSI_OPCODE_SSG
:
2153 case TGSI_OPCODE_CMP
:
2154 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2155 FETCH(&r
[0], 0, chan_index
);
2156 FETCH(&r
[1], 1, chan_index
);
2157 FETCH(&r
[2], 2, chan_index
);
2159 micro_lt( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2] );
2161 STORE(&r
[0], 0, chan_index
);
2165 case TGSI_OPCODE_SCS
:
2166 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2167 FETCH( &r
[0], 0, CHAN_X
);
2169 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) ) {
2170 micro_cos( &r
[1], &r
[0] );
2171 STORE( &r
[1], 0, CHAN_X
);
2173 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2174 micro_sin( &r
[1], &r
[0] );
2175 STORE( &r
[1], 0, CHAN_Y
);
2177 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
2178 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
2180 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
2181 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2185 case TGSI_OPCODE_NRM
:
2189 case TGSI_OPCODE_DIV
:
2193 case TGSI_OPCODE_DP2
:
2194 FETCH( &r
[0], 0, CHAN_X
);
2195 FETCH( &r
[1], 1, CHAN_X
);
2196 micro_mul( &r
[0], &r
[0], &r
[1] );
2198 FETCH( &r
[1], 0, CHAN_Y
);
2199 FETCH( &r
[2], 1, CHAN_Y
);
2200 micro_mul( &r
[1], &r
[1], &r
[2] );
2201 micro_add( &r
[0], &r
[0], &r
[1] );
2203 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2204 STORE( &r
[0], 0, chan_index
);
2208 case TGSI_OPCODE_IF
:
2210 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2211 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2212 FETCH( &r
[0], 0, CHAN_X
);
2213 /* update CondMask */
2215 mach
->CondMask
&= ~0x1;
2218 mach
->CondMask
&= ~0x2;
2221 mach
->CondMask
&= ~0x4;
2224 mach
->CondMask
&= ~0x8;
2226 UPDATE_EXEC_MASK(mach
);
2227 /* Todo: If CondMask==0, jump to ELSE */
2230 case TGSI_OPCODE_ELSE
:
2231 /* invert CondMask wrt previous mask */
2234 assert(mach
->CondStackTop
> 0);
2235 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
2236 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
2237 UPDATE_EXEC_MASK(mach
);
2238 /* Todo: If CondMask==0, jump to ENDIF */
2242 case TGSI_OPCODE_ENDIF
:
2244 assert(mach
->CondStackTop
> 0);
2245 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2246 UPDATE_EXEC_MASK(mach
);
2249 case TGSI_OPCODE_END
:
2250 /* halt execution */
2254 case TGSI_OPCODE_REP
:
2258 case TGSI_OPCODE_ENDREP
:
2262 case TGSI_OPCODE_PUSHA
:
2266 case TGSI_OPCODE_POPA
:
2270 case TGSI_OPCODE_CEIL
:
2271 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2272 FETCH( &r
[0], 0, chan_index
);
2273 micro_ceil( &r
[0], &r
[0] );
2274 STORE( &r
[0], 0, chan_index
);
2278 case TGSI_OPCODE_I2F
:
2279 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2280 FETCH( &r
[0], 0, chan_index
);
2281 micro_i2f( &r
[0], &r
[0] );
2282 STORE( &r
[0], 0, chan_index
);
2286 case TGSI_OPCODE_NOT
:
2287 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2288 FETCH( &r
[0], 0, chan_index
);
2289 micro_not( &r
[0], &r
[0] );
2290 STORE( &r
[0], 0, chan_index
);
2294 case TGSI_OPCODE_TRUNC
:
2295 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2296 FETCH( &r
[0], 0, chan_index
);
2297 micro_trunc( &r
[0], &r
[0] );
2298 STORE( &r
[0], 0, chan_index
);
2302 case TGSI_OPCODE_SHL
:
2303 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2304 FETCH( &r
[0], 0, chan_index
);
2305 FETCH( &r
[1], 1, chan_index
);
2306 micro_shl( &r
[0], &r
[0], &r
[1] );
2307 STORE( &r
[0], 0, chan_index
);
2311 case TGSI_OPCODE_SHR
:
2312 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2313 FETCH( &r
[0], 0, chan_index
);
2314 FETCH( &r
[1], 1, chan_index
);
2315 micro_ishr( &r
[0], &r
[0], &r
[1] );
2316 STORE( &r
[0], 0, chan_index
);
2320 case TGSI_OPCODE_AND
:
2321 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2322 FETCH( &r
[0], 0, chan_index
);
2323 FETCH( &r
[1], 1, chan_index
);
2324 micro_and( &r
[0], &r
[0], &r
[1] );
2325 STORE( &r
[0], 0, chan_index
);
2329 case TGSI_OPCODE_OR
:
2330 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2331 FETCH( &r
[0], 0, chan_index
);
2332 FETCH( &r
[1], 1, chan_index
);
2333 micro_or( &r
[0], &r
[0], &r
[1] );
2334 STORE( &r
[0], 0, chan_index
);
2338 case TGSI_OPCODE_MOD
:
2342 case TGSI_OPCODE_XOR
:
2343 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2344 FETCH( &r
[0], 0, chan_index
);
2345 FETCH( &r
[1], 1, chan_index
);
2346 micro_xor( &r
[0], &r
[0], &r
[1] );
2347 STORE( &r
[0], 0, chan_index
);
2351 case TGSI_OPCODE_SAD
:
2355 case TGSI_OPCODE_TXF
:
2359 case TGSI_OPCODE_TXQ
:
2363 case TGSI_OPCODE_EMIT
:
2364 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += 16;
2365 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
2368 case TGSI_OPCODE_ENDPRIM
:
2369 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]++;
2370 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]] = 0;
2373 case TGSI_OPCODE_LOOP
:
2374 /* fall-through (for now) */
2375 case TGSI_OPCODE_BGNLOOP2
:
2376 /* push LoopMask and ContMasks */
2377 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2378 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2379 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2380 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2383 case TGSI_OPCODE_ENDLOOP
:
2384 /* fall-through (for now at least) */
2385 case TGSI_OPCODE_ENDLOOP2
:
2386 /* Restore ContMask, but don't pop */
2387 assert(mach
->ContStackTop
> 0);
2388 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
2389 if (mach
->LoopMask
) {
2390 /* repeat loop: jump to instruction just past BGNLOOP */
2391 *pc
= inst
->InstructionExtLabel
.Label
+ 1;
2394 /* exit loop: pop LoopMask */
2395 assert(mach
->LoopStackTop
> 0);
2396 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2398 assert(mach
->ContStackTop
> 0);
2399 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2401 UPDATE_EXEC_MASK(mach
);
2404 case TGSI_OPCODE_BRK
:
2405 /* turn off loop channels for each enabled exec channel */
2406 mach
->LoopMask
&= ~mach
->ExecMask
;
2407 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2408 UPDATE_EXEC_MASK(mach
);
2411 case TGSI_OPCODE_CONT
:
2412 /* turn off cont channels for each enabled exec channel */
2413 mach
->ContMask
&= ~mach
->ExecMask
;
2414 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2415 UPDATE_EXEC_MASK(mach
);
2418 case TGSI_OPCODE_BGNSUB
:
2422 case TGSI_OPCODE_ENDSUB
:
2426 case TGSI_OPCODE_NOISE1
:
2430 case TGSI_OPCODE_NOISE2
:
2434 case TGSI_OPCODE_NOISE3
:
2438 case TGSI_OPCODE_NOISE4
:
2442 case TGSI_OPCODE_NOP
:
2452 * Run TGSI interpreter.
2453 * \return bitmask of "alive" quad components
2456 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
2461 mach
->CondMask
= 0xf;
2462 mach
->LoopMask
= 0xf;
2463 mach
->ContMask
= 0xf;
2464 mach
->FuncMask
= 0xf;
2465 mach
->ExecMask
= 0xf;
2467 mach
->CondStackTop
= 0; /* temporarily subvert this assertion */
2468 assert(mach
->CondStackTop
== 0);
2469 assert(mach
->LoopStackTop
== 0);
2470 assert(mach
->ContStackTop
== 0);
2471 assert(mach
->CallStackTop
== 0);
2473 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
2474 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
2476 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
2477 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
2478 mach
->Primitives
[0] = 0;
2482 /* execute declarations (interpolants) */
2483 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
2484 exec_declaration( mach
, mach
->Declarations
+i
);
2487 /* execute instructions, until pc is set to -1 */
2489 assert(pc
< (int) mach
->NumInstructions
);
2490 exec_instruction( mach
, mach
->Instructions
+ pc
, &pc
);
2494 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2495 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
2497 * Scale back depth component.
2499 for (i
= 0; i
< 4; i
++)
2500 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
2504 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];