1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * TGSI interpreter/executor.
31 * Flow control information:
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
43 * The ExecMask is computed from three flow-control masks (CondMask, LoopMask
44 * and ContMask), which are controlled by the flow control instructions (namely
45 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT), combined with FuncMask.
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_util.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/util/tgsi_parse.h"
58 #include "tgsi/util/tgsi_util.h"
59 #include "tgsi_exec.h"
/* Lane indices (0..3) into a channel's four-element arrays.  The two
 * difference helpers further down index src->f[] with these names. */
61 #define TILE_TOP_LEFT 0
62 #define TILE_TOP_RIGHT 1
63 #define TILE_BOTTOM_LEFT 2
64 #define TILE_BOTTOM_RIGHT 3
67 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
/* Short aliases for the TGSI_EXEC_TEMP_* constants: *_I is used as an index
 * into mach->Temps[], *_C as an index into that register's xyzw[] channels.
 * tgsi_exec_machine_init() fills the 0/7F/80/FF/1/2/128/M128 slots. */
69 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
70 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
71 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
72 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
73 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
74 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
75 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
76 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
77 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
78 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
79 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
80 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
81 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
82 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
83 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
84 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
85 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
86 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
87 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
88 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
89 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
90 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
91 #define TEMP_R0 TGSI_EXEC_TEMP_R0
/* Channel iteration helpers.
 *  FOR_EACH_CHANNEL(CHAN)            - loop header running CHAN over 0..3.
 *  IS_CHANNEL_ENABLED(INST, CHAN)    - non-zero when bit CHAN is set in the
 *                                      write mask of destination register 0.
 *  IS_CHANNEL_ENABLED2(INST, CHAN)   - same test on destination register 1.
 *  FOR_EACH_ENABLED_CHANNEL[2]       - the loop header followed by an `if` on
 *                                      the matching test; the statement that
 *                                      follows the macro becomes the if-body. */
93 #define FOR_EACH_CHANNEL(CHAN)\
94 for (CHAN = 0; CHAN < 4; CHAN++)
96 #define IS_CHANNEL_ENABLED(INST, CHAN)\
97 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
99 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
100 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
102 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
103 FOR_EACH_CHANNEL( CHAN )\
104 if (IS_CHANNEL_ENABLED( INST, CHAN ))
106 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
107 FOR_EACH_CHANNEL( CHAN )\
108 if (IS_CHANNEL_ENABLED2( INST, CHAN ))
111 /** The execution mask is the bitwise AND of the conditional, loop, continue and function masks */
112 #define UPDATE_EXEC_MASK(MACH) \
113 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
124 * Initialize machine state by expanding tokens to full instructions,
125 * allocating temporary storage, setting up constants, etc.
126 * After this, we can call tgsi_exec_machine_run() many times.
/* Binds a token stream to the machine.  Visible behaviour:
 *  - stores `tokens` and `samplers` on `mach`;
 *  - parses the tokens, copying each declaration and instruction into heap
 *    arrays, copying immediates into mach->Imms and recording a label entry
 *    per instruction token;
 *  - frees any arrays previously held in mach->Declarations /
 *    mach->Instructions and installs the new ones with their counts.
 * NOTE(review): this listing has gaps in its embedded line numbering; the
 * declarations of `k`, `i` and `instno`, the error-path exit, the per-case
 * counter updates and possibly one parameter are not visible here - confirm
 * against the complete file before relying on these comments. */
129 tgsi_exec_machine_bind_shader(
130 struct tgsi_exec_machine
*mach
,
131 const struct tgsi_token
*tokens
,
133 struct tgsi_sampler
*samplers
)
136 struct tgsi_parse_context parse
;
137 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
138 struct tgsi_full_instruction
*instructions
;
139 struct tgsi_full_declaration
*declarations
;
140 uint maxInstructions
= 10, numInstructions
= 0;
141 uint maxDeclarations
= 10, numDeclarations
= 0;
145 tgsi_dump(tokens
, 0);
148 mach
->Tokens
= tokens
;
149 mach
->Samplers
= samplers
;
151 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
152 if (k
!= TGSI_PARSE_OK
) {
153 debug_printf( "Problem parsing!\n" );
157 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
/* Both arrays start with room for 10 entries and are grown by 10 below.
 * NOTE(review): neither MALLOC result is checked in the visible lines. */
161 declarations
= (struct tgsi_full_declaration
*)
162 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
164 instructions
= (struct tgsi_full_instruction
*)
165 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
168 while( !tgsi_parse_end_of_tokens( &parse
) ) {
/* Token-stream position before this token is consumed; stored in the label table below. */
169 uint pointer
= parse
.Position
;
172 tgsi_parse_token( &parse
);
173 switch( parse
.FullToken
.Token
.Type
) {
174 case TGSI_TOKEN_TYPE_DECLARATION
:
175 /* save expanded declaration */
/* NOTE(review): the REALLOC result overwrites the only pointer to the old
 * buffer, so a failed reallocation would lose it. */
176 if (numDeclarations
== maxDeclarations
) {
177 declarations
= REALLOC(declarations
,
179 * sizeof(struct tgsi_full_declaration
),
180 (maxDeclarations
+ 10)
181 * sizeof(struct tgsi_full_declaration
));
182 maxDeclarations
+= 10;
184 memcpy(declarations
+ numDeclarations
,
185 &parse
.FullToken
.FullDeclaration
,
186 sizeof(declarations
[0]));
190 case TGSI_TOKEN_TYPE_IMMEDIATE
:
/* `size` is the immediate's Size field minus one; it must be a multiple of 4
 * and the resulting vec4 rows must fit within TGSI_EXEC_NUM_IMMEDIATES. */
192 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1;
193 assert( size
% 4 == 0 );
194 assert( mach
->ImmLimit
+ size
/ 4 <= TGSI_EXEC_NUM_IMMEDIATES
);
/* Value i goes to row ImmLimit + i/4, column i%4 of mach->Imms. */
196 for( i
= 0; i
< size
; i
++ ) {
197 mach
->Imms
[mach
->ImmLimit
+ i
/ 4][i
% 4] =
198 parse
.FullToken
.FullImmediate
.u
.ImmediateFloat32
[i
].Float
;
200 mach
->ImmLimit
+= size
/ 4;
204 case TGSI_TOKEN_TYPE_INSTRUCTION
:
205 assert( labels
->count
< 128 );
/* Record (instruction number, token position) in the current label slot. */
207 labels
->labels
[labels
->count
][0] = instno
;
208 labels
->labels
[labels
->count
][1] = pointer
;
211 /* save expanded instruction */
212 if (numInstructions
== maxInstructions
) {
213 instructions
= REALLOC(instructions
,
215 * sizeof(struct tgsi_full_instruction
),
216 (maxInstructions
+ 10)
217 * sizeof(struct tgsi_full_instruction
));
218 maxInstructions
+= 10;
220 memcpy(instructions
+ numInstructions
,
221 &parse
.FullToken
.FullInstruction
,
222 sizeof(instructions
[0]));
230 tgsi_parse_free (&parse
);
/* Release arrays left over from an earlier bind before installing the new ones. */
232 if (mach
->Declarations
) {
233 FREE( mach
->Declarations
);
235 mach
->Declarations
= declarations
;
236 mach
->NumDeclarations
= numDeclarations
;
238 if (mach
->Instructions
) {
239 FREE( mach
->Instructions
);
241 mach
->Instructions
= instructions
;
242 mach
->NumInstructions
= numInstructions
;
/* Sets up the machine's register pointers and constant temporaries:
 * mach->Temps is the result of tgsi_align_128bit() applied to mach->_Temps,
 * mach->Addrs points at the entry just past the TGSI_EXEC_NUM_TEMPS
 * temporaries, and all four lanes of the utility temps are filled with the
 * bit patterns / float values shown below.
 * (The declaration of `i` is not visible in this listing.) */
247 tgsi_exec_machine_init(
248 struct tgsi_exec_machine
*mach
)
252 mach
->Temps
= (struct tgsi_exec_vector
*) tgsi_align_128bit( mach
->_Temps
);
253 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_NUM_TEMPS
];
255 /* Setup constants. */
256 for( i
= 0; i
< 4; i
++ ) {
257 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
258 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
259 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
260 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
261 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
262 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
263 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
264 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
/* Frees the instruction and declaration arrays held by `mach` (when
 * non-NULL), then resets each pointer to NULL and its count to 0. */
270 tgsi_exec_machine_free_data(struct tgsi_exec_machine
*mach
)
272 if (mach
->Instructions
) {
273 FREE(mach
->Instructions
);
274 mach
->Instructions
= NULL
;
275 mach
->NumInstructions
= 0;
277 if (mach
->Declarations
) {
278 FREE(mach
->Declarations
);
279 mach
->Declarations
= NULL
;
280 mach
->NumDeclarations
= 0;
/* Float absolute value per lane: dst->f[i] = fabs(src->f[i]) for i = 0..3.
 * (The header line with the function name is missing from this listing;
 * presumably micro_abs, which fetch_source-style code below calls - confirm.) */
287 union tgsi_exec_channel
*dst
,
288 const union tgsi_exec_channel
*src
)
290 dst
->f
[0] = (float) fabs( (double) src
->f
[0] );
291 dst
->f
[1] = (float) fabs( (double) src
->f
[1] );
292 dst
->f
[2] = (float) fabs( (double) src
->f
[2] );
293 dst
->f
[3] = (float) fabs( (double) src
->f
[3] );
/* Float addition per lane: dst->f[i] = src0->f[i] + src1->f[i].
 * (Function-name line is missing from this listing.) */
298 union tgsi_exec_channel
*dst
,
299 const union tgsi_exec_channel
*src0
,
300 const union tgsi_exec_channel
*src1
)
302 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
303 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
304 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
305 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
/* Signed-integer addition per lane: dst->i[i] = src0->i[i] + src1->i[i].
 * (Function-name line is missing from this listing.) */
310 union tgsi_exec_channel
*dst
,
311 const union tgsi_exec_channel
*src0
,
312 const union tgsi_exec_channel
*src1
)
314 dst
->i
[0] = src0
->i
[0] + src1
->i
[0];
315 dst
->i
[1] = src0
->i
[1] + src1
->i
[1];
316 dst
->i
[2] = src0
->i
[2] + src1
->i
[2];
317 dst
->i
[3] = src0
->i
[3] + src1
->i
[3];
/* Bitwise AND per lane on the unsigned view: dst->u[i] = src0->u[i] & src1->u[i].
 * (Function-name line is missing from this listing.) */
322 union tgsi_exec_channel
*dst
,
323 const union tgsi_exec_channel
*src0
,
324 const union tgsi_exec_channel
*src1
)
326 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
327 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
328 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
329 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
/* Float ceiling per lane: dst->f[i] = ceil(src->f[i]).
 * (Function-name line is missing from this listing.) */
334 union tgsi_exec_channel
*dst
,
335 const union tgsi_exec_channel
*src
)
337 dst
->f
[0] = (float) ceil( (double) src
->f
[0] );
338 dst
->f
[1] = (float) ceil( (double) src
->f
[1] );
339 dst
->f
[2] = (float) ceil( (double) src
->f
[2] );
340 dst
->f
[3] = (float) ceil( (double) src
->f
[3] );
/* Cosine per lane: dst->f[i] = cos(src->f[i]), computed in double and
 * narrowed to float.  (Function-name line is missing from this listing.) */
345 union tgsi_exec_channel
*dst
,
346 const union tgsi_exec_channel
*src
)
348 dst
->f
[0] = (float) cos( (double) src
->f
[0] );
349 dst
->f
[1] = (float) cos( (double) src
->f
[1] );
350 dst
->f
[2] = (float) cos( (double) src
->f
[2] );
351 dst
->f
[3] = (float) cos( (double) src
->f
[3] );
/* Difference between the bottom-right and bottom-left lanes of `src`.
 * Only the assignment ending in dst->f[3] is visible; the lines just before
 * it (and the function-name line) are missing from this listing, so what the
 * other dst lanes receive cannot be confirmed here.
 * Presumably the horizontal derivative helper - TODO confirm. */
356 union tgsi_exec_channel
*dst
,
357 const union tgsi_exec_channel
*src
)
362 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
/* Difference between the top-left and bottom-left lanes of `src`.
 * Only the assignment ending in dst->f[3] is visible; the lines just before
 * it (and the function-name line) are missing from this listing, so what the
 * other dst lanes receive cannot be confirmed here.
 * Presumably the vertical derivative helper - TODO confirm. */
367 union tgsi_exec_channel
*dst
,
368 const union tgsi_exec_channel
*src
)
373 dst
->f
[3] = src
->f
[TILE_TOP_LEFT
] - src
->f
[TILE_BOTTOM_LEFT
];
/* Float division per lane: dst->f[i] = src0->f[i] / src1->f[i].
 * No special handling of a zero divisor is visible.
 * (Function-name line is missing from this listing.) */
378 union tgsi_exec_channel
*dst
,
379 const union tgsi_exec_channel
*src0
,
380 const union tgsi_exec_channel
*src1
)
382 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
383 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
384 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
385 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
/* Unsigned-integer division per lane: dst->u[i] = src0->u[i] / src1->u[i].
 * NOTE(review): a zero in any src1 lane is not guarded against in the visible
 * lines; integer division by zero is undefined behaviour in C.
 * (Function-name line is missing from this listing.) */
390 union tgsi_exec_channel
*dst
,
391 const union tgsi_exec_channel
*src0
,
392 const union tgsi_exec_channel
*src1
)
394 dst
->u
[0] = src0
->u
[0] / src1
->u
[0];
395 dst
->u
[1] = src0
->u
[1] / src1
->u
[1];
396 dst
->u
[2] = src0
->u
[2] / src1
->u
[2];
397 dst
->u
[3] = src0
->u
[3] / src1
->u
[3];
/* Float select-on-equal per lane:
 * dst->f[i] = (src0->f[i] == src1->f[i]) ? src2->f[i] : src3->f[i].
 * (Function-name line is missing from this listing.) */
402 union tgsi_exec_channel
*dst
,
403 const union tgsi_exec_channel
*src0
,
404 const union tgsi_exec_channel
*src1
,
405 const union tgsi_exec_channel
*src2
,
406 const union tgsi_exec_channel
*src3
)
408 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
409 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
410 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
411 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
/* Signed-integer select-on-equal per lane:
 * dst->i[i] = (src0->i[i] == src1->i[i]) ? src2->i[i] : src3->i[i].
 * (Function-name line is missing from this listing.) */
416 union tgsi_exec_channel
*dst
,
417 const union tgsi_exec_channel
*src0
,
418 const union tgsi_exec_channel
*src1
,
419 const union tgsi_exec_channel
*src2
,
420 const union tgsi_exec_channel
*src3
)
422 dst
->i
[0] = src0
->i
[0] == src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
423 dst
->i
[1] = src0
->i
[1] == src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
424 dst
->i
[2] = src0
->i
[2] == src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
425 dst
->i
[3] = src0
->i
[3] == src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
/* Base-2 exponential per lane: dst->f[i] = pow(2.0, src->f[i]).
 * (Function-name line is missing from this listing.) */
430 union tgsi_exec_channel
*dst
,
431 const union tgsi_exec_channel
*src
)
433 dst
->f
[0] = (float) pow( 2.0, (double) src
->f
[0] );
434 dst
->f
[1] = (float) pow( 2.0, (double) src
->f
[1] );
435 dst
->f
[2] = (float) pow( 2.0, (double) src
->f
[2] );
436 dst
->f
[3] = (float) pow( 2.0, (double) src
->f
[3] );
/* Float to signed int per lane using a C cast (truncates toward zero):
 * dst->i[i] = (int) src->f[i].
 * NOTE(review): values outside int's range are not handled in the visible lines.
 * (Function-name line is missing from this listing.) */
441 union tgsi_exec_channel
*dst
,
442 const union tgsi_exec_channel
*src
)
444 dst
->i
[0] = (int) src
->f
[0];
445 dst
->i
[1] = (int) src
->f
[1];
446 dst
->i
[2] = (int) src
->f
[2];
447 dst
->i
[3] = (int) src
->f
[3];
/* Float to unsigned int per lane using a C cast: dst->u[i] = (uint) src->f[i].
 * NOTE(review): negative or out-of-range inputs are not handled in the visible lines.
 * (Function-name line is missing from this listing.) */
452 union tgsi_exec_channel
*dst
,
453 const union tgsi_exec_channel
*src
)
455 dst
->u
[0] = (uint
) src
->f
[0];
456 dst
->u
[1] = (uint
) src
->f
[1];
457 dst
->u
[2] = (uint
) src
->f
[2];
458 dst
->u
[3] = (uint
) src
->f
[3];
/* Float floor per lane: dst->f[i] = floor(src->f[i]).
 * (Function-name line is missing from this listing.) */
463 union tgsi_exec_channel
*dst
,
464 const union tgsi_exec_channel
*src
)
466 dst
->f
[0] = (float) floor( (double) src
->f
[0] );
467 dst
->f
[1] = (float) floor( (double) src
->f
[1] );
468 dst
->f
[2] = (float) floor( (double) src
->f
[2] );
469 dst
->f
[3] = (float) floor( (double) src
->f
[3] );
/* Fractional part per lane: dst->f[i] = src->f[i] - floor(src->f[i]).
 * (Function-name line is missing from this listing.) */
474 union tgsi_exec_channel
*dst
,
475 const union tgsi_exec_channel
*src
)
477 dst
->f
[0] = src
->f
[0] - (float) floor( (double) src
->f
[0] );
478 dst
->f
[1] = src
->f
[1] - (float) floor( (double) src
->f
[1] );
479 dst
->f
[2] = src
->f
[2] - (float) floor( (double) src
->f
[2] );
480 dst
->f
[3] = src
->f
[3] - (float) floor( (double) src
->f
[3] );
/* Float select-on-greater-or-equal per lane:
 * dst->f[i] = (src0->f[i] >= src1->f[i]) ? src2->f[i] : src3->f[i].
 * (Function-name line is missing from this listing.) */
485 union tgsi_exec_channel
*dst
,
486 const union tgsi_exec_channel
*src0
,
487 const union tgsi_exec_channel
*src1
,
488 const union tgsi_exec_channel
*src2
,
489 const union tgsi_exec_channel
*src3
)
491 dst
->f
[0] = src0
->f
[0] >= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
492 dst
->f
[1] = src0
->f
[1] >= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
493 dst
->f
[2] = src0
->f
[2] >= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
494 dst
->f
[3] = src0
->f
[3] >= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
/* Signed int to float per lane: dst->f[i] = (float) src->i[i].
 * (Function-name line is missing from this listing.) */
499 union tgsi_exec_channel
*dst
,
500 const union tgsi_exec_channel
*src
)
502 dst
->f
[0] = (float) src
->i
[0];
503 dst
->f
[1] = (float) src
->i
[1];
504 dst
->f
[2] = (float) src
->i
[2];
505 dst
->f
[3] = (float) src
->i
[3];
/* Base-2 logarithm per lane, computed as natural log times 1.442695
 * (approximately 1/ln 2): dst->f[i] = log(src->f[i]) * 1.442695f.
 * (Function-name line is missing from this listing.) */
510 union tgsi_exec_channel
*dst
,
511 const union tgsi_exec_channel
*src
)
513 dst
->f
[0] = (float) log( (double) src
->f
[0] ) * 1.442695f
;
514 dst
->f
[1] = (float) log( (double) src
->f
[1] ) * 1.442695f
;
515 dst
->f
[2] = (float) log( (double) src
->f
[2] ) * 1.442695f
;
516 dst
->f
[3] = (float) log( (double) src
->f
[3] ) * 1.442695f
;
/* Float select-on-less-than per lane:
 * dst->f[i] = (src0->f[i] < src1->f[i]) ? src2->f[i] : src3->f[i].
 * (Function-name line is missing from this listing.) */
521 union tgsi_exec_channel
*dst
,
522 const union tgsi_exec_channel
*src0
,
523 const union tgsi_exec_channel
*src1
,
524 const union tgsi_exec_channel
*src2
,
525 const union tgsi_exec_channel
*src3
)
527 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
528 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
529 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
530 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
/* Signed-integer select-on-less-than per lane:
 * dst->i[i] = (src0->i[i] < src1->i[i]) ? src2->i[i] : src3->i[i].
 * (Function-name line is missing from this listing.) */
535 union tgsi_exec_channel
*dst
,
536 const union tgsi_exec_channel
*src0
,
537 const union tgsi_exec_channel
*src1
,
538 const union tgsi_exec_channel
*src2
,
539 const union tgsi_exec_channel
*src3
)
541 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
542 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
543 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
544 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
/* Unsigned-integer select-on-less-than per lane:
 * dst->u[i] = (src0->u[i] < src1->u[i]) ? src2->u[i] : src3->u[i].
 * (Function-name line is missing from this listing.) */
549 union tgsi_exec_channel
*dst
,
550 const union tgsi_exec_channel
*src0
,
551 const union tgsi_exec_channel
*src1
,
552 const union tgsi_exec_channel
*src2
,
553 const union tgsi_exec_channel
*src3
)
555 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src2
->u
[0] : src3
->u
[0];
556 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src2
->u
[1] : src3
->u
[1];
557 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src2
->u
[2] : src3
->u
[2];
558 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src2
->u
[3] : src3
->u
[3];
/* Float maximum per lane: picks src0->f[i] when it is strictly greater than
 * src1->f[i], otherwise src1->f[i].
 * (Function-name line is missing from this listing; presumably micro_max,
 * which the destination-store code below calls - confirm.) */
563 union tgsi_exec_channel
*dst
,
564 const union tgsi_exec_channel
*src0
,
565 const union tgsi_exec_channel
*src1
)
567 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
568 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
569 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
570 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
/* Signed-integer maximum per lane: dst->i[i] = larger of src0->i[i], src1->i[i].
 * (Function-name line is missing from this listing.) */
575 union tgsi_exec_channel
*dst
,
576 const union tgsi_exec_channel
*src0
,
577 const union tgsi_exec_channel
*src1
)
579 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
580 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
581 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
582 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
/* Unsigned-integer maximum per lane: dst->u[i] = larger of src0->u[i], src1->u[i].
 * (Function-name line is missing from this listing.) */
587 union tgsi_exec_channel
*dst
,
588 const union tgsi_exec_channel
*src0
,
589 const union tgsi_exec_channel
*src1
)
591 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
592 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
593 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
594 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
/* Float minimum per lane: picks src0->f[i] when it is strictly less than
 * src1->f[i], otherwise src1->f[i].
 * (Function-name line is missing from this listing; presumably micro_min,
 * which the destination-store code below calls - confirm.) */
599 union tgsi_exec_channel
*dst
,
600 const union tgsi_exec_channel
*src0
,
601 const union tgsi_exec_channel
*src1
)
603 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
604 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
605 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
606 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
/* Signed-integer minimum per lane: dst->i[i] = smaller of src0->i[i], src1->i[i].
 * (Function-name line is missing from this listing.) */
611 union tgsi_exec_channel
*dst
,
612 const union tgsi_exec_channel
*src0
,
613 const union tgsi_exec_channel
*src1
)
615 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
616 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
617 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
618 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
/* Unsigned-integer minimum per lane: dst->u[i] = smaller of src0->u[i], src1->u[i].
 * (Function-name line is missing from this listing.) */
623 union tgsi_exec_channel
*dst
,
624 const union tgsi_exec_channel
*src0
,
625 const union tgsi_exec_channel
*src1
)
627 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
628 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
629 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
630 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
/* Unsigned-integer remainder per lane: dst->u[i] = src0->u[i] % src1->u[i].
 * NOTE(review): a zero in any src1 lane is not guarded against in the visible
 * lines; integer modulo by zero is undefined behaviour in C.
 * (Function-name line is missing from this listing.) */
635 union tgsi_exec_channel
*dst
,
636 const union tgsi_exec_channel
*src0
,
637 const union tgsi_exec_channel
*src1
)
639 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
640 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
641 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
642 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
/* Float multiplication per lane: dst->f[i] = src0->f[i] * src1->f[i].
 * (Function-name line is missing from this listing.) */
647 union tgsi_exec_channel
*dst
,
648 const union tgsi_exec_channel
*src0
,
649 const union tgsi_exec_channel
*src1
)
651 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
652 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
653 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
654 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
/* Signed-integer multiplication per lane: dst->i[i] = src0->i[i] * src1->i[i].
 * (Function-name line is missing from this listing.) */
659 union tgsi_exec_channel
*dst
,
660 const union tgsi_exec_channel
*src0
,
661 const union tgsi_exec_channel
*src1
)
663 dst
->i
[0] = src0
->i
[0] * src1
->i
[0];
664 dst
->i
[1] = src0
->i
[1] * src1
->i
[1];
665 dst
->i
[2] = src0
->i
[2] * src1
->i
[2];
666 dst
->i
[3] = src0
->i
[3] * src1
->i
[3];
/* Signed-integer multiply with two destinations.  The visible lines store the
 * per-lane int product src0->i[i] * src1->i[i] in dst1; whatever is written to
 * dst0 is not visible in this listing (lines after the dst1 stores are
 * missing), nor is the function-name line.
 * Presumably the 64-bit-result variant with dst0 as the high part - TODO confirm. */
671 union tgsi_exec_channel
*dst0
,
672 union tgsi_exec_channel
*dst1
,
673 const union tgsi_exec_channel
*src0
,
674 const union tgsi_exec_channel
*src1
)
676 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
677 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
678 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
679 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
/* Unsigned-integer multiply with two destinations.  The visible lines store
 * the per-lane product src0->u[i] * src1->u[i] in dst1; whatever is written to
 * dst0 is not visible in this listing (lines after the dst1 stores are
 * missing), nor is the function-name line.
 * Presumably the 64-bit-result variant with dst0 as the high part - TODO confirm. */
688 union tgsi_exec_channel
*dst0
,
689 union tgsi_exec_channel
*dst1
,
690 const union tgsi_exec_channel
*src0
,
691 const union tgsi_exec_channel
*src1
)
693 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
694 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
695 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
696 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
/* Conditional move per lane: dst->u[i] = src0->u[i] ? src1->u[i] : src2->u[i]
 * (any non-zero bit pattern in src0 selects src1).
 * (Function-name line is missing from this listing.) */
705 union tgsi_exec_channel
*dst
,
706 const union tgsi_exec_channel
*src0
,
707 const union tgsi_exec_channel
*src1
,
708 const union tgsi_exec_channel
*src2
)
710 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
711 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
712 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
713 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
/* Float negation per lane: dst->f[i] = -src->f[i].
 * (Function-name line is missing from this listing; presumably micro_neg,
 * which the sign-mode code below calls - confirm.) */
718 union tgsi_exec_channel
*dst
,
719 const union tgsi_exec_channel
*src
)
721 dst
->f
[0] = -src
->f
[0];
722 dst
->f
[1] = -src
->f
[1];
723 dst
->f
[2] = -src
->f
[2];
724 dst
->f
[3] = -src
->f
[3];
/* Signed-integer negation per lane: dst->i[i] = -src->i[i].
 * (Function-name line is missing from this listing.) */
729 union tgsi_exec_channel
*dst
,
730 const union tgsi_exec_channel
*src
)
732 dst
->i
[0] = -src
->i
[0];
733 dst
->i
[1] = -src
->i
[1];
734 dst
->i
[2] = -src
->i
[2];
735 dst
->i
[3] = -src
->i
[3];
/* Bitwise complement per lane: dst->u[i] = ~src->u[i].
 * (Function-name line is missing from this listing.) */
740 union tgsi_exec_channel
*dst
,
741 const union tgsi_exec_channel
*src
)
743 dst
->u
[0] = ~src
->u
[0];
744 dst
->u
[1] = ~src
->u
[1];
745 dst
->u
[2] = ~src
->u
[2];
746 dst
->u
[3] = ~src
->u
[3];
/* Bitwise OR per lane: dst->u[i] = src0->u[i] | src1->u[i].
 * (Function-name line is missing from this listing.) */
751 union tgsi_exec_channel
*dst
,
752 const union tgsi_exec_channel
*src0
,
753 const union tgsi_exec_channel
*src1
)
755 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
756 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
757 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
758 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
/* Power per lane: dst->f[i] = pow(src0->f[i], src1->f[i]), computed in double
 * and narrowed to float.  (Function-name line is missing from this listing.) */
763 union tgsi_exec_channel
*dst
,
764 const union tgsi_exec_channel
*src0
,
765 const union tgsi_exec_channel
*src1
)
767 dst
->f
[0] = (float) pow( (double) src0
->f
[0], (double) src1
->f
[0] );
768 dst
->f
[1] = (float) pow( (double) src0
->f
[1], (double) src1
->f
[1] );
769 dst
->f
[2] = (float) pow( (double) src0
->f
[2], (double) src1
->f
[2] );
770 dst
->f
[3] = (float) pow( (double) src0
->f
[3], (double) src1
->f
[3] );
/* Rounding per lane implemented as floor(x + 0.5): dst->f[i] =
 * floor(src->f[i] + 0.5f), so exact halves round toward +infinity.
 * (Function-name line is missing from this listing.) */
775 union tgsi_exec_channel
*dst
,
776 const union tgsi_exec_channel
*src
)
778 dst
->f
[0] = (float) floor( (double) (src
->f
[0] + 0.5f
) );
779 dst
->f
[1] = (float) floor( (double) (src
->f
[1] + 0.5f
) );
780 dst
->f
[2] = (float) floor( (double) (src
->f
[2] + 0.5f
) );
781 dst
->f
[3] = (float) floor( (double) (src
->f
[3] + 0.5f
) );
/* Left shift per lane on the signed view: dst->i[i] = src0->i[i] << src1->i[i].
 * NOTE(review): the shift count is not range-checked in the visible lines;
 * negative or >= 32 counts (and shifting negative values) are undefined in C.
 * (Function-name line is missing from this listing.) */
786 union tgsi_exec_channel
*dst
,
787 const union tgsi_exec_channel
*src0
,
788 const union tgsi_exec_channel
*src1
)
790 dst
->i
[0] = src0
->i
[0] << src1
->i
[0];
791 dst
->i
[1] = src0
->i
[1] << src1
->i
[1];
792 dst
->i
[2] = src0
->i
[2] << src1
->i
[2];
793 dst
->i
[3] = src0
->i
[3] << src1
->i
[3];
/* Right shift per lane on the signed view: dst->i[i] = src0->i[i] >> src1->i[i].
 * NOTE(review): the shift count is not range-checked in the visible lines, and
 * right-shifting a negative int is implementation-defined in C.
 * (Function-name line is missing from this listing.) */
798 union tgsi_exec_channel
*dst
,
799 const union tgsi_exec_channel
*src0
,
800 const union tgsi_exec_channel
*src1
)
802 dst
->i
[0] = src0
->i
[0] >> src1
->i
[0];
803 dst
->i
[1] = src0
->i
[1] >> src1
->i
[1];
804 dst
->i
[2] = src0
->i
[2] >> src1
->i
[2];
805 dst
->i
[3] = src0
->i
[3] >> src1
->i
[3];
/* Truncation toward zero per lane via an int round-trip:
 * dst->f[i] = (float)(int) src0->f[i].
 * NOTE(review): inputs outside int's range are not handled in the visible lines.
 * (Function-name line is missing from this listing.) */
810 union tgsi_exec_channel
*dst
,
811 const union tgsi_exec_channel
*src0
)
813 dst
->f
[0] = (float) (int) src0
->f
[0];
814 dst
->f
[1] = (float) (int) src0
->f
[1];
815 dst
->f
[2] = (float) (int) src0
->f
[2];
816 dst
->f
[3] = (float) (int) src0
->f
[3];
/* Logical right shift per lane on the unsigned view:
 * dst->u[i] = src0->u[i] >> src1->u[i].
 * NOTE(review): the shift count is not range-checked in the visible lines.
 * (Function-name line is missing from this listing.) */
821 union tgsi_exec_channel
*dst
,
822 const union tgsi_exec_channel
*src0
,
823 const union tgsi_exec_channel
*src1
)
825 dst
->u
[0] = src0
->u
[0] >> src1
->u
[0];
826 dst
->u
[1] = src0
->u
[1] >> src1
->u
[1];
827 dst
->u
[2] = src0
->u
[2] >> src1
->u
[2];
828 dst
->u
[3] = src0
->u
[3] >> src1
->u
[3];
/* Sine per lane: dst->f[i] = sin(src->f[i]), computed in double and narrowed
 * to float.  (Function-name line is missing from this listing.) */
833 union tgsi_exec_channel
*dst
,
834 const union tgsi_exec_channel
*src
)
836 dst
->f
[0] = (float) sin( (double) src
->f
[0] );
837 dst
->f
[1] = (float) sin( (double) src
->f
[1] );
838 dst
->f
[2] = (float) sin( (double) src
->f
[2] );
839 dst
->f
[3] = (float) sin( (double) src
->f
[3] );
/* Square root per lane: dst->f[i] = sqrt(src->f[i]), computed in double and
 * narrowed to float.  Negative inputs are passed to sqrt() unchanged. */
843 micro_sqrt( union tgsi_exec_channel
*dst
,
844 const union tgsi_exec_channel
*src
)
846 dst
->f
[0] = (float) sqrt( (double) src
->f
[0] );
847 dst
->f
[1] = (float) sqrt( (double) src
->f
[1] );
848 dst
->f
[2] = (float) sqrt( (double) src
->f
[2] );
849 dst
->f
[3] = (float) sqrt( (double) src
->f
[3] );
/* Float subtraction per lane: dst->f[i] = src0->f[i] - src1->f[i].
 * (Function-name line is missing from this listing; presumably micro_sub,
 * which the complement code below calls - confirm.) */
854 union tgsi_exec_channel
*dst
,
855 const union tgsi_exec_channel
*src0
,
856 const union tgsi_exec_channel
*src1
)
858 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
859 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
860 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
861 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
/* Unsigned int to float per lane: dst->f[i] = (float) src->u[i].
 * (Function-name line is missing from this listing.) */
866 union tgsi_exec_channel
*dst
,
867 const union tgsi_exec_channel
*src
)
869 dst
->f
[0] = (float) src
->u
[0];
870 dst
->f
[1] = (float) src
->u
[1];
871 dst
->f
[2] = (float) src
->u
[2];
872 dst
->f
[3] = (float) src
->u
[3];
/* Bitwise XOR per lane: dst->u[i] = src0->u[i] ^ src1->u[i].
 * (Function-name line is missing from this listing.) */
877 union tgsi_exec_channel
*dst
,
878 const union tgsi_exec_channel
*src0
,
879 const union tgsi_exec_channel
*src1
)
881 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
882 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
883 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
884 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
/* Reads one channel (four lanes) of a source register into *chan.
 *  - For the X/Y/Z/W swizzle cases, the value comes from the register array
 *    named by the file case label (Consts, Inputs, Temps, Imms, Addrs or
 *    Outputs).  Each lane uses its own register index, index->i[lane], and
 *    `swizzle` selects the component.
 *  - TGSI_EXTSWIZZLE_ZERO / _ONE copy the constant 0 / 1.0 utility temps.
 * Only the immediate file asserts an upper bound on the index.
 * NOTE(review): two parameters (apparently the file and `swizzle`) and both
 * switch expressions are not visible in this listing - confirm. */
888 fetch_src_file_channel(
889 const struct tgsi_exec_machine
*mach
,
892 const union tgsi_exec_channel
*index
,
893 union tgsi_exec_channel
*chan
)
896 case TGSI_EXTSWIZZLE_X
:
897 case TGSI_EXTSWIZZLE_Y
:
898 case TGSI_EXTSWIZZLE_Z
:
899 case TGSI_EXTSWIZZLE_W
:
901 case TGSI_FILE_CONSTANT
:
902 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
903 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
904 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
905 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
908 case TGSI_FILE_INPUT
:
909 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
910 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
911 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
912 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
915 case TGSI_FILE_TEMPORARY
:
916 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
917 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
918 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
919 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
922 case TGSI_FILE_IMMEDIATE
:
923 assert( index
->i
[0] < (int) mach
->ImmLimit
);
924 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
925 assert( index
->i
[1] < (int) mach
->ImmLimit
);
926 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
927 assert( index
->i
[2] < (int) mach
->ImmLimit
);
928 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
929 assert( index
->i
[3] < (int) mach
->ImmLimit
);
930 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
933 case TGSI_FILE_ADDRESS
:
934 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
935 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
936 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
937 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
940 case TGSI_FILE_OUTPUT
:
941 /* vertex/fragment output vars can be read too */
942 chan
->u
[0] = mach
->Outputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
943 chan
->u
[1] = mach
->Outputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
944 chan
->u
[2] = mach
->Outputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
945 chan
->u
[3] = mach
->Outputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
953 case TGSI_EXTSWIZZLE_ZERO
:
954 *chan
= mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
];
957 case TGSI_EXTSWIZZLE_ONE
:
958 *chan
= mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
];
/* Fetches one channel of a full source register into *chan, applying
 * addressing and modifiers.  Visible steps:
 *  1. start from reg->SrcRegister.Index in the per-lane index;
 *  2. if the register is Indirect, fetch an indirection value through
 *     SrcRegisterInd and add it lane by lane;
 *  3. if the register has a Dimension, add SrcRegisterDim.Index and, when
 *     that is itself Indirect, a second fetched indirection value;
 *  4. fetch the channel with the extended swizzle for chan_index;
 *  5. apply the sign mode (CLEAR = abs, SET = -abs, TOGGLE = negate,
 *     KEEP = unchanged) and, if Complement is set, replace chan by 1 - chan.
 * (The function-name line is missing from this listing; the FETCH macro below
 * calls it fetch_source.  Lines initialising lanes 0..2 of the index vectors,
 * the declaration of `swizzle`, and most fetch_src_file_channel arguments are
 * also not visible.) */
968 const struct tgsi_exec_machine
*mach
,
969 union tgsi_exec_channel
*chan
,
970 const struct tgsi_full_src_register
*reg
,
971 const uint chan_index
)
973 union tgsi_exec_channel index
;
979 index
.i
[3] = reg
->SrcRegister
.Index
;
981 if (reg
->SrcRegister
.Indirect
) {
982 union tgsi_exec_channel index2
;
983 union tgsi_exec_channel indir_index
;
988 index2
.i
[3] = reg
->SrcRegisterInd
.Index
;
/* NOTE(review): the '®' on the next line looks like a mis-decoded "&reg"
 * from the listing's extraction - confirm against the original file. */
990 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterInd
, CHAN_X
);
991 fetch_src_file_channel(
993 reg
->SrcRegisterInd
.File
,
998 index
.i
[0] += indir_index
.i
[0];
999 index
.i
[1] += indir_index
.i
[1];
1000 index
.i
[2] += indir_index
.i
[2];
1001 index
.i
[3] += indir_index
.i
[3];
1004 if( reg
->SrcRegister
.Dimension
) {
1005 switch( reg
->SrcRegister
.File
) {
1006 case TGSI_FILE_INPUT
:
1012 case TGSI_FILE_CONSTANT
:
1022 index
.i
[0] += reg
->SrcRegisterDim
.Index
;
1023 index
.i
[1] += reg
->SrcRegisterDim
.Index
;
1024 index
.i
[2] += reg
->SrcRegisterDim
.Index
;
1025 index
.i
[3] += reg
->SrcRegisterDim
.Index
;
1027 if (reg
->SrcRegisterDim
.Indirect
) {
1028 union tgsi_exec_channel index2
;
1029 union tgsi_exec_channel indir_index
;
1034 index2
.i
[3] = reg
->SrcRegisterDimInd
.Index
;
/* NOTE(review): same apparent "&reg" mis-decoding as above. */
1036 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterDimInd
, CHAN_X
);
1037 fetch_src_file_channel(
1039 reg
->SrcRegisterDimInd
.File
,
1044 index
.i
[0] += indir_index
.i
[0];
1045 index
.i
[1] += indir_index
.i
[1];
1046 index
.i
[2] += indir_index
.i
[2];
1047 index
.i
[3] += indir_index
.i
[3];
1051 swizzle
= tgsi_util_get_full_src_register_extswizzle( reg
, chan_index
);
1052 fetch_src_file_channel(
1054 reg
->SrcRegister
.File
,
1059 switch (tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
)) {
1060 case TGSI_UTIL_SIGN_CLEAR
:
1061 micro_abs( chan
, chan
);
1064 case TGSI_UTIL_SIGN_SET
:
1065 micro_abs( chan
, chan
);
1066 micro_neg( chan
, chan
);
1069 case TGSI_UTIL_SIGN_TOGGLE
:
1070 micro_neg( chan
, chan
);
1073 case TGSI_UTIL_SIGN_KEEP
:
/* Complement: chan = 1.0 - chan, using the constant-one utility temp. */
1077 if (reg
->SrcRegisterExtMod
.Complement
) {
1078 micro_sub( chan
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], chan
);
/* Writes one channel (four lanes) to a destination register.
 *  - The destination is chosen by reg->DstRegister.File: Outputs (register
 *    index offset by lane 0 of the TEMP_OUTPUT utility temp), Temps or Addrs;
 *    a TGSI_FILE_NULL case is also present.
 *  - Under the switch on inst->Instruction.Saturate, the first visible path
 *    copies lane n only when bit n of mach->ExecMask is set; the
 *    TGSI_SAT_ZERO_ONE path clamps to [0, 1] with micro_max/micro_min and, as
 *    its XXX note says, does not yet honour ExecMask.
 * (The function-name line is missing from this listing; the STORE macro below
 * calls it store_dest.  The last parameter - used below as chan_index - and
 * the first Saturate case label are also not visible.) */
1084 struct tgsi_exec_machine
*mach
,
1085 const union tgsi_exec_channel
*chan
,
1086 const struct tgsi_full_dst_register
*reg
,
1087 const struct tgsi_full_instruction
*inst
,
1090 union tgsi_exec_channel
*dst
;
1092 switch( reg
->DstRegister
.File
) {
1093 case TGSI_FILE_NULL
:
1096 case TGSI_FILE_OUTPUT
:
1097 dst
= &mach
->Outputs
[mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1098 + reg
->DstRegister
.Index
].xyzw
[chan_index
];
1101 case TGSI_FILE_TEMPORARY
:
1102 dst
= &mach
->Temps
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1105 case TGSI_FILE_ADDRESS
:
1106 dst
= &mach
->Addrs
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1114 switch (inst
->Instruction
.Saturate
)
1117 if (mach
->ExecMask
& 0x1)
1118 dst
->i
[0] = chan
->i
[0];
1119 if (mach
->ExecMask
& 0x2)
1120 dst
->i
[1] = chan
->i
[1];
1121 if (mach
->ExecMask
& 0x4)
1122 dst
->i
[2] = chan
->i
[2];
1123 if (mach
->ExecMask
& 0x8)
1124 dst
->i
[3] = chan
->i
[3];
1127 case TGSI_SAT_ZERO_ONE
:
1128 /* XXX need to obey ExecMask here */
1129 micro_max(dst
, chan
, &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
1130 micro_min(dst
, dst
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
1133 case TGSI_SAT_MINUS_PLUS_ONE
:
/* Convenience wrappers for instruction handlers.  Both expand to calls that
 * use the identifiers `mach` and `inst` from the enclosing scope:
 *  FETCH(VAL, INDEX, CHAN) reads channel CHAN of source register INDEX into VAL;
 *  STORE(VAL, INDEX, CHAN) writes VAL to channel CHAN of destination register INDEX. */
1142 #define FETCH(VAL,INDEX,CHAN)\
1143 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
1145 #define STORE(VAL,INDEX,CHAN)\
1146 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1150 * Execute ARB-style KIL which is predicated by a src register.
1151 * Kill fragment if any of the four values is less than zero.
/* Kill handler: walks the four channels of source register 0, skipping any
 * extended-swizzle component already seen (ZERO and ONE are pre-marked as
 * seen), fetches the remaining channels and tests each of the four lanes for
 * a value below 0.0f.  The accumulated kilmask (bit n = pixel n) is ORed
 * into lane 0 of the KILMASK utility temp.
 * (The statement that sets kilmask bits, the `continue` after the uniquemask
 * test and the declarations of uniquemask, chan_index, swizzle and i are not
 * visible in this listing.) */
1154 exec_kilp(struct tgsi_exec_machine
*mach
,
1155 const struct tgsi_full_instruction
*inst
)
1159 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1160 union tgsi_exec_channel r
[1];
1162 /* This mask stores component bits that were already tested. Note that
1163 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1165 uniquemask
= (1 << TGSI_EXTSWIZZLE_ZERO
) | (1 << TGSI_EXTSWIZZLE_ONE
);
1167 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1172 /* unswizzle channel */
1173 swizzle
= tgsi_util_get_full_src_register_extswizzle (
1174 &inst
->FullSrcRegisters
[0],
1177 /* check if the component has not been already tested */
1178 if (uniquemask
& (1 << swizzle
))
1180 uniquemask
|= 1 << swizzle
;
1182 FETCH(&r
[0], 0, chan_index
);
1183 for (i
= 0; i
< 4; i
++)
1184 if (r
[0].f
[i
] < 0.0f
)
1188 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1193 * Fetch a texel using STR texture coordinates.
1196 fetch_texel( struct tgsi_sampler
*sampler
,
1197 const union tgsi_exec_channel
*s
,
1198 const union tgsi_exec_channel
*t
,
1199 const union tgsi_exec_channel
*p
,
1200 float lodbias
, /* XXX should be float[4] */
1201 union tgsi_exec_channel
*r
,
1202 union tgsi_exec_channel
*g
,
1203 union tgsi_exec_channel
*b
,
1204 union tgsi_exec_channel
*a
)
1207 float rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1209 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, lodbias
, rgba
);
1211 for (j
= 0; j
< 4; j
++) {
1212 r
->f
[j
] = rgba
[0][j
];
1213 g
->f
[j
] = rgba
[1][j
];
1214 b
->f
[j
] = rgba
[2][j
];
1215 a
->f
[j
] = rgba
[3][j
];
1221 exec_tex(struct tgsi_exec_machine
*mach
,
1222 const struct tgsi_full_instruction
*inst
,
1225 const uint unit
= inst
->FullSrcRegisters
[1].SrcRegister
.Index
;
1226 union tgsi_exec_channel r
[8];
1230 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1232 switch (inst
->InstructionExtTexture
.Texture
) {
1233 case TGSI_TEXTURE_1D
:
1235 FETCH(&r
[0], 0, CHAN_X
);
1237 switch (inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtDivide
) {
1238 case TGSI_EXTSWIZZLE_W
:
1239 FETCH(&r
[1], 0, CHAN_W
);
1240 micro_div( &r
[0], &r
[0], &r
[1] );
1243 case TGSI_EXTSWIZZLE_ONE
:
1251 FETCH(&r
[1], 0, CHAN_W
);
1252 lodBias
= r
[2].f
[0];
1257 fetch_texel(&mach
->Samplers
[unit
],
1258 &r
[0], NULL
, NULL
, lodBias
, /* S, T, P, BIAS */
1259 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1262 case TGSI_TEXTURE_2D
:
1263 case TGSI_TEXTURE_RECT
:
1265 FETCH(&r
[0], 0, CHAN_X
);
1266 FETCH(&r
[1], 0, CHAN_Y
);
1267 FETCH(&r
[2], 0, CHAN_Z
);
1269 switch (inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtDivide
) {
1270 case TGSI_EXTSWIZZLE_W
:
1271 FETCH(&r
[3], 0, CHAN_W
);
1272 micro_div( &r
[0], &r
[0], &r
[3] );
1273 micro_div( &r
[1], &r
[1], &r
[3] );
1274 micro_div( &r
[2], &r
[2], &r
[3] );
1277 case TGSI_EXTSWIZZLE_ONE
:
1285 FETCH(&r
[3], 0, CHAN_W
);
1286 lodBias
= r
[3].f
[0];
1291 fetch_texel(&mach
->Samplers
[unit
],
1292 &r
[0], &r
[1], &r
[2], lodBias
, /* inputs */
1293 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1296 case TGSI_TEXTURE_3D
:
1297 case TGSI_TEXTURE_CUBE
:
1299 FETCH(&r
[0], 0, CHAN_X
);
1300 FETCH(&r
[1], 0, CHAN_Y
);
1301 FETCH(&r
[2], 0, CHAN_Z
);
1303 switch (inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtDivide
) {
1304 case TGSI_EXTSWIZZLE_W
:
1305 FETCH(&r
[3], 0, CHAN_W
);
1306 micro_div( &r
[0], &r
[0], &r
[3] );
1307 micro_div( &r
[1], &r
[1], &r
[3] );
1308 micro_div( &r
[2], &r
[2], &r
[3] );
1311 case TGSI_EXTSWIZZLE_ONE
:
1319 FETCH(&r
[3], 0, CHAN_W
);
1320 lodBias
= r
[3].f
[0];
1325 fetch_texel(&mach
->Samplers
[unit
],
1326 &r
[0], &r
[1], &r
[2], lodBias
,
1327 &r
[0], &r
[1], &r
[2], &r
[3]);
1334 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1335 STORE( &r
[chan_index
], 0, chan_index
);
1341 * Evaluate a constant-valued coefficient at the position of the
1346 struct tgsi_exec_machine
*mach
,
1352 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1353 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1358 * Evaluate a linear-valued coefficient at the position of the
1363 struct tgsi_exec_machine
*mach
,
1367 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1368 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1369 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1370 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1371 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1372 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
1373 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
1374 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
1375 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
1379 * Evaluate a perspective-valued coefficient at the position of the
1383 eval_perspective_coef(
1384 struct tgsi_exec_machine
*mach
,
1388 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1389 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1390 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1391 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1392 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1393 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
1394 /* divide by W here */
1395 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
1396 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
1397 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
1398 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
/* Signature shared by the eval_*_coef() interpolation routines: compute
 * channel `chan` of input attribute `attrib` for the current quad. */
typedef void (* eval_coef_func)( struct tgsi_exec_machine *mach,
                                 unsigned attrib,
                                 unsigned chan );
1409 struct tgsi_exec_machine
*mach
,
1410 const struct tgsi_full_declaration
*decl
)
1412 if( mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1413 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1414 unsigned first
, last
, mask
;
1415 eval_coef_func eval
;
1417 assert( decl
->Declaration
.Declare
== TGSI_DECLARE_RANGE
);
1419 first
= decl
->u
.DeclarationRange
.First
;
1420 last
= decl
->u
.DeclarationRange
.Last
;
1421 mask
= decl
->Declaration
.UsageMask
;
1423 switch( decl
->Interpolation
.Interpolate
) {
1424 case TGSI_INTERPOLATE_CONSTANT
:
1425 eval
= eval_constant_coef
;
1428 case TGSI_INTERPOLATE_LINEAR
:
1429 eval
= eval_linear_coef
;
1432 case TGSI_INTERPOLATE_PERSPECTIVE
:
1433 eval
= eval_perspective_coef
;
1440 if( mask
== TGSI_WRITEMASK_XYZW
) {
1443 for( i
= first
; i
<= last
; i
++ ) {
1444 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1452 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1453 if( mask
& (1 << j
) ) {
1454 for( i
= first
; i
<= last
; i
++ ) {
/* Parameter list and opening of the per-instruction interpreter (called
 * elsewhere in this file as exec_instruction(mach, inst, &pc)).
 * It dispatches on inst->Instruction.Opcode.  Arithmetic cases FETCH source
 * channels into the scratch array r[], combine them with micro_* helpers
 * and STORE the result to destination operand 0.
 * NOTE(review): TEMP_0 / TEMP_1 / TEMP_2 / TEMP_128 / TEMP_M128 look like
 * preloaded constants (0, 1, 2, 128, -128) -- confirm where they are set. */
1466 struct tgsi_exec_machine
*mach
,
1467 const struct tgsi_full_instruction
*inst
,
1471 union tgsi_exec_channel r
[8];
1475 switch (inst
->Instruction
.Opcode
) {
/* ARL: each enabled channel is passed through micro_f2it and stored. */
1476 case TGSI_OPCODE_ARL
:
1477 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1478 FETCH( &r
[0], 0, chan_index
);
1479 micro_f2it( &r
[0], &r
[0] );
1480 STORE( &r
[0], 0, chan_index
);
/* MOV: plain per-channel copy of source 0 to the destination. */
1484 case TGSI_OPCODE_MOV
:
1485 /* TGSI_OPCODE_SWZ */
1486 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1487 FETCH( &r
[0], 0, chan_index
);
1488 STORE( &r
[0], 0, chan_index
);
/* LIT: X and W receive TEMP_1.  Y = max(src.x, TEMP_0).
 * Z is selected by micro_lt(TEMP_0, src.x) between
 * pow(max(src.y, TEMP_0), src.w clamped to [TEMP_M128, TEMP_128]) and
 * TEMP_0.
 * NOTE(review): X is stored before src.x / src.y are fetched, so a
 * destination that is also the source would presumably be read back
 * modified -- verify. */
1492 case TGSI_OPCODE_LIT
:
1493 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1494 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1497 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1498 FETCH( &r
[0], 0, CHAN_X
);
1499 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1500 micro_max( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1501 STORE( &r
[0], 0, CHAN_Y
);
1504 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1505 FETCH( &r
[1], 0, CHAN_Y
);
1506 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1508 FETCH( &r
[2], 0, CHAN_W
);
1509 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
1510 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
1511 micro_pow( &r
[1], &r
[1], &r
[2] );
1512 micro_lt( &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1513 STORE( &r
[0], 0, CHAN_Z
);
1517 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1518 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
/* RCP: TEMP_1 / src.x, replicated to every enabled channel. */
1522 case TGSI_OPCODE_RCP
:
1523 /* TGSI_OPCODE_RECIP */
1524 FETCH( &r
[0], 0, CHAN_X
);
1525 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1526 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1527 STORE( &r
[0], 0, chan_index
);
/* RSQ: TEMP_1 / sqrt(src.x), replicated to every enabled channel. */
1531 case TGSI_OPCODE_RSQ
:
1532 /* TGSI_OPCODE_RECIPSQRT */
1533 FETCH( &r
[0], 0, CHAN_X
);
1534 micro_sqrt( &r
[0], &r
[0] );
1535 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1536 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1537 STORE( &r
[0], 0, chan_index
);
1541 case TGSI_OPCODE_EXP
:
1545 case TGSI_OPCODE_LOG
:
/* MUL: per-channel src0 * src1. */
1549 case TGSI_OPCODE_MUL
:
1550 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
)
1552 FETCH(&r
[0], 0, chan_index
);
1553 FETCH(&r
[1], 1, chan_index
);
1555 micro_mul( &r
[0], &r
[0], &r
[1] );
1557 STORE(&r
[0], 0, chan_index
);
/* ADD: per-channel src0 + src1. */
1561 case TGSI_OPCODE_ADD
:
1562 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1563 FETCH( &r
[0], 0, chan_index
);
1564 FETCH( &r
[1], 1, chan_index
);
1565 micro_add( &r
[0], &r
[0], &r
[1] );
1566 STORE( &r
[0], 0, chan_index
);
/* DP3: src0.x*src1.x + src0.y*src1.y + src0.z*src1.z, accumulated in r[0]
 * and written to every enabled channel. */
1570 case TGSI_OPCODE_DP3
:
1571 /* TGSI_OPCODE_DOT3 */
1572 FETCH( &r
[0], 0, CHAN_X
);
1573 FETCH( &r
[1], 1, CHAN_X
);
1574 micro_mul( &r
[0], &r
[0], &r
[1] );
1576 FETCH( &r
[1], 0, CHAN_Y
);
1577 FETCH( &r
[2], 1, CHAN_Y
);
1578 micro_mul( &r
[1], &r
[1], &r
[2] );
1579 micro_add( &r
[0], &r
[0], &r
[1] );
1581 FETCH( &r
[1], 0, CHAN_Z
);
1582 FETCH( &r
[2], 1, CHAN_Z
);
1583 micro_mul( &r
[1], &r
[1], &r
[2] );
1584 micro_add( &r
[0], &r
[0], &r
[1] );
1586 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1587 STORE( &r
[0], 0, chan_index
);
/* DP4: as DP3 plus the W product. */
1591 case TGSI_OPCODE_DP4
:
1592 /* TGSI_OPCODE_DOT4 */
1593 FETCH(&r
[0], 0, CHAN_X
);
1594 FETCH(&r
[1], 1, CHAN_X
);
1596 micro_mul( &r
[0], &r
[0], &r
[1] );
1598 FETCH(&r
[1], 0, CHAN_Y
);
1599 FETCH(&r
[2], 1, CHAN_Y
);
1601 micro_mul( &r
[1], &r
[1], &r
[2] );
1602 micro_add( &r
[0], &r
[0], &r
[1] );
1604 FETCH(&r
[1], 0, CHAN_Z
);
1605 FETCH(&r
[2], 1, CHAN_Z
);
1607 micro_mul( &r
[1], &r
[1], &r
[2] );
1608 micro_add( &r
[0], &r
[0], &r
[1] );
1610 FETCH(&r
[1], 0, CHAN_W
);
1611 FETCH(&r
[2], 1, CHAN_W
);
1613 micro_mul( &r
[1], &r
[1], &r
[2] );
1614 micro_add( &r
[0], &r
[0], &r
[1] );
1616 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1617 STORE( &r
[0], 0, chan_index
);
/* DST: X = TEMP_1, Y = src0.y * src1.y, Z = src0.z, W = src1.w. */
1621 case TGSI_OPCODE_DST
:
1622 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1623 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1626 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1627 FETCH( &r
[0], 0, CHAN_Y
);
1628 FETCH( &r
[1], 1, CHAN_Y
);
1629 micro_mul( &r
[0], &r
[0], &r
[1] );
1630 STORE( &r
[0], 0, CHAN_Y
);
1633 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1634 FETCH( &r
[0], 0, CHAN_Z
);
1635 STORE( &r
[0], 0, CHAN_Z
);
1638 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1639 FETCH( &r
[0], 1, CHAN_W
);
1640 STORE( &r
[0], 0, CHAN_W
);
/* MIN: micro_lt is used as a select -- presumably
 * dst = (arg1 < arg2) ? arg3 : arg4 -- giving the smaller operand. */
1644 case TGSI_OPCODE_MIN
:
1645 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1646 FETCH(&r
[0], 0, chan_index
);
1647 FETCH(&r
[1], 1, chan_index
);
1649 /* XXX use micro_min()?? */
1650 micro_lt( &r
[0], &r
[0], &r
[1], &r
[0], &r
[1] );
1652 STORE(&r
[0], 0, chan_index
);
/* MAX: same select with the two result operands swapped. */
1656 case TGSI_OPCODE_MAX
:
1657 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1658 FETCH(&r
[0], 0, chan_index
);
1659 FETCH(&r
[1], 1, chan_index
);
1661 /* XXX use micro_max()?? */
1662 micro_lt( &r
[0], &r
[0], &r
[1], &r
[1], &r
[0] );
1664 STORE(&r
[0], 0, chan_index
);
/* SLT: selects TEMP_1 when src0 < src1, TEMP_0 otherwise. */
1668 case TGSI_OPCODE_SLT
:
1669 /* TGSI_OPCODE_SETLT */
1670 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1671 FETCH( &r
[0], 0, chan_index
);
1672 FETCH( &r
[1], 1, chan_index
);
1673 micro_lt( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1674 STORE( &r
[0], 0, chan_index
);
/* SGE: selects TEMP_1 when src0 >= src1, TEMP_0 otherwise. */
1678 case TGSI_OPCODE_SGE
:
1679 /* TGSI_OPCODE_SETGE */
1680 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1681 FETCH( &r
[0], 0, chan_index
);
1682 FETCH( &r
[1], 1, chan_index
);
1683 micro_ge( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1684 STORE( &r
[0], 0, chan_index
);
/* MAD: per-channel src0 * src1 + src2. */
1688 case TGSI_OPCODE_MAD
:
1689 /* TGSI_OPCODE_MADD */
1690 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1691 FETCH( &r
[0], 0, chan_index
);
1692 FETCH( &r
[1], 1, chan_index
);
1693 micro_mul( &r
[0], &r
[0], &r
[1] );
1694 FETCH( &r
[1], 2, chan_index
);
1695 micro_add( &r
[0], &r
[0], &r
[1] );
1696 STORE( &r
[0], 0, chan_index
);
/* SUB: per-channel src0 - src1. */
1700 case TGSI_OPCODE_SUB
:
1701 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1702 FETCH(&r
[0], 0, chan_index
);
1703 FETCH(&r
[1], 1, chan_index
);
1705 micro_sub( &r
[0], &r
[0], &r
[1] );
1707 STORE(&r
[0], 0, chan_index
);
/* LERP: per-channel src0 * (src1 - src2) + src2. */
1711 case TGSI_OPCODE_LERP
:
1712 /* TGSI_OPCODE_LRP */
1713 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1714 FETCH(&r
[0], 0, chan_index
);
1715 FETCH(&r
[1], 1, chan_index
);
1716 FETCH(&r
[2], 2, chan_index
);
1718 micro_sub( &r
[1], &r
[1], &r
[2] );
1719 micro_mul( &r
[0], &r
[0], &r
[1] );
1720 micro_add( &r
[0], &r
[0], &r
[2] );
1722 STORE(&r
[0], 0, chan_index
);
1726 case TGSI_OPCODE_CND
:
1730 case TGSI_OPCODE_CND0
:
1734 case TGSI_OPCODE_DOT2ADD
:
1735 /* TGSI_OPCODE_DP2A */
1739 case TGSI_OPCODE_INDEX
:
1743 case TGSI_OPCODE_NEGATE
:
/* FRAC: per-channel micro_frc. */
1747 case TGSI_OPCODE_FRAC
:
1748 /* TGSI_OPCODE_FRC */
1749 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1750 FETCH( &r
[0], 0, chan_index
);
1751 micro_frc( &r
[0], &r
[0] );
1752 STORE( &r
[0], 0, chan_index
);
1756 case TGSI_OPCODE_CLAMP
:
/* FLOOR: per-channel micro_flr. */
1760 case TGSI_OPCODE_FLOOR
:
1761 /* TGSI_OPCODE_FLR */
1762 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1763 FETCH( &r
[0], 0, chan_index
);
1764 micro_flr( &r
[0], &r
[0] );
1765 STORE( &r
[0], 0, chan_index
);
/* ROUND: per-channel micro_rnd. */
1769 case TGSI_OPCODE_ROUND
:
1770 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1771 FETCH( &r
[0], 0, chan_index
);
1772 micro_rnd( &r
[0], &r
[0] );
1773 STORE( &r
[0], 0, chan_index
);
/* EX2: pow(TEMP_2, src.x), replicated to every enabled channel. */
1777 case TGSI_OPCODE_EXPBASE2
:
1778 /* TGSI_OPCODE_EX2 */
1779 FETCH(&r
[0], 0, CHAN_X
);
1781 micro_pow( &r
[0], &mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
], &r
[0] );
1783 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1784 STORE( &r
[0], 0, chan_index
);
/* LG2: micro_lg2(src.x), replicated to every enabled channel. */
1788 case TGSI_OPCODE_LOGBASE2
:
1789 /* TGSI_OPCODE_LG2 */
1790 FETCH( &r
[0], 0, CHAN_X
);
1791 micro_lg2( &r
[0], &r
[0] );
1792 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1793 STORE( &r
[0], 0, chan_index
);
/* POW: pow(src0.x, src1.x), replicated to every enabled channel. */
1797 case TGSI_OPCODE_POWER
:
1798 /* TGSI_OPCODE_POW */
1799 FETCH(&r
[0], 0, CHAN_X
);
1800 FETCH(&r
[1], 1, CHAN_X
);
1802 micro_pow( &r
[0], &r
[0], &r
[1] );
1804 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1805 STORE( &r
[0], 0, chan_index
);
/* XPD (cross product):
 *   X = src0.y*src1.z - src0.z*src1.y
 *   Y = src0.z*src1.x - src0.x*src1.z
 *   Z = src0.x*src1.y - src0.y*src1.x
 *   W = TEMP_1
 * Fetched operands are kept in r[0..5] and reused across components.
 * NOTE(review): X is stored before src1.x / src0.x are fetched, so a
 * destination that aliases a source would presumably feed the new X into
 * the Y and Z terms -- verify. */
1809 case TGSI_OPCODE_CROSSPRODUCT
:
1810 /* TGSI_OPCODE_XPD */
1811 FETCH(&r
[0], 0, CHAN_Y
);
1812 FETCH(&r
[1], 1, CHAN_Z
);
1814 micro_mul( &r
[2], &r
[0], &r
[1] );
1816 FETCH(&r
[3], 0, CHAN_Z
);
1817 FETCH(&r
[4], 1, CHAN_Y
);
1819 micro_mul( &r
[5], &r
[3], &r
[4] );
1820 micro_sub( &r
[2], &r
[2], &r
[5] );
1822 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1823 STORE( &r
[2], 0, CHAN_X
);
1826 FETCH(&r
[2], 1, CHAN_X
);
1828 micro_mul( &r
[3], &r
[3], &r
[2] );
1830 FETCH(&r
[5], 0, CHAN_X
);
1832 micro_mul( &r
[1], &r
[1], &r
[5] );
1833 micro_sub( &r
[3], &r
[3], &r
[1] );
1835 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1836 STORE( &r
[3], 0, CHAN_Y
);
1839 micro_mul( &r
[5], &r
[5], &r
[4] );
1840 micro_mul( &r
[0], &r
[0], &r
[2] );
1841 micro_sub( &r
[5], &r
[5], &r
[0] );
1843 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1844 STORE( &r
[5], 0, CHAN_Z
);
1847 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1848 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1852 case TGSI_OPCODE_MULTIPLYMATRIX
:
/* ABS: per-channel micro_abs. */
1856 case TGSI_OPCODE_ABS
:
1857 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1858 FETCH(&r
[0], 0, chan_index
);
1860 micro_abs( &r
[0], &r
[0] );
1862 STORE(&r
[0], 0, chan_index
);
1866 case TGSI_OPCODE_RCC
:
/* DPH: three-component dot product of src0 and src1 plus src1.w. */
1870 case TGSI_OPCODE_DPH
:
1871 FETCH(&r
[0], 0, CHAN_X
);
1872 FETCH(&r
[1], 1, CHAN_X
);
1874 micro_mul( &r
[0], &r
[0], &r
[1] );
1876 FETCH(&r
[1], 0, CHAN_Y
);
1877 FETCH(&r
[2], 1, CHAN_Y
);
1879 micro_mul( &r
[1], &r
[1], &r
[2] );
1880 micro_add( &r
[0], &r
[0], &r
[1] );
1882 FETCH(&r
[1], 0, CHAN_Z
);
1883 FETCH(&r
[2], 1, CHAN_Z
);
1885 micro_mul( &r
[1], &r
[1], &r
[2] );
1886 micro_add( &r
[0], &r
[0], &r
[1] );
1888 FETCH(&r
[1], 1, CHAN_W
);
1890 micro_add( &r
[0], &r
[0], &r
[1] );
1892 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1893 STORE( &r
[0], 0, chan_index
);
/* COS: micro_cos(src.x), replicated to every enabled channel. */
1897 case TGSI_OPCODE_COS
:
1898 FETCH(&r
[0], 0, CHAN_X
);
1900 micro_cos( &r
[0], &r
[0] );
1902 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1903 STORE( &r
[0], 0, chan_index
);
/* DDX / DDY: per-channel micro_ddx / micro_ddy. */
1907 case TGSI_OPCODE_DDX
:
1908 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1909 FETCH( &r
[0], 0, chan_index
);
1910 micro_ddx( &r
[0], &r
[0] );
1911 STORE( &r
[0], 0, chan_index
);
1915 case TGSI_OPCODE_DDY
:
1916 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1917 FETCH( &r
[0], 0, chan_index
);
1918 micro_ddy( &r
[0], &r
[0] );
1919 STORE( &r
[0], 0, chan_index
);
/* KILP: conditional kill, delegated to exec_kilp(). */
1923 case TGSI_OPCODE_KILP
:
1924 exec_kilp (mach
, inst
);
/* KIL: unconditional kill of every pixel currently set in ExecMask. */
1927 case TGSI_OPCODE_KIL
:
1928 /* for enabled ExecMask bits, set the killed bit */
1929 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= mach
->ExecMask
;
1932 case TGSI_OPCODE_PK2H
:
1936 case TGSI_OPCODE_PK2US
:
1940 case TGSI_OPCODE_PK4B
:
1944 case TGSI_OPCODE_PK4UB
:
1948 case TGSI_OPCODE_RFL
:
/* SEQ: micro_eq select with TEMP_1 as the "equal" result and TEMP_0 as
 * the "not equal" result. */
1952 case TGSI_OPCODE_SEQ
:
1953 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1954 FETCH( &r
[0], 0, chan_index
);
1955 FETCH( &r
[1], 1, chan_index
);
1956 micro_eq( &r
[0], &r
[0], &r
[1],
1957 &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
],
1958 &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1959 STORE( &r
[0], 0, chan_index
);
1963 case TGSI_OPCODE_SFL
:
1967 case TGSI_OPCODE_SGT
:
1968 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1969 FETCH( &r
[0], 0, chan_index
);
1970 FETCH( &r
[1], 1, chan_index
);
1971 micro_lt( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1972 STORE( &r
[0], 0, chan_index
);
1976 case TGSI_OPCODE_SIN
:
1977 FETCH( &r
[0], 0, CHAN_X
);
1978 micro_sin( &r
[0], &r
[0] );
1979 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1980 STORE( &r
[0], 0, chan_index
);
1984 case TGSI_OPCODE_SLE
:
1985 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1986 FETCH( &r
[0], 0, chan_index
);
1987 FETCH( &r
[1], 1, chan_index
);
1988 micro_ge( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1989 STORE( &r
[0], 0, chan_index
);
/* SNE: micro_eq select with the results swapped relative to SEQ
 * (TEMP_0 when equal, TEMP_1 otherwise). */
1993 case TGSI_OPCODE_SNE
:
1994 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1995 FETCH( &r
[0], 0, chan_index
);
1996 FETCH( &r
[1], 1, chan_index
);
1997 micro_eq( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1998 STORE( &r
[0], 0, chan_index
);
2002 case TGSI_OPCODE_STR
:
/* TEX / TXB / TXL all go through exec_tex(); the third argument says
 * whether src[0].w is to be used as the LOD value. */
2006 case TGSI_OPCODE_TEX
:
2007 /* simple texture lookup */
2008 /* src[0] = texcoord */
2009 /* src[1] = sampler unit */
2010 exec_tex(mach
, inst
, FALSE
);
2013 case TGSI_OPCODE_TXB
:
2014 /* Texture lookup with lod bias */
2015 /* src[0] = texcoord (src[0].w = LOD bias) */
2016 /* src[1] = sampler unit */
2017 exec_tex(mach
, inst
, TRUE
);
2020 case TGSI_OPCODE_TXD
:
2021 /* Texture lookup with explict partial derivatives */
2022 /* src[0] = texcoord */
2023 /* src[1] = d[strq]/dx */
2024 /* src[2] = d[strq]/dy */
2025 /* src[3] = sampler unit */
/* NOTE(review): TXL is routed exactly like TXB, i.e. the explicit LOD is
 * handed to the sampler through the same "bias" argument -- confirm the
 * sampler distinguishes the two cases if it needs to. */
2029 case TGSI_OPCODE_TXL
:
2030 /* Texture lookup with explit LOD */
2031 /* src[0] = texcoord (src[0].w = LOD) */
2032 /* src[1] = sampler unit */
2033 exec_tex(mach
, inst
, TRUE
);
2036 case TGSI_OPCODE_UP2H
:
2040 case TGSI_OPCODE_UP2US
:
2044 case TGSI_OPCODE_UP4B
:
2048 case TGSI_OPCODE_UP4UB
:
2052 case TGSI_OPCODE_X2D
:
2056 case TGSI_OPCODE_ARA
:
2060 case TGSI_OPCODE_ARR
:
2064 case TGSI_OPCODE_BRA
:
/* CAL: saves the Cond/Loop/Cont/Func masks and the return address on
 * their stacks, then jumps to the instruction's label.
 * NOTE(review): every push except the CallStack one is preceded by a
 * bounds assert in the lines visible here -- check whether CallStackTop
 * needs one too. */
2068 case TGSI_OPCODE_CAL
:
2069 /* skip the call if no execution channels are enabled */
2070 if (mach
->ExecMask
) {
2073 /* push the Cond, Loop, Cont stacks */
2074 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2075 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2076 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2077 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2078 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2079 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2081 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
2082 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
2084 /* note that PC was already incremented above */
2085 mach
->CallStack
[mach
->CallStackTop
++] = *pc
;
2086 *pc
= inst
->InstructionExtLabel
.Label
;
/* RET: the executing pixels are removed from FuncMask.  Only once no
 * pixel is left executing is the return address popped and the masks
 * saved by CAL restored. */
2090 case TGSI_OPCODE_RET
:
2091 mach
->FuncMask
&= ~mach
->ExecMask
;
2092 UPDATE_EXEC_MASK(mach
);
2094 if (mach
->ExecMask
== 0x0) {
2095 /* really return now (otherwise, keep executing */
2097 if (mach
->CallStackTop
== 0) {
2098 /* returning from main() */
2102 *pc
= mach
->CallStack
[--mach
->CallStackTop
];
2104 /* pop the Cond, Loop, Cont stacks */
2105 assert(mach
->CondStackTop
> 0);
2106 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2107 assert(mach
->LoopStackTop
> 0);
2108 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2109 assert(mach
->ContStackTop
> 0);
2110 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2111 assert(mach
->FuncStackTop
> 0);
2112 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
2114 UPDATE_EXEC_MASK(mach
);
2118 case TGSI_OPCODE_SSG
:
/* CMP: per-channel select of src1 or src2 by micro_lt(src0, TEMP_0). */
2122 case TGSI_OPCODE_CMP
:
2123 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2124 FETCH(&r
[0], 0, chan_index
);
2125 FETCH(&r
[1], 1, chan_index
);
2126 FETCH(&r
[2], 2, chan_index
);
2128 micro_lt( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2] );
2130 STORE(&r
[0], 0, chan_index
);
/* SCS: X = cos(src.x), Y = sin(src.x), Z = TEMP_0, W = TEMP_1.
 * src.x is fetched once, before any store. */
2134 case TGSI_OPCODE_SCS
:
2135 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2136 FETCH( &r
[0], 0, CHAN_X
);
2138 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) ) {
2139 micro_cos( &r
[1], &r
[0] );
2140 STORE( &r
[1], 0, CHAN_X
);
2142 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2143 micro_sin( &r
[1], &r
[0] );
2144 STORE( &r
[1], 0, CHAN_Y
);
2146 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
2147 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
2149 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
2150 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2154 case TGSI_OPCODE_NRM
:
2158 case TGSI_OPCODE_DIV
:
/* DP2: src0.x*src1.x + src0.y*src1.y, written to every enabled channel. */
2162 case TGSI_OPCODE_DP2
:
2163 FETCH( &r
[0], 0, CHAN_X
);
2164 FETCH( &r
[1], 1, CHAN_X
);
2165 micro_mul( &r
[0], &r
[0], &r
[1] );
2167 FETCH( &r
[1], 0, CHAN_Y
);
2168 FETCH( &r
[2], 1, CHAN_Y
);
2169 micro_mul( &r
[1], &r
[1], &r
[2] );
2170 micro_add( &r
[0], &r
[0], &r
[1] );
2172 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2173 STORE( &r
[0], 0, chan_index
);
/* IF: pushes the current CondMask, fetches src.x and then clears
 * CondMask bits one pixel at a time (0x1, 0x2, 0x4, 0x8).
 * NOTE(review): the per-pixel tests guarding each "&= ~bit" are not
 * visible here; presumably a bit is cleared when that pixel's condition
 * value is zero -- confirm. */
2177 case TGSI_OPCODE_IF
:
2179 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2180 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2181 FETCH( &r
[0], 0, CHAN_X
);
2182 /* update CondMask */
2184 mach
->CondMask
&= ~0x1;
2187 mach
->CondMask
&= ~0x2;
2190 mach
->CondMask
&= ~0x4;
2193 mach
->CondMask
&= ~0x8;
2195 UPDATE_EXEC_MASK(mach
);
2196 /* Todo: If CondMask==0, jump to ELSE */
/* ELSE: flips CondMask, limited to the pixels that were enabled in the
 * mask saved by the matching IF (top of CondStack, not popped). */
2199 case TGSI_OPCODE_ELSE
:
2200 /* invert CondMask wrt previous mask */
2203 assert(mach
->CondStackTop
> 0);
2204 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
2205 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
2206 UPDATE_EXEC_MASK(mach
);
2207 /* Todo: If CondMask==0, jump to ENDIF */
/* ENDIF: pops the CondMask saved by IF. */
2211 case TGSI_OPCODE_ENDIF
:
2213 assert(mach
->CondStackTop
> 0);
2214 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2215 UPDATE_EXEC_MASK(mach
);
2218 case TGSI_OPCODE_END
:
2219 /* halt execution */
2223 case TGSI_OPCODE_REP
:
2227 case TGSI_OPCODE_ENDREP
:
2231 case TGSI_OPCODE_PUSHA
:
2235 case TGSI_OPCODE_POPA
:
/* CEIL / I2F / NOT / TRUNC: per-channel unary helpers. */
2239 case TGSI_OPCODE_CEIL
:
2240 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2241 FETCH( &r
[0], 0, chan_index
);
2242 micro_ceil( &r
[0], &r
[0] );
2243 STORE( &r
[0], 0, chan_index
);
2247 case TGSI_OPCODE_I2F
:
2248 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2249 FETCH( &r
[0], 0, chan_index
);
2250 micro_i2f( &r
[0], &r
[0] );
2251 STORE( &r
[0], 0, chan_index
);
2255 case TGSI_OPCODE_NOT
:
2256 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2257 FETCH( &r
[0], 0, chan_index
);
2258 micro_not( &r
[0], &r
[0] );
2259 STORE( &r
[0], 0, chan_index
);
2263 case TGSI_OPCODE_TRUNC
:
2264 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2265 FETCH( &r
[0], 0, chan_index
);
2266 micro_trunc( &r
[0], &r
[0] );
2267 STORE( &r
[0], 0, chan_index
);
/* SHL / SHR / AND / OR / XOR: per-channel binary helpers on src0, src1.
 * Note that SHR is implemented with micro_ishr. */
2271 case TGSI_OPCODE_SHL
:
2272 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2273 FETCH( &r
[0], 0, chan_index
);
2274 FETCH( &r
[1], 1, chan_index
);
2275 micro_shl( &r
[0], &r
[0], &r
[1] );
2276 STORE( &r
[0], 0, chan_index
);
2280 case TGSI_OPCODE_SHR
:
2281 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2282 FETCH( &r
[0], 0, chan_index
);
2283 FETCH( &r
[1], 1, chan_index
);
2284 micro_ishr( &r
[0], &r
[0], &r
[1] );
2285 STORE( &r
[0], 0, chan_index
);
2289 case TGSI_OPCODE_AND
:
2290 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2291 FETCH( &r
[0], 0, chan_index
);
2292 FETCH( &r
[1], 1, chan_index
);
2293 micro_and( &r
[0], &r
[0], &r
[1] );
2294 STORE( &r
[0], 0, chan_index
);
2298 case TGSI_OPCODE_OR
:
2299 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2300 FETCH( &r
[0], 0, chan_index
);
2301 FETCH( &r
[1], 1, chan_index
);
2302 micro_or( &r
[0], &r
[0], &r
[1] );
2303 STORE( &r
[0], 0, chan_index
);
2307 case TGSI_OPCODE_MOD
:
2311 case TGSI_OPCODE_XOR
:
2312 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2313 FETCH( &r
[0], 0, chan_index
);
2314 FETCH( &r
[1], 1, chan_index
);
2315 micro_xor( &r
[0], &r
[0], &r
[1] );
2316 STORE( &r
[0], 0, chan_index
);
2320 case TGSI_OPCODE_SAD
:
2324 case TGSI_OPCODE_TXF
:
2328 case TGSI_OPCODE_TXQ
:
/* EMIT: advances the output base offset kept in TEMP_OUTPUT by 16 and
 * bumps the counter of the current primitive.
 * NOTE(review): the meaning of the constant 16 is not visible here. */
2332 case TGSI_OPCODE_EMIT
:
2333 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += 16;
2334 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
/* ENDPRIM: moves on to the next primitive slot and zeroes its counter. */
2337 case TGSI_OPCODE_ENDPRIM
:
2338 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]++;
2339 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]] = 0;
/* LOOP / BGNLOOP2: save LoopMask and ContMask on their stacks. */
2342 case TGSI_OPCODE_LOOP
:
2343 /* fall-through (for now) */
2344 case TGSI_OPCODE_BGNLOOP2
:
2345 /* push LoopMask and ContMasks */
2346 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2347 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2348 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2349 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
/* ENDLOOP / ENDLOOP2: re-enable pixels that executed CONT, then either
 * jump back to just after the loop start (some pixel still looping) or
 * pop both masks and leave the loop. */
2352 case TGSI_OPCODE_ENDLOOP
:
2353 /* fall-through (for now at least) */
2354 case TGSI_OPCODE_ENDLOOP2
:
2355 /* Restore ContMask, but don't pop */
2356 assert(mach
->ContStackTop
> 0);
2357 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
2358 if (mach
->LoopMask
) {
2359 /* repeat loop: jump to instruction just past BGNLOOP */
2360 *pc
= inst
->InstructionExtLabel
.Label
+ 1;
2363 /* exit loop: pop LoopMask */
2364 assert(mach
->LoopStackTop
> 0);
2365 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2367 assert(mach
->ContStackTop
> 0);
2368 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2370 UPDATE_EXEC_MASK(mach
);
/* BRK: executing pixels leave the loop (cleared from LoopMask). */
2373 case TGSI_OPCODE_BRK
:
2374 /* turn off loop channels for each enabled exec channel */
2375 mach
->LoopMask
&= ~mach
->ExecMask
;
2376 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2377 UPDATE_EXEC_MASK(mach
);
/* CONT: executing pixels skip the rest of this iteration (cleared from
 * ContMask until ENDLOOP restores it). */
2380 case TGSI_OPCODE_CONT
:
2381 /* turn off cont channels for each enabled exec channel */
2382 mach
->ContMask
&= ~mach
->ExecMask
;
2383 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2384 UPDATE_EXEC_MASK(mach
);
2387 case TGSI_OPCODE_BGNSUB
:
2391 case TGSI_OPCODE_ENDSUB
:
2395 case TGSI_OPCODE_NOISE1
:
2399 case TGSI_OPCODE_NOISE2
:
2403 case TGSI_OPCODE_NOISE3
:
2407 case TGSI_OPCODE_NOISE4
:
2411 case TGSI_OPCODE_NOP
:
/* Entry point of the interpreter: resets the flow-control masks and the
 * kill/output bookkeeping, runs every declaration through
 * exec_declaration(), then feeds instructions to exec_instruction(), which
 * updates pc itself. */
2421 * Run TGSI interpreter.
2422 * \return bitmask of "alive" quad components
2425 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
/* All four quad components start enabled in every mask. */
2430 mach
->CondMask
= 0xf;
2431 mach
->LoopMask
= 0xf;
2432 mach
->ContMask
= 0xf;
2433 mach
->FuncMask
= 0xf;
2434 mach
->ExecMask
= 0xf;
/* NOTE(review): CondStackTop is forced to 0 immediately before the assert
 * that checks it, so that assert can never fire; the other stack tops are
 * only asserted, not reset. */
2436 mach
->CondStackTop
= 0; /* temporarily subvert this assertion */
2437 assert(mach
->CondStackTop
== 0);
2438 assert(mach
->LoopStackTop
== 0);
2439 assert(mach
->ContStackTop
== 0);
2440 assert(mach
->CallStackTop
== 0);
/* Nothing killed yet; output base offset back to 0. */
2442 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
2443 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
/* Geometry shaders additionally restart primitive counting. */
2445 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
2446 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
2447 mach
->Primitives
[0] = 0;
2451 /* execute declarations (interpolants) */
2452 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
2453 exec_declaration( mach
, mach
->Declarations
+i
);
2456 /* execute instructions, until pc is set to -1 */
2458 assert(pc
< mach
->NumInstructions
);
2459 exec_instruction( mach
, mach
->Instructions
+ pc
, &pc
);
/* NOTE(review): `ctx` is not declared anywhere in the visible part of
 * this function, so the depth-scaling block below is presumably compiled
 * out -- confirm before relying on it. */
2463 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2464 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
2466 * Scale back depth component.
2468 for (i
= 0; i
< 4; i
++)
2469 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
/* The result is the bitwise complement of the kill mask.
 * NOTE(review): the complement also sets every bit above the four quad
 * bits; callers presumably look at the low four bits only -- verify. */
2473 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];