1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
/**
 * TGSI interpreter/executor.
 *
 * Flow control information:
 *
 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel),
 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
 * care since a condition may be true for some quad components but false
 * for other components.
 *
 * We basically execute all statements (even if they're in the part of
 * an IF/ELSE clause that's "not taken") and use a special mask to
 * control writing to destination registers.  This is the ExecMask.
 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK) which are controlled by
 * the flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and
 * CONT).
 */
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_util.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
/*
 * Element indices within a quad, named after the element's position.
 * They index the per-element arrays of a tgsi_exec_channel.
 */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3
/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel).
 * Each pair simply aliases the corresponding TGSI_EXEC_TEMP_* definition.
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
/** Loop header that steps CHAN through the four channel indices 0..3. */
#define FOR_EACH_CHANNEL(CHAN)\
   for (CHAN = 0; CHAN < 4; CHAN++)

/** Non-zero when bit CHAN is set in the write mask of INST's first destination. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test, applied to INST's second destination register. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header visiting only the channels enabled in the first destination's
 * write mask.  The expansion ends in an unbraced `if`, so the statement that
 * follows the macro becomes the body of that `if`.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** As FOR_EACH_ENABLED_CHANNEL, but keyed on the second destination register. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
/**
 * Recompute the execution mask of the machine pointed to by MACH as the
 * bitwise AND of its CondMask, LoopMask, ContMask and FuncMask.
 *
 * MACH is parenthesized at every use and the expansion is wrapped as a
 * whole, so any pointer-valued expression may be passed and the macro can
 * be used wherever an expression is allowed.  MACH is evaluated more than
 * once; do not pass an argument with side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
/**
 * Initialize machine state by expanding tokens to full instructions,
 * allocating temporary storage, setting up constants, etc.
 * After this, we can call tgsi_exec_machine_run() many times.
 */
133 tgsi_exec_machine_bind_shader(
134 struct tgsi_exec_machine
*mach
,
135 const struct tgsi_token
*tokens
,
137 struct tgsi_sampler
*samplers
)
140 struct tgsi_parse_context parse
;
141 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
142 struct tgsi_full_instruction
*instructions
;
143 struct tgsi_full_declaration
*declarations
;
144 uint maxInstructions
= 10, numInstructions
= 0;
145 uint maxDeclarations
= 10, numDeclarations
= 0;
149 tgsi_dump(tokens
, 0);
152 mach
->Tokens
= tokens
;
153 mach
->Samplers
= samplers
;
155 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
156 if (k
!= TGSI_PARSE_OK
) {
157 debug_printf( "Problem parsing!\n" );
161 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
165 declarations
= (struct tgsi_full_declaration
*)
166 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
172 instructions
= (struct tgsi_full_instruction
*)
173 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
176 FREE( declarations
);
180 while( !tgsi_parse_end_of_tokens( &parse
) ) {
181 uint pointer
= parse
.Position
;
184 tgsi_parse_token( &parse
);
185 switch( parse
.FullToken
.Token
.Type
) {
186 case TGSI_TOKEN_TYPE_DECLARATION
:
187 /* save expanded declaration */
188 if (numDeclarations
== maxDeclarations
) {
189 declarations
= REALLOC(declarations
,
191 * sizeof(struct tgsi_full_declaration
),
192 (maxDeclarations
+ 10)
193 * sizeof(struct tgsi_full_declaration
));
194 maxDeclarations
+= 10;
196 memcpy(declarations
+ numDeclarations
,
197 &parse
.FullToken
.FullDeclaration
,
198 sizeof(declarations
[0]));
202 case TGSI_TOKEN_TYPE_IMMEDIATE
:
204 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1;
205 assert( size
% 4 == 0 );
206 assert( mach
->ImmLimit
+ size
/ 4 <= TGSI_EXEC_NUM_IMMEDIATES
);
208 for( i
= 0; i
< size
; i
++ ) {
209 mach
->Imms
[mach
->ImmLimit
+ i
/ 4][i
% 4] =
210 parse
.FullToken
.FullImmediate
.u
.ImmediateFloat32
[i
].Float
;
212 mach
->ImmLimit
+= size
/ 4;
216 case TGSI_TOKEN_TYPE_INSTRUCTION
:
217 assert( labels
->count
< MAX_LABELS
);
219 labels
->labels
[labels
->count
][0] = instno
;
220 labels
->labels
[labels
->count
][1] = pointer
;
223 /* save expanded instruction */
224 if (numInstructions
== maxInstructions
) {
225 instructions
= REALLOC(instructions
,
227 * sizeof(struct tgsi_full_instruction
),
228 (maxInstructions
+ 10)
229 * sizeof(struct tgsi_full_instruction
));
230 maxInstructions
+= 10;
232 memcpy(instructions
+ numInstructions
,
233 &parse
.FullToken
.FullInstruction
,
234 sizeof(instructions
[0]));
242 tgsi_parse_free (&parse
);
244 if (mach
->Declarations
) {
245 FREE( mach
->Declarations
);
247 mach
->Declarations
= declarations
;
248 mach
->NumDeclarations
= numDeclarations
;
250 if (mach
->Instructions
) {
251 FREE( mach
->Instructions
);
253 mach
->Instructions
= instructions
;
254 mach
->NumInstructions
= numInstructions
;
259 tgsi_exec_machine_init(
260 struct tgsi_exec_machine
*mach
)
264 mach
->Temps
= (struct tgsi_exec_vector
*) tgsi_align_128bit( mach
->_Temps
);
265 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_TEMP_ADDR
];
267 /* Setup constants. */
268 for( i
= 0; i
< 4; i
++ ) {
269 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
270 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
271 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
272 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
273 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
274 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
275 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
276 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
277 mach
->Temps
[TEMP_3_I
].xyzw
[TEMP_3_C
].f
[i
] = 3.0f
;
278 mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
].f
[i
] = 0.5f
;
284 tgsi_exec_machine_free_data(struct tgsi_exec_machine
*mach
)
286 if (mach
->Instructions
) {
287 FREE(mach
->Instructions
);
288 mach
->Instructions
= NULL
;
289 mach
->NumInstructions
= 0;
291 if (mach
->Declarations
) {
292 FREE(mach
->Declarations
);
293 mach
->Declarations
= NULL
;
294 mach
->NumDeclarations
= 0;
301 union tgsi_exec_channel
*dst
,
302 const union tgsi_exec_channel
*src
)
304 dst
->f
[0] = fabsf( src
->f
[0] );
305 dst
->f
[1] = fabsf( src
->f
[1] );
306 dst
->f
[2] = fabsf( src
->f
[2] );
307 dst
->f
[3] = fabsf( src
->f
[3] );
312 union tgsi_exec_channel
*dst
,
313 const union tgsi_exec_channel
*src0
,
314 const union tgsi_exec_channel
*src1
)
316 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
317 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
318 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
319 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
324 union tgsi_exec_channel
*dst
,
325 const union tgsi_exec_channel
*src0
,
326 const union tgsi_exec_channel
*src1
)
328 dst
->i
[0] = src0
->i
[0] + src1
->i
[0];
329 dst
->i
[1] = src0
->i
[1] + src1
->i
[1];
330 dst
->i
[2] = src0
->i
[2] + src1
->i
[2];
331 dst
->i
[3] = src0
->i
[3] + src1
->i
[3];
336 union tgsi_exec_channel
*dst
,
337 const union tgsi_exec_channel
*src0
,
338 const union tgsi_exec_channel
*src1
)
340 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
341 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
342 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
343 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
348 union tgsi_exec_channel
*dst
,
349 const union tgsi_exec_channel
*src
)
351 dst
->f
[0] = ceilf( src
->f
[0] );
352 dst
->f
[1] = ceilf( src
->f
[1] );
353 dst
->f
[2] = ceilf( src
->f
[2] );
354 dst
->f
[3] = ceilf( src
->f
[3] );
359 union tgsi_exec_channel
*dst
,
360 const union tgsi_exec_channel
*src
)
362 dst
->f
[0] = cosf( src
->f
[0] );
363 dst
->f
[1] = cosf( src
->f
[1] );
364 dst
->f
[2] = cosf( src
->f
[2] );
365 dst
->f
[3] = cosf( src
->f
[3] );
370 union tgsi_exec_channel
*dst
,
371 const union tgsi_exec_channel
*src
)
376 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
381 union tgsi_exec_channel
*dst
,
382 const union tgsi_exec_channel
*src
)
387 dst
->f
[3] = src
->f
[TILE_TOP_LEFT
] - src
->f
[TILE_BOTTOM_LEFT
];
392 union tgsi_exec_channel
*dst
,
393 const union tgsi_exec_channel
*src0
,
394 const union tgsi_exec_channel
*src1
)
396 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
397 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
398 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
399 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
404 union tgsi_exec_channel
*dst
,
405 const union tgsi_exec_channel
*src0
,
406 const union tgsi_exec_channel
*src1
)
408 dst
->u
[0] = src0
->u
[0] / src1
->u
[0];
409 dst
->u
[1] = src0
->u
[1] / src1
->u
[1];
410 dst
->u
[2] = src0
->u
[2] / src1
->u
[2];
411 dst
->u
[3] = src0
->u
[3] / src1
->u
[3];
416 union tgsi_exec_channel
*dst
,
417 const union tgsi_exec_channel
*src0
,
418 const union tgsi_exec_channel
*src1
,
419 const union tgsi_exec_channel
*src2
,
420 const union tgsi_exec_channel
*src3
)
422 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
423 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
424 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
425 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
430 union tgsi_exec_channel
*dst
,
431 const union tgsi_exec_channel
*src0
,
432 const union tgsi_exec_channel
*src1
,
433 const union tgsi_exec_channel
*src2
,
434 const union tgsi_exec_channel
*src3
)
436 dst
->i
[0] = src0
->i
[0] == src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
437 dst
->i
[1] = src0
->i
[1] == src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
438 dst
->i
[2] = src0
->i
[2] == src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
439 dst
->i
[3] = src0
->i
[3] == src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
444 union tgsi_exec_channel
*dst
,
445 const union tgsi_exec_channel
*src
)
447 dst
->f
[0] = powf( 2.0f
, src
->f
[0] );
448 dst
->f
[1] = powf( 2.0f
, src
->f
[1] );
449 dst
->f
[2] = powf( 2.0f
, src
->f
[2] );
450 dst
->f
[3] = powf( 2.0f
, src
->f
[3] );
455 union tgsi_exec_channel
*dst
,
456 const union tgsi_exec_channel
*src
)
458 dst
->i
[0] = (int) src
->f
[0];
459 dst
->i
[1] = (int) src
->f
[1];
460 dst
->i
[2] = (int) src
->f
[2];
461 dst
->i
[3] = (int) src
->f
[3];
466 union tgsi_exec_channel
*dst
,
467 const union tgsi_exec_channel
*src
)
469 dst
->u
[0] = (uint
) src
->f
[0];
470 dst
->u
[1] = (uint
) src
->f
[1];
471 dst
->u
[2] = (uint
) src
->f
[2];
472 dst
->u
[3] = (uint
) src
->f
[3];
477 union tgsi_exec_channel
*dst
,
478 const union tgsi_exec_channel
*src
)
480 dst
->f
[0] = floorf( src
->f
[0] );
481 dst
->f
[1] = floorf( src
->f
[1] );
482 dst
->f
[2] = floorf( src
->f
[2] );
483 dst
->f
[3] = floorf( src
->f
[3] );
488 union tgsi_exec_channel
*dst
,
489 const union tgsi_exec_channel
*src
)
491 dst
->f
[0] = src
->f
[0] - floorf( src
->f
[0] );
492 dst
->f
[1] = src
->f
[1] - floorf( src
->f
[1] );
493 dst
->f
[2] = src
->f
[2] - floorf( src
->f
[2] );
494 dst
->f
[3] = src
->f
[3] - floorf( src
->f
[3] );
499 union tgsi_exec_channel
*dst
,
500 const union tgsi_exec_channel
*src0
,
501 const union tgsi_exec_channel
*src1
,
502 const union tgsi_exec_channel
*src2
,
503 const union tgsi_exec_channel
*src3
)
505 dst
->f
[0] = src0
->f
[0] >= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
506 dst
->f
[1] = src0
->f
[1] >= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
507 dst
->f
[2] = src0
->f
[2] >= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
508 dst
->f
[3] = src0
->f
[3] >= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
513 union tgsi_exec_channel
*dst
,
514 const union tgsi_exec_channel
*src
)
516 dst
->f
[0] = (float) src
->i
[0];
517 dst
->f
[1] = (float) src
->i
[1];
518 dst
->f
[2] = (float) src
->i
[2];
519 dst
->f
[3] = (float) src
->i
[3];
524 union tgsi_exec_channel
*dst
,
525 const union tgsi_exec_channel
*src
)
527 dst
->f
[0] = logf( src
->f
[0] ) * 1.442695f
;
528 dst
->f
[1] = logf( src
->f
[1] ) * 1.442695f
;
529 dst
->f
[2] = logf( src
->f
[2] ) * 1.442695f
;
530 dst
->f
[3] = logf( src
->f
[3] ) * 1.442695f
;
535 union tgsi_exec_channel
*dst
,
536 const union tgsi_exec_channel
*src0
,
537 const union tgsi_exec_channel
*src1
,
538 const union tgsi_exec_channel
*src2
,
539 const union tgsi_exec_channel
*src3
)
541 dst
->f
[0] = src0
->f
[0] <= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
542 dst
->f
[1] = src0
->f
[1] <= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
543 dst
->f
[2] = src0
->f
[2] <= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
544 dst
->f
[3] = src0
->f
[3] <= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
549 union tgsi_exec_channel
*dst
,
550 const union tgsi_exec_channel
*src0
,
551 const union tgsi_exec_channel
*src1
,
552 const union tgsi_exec_channel
*src2
,
553 const union tgsi_exec_channel
*src3
)
555 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
556 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
557 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
558 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
563 union tgsi_exec_channel
*dst
,
564 const union tgsi_exec_channel
*src0
,
565 const union tgsi_exec_channel
*src1
,
566 const union tgsi_exec_channel
*src2
,
567 const union tgsi_exec_channel
*src3
)
569 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
570 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
571 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
572 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
577 union tgsi_exec_channel
*dst
,
578 const union tgsi_exec_channel
*src0
,
579 const union tgsi_exec_channel
*src1
,
580 const union tgsi_exec_channel
*src2
,
581 const union tgsi_exec_channel
*src3
)
583 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src2
->u
[0] : src3
->u
[0];
584 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src2
->u
[1] : src3
->u
[1];
585 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src2
->u
[2] : src3
->u
[2];
586 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src2
->u
[3] : src3
->u
[3];
591 union tgsi_exec_channel
*dst
,
592 const union tgsi_exec_channel
*src0
,
593 const union tgsi_exec_channel
*src1
)
595 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
596 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
597 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
598 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
603 union tgsi_exec_channel
*dst
,
604 const union tgsi_exec_channel
*src0
,
605 const union tgsi_exec_channel
*src1
)
607 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
608 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
609 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
610 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
615 union tgsi_exec_channel
*dst
,
616 const union tgsi_exec_channel
*src0
,
617 const union tgsi_exec_channel
*src1
)
619 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
620 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
621 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
622 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
627 union tgsi_exec_channel
*dst
,
628 const union tgsi_exec_channel
*src0
,
629 const union tgsi_exec_channel
*src1
)
631 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
632 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
633 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
634 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
639 union tgsi_exec_channel
*dst
,
640 const union tgsi_exec_channel
*src0
,
641 const union tgsi_exec_channel
*src1
)
643 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
644 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
645 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
646 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
651 union tgsi_exec_channel
*dst
,
652 const union tgsi_exec_channel
*src0
,
653 const union tgsi_exec_channel
*src1
)
655 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
656 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
657 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
658 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
663 union tgsi_exec_channel
*dst
,
664 const union tgsi_exec_channel
*src0
,
665 const union tgsi_exec_channel
*src1
)
667 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
668 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
669 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
670 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
675 union tgsi_exec_channel
*dst
,
676 const union tgsi_exec_channel
*src0
,
677 const union tgsi_exec_channel
*src1
)
679 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
680 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
681 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
682 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
687 union tgsi_exec_channel
*dst
,
688 const union tgsi_exec_channel
*src0
,
689 const union tgsi_exec_channel
*src1
)
691 dst
->i
[0] = src0
->i
[0] * src1
->i
[0];
692 dst
->i
[1] = src0
->i
[1] * src1
->i
[1];
693 dst
->i
[2] = src0
->i
[2] * src1
->i
[2];
694 dst
->i
[3] = src0
->i
[3] * src1
->i
[3];
699 union tgsi_exec_channel
*dst0
,
700 union tgsi_exec_channel
*dst1
,
701 const union tgsi_exec_channel
*src0
,
702 const union tgsi_exec_channel
*src1
)
704 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
705 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
706 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
707 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
716 union tgsi_exec_channel
*dst0
,
717 union tgsi_exec_channel
*dst1
,
718 const union tgsi_exec_channel
*src0
,
719 const union tgsi_exec_channel
*src1
)
721 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
722 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
723 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
724 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
733 union tgsi_exec_channel
*dst
,
734 const union tgsi_exec_channel
*src0
,
735 const union tgsi_exec_channel
*src1
,
736 const union tgsi_exec_channel
*src2
)
738 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
739 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
740 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
741 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
746 union tgsi_exec_channel
*dst
,
747 const union tgsi_exec_channel
*src
)
749 dst
->f
[0] = -src
->f
[0];
750 dst
->f
[1] = -src
->f
[1];
751 dst
->f
[2] = -src
->f
[2];
752 dst
->f
[3] = -src
->f
[3];
757 union tgsi_exec_channel
*dst
,
758 const union tgsi_exec_channel
*src
)
760 dst
->i
[0] = -src
->i
[0];
761 dst
->i
[1] = -src
->i
[1];
762 dst
->i
[2] = -src
->i
[2];
763 dst
->i
[3] = -src
->i
[3];
768 union tgsi_exec_channel
*dst
,
769 const union tgsi_exec_channel
*src
)
771 dst
->u
[0] = ~src
->u
[0];
772 dst
->u
[1] = ~src
->u
[1];
773 dst
->u
[2] = ~src
->u
[2];
774 dst
->u
[3] = ~src
->u
[3];
779 union tgsi_exec_channel
*dst
,
780 const union tgsi_exec_channel
*src0
,
781 const union tgsi_exec_channel
*src1
)
783 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
784 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
785 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
786 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
791 union tgsi_exec_channel
*dst
,
792 const union tgsi_exec_channel
*src0
,
793 const union tgsi_exec_channel
*src1
)
795 dst
->f
[0] = powf( src0
->f
[0], src1
->f
[0] );
796 dst
->f
[1] = powf( src0
->f
[1], src1
->f
[1] );
797 dst
->f
[2] = powf( src0
->f
[2], src1
->f
[2] );
798 dst
->f
[3] = powf( src0
->f
[3], src1
->f
[3] );
803 union tgsi_exec_channel
*dst
,
804 const union tgsi_exec_channel
*src
)
806 dst
->f
[0] = floorf( src
->f
[0] + 0.5f
);
807 dst
->f
[1] = floorf( src
->f
[1] + 0.5f
);
808 dst
->f
[2] = floorf( src
->f
[2] + 0.5f
);
809 dst
->f
[3] = floorf( src
->f
[3] + 0.5f
);
814 union tgsi_exec_channel
*dst
,
815 const union tgsi_exec_channel
*src0
,
816 const union tgsi_exec_channel
*src1
)
818 dst
->i
[0] = src0
->i
[0] << src1
->i
[0];
819 dst
->i
[1] = src0
->i
[1] << src1
->i
[1];
820 dst
->i
[2] = src0
->i
[2] << src1
->i
[2];
821 dst
->i
[3] = src0
->i
[3] << src1
->i
[3];
826 union tgsi_exec_channel
*dst
,
827 const union tgsi_exec_channel
*src0
,
828 const union tgsi_exec_channel
*src1
)
830 dst
->i
[0] = src0
->i
[0] >> src1
->i
[0];
831 dst
->i
[1] = src0
->i
[1] >> src1
->i
[1];
832 dst
->i
[2] = src0
->i
[2] >> src1
->i
[2];
833 dst
->i
[3] = src0
->i
[3] >> src1
->i
[3];
838 union tgsi_exec_channel
*dst
,
839 const union tgsi_exec_channel
*src0
)
841 dst
->f
[0] = (float) (int) src0
->f
[0];
842 dst
->f
[1] = (float) (int) src0
->f
[1];
843 dst
->f
[2] = (float) (int) src0
->f
[2];
844 dst
->f
[3] = (float) (int) src0
->f
[3];
849 union tgsi_exec_channel
*dst
,
850 const union tgsi_exec_channel
*src0
,
851 const union tgsi_exec_channel
*src1
)
853 dst
->u
[0] = src0
->u
[0] >> src1
->u
[0];
854 dst
->u
[1] = src0
->u
[1] >> src1
->u
[1];
855 dst
->u
[2] = src0
->u
[2] >> src1
->u
[2];
856 dst
->u
[3] = src0
->u
[3] >> src1
->u
[3];
861 union tgsi_exec_channel
*dst
,
862 const union tgsi_exec_channel
*src
)
864 dst
->f
[0] = sinf( src
->f
[0] );
865 dst
->f
[1] = sinf( src
->f
[1] );
866 dst
->f
[2] = sinf( src
->f
[2] );
867 dst
->f
[3] = sinf( src
->f
[3] );
871 micro_sqrt( union tgsi_exec_channel
*dst
,
872 const union tgsi_exec_channel
*src
)
874 dst
->f
[0] = sqrtf( src
->f
[0] );
875 dst
->f
[1] = sqrtf( src
->f
[1] );
876 dst
->f
[2] = sqrtf( src
->f
[2] );
877 dst
->f
[3] = sqrtf( src
->f
[3] );
882 union tgsi_exec_channel
*dst
,
883 const union tgsi_exec_channel
*src0
,
884 const union tgsi_exec_channel
*src1
)
886 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
887 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
888 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
889 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
894 union tgsi_exec_channel
*dst
,
895 const union tgsi_exec_channel
*src
)
897 dst
->f
[0] = (float) src
->u
[0];
898 dst
->f
[1] = (float) src
->u
[1];
899 dst
->f
[2] = (float) src
->u
[2];
900 dst
->f
[3] = (float) src
->u
[3];
905 union tgsi_exec_channel
*dst
,
906 const union tgsi_exec_channel
*src0
,
907 const union tgsi_exec_channel
*src1
)
909 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
910 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
911 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
912 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
916 fetch_src_file_channel(
917 const struct tgsi_exec_machine
*mach
,
920 const union tgsi_exec_channel
*index
,
921 union tgsi_exec_channel
*chan
)
924 case TGSI_EXTSWIZZLE_X
:
925 case TGSI_EXTSWIZZLE_Y
:
926 case TGSI_EXTSWIZZLE_Z
:
927 case TGSI_EXTSWIZZLE_W
:
929 case TGSI_FILE_CONSTANT
:
930 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
931 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
932 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
933 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
936 case TGSI_FILE_INPUT
:
937 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
938 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
939 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
940 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
943 case TGSI_FILE_TEMPORARY
:
944 assert(index
->i
[0] < TGSI_EXEC_NUM_TEMPS
);
945 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
946 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
947 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
948 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
951 case TGSI_FILE_IMMEDIATE
:
952 assert( index
->i
[0] < (int) mach
->ImmLimit
);
953 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
954 assert( index
->i
[1] < (int) mach
->ImmLimit
);
955 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
956 assert( index
->i
[2] < (int) mach
->ImmLimit
);
957 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
958 assert( index
->i
[3] < (int) mach
->ImmLimit
);
959 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
962 case TGSI_FILE_ADDRESS
:
963 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
964 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
965 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
966 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
969 case TGSI_FILE_OUTPUT
:
970 /* vertex/fragment output vars can be read too */
971 chan
->u
[0] = mach
->Outputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
972 chan
->u
[1] = mach
->Outputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
973 chan
->u
[2] = mach
->Outputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
974 chan
->u
[3] = mach
->Outputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
982 case TGSI_EXTSWIZZLE_ZERO
:
983 *chan
= mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
];
986 case TGSI_EXTSWIZZLE_ONE
:
987 *chan
= mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
];
997 const struct tgsi_exec_machine
*mach
,
998 union tgsi_exec_channel
*chan
,
999 const struct tgsi_full_src_register
*reg
,
1000 const uint chan_index
)
1002 union tgsi_exec_channel index
;
1008 index
.i
[3] = reg
->SrcRegister
.Index
;
1010 if (reg
->SrcRegister
.Indirect
) {
1011 union tgsi_exec_channel index2
;
1012 union tgsi_exec_channel indir_index
;
1017 index2
.i
[3] = reg
->SrcRegisterInd
.Index
;
1019 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterInd
, CHAN_X
);
1020 fetch_src_file_channel(
1022 reg
->SrcRegisterInd
.File
,
1027 index
.i
[0] += indir_index
.i
[0];
1028 index
.i
[1] += indir_index
.i
[1];
1029 index
.i
[2] += indir_index
.i
[2];
1030 index
.i
[3] += indir_index
.i
[3];
1033 if( reg
->SrcRegister
.Dimension
) {
1034 switch( reg
->SrcRegister
.File
) {
1035 case TGSI_FILE_INPUT
:
1041 case TGSI_FILE_CONSTANT
:
1051 index
.i
[0] += reg
->SrcRegisterDim
.Index
;
1052 index
.i
[1] += reg
->SrcRegisterDim
.Index
;
1053 index
.i
[2] += reg
->SrcRegisterDim
.Index
;
1054 index
.i
[3] += reg
->SrcRegisterDim
.Index
;
1056 if (reg
->SrcRegisterDim
.Indirect
) {
1057 union tgsi_exec_channel index2
;
1058 union tgsi_exec_channel indir_index
;
1063 index2
.i
[3] = reg
->SrcRegisterDimInd
.Index
;
1065 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterDimInd
, CHAN_X
);
1066 fetch_src_file_channel(
1068 reg
->SrcRegisterDimInd
.File
,
1073 index
.i
[0] += indir_index
.i
[0];
1074 index
.i
[1] += indir_index
.i
[1];
1075 index
.i
[2] += indir_index
.i
[2];
1076 index
.i
[3] += indir_index
.i
[3];
1080 swizzle
= tgsi_util_get_full_src_register_extswizzle( reg
, chan_index
);
1081 fetch_src_file_channel(
1083 reg
->SrcRegister
.File
,
1088 switch (tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
)) {
1089 case TGSI_UTIL_SIGN_CLEAR
:
1090 micro_abs( chan
, chan
);
1093 case TGSI_UTIL_SIGN_SET
:
1094 micro_abs( chan
, chan
);
1095 micro_neg( chan
, chan
);
1098 case TGSI_UTIL_SIGN_TOGGLE
:
1099 micro_neg( chan
, chan
);
1102 case TGSI_UTIL_SIGN_KEEP
:
1106 if (reg
->SrcRegisterExtMod
.Complement
) {
1107 micro_sub( chan
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], chan
);
1113 struct tgsi_exec_machine
*mach
,
1114 const union tgsi_exec_channel
*chan
,
1115 const struct tgsi_full_dst_register
*reg
,
1116 const struct tgsi_full_instruction
*inst
,
1119 union tgsi_exec_channel
*dst
;
1121 switch( reg
->DstRegister
.File
) {
1122 case TGSI_FILE_NULL
:
1125 case TGSI_FILE_OUTPUT
:
1126 dst
= &mach
->Outputs
[mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1127 + reg
->DstRegister
.Index
].xyzw
[chan_index
];
1130 case TGSI_FILE_TEMPORARY
:
1131 assert(reg
->DstRegister
.Index
< TGSI_EXEC_NUM_TEMPS
);
1132 dst
= &mach
->Temps
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1135 case TGSI_FILE_ADDRESS
:
1136 dst
= &mach
->Addrs
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1144 switch (inst
->Instruction
.Saturate
)
1147 if (mach
->ExecMask
& 0x1)
1148 dst
->i
[0] = chan
->i
[0];
1149 if (mach
->ExecMask
& 0x2)
1150 dst
->i
[1] = chan
->i
[1];
1151 if (mach
->ExecMask
& 0x4)
1152 dst
->i
[2] = chan
->i
[2];
1153 if (mach
->ExecMask
& 0x8)
1154 dst
->i
[3] = chan
->i
[3];
1157 case TGSI_SAT_ZERO_ONE
:
1158 /* XXX need to obey ExecMask here */
1159 micro_max(dst
, chan
, &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
1160 micro_min(dst
, dst
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
1163 case TGSI_SAT_MINUS_PLUS_ONE
:
/*
 * Convenience wrappers used by the instruction handlers.  Both expect
 * variables named `mach` and `inst` to be in scope at the point of use.
 */

/* Fetch channel CHAN of source operand INDEX of `inst` into *VAL. */
#define FETCH(VAL,INDEX,CHAN)\
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[INDEX], (CHAN) )

/* Store *VAL into channel CHAN of destination operand INDEX of `inst`. */
#define STORE(VAL,INDEX,CHAN)\
   store_dest( mach, (VAL), &inst->FullDstRegisters[INDEX], inst, (CHAN) )
/**
 * Execute ARB-style KIL which is predicated by a src register.
 * Kill fragment if any of the four values is less than zero.
 */
1184 exec_kilp(struct tgsi_exec_machine
*mach
,
1185 const struct tgsi_full_instruction
*inst
)
1189 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1190 union tgsi_exec_channel r
[1];
/* This mask stores component bits that were already tested. Note that
 * we test if the value is less than zero, so 1.0 and 0.0 need not be
 * tested. */
1195 uniquemask
= (1 << TGSI_EXTSWIZZLE_ZERO
) | (1 << TGSI_EXTSWIZZLE_ONE
);
1197 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1202 /* unswizzle channel */
1203 swizzle
= tgsi_util_get_full_src_register_extswizzle (
1204 &inst
->FullSrcRegisters
[0],
1207 /* check if the component has not been already tested */
1208 if (uniquemask
& (1 << swizzle
))
1210 uniquemask
|= 1 << swizzle
;
1212 FETCH(&r
[0], 0, chan_index
);
1213 for (i
= 0; i
< 4; i
++)
1214 if (r
[0].f
[i
] < 0.0f
)
1218 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1223 * Fetch a texel using STR texture coordinates.
1226 fetch_texel( struct tgsi_sampler
*sampler
,
1227 const union tgsi_exec_channel
*s
,
1228 const union tgsi_exec_channel
*t
,
1229 const union tgsi_exec_channel
*p
,
1230 float lodbias
, /* XXX should be float[4] */
1231 union tgsi_exec_channel
*r
,
1232 union tgsi_exec_channel
*g
,
1233 union tgsi_exec_channel
*b
,
1234 union tgsi_exec_channel
*a
)
1237 float rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1239 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, lodbias
, rgba
);
1241 for (j
= 0; j
< 4; j
++) {
1242 r
->f
[j
] = rgba
[0][j
];
1243 g
->f
[j
] = rgba
[1][j
];
1244 b
->f
[j
] = rgba
[2][j
];
1245 a
->f
[j
] = rgba
[3][j
];
1251 exec_tex(struct tgsi_exec_machine
*mach
,
1252 const struct tgsi_full_instruction
*inst
,
1256 const uint unit
= inst
->FullSrcRegisters
[1].SrcRegister
.Index
;
1257 union tgsi_exec_channel r
[8];
1261 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1263 switch (inst
->InstructionExtTexture
.Texture
) {
1264 case TGSI_TEXTURE_1D
:
1266 FETCH(&r
[0], 0, CHAN_X
);
1269 FETCH(&r
[1], 0, CHAN_W
);
1270 micro_div( &r
[0], &r
[0], &r
[1] );
1274 FETCH(&r
[1], 0, CHAN_W
);
1275 lodBias
= r
[2].f
[0];
1280 fetch_texel(&mach
->Samplers
[unit
],
1281 &r
[0], NULL
, NULL
, lodBias
, /* S, T, P, BIAS */
1282 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1285 case TGSI_TEXTURE_2D
:
1286 case TGSI_TEXTURE_RECT
:
1288 FETCH(&r
[0], 0, CHAN_X
);
1289 FETCH(&r
[1], 0, CHAN_Y
);
1290 FETCH(&r
[2], 0, CHAN_Z
);
1293 FETCH(&r
[3], 0, CHAN_W
);
1294 micro_div( &r
[0], &r
[0], &r
[3] );
1295 micro_div( &r
[1], &r
[1], &r
[3] );
1296 micro_div( &r
[2], &r
[2], &r
[3] );
1300 FETCH(&r
[3], 0, CHAN_W
);
1301 lodBias
= r
[3].f
[0];
1306 fetch_texel(&mach
->Samplers
[unit
],
1307 &r
[0], &r
[1], &r
[2], lodBias
, /* inputs */
1308 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1311 case TGSI_TEXTURE_3D
:
1312 case TGSI_TEXTURE_CUBE
:
1314 FETCH(&r
[0], 0, CHAN_X
);
1315 FETCH(&r
[1], 0, CHAN_Y
);
1316 FETCH(&r
[2], 0, CHAN_Z
);
1319 FETCH(&r
[3], 0, CHAN_W
);
1320 micro_div( &r
[0], &r
[0], &r
[3] );
1321 micro_div( &r
[1], &r
[1], &r
[3] );
1322 micro_div( &r
[2], &r
[2], &r
[3] );
1326 FETCH(&r
[3], 0, CHAN_W
);
1327 lodBias
= r
[3].f
[0];
1332 fetch_texel(&mach
->Samplers
[unit
],
1333 &r
[0], &r
[1], &r
[2], lodBias
,
1334 &r
[0], &r
[1], &r
[2], &r
[3]);
1341 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1342 STORE( &r
[chan_index
], 0, chan_index
);
1348 * Evaluate a constant-valued coefficient at the position of the
1353 struct tgsi_exec_machine
*mach
,
1359 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1360 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1365 * Evaluate a linear-valued coefficient at the position of the
1370 struct tgsi_exec_machine
*mach
,
1374 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1375 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1376 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1377 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1378 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1379 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
1380 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
1381 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
1382 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
1386 * Evaluate a perspective-valued coefficient at the position of the
1390 eval_perspective_coef(
1391 struct tgsi_exec_machine
*mach
,
1395 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1396 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1397 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1398 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1399 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1400 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
1401 /* divide by W here */
1402 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
1403 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
1404 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
1405 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
1409 typedef void (* eval_coef_func
)(
1410 struct tgsi_exec_machine
*mach
,
1416 struct tgsi_exec_machine
*mach
,
1417 const struct tgsi_full_declaration
*decl
)
1419 if( mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1420 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1421 unsigned first
, last
, mask
;
1422 eval_coef_func eval
;
1424 first
= decl
->DeclarationRange
.First
;
1425 last
= decl
->DeclarationRange
.Last
;
1426 mask
= decl
->Declaration
.UsageMask
;
1428 switch( decl
->Declaration
.Interpolate
) {
1429 case TGSI_INTERPOLATE_CONSTANT
:
1430 eval
= eval_constant_coef
;
1433 case TGSI_INTERPOLATE_LINEAR
:
1434 eval
= eval_linear_coef
;
1437 case TGSI_INTERPOLATE_PERSPECTIVE
:
1438 eval
= eval_perspective_coef
;
1445 if( mask
== TGSI_WRITEMASK_XYZW
) {
1448 for( i
= first
; i
<= last
; i
++ ) {
1449 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1457 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1458 if( mask
& (1 << j
) ) {
1459 for( i
= first
; i
<= last
; i
++ ) {
1471 struct tgsi_exec_machine
*mach
,
1472 const struct tgsi_full_instruction
*inst
,
1476 union tgsi_exec_channel r
[8];
1480 switch (inst
->Instruction
.Opcode
) {
1481 case TGSI_OPCODE_ARL
:
1482 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1483 FETCH( &r
[0], 0, chan_index
);
1484 micro_f2it( &r
[0], &r
[0] );
1485 STORE( &r
[0], 0, chan_index
);
1489 case TGSI_OPCODE_MOV
:
1490 case TGSI_OPCODE_SWZ
:
1491 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1492 FETCH( &r
[0], 0, chan_index
);
1493 STORE( &r
[0], 0, chan_index
);
1497 case TGSI_OPCODE_LIT
:
1498 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1499 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1502 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1503 FETCH( &r
[0], 0, CHAN_X
);
1504 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1505 micro_max( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1506 STORE( &r
[0], 0, CHAN_Y
);
1509 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1510 FETCH( &r
[1], 0, CHAN_Y
);
1511 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1513 FETCH( &r
[2], 0, CHAN_W
);
1514 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
1515 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
1516 micro_pow( &r
[1], &r
[1], &r
[2] );
1517 micro_lt( &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1518 STORE( &r
[0], 0, CHAN_Z
);
1522 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1523 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1527 case TGSI_OPCODE_RCP
:
1528 /* TGSI_OPCODE_RECIP */
1529 FETCH( &r
[0], 0, CHAN_X
);
1530 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1531 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1532 STORE( &r
[0], 0, chan_index
);
1536 case TGSI_OPCODE_RSQ
:
1537 /* TGSI_OPCODE_RECIPSQRT */
1538 FETCH( &r
[0], 0, CHAN_X
);
1539 micro_sqrt( &r
[0], &r
[0] );
1540 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1541 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1542 STORE( &r
[0], 0, chan_index
);
1546 case TGSI_OPCODE_EXP
:
1547 FETCH( &r
[0], 0, CHAN_X
);
1548 micro_flr( &r
[1], &r
[0] ); /* r1 = floor(r0) */
1549 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1550 micro_exp2( &r
[2], &r
[1] ); /* r2 = 2 ^ r1 */
1551 STORE( &r
[2], 0, CHAN_X
); /* store r2 */
1553 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1554 micro_sub( &r
[2], &r
[0], &r
[1] ); /* r2 = r0 - r1 */
1555 STORE( &r
[2], 0, CHAN_Y
); /* store r2 */
1557 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1558 micro_exp2( &r
[2], &r
[0] ); /* r2 = 2 ^ r0 */
1559 STORE( &r
[2], 0, CHAN_Z
); /* store r2 */
1561 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1562 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1566 case TGSI_OPCODE_LOG
:
1567 FETCH( &r
[0], 0, CHAN_X
);
1568 micro_abs( &r
[2], &r
[0] ); /* r2 = abs(r0) */
1569 micro_lg2( &r
[1], &r
[2] ); /* r1 = lg2(r2) */
1570 micro_flr( &r
[0], &r
[1] ); /* r0 = floor(r1) */
1571 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1572 STORE( &r
[0], 0, CHAN_X
);
1574 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1575 micro_exp2( &r
[0], &r
[0] ); /* r0 = 2 ^ r0 */
1576 micro_div( &r
[0], &r
[2], &r
[0] ); /* r0 = r2 / r0 */
1577 STORE( &r
[0], 0, CHAN_Y
);
1579 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1580 STORE( &r
[1], 0, CHAN_Z
);
1582 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1583 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1587 case TGSI_OPCODE_MUL
:
1588 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
)
1590 FETCH(&r
[0], 0, chan_index
);
1591 FETCH(&r
[1], 1, chan_index
);
1593 micro_mul( &r
[0], &r
[0], &r
[1] );
1595 STORE(&r
[0], 0, chan_index
);
1599 case TGSI_OPCODE_ADD
:
1600 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1601 FETCH( &r
[0], 0, chan_index
);
1602 FETCH( &r
[1], 1, chan_index
);
1603 micro_add( &r
[0], &r
[0], &r
[1] );
1604 STORE( &r
[0], 0, chan_index
);
1608 case TGSI_OPCODE_DP3
:
1609 /* TGSI_OPCODE_DOT3 */
1610 FETCH( &r
[0], 0, CHAN_X
);
1611 FETCH( &r
[1], 1, CHAN_X
);
1612 micro_mul( &r
[0], &r
[0], &r
[1] );
1614 FETCH( &r
[1], 0, CHAN_Y
);
1615 FETCH( &r
[2], 1, CHAN_Y
);
1616 micro_mul( &r
[1], &r
[1], &r
[2] );
1617 micro_add( &r
[0], &r
[0], &r
[1] );
1619 FETCH( &r
[1], 0, CHAN_Z
);
1620 FETCH( &r
[2], 1, CHAN_Z
);
1621 micro_mul( &r
[1], &r
[1], &r
[2] );
1622 micro_add( &r
[0], &r
[0], &r
[1] );
1624 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1625 STORE( &r
[0], 0, chan_index
);
1629 case TGSI_OPCODE_DP4
:
1630 /* TGSI_OPCODE_DOT4 */
1631 FETCH(&r
[0], 0, CHAN_X
);
1632 FETCH(&r
[1], 1, CHAN_X
);
1634 micro_mul( &r
[0], &r
[0], &r
[1] );
1636 FETCH(&r
[1], 0, CHAN_Y
);
1637 FETCH(&r
[2], 1, CHAN_Y
);
1639 micro_mul( &r
[1], &r
[1], &r
[2] );
1640 micro_add( &r
[0], &r
[0], &r
[1] );
1642 FETCH(&r
[1], 0, CHAN_Z
);
1643 FETCH(&r
[2], 1, CHAN_Z
);
1645 micro_mul( &r
[1], &r
[1], &r
[2] );
1646 micro_add( &r
[0], &r
[0], &r
[1] );
1648 FETCH(&r
[1], 0, CHAN_W
);
1649 FETCH(&r
[2], 1, CHAN_W
);
1651 micro_mul( &r
[1], &r
[1], &r
[2] );
1652 micro_add( &r
[0], &r
[0], &r
[1] );
1654 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1655 STORE( &r
[0], 0, chan_index
);
1659 case TGSI_OPCODE_DST
:
1660 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1661 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1664 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1665 FETCH( &r
[0], 0, CHAN_Y
);
1666 FETCH( &r
[1], 1, CHAN_Y
);
1667 micro_mul( &r
[0], &r
[0], &r
[1] );
1668 STORE( &r
[0], 0, CHAN_Y
);
1671 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1672 FETCH( &r
[0], 0, CHAN_Z
);
1673 STORE( &r
[0], 0, CHAN_Z
);
1676 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1677 FETCH( &r
[0], 1, CHAN_W
);
1678 STORE( &r
[0], 0, CHAN_W
);
1682 case TGSI_OPCODE_MIN
:
1683 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1684 FETCH(&r
[0], 0, chan_index
);
1685 FETCH(&r
[1], 1, chan_index
);
1687 /* XXX use micro_min()?? */
1688 micro_lt( &r
[0], &r
[0], &r
[1], &r
[0], &r
[1] );
1690 STORE(&r
[0], 0, chan_index
);
1694 case TGSI_OPCODE_MAX
:
1695 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1696 FETCH(&r
[0], 0, chan_index
);
1697 FETCH(&r
[1], 1, chan_index
);
1699 /* XXX use micro_max()?? */
1700 micro_lt( &r
[0], &r
[0], &r
[1], &r
[1], &r
[0] );
1702 STORE(&r
[0], 0, chan_index
);
1706 case TGSI_OPCODE_SLT
:
1707 /* TGSI_OPCODE_SETLT */
1708 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1709 FETCH( &r
[0], 0, chan_index
);
1710 FETCH( &r
[1], 1, chan_index
);
1711 micro_lt( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1712 STORE( &r
[0], 0, chan_index
);
1716 case TGSI_OPCODE_SGE
:
1717 /* TGSI_OPCODE_SETGE */
1718 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1719 FETCH( &r
[0], 0, chan_index
);
1720 FETCH( &r
[1], 1, chan_index
);
1721 micro_ge( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1722 STORE( &r
[0], 0, chan_index
);
1726 case TGSI_OPCODE_MAD
:
1727 /* TGSI_OPCODE_MADD */
1728 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1729 FETCH( &r
[0], 0, chan_index
);
1730 FETCH( &r
[1], 1, chan_index
);
1731 micro_mul( &r
[0], &r
[0], &r
[1] );
1732 FETCH( &r
[1], 2, chan_index
);
1733 micro_add( &r
[0], &r
[0], &r
[1] );
1734 STORE( &r
[0], 0, chan_index
);
1738 case TGSI_OPCODE_SUB
:
1739 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1740 FETCH(&r
[0], 0, chan_index
);
1741 FETCH(&r
[1], 1, chan_index
);
1743 micro_sub( &r
[0], &r
[0], &r
[1] );
1745 STORE(&r
[0], 0, chan_index
);
1749 case TGSI_OPCODE_LERP
:
1750 /* TGSI_OPCODE_LRP */
1751 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1752 FETCH(&r
[0], 0, chan_index
);
1753 FETCH(&r
[1], 1, chan_index
);
1754 FETCH(&r
[2], 2, chan_index
);
1756 micro_sub( &r
[1], &r
[1], &r
[2] );
1757 micro_mul( &r
[0], &r
[0], &r
[1] );
1758 micro_add( &r
[0], &r
[0], &r
[2] );
1760 STORE(&r
[0], 0, chan_index
);
1764 case TGSI_OPCODE_CND
:
1768 case TGSI_OPCODE_CND0
:
1772 case TGSI_OPCODE_DOT2ADD
:
1773 /* TGSI_OPCODE_DP2A */
1777 case TGSI_OPCODE_INDEX
:
1781 case TGSI_OPCODE_NEGATE
:
1785 case TGSI_OPCODE_FRAC
:
1786 /* TGSI_OPCODE_FRC */
1787 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1788 FETCH( &r
[0], 0, chan_index
);
1789 micro_frc( &r
[0], &r
[0] );
1790 STORE( &r
[0], 0, chan_index
);
1794 case TGSI_OPCODE_CLAMP
:
1798 case TGSI_OPCODE_FLOOR
:
1799 /* TGSI_OPCODE_FLR */
1800 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1801 FETCH( &r
[0], 0, chan_index
);
1802 micro_flr( &r
[0], &r
[0] );
1803 STORE( &r
[0], 0, chan_index
);
1807 case TGSI_OPCODE_ROUND
:
1808 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1809 FETCH( &r
[0], 0, chan_index
);
1810 micro_rnd( &r
[0], &r
[0] );
1811 STORE( &r
[0], 0, chan_index
);
1815 case TGSI_OPCODE_EXPBASE2
:
1816 /* TGSI_OPCODE_EX2 */
1817 FETCH(&r
[0], 0, CHAN_X
);
1819 micro_pow( &r
[0], &mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
], &r
[0] );
1821 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1822 STORE( &r
[0], 0, chan_index
);
1826 case TGSI_OPCODE_LOGBASE2
:
1827 /* TGSI_OPCODE_LG2 */
1828 FETCH( &r
[0], 0, CHAN_X
);
1829 micro_lg2( &r
[0], &r
[0] );
1830 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1831 STORE( &r
[0], 0, chan_index
);
1835 case TGSI_OPCODE_POWER
:
1836 /* TGSI_OPCODE_POW */
1837 FETCH(&r
[0], 0, CHAN_X
);
1838 FETCH(&r
[1], 1, CHAN_X
);
1840 micro_pow( &r
[0], &r
[0], &r
[1] );
1842 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1843 STORE( &r
[0], 0, chan_index
);
1847 case TGSI_OPCODE_CROSSPRODUCT
:
1848 /* TGSI_OPCODE_XPD */
1849 FETCH(&r
[0], 0, CHAN_Y
);
1850 FETCH(&r
[1], 1, CHAN_Z
);
1852 micro_mul( &r
[2], &r
[0], &r
[1] );
1854 FETCH(&r
[3], 0, CHAN_Z
);
1855 FETCH(&r
[4], 1, CHAN_Y
);
1857 micro_mul( &r
[5], &r
[3], &r
[4] );
1858 micro_sub( &r
[2], &r
[2], &r
[5] );
1860 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1861 STORE( &r
[2], 0, CHAN_X
);
1864 FETCH(&r
[2], 1, CHAN_X
);
1866 micro_mul( &r
[3], &r
[3], &r
[2] );
1868 FETCH(&r
[5], 0, CHAN_X
);
1870 micro_mul( &r
[1], &r
[1], &r
[5] );
1871 micro_sub( &r
[3], &r
[3], &r
[1] );
1873 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1874 STORE( &r
[3], 0, CHAN_Y
);
1877 micro_mul( &r
[5], &r
[5], &r
[4] );
1878 micro_mul( &r
[0], &r
[0], &r
[2] );
1879 micro_sub( &r
[5], &r
[5], &r
[0] );
1881 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1882 STORE( &r
[5], 0, CHAN_Z
);
1885 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1886 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1890 case TGSI_OPCODE_MULTIPLYMATRIX
:
1894 case TGSI_OPCODE_ABS
:
1895 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1896 FETCH(&r
[0], 0, chan_index
);
1898 micro_abs( &r
[0], &r
[0] );
1900 STORE(&r
[0], 0, chan_index
);
1904 case TGSI_OPCODE_RCC
:
1908 case TGSI_OPCODE_DPH
:
1909 FETCH(&r
[0], 0, CHAN_X
);
1910 FETCH(&r
[1], 1, CHAN_X
);
1912 micro_mul( &r
[0], &r
[0], &r
[1] );
1914 FETCH(&r
[1], 0, CHAN_Y
);
1915 FETCH(&r
[2], 1, CHAN_Y
);
1917 micro_mul( &r
[1], &r
[1], &r
[2] );
1918 micro_add( &r
[0], &r
[0], &r
[1] );
1920 FETCH(&r
[1], 0, CHAN_Z
);
1921 FETCH(&r
[2], 1, CHAN_Z
);
1923 micro_mul( &r
[1], &r
[1], &r
[2] );
1924 micro_add( &r
[0], &r
[0], &r
[1] );
1926 FETCH(&r
[1], 1, CHAN_W
);
1928 micro_add( &r
[0], &r
[0], &r
[1] );
1930 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1931 STORE( &r
[0], 0, chan_index
);
1935 case TGSI_OPCODE_COS
:
1936 FETCH(&r
[0], 0, CHAN_X
);
1938 micro_cos( &r
[0], &r
[0] );
1940 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1941 STORE( &r
[0], 0, chan_index
);
1945 case TGSI_OPCODE_DDX
:
1946 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1947 FETCH( &r
[0], 0, chan_index
);
1948 micro_ddx( &r
[0], &r
[0] );
1949 STORE( &r
[0], 0, chan_index
);
1953 case TGSI_OPCODE_DDY
:
1954 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1955 FETCH( &r
[0], 0, chan_index
);
1956 micro_ddy( &r
[0], &r
[0] );
1957 STORE( &r
[0], 0, chan_index
);
1961 case TGSI_OPCODE_KILP
:
1962 exec_kilp (mach
, inst
);
1965 case TGSI_OPCODE_KIL
:
1966 /* for enabled ExecMask bits, set the killed bit */
1967 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= mach
->ExecMask
;
1970 case TGSI_OPCODE_PK2H
:
1974 case TGSI_OPCODE_PK2US
:
1978 case TGSI_OPCODE_PK4B
:
1982 case TGSI_OPCODE_PK4UB
:
1986 case TGSI_OPCODE_RFL
:
1990 case TGSI_OPCODE_SEQ
:
1991 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1992 FETCH( &r
[0], 0, chan_index
);
1993 FETCH( &r
[1], 1, chan_index
);
1994 micro_eq( &r
[0], &r
[0], &r
[1],
1995 &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
],
1996 &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1997 STORE( &r
[0], 0, chan_index
);
2001 case TGSI_OPCODE_SFL
:
2005 case TGSI_OPCODE_SGT
:
2006 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2007 FETCH( &r
[0], 0, chan_index
);
2008 FETCH( &r
[1], 1, chan_index
);
2009 micro_le( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
2010 STORE( &r
[0], 0, chan_index
);
2014 case TGSI_OPCODE_SIN
:
2015 FETCH( &r
[0], 0, CHAN_X
);
2016 micro_sin( &r
[0], &r
[0] );
2017 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2018 STORE( &r
[0], 0, chan_index
);
2022 case TGSI_OPCODE_SLE
:
2023 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2024 FETCH( &r
[0], 0, chan_index
);
2025 FETCH( &r
[1], 1, chan_index
);
2026 micro_le( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2027 STORE( &r
[0], 0, chan_index
);
2031 case TGSI_OPCODE_SNE
:
2032 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2033 FETCH( &r
[0], 0, chan_index
);
2034 FETCH( &r
[1], 1, chan_index
);
2035 micro_eq( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
2036 STORE( &r
[0], 0, chan_index
);
2040 case TGSI_OPCODE_STR
:
2044 case TGSI_OPCODE_TEX
:
2045 /* simple texture lookup */
2046 /* src[0] = texcoord */
2047 /* src[1] = sampler unit */
2048 exec_tex(mach
, inst
, FALSE
, FALSE
);
2051 case TGSI_OPCODE_TXB
:
2052 /* Texture lookup with lod bias */
2053 /* src[0] = texcoord (src[0].w = LOD bias) */
2054 /* src[1] = sampler unit */
2055 exec_tex(mach
, inst
, TRUE
, FALSE
);
2058 case TGSI_OPCODE_TXD
:
2059 /* Texture lookup with explicit partial derivatives */
2060 /* src[0] = texcoord */
2061 /* src[1] = d[strq]/dx */
2062 /* src[2] = d[strq]/dy */
2063 /* src[3] = sampler unit */
2067 case TGSI_OPCODE_TXL
:
2068 /* Texture lookup with explicit LOD */
2069 /* src[0] = texcoord (src[0].w = LOD) */
2070 /* src[1] = sampler unit */
2071 exec_tex(mach
, inst
, TRUE
, FALSE
);
2074 case TGSI_OPCODE_TXP
:
2075 /* Texture lookup with projection */
2076 /* src[0] = texcoord (src[0].w = projection) */
2077 /* src[1] = sampler unit */
2078 exec_tex(mach
, inst
, FALSE
, TRUE
);
2081 case TGSI_OPCODE_UP2H
:
2085 case TGSI_OPCODE_UP2US
:
2089 case TGSI_OPCODE_UP4B
:
2093 case TGSI_OPCODE_UP4UB
:
2097 case TGSI_OPCODE_X2D
:
2101 case TGSI_OPCODE_ARA
:
2105 case TGSI_OPCODE_ARR
:
2109 case TGSI_OPCODE_BRA
:
2113 case TGSI_OPCODE_CAL
:
2114 /* skip the call if no execution channels are enabled */
2115 if (mach
->ExecMask
) {
2118 /* push the Cond, Loop, Cont stacks */
2119 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2120 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2121 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2122 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2123 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2124 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2126 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
2127 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
2129 /* note that PC was already incremented above */
2130 mach
->CallStack
[mach
->CallStackTop
++] = *pc
;
2131 *pc
= inst
->InstructionExtLabel
.Label
;
2135 case TGSI_OPCODE_RET
:
2136 mach
->FuncMask
&= ~mach
->ExecMask
;
2137 UPDATE_EXEC_MASK(mach
);
2139 if (mach
->ExecMask
== 0x0) {
2140 /* really return now (otherwise, keep executing) */
2142 if (mach
->CallStackTop
== 0) {
2143 /* returning from main() */
2147 *pc
= mach
->CallStack
[--mach
->CallStackTop
];
2149 /* pop the Cond, Loop, Cont stacks */
2150 assert(mach
->CondStackTop
> 0);
2151 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2152 assert(mach
->LoopStackTop
> 0);
2153 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2154 assert(mach
->ContStackTop
> 0);
2155 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2156 assert(mach
->FuncStackTop
> 0);
2157 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
2159 UPDATE_EXEC_MASK(mach
);
2163 case TGSI_OPCODE_SSG
:
2167 case TGSI_OPCODE_CMP
:
2168 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2169 FETCH(&r
[0], 0, chan_index
);
2170 FETCH(&r
[1], 1, chan_index
);
2171 FETCH(&r
[2], 2, chan_index
);
2173 micro_lt( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2] );
2175 STORE(&r
[0], 0, chan_index
);
2179 case TGSI_OPCODE_SCS
:
2180 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2181 FETCH( &r
[0], 0, CHAN_X
);
2183 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) ) {
2184 micro_cos( &r
[1], &r
[0] );
2185 STORE( &r
[1], 0, CHAN_X
);
2187 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2188 micro_sin( &r
[1], &r
[0] );
2189 STORE( &r
[1], 0, CHAN_Y
);
2191 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
2192 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
2194 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
2195 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2199 case TGSI_OPCODE_NRM
:
2203 case TGSI_OPCODE_DIV
:
2207 case TGSI_OPCODE_DP2
:
2208 FETCH( &r
[0], 0, CHAN_X
);
2209 FETCH( &r
[1], 1, CHAN_X
);
2210 micro_mul( &r
[0], &r
[0], &r
[1] );
2212 FETCH( &r
[1], 0, CHAN_Y
);
2213 FETCH( &r
[2], 1, CHAN_Y
);
2214 micro_mul( &r
[1], &r
[1], &r
[2] );
2215 micro_add( &r
[0], &r
[0], &r
[1] );
2217 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2218 STORE( &r
[0], 0, chan_index
);
2222 case TGSI_OPCODE_IF
:
2224 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2225 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2226 FETCH( &r
[0], 0, CHAN_X
);
2227 /* update CondMask */
2229 mach
->CondMask
&= ~0x1;
2232 mach
->CondMask
&= ~0x2;
2235 mach
->CondMask
&= ~0x4;
2238 mach
->CondMask
&= ~0x8;
2240 UPDATE_EXEC_MASK(mach
);
2241 /* Todo: If CondMask==0, jump to ELSE */
2244 case TGSI_OPCODE_ELSE
:
2245 /* invert CondMask wrt previous mask */
2248 assert(mach
->CondStackTop
> 0);
2249 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
2250 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
2251 UPDATE_EXEC_MASK(mach
);
2252 /* Todo: If CondMask==0, jump to ENDIF */
2256 case TGSI_OPCODE_ENDIF
:
2258 assert(mach
->CondStackTop
> 0);
2259 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2260 UPDATE_EXEC_MASK(mach
);
2263 case TGSI_OPCODE_END
:
2264 /* halt execution */
2268 case TGSI_OPCODE_REP
:
2272 case TGSI_OPCODE_ENDREP
:
2276 case TGSI_OPCODE_PUSHA
:
2280 case TGSI_OPCODE_POPA
:
2284 case TGSI_OPCODE_CEIL
:
2285 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2286 FETCH( &r
[0], 0, chan_index
);
2287 micro_ceil( &r
[0], &r
[0] );
2288 STORE( &r
[0], 0, chan_index
);
2292 case TGSI_OPCODE_I2F
:
2293 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2294 FETCH( &r
[0], 0, chan_index
);
2295 micro_i2f( &r
[0], &r
[0] );
2296 STORE( &r
[0], 0, chan_index
);
2300 case TGSI_OPCODE_NOT
:
2301 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2302 FETCH( &r
[0], 0, chan_index
);
2303 micro_not( &r
[0], &r
[0] );
2304 STORE( &r
[0], 0, chan_index
);
2308 case TGSI_OPCODE_TRUNC
:
2309 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2310 FETCH( &r
[0], 0, chan_index
);
2311 micro_trunc( &r
[0], &r
[0] );
2312 STORE( &r
[0], 0, chan_index
);
2316 case TGSI_OPCODE_SHL
:
2317 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2318 FETCH( &r
[0], 0, chan_index
);
2319 FETCH( &r
[1], 1, chan_index
);
2320 micro_shl( &r
[0], &r
[0], &r
[1] );
2321 STORE( &r
[0], 0, chan_index
);
2325 case TGSI_OPCODE_SHR
:
2326 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2327 FETCH( &r
[0], 0, chan_index
);
2328 FETCH( &r
[1], 1, chan_index
);
2329 micro_ishr( &r
[0], &r
[0], &r
[1] );
2330 STORE( &r
[0], 0, chan_index
);
2334 case TGSI_OPCODE_AND
:
2335 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2336 FETCH( &r
[0], 0, chan_index
);
2337 FETCH( &r
[1], 1, chan_index
);
2338 micro_and( &r
[0], &r
[0], &r
[1] );
2339 STORE( &r
[0], 0, chan_index
);
2343 case TGSI_OPCODE_OR
:
2344 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2345 FETCH( &r
[0], 0, chan_index
);
2346 FETCH( &r
[1], 1, chan_index
);
2347 micro_or( &r
[0], &r
[0], &r
[1] );
2348 STORE( &r
[0], 0, chan_index
);
2352 case TGSI_OPCODE_MOD
:
2356 case TGSI_OPCODE_XOR
:
2357 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2358 FETCH( &r
[0], 0, chan_index
);
2359 FETCH( &r
[1], 1, chan_index
);
2360 micro_xor( &r
[0], &r
[0], &r
[1] );
2361 STORE( &r
[0], 0, chan_index
);
2365 case TGSI_OPCODE_SAD
:
2369 case TGSI_OPCODE_TXF
:
2373 case TGSI_OPCODE_TXQ
:
2377 case TGSI_OPCODE_EMIT
:
2378 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += 16;
2379 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
2382 case TGSI_OPCODE_ENDPRIM
:
2383 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]++;
2384 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]] = 0;
2387 case TGSI_OPCODE_LOOP
:
2388 /* fall-through (for now) */
2389 case TGSI_OPCODE_BGNLOOP2
:
2390 /* push LoopMask and ContMasks */
2391 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2392 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2393 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2394 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2397 case TGSI_OPCODE_ENDLOOP
:
2398 /* fall-through (for now at least) */
2399 case TGSI_OPCODE_ENDLOOP2
:
2400 /* Restore ContMask, but don't pop */
2401 assert(mach
->ContStackTop
> 0);
2402 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
2403 UPDATE_EXEC_MASK(mach
);
2404 if (mach
->ExecMask
) {
2405 /* repeat loop: jump to instruction just past BGNLOOP */
2406 *pc
= inst
->InstructionExtLabel
.Label
+ 1;
2409 /* exit loop: pop LoopMask */
2410 assert(mach
->LoopStackTop
> 0);
2411 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2413 assert(mach
->ContStackTop
> 0);
2414 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2416 UPDATE_EXEC_MASK(mach
);
2419 case TGSI_OPCODE_BRK
:
2420 /* turn off loop channels for each enabled exec channel */
2421 mach
->LoopMask
&= ~mach
->ExecMask
;
2422 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2423 UPDATE_EXEC_MASK(mach
);
2426 case TGSI_OPCODE_CONT
:
2427 /* turn off cont channels for each enabled exec channel */
2428 mach
->ContMask
&= ~mach
->ExecMask
;
2429 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2430 UPDATE_EXEC_MASK(mach
);
2433 case TGSI_OPCODE_BGNSUB
:
2437 case TGSI_OPCODE_ENDSUB
:
2441 case TGSI_OPCODE_NOISE1
:
2445 case TGSI_OPCODE_NOISE2
:
2449 case TGSI_OPCODE_NOISE3
:
2453 case TGSI_OPCODE_NOISE4
:
2457 case TGSI_OPCODE_NOP
:
2467 * Run TGSI interpreter.
2468 * \return bitmask of "alive" quad components
2471 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
2476 mach
->CondMask
= 0xf;
2477 mach
->LoopMask
= 0xf;
2478 mach
->ContMask
= 0xf;
2479 mach
->FuncMask
= 0xf;
2480 mach
->ExecMask
= 0xf;
2482 mach
->CondStackTop
= 0; /* temporarily subvert this assertion */
2483 assert(mach
->CondStackTop
== 0);
2484 assert(mach
->LoopStackTop
== 0);
2485 assert(mach
->ContStackTop
== 0);
2486 assert(mach
->CallStackTop
== 0);
2488 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
2489 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
2491 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
2492 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
2493 mach
->Primitives
[0] = 0;
2497 /* execute declarations (interpolants) */
2498 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
2499 exec_declaration( mach
, mach
->Declarations
+i
);
2502 /* execute instructions, until pc is set to -1 */
2504 assert(pc
< (int) mach
->NumInstructions
);
2505 exec_instruction( mach
, mach
->Instructions
+ pc
, &pc
);
2509 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2510 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
2512 * Scale back depth component.
2514 for (i
= 0; i
< 4; i
++)
2515 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
2519 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];