1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * TGSI interpreter/executor.
31 * Flow control information:
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
43 * The ExecMask is computed from CondMask, LoopMask and ContMask, which are
44 * controlled by the flow control instructions (namely IF/ELSE/ENDIF,
45 * LOOP/ENDLOOP and CONT), together with FuncMask (see UPDATE_EXEC_MASK).
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_util.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "pipe/tgsi/util/tgsi_parse.h"
58 #include "pipe/tgsi/util/tgsi_util.h"
59 #include "tgsi_exec.h"
/* Indices of the four quad pixels within a channel's 4-wide value array;
 * used below to index src->f[] when differencing neighbouring pixels. */
61 #define TILE_TOP_LEFT 0
62 #define TILE_TOP_RIGHT 1
63 #define TILE_BOTTOM_LEFT 2
64 #define TILE_BOTTOM_RIGHT 3
67 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
69 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
70 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
71 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
72 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
73 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
74 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
75 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
76 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
77 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
78 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
79 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
80 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
81 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
82 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
83 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
84 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
85 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
86 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
87 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
88 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
89 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
90 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
91 #define TEMP_R0 TGSI_EXEC_TEMP_R0
/** Step CHAN (any integer lvalue) through the four channel indices 0..3. */
#define FOR_EACH_CHANNEL(CHAN)\
   for ((CHAN) = 0; (CHAN) < 4; (CHAN)++)

/** Non-zero when bit CHAN is set in the write mask of destination register 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Non-zero when bit CHAN is set in the write mask of destination register 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Run the following statement once for every channel enabled in the write
 * mask of destination register 0.
 * Expands to `for (...) if (...)`, so an `else` placed directly after the
 * controlled statement binds to the hidden `if`.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same as FOR_EACH_ENABLED_CHANNEL, but for destination register 1. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
/**
 * Recompute the execution mask of machine MACH (a pointer expression) as the
 * bitwise AND of its conditional, loop, continue and function masks.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
124 tgsi_exec_prepare( struct tgsi_exec_machine
*mach
)
126 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
127 struct tgsi_parse_context parse
;
128 struct tgsi_full_instruction
*instructions
;
129 struct tgsi_full_declaration
*declarations
;
130 uint maxInstructions
= 10, numInstructions
= 0;
131 uint maxDeclarations
= 10, numDeclarations
= 0;
138 declarations
= (struct tgsi_full_declaration
*)
139 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
141 instructions
= (struct tgsi_full_instruction
*)
142 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
144 k
= tgsi_parse_init( &parse
, mach
->Tokens
);
145 if (k
!= TGSI_PARSE_OK
) {
146 debug_printf("Problem parsing!\n");
150 while( !tgsi_parse_end_of_tokens( &parse
) ) {
151 uint pointer
= parse
.Position
;
154 tgsi_parse_token( &parse
);
155 switch( parse
.FullToken
.Token
.Type
) {
156 case TGSI_TOKEN_TYPE_DECLARATION
:
157 /* save expanded declaration */
158 if (numDeclarations
== maxDeclarations
) {
159 declarations
= REALLOC(declarations
,
161 * sizeof(struct tgsi_full_instruction
),
162 (maxDeclarations
+ 10)
163 * sizeof(struct tgsi_full_instruction
));
164 maxDeclarations
+= 10;
166 memcpy(declarations
+ numDeclarations
,
167 &parse
.FullToken
.FullInstruction
,
168 sizeof(declarations
[0]));
172 case TGSI_TOKEN_TYPE_IMMEDIATE
:
174 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1;
175 assert( size
% 4 == 0 );
176 assert( mach
->ImmLimit
+ size
/ 4 <= TGSI_EXEC_NUM_IMMEDIATES
);
178 for( i
= 0; i
< size
; i
++ ) {
179 mach
->Imms
[mach
->ImmLimit
+ i
/ 4][i
% 4] = parse
.FullToken
.FullImmediate
.u
.ImmediateFloat32
[i
].Float
;
181 mach
->ImmLimit
+= size
/ 4;
185 case TGSI_TOKEN_TYPE_INSTRUCTION
:
186 assert( labels
->count
< 128 );
188 labels
->labels
[labels
->count
][0] = instno
;
189 labels
->labels
[labels
->count
][1] = pointer
;
192 /* save expanded instruction */
193 if (numInstructions
== maxInstructions
) {
194 instructions
= REALLOC(instructions
,
196 * sizeof(struct tgsi_full_instruction
),
197 (maxInstructions
+ 10)
198 * sizeof(struct tgsi_full_instruction
));
199 maxInstructions
+= 10;
201 memcpy(instructions
+ numInstructions
,
202 &parse
.FullToken
.FullInstruction
,
203 sizeof(instructions
[0]));
211 tgsi_parse_free (&parse
);
213 if (mach
->Declarations
) {
214 FREE( mach
->Declarations
);
216 mach
->Declarations
= declarations
;
217 mach
->NumDeclarations
= numDeclarations
;
219 if (mach
->Instructions
) {
220 FREE( mach
->Instructions
);
222 mach
->Instructions
= instructions
;
223 mach
->NumInstructions
= numInstructions
;
228 * Initialize machine state by expanding tokens to full instructions,
229 * allocating temporary storage, setting up constants, etc.
230 * After this, we can call tgsi_exec_machine_run() many times.
233 tgsi_exec_machine_init(
234 struct tgsi_exec_machine
*mach
,
235 const struct tgsi_token
*tokens
,
237 struct tgsi_sampler
*samplers
)
240 struct tgsi_parse_context parse
;
243 tgsi_dump(tokens
, 0);
246 mach
->Tokens
= tokens
;
248 mach
->Samplers
= samplers
;
250 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
251 if (k
!= TGSI_PARSE_OK
) {
252 debug_printf( "Problem parsing!\n" );
256 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
257 tgsi_parse_free (&parse
);
259 mach
->Temps
= (struct tgsi_exec_vector
*) tgsi_align_128bit( mach
->_Temps
);
260 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_NUM_TEMPS
];
262 /* Setup constants. */
263 for( i
= 0; i
< 4; i
++ ) {
264 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
265 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
266 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
267 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
268 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
269 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
270 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
271 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
274 tgsi_exec_prepare( mach
);
279 tgsi_exec_machine_free_data(struct tgsi_exec_machine
*mach
)
281 if (mach
->Instructions
) {
282 FREE(mach
->Instructions
);
283 mach
->Instructions
= NULL
;
284 mach
->NumInstructions
= 0;
286 if (mach
->Declarations
) {
287 FREE(mach
->Declarations
);
288 mach
->Declarations
= NULL
;
289 mach
->NumDeclarations
= 0;
296 union tgsi_exec_channel
*dst
,
297 const union tgsi_exec_channel
*src
)
299 dst
->f
[0] = (float) fabs( (double) src
->f
[0] );
300 dst
->f
[1] = (float) fabs( (double) src
->f
[1] );
301 dst
->f
[2] = (float) fabs( (double) src
->f
[2] );
302 dst
->f
[3] = (float) fabs( (double) src
->f
[3] );
307 union tgsi_exec_channel
*dst
,
308 const union tgsi_exec_channel
*src0
,
309 const union tgsi_exec_channel
*src1
)
311 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
312 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
313 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
314 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
319 union tgsi_exec_channel
*dst
,
320 const union tgsi_exec_channel
*src0
,
321 const union tgsi_exec_channel
*src1
)
323 dst
->i
[0] = src0
->i
[0] + src1
->i
[0];
324 dst
->i
[1] = src0
->i
[1] + src1
->i
[1];
325 dst
->i
[2] = src0
->i
[2] + src1
->i
[2];
326 dst
->i
[3] = src0
->i
[3] + src1
->i
[3];
331 union tgsi_exec_channel
*dst
,
332 const union tgsi_exec_channel
*src0
,
333 const union tgsi_exec_channel
*src1
)
335 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
336 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
337 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
338 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
343 union tgsi_exec_channel
*dst
,
344 const union tgsi_exec_channel
*src
)
346 dst
->f
[0] = (float) ceil( (double) src
->f
[0] );
347 dst
->f
[1] = (float) ceil( (double) src
->f
[1] );
348 dst
->f
[2] = (float) ceil( (double) src
->f
[2] );
349 dst
->f
[3] = (float) ceil( (double) src
->f
[3] );
354 union tgsi_exec_channel
*dst
,
355 const union tgsi_exec_channel
*src
)
357 dst
->f
[0] = (float) cos( (double) src
->f
[0] );
358 dst
->f
[1] = (float) cos( (double) src
->f
[1] );
359 dst
->f
[2] = (float) cos( (double) src
->f
[2] );
360 dst
->f
[3] = (float) cos( (double) src
->f
[3] );
365 union tgsi_exec_channel
*dst
,
366 const union tgsi_exec_channel
*src
)
371 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
376 union tgsi_exec_channel
*dst
,
377 const union tgsi_exec_channel
*src
)
382 dst
->f
[3] = src
->f
[TILE_TOP_LEFT
] - src
->f
[TILE_BOTTOM_LEFT
];
387 union tgsi_exec_channel
*dst
,
388 const union tgsi_exec_channel
*src0
,
389 const union tgsi_exec_channel
*src1
)
391 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
392 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
393 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
394 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
399 union tgsi_exec_channel
*dst
,
400 const union tgsi_exec_channel
*src0
,
401 const union tgsi_exec_channel
*src1
)
403 dst
->u
[0] = src0
->u
[0] / src1
->u
[0];
404 dst
->u
[1] = src0
->u
[1] / src1
->u
[1];
405 dst
->u
[2] = src0
->u
[2] / src1
->u
[2];
406 dst
->u
[3] = src0
->u
[3] / src1
->u
[3];
411 union tgsi_exec_channel
*dst
,
412 const union tgsi_exec_channel
*src0
,
413 const union tgsi_exec_channel
*src1
,
414 const union tgsi_exec_channel
*src2
,
415 const union tgsi_exec_channel
*src3
)
417 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
418 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
419 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
420 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
425 union tgsi_exec_channel
*dst
,
426 const union tgsi_exec_channel
*src0
,
427 const union tgsi_exec_channel
*src1
,
428 const union tgsi_exec_channel
*src2
,
429 const union tgsi_exec_channel
*src3
)
431 dst
->i
[0] = src0
->i
[0] == src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
432 dst
->i
[1] = src0
->i
[1] == src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
433 dst
->i
[2] = src0
->i
[2] == src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
434 dst
->i
[3] = src0
->i
[3] == src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
439 union tgsi_exec_channel
*dst
,
440 const union tgsi_exec_channel
*src
)
442 dst
->f
[0] = (float) pow( 2.0, (double) src
->f
[0] );
443 dst
->f
[1] = (float) pow( 2.0, (double) src
->f
[1] );
444 dst
->f
[2] = (float) pow( 2.0, (double) src
->f
[2] );
445 dst
->f
[3] = (float) pow( 2.0, (double) src
->f
[3] );
450 union tgsi_exec_channel
*dst
,
451 const union tgsi_exec_channel
*src
)
453 dst
->i
[0] = (int) src
->f
[0];
454 dst
->i
[1] = (int) src
->f
[1];
455 dst
->i
[2] = (int) src
->f
[2];
456 dst
->i
[3] = (int) src
->f
[3];
461 union tgsi_exec_channel
*dst
,
462 const union tgsi_exec_channel
*src
)
464 dst
->u
[0] = (uint
) src
->f
[0];
465 dst
->u
[1] = (uint
) src
->f
[1];
466 dst
->u
[2] = (uint
) src
->f
[2];
467 dst
->u
[3] = (uint
) src
->f
[3];
472 union tgsi_exec_channel
*dst
,
473 const union tgsi_exec_channel
*src
)
475 dst
->f
[0] = (float) floor( (double) src
->f
[0] );
476 dst
->f
[1] = (float) floor( (double) src
->f
[1] );
477 dst
->f
[2] = (float) floor( (double) src
->f
[2] );
478 dst
->f
[3] = (float) floor( (double) src
->f
[3] );
483 union tgsi_exec_channel
*dst
,
484 const union tgsi_exec_channel
*src
)
486 dst
->f
[0] = src
->f
[0] - (float) floor( (double) src
->f
[0] );
487 dst
->f
[1] = src
->f
[1] - (float) floor( (double) src
->f
[1] );
488 dst
->f
[2] = src
->f
[2] - (float) floor( (double) src
->f
[2] );
489 dst
->f
[3] = src
->f
[3] - (float) floor( (double) src
->f
[3] );
494 union tgsi_exec_channel
*dst
,
495 const union tgsi_exec_channel
*src0
,
496 const union tgsi_exec_channel
*src1
,
497 const union tgsi_exec_channel
*src2
,
498 const union tgsi_exec_channel
*src3
)
500 dst
->f
[0] = src0
->f
[0] >= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
501 dst
->f
[1] = src0
->f
[1] >= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
502 dst
->f
[2] = src0
->f
[2] >= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
503 dst
->f
[3] = src0
->f
[3] >= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
508 union tgsi_exec_channel
*dst
,
509 const union tgsi_exec_channel
*src
)
511 dst
->f
[0] = (float) src
->i
[0];
512 dst
->f
[1] = (float) src
->i
[1];
513 dst
->f
[2] = (float) src
->i
[2];
514 dst
->f
[3] = (float) src
->i
[3];
519 union tgsi_exec_channel
*dst
,
520 const union tgsi_exec_channel
*src
)
522 dst
->f
[0] = (float) log( (double) src
->f
[0] ) * 1.442695f
;
523 dst
->f
[1] = (float) log( (double) src
->f
[1] ) * 1.442695f
;
524 dst
->f
[2] = (float) log( (double) src
->f
[2] ) * 1.442695f
;
525 dst
->f
[3] = (float) log( (double) src
->f
[3] ) * 1.442695f
;
530 union tgsi_exec_channel
*dst
,
531 const union tgsi_exec_channel
*src0
,
532 const union tgsi_exec_channel
*src1
,
533 const union tgsi_exec_channel
*src2
,
534 const union tgsi_exec_channel
*src3
)
536 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
537 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
538 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
539 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
544 union tgsi_exec_channel
*dst
,
545 const union tgsi_exec_channel
*src0
,
546 const union tgsi_exec_channel
*src1
,
547 const union tgsi_exec_channel
*src2
,
548 const union tgsi_exec_channel
*src3
)
550 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
551 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
552 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
553 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
558 union tgsi_exec_channel
*dst
,
559 const union tgsi_exec_channel
*src0
,
560 const union tgsi_exec_channel
*src1
,
561 const union tgsi_exec_channel
*src2
,
562 const union tgsi_exec_channel
*src3
)
564 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src2
->u
[0] : src3
->u
[0];
565 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src2
->u
[1] : src3
->u
[1];
566 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src2
->u
[2] : src3
->u
[2];
567 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src2
->u
[3] : src3
->u
[3];
572 union tgsi_exec_channel
*dst
,
573 const union tgsi_exec_channel
*src0
,
574 const union tgsi_exec_channel
*src1
)
576 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
577 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
578 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
579 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
584 union tgsi_exec_channel
*dst
,
585 const union tgsi_exec_channel
*src0
,
586 const union tgsi_exec_channel
*src1
)
588 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
589 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
590 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
591 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
596 union tgsi_exec_channel
*dst
,
597 const union tgsi_exec_channel
*src0
,
598 const union tgsi_exec_channel
*src1
)
600 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
601 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
602 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
603 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
608 union tgsi_exec_channel
*dst
,
609 const union tgsi_exec_channel
*src0
,
610 const union tgsi_exec_channel
*src1
)
612 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
613 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
614 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
615 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
620 union tgsi_exec_channel
*dst
,
621 const union tgsi_exec_channel
*src0
,
622 const union tgsi_exec_channel
*src1
)
624 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
625 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
626 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
627 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
632 union tgsi_exec_channel
*dst
,
633 const union tgsi_exec_channel
*src0
,
634 const union tgsi_exec_channel
*src1
)
636 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
637 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
638 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
639 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
644 union tgsi_exec_channel
*dst
,
645 const union tgsi_exec_channel
*src0
,
646 const union tgsi_exec_channel
*src1
)
648 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
649 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
650 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
651 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
656 union tgsi_exec_channel
*dst
,
657 const union tgsi_exec_channel
*src0
,
658 const union tgsi_exec_channel
*src1
)
660 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
661 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
662 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
663 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
668 union tgsi_exec_channel
*dst
,
669 const union tgsi_exec_channel
*src0
,
670 const union tgsi_exec_channel
*src1
)
672 dst
->i
[0] = src0
->i
[0] * src1
->i
[0];
673 dst
->i
[1] = src0
->i
[1] * src1
->i
[1];
674 dst
->i
[2] = src0
->i
[2] * src1
->i
[2];
675 dst
->i
[3] = src0
->i
[3] * src1
->i
[3];
680 union tgsi_exec_channel
*dst0
,
681 union tgsi_exec_channel
*dst1
,
682 const union tgsi_exec_channel
*src0
,
683 const union tgsi_exec_channel
*src1
)
685 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
686 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
687 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
688 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
697 union tgsi_exec_channel
*dst0
,
698 union tgsi_exec_channel
*dst1
,
699 const union tgsi_exec_channel
*src0
,
700 const union tgsi_exec_channel
*src1
)
702 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
703 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
704 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
705 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
714 union tgsi_exec_channel
*dst
,
715 const union tgsi_exec_channel
*src0
,
716 const union tgsi_exec_channel
*src1
,
717 const union tgsi_exec_channel
*src2
)
719 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
720 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
721 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
722 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
727 union tgsi_exec_channel
*dst
,
728 const union tgsi_exec_channel
*src
)
730 dst
->f
[0] = -src
->f
[0];
731 dst
->f
[1] = -src
->f
[1];
732 dst
->f
[2] = -src
->f
[2];
733 dst
->f
[3] = -src
->f
[3];
738 union tgsi_exec_channel
*dst
,
739 const union tgsi_exec_channel
*src
)
741 dst
->i
[0] = -src
->i
[0];
742 dst
->i
[1] = -src
->i
[1];
743 dst
->i
[2] = -src
->i
[2];
744 dst
->i
[3] = -src
->i
[3];
749 union tgsi_exec_channel
*dst
,
750 const union tgsi_exec_channel
*src
)
752 dst
->u
[0] = ~src
->u
[0];
753 dst
->u
[1] = ~src
->u
[1];
754 dst
->u
[2] = ~src
->u
[2];
755 dst
->u
[3] = ~src
->u
[3];
760 union tgsi_exec_channel
*dst
,
761 const union tgsi_exec_channel
*src0
,
762 const union tgsi_exec_channel
*src1
)
764 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
765 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
766 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
767 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
772 union tgsi_exec_channel
*dst
,
773 const union tgsi_exec_channel
*src0
,
774 const union tgsi_exec_channel
*src1
)
776 dst
->f
[0] = (float) pow( (double) src0
->f
[0], (double) src1
->f
[0] );
777 dst
->f
[1] = (float) pow( (double) src0
->f
[1], (double) src1
->f
[1] );
778 dst
->f
[2] = (float) pow( (double) src0
->f
[2], (double) src1
->f
[2] );
779 dst
->f
[3] = (float) pow( (double) src0
->f
[3], (double) src1
->f
[3] );
784 union tgsi_exec_channel
*dst
,
785 const union tgsi_exec_channel
*src
)
787 dst
->f
[0] = (float) floor( (double) (src
->f
[0] + 0.5f
) );
788 dst
->f
[1] = (float) floor( (double) (src
->f
[1] + 0.5f
) );
789 dst
->f
[2] = (float) floor( (double) (src
->f
[2] + 0.5f
) );
790 dst
->f
[3] = (float) floor( (double) (src
->f
[3] + 0.5f
) );
795 union tgsi_exec_channel
*dst
,
796 const union tgsi_exec_channel
*src0
,
797 const union tgsi_exec_channel
*src1
)
799 dst
->i
[0] = src0
->i
[0] << src1
->i
[0];
800 dst
->i
[1] = src0
->i
[1] << src1
->i
[1];
801 dst
->i
[2] = src0
->i
[2] << src1
->i
[2];
802 dst
->i
[3] = src0
->i
[3] << src1
->i
[3];
807 union tgsi_exec_channel
*dst
,
808 const union tgsi_exec_channel
*src0
,
809 const union tgsi_exec_channel
*src1
)
811 dst
->i
[0] = src0
->i
[0] >> src1
->i
[0];
812 dst
->i
[1] = src0
->i
[1] >> src1
->i
[1];
813 dst
->i
[2] = src0
->i
[2] >> src1
->i
[2];
814 dst
->i
[3] = src0
->i
[3] >> src1
->i
[3];
819 union tgsi_exec_channel
*dst
,
820 const union tgsi_exec_channel
*src0
)
822 dst
->f
[0] = (float) (int) src0
->f
[0];
823 dst
->f
[1] = (float) (int) src0
->f
[1];
824 dst
->f
[2] = (float) (int) src0
->f
[2];
825 dst
->f
[3] = (float) (int) src0
->f
[3];
830 union tgsi_exec_channel
*dst
,
831 const union tgsi_exec_channel
*src0
,
832 const union tgsi_exec_channel
*src1
)
834 dst
->u
[0] = src0
->u
[0] >> src1
->u
[0];
835 dst
->u
[1] = src0
->u
[1] >> src1
->u
[1];
836 dst
->u
[2] = src0
->u
[2] >> src1
->u
[2];
837 dst
->u
[3] = src0
->u
[3] >> src1
->u
[3];
842 union tgsi_exec_channel
*dst
,
843 const union tgsi_exec_channel
*src
)
845 dst
->f
[0] = (float) sin( (double) src
->f
[0] );
846 dst
->f
[1] = (float) sin( (double) src
->f
[1] );
847 dst
->f
[2] = (float) sin( (double) src
->f
[2] );
848 dst
->f
[3] = (float) sin( (double) src
->f
[3] );
852 micro_sqrt( union tgsi_exec_channel
*dst
,
853 const union tgsi_exec_channel
*src
)
855 dst
->f
[0] = (float) sqrt( (double) src
->f
[0] );
856 dst
->f
[1] = (float) sqrt( (double) src
->f
[1] );
857 dst
->f
[2] = (float) sqrt( (double) src
->f
[2] );
858 dst
->f
[3] = (float) sqrt( (double) src
->f
[3] );
863 union tgsi_exec_channel
*dst
,
864 const union tgsi_exec_channel
*src0
,
865 const union tgsi_exec_channel
*src1
)
867 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
868 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
869 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
870 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
875 union tgsi_exec_channel
*dst
,
876 const union tgsi_exec_channel
*src
)
878 dst
->f
[0] = (float) src
->u
[0];
879 dst
->f
[1] = (float) src
->u
[1];
880 dst
->f
[2] = (float) src
->u
[2];
881 dst
->f
[3] = (float) src
->u
[3];
886 union tgsi_exec_channel
*dst
,
887 const union tgsi_exec_channel
*src0
,
888 const union tgsi_exec_channel
*src1
)
890 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
891 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
892 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
893 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
897 fetch_src_file_channel(
898 const struct tgsi_exec_machine
*mach
,
901 const union tgsi_exec_channel
*index
,
902 union tgsi_exec_channel
*chan
)
905 case TGSI_EXTSWIZZLE_X
:
906 case TGSI_EXTSWIZZLE_Y
:
907 case TGSI_EXTSWIZZLE_Z
:
908 case TGSI_EXTSWIZZLE_W
:
910 case TGSI_FILE_CONSTANT
:
911 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
912 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
913 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
914 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
917 case TGSI_FILE_INPUT
:
918 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
919 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
920 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
921 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
924 case TGSI_FILE_TEMPORARY
:
925 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
926 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
927 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
928 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
931 case TGSI_FILE_IMMEDIATE
:
932 assert( index
->i
[0] < (int) mach
->ImmLimit
);
933 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
934 assert( index
->i
[1] < (int) mach
->ImmLimit
);
935 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
936 assert( index
->i
[2] < (int) mach
->ImmLimit
);
937 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
938 assert( index
->i
[3] < (int) mach
->ImmLimit
);
939 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
942 case TGSI_FILE_ADDRESS
:
943 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
944 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
945 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
946 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
949 case TGSI_FILE_OUTPUT
:
950 /* vertex/fragment output vars can be read too */
951 chan
->u
[0] = mach
->Outputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
952 chan
->u
[1] = mach
->Outputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
953 chan
->u
[2] = mach
->Outputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
954 chan
->u
[3] = mach
->Outputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
962 case TGSI_EXTSWIZZLE_ZERO
:
963 *chan
= mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
];
966 case TGSI_EXTSWIZZLE_ONE
:
967 *chan
= mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
];
977 const struct tgsi_exec_machine
*mach
,
978 union tgsi_exec_channel
*chan
,
979 const struct tgsi_full_src_register
*reg
,
980 const uint chan_index
)
982 union tgsi_exec_channel index
;
988 index
.i
[3] = reg
->SrcRegister
.Index
;
990 if (reg
->SrcRegister
.Indirect
) {
991 union tgsi_exec_channel index2
;
992 union tgsi_exec_channel indir_index
;
997 index2
.i
[3] = reg
->SrcRegisterInd
.Index
;
999 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterInd
, CHAN_X
);
1000 fetch_src_file_channel(
1002 reg
->SrcRegisterInd
.File
,
1007 index
.i
[0] += indir_index
.i
[0];
1008 index
.i
[1] += indir_index
.i
[1];
1009 index
.i
[2] += indir_index
.i
[2];
1010 index
.i
[3] += indir_index
.i
[3];
1013 if( reg
->SrcRegister
.Dimension
) {
1014 switch( reg
->SrcRegister
.File
) {
1015 case TGSI_FILE_INPUT
:
1021 case TGSI_FILE_CONSTANT
:
1031 index
.i
[0] += reg
->SrcRegisterDim
.Index
;
1032 index
.i
[1] += reg
->SrcRegisterDim
.Index
;
1033 index
.i
[2] += reg
->SrcRegisterDim
.Index
;
1034 index
.i
[3] += reg
->SrcRegisterDim
.Index
;
1036 if (reg
->SrcRegisterDim
.Indirect
) {
1037 union tgsi_exec_channel index2
;
1038 union tgsi_exec_channel indir_index
;
1043 index2
.i
[3] = reg
->SrcRegisterDimInd
.Index
;
1045 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterDimInd
, CHAN_X
);
1046 fetch_src_file_channel(
1048 reg
->SrcRegisterDimInd
.File
,
1053 index
.i
[0] += indir_index
.i
[0];
1054 index
.i
[1] += indir_index
.i
[1];
1055 index
.i
[2] += indir_index
.i
[2];
1056 index
.i
[3] += indir_index
.i
[3];
1060 swizzle
= tgsi_util_get_full_src_register_extswizzle( reg
, chan_index
);
1061 fetch_src_file_channel(
1063 reg
->SrcRegister
.File
,
1068 switch (tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
)) {
1069 case TGSI_UTIL_SIGN_CLEAR
:
1070 micro_abs( chan
, chan
);
1073 case TGSI_UTIL_SIGN_SET
:
1074 micro_abs( chan
, chan
);
1075 micro_neg( chan
, chan
);
1078 case TGSI_UTIL_SIGN_TOGGLE
:
1079 micro_neg( chan
, chan
);
1082 case TGSI_UTIL_SIGN_KEEP
:
1086 if (reg
->SrcRegisterExtMod
.Complement
) {
1087 micro_sub( chan
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], chan
);
1093 struct tgsi_exec_machine
*mach
,
1094 const union tgsi_exec_channel
*chan
,
1095 const struct tgsi_full_dst_register
*reg
,
1096 const struct tgsi_full_instruction
*inst
,
1099 union tgsi_exec_channel
*dst
;
1101 switch( reg
->DstRegister
.File
) {
1102 case TGSI_FILE_NULL
:
1105 case TGSI_FILE_OUTPUT
:
1106 dst
= &mach
->Outputs
[mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1107 + reg
->DstRegister
.Index
].xyzw
[chan_index
];
1110 case TGSI_FILE_TEMPORARY
:
1111 dst
= &mach
->Temps
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1114 case TGSI_FILE_ADDRESS
:
1115 dst
= &mach
->Addrs
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
1123 switch (inst
->Instruction
.Saturate
)
1126 if (mach
->ExecMask
& 0x1)
1127 dst
->i
[0] = chan
->i
[0];
1128 if (mach
->ExecMask
& 0x2)
1129 dst
->i
[1] = chan
->i
[1];
1130 if (mach
->ExecMask
& 0x4)
1131 dst
->i
[2] = chan
->i
[2];
1132 if (mach
->ExecMask
& 0x8)
1133 dst
->i
[3] = chan
->i
[3];
1136 case TGSI_SAT_ZERO_ONE
:
1137 /* XXX need to obey ExecMask here */
1138 micro_max(dst
, chan
, &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
1139 micro_min(dst
, dst
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
1142 case TGSI_SAT_MINUS_PLUS_ONE
:
1151 #define FETCH(VAL,INDEX,CHAN)\
1152 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
1154 #define STORE(VAL,INDEX,CHAN)\
1155 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1159 * Execute ARB-style KIL which is predicated by a src register.
1160 * Kill fragment if any of the four values is less than zero.
1163 exec_kilp(struct tgsi_exec_machine
*mach
,
1164 const struct tgsi_full_instruction
*inst
)
1168 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1169 union tgsi_exec_channel r
[1];
1171 /* This mask stores component bits that were already tested. Note that
1172 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1174 uniquemask
= (1 << TGSI_EXTSWIZZLE_ZERO
) | (1 << TGSI_EXTSWIZZLE_ONE
);
1176 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1181 /* unswizzle channel */
1182 swizzle
= tgsi_util_get_full_src_register_extswizzle (
1183 &inst
->FullSrcRegisters
[0],
1186 /* check if the component has not been already tested */
1187 if (uniquemask
& (1 << swizzle
))
1189 uniquemask
|= 1 << swizzle
;
1191 FETCH(&r
[0], 0, chan_index
);
1192 for (i
= 0; i
< 4; i
++)
1193 if (r
[0].f
[i
] < 0.0f
)
1197 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1202 * Fetch a texel using STR texture coordinates.
1205 fetch_texel( struct tgsi_sampler
*sampler
,
1206 const union tgsi_exec_channel
*s
,
1207 const union tgsi_exec_channel
*t
,
1208 const union tgsi_exec_channel
*p
,
1209 float lodbias
, /* XXX should be float[4] */
1210 union tgsi_exec_channel
*r
,
1211 union tgsi_exec_channel
*g
,
1212 union tgsi_exec_channel
*b
,
1213 union tgsi_exec_channel
*a
)
1216 float rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1218 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, lodbias
, rgba
);
1220 for (j
= 0; j
< 4; j
++) {
1221 r
->f
[j
] = rgba
[0][j
];
1222 g
->f
[j
] = rgba
[1][j
];
1223 b
->f
[j
] = rgba
[2][j
];
1224 a
->f
[j
] = rgba
[3][j
];
1230 exec_tex(struct tgsi_exec_machine
*mach
,
1231 const struct tgsi_full_instruction
*inst
,
1234 const uint unit
= inst
->FullSrcRegisters
[1].SrcRegister
.Index
;
1235 union tgsi_exec_channel r
[8];
1239 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1241 switch (inst
->InstructionExtTexture
.Texture
) {
1242 case TGSI_TEXTURE_1D
:
1244 FETCH(&r
[0], 0, CHAN_X
);
1246 switch (inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtDivide
) {
1247 case TGSI_EXTSWIZZLE_W
:
1248 FETCH(&r
[1], 0, CHAN_W
);
1249 micro_div( &r
[0], &r
[0], &r
[1] );
1252 case TGSI_EXTSWIZZLE_ONE
:
1260 FETCH(&r
[1], 0, CHAN_W
);
1261 lodBias
= r
[2].f
[0];
1266 fetch_texel(&mach
->Samplers
[unit
],
1267 &r
[0], NULL
, NULL
, lodBias
, /* S, T, P, BIAS */
1268 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1271 case TGSI_TEXTURE_2D
:
1272 case TGSI_TEXTURE_RECT
:
1274 FETCH(&r
[0], 0, CHAN_X
);
1275 FETCH(&r
[1], 0, CHAN_Y
);
1276 FETCH(&r
[2], 0, CHAN_Z
);
1278 switch (inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtDivide
) {
1279 case TGSI_EXTSWIZZLE_W
:
1280 FETCH(&r
[3], 0, CHAN_W
);
1281 micro_div( &r
[0], &r
[0], &r
[3] );
1282 micro_div( &r
[1], &r
[1], &r
[3] );
1283 micro_div( &r
[2], &r
[2], &r
[3] );
1286 case TGSI_EXTSWIZZLE_ONE
:
1294 FETCH(&r
[3], 0, CHAN_W
);
1295 lodBias
= r
[3].f
[0];
1300 fetch_texel(&mach
->Samplers
[unit
],
1301 &r
[0], &r
[1], &r
[2], lodBias
, /* inputs */
1302 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1305 case TGSI_TEXTURE_3D
:
1306 case TGSI_TEXTURE_CUBE
:
1308 FETCH(&r
[0], 0, CHAN_X
);
1309 FETCH(&r
[1], 0, CHAN_Y
);
1310 FETCH(&r
[2], 0, CHAN_Z
);
1312 switch (inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtDivide
) {
1313 case TGSI_EXTSWIZZLE_W
:
1314 FETCH(&r
[3], 0, CHAN_W
);
1315 micro_div( &r
[0], &r
[0], &r
[3] );
1316 micro_div( &r
[1], &r
[1], &r
[3] );
1317 micro_div( &r
[2], &r
[2], &r
[3] );
1320 case TGSI_EXTSWIZZLE_ONE
:
1328 FETCH(&r
[3], 0, CHAN_W
);
1329 lodBias
= r
[3].f
[0];
1334 fetch_texel(&mach
->Samplers
[unit
],
1335 &r
[0], &r
[1], &r
[2], lodBias
,
1336 &r
[0], &r
[1], &r
[2], &r
[3]);
1343 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1344 STORE( &r
[chan_index
], 0, chan_index
);
1351 constant_interpolation(
1352 struct tgsi_exec_machine
*mach
,
1358 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1359 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1364 linear_interpolation(
1365 struct tgsi_exec_machine
*mach
,
1369 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1370 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1371 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1372 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1373 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1374 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
1375 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
1376 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
1377 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
1381 perspective_interpolation(
1382 struct tgsi_exec_machine
*mach
,
1386 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1387 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1388 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1389 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1390 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1391 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
1392 /* divide by W here */
1393 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
1394 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
1395 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
1396 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
/**
 * Signature shared by the constant, linear and perspective interpolators:
 * compute channel `chan' of input attribute `attrib' for the current quad.
 */
typedef void (* interpolation_func)(
   struct tgsi_exec_machine *mach,
   unsigned attrib,
   unsigned chan );
1407 struct tgsi_exec_machine
*mach
,
1408 const struct tgsi_full_declaration
*decl
)
1410 if( mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1411 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1412 unsigned first
, last
, mask
;
1413 interpolation_func interp
;
1415 assert( decl
->Declaration
.Declare
== TGSI_DECLARE_RANGE
);
1417 first
= decl
->u
.DeclarationRange
.First
;
1418 last
= decl
->u
.DeclarationRange
.Last
;
1419 mask
= decl
->Declaration
.UsageMask
;
1421 switch( decl
->Interpolation
.Interpolate
) {
1422 case TGSI_INTERPOLATE_CONSTANT
:
1423 interp
= constant_interpolation
;
1426 case TGSI_INTERPOLATE_LINEAR
:
1427 interp
= linear_interpolation
;
1430 case TGSI_INTERPOLATE_PERSPECTIVE
:
1431 interp
= perspective_interpolation
;
1438 if( mask
== TGSI_WRITEMASK_XYZW
) {
1441 for( i
= first
; i
<= last
; i
++ ) {
1442 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1443 interp( mach
, i
, j
);
1450 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1451 if( mask
& (1 << j
) ) {
1452 for( i
= first
; i
<= last
; i
++ ) {
1453 interp( mach
, i
, j
);
1464 struct tgsi_exec_machine
*mach
,
1465 const struct tgsi_full_instruction
*inst
,
1469 union tgsi_exec_channel r
[8];
1473 switch (inst
->Instruction
.Opcode
) {
1474 case TGSI_OPCODE_ARL
:
1475 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1476 FETCH( &r
[0], 0, chan_index
);
1477 micro_f2it( &r
[0], &r
[0] );
1478 STORE( &r
[0], 0, chan_index
);
1482 case TGSI_OPCODE_MOV
:
1483 /* TGSI_OPCODE_SWZ */
1484 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1485 FETCH( &r
[0], 0, chan_index
);
1486 STORE( &r
[0], 0, chan_index
);
1490 case TGSI_OPCODE_LIT
:
1491 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1492 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1495 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1496 FETCH( &r
[0], 0, CHAN_X
);
1497 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1498 micro_max( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1499 STORE( &r
[0], 0, CHAN_Y
);
1502 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1503 FETCH( &r
[1], 0, CHAN_Y
);
1504 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1506 FETCH( &r
[2], 0, CHAN_W
);
1507 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
1508 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
1509 micro_pow( &r
[1], &r
[1], &r
[2] );
1510 micro_lt( &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1511 STORE( &r
[0], 0, CHAN_Z
);
1515 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1516 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1520 case TGSI_OPCODE_RCP
:
1521 /* TGSI_OPCODE_RECIP */
1522 FETCH( &r
[0], 0, CHAN_X
);
1523 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1524 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1525 STORE( &r
[0], 0, chan_index
);
1529 case TGSI_OPCODE_RSQ
:
1530 /* TGSI_OPCODE_RECIPSQRT */
1531 FETCH( &r
[0], 0, CHAN_X
);
1532 micro_sqrt( &r
[0], &r
[0] );
1533 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1534 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1535 STORE( &r
[0], 0, chan_index
);
1539 case TGSI_OPCODE_EXP
:
1543 case TGSI_OPCODE_LOG
:
1547 case TGSI_OPCODE_MUL
:
1548 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
)
1550 FETCH(&r
[0], 0, chan_index
);
1551 FETCH(&r
[1], 1, chan_index
);
1553 micro_mul( &r
[0], &r
[0], &r
[1] );
1555 STORE(&r
[0], 0, chan_index
);
1559 case TGSI_OPCODE_ADD
:
1560 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1561 FETCH( &r
[0], 0, chan_index
);
1562 FETCH( &r
[1], 1, chan_index
);
1563 micro_add( &r
[0], &r
[0], &r
[1] );
1564 STORE( &r
[0], 0, chan_index
);
1568 case TGSI_OPCODE_DP3
:
1569 /* TGSI_OPCODE_DOT3 */
1570 FETCH( &r
[0], 0, CHAN_X
);
1571 FETCH( &r
[1], 1, CHAN_X
);
1572 micro_mul( &r
[0], &r
[0], &r
[1] );
1574 FETCH( &r
[1], 0, CHAN_Y
);
1575 FETCH( &r
[2], 1, CHAN_Y
);
1576 micro_mul( &r
[1], &r
[1], &r
[2] );
1577 micro_add( &r
[0], &r
[0], &r
[1] );
1579 FETCH( &r
[1], 0, CHAN_Z
);
1580 FETCH( &r
[2], 1, CHAN_Z
);
1581 micro_mul( &r
[1], &r
[1], &r
[2] );
1582 micro_add( &r
[0], &r
[0], &r
[1] );
1584 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1585 STORE( &r
[0], 0, chan_index
);
1589 case TGSI_OPCODE_DP4
:
1590 /* TGSI_OPCODE_DOT4 */
1591 FETCH(&r
[0], 0, CHAN_X
);
1592 FETCH(&r
[1], 1, CHAN_X
);
1594 micro_mul( &r
[0], &r
[0], &r
[1] );
1596 FETCH(&r
[1], 0, CHAN_Y
);
1597 FETCH(&r
[2], 1, CHAN_Y
);
1599 micro_mul( &r
[1], &r
[1], &r
[2] );
1600 micro_add( &r
[0], &r
[0], &r
[1] );
1602 FETCH(&r
[1], 0, CHAN_Z
);
1603 FETCH(&r
[2], 1, CHAN_Z
);
1605 micro_mul( &r
[1], &r
[1], &r
[2] );
1606 micro_add( &r
[0], &r
[0], &r
[1] );
1608 FETCH(&r
[1], 0, CHAN_W
);
1609 FETCH(&r
[2], 1, CHAN_W
);
1611 micro_mul( &r
[1], &r
[1], &r
[2] );
1612 micro_add( &r
[0], &r
[0], &r
[1] );
1614 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1615 STORE( &r
[0], 0, chan_index
);
1619 case TGSI_OPCODE_DST
:
1620 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1621 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1624 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1625 FETCH( &r
[0], 0, CHAN_Y
);
1626 FETCH( &r
[1], 1, CHAN_Y
);
1627 micro_mul( &r
[0], &r
[0], &r
[1] );
1628 STORE( &r
[0], 0, CHAN_Y
);
1631 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1632 FETCH( &r
[0], 0, CHAN_Z
);
1633 STORE( &r
[0], 0, CHAN_Z
);
1636 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1637 FETCH( &r
[0], 1, CHAN_W
);
1638 STORE( &r
[0], 0, CHAN_W
);
1642 case TGSI_OPCODE_MIN
:
1643 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1644 FETCH(&r
[0], 0, chan_index
);
1645 FETCH(&r
[1], 1, chan_index
);
1647 /* XXX use micro_min()?? */
1648 micro_lt( &r
[0], &r
[0], &r
[1], &r
[0], &r
[1] );
1650 STORE(&r
[0], 0, chan_index
);
1654 case TGSI_OPCODE_MAX
:
1655 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1656 FETCH(&r
[0], 0, chan_index
);
1657 FETCH(&r
[1], 1, chan_index
);
1659 /* XXX use micro_max()?? */
1660 micro_lt( &r
[0], &r
[0], &r
[1], &r
[1], &r
[0] );
1662 STORE(&r
[0], 0, chan_index
);
1666 case TGSI_OPCODE_SLT
:
1667 /* TGSI_OPCODE_SETLT */
1668 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1669 FETCH( &r
[0], 0, chan_index
);
1670 FETCH( &r
[1], 1, chan_index
);
1671 micro_lt( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1672 STORE( &r
[0], 0, chan_index
);
1676 case TGSI_OPCODE_SGE
:
1677 /* TGSI_OPCODE_SETGE */
1678 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1679 FETCH( &r
[0], 0, chan_index
);
1680 FETCH( &r
[1], 1, chan_index
);
1681 micro_ge( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1682 STORE( &r
[0], 0, chan_index
);
1686 case TGSI_OPCODE_MAD
:
1687 /* TGSI_OPCODE_MADD */
1688 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1689 FETCH( &r
[0], 0, chan_index
);
1690 FETCH( &r
[1], 1, chan_index
);
1691 micro_mul( &r
[0], &r
[0], &r
[1] );
1692 FETCH( &r
[1], 2, chan_index
);
1693 micro_add( &r
[0], &r
[0], &r
[1] );
1694 STORE( &r
[0], 0, chan_index
);
1698 case TGSI_OPCODE_SUB
:
1699 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1700 FETCH(&r
[0], 0, chan_index
);
1701 FETCH(&r
[1], 1, chan_index
);
1703 micro_sub( &r
[0], &r
[0], &r
[1] );
1705 STORE(&r
[0], 0, chan_index
);
1709 case TGSI_OPCODE_LERP
:
1710 /* TGSI_OPCODE_LRP */
1711 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1712 FETCH(&r
[0], 0, chan_index
);
1713 FETCH(&r
[1], 1, chan_index
);
1714 FETCH(&r
[2], 2, chan_index
);
1716 micro_sub( &r
[1], &r
[1], &r
[2] );
1717 micro_mul( &r
[0], &r
[0], &r
[1] );
1718 micro_add( &r
[0], &r
[0], &r
[2] );
1720 STORE(&r
[0], 0, chan_index
);
1724 case TGSI_OPCODE_CND
:
1728 case TGSI_OPCODE_CND0
:
1732 case TGSI_OPCODE_DOT2ADD
:
1733 /* TGSI_OPCODE_DP2A */
1737 case TGSI_OPCODE_INDEX
:
1741 case TGSI_OPCODE_NEGATE
:
1745 case TGSI_OPCODE_FRAC
:
1746 /* TGSI_OPCODE_FRC */
1747 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1748 FETCH( &r
[0], 0, chan_index
);
1749 micro_frc( &r
[0], &r
[0] );
1750 STORE( &r
[0], 0, chan_index
);
1754 case TGSI_OPCODE_CLAMP
:
1758 case TGSI_OPCODE_FLOOR
:
1759 /* TGSI_OPCODE_FLR */
1760 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1761 FETCH( &r
[0], 0, chan_index
);
1762 micro_flr( &r
[0], &r
[0] );
1763 STORE( &r
[0], 0, chan_index
);
1767 case TGSI_OPCODE_ROUND
:
1768 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1769 FETCH( &r
[0], 0, chan_index
);
1770 micro_rnd( &r
[0], &r
[0] );
1771 STORE( &r
[0], 0, chan_index
);
1775 case TGSI_OPCODE_EXPBASE2
:
1776 /* TGSI_OPCODE_EX2 */
1777 FETCH(&r
[0], 0, CHAN_X
);
1779 micro_pow( &r
[0], &mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
], &r
[0] );
1781 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1782 STORE( &r
[0], 0, chan_index
);
1786 case TGSI_OPCODE_LOGBASE2
:
1787 /* TGSI_OPCODE_LG2 */
1788 FETCH( &r
[0], 0, CHAN_X
);
1789 micro_lg2( &r
[0], &r
[0] );
1790 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1791 STORE( &r
[0], 0, chan_index
);
1795 case TGSI_OPCODE_POWER
:
1796 /* TGSI_OPCODE_POW */
1797 FETCH(&r
[0], 0, CHAN_X
);
1798 FETCH(&r
[1], 1, CHAN_X
);
1800 micro_pow( &r
[0], &r
[0], &r
[1] );
1802 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1803 STORE( &r
[0], 0, chan_index
);
1807 case TGSI_OPCODE_CROSSPRODUCT
:
1808 /* TGSI_OPCODE_XPD */
1809 FETCH(&r
[0], 0, CHAN_Y
);
1810 FETCH(&r
[1], 1, CHAN_Z
);
1812 micro_mul( &r
[2], &r
[0], &r
[1] );
1814 FETCH(&r
[3], 0, CHAN_Z
);
1815 FETCH(&r
[4], 1, CHAN_Y
);
1817 micro_mul( &r
[5], &r
[3], &r
[4] );
1818 micro_sub( &r
[2], &r
[2], &r
[5] );
1820 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1821 STORE( &r
[2], 0, CHAN_X
);
1824 FETCH(&r
[2], 1, CHAN_X
);
1826 micro_mul( &r
[3], &r
[3], &r
[2] );
1828 FETCH(&r
[5], 0, CHAN_X
);
1830 micro_mul( &r
[1], &r
[1], &r
[5] );
1831 micro_sub( &r
[3], &r
[3], &r
[1] );
1833 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1834 STORE( &r
[3], 0, CHAN_Y
);
1837 micro_mul( &r
[5], &r
[5], &r
[4] );
1838 micro_mul( &r
[0], &r
[0], &r
[2] );
1839 micro_sub( &r
[5], &r
[5], &r
[0] );
1841 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1842 STORE( &r
[5], 0, CHAN_Z
);
1845 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1846 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1850 case TGSI_OPCODE_MULTIPLYMATRIX
:
1854 case TGSI_OPCODE_ABS
:
1855 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1856 FETCH(&r
[0], 0, chan_index
);
1858 micro_abs( &r
[0], &r
[0] );
1860 STORE(&r
[0], 0, chan_index
);
1864 case TGSI_OPCODE_RCC
:
1868 case TGSI_OPCODE_DPH
:
1869 FETCH(&r
[0], 0, CHAN_X
);
1870 FETCH(&r
[1], 1, CHAN_X
);
1872 micro_mul( &r
[0], &r
[0], &r
[1] );
1874 FETCH(&r
[1], 0, CHAN_Y
);
1875 FETCH(&r
[2], 1, CHAN_Y
);
1877 micro_mul( &r
[1], &r
[1], &r
[2] );
1878 micro_add( &r
[0], &r
[0], &r
[1] );
1880 FETCH(&r
[1], 0, CHAN_Z
);
1881 FETCH(&r
[2], 1, CHAN_Z
);
1883 micro_mul( &r
[1], &r
[1], &r
[2] );
1884 micro_add( &r
[0], &r
[0], &r
[1] );
1886 FETCH(&r
[1], 1, CHAN_W
);
1888 micro_add( &r
[0], &r
[0], &r
[1] );
1890 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1891 STORE( &r
[0], 0, chan_index
);
1895 case TGSI_OPCODE_COS
:
1896 FETCH(&r
[0], 0, CHAN_X
);
1898 micro_cos( &r
[0], &r
[0] );
1900 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1901 STORE( &r
[0], 0, chan_index
);
1905 case TGSI_OPCODE_DDX
:
1906 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1907 FETCH( &r
[0], 0, chan_index
);
1908 micro_ddx( &r
[0], &r
[0] );
1909 STORE( &r
[0], 0, chan_index
);
1913 case TGSI_OPCODE_DDY
:
1914 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1915 FETCH( &r
[0], 0, chan_index
);
1916 micro_ddy( &r
[0], &r
[0] );
1917 STORE( &r
[0], 0, chan_index
);
1921 case TGSI_OPCODE_KILP
:
1922 exec_kilp (mach
, inst
);
1925 case TGSI_OPCODE_KIL
:
1926 /* for enabled ExecMask bits, set the killed bit */
1927 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= mach
->ExecMask
;
1930 case TGSI_OPCODE_PK2H
:
1934 case TGSI_OPCODE_PK2US
:
1938 case TGSI_OPCODE_PK4B
:
1942 case TGSI_OPCODE_PK4UB
:
1946 case TGSI_OPCODE_RFL
:
1950 case TGSI_OPCODE_SEQ
:
1951 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1952 FETCH( &r
[0], 0, chan_index
);
1953 FETCH( &r
[1], 1, chan_index
);
1954 micro_eq( &r
[0], &r
[0], &r
[1],
1955 &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
],
1956 &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1957 STORE( &r
[0], 0, chan_index
);
1961 case TGSI_OPCODE_SFL
:
1965 case TGSI_OPCODE_SGT
:
1966 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1967 FETCH( &r
[0], 0, chan_index
);
1968 FETCH( &r
[1], 1, chan_index
);
1969 micro_lt( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1970 STORE( &r
[0], 0, chan_index
);
1974 case TGSI_OPCODE_SIN
:
1975 FETCH( &r
[0], 0, CHAN_X
);
1976 micro_sin( &r
[0], &r
[0] );
1977 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1978 STORE( &r
[0], 0, chan_index
);
1982 case TGSI_OPCODE_SLE
:
1983 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1984 FETCH( &r
[0], 0, chan_index
);
1985 FETCH( &r
[1], 1, chan_index
);
1986 micro_ge( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1987 STORE( &r
[0], 0, chan_index
);
1991 case TGSI_OPCODE_SNE
:
1992 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1993 FETCH( &r
[0], 0, chan_index
);
1994 FETCH( &r
[1], 1, chan_index
);
1995 micro_eq( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1996 STORE( &r
[0], 0, chan_index
);
2000 case TGSI_OPCODE_STR
:
2004 case TGSI_OPCODE_TEX
:
2005 /* simple texture lookup */
2006 /* src[0] = texcoord */
2007 /* src[1] = sampler unit */
2008 exec_tex(mach
, inst
, FALSE
);
2011 case TGSI_OPCODE_TXB
:
2012 /* Texture lookup with lod bias */
2013 /* src[0] = texcoord (src[0].w = LOD bias) */
2014 /* src[1] = sampler unit */
2015 exec_tex(mach
, inst
, TRUE
);
2018 case TGSI_OPCODE_TXD
:
2019 /* Texture lookup with explict partial derivatives */
2020 /* src[0] = texcoord */
2021 /* src[1] = d[strq]/dx */
2022 /* src[2] = d[strq]/dy */
2023 /* src[3] = sampler unit */
2027 case TGSI_OPCODE_TXL
:
2028 /* Texture lookup with explit LOD */
2029 /* src[0] = texcoord (src[0].w = LOD) */
2030 /* src[1] = sampler unit */
2031 exec_tex(mach
, inst
, TRUE
);
2034 case TGSI_OPCODE_UP2H
:
2038 case TGSI_OPCODE_UP2US
:
2042 case TGSI_OPCODE_UP4B
:
2046 case TGSI_OPCODE_UP4UB
:
2050 case TGSI_OPCODE_X2D
:
2054 case TGSI_OPCODE_ARA
:
2058 case TGSI_OPCODE_ARR
:
2062 case TGSI_OPCODE_BRA
:
2066 case TGSI_OPCODE_CAL
:
2067 /* skip the call if no execution channels are enabled */
2068 if (mach
->ExecMask
) {
2071 /* push the Cond, Loop, Cont stacks */
2072 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2073 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2074 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2075 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2076 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2077 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2079 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
2080 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
2082 /* note that PC was already incremented above */
2083 mach
->CallStack
[mach
->CallStackTop
++] = *pc
;
2084 *pc
= inst
->InstructionExtLabel
.Label
;
2088 case TGSI_OPCODE_RET
:
2089 mach
->FuncMask
&= ~mach
->ExecMask
;
2090 UPDATE_EXEC_MASK(mach
);
2092 if (mach
->ExecMask
== 0x0) {
2093 /* really return now (otherwise, keep executing */
2095 if (mach
->CallStackTop
== 0) {
2096 /* returning from main() */
2100 *pc
= mach
->CallStack
[--mach
->CallStackTop
];
2102 /* pop the Cond, Loop, Cont stacks */
2103 assert(mach
->CondStackTop
> 0);
2104 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2105 assert(mach
->LoopStackTop
> 0);
2106 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2107 assert(mach
->ContStackTop
> 0);
2108 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2109 assert(mach
->FuncStackTop
> 0);
2110 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
2112 UPDATE_EXEC_MASK(mach
);
2116 case TGSI_OPCODE_SSG
:
2120 case TGSI_OPCODE_CMP
:
2121 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2122 FETCH(&r
[0], 0, chan_index
);
2123 FETCH(&r
[1], 1, chan_index
);
2124 FETCH(&r
[2], 2, chan_index
);
2126 micro_lt( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2] );
2128 STORE(&r
[0], 0, chan_index
);
2132 case TGSI_OPCODE_SCS
:
2133 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2134 FETCH( &r
[0], 0, CHAN_X
);
2136 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) ) {
2137 micro_cos( &r
[1], &r
[0] );
2138 STORE( &r
[1], 0, CHAN_X
);
2140 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
2141 micro_sin( &r
[1], &r
[0] );
2142 STORE( &r
[1], 0, CHAN_Y
);
2144 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
2145 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
2147 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
2148 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2152 case TGSI_OPCODE_NRM
:
2156 case TGSI_OPCODE_DIV
:
2160 case TGSI_OPCODE_DP2
:
2161 FETCH( &r
[0], 0, CHAN_X
);
2162 FETCH( &r
[1], 1, CHAN_X
);
2163 micro_mul( &r
[0], &r
[0], &r
[1] );
2165 FETCH( &r
[1], 0, CHAN_Y
);
2166 FETCH( &r
[2], 1, CHAN_Y
);
2167 micro_mul( &r
[1], &r
[1], &r
[2] );
2168 micro_add( &r
[0], &r
[0], &r
[1] );
2170 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2171 STORE( &r
[0], 0, chan_index
);
2175 case TGSI_OPCODE_IF
:
2177 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2178 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2179 FETCH( &r
[0], 0, CHAN_X
);
2180 /* update CondMask */
2182 mach
->CondMask
&= ~0x1;
2185 mach
->CondMask
&= ~0x2;
2188 mach
->CondMask
&= ~0x4;
2191 mach
->CondMask
&= ~0x8;
2193 UPDATE_EXEC_MASK(mach
);
2194 /* Todo: If CondMask==0, jump to ELSE */
2197 case TGSI_OPCODE_ELSE
:
2198 /* invert CondMask wrt previous mask */
2201 assert(mach
->CondStackTop
> 0);
2202 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
2203 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
2204 UPDATE_EXEC_MASK(mach
);
2205 /* Todo: If CondMask==0, jump to ENDIF */
2209 case TGSI_OPCODE_ENDIF
:
2211 assert(mach
->CondStackTop
> 0);
2212 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
2213 UPDATE_EXEC_MASK(mach
);
2216 case TGSI_OPCODE_END
:
2217 /* halt execution */
2221 case TGSI_OPCODE_REP
:
2225 case TGSI_OPCODE_ENDREP
:
2229 case TGSI_OPCODE_PUSHA
:
2233 case TGSI_OPCODE_POPA
:
2237 case TGSI_OPCODE_CEIL
:
2238 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2239 FETCH( &r
[0], 0, chan_index
);
2240 micro_ceil( &r
[0], &r
[0] );
2241 STORE( &r
[0], 0, chan_index
);
2245 case TGSI_OPCODE_I2F
:
2246 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2247 FETCH( &r
[0], 0, chan_index
);
2248 micro_i2f( &r
[0], &r
[0] );
2249 STORE( &r
[0], 0, chan_index
);
2253 case TGSI_OPCODE_NOT
:
2254 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2255 FETCH( &r
[0], 0, chan_index
);
2256 micro_not( &r
[0], &r
[0] );
2257 STORE( &r
[0], 0, chan_index
);
2261 case TGSI_OPCODE_TRUNC
:
2262 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2263 FETCH( &r
[0], 0, chan_index
);
2264 micro_trunc( &r
[0], &r
[0] );
2265 STORE( &r
[0], 0, chan_index
);
2269 case TGSI_OPCODE_SHL
:
2270 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2271 FETCH( &r
[0], 0, chan_index
);
2272 FETCH( &r
[1], 1, chan_index
);
2273 micro_shl( &r
[0], &r
[0], &r
[1] );
2274 STORE( &r
[0], 0, chan_index
);
2278 case TGSI_OPCODE_SHR
:
2279 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2280 FETCH( &r
[0], 0, chan_index
);
2281 FETCH( &r
[1], 1, chan_index
);
2282 micro_ishr( &r
[0], &r
[0], &r
[1] );
2283 STORE( &r
[0], 0, chan_index
);
2287 case TGSI_OPCODE_AND
:
2288 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2289 FETCH( &r
[0], 0, chan_index
);
2290 FETCH( &r
[1], 1, chan_index
);
2291 micro_and( &r
[0], &r
[0], &r
[1] );
2292 STORE( &r
[0], 0, chan_index
);
2296 case TGSI_OPCODE_OR
:
2297 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2298 FETCH( &r
[0], 0, chan_index
);
2299 FETCH( &r
[1], 1, chan_index
);
2300 micro_or( &r
[0], &r
[0], &r
[1] );
2301 STORE( &r
[0], 0, chan_index
);
2305 case TGSI_OPCODE_MOD
:
2309 case TGSI_OPCODE_XOR
:
2310 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2311 FETCH( &r
[0], 0, chan_index
);
2312 FETCH( &r
[1], 1, chan_index
);
2313 micro_xor( &r
[0], &r
[0], &r
[1] );
2314 STORE( &r
[0], 0, chan_index
);
2318 case TGSI_OPCODE_SAD
:
2322 case TGSI_OPCODE_TXF
:
2326 case TGSI_OPCODE_TXQ
:
2330 case TGSI_OPCODE_EMIT
:
2331 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += 16;
2332 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
2335 case TGSI_OPCODE_ENDPRIM
:
2336 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]++;
2337 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]] = 0;
2340 case TGSI_OPCODE_LOOP
:
2341 /* fall-through (for now) */
2342 case TGSI_OPCODE_BGNLOOP2
:
2343 /* push LoopMask and ContMasks */
2344 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2345 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2346 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2347 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2350 case TGSI_OPCODE_ENDLOOP
:
2351 /* fall-through (for now at least) */
2352 case TGSI_OPCODE_ENDLOOP2
:
2353 /* Restore ContMask, but don't pop */
2354 assert(mach
->ContStackTop
> 0);
2355 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
2356 if (mach
->LoopMask
) {
2357 /* repeat loop: jump to instruction just past BGNLOOP */
2358 *pc
= inst
->InstructionExtLabel
.Label
+ 1;
2361 /* exit loop: pop LoopMask */
2362 assert(mach
->LoopStackTop
> 0);
2363 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
2365 assert(mach
->ContStackTop
> 0);
2366 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
2368 UPDATE_EXEC_MASK(mach
);
2371 case TGSI_OPCODE_BRK
:
2372 /* turn off loop channels for each enabled exec channel */
2373 mach
->LoopMask
&= ~mach
->ExecMask
;
2374 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2375 UPDATE_EXEC_MASK(mach
);
2378 case TGSI_OPCODE_CONT
:
2379 /* turn off cont channels for each enabled exec channel */
2380 mach
->ContMask
&= ~mach
->ExecMask
;
2381 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2382 UPDATE_EXEC_MASK(mach
);
2385 case TGSI_OPCODE_BGNSUB
:
2389 case TGSI_OPCODE_ENDSUB
:
2393 case TGSI_OPCODE_NOISE1
:
2397 case TGSI_OPCODE_NOISE2
:
2401 case TGSI_OPCODE_NOISE3
:
2405 case TGSI_OPCODE_NOISE4
:
2409 case TGSI_OPCODE_NOP
:
2419 * Run TGSI interpreter.
2420 * \return bitmask of "alive" quad components
2423 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
2428 mach
->CondMask
= 0xf;
2429 mach
->LoopMask
= 0xf;
2430 mach
->ContMask
= 0xf;
2431 mach
->FuncMask
= 0xf;
2432 mach
->ExecMask
= 0xf;
2434 mach
->CondStackTop
= 0; /* temporarily subvert this assertion */
2435 assert(mach
->CondStackTop
== 0);
2436 assert(mach
->LoopStackTop
== 0);
2437 assert(mach
->ContStackTop
== 0);
2438 assert(mach
->CallStackTop
== 0);
2440 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
2441 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
2443 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
2444 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
2445 mach
->Primitives
[0] = 0;
2449 /* execute declarations (interpolants) */
2450 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
2451 exec_declaration( mach
, mach
->Declarations
+i
);
2454 /* execute instructions, until pc is set to -1 */
2456 assert(pc
< mach
->NumInstructions
);
2457 exec_instruction( mach
, mach
->Instructions
+ pc
, &pc
);
2461 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2462 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
2464 * Scale back depth component.
2466 for (i
= 0; i
< 4; i
++)
2467 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
2471 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];