1 /**************************************************************************
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
30 * TGSI interpreter/executor.
32 * Flow control information:
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
 * The ExecMask is the bitwise AND of several other masks (CondMask, LoopMask,
 * ContMask, Switch.mask and FuncMask; see UPDATE_EXEC_MASK).  CondMask,
 * LoopMask and ContMask are controlled by the flow control instructions
 * (namely: IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
68 micro_iabs(union tgsi_exec_channel
*dst
,
69 const union tgsi_exec_channel
*src
)
71 dst
->i
[0] = src
->i
[0] >= 0 ? src
->i
[0] : -src
->i
[0];
72 dst
->i
[1] = src
->i
[1] >= 0 ? src
->i
[1] : -src
->i
[1];
73 dst
->i
[2] = src
->i
[2] >= 0 ? src
->i
[2] : -src
->i
[2];
74 dst
->i
[3] = src
->i
[3] >= 0 ? src
->i
[3] : -src
->i
[3];
78 micro_ineg(union tgsi_exec_channel
*dst
,
79 const union tgsi_exec_channel
*src
)
81 dst
->i
[0] = -src
->i
[0];
82 dst
->i
[1] = -src
->i
[1];
83 dst
->i
[2] = -src
->i
[2];
84 dst
->i
[3] = -src
->i
[3];
88 micro_mov(union tgsi_exec_channel
*dst
,
89 const union tgsi_exec_channel
*src
)
91 dst
->u
[0] = src
->u
[0];
92 dst
->u
[1] = src
->u
[1];
93 dst
->u
[2] = src
->u
[2];
94 dst
->u
[3] = src
->u
[3];
/*
 * Element indices of the four members of a quad.  They are used as
 * subscripts into a channel's per-element arrays (e.g. src->f[TILE_...]).
 */
#define TILE_TOP_LEFT       0
#define TILE_TOP_RIGHT      1
#define TILE_BOTTOM_LEFT    2
#define TILE_BOTTOM_RIGHT   3
107 enum tgsi_exec_datatype
{
108 TGSI_EXEC_DATA_FLOAT
,
/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel).
 * Each name is a plain alias for the corresponding TGSI_EXEC_TEMP_* constant.
 */

/* bit-pattern constants */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C

/* numeric constants */
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C

/* interpreter state kept in the temporary file */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C

/* more numeric constants */
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C

/* register-file bases */
#define TEMP_R0            TGSI_EXEC_TEMP_R0
#define TEMP_P0            TGSI_EXEC_TEMP_P0
/** Non-zero when bit CHAN is set in the write mask of INST's destination 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))

/** Non-zero when bit CHAN is set in the write mask of INST's destination 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))

/**
 * Loop header: run the following statement once for every channel in
 * 0 .. NUM_CHANNELS-1 whose bit is set in destination 0's write mask.
 * CHAN must be an assignable lvalue.
 *
 * The expansion is an unbraced for/if pair, so an `else` written directly
 * after the controlled statement binds to the `if` inside this macro.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same as FOR_EACH_ENABLED_CHANNEL, but tests destination 1's write mask. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
/**
 * Recompute MACH->ExecMask as the bitwise AND of CondMask, LoopMask,
 * ContMask, Switch.mask and FuncMask.
 *
 * MACH is a pointer expression; it is evaluated several times, so it must
 * not have side effects.  The parameter is parenthesized so that arguments
 * such as `p + 1` expand correctly.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & (MACH)->ContMask & (MACH)->Switch.mask & (MACH)->FuncMask)
167 static const union tgsi_exec_channel ZeroVec
=
168 { { 0.0, 0.0, 0.0, 0.0 } };
171 #define CHECK_INF_OR_NAN(chan) do {\
172 assert(!util_is_inf_or_nan((chan)->f[0]));\
173 assert(!util_is_inf_or_nan((chan)->f[1]));\
174 assert(!util_is_inf_or_nan((chan)->f[2]));\
175 assert(!util_is_inf_or_nan((chan)->f[3]));\
181 print_chan(const char *msg
, const union tgsi_exec_channel
*chan
)
183 debug_printf("%s = {%f, %f, %f, %f}\n",
184 msg
, chan
->f
[0], chan
->f
[1], chan
->f
[2], chan
->f
[3]);
191 print_temp(const struct tgsi_exec_machine
*mach
, uint index
)
193 const struct tgsi_exec_vector
*tmp
= &mach
->Temps
[index
];
195 debug_printf("Temp[%u] =\n", index
);
196 for (i
= 0; i
< 4; i
++) {
197 debug_printf(" %c: { %f, %f, %f, %f }\n",
209 * Check if there's a potential src/dst register data dependency when
210 * using SOA execution.
213 * This would expand into:
218 * The second instruction will have the wrong value for t0 if executed as-is.
221 tgsi_check_soa_dependencies(const struct tgsi_full_instruction
*inst
)
225 uint writemask
= inst
->Dst
[0].Register
.WriteMask
;
226 if (writemask
== TGSI_WRITEMASK_X
||
227 writemask
== TGSI_WRITEMASK_Y
||
228 writemask
== TGSI_WRITEMASK_Z
||
229 writemask
== TGSI_WRITEMASK_W
||
230 writemask
== TGSI_WRITEMASK_NONE
) {
231 /* no chance of data dependency */
235 /* loop over src regs */
236 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
237 if ((inst
->Src
[i
].Register
.File
==
238 inst
->Dst
[0].Register
.File
) &&
239 (inst
->Src
[i
].Register
.Index
==
240 inst
->Dst
[0].Register
.Index
)) {
241 /* loop over dest channels */
242 uint channelsWritten
= 0x0;
243 FOR_EACH_ENABLED_CHANNEL(*inst
, chan
) {
244 /* check if we're reading a channel that's been written */
245 uint swizzle
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[i
], chan
);
246 if (channelsWritten
& (1 << swizzle
)) {
250 channelsWritten
|= (1 << chan
);
259 * Initialize machine state by expanding tokens to full instructions,
260 * allocating temporary storage, setting up constants, etc.
261 * After this, we can call tgsi_exec_machine_run() many times.
264 tgsi_exec_machine_bind_shader(
265 struct tgsi_exec_machine
*mach
,
266 const struct tgsi_token
*tokens
,
268 struct tgsi_sampler
**samplers
)
271 struct tgsi_parse_context parse
;
272 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
273 struct tgsi_full_instruction
*instructions
;
274 struct tgsi_full_declaration
*declarations
;
275 uint maxInstructions
= 10, numInstructions
= 0;
276 uint maxDeclarations
= 10, numDeclarations
= 0;
280 tgsi_dump(tokens
, 0);
285 mach
->Tokens
= tokens
;
286 mach
->Samplers
= samplers
;
288 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
289 if (k
!= TGSI_PARSE_OK
) {
290 debug_printf( "Problem parsing!\n" );
294 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
298 declarations
= (struct tgsi_full_declaration
*)
299 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
305 instructions
= (struct tgsi_full_instruction
*)
306 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
309 FREE( declarations
);
313 while( !tgsi_parse_end_of_tokens( &parse
) ) {
314 uint pointer
= parse
.Position
;
317 tgsi_parse_token( &parse
);
318 switch( parse
.FullToken
.Token
.Type
) {
319 case TGSI_TOKEN_TYPE_DECLARATION
:
320 /* save expanded declaration */
321 if (numDeclarations
== maxDeclarations
) {
322 declarations
= REALLOC(declarations
,
324 * sizeof(struct tgsi_full_declaration
),
325 (maxDeclarations
+ 10)
326 * sizeof(struct tgsi_full_declaration
));
327 maxDeclarations
+= 10;
329 if (parse
.FullToken
.FullDeclaration
.Declaration
.File
== TGSI_FILE_OUTPUT
) {
331 for (reg
= parse
.FullToken
.FullDeclaration
.Range
.First
;
332 reg
<= parse
.FullToken
.FullDeclaration
.Range
.Last
;
337 memcpy(declarations
+ numDeclarations
,
338 &parse
.FullToken
.FullDeclaration
,
339 sizeof(declarations
[0]));
343 case TGSI_TOKEN_TYPE_IMMEDIATE
:
345 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
347 assert( mach
->ImmLimit
+ 1 <= TGSI_EXEC_NUM_IMMEDIATES
);
349 for( i
= 0; i
< size
; i
++ ) {
350 mach
->Imms
[mach
->ImmLimit
][i
] =
351 parse
.FullToken
.FullImmediate
.u
[i
].Float
;
357 case TGSI_TOKEN_TYPE_INSTRUCTION
:
358 assert( labels
->count
< MAX_LABELS
);
360 labels
->labels
[labels
->count
][0] = instno
;
361 labels
->labels
[labels
->count
][1] = pointer
;
364 /* save expanded instruction */
365 if (numInstructions
== maxInstructions
) {
366 instructions
= REALLOC(instructions
,
368 * sizeof(struct tgsi_full_instruction
),
369 (maxInstructions
+ 10)
370 * sizeof(struct tgsi_full_instruction
));
371 maxInstructions
+= 10;
374 memcpy(instructions
+ numInstructions
,
375 &parse
.FullToken
.FullInstruction
,
376 sizeof(instructions
[0]));
381 case TGSI_TOKEN_TYPE_PROPERTY
:
388 tgsi_parse_free (&parse
);
390 if (mach
->Declarations
) {
391 FREE( mach
->Declarations
);
393 mach
->Declarations
= declarations
;
394 mach
->NumDeclarations
= numDeclarations
;
396 if (mach
->Instructions
) {
397 FREE( mach
->Instructions
);
399 mach
->Instructions
= instructions
;
400 mach
->NumInstructions
= numInstructions
;
404 struct tgsi_exec_machine
*
405 tgsi_exec_machine_create( void )
407 struct tgsi_exec_machine
*mach
;
410 mach
= align_malloc( sizeof *mach
, 16 );
414 memset(mach
, 0, sizeof(*mach
));
416 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_TEMP_ADDR
];
417 mach
->MaxGeometryShaderOutputs
= TGSI_MAX_TOTAL_VERTICES
;
418 mach
->Predicates
= &mach
->Temps
[TGSI_EXEC_TEMP_P0
];
420 /* Setup constants. */
421 for( i
= 0; i
< 4; i
++ ) {
422 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
423 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
424 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
425 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
426 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
427 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
428 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
429 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
430 mach
->Temps
[TEMP_3_I
].xyzw
[TEMP_3_C
].f
[i
] = 3.0f
;
431 mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
].f
[i
] = 0.5f
;
435 /* silence warnings */
449 tgsi_exec_machine_destroy(struct tgsi_exec_machine
*mach
)
452 FREE(mach
->Instructions
);
453 FREE(mach
->Declarations
);
462 union tgsi_exec_channel
*dst
,
463 const union tgsi_exec_channel
*src
)
465 dst
->f
[0] = fabsf( src
->f
[0] );
466 dst
->f
[1] = fabsf( src
->f
[1] );
467 dst
->f
[2] = fabsf( src
->f
[2] );
468 dst
->f
[3] = fabsf( src
->f
[3] );
473 union tgsi_exec_channel
*dst
,
474 const union tgsi_exec_channel
*src0
,
475 const union tgsi_exec_channel
*src1
)
477 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
478 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
479 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
480 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
485 union tgsi_exec_channel
*dst
,
486 const union tgsi_exec_channel
*src
)
488 dst
->f
[0] = ceilf( src
->f
[0] );
489 dst
->f
[1] = ceilf( src
->f
[1] );
490 dst
->f
[2] = ceilf( src
->f
[2] );
491 dst
->f
[3] = ceilf( src
->f
[3] );
496 union tgsi_exec_channel
*dst
,
497 const union tgsi_exec_channel
*src
)
499 dst
->f
[0] = cosf( src
->f
[0] );
500 dst
->f
[1] = cosf( src
->f
[1] );
501 dst
->f
[2] = cosf( src
->f
[2] );
502 dst
->f
[3] = cosf( src
->f
[3] );
507 union tgsi_exec_channel
*dst
,
508 const union tgsi_exec_channel
*src
)
513 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
518 union tgsi_exec_channel
*dst
,
519 const union tgsi_exec_channel
*src
)
524 dst
->f
[3] = src
->f
[TILE_BOTTOM_LEFT
] - src
->f
[TILE_TOP_LEFT
];
529 union tgsi_exec_channel
*dst
,
530 const union tgsi_exec_channel
*src0
,
531 const union tgsi_exec_channel
*src1
)
533 if (src1
->f
[0] != 0) {
534 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
536 if (src1
->f
[1] != 0) {
537 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
539 if (src1
->f
[2] != 0) {
540 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
542 if (src1
->f
[3] != 0) {
543 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
549 union tgsi_exec_channel
*dst
,
550 const union tgsi_exec_channel
*src0
,
551 const union tgsi_exec_channel
*src1
,
552 const union tgsi_exec_channel
*src2
,
553 const union tgsi_exec_channel
*src3
)
555 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
556 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
557 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
558 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
563 union tgsi_exec_channel
*dst
,
564 const union tgsi_exec_channel
*src
)
567 dst
->f
[0] = util_fast_exp2( src
->f
[0] );
568 dst
->f
[1] = util_fast_exp2( src
->f
[1] );
569 dst
->f
[2] = util_fast_exp2( src
->f
[2] );
570 dst
->f
[3] = util_fast_exp2( src
->f
[3] );
574 /* Inf is okay for this instruction, so clamp it to silence assertions. */
576 union tgsi_exec_channel clamped
;
578 for (i
= 0; i
< 4; i
++) {
579 if (src
->f
[i
] > 127.99999f
) {
580 clamped
.f
[i
] = 127.99999f
;
581 } else if (src
->f
[i
] < -126.99999f
) {
582 clamped
.f
[i
] = -126.99999f
;
584 clamped
.f
[i
] = src
->f
[i
];
590 dst
->f
[0] = powf( 2.0f
, src
->f
[0] );
591 dst
->f
[1] = powf( 2.0f
, src
->f
[1] );
592 dst
->f
[2] = powf( 2.0f
, src
->f
[2] );
593 dst
->f
[3] = powf( 2.0f
, src
->f
[3] );
598 micro_float_clamp(union tgsi_exec_channel
*dst
,
599 const union tgsi_exec_channel
*src
)
603 for (i
= 0; i
< 4; i
++) {
604 if (src
->f
[i
] > 0.0f
) {
605 if (src
->f
[i
] > 1.884467e+019f
)
606 dst
->f
[i
] = 1.884467e+019f
;
607 else if (src
->f
[i
] < 5.42101e-020f
)
608 dst
->f
[i
] = 5.42101e-020f
;
610 dst
->f
[i
] = src
->f
[i
];
613 if (src
->f
[i
] < -1.884467e+019f
)
614 dst
->f
[i
] = -1.884467e+019f
;
615 else if (src
->f
[i
] > -5.42101e-020f
)
616 dst
->f
[i
] = -5.42101e-020f
;
618 dst
->f
[i
] = src
->f
[i
];
625 union tgsi_exec_channel
*dst
,
626 const union tgsi_exec_channel
*src
)
628 dst
->f
[0] = floorf( src
->f
[0] );
629 dst
->f
[1] = floorf( src
->f
[1] );
630 dst
->f
[2] = floorf( src
->f
[2] );
631 dst
->f
[3] = floorf( src
->f
[3] );
636 union tgsi_exec_channel
*dst
,
637 const union tgsi_exec_channel
*src
)
639 dst
->f
[0] = src
->f
[0] - floorf( src
->f
[0] );
640 dst
->f
[1] = src
->f
[1] - floorf( src
->f
[1] );
641 dst
->f
[2] = src
->f
[2] - floorf( src
->f
[2] );
642 dst
->f
[3] = src
->f
[3] - floorf( src
->f
[3] );
647 union tgsi_exec_channel
*dst
,
648 const union tgsi_exec_channel
*src
)
651 dst
->f
[0] = util_fast_log2( src
->f
[0] );
652 dst
->f
[1] = util_fast_log2( src
->f
[1] );
653 dst
->f
[2] = util_fast_log2( src
->f
[2] );
654 dst
->f
[3] = util_fast_log2( src
->f
[3] );
656 dst
->f
[0] = logf( src
->f
[0] ) * 1.442695f
;
657 dst
->f
[1] = logf( src
->f
[1] ) * 1.442695f
;
658 dst
->f
[2] = logf( src
->f
[2] ) * 1.442695f
;
659 dst
->f
[3] = logf( src
->f
[3] ) * 1.442695f
;
665 union tgsi_exec_channel
*dst
,
666 const union tgsi_exec_channel
*src0
,
667 const union tgsi_exec_channel
*src1
,
668 const union tgsi_exec_channel
*src2
,
669 const union tgsi_exec_channel
*src3
)
671 dst
->f
[0] = src0
->f
[0] <= src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
672 dst
->f
[1] = src0
->f
[1] <= src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
673 dst
->f
[2] = src0
->f
[2] <= src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
674 dst
->f
[3] = src0
->f
[3] <= src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
679 union tgsi_exec_channel
*dst
,
680 const union tgsi_exec_channel
*src0
,
681 const union tgsi_exec_channel
*src1
,
682 const union tgsi_exec_channel
*src2
,
683 const union tgsi_exec_channel
*src3
)
685 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
686 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
687 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
688 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
693 union tgsi_exec_channel
*dst
,
694 const union tgsi_exec_channel
*src0
,
695 const union tgsi_exec_channel
*src1
)
697 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
698 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
699 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
700 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
705 union tgsi_exec_channel
*dst
,
706 const union tgsi_exec_channel
*src0
,
707 const union tgsi_exec_channel
*src1
)
709 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
710 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
711 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
712 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
717 union tgsi_exec_channel
*dst
,
718 const union tgsi_exec_channel
*src0
,
719 const union tgsi_exec_channel
*src1
)
721 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
722 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
723 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
724 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
730 union tgsi_exec_channel
*dst0
,
731 union tgsi_exec_channel
*dst1
,
732 const union tgsi_exec_channel
*src0
,
733 const union tgsi_exec_channel
*src1
)
735 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
736 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
737 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
738 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
749 union tgsi_exec_channel
*dst0
,
750 union tgsi_exec_channel
*dst1
,
751 const union tgsi_exec_channel
*src0
,
752 const union tgsi_exec_channel
*src1
)
754 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
755 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
756 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
757 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
769 union tgsi_exec_channel
*dst
,
770 const union tgsi_exec_channel
*src0
,
771 const union tgsi_exec_channel
*src1
,
772 const union tgsi_exec_channel
*src2
)
774 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
775 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
776 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
777 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
783 union tgsi_exec_channel
*dst
,
784 const union tgsi_exec_channel
*src
)
786 dst
->f
[0] = -src
->f
[0];
787 dst
->f
[1] = -src
->f
[1];
788 dst
->f
[2] = -src
->f
[2];
789 dst
->f
[3] = -src
->f
[3];
794 union tgsi_exec_channel
*dst
,
795 const union tgsi_exec_channel
*src0
,
796 const union tgsi_exec_channel
*src1
)
799 dst
->f
[0] = util_fast_pow( src0
->f
[0], src1
->f
[0] );
800 dst
->f
[1] = util_fast_pow( src0
->f
[1], src1
->f
[1] );
801 dst
->f
[2] = util_fast_pow( src0
->f
[2], src1
->f
[2] );
802 dst
->f
[3] = util_fast_pow( src0
->f
[3], src1
->f
[3] );
804 dst
->f
[0] = powf( src0
->f
[0], src1
->f
[0] );
805 dst
->f
[1] = powf( src0
->f
[1], src1
->f
[1] );
806 dst
->f
[2] = powf( src0
->f
[2], src1
->f
[2] );
807 dst
->f
[3] = powf( src0
->f
[3], src1
->f
[3] );
813 union tgsi_exec_channel
*dst
,
814 const union tgsi_exec_channel
*src
)
816 dst
->f
[0] = floorf( src
->f
[0] + 0.5f
);
817 dst
->f
[1] = floorf( src
->f
[1] + 0.5f
);
818 dst
->f
[2] = floorf( src
->f
[2] + 0.5f
);
819 dst
->f
[3] = floorf( src
->f
[3] + 0.5f
);
824 union tgsi_exec_channel
*dst
,
825 const union tgsi_exec_channel
*src
)
827 dst
->f
[0] = src
->f
[0] < 0.0f
? -1.0f
: src
->f
[0] > 0.0f
? 1.0f
: 0.0f
;
828 dst
->f
[1] = src
->f
[1] < 0.0f
? -1.0f
: src
->f
[1] > 0.0f
? 1.0f
: 0.0f
;
829 dst
->f
[2] = src
->f
[2] < 0.0f
? -1.0f
: src
->f
[2] > 0.0f
? 1.0f
: 0.0f
;
830 dst
->f
[3] = src
->f
[3] < 0.0f
? -1.0f
: src
->f
[3] > 0.0f
? 1.0f
: 0.0f
;
835 union tgsi_exec_channel
*dst
,
836 const union tgsi_exec_channel
*src0
)
838 dst
->f
[0] = (float) (int) src0
->f
[0];
839 dst
->f
[1] = (float) (int) src0
->f
[1];
840 dst
->f
[2] = (float) (int) src0
->f
[2];
841 dst
->f
[3] = (float) (int) src0
->f
[3];
846 union tgsi_exec_channel
*dst
,
847 const union tgsi_exec_channel
*src
)
849 dst
->f
[0] = sinf( src
->f
[0] );
850 dst
->f
[1] = sinf( src
->f
[1] );
851 dst
->f
[2] = sinf( src
->f
[2] );
852 dst
->f
[3] = sinf( src
->f
[3] );
856 micro_sqrt( union tgsi_exec_channel
*dst
,
857 const union tgsi_exec_channel
*src
)
859 dst
->f
[0] = sqrtf( src
->f
[0] );
860 dst
->f
[1] = sqrtf( src
->f
[1] );
861 dst
->f
[2] = sqrtf( src
->f
[2] );
862 dst
->f
[3] = sqrtf( src
->f
[3] );
867 union tgsi_exec_channel
*dst
,
868 const union tgsi_exec_channel
*src0
,
869 const union tgsi_exec_channel
*src1
)
871 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
872 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
873 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
874 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
878 fetch_src_file_channel(
879 const struct tgsi_exec_machine
*mach
,
882 const union tgsi_exec_channel
*index
,
883 union tgsi_exec_channel
*chan
)
891 case TGSI_FILE_CONSTANT
:
892 assert(mach
->Consts
);
896 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
900 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
904 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
908 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
911 case TGSI_FILE_INPUT
:
912 case TGSI_FILE_SYSTEM_VALUE
:
913 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
914 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
915 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
916 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
919 case TGSI_FILE_TEMPORARY
:
920 assert(index
->i
[0] < TGSI_EXEC_NUM_TEMPS
);
921 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
922 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
923 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
924 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
927 case TGSI_FILE_IMMEDIATE
:
928 assert( index
->i
[0] < (int) mach
->ImmLimit
);
929 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
930 assert( index
->i
[1] < (int) mach
->ImmLimit
);
931 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
932 assert( index
->i
[2] < (int) mach
->ImmLimit
);
933 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
934 assert( index
->i
[3] < (int) mach
->ImmLimit
);
935 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
938 case TGSI_FILE_ADDRESS
:
939 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
940 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
941 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
942 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
945 case TGSI_FILE_PREDICATE
:
946 assert(index
->i
[0] < TGSI_EXEC_NUM_PREDS
);
947 assert(index
->i
[1] < TGSI_EXEC_NUM_PREDS
);
948 assert(index
->i
[2] < TGSI_EXEC_NUM_PREDS
);
949 assert(index
->i
[3] < TGSI_EXEC_NUM_PREDS
);
950 chan
->u
[0] = mach
->Predicates
[0].xyzw
[swizzle
].u
[0];
951 chan
->u
[1] = mach
->Predicates
[0].xyzw
[swizzle
].u
[1];
952 chan
->u
[2] = mach
->Predicates
[0].xyzw
[swizzle
].u
[2];
953 chan
->u
[3] = mach
->Predicates
[0].xyzw
[swizzle
].u
[3];
956 case TGSI_FILE_OUTPUT
:
957 /* vertex/fragment output vars can be read too */
958 chan
->u
[0] = mach
->Outputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
959 chan
->u
[1] = mach
->Outputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
960 chan
->u
[2] = mach
->Outputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
961 chan
->u
[3] = mach
->Outputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
975 fetch_source(const struct tgsi_exec_machine
*mach
,
976 union tgsi_exec_channel
*chan
,
977 const struct tgsi_full_src_register
*reg
,
978 const uint chan_index
,
979 enum tgsi_exec_datatype src_datatype
)
981 union tgsi_exec_channel index
;
984 /* We start with a direct index into a register file.
988 * file = Register.File
989 * [1] = Register.Index
994 index
.i
[3] = reg
->Register
.Index
;
996 /* There is an extra source register that indirectly subscripts
997 * a register file. The direct index now becomes an offset
998 * that is being added to the indirect register.
1002 * ind = Indirect.File
1003 * [2] = Indirect.Index
1004 * .x = Indirect.SwizzleX
1006 if (reg
->Register
.Indirect
) {
1007 union tgsi_exec_channel index2
;
1008 union tgsi_exec_channel indir_index
;
1009 const uint execmask
= mach
->ExecMask
;
1012 /* which address register (always zero now) */
1016 index2
.i
[3] = reg
->Indirect
.Index
;
1018 /* get current value of address register[swizzle] */
1019 swizzle
= tgsi_util_get_src_register_swizzle( ®
->Indirect
, CHAN_X
);
1020 fetch_src_file_channel(
1027 /* add value of address register to the offset */
1028 index
.i
[0] += (int) indir_index
.f
[0];
1029 index
.i
[1] += (int) indir_index
.f
[1];
1030 index
.i
[2] += (int) indir_index
.f
[2];
1031 index
.i
[3] += (int) indir_index
.f
[3];
1033 /* for disabled execution channels, zero-out the index to
1034 * avoid using a potential garbage value.
1036 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1037 if ((execmask
& (1 << i
)) == 0)
1042 /* There is an extra source register that is a second
1043 * subscript to a register file. Effectively it means that
1044 * the register file is actually a 2D array of registers.
1046 * file[1][3] == file[1*sizeof(file[1])+3],
1048 * [3] = Dimension.Index
1050 if (reg
->Register
.Dimension
) {
1051 /* The size of the first-order array depends on the register file type.
1052 * We need to multiply the index to the first array to get an effective,
1053 * "flat" index that points to the beginning of the second-order array.
1055 switch (reg
->Register
.File
) {
1056 case TGSI_FILE_INPUT
:
1057 case TGSI_FILE_SYSTEM_VALUE
:
1058 index
.i
[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1059 index
.i
[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1060 index
.i
[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1061 index
.i
[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS
;
1063 case TGSI_FILE_CONSTANT
:
1064 index
.i
[0] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1065 index
.i
[1] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1066 index
.i
[2] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1067 index
.i
[3] *= TGSI_EXEC_MAX_CONST_BUFFER
;
1073 index
.i
[0] += reg
->Dimension
.Index
;
1074 index
.i
[1] += reg
->Dimension
.Index
;
1075 index
.i
[2] += reg
->Dimension
.Index
;
1076 index
.i
[3] += reg
->Dimension
.Index
;
1078 /* Again, the second subscript index can be addressed indirectly
1079 * identically to the first one.
1080 * Nothing stops us from indirectly addressing the indirect register,
1081 * but there is no need for that, so we won't exercise it.
1083 * file[1][ind[4].y+3],
1085 * ind = DimIndirect.File
1086 * [4] = DimIndirect.Index
1087 * .y = DimIndirect.SwizzleX
1089 if (reg
->Dimension
.Indirect
) {
1090 union tgsi_exec_channel index2
;
1091 union tgsi_exec_channel indir_index
;
1092 const uint execmask
= mach
->ExecMask
;
1098 index2
.i
[3] = reg
->DimIndirect
.Index
;
1100 swizzle
= tgsi_util_get_src_register_swizzle( ®
->DimIndirect
, CHAN_X
);
1101 fetch_src_file_channel(
1103 reg
->DimIndirect
.File
,
1108 index
.i
[0] += (int) indir_index
.f
[0];
1109 index
.i
[1] += (int) indir_index
.f
[1];
1110 index
.i
[2] += (int) indir_index
.f
[2];
1111 index
.i
[3] += (int) indir_index
.f
[3];
1113 /* for disabled execution channels, zero-out the index to
1114 * avoid using a potential garbage value.
1116 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1117 if ((execmask
& (1 << i
)) == 0)
1122 /* If by any chance there was a need for a 3D array of register
1123 * files, we would have to check whether Dimension is followed
1124 * by a dimension register and continue the saga.
1128 swizzle
= tgsi_util_get_full_src_register_swizzle( reg
, chan_index
);
1129 fetch_src_file_channel(
1136 if (reg
->Register
.Absolute
) {
1137 if (src_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1138 micro_abs(chan
, chan
);
1140 micro_iabs(chan
, chan
);
1144 if (reg
->Register
.Negate
) {
1145 if (src_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1146 micro_neg(chan
, chan
);
1148 micro_ineg(chan
, chan
);
1154 store_dest(struct tgsi_exec_machine
*mach
,
1155 const union tgsi_exec_channel
*chan
,
1156 const struct tgsi_full_dst_register
*reg
,
1157 const struct tgsi_full_instruction
*inst
,
1159 enum tgsi_exec_datatype dst_datatype
)
1162 union tgsi_exec_channel null
;
1163 union tgsi_exec_channel
*dst
;
1164 uint execmask
= mach
->ExecMask
;
1165 int offset
= 0; /* indirection offset */
1168 if (dst_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1169 CHECK_INF_OR_NAN(chan
);
1172 /* There is an extra source register that indirectly subscripts
1173 * a register file. The direct index now becomes an offset
1174 * that is being added to the indirect register.
1178 * ind = Indirect.File
1179 * [2] = Indirect.Index
1180 * .x = Indirect.SwizzleX
1182 if (reg
->Register
.Indirect
) {
1183 union tgsi_exec_channel index
;
1184 union tgsi_exec_channel indir_index
;
1187 /* which address register (always zero for now) */
1191 index
.i
[3] = reg
->Indirect
.Index
;
1193 /* get current value of address register[swizzle] */
1194 swizzle
= tgsi_util_get_src_register_swizzle( ®
->Indirect
, CHAN_X
);
1196 /* fetch values from the address/indirection register */
1197 fetch_src_file_channel(
1204 /* save indirection offset */
1205 offset
= (int) indir_index
.f
[0];
1208 switch (reg
->Register
.File
) {
1209 case TGSI_FILE_NULL
:
1213 case TGSI_FILE_OUTPUT
:
1214 index
= mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1215 + reg
->Register
.Index
;
1216 dst
= &mach
->Outputs
[offset
+ index
].xyzw
[chan_index
];
1218 if (TGSI_PROCESSOR_GEOMETRY
== mach
->Processor
) {
1219 fprintf(stderr
, "STORING OUT[%d] mask(%d), = (", offset
+ index
, execmask
);
1220 for (i
= 0; i
< QUAD_SIZE
; i
++)
1221 if (execmask
& (1 << i
))
1222 fprintf(stderr
, "%f, ", chan
->f
[i
]);
1223 fprintf(stderr
, ")\n");
1228 case TGSI_FILE_TEMPORARY
:
1229 index
= reg
->Register
.Index
;
1230 assert( index
< TGSI_EXEC_NUM_TEMPS
);
1231 dst
= &mach
->Temps
[offset
+ index
].xyzw
[chan_index
];
1234 case TGSI_FILE_ADDRESS
:
1235 index
= reg
->Register
.Index
;
1236 dst
= &mach
->Addrs
[index
].xyzw
[chan_index
];
1239 case TGSI_FILE_LOOP
:
1240 assert(reg
->Register
.Index
== 0);
1241 assert(mach
->LoopCounterStackTop
> 0);
1242 assert(chan_index
== CHAN_X
);
1243 dst
= &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[chan_index
];
1246 case TGSI_FILE_PREDICATE
:
1247 index
= reg
->Register
.Index
;
1248 assert(index
< TGSI_EXEC_NUM_PREDS
);
1249 dst
= &mach
->Predicates
[index
].xyzw
[chan_index
];
1257 if (inst
->Instruction
.Predicate
) {
1259 union tgsi_exec_channel
*pred
;
1261 switch (chan_index
) {
1263 swizzle
= inst
->Predicate
.SwizzleX
;
1266 swizzle
= inst
->Predicate
.SwizzleY
;
1269 swizzle
= inst
->Predicate
.SwizzleZ
;
1272 swizzle
= inst
->Predicate
.SwizzleW
;
1279 assert(inst
->Predicate
.Index
== 0);
1281 pred
= &mach
->Predicates
[inst
->Predicate
.Index
].xyzw
[swizzle
];
1283 if (inst
->Predicate
.Negate
) {
1284 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1286 execmask
&= ~(1 << i
);
1290 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1292 execmask
&= ~(1 << i
);
1298 switch (inst
->Instruction
.Saturate
) {
1300 for (i
= 0; i
< QUAD_SIZE
; i
++)
1301 if (execmask
& (1 << i
))
1302 dst
->i
[i
] = chan
->i
[i
];
1305 case TGSI_SAT_ZERO_ONE
:
1306 for (i
= 0; i
< QUAD_SIZE
; i
++)
1307 if (execmask
& (1 << i
)) {
1308 if (chan
->f
[i
] < 0.0f
)
1310 else if (chan
->f
[i
] > 1.0f
)
1313 dst
->i
[i
] = chan
->i
[i
];
1317 case TGSI_SAT_MINUS_PLUS_ONE
:
1318 for (i
= 0; i
< QUAD_SIZE
; i
++)
1319 if (execmask
& (1 << i
)) {
1320 if (chan
->f
[i
] < -1.0f
)
1322 else if (chan
->f
[i
] > 1.0f
)
1325 dst
->i
[i
] = chan
->i
[i
];
/**
 * Shorthand for fetch_source(): read channel CHAN of source operand INDEX
 * of the current instruction into VAL, with the float data type.
 * Expands in place, so `mach` and `inst` must be visible at the use site.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

/**
 * Shorthand for store_dest(): write VAL to channel CHAN of destination
 * operand INDEX of the current instruction, with the float data type.
 * Expands in place, so `mach` and `inst` must be visible at the use site.
 */
#define STORE(VAL, INDEX, CHAN) \
   store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT)
1342 * Execute ARB-style KIL which is predicated by a src register.
1343 * Kill fragment if any of the four values is less than zero.
1346 exec_kil(struct tgsi_exec_machine
*mach
,
1347 const struct tgsi_full_instruction
*inst
)
1351 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1352 union tgsi_exec_channel r
[1];
1354 /* This mask stores component bits that were already tested. */
1357 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1362 /* unswizzle channel */
1363 swizzle
= tgsi_util_get_full_src_register_swizzle (
1367 /* check if the component has not been already tested */
1368 if (uniquemask
& (1 << swizzle
))
1370 uniquemask
|= 1 << swizzle
;
1372 FETCH(&r
[0], 0, chan_index
);
1373 for (i
= 0; i
< 4; i
++)
1374 if (r
[0].f
[i
] < 0.0f
)
1378 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1382 * Execute NVIDIA-style KIL which is predicated by a condition code.
1383 * Kill fragment if the condition code is TRUE.
1386 exec_kilp(struct tgsi_exec_machine
*mach
,
1387 const struct tgsi_full_instruction
*inst
)
1389 uint kilmask
; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1391 /* "unconditional" kil */
1392 kilmask
= mach
->ExecMask
;
1393 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1397 emit_vertex(struct tgsi_exec_machine
*mach
)
1399 /* FIXME: check for exec mask correctly
1401 for (i = 0; i < QUAD_SIZE; ++i) {
1402 if ((mach->ExecMask & (1 << i)))
1404 if (mach
->ExecMask
) {
1405 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += mach
->NumOutputs
;
1406 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
1411 emit_primitive(struct tgsi_exec_machine
*mach
)
1413 unsigned *prim_count
= &mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0];
1414 /* FIXME: check for exec mask correctly
1416 for (i = 0; i < QUAD_SIZE; ++i) {
1417 if ((mach->ExecMask & (1 << i)))
1419 if (mach
->ExecMask
) {
1421 debug_assert((*prim_count
* mach
->NumOutputs
) < mach
->MaxGeometryShaderOutputs
);
1422 mach
->Primitives
[*prim_count
] = 0;
1427 * Fetch a four texture samples using STR texture coordinates.
1430 fetch_texel( struct tgsi_sampler
*sampler
,
1431 const union tgsi_exec_channel
*s
,
1432 const union tgsi_exec_channel
*t
,
1433 const union tgsi_exec_channel
*p
,
1434 float lodbias
, /* XXX should be float[4] */
1435 union tgsi_exec_channel
*r
,
1436 union tgsi_exec_channel
*g
,
1437 union tgsi_exec_channel
*b
,
1438 union tgsi_exec_channel
*a
)
1441 float rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1443 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, lodbias
, rgba
);
1445 for (j
= 0; j
< 4; j
++) {
1446 r
->f
[j
] = rgba
[0][j
];
1447 g
->f
[j
] = rgba
[1][j
];
1448 b
->f
[j
] = rgba
[2][j
];
1449 a
->f
[j
] = rgba
[3][j
];
1455 exec_tex(struct tgsi_exec_machine
*mach
,
1456 const struct tgsi_full_instruction
*inst
,
1460 const uint unit
= inst
->Src
[1].Register
.Index
;
1461 union tgsi_exec_channel r
[4];
1465 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1467 switch (inst
->Texture
.Texture
) {
1468 case TGSI_TEXTURE_1D
:
1469 case TGSI_TEXTURE_SHADOW1D
:
1471 FETCH(&r
[0], 0, CHAN_X
);
1474 FETCH(&r
[1], 0, CHAN_W
);
1475 micro_div( &r
[0], &r
[0], &r
[1] );
1479 FETCH(&r
[1], 0, CHAN_W
);
1480 lodBias
= r
[2].f
[0];
1485 fetch_texel(mach
->Samplers
[unit
],
1486 &r
[0], &ZeroVec
, &ZeroVec
, lodBias
, /* S, T, P, BIAS */
1487 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1490 case TGSI_TEXTURE_2D
:
1491 case TGSI_TEXTURE_RECT
:
1492 case TGSI_TEXTURE_SHADOW2D
:
1493 case TGSI_TEXTURE_SHADOWRECT
:
1495 FETCH(&r
[0], 0, CHAN_X
);
1496 FETCH(&r
[1], 0, CHAN_Y
);
1497 FETCH(&r
[2], 0, CHAN_Z
);
1500 FETCH(&r
[3], 0, CHAN_W
);
1501 micro_div( &r
[0], &r
[0], &r
[3] );
1502 micro_div( &r
[1], &r
[1], &r
[3] );
1503 micro_div( &r
[2], &r
[2], &r
[3] );
1507 FETCH(&r
[3], 0, CHAN_W
);
1508 lodBias
= r
[3].f
[0];
1513 fetch_texel(mach
->Samplers
[unit
],
1514 &r
[0], &r
[1], &r
[2], lodBias
, /* inputs */
1515 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1518 case TGSI_TEXTURE_3D
:
1519 case TGSI_TEXTURE_CUBE
:
1521 FETCH(&r
[0], 0, CHAN_X
);
1522 FETCH(&r
[1], 0, CHAN_Y
);
1523 FETCH(&r
[2], 0, CHAN_Z
);
1526 FETCH(&r
[3], 0, CHAN_W
);
1527 micro_div( &r
[0], &r
[0], &r
[3] );
1528 micro_div( &r
[1], &r
[1], &r
[3] );
1529 micro_div( &r
[2], &r
[2], &r
[3] );
1533 FETCH(&r
[3], 0, CHAN_W
);
1534 lodBias
= r
[3].f
[0];
1539 fetch_texel(mach
->Samplers
[unit
],
1540 &r
[0], &r
[1], &r
[2], lodBias
,
1541 &r
[0], &r
[1], &r
[2], &r
[3]);
1548 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1549 STORE( &r
[chan_index
], 0, chan_index
);
1554 exec_txd(struct tgsi_exec_machine
*mach
,
1555 const struct tgsi_full_instruction
*inst
)
1557 const uint unit
= inst
->Src
[3].Register
.Index
;
1558 union tgsi_exec_channel r
[4];
1562 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1565 switch (inst
->Texture
.Texture
) {
1566 case TGSI_TEXTURE_1D
:
1567 case TGSI_TEXTURE_SHADOW1D
:
1569 FETCH(&r
[0], 0, CHAN_X
);
1571 fetch_texel(mach
->Samplers
[unit
],
1572 &r
[0], &ZeroVec
, &ZeroVec
, 0.0f
, /* S, T, P, BIAS */
1573 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1576 case TGSI_TEXTURE_2D
:
1577 case TGSI_TEXTURE_RECT
:
1578 case TGSI_TEXTURE_SHADOW2D
:
1579 case TGSI_TEXTURE_SHADOWRECT
:
1581 FETCH(&r
[0], 0, CHAN_X
);
1582 FETCH(&r
[1], 0, CHAN_Y
);
1583 FETCH(&r
[2], 0, CHAN_Z
);
1585 fetch_texel(mach
->Samplers
[unit
],
1586 &r
[0], &r
[1], &r
[2], 0.0f
, /* inputs */
1587 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1590 case TGSI_TEXTURE_3D
:
1591 case TGSI_TEXTURE_CUBE
:
1593 FETCH(&r
[0], 0, CHAN_X
);
1594 FETCH(&r
[1], 0, CHAN_Y
);
1595 FETCH(&r
[2], 0, CHAN_Z
);
1597 fetch_texel(mach
->Samplers
[unit
],
1598 &r
[0], &r
[1], &r
[2], 0.0f
,
1599 &r
[0], &r
[1], &r
[2], &r
[3]);
1606 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
1607 STORE(&r
[chan_index
], 0, chan_index
);
1613 * Evaluate a constant-valued coefficient at the position of the
1618 struct tgsi_exec_machine
*mach
,
1624 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1625 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1630 * Evaluate a linear-valued coefficient at the position of the
1635 struct tgsi_exec_machine
*mach
,
1639 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1640 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1641 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1642 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1643 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1644 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
1645 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
1646 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
1647 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
1651 * Evaluate a perspective-valued coefficient at the position of the
1655 eval_perspective_coef(
1656 struct tgsi_exec_machine
*mach
,
1660 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1661 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1662 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1663 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1664 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1665 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
1666 /* divide by W here */
1667 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
1668 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
1669 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
1670 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
/**
 * Signature of the eval_*_coef() routines, which fill one channel of one
 * input attribute for the current quad.
 */
typedef void (* eval_coef_func)(
   struct tgsi_exec_machine *mach,
   unsigned attrib,
   unsigned chan );
1680 exec_declaration(struct tgsi_exec_machine
*mach
,
1681 const struct tgsi_full_declaration
*decl
)
1683 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1684 if (decl
->Declaration
.File
== TGSI_FILE_INPUT
||
1685 decl
->Declaration
.File
== TGSI_FILE_SYSTEM_VALUE
) {
1686 uint first
, last
, mask
;
1688 first
= decl
->Range
.First
;
1689 last
= decl
->Range
.Last
;
1690 mask
= decl
->Declaration
.UsageMask
;
1692 if (decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
) {
1693 assert(decl
->Semantic
.Index
== 0);
1694 assert(first
== last
);
1695 assert(mask
== TGSI_WRITEMASK_XYZW
);
1697 mach
->Inputs
[first
] = mach
->QuadPos
;
1698 } else if (decl
->Semantic
.Name
== TGSI_SEMANTIC_FACE
) {
1701 assert(decl
->Semantic
.Index
== 0);
1702 assert(first
== last
);
1704 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1705 mach
->Inputs
[first
].xyzw
[0].f
[i
] = mach
->Face
;
1708 eval_coef_func eval
;
1711 switch (decl
->Declaration
.Interpolate
) {
1712 case TGSI_INTERPOLATE_CONSTANT
:
1713 eval
= eval_constant_coef
;
1716 case TGSI_INTERPOLATE_LINEAR
:
1717 eval
= eval_linear_coef
;
1720 case TGSI_INTERPOLATE_PERSPECTIVE
:
1721 eval
= eval_perspective_coef
;
1729 for (j
= 0; j
< NUM_CHANNELS
; j
++) {
1730 if (mask
& (1 << j
)) {
1731 for (i
= first
; i
<= last
; i
++) {
/**
 * Per-channel operation used by the exec_vector_*() helpers.
 * `src` points to one channel for unary operations and to an array of two
 * or three channels for binary and trinary operations.
 */
typedef void (* micro_op)(union tgsi_exec_channel *dst,
                          const union tgsi_exec_channel *src);
1745 exec_vector_unary(struct tgsi_exec_machine
*mach
,
1746 const struct tgsi_full_instruction
*inst
,
1748 enum tgsi_exec_datatype dst_datatype
,
1749 enum tgsi_exec_datatype src_datatype
)
1752 struct tgsi_exec_vector dst
;
1754 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1755 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1756 union tgsi_exec_channel src
;
1758 fetch_source(mach
, &src
, &inst
->Src
[0], chan
, src_datatype
);
1759 op(&dst
.xyzw
[chan
], &src
);
1762 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1763 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1764 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
1770 exec_vector_binary(struct tgsi_exec_machine
*mach
,
1771 const struct tgsi_full_instruction
*inst
,
1773 enum tgsi_exec_datatype dst_datatype
,
1774 enum tgsi_exec_datatype src_datatype
)
1777 struct tgsi_exec_vector dst
;
1779 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1780 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1781 union tgsi_exec_channel src
[2];
1783 fetch_source(mach
, &src
[0], &inst
->Src
[0], chan
, src_datatype
);
1784 fetch_source(mach
, &src
[1], &inst
->Src
[1], chan
, src_datatype
);
1785 op(&dst
.xyzw
[chan
], src
);
1788 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1789 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1790 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
1796 exec_vector_trinary(struct tgsi_exec_machine
*mach
,
1797 const struct tgsi_full_instruction
*inst
,
1799 enum tgsi_exec_datatype dst_datatype
,
1800 enum tgsi_exec_datatype src_datatype
)
1803 struct tgsi_exec_vector dst
;
1805 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1806 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1807 union tgsi_exec_channel src
[3];
1809 fetch_source(mach
, &src
[0], &inst
->Src
[0], chan
, src_datatype
);
1810 fetch_source(mach
, &src
[1], &inst
->Src
[1], chan
, src_datatype
);
1811 fetch_source(mach
, &src
[2], &inst
->Src
[2], chan
, src_datatype
);
1812 op(&dst
.xyzw
[chan
], src
);
1815 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1816 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1817 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
1823 exec_break(struct tgsi_exec_machine
*mach
)
1825 if (mach
->BreakType
== TGSI_EXEC_BREAK_INSIDE_LOOP
) {
1826 /* turn off loop channels for each enabled exec channel */
1827 mach
->LoopMask
&= ~mach
->ExecMask
;
1828 /* Todo: if mach->LoopMask == 0, jump to end of loop */
1829 UPDATE_EXEC_MASK(mach
);
1831 assert(mach
->BreakType
== TGSI_EXEC_BREAK_INSIDE_SWITCH
);
1833 mach
->Switch
.mask
= 0x0;
1835 UPDATE_EXEC_MASK(mach
);
1840 exec_switch(struct tgsi_exec_machine
*mach
,
1841 const struct tgsi_full_instruction
*inst
)
1843 assert(mach
->SwitchStackTop
< TGSI_EXEC_MAX_SWITCH_NESTING
);
1844 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
1846 mach
->SwitchStack
[mach
->SwitchStackTop
++] = mach
->Switch
;
1847 fetch_source(mach
, &mach
->Switch
.selector
, &inst
->Src
[0], CHAN_X
, TGSI_EXEC_DATA_UINT
);
1848 mach
->Switch
.mask
= 0x0;
1849 mach
->Switch
.defaultMask
= 0x0;
1851 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
1852 mach
->BreakType
= TGSI_EXEC_BREAK_INSIDE_SWITCH
;
1854 UPDATE_EXEC_MASK(mach
);
1858 exec_case(struct tgsi_exec_machine
*mach
,
1859 const struct tgsi_full_instruction
*inst
)
1861 uint prevMask
= mach
->SwitchStack
[mach
->SwitchStackTop
- 1].mask
;
1862 union tgsi_exec_channel src
;
1865 fetch_source(mach
, &src
, &inst
->Src
[0], CHAN_X
, TGSI_EXEC_DATA_UINT
);
1867 if (mach
->Switch
.selector
.u
[0] == src
.u
[0]) {
1870 if (mach
->Switch
.selector
.u
[1] == src
.u
[1]) {
1873 if (mach
->Switch
.selector
.u
[2] == src
.u
[2]) {
1876 if (mach
->Switch
.selector
.u
[3] == src
.u
[3]) {
1880 mach
->Switch
.defaultMask
|= mask
;
1882 mach
->Switch
.mask
|= mask
& prevMask
;
1884 UPDATE_EXEC_MASK(mach
);
1888 exec_default(struct tgsi_exec_machine
*mach
)
1890 uint prevMask
= mach
->SwitchStack
[mach
->SwitchStackTop
- 1].mask
;
1892 mach
->Switch
.mask
|= ~mach
->Switch
.defaultMask
& prevMask
;
1894 UPDATE_EXEC_MASK(mach
);
1898 exec_endswitch(struct tgsi_exec_machine
*mach
)
1900 mach
->Switch
= mach
->SwitchStack
[--mach
->SwitchStackTop
];
1901 mach
->BreakType
= mach
->BreakStack
[--mach
->BreakStackTop
];
1903 UPDATE_EXEC_MASK(mach
);
1907 micro_i2f(union tgsi_exec_channel
*dst
,
1908 const union tgsi_exec_channel
*src
)
1910 dst
->f
[0] = (float)src
->i
[0];
1911 dst
->f
[1] = (float)src
->i
[1];
1912 dst
->f
[2] = (float)src
->i
[2];
1913 dst
->f
[3] = (float)src
->i
[3];
1917 micro_not(union tgsi_exec_channel
*dst
,
1918 const union tgsi_exec_channel
*src
)
1920 dst
->u
[0] = ~src
->u
[0];
1921 dst
->u
[1] = ~src
->u
[1];
1922 dst
->u
[2] = ~src
->u
[2];
1923 dst
->u
[3] = ~src
->u
[3];
1927 micro_shl(union tgsi_exec_channel
*dst
,
1928 const union tgsi_exec_channel
*src
)
1930 dst
->u
[0] = src
[0].u
[0] << src
[1].u
[0];
1931 dst
->u
[1] = src
[0].u
[1] << src
[1].u
[1];
1932 dst
->u
[2] = src
[0].u
[2] << src
[1].u
[2];
1933 dst
->u
[3] = src
[0].u
[3] << src
[1].u
[3];
1937 micro_and(union tgsi_exec_channel
*dst
,
1938 const union tgsi_exec_channel
*src
)
1940 dst
->u
[0] = src
[0].u
[0] & src
[1].u
[0];
1941 dst
->u
[1] = src
[0].u
[1] & src
[1].u
[1];
1942 dst
->u
[2] = src
[0].u
[2] & src
[1].u
[2];
1943 dst
->u
[3] = src
[0].u
[3] & src
[1].u
[3];
1947 micro_or(union tgsi_exec_channel
*dst
,
1948 const union tgsi_exec_channel
*src
)
1950 dst
->u
[0] = src
[0].u
[0] | src
[1].u
[0];
1951 dst
->u
[1] = src
[0].u
[1] | src
[1].u
[1];
1952 dst
->u
[2] = src
[0].u
[2] | src
[1].u
[2];
1953 dst
->u
[3] = src
[0].u
[3] | src
[1].u
[3];
1957 micro_xor(union tgsi_exec_channel
*dst
,
1958 const union tgsi_exec_channel
*src
)
1960 dst
->u
[0] = src
[0].u
[0] ^ src
[1].u
[0];
1961 dst
->u
[1] = src
[0].u
[1] ^ src
[1].u
[1];
1962 dst
->u
[2] = src
[0].u
[2] ^ src
[1].u
[2];
1963 dst
->u
[3] = src
[0].u
[3] ^ src
[1].u
[3];
1967 micro_f2i(union tgsi_exec_channel
*dst
,
1968 const union tgsi_exec_channel
*src
)
1970 dst
->i
[0] = (int)src
->f
[0];
1971 dst
->i
[1] = (int)src
->f
[1];
1972 dst
->i
[2] = (int)src
->f
[2];
1973 dst
->i
[3] = (int)src
->f
[3];
1977 micro_idiv(union tgsi_exec_channel
*dst
,
1978 const union tgsi_exec_channel
*src
)
1980 dst
->i
[0] = src
[0].i
[0] / src
[1].i
[0];
1981 dst
->i
[1] = src
[0].i
[1] / src
[1].i
[1];
1982 dst
->i
[2] = src
[0].i
[2] / src
[1].i
[2];
1983 dst
->i
[3] = src
[0].i
[3] / src
[1].i
[3];
1987 micro_imax(union tgsi_exec_channel
*dst
,
1988 const union tgsi_exec_channel
*src
)
1990 dst
->i
[0] = src
[0].i
[0] > src
[1].i
[0] ? src
[0].i
[0] : src
[1].i
[0];
1991 dst
->i
[1] = src
[0].i
[1] > src
[1].i
[1] ? src
[0].i
[1] : src
[1].i
[1];
1992 dst
->i
[2] = src
[0].i
[2] > src
[1].i
[2] ? src
[0].i
[2] : src
[1].i
[2];
1993 dst
->i
[3] = src
[0].i
[3] > src
[1].i
[3] ? src
[0].i
[3] : src
[1].i
[3];
1997 micro_imin(union tgsi_exec_channel
*dst
,
1998 const union tgsi_exec_channel
*src
)
2000 dst
->i
[0] = src
[0].i
[0] < src
[1].i
[0] ? src
[0].i
[0] : src
[1].i
[0];
2001 dst
->i
[1] = src
[0].i
[1] < src
[1].i
[1] ? src
[0].i
[1] : src
[1].i
[1];
2002 dst
->i
[2] = src
[0].i
[2] < src
[1].i
[2] ? src
[0].i
[2] : src
[1].i
[2];
2003 dst
->i
[3] = src
[0].i
[3] < src
[1].i
[3] ? src
[0].i
[3] : src
[1].i
[3];
2007 micro_isge(union tgsi_exec_channel
*dst
,
2008 const union tgsi_exec_channel
*src
)
2010 dst
->i
[0] = src
[0].i
[0] >= src
[1].i
[0] ? -1 : 0;
2011 dst
->i
[1] = src
[0].i
[1] >= src
[1].i
[1] ? -1 : 0;
2012 dst
->i
[2] = src
[0].i
[2] >= src
[1].i
[2] ? -1 : 0;
2013 dst
->i
[3] = src
[0].i
[3] >= src
[1].i
[3] ? -1 : 0;
2017 micro_ishr(union tgsi_exec_channel
*dst
,
2018 const union tgsi_exec_channel
*src
)
2020 dst
->i
[0] = src
[0].i
[0] >> src
[1].i
[0];
2021 dst
->i
[1] = src
[0].i
[1] >> src
[1].i
[1];
2022 dst
->i
[2] = src
[0].i
[2] >> src
[1].i
[2];
2023 dst
->i
[3] = src
[0].i
[3] >> src
[1].i
[3];
2027 micro_islt(union tgsi_exec_channel
*dst
,
2028 const union tgsi_exec_channel
*src
)
2030 dst
->i
[0] = src
[0].i
[0] < src
[1].i
[0] ? -1 : 0;
2031 dst
->i
[1] = src
[0].i
[1] < src
[1].i
[1] ? -1 : 0;
2032 dst
->i
[2] = src
[0].i
[2] < src
[1].i
[2] ? -1 : 0;
2033 dst
->i
[3] = src
[0].i
[3] < src
[1].i
[3] ? -1 : 0;
2037 micro_f2u(union tgsi_exec_channel
*dst
,
2038 const union tgsi_exec_channel
*src
)
2040 dst
->u
[0] = (uint
)src
->f
[0];
2041 dst
->u
[1] = (uint
)src
->f
[1];
2042 dst
->u
[2] = (uint
)src
->f
[2];
2043 dst
->u
[3] = (uint
)src
->f
[3];
2047 micro_u2f(union tgsi_exec_channel
*dst
,
2048 const union tgsi_exec_channel
*src
)
2050 dst
->f
[0] = (float)src
->u
[0];
2051 dst
->f
[1] = (float)src
->u
[1];
2052 dst
->f
[2] = (float)src
->u
[2];
2053 dst
->f
[3] = (float)src
->u
[3];
2057 micro_uadd(union tgsi_exec_channel
*dst
,
2058 const union tgsi_exec_channel
*src
)
2060 dst
->u
[0] = src
[0].u
[0] + src
[1].u
[0];
2061 dst
->u
[1] = src
[0].u
[1] + src
[1].u
[1];
2062 dst
->u
[2] = src
[0].u
[2] + src
[1].u
[2];
2063 dst
->u
[3] = src
[0].u
[3] + src
[1].u
[3];
2067 micro_udiv(union tgsi_exec_channel
*dst
,
2068 const union tgsi_exec_channel
*src
)
2070 dst
->u
[0] = src
[0].u
[0] / src
[1].u
[0];
2071 dst
->u
[1] = src
[0].u
[1] / src
[1].u
[1];
2072 dst
->u
[2] = src
[0].u
[2] / src
[1].u
[2];
2073 dst
->u
[3] = src
[0].u
[3] / src
[1].u
[3];
2077 micro_umad(union tgsi_exec_channel
*dst
,
2078 const union tgsi_exec_channel
*src
)
2080 dst
->u
[0] = src
[0].u
[0] * src
[1].u
[0] + src
[2].u
[0];
2081 dst
->u
[1] = src
[0].u
[1] * src
[1].u
[1] + src
[2].u
[1];
2082 dst
->u
[2] = src
[0].u
[2] * src
[1].u
[2] + src
[2].u
[2];
2083 dst
->u
[3] = src
[0].u
[3] * src
[1].u
[3] + src
[2].u
[3];
2087 micro_umax(union tgsi_exec_channel
*dst
,
2088 const union tgsi_exec_channel
*src
)
2090 dst
->u
[0] = src
[0].u
[0] > src
[1].u
[0] ? src
[0].u
[0] : src
[1].u
[0];
2091 dst
->u
[1] = src
[0].u
[1] > src
[1].u
[1] ? src
[0].u
[1] : src
[1].u
[1];
2092 dst
->u
[2] = src
[0].u
[2] > src
[1].u
[2] ? src
[0].u
[2] : src
[1].u
[2];
2093 dst
->u
[3] = src
[0].u
[3] > src
[1].u
[3] ? src
[0].u
[3] : src
[1].u
[3];
2097 micro_umin(union tgsi_exec_channel
*dst
,
2098 const union tgsi_exec_channel
*src
)
2100 dst
->u
[0] = src
[0].u
[0] < src
[1].u
[0] ? src
[0].u
[0] : src
[1].u
[0];
2101 dst
->u
[1] = src
[0].u
[1] < src
[1].u
[1] ? src
[0].u
[1] : src
[1].u
[1];
2102 dst
->u
[2] = src
[0].u
[2] < src
[1].u
[2] ? src
[0].u
[2] : src
[1].u
[2];
2103 dst
->u
[3] = src
[0].u
[3] < src
[1].u
[3] ? src
[0].u
[3] : src
[1].u
[3];
2107 micro_umod(union tgsi_exec_channel
*dst
,
2108 const union tgsi_exec_channel
*src
)
2110 dst
->u
[0] = src
[0].u
[0] % src
[1].u
[0];
2111 dst
->u
[1] = src
[0].u
[1] % src
[1].u
[1];
2112 dst
->u
[2] = src
[0].u
[2] % src
[1].u
[2];
2113 dst
->u
[3] = src
[0].u
[3] % src
[1].u
[3];
2117 micro_umul(union tgsi_exec_channel
*dst
,
2118 const union tgsi_exec_channel
*src
)
2120 dst
->u
[0] = src
[0].u
[0] * src
[1].u
[0];
2121 dst
->u
[1] = src
[0].u
[1] * src
[1].u
[1];
2122 dst
->u
[2] = src
[0].u
[2] * src
[1].u
[2];
2123 dst
->u
[3] = src
[0].u
[3] * src
[1].u
[3];
2127 micro_useq(union tgsi_exec_channel
*dst
,
2128 const union tgsi_exec_channel
*src
)
2130 dst
->u
[0] = src
[0].u
[0] == src
[1].u
[0] ? ~0 : 0;
2131 dst
->u
[1] = src
[0].u
[1] == src
[1].u
[1] ? ~0 : 0;
2132 dst
->u
[2] = src
[0].u
[2] == src
[1].u
[2] ? ~0 : 0;
2133 dst
->u
[3] = src
[0].u
[3] == src
[1].u
[3] ? ~0 : 0;
2137 micro_usge(union tgsi_exec_channel
*dst
,
2138 const union tgsi_exec_channel
*src
)
2140 dst
->u
[0] = src
[0].u
[0] >= src
[1].u
[0] ? ~0 : 0;
2141 dst
->u
[1] = src
[0].u
[1] >= src
[1].u
[1] ? ~0 : 0;
2142 dst
->u
[2] = src
[0].u
[2] >= src
[1].u
[2] ? ~0 : 0;
2143 dst
->u
[3] = src
[0].u
[3] >= src
[1].u
[3] ? ~0 : 0;
2147 micro_ushr(union tgsi_exec_channel
*dst
,
2148 const union tgsi_exec_channel
*src
)
2150 dst
->u
[0] = src
[0].u
[0] >> src
[1].u
[0];
2151 dst
->u
[1] = src
[0].u
[1] >> src
[1].u
[1];
2152 dst
->u
[2] = src
[0].u
[2] >> src
[1].u
[2];
2153 dst
->u
[3] = src
[0].u
[3] >> src
[1].u
[3];
2157 micro_uslt(union tgsi_exec_channel
*dst
,
2158 const union tgsi_exec_channel
*src
)
2160 dst
->u
[0] = src
[0].u
[0] < src
[1].u
[0] ? ~0 : 0;
2161 dst
->u
[1] = src
[0].u
[1] < src
[1].u
[1] ? ~0 : 0;
2162 dst
->u
[2] = src
[0].u
[2] < src
[1].u
[2] ? ~0 : 0;
2163 dst
->u
[3] = src
[0].u
[3] < src
[1].u
[3] ? ~0 : 0;
2167 micro_usne(union tgsi_exec_channel
*dst
,
2168 const union tgsi_exec_channel
*src
)
2170 dst
->u
[0] = src
[0].u
[0] != src
[1].u
[0] ? ~0 : 0;
2171 dst
->u
[1] = src
[0].u
[1] != src
[1].u
[1] ? ~0 : 0;
2172 dst
->u
[2] = src
[0].u
[2] != src
[1].u
[2] ? ~0 : 0;
2173 dst
->u
[3] = src
[0].u
[3] != src
[1].u
[3] ? ~0 : 0;
2178 struct tgsi_exec_machine
*mach
,
2179 const struct tgsi_full_instruction
*inst
,
2183 union tgsi_exec_channel r
[10];
2184 union tgsi_exec_channel d
[8];
2188 switch (inst
->Instruction
.Opcode
) {
2189 case TGSI_OPCODE_ARL
:
2190 case TGSI_OPCODE_FLR
:
2191 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2192 FETCH( &r
[0], 0, chan_index
);
2193 micro_flr(&d
[chan_index
], &r
[0]);
2195 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2196 STORE(&d
[chan_index
], 0, chan_index
);
2200 case TGSI_OPCODE_MOV
:
2201 exec_vector_unary(mach
, inst
, micro_mov
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
2204 case TGSI_OPCODE_LIT
:
2205 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2206 FETCH( &r
[0], 0, CHAN_X
);
2207 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2208 micro_max(&d
[CHAN_Y
], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2211 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2212 FETCH( &r
[1], 0, CHAN_Y
);
2213 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2215 FETCH( &r
[2], 0, CHAN_W
);
2216 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
2217 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
2218 micro_pow( &r
[1], &r
[1], &r
[2] );
2219 micro_lt(&d
[CHAN_Z
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2222 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2223 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2225 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2226 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2229 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2230 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
2232 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2233 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2237 case TGSI_OPCODE_RCP
:
2238 /* TGSI_OPCODE_RECIP */
2239 FETCH( &r
[0], 0, CHAN_X
);
2240 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
2241 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2242 STORE( &r
[0], 0, chan_index
);
2246 case TGSI_OPCODE_RSQ
:
2247 /* TGSI_OPCODE_RECIPSQRT */
2248 FETCH( &r
[0], 0, CHAN_X
);
2249 micro_abs( &r
[0], &r
[0] );
2250 micro_sqrt( &r
[0], &r
[0] );
2251 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
2252 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2253 STORE( &r
[0], 0, chan_index
);
2257 case TGSI_OPCODE_EXP
:
2258 FETCH( &r
[0], 0, CHAN_X
);
2259 micro_flr( &r
[1], &r
[0] ); /* r1 = floor(r0) */
2260 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2261 micro_exp2( &r
[2], &r
[1] ); /* r2 = 2 ^ r1 */
2262 STORE( &r
[2], 0, CHAN_X
); /* store r2 */
2264 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2265 micro_sub( &r
[2], &r
[0], &r
[1] ); /* r2 = r0 - r1 */
2266 STORE( &r
[2], 0, CHAN_Y
); /* store r2 */
2268 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2269 micro_exp2( &r
[2], &r
[0] ); /* r2 = 2 ^ r0 */
2270 STORE( &r
[2], 0, CHAN_Z
); /* store r2 */
2272 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2273 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2277 case TGSI_OPCODE_LOG
:
2278 FETCH( &r
[0], 0, CHAN_X
);
2279 micro_abs( &r
[2], &r
[0] ); /* r2 = abs(r0) */
2280 micro_lg2( &r
[1], &r
[2] ); /* r1 = lg2(r2) */
2281 micro_flr( &r
[0], &r
[1] ); /* r0 = floor(r1) */
2282 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2283 STORE( &r
[0], 0, CHAN_X
);
2285 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2286 micro_exp2( &r
[0], &r
[0] ); /* r0 = 2 ^ r0 */
2287 micro_div( &r
[0], &r
[2], &r
[0] ); /* r0 = r2 / r0 */
2288 STORE( &r
[0], 0, CHAN_Y
);
2290 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2291 STORE( &r
[1], 0, CHAN_Z
);
2293 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2294 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2298 case TGSI_OPCODE_MUL
:
2299 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2300 FETCH(&r
[0], 0, chan_index
);
2301 FETCH(&r
[1], 1, chan_index
);
2302 micro_mul(&d
[chan_index
], &r
[0], &r
[1]);
2304 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2305 STORE(&d
[chan_index
], 0, chan_index
);
2309 case TGSI_OPCODE_ADD
:
2310 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2311 FETCH( &r
[0], 0, chan_index
);
2312 FETCH( &r
[1], 1, chan_index
);
2313 micro_add(&d
[chan_index
], &r
[0], &r
[1]);
2315 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2316 STORE(&d
[chan_index
], 0, chan_index
);
2320 case TGSI_OPCODE_DP3
:
2321 /* TGSI_OPCODE_DOT3 */
2322 FETCH( &r
[0], 0, CHAN_X
);
2323 FETCH( &r
[1], 1, CHAN_X
);
2324 micro_mul( &r
[0], &r
[0], &r
[1] );
2326 FETCH( &r
[1], 0, CHAN_Y
);
2327 FETCH( &r
[2], 1, CHAN_Y
);
2328 micro_mul( &r
[1], &r
[1], &r
[2] );
2329 micro_add( &r
[0], &r
[0], &r
[1] );
2331 FETCH( &r
[1], 0, CHAN_Z
);
2332 FETCH( &r
[2], 1, CHAN_Z
);
2333 micro_mul( &r
[1], &r
[1], &r
[2] );
2334 micro_add( &r
[0], &r
[0], &r
[1] );
2336 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2337 STORE( &r
[0], 0, chan_index
);
2341 case TGSI_OPCODE_DP4
:
2342 /* TGSI_OPCODE_DOT4 */
2343 FETCH(&r
[0], 0, CHAN_X
);
2344 FETCH(&r
[1], 1, CHAN_X
);
2346 micro_mul( &r
[0], &r
[0], &r
[1] );
2348 FETCH(&r
[1], 0, CHAN_Y
);
2349 FETCH(&r
[2], 1, CHAN_Y
);
2351 micro_mul( &r
[1], &r
[1], &r
[2] );
2352 micro_add( &r
[0], &r
[0], &r
[1] );
2354 FETCH(&r
[1], 0, CHAN_Z
);
2355 FETCH(&r
[2], 1, CHAN_Z
);
2357 micro_mul( &r
[1], &r
[1], &r
[2] );
2358 micro_add( &r
[0], &r
[0], &r
[1] );
2360 FETCH(&r
[1], 0, CHAN_W
);
2361 FETCH(&r
[2], 1, CHAN_W
);
2363 micro_mul( &r
[1], &r
[1], &r
[2] );
2364 micro_add( &r
[0], &r
[0], &r
[1] );
2366 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2367 STORE( &r
[0], 0, chan_index
);
2371 case TGSI_OPCODE_DST
:
2372 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2373 FETCH( &r
[0], 0, CHAN_Y
);
2374 FETCH( &r
[1], 1, CHAN_Y
);
2375 micro_mul(&d
[CHAN_Y
], &r
[0], &r
[1]);
2377 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2378 FETCH(&d
[CHAN_Z
], 0, CHAN_Z
);
2380 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2381 FETCH(&d
[CHAN_W
], 1, CHAN_W
);
2384 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2385 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
2387 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2388 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2390 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2391 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2393 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2394 STORE(&d
[CHAN_W
], 0, CHAN_W
);
2398 case TGSI_OPCODE_MIN
:
2399 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2400 FETCH(&r
[0], 0, chan_index
);
2401 FETCH(&r
[1], 1, chan_index
);
2403 /* XXX use micro_min()?? */
2404 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &r
[0], &r
[1]);
2406 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2407 STORE(&d
[chan_index
], 0, chan_index
);
2411 case TGSI_OPCODE_MAX
:
2412 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2413 FETCH(&r
[0], 0, chan_index
);
2414 FETCH(&r
[1], 1, chan_index
);
2416 /* XXX use micro_max()?? */
2417 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &r
[1], &r
[0] );
2419 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2420 STORE(&d
[chan_index
], 0, chan_index
);
2424 case TGSI_OPCODE_SLT
:
2425 /* TGSI_OPCODE_SETLT */
2426 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2427 FETCH( &r
[0], 0, chan_index
);
2428 FETCH( &r
[1], 1, chan_index
);
2429 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2431 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2432 STORE(&d
[chan_index
], 0, chan_index
);
2436 case TGSI_OPCODE_SGE
:
2437 /* TGSI_OPCODE_SETGE */
2438 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2439 FETCH( &r
[0], 0, chan_index
);
2440 FETCH( &r
[1], 1, chan_index
);
2441 micro_le(&d
[chan_index
], &r
[1], &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2443 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2444 STORE(&d
[chan_index
], 0, chan_index
);
2448 case TGSI_OPCODE_MAD
:
2449 /* TGSI_OPCODE_MADD */
2450 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2451 FETCH( &r
[0], 0, chan_index
);
2452 FETCH( &r
[1], 1, chan_index
);
2453 micro_mul( &r
[0], &r
[0], &r
[1] );
2454 FETCH( &r
[1], 2, chan_index
);
2455 micro_add(&d
[chan_index
], &r
[0], &r
[1]);
2457 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2458 STORE(&d
[chan_index
], 0, chan_index
);
2462 case TGSI_OPCODE_SUB
:
2463 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2464 FETCH(&r
[0], 0, chan_index
);
2465 FETCH(&r
[1], 1, chan_index
);
2466 micro_sub(&d
[chan_index
], &r
[0], &r
[1]);
2468 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2469 STORE(&d
[chan_index
], 0, chan_index
);
2473 case TGSI_OPCODE_LRP
:
2474 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2475 FETCH(&r
[0], 0, chan_index
);
2476 FETCH(&r
[1], 1, chan_index
);
2477 FETCH(&r
[2], 2, chan_index
);
2478 micro_sub( &r
[1], &r
[1], &r
[2] );
2479 micro_mul( &r
[0], &r
[0], &r
[1] );
2480 micro_add(&d
[chan_index
], &r
[0], &r
[2]);
2482 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2483 STORE(&d
[chan_index
], 0, chan_index
);
2487 case TGSI_OPCODE_CND
:
2488 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2489 FETCH(&r
[0], 0, chan_index
);
2490 FETCH(&r
[1], 1, chan_index
);
2491 FETCH(&r
[2], 2, chan_index
);
2492 micro_lt(&d
[chan_index
], &mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
], &r
[2], &r
[0], &r
[1]);
2494 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2495 STORE(&d
[chan_index
], 0, chan_index
);
2499 case TGSI_OPCODE_DP2A
:
2500 FETCH( &r
[0], 0, CHAN_X
);
2501 FETCH( &r
[1], 1, CHAN_X
);
2502 micro_mul( &r
[0], &r
[0], &r
[1] );
2504 FETCH( &r
[1], 0, CHAN_Y
);
2505 FETCH( &r
[2], 1, CHAN_Y
);
2506 micro_mul( &r
[1], &r
[1], &r
[2] );
2507 micro_add( &r
[0], &r
[0], &r
[1] );
2509 FETCH( &r
[2], 2, CHAN_X
);
2510 micro_add( &r
[0], &r
[0], &r
[2] );
2512 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2513 STORE( &r
[0], 0, chan_index
);
2517 case TGSI_OPCODE_FRC
:
2518 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2519 FETCH( &r
[0], 0, chan_index
);
2520 micro_frc(&d
[chan_index
], &r
[0]);
2522 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2523 STORE(&d
[chan_index
], 0, chan_index
);
2527 case TGSI_OPCODE_CLAMP
:
2528 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2529 FETCH(&r
[0], 0, chan_index
);
2530 FETCH(&r
[1], 1, chan_index
);
2531 micro_max(&r
[0], &r
[0], &r
[1]);
2532 FETCH(&r
[1], 2, chan_index
);
2533 micro_min(&d
[chan_index
], &r
[0], &r
[1]);
2535 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2536 STORE(&d
[chan_index
], 0, chan_index
);
2540 case TGSI_OPCODE_ROUND
:
2541 case TGSI_OPCODE_ARR
:
2542 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2543 FETCH( &r
[0], 0, chan_index
);
2544 micro_rnd(&d
[chan_index
], &r
[0]);
2546 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2547 STORE(&d
[chan_index
], 0, chan_index
);
2551 case TGSI_OPCODE_EX2
:
2552 FETCH(&r
[0], 0, CHAN_X
);
2554 micro_exp2( &r
[0], &r
[0] );
2556 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2557 STORE( &r
[0], 0, chan_index
);
2561 case TGSI_OPCODE_LG2
:
2562 FETCH( &r
[0], 0, CHAN_X
);
2563 micro_lg2( &r
[0], &r
[0] );
2564 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2565 STORE( &r
[0], 0, chan_index
);
2569 case TGSI_OPCODE_POW
:
2570 FETCH(&r
[0], 0, CHAN_X
);
2571 FETCH(&r
[1], 1, CHAN_X
);
2573 micro_pow( &r
[0], &r
[0], &r
[1] );
2575 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2576 STORE( &r
[0], 0, chan_index
);
2580 case TGSI_OPCODE_XPD
:
2581 FETCH(&r
[0], 0, CHAN_Y
);
2582 FETCH(&r
[1], 1, CHAN_Z
);
2584 micro_mul( &r
[2], &r
[0], &r
[1] );
2586 FETCH(&r
[3], 0, CHAN_Z
);
2587 FETCH(&r
[4], 1, CHAN_Y
);
2589 micro_mul( &r
[5], &r
[3], &r
[4] );
2590 micro_sub(&d
[CHAN_X
], &r
[2], &r
[5]);
2592 FETCH(&r
[2], 1, CHAN_X
);
2594 micro_mul( &r
[3], &r
[3], &r
[2] );
2596 FETCH(&r
[5], 0, CHAN_X
);
2598 micro_mul( &r
[1], &r
[1], &r
[5] );
2599 micro_sub(&d
[CHAN_Y
], &r
[3], &r
[1]);
2601 micro_mul( &r
[5], &r
[5], &r
[4] );
2602 micro_mul( &r
[0], &r
[0], &r
[2] );
2603 micro_sub(&d
[CHAN_Z
], &r
[5], &r
[0]);
2605 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2606 STORE(&d
[CHAN_X
], 0, CHAN_X
);
2608 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2609 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2611 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2612 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2614 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2615 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2619 case TGSI_OPCODE_ABS
:
2620 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2621 FETCH(&r
[0], 0, chan_index
);
2622 micro_abs(&d
[chan_index
], &r
[0]);
2624 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2625 STORE(&d
[chan_index
], 0, chan_index
);
2629 case TGSI_OPCODE_RCC
:
2630 FETCH(&r
[0], 0, CHAN_X
);
2631 micro_div(&r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0]);
2632 micro_float_clamp(&r
[0], &r
[0]);
2633 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2634 STORE(&r
[0], 0, chan_index
);
2638 case TGSI_OPCODE_DPH
:
2639 FETCH(&r
[0], 0, CHAN_X
);
2640 FETCH(&r
[1], 1, CHAN_X
);
2642 micro_mul( &r
[0], &r
[0], &r
[1] );
2644 FETCH(&r
[1], 0, CHAN_Y
);
2645 FETCH(&r
[2], 1, CHAN_Y
);
2647 micro_mul( &r
[1], &r
[1], &r
[2] );
2648 micro_add( &r
[0], &r
[0], &r
[1] );
2650 FETCH(&r
[1], 0, CHAN_Z
);
2651 FETCH(&r
[2], 1, CHAN_Z
);
2653 micro_mul( &r
[1], &r
[1], &r
[2] );
2654 micro_add( &r
[0], &r
[0], &r
[1] );
2656 FETCH(&r
[1], 1, CHAN_W
);
2658 micro_add( &r
[0], &r
[0], &r
[1] );
2660 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2661 STORE( &r
[0], 0, chan_index
);
2665 case TGSI_OPCODE_COS
:
2666 FETCH(&r
[0], 0, CHAN_X
);
2668 micro_cos( &r
[0], &r
[0] );
2670 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2671 STORE( &r
[0], 0, chan_index
);
2675 case TGSI_OPCODE_DDX
:
2676 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2677 FETCH( &r
[0], 0, chan_index
);
2678 micro_ddx(&d
[chan_index
], &r
[0]);
2680 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2681 STORE(&d
[chan_index
], 0, chan_index
);
2685 case TGSI_OPCODE_DDY
:
2686 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2687 FETCH( &r
[0], 0, chan_index
);
2688 micro_ddy(&d
[chan_index
], &r
[0]);
2690 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2691 STORE(&d
[chan_index
], 0, chan_index
);
2695 case TGSI_OPCODE_KILP
:
2696 exec_kilp (mach
, inst
);
2699 case TGSI_OPCODE_KIL
:
2700 exec_kil (mach
, inst
);
2703 case TGSI_OPCODE_PK2H
:
2707 case TGSI_OPCODE_PK2US
:
2711 case TGSI_OPCODE_PK4B
:
2715 case TGSI_OPCODE_PK4UB
:
2719 case TGSI_OPCODE_RFL
:
2720 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2721 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2722 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2723 /* r0 = dp3(src0, src0) */
2724 FETCH(&r
[2], 0, CHAN_X
);
2725 micro_mul(&r
[0], &r
[2], &r
[2]);
2726 FETCH(&r
[4], 0, CHAN_Y
);
2727 micro_mul(&r
[8], &r
[4], &r
[4]);
2728 micro_add(&r
[0], &r
[0], &r
[8]);
2729 FETCH(&r
[6], 0, CHAN_Z
);
2730 micro_mul(&r
[8], &r
[6], &r
[6]);
2731 micro_add(&r
[0], &r
[0], &r
[8]);
2733 /* r1 = dp3(src0, src1) */
2734 FETCH(&r
[3], 1, CHAN_X
);
2735 micro_mul(&r
[1], &r
[2], &r
[3]);
2736 FETCH(&r
[5], 1, CHAN_Y
);
2737 micro_mul(&r
[8], &r
[4], &r
[5]);
2738 micro_add(&r
[1], &r
[1], &r
[8]);
2739 FETCH(&r
[7], 1, CHAN_Z
);
2740 micro_mul(&r
[8], &r
[6], &r
[7]);
2741 micro_add(&r
[1], &r
[1], &r
[8]);
2743 /* r1 = 2 * r1 / r0 */
2744 micro_add(&r
[1], &r
[1], &r
[1]);
2745 micro_div(&r
[1], &r
[1], &r
[0]);
2747 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2748 micro_mul(&r
[2], &r
[2], &r
[1]);
2749 micro_sub(&r
[2], &r
[2], &r
[3]);
2750 STORE(&r
[2], 0, CHAN_X
);
2752 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2753 micro_mul(&r
[4], &r
[4], &r
[1]);
2754 micro_sub(&r
[4], &r
[4], &r
[5]);
2755 STORE(&r
[4], 0, CHAN_Y
);
2757 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2758 micro_mul(&r
[6], &r
[6], &r
[1]);
2759 micro_sub(&r
[6], &r
[6], &r
[7]);
2760 STORE(&r
[6], 0, CHAN_Z
);
2763 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2764 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2768 case TGSI_OPCODE_SEQ
:
2769 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2770 FETCH( &r
[0], 0, chan_index
);
2771 FETCH( &r
[1], 1, chan_index
);
2772 micro_eq(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2774 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2775 STORE(&d
[chan_index
], 0, chan_index
);
2779 case TGSI_OPCODE_SFL
:
2780 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2781 STORE(&mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, chan_index
);
2785 case TGSI_OPCODE_SGT
:
2786 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2787 FETCH( &r
[0], 0, chan_index
);
2788 FETCH( &r
[1], 1, chan_index
);
2789 micro_le(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
2791 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2792 STORE(&d
[chan_index
], 0, chan_index
);
2796 case TGSI_OPCODE_SIN
:
2797 FETCH( &r
[0], 0, CHAN_X
);
2798 micro_sin( &r
[0], &r
[0] );
2799 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2800 STORE( &r
[0], 0, chan_index
);
2804 case TGSI_OPCODE_SLE
:
2805 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2806 FETCH( &r
[0], 0, chan_index
);
2807 FETCH( &r
[1], 1, chan_index
);
2808 micro_le(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2810 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2811 STORE(&d
[chan_index
], 0, chan_index
);
2815 case TGSI_OPCODE_SNE
:
2816 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2817 FETCH( &r
[0], 0, chan_index
);
2818 FETCH( &r
[1], 1, chan_index
);
2819 micro_eq(&d
[chan_index
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
2821 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2822 STORE(&d
[chan_index
], 0, chan_index
);
2826 case TGSI_OPCODE_STR
:
2827 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2828 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, chan_index
);
2832 case TGSI_OPCODE_TEX
:
2833 /* simple texture lookup */
2834 /* src[0] = texcoord */
2835 /* src[1] = sampler unit */
2836 exec_tex(mach
, inst
, FALSE
, FALSE
);
2839 case TGSI_OPCODE_TXB
:
2840 /* Texture lookup with lod bias */
2841 /* src[0] = texcoord (src[0].w = LOD bias) */
2842 /* src[1] = sampler unit */
2843 exec_tex(mach
, inst
, TRUE
, FALSE
);
2846 case TGSI_OPCODE_TXD
:
2847 /* Texture lookup with explicit partial derivatives */
2848 /* src[0] = texcoord */
2849 /* src[1] = d[strq]/dx */
2850 /* src[2] = d[strq]/dy */
2851 /* src[3] = sampler unit */
2852 exec_txd(mach
, inst
);
2855 case TGSI_OPCODE_TXL
:
2856 /* Texture lookup with explicit LOD */
2857 /* src[0] = texcoord (src[0].w = LOD) */
2858 /* src[1] = sampler unit */
2859 exec_tex(mach
, inst
, TRUE
, FALSE
);
2862 case TGSI_OPCODE_TXP
:
2863 /* Texture lookup with projection */
2864 /* src[0] = texcoord (src[0].w = projection) */
2865 /* src[1] = sampler unit */
2866 exec_tex(mach
, inst
, FALSE
, TRUE
);
2869 case TGSI_OPCODE_UP2H
:
2873 case TGSI_OPCODE_UP2US
:
2877 case TGSI_OPCODE_UP4B
:
2881 case TGSI_OPCODE_UP4UB
:
2885 case TGSI_OPCODE_X2D
:
2886 FETCH(&r
[0], 1, CHAN_X
);
2887 FETCH(&r
[1], 1, CHAN_Y
);
2888 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2889 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2890 FETCH(&r
[2], 2, CHAN_X
);
2891 micro_mul(&r
[2], &r
[2], &r
[0]);
2892 FETCH(&r
[3], 2, CHAN_Y
);
2893 micro_mul(&r
[3], &r
[3], &r
[1]);
2894 micro_add(&r
[2], &r
[2], &r
[3]);
2895 FETCH(&r
[3], 0, CHAN_X
);
2896 micro_add(&d
[CHAN_X
], &r
[2], &r
[3]);
2899 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2900 IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2901 FETCH(&r
[2], 2, CHAN_Z
);
2902 micro_mul(&r
[2], &r
[2], &r
[0]);
2903 FETCH(&r
[3], 2, CHAN_W
);
2904 micro_mul(&r
[3], &r
[3], &r
[1]);
2905 micro_add(&r
[2], &r
[2], &r
[3]);
2906 FETCH(&r
[3], 0, CHAN_Y
);
2907 micro_add(&d
[CHAN_Y
], &r
[2], &r
[3]);
2910 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2911 STORE(&d
[CHAN_X
], 0, CHAN_X
);
2913 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2914 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2916 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2917 STORE(&d
[CHAN_X
], 0, CHAN_Z
);
2919 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2920 STORE(&d
[CHAN_Y
], 0, CHAN_W
);
2924 case TGSI_OPCODE_ARA
:
2928 case TGSI_OPCODE_BRA
:
2932 case TGSI_OPCODE_CAL
:
2933 /* skip the call if no execution channels are enabled */
2934 if (mach
->ExecMask
) {
2937 /* First, record the depths of the execution stacks.
2938 * This is important for deeply nested/looped return statements.
2939 * We have to unwind the stacks by the correct amount. For a
2940 * real code generator, we could determine the number of entries
2941 * to pop off each stack with simple static analysis and avoid
2942 * implementing this data structure at run time.
2944 mach
->CallStack
[mach
->CallStackTop
].CondStackTop
= mach
->CondStackTop
;
2945 mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
= mach
->LoopStackTop
;
2946 mach
->CallStack
[mach
->CallStackTop
].ContStackTop
= mach
->ContStackTop
;
2947 mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
= mach
->SwitchStackTop
;
2948 mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
= mach
->BreakStackTop
;
2949 /* note that PC was already incremented above */
2950 mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
= *pc
;
2952 mach
->CallStackTop
++;
2954 /* Second, push the Cond, Loop, Cont, Switch, Break, Func stacks */
2955 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2956 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2957 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2958 assert(mach
->SwitchStackTop
< TGSI_EXEC_MAX_SWITCH_NESTING
);
2959 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
2960 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
2962 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2963 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2964 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2965 mach
->SwitchStack
[mach
->SwitchStackTop
++] = mach
->Switch
;
2966 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
2967 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
2969 /* Finally, jump to the subroutine */
2970 *pc
= inst
->Label
.Label
;
2974 case TGSI_OPCODE_RET
:
2975 mach
->FuncMask
&= ~mach
->ExecMask
;
2976 UPDATE_EXEC_MASK(mach
);
2978 if (mach
->FuncMask
== 0x0) {
2979 /* really return now (otherwise, keep executing) */
2981 if (mach
->CallStackTop
== 0) {
2982 /* returning from main() */
2987 assert(mach
->CallStackTop
> 0);
2988 mach
->CallStackTop
--;
2990 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
2991 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
2993 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
2994 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
2996 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
2997 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
2999 mach
->SwitchStackTop
= mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
;
3000 mach
->Switch
= mach
->SwitchStack
[mach
->SwitchStackTop
];
3002 mach
->BreakStackTop
= mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
;
3003 mach
->BreakType
= mach
->BreakStack
[mach
->BreakStackTop
];
3005 assert(mach
->FuncStackTop
> 0);
3006 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
3008 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
3010 UPDATE_EXEC_MASK(mach
);
3014 case TGSI_OPCODE_SSG
:
3015 /* TGSI_OPCODE_SGN */
3016 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3017 FETCH( &r
[0], 0, chan_index
);
3018 micro_sgn(&d
[chan_index
], &r
[0]);
3020 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3021 STORE(&d
[chan_index
], 0, chan_index
);
3025 case TGSI_OPCODE_CMP
:
3026 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3027 FETCH(&r
[0], 0, chan_index
);
3028 FETCH(&r
[1], 1, chan_index
);
3029 FETCH(&r
[2], 2, chan_index
);
3030 micro_lt(&d
[chan_index
], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2]);
3032 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3033 STORE(&d
[chan_index
], 0, chan_index
);
3037 case TGSI_OPCODE_SCS
:
3038 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
3039 FETCH( &r
[0], 0, CHAN_X
);
3040 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
3041 micro_cos(&r
[1], &r
[0]);
3042 STORE(&r
[1], 0, CHAN_X
);
3044 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
3045 micro_sin(&r
[1], &r
[0]);
3046 STORE(&r
[1], 0, CHAN_Y
);
3049 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
3050 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
3052 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
3053 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
3057 case TGSI_OPCODE_NRM
:
3058 /* 3-component vector normalize */
3059 if(IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
3060 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
3061 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
3062 /* r3 = sqrt(dp3(src0, src0)) */
3063 FETCH(&r
[0], 0, CHAN_X
);
3064 micro_mul(&r
[3], &r
[0], &r
[0]);
3065 FETCH(&r
[1], 0, CHAN_Y
);
3066 micro_mul(&r
[4], &r
[1], &r
[1]);
3067 micro_add(&r
[3], &r
[3], &r
[4]);
3068 FETCH(&r
[2], 0, CHAN_Z
);
3069 micro_mul(&r
[4], &r
[2], &r
[2]);
3070 micro_add(&r
[3], &r
[3], &r
[4]);
3071 micro_sqrt(&r
[3], &r
[3]);
3073 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
3074 micro_div(&r
[0], &r
[0], &r
[3]);
3075 STORE(&r
[0], 0, CHAN_X
);
3077 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
3078 micro_div(&r
[1], &r
[1], &r
[3]);
3079 STORE(&r
[1], 0, CHAN_Y
);
3081 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
3082 micro_div(&r
[2], &r
[2], &r
[3]);
3083 STORE(&r
[2], 0, CHAN_Z
);
3086 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
3087 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
3091 case TGSI_OPCODE_NRM4
:
3092 /* 4-component vector normalize */
3094 union tgsi_exec_channel tmp
, dot
;
3096 /* tmp = dp4(src0, src0): */
3097 FETCH( &r
[0], 0, CHAN_X
);
3098 micro_mul( &tmp
, &r
[0], &r
[0] );
3100 FETCH( &r
[1], 0, CHAN_Y
);
3101 micro_mul( &dot
, &r
[1], &r
[1] );
3102 micro_add( &tmp
, &tmp
, &dot
);
3104 FETCH( &r
[2], 0, CHAN_Z
);
3105 micro_mul( &dot
, &r
[2], &r
[2] );
3106 micro_add( &tmp
, &tmp
, &dot
);
3108 FETCH( &r
[3], 0, CHAN_W
);
3109 micro_mul( &dot
, &r
[3], &r
[3] );
3110 micro_add( &tmp
, &tmp
, &dot
);
3112 /* tmp = 1 / sqrt(tmp) */
3113 micro_sqrt( &tmp
, &tmp
);
3114 micro_div( &tmp
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &tmp
);
3116 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3117 /* chan = chan * tmp */
3118 micro_mul( &r
[chan_index
], &tmp
, &r
[chan_index
] );
3119 STORE( &r
[chan_index
], 0, chan_index
);
3124 case TGSI_OPCODE_DIV
:
3128 case TGSI_OPCODE_DP2
:
3129 FETCH( &r
[0], 0, CHAN_X
);
3130 FETCH( &r
[1], 1, CHAN_X
);
3131 micro_mul( &r
[0], &r
[0], &r
[1] );
3133 FETCH( &r
[1], 0, CHAN_Y
);
3134 FETCH( &r
[2], 1, CHAN_Y
);
3135 micro_mul( &r
[1], &r
[1], &r
[2] );
3136 micro_add( &r
[0], &r
[0], &r
[1] );
3138 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3139 STORE( &r
[0], 0, chan_index
);
3143 case TGSI_OPCODE_IF
:
3145 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
3146 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
3147 FETCH( &r
[0], 0, CHAN_X
);
3148 /* update CondMask */
3150 mach
->CondMask
&= ~0x1;
3153 mach
->CondMask
&= ~0x2;
3156 mach
->CondMask
&= ~0x4;
3159 mach
->CondMask
&= ~0x8;
3161 UPDATE_EXEC_MASK(mach
);
3162 /* Todo: If CondMask==0, jump to ELSE */
3165 case TGSI_OPCODE_ELSE
:
3166 /* invert CondMask wrt previous mask */
3169 assert(mach
->CondStackTop
> 0);
3170 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
3171 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
3172 UPDATE_EXEC_MASK(mach
);
3173 /* Todo: If CondMask==0, jump to ENDIF */
3177 case TGSI_OPCODE_ENDIF
:
3179 assert(mach
->CondStackTop
> 0);
3180 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
3181 UPDATE_EXEC_MASK(mach
);
3184 case TGSI_OPCODE_END
:
3185 /* halt execution */
3189 case TGSI_OPCODE_REP
:
3193 case TGSI_OPCODE_ENDREP
:
3197 case TGSI_OPCODE_PUSHA
:
3201 case TGSI_OPCODE_POPA
:
3205 case TGSI_OPCODE_CEIL
:
3206 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3207 FETCH( &r
[0], 0, chan_index
);
3208 micro_ceil(&d
[chan_index
], &r
[0]);
3210 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3211 STORE(&d
[chan_index
], 0, chan_index
);
3215 case TGSI_OPCODE_I2F
:
3216 exec_vector_unary(mach
, inst
, micro_i2f
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_INT
);
3219 case TGSI_OPCODE_NOT
:
3220 exec_vector_unary(mach
, inst
, micro_not
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3223 case TGSI_OPCODE_TRUNC
:
3224 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3225 FETCH( &r
[0], 0, chan_index
);
3226 micro_trunc(&d
[chan_index
], &r
[0]);
3228 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3229 STORE(&d
[chan_index
], 0, chan_index
);
3233 case TGSI_OPCODE_SHL
:
3234 exec_vector_binary(mach
, inst
, micro_shl
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3237 case TGSI_OPCODE_AND
:
3238 exec_vector_binary(mach
, inst
, micro_and
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3241 case TGSI_OPCODE_OR
:
3242 exec_vector_binary(mach
, inst
, micro_or
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3245 case TGSI_OPCODE_MOD
:
3249 case TGSI_OPCODE_XOR
:
3250 exec_vector_binary(mach
, inst
, micro_xor
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3253 case TGSI_OPCODE_SAD
:
3257 case TGSI_OPCODE_TXF
:
3261 case TGSI_OPCODE_TXQ
:
3265 case TGSI_OPCODE_EMIT
:
3269 case TGSI_OPCODE_ENDPRIM
:
3270 emit_primitive(mach
);
3273 case TGSI_OPCODE_BGNFOR
:
3274 assert(mach
->LoopCounterStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3275 for (chan_index
= 0; chan_index
< 3; chan_index
++) {
3276 FETCH( &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
].xyzw
[chan_index
], 0, chan_index
);
3278 ++mach
->LoopCounterStackTop
;
3279 STORE(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
], 0, CHAN_X
);
3280 /* update LoopMask */
3281 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[0] <= 0.0f
) {
3282 mach
->LoopMask
&= ~0x1;
3284 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[1] <= 0.0f
) {
3285 mach
->LoopMask
&= ~0x2;
3287 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[2] <= 0.0f
) {
3288 mach
->LoopMask
&= ~0x4;
3290 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[3] <= 0.0f
) {
3291 mach
->LoopMask
&= ~0x8;
3293 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3294 UPDATE_EXEC_MASK(mach
);
3295 /* fall-through (for now) */
3296 case TGSI_OPCODE_BGNLOOP
:
3297 /* push LoopMask and ContMasks */
3298 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3299 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3300 assert(mach
->LoopLabelStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3301 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
3303 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
3304 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
3305 mach
->LoopLabelStack
[mach
->LoopLabelStackTop
++] = *pc
- 1;
3306 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
3307 mach
->BreakType
= TGSI_EXEC_BREAK_INSIDE_LOOP
;
3310 case TGSI_OPCODE_ENDFOR
:
3311 assert(mach
->LoopCounterStackTop
> 0);
3312 micro_sub(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
],
3313 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
],
3314 &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
3315 /* update LoopMask */
3316 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[0] <= 0.0f
) {
3317 mach
->LoopMask
&= ~0x1;
3319 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[1] <= 0.0f
) {
3320 mach
->LoopMask
&= ~0x2;
3322 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[2] <= 0.0f
) {
3323 mach
->LoopMask
&= ~0x4;
3325 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[3] <= 0.0f
) {
3326 mach
->LoopMask
&= ~0x8;
3328 micro_add(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
],
3329 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
],
3330 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Z
]);
3331 assert(mach
->LoopLabelStackTop
> 0);
3332 inst
= mach
->Instructions
+ mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1];
3333 STORE(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
].xyzw
[CHAN_X
], 0, CHAN_X
);
3334 /* Restore ContMask, but don't pop */
3335 assert(mach
->ContStackTop
> 0);
3336 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3337 UPDATE_EXEC_MASK(mach
);
3338 if (mach
->ExecMask
) {
3339 /* repeat loop: jump to instruction just past BGNLOOP */
3340 assert(mach
->LoopLabelStackTop
> 0);
3341 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
3344 /* exit loop: pop LoopMask */
3345 assert(mach
->LoopStackTop
> 0);
3346 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3348 assert(mach
->ContStackTop
> 0);
3349 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3350 assert(mach
->LoopLabelStackTop
> 0);
3351 --mach
->LoopLabelStackTop
;
3352 assert(mach
->LoopCounterStackTop
> 0);
3353 --mach
->LoopCounterStackTop
;
3355 mach
->BreakType
= mach
->BreakStack
[--mach
->BreakStackTop
];
3357 UPDATE_EXEC_MASK(mach
);
3360 case TGSI_OPCODE_ENDLOOP
:
3361 /* Restore ContMask, but don't pop */
3362 assert(mach
->ContStackTop
> 0);
3363 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3364 UPDATE_EXEC_MASK(mach
);
3365 if (mach
->ExecMask
) {
3366 /* repeat loop: jump to instruction just past BGNLOOP */
3367 assert(mach
->LoopLabelStackTop
> 0);
3368 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
3371 /* exit loop: pop LoopMask */
3372 assert(mach
->LoopStackTop
> 0);
3373 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3375 assert(mach
->ContStackTop
> 0);
3376 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3377 assert(mach
->LoopLabelStackTop
> 0);
3378 --mach
->LoopLabelStackTop
;
3380 mach
->BreakType
= mach
->BreakStack
[--mach
->BreakStackTop
];
3382 UPDATE_EXEC_MASK(mach
);
3385 case TGSI_OPCODE_BRK
:
3389 case TGSI_OPCODE_CONT
:
3390 /* turn off cont channels for each enabled exec channel */
3391 mach
->ContMask
&= ~mach
->ExecMask
;
3392 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3393 UPDATE_EXEC_MASK(mach
);
3396 case TGSI_OPCODE_BGNSUB
:
3400 case TGSI_OPCODE_ENDSUB
:
3402 * XXX: This really should be a no-op. We should never reach this opcode.
3405 assert(mach
->CallStackTop
> 0);
3406 mach
->CallStackTop
--;
3408 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
3409 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
3411 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
3412 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
3414 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
3415 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
3417 mach
->SwitchStackTop
= mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
;
3418 mach
->Switch
= mach
->SwitchStack
[mach
->SwitchStackTop
];
3420 mach
->BreakStackTop
= mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
;
3421 mach
->BreakType
= mach
->BreakStack
[mach
->BreakStackTop
];
3423 assert(mach
->FuncStackTop
> 0);
3424 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
3426 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
3428 UPDATE_EXEC_MASK(mach
);
3431 case TGSI_OPCODE_NOP
:
3434 case TGSI_OPCODE_BREAKC
:
3435 FETCH(&r
[0], 0, CHAN_X
);
3436 /* update LoopMask */
3437 if (r
[0].u
[0] && (mach
->ExecMask
& 0x1)) {
3438 mach
->LoopMask
&= ~0x1;
3440 if (r
[0].u
[1] && (mach
->ExecMask
& 0x2)) {
3441 mach
->LoopMask
&= ~0x2;
3443 if (r
[0].u
[2] && (mach
->ExecMask
& 0x4)) {
3444 mach
->LoopMask
&= ~0x4;
3446 if (r
[0].u
[3] && (mach
->ExecMask
& 0x8)) {
3447 mach
->LoopMask
&= ~0x8;
3449 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3450 UPDATE_EXEC_MASK(mach
);
3453 case TGSI_OPCODE_F2I
:
3454 exec_vector_unary(mach
, inst
, micro_f2i
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_FLOAT
);
3457 case TGSI_OPCODE_IDIV
:
3458 exec_vector_binary(mach
, inst
, micro_idiv
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3461 case TGSI_OPCODE_IMAX
:
3462 exec_vector_binary(mach
, inst
, micro_imax
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3465 case TGSI_OPCODE_IMIN
:
3466 exec_vector_binary(mach
, inst
, micro_imin
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3469 case TGSI_OPCODE_INEG
:
3470 exec_vector_unary(mach
, inst
, micro_ineg
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3473 case TGSI_OPCODE_ISGE
:
3474 exec_vector_binary(mach
, inst
, micro_isge
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3477 case TGSI_OPCODE_ISHR
:
3478 exec_vector_binary(mach
, inst
, micro_ishr
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3481 case TGSI_OPCODE_ISLT
:
3482 exec_vector_binary(mach
, inst
, micro_islt
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3485 case TGSI_OPCODE_F2U
:
3486 exec_vector_unary(mach
, inst
, micro_f2u
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_FLOAT
);
3489 case TGSI_OPCODE_U2F
:
3490 exec_vector_unary(mach
, inst
, micro_u2f
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_UINT
);
3493 case TGSI_OPCODE_UADD
:
3494 exec_vector_binary(mach
, inst
, micro_uadd
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3497 case TGSI_OPCODE_UDIV
:
3498 exec_vector_binary(mach
, inst
, micro_udiv
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3501 case TGSI_OPCODE_UMAD
:
3502 exec_vector_trinary(mach
, inst
, micro_umad
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3505 case TGSI_OPCODE_UMAX
:
3506 exec_vector_binary(mach
, inst
, micro_umax
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3509 case TGSI_OPCODE_UMIN
:
3510 exec_vector_binary(mach
, inst
, micro_umin
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3513 case TGSI_OPCODE_UMOD
:
3514 exec_vector_binary(mach
, inst
, micro_umod
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3517 case TGSI_OPCODE_UMUL
:
3518 exec_vector_binary(mach
, inst
, micro_umul
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3521 case TGSI_OPCODE_USEQ
:
3522 exec_vector_binary(mach
, inst
, micro_useq
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3525 case TGSI_OPCODE_USGE
:
3526 exec_vector_binary(mach
, inst
, micro_usge
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3529 case TGSI_OPCODE_USHR
:
3530 exec_vector_binary(mach
, inst
, micro_ushr
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3533 case TGSI_OPCODE_USLT
:
3534 exec_vector_binary(mach
, inst
, micro_uslt
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3537 case TGSI_OPCODE_USNE
:
3538 exec_vector_binary(mach
, inst
, micro_usne
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3541 case TGSI_OPCODE_SWITCH
:
3542 exec_switch(mach
, inst
);
3545 case TGSI_OPCODE_CASE
:
3546 exec_case(mach
, inst
);
3549 case TGSI_OPCODE_DEFAULT
:
3553 case TGSI_OPCODE_ENDSWITCH
:
3554 exec_endswitch(mach
);
3563 #define DEBUG_EXECUTION 0
3567 * Run TGSI interpreter.
3568 * \return bitmask of "alive" quad components
3571 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
3576 mach
->CondMask
= 0xf;
3577 mach
->LoopMask
= 0xf;
3578 mach
->ContMask
= 0xf;
3579 mach
->FuncMask
= 0xf;
3580 mach
->ExecMask
= 0xf;
3582 mach
->Switch
.mask
= 0xf;
3584 assert(mach
->CondStackTop
== 0);
3585 assert(mach
->LoopStackTop
== 0);
3586 assert(mach
->ContStackTop
== 0);
3587 assert(mach
->SwitchStackTop
== 0);
3588 assert(mach
->BreakStackTop
== 0);
3589 assert(mach
->CallStackTop
== 0);
3591 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
3592 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
3594 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
3595 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
3596 mach
->Primitives
[0] = 0;
3599 for (i
= 0; i
< QUAD_SIZE
; i
++) {
3600 mach
->Temps
[TEMP_CC_I
].xyzw
[TEMP_CC_C
].u
[i
] =
3601 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_X_SHIFT
) |
3602 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Y_SHIFT
) |
3603 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Z_SHIFT
) |
3604 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_W_SHIFT
);
3607 /* execute declarations (interpolants) */
3608 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
3609 exec_declaration( mach
, mach
->Declarations
+i
);
3614 struct tgsi_exec_vector temps
[TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
];
3615 struct tgsi_exec_vector outputs
[PIPE_MAX_ATTRIBS
];
3618 memcpy(temps
, mach
->Temps
, sizeof(temps
));
3619 memcpy(outputs
, mach
->Outputs
, sizeof(outputs
));
3622 /* execute instructions, until pc is set to -1 */
3628 tgsi_dump_instruction(&mach
->Instructions
[pc
], inst
++);
3631 assert(pc
< (int) mach
->NumInstructions
);
3632 exec_instruction(mach
, mach
->Instructions
+ pc
, &pc
);
3635 for (i
= 0; i
< TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
; i
++) {
3636 if (memcmp(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]))) {
3639 memcpy(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]));
3640 debug_printf("TEMP[%2u] = ", i
);
3641 for (j
= 0; j
< 4; j
++) {
3645 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3646 temps
[i
].xyzw
[0].f
[j
], temps
[i
].xyzw
[0].u
[j
],
3647 temps
[i
].xyzw
[1].f
[j
], temps
[i
].xyzw
[1].u
[j
],
3648 temps
[i
].xyzw
[2].f
[j
], temps
[i
].xyzw
[2].u
[j
],
3649 temps
[i
].xyzw
[3].f
[j
], temps
[i
].xyzw
[3].u
[j
]);
3653 for (i
= 0; i
< PIPE_MAX_ATTRIBS
; i
++) {
3654 if (memcmp(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]))) {
3657 memcpy(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]));
3658 debug_printf("OUT[%2u] = ", i
);
3659 for (j
= 0; j
< 4; j
++) {
3663 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3664 outputs
[i
].xyzw
[0].f
[j
], outputs
[i
].xyzw
[0].u
[j
],
3665 outputs
[i
].xyzw
[1].f
[j
], outputs
[i
].xyzw
[1].u
[j
],
3666 outputs
[i
].xyzw
[2].f
[j
], outputs
[i
].xyzw
[2].u
[j
],
3667 outputs
[i
].xyzw
[3].f
[j
], outputs
[i
].xyzw
[3].u
[j
]);
3676 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3677 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
3679 * Scale back depth component.
3681 for (i
= 0; i
< 4; i
++)
3682 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
3686 assert(mach
->CondStackTop
== 0);
3687 assert(mach
->LoopStackTop
== 0);
3688 assert(mach
->ContStackTop
== 0);
3689 assert(mach
->SwitchStackTop
== 0);
3690 assert(mach
->BreakStackTop
== 0);
3691 assert(mach
->CallStackTop
== 0);
3693 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];