tgsi: Remove dead micro_umod().
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
/**
 * TGSI interpreter/executor.
 *
 * Flow control information:
 *
 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
 * care since a condition may be true for some quad components but false
 * for other components.
 *
 * We basically execute all statements (even if they're in the part of
 * an IF/ELSE clause that's "not taken") and use a special mask to
 * control writing to destination registers.  This is the ExecMask.
 * See store_dest().
 *
 * The ExecMask is the AND of five other masks (CondMask, LoopMask,
 * ContMask, Switch.mask and FuncMask; see UPDATE_EXEC_MASK) which are
 * controlled by the flow control instructions (such as IF/ELSE/ENDIF,
 * LOOP/ENDLOOP and CONT).
 *
 *
 * Authors:
 *   Michal Krol
 *   Brian Paul
 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
63
64
/* When non-zero, the exp2/lg2/pow helpers in this file call the
 * util_fast_*() routines instead of powf()/logf().
 */
#define FAST_MATH 1
66
67 static void
68 micro_iabs(union tgsi_exec_channel *dst,
69 const union tgsi_exec_channel *src)
70 {
71 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
72 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
73 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
74 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
75 }
76
77 static void
78 micro_ineg(union tgsi_exec_channel *dst,
79 const union tgsi_exec_channel *src)
80 {
81 dst->i[0] = -src->i[0];
82 dst->i[1] = -src->i[1];
83 dst->i[2] = -src->i[2];
84 dst->i[3] = -src->i[3];
85 }
86
87 static void
88 micro_mov(union tgsi_exec_channel *dst,
89 const union tgsi_exec_channel *src)
90 {
91 dst->u[0] = src->u[0];
92 dst->u[1] = src->u[1];
93 dst->u[2] = src->u[2];
94 dst->u[3] = src->u[3];
95 }
96
/* Lane indices of the four quad elements, named by tile position. */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3

/* Indices of the X, Y, Z and W channels of a register. */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3
106
/**
 * How the bits of a channel are to be interpreted.  fetch_source() uses
 * this to pick the float or the integer flavour of the absolute-value and
 * negate source modifiers.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT,   /* floating point lanes */
   TGSI_EXEC_DATA_INT,     /* signed integer lanes */
   TGSI_EXEC_DATA_UINT     /* unsigned integer lanes */
};
112
/*
 * Short aliases for the utility registers declared as TGSI_EXEC_TEMP_*.
 * The _I suffix is the register index, the _C suffix is the channel.
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
#define TEMP_P0            TGSI_EXEC_TEMP_P0
146
/**
 * Non-zero when channel CHAN is set in the write mask of the first
 * destination register of instruction INST.
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))

/** Same test as IS_CHANNEL_ENABLED(), for the second destination register. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel that is
 * enabled in the first destination's write mask, with CHAN set to the
 * channel number.
 *
 * The filter is written as "if (!enabled) { } else" so that an `else`
 * placed after the loop body cannot pair with the macro's hidden `if`.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (!IS_CHANNEL_ENABLED( INST, CHAN )) { } else

/** Same loop as FOR_EACH_ENABLED_CHANNEL(), for the second destination. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (!IS_CHANNEL_ENABLED2( INST, CHAN )) { } else
160
161
/**
 * Recompute MACH->ExecMask as the AND of CondMask, LoopMask, ContMask,
 * Switch.mask and FuncMask.
 *
 * The argument and the whole expansion are parenthesized so the macro can
 * be given any pointer expression and used wherever an expression is valid.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
165
166
167 static const union tgsi_exec_channel ZeroVec =
168 { { 0.0, 0.0, 0.0, 0.0 } };
169
170
/**
 * Assert that none of the four float lanes of *chan is rejected by
 * util_is_inf_or_nan().
 */
#define CHECK_INF_OR_NAN(chan) \
   do { \
      assert(!util_is_inf_or_nan((chan)->f[0])); \
      assert(!util_is_inf_or_nan((chan)->f[1])); \
      assert(!util_is_inf_or_nan((chan)->f[2])); \
      assert(!util_is_inf_or_nan((chan)->f[3])); \
   } while (0)
177
178
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of a channel, prefixed by msg.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
187
188
#ifdef DEBUG
/**
 * Debug helper: print temporary register `index`, one line per channel,
 * each line listing that channel's four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char chan_names[] = "XYZW";
   const struct tgsi_exec_vector *vec = &mach->Temps[index];
   int c = 0;

   debug_printf("Temp[%u] =\n", index);
   while (c < 4) {
      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   chan_names[c],
                   vec->xyzw[c].f[0],
                   vec->xyzw[c].f[1],
                   vec->xyzw[c].f[2],
                   vec->xyzw[c].f[3]);
      c++;
   }
}
#endif
206
207
208 /**
209 * Check if there's a potential src/dst register data dependency when
210 * using SOA execution.
211 * Example:
212 * MOV T, T.yxwz;
213 * This would expand into:
214 * MOV t0, t1;
215 * MOV t1, t0;
216 * MOV t2, t3;
217 * MOV t3, t2;
218 * The second instruction will have the wrong value for t0 if executed as-is.
219 */
220 boolean
221 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
222 {
223 uint i, chan;
224
225 uint writemask = inst->Dst[0].Register.WriteMask;
226 if (writemask == TGSI_WRITEMASK_X ||
227 writemask == TGSI_WRITEMASK_Y ||
228 writemask == TGSI_WRITEMASK_Z ||
229 writemask == TGSI_WRITEMASK_W ||
230 writemask == TGSI_WRITEMASK_NONE) {
231 /* no chance of data dependency */
232 return FALSE;
233 }
234
235 /* loop over src regs */
236 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
237 if ((inst->Src[i].Register.File ==
238 inst->Dst[0].Register.File) &&
239 (inst->Src[i].Register.Index ==
240 inst->Dst[0].Register.Index)) {
241 /* loop over dest channels */
242 uint channelsWritten = 0x0;
243 FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
244 /* check if we're reading a channel that's been written */
245 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
246 if (channelsWritten & (1 << swizzle)) {
247 return TRUE;
248 }
249
250 channelsWritten |= (1 << chan);
251 }
252 }
253 }
254 return FALSE;
255 }
256
257
258 /**
259 * Initialize machine state by expanding tokens to full instructions,
260 * allocating temporary storage, setting up constants, etc.
261 * After this, we can call tgsi_exec_machine_run() many times.
262 */
263 void
264 tgsi_exec_machine_bind_shader(
265 struct tgsi_exec_machine *mach,
266 const struct tgsi_token *tokens,
267 uint numSamplers,
268 struct tgsi_sampler **samplers)
269 {
270 uint k;
271 struct tgsi_parse_context parse;
272 struct tgsi_exec_labels *labels = &mach->Labels;
273 struct tgsi_full_instruction *instructions;
274 struct tgsi_full_declaration *declarations;
275 uint maxInstructions = 10, numInstructions = 0;
276 uint maxDeclarations = 10, numDeclarations = 0;
277 uint instno = 0;
278
279 #if 0
280 tgsi_dump(tokens, 0);
281 #endif
282
283 util_init_math();
284
285 mach->Tokens = tokens;
286 mach->Samplers = samplers;
287
288 k = tgsi_parse_init (&parse, mach->Tokens);
289 if (k != TGSI_PARSE_OK) {
290 debug_printf( "Problem parsing!\n" );
291 return;
292 }
293
294 mach->Processor = parse.FullHeader.Processor.Processor;
295 mach->ImmLimit = 0;
296 labels->count = 0;
297
298 declarations = (struct tgsi_full_declaration *)
299 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
300
301 if (!declarations) {
302 return;
303 }
304
305 instructions = (struct tgsi_full_instruction *)
306 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
307
308 if (!instructions) {
309 FREE( declarations );
310 return;
311 }
312
313 while( !tgsi_parse_end_of_tokens( &parse ) ) {
314 uint pointer = parse.Position;
315 uint i;
316
317 tgsi_parse_token( &parse );
318 switch( parse.FullToken.Token.Type ) {
319 case TGSI_TOKEN_TYPE_DECLARATION:
320 /* save expanded declaration */
321 if (numDeclarations == maxDeclarations) {
322 declarations = REALLOC(declarations,
323 maxDeclarations
324 * sizeof(struct tgsi_full_declaration),
325 (maxDeclarations + 10)
326 * sizeof(struct tgsi_full_declaration));
327 maxDeclarations += 10;
328 }
329 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
330 unsigned reg;
331 for (reg = parse.FullToken.FullDeclaration.Range.First;
332 reg <= parse.FullToken.FullDeclaration.Range.Last;
333 ++reg) {
334 ++mach->NumOutputs;
335 }
336 }
337 memcpy(declarations + numDeclarations,
338 &parse.FullToken.FullDeclaration,
339 sizeof(declarations[0]));
340 numDeclarations++;
341 break;
342
343 case TGSI_TOKEN_TYPE_IMMEDIATE:
344 {
345 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
346 assert( size <= 4 );
347 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
348
349 for( i = 0; i < size; i++ ) {
350 mach->Imms[mach->ImmLimit][i] =
351 parse.FullToken.FullImmediate.u[i].Float;
352 }
353 mach->ImmLimit += 1;
354 }
355 break;
356
357 case TGSI_TOKEN_TYPE_INSTRUCTION:
358 assert( labels->count < MAX_LABELS );
359
360 labels->labels[labels->count][0] = instno;
361 labels->labels[labels->count][1] = pointer;
362 labels->count++;
363
364 /* save expanded instruction */
365 if (numInstructions == maxInstructions) {
366 instructions = REALLOC(instructions,
367 maxInstructions
368 * sizeof(struct tgsi_full_instruction),
369 (maxInstructions + 10)
370 * sizeof(struct tgsi_full_instruction));
371 maxInstructions += 10;
372 }
373
374 memcpy(instructions + numInstructions,
375 &parse.FullToken.FullInstruction,
376 sizeof(instructions[0]));
377
378 numInstructions++;
379 break;
380
381 case TGSI_TOKEN_TYPE_PROPERTY:
382 break;
383
384 default:
385 assert( 0 );
386 }
387 }
388 tgsi_parse_free (&parse);
389
390 if (mach->Declarations) {
391 FREE( mach->Declarations );
392 }
393 mach->Declarations = declarations;
394 mach->NumDeclarations = numDeclarations;
395
396 if (mach->Instructions) {
397 FREE( mach->Instructions );
398 }
399 mach->Instructions = instructions;
400 mach->NumInstructions = numInstructions;
401 }
402
403
404 struct tgsi_exec_machine *
405 tgsi_exec_machine_create( void )
406 {
407 struct tgsi_exec_machine *mach;
408 uint i;
409
410 mach = align_malloc( sizeof *mach, 16 );
411 if (!mach)
412 goto fail;
413
414 memset(mach, 0, sizeof(*mach));
415
416 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
417 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
418 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
419
420 /* Setup constants. */
421 for( i = 0; i < 4; i++ ) {
422 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
423 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
424 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
425 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
426 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
427 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
428 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
429 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
430 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
431 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
432 }
433
434 #ifdef DEBUG
435 /* silence warnings */
436 (void) print_chan;
437 (void) print_temp;
438 #endif
439
440 return mach;
441
442 fail:
443 align_free(mach);
444 return NULL;
445 }
446
447
448 void
449 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
450 {
451 if (mach) {
452 FREE(mach->Instructions);
453 FREE(mach->Declarations);
454 }
455
456 align_free(mach);
457 }
458
459
460 static void
461 micro_abs(
462 union tgsi_exec_channel *dst,
463 const union tgsi_exec_channel *src )
464 {
465 dst->f[0] = fabsf( src->f[0] );
466 dst->f[1] = fabsf( src->f[1] );
467 dst->f[2] = fabsf( src->f[2] );
468 dst->f[3] = fabsf( src->f[3] );
469 }
470
471 static void
472 micro_add(
473 union tgsi_exec_channel *dst,
474 const union tgsi_exec_channel *src0,
475 const union tgsi_exec_channel *src1 )
476 {
477 dst->f[0] = src0->f[0] + src1->f[0];
478 dst->f[1] = src0->f[1] + src1->f[1];
479 dst->f[2] = src0->f[2] + src1->f[2];
480 dst->f[3] = src0->f[3] + src1->f[3];
481 }
482
483 static void
484 micro_ceil(
485 union tgsi_exec_channel *dst,
486 const union tgsi_exec_channel *src )
487 {
488 dst->f[0] = ceilf( src->f[0] );
489 dst->f[1] = ceilf( src->f[1] );
490 dst->f[2] = ceilf( src->f[2] );
491 dst->f[3] = ceilf( src->f[3] );
492 }
493
494 static void
495 micro_cos(
496 union tgsi_exec_channel *dst,
497 const union tgsi_exec_channel *src )
498 {
499 dst->f[0] = cosf( src->f[0] );
500 dst->f[1] = cosf( src->f[1] );
501 dst->f[2] = cosf( src->f[2] );
502 dst->f[3] = cosf( src->f[3] );
503 }
504
505 static void
506 micro_ddx(
507 union tgsi_exec_channel *dst,
508 const union tgsi_exec_channel *src )
509 {
510 dst->f[0] =
511 dst->f[1] =
512 dst->f[2] =
513 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
514 }
515
516 static void
517 micro_ddy(
518 union tgsi_exec_channel *dst,
519 const union tgsi_exec_channel *src )
520 {
521 dst->f[0] =
522 dst->f[1] =
523 dst->f[2] =
524 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
525 }
526
527 static void
528 micro_div(
529 union tgsi_exec_channel *dst,
530 const union tgsi_exec_channel *src0,
531 const union tgsi_exec_channel *src1 )
532 {
533 if (src1->f[0] != 0) {
534 dst->f[0] = src0->f[0] / src1->f[0];
535 }
536 if (src1->f[1] != 0) {
537 dst->f[1] = src0->f[1] / src1->f[1];
538 }
539 if (src1->f[2] != 0) {
540 dst->f[2] = src0->f[2] / src1->f[2];
541 }
542 if (src1->f[3] != 0) {
543 dst->f[3] = src0->f[3] / src1->f[3];
544 }
545 }
546
547 static void
548 micro_eq(
549 union tgsi_exec_channel *dst,
550 const union tgsi_exec_channel *src0,
551 const union tgsi_exec_channel *src1,
552 const union tgsi_exec_channel *src2,
553 const union tgsi_exec_channel *src3 )
554 {
555 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
556 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
557 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
558 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
559 }
560
561 static void
562 micro_exp2(
563 union tgsi_exec_channel *dst,
564 const union tgsi_exec_channel *src)
565 {
566 #if FAST_MATH
567 dst->f[0] = util_fast_exp2( src->f[0] );
568 dst->f[1] = util_fast_exp2( src->f[1] );
569 dst->f[2] = util_fast_exp2( src->f[2] );
570 dst->f[3] = util_fast_exp2( src->f[3] );
571 #else
572
573 #if DEBUG
574 /* Inf is okay for this instruction, so clamp it to silence assertions. */
575 uint i;
576 union tgsi_exec_channel clamped;
577
578 for (i = 0; i < 4; i++) {
579 if (src->f[i] > 127.99999f) {
580 clamped.f[i] = 127.99999f;
581 } else if (src->f[i] < -126.99999f) {
582 clamped.f[i] = -126.99999f;
583 } else {
584 clamped.f[i] = src->f[i];
585 }
586 }
587 src = &clamped;
588 #endif
589
590 dst->f[0] = powf( 2.0f, src->f[0] );
591 dst->f[1] = powf( 2.0f, src->f[1] );
592 dst->f[2] = powf( 2.0f, src->f[2] );
593 dst->f[3] = powf( 2.0f, src->f[3] );
594 #endif
595 }
596
597 static void
598 micro_float_clamp(union tgsi_exec_channel *dst,
599 const union tgsi_exec_channel *src)
600 {
601 uint i;
602
603 for (i = 0; i < 4; i++) {
604 if (src->f[i] > 0.0f) {
605 if (src->f[i] > 1.884467e+019f)
606 dst->f[i] = 1.884467e+019f;
607 else if (src->f[i] < 5.42101e-020f)
608 dst->f[i] = 5.42101e-020f;
609 else
610 dst->f[i] = src->f[i];
611 }
612 else {
613 if (src->f[i] < -1.884467e+019f)
614 dst->f[i] = -1.884467e+019f;
615 else if (src->f[i] > -5.42101e-020f)
616 dst->f[i] = -5.42101e-020f;
617 else
618 dst->f[i] = src->f[i];
619 }
620 }
621 }
622
623 static void
624 micro_flr(
625 union tgsi_exec_channel *dst,
626 const union tgsi_exec_channel *src )
627 {
628 dst->f[0] = floorf( src->f[0] );
629 dst->f[1] = floorf( src->f[1] );
630 dst->f[2] = floorf( src->f[2] );
631 dst->f[3] = floorf( src->f[3] );
632 }
633
634 static void
635 micro_frc(
636 union tgsi_exec_channel *dst,
637 const union tgsi_exec_channel *src )
638 {
639 dst->f[0] = src->f[0] - floorf( src->f[0] );
640 dst->f[1] = src->f[1] - floorf( src->f[1] );
641 dst->f[2] = src->f[2] - floorf( src->f[2] );
642 dst->f[3] = src->f[3] - floorf( src->f[3] );
643 }
644
645 static void
646 micro_lg2(
647 union tgsi_exec_channel *dst,
648 const union tgsi_exec_channel *src )
649 {
650 #if FAST_MATH
651 dst->f[0] = util_fast_log2( src->f[0] );
652 dst->f[1] = util_fast_log2( src->f[1] );
653 dst->f[2] = util_fast_log2( src->f[2] );
654 dst->f[3] = util_fast_log2( src->f[3] );
655 #else
656 dst->f[0] = logf( src->f[0] ) * 1.442695f;
657 dst->f[1] = logf( src->f[1] ) * 1.442695f;
658 dst->f[2] = logf( src->f[2] ) * 1.442695f;
659 dst->f[3] = logf( src->f[3] ) * 1.442695f;
660 #endif
661 }
662
663 static void
664 micro_le(
665 union tgsi_exec_channel *dst,
666 const union tgsi_exec_channel *src0,
667 const union tgsi_exec_channel *src1,
668 const union tgsi_exec_channel *src2,
669 const union tgsi_exec_channel *src3 )
670 {
671 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
672 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
673 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
674 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
675 }
676
677 static void
678 micro_lt(
679 union tgsi_exec_channel *dst,
680 const union tgsi_exec_channel *src0,
681 const union tgsi_exec_channel *src1,
682 const union tgsi_exec_channel *src2,
683 const union tgsi_exec_channel *src3 )
684 {
685 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
686 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
687 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
688 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
689 }
690
691 static void
692 micro_max(
693 union tgsi_exec_channel *dst,
694 const union tgsi_exec_channel *src0,
695 const union tgsi_exec_channel *src1 )
696 {
697 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
698 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
699 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
700 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
701 }
702
703 static void
704 micro_min(
705 union tgsi_exec_channel *dst,
706 const union tgsi_exec_channel *src0,
707 const union tgsi_exec_channel *src1 )
708 {
709 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
710 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
711 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
712 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
713 }
714
715 static void
716 micro_mul(
717 union tgsi_exec_channel *dst,
718 const union tgsi_exec_channel *src0,
719 const union tgsi_exec_channel *src1 )
720 {
721 dst->f[0] = src0->f[0] * src1->f[0];
722 dst->f[1] = src0->f[1] * src1->f[1];
723 dst->f[2] = src0->f[2] * src1->f[2];
724 dst->f[3] = src0->f[3] * src1->f[3];
725 }
726
#if 0
/**
 * Disabled.  Signed multiply: dst1 receives the per-lane product and
 * dst0 is set to zero.
 */
static void
micro_imul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
745
#if 0
/**
 * Disabled.  Unsigned multiply: dst1 receives the per-lane product and
 * dst0 is set to zero.
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->u[lane] = src0->u[lane] * src1->u[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->u[lane] = 0;
   }
}
#endif
764
765
#if 0
/**
 * Disabled.  Per-lane conditional move on the unsigned view:
 * dst = src0 ? src1 : src2.
 */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane]) {
         dst->u[lane] = src1->u[lane];
      }
      else {
         dst->u[lane] = src2->u[lane];
      }
   }
}
#endif
780
781 static void
782 micro_neg(
783 union tgsi_exec_channel *dst,
784 const union tgsi_exec_channel *src )
785 {
786 dst->f[0] = -src->f[0];
787 dst->f[1] = -src->f[1];
788 dst->f[2] = -src->f[2];
789 dst->f[3] = -src->f[3];
790 }
791
792 static void
793 micro_pow(
794 union tgsi_exec_channel *dst,
795 const union tgsi_exec_channel *src0,
796 const union tgsi_exec_channel *src1 )
797 {
798 #if FAST_MATH
799 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
800 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
801 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
802 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
803 #else
804 dst->f[0] = powf( src0->f[0], src1->f[0] );
805 dst->f[1] = powf( src0->f[1], src1->f[1] );
806 dst->f[2] = powf( src0->f[2], src1->f[2] );
807 dst->f[3] = powf( src0->f[3], src1->f[3] );
808 #endif
809 }
810
811 static void
812 micro_rnd(
813 union tgsi_exec_channel *dst,
814 const union tgsi_exec_channel *src )
815 {
816 dst->f[0] = floorf( src->f[0] + 0.5f );
817 dst->f[1] = floorf( src->f[1] + 0.5f );
818 dst->f[2] = floorf( src->f[2] + 0.5f );
819 dst->f[3] = floorf( src->f[3] + 0.5f );
820 }
821
822 static void
823 micro_sgn(
824 union tgsi_exec_channel *dst,
825 const union tgsi_exec_channel *src )
826 {
827 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
828 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
829 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
830 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
831 }
832
833 static void
834 micro_trunc(
835 union tgsi_exec_channel *dst,
836 const union tgsi_exec_channel *src0 )
837 {
838 dst->f[0] = (float) (int) src0->f[0];
839 dst->f[1] = (float) (int) src0->f[1];
840 dst->f[2] = (float) (int) src0->f[2];
841 dst->f[3] = (float) (int) src0->f[3];
842 }
843
844 static void
845 micro_sin(
846 union tgsi_exec_channel *dst,
847 const union tgsi_exec_channel *src )
848 {
849 dst->f[0] = sinf( src->f[0] );
850 dst->f[1] = sinf( src->f[1] );
851 dst->f[2] = sinf( src->f[2] );
852 dst->f[3] = sinf( src->f[3] );
853 }
854
855 static void
856 micro_sqrt( union tgsi_exec_channel *dst,
857 const union tgsi_exec_channel *src )
858 {
859 dst->f[0] = sqrtf( src->f[0] );
860 dst->f[1] = sqrtf( src->f[1] );
861 dst->f[2] = sqrtf( src->f[2] );
862 dst->f[3] = sqrtf( src->f[3] );
863 }
864
865 static void
866 micro_sub(
867 union tgsi_exec_channel *dst,
868 const union tgsi_exec_channel *src0,
869 const union tgsi_exec_channel *src1 )
870 {
871 dst->f[0] = src0->f[0] - src1->f[0];
872 dst->f[1] = src0->f[1] - src1->f[1];
873 dst->f[2] = src0->f[2] - src1->f[2];
874 dst->f[3] = src0->f[3] - src1->f[3];
875 }
876
877 static void
878 fetch_src_file_channel(
879 const struct tgsi_exec_machine *mach,
880 const uint file,
881 const uint swizzle,
882 const union tgsi_exec_channel *index,
883 union tgsi_exec_channel *chan )
884 {
885 switch( swizzle ) {
886 case TGSI_SWIZZLE_X:
887 case TGSI_SWIZZLE_Y:
888 case TGSI_SWIZZLE_Z:
889 case TGSI_SWIZZLE_W:
890 switch( file ) {
891 case TGSI_FILE_CONSTANT:
892 assert(mach->Consts);
893 if (index->i[0] < 0)
894 chan->f[0] = 0.0f;
895 else
896 chan->f[0] = mach->Consts[index->i[0]][swizzle];
897 if (index->i[1] < 0)
898 chan->f[1] = 0.0f;
899 else
900 chan->f[1] = mach->Consts[index->i[1]][swizzle];
901 if (index->i[2] < 0)
902 chan->f[2] = 0.0f;
903 else
904 chan->f[2] = mach->Consts[index->i[2]][swizzle];
905 if (index->i[3] < 0)
906 chan->f[3] = 0.0f;
907 else
908 chan->f[3] = mach->Consts[index->i[3]][swizzle];
909 break;
910
911 case TGSI_FILE_INPUT:
912 case TGSI_FILE_SYSTEM_VALUE:
913 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
914 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
915 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
916 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
917 break;
918
919 case TGSI_FILE_TEMPORARY:
920 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
921 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
922 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
923 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
924 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
925 break;
926
927 case TGSI_FILE_IMMEDIATE:
928 assert( index->i[0] < (int) mach->ImmLimit );
929 chan->f[0] = mach->Imms[index->i[0]][swizzle];
930 assert( index->i[1] < (int) mach->ImmLimit );
931 chan->f[1] = mach->Imms[index->i[1]][swizzle];
932 assert( index->i[2] < (int) mach->ImmLimit );
933 chan->f[2] = mach->Imms[index->i[2]][swizzle];
934 assert( index->i[3] < (int) mach->ImmLimit );
935 chan->f[3] = mach->Imms[index->i[3]][swizzle];
936 break;
937
938 case TGSI_FILE_ADDRESS:
939 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
940 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
941 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
942 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
943 break;
944
945 case TGSI_FILE_PREDICATE:
946 assert(index->i[0] < TGSI_EXEC_NUM_PREDS);
947 assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
948 assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
949 assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
950 chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0];
951 chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1];
952 chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2];
953 chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3];
954 break;
955
956 case TGSI_FILE_OUTPUT:
957 /* vertex/fragment output vars can be read too */
958 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
959 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
960 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
961 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
962 break;
963
964 default:
965 assert( 0 );
966 }
967 break;
968
969 default:
970 assert( 0 );
971 }
972 }
973
974 static void
975 fetch_source(const struct tgsi_exec_machine *mach,
976 union tgsi_exec_channel *chan,
977 const struct tgsi_full_src_register *reg,
978 const uint chan_index,
979 enum tgsi_exec_datatype src_datatype)
980 {
981 union tgsi_exec_channel index;
982 uint swizzle;
983
984 /* We start with a direct index into a register file.
985 *
986 * file[1],
987 * where:
988 * file = Register.File
989 * [1] = Register.Index
990 */
991 index.i[0] =
992 index.i[1] =
993 index.i[2] =
994 index.i[3] = reg->Register.Index;
995
996 /* There is an extra source register that indirectly subscripts
997 * a register file. The direct index now becomes an offset
998 * that is being added to the indirect register.
999 *
1000 * file[ind[2].x+1],
1001 * where:
1002 * ind = Indirect.File
1003 * [2] = Indirect.Index
1004 * .x = Indirect.SwizzleX
1005 */
1006 if (reg->Register.Indirect) {
1007 union tgsi_exec_channel index2;
1008 union tgsi_exec_channel indir_index;
1009 const uint execmask = mach->ExecMask;
1010 uint i;
1011
1012 /* which address register (always zero now) */
1013 index2.i[0] =
1014 index2.i[1] =
1015 index2.i[2] =
1016 index2.i[3] = reg->Indirect.Index;
1017
1018 /* get current value of address register[swizzle] */
1019 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1020 fetch_src_file_channel(
1021 mach,
1022 reg->Indirect.File,
1023 swizzle,
1024 &index2,
1025 &indir_index );
1026
1027 /* add value of address register to the offset */
1028 index.i[0] += (int) indir_index.f[0];
1029 index.i[1] += (int) indir_index.f[1];
1030 index.i[2] += (int) indir_index.f[2];
1031 index.i[3] += (int) indir_index.f[3];
1032
1033 /* for disabled execution channels, zero-out the index to
1034 * avoid using a potential garbage value.
1035 */
1036 for (i = 0; i < QUAD_SIZE; i++) {
1037 if ((execmask & (1 << i)) == 0)
1038 index.i[i] = 0;
1039 }
1040 }
1041
1042 /* There is an extra source register that is a second
1043 * subscript to a register file. Effectively it means that
1044 * the register file is actually a 2D array of registers.
1045 *
1046 * file[1][3] == file[1*sizeof(file[1])+3],
1047 * where:
1048 * [3] = Dimension.Index
1049 */
1050 if (reg->Register.Dimension) {
1051 /* The size of the first-order array depends on the register file type.
1052 * We need to multiply the index to the first array to get an effective,
1053 * "flat" index that points to the beginning of the second-order array.
1054 */
1055 switch (reg->Register.File) {
1056 case TGSI_FILE_INPUT:
1057 case TGSI_FILE_SYSTEM_VALUE:
1058 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1059 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1060 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1061 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1062 break;
1063 case TGSI_FILE_CONSTANT:
1064 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1065 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1066 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1067 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1068 break;
1069 default:
1070 assert( 0 );
1071 }
1072
1073 index.i[0] += reg->Dimension.Index;
1074 index.i[1] += reg->Dimension.Index;
1075 index.i[2] += reg->Dimension.Index;
1076 index.i[3] += reg->Dimension.Index;
1077
1078 /* Again, the second subscript index can be addressed indirectly
1079 * identically to the first one.
1080 * Nothing stops us from indirectly addressing the indirect register,
1081 * but there is no need for that, so we won't exercise it.
1082 *
1083 * file[1][ind[4].y+3],
1084 * where:
1085 * ind = DimIndirect.File
1086 * [4] = DimIndirect.Index
1087 * .y = DimIndirect.SwizzleX
1088 */
1089 if (reg->Dimension.Indirect) {
1090 union tgsi_exec_channel index2;
1091 union tgsi_exec_channel indir_index;
1092 const uint execmask = mach->ExecMask;
1093 uint i;
1094
1095 index2.i[0] =
1096 index2.i[1] =
1097 index2.i[2] =
1098 index2.i[3] = reg->DimIndirect.Index;
1099
1100 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
1101 fetch_src_file_channel(
1102 mach,
1103 reg->DimIndirect.File,
1104 swizzle,
1105 &index2,
1106 &indir_index );
1107
1108 index.i[0] += (int) indir_index.f[0];
1109 index.i[1] += (int) indir_index.f[1];
1110 index.i[2] += (int) indir_index.f[2];
1111 index.i[3] += (int) indir_index.f[3];
1112
1113 /* for disabled execution channels, zero-out the index to
1114 * avoid using a potential garbage value.
1115 */
1116 for (i = 0; i < QUAD_SIZE; i++) {
1117 if ((execmask & (1 << i)) == 0)
1118 index.i[i] = 0;
1119 }
1120 }
1121
1122 /* If by any chance there was a need for a 3D array of register
1123 * files, we would have to check whether Dimension is followed
1124 * by a dimension register and continue the saga.
1125 */
1126 }
1127
1128 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1129 fetch_src_file_channel(
1130 mach,
1131 reg->Register.File,
1132 swizzle,
1133 &index,
1134 chan );
1135
1136 if (reg->Register.Absolute) {
1137 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1138 micro_abs(chan, chan);
1139 } else {
1140 micro_iabs(chan, chan);
1141 }
1142 }
1143
1144 if (reg->Register.Negate) {
1145 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1146 micro_neg(chan, chan);
1147 } else {
1148 micro_ineg(chan, chan);
1149 }
1150 }
1151 }
1152
1153 static void
1154 store_dest(struct tgsi_exec_machine *mach,
1155 const union tgsi_exec_channel *chan,
1156 const struct tgsi_full_dst_register *reg,
1157 const struct tgsi_full_instruction *inst,
1158 uint chan_index,
1159 enum tgsi_exec_datatype dst_datatype)
1160 {
1161 uint i;
1162 union tgsi_exec_channel null;
1163 union tgsi_exec_channel *dst;
1164 uint execmask = mach->ExecMask;
1165 int offset = 0; /* indirection offset */
1166 int index;
1167
1168 if (dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1169 CHECK_INF_OR_NAN(chan);
1170 }
1171
1172 /* There is an extra source register that indirectly subscripts
1173 * a register file. The direct index now becomes an offset
1174 * that is being added to the indirect register.
1175 *
1176 * file[ind[2].x+1],
1177 * where:
1178 * ind = Indirect.File
1179 * [2] = Indirect.Index
1180 * .x = Indirect.SwizzleX
1181 */
1182 if (reg->Register.Indirect) {
1183 union tgsi_exec_channel index;
1184 union tgsi_exec_channel indir_index;
1185 uint swizzle;
1186
1187 /* which address register (always zero for now) */
1188 index.i[0] =
1189 index.i[1] =
1190 index.i[2] =
1191 index.i[3] = reg->Indirect.Index;
1192
1193 /* get current value of address register[swizzle] */
1194 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1195
1196 /* fetch values from the address/indirection register */
1197 fetch_src_file_channel(
1198 mach,
1199 reg->Indirect.File,
1200 swizzle,
1201 &index,
1202 &indir_index );
1203
1204 /* save indirection offset */
1205 offset = (int) indir_index.f[0];
1206 }
1207
1208 switch (reg->Register.File) {
1209 case TGSI_FILE_NULL:
1210 dst = &null;
1211 break;
1212
1213 case TGSI_FILE_OUTPUT:
1214 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1215 + reg->Register.Index;
1216 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1217 #if 0
1218 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1219 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1220 for (i = 0; i < QUAD_SIZE; i++)
1221 if (execmask & (1 << i))
1222 fprintf(stderr, "%f, ", chan->f[i]);
1223 fprintf(stderr, ")\n");
1224 }
1225 #endif
1226 break;
1227
1228 case TGSI_FILE_TEMPORARY:
1229 index = reg->Register.Index;
1230 assert( index < TGSI_EXEC_NUM_TEMPS );
1231 dst = &mach->Temps[offset + index].xyzw[chan_index];
1232 break;
1233
1234 case TGSI_FILE_ADDRESS:
1235 index = reg->Register.Index;
1236 dst = &mach->Addrs[index].xyzw[chan_index];
1237 break;
1238
1239 case TGSI_FILE_LOOP:
1240 assert(reg->Register.Index == 0);
1241 assert(mach->LoopCounterStackTop > 0);
1242 assert(chan_index == CHAN_X);
1243 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
1244 break;
1245
1246 case TGSI_FILE_PREDICATE:
1247 index = reg->Register.Index;
1248 assert(index < TGSI_EXEC_NUM_PREDS);
1249 dst = &mach->Predicates[index].xyzw[chan_index];
1250 break;
1251
1252 default:
1253 assert( 0 );
1254 return;
1255 }
1256
1257 if (inst->Instruction.Predicate) {
1258 uint swizzle;
1259 union tgsi_exec_channel *pred;
1260
1261 switch (chan_index) {
1262 case CHAN_X:
1263 swizzle = inst->Predicate.SwizzleX;
1264 break;
1265 case CHAN_Y:
1266 swizzle = inst->Predicate.SwizzleY;
1267 break;
1268 case CHAN_Z:
1269 swizzle = inst->Predicate.SwizzleZ;
1270 break;
1271 case CHAN_W:
1272 swizzle = inst->Predicate.SwizzleW;
1273 break;
1274 default:
1275 assert(0);
1276 return;
1277 }
1278
1279 assert(inst->Predicate.Index == 0);
1280
1281 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1282
1283 if (inst->Predicate.Negate) {
1284 for (i = 0; i < QUAD_SIZE; i++) {
1285 if (pred->u[i]) {
1286 execmask &= ~(1 << i);
1287 }
1288 }
1289 } else {
1290 for (i = 0; i < QUAD_SIZE; i++) {
1291 if (!pred->u[i]) {
1292 execmask &= ~(1 << i);
1293 }
1294 }
1295 }
1296 }
1297
1298 switch (inst->Instruction.Saturate) {
1299 case TGSI_SAT_NONE:
1300 for (i = 0; i < QUAD_SIZE; i++)
1301 if (execmask & (1 << i))
1302 dst->i[i] = chan->i[i];
1303 break;
1304
1305 case TGSI_SAT_ZERO_ONE:
1306 for (i = 0; i < QUAD_SIZE; i++)
1307 if (execmask & (1 << i)) {
1308 if (chan->f[i] < 0.0f)
1309 dst->f[i] = 0.0f;
1310 else if (chan->f[i] > 1.0f)
1311 dst->f[i] = 1.0f;
1312 else
1313 dst->i[i] = chan->i[i];
1314 }
1315 break;
1316
1317 case TGSI_SAT_MINUS_PLUS_ONE:
1318 for (i = 0; i < QUAD_SIZE; i++)
1319 if (execmask & (1 << i)) {
1320 if (chan->f[i] < -1.0f)
1321 dst->f[i] = -1.0f;
1322 else if (chan->f[i] > 1.0f)
1323 dst->f[i] = 1.0f;
1324 else
1325 dst->i[i] = chan->i[i];
1326 }
1327 break;
1328
1329 default:
1330 assert( 0 );
1331 }
1332 }
1333
/*
 * Convenience wrappers for float-typed operand access.  Both expect
 * variables named `mach` and `inst` to be in scope where they are expanded.
 */

/* Read channel CHAN of source operand INDEX into VAL. */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

/* Write VAL to channel CHAN of destination operand INDEX. */
#define STORE(VAL, INDEX, CHAN) \
   store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT)
1339
1340
1341 /**
1342 * Execute ARB-style KIL which is predicated by a src register.
1343 * Kill fragment if any of the four values is less than zero.
1344 */
1345 static void
1346 exec_kil(struct tgsi_exec_machine *mach,
1347 const struct tgsi_full_instruction *inst)
1348 {
1349 uint uniquemask;
1350 uint chan_index;
1351 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1352 union tgsi_exec_channel r[1];
1353
1354 /* This mask stores component bits that were already tested. */
1355 uniquemask = 0;
1356
1357 for (chan_index = 0; chan_index < 4; chan_index++)
1358 {
1359 uint swizzle;
1360 uint i;
1361
1362 /* unswizzle channel */
1363 swizzle = tgsi_util_get_full_src_register_swizzle (
1364 &inst->Src[0],
1365 chan_index);
1366
1367 /* check if the component has not been already tested */
1368 if (uniquemask & (1 << swizzle))
1369 continue;
1370 uniquemask |= 1 << swizzle;
1371
1372 FETCH(&r[0], 0, chan_index);
1373 for (i = 0; i < 4; i++)
1374 if (r[0].f[i] < 0.0f)
1375 kilmask |= 1 << i;
1376 }
1377
1378 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1379 }
1380
1381 /**
1382 * Execute NVIDIA-style KIL which is predicated by a condition code.
1383 * Kill fragment if the condition code is TRUE.
1384 */
1385 static void
1386 exec_kilp(struct tgsi_exec_machine *mach,
1387 const struct tgsi_full_instruction *inst)
1388 {
1389 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1390
1391 /* "unconditional" kil */
1392 kilmask = mach->ExecMask;
1393 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1394 }
1395
1396 static void
1397 emit_vertex(struct tgsi_exec_machine *mach)
1398 {
1399 /* FIXME: check for exec mask correctly
1400 unsigned i;
1401 for (i = 0; i < QUAD_SIZE; ++i) {
1402 if ((mach->ExecMask & (1 << i)))
1403 */
1404 if (mach->ExecMask) {
1405 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
1406 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1407 }
1408 }
1409
1410 static void
1411 emit_primitive(struct tgsi_exec_machine *mach)
1412 {
1413 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
1414 /* FIXME: check for exec mask correctly
1415 unsigned i;
1416 for (i = 0; i < QUAD_SIZE; ++i) {
1417 if ((mach->ExecMask & (1 << i)))
1418 */
1419 if (mach->ExecMask) {
1420 ++(*prim_count);
1421 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
1422 mach->Primitives[*prim_count] = 0;
1423 }
1424 }
1425
1426 /*
1427 * Fetch a four texture samples using STR texture coordinates.
1428 */
1429 static void
1430 fetch_texel( struct tgsi_sampler *sampler,
1431 const union tgsi_exec_channel *s,
1432 const union tgsi_exec_channel *t,
1433 const union tgsi_exec_channel *p,
1434 float lodbias, /* XXX should be float[4] */
1435 union tgsi_exec_channel *r,
1436 union tgsi_exec_channel *g,
1437 union tgsi_exec_channel *b,
1438 union tgsi_exec_channel *a )
1439 {
1440 uint j;
1441 float rgba[NUM_CHANNELS][QUAD_SIZE];
1442
1443 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1444
1445 for (j = 0; j < 4; j++) {
1446 r->f[j] = rgba[0][j];
1447 g->f[j] = rgba[1][j];
1448 b->f[j] = rgba[2][j];
1449 a->f[j] = rgba[3][j];
1450 }
1451 }
1452
1453
1454 static void
1455 exec_tex(struct tgsi_exec_machine *mach,
1456 const struct tgsi_full_instruction *inst,
1457 boolean biasLod,
1458 boolean projected)
1459 {
1460 const uint unit = inst->Src[1].Register.Index;
1461 union tgsi_exec_channel r[4];
1462 uint chan_index;
1463 float lodBias;
1464
1465 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1466
1467 switch (inst->Texture.Texture) {
1468 case TGSI_TEXTURE_1D:
1469 case TGSI_TEXTURE_SHADOW1D:
1470
1471 FETCH(&r[0], 0, CHAN_X);
1472
1473 if (projected) {
1474 FETCH(&r[1], 0, CHAN_W);
1475 micro_div( &r[0], &r[0], &r[1] );
1476 }
1477
1478 if (biasLod) {
1479 FETCH(&r[1], 0, CHAN_W);
1480 lodBias = r[2].f[0];
1481 }
1482 else
1483 lodBias = 0.0;
1484
1485 fetch_texel(mach->Samplers[unit],
1486 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */
1487 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1488 break;
1489
1490 case TGSI_TEXTURE_2D:
1491 case TGSI_TEXTURE_RECT:
1492 case TGSI_TEXTURE_SHADOW2D:
1493 case TGSI_TEXTURE_SHADOWRECT:
1494
1495 FETCH(&r[0], 0, CHAN_X);
1496 FETCH(&r[1], 0, CHAN_Y);
1497 FETCH(&r[2], 0, CHAN_Z);
1498
1499 if (projected) {
1500 FETCH(&r[3], 0, CHAN_W);
1501 micro_div( &r[0], &r[0], &r[3] );
1502 micro_div( &r[1], &r[1], &r[3] );
1503 micro_div( &r[2], &r[2], &r[3] );
1504 }
1505
1506 if (biasLod) {
1507 FETCH(&r[3], 0, CHAN_W);
1508 lodBias = r[3].f[0];
1509 }
1510 else
1511 lodBias = 0.0;
1512
1513 fetch_texel(mach->Samplers[unit],
1514 &r[0], &r[1], &r[2], lodBias, /* inputs */
1515 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1516 break;
1517
1518 case TGSI_TEXTURE_3D:
1519 case TGSI_TEXTURE_CUBE:
1520
1521 FETCH(&r[0], 0, CHAN_X);
1522 FETCH(&r[1], 0, CHAN_Y);
1523 FETCH(&r[2], 0, CHAN_Z);
1524
1525 if (projected) {
1526 FETCH(&r[3], 0, CHAN_W);
1527 micro_div( &r[0], &r[0], &r[3] );
1528 micro_div( &r[1], &r[1], &r[3] );
1529 micro_div( &r[2], &r[2], &r[3] );
1530 }
1531
1532 if (biasLod) {
1533 FETCH(&r[3], 0, CHAN_W);
1534 lodBias = r[3].f[0];
1535 }
1536 else
1537 lodBias = 0.0;
1538
1539 fetch_texel(mach->Samplers[unit],
1540 &r[0], &r[1], &r[2], lodBias,
1541 &r[0], &r[1], &r[2], &r[3]);
1542 break;
1543
1544 default:
1545 assert (0);
1546 }
1547
1548 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1549 STORE( &r[chan_index], 0, chan_index );
1550 }
1551 }
1552
1553 static void
1554 exec_txd(struct tgsi_exec_machine *mach,
1555 const struct tgsi_full_instruction *inst)
1556 {
1557 const uint unit = inst->Src[3].Register.Index;
1558 union tgsi_exec_channel r[4];
1559 uint chan_index;
1560
1561 /*
1562 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1563 */
1564
1565 switch (inst->Texture.Texture) {
1566 case TGSI_TEXTURE_1D:
1567 case TGSI_TEXTURE_SHADOW1D:
1568
1569 FETCH(&r[0], 0, CHAN_X);
1570
1571 fetch_texel(mach->Samplers[unit],
1572 &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */
1573 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1574 break;
1575
1576 case TGSI_TEXTURE_2D:
1577 case TGSI_TEXTURE_RECT:
1578 case TGSI_TEXTURE_SHADOW2D:
1579 case TGSI_TEXTURE_SHADOWRECT:
1580
1581 FETCH(&r[0], 0, CHAN_X);
1582 FETCH(&r[1], 0, CHAN_Y);
1583 FETCH(&r[2], 0, CHAN_Z);
1584
1585 fetch_texel(mach->Samplers[unit],
1586 &r[0], &r[1], &r[2], 0.0f, /* inputs */
1587 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1588 break;
1589
1590 case TGSI_TEXTURE_3D:
1591 case TGSI_TEXTURE_CUBE:
1592
1593 FETCH(&r[0], 0, CHAN_X);
1594 FETCH(&r[1], 0, CHAN_Y);
1595 FETCH(&r[2], 0, CHAN_Z);
1596
1597 fetch_texel(mach->Samplers[unit],
1598 &r[0], &r[1], &r[2], 0.0f,
1599 &r[0], &r[1], &r[2], &r[3]);
1600 break;
1601
1602 default:
1603 assert(0);
1604 }
1605
1606 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1607 STORE(&r[chan_index], 0, chan_index);
1608 }
1609 }
1610
1611
1612 /**
1613 * Evaluate a constant-valued coefficient at the position of the
1614 * current quad.
1615 */
1616 static void
1617 eval_constant_coef(
1618 struct tgsi_exec_machine *mach,
1619 unsigned attrib,
1620 unsigned chan )
1621 {
1622 unsigned i;
1623
1624 for( i = 0; i < QUAD_SIZE; i++ ) {
1625 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1626 }
1627 }
1628
1629 /**
1630 * Evaluate a linear-valued coefficient at the position of the
1631 * current quad.
1632 */
1633 static void
1634 eval_linear_coef(
1635 struct tgsi_exec_machine *mach,
1636 unsigned attrib,
1637 unsigned chan )
1638 {
1639 const float x = mach->QuadPos.xyzw[0].f[0];
1640 const float y = mach->QuadPos.xyzw[1].f[0];
1641 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1642 const float dady = mach->InterpCoefs[attrib].dady[chan];
1643 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1644 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1645 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1646 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1647 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1648 }
1649
1650 /**
1651 * Evaluate a perspective-valued coefficient at the position of the
1652 * current quad.
1653 */
1654 static void
1655 eval_perspective_coef(
1656 struct tgsi_exec_machine *mach,
1657 unsigned attrib,
1658 unsigned chan )
1659 {
1660 const float x = mach->QuadPos.xyzw[0].f[0];
1661 const float y = mach->QuadPos.xyzw[1].f[0];
1662 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1663 const float dady = mach->InterpCoefs[attrib].dady[chan];
1664 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1665 const float *w = mach->QuadPos.xyzw[3].f;
1666 /* divide by W here */
1667 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1668 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1669 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1670 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1671 }
1672
1673
/**
 * Signature shared by the eval_*_coef() functions, which fill one channel
 * of one input attribute for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
1678
1679 static void
1680 exec_declaration(struct tgsi_exec_machine *mach,
1681 const struct tgsi_full_declaration *decl)
1682 {
1683 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
1684 if (decl->Declaration.File == TGSI_FILE_INPUT ||
1685 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1686 uint first, last, mask;
1687
1688 first = decl->Range.First;
1689 last = decl->Range.Last;
1690 mask = decl->Declaration.UsageMask;
1691
1692 if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
1693 assert(decl->Semantic.Index == 0);
1694 assert(first == last);
1695 assert(mask == TGSI_WRITEMASK_XYZW);
1696
1697 mach->Inputs[first] = mach->QuadPos;
1698 } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
1699 uint i;
1700
1701 assert(decl->Semantic.Index == 0);
1702 assert(first == last);
1703
1704 for (i = 0; i < QUAD_SIZE; i++) {
1705 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
1706 }
1707 } else {
1708 eval_coef_func eval;
1709 uint i, j;
1710
1711 switch (decl->Declaration.Interpolate) {
1712 case TGSI_INTERPOLATE_CONSTANT:
1713 eval = eval_constant_coef;
1714 break;
1715
1716 case TGSI_INTERPOLATE_LINEAR:
1717 eval = eval_linear_coef;
1718 break;
1719
1720 case TGSI_INTERPOLATE_PERSPECTIVE:
1721 eval = eval_perspective_coef;
1722 break;
1723
1724 default:
1725 assert(0);
1726 return;
1727 }
1728
1729 for (j = 0; j < NUM_CHANNELS; j++) {
1730 if (mask & (1 << j)) {
1731 for (i = first; i <= last; i++) {
1732 eval(mach, i, j);
1733 }
1734 }
1735 }
1736 }
1737 }
1738 }
1739 }
1740
/**
 * Signature of a per-channel micro operation as used by the
 * exec_vector_*() helpers.  For operations with more than one operand,
 * src points at an array of source channels.
 */
typedef void (*micro_op)(union tgsi_exec_channel *dst,
                         const union tgsi_exec_channel *src);
1743
1744 static void
1745 exec_vector_unary(struct tgsi_exec_machine *mach,
1746 const struct tgsi_full_instruction *inst,
1747 micro_op op,
1748 enum tgsi_exec_datatype dst_datatype,
1749 enum tgsi_exec_datatype src_datatype)
1750 {
1751 unsigned int chan;
1752 struct tgsi_exec_vector dst;
1753
1754 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1755 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1756 union tgsi_exec_channel src;
1757
1758 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
1759 op(&dst.xyzw[chan], &src);
1760 }
1761 }
1762 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1763 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1764 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1765 }
1766 }
1767 }
1768
1769 static void
1770 exec_vector_binary(struct tgsi_exec_machine *mach,
1771 const struct tgsi_full_instruction *inst,
1772 micro_op op,
1773 enum tgsi_exec_datatype dst_datatype,
1774 enum tgsi_exec_datatype src_datatype)
1775 {
1776 unsigned int chan;
1777 struct tgsi_exec_vector dst;
1778
1779 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1780 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1781 union tgsi_exec_channel src[2];
1782
1783 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
1784 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
1785 op(&dst.xyzw[chan], src);
1786 }
1787 }
1788 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1789 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1790 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1791 }
1792 }
1793 }
1794
1795 static void
1796 exec_vector_trinary(struct tgsi_exec_machine *mach,
1797 const struct tgsi_full_instruction *inst,
1798 micro_op op,
1799 enum tgsi_exec_datatype dst_datatype,
1800 enum tgsi_exec_datatype src_datatype)
1801 {
1802 unsigned int chan;
1803 struct tgsi_exec_vector dst;
1804
1805 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1806 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1807 union tgsi_exec_channel src[3];
1808
1809 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
1810 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
1811 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
1812 op(&dst.xyzw[chan], src);
1813 }
1814 }
1815 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1816 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1817 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1818 }
1819 }
1820 }
1821
1822 static void
1823 exec_break(struct tgsi_exec_machine *mach)
1824 {
1825 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
1826 /* turn off loop channels for each enabled exec channel */
1827 mach->LoopMask &= ~mach->ExecMask;
1828 /* Todo: if mach->LoopMask == 0, jump to end of loop */
1829 UPDATE_EXEC_MASK(mach);
1830 } else {
1831 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
1832
1833 mach->Switch.mask = 0x0;
1834
1835 UPDATE_EXEC_MASK(mach);
1836 }
1837 }
1838
1839 static void
1840 exec_switch(struct tgsi_exec_machine *mach,
1841 const struct tgsi_full_instruction *inst)
1842 {
1843 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
1844 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
1845
1846 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
1847 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
1848 mach->Switch.mask = 0x0;
1849 mach->Switch.defaultMask = 0x0;
1850
1851 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
1852 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
1853
1854 UPDATE_EXEC_MASK(mach);
1855 }
1856
1857 static void
1858 exec_case(struct tgsi_exec_machine *mach,
1859 const struct tgsi_full_instruction *inst)
1860 {
1861 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
1862 union tgsi_exec_channel src;
1863 uint mask = 0;
1864
1865 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
1866
1867 if (mach->Switch.selector.u[0] == src.u[0]) {
1868 mask |= 0x1;
1869 }
1870 if (mach->Switch.selector.u[1] == src.u[1]) {
1871 mask |= 0x2;
1872 }
1873 if (mach->Switch.selector.u[2] == src.u[2]) {
1874 mask |= 0x4;
1875 }
1876 if (mach->Switch.selector.u[3] == src.u[3]) {
1877 mask |= 0x8;
1878 }
1879
1880 mach->Switch.defaultMask |= mask;
1881
1882 mach->Switch.mask |= mask & prevMask;
1883
1884 UPDATE_EXEC_MASK(mach);
1885 }
1886
1887 static void
1888 exec_default(struct tgsi_exec_machine *mach)
1889 {
1890 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
1891
1892 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
1893
1894 UPDATE_EXEC_MASK(mach);
1895 }
1896
1897 static void
1898 exec_endswitch(struct tgsi_exec_machine *mach)
1899 {
1900 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
1901 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
1902
1903 UPDATE_EXEC_MASK(mach);
1904 }
1905
1906 static void
1907 micro_i2f(union tgsi_exec_channel *dst,
1908 const union tgsi_exec_channel *src)
1909 {
1910 dst->f[0] = (float)src->i[0];
1911 dst->f[1] = (float)src->i[1];
1912 dst->f[2] = (float)src->i[2];
1913 dst->f[3] = (float)src->i[3];
1914 }
1915
1916 static void
1917 micro_not(union tgsi_exec_channel *dst,
1918 const union tgsi_exec_channel *src)
1919 {
1920 dst->u[0] = ~src->u[0];
1921 dst->u[1] = ~src->u[1];
1922 dst->u[2] = ~src->u[2];
1923 dst->u[3] = ~src->u[3];
1924 }
1925
1926 static void
1927 micro_shl(union tgsi_exec_channel *dst,
1928 const union tgsi_exec_channel *src)
1929 {
1930 dst->u[0] = src[0].u[0] << src[1].u[0];
1931 dst->u[1] = src[0].u[1] << src[1].u[1];
1932 dst->u[2] = src[0].u[2] << src[1].u[2];
1933 dst->u[3] = src[0].u[3] << src[1].u[3];
1934 }
1935
1936 static void
1937 micro_and(union tgsi_exec_channel *dst,
1938 const union tgsi_exec_channel *src)
1939 {
1940 dst->u[0] = src[0].u[0] & src[1].u[0];
1941 dst->u[1] = src[0].u[1] & src[1].u[1];
1942 dst->u[2] = src[0].u[2] & src[1].u[2];
1943 dst->u[3] = src[0].u[3] & src[1].u[3];
1944 }
1945
1946 static void
1947 micro_or(union tgsi_exec_channel *dst,
1948 const union tgsi_exec_channel *src)
1949 {
1950 dst->u[0] = src[0].u[0] | src[1].u[0];
1951 dst->u[1] = src[0].u[1] | src[1].u[1];
1952 dst->u[2] = src[0].u[2] | src[1].u[2];
1953 dst->u[3] = src[0].u[3] | src[1].u[3];
1954 }
1955
1956 static void
1957 micro_xor(union tgsi_exec_channel *dst,
1958 const union tgsi_exec_channel *src)
1959 {
1960 dst->u[0] = src[0].u[0] ^ src[1].u[0];
1961 dst->u[1] = src[0].u[1] ^ src[1].u[1];
1962 dst->u[2] = src[0].u[2] ^ src[1].u[2];
1963 dst->u[3] = src[0].u[3] ^ src[1].u[3];
1964 }
1965
1966 static void
1967 micro_f2i(union tgsi_exec_channel *dst,
1968 const union tgsi_exec_channel *src)
1969 {
1970 dst->i[0] = (int)src->f[0];
1971 dst->i[1] = (int)src->f[1];
1972 dst->i[2] = (int)src->f[2];
1973 dst->i[3] = (int)src->f[3];
1974 }
1975
1976 static void
1977 micro_idiv(union tgsi_exec_channel *dst,
1978 const union tgsi_exec_channel *src)
1979 {
1980 dst->i[0] = src[0].i[0] / src[1].i[0];
1981 dst->i[1] = src[0].i[1] / src[1].i[1];
1982 dst->i[2] = src[0].i[2] / src[1].i[2];
1983 dst->i[3] = src[0].i[3] / src[1].i[3];
1984 }
1985
1986 static void
1987 micro_imax(union tgsi_exec_channel *dst,
1988 const union tgsi_exec_channel *src)
1989 {
1990 dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0];
1991 dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1];
1992 dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2];
1993 dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3];
1994 }
1995
1996 static void
1997 micro_imin(union tgsi_exec_channel *dst,
1998 const union tgsi_exec_channel *src)
1999 {
2000 dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0];
2001 dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1];
2002 dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2];
2003 dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3];
2004 }
2005
2006 static void
2007 micro_isge(union tgsi_exec_channel *dst,
2008 const union tgsi_exec_channel *src)
2009 {
2010 dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0;
2011 dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0;
2012 dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0;
2013 dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0;
2014 }
2015
2016 static void
2017 micro_ishr(union tgsi_exec_channel *dst,
2018 const union tgsi_exec_channel *src)
2019 {
2020 dst->i[0] = src[0].i[0] >> src[1].i[0];
2021 dst->i[1] = src[0].i[1] >> src[1].i[1];
2022 dst->i[2] = src[0].i[2] >> src[1].i[2];
2023 dst->i[3] = src[0].i[3] >> src[1].i[3];
2024 }
2025
2026 static void
2027 micro_islt(union tgsi_exec_channel *dst,
2028 const union tgsi_exec_channel *src)
2029 {
2030 dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0;
2031 dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0;
2032 dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0;
2033 dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0;
2034 }
2035
2036 static void
2037 micro_f2u(union tgsi_exec_channel *dst,
2038 const union tgsi_exec_channel *src)
2039 {
2040 dst->u[0] = (uint)src->f[0];
2041 dst->u[1] = (uint)src->f[1];
2042 dst->u[2] = (uint)src->f[2];
2043 dst->u[3] = (uint)src->f[3];
2044 }
2045
2046 static void
2047 micro_u2f(union tgsi_exec_channel *dst,
2048 const union tgsi_exec_channel *src)
2049 {
2050 dst->f[0] = (float)src->u[0];
2051 dst->f[1] = (float)src->u[1];
2052 dst->f[2] = (float)src->u[2];
2053 dst->f[3] = (float)src->u[3];
2054 }
2055
2056 static void
2057 micro_uadd(union tgsi_exec_channel *dst,
2058 const union tgsi_exec_channel *src)
2059 {
2060 dst->u[0] = src[0].u[0] + src[1].u[0];
2061 dst->u[1] = src[0].u[1] + src[1].u[1];
2062 dst->u[2] = src[0].u[2] + src[1].u[2];
2063 dst->u[3] = src[0].u[3] + src[1].u[3];
2064 }
2065
2066 static void
2067 micro_udiv(union tgsi_exec_channel *dst,
2068 const union tgsi_exec_channel *src)
2069 {
2070 dst->u[0] = src[0].u[0] / src[1].u[0];
2071 dst->u[1] = src[0].u[1] / src[1].u[1];
2072 dst->u[2] = src[0].u[2] / src[1].u[2];
2073 dst->u[3] = src[0].u[3] / src[1].u[3];
2074 }
2075
2076 static void
2077 micro_umad(union tgsi_exec_channel *dst,
2078 const union tgsi_exec_channel *src)
2079 {
2080 dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0];
2081 dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1];
2082 dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2];
2083 dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3];
2084 }
2085
2086 static void
2087 micro_umax(union tgsi_exec_channel *dst,
2088 const union tgsi_exec_channel *src)
2089 {
2090 dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0];
2091 dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1];
2092 dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2];
2093 dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3];
2094 }
2095
2096 static void
2097 micro_umin(union tgsi_exec_channel *dst,
2098 const union tgsi_exec_channel *src)
2099 {
2100 dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0];
2101 dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1];
2102 dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2];
2103 dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3];
2104 }
2105
2106 static void
2107 micro_umod(union tgsi_exec_channel *dst,
2108 const union tgsi_exec_channel *src)
2109 {
2110 dst->u[0] = src[0].u[0] % src[1].u[0];
2111 dst->u[1] = src[0].u[1] % src[1].u[1];
2112 dst->u[2] = src[0].u[2] % src[1].u[2];
2113 dst->u[3] = src[0].u[3] % src[1].u[3];
2114 }
2115
2116 static void
2117 micro_umul(union tgsi_exec_channel *dst,
2118 const union tgsi_exec_channel *src)
2119 {
2120 dst->u[0] = src[0].u[0] * src[1].u[0];
2121 dst->u[1] = src[0].u[1] * src[1].u[1];
2122 dst->u[2] = src[0].u[2] * src[1].u[2];
2123 dst->u[3] = src[0].u[3] * src[1].u[3];
2124 }
2125
2126 static void
2127 micro_useq(union tgsi_exec_channel *dst,
2128 const union tgsi_exec_channel *src)
2129 {
2130 dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0;
2131 dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0;
2132 dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0;
2133 dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0;
2134 }
2135
2136 static void
2137 micro_usge(union tgsi_exec_channel *dst,
2138 const union tgsi_exec_channel *src)
2139 {
2140 dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0;
2141 dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0;
2142 dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0;
2143 dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0;
2144 }
2145
2146 static void
2147 micro_ushr(union tgsi_exec_channel *dst,
2148 const union tgsi_exec_channel *src)
2149 {
2150 dst->u[0] = src[0].u[0] >> src[1].u[0];
2151 dst->u[1] = src[0].u[1] >> src[1].u[1];
2152 dst->u[2] = src[0].u[2] >> src[1].u[2];
2153 dst->u[3] = src[0].u[3] >> src[1].u[3];
2154 }
2155
2156 static void
2157 micro_uslt(union tgsi_exec_channel *dst,
2158 const union tgsi_exec_channel *src)
2159 {
2160 dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0;
2161 dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0;
2162 dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0;
2163 dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0;
2164 }
2165
2166 static void
2167 micro_usne(union tgsi_exec_channel *dst,
2168 const union tgsi_exec_channel *src)
2169 {
2170 dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0;
2171 dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0;
2172 dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0;
2173 dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0;
2174 }
2175
2176 static void
2177 exec_instruction(
2178 struct tgsi_exec_machine *mach,
2179 const struct tgsi_full_instruction *inst,
2180 int *pc )
2181 {
2182 uint chan_index;
2183 union tgsi_exec_channel r[10];
2184 union tgsi_exec_channel d[8];
2185
2186 (*pc)++;
2187
2188 switch (inst->Instruction.Opcode) {
2189 case TGSI_OPCODE_ARL:
2190 case TGSI_OPCODE_FLR:
2191 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2192 FETCH( &r[0], 0, chan_index );
2193 micro_flr(&d[chan_index], &r[0]);
2194 }
2195 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2196 STORE(&d[chan_index], 0, chan_index);
2197 }
2198 break;
2199
2200 case TGSI_OPCODE_MOV:
2201 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
2202 break;
2203
2204 case TGSI_OPCODE_LIT:
2205 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2206 FETCH( &r[0], 0, CHAN_X );
2207 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2208 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2209 }
2210
2211 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2212 FETCH( &r[1], 0, CHAN_Y );
2213 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2214
2215 FETCH( &r[2], 0, CHAN_W );
2216 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
2217 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
2218 micro_pow( &r[1], &r[1], &r[2] );
2219 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2220 }
2221
2222 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2223 STORE(&d[CHAN_Y], 0, CHAN_Y);
2224 }
2225 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2226 STORE(&d[CHAN_Z], 0, CHAN_Z);
2227 }
2228 }
2229 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2230 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2231 }
2232 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2233 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2234 }
2235 break;
2236
2237 case TGSI_OPCODE_RCP:
2238 /* TGSI_OPCODE_RECIP */
2239 FETCH( &r[0], 0, CHAN_X );
2240 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2241 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2242 STORE( &r[0], 0, chan_index );
2243 }
2244 break;
2245
2246 case TGSI_OPCODE_RSQ:
2247 /* TGSI_OPCODE_RECIPSQRT */
2248 FETCH( &r[0], 0, CHAN_X );
2249 micro_abs( &r[0], &r[0] );
2250 micro_sqrt( &r[0], &r[0] );
2251 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2252 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2253 STORE( &r[0], 0, chan_index );
2254 }
2255 break;
2256
2257 case TGSI_OPCODE_EXP:
2258 FETCH( &r[0], 0, CHAN_X );
2259 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
2260 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2261 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
2262 STORE( &r[2], 0, CHAN_X ); /* store r2 */
2263 }
2264 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2265 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
2266 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
2267 }
2268 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2269 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
2270 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
2271 }
2272 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2273 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2274 }
2275 break;
2276
2277 case TGSI_OPCODE_LOG:
2278 FETCH( &r[0], 0, CHAN_X );
2279 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
2280 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
2281 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
2282 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2283 STORE( &r[0], 0, CHAN_X );
2284 }
2285 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2286 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
2287 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
2288 STORE( &r[0], 0, CHAN_Y );
2289 }
2290 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2291 STORE( &r[1], 0, CHAN_Z );
2292 }
2293 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2294 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2295 }
2296 break;
2297
2298 case TGSI_OPCODE_MUL:
2299 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2300 FETCH(&r[0], 0, chan_index);
2301 FETCH(&r[1], 1, chan_index);
2302 micro_mul(&d[chan_index], &r[0], &r[1]);
2303 }
2304 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2305 STORE(&d[chan_index], 0, chan_index);
2306 }
2307 break;
2308
2309 case TGSI_OPCODE_ADD:
2310 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2311 FETCH( &r[0], 0, chan_index );
2312 FETCH( &r[1], 1, chan_index );
2313 micro_add(&d[chan_index], &r[0], &r[1]);
2314 }
2315 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2316 STORE(&d[chan_index], 0, chan_index);
2317 }
2318 break;
2319
2320 case TGSI_OPCODE_DP3:
2321 /* TGSI_OPCODE_DOT3 */
2322 FETCH( &r[0], 0, CHAN_X );
2323 FETCH( &r[1], 1, CHAN_X );
2324 micro_mul( &r[0], &r[0], &r[1] );
2325
2326 FETCH( &r[1], 0, CHAN_Y );
2327 FETCH( &r[2], 1, CHAN_Y );
2328 micro_mul( &r[1], &r[1], &r[2] );
2329 micro_add( &r[0], &r[0], &r[1] );
2330
2331 FETCH( &r[1], 0, CHAN_Z );
2332 FETCH( &r[2], 1, CHAN_Z );
2333 micro_mul( &r[1], &r[1], &r[2] );
2334 micro_add( &r[0], &r[0], &r[1] );
2335
2336 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2337 STORE( &r[0], 0, chan_index );
2338 }
2339 break;
2340
2341 case TGSI_OPCODE_DP4:
2342 /* TGSI_OPCODE_DOT4 */
2343 FETCH(&r[0], 0, CHAN_X);
2344 FETCH(&r[1], 1, CHAN_X);
2345
2346 micro_mul( &r[0], &r[0], &r[1] );
2347
2348 FETCH(&r[1], 0, CHAN_Y);
2349 FETCH(&r[2], 1, CHAN_Y);
2350
2351 micro_mul( &r[1], &r[1], &r[2] );
2352 micro_add( &r[0], &r[0], &r[1] );
2353
2354 FETCH(&r[1], 0, CHAN_Z);
2355 FETCH(&r[2], 1, CHAN_Z);
2356
2357 micro_mul( &r[1], &r[1], &r[2] );
2358 micro_add( &r[0], &r[0], &r[1] );
2359
2360 FETCH(&r[1], 0, CHAN_W);
2361 FETCH(&r[2], 1, CHAN_W);
2362
2363 micro_mul( &r[1], &r[1], &r[2] );
2364 micro_add( &r[0], &r[0], &r[1] );
2365
2366 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2367 STORE( &r[0], 0, chan_index );
2368 }
2369 break;
2370
2371 case TGSI_OPCODE_DST:
2372 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2373 FETCH( &r[0], 0, CHAN_Y );
2374 FETCH( &r[1], 1, CHAN_Y);
2375 micro_mul(&d[CHAN_Y], &r[0], &r[1]);
2376 }
2377 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2378 FETCH(&d[CHAN_Z], 0, CHAN_Z);
2379 }
2380 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2381 FETCH(&d[CHAN_W], 1, CHAN_W);
2382 }
2383
2384 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2385 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
2386 }
2387 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2388 STORE(&d[CHAN_Y], 0, CHAN_Y);
2389 }
2390 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2391 STORE(&d[CHAN_Z], 0, CHAN_Z);
2392 }
2393 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2394 STORE(&d[CHAN_W], 0, CHAN_W);
2395 }
2396 break;
2397
2398 case TGSI_OPCODE_MIN:
2399 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2400 FETCH(&r[0], 0, chan_index);
2401 FETCH(&r[1], 1, chan_index);
2402
2403 /* XXX use micro_min()?? */
2404 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
2405 }
2406 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2407 STORE(&d[chan_index], 0, chan_index);
2408 }
2409 break;
2410
2411 case TGSI_OPCODE_MAX:
2412 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2413 FETCH(&r[0], 0, chan_index);
2414 FETCH(&r[1], 1, chan_index);
2415
2416 /* XXX use micro_max()?? */
2417 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
2418 }
2419 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2420 STORE(&d[chan_index], 0, chan_index);
2421 }
2422 break;
2423
2424 case TGSI_OPCODE_SLT:
2425 /* TGSI_OPCODE_SETLT */
2426 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2427 FETCH( &r[0], 0, chan_index );
2428 FETCH( &r[1], 1, chan_index );
2429 micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2430 }
2431 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2432 STORE(&d[chan_index], 0, chan_index);
2433 }
2434 break;
2435
2436 case TGSI_OPCODE_SGE:
2437 /* TGSI_OPCODE_SETGE */
2438 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2439 FETCH( &r[0], 0, chan_index );
2440 FETCH( &r[1], 1, chan_index );
2441 micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2442 }
2443 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2444 STORE(&d[chan_index], 0, chan_index);
2445 }
2446 break;
2447
2448 case TGSI_OPCODE_MAD:
2449 /* TGSI_OPCODE_MADD */
2450 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2451 FETCH( &r[0], 0, chan_index );
2452 FETCH( &r[1], 1, chan_index );
2453 micro_mul( &r[0], &r[0], &r[1] );
2454 FETCH( &r[1], 2, chan_index );
2455 micro_add(&d[chan_index], &r[0], &r[1]);
2456 }
2457 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2458 STORE(&d[chan_index], 0, chan_index);
2459 }
2460 break;
2461
2462 case TGSI_OPCODE_SUB:
2463 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2464 FETCH(&r[0], 0, chan_index);
2465 FETCH(&r[1], 1, chan_index);
2466 micro_sub(&d[chan_index], &r[0], &r[1]);
2467 }
2468 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2469 STORE(&d[chan_index], 0, chan_index);
2470 }
2471 break;
2472
2473 case TGSI_OPCODE_LRP:
2474 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2475 FETCH(&r[0], 0, chan_index);
2476 FETCH(&r[1], 1, chan_index);
2477 FETCH(&r[2], 2, chan_index);
2478 micro_sub( &r[1], &r[1], &r[2] );
2479 micro_mul( &r[0], &r[0], &r[1] );
2480 micro_add(&d[chan_index], &r[0], &r[2]);
2481 }
2482 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2483 STORE(&d[chan_index], 0, chan_index);
2484 }
2485 break;
2486
2487 case TGSI_OPCODE_CND:
2488 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2489 FETCH(&r[0], 0, chan_index);
2490 FETCH(&r[1], 1, chan_index);
2491 FETCH(&r[2], 2, chan_index);
2492 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2493 }
2494 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2495 STORE(&d[chan_index], 0, chan_index);
2496 }
2497 break;
2498
2499 case TGSI_OPCODE_DP2A:
2500 FETCH( &r[0], 0, CHAN_X );
2501 FETCH( &r[1], 1, CHAN_X );
2502 micro_mul( &r[0], &r[0], &r[1] );
2503
2504 FETCH( &r[1], 0, CHAN_Y );
2505 FETCH( &r[2], 1, CHAN_Y );
2506 micro_mul( &r[1], &r[1], &r[2] );
2507 micro_add( &r[0], &r[0], &r[1] );
2508
2509 FETCH( &r[2], 2, CHAN_X );
2510 micro_add( &r[0], &r[0], &r[2] );
2511
2512 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2513 STORE( &r[0], 0, chan_index );
2514 }
2515 break;
2516
2517 case TGSI_OPCODE_FRC:
2518 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2519 FETCH( &r[0], 0, chan_index );
2520 micro_frc(&d[chan_index], &r[0]);
2521 }
2522 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2523 STORE(&d[chan_index], 0, chan_index);
2524 }
2525 break;
2526
2527 case TGSI_OPCODE_CLAMP:
2528 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2529 FETCH(&r[0], 0, chan_index);
2530 FETCH(&r[1], 1, chan_index);
2531 micro_max(&r[0], &r[0], &r[1]);
2532 FETCH(&r[1], 2, chan_index);
2533 micro_min(&d[chan_index], &r[0], &r[1]);
2534 }
2535 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2536 STORE(&d[chan_index], 0, chan_index);
2537 }
2538 break;
2539
2540 case TGSI_OPCODE_ROUND:
2541 case TGSI_OPCODE_ARR:
2542 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2543 FETCH( &r[0], 0, chan_index );
2544 micro_rnd(&d[chan_index], &r[0]);
2545 }
2546 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2547 STORE(&d[chan_index], 0, chan_index);
2548 }
2549 break;
2550
2551 case TGSI_OPCODE_EX2:
2552 FETCH(&r[0], 0, CHAN_X);
2553
2554 micro_exp2( &r[0], &r[0] );
2555
2556 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2557 STORE( &r[0], 0, chan_index );
2558 }
2559 break;
2560
2561 case TGSI_OPCODE_LG2:
2562 FETCH( &r[0], 0, CHAN_X );
2563 micro_lg2( &r[0], &r[0] );
2564 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2565 STORE( &r[0], 0, chan_index );
2566 }
2567 break;
2568
2569 case TGSI_OPCODE_POW:
2570 FETCH(&r[0], 0, CHAN_X);
2571 FETCH(&r[1], 1, CHAN_X);
2572
2573 micro_pow( &r[0], &r[0], &r[1] );
2574
2575 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2576 STORE( &r[0], 0, chan_index );
2577 }
2578 break;
2579
2580 case TGSI_OPCODE_XPD:
2581 FETCH(&r[0], 0, CHAN_Y);
2582 FETCH(&r[1], 1, CHAN_Z);
2583
2584 micro_mul( &r[2], &r[0], &r[1] );
2585
2586 FETCH(&r[3], 0, CHAN_Z);
2587 FETCH(&r[4], 1, CHAN_Y);
2588
2589 micro_mul( &r[5], &r[3], &r[4] );
2590 micro_sub(&d[CHAN_X], &r[2], &r[5]);
2591
2592 FETCH(&r[2], 1, CHAN_X);
2593
2594 micro_mul( &r[3], &r[3], &r[2] );
2595
2596 FETCH(&r[5], 0, CHAN_X);
2597
2598 micro_mul( &r[1], &r[1], &r[5] );
2599 micro_sub(&d[CHAN_Y], &r[3], &r[1]);
2600
2601 micro_mul( &r[5], &r[5], &r[4] );
2602 micro_mul( &r[0], &r[0], &r[2] );
2603 micro_sub(&d[CHAN_Z], &r[5], &r[0]);
2604
2605 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2606 STORE(&d[CHAN_X], 0, CHAN_X);
2607 }
2608 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2609 STORE(&d[CHAN_Y], 0, CHAN_Y);
2610 }
2611 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2612 STORE(&d[CHAN_Z], 0, CHAN_Z);
2613 }
2614 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2615 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2616 }
2617 break;
2618
2619 case TGSI_OPCODE_ABS:
2620 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2621 FETCH(&r[0], 0, chan_index);
2622 micro_abs(&d[chan_index], &r[0]);
2623 }
2624 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2625 STORE(&d[chan_index], 0, chan_index);
2626 }
2627 break;
2628
2629 case TGSI_OPCODE_RCC:
2630 FETCH(&r[0], 0, CHAN_X);
2631 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2632 micro_float_clamp(&r[0], &r[0]);
2633 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2634 STORE(&r[0], 0, chan_index);
2635 }
2636 break;
2637
2638 case TGSI_OPCODE_DPH:
2639 FETCH(&r[0], 0, CHAN_X);
2640 FETCH(&r[1], 1, CHAN_X);
2641
2642 micro_mul( &r[0], &r[0], &r[1] );
2643
2644 FETCH(&r[1], 0, CHAN_Y);
2645 FETCH(&r[2], 1, CHAN_Y);
2646
2647 micro_mul( &r[1], &r[1], &r[2] );
2648 micro_add( &r[0], &r[0], &r[1] );
2649
2650 FETCH(&r[1], 0, CHAN_Z);
2651 FETCH(&r[2], 1, CHAN_Z);
2652
2653 micro_mul( &r[1], &r[1], &r[2] );
2654 micro_add( &r[0], &r[0], &r[1] );
2655
2656 FETCH(&r[1], 1, CHAN_W);
2657
2658 micro_add( &r[0], &r[0], &r[1] );
2659
2660 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2661 STORE( &r[0], 0, chan_index );
2662 }
2663 break;
2664
2665 case TGSI_OPCODE_COS:
2666 FETCH(&r[0], 0, CHAN_X);
2667
2668 micro_cos( &r[0], &r[0] );
2669
2670 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2671 STORE( &r[0], 0, chan_index );
2672 }
2673 break;
2674
2675 case TGSI_OPCODE_DDX:
2676 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2677 FETCH( &r[0], 0, chan_index );
2678 micro_ddx(&d[chan_index], &r[0]);
2679 }
2680 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2681 STORE(&d[chan_index], 0, chan_index);
2682 }
2683 break;
2684
2685 case TGSI_OPCODE_DDY:
2686 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2687 FETCH( &r[0], 0, chan_index );
2688 micro_ddy(&d[chan_index], &r[0]);
2689 }
2690 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2691 STORE(&d[chan_index], 0, chan_index);
2692 }
2693 break;
2694
2695 case TGSI_OPCODE_KILP:
2696 exec_kilp (mach, inst);
2697 break;
2698
2699 case TGSI_OPCODE_KIL:
2700 exec_kil (mach, inst);
2701 break;
2702
2703 case TGSI_OPCODE_PK2H:
2704 assert (0);
2705 break;
2706
2707 case TGSI_OPCODE_PK2US:
2708 assert (0);
2709 break;
2710
2711 case TGSI_OPCODE_PK4B:
2712 assert (0);
2713 break;
2714
2715 case TGSI_OPCODE_PK4UB:
2716 assert (0);
2717 break;
2718
2719 case TGSI_OPCODE_RFL:
2720 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2721 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2722 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2723 /* r0 = dp3(src0, src0) */
2724 FETCH(&r[2], 0, CHAN_X);
2725 micro_mul(&r[0], &r[2], &r[2]);
2726 FETCH(&r[4], 0, CHAN_Y);
2727 micro_mul(&r[8], &r[4], &r[4]);
2728 micro_add(&r[0], &r[0], &r[8]);
2729 FETCH(&r[6], 0, CHAN_Z);
2730 micro_mul(&r[8], &r[6], &r[6]);
2731 micro_add(&r[0], &r[0], &r[8]);
2732
2733 /* r1 = dp3(src0, src1) */
2734 FETCH(&r[3], 1, CHAN_X);
2735 micro_mul(&r[1], &r[2], &r[3]);
2736 FETCH(&r[5], 1, CHAN_Y);
2737 micro_mul(&r[8], &r[4], &r[5]);
2738 micro_add(&r[1], &r[1], &r[8]);
2739 FETCH(&r[7], 1, CHAN_Z);
2740 micro_mul(&r[8], &r[6], &r[7]);
2741 micro_add(&r[1], &r[1], &r[8]);
2742
2743 /* r1 = 2 * r1 / r0 */
2744 micro_add(&r[1], &r[1], &r[1]);
2745 micro_div(&r[1], &r[1], &r[0]);
2746
2747 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2748 micro_mul(&r[2], &r[2], &r[1]);
2749 micro_sub(&r[2], &r[2], &r[3]);
2750 STORE(&r[2], 0, CHAN_X);
2751 }
2752 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2753 micro_mul(&r[4], &r[4], &r[1]);
2754 micro_sub(&r[4], &r[4], &r[5]);
2755 STORE(&r[4], 0, CHAN_Y);
2756 }
2757 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2758 micro_mul(&r[6], &r[6], &r[1]);
2759 micro_sub(&r[6], &r[6], &r[7]);
2760 STORE(&r[6], 0, CHAN_Z);
2761 }
2762 }
2763 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2764 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2765 }
2766 break;
2767
2768 case TGSI_OPCODE_SEQ:
2769 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2770 FETCH( &r[0], 0, chan_index );
2771 FETCH( &r[1], 1, chan_index );
2772 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2773 }
2774 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2775 STORE(&d[chan_index], 0, chan_index);
2776 }
2777 break;
2778
2779 case TGSI_OPCODE_SFL:
2780 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2781 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2782 }
2783 break;
2784
2785 case TGSI_OPCODE_SGT:
2786 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2787 FETCH( &r[0], 0, chan_index );
2788 FETCH( &r[1], 1, chan_index );
2789 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
2790 }
2791 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2792 STORE(&d[chan_index], 0, chan_index);
2793 }
2794 break;
2795
2796 case TGSI_OPCODE_SIN:
2797 FETCH( &r[0], 0, CHAN_X );
2798 micro_sin( &r[0], &r[0] );
2799 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2800 STORE( &r[0], 0, chan_index );
2801 }
2802 break;
2803
2804 case TGSI_OPCODE_SLE:
2805 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2806 FETCH( &r[0], 0, chan_index );
2807 FETCH( &r[1], 1, chan_index );
2808 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2809 }
2810 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2811 STORE(&d[chan_index], 0, chan_index);
2812 }
2813 break;
2814
2815 case TGSI_OPCODE_SNE:
2816 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2817 FETCH( &r[0], 0, chan_index );
2818 FETCH( &r[1], 1, chan_index );
2819 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
2820 }
2821 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2822 STORE(&d[chan_index], 0, chan_index);
2823 }
2824 break;
2825
2826 case TGSI_OPCODE_STR:
2827 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2828 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2829 }
2830 break;
2831
2832 case TGSI_OPCODE_TEX:
2833 /* simple texture lookup */
2834 /* src[0] = texcoord */
2835 /* src[1] = sampler unit */
2836 exec_tex(mach, inst, FALSE, FALSE);
2837 break;
2838
2839 case TGSI_OPCODE_TXB:
2840 /* Texture lookup with lod bias */
2841 /* src[0] = texcoord (src[0].w = LOD bias) */
2842 /* src[1] = sampler unit */
2843 exec_tex(mach, inst, TRUE, FALSE);
2844 break;
2845
2846 case TGSI_OPCODE_TXD:
2847 /* Texture lookup with explict partial derivatives */
2848 /* src[0] = texcoord */
2849 /* src[1] = d[strq]/dx */
2850 /* src[2] = d[strq]/dy */
2851 /* src[3] = sampler unit */
2852 exec_txd(mach, inst);
2853 break;
2854
2855 case TGSI_OPCODE_TXL:
2856 /* Texture lookup with explit LOD */
2857 /* src[0] = texcoord (src[0].w = LOD) */
2858 /* src[1] = sampler unit */
2859 exec_tex(mach, inst, TRUE, FALSE);
2860 break;
2861
2862 case TGSI_OPCODE_TXP:
2863 /* Texture lookup with projection */
2864 /* src[0] = texcoord (src[0].w = projection) */
2865 /* src[1] = sampler unit */
2866 exec_tex(mach, inst, FALSE, TRUE);
2867 break;
2868
2869 case TGSI_OPCODE_UP2H:
2870 assert (0);
2871 break;
2872
2873 case TGSI_OPCODE_UP2US:
2874 assert (0);
2875 break;
2876
2877 case TGSI_OPCODE_UP4B:
2878 assert (0);
2879 break;
2880
2881 case TGSI_OPCODE_UP4UB:
2882 assert (0);
2883 break;
2884
2885 case TGSI_OPCODE_X2D:
2886 FETCH(&r[0], 1, CHAN_X);
2887 FETCH(&r[1], 1, CHAN_Y);
2888 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2889 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2890 FETCH(&r[2], 2, CHAN_X);
2891 micro_mul(&r[2], &r[2], &r[0]);
2892 FETCH(&r[3], 2, CHAN_Y);
2893 micro_mul(&r[3], &r[3], &r[1]);
2894 micro_add(&r[2], &r[2], &r[3]);
2895 FETCH(&r[3], 0, CHAN_X);
2896 micro_add(&d[CHAN_X], &r[2], &r[3]);
2897
2898 }
2899 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2900 IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2901 FETCH(&r[2], 2, CHAN_Z);
2902 micro_mul(&r[2], &r[2], &r[0]);
2903 FETCH(&r[3], 2, CHAN_W);
2904 micro_mul(&r[3], &r[3], &r[1]);
2905 micro_add(&r[2], &r[2], &r[3]);
2906 FETCH(&r[3], 0, CHAN_Y);
2907 micro_add(&d[CHAN_Y], &r[2], &r[3]);
2908
2909 }
2910 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2911 STORE(&d[CHAN_X], 0, CHAN_X);
2912 }
2913 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2914 STORE(&d[CHAN_Y], 0, CHAN_Y);
2915 }
2916 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2917 STORE(&d[CHAN_X], 0, CHAN_Z);
2918 }
2919 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2920 STORE(&d[CHAN_Y], 0, CHAN_W);
2921 }
2922 break;
2923
2924 case TGSI_OPCODE_ARA:
2925 assert (0);
2926 break;
2927
2928 case TGSI_OPCODE_BRA:
2929 assert (0);
2930 break;
2931
2932 case TGSI_OPCODE_CAL:
2933 /* skip the call if no execution channels are enabled */
2934 if (mach->ExecMask) {
2935 /* do the call */
2936
2937 /* First, record the depths of the execution stacks.
2938 * This is important for deeply nested/looped return statements.
2939 * We have to unwind the stacks by the correct amount. For a
2940 * real code generator, we could determine the number of entries
2941 * to pop off each stack with simple static analysis and avoid
2942 * implementing this data structure at run time.
2943 */
2944 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
2945 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
2946 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
2947 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
2948 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
2949 /* note that PC was already incremented above */
2950 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
2951
2952 mach->CallStackTop++;
2953
2954 /* Second, push the Cond, Loop, Cont, Func stacks */
2955 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2956 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2957 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2958 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
2959 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
2960 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2961
2962 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2963 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2964 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2965 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
2966 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
2967 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2968
2969 /* Finally, jump to the subroutine */
2970 *pc = inst->Label.Label;
2971 }
2972 break;
2973
2974 case TGSI_OPCODE_RET:
2975 mach->FuncMask &= ~mach->ExecMask;
2976 UPDATE_EXEC_MASK(mach);
2977
2978 if (mach->FuncMask == 0x0) {
2979 /* really return now (otherwise, keep executing */
2980
2981 if (mach->CallStackTop == 0) {
2982 /* returning from main() */
2983 *pc = -1;
2984 return;
2985 }
2986
2987 assert(mach->CallStackTop > 0);
2988 mach->CallStackTop--;
2989
2990 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
2991 mach->CondMask = mach->CondStack[mach->CondStackTop];
2992
2993 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
2994 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
2995
2996 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
2997 mach->ContMask = mach->ContStack[mach->ContStackTop];
2998
2999 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3000 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3001
3002 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3003 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3004
3005 assert(mach->FuncStackTop > 0);
3006 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3007
3008 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3009
3010 UPDATE_EXEC_MASK(mach);
3011 }
3012 break;
3013
3014 case TGSI_OPCODE_SSG:
3015 /* TGSI_OPCODE_SGN */
3016 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3017 FETCH( &r[0], 0, chan_index );
3018 micro_sgn(&d[chan_index], &r[0]);
3019 }
3020 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3021 STORE(&d[chan_index], 0, chan_index);
3022 }
3023 break;
3024
3025 case TGSI_OPCODE_CMP:
3026 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3027 FETCH(&r[0], 0, chan_index);
3028 FETCH(&r[1], 1, chan_index);
3029 FETCH(&r[2], 2, chan_index);
3030 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
3031 }
3032 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3033 STORE(&d[chan_index], 0, chan_index);
3034 }
3035 break;
3036
3037 case TGSI_OPCODE_SCS:
3038 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
3039 FETCH( &r[0], 0, CHAN_X );
3040 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
3041 micro_cos(&r[1], &r[0]);
3042 STORE(&r[1], 0, CHAN_X);
3043 }
3044 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
3045 micro_sin(&r[1], &r[0]);
3046 STORE(&r[1], 0, CHAN_Y);
3047 }
3048 }
3049 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
3050 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
3051 }
3052 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
3053 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
3054 }
3055 break;
3056
3057 case TGSI_OPCODE_NRM:
3058 /* 3-component vector normalize */
3059 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
3060 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
3061 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
3062 /* r3 = sqrt(dp3(src0, src0)) */
3063 FETCH(&r[0], 0, CHAN_X);
3064 micro_mul(&r[3], &r[0], &r[0]);
3065 FETCH(&r[1], 0, CHAN_Y);
3066 micro_mul(&r[4], &r[1], &r[1]);
3067 micro_add(&r[3], &r[3], &r[4]);
3068 FETCH(&r[2], 0, CHAN_Z);
3069 micro_mul(&r[4], &r[2], &r[2]);
3070 micro_add(&r[3], &r[3], &r[4]);
3071 micro_sqrt(&r[3], &r[3]);
3072
3073 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
3074 micro_div(&r[0], &r[0], &r[3]);
3075 STORE(&r[0], 0, CHAN_X);
3076 }
3077 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
3078 micro_div(&r[1], &r[1], &r[3]);
3079 STORE(&r[1], 0, CHAN_Y);
3080 }
3081 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
3082 micro_div(&r[2], &r[2], &r[3]);
3083 STORE(&r[2], 0, CHAN_Z);
3084 }
3085 }
3086 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
3087 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
3088 }
3089 break;
3090
3091 case TGSI_OPCODE_NRM4:
3092 /* 4-component vector normalize */
3093 {
3094 union tgsi_exec_channel tmp, dot;
3095
3096 /* tmp = dp4(src0, src0): */
3097 FETCH( &r[0], 0, CHAN_X );
3098 micro_mul( &tmp, &r[0], &r[0] );
3099
3100 FETCH( &r[1], 0, CHAN_Y );
3101 micro_mul( &dot, &r[1], &r[1] );
3102 micro_add( &tmp, &tmp, &dot );
3103
3104 FETCH( &r[2], 0, CHAN_Z );
3105 micro_mul( &dot, &r[2], &r[2] );
3106 micro_add( &tmp, &tmp, &dot );
3107
3108 FETCH( &r[3], 0, CHAN_W );
3109 micro_mul( &dot, &r[3], &r[3] );
3110 micro_add( &tmp, &tmp, &dot );
3111
3112 /* tmp = 1 / sqrt(tmp) */
3113 micro_sqrt( &tmp, &tmp );
3114 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
3115
3116 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3117 /* chan = chan * tmp */
3118 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
3119 STORE( &r[chan_index], 0, chan_index );
3120 }
3121 }
3122 break;
3123
3124 case TGSI_OPCODE_DIV:
3125 assert( 0 );
3126 break;
3127
3128 case TGSI_OPCODE_DP2:
3129 FETCH( &r[0], 0, CHAN_X );
3130 FETCH( &r[1], 1, CHAN_X );
3131 micro_mul( &r[0], &r[0], &r[1] );
3132
3133 FETCH( &r[1], 0, CHAN_Y );
3134 FETCH( &r[2], 1, CHAN_Y );
3135 micro_mul( &r[1], &r[1], &r[2] );
3136 micro_add( &r[0], &r[0], &r[1] );
3137
3138 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3139 STORE( &r[0], 0, chan_index );
3140 }
3141 break;
3142
3143 case TGSI_OPCODE_IF:
3144 /* push CondMask */
3145 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3146 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3147 FETCH( &r[0], 0, CHAN_X );
3148 /* update CondMask */
3149 if( ! r[0].u[0] ) {
3150 mach->CondMask &= ~0x1;
3151 }
3152 if( ! r[0].u[1] ) {
3153 mach->CondMask &= ~0x2;
3154 }
3155 if( ! r[0].u[2] ) {
3156 mach->CondMask &= ~0x4;
3157 }
3158 if( ! r[0].u[3] ) {
3159 mach->CondMask &= ~0x8;
3160 }
3161 UPDATE_EXEC_MASK(mach);
3162 /* Todo: If CondMask==0, jump to ELSE */
3163 break;
3164
3165 case TGSI_OPCODE_ELSE:
3166 /* invert CondMask wrt previous mask */
3167 {
3168 uint prevMask;
3169 assert(mach->CondStackTop > 0);
3170 prevMask = mach->CondStack[mach->CondStackTop - 1];
3171 mach->CondMask = ~mach->CondMask & prevMask;
3172 UPDATE_EXEC_MASK(mach);
3173 /* Todo: If CondMask==0, jump to ENDIF */
3174 }
3175 break;
3176
3177 case TGSI_OPCODE_ENDIF:
3178 /* pop CondMask */
3179 assert(mach->CondStackTop > 0);
3180 mach->CondMask = mach->CondStack[--mach->CondStackTop];
3181 UPDATE_EXEC_MASK(mach);
3182 break;
3183
3184 case TGSI_OPCODE_END:
3185 /* halt execution */
3186 *pc = -1;
3187 break;
3188
3189 case TGSI_OPCODE_REP:
3190 assert (0);
3191 break;
3192
3193 case TGSI_OPCODE_ENDREP:
3194 assert (0);
3195 break;
3196
3197 case TGSI_OPCODE_PUSHA:
3198 assert (0);
3199 break;
3200
3201 case TGSI_OPCODE_POPA:
3202 assert (0);
3203 break;
3204
3205 case TGSI_OPCODE_CEIL:
3206 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3207 FETCH( &r[0], 0, chan_index );
3208 micro_ceil(&d[chan_index], &r[0]);
3209 }
3210 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3211 STORE(&d[chan_index], 0, chan_index);
3212 }
3213 break;
3214
3215 case TGSI_OPCODE_I2F:
3216 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
3217 break;
3218
3219 case TGSI_OPCODE_NOT:
3220 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3221 break;
3222
3223 case TGSI_OPCODE_TRUNC:
3224 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3225 FETCH( &r[0], 0, chan_index );
3226 micro_trunc(&d[chan_index], &r[0]);
3227 }
3228 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3229 STORE(&d[chan_index], 0, chan_index);
3230 }
3231 break;
3232
3233 case TGSI_OPCODE_SHL:
3234 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3235 break;
3236
3237 case TGSI_OPCODE_AND:
3238 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3239 break;
3240
3241 case TGSI_OPCODE_OR:
3242 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3243 break;
3244
3245 case TGSI_OPCODE_MOD:
3246 assert (0);
3247 break;
3248
3249 case TGSI_OPCODE_XOR:
3250 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3251 break;
3252
3253 case TGSI_OPCODE_SAD:
3254 assert (0);
3255 break;
3256
3257 case TGSI_OPCODE_TXF:
3258 assert (0);
3259 break;
3260
3261 case TGSI_OPCODE_TXQ:
3262 assert (0);
3263 break;
3264
3265 case TGSI_OPCODE_EMIT:
3266 emit_vertex(mach);
3267 break;
3268
3269 case TGSI_OPCODE_ENDPRIM:
3270 emit_primitive(mach);
3271 break;
3272
3273 case TGSI_OPCODE_BGNFOR:
3274 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3275 for (chan_index = 0; chan_index < 3; chan_index++) {
3276 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
3277 }
3278 ++mach->LoopCounterStackTop;
3279 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
3280 /* update LoopMask */
3281 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3282 mach->LoopMask &= ~0x1;
3283 }
3284 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3285 mach->LoopMask &= ~0x2;
3286 }
3287 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3288 mach->LoopMask &= ~0x4;
3289 }
3290 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3291 mach->LoopMask &= ~0x8;
3292 }
3293 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3294 UPDATE_EXEC_MASK(mach);
3295 /* fall-through (for now) */
3296 case TGSI_OPCODE_BGNLOOP:
3297 /* push LoopMask and ContMasks */
3298 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3299 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3300 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3301 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3302
3303 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3304 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3305 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
3306 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3307 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
3308 break;
3309
3310 case TGSI_OPCODE_ENDFOR:
3311 assert(mach->LoopCounterStackTop > 0);
3312 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3313 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3314 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
3315 /* update LoopMask */
3316 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3317 mach->LoopMask &= ~0x1;
3318 }
3319 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3320 mach->LoopMask &= ~0x2;
3321 }
3322 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3323 mach->LoopMask &= ~0x4;
3324 }
3325 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3326 mach->LoopMask &= ~0x8;
3327 }
3328 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3329 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3330 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
3331 assert(mach->LoopLabelStackTop > 0);
3332 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
3333 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
3334 /* Restore ContMask, but don't pop */
3335 assert(mach->ContStackTop > 0);
3336 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3337 UPDATE_EXEC_MASK(mach);
3338 if (mach->ExecMask) {
3339 /* repeat loop: jump to instruction just past BGNLOOP */
3340 assert(mach->LoopLabelStackTop > 0);
3341 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3342 }
3343 else {
3344 /* exit loop: pop LoopMask */
3345 assert(mach->LoopStackTop > 0);
3346 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3347 /* pop ContMask */
3348 assert(mach->ContStackTop > 0);
3349 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3350 assert(mach->LoopLabelStackTop > 0);
3351 --mach->LoopLabelStackTop;
3352 assert(mach->LoopCounterStackTop > 0);
3353 --mach->LoopCounterStackTop;
3354
3355 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3356 }
3357 UPDATE_EXEC_MASK(mach);
3358 break;
3359
3360 case TGSI_OPCODE_ENDLOOP:
3361 /* Restore ContMask, but don't pop */
3362 assert(mach->ContStackTop > 0);
3363 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3364 UPDATE_EXEC_MASK(mach);
3365 if (mach->ExecMask) {
3366 /* repeat loop: jump to instruction just past BGNLOOP */
3367 assert(mach->LoopLabelStackTop > 0);
3368 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3369 }
3370 else {
3371 /* exit loop: pop LoopMask */
3372 assert(mach->LoopStackTop > 0);
3373 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3374 /* pop ContMask */
3375 assert(mach->ContStackTop > 0);
3376 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3377 assert(mach->LoopLabelStackTop > 0);
3378 --mach->LoopLabelStackTop;
3379
3380 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3381 }
3382 UPDATE_EXEC_MASK(mach);
3383 break;
3384
3385 case TGSI_OPCODE_BRK:
3386 exec_break(mach);
3387 break;
3388
3389 case TGSI_OPCODE_CONT:
3390 /* turn off cont channels for each enabled exec channel */
3391 mach->ContMask &= ~mach->ExecMask;
3392 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3393 UPDATE_EXEC_MASK(mach);
3394 break;
3395
3396 case TGSI_OPCODE_BGNSUB:
3397 /* no-op */
3398 break;
3399
3400 case TGSI_OPCODE_ENDSUB:
3401 /*
3402 * XXX: This really should be a no-op. We should never reach this opcode.
3403 */
3404
3405 assert(mach->CallStackTop > 0);
3406 mach->CallStackTop--;
3407
3408 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3409 mach->CondMask = mach->CondStack[mach->CondStackTop];
3410
3411 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3412 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3413
3414 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3415 mach->ContMask = mach->ContStack[mach->ContStackTop];
3416
3417 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3418 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3419
3420 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3421 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3422
3423 assert(mach->FuncStackTop > 0);
3424 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3425
3426 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3427
3428 UPDATE_EXEC_MASK(mach);
3429 break;
3430
3431 case TGSI_OPCODE_NOP:
3432 break;
3433
3434 case TGSI_OPCODE_BREAKC:
3435 FETCH(&r[0], 0, CHAN_X);
3436 /* update CondMask */
3437 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
3438 mach->LoopMask &= ~0x1;
3439 }
3440 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
3441 mach->LoopMask &= ~0x2;
3442 }
3443 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
3444 mach->LoopMask &= ~0x4;
3445 }
3446 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
3447 mach->LoopMask &= ~0x8;
3448 }
3449 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3450 UPDATE_EXEC_MASK(mach);
3451 break;
3452
3453 case TGSI_OPCODE_F2I:
3454 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3455 break;
3456
3457 case TGSI_OPCODE_IDIV:
3458 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3459 break;
3460
3461 case TGSI_OPCODE_IMAX:
3462 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3463 break;
3464
3465 case TGSI_OPCODE_IMIN:
3466 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3467 break;
3468
3469 case TGSI_OPCODE_INEG:
3470 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3471 break;
3472
3473 case TGSI_OPCODE_ISGE:
3474 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3475 break;
3476
3477 case TGSI_OPCODE_ISHR:
3478 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3479 break;
3480
3481 case TGSI_OPCODE_ISLT:
3482 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3483 break;
3484
3485 case TGSI_OPCODE_F2U:
3486 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
3487 break;
3488
3489 case TGSI_OPCODE_U2F:
3490 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
3491 break;
3492
3493 case TGSI_OPCODE_UADD:
3494 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3495 break;
3496
3497 case TGSI_OPCODE_UDIV:
3498 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3499 break;
3500
3501 case TGSI_OPCODE_UMAD:
3502 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3503 break;
3504
3505 case TGSI_OPCODE_UMAX:
3506 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3507 break;
3508
3509 case TGSI_OPCODE_UMIN:
3510 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3511 break;
3512
3513 case TGSI_OPCODE_UMOD:
3514 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3515 break;
3516
3517 case TGSI_OPCODE_UMUL:
3518 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3519 break;
3520
3521 case TGSI_OPCODE_USEQ:
3522 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3523 break;
3524
3525 case TGSI_OPCODE_USGE:
3526 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3527 break;
3528
3529 case TGSI_OPCODE_USHR:
3530 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3531 break;
3532
3533 case TGSI_OPCODE_USLT:
3534 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3535 break;
3536
3537 case TGSI_OPCODE_USNE:
3538 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3539 break;
3540
3541 case TGSI_OPCODE_SWITCH:
3542 exec_switch(mach, inst);
3543 break;
3544
3545 case TGSI_OPCODE_CASE:
3546 exec_case(mach, inst);
3547 break;
3548
3549 case TGSI_OPCODE_DEFAULT:
3550 exec_default(mach);
3551 break;
3552
3553 case TGSI_OPCODE_ENDSWITCH:
3554 exec_endswitch(mach);
3555 break;
3556
3557 default:
3558 assert( 0 );
3559 }
3560 }
3561
3562
3563 #define DEBUG_EXECUTION 0
3564
3565
3566 /**
3567 * Run TGSI interpreter.
3568 * \return bitmask of "alive" quad components
3569 */
3570 uint
3571 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3572 {
3573 uint i;
3574 int pc = 0;
3575
3576 mach->CondMask = 0xf;
3577 mach->LoopMask = 0xf;
3578 mach->ContMask = 0xf;
3579 mach->FuncMask = 0xf;
3580 mach->ExecMask = 0xf;
3581
3582 mach->Switch.mask = 0xf;
3583
3584 assert(mach->CondStackTop == 0);
3585 assert(mach->LoopStackTop == 0);
3586 assert(mach->ContStackTop == 0);
3587 assert(mach->SwitchStackTop == 0);
3588 assert(mach->BreakStackTop == 0);
3589 assert(mach->CallStackTop == 0);
3590
3591 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3592 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3593
3594 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3595 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3596 mach->Primitives[0] = 0;
3597 }
3598
3599 for (i = 0; i < QUAD_SIZE; i++) {
3600 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3601 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3602 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3603 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3604 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3605 }
3606
3607 /* execute declarations (interpolants) */
3608 for (i = 0; i < mach->NumDeclarations; i++) {
3609 exec_declaration( mach, mach->Declarations+i );
3610 }
3611
3612 {
3613 #if DEBUG_EXECUTION
3614 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
3615 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
3616 uint inst = 1;
3617
3618 memcpy(temps, mach->Temps, sizeof(temps));
3619 memcpy(outputs, mach->Outputs, sizeof(outputs));
3620 #endif
3621
3622 /* execute instructions, until pc is set to -1 */
3623 while (pc != -1) {
3624
3625 #if DEBUG_EXECUTION
3626 uint i;
3627
3628 tgsi_dump_instruction(&mach->Instructions[pc], inst++);
3629 #endif
3630
3631 assert(pc < (int) mach->NumInstructions);
3632 exec_instruction(mach, mach->Instructions + pc, &pc);
3633
3634 #if DEBUG_EXECUTION
3635 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
3636 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
3637 uint j;
3638
3639 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
3640 debug_printf("TEMP[%2u] = ", i);
3641 for (j = 0; j < 4; j++) {
3642 if (j > 0) {
3643 debug_printf(" ");
3644 }
3645 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3646 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
3647 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
3648 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
3649 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
3650 }
3651 }
3652 }
3653 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
3654 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
3655 uint j;
3656
3657 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
3658 debug_printf("OUT[%2u] = ", i);
3659 for (j = 0; j < 4; j++) {
3660 if (j > 0) {
3661 debug_printf(" ");
3662 }
3663 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3664 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
3665 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
3666 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
3667 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
3668 }
3669 }
3670 }
3671 #endif
3672 }
3673 }
3674
3675 #if 0
3676 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3677 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3678 /*
3679 * Scale back depth component.
3680 */
3681 for (i = 0; i < 4; i++)
3682 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3683 }
3684 #endif
3685
3686 assert(mach->CondStackTop == 0);
3687 assert(mach->LoopStackTop == 0);
3688 assert(mach->ContStackTop == 0);
3689 assert(mach->SwitchStackTop == 0);
3690 assert(mach->BreakStackTop == 0);
3691 assert(mach->CallStackTop == 0);
3692
3693 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3694 }