Merge branch 'gallium-edgeflags'
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK) which are controlled by
 * the flow control instructions (such as IF/ELSE/ENDIF, LOOP/ENDLOOP
 * and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_dump.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
60 #include "util/u_memory.h"
61 #include "util/u_math.h"
62
/*
 * When non-zero, micro_exp2(), micro_lg2() and micro_pow() call the
 * util_fast_* helpers; when zero they call powf()/logf() instead.
 */
#define FAST_MATH 1

/*
 * Lane indices within one execution channel, named after the position of
 * the element in the 2x2 quad.  micro_ddx() and micro_ddy() use them to
 * pick the lanes they subtract.
 */
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3

/* Numeric indices for the X, Y, Z and W channels. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
74
/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
 *
 * Each name is a plain alias for the matching TGSI_EXEC_TEMP_* constant.
 * An _I/_C pair is used together as Temps[_I].xyzw[_C]; the constant-valued
 * entries (0, 7F, 80, FF, 1, 2, 128, M128, 3, HALF) are filled in by
 * tgsi_exec_machine_create().
 */
#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I TGSI_EXEC_TEMP_128_I
#define TEMP_128_C TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0 TGSI_EXEC_TEMP_R0
#define TEMP_P0 TGSI_EXEC_TEMP_P0
108
/* Non-zero when bit CHAN is set in the write mask of INST's first destination. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))

/* Same test as IS_CHANNEL_ENABLED, but for INST's second destination. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))

/*
 * Loop header: runs the statement that follows once for every channel
 * 0..NUM_CHANNELS-1 enabled in Dst[0]'s write mask.  CHAN must be an
 * assignable variable.  The expansion is a brace-less "for ... if ...", so
 * an "else" written directly after the body would bind to the macro's "if".
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
if (IS_CHANNEL_ENABLED( INST, CHAN ))

/* As FOR_EACH_ENABLED_CHANNEL, but driven by Dst[1]'s write mask. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
if (IS_CHANNEL_ENABLED2( INST, CHAN ))
122
123
/**
 * Recompute MACH->ExecMask as the bitwise AND of the condition, loop,
 * continue and function masks.
 *
 * MACH is a pointer expression; it is parenthesized so that arguments such
 * as "p + 1" expand correctly.  It is evaluated several times, so it must
 * not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
127
128
129 static const union tgsi_exec_channel ZeroVec =
130 { { 0.0, 0.0, 0.0, 0.0 } };
131
132
#ifdef DEBUG
/**
 * Debug-build sanity check: assert that none of the four float lanes of
 * \p chan is rejected by util_is_inf_or_nan().
 */
static void
check_inf_or_nan(const union tgsi_exec_channel *chan)
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      assert(!util_is_inf_or_nan(chan->f[lane]));
   }
}
#endif
143
144
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of \p chan on one line,
 * prefixed with \p msg.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lane = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lane[0], lane[1], lane[2], lane[3]);
}
#endif
153
154
#ifdef DEBUG
/**
 * Debug helper: dump temporary register \p index of \p mach, one line per
 * channel (X, Y, Z, W) with that channel's four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   const struct tgsi_exec_vector *vec = &mach->Temps[index];
   int c = 0;

   debug_printf("Temp[%u] =\n", index);
   while (c < 4) {
      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   "XYZW"[c],
                   vec->xyzw[c].f[0],
                   vec->xyzw[c].f[1],
                   vec->xyzw[c].f[2],
                   vec->xyzw[c].f[3]);
      c++;
   }
}
#endif
172
173
174 /**
175 * Check if there's a potential src/dst register data dependency when
176 * using SOA execution.
177 * Example:
178 * MOV T, T.yxwz;
179 * This would expand into:
180 * MOV t0, t1;
181 * MOV t1, t0;
182 * MOV t2, t3;
183 * MOV t3, t2;
184 * The second instruction will have the wrong value for t0 if executed as-is.
185 */
186 boolean
187 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
188 {
189 uint i, chan;
190
191 uint writemask = inst->Dst[0].Register.WriteMask;
192 if (writemask == TGSI_WRITEMASK_X ||
193 writemask == TGSI_WRITEMASK_Y ||
194 writemask == TGSI_WRITEMASK_Z ||
195 writemask == TGSI_WRITEMASK_W ||
196 writemask == TGSI_WRITEMASK_NONE) {
197 /* no chance of data dependency */
198 return FALSE;
199 }
200
201 /* loop over src regs */
202 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
203 if ((inst->Src[i].Register.File ==
204 inst->Dst[0].Register.File) &&
205 (inst->Src[i].Register.Index ==
206 inst->Dst[0].Register.Index)) {
207 /* loop over dest channels */
208 uint channelsWritten = 0x0;
209 FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
210 /* check if we're reading a channel that's been written */
211 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
212 if (channelsWritten & (1 << swizzle)) {
213 return TRUE;
214 }
215
216 channelsWritten |= (1 << chan);
217 }
218 }
219 }
220 return FALSE;
221 }
222
223
224 /**
225 * Initialize machine state by expanding tokens to full instructions,
226 * allocating temporary storage, setting up constants, etc.
227 * After this, we can call tgsi_exec_machine_run() many times.
228 */
229 void
230 tgsi_exec_machine_bind_shader(
231 struct tgsi_exec_machine *mach,
232 const struct tgsi_token *tokens,
233 uint numSamplers,
234 struct tgsi_sampler **samplers)
235 {
236 uint k;
237 struct tgsi_parse_context parse;
238 struct tgsi_exec_labels *labels = &mach->Labels;
239 struct tgsi_full_instruction *instructions;
240 struct tgsi_full_declaration *declarations;
241 uint maxInstructions = 10, numInstructions = 0;
242 uint maxDeclarations = 10, numDeclarations = 0;
243 uint instno = 0;
244
245 #if 0
246 tgsi_dump(tokens, 0);
247 #endif
248
249 util_init_math();
250
251 mach->Tokens = tokens;
252 mach->Samplers = samplers;
253
254 k = tgsi_parse_init (&parse, mach->Tokens);
255 if (k != TGSI_PARSE_OK) {
256 debug_printf( "Problem parsing!\n" );
257 return;
258 }
259
260 mach->Processor = parse.FullHeader.Processor.Processor;
261 mach->ImmLimit = 0;
262 labels->count = 0;
263
264 declarations = (struct tgsi_full_declaration *)
265 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
266
267 if (!declarations) {
268 return;
269 }
270
271 instructions = (struct tgsi_full_instruction *)
272 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
273
274 if (!instructions) {
275 FREE( declarations );
276 return;
277 }
278
279 while( !tgsi_parse_end_of_tokens( &parse ) ) {
280 uint pointer = parse.Position;
281 uint i;
282
283 tgsi_parse_token( &parse );
284 switch( parse.FullToken.Token.Type ) {
285 case TGSI_TOKEN_TYPE_DECLARATION:
286 /* save expanded declaration */
287 if (numDeclarations == maxDeclarations) {
288 declarations = REALLOC(declarations,
289 maxDeclarations
290 * sizeof(struct tgsi_full_declaration),
291 (maxDeclarations + 10)
292 * sizeof(struct tgsi_full_declaration));
293 maxDeclarations += 10;
294 }
295 memcpy(declarations + numDeclarations,
296 &parse.FullToken.FullDeclaration,
297 sizeof(declarations[0]));
298 numDeclarations++;
299 break;
300
301 case TGSI_TOKEN_TYPE_IMMEDIATE:
302 {
303 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
304 assert( size <= 4 );
305 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
306
307 for( i = 0; i < size; i++ ) {
308 mach->Imms[mach->ImmLimit][i] =
309 parse.FullToken.FullImmediate.u[i].Float;
310 }
311 mach->ImmLimit += 1;
312 }
313 break;
314
315 case TGSI_TOKEN_TYPE_INSTRUCTION:
316 assert( labels->count < MAX_LABELS );
317
318 labels->labels[labels->count][0] = instno;
319 labels->labels[labels->count][1] = pointer;
320 labels->count++;
321
322 /* save expanded instruction */
323 if (numInstructions == maxInstructions) {
324 instructions = REALLOC(instructions,
325 maxInstructions
326 * sizeof(struct tgsi_full_instruction),
327 (maxInstructions + 10)
328 * sizeof(struct tgsi_full_instruction));
329 maxInstructions += 10;
330 }
331
332 memcpy(instructions + numInstructions,
333 &parse.FullToken.FullInstruction,
334 sizeof(instructions[0]));
335
336 numInstructions++;
337 break;
338
339 case TGSI_TOKEN_TYPE_PROPERTY:
340 break;
341
342 default:
343 assert( 0 );
344 }
345 }
346 tgsi_parse_free (&parse);
347
348 if (mach->Declarations) {
349 FREE( mach->Declarations );
350 }
351 mach->Declarations = declarations;
352 mach->NumDeclarations = numDeclarations;
353
354 if (mach->Instructions) {
355 FREE( mach->Instructions );
356 }
357 mach->Instructions = instructions;
358 mach->NumInstructions = numInstructions;
359 }
360
361
362 struct tgsi_exec_machine *
363 tgsi_exec_machine_create( void )
364 {
365 struct tgsi_exec_machine *mach;
366 uint i;
367
368 mach = align_malloc( sizeof *mach, 16 );
369 if (!mach)
370 goto fail;
371
372 memset(mach, 0, sizeof(*mach));
373
374 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
375 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
376
377 /* Setup constants. */
378 for( i = 0; i < 4; i++ ) {
379 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
380 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
381 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
382 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
383 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
384 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
385 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
386 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
387 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
388 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
389 }
390
391 #ifdef DEBUG
392 /* silence warnings */
393 (void) print_chan;
394 (void) print_temp;
395 #endif
396
397 return mach;
398
399 fail:
400 align_free(mach);
401 return NULL;
402 }
403
404
405 void
406 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
407 {
408 if (mach) {
409 FREE(mach->Instructions);
410 FREE(mach->Declarations);
411 }
412
413 align_free(mach);
414 }
415
416
417 static void
418 micro_abs(
419 union tgsi_exec_channel *dst,
420 const union tgsi_exec_channel *src )
421 {
422 dst->f[0] = fabsf( src->f[0] );
423 dst->f[1] = fabsf( src->f[1] );
424 dst->f[2] = fabsf( src->f[2] );
425 dst->f[3] = fabsf( src->f[3] );
426 }
427
428 static void
429 micro_add(
430 union tgsi_exec_channel *dst,
431 const union tgsi_exec_channel *src0,
432 const union tgsi_exec_channel *src1 )
433 {
434 dst->f[0] = src0->f[0] + src1->f[0];
435 dst->f[1] = src0->f[1] + src1->f[1];
436 dst->f[2] = src0->f[2] + src1->f[2];
437 dst->f[3] = src0->f[3] + src1->f[3];
438 }
439
#if 0
/** dst = src0 + src1 for each of the four signed integer lanes. */
static void
micro_iadd(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] + src1->i[lane];
   }
}
#endif
453
454 static void
455 micro_and(
456 union tgsi_exec_channel *dst,
457 const union tgsi_exec_channel *src0,
458 const union tgsi_exec_channel *src1 )
459 {
460 dst->u[0] = src0->u[0] & src1->u[0];
461 dst->u[1] = src0->u[1] & src1->u[1];
462 dst->u[2] = src0->u[2] & src1->u[2];
463 dst->u[3] = src0->u[3] & src1->u[3];
464 }
465
466 static void
467 micro_ceil(
468 union tgsi_exec_channel *dst,
469 const union tgsi_exec_channel *src )
470 {
471 dst->f[0] = ceilf( src->f[0] );
472 dst->f[1] = ceilf( src->f[1] );
473 dst->f[2] = ceilf( src->f[2] );
474 dst->f[3] = ceilf( src->f[3] );
475 }
476
477 static void
478 micro_cos(
479 union tgsi_exec_channel *dst,
480 const union tgsi_exec_channel *src )
481 {
482 dst->f[0] = cosf( src->f[0] );
483 dst->f[1] = cosf( src->f[1] );
484 dst->f[2] = cosf( src->f[2] );
485 dst->f[3] = cosf( src->f[3] );
486 }
487
488 static void
489 micro_ddx(
490 union tgsi_exec_channel *dst,
491 const union tgsi_exec_channel *src )
492 {
493 dst->f[0] =
494 dst->f[1] =
495 dst->f[2] =
496 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
497 }
498
499 static void
500 micro_ddy(
501 union tgsi_exec_channel *dst,
502 const union tgsi_exec_channel *src )
503 {
504 dst->f[0] =
505 dst->f[1] =
506 dst->f[2] =
507 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
508 }
509
510 static void
511 micro_div(
512 union tgsi_exec_channel *dst,
513 const union tgsi_exec_channel *src0,
514 const union tgsi_exec_channel *src1 )
515 {
516 if (src1->f[0] != 0) {
517 dst->f[0] = src0->f[0] / src1->f[0];
518 }
519 if (src1->f[1] != 0) {
520 dst->f[1] = src0->f[1] / src1->f[1];
521 }
522 if (src1->f[2] != 0) {
523 dst->f[2] = src0->f[2] / src1->f[2];
524 }
525 if (src1->f[3] != 0) {
526 dst->f[3] = src0->f[3] / src1->f[3];
527 }
528 }
529
#if 0
/**
 * dst = src0 / src1 for each of the four unsigned lanes.
 *
 * Integer division by zero is undefined in C, so a zero divisor yields
 * all bits set (~0u) for that lane instead.
 */
static void
micro_udiv(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src1->u[lane] ? src0->u[lane] / src1->u[lane] : ~0u;
   }
}
#endif
543
544 static void
545 micro_eq(
546 union tgsi_exec_channel *dst,
547 const union tgsi_exec_channel *src0,
548 const union tgsi_exec_channel *src1,
549 const union tgsi_exec_channel *src2,
550 const union tgsi_exec_channel *src3 )
551 {
552 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
553 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
554 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
555 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
556 }
557
#if 0
/**
 * Per-lane select on signed integer equality:
 * dst = (src0 == src1) ? src2 : src3.
 */
static void
micro_ieq(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] == src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
573
574 static void
575 micro_exp2(
576 union tgsi_exec_channel *dst,
577 const union tgsi_exec_channel *src)
578 {
579 #if FAST_MATH
580 dst->f[0] = util_fast_exp2( src->f[0] );
581 dst->f[1] = util_fast_exp2( src->f[1] );
582 dst->f[2] = util_fast_exp2( src->f[2] );
583 dst->f[3] = util_fast_exp2( src->f[3] );
584 #else
585
586 #if DEBUG
587 /* Inf is okay for this instruction, so clamp it to silence assertions. */
588 uint i;
589 union tgsi_exec_channel clamped;
590
591 for (i = 0; i < 4; i++) {
592 if (src->f[i] > 127.99999f) {
593 clamped.f[i] = 127.99999f;
594 } else if (src->f[i] < -126.99999f) {
595 clamped.f[i] = -126.99999f;
596 } else {
597 clamped.f[i] = src->f[i];
598 }
599 }
600 src = &clamped;
601 #endif
602
603 dst->f[0] = powf( 2.0f, src->f[0] );
604 dst->f[1] = powf( 2.0f, src->f[1] );
605 dst->f[2] = powf( 2.0f, src->f[2] );
606 dst->f[3] = powf( 2.0f, src->f[3] );
607 #endif
608 }
609
#if 0
/** dst = (uint) src: convert each of the four float lanes with a C cast. */
static void
micro_f2ut(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = (uint) src->f[lane];
   }
}
#endif
622
623 static void
624 micro_float_clamp(union tgsi_exec_channel *dst,
625 const union tgsi_exec_channel *src)
626 {
627 uint i;
628
629 for (i = 0; i < 4; i++) {
630 if (src->f[i] > 0.0f) {
631 if (src->f[i] > 1.884467e+019f)
632 dst->f[i] = 1.884467e+019f;
633 else if (src->f[i] < 5.42101e-020f)
634 dst->f[i] = 5.42101e-020f;
635 else
636 dst->f[i] = src->f[i];
637 }
638 else {
639 if (src->f[i] < -1.884467e+019f)
640 dst->f[i] = -1.884467e+019f;
641 else if (src->f[i] > -5.42101e-020f)
642 dst->f[i] = -5.42101e-020f;
643 else
644 dst->f[i] = src->f[i];
645 }
646 }
647 }
648
649 static void
650 micro_flr(
651 union tgsi_exec_channel *dst,
652 const union tgsi_exec_channel *src )
653 {
654 dst->f[0] = floorf( src->f[0] );
655 dst->f[1] = floorf( src->f[1] );
656 dst->f[2] = floorf( src->f[2] );
657 dst->f[3] = floorf( src->f[3] );
658 }
659
660 static void
661 micro_frc(
662 union tgsi_exec_channel *dst,
663 const union tgsi_exec_channel *src )
664 {
665 dst->f[0] = src->f[0] - floorf( src->f[0] );
666 dst->f[1] = src->f[1] - floorf( src->f[1] );
667 dst->f[2] = src->f[2] - floorf( src->f[2] );
668 dst->f[3] = src->f[3] - floorf( src->f[3] );
669 }
670
671 static void
672 micro_i2f(
673 union tgsi_exec_channel *dst,
674 const union tgsi_exec_channel *src )
675 {
676 dst->f[0] = (float) src->i[0];
677 dst->f[1] = (float) src->i[1];
678 dst->f[2] = (float) src->i[2];
679 dst->f[3] = (float) src->i[3];
680 }
681
682 static void
683 micro_lg2(
684 union tgsi_exec_channel *dst,
685 const union tgsi_exec_channel *src )
686 {
687 #if FAST_MATH
688 dst->f[0] = util_fast_log2( src->f[0] );
689 dst->f[1] = util_fast_log2( src->f[1] );
690 dst->f[2] = util_fast_log2( src->f[2] );
691 dst->f[3] = util_fast_log2( src->f[3] );
692 #else
693 dst->f[0] = logf( src->f[0] ) * 1.442695f;
694 dst->f[1] = logf( src->f[1] ) * 1.442695f;
695 dst->f[2] = logf( src->f[2] ) * 1.442695f;
696 dst->f[3] = logf( src->f[3] ) * 1.442695f;
697 #endif
698 }
699
700 static void
701 micro_le(
702 union tgsi_exec_channel *dst,
703 const union tgsi_exec_channel *src0,
704 const union tgsi_exec_channel *src1,
705 const union tgsi_exec_channel *src2,
706 const union tgsi_exec_channel *src3 )
707 {
708 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
709 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
710 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
711 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
712 }
713
714 static void
715 micro_lt(
716 union tgsi_exec_channel *dst,
717 const union tgsi_exec_channel *src0,
718 const union tgsi_exec_channel *src1,
719 const union tgsi_exec_channel *src2,
720 const union tgsi_exec_channel *src3 )
721 {
722 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
723 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
724 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
725 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
726 }
727
#if 0
/**
 * Per-lane select on signed integer "less than":
 * dst = (src0 < src1) ? src2 : src3.
 */
static void
micro_ilt(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
743
#if 0
/**
 * Per-lane select on unsigned "less than":
 * dst = (src0 < src1) ? src2 : src3.
 */
static void
micro_ult(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src2->u[lane];
      }
      else {
         dst->u[lane] = src3->u[lane];
      }
   }
}
#endif
759
760 static void
761 micro_max(
762 union tgsi_exec_channel *dst,
763 const union tgsi_exec_channel *src0,
764 const union tgsi_exec_channel *src1 )
765 {
766 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
767 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
768 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
769 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
770 }
771
#if 0
/** dst = larger of src0 and src1 for each signed integer lane. */
static void
micro_imax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] > src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
785
#if 0
/** dst = larger of src0 and src1 for each unsigned lane. */
static void
micro_umax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] > src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
799
800 static void
801 micro_min(
802 union tgsi_exec_channel *dst,
803 const union tgsi_exec_channel *src0,
804 const union tgsi_exec_channel *src1 )
805 {
806 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
807 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
808 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
809 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
810 }
811
#if 0
/** dst = smaller of src0 and src1 for each signed integer lane. */
static void
micro_imin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
825
#if 0
/** dst = smaller of src0 and src1 for each unsigned lane. */
static void
micro_umin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
839
#if 0
/**
 * dst = src0 % src1 for each of the four unsigned lanes.
 *
 * Integer modulo by zero is undefined in C, so a zero divisor yields
 * all bits set (~0u) for that lane instead.
 */
static void
micro_umod(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src1->u[lane] ? src0->u[lane] % src1->u[lane] : ~0u;
   }
}
#endif
853
854 static void
855 micro_mul(
856 union tgsi_exec_channel *dst,
857 const union tgsi_exec_channel *src0,
858 const union tgsi_exec_channel *src1 )
859 {
860 dst->f[0] = src0->f[0] * src1->f[0];
861 dst->f[1] = src0->f[1] * src1->f[1];
862 dst->f[2] = src0->f[2] * src1->f[2];
863 dst->f[3] = src0->f[3] * src1->f[3];
864 }
865
#if 0
/** dst = src0 * src1 for each of the four signed integer lanes. */
static void
micro_imul(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] * src1->i[lane];
   }
}
#endif
879
#if 0
/**
 * Signed multiply with two destinations: dst1 receives the lane-wise
 * product src0 * src1 computed at lane width, and dst0 is set to zero.
 *
 * NOTE(review): dst0 is always zero, so this does not produce the upper
 * half of a 64-bit product -- confirm before enabling.
 */
static void
micro_imul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
898
#if 0
/**
 * Unsigned multiply with two destinations: dst1 receives the lane-wise
 * product src0 * src1 computed at lane width, and dst0 is set to zero.
 *
 * NOTE(review): dst0 is always zero, so this does not produce the upper
 * half of a 64-bit product -- confirm before enabling.
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->u[lane] = src0->u[lane] * src1->u[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->u[lane] = 0;
   }
}
#endif
917
918
#if 0
/**
 * Per-lane conditional move on unsigned lanes:
 * dst = (src0 != 0) ? src1 : src2.
 */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane]) {
         dst->u[lane] = src1->u[lane];
      }
      else {
         dst->u[lane] = src2->u[lane];
      }
   }
}
#endif
933
934 static void
935 micro_neg(
936 union tgsi_exec_channel *dst,
937 const union tgsi_exec_channel *src )
938 {
939 dst->f[0] = -src->f[0];
940 dst->f[1] = -src->f[1];
941 dst->f[2] = -src->f[2];
942 dst->f[3] = -src->f[3];
943 }
944
#if 0
/** dst = -src for each of the four signed integer lanes. */
static void
micro_ineg(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = -src->i[lane];
   }
}
#endif
957
958 static void
959 micro_not(
960 union tgsi_exec_channel *dst,
961 const union tgsi_exec_channel *src )
962 {
963 dst->u[0] = ~src->u[0];
964 dst->u[1] = ~src->u[1];
965 dst->u[2] = ~src->u[2];
966 dst->u[3] = ~src->u[3];
967 }
968
969 static void
970 micro_or(
971 union tgsi_exec_channel *dst,
972 const union tgsi_exec_channel *src0,
973 const union tgsi_exec_channel *src1 )
974 {
975 dst->u[0] = src0->u[0] | src1->u[0];
976 dst->u[1] = src0->u[1] | src1->u[1];
977 dst->u[2] = src0->u[2] | src1->u[2];
978 dst->u[3] = src0->u[3] | src1->u[3];
979 }
980
981 static void
982 micro_pow(
983 union tgsi_exec_channel *dst,
984 const union tgsi_exec_channel *src0,
985 const union tgsi_exec_channel *src1 )
986 {
987 #if FAST_MATH
988 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
989 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
990 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
991 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
992 #else
993 dst->f[0] = powf( src0->f[0], src1->f[0] );
994 dst->f[1] = powf( src0->f[1], src1->f[1] );
995 dst->f[2] = powf( src0->f[2], src1->f[2] );
996 dst->f[3] = powf( src0->f[3], src1->f[3] );
997 #endif
998 }
999
1000 static void
1001 micro_rnd(
1002 union tgsi_exec_channel *dst,
1003 const union tgsi_exec_channel *src )
1004 {
1005 dst->f[0] = floorf( src->f[0] + 0.5f );
1006 dst->f[1] = floorf( src->f[1] + 0.5f );
1007 dst->f[2] = floorf( src->f[2] + 0.5f );
1008 dst->f[3] = floorf( src->f[3] + 0.5f );
1009 }
1010
1011 static void
1012 micro_sgn(
1013 union tgsi_exec_channel *dst,
1014 const union tgsi_exec_channel *src )
1015 {
1016 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
1017 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
1018 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
1019 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
1020 }
1021
1022 static void
1023 micro_shl(
1024 union tgsi_exec_channel *dst,
1025 const union tgsi_exec_channel *src0,
1026 const union tgsi_exec_channel *src1 )
1027 {
1028 dst->i[0] = src0->i[0] << src1->i[0];
1029 dst->i[1] = src0->i[1] << src1->i[1];
1030 dst->i[2] = src0->i[2] << src1->i[2];
1031 dst->i[3] = src0->i[3] << src1->i[3];
1032 }
1033
1034 static void
1035 micro_ishr(
1036 union tgsi_exec_channel *dst,
1037 const union tgsi_exec_channel *src0,
1038 const union tgsi_exec_channel *src1 )
1039 {
1040 dst->i[0] = src0->i[0] >> src1->i[0];
1041 dst->i[1] = src0->i[1] >> src1->i[1];
1042 dst->i[2] = src0->i[2] >> src1->i[2];
1043 dst->i[3] = src0->i[3] >> src1->i[3];
1044 }
1045
1046 static void
1047 micro_trunc(
1048 union tgsi_exec_channel *dst,
1049 const union tgsi_exec_channel *src0 )
1050 {
1051 dst->f[0] = (float) (int) src0->f[0];
1052 dst->f[1] = (float) (int) src0->f[1];
1053 dst->f[2] = (float) (int) src0->f[2];
1054 dst->f[3] = (float) (int) src0->f[3];
1055 }
1056
#if 0
/**
 * dst = src0 >> src1 for each of the four unsigned lanes.
 *
 * The count is masked to its low five bits, because C leaves shifts by
 * the operand width or more undefined.
 * Assumes the lanes are 32 bits wide -- TODO confirm against the union.
 */
static void
micro_ushr(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      const unsigned count = src1->u[lane] & 0x1f;

      dst->u[lane] = src0->u[lane] >> count;
   }
}
#endif
1070
1071 static void
1072 micro_sin(
1073 union tgsi_exec_channel *dst,
1074 const union tgsi_exec_channel *src )
1075 {
1076 dst->f[0] = sinf( src->f[0] );
1077 dst->f[1] = sinf( src->f[1] );
1078 dst->f[2] = sinf( src->f[2] );
1079 dst->f[3] = sinf( src->f[3] );
1080 }
1081
1082 static void
1083 micro_sqrt( union tgsi_exec_channel *dst,
1084 const union tgsi_exec_channel *src )
1085 {
1086 dst->f[0] = sqrtf( src->f[0] );
1087 dst->f[1] = sqrtf( src->f[1] );
1088 dst->f[2] = sqrtf( src->f[2] );
1089 dst->f[3] = sqrtf( src->f[3] );
1090 }
1091
1092 static void
1093 micro_sub(
1094 union tgsi_exec_channel *dst,
1095 const union tgsi_exec_channel *src0,
1096 const union tgsi_exec_channel *src1 )
1097 {
1098 dst->f[0] = src0->f[0] - src1->f[0];
1099 dst->f[1] = src0->f[1] - src1->f[1];
1100 dst->f[2] = src0->f[2] - src1->f[2];
1101 dst->f[3] = src0->f[3] - src1->f[3];
1102 }
1103
#if 0
/** dst = (float) src: convert each unsigned lane to float. */
static void
micro_u2f(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->f[lane] = (float) src->u[lane];
   }
}
#endif
1116
1117 static void
1118 micro_xor(
1119 union tgsi_exec_channel *dst,
1120 const union tgsi_exec_channel *src0,
1121 const union tgsi_exec_channel *src1 )
1122 {
1123 dst->u[0] = src0->u[0] ^ src1->u[0];
1124 dst->u[1] = src0->u[1] ^ src1->u[1];
1125 dst->u[2] = src0->u[2] ^ src1->u[2];
1126 dst->u[3] = src0->u[3] ^ src1->u[3];
1127 }
1128
1129 static void
1130 fetch_src_file_channel(
1131 const struct tgsi_exec_machine *mach,
1132 const uint file,
1133 const uint swizzle,
1134 const union tgsi_exec_channel *index,
1135 union tgsi_exec_channel *chan )
1136 {
1137 switch( swizzle ) {
1138 case TGSI_SWIZZLE_X:
1139 case TGSI_SWIZZLE_Y:
1140 case TGSI_SWIZZLE_Z:
1141 case TGSI_SWIZZLE_W:
1142 switch( file ) {
1143 case TGSI_FILE_CONSTANT:
1144 assert(mach->Consts);
1145 if (index->i[0] < 0)
1146 chan->f[0] = 0.0f;
1147 else
1148 chan->f[0] = mach->Consts[index->i[0]][swizzle];
1149 if (index->i[1] < 0)
1150 chan->f[1] = 0.0f;
1151 else
1152 chan->f[1] = mach->Consts[index->i[1]][swizzle];
1153 if (index->i[2] < 0)
1154 chan->f[2] = 0.0f;
1155 else
1156 chan->f[2] = mach->Consts[index->i[2]][swizzle];
1157 if (index->i[3] < 0)
1158 chan->f[3] = 0.0f;
1159 else
1160 chan->f[3] = mach->Consts[index->i[3]][swizzle];
1161 break;
1162
1163 case TGSI_FILE_INPUT:
1164 case TGSI_FILE_SYSTEM_VALUE:
1165 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1166 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1167 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1168 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1169 break;
1170
1171 case TGSI_FILE_TEMPORARY:
1172 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1173 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1174 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1175 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1176 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1177 break;
1178
1179 case TGSI_FILE_IMMEDIATE:
1180 assert( index->i[0] < (int) mach->ImmLimit );
1181 chan->f[0] = mach->Imms[index->i[0]][swizzle];
1182 assert( index->i[1] < (int) mach->ImmLimit );
1183 chan->f[1] = mach->Imms[index->i[1]][swizzle];
1184 assert( index->i[2] < (int) mach->ImmLimit );
1185 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1186 assert( index->i[3] < (int) mach->ImmLimit );
1187 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1188 break;
1189
1190 case TGSI_FILE_ADDRESS:
1191 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1192 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1193 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1194 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1195 break;
1196
1197 case TGSI_FILE_PREDICATE:
1198 assert(index->i[0] < TGSI_EXEC_NUM_PREDS);
1199 assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
1200 assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
1201 assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
1202 chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0];
1203 chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1];
1204 chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2];
1205 chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3];
1206 break;
1207
1208 case TGSI_FILE_OUTPUT:
1209 /* vertex/fragment output vars can be read too */
1210 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1211 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1212 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1213 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1214 break;
1215
1216 default:
1217 assert( 0 );
1218 }
1219 break;
1220
1221 default:
1222 assert( 0 );
1223 }
1224 }
1225
1226 static void
1227 fetch_source(
1228 const struct tgsi_exec_machine *mach,
1229 union tgsi_exec_channel *chan,
1230 const struct tgsi_full_src_register *reg,
1231 const uint chan_index )
1232 {
1233 union tgsi_exec_channel index;
1234 uint swizzle;
1235
1236 /* We start with a direct index into a register file.
1237 *
1238 * file[1],
1239 * where:
1240 * file = Register.File
1241 * [1] = Register.Index
1242 */
1243 index.i[0] =
1244 index.i[1] =
1245 index.i[2] =
1246 index.i[3] = reg->Register.Index;
1247
1248 /* There is an extra source register that indirectly subscripts
1249 * a register file. The direct index now becomes an offset
1250 * that is being added to the indirect register.
1251 *
1252 * file[ind[2].x+1],
1253 * where:
1254 * ind = Indirect.File
1255 * [2] = Indirect.Index
1256 * .x = Indirect.SwizzleX
1257 */
1258 if (reg->Register.Indirect) {
1259 union tgsi_exec_channel index2;
1260 union tgsi_exec_channel indir_index;
1261 const uint execmask = mach->ExecMask;
1262 uint i;
1263
1264 /* which address register (always zero now) */
1265 index2.i[0] =
1266 index2.i[1] =
1267 index2.i[2] =
1268 index2.i[3] = reg->Indirect.Index;
1269
1270 /* get current value of address register[swizzle] */
1271 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1272 fetch_src_file_channel(
1273 mach,
1274 reg->Indirect.File,
1275 swizzle,
1276 &index2,
1277 &indir_index );
1278
1279 /* add value of address register to the offset */
1280 index.i[0] += (int) indir_index.f[0];
1281 index.i[1] += (int) indir_index.f[1];
1282 index.i[2] += (int) indir_index.f[2];
1283 index.i[3] += (int) indir_index.f[3];
1284
1285 /* for disabled execution channels, zero-out the index to
1286 * avoid using a potential garbage value.
1287 */
1288 for (i = 0; i < QUAD_SIZE; i++) {
1289 if ((execmask & (1 << i)) == 0)
1290 index.i[i] = 0;
1291 }
1292 }
1293
1294 /* There is an extra source register that is a second
1295 * subscript to a register file. Effectively it means that
1296 * the register file is actually a 2D array of registers.
1297 *
1298 * file[1][3] == file[1*sizeof(file[1])+3],
1299 * where:
1300 * [3] = Dimension.Index
1301 */
1302 if (reg->Register.Dimension) {
1303 /* The size of the first-order array depends on the register file type.
1304 * We need to multiply the index to the first array to get an effective,
1305 * "flat" index that points to the beginning of the second-order array.
1306 */
1307 switch (reg->Register.File) {
1308 case TGSI_FILE_INPUT:
1309 case TGSI_FILE_SYSTEM_VALUE:
1310 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1311 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1312 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1313 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1314 break;
1315 case TGSI_FILE_CONSTANT:
1316 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1317 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1318 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1319 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1320 break;
1321 default:
1322 assert( 0 );
1323 }
1324
1325 index.i[0] += reg->Dimension.Index;
1326 index.i[1] += reg->Dimension.Index;
1327 index.i[2] += reg->Dimension.Index;
1328 index.i[3] += reg->Dimension.Index;
1329
1330 /* Again, the second subscript index can be addressed indirectly
1331 * identically to the first one.
1332 * Nothing stops us from indirectly addressing the indirect register,
1333 * but there is no need for that, so we won't exercise it.
1334 *
1335 * file[1][ind[4].y+3],
1336 * where:
1337 * ind = DimIndirect.File
1338 * [4] = DimIndirect.Index
1339 * .y = DimIndirect.SwizzleX
1340 */
1341 if (reg->Dimension.Indirect) {
1342 union tgsi_exec_channel index2;
1343 union tgsi_exec_channel indir_index;
1344 const uint execmask = mach->ExecMask;
1345 uint i;
1346
1347 index2.i[0] =
1348 index2.i[1] =
1349 index2.i[2] =
1350 index2.i[3] = reg->DimIndirect.Index;
1351
1352 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
1353 fetch_src_file_channel(
1354 mach,
1355 reg->DimIndirect.File,
1356 swizzle,
1357 &index2,
1358 &indir_index );
1359
1360 index.i[0] += (int) indir_index.f[0];
1361 index.i[1] += (int) indir_index.f[1];
1362 index.i[2] += (int) indir_index.f[2];
1363 index.i[3] += (int) indir_index.f[3];
1364
1365 /* for disabled execution channels, zero-out the index to
1366 * avoid using a potential garbage value.
1367 */
1368 for (i = 0; i < QUAD_SIZE; i++) {
1369 if ((execmask & (1 << i)) == 0)
1370 index.i[i] = 0;
1371 }
1372 }
1373
1374 /* If by any chance there was a need for a 3D array of register
1375 * files, we would have to check whether Dimension is followed
1376 * by a dimension register and continue the saga.
1377 */
1378 }
1379
1380 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1381 fetch_src_file_channel(
1382 mach,
1383 reg->Register.File,
1384 swizzle,
1385 &index,
1386 chan );
1387
1388 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1389 case TGSI_UTIL_SIGN_CLEAR:
1390 micro_abs( chan, chan );
1391 break;
1392
1393 case TGSI_UTIL_SIGN_SET:
1394 micro_abs( chan, chan );
1395 micro_neg( chan, chan );
1396 break;
1397
1398 case TGSI_UTIL_SIGN_TOGGLE:
1399 micro_neg( chan, chan );
1400 break;
1401
1402 case TGSI_UTIL_SIGN_KEEP:
1403 break;
1404 }
1405 }
1406
1407 static void
1408 store_dest(
1409 struct tgsi_exec_machine *mach,
1410 const union tgsi_exec_channel *chan,
1411 const struct tgsi_full_dst_register *reg,
1412 const struct tgsi_full_instruction *inst,
1413 uint chan_index )
1414 {
1415 uint i;
1416 union tgsi_exec_channel null;
1417 union tgsi_exec_channel *dst;
1418 uint execmask = mach->ExecMask;
1419 int offset = 0; /* indirection offset */
1420 int index;
1421
1422 #ifdef DEBUG
1423 check_inf_or_nan(chan);
1424 #endif
1425
1426 /* There is an extra source register that indirectly subscripts
1427 * a register file. The direct index now becomes an offset
1428 * that is being added to the indirect register.
1429 *
1430 * file[ind[2].x+1],
1431 * where:
1432 * ind = Indirect.File
1433 * [2] = Indirect.Index
1434 * .x = Indirect.SwizzleX
1435 */
1436 if (reg->Register.Indirect) {
1437 union tgsi_exec_channel index;
1438 union tgsi_exec_channel indir_index;
1439 uint swizzle;
1440
1441 /* which address register (always zero for now) */
1442 index.i[0] =
1443 index.i[1] =
1444 index.i[2] =
1445 index.i[3] = reg->Indirect.Index;
1446
1447 /* get current value of address register[swizzle] */
1448 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1449
1450 /* fetch values from the address/indirection register */
1451 fetch_src_file_channel(
1452 mach,
1453 reg->Indirect.File,
1454 swizzle,
1455 &index,
1456 &indir_index );
1457
1458 /* save indirection offset */
1459 offset = (int) indir_index.f[0];
1460 }
1461
1462 switch (reg->Register.File) {
1463 case TGSI_FILE_NULL:
1464 dst = &null;
1465 break;
1466
1467 case TGSI_FILE_OUTPUT:
1468 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1469 + reg->Register.Index;
1470 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1471 break;
1472
1473 case TGSI_FILE_TEMPORARY:
1474 index = reg->Register.Index;
1475 assert( index < TGSI_EXEC_NUM_TEMPS );
1476 dst = &mach->Temps[offset + index].xyzw[chan_index];
1477 break;
1478
1479 case TGSI_FILE_ADDRESS:
1480 index = reg->Register.Index;
1481 dst = &mach->Addrs[index].xyzw[chan_index];
1482 break;
1483
1484 case TGSI_FILE_LOOP:
1485 assert(reg->Register.Index == 0);
1486 assert(mach->LoopCounterStackTop > 0);
1487 assert(chan_index == CHAN_X);
1488 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
1489 break;
1490
1491 case TGSI_FILE_PREDICATE:
1492 index = reg->Register.Index;
1493 assert(index < TGSI_EXEC_NUM_PREDS);
1494 dst = &mach->Predicates[index].xyzw[chan_index];
1495 break;
1496
1497 default:
1498 assert( 0 );
1499 return;
1500 }
1501
1502 if (inst->Instruction.Predicate) {
1503 uint swizzle;
1504 union tgsi_exec_channel *pred;
1505
1506 switch (chan_index) {
1507 case CHAN_X:
1508 swizzle = inst->Predicate.SwizzleX;
1509 break;
1510 case CHAN_Y:
1511 swizzle = inst->Predicate.SwizzleY;
1512 break;
1513 case CHAN_Z:
1514 swizzle = inst->Predicate.SwizzleZ;
1515 break;
1516 case CHAN_W:
1517 swizzle = inst->Predicate.SwizzleW;
1518 break;
1519 default:
1520 assert(0);
1521 return;
1522 }
1523
1524 assert(inst->Predicate.Index == 0);
1525
1526 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1527
1528 if (inst->Predicate.Negate) {
1529 for (i = 0; i < QUAD_SIZE; i++) {
1530 if (pred->u[i]) {
1531 execmask &= ~(1 << i);
1532 }
1533 }
1534 } else {
1535 for (i = 0; i < QUAD_SIZE; i++) {
1536 if (!pred->u[i]) {
1537 execmask &= ~(1 << i);
1538 }
1539 }
1540 }
1541 }
1542
1543 switch (inst->Instruction.Saturate) {
1544 case TGSI_SAT_NONE:
1545 for (i = 0; i < QUAD_SIZE; i++)
1546 if (execmask & (1 << i))
1547 dst->i[i] = chan->i[i];
1548 break;
1549
1550 case TGSI_SAT_ZERO_ONE:
1551 for (i = 0; i < QUAD_SIZE; i++)
1552 if (execmask & (1 << i)) {
1553 if (chan->f[i] < 0.0f)
1554 dst->f[i] = 0.0f;
1555 else if (chan->f[i] > 1.0f)
1556 dst->f[i] = 1.0f;
1557 else
1558 dst->i[i] = chan->i[i];
1559 }
1560 break;
1561
1562 case TGSI_SAT_MINUS_PLUS_ONE:
1563 for (i = 0; i < QUAD_SIZE; i++)
1564 if (execmask & (1 << i)) {
1565 if (chan->f[i] < -1.0f)
1566 dst->f[i] = -1.0f;
1567 else if (chan->f[i] > 1.0f)
1568 dst->f[i] = 1.0f;
1569 else
1570 dst->i[i] = chan->i[i];
1571 }
1572 break;
1573
1574 default:
1575 assert( 0 );
1576 }
1577 }
1578
/* Fetch channel CHAN of source operand INDEX of the current instruction
 * into VAL.  Expects `mach` and `inst` to be in scope where it is used.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN))

/* Store VAL to channel CHAN of destination operand INDEX of the current
 * instruction.  Expects `mach` and `inst` to be in scope where it is used.
 */
#define STORE(VAL, INDEX, CHAN) \
   store_dest(mach, (VAL), &inst->Dst[(INDEX)], inst, (CHAN))
1584
1585
1586 /**
1587 * Execute ARB-style KIL which is predicated by a src register.
1588 * Kill fragment if any of the four values is less than zero.
1589 */
1590 static void
1591 exec_kil(struct tgsi_exec_machine *mach,
1592 const struct tgsi_full_instruction *inst)
1593 {
1594 uint uniquemask;
1595 uint chan_index;
1596 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1597 union tgsi_exec_channel r[1];
1598
1599 /* This mask stores component bits that were already tested. */
1600 uniquemask = 0;
1601
1602 for (chan_index = 0; chan_index < 4; chan_index++)
1603 {
1604 uint swizzle;
1605 uint i;
1606
1607 /* unswizzle channel */
1608 swizzle = tgsi_util_get_full_src_register_swizzle (
1609 &inst->Src[0],
1610 chan_index);
1611
1612 /* check if the component has not been already tested */
1613 if (uniquemask & (1 << swizzle))
1614 continue;
1615 uniquemask |= 1 << swizzle;
1616
1617 FETCH(&r[0], 0, chan_index);
1618 for (i = 0; i < 4; i++)
1619 if (r[0].f[i] < 0.0f)
1620 kilmask |= 1 << i;
1621 }
1622
1623 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1624 }
1625
1626 /**
1627 * Execute NVIDIA-style KIL which is predicated by a condition code.
1628 * Kill fragment if the condition code is TRUE.
1629 */
1630 static void
1631 exec_kilp(struct tgsi_exec_machine *mach,
1632 const struct tgsi_full_instruction *inst)
1633 {
1634 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1635
1636 /* "unconditional" kil */
1637 kilmask = mach->ExecMask;
1638 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1639 }
1640
1641
1642 /*
1643 * Fetch a four texture samples using STR texture coordinates.
1644 */
1645 static void
1646 fetch_texel( struct tgsi_sampler *sampler,
1647 const union tgsi_exec_channel *s,
1648 const union tgsi_exec_channel *t,
1649 const union tgsi_exec_channel *p,
1650 float lodbias, /* XXX should be float[4] */
1651 union tgsi_exec_channel *r,
1652 union tgsi_exec_channel *g,
1653 union tgsi_exec_channel *b,
1654 union tgsi_exec_channel *a )
1655 {
1656 uint j;
1657 float rgba[NUM_CHANNELS][QUAD_SIZE];
1658
1659 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1660
1661 for (j = 0; j < 4; j++) {
1662 r->f[j] = rgba[0][j];
1663 g->f[j] = rgba[1][j];
1664 b->f[j] = rgba[2][j];
1665 a->f[j] = rgba[3][j];
1666 }
1667 }
1668
1669
1670 static void
1671 exec_tex(struct tgsi_exec_machine *mach,
1672 const struct tgsi_full_instruction *inst,
1673 boolean biasLod,
1674 boolean projected)
1675 {
1676 const uint unit = inst->Src[1].Register.Index;
1677 union tgsi_exec_channel r[4];
1678 uint chan_index;
1679 float lodBias;
1680
1681 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1682
1683 switch (inst->Texture.Texture) {
1684 case TGSI_TEXTURE_1D:
1685 case TGSI_TEXTURE_SHADOW1D:
1686
1687 FETCH(&r[0], 0, CHAN_X);
1688
1689 if (projected) {
1690 FETCH(&r[1], 0, CHAN_W);
1691 micro_div( &r[0], &r[0], &r[1] );
1692 }
1693
1694 if (biasLod) {
1695 FETCH(&r[1], 0, CHAN_W);
1696 lodBias = r[2].f[0];
1697 }
1698 else
1699 lodBias = 0.0;
1700
1701 fetch_texel(mach->Samplers[unit],
1702 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */
1703 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1704 break;
1705
1706 case TGSI_TEXTURE_2D:
1707 case TGSI_TEXTURE_RECT:
1708 case TGSI_TEXTURE_SHADOW2D:
1709 case TGSI_TEXTURE_SHADOWRECT:
1710
1711 FETCH(&r[0], 0, CHAN_X);
1712 FETCH(&r[1], 0, CHAN_Y);
1713 FETCH(&r[2], 0, CHAN_Z);
1714
1715 if (projected) {
1716 FETCH(&r[3], 0, CHAN_W);
1717 micro_div( &r[0], &r[0], &r[3] );
1718 micro_div( &r[1], &r[1], &r[3] );
1719 micro_div( &r[2], &r[2], &r[3] );
1720 }
1721
1722 if (biasLod) {
1723 FETCH(&r[3], 0, CHAN_W);
1724 lodBias = r[3].f[0];
1725 }
1726 else
1727 lodBias = 0.0;
1728
1729 fetch_texel(mach->Samplers[unit],
1730 &r[0], &r[1], &r[2], lodBias, /* inputs */
1731 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1732 break;
1733
1734 case TGSI_TEXTURE_3D:
1735 case TGSI_TEXTURE_CUBE:
1736
1737 FETCH(&r[0], 0, CHAN_X);
1738 FETCH(&r[1], 0, CHAN_Y);
1739 FETCH(&r[2], 0, CHAN_Z);
1740
1741 if (projected) {
1742 FETCH(&r[3], 0, CHAN_W);
1743 micro_div( &r[0], &r[0], &r[3] );
1744 micro_div( &r[1], &r[1], &r[3] );
1745 micro_div( &r[2], &r[2], &r[3] );
1746 }
1747
1748 if (biasLod) {
1749 FETCH(&r[3], 0, CHAN_W);
1750 lodBias = r[3].f[0];
1751 }
1752 else
1753 lodBias = 0.0;
1754
1755 fetch_texel(mach->Samplers[unit],
1756 &r[0], &r[1], &r[2], lodBias,
1757 &r[0], &r[1], &r[2], &r[3]);
1758 break;
1759
1760 default:
1761 assert (0);
1762 }
1763
1764 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1765 STORE( &r[chan_index], 0, chan_index );
1766 }
1767 }
1768
1769 static void
1770 exec_txd(struct tgsi_exec_machine *mach,
1771 const struct tgsi_full_instruction *inst)
1772 {
1773 const uint unit = inst->Src[3].Register.Index;
1774 union tgsi_exec_channel r[4];
1775 uint chan_index;
1776
1777 /*
1778 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1779 */
1780
1781 switch (inst->Texture.Texture) {
1782 case TGSI_TEXTURE_1D:
1783 case TGSI_TEXTURE_SHADOW1D:
1784
1785 FETCH(&r[0], 0, CHAN_X);
1786
1787 fetch_texel(mach->Samplers[unit],
1788 &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */
1789 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1790 break;
1791
1792 case TGSI_TEXTURE_2D:
1793 case TGSI_TEXTURE_RECT:
1794 case TGSI_TEXTURE_SHADOW2D:
1795 case TGSI_TEXTURE_SHADOWRECT:
1796
1797 FETCH(&r[0], 0, CHAN_X);
1798 FETCH(&r[1], 0, CHAN_Y);
1799 FETCH(&r[2], 0, CHAN_Z);
1800
1801 fetch_texel(mach->Samplers[unit],
1802 &r[0], &r[1], &r[2], 0.0f, /* inputs */
1803 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1804 break;
1805
1806 case TGSI_TEXTURE_3D:
1807 case TGSI_TEXTURE_CUBE:
1808
1809 FETCH(&r[0], 0, CHAN_X);
1810 FETCH(&r[1], 0, CHAN_Y);
1811 FETCH(&r[2], 0, CHAN_Z);
1812
1813 fetch_texel(mach->Samplers[unit],
1814 &r[0], &r[1], &r[2], 0.0f,
1815 &r[0], &r[1], &r[2], &r[3]);
1816 break;
1817
1818 default:
1819 assert(0);
1820 }
1821
1822 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1823 STORE(&r[chan_index], 0, chan_index);
1824 }
1825 }
1826
1827
1828 /**
1829 * Evaluate a constant-valued coefficient at the position of the
1830 * current quad.
1831 */
1832 static void
1833 eval_constant_coef(
1834 struct tgsi_exec_machine *mach,
1835 unsigned attrib,
1836 unsigned chan )
1837 {
1838 unsigned i;
1839
1840 for( i = 0; i < QUAD_SIZE; i++ ) {
1841 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1842 }
1843 }
1844
1845 /**
1846 * Evaluate a linear-valued coefficient at the position of the
1847 * current quad.
1848 */
1849 static void
1850 eval_linear_coef(
1851 struct tgsi_exec_machine *mach,
1852 unsigned attrib,
1853 unsigned chan )
1854 {
1855 const float x = mach->QuadPos.xyzw[0].f[0];
1856 const float y = mach->QuadPos.xyzw[1].f[0];
1857 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1858 const float dady = mach->InterpCoefs[attrib].dady[chan];
1859 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1860 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1861 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1862 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1863 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1864 }
1865
1866 /**
1867 * Evaluate a perspective-valued coefficient at the position of the
1868 * current quad.
1869 */
1870 static void
1871 eval_perspective_coef(
1872 struct tgsi_exec_machine *mach,
1873 unsigned attrib,
1874 unsigned chan )
1875 {
1876 const float x = mach->QuadPos.xyzw[0].f[0];
1877 const float y = mach->QuadPos.xyzw[1].f[0];
1878 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1879 const float dady = mach->InterpCoefs[attrib].dady[chan];
1880 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1881 const float *w = mach->QuadPos.xyzw[3].f;
1882 /* divide by W here */
1883 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1884 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1885 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1886 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1887 }
1888
1889
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel
 * `chan` of input attribute `attrib` for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
1894
1895 static void
1896 exec_declaration(struct tgsi_exec_machine *mach,
1897 const struct tgsi_full_declaration *decl)
1898 {
1899 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
1900 if (decl->Declaration.File == TGSI_FILE_INPUT ||
1901 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1902 uint first, last, mask;
1903
1904 first = decl->Range.First;
1905 last = decl->Range.Last;
1906 mask = decl->Declaration.UsageMask;
1907
1908 if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
1909 assert(decl->Semantic.Index == 0);
1910 assert(first == last);
1911 assert(mask = TGSI_WRITEMASK_XYZW);
1912
1913 mach->Inputs[first] = mach->QuadPos;
1914 } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
1915 uint i;
1916
1917 assert(decl->Semantic.Index == 0);
1918 assert(first == last);
1919
1920 for (i = 0; i < QUAD_SIZE; i++) {
1921 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
1922 }
1923 } else {
1924 eval_coef_func eval;
1925 uint i, j;
1926
1927 switch (decl->Declaration.Interpolate) {
1928 case TGSI_INTERPOLATE_CONSTANT:
1929 eval = eval_constant_coef;
1930 break;
1931
1932 case TGSI_INTERPOLATE_LINEAR:
1933 eval = eval_linear_coef;
1934 break;
1935
1936 case TGSI_INTERPOLATE_PERSPECTIVE:
1937 eval = eval_perspective_coef;
1938 break;
1939
1940 default:
1941 assert(0);
1942 return;
1943 }
1944
1945 for (j = 0; j < NUM_CHANNELS; j++) {
1946 if (mask & (1 << j)) {
1947 for (i = first; i <= last; i++) {
1948 eval(mach, i, j);
1949 }
1950 }
1951 }
1952 }
1953 }
1954 }
1955 }
1956
1957 static void
1958 exec_instruction(
1959 struct tgsi_exec_machine *mach,
1960 const struct tgsi_full_instruction *inst,
1961 int *pc )
1962 {
1963 uint chan_index;
1964 union tgsi_exec_channel r[10];
1965 union tgsi_exec_channel d[8];
1966
1967 (*pc)++;
1968
1969 switch (inst->Instruction.Opcode) {
1970 case TGSI_OPCODE_ARL:
1971 case TGSI_OPCODE_FLR:
1972 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1973 FETCH( &r[0], 0, chan_index );
1974 micro_flr(&d[chan_index], &r[0]);
1975 }
1976 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1977 STORE(&d[chan_index], 0, chan_index);
1978 }
1979 break;
1980
1981 case TGSI_OPCODE_MOV:
1982 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1983 FETCH(&d[chan_index], 0, chan_index);
1984 }
1985 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1986 STORE(&d[chan_index], 0, chan_index);
1987 }
1988 break;
1989
1990 case TGSI_OPCODE_LIT:
1991 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1992 FETCH( &r[0], 0, CHAN_X );
1993 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1994 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
1995 }
1996
1997 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1998 FETCH( &r[1], 0, CHAN_Y );
1999 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2000
2001 FETCH( &r[2], 0, CHAN_W );
2002 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
2003 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
2004 micro_pow( &r[1], &r[1], &r[2] );
2005 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2006 }
2007
2008 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2009 STORE(&d[CHAN_Y], 0, CHAN_Y);
2010 }
2011 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2012 STORE(&d[CHAN_Z], 0, CHAN_Z);
2013 }
2014 }
2015 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2016 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2017 }
2018 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2019 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2020 }
2021 break;
2022
2023 case TGSI_OPCODE_RCP:
2024 /* TGSI_OPCODE_RECIP */
2025 FETCH( &r[0], 0, CHAN_X );
2026 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2027 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2028 STORE( &r[0], 0, chan_index );
2029 }
2030 break;
2031
2032 case TGSI_OPCODE_RSQ:
2033 /* TGSI_OPCODE_RECIPSQRT */
2034 FETCH( &r[0], 0, CHAN_X );
2035 micro_abs( &r[0], &r[0] );
2036 micro_sqrt( &r[0], &r[0] );
2037 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2038 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2039 STORE( &r[0], 0, chan_index );
2040 }
2041 break;
2042
2043 case TGSI_OPCODE_EXP:
2044 FETCH( &r[0], 0, CHAN_X );
2045 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
2046 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2047 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
2048 STORE( &r[2], 0, CHAN_X ); /* store r2 */
2049 }
2050 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2051 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
2052 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
2053 }
2054 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2055 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
2056 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
2057 }
2058 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2059 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2060 }
2061 break;
2062
2063 case TGSI_OPCODE_LOG:
2064 FETCH( &r[0], 0, CHAN_X );
2065 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
2066 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
2067 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
2068 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2069 STORE( &r[0], 0, CHAN_X );
2070 }
2071 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2072 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
2073 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
2074 STORE( &r[0], 0, CHAN_Y );
2075 }
2076 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2077 STORE( &r[1], 0, CHAN_Z );
2078 }
2079 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2080 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2081 }
2082 break;
2083
2084 case TGSI_OPCODE_MUL:
2085 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2086 FETCH(&r[0], 0, chan_index);
2087 FETCH(&r[1], 1, chan_index);
2088 micro_mul(&d[chan_index], &r[0], &r[1]);
2089 }
2090 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2091 STORE(&d[chan_index], 0, chan_index);
2092 }
2093 break;
2094
2095 case TGSI_OPCODE_ADD:
2096 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2097 FETCH( &r[0], 0, chan_index );
2098 FETCH( &r[1], 1, chan_index );
2099 micro_add(&d[chan_index], &r[0], &r[1]);
2100 }
2101 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2102 STORE(&d[chan_index], 0, chan_index);
2103 }
2104 break;
2105
2106 case TGSI_OPCODE_DP3:
2107 /* TGSI_OPCODE_DOT3 */
2108 FETCH( &r[0], 0, CHAN_X );
2109 FETCH( &r[1], 1, CHAN_X );
2110 micro_mul( &r[0], &r[0], &r[1] );
2111
2112 FETCH( &r[1], 0, CHAN_Y );
2113 FETCH( &r[2], 1, CHAN_Y );
2114 micro_mul( &r[1], &r[1], &r[2] );
2115 micro_add( &r[0], &r[0], &r[1] );
2116
2117 FETCH( &r[1], 0, CHAN_Z );
2118 FETCH( &r[2], 1, CHAN_Z );
2119 micro_mul( &r[1], &r[1], &r[2] );
2120 micro_add( &r[0], &r[0], &r[1] );
2121
2122 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2123 STORE( &r[0], 0, chan_index );
2124 }
2125 break;
2126
2127 case TGSI_OPCODE_DP4:
2128 /* TGSI_OPCODE_DOT4 */
2129 FETCH(&r[0], 0, CHAN_X);
2130 FETCH(&r[1], 1, CHAN_X);
2131
2132 micro_mul( &r[0], &r[0], &r[1] );
2133
2134 FETCH(&r[1], 0, CHAN_Y);
2135 FETCH(&r[2], 1, CHAN_Y);
2136
2137 micro_mul( &r[1], &r[1], &r[2] );
2138 micro_add( &r[0], &r[0], &r[1] );
2139
2140 FETCH(&r[1], 0, CHAN_Z);
2141 FETCH(&r[2], 1, CHAN_Z);
2142
2143 micro_mul( &r[1], &r[1], &r[2] );
2144 micro_add( &r[0], &r[0], &r[1] );
2145
2146 FETCH(&r[1], 0, CHAN_W);
2147 FETCH(&r[2], 1, CHAN_W);
2148
2149 micro_mul( &r[1], &r[1], &r[2] );
2150 micro_add( &r[0], &r[0], &r[1] );
2151
2152 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2153 STORE( &r[0], 0, chan_index );
2154 }
2155 break;
2156
2157 case TGSI_OPCODE_DST:
2158 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2159 FETCH( &r[0], 0, CHAN_Y );
2160 FETCH( &r[1], 1, CHAN_Y);
2161 micro_mul(&d[CHAN_Y], &r[0], &r[1]);
2162 }
2163 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2164 FETCH(&d[CHAN_Z], 0, CHAN_Z);
2165 }
2166 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2167 FETCH(&d[CHAN_W], 1, CHAN_W);
2168 }
2169
2170 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2171 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
2172 }
2173 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2174 STORE(&d[CHAN_Y], 0, CHAN_Y);
2175 }
2176 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2177 STORE(&d[CHAN_Z], 0, CHAN_Z);
2178 }
2179 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2180 STORE(&d[CHAN_W], 0, CHAN_W);
2181 }
2182 break;
2183
2184 case TGSI_OPCODE_MIN:
2185 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2186 FETCH(&r[0], 0, chan_index);
2187 FETCH(&r[1], 1, chan_index);
2188
2189 /* XXX use micro_min()?? */
2190 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
2191 }
2192 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2193 STORE(&d[chan_index], 0, chan_index);
2194 }
2195 break;
2196
2197 case TGSI_OPCODE_MAX:
2198 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2199 FETCH(&r[0], 0, chan_index);
2200 FETCH(&r[1], 1, chan_index);
2201
2202 /* XXX use micro_max()?? */
2203 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
2204 }
2205 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2206 STORE(&d[chan_index], 0, chan_index);
2207 }
2208 break;
2209
2210 case TGSI_OPCODE_SLT:
2211 /* TGSI_OPCODE_SETLT */
2212 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2213 FETCH( &r[0], 0, chan_index );
2214 FETCH( &r[1], 1, chan_index );
2215 micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2216 }
2217 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2218 STORE(&d[chan_index], 0, chan_index);
2219 }
2220 break;
2221
2222 case TGSI_OPCODE_SGE:
2223 /* TGSI_OPCODE_SETGE */
2224 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2225 FETCH( &r[0], 0, chan_index );
2226 FETCH( &r[1], 1, chan_index );
2227 micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2228 }
2229 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2230 STORE(&d[chan_index], 0, chan_index);
2231 }
2232 break;
2233
2234 case TGSI_OPCODE_MAD:
2235 /* TGSI_OPCODE_MADD */
2236 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2237 FETCH( &r[0], 0, chan_index );
2238 FETCH( &r[1], 1, chan_index );
2239 micro_mul( &r[0], &r[0], &r[1] );
2240 FETCH( &r[1], 2, chan_index );
2241 micro_add(&d[chan_index], &r[0], &r[1]);
2242 }
2243 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2244 STORE(&d[chan_index], 0, chan_index);
2245 }
2246 break;
2247
2248 case TGSI_OPCODE_SUB:
2249 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2250 FETCH(&r[0], 0, chan_index);
2251 FETCH(&r[1], 1, chan_index);
2252 micro_sub(&d[chan_index], &r[0], &r[1]);
2253 }
2254 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2255 STORE(&d[chan_index], 0, chan_index);
2256 }
2257 break;
2258
2259 case TGSI_OPCODE_LRP:
2260 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2261 FETCH(&r[0], 0, chan_index);
2262 FETCH(&r[1], 1, chan_index);
2263 FETCH(&r[2], 2, chan_index);
2264 micro_sub( &r[1], &r[1], &r[2] );
2265 micro_mul( &r[0], &r[0], &r[1] );
2266 micro_add(&d[chan_index], &r[0], &r[2]);
2267 }
2268 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2269 STORE(&d[chan_index], 0, chan_index);
2270 }
2271 break;
2272
2273 case TGSI_OPCODE_CND:
2274 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2275 FETCH(&r[0], 0, chan_index);
2276 FETCH(&r[1], 1, chan_index);
2277 FETCH(&r[2], 2, chan_index);
2278 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2279 }
2280 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2281 STORE(&d[chan_index], 0, chan_index);
2282 }
2283 break;
2284
2285 case TGSI_OPCODE_DP2A:
2286 FETCH( &r[0], 0, CHAN_X );
2287 FETCH( &r[1], 1, CHAN_X );
2288 micro_mul( &r[0], &r[0], &r[1] );
2289
2290 FETCH( &r[1], 0, CHAN_Y );
2291 FETCH( &r[2], 1, CHAN_Y );
2292 micro_mul( &r[1], &r[1], &r[2] );
2293 micro_add( &r[0], &r[0], &r[1] );
2294
2295 FETCH( &r[2], 2, CHAN_X );
2296 micro_add( &r[0], &r[0], &r[2] );
2297
2298 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2299 STORE( &r[0], 0, chan_index );
2300 }
2301 break;
2302
2303 case TGSI_OPCODE_FRC:
2304 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2305 FETCH( &r[0], 0, chan_index );
2306 micro_frc(&d[chan_index], &r[0]);
2307 }
2308 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2309 STORE(&d[chan_index], 0, chan_index);
2310 }
2311 break;
2312
2313 case TGSI_OPCODE_CLAMP:
2314 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2315 FETCH(&r[0], 0, chan_index);
2316 FETCH(&r[1], 1, chan_index);
2317 micro_max(&r[0], &r[0], &r[1]);
2318 FETCH(&r[1], 2, chan_index);
2319 micro_min(&d[chan_index], &r[0], &r[1]);
2320 }
2321 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2322 STORE(&d[chan_index], 0, chan_index);
2323 }
2324 break;
2325
2326 case TGSI_OPCODE_ROUND:
2327 case TGSI_OPCODE_ARR:
2328 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2329 FETCH( &r[0], 0, chan_index );
2330 micro_rnd(&d[chan_index], &r[0]);
2331 }
2332 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2333 STORE(&d[chan_index], 0, chan_index);
2334 }
2335 break;
2336
2337 case TGSI_OPCODE_EX2:
2338 FETCH(&r[0], 0, CHAN_X);
2339
2340 micro_exp2( &r[0], &r[0] );
2341
2342 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2343 STORE( &r[0], 0, chan_index );
2344 }
2345 break;
2346
2347 case TGSI_OPCODE_LG2:
2348 FETCH( &r[0], 0, CHAN_X );
2349 micro_lg2( &r[0], &r[0] );
2350 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2351 STORE( &r[0], 0, chan_index );
2352 }
2353 break;
2354
2355 case TGSI_OPCODE_POW:
2356 FETCH(&r[0], 0, CHAN_X);
2357 FETCH(&r[1], 1, CHAN_X);
2358
2359 micro_pow( &r[0], &r[0], &r[1] );
2360
2361 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2362 STORE( &r[0], 0, chan_index );
2363 }
2364 break;
2365
2366 case TGSI_OPCODE_XPD:
2367 FETCH(&r[0], 0, CHAN_Y);
2368 FETCH(&r[1], 1, CHAN_Z);
2369
2370 micro_mul( &r[2], &r[0], &r[1] );
2371
2372 FETCH(&r[3], 0, CHAN_Z);
2373 FETCH(&r[4], 1, CHAN_Y);
2374
2375 micro_mul( &r[5], &r[3], &r[4] );
2376 micro_sub(&d[CHAN_X], &r[2], &r[5]);
2377
2378 FETCH(&r[2], 1, CHAN_X);
2379
2380 micro_mul( &r[3], &r[3], &r[2] );
2381
2382 FETCH(&r[5], 0, CHAN_X);
2383
2384 micro_mul( &r[1], &r[1], &r[5] );
2385 micro_sub(&d[CHAN_Y], &r[3], &r[1]);
2386
2387 micro_mul( &r[5], &r[5], &r[4] );
2388 micro_mul( &r[0], &r[0], &r[2] );
2389 micro_sub(&d[CHAN_Z], &r[5], &r[0]);
2390
2391 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2392 STORE(&d[CHAN_X], 0, CHAN_X);
2393 }
2394 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2395 STORE(&d[CHAN_Y], 0, CHAN_Y);
2396 }
2397 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2398 STORE(&d[CHAN_Z], 0, CHAN_Z);
2399 }
2400 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2401 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2402 }
2403 break;
2404
2405 case TGSI_OPCODE_ABS:
2406 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2407 FETCH(&r[0], 0, chan_index);
2408 micro_abs(&d[chan_index], &r[0]);
2409 }
2410 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2411 STORE(&d[chan_index], 0, chan_index);
2412 }
2413 break;
2414
2415 case TGSI_OPCODE_RCC:
2416 FETCH(&r[0], 0, CHAN_X);
2417 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2418 micro_float_clamp(&r[0], &r[0]);
2419 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2420 STORE(&r[0], 0, chan_index);
2421 }
2422 break;
2423
2424 case TGSI_OPCODE_DPH:
2425 FETCH(&r[0], 0, CHAN_X);
2426 FETCH(&r[1], 1, CHAN_X);
2427
2428 micro_mul( &r[0], &r[0], &r[1] );
2429
2430 FETCH(&r[1], 0, CHAN_Y);
2431 FETCH(&r[2], 1, CHAN_Y);
2432
2433 micro_mul( &r[1], &r[1], &r[2] );
2434 micro_add( &r[0], &r[0], &r[1] );
2435
2436 FETCH(&r[1], 0, CHAN_Z);
2437 FETCH(&r[2], 1, CHAN_Z);
2438
2439 micro_mul( &r[1], &r[1], &r[2] );
2440 micro_add( &r[0], &r[0], &r[1] );
2441
2442 FETCH(&r[1], 1, CHAN_W);
2443
2444 micro_add( &r[0], &r[0], &r[1] );
2445
2446 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2447 STORE( &r[0], 0, chan_index );
2448 }
2449 break;
2450
2451 case TGSI_OPCODE_COS:
2452 FETCH(&r[0], 0, CHAN_X);
2453
2454 micro_cos( &r[0], &r[0] );
2455
2456 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2457 STORE( &r[0], 0, chan_index );
2458 }
2459 break;
2460
2461 case TGSI_OPCODE_DDX:
2462 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2463 FETCH( &r[0], 0, chan_index );
2464 micro_ddx(&d[chan_index], &r[0]);
2465 }
2466 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2467 STORE(&d[chan_index], 0, chan_index);
2468 }
2469 break;
2470
2471 case TGSI_OPCODE_DDY:
2472 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2473 FETCH( &r[0], 0, chan_index );
2474 micro_ddy(&d[chan_index], &r[0]);
2475 }
2476 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2477 STORE(&d[chan_index], 0, chan_index);
2478 }
2479 break;
2480
2481 case TGSI_OPCODE_KILP:
2482 exec_kilp (mach, inst);
2483 break;
2484
2485 case TGSI_OPCODE_KIL:
2486 exec_kil (mach, inst);
2487 break;
2488
2489 case TGSI_OPCODE_PK2H:
2490 assert (0);
2491 break;
2492
2493 case TGSI_OPCODE_PK2US:
2494 assert (0);
2495 break;
2496
2497 case TGSI_OPCODE_PK4B:
2498 assert (0);
2499 break;
2500
2501 case TGSI_OPCODE_PK4UB:
2502 assert (0);
2503 break;
2504
2505 case TGSI_OPCODE_RFL:
2506 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2507 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2508 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2509 /* r0 = dp3(src0, src0) */
2510 FETCH(&r[2], 0, CHAN_X);
2511 micro_mul(&r[0], &r[2], &r[2]);
2512 FETCH(&r[4], 0, CHAN_Y);
2513 micro_mul(&r[8], &r[4], &r[4]);
2514 micro_add(&r[0], &r[0], &r[8]);
2515 FETCH(&r[6], 0, CHAN_Z);
2516 micro_mul(&r[8], &r[6], &r[6]);
2517 micro_add(&r[0], &r[0], &r[8]);
2518
2519 /* r1 = dp3(src0, src1) */
2520 FETCH(&r[3], 1, CHAN_X);
2521 micro_mul(&r[1], &r[2], &r[3]);
2522 FETCH(&r[5], 1, CHAN_Y);
2523 micro_mul(&r[8], &r[4], &r[5]);
2524 micro_add(&r[1], &r[1], &r[8]);
2525 FETCH(&r[7], 1, CHAN_Z);
2526 micro_mul(&r[8], &r[6], &r[7]);
2527 micro_add(&r[1], &r[1], &r[8]);
2528
2529 /* r1 = 2 * r1 / r0 */
2530 micro_add(&r[1], &r[1], &r[1]);
2531 micro_div(&r[1], &r[1], &r[0]);
2532
2533 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2534 micro_mul(&r[2], &r[2], &r[1]);
2535 micro_sub(&r[2], &r[2], &r[3]);
2536 STORE(&r[2], 0, CHAN_X);
2537 }
2538 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2539 micro_mul(&r[4], &r[4], &r[1]);
2540 micro_sub(&r[4], &r[4], &r[5]);
2541 STORE(&r[4], 0, CHAN_Y);
2542 }
2543 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2544 micro_mul(&r[6], &r[6], &r[1]);
2545 micro_sub(&r[6], &r[6], &r[7]);
2546 STORE(&r[6], 0, CHAN_Z);
2547 }
2548 }
2549 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2550 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2551 }
2552 break;
2553
2554 case TGSI_OPCODE_SEQ:
2555 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2556 FETCH( &r[0], 0, chan_index );
2557 FETCH( &r[1], 1, chan_index );
2558 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2559 }
2560 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2561 STORE(&d[chan_index], 0, chan_index);
2562 }
2563 break;
2564
2565 case TGSI_OPCODE_SFL:
2566 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2567 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2568 }
2569 break;
2570
2571 case TGSI_OPCODE_SGT:
2572 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2573 FETCH( &r[0], 0, chan_index );
2574 FETCH( &r[1], 1, chan_index );
2575 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
2576 }
2577 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2578 STORE(&d[chan_index], 0, chan_index);
2579 }
2580 break;
2581
2582 case TGSI_OPCODE_SIN:
2583 FETCH( &r[0], 0, CHAN_X );
2584 micro_sin( &r[0], &r[0] );
2585 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2586 STORE( &r[0], 0, chan_index );
2587 }
2588 break;
2589
2590 case TGSI_OPCODE_SLE:
2591 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2592 FETCH( &r[0], 0, chan_index );
2593 FETCH( &r[1], 1, chan_index );
2594 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2595 }
2596 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2597 STORE(&d[chan_index], 0, chan_index);
2598 }
2599 break;
2600
2601 case TGSI_OPCODE_SNE:
2602 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2603 FETCH( &r[0], 0, chan_index );
2604 FETCH( &r[1], 1, chan_index );
2605 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
2606 }
2607 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2608 STORE(&d[chan_index], 0, chan_index);
2609 }
2610 break;
2611
2612 case TGSI_OPCODE_STR:
2613 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2614 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2615 }
2616 break;
2617
2618 case TGSI_OPCODE_TEX:
2619 /* simple texture lookup */
2620 /* src[0] = texcoord */
2621 /* src[1] = sampler unit */
2622 exec_tex(mach, inst, FALSE, FALSE);
2623 break;
2624
2625 case TGSI_OPCODE_TXB:
2626 /* Texture lookup with lod bias */
2627 /* src[0] = texcoord (src[0].w = LOD bias) */
2628 /* src[1] = sampler unit */
2629 exec_tex(mach, inst, TRUE, FALSE);
2630 break;
2631
2632 case TGSI_OPCODE_TXD:
2633 /* Texture lookup with explict partial derivatives */
2634 /* src[0] = texcoord */
2635 /* src[1] = d[strq]/dx */
2636 /* src[2] = d[strq]/dy */
2637 /* src[3] = sampler unit */
2638 exec_txd(mach, inst);
2639 break;
2640
2641 case TGSI_OPCODE_TXL:
2642 /* Texture lookup with explit LOD */
2643 /* src[0] = texcoord (src[0].w = LOD) */
2644 /* src[1] = sampler unit */
2645 exec_tex(mach, inst, TRUE, FALSE);
2646 break;
2647
2648 case TGSI_OPCODE_TXP:
2649 /* Texture lookup with projection */
2650 /* src[0] = texcoord (src[0].w = projection) */
2651 /* src[1] = sampler unit */
2652 exec_tex(mach, inst, FALSE, TRUE);
2653 break;
2654
2655 case TGSI_OPCODE_UP2H:
2656 assert (0);
2657 break;
2658
2659 case TGSI_OPCODE_UP2US:
2660 assert (0);
2661 break;
2662
2663 case TGSI_OPCODE_UP4B:
2664 assert (0);
2665 break;
2666
2667 case TGSI_OPCODE_UP4UB:
2668 assert (0);
2669 break;
2670
2671 case TGSI_OPCODE_X2D:
2672 FETCH(&r[0], 1, CHAN_X);
2673 FETCH(&r[1], 1, CHAN_Y);
2674 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2675 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2676 FETCH(&r[2], 2, CHAN_X);
2677 micro_mul(&r[2], &r[2], &r[0]);
2678 FETCH(&r[3], 2, CHAN_Y);
2679 micro_mul(&r[3], &r[3], &r[1]);
2680 micro_add(&r[2], &r[2], &r[3]);
2681 FETCH(&r[3], 0, CHAN_X);
2682 micro_add(&d[CHAN_X], &r[2], &r[3]);
2683
2684 }
2685 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2686 IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2687 FETCH(&r[2], 2, CHAN_Z);
2688 micro_mul(&r[2], &r[2], &r[0]);
2689 FETCH(&r[3], 2, CHAN_W);
2690 micro_mul(&r[3], &r[3], &r[1]);
2691 micro_add(&r[2], &r[2], &r[3]);
2692 FETCH(&r[3], 0, CHAN_Y);
2693 micro_add(&d[CHAN_Y], &r[2], &r[3]);
2694
2695 }
2696 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2697 STORE(&d[CHAN_X], 0, CHAN_X);
2698 }
2699 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2700 STORE(&d[CHAN_Y], 0, CHAN_Y);
2701 }
2702 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2703 STORE(&d[CHAN_X], 0, CHAN_Z);
2704 }
2705 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2706 STORE(&d[CHAN_Y], 0, CHAN_W);
2707 }
2708 break;
2709
2710 case TGSI_OPCODE_ARA:
2711 assert (0);
2712 break;
2713
2714 case TGSI_OPCODE_BRA:
2715 assert (0);
2716 break;
2717
2718 case TGSI_OPCODE_CAL:
2719 /* skip the call if no execution channels are enabled */
2720 if (mach->ExecMask) {
2721 /* do the call */
2722
2723 /* First, record the depths of the execution stacks.
2724 * This is important for deeply nested/looped return statements.
2725 * We have to unwind the stacks by the correct amount. For a
2726 * real code generator, we could determine the number of entries
2727 * to pop off each stack with simple static analysis and avoid
2728 * implementing this data structure at run time.
2729 */
2730 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
2731 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
2732 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
2733 /* note that PC was already incremented above */
2734 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
2735
2736 mach->CallStackTop++;
2737
2738 /* Second, push the Cond, Loop, Cont, Func stacks */
2739 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2740 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2741 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2742 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2743 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2744 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2745 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2746 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2747
2748 /* Finally, jump to the subroutine */
2749 *pc = inst->Label.Label;
2750 }
2751 break;
2752
2753 case TGSI_OPCODE_RET:
2754 mach->FuncMask &= ~mach->ExecMask;
2755 UPDATE_EXEC_MASK(mach);
2756
2757 if (mach->FuncMask == 0x0) {
2758 /* really return now (otherwise, keep executing */
2759
2760 if (mach->CallStackTop == 0) {
2761 /* returning from main() */
2762 *pc = -1;
2763 return;
2764 }
2765
2766 assert(mach->CallStackTop > 0);
2767 mach->CallStackTop--;
2768
2769 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
2770 mach->CondMask = mach->CondStack[mach->CondStackTop];
2771
2772 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
2773 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
2774
2775 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
2776 mach->ContMask = mach->ContStack[mach->ContStackTop];
2777
2778 assert(mach->FuncStackTop > 0);
2779 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2780
2781 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
2782
2783 UPDATE_EXEC_MASK(mach);
2784 }
2785 break;
2786
2787 case TGSI_OPCODE_SSG:
2788 /* TGSI_OPCODE_SGN */
2789 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2790 FETCH( &r[0], 0, chan_index );
2791 micro_sgn(&d[chan_index], &r[0]);
2792 }
2793 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2794 STORE(&d[chan_index], 0, chan_index);
2795 }
2796 break;
2797
2798 case TGSI_OPCODE_CMP:
2799 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2800 FETCH(&r[0], 0, chan_index);
2801 FETCH(&r[1], 1, chan_index);
2802 FETCH(&r[2], 2, chan_index);
2803 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
2804 }
2805 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2806 STORE(&d[chan_index], 0, chan_index);
2807 }
2808 break;
2809
2810 case TGSI_OPCODE_SCS:
2811 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2812 FETCH( &r[0], 0, CHAN_X );
2813 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2814 micro_cos(&r[1], &r[0]);
2815 STORE(&r[1], 0, CHAN_X);
2816 }
2817 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2818 micro_sin(&r[1], &r[0]);
2819 STORE(&r[1], 0, CHAN_Y);
2820 }
2821 }
2822 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2823 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2824 }
2825 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2826 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2827 }
2828 break;
2829
2830 case TGSI_OPCODE_NRM:
2831 /* 3-component vector normalize */
2832 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2833 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2834 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2835 /* r3 = sqrt(dp3(src0, src0)) */
2836 FETCH(&r[0], 0, CHAN_X);
2837 micro_mul(&r[3], &r[0], &r[0]);
2838 FETCH(&r[1], 0, CHAN_Y);
2839 micro_mul(&r[4], &r[1], &r[1]);
2840 micro_add(&r[3], &r[3], &r[4]);
2841 FETCH(&r[2], 0, CHAN_Z);
2842 micro_mul(&r[4], &r[2], &r[2]);
2843 micro_add(&r[3], &r[3], &r[4]);
2844 micro_sqrt(&r[3], &r[3]);
2845
2846 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2847 micro_div(&r[0], &r[0], &r[3]);
2848 STORE(&r[0], 0, CHAN_X);
2849 }
2850 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2851 micro_div(&r[1], &r[1], &r[3]);
2852 STORE(&r[1], 0, CHAN_Y);
2853 }
2854 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2855 micro_div(&r[2], &r[2], &r[3]);
2856 STORE(&r[2], 0, CHAN_Z);
2857 }
2858 }
2859 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2860 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2861 }
2862 break;
2863
2864 case TGSI_OPCODE_NRM4:
2865 /* 4-component vector normalize */
2866 {
2867 union tgsi_exec_channel tmp, dot;
2868
2869 /* tmp = dp4(src0, src0): */
2870 FETCH( &r[0], 0, CHAN_X );
2871 micro_mul( &tmp, &r[0], &r[0] );
2872
2873 FETCH( &r[1], 0, CHAN_Y );
2874 micro_mul( &dot, &r[1], &r[1] );
2875 micro_add( &tmp, &tmp, &dot );
2876
2877 FETCH( &r[2], 0, CHAN_Z );
2878 micro_mul( &dot, &r[2], &r[2] );
2879 micro_add( &tmp, &tmp, &dot );
2880
2881 FETCH( &r[3], 0, CHAN_W );
2882 micro_mul( &dot, &r[3], &r[3] );
2883 micro_add( &tmp, &tmp, &dot );
2884
2885 /* tmp = 1 / sqrt(tmp) */
2886 micro_sqrt( &tmp, &tmp );
2887 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2888
2889 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2890 /* chan = chan * tmp */
2891 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2892 STORE( &r[chan_index], 0, chan_index );
2893 }
2894 }
2895 break;
2896
2897 case TGSI_OPCODE_DIV:
2898 assert( 0 );
2899 break;
2900
2901 case TGSI_OPCODE_DP2:
2902 FETCH( &r[0], 0, CHAN_X );
2903 FETCH( &r[1], 1, CHAN_X );
2904 micro_mul( &r[0], &r[0], &r[1] );
2905
2906 FETCH( &r[1], 0, CHAN_Y );
2907 FETCH( &r[2], 1, CHAN_Y );
2908 micro_mul( &r[1], &r[1], &r[2] );
2909 micro_add( &r[0], &r[0], &r[1] );
2910
2911 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2912 STORE( &r[0], 0, chan_index );
2913 }
2914 break;
2915
2916 case TGSI_OPCODE_IF:
2917 /* push CondMask */
2918 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2919 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2920 FETCH( &r[0], 0, CHAN_X );
2921 /* update CondMask */
2922 if( ! r[0].u[0] ) {
2923 mach->CondMask &= ~0x1;
2924 }
2925 if( ! r[0].u[1] ) {
2926 mach->CondMask &= ~0x2;
2927 }
2928 if( ! r[0].u[2] ) {
2929 mach->CondMask &= ~0x4;
2930 }
2931 if( ! r[0].u[3] ) {
2932 mach->CondMask &= ~0x8;
2933 }
2934 UPDATE_EXEC_MASK(mach);
2935 /* Todo: If CondMask==0, jump to ELSE */
2936 break;
2937
2938 case TGSI_OPCODE_ELSE:
2939 /* invert CondMask wrt previous mask */
2940 {
2941 uint prevMask;
2942 assert(mach->CondStackTop > 0);
2943 prevMask = mach->CondStack[mach->CondStackTop - 1];
2944 mach->CondMask = ~mach->CondMask & prevMask;
2945 UPDATE_EXEC_MASK(mach);
2946 /* Todo: If CondMask==0, jump to ENDIF */
2947 }
2948 break;
2949
2950 case TGSI_OPCODE_ENDIF:
2951 /* pop CondMask */
2952 assert(mach->CondStackTop > 0);
2953 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2954 UPDATE_EXEC_MASK(mach);
2955 break;
2956
2957 case TGSI_OPCODE_END:
2958 /* halt execution */
2959 *pc = -1;
2960 break;
2961
2962 case TGSI_OPCODE_REP:
2963 assert (0);
2964 break;
2965
2966 case TGSI_OPCODE_ENDREP:
2967 assert (0);
2968 break;
2969
2970 case TGSI_OPCODE_PUSHA:
2971 assert (0);
2972 break;
2973
2974 case TGSI_OPCODE_POPA:
2975 assert (0);
2976 break;
2977
2978 case TGSI_OPCODE_CEIL:
2979 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2980 FETCH( &r[0], 0, chan_index );
2981 micro_ceil(&d[chan_index], &r[0]);
2982 }
2983 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2984 STORE(&d[chan_index], 0, chan_index);
2985 }
2986 break;
2987
2988 case TGSI_OPCODE_I2F:
2989 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2990 FETCH( &r[0], 0, chan_index );
2991 micro_i2f(&d[chan_index], &r[0]);
2992 }
2993 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2994 STORE(&d[chan_index], 0, chan_index);
2995 }
2996 break;
2997
2998 case TGSI_OPCODE_NOT:
2999 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3000 FETCH( &r[0], 0, chan_index );
3001 micro_not(&d[chan_index], &r[0]);
3002 }
3003 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3004 STORE(&d[chan_index], 0, chan_index);
3005 }
3006 break;
3007
3008 case TGSI_OPCODE_TRUNC:
3009 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3010 FETCH( &r[0], 0, chan_index );
3011 micro_trunc(&d[chan_index], &r[0]);
3012 }
3013 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3014 STORE(&d[chan_index], 0, chan_index);
3015 }
3016 break;
3017
3018 case TGSI_OPCODE_SHL:
3019 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3020 FETCH( &r[0], 0, chan_index );
3021 FETCH( &r[1], 1, chan_index );
3022 micro_shl(&d[chan_index], &r[0], &r[1]);
3023 }
3024 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3025 STORE(&d[chan_index], 0, chan_index);
3026 }
3027 break;
3028
3029 case TGSI_OPCODE_SHR:
3030 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3031 FETCH( &r[0], 0, chan_index );
3032 FETCH( &r[1], 1, chan_index );
3033 micro_ishr(&d[chan_index], &r[0], &r[1]);
3034 }
3035 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3036 STORE(&d[chan_index], 0, chan_index);
3037 }
3038 break;
3039
3040 case TGSI_OPCODE_AND:
3041 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3042 FETCH( &r[0], 0, chan_index );
3043 FETCH( &r[1], 1, chan_index );
3044 micro_and(&d[chan_index], &r[0], &r[1]);
3045 }
3046 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3047 STORE(&d[chan_index], 0, chan_index);
3048 }
3049 break;
3050
3051 case TGSI_OPCODE_OR:
3052 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3053 FETCH( &r[0], 0, chan_index );
3054 FETCH( &r[1], 1, chan_index );
3055 micro_or(&d[chan_index], &r[0], &r[1]);
3056 }
3057 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3058 STORE(&d[chan_index], 0, chan_index);
3059 }
3060 break;
3061
3062 case TGSI_OPCODE_MOD:
3063 assert (0);
3064 break;
3065
3066 case TGSI_OPCODE_XOR:
3067 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3068 FETCH( &r[0], 0, chan_index );
3069 FETCH( &r[1], 1, chan_index );
3070 micro_xor(&d[chan_index], &r[0], &r[1]);
3071 }
3072 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3073 STORE(&d[chan_index], 0, chan_index);
3074 }
3075 break;
3076
3077 case TGSI_OPCODE_SAD:
3078 assert (0);
3079 break;
3080
3081 case TGSI_OPCODE_TXF:
3082 assert (0);
3083 break;
3084
3085 case TGSI_OPCODE_TXQ:
3086 assert (0);
3087 break;
3088
3089 case TGSI_OPCODE_EMIT:
3090 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
3091 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
3092 break;
3093
3094 case TGSI_OPCODE_ENDPRIM:
3095 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
3096 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
3097 break;
3098
3099 case TGSI_OPCODE_BGNFOR:
3100 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3101 for (chan_index = 0; chan_index < 3; chan_index++) {
3102 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
3103 }
3104 ++mach->LoopCounterStackTop;
3105 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
3106 /* update LoopMask */
3107 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3108 mach->LoopMask &= ~0x1;
3109 }
3110 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3111 mach->LoopMask &= ~0x2;
3112 }
3113 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3114 mach->LoopMask &= ~0x4;
3115 }
3116 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3117 mach->LoopMask &= ~0x8;
3118 }
3119 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3120 UPDATE_EXEC_MASK(mach);
3121 /* fall-through (for now) */
3122 case TGSI_OPCODE_BGNLOOP:
3123 /* push LoopMask and ContMasks */
3124 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3125 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3126 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3127 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3128 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3129 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
3130 break;
3131
3132 case TGSI_OPCODE_ENDFOR:
3133 assert(mach->LoopCounterStackTop > 0);
3134 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3135 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3136 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
3137 /* update LoopMask */
3138 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3139 mach->LoopMask &= ~0x1;
3140 }
3141 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3142 mach->LoopMask &= ~0x2;
3143 }
3144 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3145 mach->LoopMask &= ~0x4;
3146 }
3147 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3148 mach->LoopMask &= ~0x8;
3149 }
3150 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3151 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3152 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
3153 assert(mach->LoopLabelStackTop > 0);
3154 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
3155 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
3156 /* Restore ContMask, but don't pop */
3157 assert(mach->ContStackTop > 0);
3158 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3159 UPDATE_EXEC_MASK(mach);
3160 if (mach->ExecMask) {
3161 /* repeat loop: jump to instruction just past BGNLOOP */
3162 assert(mach->LoopLabelStackTop > 0);
3163 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3164 }
3165 else {
3166 /* exit loop: pop LoopMask */
3167 assert(mach->LoopStackTop > 0);
3168 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3169 /* pop ContMask */
3170 assert(mach->ContStackTop > 0);
3171 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3172 assert(mach->LoopLabelStackTop > 0);
3173 --mach->LoopLabelStackTop;
3174 assert(mach->LoopCounterStackTop > 0);
3175 --mach->LoopCounterStackTop;
3176 }
3177 UPDATE_EXEC_MASK(mach);
3178 break;
3179
3180 case TGSI_OPCODE_ENDLOOP:
3181 /* Restore ContMask, but don't pop */
3182 assert(mach->ContStackTop > 0);
3183 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3184 UPDATE_EXEC_MASK(mach);
3185 if (mach->ExecMask) {
3186 /* repeat loop: jump to instruction just past BGNLOOP */
3187 assert(mach->LoopLabelStackTop > 0);
3188 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3189 }
3190 else {
3191 /* exit loop: pop LoopMask */
3192 assert(mach->LoopStackTop > 0);
3193 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3194 /* pop ContMask */
3195 assert(mach->ContStackTop > 0);
3196 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3197 assert(mach->LoopLabelStackTop > 0);
3198 --mach->LoopLabelStackTop;
3199 }
3200 UPDATE_EXEC_MASK(mach);
3201 break;
3202
3203 case TGSI_OPCODE_BRK:
3204 /* turn off loop channels for each enabled exec channel */
3205 mach->LoopMask &= ~mach->ExecMask;
3206 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3207 UPDATE_EXEC_MASK(mach);
3208 break;
3209
3210 case TGSI_OPCODE_CONT:
3211 /* turn off cont channels for each enabled exec channel */
3212 mach->ContMask &= ~mach->ExecMask;
3213 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3214 UPDATE_EXEC_MASK(mach);
3215 break;
3216
3217 case TGSI_OPCODE_BGNSUB:
3218 /* no-op */
3219 break;
3220
3221 case TGSI_OPCODE_ENDSUB:
3222 /*
3223 * XXX: This really should be a no-op. We should never reach this opcode.
3224 */
3225
3226 assert(mach->CallStackTop > 0);
3227 mach->CallStackTop--;
3228
3229 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3230 mach->CondMask = mach->CondStack[mach->CondStackTop];
3231
3232 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3233 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3234
3235 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3236 mach->ContMask = mach->ContStack[mach->ContStackTop];
3237
3238 assert(mach->FuncStackTop > 0);
3239 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3240
3241 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3242
3243 UPDATE_EXEC_MASK(mach);
3244 break;
3245
3246 case TGSI_OPCODE_NOP:
3247 break;
3248
3249 case TGSI_OPCODE_BREAKC:
3250 FETCH(&r[0], 0, CHAN_X);
3251 /* update CondMask */
3252 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
3253 mach->LoopMask &= ~0x1;
3254 }
3255 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
3256 mach->LoopMask &= ~0x2;
3257 }
3258 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
3259 mach->LoopMask &= ~0x4;
3260 }
3261 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
3262 mach->LoopMask &= ~0x8;
3263 }
3264 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3265 UPDATE_EXEC_MASK(mach);
3266 break;
3267
3268 default:
3269 assert( 0 );
3270 }
3271 }
3272
3273 #define DEBUG_EXECUTION 0
3274
3275
3276 /**
3277 * Run TGSI interpreter.
3278 * \return bitmask of "alive" quad components
3279 */
3280 uint
3281 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3282 {
3283 uint i;
3284 int pc = 0;
3285
3286 mach->CondMask = 0xf;
3287 mach->LoopMask = 0xf;
3288 mach->ContMask = 0xf;
3289 mach->FuncMask = 0xf;
3290 mach->ExecMask = 0xf;
3291
3292 assert(mach->CondStackTop == 0);
3293 assert(mach->LoopStackTop == 0);
3294 assert(mach->ContStackTop == 0);
3295 assert(mach->CallStackTop == 0);
3296
3297 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3298 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3299
3300 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3301 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3302 mach->Primitives[0] = 0;
3303 }
3304
3305 for (i = 0; i < QUAD_SIZE; i++) {
3306 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3307 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3308 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3309 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3310 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3311 }
3312
3313 /* execute declarations (interpolants) */
3314 for (i = 0; i < mach->NumDeclarations; i++) {
3315 exec_declaration( mach, mach->Declarations+i );
3316 }
3317
3318 {
3319 #if DEBUG_EXECUTION
3320 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
3321 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
3322 uint inst = 1;
3323
3324 memcpy(temps, mach->Temps, sizeof(temps));
3325 memcpy(outputs, mach->Outputs, sizeof(outputs));
3326 #endif
3327
3328 /* execute instructions, until pc is set to -1 */
3329 while (pc != -1) {
3330
3331 #if DEBUG_EXECUTION
3332 uint i;
3333
3334 tgsi_dump_instruction(&mach->Instructions[pc], inst++);
3335 #endif
3336
3337 assert(pc < (int) mach->NumInstructions);
3338 exec_instruction(mach, mach->Instructions + pc, &pc);
3339
3340 #if DEBUG_EXECUTION
3341 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
3342 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
3343 uint j;
3344
3345 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
3346 debug_printf("TEMP[%2u] = ", i);
3347 for (j = 0; j < 4; j++) {
3348 if (j > 0) {
3349 debug_printf(" ");
3350 }
3351 debug_printf("(%6f, %6f, %6f, %6f)\n",
3352 temps[i].xyzw[0].f[j],
3353 temps[i].xyzw[1].f[j],
3354 temps[i].xyzw[2].f[j],
3355 temps[i].xyzw[3].f[j]);
3356 }
3357 }
3358 }
3359 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
3360 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
3361 uint j;
3362
3363 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
3364 debug_printf("OUT[%2u] = ", i);
3365 for (j = 0; j < 4; j++) {
3366 if (j > 0) {
3367 debug_printf(" ");
3368 }
3369 debug_printf("{%6f, %6f, %6f, %6f}\n",
3370 outputs[i].xyzw[0].f[j],
3371 outputs[i].xyzw[1].f[j],
3372 outputs[i].xyzw[2].f[j],
3373 outputs[i].xyzw[3].f[j]);
3374 }
3375 }
3376 }
3377 #endif
3378 }
3379 }
3380
3381 #if 0
3382 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3383 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3384 /*
3385 * Scale back depth component.
3386 */
3387 for (i = 0; i < 4; i++)
3388 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3389 }
3390 #endif
3391
3392 assert(mach->CondStackTop == 0);
3393 assert(mach->LoopStackTop == 0);
3394 assert(mach->ContStackTop == 0);
3395 assert(mach->CallStackTop == 0);
3396
3397 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3398 }