Merge branch 'mesa_7_7_branch'
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
 * The ExecMask is computed from four other masks: CondMask, LoopMask,
 * ContMask and FuncMask (see UPDATE_EXEC_MASK). The first three are
 * controlled by the flow control instructions IF/ELSE/ENDIF, LOOP/ENDLOOP
 * and CONT.
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_dump.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
60 #include "util/u_memory.h"
61 #include "util/u_math.h"
62
/* Non-zero selects the util_fast_*() approximations in the micro ops. */
#define FAST_MATH           1

/* Lane numbers of the four elements of a 2x2 quad. */
#define TILE_TOP_LEFT       0
#define TILE_TOP_RIGHT      1
#define TILE_BOTTOM_LEFT    2
#define TILE_BOTTOM_RIGHT   3

/* Channel numbers of the X, Y, Z and W components of a register. */
#define CHAN_X              0
#define CHAN_Y              1
#define CHAN_Z              2
#define CHAN_W              3
74
/*
 * Short aliases for the TGSI_EXEC_TEMP_* locations of the utility
 * registers.  A name ending in _I is the index into the temporary
 * register array; the matching name ending in _C is the channel within
 * that register.
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
#define TEMP_P0            TGSI_EXEC_TEMP_P0
108
/** Non-zero if channel CHAN is set in the write mask of INST's first destination. */
#define IS_CHANNEL_ENABLED(INST, CHAN) \
   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))

/** Same test against the write mask of INST's second destination. */
#define IS_CHANNEL_ENABLED2(INST, CHAN) \
   ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel
 * (0 .. NUM_CHANNELS-1) enabled in the first destination's write mask.
 * CHAN must be an assignable integer variable.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) \
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** As FOR_EACH_ENABLED_CHANNEL, but for the second destination's write mask. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) \
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
122
123
/**
 * Recompute MACH->ExecMask as the bitwise AND of the condition, loop,
 * continue and function masks.
 *
 * The argument and the whole expansion are parenthesized so the macro
 * works with any pointer expression (e.g. "m + 1") and can be used as a
 * single expression statement.  MACH is evaluated more than once, so it
 * must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
127
128
129 static const union tgsi_exec_channel ZeroVec =
130 { { 0.0, 0.0, 0.0, 0.0 } };
131
132
#ifdef DEBUG
/**
 * Debug-only sanity check: assert that none of the four lanes of
 * \p chan is an infinity or a NaN.
 */
static void
check_inf_or_nan(const union tgsi_exec_channel *chan)
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      assert(!util_is_inf_or_nan(chan->f[lane]));
   }
}
#endif
143
144
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of \p chan on one line,
 * prefixed with \p msg.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
153
154
#ifdef DEBUG
/**
 * Debug helper: dump temporary register \p index of \p mach, one line
 * per channel (X, Y, Z, W), each showing the four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   const struct tgsi_exec_vector *reg = &mach->Temps[index];
   int chan = 0;

   debug_printf("Temp[%u] =\n", index);
   while (chan < 4) {
      const float *lanes = reg->xyzw[chan].f;

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   "XYZW"[chan],
                   lanes[0], lanes[1], lanes[2], lanes[3]);
      chan++;
   }
}
#endif
172
173
174 /**
175 * Check if there's a potential src/dst register data dependency when
176 * using SOA execution.
177 * Example:
178 * MOV T, T.yxwz;
179 * This would expand into:
180 * MOV t0, t1;
181 * MOV t1, t0;
182 * MOV t2, t3;
183 * MOV t3, t2;
184 * The second instruction will have the wrong value for t0 if executed as-is.
185 */
186 boolean
187 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
188 {
189 uint i, chan;
190
191 uint writemask = inst->Dst[0].Register.WriteMask;
192 if (writemask == TGSI_WRITEMASK_X ||
193 writemask == TGSI_WRITEMASK_Y ||
194 writemask == TGSI_WRITEMASK_Z ||
195 writemask == TGSI_WRITEMASK_W ||
196 writemask == TGSI_WRITEMASK_NONE) {
197 /* no chance of data dependency */
198 return FALSE;
199 }
200
201 /* loop over src regs */
202 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
203 if ((inst->Src[i].Register.File ==
204 inst->Dst[0].Register.File) &&
205 (inst->Src[i].Register.Index ==
206 inst->Dst[0].Register.Index)) {
207 /* loop over dest channels */
208 uint channelsWritten = 0x0;
209 FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
210 /* check if we're reading a channel that's been written */
211 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
212 if (channelsWritten & (1 << swizzle)) {
213 return TRUE;
214 }
215
216 channelsWritten |= (1 << chan);
217 }
218 }
219 }
220 return FALSE;
221 }
222
223
224 /**
225 * Initialize machine state by expanding tokens to full instructions,
226 * allocating temporary storage, setting up constants, etc.
227 * After this, we can call tgsi_exec_machine_run() many times.
228 */
229 void
230 tgsi_exec_machine_bind_shader(
231 struct tgsi_exec_machine *mach,
232 const struct tgsi_token *tokens,
233 uint numSamplers,
234 struct tgsi_sampler **samplers)
235 {
236 uint k;
237 struct tgsi_parse_context parse;
238 struct tgsi_exec_labels *labels = &mach->Labels;
239 struct tgsi_full_instruction *instructions;
240 struct tgsi_full_declaration *declarations;
241 uint maxInstructions = 10, numInstructions = 0;
242 uint maxDeclarations = 10, numDeclarations = 0;
243 uint instno = 0;
244
245 #if 0
246 tgsi_dump(tokens, 0);
247 #endif
248
249 util_init_math();
250
251 mach->Tokens = tokens;
252 mach->Samplers = samplers;
253
254 k = tgsi_parse_init (&parse, mach->Tokens);
255 if (k != TGSI_PARSE_OK) {
256 debug_printf( "Problem parsing!\n" );
257 return;
258 }
259
260 mach->Processor = parse.FullHeader.Processor.Processor;
261 mach->ImmLimit = 0;
262 labels->count = 0;
263
264 declarations = (struct tgsi_full_declaration *)
265 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
266
267 if (!declarations) {
268 return;
269 }
270
271 instructions = (struct tgsi_full_instruction *)
272 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
273
274 if (!instructions) {
275 FREE( declarations );
276 return;
277 }
278
279 while( !tgsi_parse_end_of_tokens( &parse ) ) {
280 uint pointer = parse.Position;
281 uint i;
282
283 tgsi_parse_token( &parse );
284 switch( parse.FullToken.Token.Type ) {
285 case TGSI_TOKEN_TYPE_DECLARATION:
286 /* save expanded declaration */
287 if (numDeclarations == maxDeclarations) {
288 declarations = REALLOC(declarations,
289 maxDeclarations
290 * sizeof(struct tgsi_full_declaration),
291 (maxDeclarations + 10)
292 * sizeof(struct tgsi_full_declaration));
293 maxDeclarations += 10;
294 }
295 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
296 unsigned reg;
297 for (reg = parse.FullToken.FullDeclaration.Range.First;
298 reg <= parse.FullToken.FullDeclaration.Range.Last;
299 ++reg) {
300 ++mach->NumOutputs;
301 }
302 }
303 memcpy(declarations + numDeclarations,
304 &parse.FullToken.FullDeclaration,
305 sizeof(declarations[0]));
306 numDeclarations++;
307 break;
308
309 case TGSI_TOKEN_TYPE_IMMEDIATE:
310 {
311 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
312 assert( size <= 4 );
313 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
314
315 for( i = 0; i < size; i++ ) {
316 mach->Imms[mach->ImmLimit][i] =
317 parse.FullToken.FullImmediate.u[i].Float;
318 }
319 mach->ImmLimit += 1;
320 }
321 break;
322
323 case TGSI_TOKEN_TYPE_INSTRUCTION:
324 assert( labels->count < MAX_LABELS );
325
326 labels->labels[labels->count][0] = instno;
327 labels->labels[labels->count][1] = pointer;
328 labels->count++;
329
330 /* save expanded instruction */
331 if (numInstructions == maxInstructions) {
332 instructions = REALLOC(instructions,
333 maxInstructions
334 * sizeof(struct tgsi_full_instruction),
335 (maxInstructions + 10)
336 * sizeof(struct tgsi_full_instruction));
337 maxInstructions += 10;
338 }
339
340 memcpy(instructions + numInstructions,
341 &parse.FullToken.FullInstruction,
342 sizeof(instructions[0]));
343
344 numInstructions++;
345 break;
346
347 case TGSI_TOKEN_TYPE_PROPERTY:
348 break;
349
350 default:
351 assert( 0 );
352 }
353 }
354 tgsi_parse_free (&parse);
355
356 if (mach->Declarations) {
357 FREE( mach->Declarations );
358 }
359 mach->Declarations = declarations;
360 mach->NumDeclarations = numDeclarations;
361
362 if (mach->Instructions) {
363 FREE( mach->Instructions );
364 }
365 mach->Instructions = instructions;
366 mach->NumInstructions = numInstructions;
367 }
368
369
370 struct tgsi_exec_machine *
371 tgsi_exec_machine_create( void )
372 {
373 struct tgsi_exec_machine *mach;
374 uint i;
375
376 mach = align_malloc( sizeof *mach, 16 );
377 if (!mach)
378 goto fail;
379
380 memset(mach, 0, sizeof(*mach));
381
382 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
383 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
384 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
385
386 /* Setup constants. */
387 for( i = 0; i < 4; i++ ) {
388 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
389 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
390 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
391 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
392 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
393 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
394 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
395 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
396 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
397 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
398 }
399
400 #ifdef DEBUG
401 /* silence warnings */
402 (void) print_chan;
403 (void) print_temp;
404 #endif
405
406 return mach;
407
408 fail:
409 align_free(mach);
410 return NULL;
411 }
412
413
414 void
415 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
416 {
417 if (mach) {
418 FREE(mach->Instructions);
419 FREE(mach->Declarations);
420 }
421
422 align_free(mach);
423 }
424
425
426 static void
427 micro_abs(
428 union tgsi_exec_channel *dst,
429 const union tgsi_exec_channel *src )
430 {
431 dst->f[0] = fabsf( src->f[0] );
432 dst->f[1] = fabsf( src->f[1] );
433 dst->f[2] = fabsf( src->f[2] );
434 dst->f[3] = fabsf( src->f[3] );
435 }
436
437 static void
438 micro_add(
439 union tgsi_exec_channel *dst,
440 const union tgsi_exec_channel *src0,
441 const union tgsi_exec_channel *src1 )
442 {
443 dst->f[0] = src0->f[0] + src1->f[0];
444 dst->f[1] = src0->f[1] + src1->f[1];
445 dst->f[2] = src0->f[2] + src1->f[2];
446 dst->f[3] = src0->f[3] + src1->f[3];
447 }
448
#if 0
/** Per-lane signed integer addition: dst = src0 + src1. */
static void
micro_iadd(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] + src1->i[lane];
   }
}
#endif
462
463 static void
464 micro_and(
465 union tgsi_exec_channel *dst,
466 const union tgsi_exec_channel *src0,
467 const union tgsi_exec_channel *src1 )
468 {
469 dst->u[0] = src0->u[0] & src1->u[0];
470 dst->u[1] = src0->u[1] & src1->u[1];
471 dst->u[2] = src0->u[2] & src1->u[2];
472 dst->u[3] = src0->u[3] & src1->u[3];
473 }
474
475 static void
476 micro_ceil(
477 union tgsi_exec_channel *dst,
478 const union tgsi_exec_channel *src )
479 {
480 dst->f[0] = ceilf( src->f[0] );
481 dst->f[1] = ceilf( src->f[1] );
482 dst->f[2] = ceilf( src->f[2] );
483 dst->f[3] = ceilf( src->f[3] );
484 }
485
486 static void
487 micro_cos(
488 union tgsi_exec_channel *dst,
489 const union tgsi_exec_channel *src )
490 {
491 dst->f[0] = cosf( src->f[0] );
492 dst->f[1] = cosf( src->f[1] );
493 dst->f[2] = cosf( src->f[2] );
494 dst->f[3] = cosf( src->f[3] );
495 }
496
497 static void
498 micro_ddx(
499 union tgsi_exec_channel *dst,
500 const union tgsi_exec_channel *src )
501 {
502 dst->f[0] =
503 dst->f[1] =
504 dst->f[2] =
505 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
506 }
507
508 static void
509 micro_ddy(
510 union tgsi_exec_channel *dst,
511 const union tgsi_exec_channel *src )
512 {
513 dst->f[0] =
514 dst->f[1] =
515 dst->f[2] =
516 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
517 }
518
519 static void
520 micro_div(
521 union tgsi_exec_channel *dst,
522 const union tgsi_exec_channel *src0,
523 const union tgsi_exec_channel *src1 )
524 {
525 if (src1->f[0] != 0) {
526 dst->f[0] = src0->f[0] / src1->f[0];
527 }
528 if (src1->f[1] != 0) {
529 dst->f[1] = src0->f[1] / src1->f[1];
530 }
531 if (src1->f[2] != 0) {
532 dst->f[2] = src0->f[2] / src1->f[2];
533 }
534 if (src1->f[3] != 0) {
535 dst->f[3] = src0->f[3] / src1->f[3];
536 }
537 }
538
#if 0
/** Per-lane unsigned division: dst = src0 / src1 (no zero-divisor check). */
static void
micro_udiv(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] / src1->u[lane];
   }
}
#endif
552
553 static void
554 micro_eq(
555 union tgsi_exec_channel *dst,
556 const union tgsi_exec_channel *src0,
557 const union tgsi_exec_channel *src1,
558 const union tgsi_exec_channel *src2,
559 const union tgsi_exec_channel *src3 )
560 {
561 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
562 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
563 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
564 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
565 }
566
#if 0
/** Per-lane signed integer select: dst = (src0 == src1) ? src2 : src3. */
static void
micro_ieq(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] == src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
582
583 static void
584 micro_exp2(
585 union tgsi_exec_channel *dst,
586 const union tgsi_exec_channel *src)
587 {
588 #if FAST_MATH
589 dst->f[0] = util_fast_exp2( src->f[0] );
590 dst->f[1] = util_fast_exp2( src->f[1] );
591 dst->f[2] = util_fast_exp2( src->f[2] );
592 dst->f[3] = util_fast_exp2( src->f[3] );
593 #else
594
595 #if DEBUG
596 /* Inf is okay for this instruction, so clamp it to silence assertions. */
597 uint i;
598 union tgsi_exec_channel clamped;
599
600 for (i = 0; i < 4; i++) {
601 if (src->f[i] > 127.99999f) {
602 clamped.f[i] = 127.99999f;
603 } else if (src->f[i] < -126.99999f) {
604 clamped.f[i] = -126.99999f;
605 } else {
606 clamped.f[i] = src->f[i];
607 }
608 }
609 src = &clamped;
610 #endif
611
612 dst->f[0] = powf( 2.0f, src->f[0] );
613 dst->f[1] = powf( 2.0f, src->f[1] );
614 dst->f[2] = powf( 2.0f, src->f[2] );
615 dst->f[3] = powf( 2.0f, src->f[3] );
616 #endif
617 }
618
#if 0
/** Per-lane float to unsigned conversion using a C cast: dst = (uint) src. */
static void
micro_f2ut(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = (uint) src->f[lane];
   }
}
#endif
631
632 static void
633 micro_float_clamp(union tgsi_exec_channel *dst,
634 const union tgsi_exec_channel *src)
635 {
636 uint i;
637
638 for (i = 0; i < 4; i++) {
639 if (src->f[i] > 0.0f) {
640 if (src->f[i] > 1.884467e+019f)
641 dst->f[i] = 1.884467e+019f;
642 else if (src->f[i] < 5.42101e-020f)
643 dst->f[i] = 5.42101e-020f;
644 else
645 dst->f[i] = src->f[i];
646 }
647 else {
648 if (src->f[i] < -1.884467e+019f)
649 dst->f[i] = -1.884467e+019f;
650 else if (src->f[i] > -5.42101e-020f)
651 dst->f[i] = -5.42101e-020f;
652 else
653 dst->f[i] = src->f[i];
654 }
655 }
656 }
657
658 static void
659 micro_flr(
660 union tgsi_exec_channel *dst,
661 const union tgsi_exec_channel *src )
662 {
663 dst->f[0] = floorf( src->f[0] );
664 dst->f[1] = floorf( src->f[1] );
665 dst->f[2] = floorf( src->f[2] );
666 dst->f[3] = floorf( src->f[3] );
667 }
668
669 static void
670 micro_frc(
671 union tgsi_exec_channel *dst,
672 const union tgsi_exec_channel *src )
673 {
674 dst->f[0] = src->f[0] - floorf( src->f[0] );
675 dst->f[1] = src->f[1] - floorf( src->f[1] );
676 dst->f[2] = src->f[2] - floorf( src->f[2] );
677 dst->f[3] = src->f[3] - floorf( src->f[3] );
678 }
679
680 static void
681 micro_i2f(
682 union tgsi_exec_channel *dst,
683 const union tgsi_exec_channel *src )
684 {
685 dst->f[0] = (float) src->i[0];
686 dst->f[1] = (float) src->i[1];
687 dst->f[2] = (float) src->i[2];
688 dst->f[3] = (float) src->i[3];
689 }
690
691 static void
692 micro_lg2(
693 union tgsi_exec_channel *dst,
694 const union tgsi_exec_channel *src )
695 {
696 #if FAST_MATH
697 dst->f[0] = util_fast_log2( src->f[0] );
698 dst->f[1] = util_fast_log2( src->f[1] );
699 dst->f[2] = util_fast_log2( src->f[2] );
700 dst->f[3] = util_fast_log2( src->f[3] );
701 #else
702 dst->f[0] = logf( src->f[0] ) * 1.442695f;
703 dst->f[1] = logf( src->f[1] ) * 1.442695f;
704 dst->f[2] = logf( src->f[2] ) * 1.442695f;
705 dst->f[3] = logf( src->f[3] ) * 1.442695f;
706 #endif
707 }
708
709 static void
710 micro_le(
711 union tgsi_exec_channel *dst,
712 const union tgsi_exec_channel *src0,
713 const union tgsi_exec_channel *src1,
714 const union tgsi_exec_channel *src2,
715 const union tgsi_exec_channel *src3 )
716 {
717 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
718 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
719 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
720 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
721 }
722
723 static void
724 micro_lt(
725 union tgsi_exec_channel *dst,
726 const union tgsi_exec_channel *src0,
727 const union tgsi_exec_channel *src1,
728 const union tgsi_exec_channel *src2,
729 const union tgsi_exec_channel *src3 )
730 {
731 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
732 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
733 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
734 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
735 }
736
#if 0
/** Per-lane signed integer select: dst = (src0 < src1) ? src2 : src3. */
static void
micro_ilt(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
752
#if 0
/** Per-lane unsigned select: dst = (src0 < src1) ? src2 : src3. */
static void
micro_ult(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src2->u[lane];
      }
      else {
         dst->u[lane] = src3->u[lane];
      }
   }
}
#endif
768
769 static void
770 micro_max(
771 union tgsi_exec_channel *dst,
772 const union tgsi_exec_channel *src0,
773 const union tgsi_exec_channel *src1 )
774 {
775 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
776 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
777 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
778 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
779 }
780
#if 0
/** Per-lane signed integer maximum: dst = max(src0, src1). */
static void
micro_imax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] > src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
794
#if 0
/** Per-lane unsigned maximum: dst = max(src0, src1). */
static void
micro_umax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] > src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
808
809 static void
810 micro_min(
811 union tgsi_exec_channel *dst,
812 const union tgsi_exec_channel *src0,
813 const union tgsi_exec_channel *src1 )
814 {
815 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
816 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
817 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
818 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
819 }
820
#if 0
/** Per-lane signed integer minimum: dst = min(src0, src1). */
static void
micro_imin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
834
#if 0
/** Per-lane unsigned minimum: dst = min(src0, src1). */
static void
micro_umin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
848
#if 0
/** Per-lane unsigned remainder: dst = src0 % src1 (no zero-divisor check). */
static void
micro_umod(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] % src1->u[lane];
   }
}
#endif
862
863 static void
864 micro_mul(
865 union tgsi_exec_channel *dst,
866 const union tgsi_exec_channel *src0,
867 const union tgsi_exec_channel *src1 )
868 {
869 dst->f[0] = src0->f[0] * src1->f[0];
870 dst->f[1] = src0->f[1] * src1->f[1];
871 dst->f[2] = src0->f[2] * src1->f[2];
872 dst->f[3] = src0->f[3] * src1->f[3];
873 }
874
#if 0
/** Per-lane signed integer multiplication: dst = src0 * src1. */
static void
micro_imul(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] * src1->i[lane];
   }
}
#endif
888
#if 0
/**
 * Per-lane signed multiplication with two destinations: dst1 receives
 * src0 * src1 and dst0 is set to zero in every lane.
 */
static void
micro_imul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   /* Finish all products before dst0 is cleared, as the operands may overlap. */
   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
907
#if 0
/**
 * Per-lane unsigned multiplication with two destinations: dst1 receives
 * src0 * src1 and dst0 is set to zero in every lane.
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   /* Finish all products before dst0 is cleared, as the operands may overlap. */
   for (lane = 0; lane < 4; lane++) {
      dst1->u[lane] = src0->u[lane] * src1->u[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->u[lane] = 0;
   }
}
#endif
926
927
#if 0
/** Per-lane conditional move: dst = (src0 != 0) ? src1 : src2, on the unsigned views. */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane]) {
         dst->u[lane] = src1->u[lane];
      }
      else {
         dst->u[lane] = src2->u[lane];
      }
   }
}
#endif
942
943 static void
944 micro_neg(
945 union tgsi_exec_channel *dst,
946 const union tgsi_exec_channel *src )
947 {
948 dst->f[0] = -src->f[0];
949 dst->f[1] = -src->f[1];
950 dst->f[2] = -src->f[2];
951 dst->f[3] = -src->f[3];
952 }
953
#if 0
/** Per-lane signed integer negation: dst = -src. */
static void
micro_ineg(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = -src->i[lane];
   }
}
#endif
966
967 static void
968 micro_not(
969 union tgsi_exec_channel *dst,
970 const union tgsi_exec_channel *src )
971 {
972 dst->u[0] = ~src->u[0];
973 dst->u[1] = ~src->u[1];
974 dst->u[2] = ~src->u[2];
975 dst->u[3] = ~src->u[3];
976 }
977
978 static void
979 micro_or(
980 union tgsi_exec_channel *dst,
981 const union tgsi_exec_channel *src0,
982 const union tgsi_exec_channel *src1 )
983 {
984 dst->u[0] = src0->u[0] | src1->u[0];
985 dst->u[1] = src0->u[1] | src1->u[1];
986 dst->u[2] = src0->u[2] | src1->u[2];
987 dst->u[3] = src0->u[3] | src1->u[3];
988 }
989
990 static void
991 micro_pow(
992 union tgsi_exec_channel *dst,
993 const union tgsi_exec_channel *src0,
994 const union tgsi_exec_channel *src1 )
995 {
996 #if FAST_MATH
997 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
998 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
999 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
1000 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
1001 #else
1002 dst->f[0] = powf( src0->f[0], src1->f[0] );
1003 dst->f[1] = powf( src0->f[1], src1->f[1] );
1004 dst->f[2] = powf( src0->f[2], src1->f[2] );
1005 dst->f[3] = powf( src0->f[3], src1->f[3] );
1006 #endif
1007 }
1008
1009 static void
1010 micro_rnd(
1011 union tgsi_exec_channel *dst,
1012 const union tgsi_exec_channel *src )
1013 {
1014 dst->f[0] = floorf( src->f[0] + 0.5f );
1015 dst->f[1] = floorf( src->f[1] + 0.5f );
1016 dst->f[2] = floorf( src->f[2] + 0.5f );
1017 dst->f[3] = floorf( src->f[3] + 0.5f );
1018 }
1019
1020 static void
1021 micro_sgn(
1022 union tgsi_exec_channel *dst,
1023 const union tgsi_exec_channel *src )
1024 {
1025 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
1026 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
1027 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
1028 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
1029 }
1030
1031 static void
1032 micro_shl(
1033 union tgsi_exec_channel *dst,
1034 const union tgsi_exec_channel *src0,
1035 const union tgsi_exec_channel *src1 )
1036 {
1037 dst->i[0] = src0->i[0] << src1->i[0];
1038 dst->i[1] = src0->i[1] << src1->i[1];
1039 dst->i[2] = src0->i[2] << src1->i[2];
1040 dst->i[3] = src0->i[3] << src1->i[3];
1041 }
1042
1043 static void
1044 micro_ishr(
1045 union tgsi_exec_channel *dst,
1046 const union tgsi_exec_channel *src0,
1047 const union tgsi_exec_channel *src1 )
1048 {
1049 dst->i[0] = src0->i[0] >> src1->i[0];
1050 dst->i[1] = src0->i[1] >> src1->i[1];
1051 dst->i[2] = src0->i[2] >> src1->i[2];
1052 dst->i[3] = src0->i[3] >> src1->i[3];
1053 }
1054
1055 static void
1056 micro_trunc(
1057 union tgsi_exec_channel *dst,
1058 const union tgsi_exec_channel *src0 )
1059 {
1060 dst->f[0] = (float) (int) src0->f[0];
1061 dst->f[1] = (float) (int) src0->f[1];
1062 dst->f[2] = (float) (int) src0->f[2];
1063 dst->f[3] = (float) (int) src0->f[3];
1064 }
1065
#if 0
/** Per-lane right shift of the unsigned view: dst = src0 >> src1. */
static void
micro_ushr(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] >> src1->u[lane];
   }
}
#endif
1079
1080 static void
1081 micro_sin(
1082 union tgsi_exec_channel *dst,
1083 const union tgsi_exec_channel *src )
1084 {
1085 dst->f[0] = sinf( src->f[0] );
1086 dst->f[1] = sinf( src->f[1] );
1087 dst->f[2] = sinf( src->f[2] );
1088 dst->f[3] = sinf( src->f[3] );
1089 }
1090
1091 static void
1092 micro_sqrt( union tgsi_exec_channel *dst,
1093 const union tgsi_exec_channel *src )
1094 {
1095 dst->f[0] = sqrtf( src->f[0] );
1096 dst->f[1] = sqrtf( src->f[1] );
1097 dst->f[2] = sqrtf( src->f[2] );
1098 dst->f[3] = sqrtf( src->f[3] );
1099 }
1100
1101 static void
1102 micro_sub(
1103 union tgsi_exec_channel *dst,
1104 const union tgsi_exec_channel *src0,
1105 const union tgsi_exec_channel *src1 )
1106 {
1107 dst->f[0] = src0->f[0] - src1->f[0];
1108 dst->f[1] = src0->f[1] - src1->f[1];
1109 dst->f[2] = src0->f[2] - src1->f[2];
1110 dst->f[3] = src0->f[3] - src1->f[3];
1111 }
1112
#if 0
/**
 * Per-lane unsigned-to-float conversion: dst.f = (float) src.u.
 * Currently compiled out.
 */
static void
micro_u2f(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   unsigned lane;

   /* one iteration per lane, 0..3 */
   for (lane = 0; lane < 4; lane++) {
      dst->f[lane] = (float) src->u[lane];
   }
}
#endif
1125
1126 static void
1127 micro_xor(
1128 union tgsi_exec_channel *dst,
1129 const union tgsi_exec_channel *src0,
1130 const union tgsi_exec_channel *src1 )
1131 {
1132 dst->u[0] = src0->u[0] ^ src1->u[0];
1133 dst->u[1] = src0->u[1] ^ src1->u[1];
1134 dst->u[2] = src0->u[2] ^ src1->u[2];
1135 dst->u[3] = src0->u[3] ^ src1->u[3];
1136 }
1137
1138 static void
1139 fetch_src_file_channel(
1140 const struct tgsi_exec_machine *mach,
1141 const uint file,
1142 const uint swizzle,
1143 const union tgsi_exec_channel *index,
1144 union tgsi_exec_channel *chan )
1145 {
1146 switch( swizzle ) {
1147 case TGSI_SWIZZLE_X:
1148 case TGSI_SWIZZLE_Y:
1149 case TGSI_SWIZZLE_Z:
1150 case TGSI_SWIZZLE_W:
1151 switch( file ) {
1152 case TGSI_FILE_CONSTANT:
1153 assert(mach->Consts);
1154 if (index->i[0] < 0)
1155 chan->f[0] = 0.0f;
1156 else
1157 chan->f[0] = mach->Consts[index->i[0]][swizzle];
1158 if (index->i[1] < 0)
1159 chan->f[1] = 0.0f;
1160 else
1161 chan->f[1] = mach->Consts[index->i[1]][swizzle];
1162 if (index->i[2] < 0)
1163 chan->f[2] = 0.0f;
1164 else
1165 chan->f[2] = mach->Consts[index->i[2]][swizzle];
1166 if (index->i[3] < 0)
1167 chan->f[3] = 0.0f;
1168 else
1169 chan->f[3] = mach->Consts[index->i[3]][swizzle];
1170 break;
1171
1172 case TGSI_FILE_INPUT:
1173 case TGSI_FILE_SYSTEM_VALUE:
1174 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1175 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1176 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1177 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1178 break;
1179
1180 case TGSI_FILE_TEMPORARY:
1181 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1182 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1183 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1184 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1185 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1186 break;
1187
1188 case TGSI_FILE_IMMEDIATE:
1189 assert( index->i[0] < (int) mach->ImmLimit );
1190 chan->f[0] = mach->Imms[index->i[0]][swizzle];
1191 assert( index->i[1] < (int) mach->ImmLimit );
1192 chan->f[1] = mach->Imms[index->i[1]][swizzle];
1193 assert( index->i[2] < (int) mach->ImmLimit );
1194 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1195 assert( index->i[3] < (int) mach->ImmLimit );
1196 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1197 break;
1198
1199 case TGSI_FILE_ADDRESS:
1200 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1201 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1202 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1203 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1204 break;
1205
1206 case TGSI_FILE_PREDICATE:
1207 assert(index->i[0] < TGSI_EXEC_NUM_PREDS);
1208 assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
1209 assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
1210 assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
1211 chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0];
1212 chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1];
1213 chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2];
1214 chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3];
1215 break;
1216
1217 case TGSI_FILE_OUTPUT:
1218 /* vertex/fragment output vars can be read too */
1219 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1220 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1221 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1222 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1223 break;
1224
1225 default:
1226 assert( 0 );
1227 }
1228 break;
1229
1230 default:
1231 assert( 0 );
1232 }
1233 }
1234
1235 static void
1236 fetch_source(
1237 const struct tgsi_exec_machine *mach,
1238 union tgsi_exec_channel *chan,
1239 const struct tgsi_full_src_register *reg,
1240 const uint chan_index )
1241 {
1242 union tgsi_exec_channel index;
1243 uint swizzle;
1244
1245 /* We start with a direct index into a register file.
1246 *
1247 * file[1],
1248 * where:
1249 * file = Register.File
1250 * [1] = Register.Index
1251 */
1252 index.i[0] =
1253 index.i[1] =
1254 index.i[2] =
1255 index.i[3] = reg->Register.Index;
1256
1257 /* There is an extra source register that indirectly subscripts
1258 * a register file. The direct index now becomes an offset
1259 * that is being added to the indirect register.
1260 *
1261 * file[ind[2].x+1],
1262 * where:
1263 * ind = Indirect.File
1264 * [2] = Indirect.Index
1265 * .x = Indirect.SwizzleX
1266 */
1267 if (reg->Register.Indirect) {
1268 union tgsi_exec_channel index2;
1269 union tgsi_exec_channel indir_index;
1270 const uint execmask = mach->ExecMask;
1271 uint i;
1272
1273 /* which address register (always zero now) */
1274 index2.i[0] =
1275 index2.i[1] =
1276 index2.i[2] =
1277 index2.i[3] = reg->Indirect.Index;
1278
1279 /* get current value of address register[swizzle] */
1280 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1281 fetch_src_file_channel(
1282 mach,
1283 reg->Indirect.File,
1284 swizzle,
1285 &index2,
1286 &indir_index );
1287
1288 /* add value of address register to the offset */
1289 index.i[0] += (int) indir_index.f[0];
1290 index.i[1] += (int) indir_index.f[1];
1291 index.i[2] += (int) indir_index.f[2];
1292 index.i[3] += (int) indir_index.f[3];
1293
1294 /* for disabled execution channels, zero-out the index to
1295 * avoid using a potential garbage value.
1296 */
1297 for (i = 0; i < QUAD_SIZE; i++) {
1298 if ((execmask & (1 << i)) == 0)
1299 index.i[i] = 0;
1300 }
1301 }
1302
1303 /* There is an extra source register that is a second
1304 * subscript to a register file. Effectively it means that
1305 * the register file is actually a 2D array of registers.
1306 *
1307 * file[1][3] == file[1*sizeof(file[1])+3],
1308 * where:
1309 * [3] = Dimension.Index
1310 */
1311 if (reg->Register.Dimension) {
1312 /* The size of the first-order array depends on the register file type.
1313 * We need to multiply the index to the first array to get an effective,
1314 * "flat" index that points to the beginning of the second-order array.
1315 */
1316 switch (reg->Register.File) {
1317 case TGSI_FILE_INPUT:
1318 case TGSI_FILE_SYSTEM_VALUE:
1319 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1320 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1321 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1322 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1323 break;
1324 case TGSI_FILE_CONSTANT:
1325 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1326 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1327 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1328 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1329 break;
1330 default:
1331 assert( 0 );
1332 }
1333
1334 index.i[0] += reg->Dimension.Index;
1335 index.i[1] += reg->Dimension.Index;
1336 index.i[2] += reg->Dimension.Index;
1337 index.i[3] += reg->Dimension.Index;
1338
1339 /* Again, the second subscript index can be addressed indirectly
1340 * identically to the first one.
1341 * Nothing stops us from indirectly addressing the indirect register,
1342 * but there is no need for that, so we won't exercise it.
1343 *
1344 * file[1][ind[4].y+3],
1345 * where:
1346 * ind = DimIndirect.File
1347 * [4] = DimIndirect.Index
1348 * .y = DimIndirect.SwizzleX
1349 */
1350 if (reg->Dimension.Indirect) {
1351 union tgsi_exec_channel index2;
1352 union tgsi_exec_channel indir_index;
1353 const uint execmask = mach->ExecMask;
1354 uint i;
1355
1356 index2.i[0] =
1357 index2.i[1] =
1358 index2.i[2] =
1359 index2.i[3] = reg->DimIndirect.Index;
1360
1361 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
1362 fetch_src_file_channel(
1363 mach,
1364 reg->DimIndirect.File,
1365 swizzle,
1366 &index2,
1367 &indir_index );
1368
1369 index.i[0] += (int) indir_index.f[0];
1370 index.i[1] += (int) indir_index.f[1];
1371 index.i[2] += (int) indir_index.f[2];
1372 index.i[3] += (int) indir_index.f[3];
1373
1374 /* for disabled execution channels, zero-out the index to
1375 * avoid using a potential garbage value.
1376 */
1377 for (i = 0; i < QUAD_SIZE; i++) {
1378 if ((execmask & (1 << i)) == 0)
1379 index.i[i] = 0;
1380 }
1381 }
1382
1383 /* If by any chance there was a need for a 3D array of register
1384 * files, we would have to check whether Dimension is followed
1385 * by a dimension register and continue the saga.
1386 */
1387 }
1388
1389 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1390 fetch_src_file_channel(
1391 mach,
1392 reg->Register.File,
1393 swizzle,
1394 &index,
1395 chan );
1396
1397 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1398 case TGSI_UTIL_SIGN_CLEAR:
1399 micro_abs( chan, chan );
1400 break;
1401
1402 case TGSI_UTIL_SIGN_SET:
1403 micro_abs( chan, chan );
1404 micro_neg( chan, chan );
1405 break;
1406
1407 case TGSI_UTIL_SIGN_TOGGLE:
1408 micro_neg( chan, chan );
1409 break;
1410
1411 case TGSI_UTIL_SIGN_KEEP:
1412 break;
1413 }
1414 }
1415
1416 static void
1417 store_dest(
1418 struct tgsi_exec_machine *mach,
1419 const union tgsi_exec_channel *chan,
1420 const struct tgsi_full_dst_register *reg,
1421 const struct tgsi_full_instruction *inst,
1422 uint chan_index )
1423 {
1424 uint i;
1425 union tgsi_exec_channel null;
1426 union tgsi_exec_channel *dst;
1427 uint execmask = mach->ExecMask;
1428 int offset = 0; /* indirection offset */
1429 int index;
1430
1431 #ifdef DEBUG
1432 check_inf_or_nan(chan);
1433 #endif
1434
1435 /* There is an extra source register that indirectly subscripts
1436 * a register file. The direct index now becomes an offset
1437 * that is being added to the indirect register.
1438 *
1439 * file[ind[2].x+1],
1440 * where:
1441 * ind = Indirect.File
1442 * [2] = Indirect.Index
1443 * .x = Indirect.SwizzleX
1444 */
1445 if (reg->Register.Indirect) {
1446 union tgsi_exec_channel index;
1447 union tgsi_exec_channel indir_index;
1448 uint swizzle;
1449
1450 /* which address register (always zero for now) */
1451 index.i[0] =
1452 index.i[1] =
1453 index.i[2] =
1454 index.i[3] = reg->Indirect.Index;
1455
1456 /* get current value of address register[swizzle] */
1457 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1458
1459 /* fetch values from the address/indirection register */
1460 fetch_src_file_channel(
1461 mach,
1462 reg->Indirect.File,
1463 swizzle,
1464 &index,
1465 &indir_index );
1466
1467 /* save indirection offset */
1468 offset = (int) indir_index.f[0];
1469 }
1470
1471 switch (reg->Register.File) {
1472 case TGSI_FILE_NULL:
1473 dst = &null;
1474 break;
1475
1476 case TGSI_FILE_OUTPUT:
1477 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1478 + reg->Register.Index;
1479 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1480 #if 0
1481 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1482 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1483 for (i = 0; i < QUAD_SIZE; i++)
1484 if (execmask & (1 << i))
1485 fprintf(stderr, "%f, ", chan->f[i]);
1486 fprintf(stderr, ")\n");
1487 }
1488 #endif
1489 break;
1490
1491 case TGSI_FILE_TEMPORARY:
1492 index = reg->Register.Index;
1493 assert( index < TGSI_EXEC_NUM_TEMPS );
1494 dst = &mach->Temps[offset + index].xyzw[chan_index];
1495 break;
1496
1497 case TGSI_FILE_ADDRESS:
1498 index = reg->Register.Index;
1499 dst = &mach->Addrs[index].xyzw[chan_index];
1500 break;
1501
1502 case TGSI_FILE_LOOP:
1503 assert(reg->Register.Index == 0);
1504 assert(mach->LoopCounterStackTop > 0);
1505 assert(chan_index == CHAN_X);
1506 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
1507 break;
1508
1509 case TGSI_FILE_PREDICATE:
1510 index = reg->Register.Index;
1511 assert(index < TGSI_EXEC_NUM_PREDS);
1512 dst = &mach->Predicates[index].xyzw[chan_index];
1513 break;
1514
1515 default:
1516 assert( 0 );
1517 return;
1518 }
1519
1520 if (inst->Instruction.Predicate) {
1521 uint swizzle;
1522 union tgsi_exec_channel *pred;
1523
1524 switch (chan_index) {
1525 case CHAN_X:
1526 swizzle = inst->Predicate.SwizzleX;
1527 break;
1528 case CHAN_Y:
1529 swizzle = inst->Predicate.SwizzleY;
1530 break;
1531 case CHAN_Z:
1532 swizzle = inst->Predicate.SwizzleZ;
1533 break;
1534 case CHAN_W:
1535 swizzle = inst->Predicate.SwizzleW;
1536 break;
1537 default:
1538 assert(0);
1539 return;
1540 }
1541
1542 assert(inst->Predicate.Index == 0);
1543
1544 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1545
1546 if (inst->Predicate.Negate) {
1547 for (i = 0; i < QUAD_SIZE; i++) {
1548 if (pred->u[i]) {
1549 execmask &= ~(1 << i);
1550 }
1551 }
1552 } else {
1553 for (i = 0; i < QUAD_SIZE; i++) {
1554 if (!pred->u[i]) {
1555 execmask &= ~(1 << i);
1556 }
1557 }
1558 }
1559 }
1560
1561 switch (inst->Instruction.Saturate) {
1562 case TGSI_SAT_NONE:
1563 for (i = 0; i < QUAD_SIZE; i++)
1564 if (execmask & (1 << i))
1565 dst->i[i] = chan->i[i];
1566 break;
1567
1568 case TGSI_SAT_ZERO_ONE:
1569 for (i = 0; i < QUAD_SIZE; i++)
1570 if (execmask & (1 << i)) {
1571 if (chan->f[i] < 0.0f)
1572 dst->f[i] = 0.0f;
1573 else if (chan->f[i] > 1.0f)
1574 dst->f[i] = 1.0f;
1575 else
1576 dst->i[i] = chan->i[i];
1577 }
1578 break;
1579
1580 case TGSI_SAT_MINUS_PLUS_ONE:
1581 for (i = 0; i < QUAD_SIZE; i++)
1582 if (execmask & (1 << i)) {
1583 if (chan->f[i] < -1.0f)
1584 dst->f[i] = -1.0f;
1585 else if (chan->f[i] > 1.0f)
1586 dst->f[i] = 1.0f;
1587 else
1588 dst->i[i] = chan->i[i];
1589 }
1590 break;
1591
1592 default:
1593 assert( 0 );
1594 }
1595 }
1596
/*
 * Shorthands for reading source operand INDEX / writing destination
 * operand INDEX of the current instruction, one channel at a time.
 * Both expand to code that uses the variables `mach` and `inst`, which
 * must be in scope wherever the macros are used.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN)

#define STORE(VAL, INDEX, CHAN) \
   store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN)

1602
1603
1604 /**
1605 * Execute ARB-style KIL which is predicated by a src register.
1606 * Kill fragment if any of the four values is less than zero.
1607 */
1608 static void
1609 exec_kil(struct tgsi_exec_machine *mach,
1610 const struct tgsi_full_instruction *inst)
1611 {
1612 uint uniquemask;
1613 uint chan_index;
1614 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1615 union tgsi_exec_channel r[1];
1616
1617 /* This mask stores component bits that were already tested. */
1618 uniquemask = 0;
1619
1620 for (chan_index = 0; chan_index < 4; chan_index++)
1621 {
1622 uint swizzle;
1623 uint i;
1624
1625 /* unswizzle channel */
1626 swizzle = tgsi_util_get_full_src_register_swizzle (
1627 &inst->Src[0],
1628 chan_index);
1629
1630 /* check if the component has not been already tested */
1631 if (uniquemask & (1 << swizzle))
1632 continue;
1633 uniquemask |= 1 << swizzle;
1634
1635 FETCH(&r[0], 0, chan_index);
1636 for (i = 0; i < 4; i++)
1637 if (r[0].f[i] < 0.0f)
1638 kilmask |= 1 << i;
1639 }
1640
1641 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1642 }
1643
1644 /**
1645 * Execute NVIDIA-style KIL which is predicated by a condition code.
1646 * Kill fragment if the condition code is TRUE.
1647 */
1648 static void
1649 exec_kilp(struct tgsi_exec_machine *mach,
1650 const struct tgsi_full_instruction *inst)
1651 {
1652 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1653
1654 /* "unconditional" kil */
1655 kilmask = mach->ExecMask;
1656 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1657 }
1658
1659 static void
1660 emit_vertex(struct tgsi_exec_machine *mach)
1661 {
1662 /* FIXME: check for exec mask correctly
1663 unsigned i;
1664 for (i = 0; i < QUAD_SIZE; ++i) {
1665 if ((mach->ExecMask & (1 << i)))
1666 */
1667 if (mach->ExecMask) {
1668 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
1669 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1670 }
1671 }
1672
1673 static void
1674 emit_primitive(struct tgsi_exec_machine *mach)
1675 {
1676 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
1677 /* FIXME: check for exec mask correctly
1678 unsigned i;
1679 for (i = 0; i < QUAD_SIZE; ++i) {
1680 if ((mach->ExecMask & (1 << i)))
1681 */
1682 if (mach->ExecMask) {
1683 ++(*prim_count);
1684 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
1685 mach->Primitives[*prim_count] = 0;
1686 }
1687 }
1688
1689 /*
1690 * Fetch a four texture samples using STR texture coordinates.
1691 */
1692 static void
1693 fetch_texel( struct tgsi_sampler *sampler,
1694 const union tgsi_exec_channel *s,
1695 const union tgsi_exec_channel *t,
1696 const union tgsi_exec_channel *p,
1697 float lodbias, /* XXX should be float[4] */
1698 union tgsi_exec_channel *r,
1699 union tgsi_exec_channel *g,
1700 union tgsi_exec_channel *b,
1701 union tgsi_exec_channel *a )
1702 {
1703 uint j;
1704 float rgba[NUM_CHANNELS][QUAD_SIZE];
1705
1706 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1707
1708 for (j = 0; j < 4; j++) {
1709 r->f[j] = rgba[0][j];
1710 g->f[j] = rgba[1][j];
1711 b->f[j] = rgba[2][j];
1712 a->f[j] = rgba[3][j];
1713 }
1714 }
1715
1716
1717 static void
1718 exec_tex(struct tgsi_exec_machine *mach,
1719 const struct tgsi_full_instruction *inst,
1720 boolean biasLod,
1721 boolean projected)
1722 {
1723 const uint unit = inst->Src[1].Register.Index;
1724 union tgsi_exec_channel r[4];
1725 uint chan_index;
1726 float lodBias;
1727
1728 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1729
1730 switch (inst->Texture.Texture) {
1731 case TGSI_TEXTURE_1D:
1732 case TGSI_TEXTURE_SHADOW1D:
1733
1734 FETCH(&r[0], 0, CHAN_X);
1735
1736 if (projected) {
1737 FETCH(&r[1], 0, CHAN_W);
1738 micro_div( &r[0], &r[0], &r[1] );
1739 }
1740
1741 if (biasLod) {
1742 FETCH(&r[1], 0, CHAN_W);
1743 lodBias = r[2].f[0];
1744 }
1745 else
1746 lodBias = 0.0;
1747
1748 fetch_texel(mach->Samplers[unit],
1749 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */
1750 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1751 break;
1752
1753 case TGSI_TEXTURE_2D:
1754 case TGSI_TEXTURE_RECT:
1755 case TGSI_TEXTURE_SHADOW2D:
1756 case TGSI_TEXTURE_SHADOWRECT:
1757
1758 FETCH(&r[0], 0, CHAN_X);
1759 FETCH(&r[1], 0, CHAN_Y);
1760 FETCH(&r[2], 0, CHAN_Z);
1761
1762 if (projected) {
1763 FETCH(&r[3], 0, CHAN_W);
1764 micro_div( &r[0], &r[0], &r[3] );
1765 micro_div( &r[1], &r[1], &r[3] );
1766 micro_div( &r[2], &r[2], &r[3] );
1767 }
1768
1769 if (biasLod) {
1770 FETCH(&r[3], 0, CHAN_W);
1771 lodBias = r[3].f[0];
1772 }
1773 else
1774 lodBias = 0.0;
1775
1776 fetch_texel(mach->Samplers[unit],
1777 &r[0], &r[1], &r[2], lodBias, /* inputs */
1778 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1779 break;
1780
1781 case TGSI_TEXTURE_3D:
1782 case TGSI_TEXTURE_CUBE:
1783
1784 FETCH(&r[0], 0, CHAN_X);
1785 FETCH(&r[1], 0, CHAN_Y);
1786 FETCH(&r[2], 0, CHAN_Z);
1787
1788 if (projected) {
1789 FETCH(&r[3], 0, CHAN_W);
1790 micro_div( &r[0], &r[0], &r[3] );
1791 micro_div( &r[1], &r[1], &r[3] );
1792 micro_div( &r[2], &r[2], &r[3] );
1793 }
1794
1795 if (biasLod) {
1796 FETCH(&r[3], 0, CHAN_W);
1797 lodBias = r[3].f[0];
1798 }
1799 else
1800 lodBias = 0.0;
1801
1802 fetch_texel(mach->Samplers[unit],
1803 &r[0], &r[1], &r[2], lodBias,
1804 &r[0], &r[1], &r[2], &r[3]);
1805 break;
1806
1807 default:
1808 assert (0);
1809 }
1810
1811 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1812 STORE( &r[chan_index], 0, chan_index );
1813 }
1814 }
1815
1816 static void
1817 exec_txd(struct tgsi_exec_machine *mach,
1818 const struct tgsi_full_instruction *inst)
1819 {
1820 const uint unit = inst->Src[3].Register.Index;
1821 union tgsi_exec_channel r[4];
1822 uint chan_index;
1823
1824 /*
1825 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1826 */
1827
1828 switch (inst->Texture.Texture) {
1829 case TGSI_TEXTURE_1D:
1830 case TGSI_TEXTURE_SHADOW1D:
1831
1832 FETCH(&r[0], 0, CHAN_X);
1833
1834 fetch_texel(mach->Samplers[unit],
1835 &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */
1836 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1837 break;
1838
1839 case TGSI_TEXTURE_2D:
1840 case TGSI_TEXTURE_RECT:
1841 case TGSI_TEXTURE_SHADOW2D:
1842 case TGSI_TEXTURE_SHADOWRECT:
1843
1844 FETCH(&r[0], 0, CHAN_X);
1845 FETCH(&r[1], 0, CHAN_Y);
1846 FETCH(&r[2], 0, CHAN_Z);
1847
1848 fetch_texel(mach->Samplers[unit],
1849 &r[0], &r[1], &r[2], 0.0f, /* inputs */
1850 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1851 break;
1852
1853 case TGSI_TEXTURE_3D:
1854 case TGSI_TEXTURE_CUBE:
1855
1856 FETCH(&r[0], 0, CHAN_X);
1857 FETCH(&r[1], 0, CHAN_Y);
1858 FETCH(&r[2], 0, CHAN_Z);
1859
1860 fetch_texel(mach->Samplers[unit],
1861 &r[0], &r[1], &r[2], 0.0f,
1862 &r[0], &r[1], &r[2], &r[3]);
1863 break;
1864
1865 default:
1866 assert(0);
1867 }
1868
1869 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1870 STORE(&r[chan_index], 0, chan_index);
1871 }
1872 }
1873
1874
1875 /**
1876 * Evaluate a constant-valued coefficient at the position of the
1877 * current quad.
1878 */
1879 static void
1880 eval_constant_coef(
1881 struct tgsi_exec_machine *mach,
1882 unsigned attrib,
1883 unsigned chan )
1884 {
1885 unsigned i;
1886
1887 for( i = 0; i < QUAD_SIZE; i++ ) {
1888 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1889 }
1890 }
1891
1892 /**
1893 * Evaluate a linear-valued coefficient at the position of the
1894 * current quad.
1895 */
1896 static void
1897 eval_linear_coef(
1898 struct tgsi_exec_machine *mach,
1899 unsigned attrib,
1900 unsigned chan )
1901 {
1902 const float x = mach->QuadPos.xyzw[0].f[0];
1903 const float y = mach->QuadPos.xyzw[1].f[0];
1904 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1905 const float dady = mach->InterpCoefs[attrib].dady[chan];
1906 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1907 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1908 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1909 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1910 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1911 }
1912
1913 /**
1914 * Evaluate a perspective-valued coefficient at the position of the
1915 * current quad.
1916 */
1917 static void
1918 eval_perspective_coef(
1919 struct tgsi_exec_machine *mach,
1920 unsigned attrib,
1921 unsigned chan )
1922 {
1923 const float x = mach->QuadPos.xyzw[0].f[0];
1924 const float y = mach->QuadPos.xyzw[1].f[0];
1925 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1926 const float dady = mach->InterpCoefs[attrib].dady[chan];
1927 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1928 const float *w = mach->QuadPos.xyzw[3].f;
1929 /* divide by W here */
1930 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1931 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1932 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1933 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1934 }
1935
1936
1937 typedef void (* eval_coef_func)(
1938 struct tgsi_exec_machine *mach,
1939 unsigned attrib,
1940 unsigned chan );
1941
1942 static void
1943 exec_declaration(struct tgsi_exec_machine *mach,
1944 const struct tgsi_full_declaration *decl)
1945 {
1946 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
1947 if (decl->Declaration.File == TGSI_FILE_INPUT ||
1948 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1949 uint first, last, mask;
1950
1951 first = decl->Range.First;
1952 last = decl->Range.Last;
1953 mask = decl->Declaration.UsageMask;
1954
1955 if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
1956 assert(decl->Semantic.Index == 0);
1957 assert(first == last);
1958 assert(mask = TGSI_WRITEMASK_XYZW);
1959
1960 mach->Inputs[first] = mach->QuadPos;
1961 } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
1962 uint i;
1963
1964 assert(decl->Semantic.Index == 0);
1965 assert(first == last);
1966
1967 for (i = 0; i < QUAD_SIZE; i++) {
1968 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
1969 }
1970 } else {
1971 eval_coef_func eval;
1972 uint i, j;
1973
1974 switch (decl->Declaration.Interpolate) {
1975 case TGSI_INTERPOLATE_CONSTANT:
1976 eval = eval_constant_coef;
1977 break;
1978
1979 case TGSI_INTERPOLATE_LINEAR:
1980 eval = eval_linear_coef;
1981 break;
1982
1983 case TGSI_INTERPOLATE_PERSPECTIVE:
1984 eval = eval_perspective_coef;
1985 break;
1986
1987 default:
1988 assert(0);
1989 return;
1990 }
1991
1992 for (j = 0; j < NUM_CHANNELS; j++) {
1993 if (mask & (1 << j)) {
1994 for (i = first; i <= last; i++) {
1995 eval(mach, i, j);
1996 }
1997 }
1998 }
1999 }
2000 }
2001 }
2002 }
2003
2004 static void
2005 exec_instruction(
2006 struct tgsi_exec_machine *mach,
2007 const struct tgsi_full_instruction *inst,
2008 int *pc )
2009 {
2010 uint chan_index;
2011 union tgsi_exec_channel r[10];
2012 union tgsi_exec_channel d[8];
2013
2014 (*pc)++;
2015
2016 switch (inst->Instruction.Opcode) {
2017 case TGSI_OPCODE_ARL:
2018 case TGSI_OPCODE_FLR:
2019 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2020 FETCH( &r[0], 0, chan_index );
2021 micro_flr(&d[chan_index], &r[0]);
2022 }
2023 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2024 STORE(&d[chan_index], 0, chan_index);
2025 }
2026 break;
2027
2028 case TGSI_OPCODE_MOV:
2029 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2030 FETCH(&d[chan_index], 0, chan_index);
2031 }
2032 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2033 STORE(&d[chan_index], 0, chan_index);
2034 }
2035 break;
2036
2037 case TGSI_OPCODE_LIT:
2038 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2039 FETCH( &r[0], 0, CHAN_X );
2040 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2041 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2042 }
2043
2044 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2045 FETCH( &r[1], 0, CHAN_Y );
2046 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2047
2048 FETCH( &r[2], 0, CHAN_W );
2049 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
2050 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
2051 micro_pow( &r[1], &r[1], &r[2] );
2052 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2053 }
2054
2055 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2056 STORE(&d[CHAN_Y], 0, CHAN_Y);
2057 }
2058 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2059 STORE(&d[CHAN_Z], 0, CHAN_Z);
2060 }
2061 }
2062 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2063 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2064 }
2065 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2066 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2067 }
2068 break;
2069
2070 case TGSI_OPCODE_RCP:
2071 /* TGSI_OPCODE_RECIP */
2072 FETCH( &r[0], 0, CHAN_X );
2073 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2074 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2075 STORE( &r[0], 0, chan_index );
2076 }
2077 break;
2078
2079 case TGSI_OPCODE_RSQ:
2080 /* TGSI_OPCODE_RECIPSQRT */
2081 FETCH( &r[0], 0, CHAN_X );
2082 micro_abs( &r[0], &r[0] );
2083 micro_sqrt( &r[0], &r[0] );
2084 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2085 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2086 STORE( &r[0], 0, chan_index );
2087 }
2088 break;
2089
2090 case TGSI_OPCODE_EXP:
2091 FETCH( &r[0], 0, CHAN_X );
2092 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
2093 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2094 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
2095 STORE( &r[2], 0, CHAN_X ); /* store r2 */
2096 }
2097 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2098 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
2099 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
2100 }
2101 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2102 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
2103 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
2104 }
2105 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2106 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2107 }
2108 break;
2109
2110 case TGSI_OPCODE_LOG:
2111 FETCH( &r[0], 0, CHAN_X );
2112 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
2113 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
2114 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
2115 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2116 STORE( &r[0], 0, CHAN_X );
2117 }
2118 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2119 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
2120 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
2121 STORE( &r[0], 0, CHAN_Y );
2122 }
2123 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2124 STORE( &r[1], 0, CHAN_Z );
2125 }
2126 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2127 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2128 }
2129 break;
2130
2131 case TGSI_OPCODE_MUL:
2132 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2133 FETCH(&r[0], 0, chan_index);
2134 FETCH(&r[1], 1, chan_index);
2135 micro_mul(&d[chan_index], &r[0], &r[1]);
2136 }
2137 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2138 STORE(&d[chan_index], 0, chan_index);
2139 }
2140 break;
2141
2142 case TGSI_OPCODE_ADD:
2143 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2144 FETCH( &r[0], 0, chan_index );
2145 FETCH( &r[1], 1, chan_index );
2146 micro_add(&d[chan_index], &r[0], &r[1]);
2147 }
2148 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2149 STORE(&d[chan_index], 0, chan_index);
2150 }
2151 break;
2152
2153 case TGSI_OPCODE_DP3:
2154 /* TGSI_OPCODE_DOT3 */
2155 FETCH( &r[0], 0, CHAN_X );
2156 FETCH( &r[1], 1, CHAN_X );
2157 micro_mul( &r[0], &r[0], &r[1] );
2158
2159 FETCH( &r[1], 0, CHAN_Y );
2160 FETCH( &r[2], 1, CHAN_Y );
2161 micro_mul( &r[1], &r[1], &r[2] );
2162 micro_add( &r[0], &r[0], &r[1] );
2163
2164 FETCH( &r[1], 0, CHAN_Z );
2165 FETCH( &r[2], 1, CHAN_Z );
2166 micro_mul( &r[1], &r[1], &r[2] );
2167 micro_add( &r[0], &r[0], &r[1] );
2168
2169 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2170 STORE( &r[0], 0, chan_index );
2171 }
2172 break;
2173
2174 case TGSI_OPCODE_DP4:
2175 /* TGSI_OPCODE_DOT4 */
2176 FETCH(&r[0], 0, CHAN_X);
2177 FETCH(&r[1], 1, CHAN_X);
2178
2179 micro_mul( &r[0], &r[0], &r[1] );
2180
2181 FETCH(&r[1], 0, CHAN_Y);
2182 FETCH(&r[2], 1, CHAN_Y);
2183
2184 micro_mul( &r[1], &r[1], &r[2] );
2185 micro_add( &r[0], &r[0], &r[1] );
2186
2187 FETCH(&r[1], 0, CHAN_Z);
2188 FETCH(&r[2], 1, CHAN_Z);
2189
2190 micro_mul( &r[1], &r[1], &r[2] );
2191 micro_add( &r[0], &r[0], &r[1] );
2192
2193 FETCH(&r[1], 0, CHAN_W);
2194 FETCH(&r[2], 1, CHAN_W);
2195
2196 micro_mul( &r[1], &r[1], &r[2] );
2197 micro_add( &r[0], &r[0], &r[1] );
2198
2199 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2200 STORE( &r[0], 0, chan_index );
2201 }
2202 break;
2203
2204 case TGSI_OPCODE_DST:
2205 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2206 FETCH( &r[0], 0, CHAN_Y );
2207 FETCH( &r[1], 1, CHAN_Y);
2208 micro_mul(&d[CHAN_Y], &r[0], &r[1]);
2209 }
2210 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2211 FETCH(&d[CHAN_Z], 0, CHAN_Z);
2212 }
2213 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2214 FETCH(&d[CHAN_W], 1, CHAN_W);
2215 }
2216
2217 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2218 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
2219 }
2220 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2221 STORE(&d[CHAN_Y], 0, CHAN_Y);
2222 }
2223 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2224 STORE(&d[CHAN_Z], 0, CHAN_Z);
2225 }
2226 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2227 STORE(&d[CHAN_W], 0, CHAN_W);
2228 }
2229 break;
2230
2231 case TGSI_OPCODE_MIN:
2232 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2233 FETCH(&r[0], 0, chan_index);
2234 FETCH(&r[1], 1, chan_index);
2235
2236 /* XXX use micro_min()?? */
2237 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
2238 }
2239 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2240 STORE(&d[chan_index], 0, chan_index);
2241 }
2242 break;
2243
2244 case TGSI_OPCODE_MAX:
2245 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2246 FETCH(&r[0], 0, chan_index);
2247 FETCH(&r[1], 1, chan_index);
2248
2249 /* XXX use micro_max()?? */
2250 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
2251 }
2252 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2253 STORE(&d[chan_index], 0, chan_index);
2254 }
2255 break;
2256
2257 case TGSI_OPCODE_SLT:
2258 /* TGSI_OPCODE_SETLT */
2259 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2260 FETCH( &r[0], 0, chan_index );
2261 FETCH( &r[1], 1, chan_index );
2262 micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2263 }
2264 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2265 STORE(&d[chan_index], 0, chan_index);
2266 }
2267 break;
2268
2269 case TGSI_OPCODE_SGE:
2270 /* TGSI_OPCODE_SETGE */
2271 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2272 FETCH( &r[0], 0, chan_index );
2273 FETCH( &r[1], 1, chan_index );
2274 micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2275 }
2276 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2277 STORE(&d[chan_index], 0, chan_index);
2278 }
2279 break;
2280
2281 case TGSI_OPCODE_MAD:
2282 /* TGSI_OPCODE_MADD */
2283 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2284 FETCH( &r[0], 0, chan_index );
2285 FETCH( &r[1], 1, chan_index );
2286 micro_mul( &r[0], &r[0], &r[1] );
2287 FETCH( &r[1], 2, chan_index );
2288 micro_add(&d[chan_index], &r[0], &r[1]);
2289 }
2290 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2291 STORE(&d[chan_index], 0, chan_index);
2292 }
2293 break;
2294
2295 case TGSI_OPCODE_SUB:
2296 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2297 FETCH(&r[0], 0, chan_index);
2298 FETCH(&r[1], 1, chan_index);
2299 micro_sub(&d[chan_index], &r[0], &r[1]);
2300 }
2301 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2302 STORE(&d[chan_index], 0, chan_index);
2303 }
2304 break;
2305
2306 case TGSI_OPCODE_LRP:
2307 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2308 FETCH(&r[0], 0, chan_index);
2309 FETCH(&r[1], 1, chan_index);
2310 FETCH(&r[2], 2, chan_index);
2311 micro_sub( &r[1], &r[1], &r[2] );
2312 micro_mul( &r[0], &r[0], &r[1] );
2313 micro_add(&d[chan_index], &r[0], &r[2]);
2314 }
2315 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2316 STORE(&d[chan_index], 0, chan_index);
2317 }
2318 break;
2319
2320 case TGSI_OPCODE_CND:
2321 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2322 FETCH(&r[0], 0, chan_index);
2323 FETCH(&r[1], 1, chan_index);
2324 FETCH(&r[2], 2, chan_index);
2325 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2326 }
2327 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2328 STORE(&d[chan_index], 0, chan_index);
2329 }
2330 break;
2331
2332 case TGSI_OPCODE_DP2A:
2333 FETCH( &r[0], 0, CHAN_X );
2334 FETCH( &r[1], 1, CHAN_X );
2335 micro_mul( &r[0], &r[0], &r[1] );
2336
2337 FETCH( &r[1], 0, CHAN_Y );
2338 FETCH( &r[2], 1, CHAN_Y );
2339 micro_mul( &r[1], &r[1], &r[2] );
2340 micro_add( &r[0], &r[0], &r[1] );
2341
2342 FETCH( &r[2], 2, CHAN_X );
2343 micro_add( &r[0], &r[0], &r[2] );
2344
2345 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2346 STORE( &r[0], 0, chan_index );
2347 }
2348 break;
2349
2350 case TGSI_OPCODE_FRC:
2351 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2352 FETCH( &r[0], 0, chan_index );
2353 micro_frc(&d[chan_index], &r[0]);
2354 }
2355 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2356 STORE(&d[chan_index], 0, chan_index);
2357 }
2358 break;
2359
2360 case TGSI_OPCODE_CLAMP:
2361 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2362 FETCH(&r[0], 0, chan_index);
2363 FETCH(&r[1], 1, chan_index);
2364 micro_max(&r[0], &r[0], &r[1]);
2365 FETCH(&r[1], 2, chan_index);
2366 micro_min(&d[chan_index], &r[0], &r[1]);
2367 }
2368 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2369 STORE(&d[chan_index], 0, chan_index);
2370 }
2371 break;
2372
2373 case TGSI_OPCODE_ROUND:
2374 case TGSI_OPCODE_ARR:
2375 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2376 FETCH( &r[0], 0, chan_index );
2377 micro_rnd(&d[chan_index], &r[0]);
2378 }
2379 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2380 STORE(&d[chan_index], 0, chan_index);
2381 }
2382 break;
2383
2384 case TGSI_OPCODE_EX2:
2385 FETCH(&r[0], 0, CHAN_X);
2386
2387 micro_exp2( &r[0], &r[0] );
2388
2389 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2390 STORE( &r[0], 0, chan_index );
2391 }
2392 break;
2393
2394 case TGSI_OPCODE_LG2:
2395 FETCH( &r[0], 0, CHAN_X );
2396 micro_lg2( &r[0], &r[0] );
2397 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2398 STORE( &r[0], 0, chan_index );
2399 }
2400 break;
2401
2402 case TGSI_OPCODE_POW:
2403 FETCH(&r[0], 0, CHAN_X);
2404 FETCH(&r[1], 1, CHAN_X);
2405
2406 micro_pow( &r[0], &r[0], &r[1] );
2407
2408 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2409 STORE( &r[0], 0, chan_index );
2410 }
2411 break;
2412
2413 case TGSI_OPCODE_XPD:
2414 FETCH(&r[0], 0, CHAN_Y);
2415 FETCH(&r[1], 1, CHAN_Z);
2416
2417 micro_mul( &r[2], &r[0], &r[1] );
2418
2419 FETCH(&r[3], 0, CHAN_Z);
2420 FETCH(&r[4], 1, CHAN_Y);
2421
2422 micro_mul( &r[5], &r[3], &r[4] );
2423 micro_sub(&d[CHAN_X], &r[2], &r[5]);
2424
2425 FETCH(&r[2], 1, CHAN_X);
2426
2427 micro_mul( &r[3], &r[3], &r[2] );
2428
2429 FETCH(&r[5], 0, CHAN_X);
2430
2431 micro_mul( &r[1], &r[1], &r[5] );
2432 micro_sub(&d[CHAN_Y], &r[3], &r[1]);
2433
2434 micro_mul( &r[5], &r[5], &r[4] );
2435 micro_mul( &r[0], &r[0], &r[2] );
2436 micro_sub(&d[CHAN_Z], &r[5], &r[0]);
2437
2438 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2439 STORE(&d[CHAN_X], 0, CHAN_X);
2440 }
2441 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2442 STORE(&d[CHAN_Y], 0, CHAN_Y);
2443 }
2444 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2445 STORE(&d[CHAN_Z], 0, CHAN_Z);
2446 }
2447 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2448 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2449 }
2450 break;
2451
2452 case TGSI_OPCODE_ABS:
2453 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2454 FETCH(&r[0], 0, chan_index);
2455 micro_abs(&d[chan_index], &r[0]);
2456 }
2457 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2458 STORE(&d[chan_index], 0, chan_index);
2459 }
2460 break;
2461
2462 case TGSI_OPCODE_RCC:
2463 FETCH(&r[0], 0, CHAN_X);
2464 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2465 micro_float_clamp(&r[0], &r[0]);
2466 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2467 STORE(&r[0], 0, chan_index);
2468 }
2469 break;
2470
2471 case TGSI_OPCODE_DPH:
2472 FETCH(&r[0], 0, CHAN_X);
2473 FETCH(&r[1], 1, CHAN_X);
2474
2475 micro_mul( &r[0], &r[0], &r[1] );
2476
2477 FETCH(&r[1], 0, CHAN_Y);
2478 FETCH(&r[2], 1, CHAN_Y);
2479
2480 micro_mul( &r[1], &r[1], &r[2] );
2481 micro_add( &r[0], &r[0], &r[1] );
2482
2483 FETCH(&r[1], 0, CHAN_Z);
2484 FETCH(&r[2], 1, CHAN_Z);
2485
2486 micro_mul( &r[1], &r[1], &r[2] );
2487 micro_add( &r[0], &r[0], &r[1] );
2488
2489 FETCH(&r[1], 1, CHAN_W);
2490
2491 micro_add( &r[0], &r[0], &r[1] );
2492
2493 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2494 STORE( &r[0], 0, chan_index );
2495 }
2496 break;
2497
2498 case TGSI_OPCODE_COS:
2499 FETCH(&r[0], 0, CHAN_X);
2500
2501 micro_cos( &r[0], &r[0] );
2502
2503 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2504 STORE( &r[0], 0, chan_index );
2505 }
2506 break;
2507
2508 case TGSI_OPCODE_DDX:
2509 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2510 FETCH( &r[0], 0, chan_index );
2511 micro_ddx(&d[chan_index], &r[0]);
2512 }
2513 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2514 STORE(&d[chan_index], 0, chan_index);
2515 }
2516 break;
2517
2518 case TGSI_OPCODE_DDY:
2519 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2520 FETCH( &r[0], 0, chan_index );
2521 micro_ddy(&d[chan_index], &r[0]);
2522 }
2523 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2524 STORE(&d[chan_index], 0, chan_index);
2525 }
2526 break;
2527
2528 case TGSI_OPCODE_KILP:
2529 exec_kilp (mach, inst);
2530 break;
2531
2532 case TGSI_OPCODE_KIL:
2533 exec_kil (mach, inst);
2534 break;
2535
2536 case TGSI_OPCODE_PK2H:
2537 assert (0);
2538 break;
2539
2540 case TGSI_OPCODE_PK2US:
2541 assert (0);
2542 break;
2543
2544 case TGSI_OPCODE_PK4B:
2545 assert (0);
2546 break;
2547
2548 case TGSI_OPCODE_PK4UB:
2549 assert (0);
2550 break;
2551
2552 case TGSI_OPCODE_RFL:
2553 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2554 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2555 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2556 /* r0 = dp3(src0, src0) */
2557 FETCH(&r[2], 0, CHAN_X);
2558 micro_mul(&r[0], &r[2], &r[2]);
2559 FETCH(&r[4], 0, CHAN_Y);
2560 micro_mul(&r[8], &r[4], &r[4]);
2561 micro_add(&r[0], &r[0], &r[8]);
2562 FETCH(&r[6], 0, CHAN_Z);
2563 micro_mul(&r[8], &r[6], &r[6]);
2564 micro_add(&r[0], &r[0], &r[8]);
2565
2566 /* r1 = dp3(src0, src1) */
2567 FETCH(&r[3], 1, CHAN_X);
2568 micro_mul(&r[1], &r[2], &r[3]);
2569 FETCH(&r[5], 1, CHAN_Y);
2570 micro_mul(&r[8], &r[4], &r[5]);
2571 micro_add(&r[1], &r[1], &r[8]);
2572 FETCH(&r[7], 1, CHAN_Z);
2573 micro_mul(&r[8], &r[6], &r[7]);
2574 micro_add(&r[1], &r[1], &r[8]);
2575
2576 /* r1 = 2 * r1 / r0 */
2577 micro_add(&r[1], &r[1], &r[1]);
2578 micro_div(&r[1], &r[1], &r[0]);
2579
2580 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2581 micro_mul(&r[2], &r[2], &r[1]);
2582 micro_sub(&r[2], &r[2], &r[3]);
2583 STORE(&r[2], 0, CHAN_X);
2584 }
2585 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2586 micro_mul(&r[4], &r[4], &r[1]);
2587 micro_sub(&r[4], &r[4], &r[5]);
2588 STORE(&r[4], 0, CHAN_Y);
2589 }
2590 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2591 micro_mul(&r[6], &r[6], &r[1]);
2592 micro_sub(&r[6], &r[6], &r[7]);
2593 STORE(&r[6], 0, CHAN_Z);
2594 }
2595 }
2596 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2597 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2598 }
2599 break;
2600
2601 case TGSI_OPCODE_SEQ:
2602 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2603 FETCH( &r[0], 0, chan_index );
2604 FETCH( &r[1], 1, chan_index );
2605 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2606 }
2607 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2608 STORE(&d[chan_index], 0, chan_index);
2609 }
2610 break;
2611
2612 case TGSI_OPCODE_SFL:
2613 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2614 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2615 }
2616 break;
2617
2618 case TGSI_OPCODE_SGT:
2619 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2620 FETCH( &r[0], 0, chan_index );
2621 FETCH( &r[1], 1, chan_index );
2622 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
2623 }
2624 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2625 STORE(&d[chan_index], 0, chan_index);
2626 }
2627 break;
2628
2629 case TGSI_OPCODE_SIN:
2630 FETCH( &r[0], 0, CHAN_X );
2631 micro_sin( &r[0], &r[0] );
2632 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2633 STORE( &r[0], 0, chan_index );
2634 }
2635 break;
2636
2637 case TGSI_OPCODE_SLE:
2638 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2639 FETCH( &r[0], 0, chan_index );
2640 FETCH( &r[1], 1, chan_index );
2641 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2642 }
2643 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2644 STORE(&d[chan_index], 0, chan_index);
2645 }
2646 break;
2647
2648 case TGSI_OPCODE_SNE:
2649 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2650 FETCH( &r[0], 0, chan_index );
2651 FETCH( &r[1], 1, chan_index );
2652 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
2653 }
2654 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2655 STORE(&d[chan_index], 0, chan_index);
2656 }
2657 break;
2658
2659 case TGSI_OPCODE_STR:
2660 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2661 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2662 }
2663 break;
2664
2665 case TGSI_OPCODE_TEX:
2666 /* simple texture lookup */
2667 /* src[0] = texcoord */
2668 /* src[1] = sampler unit */
2669 exec_tex(mach, inst, FALSE, FALSE);
2670 break;
2671
2672 case TGSI_OPCODE_TXB:
2673 /* Texture lookup with lod bias */
2674 /* src[0] = texcoord (src[0].w = LOD bias) */
2675 /* src[1] = sampler unit */
2676 exec_tex(mach, inst, TRUE, FALSE);
2677 break;
2678
2679 case TGSI_OPCODE_TXD:
2680 /* Texture lookup with explicit partial derivatives */
2681 /* src[0] = texcoord */
2682 /* src[1] = d[strq]/dx */
2683 /* src[2] = d[strq]/dy */
2684 /* src[3] = sampler unit */
2685 exec_txd(mach, inst);
2686 break;
2687
2688 case TGSI_OPCODE_TXL:
2689 /* Texture lookup with explicit LOD */
2690 /* src[0] = texcoord (src[0].w = LOD) */
2691 /* src[1] = sampler unit */
2692 exec_tex(mach, inst, TRUE, FALSE);
2693 break;
2694
2695 case TGSI_OPCODE_TXP:
2696 /* Texture lookup with projection */
2697 /* src[0] = texcoord (src[0].w = projection) */
2698 /* src[1] = sampler unit */
2699 exec_tex(mach, inst, FALSE, TRUE);
2700 break;
2701
2702 case TGSI_OPCODE_UP2H:
2703 assert (0);
2704 break;
2705
2706 case TGSI_OPCODE_UP2US:
2707 assert (0);
2708 break;
2709
2710 case TGSI_OPCODE_UP4B:
2711 assert (0);
2712 break;
2713
2714 case TGSI_OPCODE_UP4UB:
2715 assert (0);
2716 break;
2717
2718 case TGSI_OPCODE_X2D:
2719 FETCH(&r[0], 1, CHAN_X);
2720 FETCH(&r[1], 1, CHAN_Y);
2721 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2722 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2723 FETCH(&r[2], 2, CHAN_X);
2724 micro_mul(&r[2], &r[2], &r[0]);
2725 FETCH(&r[3], 2, CHAN_Y);
2726 micro_mul(&r[3], &r[3], &r[1]);
2727 micro_add(&r[2], &r[2], &r[3]);
2728 FETCH(&r[3], 0, CHAN_X);
2729 micro_add(&d[CHAN_X], &r[2], &r[3]);
2730
2731 }
2732 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2733 IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2734 FETCH(&r[2], 2, CHAN_Z);
2735 micro_mul(&r[2], &r[2], &r[0]);
2736 FETCH(&r[3], 2, CHAN_W);
2737 micro_mul(&r[3], &r[3], &r[1]);
2738 micro_add(&r[2], &r[2], &r[3]);
2739 FETCH(&r[3], 0, CHAN_Y);
2740 micro_add(&d[CHAN_Y], &r[2], &r[3]);
2741
2742 }
2743 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2744 STORE(&d[CHAN_X], 0, CHAN_X);
2745 }
2746 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2747 STORE(&d[CHAN_Y], 0, CHAN_Y);
2748 }
2749 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2750 STORE(&d[CHAN_X], 0, CHAN_Z);
2751 }
2752 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2753 STORE(&d[CHAN_Y], 0, CHAN_W);
2754 }
2755 break;
2756
2757 case TGSI_OPCODE_ARA:
2758 assert (0);
2759 break;
2760
2761 case TGSI_OPCODE_BRA:
2762 assert (0);
2763 break;
2764
2765 case TGSI_OPCODE_CAL:
2766 /* skip the call if no execution channels are enabled */
2767 if (mach->ExecMask) {
2768 /* do the call */
2769
2770 /* First, record the depths of the execution stacks.
2771 * This is important for deeply nested/looped return statements.
2772 * We have to unwind the stacks by the correct amount. For a
2773 * real code generator, we could determine the number of entries
2774 * to pop off each stack with simple static analysis and avoid
2775 * implementing this data structure at run time.
2776 */
2777 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
2778 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
2779 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
2780 /* note that PC was already incremented above */
2781 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
2782
2783 mach->CallStackTop++;
2784
2785 /* Second, push the Cond, Loop, Cont, Func stacks */
2786 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2787 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2788 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2789 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2790 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2791 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2792 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2793 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2794
2795 /* Finally, jump to the subroutine */
2796 *pc = inst->Label.Label;
2797 }
2798 break;
2799
2800 case TGSI_OPCODE_RET:
2801 mach->FuncMask &= ~mach->ExecMask;
2802 UPDATE_EXEC_MASK(mach);
2803
2804 if (mach->FuncMask == 0x0) {
2805 /* really return now (otherwise, keep executing) */
2806
2807 if (mach->CallStackTop == 0) {
2808 /* returning from main() */
2809 *pc = -1;
2810 return;
2811 }
2812
2813 assert(mach->CallStackTop > 0);
2814 mach->CallStackTop--;
2815
2816 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
2817 mach->CondMask = mach->CondStack[mach->CondStackTop];
2818
2819 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
2820 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
2821
2822 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
2823 mach->ContMask = mach->ContStack[mach->ContStackTop];
2824
2825 assert(mach->FuncStackTop > 0);
2826 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2827
2828 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
2829
2830 UPDATE_EXEC_MASK(mach);
2831 }
2832 break;
2833
2834 case TGSI_OPCODE_SSG:
2835 /* TGSI_OPCODE_SGN */
2836 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2837 FETCH( &r[0], 0, chan_index );
2838 micro_sgn(&d[chan_index], &r[0]);
2839 }
2840 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2841 STORE(&d[chan_index], 0, chan_index);
2842 }
2843 break;
2844
2845 case TGSI_OPCODE_CMP:
2846 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2847 FETCH(&r[0], 0, chan_index);
2848 FETCH(&r[1], 1, chan_index);
2849 FETCH(&r[2], 2, chan_index);
2850 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
2851 }
2852 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2853 STORE(&d[chan_index], 0, chan_index);
2854 }
2855 break;
2856
2857 case TGSI_OPCODE_SCS:
2858 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2859 FETCH( &r[0], 0, CHAN_X );
2860 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2861 micro_cos(&r[1], &r[0]);
2862 STORE(&r[1], 0, CHAN_X);
2863 }
2864 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2865 micro_sin(&r[1], &r[0]);
2866 STORE(&r[1], 0, CHAN_Y);
2867 }
2868 }
2869 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2870 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2871 }
2872 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2873 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2874 }
2875 break;
2876
2877 case TGSI_OPCODE_NRM:
2878 /* 3-component vector normalize */
2879 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2880 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2881 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2882 /* r3 = sqrt(dp3(src0, src0)) */
2883 FETCH(&r[0], 0, CHAN_X);
2884 micro_mul(&r[3], &r[0], &r[0]);
2885 FETCH(&r[1], 0, CHAN_Y);
2886 micro_mul(&r[4], &r[1], &r[1]);
2887 micro_add(&r[3], &r[3], &r[4]);
2888 FETCH(&r[2], 0, CHAN_Z);
2889 micro_mul(&r[4], &r[2], &r[2]);
2890 micro_add(&r[3], &r[3], &r[4]);
2891 micro_sqrt(&r[3], &r[3]);
2892
2893 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2894 micro_div(&r[0], &r[0], &r[3]);
2895 STORE(&r[0], 0, CHAN_X);
2896 }
2897 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2898 micro_div(&r[1], &r[1], &r[3]);
2899 STORE(&r[1], 0, CHAN_Y);
2900 }
2901 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2902 micro_div(&r[2], &r[2], &r[3]);
2903 STORE(&r[2], 0, CHAN_Z);
2904 }
2905 }
2906 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2907 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2908 }
2909 break;
2910
2911 case TGSI_OPCODE_NRM4:
2912 /* 4-component vector normalize */
2913 {
2914 union tgsi_exec_channel tmp, dot;
2915
2916 /* tmp = dp4(src0, src0): */
2917 FETCH( &r[0], 0, CHAN_X );
2918 micro_mul( &tmp, &r[0], &r[0] );
2919
2920 FETCH( &r[1], 0, CHAN_Y );
2921 micro_mul( &dot, &r[1], &r[1] );
2922 micro_add( &tmp, &tmp, &dot );
2923
2924 FETCH( &r[2], 0, CHAN_Z );
2925 micro_mul( &dot, &r[2], &r[2] );
2926 micro_add( &tmp, &tmp, &dot );
2927
2928 FETCH( &r[3], 0, CHAN_W );
2929 micro_mul( &dot, &r[3], &r[3] );
2930 micro_add( &tmp, &tmp, &dot );
2931
2932 /* tmp = 1 / sqrt(tmp) */
2933 micro_sqrt( &tmp, &tmp );
2934 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2935
2936 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2937 /* chan = chan * tmp */
2938 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2939 STORE( &r[chan_index], 0, chan_index );
2940 }
2941 }
2942 break;
2943
2944 case TGSI_OPCODE_DIV:
2945 assert( 0 );
2946 break;
2947
2948 case TGSI_OPCODE_DP2:
2949 FETCH( &r[0], 0, CHAN_X );
2950 FETCH( &r[1], 1, CHAN_X );
2951 micro_mul( &r[0], &r[0], &r[1] );
2952
2953 FETCH( &r[1], 0, CHAN_Y );
2954 FETCH( &r[2], 1, CHAN_Y );
2955 micro_mul( &r[1], &r[1], &r[2] );
2956 micro_add( &r[0], &r[0], &r[1] );
2957
2958 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2959 STORE( &r[0], 0, chan_index );
2960 }
2961 break;
2962
2963 case TGSI_OPCODE_IF:
2964 /* push CondMask */
2965 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2966 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2967 FETCH( &r[0], 0, CHAN_X );
2968 /* update CondMask */
2969 if( ! r[0].u[0] ) {
2970 mach->CondMask &= ~0x1;
2971 }
2972 if( ! r[0].u[1] ) {
2973 mach->CondMask &= ~0x2;
2974 }
2975 if( ! r[0].u[2] ) {
2976 mach->CondMask &= ~0x4;
2977 }
2978 if( ! r[0].u[3] ) {
2979 mach->CondMask &= ~0x8;
2980 }
2981 UPDATE_EXEC_MASK(mach);
2982 /* Todo: If CondMask==0, jump to ELSE */
2983 break;
2984
2985 case TGSI_OPCODE_ELSE:
2986 /* invert CondMask wrt previous mask */
2987 {
2988 uint prevMask;
2989 assert(mach->CondStackTop > 0);
2990 prevMask = mach->CondStack[mach->CondStackTop - 1];
2991 mach->CondMask = ~mach->CondMask & prevMask;
2992 UPDATE_EXEC_MASK(mach);
2993 /* Todo: If CondMask==0, jump to ENDIF */
2994 }
2995 break;
2996
2997 case TGSI_OPCODE_ENDIF:
2998 /* pop CondMask */
2999 assert(mach->CondStackTop > 0);
3000 mach->CondMask = mach->CondStack[--mach->CondStackTop];
3001 UPDATE_EXEC_MASK(mach);
3002 break;
3003
3004 case TGSI_OPCODE_END:
3005 /* halt execution */
3006 *pc = -1;
3007 break;
3008
3009 case TGSI_OPCODE_REP:
3010 assert (0);
3011 break;
3012
3013 case TGSI_OPCODE_ENDREP:
3014 assert (0);
3015 break;
3016
3017 case TGSI_OPCODE_PUSHA:
3018 assert (0);
3019 break;
3020
3021 case TGSI_OPCODE_POPA:
3022 assert (0);
3023 break;
3024
3025 case TGSI_OPCODE_CEIL:
3026 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3027 FETCH( &r[0], 0, chan_index );
3028 micro_ceil(&d[chan_index], &r[0]);
3029 }
3030 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3031 STORE(&d[chan_index], 0, chan_index);
3032 }
3033 break;
3034
3035 case TGSI_OPCODE_I2F:
3036 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3037 FETCH( &r[0], 0, chan_index );
3038 micro_i2f(&d[chan_index], &r[0]);
3039 }
3040 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3041 STORE(&d[chan_index], 0, chan_index);
3042 }
3043 break;
3044
3045 case TGSI_OPCODE_NOT:
3046 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3047 FETCH( &r[0], 0, chan_index );
3048 micro_not(&d[chan_index], &r[0]);
3049 }
3050 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3051 STORE(&d[chan_index], 0, chan_index);
3052 }
3053 break;
3054
3055 case TGSI_OPCODE_TRUNC:
3056 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3057 FETCH( &r[0], 0, chan_index );
3058 micro_trunc(&d[chan_index], &r[0]);
3059 }
3060 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3061 STORE(&d[chan_index], 0, chan_index);
3062 }
3063 break;
3064
3065 case TGSI_OPCODE_SHL:
3066 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3067 FETCH( &r[0], 0, chan_index );
3068 FETCH( &r[1], 1, chan_index );
3069 micro_shl(&d[chan_index], &r[0], &r[1]);
3070 }
3071 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3072 STORE(&d[chan_index], 0, chan_index);
3073 }
3074 break;
3075
3076 case TGSI_OPCODE_SHR:
3077 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3078 FETCH( &r[0], 0, chan_index );
3079 FETCH( &r[1], 1, chan_index );
3080 micro_ishr(&d[chan_index], &r[0], &r[1]);
3081 }
3082 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3083 STORE(&d[chan_index], 0, chan_index);
3084 }
3085 break;
3086
3087 case TGSI_OPCODE_AND:
3088 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3089 FETCH( &r[0], 0, chan_index );
3090 FETCH( &r[1], 1, chan_index );
3091 micro_and(&d[chan_index], &r[0], &r[1]);
3092 }
3093 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3094 STORE(&d[chan_index], 0, chan_index);
3095 }
3096 break;
3097
3098 case TGSI_OPCODE_OR:
3099 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3100 FETCH( &r[0], 0, chan_index );
3101 FETCH( &r[1], 1, chan_index );
3102 micro_or(&d[chan_index], &r[0], &r[1]);
3103 }
3104 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3105 STORE(&d[chan_index], 0, chan_index);
3106 }
3107 break;
3108
3109 case TGSI_OPCODE_MOD:
3110 assert (0);
3111 break;
3112
3113 case TGSI_OPCODE_XOR:
3114 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3115 FETCH( &r[0], 0, chan_index );
3116 FETCH( &r[1], 1, chan_index );
3117 micro_xor(&d[chan_index], &r[0], &r[1]);
3118 }
3119 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3120 STORE(&d[chan_index], 0, chan_index);
3121 }
3122 break;
3123
3124 case TGSI_OPCODE_SAD:
3125 assert (0);
3126 break;
3127
3128 case TGSI_OPCODE_TXF:
3129 assert (0);
3130 break;
3131
3132 case TGSI_OPCODE_TXQ:
3133 assert (0);
3134 break;
3135
3136 case TGSI_OPCODE_EMIT:
3137 emit_vertex(mach);
3138 break;
3139
3140 case TGSI_OPCODE_ENDPRIM:
3141 emit_primitive(mach);
3142 break;
3143
3144 case TGSI_OPCODE_BGNFOR:
3145 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3146 for (chan_index = 0; chan_index < 3; chan_index++) {
3147 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
3148 }
3149 ++mach->LoopCounterStackTop;
3150 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
3151 /* update LoopMask */
3152 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3153 mach->LoopMask &= ~0x1;
3154 }
3155 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3156 mach->LoopMask &= ~0x2;
3157 }
3158 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3159 mach->LoopMask &= ~0x4;
3160 }
3161 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3162 mach->LoopMask &= ~0x8;
3163 }
3164 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3165 UPDATE_EXEC_MASK(mach);
3166 /* fall-through (for now) */
3167 case TGSI_OPCODE_BGNLOOP:
3168 /* push LoopMask and ContMasks */
3169 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3170 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3171 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3172 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3173 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3174 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
3175 break;
3176
3177 case TGSI_OPCODE_ENDFOR:
3178 assert(mach->LoopCounterStackTop > 0);
3179 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3180 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3181 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
3182 /* update LoopMask */
3183 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3184 mach->LoopMask &= ~0x1;
3185 }
3186 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3187 mach->LoopMask &= ~0x2;
3188 }
3189 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3190 mach->LoopMask &= ~0x4;
3191 }
3192 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3193 mach->LoopMask &= ~0x8;
3194 }
3195 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3196 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3197 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
3198 assert(mach->LoopLabelStackTop > 0);
3199 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
3200 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
3201 /* Restore ContMask, but don't pop */
3202 assert(mach->ContStackTop > 0);
3203 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3204 UPDATE_EXEC_MASK(mach);
3205 if (mach->ExecMask) {
3206 /* repeat loop: jump to instruction just past BGNLOOP */
3207 assert(mach->LoopLabelStackTop > 0);
3208 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3209 }
3210 else {
3211 /* exit loop: pop LoopMask */
3212 assert(mach->LoopStackTop > 0);
3213 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3214 /* pop ContMask */
3215 assert(mach->ContStackTop > 0);
3216 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3217 assert(mach->LoopLabelStackTop > 0);
3218 --mach->LoopLabelStackTop;
3219 assert(mach->LoopCounterStackTop > 0);
3220 --mach->LoopCounterStackTop;
3221 }
3222 UPDATE_EXEC_MASK(mach);
3223 break;
3224
3225 case TGSI_OPCODE_ENDLOOP:
3226 /* Restore ContMask, but don't pop */
3227 assert(mach->ContStackTop > 0);
3228 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3229 UPDATE_EXEC_MASK(mach);
3230 if (mach->ExecMask) {
3231 /* repeat loop: jump to instruction just past BGNLOOP */
3232 assert(mach->LoopLabelStackTop > 0);
3233 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3234 }
3235 else {
3236 /* exit loop: pop LoopMask */
3237 assert(mach->LoopStackTop > 0);
3238 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3239 /* pop ContMask */
3240 assert(mach->ContStackTop > 0);
3241 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3242 assert(mach->LoopLabelStackTop > 0);
3243 --mach->LoopLabelStackTop;
3244 }
3245 UPDATE_EXEC_MASK(mach);
3246 break;
3247
3248 case TGSI_OPCODE_BRK:
3249 /* turn off loop channels for each enabled exec channel */
3250 mach->LoopMask &= ~mach->ExecMask;
3251 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3252 UPDATE_EXEC_MASK(mach);
3253 break;
3254
3255 case TGSI_OPCODE_CONT:
3256 /* turn off cont channels for each enabled exec channel */
3257 mach->ContMask &= ~mach->ExecMask;
3258 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3259 UPDATE_EXEC_MASK(mach);
3260 break;
3261
3262 case TGSI_OPCODE_BGNSUB:
3263 /* no-op */
3264 break;
3265
3266 case TGSI_OPCODE_ENDSUB:
3267 /*
3268 * XXX: This really should be a no-op. We should never reach this opcode.
3269 */
3270
3271 assert(mach->CallStackTop > 0);
3272 mach->CallStackTop--;
3273
3274 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3275 mach->CondMask = mach->CondStack[mach->CondStackTop];
3276
3277 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3278 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3279
3280 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3281 mach->ContMask = mach->ContStack[mach->ContStackTop];
3282
3283 assert(mach->FuncStackTop > 0);
3284 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3285
3286 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3287
3288 UPDATE_EXEC_MASK(mach);
3289 break;
3290
3291 case TGSI_OPCODE_NOP:
3292 break;
3293
3294 case TGSI_OPCODE_BREAKC:
3295 FETCH(&r[0], 0, CHAN_X);
3296 /* update LoopMask */
3297 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
3298 mach->LoopMask &= ~0x1;
3299 }
3300 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
3301 mach->LoopMask &= ~0x2;
3302 }
3303 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
3304 mach->LoopMask &= ~0x4;
3305 }
3306 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
3307 mach->LoopMask &= ~0x8;
3308 }
3309 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3310 UPDATE_EXEC_MASK(mach);
3311 break;
3312
3313 default:
3314 assert( 0 );
3315 }
3316 }
3317
3318 #define DEBUG_EXECUTION 0 /* set to 1 to have tgsi_exec_machine_run() dump each instruction and print the TEMPs it changed */
3319
3320
3321 /**
3322 * Run TGSI interpreter.
3323 * \return bitmask of "alive" quad components
3324 */
3325 uint
3326 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3327 {
3328 uint i;
3329 int pc = 0;
3330
3331 mach->CondMask = 0xf;
3332 mach->LoopMask = 0xf;
3333 mach->ContMask = 0xf;
3334 mach->FuncMask = 0xf;
3335 mach->ExecMask = 0xf;
3336
3337 assert(mach->CondStackTop == 0);
3338 assert(mach->LoopStackTop == 0);
3339 assert(mach->ContStackTop == 0);
3340 assert(mach->CallStackTop == 0);
3341
3342 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3343 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3344
3345 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3346 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3347 mach->Primitives[0] = 0;
3348 }
3349
3350 for (i = 0; i < QUAD_SIZE; i++) {
3351 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3352 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3353 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3354 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3355 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3356 }
3357
3358 /* execute declarations (interpolants) */
3359 for (i = 0; i < mach->NumDeclarations; i++) {
3360 exec_declaration( mach, mach->Declarations+i );
3361 }
3362
3363 {
3364 #if DEBUG_EXECUTION
3365 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
3366 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
3367 uint inst = 1;
3368
3369 memcpy(temps, mach->Temps, sizeof(temps));
3370 memcpy(outputs, mach->Outputs, sizeof(outputs));
3371 #endif
3372
3373 /* execute instructions, until pc is set to -1 */
3374 while (pc != -1) {
3375
3376 #if DEBUG_EXECUTION
3377 uint i;
3378
3379 tgsi_dump_instruction(&mach->Instructions[pc], inst++);
3380 #endif
3381
3382 assert(pc < (int) mach->NumInstructions);
3383 exec_instruction(mach, mach->Instructions + pc, &pc);
3384
3385 #if DEBUG_EXECUTION
3386 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
3387 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
3388 uint j;
3389
3390 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
3391 debug_printf("TEMP[%2u] = ", i);
3392 for (j = 0; j < 4; j++) {
3393 if (j > 0) {
3394 debug_printf(" ");
3395 }
3396 debug_printf("(%6f, %6f, %6f, %6f)\n",
3397 temps[i].xyzw[0].f[j],
3398 temps[i].xyzw[1].f[j],
3399 temps[i].xyzw[2].f[j],
3400 temps[i].xyzw[3].f[j]);
3401 }
3402 }
3403 }
3404 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
3405 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
3406 uint j;
3407
3408 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
3409 debug_printf("OUT[%2u] = ", i);
3410 for (j = 0; j < 4; j++) {
3411 if (j > 0) {
3412 debug_printf(" ");
3413 }
3414 debug_printf("{%6f, %6f, %6f, %6f}\n",
3415 outputs[i].xyzw[0].f[j],
3416 outputs[i].xyzw[1].f[j],
3417 outputs[i].xyzw[2].f[j],
3418 outputs[i].xyzw[3].f[j]);
3419 }
3420 }
3421 }
3422 #endif
3423 }
3424 }
3425
3426 #if 0
3427 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3428 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3429 /*
3430 * Scale back depth component.
3431 */
3432 for (i = 0; i < 4; i++)
3433 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3434 }
3435 #endif
3436
3437 assert(mach->CondStackTop == 0);
3438 assert(mach->LoopStackTop == 0);
3439 assert(mach->ContStackTop == 0);
3440 assert(mach->CallStackTop == 0);
3441
3442 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3443 }