tgsi: Provide ultimate solution for SOA dependencies in exec.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask; see UPDATE_EXEC_MASK), which are controlled by the
45 * flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_dump.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
60 #include "util/u_memory.h"
61 #include "util/u_math.h"
62
/* When non-zero, micro_exp2/micro_lg2/micro_pow use the util_fast_*() helpers. */
#define FAST_MATH 1

/* Component indices of a channel, named by position within the quad. */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3

/* Register channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
#define TEMP_P0            TGSI_EXEC_TEMP_P0
108
/** Non-zero if channel CHAN is set in the write mask of destination 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN) \
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Non-zero if channel CHAN is set in the write mask of destination 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN) \
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel that is
 * enabled in the write mask of destination 0.  CHAN must be an lvalue.
 * The expansion ends in an unbraced "if", so do not follow it with "else".
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++) \
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same as FOR_EACH_ENABLED_CHANNEL, but for destination 1. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++) \
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
122
123
/**
 * Recompute the execution mask: it is the bitwise AND of the conditional,
 * loop, continue and function masks.
 *
 * MACH is a pointer to the machine.  It is parenthesized so that any pointer
 * expression (e.g. "&machine") can be passed; note that it is still evaluated
 * several times, so it must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
127
128
129 static const union tgsi_exec_channel ZeroVec =
130 { { 0.0, 0.0, 0.0, 0.0 } };
131
132
#ifdef DEBUG
/**
 * Debug-only sanity check: assert that none of the four float components
 * of \p chan is an infinity or a NaN.  Each component has its own assert so
 * that a failure identifies the offending component.
 */
static void
check_inf_or_nan(const union tgsi_exec_channel *chan)
{
   assert(!util_is_inf_or_nan(chan->f[0]));
   assert(!util_is_inf_or_nan(chan->f[1]));
   assert(!util_is_inf_or_nan(chan->f[2]));
   assert(!util_is_inf_or_nan(chan->f[3]));
}
#endif /* DEBUG */
143
144
#ifdef DEBUG
/**
 * Debug-only helper: print the four float components of \p chan,
 * prefixed by the caller-supplied label \p msg.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *f = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n", msg, f[0], f[1], f[2], f[3]);
}
#endif /* DEBUG */
153
154
#ifdef DEBUG
/**
 * Debug-only helper: dump temporary register \p index of \p mach, one line
 * per channel (X, Y, Z, W) with the four float components of that channel.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char chan_name[4] = { 'X', 'Y', 'Z', 'W' };
   const struct tgsi_exec_vector *tmp = &mach->Temps[index];
   int c;

   debug_printf("Temp[%u] =\n", index);
   for (c = 0; c < 4; c++) {
      const float *f = tmp->xyzw[c].f;

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   chan_name[c], f[0], f[1], f[2], f[3]);
   }
}
#endif /* DEBUG */
172
173
174 /**
175 * Check if there's a potential src/dst register data dependency when
176 * using SOA execution.
177 * Example:
178 * MOV T, T.yxwz;
179 * This would expand into:
180 * MOV t0, t1;
181 * MOV t1, t0;
182 * MOV t2, t3;
183 * MOV t3, t2;
184 * The second instruction will have the wrong value for t0 if executed as-is.
185 */
186 boolean
187 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
188 {
189 uint i, chan;
190
191 uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
192 if (writemask == TGSI_WRITEMASK_X ||
193 writemask == TGSI_WRITEMASK_Y ||
194 writemask == TGSI_WRITEMASK_Z ||
195 writemask == TGSI_WRITEMASK_W ||
196 writemask == TGSI_WRITEMASK_NONE) {
197 /* no chance of data dependency */
198 return FALSE;
199 }
200
201 /* loop over src regs */
202 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
203 if ((inst->FullSrcRegisters[i].SrcRegister.File ==
204 inst->FullDstRegisters[0].DstRegister.File) &&
205 (inst->FullSrcRegisters[i].SrcRegister.Index ==
206 inst->FullDstRegisters[0].DstRegister.Index)) {
207 /* loop over dest channels */
208 uint channelsWritten = 0x0;
209 FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
210 /* check if we're reading a channel that's been written */
211 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan);
212 if (channelsWritten & (1 << swizzle)) {
213 return TRUE;
214 }
215
216 channelsWritten |= (1 << chan);
217 }
218 }
219 }
220 return FALSE;
221 }
222
223
224 /**
225 * Initialize machine state by expanding tokens to full instructions,
226 * allocating temporary storage, setting up constants, etc.
227 * After this, we can call tgsi_exec_machine_run() many times.
228 */
229 void
230 tgsi_exec_machine_bind_shader(
231 struct tgsi_exec_machine *mach,
232 const struct tgsi_token *tokens,
233 uint numSamplers,
234 struct tgsi_sampler **samplers)
235 {
236 uint k;
237 struct tgsi_parse_context parse;
238 struct tgsi_exec_labels *labels = &mach->Labels;
239 struct tgsi_full_instruction *instructions;
240 struct tgsi_full_declaration *declarations;
241 uint maxInstructions = 10, numInstructions = 0;
242 uint maxDeclarations = 10, numDeclarations = 0;
243 uint instno = 0;
244
245 #if 0
246 tgsi_dump(tokens, 0);
247 #endif
248
249 util_init_math();
250
251 mach->Tokens = tokens;
252 mach->Samplers = samplers;
253
254 k = tgsi_parse_init (&parse, mach->Tokens);
255 if (k != TGSI_PARSE_OK) {
256 debug_printf( "Problem parsing!\n" );
257 return;
258 }
259
260 mach->Processor = parse.FullHeader.Processor.Processor;
261 mach->ImmLimit = 0;
262 labels->count = 0;
263
264 declarations = (struct tgsi_full_declaration *)
265 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
266
267 if (!declarations) {
268 return;
269 }
270
271 instructions = (struct tgsi_full_instruction *)
272 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
273
274 if (!instructions) {
275 FREE( declarations );
276 return;
277 }
278
279 while( !tgsi_parse_end_of_tokens( &parse ) ) {
280 uint pointer = parse.Position;
281 uint i;
282
283 tgsi_parse_token( &parse );
284 switch( parse.FullToken.Token.Type ) {
285 case TGSI_TOKEN_TYPE_DECLARATION:
286 /* save expanded declaration */
287 if (numDeclarations == maxDeclarations) {
288 declarations = REALLOC(declarations,
289 maxDeclarations
290 * sizeof(struct tgsi_full_declaration),
291 (maxDeclarations + 10)
292 * sizeof(struct tgsi_full_declaration));
293 maxDeclarations += 10;
294 }
295 memcpy(declarations + numDeclarations,
296 &parse.FullToken.FullDeclaration,
297 sizeof(declarations[0]));
298 numDeclarations++;
299 break;
300
301 case TGSI_TOKEN_TYPE_IMMEDIATE:
302 {
303 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
304 assert( size <= 4 );
305 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
306
307 for( i = 0; i < size; i++ ) {
308 mach->Imms[mach->ImmLimit][i] =
309 parse.FullToken.FullImmediate.u[i].Float;
310 }
311 mach->ImmLimit += 1;
312 }
313 break;
314
315 case TGSI_TOKEN_TYPE_INSTRUCTION:
316 assert( labels->count < MAX_LABELS );
317
318 labels->labels[labels->count][0] = instno;
319 labels->labels[labels->count][1] = pointer;
320 labels->count++;
321
322 /* save expanded instruction */
323 if (numInstructions == maxInstructions) {
324 instructions = REALLOC(instructions,
325 maxInstructions
326 * sizeof(struct tgsi_full_instruction),
327 (maxInstructions + 10)
328 * sizeof(struct tgsi_full_instruction));
329 maxInstructions += 10;
330 }
331
332 memcpy(instructions + numInstructions,
333 &parse.FullToken.FullInstruction,
334 sizeof(instructions[0]));
335
336 numInstructions++;
337 break;
338
339 default:
340 assert( 0 );
341 }
342 }
343 tgsi_parse_free (&parse);
344
345 if (mach->Declarations) {
346 FREE( mach->Declarations );
347 }
348 mach->Declarations = declarations;
349 mach->NumDeclarations = numDeclarations;
350
351 if (mach->Instructions) {
352 FREE( mach->Instructions );
353 }
354 mach->Instructions = instructions;
355 mach->NumInstructions = numInstructions;
356 }
357
358
359 struct tgsi_exec_machine *
360 tgsi_exec_machine_create( void )
361 {
362 struct tgsi_exec_machine *mach;
363 uint i;
364
365 mach = align_malloc( sizeof *mach, 16 );
366 if (!mach)
367 goto fail;
368
369 memset(mach, 0, sizeof(*mach));
370
371 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
372
373 /* Setup constants. */
374 for( i = 0; i < 4; i++ ) {
375 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
376 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
377 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
378 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
379 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
380 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
381 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
382 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
383 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
384 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
385 }
386
387 #ifdef DEBUG
388 /* silence warnings */
389 (void) print_chan;
390 (void) print_temp;
391 #endif
392
393 return mach;
394
395 fail:
396 align_free(mach);
397 return NULL;
398 }
399
400
401 void
402 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
403 {
404 if (mach) {
405 FREE(mach->Instructions);
406 FREE(mach->Declarations);
407 }
408
409 align_free(mach);
410 }
411
412
413 static void
414 micro_abs(
415 union tgsi_exec_channel *dst,
416 const union tgsi_exec_channel *src )
417 {
418 dst->f[0] = fabsf( src->f[0] );
419 dst->f[1] = fabsf( src->f[1] );
420 dst->f[2] = fabsf( src->f[2] );
421 dst->f[3] = fabsf( src->f[3] );
422 }
423
424 static void
425 micro_add(
426 union tgsi_exec_channel *dst,
427 const union tgsi_exec_channel *src0,
428 const union tgsi_exec_channel *src1 )
429 {
430 dst->f[0] = src0->f[0] + src1->f[0];
431 dst->f[1] = src0->f[1] + src1->f[1];
432 dst->f[2] = src0->f[2] + src1->f[2];
433 dst->f[3] = src0->f[3] + src1->f[3];
434 }
435
#if 0
/** (Disabled.) dst = src0 + src1 on the signed integer components. */
static void
micro_iadd(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   uint i;

   for (i = 0; i < 4; i++) {
      dst->i[i] = src0->i[i] + src1->i[i];
   }
}
#endif
449
450 static void
451 micro_and(
452 union tgsi_exec_channel *dst,
453 const union tgsi_exec_channel *src0,
454 const union tgsi_exec_channel *src1 )
455 {
456 dst->u[0] = src0->u[0] & src1->u[0];
457 dst->u[1] = src0->u[1] & src1->u[1];
458 dst->u[2] = src0->u[2] & src1->u[2];
459 dst->u[3] = src0->u[3] & src1->u[3];
460 }
461
462 static void
463 micro_ceil(
464 union tgsi_exec_channel *dst,
465 const union tgsi_exec_channel *src )
466 {
467 dst->f[0] = ceilf( src->f[0] );
468 dst->f[1] = ceilf( src->f[1] );
469 dst->f[2] = ceilf( src->f[2] );
470 dst->f[3] = ceilf( src->f[3] );
471 }
472
473 static void
474 micro_cos(
475 union tgsi_exec_channel *dst,
476 const union tgsi_exec_channel *src )
477 {
478 dst->f[0] = cosf( src->f[0] );
479 dst->f[1] = cosf( src->f[1] );
480 dst->f[2] = cosf( src->f[2] );
481 dst->f[3] = cosf( src->f[3] );
482 }
483
484 static void
485 micro_ddx(
486 union tgsi_exec_channel *dst,
487 const union tgsi_exec_channel *src )
488 {
489 dst->f[0] =
490 dst->f[1] =
491 dst->f[2] =
492 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
493 }
494
495 static void
496 micro_ddy(
497 union tgsi_exec_channel *dst,
498 const union tgsi_exec_channel *src )
499 {
500 dst->f[0] =
501 dst->f[1] =
502 dst->f[2] =
503 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
504 }
505
506 static void
507 micro_div(
508 union tgsi_exec_channel *dst,
509 const union tgsi_exec_channel *src0,
510 const union tgsi_exec_channel *src1 )
511 {
512 if (src1->f[0] != 0) {
513 dst->f[0] = src0->f[0] / src1->f[0];
514 }
515 if (src1->f[1] != 0) {
516 dst->f[1] = src0->f[1] / src1->f[1];
517 }
518 if (src1->f[2] != 0) {
519 dst->f[2] = src0->f[2] / src1->f[2];
520 }
521 if (src1->f[3] != 0) {
522 dst->f[3] = src0->f[3] / src1->f[3];
523 }
524 }
525
#if 0
/**
 * (Disabled.) dst = src0 / src1 on the unsigned components.
 * There is no check for a zero divisor.
 */
static void
micro_udiv(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   uint i;

   for (i = 0; i < 4; i++) {
      dst->u[i] = src0->u[i] / src1->u[i];
   }
}
#endif
539
540 static void
541 micro_eq(
542 union tgsi_exec_channel *dst,
543 const union tgsi_exec_channel *src0,
544 const union tgsi_exec_channel *src1,
545 const union tgsi_exec_channel *src2,
546 const union tgsi_exec_channel *src3 )
547 {
548 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
549 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
550 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
551 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
552 }
553
#if 0
/** (Disabled.) Per signed component: dst = (src0 == src1) ? src2 : src3. */
static void
micro_ieq(union tgsi_exec_channel *dst,
          const union tgsi_exec_channel *src0,
          const union tgsi_exec_channel *src1,
          const union tgsi_exec_channel *src2,
          const union tgsi_exec_channel *src3)
{
   uint i;

   for (i = 0; i < 4; i++) {
      if (src0->i[i] == src1->i[i]) {
         dst->i[i] = src2->i[i];
      }
      else {
         dst->i[i] = src3->i[i];
      }
   }
}
#endif
569
570 static void
571 micro_exp2(
572 union tgsi_exec_channel *dst,
573 const union tgsi_exec_channel *src)
574 {
575 #if FAST_MATH
576 dst->f[0] = util_fast_exp2( src->f[0] );
577 dst->f[1] = util_fast_exp2( src->f[1] );
578 dst->f[2] = util_fast_exp2( src->f[2] );
579 dst->f[3] = util_fast_exp2( src->f[3] );
580 #else
581 dst->f[0] = powf( 2.0f, src->f[0] );
582 dst->f[1] = powf( 2.0f, src->f[1] );
583 dst->f[2] = powf( 2.0f, src->f[2] );
584 dst->f[3] = powf( 2.0f, src->f[3] );
585 #endif
586 }
587
#if 0
/** (Disabled.) Convert each float component to unsigned with a C cast. */
static void
micro_f2ut(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src)
{
   uint i;

   for (i = 0; i < 4; i++) {
      dst->u[i] = (uint) src->f[i];
   }
}
#endif
600
601 static void
602 micro_float_clamp(union tgsi_exec_channel *dst,
603 const union tgsi_exec_channel *src)
604 {
605 uint i;
606
607 for (i = 0; i < 4; i++) {
608 if (src->f[i] > 0.0f) {
609 if (src->f[i] > 1.884467e+019f)
610 dst->f[i] = 1.884467e+019f;
611 else if (src->f[i] < 5.42101e-020f)
612 dst->f[i] = 5.42101e-020f;
613 else
614 dst->f[i] = src->f[i];
615 }
616 else {
617 if (src->f[i] < -1.884467e+019f)
618 dst->f[i] = -1.884467e+019f;
619 else if (src->f[i] > -5.42101e-020f)
620 dst->f[i] = -5.42101e-020f;
621 else
622 dst->f[i] = src->f[i];
623 }
624 }
625 }
626
627 static void
628 micro_flr(
629 union tgsi_exec_channel *dst,
630 const union tgsi_exec_channel *src )
631 {
632 dst->f[0] = floorf( src->f[0] );
633 dst->f[1] = floorf( src->f[1] );
634 dst->f[2] = floorf( src->f[2] );
635 dst->f[3] = floorf( src->f[3] );
636 }
637
638 static void
639 micro_frc(
640 union tgsi_exec_channel *dst,
641 const union tgsi_exec_channel *src )
642 {
643 dst->f[0] = src->f[0] - floorf( src->f[0] );
644 dst->f[1] = src->f[1] - floorf( src->f[1] );
645 dst->f[2] = src->f[2] - floorf( src->f[2] );
646 dst->f[3] = src->f[3] - floorf( src->f[3] );
647 }
648
649 static void
650 micro_i2f(
651 union tgsi_exec_channel *dst,
652 const union tgsi_exec_channel *src )
653 {
654 dst->f[0] = (float) src->i[0];
655 dst->f[1] = (float) src->i[1];
656 dst->f[2] = (float) src->i[2];
657 dst->f[3] = (float) src->i[3];
658 }
659
660 static void
661 micro_lg2(
662 union tgsi_exec_channel *dst,
663 const union tgsi_exec_channel *src )
664 {
665 #if FAST_MATH
666 dst->f[0] = util_fast_log2( src->f[0] );
667 dst->f[1] = util_fast_log2( src->f[1] );
668 dst->f[2] = util_fast_log2( src->f[2] );
669 dst->f[3] = util_fast_log2( src->f[3] );
670 #else
671 dst->f[0] = logf( src->f[0] ) * 1.442695f;
672 dst->f[1] = logf( src->f[1] ) * 1.442695f;
673 dst->f[2] = logf( src->f[2] ) * 1.442695f;
674 dst->f[3] = logf( src->f[3] ) * 1.442695f;
675 #endif
676 }
677
678 static void
679 micro_le(
680 union tgsi_exec_channel *dst,
681 const union tgsi_exec_channel *src0,
682 const union tgsi_exec_channel *src1,
683 const union tgsi_exec_channel *src2,
684 const union tgsi_exec_channel *src3 )
685 {
686 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
687 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
688 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
689 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
690 }
691
692 static void
693 micro_lt(
694 union tgsi_exec_channel *dst,
695 const union tgsi_exec_channel *src0,
696 const union tgsi_exec_channel *src1,
697 const union tgsi_exec_channel *src2,
698 const union tgsi_exec_channel *src3 )
699 {
700 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
701 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
702 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
703 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
704 }
705
#if 0
/** (Disabled.) Per signed component: dst = (src0 < src1) ? src2 : src3. */
static void
micro_ilt(union tgsi_exec_channel *dst,
          const union tgsi_exec_channel *src0,
          const union tgsi_exec_channel *src1,
          const union tgsi_exec_channel *src2,
          const union tgsi_exec_channel *src3)
{
   uint i;

   for (i = 0; i < 4; i++) {
      if (src0->i[i] < src1->i[i]) {
         dst->i[i] = src2->i[i];
      }
      else {
         dst->i[i] = src3->i[i];
      }
   }
}
#endif
721
#if 0
/** (Disabled.) Per unsigned component: dst = (src0 < src1) ? src2 : src3. */
static void
micro_ult(union tgsi_exec_channel *dst,
          const union tgsi_exec_channel *src0,
          const union tgsi_exec_channel *src1,
          const union tgsi_exec_channel *src2,
          const union tgsi_exec_channel *src3)
{
   uint i;

   for (i = 0; i < 4; i++) {
      if (src0->u[i] < src1->u[i]) {
         dst->u[i] = src2->u[i];
      }
      else {
         dst->u[i] = src3->u[i];
      }
   }
}
#endif
737
738 static void
739 micro_max(
740 union tgsi_exec_channel *dst,
741 const union tgsi_exec_channel *src0,
742 const union tgsi_exec_channel *src1 )
743 {
744 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
745 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
746 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
747 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
748 }
749
#if 0
/** (Disabled.) Per signed component: dst = src0 if src0 > src1, else src1. */
static void
micro_imax(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   uint i;

   for (i = 0; i < 4; i++) {
      if (src0->i[i] > src1->i[i]) {
         dst->i[i] = src0->i[i];
      }
      else {
         dst->i[i] = src1->i[i];
      }
   }
}
#endif
763
#if 0
/** (Disabled.) Per unsigned component: dst = src0 if src0 > src1, else src1. */
static void
micro_umax(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   uint i;

   for (i = 0; i < 4; i++) {
      if (src0->u[i] > src1->u[i]) {
         dst->u[i] = src0->u[i];
      }
      else {
         dst->u[i] = src1->u[i];
      }
   }
}
#endif
777
778 static void
779 micro_min(
780 union tgsi_exec_channel *dst,
781 const union tgsi_exec_channel *src0,
782 const union tgsi_exec_channel *src1 )
783 {
784 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
785 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
786 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
787 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
788 }
789
#if 0
/** (Disabled.) Per signed component: dst = src0 if src0 < src1, else src1. */
static void
micro_imin(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   uint i;

   for (i = 0; i < 4; i++) {
      if (src0->i[i] < src1->i[i]) {
         dst->i[i] = src0->i[i];
      }
      else {
         dst->i[i] = src1->i[i];
      }
   }
}
#endif
803
#if 0
/** (Disabled.) Per unsigned component: dst = src0 if src0 < src1, else src1. */
static void
micro_umin(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   uint i;

   for (i = 0; i < 4; i++) {
      if (src0->u[i] < src1->u[i]) {
         dst->u[i] = src0->u[i];
      }
      else {
         dst->u[i] = src1->u[i];
      }
   }
}
#endif
817
#if 0
/**
 * (Disabled.) dst = src0 % src1 on the unsigned components.
 * There is no check for a zero divisor.
 */
static void
micro_umod(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   uint i;

   for (i = 0; i < 4; i++) {
      dst->u[i] = src0->u[i] % src1->u[i];
   }
}
#endif
831
832 static void
833 micro_mul(
834 union tgsi_exec_channel *dst,
835 const union tgsi_exec_channel *src0,
836 const union tgsi_exec_channel *src1 )
837 {
838 dst->f[0] = src0->f[0] * src1->f[0];
839 dst->f[1] = src0->f[1] * src1->f[1];
840 dst->f[2] = src0->f[2] * src1->f[2];
841 dst->f[3] = src0->f[3] * src1->f[3];
842 }
843
#if 0
/** (Disabled.) dst = src0 * src1 on the signed integer components. */
static void
micro_imul(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   uint i;

   for (i = 0; i < 4; i++) {
      dst->i[i] = src0->i[i] * src1->i[i];
   }
}
#endif
857
#if 0
/**
 * (Disabled.) Signed multiply with two destinations: dst1 receives the
 * plain int product src0 * src1 and dst0 is set to zero.
 */
static void
micro_imul64(union tgsi_exec_channel *dst0,
             union tgsi_exec_channel *dst1,
             const union tgsi_exec_channel *src0,
             const union tgsi_exec_channel *src1)
{
   uint i;

   /* All products are stored before dst0 is cleared, as in the unrolled form. */
   for (i = 0; i < 4; i++) {
      dst1->i[i] = src0->i[i] * src1->i[i];
   }
   for (i = 0; i < 4; i++) {
      dst0->i[i] = 0;
   }
}
#endif
876
#if 0
/**
 * (Disabled.) Unsigned multiply with two destinations: dst1 receives the
 * plain unsigned product src0 * src1 and dst0 is set to zero.
 */
static void
micro_umul64(union tgsi_exec_channel *dst0,
             union tgsi_exec_channel *dst1,
             const union tgsi_exec_channel *src0,
             const union tgsi_exec_channel *src1)
{
   uint i;

   /* All products are stored before dst0 is cleared, as in the unrolled form. */
   for (i = 0; i < 4; i++) {
      dst1->u[i] = src0->u[i] * src1->u[i];
   }
   for (i = 0; i < 4; i++) {
      dst0->u[i] = 0;
   }
}
#endif
895
896
#if 0
/** (Disabled.) Per unsigned component: dst = src0 ? src1 : src2. */
static void
micro_movc(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1,
           const union tgsi_exec_channel *src2)
{
   uint i;

   for (i = 0; i < 4; i++) {
      if (src0->u[i]) {
         dst->u[i] = src1->u[i];
      }
      else {
         dst->u[i] = src2->u[i];
      }
   }
}
#endif
911
912 static void
913 micro_neg(
914 union tgsi_exec_channel *dst,
915 const union tgsi_exec_channel *src )
916 {
917 dst->f[0] = -src->f[0];
918 dst->f[1] = -src->f[1];
919 dst->f[2] = -src->f[2];
920 dst->f[3] = -src->f[3];
921 }
922
#if 0
/** (Disabled.) dst = -src on the signed integer components. */
static void
micro_ineg(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src)
{
   uint i;

   for (i = 0; i < 4; i++) {
      dst->i[i] = -src->i[i];
   }
}
#endif
935
936 static void
937 micro_not(
938 union tgsi_exec_channel *dst,
939 const union tgsi_exec_channel *src )
940 {
941 dst->u[0] = ~src->u[0];
942 dst->u[1] = ~src->u[1];
943 dst->u[2] = ~src->u[2];
944 dst->u[3] = ~src->u[3];
945 }
946
947 static void
948 micro_or(
949 union tgsi_exec_channel *dst,
950 const union tgsi_exec_channel *src0,
951 const union tgsi_exec_channel *src1 )
952 {
953 dst->u[0] = src0->u[0] | src1->u[0];
954 dst->u[1] = src0->u[1] | src1->u[1];
955 dst->u[2] = src0->u[2] | src1->u[2];
956 dst->u[3] = src0->u[3] | src1->u[3];
957 }
958
959 static void
960 micro_pow(
961 union tgsi_exec_channel *dst,
962 const union tgsi_exec_channel *src0,
963 const union tgsi_exec_channel *src1 )
964 {
965 #if FAST_MATH
966 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
967 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
968 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
969 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
970 #else
971 dst->f[0] = powf( src0->f[0], src1->f[0] );
972 dst->f[1] = powf( src0->f[1], src1->f[1] );
973 dst->f[2] = powf( src0->f[2], src1->f[2] );
974 dst->f[3] = powf( src0->f[3], src1->f[3] );
975 #endif
976 }
977
978 static void
979 micro_rnd(
980 union tgsi_exec_channel *dst,
981 const union tgsi_exec_channel *src )
982 {
983 dst->f[0] = floorf( src->f[0] + 0.5f );
984 dst->f[1] = floorf( src->f[1] + 0.5f );
985 dst->f[2] = floorf( src->f[2] + 0.5f );
986 dst->f[3] = floorf( src->f[3] + 0.5f );
987 }
988
989 static void
990 micro_sgn(
991 union tgsi_exec_channel *dst,
992 const union tgsi_exec_channel *src )
993 {
994 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
995 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
996 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
997 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
998 }
999
1000 static void
1001 micro_shl(
1002 union tgsi_exec_channel *dst,
1003 const union tgsi_exec_channel *src0,
1004 const union tgsi_exec_channel *src1 )
1005 {
1006 dst->i[0] = src0->i[0] << src1->i[0];
1007 dst->i[1] = src0->i[1] << src1->i[1];
1008 dst->i[2] = src0->i[2] << src1->i[2];
1009 dst->i[3] = src0->i[3] << src1->i[3];
1010 }
1011
1012 static void
1013 micro_ishr(
1014 union tgsi_exec_channel *dst,
1015 const union tgsi_exec_channel *src0,
1016 const union tgsi_exec_channel *src1 )
1017 {
1018 dst->i[0] = src0->i[0] >> src1->i[0];
1019 dst->i[1] = src0->i[1] >> src1->i[1];
1020 dst->i[2] = src0->i[2] >> src1->i[2];
1021 dst->i[3] = src0->i[3] >> src1->i[3];
1022 }
1023
1024 static void
1025 micro_trunc(
1026 union tgsi_exec_channel *dst,
1027 const union tgsi_exec_channel *src0 )
1028 {
1029 dst->f[0] = (float) (int) src0->f[0];
1030 dst->f[1] = (float) (int) src0->f[1];
1031 dst->f[2] = (float) (int) src0->f[2];
1032 dst->f[3] = (float) (int) src0->f[3];
1033 }
1034
#if 0
/** (Disabled.) Unsigned shift right: dst = src0 >> src1 on the unsigned components. */
static void
micro_ushr(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   uint i;

   for (i = 0; i < 4; i++) {
      dst->u[i] = src0->u[i] >> src1->u[i];
   }
}
#endif
1048
1049 static void
1050 micro_sin(
1051 union tgsi_exec_channel *dst,
1052 const union tgsi_exec_channel *src )
1053 {
1054 dst->f[0] = sinf( src->f[0] );
1055 dst->f[1] = sinf( src->f[1] );
1056 dst->f[2] = sinf( src->f[2] );
1057 dst->f[3] = sinf( src->f[3] );
1058 }
1059
1060 static void
1061 micro_sqrt( union tgsi_exec_channel *dst,
1062 const union tgsi_exec_channel *src )
1063 {
1064 dst->f[0] = sqrtf( src->f[0] );
1065 dst->f[1] = sqrtf( src->f[1] );
1066 dst->f[2] = sqrtf( src->f[2] );
1067 dst->f[3] = sqrtf( src->f[3] );
1068 }
1069
1070 static void
1071 micro_sub(
1072 union tgsi_exec_channel *dst,
1073 const union tgsi_exec_channel *src0,
1074 const union tgsi_exec_channel *src1 )
1075 {
1076 dst->f[0] = src0->f[0] - src1->f[0];
1077 dst->f[1] = src0->f[1] - src1->f[1];
1078 dst->f[2] = src0->f[2] - src1->f[2];
1079 dst->f[3] = src0->f[3] - src1->f[3];
1080 }
1081
#if 0
/** (Disabled.) Convert each unsigned component of src to float in dst. */
static void
micro_u2f(union tgsi_exec_channel *dst,
          const union tgsi_exec_channel *src)
{
   uint i;

   for (i = 0; i < 4; i++) {
      dst->f[i] = (float) src->u[i];
   }
}
#endif
1094
1095 static void
1096 micro_xor(
1097 union tgsi_exec_channel *dst,
1098 const union tgsi_exec_channel *src0,
1099 const union tgsi_exec_channel *src1 )
1100 {
1101 dst->u[0] = src0->u[0] ^ src1->u[0];
1102 dst->u[1] = src0->u[1] ^ src1->u[1];
1103 dst->u[2] = src0->u[2] ^ src1->u[2];
1104 dst->u[3] = src0->u[3] ^ src1->u[3];
1105 }
1106
1107 static void
1108 fetch_src_file_channel(
1109 const struct tgsi_exec_machine *mach,
1110 const uint file,
1111 const uint swizzle,
1112 const union tgsi_exec_channel *index,
1113 union tgsi_exec_channel *chan )
1114 {
1115 switch( swizzle ) {
1116 case TGSI_SWIZZLE_X:
1117 case TGSI_SWIZZLE_Y:
1118 case TGSI_SWIZZLE_Z:
1119 case TGSI_SWIZZLE_W:
1120 switch( file ) {
1121 case TGSI_FILE_CONSTANT:
1122 assert(mach->Consts);
1123 if (index->i[0] < 0)
1124 chan->f[0] = 0.0f;
1125 else
1126 chan->f[0] = mach->Consts[index->i[0]][swizzle];
1127 if (index->i[1] < 0)
1128 chan->f[1] = 0.0f;
1129 else
1130 chan->f[1] = mach->Consts[index->i[1]][swizzle];
1131 if (index->i[2] < 0)
1132 chan->f[2] = 0.0f;
1133 else
1134 chan->f[2] = mach->Consts[index->i[2]][swizzle];
1135 if (index->i[3] < 0)
1136 chan->f[3] = 0.0f;
1137 else
1138 chan->f[3] = mach->Consts[index->i[3]][swizzle];
1139 break;
1140
1141 case TGSI_FILE_INPUT:
1142 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1143 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1144 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1145 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1146 break;
1147
1148 case TGSI_FILE_TEMPORARY:
1149 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1150 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1151 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1152 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1153 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1154 break;
1155
1156 case TGSI_FILE_IMMEDIATE:
1157 assert( index->i[0] < (int) mach->ImmLimit );
1158 chan->f[0] = mach->Imms[index->i[0]][swizzle];
1159 assert( index->i[1] < (int) mach->ImmLimit );
1160 chan->f[1] = mach->Imms[index->i[1]][swizzle];
1161 assert( index->i[2] < (int) mach->ImmLimit );
1162 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1163 assert( index->i[3] < (int) mach->ImmLimit );
1164 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1165 break;
1166
1167 case TGSI_FILE_ADDRESS:
1168 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1169 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1170 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1171 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1172 break;
1173
1174 case TGSI_FILE_PREDICATE:
1175 assert(index->i[0] < TGSI_EXEC_NUM_PREDS);
1176 assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
1177 assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
1178 assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
1179 chan->u[0] = mach->Addrs[0].xyzw[swizzle].u[0];
1180 chan->u[1] = mach->Addrs[0].xyzw[swizzle].u[1];
1181 chan->u[2] = mach->Addrs[0].xyzw[swizzle].u[2];
1182 chan->u[3] = mach->Addrs[0].xyzw[swizzle].u[3];
1183 break;
1184
1185 case TGSI_FILE_OUTPUT:
1186 /* vertex/fragment output vars can be read too */
1187 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1188 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1189 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1190 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1191 break;
1192
1193 default:
1194 assert( 0 );
1195 }
1196 break;
1197
1198 default:
1199 assert( 0 );
1200 }
1201 }
1202
1203 static void
1204 fetch_source(
1205 const struct tgsi_exec_machine *mach,
1206 union tgsi_exec_channel *chan,
1207 const struct tgsi_full_src_register *reg,
1208 const uint chan_index )
1209 {
1210 union tgsi_exec_channel index;
1211 uint swizzle;
1212
1213 /* We start with a direct index into a register file.
1214 *
1215 * file[1],
1216 * where:
1217 * file = SrcRegister.File
1218 * [1] = SrcRegister.Index
1219 */
1220 index.i[0] =
1221 index.i[1] =
1222 index.i[2] =
1223 index.i[3] = reg->SrcRegister.Index;
1224
1225 /* There is an extra source register that indirectly subscripts
1226 * a register file. The direct index now becomes an offset
1227 * that is being added to the indirect register.
1228 *
1229 * file[ind[2].x+1],
1230 * where:
1231 * ind = SrcRegisterInd.File
1232 * [2] = SrcRegisterInd.Index
1233 * .x = SrcRegisterInd.SwizzleX
1234 */
1235 if (reg->SrcRegister.Indirect) {
1236 union tgsi_exec_channel index2;
1237 union tgsi_exec_channel indir_index;
1238 const uint execmask = mach->ExecMask;
1239 uint i;
1240
1241 /* which address register (always zero now) */
1242 index2.i[0] =
1243 index2.i[1] =
1244 index2.i[2] =
1245 index2.i[3] = reg->SrcRegisterInd.Index;
1246
1247 /* get current value of address register[swizzle] */
1248 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1249 fetch_src_file_channel(
1250 mach,
1251 reg->SrcRegisterInd.File,
1252 swizzle,
1253 &index2,
1254 &indir_index );
1255
1256 /* add value of address register to the offset */
1257 index.i[0] += (int) indir_index.f[0];
1258 index.i[1] += (int) indir_index.f[1];
1259 index.i[2] += (int) indir_index.f[2];
1260 index.i[3] += (int) indir_index.f[3];
1261
1262 /* for disabled execution channels, zero-out the index to
1263 * avoid using a potential garbage value.
1264 */
1265 for (i = 0; i < QUAD_SIZE; i++) {
1266 if ((execmask & (1 << i)) == 0)
1267 index.i[i] = 0;
1268 }
1269 }
1270
1271 /* There is an extra source register that is a second
1272 * subscript to a register file. Effectively it means that
1273 * the register file is actually a 2D array of registers.
1274 *
1275 * file[1][3] == file[1*sizeof(file[1])+3],
1276 * where:
1277 * [3] = SrcRegisterDim.Index
1278 */
1279 if (reg->SrcRegister.Dimension) {
1280 /* The size of the first-order array depends on the register file type.
1281 * We need to multiply the index to the first array to get an effective,
1282 * "flat" index that points to the beginning of the second-order array.
1283 */
1284 switch (reg->SrcRegister.File) {
1285 case TGSI_FILE_INPUT:
1286 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1287 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1288 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1289 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1290 break;
1291 case TGSI_FILE_CONSTANT:
1292 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1293 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1294 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1295 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1296 break;
1297 default:
1298 assert( 0 );
1299 }
1300
1301 index.i[0] += reg->SrcRegisterDim.Index;
1302 index.i[1] += reg->SrcRegisterDim.Index;
1303 index.i[2] += reg->SrcRegisterDim.Index;
1304 index.i[3] += reg->SrcRegisterDim.Index;
1305
1306 /* Again, the second subscript index can be addressed indirectly
1307 * identically to the first one.
1308 * Nothing stops us from indirectly addressing the indirect register,
1309 * but there is no need for that, so we won't exercise it.
1310 *
1311 * file[1][ind[4].y+3],
1312 * where:
1313 * ind = SrcRegisterDimInd.File
1314 * [4] = SrcRegisterDimInd.Index
1315 * .y = SrcRegisterDimInd.SwizzleX
1316 */
1317 if (reg->SrcRegisterDim.Indirect) {
1318 union tgsi_exec_channel index2;
1319 union tgsi_exec_channel indir_index;
1320 const uint execmask = mach->ExecMask;
1321 uint i;
1322
1323 index2.i[0] =
1324 index2.i[1] =
1325 index2.i[2] =
1326 index2.i[3] = reg->SrcRegisterDimInd.Index;
1327
1328 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1329 fetch_src_file_channel(
1330 mach,
1331 reg->SrcRegisterDimInd.File,
1332 swizzle,
1333 &index2,
1334 &indir_index );
1335
1336 index.i[0] += (int) indir_index.f[0];
1337 index.i[1] += (int) indir_index.f[1];
1338 index.i[2] += (int) indir_index.f[2];
1339 index.i[3] += (int) indir_index.f[3];
1340
1341 /* for disabled execution channels, zero-out the index to
1342 * avoid using a potential garbage value.
1343 */
1344 for (i = 0; i < QUAD_SIZE; i++) {
1345 if ((execmask & (1 << i)) == 0)
1346 index.i[i] = 0;
1347 }
1348 }
1349
1350 /* If by any chance there was a need for a 3D array of register
1351 * files, we would have to check whether SrcRegisterDim is followed
1352 * by a dimension register and continue the saga.
1353 */
1354 }
1355
1356 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1357 fetch_src_file_channel(
1358 mach,
1359 reg->SrcRegister.File,
1360 swizzle,
1361 &index,
1362 chan );
1363
1364 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1365 case TGSI_UTIL_SIGN_CLEAR:
1366 micro_abs( chan, chan );
1367 break;
1368
1369 case TGSI_UTIL_SIGN_SET:
1370 micro_abs( chan, chan );
1371 micro_neg( chan, chan );
1372 break;
1373
1374 case TGSI_UTIL_SIGN_TOGGLE:
1375 micro_neg( chan, chan );
1376 break;
1377
1378 case TGSI_UTIL_SIGN_KEEP:
1379 break;
1380 }
1381
1382 if (reg->SrcRegisterExtMod.Complement) {
1383 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1384 }
1385 }
1386
1387 static void
1388 store_dest(
1389 struct tgsi_exec_machine *mach,
1390 const union tgsi_exec_channel *chan,
1391 const struct tgsi_full_dst_register *reg,
1392 const struct tgsi_full_instruction *inst,
1393 uint chan_index )
1394 {
1395 uint i;
1396 union tgsi_exec_channel null;
1397 union tgsi_exec_channel *dst;
1398 uint execmask = mach->ExecMask;
1399 int offset = 0; /* indirection offset */
1400 int index;
1401
1402 #ifdef DEBUG
1403 check_inf_or_nan(chan);
1404 #endif
1405
1406 /* There is an extra source register that indirectly subscripts
1407 * a register file. The direct index now becomes an offset
1408 * that is being added to the indirect register.
1409 *
1410 * file[ind[2].x+1],
1411 * where:
1412 * ind = DstRegisterInd.File
1413 * [2] = DstRegisterInd.Index
1414 * .x = DstRegisterInd.SwizzleX
1415 */
1416 if (reg->DstRegister.Indirect) {
1417 union tgsi_exec_channel index;
1418 union tgsi_exec_channel indir_index;
1419 uint swizzle;
1420
1421 /* which address register (always zero for now) */
1422 index.i[0] =
1423 index.i[1] =
1424 index.i[2] =
1425 index.i[3] = reg->DstRegisterInd.Index;
1426
1427 /* get current value of address register[swizzle] */
1428 swizzle = tgsi_util_get_src_register_swizzle( &reg->DstRegisterInd, CHAN_X );
1429
1430 /* fetch values from the address/indirection register */
1431 fetch_src_file_channel(
1432 mach,
1433 reg->DstRegisterInd.File,
1434 swizzle,
1435 &index,
1436 &indir_index );
1437
1438 /* save indirection offset */
1439 offset = (int) indir_index.f[0];
1440 }
1441
1442 switch (reg->DstRegister.File) {
1443 case TGSI_FILE_NULL:
1444 dst = &null;
1445 break;
1446
1447 case TGSI_FILE_OUTPUT:
1448 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1449 + reg->DstRegister.Index;
1450 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1451 break;
1452
1453 case TGSI_FILE_TEMPORARY:
1454 index = reg->DstRegister.Index;
1455 assert( index < TGSI_EXEC_NUM_TEMPS );
1456 dst = &mach->Temps[offset + index].xyzw[chan_index];
1457 break;
1458
1459 case TGSI_FILE_ADDRESS:
1460 index = reg->DstRegister.Index;
1461 dst = &mach->Addrs[index].xyzw[chan_index];
1462 break;
1463
1464 case TGSI_FILE_PREDICATE:
1465 index = reg->DstRegister.Index;
1466 assert(index < TGSI_EXEC_NUM_PREDS);
1467 dst = &mach->Addrs[index].xyzw[chan_index];
1468 break;
1469
1470 default:
1471 assert( 0 );
1472 return;
1473 }
1474
1475 switch (inst->Instruction.Saturate) {
1476 case TGSI_SAT_NONE:
1477 for (i = 0; i < QUAD_SIZE; i++)
1478 if (execmask & (1 << i))
1479 dst->i[i] = chan->i[i];
1480 break;
1481
1482 case TGSI_SAT_ZERO_ONE:
1483 for (i = 0; i < QUAD_SIZE; i++)
1484 if (execmask & (1 << i)) {
1485 if (chan->f[i] < 0.0f)
1486 dst->f[i] = 0.0f;
1487 else if (chan->f[i] > 1.0f)
1488 dst->f[i] = 1.0f;
1489 else
1490 dst->i[i] = chan->i[i];
1491 }
1492 break;
1493
1494 case TGSI_SAT_MINUS_PLUS_ONE:
1495 for (i = 0; i < QUAD_SIZE; i++)
1496 if (execmask & (1 << i)) {
1497 if (chan->f[i] < -1.0f)
1498 dst->f[i] = -1.0f;
1499 else if (chan->f[i] > 1.0f)
1500 dst->f[i] = 1.0f;
1501 else
1502 dst->i[i] = chan->i[i];
1503 }
1504 break;
1505
1506 default:
1507 assert( 0 );
1508 }
1509 }
1510
/* Shorthands for reading a source / writing a destination operand of the
 * current instruction.  Both expect variables named `mach` and `inst` to be
 * visible at the point of expansion.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)

#define STORE(VAL, INDEX, CHAN) \
   store_dest(mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN)
1516
1517
1518 /**
1519 * Execute ARB-style KIL which is predicated by a src register.
1520 * Kill fragment if any of the four values is less than zero.
1521 */
1522 static void
1523 exec_kil(struct tgsi_exec_machine *mach,
1524 const struct tgsi_full_instruction *inst)
1525 {
1526 uint uniquemask;
1527 uint chan_index;
1528 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1529 union tgsi_exec_channel r[1];
1530
1531 /* This mask stores component bits that were already tested. */
1532 uniquemask = 0;
1533
1534 for (chan_index = 0; chan_index < 4; chan_index++)
1535 {
1536 uint swizzle;
1537 uint i;
1538
1539 /* unswizzle channel */
1540 swizzle = tgsi_util_get_full_src_register_swizzle (
1541 &inst->FullSrcRegisters[0],
1542 chan_index);
1543
1544 /* check if the component has not been already tested */
1545 if (uniquemask & (1 << swizzle))
1546 continue;
1547 uniquemask |= 1 << swizzle;
1548
1549 FETCH(&r[0], 0, chan_index);
1550 for (i = 0; i < 4; i++)
1551 if (r[0].f[i] < 0.0f)
1552 kilmask |= 1 << i;
1553 }
1554
1555 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1556 }
1557
1558 /**
1559 * Execute NVIDIA-style KIL which is predicated by a condition code.
1560 * Kill fragment if the condition code is TRUE.
1561 */
1562 static void
1563 exec_kilp(struct tgsi_exec_machine *mach,
1564 const struct tgsi_full_instruction *inst)
1565 {
1566 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1567
1568 /* "unconditional" kil */
1569 kilmask = mach->ExecMask;
1570 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1571 }
1572
1573
1574 /*
1575 * Fetch a four texture samples using STR texture coordinates.
1576 */
1577 static void
1578 fetch_texel( struct tgsi_sampler *sampler,
1579 const union tgsi_exec_channel *s,
1580 const union tgsi_exec_channel *t,
1581 const union tgsi_exec_channel *p,
1582 float lodbias, /* XXX should be float[4] */
1583 union tgsi_exec_channel *r,
1584 union tgsi_exec_channel *g,
1585 union tgsi_exec_channel *b,
1586 union tgsi_exec_channel *a )
1587 {
1588 uint j;
1589 float rgba[NUM_CHANNELS][QUAD_SIZE];
1590
1591 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1592
1593 for (j = 0; j < 4; j++) {
1594 r->f[j] = rgba[0][j];
1595 g->f[j] = rgba[1][j];
1596 b->f[j] = rgba[2][j];
1597 a->f[j] = rgba[3][j];
1598 }
1599 }
1600
1601
1602 static void
1603 exec_tex(struct tgsi_exec_machine *mach,
1604 const struct tgsi_full_instruction *inst,
1605 boolean biasLod,
1606 boolean projected)
1607 {
1608 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1609 union tgsi_exec_channel r[4];
1610 uint chan_index;
1611 float lodBias;
1612
1613 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1614
1615 switch (inst->InstructionExtTexture.Texture) {
1616 case TGSI_TEXTURE_1D:
1617 case TGSI_TEXTURE_SHADOW1D:
1618
1619 FETCH(&r[0], 0, CHAN_X);
1620
1621 if (projected) {
1622 FETCH(&r[1], 0, CHAN_W);
1623 micro_div( &r[0], &r[0], &r[1] );
1624 }
1625
1626 if (biasLod) {
1627 FETCH(&r[1], 0, CHAN_W);
1628 lodBias = r[2].f[0];
1629 }
1630 else
1631 lodBias = 0.0;
1632
1633 fetch_texel(mach->Samplers[unit],
1634 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */
1635 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1636 break;
1637
1638 case TGSI_TEXTURE_2D:
1639 case TGSI_TEXTURE_RECT:
1640 case TGSI_TEXTURE_SHADOW2D:
1641 case TGSI_TEXTURE_SHADOWRECT:
1642
1643 FETCH(&r[0], 0, CHAN_X);
1644 FETCH(&r[1], 0, CHAN_Y);
1645 FETCH(&r[2], 0, CHAN_Z);
1646
1647 if (projected) {
1648 FETCH(&r[3], 0, CHAN_W);
1649 micro_div( &r[0], &r[0], &r[3] );
1650 micro_div( &r[1], &r[1], &r[3] );
1651 micro_div( &r[2], &r[2], &r[3] );
1652 }
1653
1654 if (biasLod) {
1655 FETCH(&r[3], 0, CHAN_W);
1656 lodBias = r[3].f[0];
1657 }
1658 else
1659 lodBias = 0.0;
1660
1661 fetch_texel(mach->Samplers[unit],
1662 &r[0], &r[1], &r[2], lodBias, /* inputs */
1663 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1664 break;
1665
1666 case TGSI_TEXTURE_3D:
1667 case TGSI_TEXTURE_CUBE:
1668
1669 FETCH(&r[0], 0, CHAN_X);
1670 FETCH(&r[1], 0, CHAN_Y);
1671 FETCH(&r[2], 0, CHAN_Z);
1672
1673 if (projected) {
1674 FETCH(&r[3], 0, CHAN_W);
1675 micro_div( &r[0], &r[0], &r[3] );
1676 micro_div( &r[1], &r[1], &r[3] );
1677 micro_div( &r[2], &r[2], &r[3] );
1678 }
1679
1680 if (biasLod) {
1681 FETCH(&r[3], 0, CHAN_W);
1682 lodBias = r[3].f[0];
1683 }
1684 else
1685 lodBias = 0.0;
1686
1687 fetch_texel(mach->Samplers[unit],
1688 &r[0], &r[1], &r[2], lodBias,
1689 &r[0], &r[1], &r[2], &r[3]);
1690 break;
1691
1692 default:
1693 assert (0);
1694 }
1695
1696 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1697 STORE( &r[chan_index], 0, chan_index );
1698 }
1699 }
1700
1701
1702 /**
1703 * Evaluate a constant-valued coefficient at the position of the
1704 * current quad.
1705 */
1706 static void
1707 eval_constant_coef(
1708 struct tgsi_exec_machine *mach,
1709 unsigned attrib,
1710 unsigned chan )
1711 {
1712 unsigned i;
1713
1714 for( i = 0; i < QUAD_SIZE; i++ ) {
1715 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1716 }
1717 }
1718
1719 /**
1720 * Evaluate a linear-valued coefficient at the position of the
1721 * current quad.
1722 */
1723 static void
1724 eval_linear_coef(
1725 struct tgsi_exec_machine *mach,
1726 unsigned attrib,
1727 unsigned chan )
1728 {
1729 const float x = mach->QuadPos.xyzw[0].f[0];
1730 const float y = mach->QuadPos.xyzw[1].f[0];
1731 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1732 const float dady = mach->InterpCoefs[attrib].dady[chan];
1733 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1734 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1735 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1736 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1737 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1738 }
1739
1740 /**
1741 * Evaluate a perspective-valued coefficient at the position of the
1742 * current quad.
1743 */
1744 static void
1745 eval_perspective_coef(
1746 struct tgsi_exec_machine *mach,
1747 unsigned attrib,
1748 unsigned chan )
1749 {
1750 const float x = mach->QuadPos.xyzw[0].f[0];
1751 const float y = mach->QuadPos.xyzw[1].f[0];
1752 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1753 const float dady = mach->InterpCoefs[attrib].dady[chan];
1754 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1755 const float *w = mach->QuadPos.xyzw[3].f;
1756 /* divide by W here */
1757 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1758 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1759 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1760 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1761 }
1762
1763
/* Signature shared by the eval_*_coef() interpolation routines. */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1768
1769 static void
1770 exec_declaration(
1771 struct tgsi_exec_machine *mach,
1772 const struct tgsi_full_declaration *decl )
1773 {
1774 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1775 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1776 unsigned first, last, mask;
1777 eval_coef_func eval;
1778
1779 first = decl->DeclarationRange.First;
1780 last = decl->DeclarationRange.Last;
1781 mask = decl->Declaration.UsageMask;
1782
1783 switch( decl->Declaration.Interpolate ) {
1784 case TGSI_INTERPOLATE_CONSTANT:
1785 eval = eval_constant_coef;
1786 break;
1787
1788 case TGSI_INTERPOLATE_LINEAR:
1789 eval = eval_linear_coef;
1790 break;
1791
1792 case TGSI_INTERPOLATE_PERSPECTIVE:
1793 eval = eval_perspective_coef;
1794 break;
1795
1796 default:
1797 assert( 0 );
1798 return;
1799 }
1800
1801 if( mask == TGSI_WRITEMASK_XYZW ) {
1802 unsigned i, j;
1803
1804 for( i = first; i <= last; i++ ) {
1805 for( j = 0; j < NUM_CHANNELS; j++ ) {
1806 eval( mach, i, j );
1807 }
1808 }
1809 }
1810 else {
1811 unsigned i, j;
1812
1813 for( j = 0; j < NUM_CHANNELS; j++ ) {
1814 if( mask & (1 << j) ) {
1815 for( i = first; i <= last; i++ ) {
1816 eval( mach, i, j );
1817 }
1818 }
1819 }
1820 }
1821 }
1822 }
1823 }
1824
1825 static void
1826 exec_instruction(
1827 struct tgsi_exec_machine *mach,
1828 const struct tgsi_full_instruction *inst,
1829 int *pc )
1830 {
1831 uint chan_index;
1832 union tgsi_exec_channel r[10];
1833 union tgsi_exec_channel d[8];
1834
1835 (*pc)++;
1836
1837 switch (inst->Instruction.Opcode) {
1838 case TGSI_OPCODE_ARL:
1839 case TGSI_OPCODE_FLR:
1840 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1841 FETCH( &r[0], 0, chan_index );
1842 micro_flr(&d[chan_index], &r[0]);
1843 }
1844 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1845 STORE(&d[chan_index], 0, chan_index);
1846 }
1847 break;
1848
1849 case TGSI_OPCODE_MOV:
1850 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1851 FETCH(&d[chan_index], 0, chan_index);
1852 }
1853 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1854 STORE(&d[chan_index], 0, chan_index);
1855 }
1856 break;
1857
1858 case TGSI_OPCODE_LIT:
1859 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1860 FETCH( &r[0], 0, CHAN_X );
1861 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1862 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
1863 }
1864
1865 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1866 FETCH( &r[1], 0, CHAN_Y );
1867 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1868
1869 FETCH( &r[2], 0, CHAN_W );
1870 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1871 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1872 micro_pow( &r[1], &r[1], &r[2] );
1873 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
1874 }
1875
1876 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
1877 STORE(&d[CHAN_Y], 0, CHAN_Y);
1878 }
1879 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
1880 STORE(&d[CHAN_Z], 0, CHAN_Z);
1881 }
1882 }
1883 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1884 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1885 }
1886 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1887 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1888 }
1889 break;
1890
1891 case TGSI_OPCODE_RCP:
1892 /* TGSI_OPCODE_RECIP */
1893 FETCH( &r[0], 0, CHAN_X );
1894 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1895 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1896 STORE( &r[0], 0, chan_index );
1897 }
1898 break;
1899
1900 case TGSI_OPCODE_RSQ:
1901 /* TGSI_OPCODE_RECIPSQRT */
1902 FETCH( &r[0], 0, CHAN_X );
1903 micro_abs( &r[0], &r[0] );
1904 micro_sqrt( &r[0], &r[0] );
1905 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1906 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1907 STORE( &r[0], 0, chan_index );
1908 }
1909 break;
1910
1911 case TGSI_OPCODE_EXP:
1912 FETCH( &r[0], 0, CHAN_X );
1913 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1914 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1915 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1916 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1917 }
1918 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1919 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1920 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1921 }
1922 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1923 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1924 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1925 }
1926 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1927 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1928 }
1929 break;
1930
1931 case TGSI_OPCODE_LOG:
1932 FETCH( &r[0], 0, CHAN_X );
1933 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1934 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1935 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1936 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1937 STORE( &r[0], 0, CHAN_X );
1938 }
1939 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1940 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1941 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1942 STORE( &r[0], 0, CHAN_Y );
1943 }
1944 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1945 STORE( &r[1], 0, CHAN_Z );
1946 }
1947 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1948 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1949 }
1950 break;
1951
1952 case TGSI_OPCODE_MUL:
1953 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1954 FETCH(&r[0], 0, chan_index);
1955 FETCH(&r[1], 1, chan_index);
1956 micro_mul(&d[chan_index], &r[0], &r[1]);
1957 }
1958 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1959 STORE(&d[chan_index], 0, chan_index);
1960 }
1961 break;
1962
1963 case TGSI_OPCODE_ADD:
1964 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1965 FETCH( &r[0], 0, chan_index );
1966 FETCH( &r[1], 1, chan_index );
1967 micro_add(&d[chan_index], &r[0], &r[1]);
1968 }
1969 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1970 STORE(&d[chan_index], 0, chan_index);
1971 }
1972 break;
1973
1974 case TGSI_OPCODE_DP3:
1975 /* TGSI_OPCODE_DOT3 */
1976 FETCH( &r[0], 0, CHAN_X );
1977 FETCH( &r[1], 1, CHAN_X );
1978 micro_mul( &r[0], &r[0], &r[1] );
1979
1980 FETCH( &r[1], 0, CHAN_Y );
1981 FETCH( &r[2], 1, CHAN_Y );
1982 micro_mul( &r[1], &r[1], &r[2] );
1983 micro_add( &r[0], &r[0], &r[1] );
1984
1985 FETCH( &r[1], 0, CHAN_Z );
1986 FETCH( &r[2], 1, CHAN_Z );
1987 micro_mul( &r[1], &r[1], &r[2] );
1988 micro_add( &r[0], &r[0], &r[1] );
1989
1990 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1991 STORE( &r[0], 0, chan_index );
1992 }
1993 break;
1994
1995 case TGSI_OPCODE_DP4:
1996 /* TGSI_OPCODE_DOT4 */
1997 FETCH(&r[0], 0, CHAN_X);
1998 FETCH(&r[1], 1, CHAN_X);
1999
2000 micro_mul( &r[0], &r[0], &r[1] );
2001
2002 FETCH(&r[1], 0, CHAN_Y);
2003 FETCH(&r[2], 1, CHAN_Y);
2004
2005 micro_mul( &r[1], &r[1], &r[2] );
2006 micro_add( &r[0], &r[0], &r[1] );
2007
2008 FETCH(&r[1], 0, CHAN_Z);
2009 FETCH(&r[2], 1, CHAN_Z);
2010
2011 micro_mul( &r[1], &r[1], &r[2] );
2012 micro_add( &r[0], &r[0], &r[1] );
2013
2014 FETCH(&r[1], 0, CHAN_W);
2015 FETCH(&r[2], 1, CHAN_W);
2016
2017 micro_mul( &r[1], &r[1], &r[2] );
2018 micro_add( &r[0], &r[0], &r[1] );
2019
2020 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2021 STORE( &r[0], 0, chan_index );
2022 }
2023 break;
2024
2025 case TGSI_OPCODE_DST:
2026 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2027 FETCH( &r[0], 0, CHAN_Y );
2028 FETCH( &r[1], 1, CHAN_Y);
2029 micro_mul(&d[CHAN_Y], &r[0], &r[1]);
2030 }
2031 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2032 FETCH(&d[CHAN_Z], 0, CHAN_Z);
2033 }
2034 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2035 FETCH(&d[CHAN_W], 1, CHAN_W);
2036 }
2037
2038 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2039 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
2040 }
2041 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2042 STORE(&d[CHAN_Y], 0, CHAN_Y);
2043 }
2044 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2045 STORE(&d[CHAN_Z], 0, CHAN_Z);
2046 }
2047 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2048 STORE(&d[CHAN_W], 0, CHAN_W);
2049 }
2050 break;
2051
2052 case TGSI_OPCODE_MIN:
2053 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2054 FETCH(&r[0], 0, chan_index);
2055 FETCH(&r[1], 1, chan_index);
2056
2057 /* XXX use micro_min()?? */
2058 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
2059 }
2060 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2061 STORE(&d[chan_index], 0, chan_index);
2062 }
2063 break;
2064
2065 case TGSI_OPCODE_MAX:
2066 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2067 FETCH(&r[0], 0, chan_index);
2068 FETCH(&r[1], 1, chan_index);
2069
2070 /* XXX use micro_max()?? */
2071 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
2072 }
2073 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2074 STORE(&d[chan_index], 0, chan_index);
2075 }
2076 break;
2077
2078 case TGSI_OPCODE_SLT:
2079 /* TGSI_OPCODE_SETLT */
2080 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2081 FETCH( &r[0], 0, chan_index );
2082 FETCH( &r[1], 1, chan_index );
2083 micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2084 }
2085 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2086 STORE(&d[chan_index], 0, chan_index);
2087 }
2088 break;
2089
2090 case TGSI_OPCODE_SGE:
2091 /* TGSI_OPCODE_SETGE */
2092 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2093 FETCH( &r[0], 0, chan_index );
2094 FETCH( &r[1], 1, chan_index );
2095 micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2096 }
2097 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2098 STORE(&d[chan_index], 0, chan_index);
2099 }
2100 break;
2101
2102 case TGSI_OPCODE_MAD:
2103 /* TGSI_OPCODE_MADD */
2104 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2105 FETCH( &r[0], 0, chan_index );
2106 FETCH( &r[1], 1, chan_index );
2107 micro_mul( &r[0], &r[0], &r[1] );
2108 FETCH( &r[1], 2, chan_index );
2109 micro_add(&d[chan_index], &r[0], &r[1]);
2110 }
2111 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2112 STORE(&d[chan_index], 0, chan_index);
2113 }
2114 break;
2115
2116 case TGSI_OPCODE_SUB:
2117 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2118 FETCH(&r[0], 0, chan_index);
2119 FETCH(&r[1], 1, chan_index);
2120 micro_sub(&d[chan_index], &r[0], &r[1]);
2121 }
2122 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2123 STORE(&d[chan_index], 0, chan_index);
2124 }
2125 break;
2126
2127 case TGSI_OPCODE_LRP:
2128 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2129 FETCH(&r[0], 0, chan_index);
2130 FETCH(&r[1], 1, chan_index);
2131 FETCH(&r[2], 2, chan_index);
2132 micro_sub( &r[1], &r[1], &r[2] );
2133 micro_mul( &r[0], &r[0], &r[1] );
2134 micro_add(&d[chan_index], &r[0], &r[2]);
2135 }
2136 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2137 STORE(&d[chan_index], 0, chan_index);
2138 }
2139 break;
2140
2141 case TGSI_OPCODE_CND:
2142 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2143 FETCH(&r[0], 0, chan_index);
2144 FETCH(&r[1], 1, chan_index);
2145 FETCH(&r[2], 2, chan_index);
2146 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2147 }
2148 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2149 STORE(&d[chan_index], 0, chan_index);
2150 }
2151 break;
2152
2153 case TGSI_OPCODE_DP2A:
2154 FETCH( &r[0], 0, CHAN_X );
2155 FETCH( &r[1], 1, CHAN_X );
2156 micro_mul( &r[0], &r[0], &r[1] );
2157
2158 FETCH( &r[1], 0, CHAN_Y );
2159 FETCH( &r[2], 1, CHAN_Y );
2160 micro_mul( &r[1], &r[1], &r[2] );
2161 micro_add( &r[0], &r[0], &r[1] );
2162
2163 FETCH( &r[2], 2, CHAN_X );
2164 micro_add( &r[0], &r[0], &r[2] );
2165
2166 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2167 STORE( &r[0], 0, chan_index );
2168 }
2169 break;
2170
2171 case TGSI_OPCODE_FRC:
2172 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2173 FETCH( &r[0], 0, chan_index );
2174 micro_frc(&d[chan_index], &r[0]);
2175 }
2176 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2177 STORE(&d[chan_index], 0, chan_index);
2178 }
2179 break;
2180
2181 case TGSI_OPCODE_CLAMP:
2182 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2183 FETCH(&r[0], 0, chan_index);
2184 FETCH(&r[1], 1, chan_index);
2185 micro_max(&r[0], &r[0], &r[1]);
2186 FETCH(&r[1], 2, chan_index);
2187 micro_min(&d[chan_index], &r[0], &r[1]);
2188 }
2189 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2190 STORE(&d[chan_index], 0, chan_index);
2191 }
2192 break;
2193
2194 case TGSI_OPCODE_ROUND:
2195 case TGSI_OPCODE_ARR:
2196 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2197 FETCH( &r[0], 0, chan_index );
2198 micro_rnd(&d[chan_index], &r[0]);
2199 }
2200 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2201 STORE(&d[chan_index], 0, chan_index);
2202 }
2203 break;
2204
2205 case TGSI_OPCODE_EX2:
2206 FETCH(&r[0], 0, CHAN_X);
2207
2208 #if FAST_MATH
2209 micro_exp2( &r[0], &r[0] );
2210 #else
2211 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2212 #endif
2213
2214 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2215 STORE( &r[0], 0, chan_index );
2216 }
2217 break;
2218
2219 case TGSI_OPCODE_LG2:
2220 FETCH( &r[0], 0, CHAN_X );
2221 micro_lg2( &r[0], &r[0] );
2222 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2223 STORE( &r[0], 0, chan_index );
2224 }
2225 break;
2226
2227 case TGSI_OPCODE_POW:
2228 FETCH(&r[0], 0, CHAN_X);
2229 FETCH(&r[1], 1, CHAN_X);
2230
2231 micro_pow( &r[0], &r[0], &r[1] );
2232
2233 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2234 STORE( &r[0], 0, chan_index );
2235 }
2236 break;
2237
2238 case TGSI_OPCODE_XPD:
2239 FETCH(&r[0], 0, CHAN_Y);
2240 FETCH(&r[1], 1, CHAN_Z);
2241
2242 micro_mul( &r[2], &r[0], &r[1] );
2243
2244 FETCH(&r[3], 0, CHAN_Z);
2245 FETCH(&r[4], 1, CHAN_Y);
2246
2247 micro_mul( &r[5], &r[3], &r[4] );
2248 micro_sub(&d[CHAN_X], &r[2], &r[5]);
2249
2250 FETCH(&r[2], 1, CHAN_X);
2251
2252 micro_mul( &r[3], &r[3], &r[2] );
2253
2254 FETCH(&r[5], 0, CHAN_X);
2255
2256 micro_mul( &r[1], &r[1], &r[5] );
2257 micro_sub(&d[CHAN_Y], &r[3], &r[1]);
2258
2259 micro_mul( &r[5], &r[5], &r[4] );
2260 micro_mul( &r[0], &r[0], &r[2] );
2261 micro_sub(&d[CHAN_Z], &r[5], &r[0]);
2262
2263 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2264 STORE(&d[CHAN_X], 0, CHAN_X);
2265 }
2266 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2267 STORE(&d[CHAN_Y], 0, CHAN_Y);
2268 }
2269 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2270 STORE(&d[CHAN_Z], 0, CHAN_Z);
2271 }
2272 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2273 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2274 }
2275 break;
2276
2277 case TGSI_OPCODE_ABS:
2278 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2279 FETCH(&r[0], 0, chan_index);
2280 micro_abs(&d[chan_index], &r[0]);
2281 }
2282 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2283 STORE(&d[chan_index], 0, chan_index);
2284 }
2285 break;
2286
2287 case TGSI_OPCODE_RCC:
2288 FETCH(&r[0], 0, CHAN_X);
2289 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2290 micro_float_clamp(&r[0], &r[0]);
2291 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2292 STORE(&r[0], 0, chan_index);
2293 }
2294 break;
2295
2296 case TGSI_OPCODE_DPH:
2297 FETCH(&r[0], 0, CHAN_X);
2298 FETCH(&r[1], 1, CHAN_X);
2299
2300 micro_mul( &r[0], &r[0], &r[1] );
2301
2302 FETCH(&r[1], 0, CHAN_Y);
2303 FETCH(&r[2], 1, CHAN_Y);
2304
2305 micro_mul( &r[1], &r[1], &r[2] );
2306 micro_add( &r[0], &r[0], &r[1] );
2307
2308 FETCH(&r[1], 0, CHAN_Z);
2309 FETCH(&r[2], 1, CHAN_Z);
2310
2311 micro_mul( &r[1], &r[1], &r[2] );
2312 micro_add( &r[0], &r[0], &r[1] );
2313
2314 FETCH(&r[1], 1, CHAN_W);
2315
2316 micro_add( &r[0], &r[0], &r[1] );
2317
2318 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2319 STORE( &r[0], 0, chan_index );
2320 }
2321 break;
2322
2323 case TGSI_OPCODE_COS:
2324 FETCH(&r[0], 0, CHAN_X);
2325
2326 micro_cos( &r[0], &r[0] );
2327
2328 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2329 STORE( &r[0], 0, chan_index );
2330 }
2331 break;
2332
2333 case TGSI_OPCODE_DDX:
2334 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2335 FETCH( &r[0], 0, chan_index );
2336 micro_ddx(&d[chan_index], &r[0]);
2337 }
2338 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2339 STORE(&d[chan_index], 0, chan_index);
2340 }
2341 break;
2342
2343 case TGSI_OPCODE_DDY:
2344 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2345 FETCH( &r[0], 0, chan_index );
2346 micro_ddy(&d[chan_index], &r[0]);
2347 }
2348 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2349 STORE(&d[chan_index], 0, chan_index);
2350 }
2351 break;
2352
2353 case TGSI_OPCODE_KILP:
2354 exec_kilp (mach, inst);
2355 break;
2356
2357 case TGSI_OPCODE_KIL:
2358 exec_kil (mach, inst);
2359 break;
2360
2361 case TGSI_OPCODE_PK2H:
2362 assert (0);
2363 break;
2364
2365 case TGSI_OPCODE_PK2US:
2366 assert (0);
2367 break;
2368
2369 case TGSI_OPCODE_PK4B:
2370 assert (0);
2371 break;
2372
2373 case TGSI_OPCODE_PK4UB:
2374 assert (0);
2375 break;
2376
2377 case TGSI_OPCODE_RFL:
2378 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2379 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2380 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2381 /* r0 = dp3(src0, src0) */
2382 FETCH(&r[2], 0, CHAN_X);
2383 micro_mul(&r[0], &r[2], &r[2]);
2384 FETCH(&r[4], 0, CHAN_Y);
2385 micro_mul(&r[8], &r[4], &r[4]);
2386 micro_add(&r[0], &r[0], &r[8]);
2387 FETCH(&r[6], 0, CHAN_Z);
2388 micro_mul(&r[8], &r[6], &r[6]);
2389 micro_add(&r[0], &r[0], &r[8]);
2390
2391 /* r1 = dp3(src0, src1) */
2392 FETCH(&r[3], 1, CHAN_X);
2393 micro_mul(&r[1], &r[2], &r[3]);
2394 FETCH(&r[5], 1, CHAN_Y);
2395 micro_mul(&r[8], &r[4], &r[5]);
2396 micro_add(&r[1], &r[1], &r[8]);
2397 FETCH(&r[7], 1, CHAN_Z);
2398 micro_mul(&r[8], &r[6], &r[7]);
2399 micro_add(&r[1], &r[1], &r[8]);
2400
2401 /* r1 = 2 * r1 / r0 */
2402 micro_add(&r[1], &r[1], &r[1]);
2403 micro_div(&r[1], &r[1], &r[0]);
2404
2405 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2406 micro_mul(&r[2], &r[2], &r[1]);
2407 micro_sub(&r[2], &r[2], &r[3]);
2408 STORE(&r[2], 0, CHAN_X);
2409 }
2410 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2411 micro_mul(&r[4], &r[4], &r[1]);
2412 micro_sub(&r[4], &r[4], &r[5]);
2413 STORE(&r[4], 0, CHAN_Y);
2414 }
2415 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2416 micro_mul(&r[6], &r[6], &r[1]);
2417 micro_sub(&r[6], &r[6], &r[7]);
2418 STORE(&r[6], 0, CHAN_Z);
2419 }
2420 }
2421 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2422 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2423 }
2424 break;
2425
2426 case TGSI_OPCODE_SEQ:
2427 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2428 FETCH( &r[0], 0, chan_index );
2429 FETCH( &r[1], 1, chan_index );
2430 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2431 }
2432 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2433 STORE(&d[chan_index], 0, chan_index);
2434 }
2435 break;
2436
2437 case TGSI_OPCODE_SFL:
2438 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2439 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2440 }
2441 break;
2442
2443 case TGSI_OPCODE_SGT:
2444 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2445 FETCH( &r[0], 0, chan_index );
2446 FETCH( &r[1], 1, chan_index );
2447 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
2448 }
2449 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2450 STORE(&d[chan_index], 0, chan_index);
2451 }
2452 break;
2453
2454 case TGSI_OPCODE_SIN:
2455 FETCH( &r[0], 0, CHAN_X );
2456 micro_sin( &r[0], &r[0] );
2457 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2458 STORE( &r[0], 0, chan_index );
2459 }
2460 break;
2461
2462 case TGSI_OPCODE_SLE:
2463 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2464 FETCH( &r[0], 0, chan_index );
2465 FETCH( &r[1], 1, chan_index );
2466 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2467 }
2468 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2469 STORE(&d[chan_index], 0, chan_index);
2470 }
2471 break;
2472
2473 case TGSI_OPCODE_SNE:
2474 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2475 FETCH( &r[0], 0, chan_index );
2476 FETCH( &r[1], 1, chan_index );
2477 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
2478 }
2479 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2480 STORE(&d[chan_index], 0, chan_index);
2481 }
2482 break;
2483
2484 case TGSI_OPCODE_STR:
2485 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2486 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2487 }
2488 break;
2489
2490 case TGSI_OPCODE_TEX:
2491 /* simple texture lookup */
2492 /* src[0] = texcoord */
2493 /* src[1] = sampler unit */
2494 exec_tex(mach, inst, FALSE, FALSE);
2495 break;
2496
2497 case TGSI_OPCODE_TXB:
2498 /* Texture lookup with lod bias */
2499 /* src[0] = texcoord (src[0].w = LOD bias) */
2500 /* src[1] = sampler unit */
2501 exec_tex(mach, inst, TRUE, FALSE);
2502 break;
2503
2504 case TGSI_OPCODE_TXD:
2505 /* Texture lookup with explicit partial derivatives */
2506 /* src[0] = texcoord */
2507 /* src[1] = d[strq]/dx */
2508 /* src[2] = d[strq]/dy */
2509 /* src[3] = sampler unit */
2510 assert (0);
2511 break;
2512
2513 case TGSI_OPCODE_TXL:
2514 /* Texture lookup with explicit LOD */
2515 /* src[0] = texcoord (src[0].w = LOD) */
2516 /* src[1] = sampler unit */
2517 exec_tex(mach, inst, TRUE, FALSE);
2518 break;
2519
2520 case TGSI_OPCODE_TXP:
2521 /* Texture lookup with projection */
2522 /* src[0] = texcoord (src[0].w = projection) */
2523 /* src[1] = sampler unit */
2524 exec_tex(mach, inst, FALSE, TRUE);
2525 break;
2526
2527 case TGSI_OPCODE_UP2H:
2528 assert (0);
2529 break;
2530
2531 case TGSI_OPCODE_UP2US:
2532 assert (0);
2533 break;
2534
2535 case TGSI_OPCODE_UP4B:
2536 assert (0);
2537 break;
2538
2539 case TGSI_OPCODE_UP4UB:
2540 assert (0);
2541 break;
2542
2543 case TGSI_OPCODE_X2D:
2544 FETCH(&r[0], 1, CHAN_X);
2545 FETCH(&r[1], 1, CHAN_Y);
2546 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2547 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2548 FETCH(&r[2], 2, CHAN_X);
2549 micro_mul(&r[2], &r[2], &r[0]);
2550 FETCH(&r[3], 2, CHAN_Y);
2551 micro_mul(&r[3], &r[3], &r[1]);
2552 micro_add(&r[2], &r[2], &r[3]);
2553 FETCH(&r[3], 0, CHAN_X);
2554 micro_add(&d[CHAN_X], &r[2], &r[3]);
2555
2556 }
2557 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2558 IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2559 FETCH(&r[2], 2, CHAN_Z);
2560 micro_mul(&r[2], &r[2], &r[0]);
2561 FETCH(&r[3], 2, CHAN_W);
2562 micro_mul(&r[3], &r[3], &r[1]);
2563 micro_add(&r[2], &r[2], &r[3]);
2564 FETCH(&r[3], 0, CHAN_Y);
2565 micro_add(&d[CHAN_Y], &r[2], &r[3]);
2566
2567 }
2568 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2569 STORE(&d[CHAN_X], 0, CHAN_X);
2570 }
2571 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2572 STORE(&d[CHAN_Y], 0, CHAN_Y);
2573 }
2574 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2575 STORE(&d[CHAN_X], 0, CHAN_Z);
2576 }
2577 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2578 STORE(&d[CHAN_Y], 0, CHAN_W);
2579 }
2580 break;
2581
2582 case TGSI_OPCODE_ARA:
2583 assert (0);
2584 break;
2585
2586 case TGSI_OPCODE_BRA:
2587 assert (0);
2588 break;
2589
2590 case TGSI_OPCODE_CAL:
2591 /* skip the call if no execution channels are enabled */
2592 if (mach->ExecMask) {
2593 /* do the call */
2594
2595 /* First, record the depths of the execution stacks.
2596 * This is important for deeply nested/looped return statements.
2597 * We have to unwind the stacks by the correct amount. For a
2598 * real code generator, we could determine the number of entries
2599 * to pop off each stack with simple static analysis and avoid
2600 * implementing this data structure at run time.
2601 */
2602 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
2603 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
2604 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
2605 /* note that PC was already incremented above */
2606 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
2607
2608 mach->CallStackTop++;
2609
2610 /* Second, push the Cond, Loop, Cont, Func stacks */
2611 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2612 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2613 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2614 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2615 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2616 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2617 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2618 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2619
2620 /* Finally, jump to the subroutine */
2621 *pc = inst->InstructionExtLabel.Label;
2622 }
2623 break;
2624
2625 case TGSI_OPCODE_RET:
2626 mach->FuncMask &= ~mach->ExecMask;
2627 UPDATE_EXEC_MASK(mach);
2628
2629 if (mach->FuncMask == 0x0) {
2630 /* really return now (otherwise, keep executing) */
2631
2632 if (mach->CallStackTop == 0) {
2633 /* returning from main() */
2634 *pc = -1;
2635 return;
2636 }
2637
2638 assert(mach->CallStackTop > 0);
2639 mach->CallStackTop--;
2640
2641 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
2642 mach->CondMask = mach->CondStack[mach->CondStackTop];
2643
2644 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
2645 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
2646
2647 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
2648 mach->ContMask = mach->ContStack[mach->ContStackTop];
2649
2650 assert(mach->FuncStackTop > 0);
2651 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2652
2653 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
2654
2655 UPDATE_EXEC_MASK(mach);
2656 }
2657 break;
2658
2659 case TGSI_OPCODE_SSG:
2660 /* TGSI_OPCODE_SGN */
2661 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2662 FETCH( &r[0], 0, chan_index );
2663 micro_sgn(&d[chan_index], &r[0]);
2664 }
2665 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2666 STORE(&d[chan_index], 0, chan_index);
2667 }
2668 break;
2669
2670 case TGSI_OPCODE_CMP:
2671 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2672 FETCH(&r[0], 0, chan_index);
2673 FETCH(&r[1], 1, chan_index);
2674 FETCH(&r[2], 2, chan_index);
2675 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
2676 }
2677 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2678 STORE(&d[chan_index], 0, chan_index);
2679 }
2680 break;
2681
2682 case TGSI_OPCODE_SCS:
2683 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2684 FETCH( &r[0], 0, CHAN_X );
2685 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2686 micro_cos(&r[1], &r[0]);
2687 STORE(&r[1], 0, CHAN_X);
2688 }
2689 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2690 micro_sin(&r[1], &r[0]);
2691 STORE(&r[1], 0, CHAN_Y);
2692 }
2693 }
2694 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2695 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2696 }
2697 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2698 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2699 }
2700 break;
2701
2702 case TGSI_OPCODE_NRM:
2703 /* 3-component vector normalize */
2704 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2705 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2706 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2707 /* r3 = sqrt(dp3(src0, src0)) */
2708 FETCH(&r[0], 0, CHAN_X);
2709 micro_mul(&r[3], &r[0], &r[0]);
2710 FETCH(&r[1], 0, CHAN_Y);
2711 micro_mul(&r[4], &r[1], &r[1]);
2712 micro_add(&r[3], &r[3], &r[4]);
2713 FETCH(&r[2], 0, CHAN_Z);
2714 micro_mul(&r[4], &r[2], &r[2]);
2715 micro_add(&r[3], &r[3], &r[4]);
2716 micro_sqrt(&r[3], &r[3]);
2717
2718 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2719 micro_div(&r[0], &r[0], &r[3]);
2720 STORE(&r[0], 0, CHAN_X);
2721 }
2722 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2723 micro_div(&r[1], &r[1], &r[3]);
2724 STORE(&r[1], 0, CHAN_Y);
2725 }
2726 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2727 micro_div(&r[2], &r[2], &r[3]);
2728 STORE(&r[2], 0, CHAN_Z);
2729 }
2730 }
2731 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2732 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2733 }
2734 break;
2735
2736 case TGSI_OPCODE_NRM4:
2737 /* 4-component vector normalize */
2738 {
2739 union tgsi_exec_channel tmp, dot;
2740
2741 /* tmp = dp4(src0, src0): */
2742 FETCH( &r[0], 0, CHAN_X );
2743 micro_mul( &tmp, &r[0], &r[0] );
2744
2745 FETCH( &r[1], 0, CHAN_Y );
2746 micro_mul( &dot, &r[1], &r[1] );
2747 micro_add( &tmp, &tmp, &dot );
2748
2749 FETCH( &r[2], 0, CHAN_Z );
2750 micro_mul( &dot, &r[2], &r[2] );
2751 micro_add( &tmp, &tmp, &dot );
2752
2753 FETCH( &r[3], 0, CHAN_W );
2754 micro_mul( &dot, &r[3], &r[3] );
2755 micro_add( &tmp, &tmp, &dot );
2756
2757 /* tmp = 1 / sqrt(tmp) */
2758 micro_sqrt( &tmp, &tmp );
2759 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2760
2761 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2762 /* chan = chan * tmp */
2763 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2764 STORE( &r[chan_index], 0, chan_index );
2765 }
2766 }
2767 break;
2768
2769 case TGSI_OPCODE_DIV:
2770 assert( 0 );
2771 break;
2772
2773 case TGSI_OPCODE_DP2:
2774 FETCH( &r[0], 0, CHAN_X );
2775 FETCH( &r[1], 1, CHAN_X );
2776 micro_mul( &r[0], &r[0], &r[1] );
2777
2778 FETCH( &r[1], 0, CHAN_Y );
2779 FETCH( &r[2], 1, CHAN_Y );
2780 micro_mul( &r[1], &r[1], &r[2] );
2781 micro_add( &r[0], &r[0], &r[1] );
2782
2783 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2784 STORE( &r[0], 0, chan_index );
2785 }
2786 break;
2787
2788 case TGSI_OPCODE_IF:
2789 /* push CondMask */
2790 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2791 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2792 FETCH( &r[0], 0, CHAN_X );
2793 /* update CondMask */
2794 if( ! r[0].u[0] ) {
2795 mach->CondMask &= ~0x1;
2796 }
2797 if( ! r[0].u[1] ) {
2798 mach->CondMask &= ~0x2;
2799 }
2800 if( ! r[0].u[2] ) {
2801 mach->CondMask &= ~0x4;
2802 }
2803 if( ! r[0].u[3] ) {
2804 mach->CondMask &= ~0x8;
2805 }
2806 UPDATE_EXEC_MASK(mach);
2807 /* Todo: If CondMask==0, jump to ELSE */
2808 break;
2809
2810 case TGSI_OPCODE_ELSE:
2811 /* invert CondMask wrt previous mask */
2812 {
2813 uint prevMask;
2814 assert(mach->CondStackTop > 0);
2815 prevMask = mach->CondStack[mach->CondStackTop - 1];
2816 mach->CondMask = ~mach->CondMask & prevMask;
2817 UPDATE_EXEC_MASK(mach);
2818 /* Todo: If CondMask==0, jump to ENDIF */
2819 }
2820 break;
2821
2822 case TGSI_OPCODE_ENDIF:
2823 /* pop CondMask */
2824 assert(mach->CondStackTop > 0);
2825 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2826 UPDATE_EXEC_MASK(mach);
2827 break;
2828
2829 case TGSI_OPCODE_END:
2830 /* halt execution */
2831 *pc = -1;
2832 break;
2833
2834 case TGSI_OPCODE_REP:
2835 assert (0);
2836 break;
2837
2838 case TGSI_OPCODE_ENDREP:
2839 assert (0);
2840 break;
2841
2842 case TGSI_OPCODE_PUSHA:
2843 assert (0);
2844 break;
2845
2846 case TGSI_OPCODE_POPA:
2847 assert (0);
2848 break;
2849
2850 case TGSI_OPCODE_CEIL:
2851 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2852 FETCH( &r[0], 0, chan_index );
2853 micro_ceil(&d[chan_index], &r[0]);
2854 }
2855 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2856 STORE(&d[chan_index], 0, chan_index);
2857 }
2858 break;
2859
2860 case TGSI_OPCODE_I2F:
2861 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2862 FETCH( &r[0], 0, chan_index );
2863 micro_i2f(&d[chan_index], &r[0]);
2864 }
2865 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2866 STORE(&d[chan_index], 0, chan_index);
2867 }
2868 break;
2869
2870 case TGSI_OPCODE_NOT:
2871 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2872 FETCH( &r[0], 0, chan_index );
2873 micro_not(&d[chan_index], &r[0]);
2874 }
2875 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2876 STORE(&d[chan_index], 0, chan_index);
2877 }
2878 break;
2879
2880 case TGSI_OPCODE_TRUNC:
2881 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2882 FETCH( &r[0], 0, chan_index );
2883 micro_trunc(&d[chan_index], &r[0]);
2884 }
2885 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2886 STORE(&d[chan_index], 0, chan_index);
2887 }
2888 break;
2889
2890 case TGSI_OPCODE_SHL:
2891 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2892 FETCH( &r[0], 0, chan_index );
2893 FETCH( &r[1], 1, chan_index );
2894 micro_shl(&d[chan_index], &r[0], &r[1]);
2895 }
2896 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2897 STORE(&d[chan_index], 0, chan_index);
2898 }
2899 break;
2900
2901 case TGSI_OPCODE_SHR:
2902 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2903 FETCH( &r[0], 0, chan_index );
2904 FETCH( &r[1], 1, chan_index );
2905 micro_ishr(&d[chan_index], &r[0], &r[1]);
2906 }
2907 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2908 STORE(&d[chan_index], 0, chan_index);
2909 }
2910 break;
2911
2912 case TGSI_OPCODE_AND:
2913 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2914 FETCH( &r[0], 0, chan_index );
2915 FETCH( &r[1], 1, chan_index );
2916 micro_and(&d[chan_index], &r[0], &r[1]);
2917 }
2918 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2919 STORE(&d[chan_index], 0, chan_index);
2920 }
2921 break;
2922
2923 case TGSI_OPCODE_OR:
2924 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2925 FETCH( &r[0], 0, chan_index );
2926 FETCH( &r[1], 1, chan_index );
2927 micro_or(&d[chan_index], &r[0], &r[1]);
2928 }
2929 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2930 STORE(&d[chan_index], 0, chan_index);
2931 }
2932 break;
2933
2934 case TGSI_OPCODE_MOD:
2935 assert (0);
2936 break;
2937
2938 case TGSI_OPCODE_XOR:
2939 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2940 FETCH( &r[0], 0, chan_index );
2941 FETCH( &r[1], 1, chan_index );
2942 micro_xor(&d[chan_index], &r[0], &r[1]);
2943 }
2944 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2945 STORE(&d[chan_index], 0, chan_index);
2946 }
2947 break;
2948
2949 case TGSI_OPCODE_SAD:
2950 assert (0);
2951 break;
2952
2953 case TGSI_OPCODE_TXF:
2954 assert (0);
2955 break;
2956
2957 case TGSI_OPCODE_TXQ:
2958 assert (0);
2959 break;
2960
2961 case TGSI_OPCODE_EMIT:
2962 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2963 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2964 break;
2965
2966 case TGSI_OPCODE_ENDPRIM:
2967 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2968 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2969 break;
2970
2971 case TGSI_OPCODE_BGNFOR:
2972 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2973 for (chan_index = 0; chan_index < 3; chan_index++) {
2974 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
2975 }
2976 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
2977 ++mach->LoopCounterStackTop;
2978 /* fall-through (for now) */
2979 case TGSI_OPCODE_BGNLOOP:
2980 /* push LoopMask and ContMasks */
2981 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2982 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2983 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2984 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2985 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2986 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
2987 break;
2988
2989 case TGSI_OPCODE_ENDFOR:
2990 assert(mach->LoopCounterStackTop > 0);
2991 micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
2992 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
2993 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2994 /* update LoopMask */
2995 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) {
2996 mach->LoopMask &= ~0x1;
2997 }
2998 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) {
2999 mach->LoopMask &= ~0x2;
3000 }
3001 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) {
3002 mach->LoopMask &= ~0x4;
3003 }
3004 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) {
3005 mach->LoopMask &= ~0x8;
3006 }
3007 micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3008 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3009 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
3010 assert(mach->LoopLabelStackTop > 0);
3011 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
3012 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
3013 /* Restore ContMask, but don't pop */
3014 assert(mach->ContStackTop > 0);
3015 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3016 UPDATE_EXEC_MASK(mach);
3017 if (mach->ExecMask) {
3018 /* repeat loop: jump to instruction just past BGNLOOP */
3019 assert(mach->LoopLabelStackTop > 0);
3020 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3021 }
3022 else {
3023 /* exit loop: pop LoopMask */
3024 assert(mach->LoopStackTop > 0);
3025 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3026 /* pop ContMask */
3027 assert(mach->ContStackTop > 0);
3028 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3029 assert(mach->LoopLabelStackTop > 0);
3030 --mach->LoopLabelStackTop;
3031 assert(mach->LoopCounterStackTop > 0);
3032 --mach->LoopCounterStackTop;
3033 }
3034 UPDATE_EXEC_MASK(mach);
3035 break;
3036
3037 case TGSI_OPCODE_ENDLOOP:
3038 /* Restore ContMask, but don't pop */
3039 assert(mach->ContStackTop > 0);
3040 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3041 UPDATE_EXEC_MASK(mach);
3042 if (mach->ExecMask) {
3043 /* repeat loop: jump to instruction just past BGNLOOP */
3044 assert(mach->LoopLabelStackTop > 0);
3045 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3046 }
3047 else {
3048 /* exit loop: pop LoopMask */
3049 assert(mach->LoopStackTop > 0);
3050 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3051 /* pop ContMask */
3052 assert(mach->ContStackTop > 0);
3053 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3054 assert(mach->LoopLabelStackTop > 0);
3055 --mach->LoopLabelStackTop;
3056 }
3057 UPDATE_EXEC_MASK(mach);
3058 break;
3059
3060 case TGSI_OPCODE_BRK:
3061 /* turn off loop channels for each enabled exec channel */
3062 mach->LoopMask &= ~mach->ExecMask;
3063 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3064 UPDATE_EXEC_MASK(mach);
3065 break;
3066
3067 case TGSI_OPCODE_CONT:
3068 /* turn off cont channels for each enabled exec channel */
3069 mach->ContMask &= ~mach->ExecMask;
3070 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3071 UPDATE_EXEC_MASK(mach);
3072 break;
3073
3074 case TGSI_OPCODE_BGNSUB:
3075 /* no-op */
3076 break;
3077
3078 case TGSI_OPCODE_ENDSUB:
3079 /* no-op */
3080 break;
3081
3082 case TGSI_OPCODE_NOP:
3083 break;
3084
3085 default:
3086 assert( 0 );
3087 }
3088 }
3089
3090
3091 /**
3092 * Run TGSI interpreter.
3093 * \return bitmask of "alive" quad components
3094 */
3095 uint
3096 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3097 {
3098 uint i;
3099 int pc = 0;
3100
3101 mach->CondMask = 0xf;
3102 mach->LoopMask = 0xf;
3103 mach->ContMask = 0xf;
3104 mach->FuncMask = 0xf;
3105 mach->ExecMask = 0xf;
3106
3107 assert(mach->CondStackTop == 0);
3108 assert(mach->LoopStackTop == 0);
3109 assert(mach->ContStackTop == 0);
3110 assert(mach->CallStackTop == 0);
3111
3112 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3113 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3114
3115 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3116 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3117 mach->Primitives[0] = 0;
3118 }
3119
3120 for (i = 0; i < QUAD_SIZE; i++) {
3121 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3122 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3123 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3124 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3125 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3126 }
3127
3128 /* execute declarations (interpolants) */
3129 for (i = 0; i < mach->NumDeclarations; i++) {
3130 exec_declaration( mach, mach->Declarations+i );
3131 }
3132
3133 /* execute instructions, until pc is set to -1 */
3134 while (pc != -1) {
3135 assert(pc < (int) mach->NumInstructions);
3136 exec_instruction( mach, mach->Instructions + pc, &pc );
3137 }
3138
3139 #if 0
3140 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3141 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3142 /*
3143 * Scale back depth component.
3144 */
3145 for (i = 0; i < 4; i++)
3146 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3147 }
3148 #endif
3149
3150 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3151 }