tgsi: Bring BGNFOR/ENDFOR implementation up to spec.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from CondMask, LoopMask, ContMask and FuncMask
44 * (see UPDATE_EXEC_MASK). The first three are controlled by the flow control
45 * instructions (namely: IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_dump.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
60 #include "util/u_memory.h"
61 #include "util/u_math.h"
62
63 #define FAST_MATH 1
64
65 #define TILE_TOP_LEFT 0
66 #define TILE_TOP_RIGHT 1
67 #define TILE_BOTTOM_LEFT 2
68 #define TILE_BOTTOM_RIGHT 3
69
70 #define CHAN_X 0
71 #define CHAN_Y 1
72 #define CHAN_Z 2
73 #define CHAN_W 3
74
75 /*
76 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
77 */
78 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
79 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
80 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
81 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
82 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
83 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
84 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
85 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
86 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
87 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
88 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
89 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
90 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
91 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
92 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
93 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
94 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
95 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
96 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
97 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
98 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
99 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
100 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
101 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
102 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
103 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
104 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
105 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
106 #define TEMP_R0 TGSI_EXEC_TEMP_R0
107 #define TEMP_P0 TGSI_EXEC_TEMP_P0
108
/* Non-zero when bit CHAN is set in the write mask of destination DST. */
#define DST_CHANNEL_ENABLED_(INST, DST, CHAN) \
   ((INST).FullDstRegisters[DST].DstRegister.WriteMask & (1 << (CHAN)))

/* Write-mask test for the first destination register of INST. */
#define IS_CHANNEL_ENABLED(INST, CHAN) \
   DST_CHANNEL_ENABLED_(INST, 0, CHAN)

/* Write-mask test for the second destination register of INST. */
#define IS_CHANNEL_ENABLED2(INST, CHAN) \
   DST_CHANNEL_ENABLED_(INST, 1, CHAN)

/*
 * Loop headers: the statement (or braced block) that follows runs once for
 * every channel index CHAN whose bit is set in the respective write mask.
 * They expand to a 'for' followed by an 'if', so beware of dangling 'else'.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) \
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) \
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
122
123
/**
 * Recompute the execution mask as the bitwise AND of the conditional, loop,
 * continue and function masks.
 *
 * MACH is a pointer expression; it is evaluated several times, so it must
 * not have side effects.  The argument and the whole expansion are
 * parenthesized so the macro is safe with arguments such as '&machine' and
 * inside larger expressions.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
127
128
129 static const union tgsi_exec_channel ZeroVec =
130 { { 0.0, 0.0, 0.0, 0.0 } };
131
132
#ifdef DEBUG
/**
 * Debug helper: assert that none of the four float lanes of \p chan is
 * infinite or NaN.
 */
static void
check_inf_or_nan(const union tgsi_exec_channel *chan)
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      assert(!util_is_inf_or_nan(chan->f[lane]));
   }
}
#endif
143
144
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of \p chan on one line,
 * prefixed with \p msg.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   /* Floats are promoted to double for the variadic call anyway. */
   const double lane0 = chan->f[0];
   const double lane1 = chan->f[1];
   const double lane2 = chan->f[2];
   const double lane3 = chan->f[3];

   debug_printf("%s = {%f, %f, %f, %f}\n", msg, lane0, lane1, lane2, lane3);
}
#endif
153
154
#ifdef DEBUG
/**
 * Debug helper: dump temporary register \p index of \p mach, one line per
 * X/Y/Z/W component, each showing its four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   const struct tgsi_exec_vector *vec = &mach->Temps[index];
   int comp = 0;

   debug_printf("Temp[%u] =\n", index);

   while (comp < 4) {
      const double lane0 = vec->xyzw[comp].f[0];
      const double lane1 = vec->xyzw[comp].f[1];
      const double lane2 = vec->xyzw[comp].f[2];
      const double lane3 = vec->xyzw[comp].f[3];

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   "XYZW"[comp], lane0, lane1, lane2, lane3);
      comp++;
   }
}
#endif
172
173
174 /**
175 * Check if there's a potential src/dst register data dependency when
176 * using SOA execution.
177 * Example:
178 * MOV T, T.yxwz;
179 * This would expand into:
180 * MOV t0, t1;
181 * MOV t1, t0;
182 * MOV t2, t3;
183 * MOV t3, t2;
184 * The second instruction will have the wrong value for t0 if executed as-is.
185 */
186 boolean
187 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
188 {
189 uint i, chan;
190
191 uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
192 if (writemask == TGSI_WRITEMASK_X ||
193 writemask == TGSI_WRITEMASK_Y ||
194 writemask == TGSI_WRITEMASK_Z ||
195 writemask == TGSI_WRITEMASK_W ||
196 writemask == TGSI_WRITEMASK_NONE) {
197 /* no chance of data dependency */
198 return FALSE;
199 }
200
201 /* loop over src regs */
202 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
203 if ((inst->FullSrcRegisters[i].SrcRegister.File ==
204 inst->FullDstRegisters[0].DstRegister.File) &&
205 (inst->FullSrcRegisters[i].SrcRegister.Index ==
206 inst->FullDstRegisters[0].DstRegister.Index)) {
207 /* loop over dest channels */
208 uint channelsWritten = 0x0;
209 FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
210 /* check if we're reading a channel that's been written */
211 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan);
212 if (channelsWritten & (1 << swizzle)) {
213 return TRUE;
214 }
215
216 channelsWritten |= (1 << chan);
217 }
218 }
219 }
220 return FALSE;
221 }
222
223
224 /**
225 * Initialize machine state by expanding tokens to full instructions,
226 * allocating temporary storage, setting up constants, etc.
227 * After this, we can call tgsi_exec_machine_run() many times.
228 */
229 void
230 tgsi_exec_machine_bind_shader(
231 struct tgsi_exec_machine *mach,
232 const struct tgsi_token *tokens,
233 uint numSamplers,
234 struct tgsi_sampler **samplers)
235 {
236 uint k;
237 struct tgsi_parse_context parse;
238 struct tgsi_exec_labels *labels = &mach->Labels;
239 struct tgsi_full_instruction *instructions;
240 struct tgsi_full_declaration *declarations;
241 uint maxInstructions = 10, numInstructions = 0;
242 uint maxDeclarations = 10, numDeclarations = 0;
243 uint instno = 0;
244
245 #if 0
246 tgsi_dump(tokens, 0);
247 #endif
248
249 util_init_math();
250
251 mach->Tokens = tokens;
252 mach->Samplers = samplers;
253
254 k = tgsi_parse_init (&parse, mach->Tokens);
255 if (k != TGSI_PARSE_OK) {
256 debug_printf( "Problem parsing!\n" );
257 return;
258 }
259
260 mach->Processor = parse.FullHeader.Processor.Processor;
261 mach->ImmLimit = 0;
262 labels->count = 0;
263
264 declarations = (struct tgsi_full_declaration *)
265 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
266
267 if (!declarations) {
268 return;
269 }
270
271 instructions = (struct tgsi_full_instruction *)
272 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
273
274 if (!instructions) {
275 FREE( declarations );
276 return;
277 }
278
279 while( !tgsi_parse_end_of_tokens( &parse ) ) {
280 uint pointer = parse.Position;
281 uint i;
282
283 tgsi_parse_token( &parse );
284 switch( parse.FullToken.Token.Type ) {
285 case TGSI_TOKEN_TYPE_DECLARATION:
286 /* save expanded declaration */
287 if (numDeclarations == maxDeclarations) {
288 declarations = REALLOC(declarations,
289 maxDeclarations
290 * sizeof(struct tgsi_full_declaration),
291 (maxDeclarations + 10)
292 * sizeof(struct tgsi_full_declaration));
293 maxDeclarations += 10;
294 }
295 memcpy(declarations + numDeclarations,
296 &parse.FullToken.FullDeclaration,
297 sizeof(declarations[0]));
298 numDeclarations++;
299 break;
300
301 case TGSI_TOKEN_TYPE_IMMEDIATE:
302 {
303 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
304 assert( size <= 4 );
305 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
306
307 for( i = 0; i < size; i++ ) {
308 mach->Imms[mach->ImmLimit][i] =
309 parse.FullToken.FullImmediate.u[i].Float;
310 }
311 mach->ImmLimit += 1;
312 }
313 break;
314
315 case TGSI_TOKEN_TYPE_INSTRUCTION:
316 assert( labels->count < MAX_LABELS );
317
318 labels->labels[labels->count][0] = instno;
319 labels->labels[labels->count][1] = pointer;
320 labels->count++;
321
322 /* save expanded instruction */
323 if (numInstructions == maxInstructions) {
324 instructions = REALLOC(instructions,
325 maxInstructions
326 * sizeof(struct tgsi_full_instruction),
327 (maxInstructions + 10)
328 * sizeof(struct tgsi_full_instruction));
329 maxInstructions += 10;
330 }
331
332 memcpy(instructions + numInstructions,
333 &parse.FullToken.FullInstruction,
334 sizeof(instructions[0]));
335
336 numInstructions++;
337 break;
338
339 default:
340 assert( 0 );
341 }
342 }
343 tgsi_parse_free (&parse);
344
345 if (mach->Declarations) {
346 FREE( mach->Declarations );
347 }
348 mach->Declarations = declarations;
349 mach->NumDeclarations = numDeclarations;
350
351 if (mach->Instructions) {
352 FREE( mach->Instructions );
353 }
354 mach->Instructions = instructions;
355 mach->NumInstructions = numInstructions;
356 }
357
358
359 struct tgsi_exec_machine *
360 tgsi_exec_machine_create( void )
361 {
362 struct tgsi_exec_machine *mach;
363 uint i;
364
365 mach = align_malloc( sizeof *mach, 16 );
366 if (!mach)
367 goto fail;
368
369 memset(mach, 0, sizeof(*mach));
370
371 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
372
373 /* Setup constants. */
374 for( i = 0; i < 4; i++ ) {
375 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
376 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
377 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
378 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
379 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
380 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
381 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
382 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
383 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
384 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
385 }
386
387 #ifdef DEBUG
388 /* silence warnings */
389 (void) print_chan;
390 (void) print_temp;
391 #endif
392
393 return mach;
394
395 fail:
396 align_free(mach);
397 return NULL;
398 }
399
400
401 void
402 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
403 {
404 if (mach) {
405 FREE(mach->Instructions);
406 FREE(mach->Declarations);
407 }
408
409 align_free(mach);
410 }
411
412
413 static void
414 micro_abs(
415 union tgsi_exec_channel *dst,
416 const union tgsi_exec_channel *src )
417 {
418 dst->f[0] = fabsf( src->f[0] );
419 dst->f[1] = fabsf( src->f[1] );
420 dst->f[2] = fabsf( src->f[2] );
421 dst->f[3] = fabsf( src->f[3] );
422 }
423
424 static void
425 micro_add(
426 union tgsi_exec_channel *dst,
427 const union tgsi_exec_channel *src0,
428 const union tgsi_exec_channel *src1 )
429 {
430 dst->f[0] = src0->f[0] + src1->f[0];
431 dst->f[1] = src0->f[1] + src1->f[1];
432 dst->f[2] = src0->f[2] + src1->f[2];
433 dst->f[3] = src0->f[3] + src1->f[3];
434 }
435
#if 0
/** dst = src0 + src1, per signed integer lane (currently disabled). */
static void
micro_iadd(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] + src1->i[lane];
   }
}
#endif
449
450 static void
451 micro_and(
452 union tgsi_exec_channel *dst,
453 const union tgsi_exec_channel *src0,
454 const union tgsi_exec_channel *src1 )
455 {
456 dst->u[0] = src0->u[0] & src1->u[0];
457 dst->u[1] = src0->u[1] & src1->u[1];
458 dst->u[2] = src0->u[2] & src1->u[2];
459 dst->u[3] = src0->u[3] & src1->u[3];
460 }
461
462 static void
463 micro_ceil(
464 union tgsi_exec_channel *dst,
465 const union tgsi_exec_channel *src )
466 {
467 dst->f[0] = ceilf( src->f[0] );
468 dst->f[1] = ceilf( src->f[1] );
469 dst->f[2] = ceilf( src->f[2] );
470 dst->f[3] = ceilf( src->f[3] );
471 }
472
473 static void
474 micro_cos(
475 union tgsi_exec_channel *dst,
476 const union tgsi_exec_channel *src )
477 {
478 dst->f[0] = cosf( src->f[0] );
479 dst->f[1] = cosf( src->f[1] );
480 dst->f[2] = cosf( src->f[2] );
481 dst->f[3] = cosf( src->f[3] );
482 }
483
484 static void
485 micro_ddx(
486 union tgsi_exec_channel *dst,
487 const union tgsi_exec_channel *src )
488 {
489 dst->f[0] =
490 dst->f[1] =
491 dst->f[2] =
492 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
493 }
494
495 static void
496 micro_ddy(
497 union tgsi_exec_channel *dst,
498 const union tgsi_exec_channel *src )
499 {
500 dst->f[0] =
501 dst->f[1] =
502 dst->f[2] =
503 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
504 }
505
506 static void
507 micro_div(
508 union tgsi_exec_channel *dst,
509 const union tgsi_exec_channel *src0,
510 const union tgsi_exec_channel *src1 )
511 {
512 if (src1->f[0] != 0) {
513 dst->f[0] = src0->f[0] / src1->f[0];
514 }
515 if (src1->f[1] != 0) {
516 dst->f[1] = src0->f[1] / src1->f[1];
517 }
518 if (src1->f[2] != 0) {
519 dst->f[2] = src0->f[2] / src1->f[2];
520 }
521 if (src1->f[3] != 0) {
522 dst->f[3] = src0->f[3] / src1->f[3];
523 }
524 }
525
#if 0
/**
 * dst = src0 / src1, per unsigned integer lane (currently disabled).
 * No check is made for a zero divisor.
 */
static void
micro_udiv(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] / src1->u[lane];
   }
}
#endif
539
540 static void
541 micro_eq(
542 union tgsi_exec_channel *dst,
543 const union tgsi_exec_channel *src0,
544 const union tgsi_exec_channel *src1,
545 const union tgsi_exec_channel *src2,
546 const union tgsi_exec_channel *src3 )
547 {
548 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
549 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
550 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
551 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
552 }
553
#if 0
/** dst = (src0 == src1) ? src2 : src3, per signed integer lane (disabled). */
static void
micro_ieq(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] == src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
569
570 static void
571 micro_exp2(
572 union tgsi_exec_channel *dst,
573 const union tgsi_exec_channel *src)
574 {
575 #if FAST_MATH
576 dst->f[0] = util_fast_exp2( src->f[0] );
577 dst->f[1] = util_fast_exp2( src->f[1] );
578 dst->f[2] = util_fast_exp2( src->f[2] );
579 dst->f[3] = util_fast_exp2( src->f[3] );
580 #else
581 dst->f[0] = powf( 2.0f, src->f[0] );
582 dst->f[1] = powf( 2.0f, src->f[1] );
583 dst->f[2] = powf( 2.0f, src->f[2] );
584 dst->f[3] = powf( 2.0f, src->f[3] );
585 #endif
586 }
587
#if 0
/** dst = (uint) src, float to unsigned by C cast, per lane (disabled). */
static void
micro_f2ut(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = (uint) src->f[lane];
   }
}
#endif
600
601 static void
602 micro_float_clamp(union tgsi_exec_channel *dst,
603 const union tgsi_exec_channel *src)
604 {
605 uint i;
606
607 for (i = 0; i < 4; i++) {
608 if (src->f[i] > 0.0f) {
609 if (src->f[i] > 1.884467e+019f)
610 dst->f[i] = 1.884467e+019f;
611 else if (src->f[i] < 5.42101e-020f)
612 dst->f[i] = 5.42101e-020f;
613 else
614 dst->f[i] = src->f[i];
615 }
616 else {
617 if (src->f[i] < -1.884467e+019f)
618 dst->f[i] = -1.884467e+019f;
619 else if (src->f[i] > -5.42101e-020f)
620 dst->f[i] = -5.42101e-020f;
621 else
622 dst->f[i] = src->f[i];
623 }
624 }
625 }
626
627 static void
628 micro_flr(
629 union tgsi_exec_channel *dst,
630 const union tgsi_exec_channel *src )
631 {
632 dst->f[0] = floorf( src->f[0] );
633 dst->f[1] = floorf( src->f[1] );
634 dst->f[2] = floorf( src->f[2] );
635 dst->f[3] = floorf( src->f[3] );
636 }
637
638 static void
639 micro_frc(
640 union tgsi_exec_channel *dst,
641 const union tgsi_exec_channel *src )
642 {
643 dst->f[0] = src->f[0] - floorf( src->f[0] );
644 dst->f[1] = src->f[1] - floorf( src->f[1] );
645 dst->f[2] = src->f[2] - floorf( src->f[2] );
646 dst->f[3] = src->f[3] - floorf( src->f[3] );
647 }
648
649 static void
650 micro_i2f(
651 union tgsi_exec_channel *dst,
652 const union tgsi_exec_channel *src )
653 {
654 dst->f[0] = (float) src->i[0];
655 dst->f[1] = (float) src->i[1];
656 dst->f[2] = (float) src->i[2];
657 dst->f[3] = (float) src->i[3];
658 }
659
660 static void
661 micro_lg2(
662 union tgsi_exec_channel *dst,
663 const union tgsi_exec_channel *src )
664 {
665 #if FAST_MATH
666 dst->f[0] = util_fast_log2( src->f[0] );
667 dst->f[1] = util_fast_log2( src->f[1] );
668 dst->f[2] = util_fast_log2( src->f[2] );
669 dst->f[3] = util_fast_log2( src->f[3] );
670 #else
671 dst->f[0] = logf( src->f[0] ) * 1.442695f;
672 dst->f[1] = logf( src->f[1] ) * 1.442695f;
673 dst->f[2] = logf( src->f[2] ) * 1.442695f;
674 dst->f[3] = logf( src->f[3] ) * 1.442695f;
675 #endif
676 }
677
678 static void
679 micro_le(
680 union tgsi_exec_channel *dst,
681 const union tgsi_exec_channel *src0,
682 const union tgsi_exec_channel *src1,
683 const union tgsi_exec_channel *src2,
684 const union tgsi_exec_channel *src3 )
685 {
686 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
687 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
688 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
689 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
690 }
691
692 static void
693 micro_lt(
694 union tgsi_exec_channel *dst,
695 const union tgsi_exec_channel *src0,
696 const union tgsi_exec_channel *src1,
697 const union tgsi_exec_channel *src2,
698 const union tgsi_exec_channel *src3 )
699 {
700 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
701 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
702 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
703 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
704 }
705
#if 0
/** dst = (src0 < src1) ? src2 : src3, per signed integer lane (disabled). */
static void
micro_ilt(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
721
#if 0
/** dst = (src0 < src1) ? src2 : src3, per unsigned integer lane (disabled). */
static void
micro_ult(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src2->u[lane];
      }
      else {
         dst->u[lane] = src3->u[lane];
      }
   }
}
#endif
737
738 static void
739 micro_max(
740 union tgsi_exec_channel *dst,
741 const union tgsi_exec_channel *src0,
742 const union tgsi_exec_channel *src1 )
743 {
744 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
745 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
746 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
747 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
748 }
749
#if 0
/** dst = max(src0, src1), per signed integer lane (currently disabled). */
static void
micro_imax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] > src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
763
#if 0
/** dst = max(src0, src1), per unsigned integer lane (currently disabled). */
static void
micro_umax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] > src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
777
778 static void
779 micro_min(
780 union tgsi_exec_channel *dst,
781 const union tgsi_exec_channel *src0,
782 const union tgsi_exec_channel *src1 )
783 {
784 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
785 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
786 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
787 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
788 }
789
#if 0
/** dst = min(src0, src1), per signed integer lane (currently disabled). */
static void
micro_imin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
803
#if 0
/** dst = min(src0, src1), per unsigned integer lane (currently disabled). */
static void
micro_umin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
817
#if 0
/**
 * dst = src0 % src1, per unsigned integer lane (currently disabled).
 * No check is made for a zero divisor.
 */
static void
micro_umod(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] % src1->u[lane];
   }
}
#endif
831
832 static void
833 micro_mul(
834 union tgsi_exec_channel *dst,
835 const union tgsi_exec_channel *src0,
836 const union tgsi_exec_channel *src1 )
837 {
838 dst->f[0] = src0->f[0] * src1->f[0];
839 dst->f[1] = src0->f[1] * src1->f[1];
840 dst->f[2] = src0->f[2] * src1->f[2];
841 dst->f[3] = src0->f[3] * src1->f[3];
842 }
843
#if 0
/** dst = src0 * src1, per signed integer lane (currently disabled). */
static void
micro_imul(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] * src1->i[lane];
   }
}
#endif
857
#if 0
/**
 * Signed multiply with two destinations (currently disabled):
 * dst1 receives src0 * src1 per lane and dst0 is set to zero.
 */
static void
micro_imul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   /* all products first, then the zero fill, as two separate passes */
   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
876
#if 0
/**
 * Unsigned multiply with two destinations (currently disabled):
 * dst1 receives src0 * src1 per lane and dst0 is set to zero.
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   /* all products first, then the zero fill, as two separate passes */
   for (lane = 0; lane < 4; lane++) {
      dst1->u[lane] = src0->u[lane] * src1->u[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->u[lane] = 0;
   }
}
#endif
895
896
#if 0
/** dst = src0 ? src1 : src2, per unsigned integer lane (disabled). */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane]) {
         dst->u[lane] = src1->u[lane];
      }
      else {
         dst->u[lane] = src2->u[lane];
      }
   }
}
#endif
911
912 static void
913 micro_neg(
914 union tgsi_exec_channel *dst,
915 const union tgsi_exec_channel *src )
916 {
917 dst->f[0] = -src->f[0];
918 dst->f[1] = -src->f[1];
919 dst->f[2] = -src->f[2];
920 dst->f[3] = -src->f[3];
921 }
922
#if 0
/** dst = -src, per signed integer lane (currently disabled). */
static void
micro_ineg(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = -src->i[lane];
   }
}
#endif
935
936 static void
937 micro_not(
938 union tgsi_exec_channel *dst,
939 const union tgsi_exec_channel *src )
940 {
941 dst->u[0] = ~src->u[0];
942 dst->u[1] = ~src->u[1];
943 dst->u[2] = ~src->u[2];
944 dst->u[3] = ~src->u[3];
945 }
946
947 static void
948 micro_or(
949 union tgsi_exec_channel *dst,
950 const union tgsi_exec_channel *src0,
951 const union tgsi_exec_channel *src1 )
952 {
953 dst->u[0] = src0->u[0] | src1->u[0];
954 dst->u[1] = src0->u[1] | src1->u[1];
955 dst->u[2] = src0->u[2] | src1->u[2];
956 dst->u[3] = src0->u[3] | src1->u[3];
957 }
958
959 static void
960 micro_pow(
961 union tgsi_exec_channel *dst,
962 const union tgsi_exec_channel *src0,
963 const union tgsi_exec_channel *src1 )
964 {
965 #if FAST_MATH
966 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
967 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
968 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
969 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
970 #else
971 dst->f[0] = powf( src0->f[0], src1->f[0] );
972 dst->f[1] = powf( src0->f[1], src1->f[1] );
973 dst->f[2] = powf( src0->f[2], src1->f[2] );
974 dst->f[3] = powf( src0->f[3], src1->f[3] );
975 #endif
976 }
977
978 static void
979 micro_rnd(
980 union tgsi_exec_channel *dst,
981 const union tgsi_exec_channel *src )
982 {
983 dst->f[0] = floorf( src->f[0] + 0.5f );
984 dst->f[1] = floorf( src->f[1] + 0.5f );
985 dst->f[2] = floorf( src->f[2] + 0.5f );
986 dst->f[3] = floorf( src->f[3] + 0.5f );
987 }
988
989 static void
990 micro_sgn(
991 union tgsi_exec_channel *dst,
992 const union tgsi_exec_channel *src )
993 {
994 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
995 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
996 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
997 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
998 }
999
1000 static void
1001 micro_shl(
1002 union tgsi_exec_channel *dst,
1003 const union tgsi_exec_channel *src0,
1004 const union tgsi_exec_channel *src1 )
1005 {
1006 dst->i[0] = src0->i[0] << src1->i[0];
1007 dst->i[1] = src0->i[1] << src1->i[1];
1008 dst->i[2] = src0->i[2] << src1->i[2];
1009 dst->i[3] = src0->i[3] << src1->i[3];
1010 }
1011
1012 static void
1013 micro_ishr(
1014 union tgsi_exec_channel *dst,
1015 const union tgsi_exec_channel *src0,
1016 const union tgsi_exec_channel *src1 )
1017 {
1018 dst->i[0] = src0->i[0] >> src1->i[0];
1019 dst->i[1] = src0->i[1] >> src1->i[1];
1020 dst->i[2] = src0->i[2] >> src1->i[2];
1021 dst->i[3] = src0->i[3] >> src1->i[3];
1022 }
1023
1024 static void
1025 micro_trunc(
1026 union tgsi_exec_channel *dst,
1027 const union tgsi_exec_channel *src0 )
1028 {
1029 dst->f[0] = (float) (int) src0->f[0];
1030 dst->f[1] = (float) (int) src0->f[1];
1031 dst->f[2] = (float) (int) src0->f[2];
1032 dst->f[3] = (float) (int) src0->f[3];
1033 }
1034
#if 0
/** dst = src0 >> src1, per unsigned integer lane (currently disabled). */
static void
micro_ushr(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] >> src1->u[lane];
   }
}
#endif
1048
1049 static void
1050 micro_sin(
1051 union tgsi_exec_channel *dst,
1052 const union tgsi_exec_channel *src )
1053 {
1054 dst->f[0] = sinf( src->f[0] );
1055 dst->f[1] = sinf( src->f[1] );
1056 dst->f[2] = sinf( src->f[2] );
1057 dst->f[3] = sinf( src->f[3] );
1058 }
1059
1060 static void
1061 micro_sqrt( union tgsi_exec_channel *dst,
1062 const union tgsi_exec_channel *src )
1063 {
1064 dst->f[0] = sqrtf( src->f[0] );
1065 dst->f[1] = sqrtf( src->f[1] );
1066 dst->f[2] = sqrtf( src->f[2] );
1067 dst->f[3] = sqrtf( src->f[3] );
1068 }
1069
1070 static void
1071 micro_sub(
1072 union tgsi_exec_channel *dst,
1073 const union tgsi_exec_channel *src0,
1074 const union tgsi_exec_channel *src1 )
1075 {
1076 dst->f[0] = src0->f[0] - src1->f[0];
1077 dst->f[1] = src0->f[1] - src1->f[1];
1078 dst->f[2] = src0->f[2] - src1->f[2];
1079 dst->f[3] = src0->f[3] - src1->f[3];
1080 }
1081
#if 0
/** dst = (float) src, unsigned integer to float, per lane (disabled). */
static void
micro_u2f(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->f[lane] = (float) src->u[lane];
   }
}
#endif
1094
1095 static void
1096 micro_xor(
1097 union tgsi_exec_channel *dst,
1098 const union tgsi_exec_channel *src0,
1099 const union tgsi_exec_channel *src1 )
1100 {
1101 dst->u[0] = src0->u[0] ^ src1->u[0];
1102 dst->u[1] = src0->u[1] ^ src1->u[1];
1103 dst->u[2] = src0->u[2] ^ src1->u[2];
1104 dst->u[3] = src0->u[3] ^ src1->u[3];
1105 }
1106
1107 static void
1108 fetch_src_file_channel(
1109 const struct tgsi_exec_machine *mach,
1110 const uint file,
1111 const uint swizzle,
1112 const union tgsi_exec_channel *index,
1113 union tgsi_exec_channel *chan )
1114 {
1115 switch( swizzle ) {
1116 case TGSI_SWIZZLE_X:
1117 case TGSI_SWIZZLE_Y:
1118 case TGSI_SWIZZLE_Z:
1119 case TGSI_SWIZZLE_W:
1120 switch( file ) {
1121 case TGSI_FILE_CONSTANT:
1122 assert(mach->Consts);
1123 if (index->i[0] < 0)
1124 chan->f[0] = 0.0f;
1125 else
1126 chan->f[0] = mach->Consts[index->i[0]][swizzle];
1127 if (index->i[1] < 0)
1128 chan->f[1] = 0.0f;
1129 else
1130 chan->f[1] = mach->Consts[index->i[1]][swizzle];
1131 if (index->i[2] < 0)
1132 chan->f[2] = 0.0f;
1133 else
1134 chan->f[2] = mach->Consts[index->i[2]][swizzle];
1135 if (index->i[3] < 0)
1136 chan->f[3] = 0.0f;
1137 else
1138 chan->f[3] = mach->Consts[index->i[3]][swizzle];
1139 break;
1140
1141 case TGSI_FILE_INPUT:
1142 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1143 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1144 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1145 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1146 break;
1147
1148 case TGSI_FILE_TEMPORARY:
1149 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1150 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1151 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1152 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1153 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1154 break;
1155
1156 case TGSI_FILE_IMMEDIATE:
1157 assert( index->i[0] < (int) mach->ImmLimit );
1158 chan->f[0] = mach->Imms[index->i[0]][swizzle];
1159 assert( index->i[1] < (int) mach->ImmLimit );
1160 chan->f[1] = mach->Imms[index->i[1]][swizzle];
1161 assert( index->i[2] < (int) mach->ImmLimit );
1162 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1163 assert( index->i[3] < (int) mach->ImmLimit );
1164 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1165 break;
1166
1167 case TGSI_FILE_ADDRESS:
1168 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1169 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1170 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1171 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1172 break;
1173
1174 case TGSI_FILE_PREDICATE:
1175 assert(index->i[0] < TGSI_EXEC_NUM_PREDS);
1176 assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
1177 assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
1178 assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
1179 chan->u[0] = mach->Addrs[0].xyzw[swizzle].u[0];
1180 chan->u[1] = mach->Addrs[0].xyzw[swizzle].u[1];
1181 chan->u[2] = mach->Addrs[0].xyzw[swizzle].u[2];
1182 chan->u[3] = mach->Addrs[0].xyzw[swizzle].u[3];
1183 break;
1184
1185 case TGSI_FILE_OUTPUT:
1186 /* vertex/fragment output vars can be read too */
1187 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1188 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1189 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1190 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1191 break;
1192
1193 default:
1194 assert( 0 );
1195 }
1196 break;
1197
1198 default:
1199 assert( 0 );
1200 }
1201 }
1202
1203 static void
1204 fetch_source(
1205 const struct tgsi_exec_machine *mach,
1206 union tgsi_exec_channel *chan,
1207 const struct tgsi_full_src_register *reg,
1208 const uint chan_index )
1209 {
1210 union tgsi_exec_channel index;
1211 uint swizzle;
1212
1213 /* We start with a direct index into a register file.
1214 *
1215 * file[1],
1216 * where:
1217 * file = SrcRegister.File
1218 * [1] = SrcRegister.Index
1219 */
1220 index.i[0] =
1221 index.i[1] =
1222 index.i[2] =
1223 index.i[3] = reg->SrcRegister.Index;
1224
1225 /* There is an extra source register that indirectly subscripts
1226 * a register file. The direct index now becomes an offset
1227 * that is being added to the indirect register.
1228 *
1229 * file[ind[2].x+1],
1230 * where:
1231 * ind = SrcRegisterInd.File
1232 * [2] = SrcRegisterInd.Index
1233 * .x = SrcRegisterInd.SwizzleX
1234 */
1235 if (reg->SrcRegister.Indirect) {
1236 union tgsi_exec_channel index2;
1237 union tgsi_exec_channel indir_index;
1238 const uint execmask = mach->ExecMask;
1239 uint i;
1240
1241 /* which address register (always zero now) */
1242 index2.i[0] =
1243 index2.i[1] =
1244 index2.i[2] =
1245 index2.i[3] = reg->SrcRegisterInd.Index;
1246
1247 /* get current value of address register[swizzle] */
1248 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1249 fetch_src_file_channel(
1250 mach,
1251 reg->SrcRegisterInd.File,
1252 swizzle,
1253 &index2,
1254 &indir_index );
1255
1256 /* add value of address register to the offset */
1257 index.i[0] += (int) indir_index.f[0];
1258 index.i[1] += (int) indir_index.f[1];
1259 index.i[2] += (int) indir_index.f[2];
1260 index.i[3] += (int) indir_index.f[3];
1261
1262 /* for disabled execution channels, zero-out the index to
1263 * avoid using a potential garbage value.
1264 */
1265 for (i = 0; i < QUAD_SIZE; i++) {
1266 if ((execmask & (1 << i)) == 0)
1267 index.i[i] = 0;
1268 }
1269 }
1270
1271 /* There is an extra source register that is a second
1272 * subscript to a register file. Effectively it means that
1273 * the register file is actually a 2D array of registers.
1274 *
1275 * file[1][3] == file[1*sizeof(file[1])+3],
1276 * where:
1277 * [3] = SrcRegisterDim.Index
1278 */
1279 if (reg->SrcRegister.Dimension) {
1280 /* The size of the first-order array depends on the register file type.
1281 * We need to multiply the index to the first array to get an effective,
1282 * "flat" index that points to the beginning of the second-order array.
1283 */
1284 switch (reg->SrcRegister.File) {
1285 case TGSI_FILE_INPUT:
1286 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1287 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1288 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1289 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1290 break;
1291 case TGSI_FILE_CONSTANT:
1292 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1293 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1294 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1295 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1296 break;
1297 default:
1298 assert( 0 );
1299 }
1300
1301 index.i[0] += reg->SrcRegisterDim.Index;
1302 index.i[1] += reg->SrcRegisterDim.Index;
1303 index.i[2] += reg->SrcRegisterDim.Index;
1304 index.i[3] += reg->SrcRegisterDim.Index;
1305
1306 /* Again, the second subscript index can be addressed indirectly
1307 * identically to the first one.
1308 * Nothing stops us from indirectly addressing the indirect register,
1309 * but there is no need for that, so we won't exercise it.
1310 *
1311 * file[1][ind[4].y+3],
1312 * where:
1313 * ind = SrcRegisterDimInd.File
1314 * [4] = SrcRegisterDimInd.Index
1315 * .y = SrcRegisterDimInd.SwizzleX
1316 */
1317 if (reg->SrcRegisterDim.Indirect) {
1318 union tgsi_exec_channel index2;
1319 union tgsi_exec_channel indir_index;
1320 const uint execmask = mach->ExecMask;
1321 uint i;
1322
1323 index2.i[0] =
1324 index2.i[1] =
1325 index2.i[2] =
1326 index2.i[3] = reg->SrcRegisterDimInd.Index;
1327
1328 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1329 fetch_src_file_channel(
1330 mach,
1331 reg->SrcRegisterDimInd.File,
1332 swizzle,
1333 &index2,
1334 &indir_index );
1335
1336 index.i[0] += (int) indir_index.f[0];
1337 index.i[1] += (int) indir_index.f[1];
1338 index.i[2] += (int) indir_index.f[2];
1339 index.i[3] += (int) indir_index.f[3];
1340
1341 /* for disabled execution channels, zero-out the index to
1342 * avoid using a potential garbage value.
1343 */
1344 for (i = 0; i < QUAD_SIZE; i++) {
1345 if ((execmask & (1 << i)) == 0)
1346 index.i[i] = 0;
1347 }
1348 }
1349
1350 /* If by any chance there was a need for a 3D array of register
1351 * files, we would have to check whether SrcRegisterDim is followed
1352 * by a dimension register and continue the saga.
1353 */
1354 }
1355
1356 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1357 fetch_src_file_channel(
1358 mach,
1359 reg->SrcRegister.File,
1360 swizzle,
1361 &index,
1362 chan );
1363
1364 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1365 case TGSI_UTIL_SIGN_CLEAR:
1366 micro_abs( chan, chan );
1367 break;
1368
1369 case TGSI_UTIL_SIGN_SET:
1370 micro_abs( chan, chan );
1371 micro_neg( chan, chan );
1372 break;
1373
1374 case TGSI_UTIL_SIGN_TOGGLE:
1375 micro_neg( chan, chan );
1376 break;
1377
1378 case TGSI_UTIL_SIGN_KEEP:
1379 break;
1380 }
1381
1382 if (reg->SrcRegisterExtMod.Complement) {
1383 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1384 }
1385 }
1386
1387 static void
1388 store_dest(
1389 struct tgsi_exec_machine *mach,
1390 const union tgsi_exec_channel *chan,
1391 const struct tgsi_full_dst_register *reg,
1392 const struct tgsi_full_instruction *inst,
1393 uint chan_index )
1394 {
1395 uint i;
1396 union tgsi_exec_channel null;
1397 union tgsi_exec_channel *dst;
1398 uint execmask = mach->ExecMask;
1399 int offset = 0; /* indirection offset */
1400 int index;
1401
1402 #ifdef DEBUG
1403 check_inf_or_nan(chan);
1404 #endif
1405
1406 /* There is an extra source register that indirectly subscripts
1407 * a register file. The direct index now becomes an offset
1408 * that is being added to the indirect register.
1409 *
1410 * file[ind[2].x+1],
1411 * where:
1412 * ind = DstRegisterInd.File
1413 * [2] = DstRegisterInd.Index
1414 * .x = DstRegisterInd.SwizzleX
1415 */
1416 if (reg->DstRegister.Indirect) {
1417 union tgsi_exec_channel index;
1418 union tgsi_exec_channel indir_index;
1419 uint swizzle;
1420
1421 /* which address register (always zero for now) */
1422 index.i[0] =
1423 index.i[1] =
1424 index.i[2] =
1425 index.i[3] = reg->DstRegisterInd.Index;
1426
1427 /* get current value of address register[swizzle] */
1428 swizzle = tgsi_util_get_src_register_swizzle( &reg->DstRegisterInd, CHAN_X );
1429
1430 /* fetch values from the address/indirection register */
1431 fetch_src_file_channel(
1432 mach,
1433 reg->DstRegisterInd.File,
1434 swizzle,
1435 &index,
1436 &indir_index );
1437
1438 /* save indirection offset */
1439 offset = (int) indir_index.f[0];
1440 }
1441
1442 switch (reg->DstRegister.File) {
1443 case TGSI_FILE_NULL:
1444 dst = &null;
1445 break;
1446
1447 case TGSI_FILE_OUTPUT:
1448 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1449 + reg->DstRegister.Index;
1450 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1451 break;
1452
1453 case TGSI_FILE_TEMPORARY:
1454 index = reg->DstRegister.Index;
1455 assert( index < TGSI_EXEC_NUM_TEMPS );
1456 dst = &mach->Temps[offset + index].xyzw[chan_index];
1457 break;
1458
1459 case TGSI_FILE_ADDRESS:
1460 index = reg->DstRegister.Index;
1461 dst = &mach->Addrs[index].xyzw[chan_index];
1462 break;
1463
1464 case TGSI_FILE_LOOP:
1465 assert(reg->DstRegister.Index == 0);
1466 assert(mach->LoopCounterStackTop > 0);
1467 assert(chan_index == CHAN_X);
1468 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
1469 break;
1470
1471 case TGSI_FILE_PREDICATE:
1472 index = reg->DstRegister.Index;
1473 assert(index < TGSI_EXEC_NUM_PREDS);
1474 dst = &mach->Addrs[index].xyzw[chan_index];
1475 break;
1476
1477 default:
1478 assert( 0 );
1479 return;
1480 }
1481
1482 switch (inst->Instruction.Saturate) {
1483 case TGSI_SAT_NONE:
1484 for (i = 0; i < QUAD_SIZE; i++)
1485 if (execmask & (1 << i))
1486 dst->i[i] = chan->i[i];
1487 break;
1488
1489 case TGSI_SAT_ZERO_ONE:
1490 for (i = 0; i < QUAD_SIZE; i++)
1491 if (execmask & (1 << i)) {
1492 if (chan->f[i] < 0.0f)
1493 dst->f[i] = 0.0f;
1494 else if (chan->f[i] > 1.0f)
1495 dst->f[i] = 1.0f;
1496 else
1497 dst->i[i] = chan->i[i];
1498 }
1499 break;
1500
1501 case TGSI_SAT_MINUS_PLUS_ONE:
1502 for (i = 0; i < QUAD_SIZE; i++)
1503 if (execmask & (1 << i)) {
1504 if (chan->f[i] < -1.0f)
1505 dst->f[i] = -1.0f;
1506 else if (chan->f[i] > 1.0f)
1507 dst->f[i] = 1.0f;
1508 else
1509 dst->i[i] = chan->i[i];
1510 }
1511 break;
1512
1513 default:
1514 assert( 0 );
1515 }
1516 }
1517
/* Fetch channel CHAN of source operand INDEX into VAL.
 * Expects `mach` and `inst` to be in scope at the point of use.
 */
#define FETCH(VAL,INDEX,CHAN) \
   fetch_source(mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)

/* Store VAL into channel CHAN of destination operand INDEX.
 * Expects `mach` and `inst` to be in scope at the point of use.
 */
#define STORE(VAL,INDEX,CHAN) \
   store_dest(mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN)
1523
1524
1525 /**
1526 * Execute ARB-style KIL which is predicated by a src register.
1527 * Kill fragment if any of the four values is less than zero.
1528 */
1529 static void
1530 exec_kil(struct tgsi_exec_machine *mach,
1531 const struct tgsi_full_instruction *inst)
1532 {
1533 uint uniquemask;
1534 uint chan_index;
1535 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1536 union tgsi_exec_channel r[1];
1537
1538 /* This mask stores component bits that were already tested. */
1539 uniquemask = 0;
1540
1541 for (chan_index = 0; chan_index < 4; chan_index++)
1542 {
1543 uint swizzle;
1544 uint i;
1545
1546 /* unswizzle channel */
1547 swizzle = tgsi_util_get_full_src_register_swizzle (
1548 &inst->FullSrcRegisters[0],
1549 chan_index);
1550
1551 /* check if the component has not been already tested */
1552 if (uniquemask & (1 << swizzle))
1553 continue;
1554 uniquemask |= 1 << swizzle;
1555
1556 FETCH(&r[0], 0, chan_index);
1557 for (i = 0; i < 4; i++)
1558 if (r[0].f[i] < 0.0f)
1559 kilmask |= 1 << i;
1560 }
1561
1562 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1563 }
1564
1565 /**
1566 * Execute NVIDIA-style KIL which is predicated by a condition code.
1567 * Kill fragment if the condition code is TRUE.
1568 */
1569 static void
1570 exec_kilp(struct tgsi_exec_machine *mach,
1571 const struct tgsi_full_instruction *inst)
1572 {
1573 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1574
1575 /* "unconditional" kil */
1576 kilmask = mach->ExecMask;
1577 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1578 }
1579
1580
1581 /*
1582 * Fetch a four texture samples using STR texture coordinates.
1583 */
1584 static void
1585 fetch_texel( struct tgsi_sampler *sampler,
1586 const union tgsi_exec_channel *s,
1587 const union tgsi_exec_channel *t,
1588 const union tgsi_exec_channel *p,
1589 float lodbias, /* XXX should be float[4] */
1590 union tgsi_exec_channel *r,
1591 union tgsi_exec_channel *g,
1592 union tgsi_exec_channel *b,
1593 union tgsi_exec_channel *a )
1594 {
1595 uint j;
1596 float rgba[NUM_CHANNELS][QUAD_SIZE];
1597
1598 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1599
1600 for (j = 0; j < 4; j++) {
1601 r->f[j] = rgba[0][j];
1602 g->f[j] = rgba[1][j];
1603 b->f[j] = rgba[2][j];
1604 a->f[j] = rgba[3][j];
1605 }
1606 }
1607
1608
1609 static void
1610 exec_tex(struct tgsi_exec_machine *mach,
1611 const struct tgsi_full_instruction *inst,
1612 boolean biasLod,
1613 boolean projected)
1614 {
1615 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1616 union tgsi_exec_channel r[4];
1617 uint chan_index;
1618 float lodBias;
1619
1620 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1621
1622 switch (inst->InstructionExtTexture.Texture) {
1623 case TGSI_TEXTURE_1D:
1624 case TGSI_TEXTURE_SHADOW1D:
1625
1626 FETCH(&r[0], 0, CHAN_X);
1627
1628 if (projected) {
1629 FETCH(&r[1], 0, CHAN_W);
1630 micro_div( &r[0], &r[0], &r[1] );
1631 }
1632
1633 if (biasLod) {
1634 FETCH(&r[1], 0, CHAN_W);
1635 lodBias = r[2].f[0];
1636 }
1637 else
1638 lodBias = 0.0;
1639
1640 fetch_texel(mach->Samplers[unit],
1641 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */
1642 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1643 break;
1644
1645 case TGSI_TEXTURE_2D:
1646 case TGSI_TEXTURE_RECT:
1647 case TGSI_TEXTURE_SHADOW2D:
1648 case TGSI_TEXTURE_SHADOWRECT:
1649
1650 FETCH(&r[0], 0, CHAN_X);
1651 FETCH(&r[1], 0, CHAN_Y);
1652 FETCH(&r[2], 0, CHAN_Z);
1653
1654 if (projected) {
1655 FETCH(&r[3], 0, CHAN_W);
1656 micro_div( &r[0], &r[0], &r[3] );
1657 micro_div( &r[1], &r[1], &r[3] );
1658 micro_div( &r[2], &r[2], &r[3] );
1659 }
1660
1661 if (biasLod) {
1662 FETCH(&r[3], 0, CHAN_W);
1663 lodBias = r[3].f[0];
1664 }
1665 else
1666 lodBias = 0.0;
1667
1668 fetch_texel(mach->Samplers[unit],
1669 &r[0], &r[1], &r[2], lodBias, /* inputs */
1670 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1671 break;
1672
1673 case TGSI_TEXTURE_3D:
1674 case TGSI_TEXTURE_CUBE:
1675
1676 FETCH(&r[0], 0, CHAN_X);
1677 FETCH(&r[1], 0, CHAN_Y);
1678 FETCH(&r[2], 0, CHAN_Z);
1679
1680 if (projected) {
1681 FETCH(&r[3], 0, CHAN_W);
1682 micro_div( &r[0], &r[0], &r[3] );
1683 micro_div( &r[1], &r[1], &r[3] );
1684 micro_div( &r[2], &r[2], &r[3] );
1685 }
1686
1687 if (biasLod) {
1688 FETCH(&r[3], 0, CHAN_W);
1689 lodBias = r[3].f[0];
1690 }
1691 else
1692 lodBias = 0.0;
1693
1694 fetch_texel(mach->Samplers[unit],
1695 &r[0], &r[1], &r[2], lodBias,
1696 &r[0], &r[1], &r[2], &r[3]);
1697 break;
1698
1699 default:
1700 assert (0);
1701 }
1702
1703 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1704 STORE( &r[chan_index], 0, chan_index );
1705 }
1706 }
1707
1708 static void
1709 exec_txd(struct tgsi_exec_machine *mach,
1710 const struct tgsi_full_instruction *inst)
1711 {
1712 const uint unit = inst->FullSrcRegisters[3].SrcRegister.Index;
1713 union tgsi_exec_channel r[4];
1714 uint chan_index;
1715
1716 /*
1717 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1718 */
1719
1720 switch (inst->InstructionExtTexture.Texture) {
1721 case TGSI_TEXTURE_1D:
1722 case TGSI_TEXTURE_SHADOW1D:
1723
1724 FETCH(&r[0], 0, CHAN_X);
1725
1726 fetch_texel(mach->Samplers[unit],
1727 &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */
1728 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1729 break;
1730
1731 case TGSI_TEXTURE_2D:
1732 case TGSI_TEXTURE_RECT:
1733 case TGSI_TEXTURE_SHADOW2D:
1734 case TGSI_TEXTURE_SHADOWRECT:
1735
1736 FETCH(&r[0], 0, CHAN_X);
1737 FETCH(&r[1], 0, CHAN_Y);
1738 FETCH(&r[2], 0, CHAN_Z);
1739
1740 fetch_texel(mach->Samplers[unit],
1741 &r[0], &r[1], &r[2], 0.0f, /* inputs */
1742 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1743 break;
1744
1745 case TGSI_TEXTURE_3D:
1746 case TGSI_TEXTURE_CUBE:
1747
1748 FETCH(&r[0], 0, CHAN_X);
1749 FETCH(&r[1], 0, CHAN_Y);
1750 FETCH(&r[2], 0, CHAN_Z);
1751
1752 fetch_texel(mach->Samplers[unit],
1753 &r[0], &r[1], &r[2], 0.0f,
1754 &r[0], &r[1], &r[2], &r[3]);
1755 break;
1756
1757 default:
1758 assert(0);
1759 }
1760
1761 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1762 STORE(&r[chan_index], 0, chan_index);
1763 }
1764 }
1765
1766
1767 /**
1768 * Evaluate a constant-valued coefficient at the position of the
1769 * current quad.
1770 */
1771 static void
1772 eval_constant_coef(
1773 struct tgsi_exec_machine *mach,
1774 unsigned attrib,
1775 unsigned chan )
1776 {
1777 unsigned i;
1778
1779 for( i = 0; i < QUAD_SIZE; i++ ) {
1780 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1781 }
1782 }
1783
1784 /**
1785 * Evaluate a linear-valued coefficient at the position of the
1786 * current quad.
1787 */
1788 static void
1789 eval_linear_coef(
1790 struct tgsi_exec_machine *mach,
1791 unsigned attrib,
1792 unsigned chan )
1793 {
1794 const float x = mach->QuadPos.xyzw[0].f[0];
1795 const float y = mach->QuadPos.xyzw[1].f[0];
1796 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1797 const float dady = mach->InterpCoefs[attrib].dady[chan];
1798 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1799 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1800 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1801 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1802 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1803 }
1804
1805 /**
1806 * Evaluate a perspective-valued coefficient at the position of the
1807 * current quad.
1808 */
1809 static void
1810 eval_perspective_coef(
1811 struct tgsi_exec_machine *mach,
1812 unsigned attrib,
1813 unsigned chan )
1814 {
1815 const float x = mach->QuadPos.xyzw[0].f[0];
1816 const float y = mach->QuadPos.xyzw[1].f[0];
1817 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1818 const float dady = mach->InterpCoefs[attrib].dady[chan];
1819 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1820 const float *w = mach->QuadPos.xyzw[3].f;
1821 /* divide by W here */
1822 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1823 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1824 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1825 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1826 }
1827
1828
/* Signature shared by the eval_*_coef() interpolation routines. */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1833
1834 static void
1835 exec_declaration(
1836 struct tgsi_exec_machine *mach,
1837 const struct tgsi_full_declaration *decl )
1838 {
1839 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1840 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1841 unsigned first, last, mask;
1842 eval_coef_func eval;
1843
1844 first = decl->DeclarationRange.First;
1845 last = decl->DeclarationRange.Last;
1846 mask = decl->Declaration.UsageMask;
1847
1848 switch( decl->Declaration.Interpolate ) {
1849 case TGSI_INTERPOLATE_CONSTANT:
1850 eval = eval_constant_coef;
1851 break;
1852
1853 case TGSI_INTERPOLATE_LINEAR:
1854 eval = eval_linear_coef;
1855 break;
1856
1857 case TGSI_INTERPOLATE_PERSPECTIVE:
1858 eval = eval_perspective_coef;
1859 break;
1860
1861 default:
1862 assert( 0 );
1863 return;
1864 }
1865
1866 if( mask == TGSI_WRITEMASK_XYZW ) {
1867 unsigned i, j;
1868
1869 for( i = first; i <= last; i++ ) {
1870 for( j = 0; j < NUM_CHANNELS; j++ ) {
1871 eval( mach, i, j );
1872 }
1873 }
1874 }
1875 else {
1876 unsigned i, j;
1877
1878 for( j = 0; j < NUM_CHANNELS; j++ ) {
1879 if( mask & (1 << j) ) {
1880 for( i = first; i <= last; i++ ) {
1881 eval( mach, i, j );
1882 }
1883 }
1884 }
1885 }
1886 }
1887 }
1888 }
1889
1890 static void
1891 exec_instruction(
1892 struct tgsi_exec_machine *mach,
1893 const struct tgsi_full_instruction *inst,
1894 int *pc )
1895 {
1896 uint chan_index;
1897 union tgsi_exec_channel r[10];
1898 union tgsi_exec_channel d[8];
1899
1900 (*pc)++;
1901
1902 switch (inst->Instruction.Opcode) {
1903 case TGSI_OPCODE_ARL:
1904 case TGSI_OPCODE_FLR:
1905 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1906 FETCH( &r[0], 0, chan_index );
1907 micro_flr(&d[chan_index], &r[0]);
1908 }
1909 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1910 STORE(&d[chan_index], 0, chan_index);
1911 }
1912 break;
1913
1914 case TGSI_OPCODE_MOV:
1915 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1916 FETCH(&d[chan_index], 0, chan_index);
1917 }
1918 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1919 STORE(&d[chan_index], 0, chan_index);
1920 }
1921 break;
1922
1923 case TGSI_OPCODE_LIT:
1924 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1925 FETCH( &r[0], 0, CHAN_X );
1926 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1927 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
1928 }
1929
1930 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1931 FETCH( &r[1], 0, CHAN_Y );
1932 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1933
1934 FETCH( &r[2], 0, CHAN_W );
1935 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1936 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1937 micro_pow( &r[1], &r[1], &r[2] );
1938 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
1939 }
1940
1941 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
1942 STORE(&d[CHAN_Y], 0, CHAN_Y);
1943 }
1944 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
1945 STORE(&d[CHAN_Z], 0, CHAN_Z);
1946 }
1947 }
1948 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1949 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1950 }
1951 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1952 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1953 }
1954 break;
1955
1956 case TGSI_OPCODE_RCP:
1957 /* TGSI_OPCODE_RECIP */
1958 FETCH( &r[0], 0, CHAN_X );
1959 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1960 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1961 STORE( &r[0], 0, chan_index );
1962 }
1963 break;
1964
1965 case TGSI_OPCODE_RSQ:
1966 /* TGSI_OPCODE_RECIPSQRT */
1967 FETCH( &r[0], 0, CHAN_X );
1968 micro_abs( &r[0], &r[0] );
1969 micro_sqrt( &r[0], &r[0] );
1970 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1971 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1972 STORE( &r[0], 0, chan_index );
1973 }
1974 break;
1975
1976 case TGSI_OPCODE_EXP:
1977 FETCH( &r[0], 0, CHAN_X );
1978 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1979 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1980 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1981 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1982 }
1983 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1984 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1985 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1986 }
1987 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1988 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1989 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1990 }
1991 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1992 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1993 }
1994 break;
1995
1996 case TGSI_OPCODE_LOG:
1997 FETCH( &r[0], 0, CHAN_X );
1998 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1999 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
2000 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
2001 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2002 STORE( &r[0], 0, CHAN_X );
2003 }
2004 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2005 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
2006 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
2007 STORE( &r[0], 0, CHAN_Y );
2008 }
2009 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2010 STORE( &r[1], 0, CHAN_Z );
2011 }
2012 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2013 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2014 }
2015 break;
2016
2017 case TGSI_OPCODE_MUL:
2018 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2019 FETCH(&r[0], 0, chan_index);
2020 FETCH(&r[1], 1, chan_index);
2021 micro_mul(&d[chan_index], &r[0], &r[1]);
2022 }
2023 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2024 STORE(&d[chan_index], 0, chan_index);
2025 }
2026 break;
2027
2028 case TGSI_OPCODE_ADD:
2029 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2030 FETCH( &r[0], 0, chan_index );
2031 FETCH( &r[1], 1, chan_index );
2032 micro_add(&d[chan_index], &r[0], &r[1]);
2033 }
2034 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2035 STORE(&d[chan_index], 0, chan_index);
2036 }
2037 break;
2038
2039 case TGSI_OPCODE_DP3:
2040 /* TGSI_OPCODE_DOT3 */
2041 FETCH( &r[0], 0, CHAN_X );
2042 FETCH( &r[1], 1, CHAN_X );
2043 micro_mul( &r[0], &r[0], &r[1] );
2044
2045 FETCH( &r[1], 0, CHAN_Y );
2046 FETCH( &r[2], 1, CHAN_Y );
2047 micro_mul( &r[1], &r[1], &r[2] );
2048 micro_add( &r[0], &r[0], &r[1] );
2049
2050 FETCH( &r[1], 0, CHAN_Z );
2051 FETCH( &r[2], 1, CHAN_Z );
2052 micro_mul( &r[1], &r[1], &r[2] );
2053 micro_add( &r[0], &r[0], &r[1] );
2054
2055 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2056 STORE( &r[0], 0, chan_index );
2057 }
2058 break;
2059
2060 case TGSI_OPCODE_DP4:
2061 /* TGSI_OPCODE_DOT4 */
2062 FETCH(&r[0], 0, CHAN_X);
2063 FETCH(&r[1], 1, CHAN_X);
2064
2065 micro_mul( &r[0], &r[0], &r[1] );
2066
2067 FETCH(&r[1], 0, CHAN_Y);
2068 FETCH(&r[2], 1, CHAN_Y);
2069
2070 micro_mul( &r[1], &r[1], &r[2] );
2071 micro_add( &r[0], &r[0], &r[1] );
2072
2073 FETCH(&r[1], 0, CHAN_Z);
2074 FETCH(&r[2], 1, CHAN_Z);
2075
2076 micro_mul( &r[1], &r[1], &r[2] );
2077 micro_add( &r[0], &r[0], &r[1] );
2078
2079 FETCH(&r[1], 0, CHAN_W);
2080 FETCH(&r[2], 1, CHAN_W);
2081
2082 micro_mul( &r[1], &r[1], &r[2] );
2083 micro_add( &r[0], &r[0], &r[1] );
2084
2085 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2086 STORE( &r[0], 0, chan_index );
2087 }
2088 break;
2089
2090 case TGSI_OPCODE_DST:
2091 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2092 FETCH( &r[0], 0, CHAN_Y );
2093 FETCH( &r[1], 1, CHAN_Y);
2094 micro_mul(&d[CHAN_Y], &r[0], &r[1]);
2095 }
2096 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2097 FETCH(&d[CHAN_Z], 0, CHAN_Z);
2098 }
2099 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2100 FETCH(&d[CHAN_W], 1, CHAN_W);
2101 }
2102
2103 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2104 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
2105 }
2106 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2107 STORE(&d[CHAN_Y], 0, CHAN_Y);
2108 }
2109 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2110 STORE(&d[CHAN_Z], 0, CHAN_Z);
2111 }
2112 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2113 STORE(&d[CHAN_W], 0, CHAN_W);
2114 }
2115 break;
2116
2117 case TGSI_OPCODE_MIN:
2118 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2119 FETCH(&r[0], 0, chan_index);
2120 FETCH(&r[1], 1, chan_index);
2121
2122 /* XXX use micro_min()?? */
2123 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
2124 }
2125 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2126 STORE(&d[chan_index], 0, chan_index);
2127 }
2128 break;
2129
2130 case TGSI_OPCODE_MAX:
2131 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2132 FETCH(&r[0], 0, chan_index);
2133 FETCH(&r[1], 1, chan_index);
2134
2135 /* XXX use micro_max()?? */
2136 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
2137 }
2138 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2139 STORE(&d[chan_index], 0, chan_index);
2140 }
2141 break;
2142
2143 case TGSI_OPCODE_SLT:
2144 /* TGSI_OPCODE_SETLT */
2145 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2146 FETCH( &r[0], 0, chan_index );
2147 FETCH( &r[1], 1, chan_index );
2148 micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2149 }
2150 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2151 STORE(&d[chan_index], 0, chan_index);
2152 }
2153 break;
2154
2155 case TGSI_OPCODE_SGE:
2156 /* TGSI_OPCODE_SETGE */
2157 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2158 FETCH( &r[0], 0, chan_index );
2159 FETCH( &r[1], 1, chan_index );
2160 micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2161 }
2162 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2163 STORE(&d[chan_index], 0, chan_index);
2164 }
2165 break;
2166
2167 case TGSI_OPCODE_MAD:
2168 /* TGSI_OPCODE_MADD */
2169 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2170 FETCH( &r[0], 0, chan_index );
2171 FETCH( &r[1], 1, chan_index );
2172 micro_mul( &r[0], &r[0], &r[1] );
2173 FETCH( &r[1], 2, chan_index );
2174 micro_add(&d[chan_index], &r[0], &r[1]);
2175 }
2176 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2177 STORE(&d[chan_index], 0, chan_index);
2178 }
2179 break;
2180
2181 case TGSI_OPCODE_SUB:
2182 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2183 FETCH(&r[0], 0, chan_index);
2184 FETCH(&r[1], 1, chan_index);
2185 micro_sub(&d[chan_index], &r[0], &r[1]);
2186 }
2187 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2188 STORE(&d[chan_index], 0, chan_index);
2189 }
2190 break;
2191
2192 case TGSI_OPCODE_LRP:
2193 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2194 FETCH(&r[0], 0, chan_index);
2195 FETCH(&r[1], 1, chan_index);
2196 FETCH(&r[2], 2, chan_index);
2197 micro_sub( &r[1], &r[1], &r[2] );
2198 micro_mul( &r[0], &r[0], &r[1] );
2199 micro_add(&d[chan_index], &r[0], &r[2]);
2200 }
2201 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2202 STORE(&d[chan_index], 0, chan_index);
2203 }
2204 break;
2205
2206 case TGSI_OPCODE_CND:
2207 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2208 FETCH(&r[0], 0, chan_index);
2209 FETCH(&r[1], 1, chan_index);
2210 FETCH(&r[2], 2, chan_index);
2211 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2212 }
2213 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2214 STORE(&d[chan_index], 0, chan_index);
2215 }
2216 break;
2217
2218 case TGSI_OPCODE_DP2A:
2219 FETCH( &r[0], 0, CHAN_X );
2220 FETCH( &r[1], 1, CHAN_X );
2221 micro_mul( &r[0], &r[0], &r[1] );
2222
2223 FETCH( &r[1], 0, CHAN_Y );
2224 FETCH( &r[2], 1, CHAN_Y );
2225 micro_mul( &r[1], &r[1], &r[2] );
2226 micro_add( &r[0], &r[0], &r[1] );
2227
2228 FETCH( &r[2], 2, CHAN_X );
2229 micro_add( &r[0], &r[0], &r[2] );
2230
2231 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2232 STORE( &r[0], 0, chan_index );
2233 }
2234 break;
2235
2236 case TGSI_OPCODE_FRC:
2237 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2238 FETCH( &r[0], 0, chan_index );
2239 micro_frc(&d[chan_index], &r[0]);
2240 }
2241 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2242 STORE(&d[chan_index], 0, chan_index);
2243 }
2244 break;
2245
2246 case TGSI_OPCODE_CLAMP:
2247 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2248 FETCH(&r[0], 0, chan_index);
2249 FETCH(&r[1], 1, chan_index);
2250 micro_max(&r[0], &r[0], &r[1]);
2251 FETCH(&r[1], 2, chan_index);
2252 micro_min(&d[chan_index], &r[0], &r[1]);
2253 }
2254 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2255 STORE(&d[chan_index], 0, chan_index);
2256 }
2257 break;
2258
2259 case TGSI_OPCODE_ROUND:
2260 case TGSI_OPCODE_ARR:
2261 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2262 FETCH( &r[0], 0, chan_index );
2263 micro_rnd(&d[chan_index], &r[0]);
2264 }
2265 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2266 STORE(&d[chan_index], 0, chan_index);
2267 }
2268 break;
2269
2270 case TGSI_OPCODE_EX2:
2271 FETCH(&r[0], 0, CHAN_X);
2272
2273 #if FAST_MATH
2274 micro_exp2( &r[0], &r[0] );
2275 #else
2276 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2277 #endif
2278
2279 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2280 STORE( &r[0], 0, chan_index );
2281 }
2282 break;
2283
2284 case TGSI_OPCODE_LG2:
2285 FETCH( &r[0], 0, CHAN_X );
2286 micro_lg2( &r[0], &r[0] );
2287 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2288 STORE( &r[0], 0, chan_index );
2289 }
2290 break;
2291
2292 case TGSI_OPCODE_POW:
2293 FETCH(&r[0], 0, CHAN_X);
2294 FETCH(&r[1], 1, CHAN_X);
2295
2296 micro_pow( &r[0], &r[0], &r[1] );
2297
2298 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2299 STORE( &r[0], 0, chan_index );
2300 }
2301 break;
2302
2303 case TGSI_OPCODE_XPD:
2304 FETCH(&r[0], 0, CHAN_Y);
2305 FETCH(&r[1], 1, CHAN_Z);
2306
2307 micro_mul( &r[2], &r[0], &r[1] );
2308
2309 FETCH(&r[3], 0, CHAN_Z);
2310 FETCH(&r[4], 1, CHAN_Y);
2311
2312 micro_mul( &r[5], &r[3], &r[4] );
2313 micro_sub(&d[CHAN_X], &r[2], &r[5]);
2314
2315 FETCH(&r[2], 1, CHAN_X);
2316
2317 micro_mul( &r[3], &r[3], &r[2] );
2318
2319 FETCH(&r[5], 0, CHAN_X);
2320
2321 micro_mul( &r[1], &r[1], &r[5] );
2322 micro_sub(&d[CHAN_Y], &r[3], &r[1]);
2323
2324 micro_mul( &r[5], &r[5], &r[4] );
2325 micro_mul( &r[0], &r[0], &r[2] );
2326 micro_sub(&d[CHAN_Z], &r[5], &r[0]);
2327
2328 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2329 STORE(&d[CHAN_X], 0, CHAN_X);
2330 }
2331 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2332 STORE(&d[CHAN_Y], 0, CHAN_Y);
2333 }
2334 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2335 STORE(&d[CHAN_Z], 0, CHAN_Z);
2336 }
2337 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2338 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2339 }
2340 break;
2341
2342 case TGSI_OPCODE_ABS:
2343 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2344 FETCH(&r[0], 0, chan_index);
2345 micro_abs(&d[chan_index], &r[0]);
2346 }
2347 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2348 STORE(&d[chan_index], 0, chan_index);
2349 }
2350 break;
2351
2352 case TGSI_OPCODE_RCC:
2353 FETCH(&r[0], 0, CHAN_X);
2354 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2355 micro_float_clamp(&r[0], &r[0]);
2356 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2357 STORE(&r[0], 0, chan_index);
2358 }
2359 break;
2360
2361 case TGSI_OPCODE_DPH:
2362 FETCH(&r[0], 0, CHAN_X);
2363 FETCH(&r[1], 1, CHAN_X);
2364
2365 micro_mul( &r[0], &r[0], &r[1] );
2366
2367 FETCH(&r[1], 0, CHAN_Y);
2368 FETCH(&r[2], 1, CHAN_Y);
2369
2370 micro_mul( &r[1], &r[1], &r[2] );
2371 micro_add( &r[0], &r[0], &r[1] );
2372
2373 FETCH(&r[1], 0, CHAN_Z);
2374 FETCH(&r[2], 1, CHAN_Z);
2375
2376 micro_mul( &r[1], &r[1], &r[2] );
2377 micro_add( &r[0], &r[0], &r[1] );
2378
2379 FETCH(&r[1], 1, CHAN_W);
2380
2381 micro_add( &r[0], &r[0], &r[1] );
2382
2383 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2384 STORE( &r[0], 0, chan_index );
2385 }
2386 break;
2387
2388 case TGSI_OPCODE_COS:
2389 FETCH(&r[0], 0, CHAN_X);
2390
2391 micro_cos( &r[0], &r[0] );
2392
2393 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2394 STORE( &r[0], 0, chan_index );
2395 }
2396 break;
2397
2398 case TGSI_OPCODE_DDX:
2399 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2400 FETCH( &r[0], 0, chan_index );
2401 micro_ddx(&d[chan_index], &r[0]);
2402 }
2403 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2404 STORE(&d[chan_index], 0, chan_index);
2405 }
2406 break;
2407
2408 case TGSI_OPCODE_DDY:
2409 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2410 FETCH( &r[0], 0, chan_index );
2411 micro_ddy(&d[chan_index], &r[0]);
2412 }
2413 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2414 STORE(&d[chan_index], 0, chan_index);
2415 }
2416 break;
2417
2418 case TGSI_OPCODE_KILP:
2419 exec_kilp (mach, inst);
2420 break;
2421
2422 case TGSI_OPCODE_KIL:
2423 exec_kil (mach, inst);
2424 break;
2425
2426 case TGSI_OPCODE_PK2H:
2427 assert (0);
2428 break;
2429
2430 case TGSI_OPCODE_PK2US:
2431 assert (0);
2432 break;
2433
2434 case TGSI_OPCODE_PK4B:
2435 assert (0);
2436 break;
2437
2438 case TGSI_OPCODE_PK4UB:
2439 assert (0);
2440 break;
2441
2442 case TGSI_OPCODE_RFL:
2443 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2444 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2445 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2446 /* r0 = dp3(src0, src0) */
2447 FETCH(&r[2], 0, CHAN_X);
2448 micro_mul(&r[0], &r[2], &r[2]);
2449 FETCH(&r[4], 0, CHAN_Y);
2450 micro_mul(&r[8], &r[4], &r[4]);
2451 micro_add(&r[0], &r[0], &r[8]);
2452 FETCH(&r[6], 0, CHAN_Z);
2453 micro_mul(&r[8], &r[6], &r[6]);
2454 micro_add(&r[0], &r[0], &r[8]);
2455
2456 /* r1 = dp3(src0, src1) */
2457 FETCH(&r[3], 1, CHAN_X);
2458 micro_mul(&r[1], &r[2], &r[3]);
2459 FETCH(&r[5], 1, CHAN_Y);
2460 micro_mul(&r[8], &r[4], &r[5]);
2461 micro_add(&r[1], &r[1], &r[8]);
2462 FETCH(&r[7], 1, CHAN_Z);
2463 micro_mul(&r[8], &r[6], &r[7]);
2464 micro_add(&r[1], &r[1], &r[8]);
2465
2466 /* r1 = 2 * r1 / r0 */
2467 micro_add(&r[1], &r[1], &r[1]);
2468 micro_div(&r[1], &r[1], &r[0]);
2469
2470 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2471 micro_mul(&r[2], &r[2], &r[1]);
2472 micro_sub(&r[2], &r[2], &r[3]);
2473 STORE(&r[2], 0, CHAN_X);
2474 }
2475 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2476 micro_mul(&r[4], &r[4], &r[1]);
2477 micro_sub(&r[4], &r[4], &r[5]);
2478 STORE(&r[4], 0, CHAN_Y);
2479 }
2480 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2481 micro_mul(&r[6], &r[6], &r[1]);
2482 micro_sub(&r[6], &r[6], &r[7]);
2483 STORE(&r[6], 0, CHAN_Z);
2484 }
2485 }
2486 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2487 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2488 }
2489 break;
2490
2491 case TGSI_OPCODE_SEQ:
2492 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2493 FETCH( &r[0], 0, chan_index );
2494 FETCH( &r[1], 1, chan_index );
2495 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2496 }
2497 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2498 STORE(&d[chan_index], 0, chan_index);
2499 }
2500 break;
2501
2502 case TGSI_OPCODE_SFL:
2503 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2504 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2505 }
2506 break;
2507
2508 case TGSI_OPCODE_SGT:
2509 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2510 FETCH( &r[0], 0, chan_index );
2511 FETCH( &r[1], 1, chan_index );
2512 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
2513 }
2514 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2515 STORE(&d[chan_index], 0, chan_index);
2516 }
2517 break;
2518
2519 case TGSI_OPCODE_SIN:
2520 FETCH( &r[0], 0, CHAN_X );
2521 micro_sin( &r[0], &r[0] );
2522 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2523 STORE( &r[0], 0, chan_index );
2524 }
2525 break;
2526
2527 case TGSI_OPCODE_SLE:
2528 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2529 FETCH( &r[0], 0, chan_index );
2530 FETCH( &r[1], 1, chan_index );
2531 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2532 }
2533 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2534 STORE(&d[chan_index], 0, chan_index);
2535 }
2536 break;
2537
2538 case TGSI_OPCODE_SNE:
2539 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2540 FETCH( &r[0], 0, chan_index );
2541 FETCH( &r[1], 1, chan_index );
2542 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
2543 }
2544 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2545 STORE(&d[chan_index], 0, chan_index);
2546 }
2547 break;
2548
2549 case TGSI_OPCODE_STR:
2550 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2551 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2552 }
2553 break;
2554
2555 case TGSI_OPCODE_TEX:
2556 /* simple texture lookup */
2557 /* src[0] = texcoord */
2558 /* src[1] = sampler unit */
2559 exec_tex(mach, inst, FALSE, FALSE);
2560 break;
2561
2562 case TGSI_OPCODE_TXB:
2563 /* Texture lookup with lod bias */
2564 /* src[0] = texcoord (src[0].w = LOD bias) */
2565 /* src[1] = sampler unit */
2566 exec_tex(mach, inst, TRUE, FALSE);
2567 break;
2568
2569 case TGSI_OPCODE_TXD:
2570 /* Texture lookup with explicit partial derivatives */
2571 /* src[0] = texcoord */
2572 /* src[1] = d[strq]/dx */
2573 /* src[2] = d[strq]/dy */
2574 /* src[3] = sampler unit */
2575 exec_txd(mach, inst);
2576 break;
2577
2578 case TGSI_OPCODE_TXL:
2579 /* Texture lookup with explicit LOD */
2580 /* src[0] = texcoord (src[0].w = LOD) */
2581 /* src[1] = sampler unit */
2582 exec_tex(mach, inst, TRUE, FALSE);
2583 break;
2584
2585 case TGSI_OPCODE_TXP:
2586 /* Texture lookup with projection */
2587 /* src[0] = texcoord (src[0].w = projection) */
2588 /* src[1] = sampler unit */
2589 exec_tex(mach, inst, FALSE, TRUE);
2590 break;
2591
2592 case TGSI_OPCODE_UP2H:
2593 assert (0);
2594 break;
2595
2596 case TGSI_OPCODE_UP2US:
2597 assert (0);
2598 break;
2599
2600 case TGSI_OPCODE_UP4B:
2601 assert (0);
2602 break;
2603
2604 case TGSI_OPCODE_UP4UB:
2605 assert (0);
2606 break;
2607
2608 case TGSI_OPCODE_X2D:
2609 FETCH(&r[0], 1, CHAN_X);
2610 FETCH(&r[1], 1, CHAN_Y);
2611 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2612 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2613 FETCH(&r[2], 2, CHAN_X);
2614 micro_mul(&r[2], &r[2], &r[0]);
2615 FETCH(&r[3], 2, CHAN_Y);
2616 micro_mul(&r[3], &r[3], &r[1]);
2617 micro_add(&r[2], &r[2], &r[3]);
2618 FETCH(&r[3], 0, CHAN_X);
2619 micro_add(&d[CHAN_X], &r[2], &r[3]);
2620
2621 }
2622 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2623 IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2624 FETCH(&r[2], 2, CHAN_Z);
2625 micro_mul(&r[2], &r[2], &r[0]);
2626 FETCH(&r[3], 2, CHAN_W);
2627 micro_mul(&r[3], &r[3], &r[1]);
2628 micro_add(&r[2], &r[2], &r[3]);
2629 FETCH(&r[3], 0, CHAN_Y);
2630 micro_add(&d[CHAN_Y], &r[2], &r[3]);
2631
2632 }
2633 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2634 STORE(&d[CHAN_X], 0, CHAN_X);
2635 }
2636 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2637 STORE(&d[CHAN_Y], 0, CHAN_Y);
2638 }
2639 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2640 STORE(&d[CHAN_X], 0, CHAN_Z);
2641 }
2642 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2643 STORE(&d[CHAN_Y], 0, CHAN_W);
2644 }
2645 break;
2646
2647 case TGSI_OPCODE_ARA:
2648 assert (0);
2649 break;
2650
2651 case TGSI_OPCODE_BRA:
2652 assert (0);
2653 break;
2654
2655 case TGSI_OPCODE_CAL:
2656 /* skip the call if no execution channels are enabled */
2657 if (mach->ExecMask) {
2658 /* do the call */
2659
2660 /* First, record the depths of the execution stacks.
2661 * This is important for deeply nested/looped return statements.
2662 * We have to unwind the stacks by the correct amount. For a
2663 * real code generator, we could determine the number of entries
2664 * to pop off each stack with simple static analysis and avoid
2665 * implementing this data structure at run time.
2666 */
2667 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
2668 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
2669 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
2670 /* note that PC was already incremented above */
2671 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
2672
2673 mach->CallStackTop++;
2674
2675 /* Second, push the Cond, Loop, Cont, Func stacks */
2676 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2677 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2678 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2679 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2680 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2681 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2682 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2683 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2684
2685 /* Finally, jump to the subroutine */
2686 *pc = inst->InstructionExtLabel.Label;
2687 }
2688 break;
2689
2690 case TGSI_OPCODE_RET:
2691 mach->FuncMask &= ~mach->ExecMask;
2692 UPDATE_EXEC_MASK(mach);
2693
2694 if (mach->FuncMask == 0x0) {
2695 /* really return now (otherwise, keep executing) */
2696
2697 if (mach->CallStackTop == 0) {
2698 /* returning from main() */
2699 *pc = -1;
2700 return;
2701 }
2702
2703 assert(mach->CallStackTop > 0);
2704 mach->CallStackTop--;
2705
2706 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
2707 mach->CondMask = mach->CondStack[mach->CondStackTop];
2708
2709 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
2710 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
2711
2712 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
2713 mach->ContMask = mach->ContStack[mach->ContStackTop];
2714
2715 assert(mach->FuncStackTop > 0);
2716 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2717
2718 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
2719
2720 UPDATE_EXEC_MASK(mach);
2721 }
2722 break;
2723
2724 case TGSI_OPCODE_SSG:
2725 /* TGSI_OPCODE_SGN */
2726 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2727 FETCH( &r[0], 0, chan_index );
2728 micro_sgn(&d[chan_index], &r[0]);
2729 }
2730 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2731 STORE(&d[chan_index], 0, chan_index);
2732 }
2733 break;
2734
2735 case TGSI_OPCODE_CMP:
2736 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2737 FETCH(&r[0], 0, chan_index);
2738 FETCH(&r[1], 1, chan_index);
2739 FETCH(&r[2], 2, chan_index);
2740 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
2741 }
2742 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2743 STORE(&d[chan_index], 0, chan_index);
2744 }
2745 break;
2746
2747 case TGSI_OPCODE_SCS:
2748 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2749 FETCH( &r[0], 0, CHAN_X );
2750 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2751 micro_cos(&r[1], &r[0]);
2752 STORE(&r[1], 0, CHAN_X);
2753 }
2754 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2755 micro_sin(&r[1], &r[0]);
2756 STORE(&r[1], 0, CHAN_Y);
2757 }
2758 }
2759 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2760 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2761 }
2762 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2763 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2764 }
2765 break;
2766
2767 case TGSI_OPCODE_NRM:
2768 /* 3-component vector normalize */
2769 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2770 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2771 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2772 /* r3 = sqrt(dp3(src0, src0)) */
2773 FETCH(&r[0], 0, CHAN_X);
2774 micro_mul(&r[3], &r[0], &r[0]);
2775 FETCH(&r[1], 0, CHAN_Y);
2776 micro_mul(&r[4], &r[1], &r[1]);
2777 micro_add(&r[3], &r[3], &r[4]);
2778 FETCH(&r[2], 0, CHAN_Z);
2779 micro_mul(&r[4], &r[2], &r[2]);
2780 micro_add(&r[3], &r[3], &r[4]);
2781 micro_sqrt(&r[3], &r[3]);
2782
2783 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2784 micro_div(&r[0], &r[0], &r[3]);
2785 STORE(&r[0], 0, CHAN_X);
2786 }
2787 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2788 micro_div(&r[1], &r[1], &r[3]);
2789 STORE(&r[1], 0, CHAN_Y);
2790 }
2791 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2792 micro_div(&r[2], &r[2], &r[3]);
2793 STORE(&r[2], 0, CHAN_Z);
2794 }
2795 }
2796 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2797 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2798 }
2799 break;
2800
2801 case TGSI_OPCODE_NRM4:
2802 /* 4-component vector normalize */
2803 {
2804 union tgsi_exec_channel tmp, dot;
2805
2806 /* tmp = dp4(src0, src0): */
2807 FETCH( &r[0], 0, CHAN_X );
2808 micro_mul( &tmp, &r[0], &r[0] );
2809
2810 FETCH( &r[1], 0, CHAN_Y );
2811 micro_mul( &dot, &r[1], &r[1] );
2812 micro_add( &tmp, &tmp, &dot );
2813
2814 FETCH( &r[2], 0, CHAN_Z );
2815 micro_mul( &dot, &r[2], &r[2] );
2816 micro_add( &tmp, &tmp, &dot );
2817
2818 FETCH( &r[3], 0, CHAN_W );
2819 micro_mul( &dot, &r[3], &r[3] );
2820 micro_add( &tmp, &tmp, &dot );
2821
2822 /* tmp = 1 / sqrt(tmp) */
2823 micro_sqrt( &tmp, &tmp );
2824 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2825
2826 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2827 /* chan = chan * tmp */
2828 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2829 STORE( &r[chan_index], 0, chan_index );
2830 }
2831 }
2832 break;
2833
2834 case TGSI_OPCODE_DIV:
2835 assert( 0 );
2836 break;
2837
2838 case TGSI_OPCODE_DP2:
2839 FETCH( &r[0], 0, CHAN_X );
2840 FETCH( &r[1], 1, CHAN_X );
2841 micro_mul( &r[0], &r[0], &r[1] );
2842
2843 FETCH( &r[1], 0, CHAN_Y );
2844 FETCH( &r[2], 1, CHAN_Y );
2845 micro_mul( &r[1], &r[1], &r[2] );
2846 micro_add( &r[0], &r[0], &r[1] );
2847
2848 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2849 STORE( &r[0], 0, chan_index );
2850 }
2851 break;
2852
2853 case TGSI_OPCODE_IF:
2854 /* push CondMask */
2855 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2856 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2857 FETCH( &r[0], 0, CHAN_X );
2858 /* update CondMask */
2859 if( ! r[0].u[0] ) {
2860 mach->CondMask &= ~0x1;
2861 }
2862 if( ! r[0].u[1] ) {
2863 mach->CondMask &= ~0x2;
2864 }
2865 if( ! r[0].u[2] ) {
2866 mach->CondMask &= ~0x4;
2867 }
2868 if( ! r[0].u[3] ) {
2869 mach->CondMask &= ~0x8;
2870 }
2871 UPDATE_EXEC_MASK(mach);
2872 /* Todo: If CondMask==0, jump to ELSE */
2873 break;
2874
2875 case TGSI_OPCODE_ELSE:
2876 /* invert CondMask wrt previous mask */
2877 {
2878 uint prevMask;
2879 assert(mach->CondStackTop > 0);
2880 prevMask = mach->CondStack[mach->CondStackTop - 1];
2881 mach->CondMask = ~mach->CondMask & prevMask;
2882 UPDATE_EXEC_MASK(mach);
2883 /* Todo: If CondMask==0, jump to ENDIF */
2884 }
2885 break;
2886
2887 case TGSI_OPCODE_ENDIF:
2888 /* pop CondMask */
2889 assert(mach->CondStackTop > 0);
2890 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2891 UPDATE_EXEC_MASK(mach);
2892 break;
2893
2894 case TGSI_OPCODE_END:
2895 /* halt execution */
2896 *pc = -1;
2897 break;
2898
2899 case TGSI_OPCODE_REP:
2900 assert (0);
2901 break;
2902
2903 case TGSI_OPCODE_ENDREP:
2904 assert (0);
2905 break;
2906
2907 case TGSI_OPCODE_PUSHA:
2908 assert (0);
2909 break;
2910
2911 case TGSI_OPCODE_POPA:
2912 assert (0);
2913 break;
2914
2915 case TGSI_OPCODE_CEIL:
2916 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2917 FETCH( &r[0], 0, chan_index );
2918 micro_ceil(&d[chan_index], &r[0]);
2919 }
2920 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2921 STORE(&d[chan_index], 0, chan_index);
2922 }
2923 break;
2924
2925 case TGSI_OPCODE_I2F:
2926 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2927 FETCH( &r[0], 0, chan_index );
2928 micro_i2f(&d[chan_index], &r[0]);
2929 }
2930 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2931 STORE(&d[chan_index], 0, chan_index);
2932 }
2933 break;
2934
2935 case TGSI_OPCODE_NOT:
2936 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2937 FETCH( &r[0], 0, chan_index );
2938 micro_not(&d[chan_index], &r[0]);
2939 }
2940 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2941 STORE(&d[chan_index], 0, chan_index);
2942 }
2943 break;
2944
2945 case TGSI_OPCODE_TRUNC:
2946 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2947 FETCH( &r[0], 0, chan_index );
2948 micro_trunc(&d[chan_index], &r[0]);
2949 }
2950 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2951 STORE(&d[chan_index], 0, chan_index);
2952 }
2953 break;
2954
2955 case TGSI_OPCODE_SHL:
2956 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2957 FETCH( &r[0], 0, chan_index );
2958 FETCH( &r[1], 1, chan_index );
2959 micro_shl(&d[chan_index], &r[0], &r[1]);
2960 }
2961 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2962 STORE(&d[chan_index], 0, chan_index);
2963 }
2964 break;
2965
2966 case TGSI_OPCODE_SHR:
2967 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2968 FETCH( &r[0], 0, chan_index );
2969 FETCH( &r[1], 1, chan_index );
2970 micro_ishr(&d[chan_index], &r[0], &r[1]);
2971 }
2972 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2973 STORE(&d[chan_index], 0, chan_index);
2974 }
2975 break;
2976
2977 case TGSI_OPCODE_AND:
2978 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2979 FETCH( &r[0], 0, chan_index );
2980 FETCH( &r[1], 1, chan_index );
2981 micro_and(&d[chan_index], &r[0], &r[1]);
2982 }
2983 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2984 STORE(&d[chan_index], 0, chan_index);
2985 }
2986 break;
2987
2988 case TGSI_OPCODE_OR:
2989 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2990 FETCH( &r[0], 0, chan_index );
2991 FETCH( &r[1], 1, chan_index );
2992 micro_or(&d[chan_index], &r[0], &r[1]);
2993 }
2994 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2995 STORE(&d[chan_index], 0, chan_index);
2996 }
2997 break;
2998
2999 case TGSI_OPCODE_MOD:
3000 assert (0);
3001 break;
3002
3003 case TGSI_OPCODE_XOR:
3004 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3005 FETCH( &r[0], 0, chan_index );
3006 FETCH( &r[1], 1, chan_index );
3007 micro_xor(&d[chan_index], &r[0], &r[1]);
3008 }
3009 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3010 STORE(&d[chan_index], 0, chan_index);
3011 }
3012 break;
3013
3014 case TGSI_OPCODE_SAD:
3015 assert (0);
3016 break;
3017
3018 case TGSI_OPCODE_TXF:
3019 assert (0);
3020 break;
3021
3022 case TGSI_OPCODE_TXQ:
3023 assert (0);
3024 break;
3025
3026 case TGSI_OPCODE_EMIT:
3027 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
3028 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
3029 break;
3030
3031 case TGSI_OPCODE_ENDPRIM:
3032 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
3033 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
3034 break;
3035
3036 case TGSI_OPCODE_BGNFOR:
3037 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3038 for (chan_index = 0; chan_index < 3; chan_index++) {
3039 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
3040 }
3041 ++mach->LoopCounterStackTop;
3042 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
3043 /* update LoopMask */
3044 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3045 mach->LoopMask &= ~0x1;
3046 }
3047 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3048 mach->LoopMask &= ~0x2;
3049 }
3050 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3051 mach->LoopMask &= ~0x4;
3052 }
3053 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3054 mach->LoopMask &= ~0x8;
3055 }
3056 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3057 UPDATE_EXEC_MASK(mach);
3058 /* fall-through (for now) */
3059 case TGSI_OPCODE_BGNLOOP:
3060 /* push LoopMask and ContMasks */
3061 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3062 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3063 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3064 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3065 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3066 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
3067 break;
3068
3069 case TGSI_OPCODE_ENDFOR:
3070 assert(mach->LoopCounterStackTop > 0);
3071 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3072 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3073 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
3074 /* update LoopMask */
3075 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3076 mach->LoopMask &= ~0x1;
3077 }
3078 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3079 mach->LoopMask &= ~0x2;
3080 }
3081 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3082 mach->LoopMask &= ~0x4;
3083 }
3084 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3085 mach->LoopMask &= ~0x8;
3086 }
3087 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3088 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3089 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
3090 assert(mach->LoopLabelStackTop > 0);
3091 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
3092 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
3093 /* Restore ContMask, but don't pop */
3094 assert(mach->ContStackTop > 0);
3095 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3096 UPDATE_EXEC_MASK(mach);
3097 if (mach->ExecMask) {
3098 /* repeat loop: jump to instruction just past BGNLOOP */
3099 assert(mach->LoopLabelStackTop > 0);
3100 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3101 }
3102 else {
3103 /* exit loop: pop LoopMask */
3104 assert(mach->LoopStackTop > 0);
3105 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3106 /* pop ContMask */
3107 assert(mach->ContStackTop > 0);
3108 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3109 assert(mach->LoopLabelStackTop > 0);
3110 --mach->LoopLabelStackTop;
3111 assert(mach->LoopCounterStackTop > 0);
3112 --mach->LoopCounterStackTop;
3113 }
3114 UPDATE_EXEC_MASK(mach);
3115 break;
3116
3117 case TGSI_OPCODE_ENDLOOP:
3118 /* Restore ContMask, but don't pop */
3119 assert(mach->ContStackTop > 0);
3120 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3121 UPDATE_EXEC_MASK(mach);
3122 if (mach->ExecMask) {
3123 /* repeat loop: jump to instruction just past BGNLOOP */
3124 assert(mach->LoopLabelStackTop > 0);
3125 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3126 }
3127 else {
3128 /* exit loop: pop LoopMask */
3129 assert(mach->LoopStackTop > 0);
3130 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3131 /* pop ContMask */
3132 assert(mach->ContStackTop > 0);
3133 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3134 assert(mach->LoopLabelStackTop > 0);
3135 --mach->LoopLabelStackTop;
3136 }
3137 UPDATE_EXEC_MASK(mach);
3138 break;
3139
3140 case TGSI_OPCODE_BRK:
3141 /* turn off loop channels for each enabled exec channel */
3142 mach->LoopMask &= ~mach->ExecMask;
3143 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3144 UPDATE_EXEC_MASK(mach);
3145 break;
3146
3147 case TGSI_OPCODE_CONT:
3148 /* turn off cont channels for each enabled exec channel */
3149 mach->ContMask &= ~mach->ExecMask;
3150 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3151 UPDATE_EXEC_MASK(mach);
3152 break;
3153
3154 case TGSI_OPCODE_BGNSUB:
3155 /* no-op */
3156 break;
3157
3158 case TGSI_OPCODE_ENDSUB:
3159 /* no-op */
3160 break;
3161
3162 case TGSI_OPCODE_NOP:
3163 break;
3164
3165 default:
3166 assert( 0 );
3167 }
3168 }
3169
3170 #define DEBUG_EXECUTION 0
3171
3172
3173 /**
3174 * Run TGSI interpreter.
3175 * \return bitmask of "alive" quad components
3176 */
3177 uint
3178 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3179 {
3180 uint i;
3181 int pc = 0;
3182
3183 mach->CondMask = 0xf;
3184 mach->LoopMask = 0xf;
3185 mach->ContMask = 0xf;
3186 mach->FuncMask = 0xf;
3187 mach->ExecMask = 0xf;
3188
3189 assert(mach->CondStackTop == 0);
3190 assert(mach->LoopStackTop == 0);
3191 assert(mach->ContStackTop == 0);
3192 assert(mach->CallStackTop == 0);
3193
3194 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3195 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3196
3197 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3198 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3199 mach->Primitives[0] = 0;
3200 }
3201
3202 for (i = 0; i < QUAD_SIZE; i++) {
3203 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3204 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3205 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3206 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3207 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3208 }
3209
3210 /* execute declarations (interpolants) */
3211 for (i = 0; i < mach->NumDeclarations; i++) {
3212 exec_declaration( mach, mach->Declarations+i );
3213 }
3214
3215 {
3216 #if DEBUG_EXECUTION
3217 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
3218 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
3219 uint inst = 1;
3220
3221 memcpy(temps, mach->Temps, sizeof(temps));
3222 memcpy(outputs, mach->Outputs, sizeof(outputs));
3223 #endif
3224
3225 /* execute instructions, until pc is set to -1 */
3226 while (pc != -1) {
3227
3228 #if DEBUG_EXECUTION
3229 uint i;
3230
3231 tgsi_dump_instruction(&mach->Instructions[pc], inst++);
3232 #endif
3233
3234 assert(pc < (int) mach->NumInstructions);
3235 exec_instruction(mach, mach->Instructions + pc, &pc);
3236
3237 #if DEBUG_EXECUTION
3238 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
3239 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
3240 uint j;
3241
3242 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
3243 debug_printf("TEMP[%2u] = ", i);
3244 for (j = 0; j < 4; j++) {
3245 if (j > 0) {
3246 debug_printf(" ");
3247 }
3248 debug_printf("(%6f, %6f, %6f, %6f)\n",
3249 temps[i].xyzw[0].f[j],
3250 temps[i].xyzw[1].f[j],
3251 temps[i].xyzw[2].f[j],
3252 temps[i].xyzw[3].f[j]);
3253 }
3254 }
3255 }
3256 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
3257 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
3258 uint j;
3259
3260 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
3261 debug_printf("OUT[%2u] = ", i);
3262 for (j = 0; j < 4; j++) {
3263 if (j > 0) {
3264 debug_printf(" ");
3265 }
3266 debug_printf("{%6f, %6f, %6f, %6f}\n",
3267 outputs[i].xyzw[0].f[j],
3268 outputs[i].xyzw[1].f[j],
3269 outputs[i].xyzw[2].f[j],
3270 outputs[i].xyzw[3].f[j]);
3271 }
3272 }
3273 }
3274 #endif
3275 }
3276 }
3277
3278 #if 0
3279 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3280 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3281 /*
3282 * Scale back depth component.
3283 */
3284 for (i = 0; i < 4; i++)
3285 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3286 }
3287 #endif
3288
3289 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3290 }