tgsi: Treat MOV source operand as FLOAT so modifiers work as expected.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
44 * The ExecMask is computed from several other masks (CondMask, LoopMask,
45 * ContMask, Switch.mask and FuncMask -- see UPDATE_EXEC_MASK), the first three
46 * of which are controlled by IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT.
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
63
64
/* Non-zero selects the util_fast_* helpers in the exp2/lg2/pow micro-ops;
 * zero selects the libm-based code paths instead.
 */
#define FAST_MATH 1
66
67 static void
68 micro_arl(union tgsi_exec_channel *dst,
69 const union tgsi_exec_channel *src)
70 {
71 dst->i[0] = (int)floorf(src->f[0]);
72 dst->i[1] = (int)floorf(src->f[1]);
73 dst->i[2] = (int)floorf(src->f[2]);
74 dst->i[3] = (int)floorf(src->f[3]);
75 }
76
77 static void
78 micro_arr(union tgsi_exec_channel *dst,
79 const union tgsi_exec_channel *src)
80 {
81 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
82 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
83 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
84 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
85 }
86
87 static void
88 micro_iabs(union tgsi_exec_channel *dst,
89 const union tgsi_exec_channel *src)
90 {
91 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
92 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
93 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
94 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
95 }
96
97 static void
98 micro_ineg(union tgsi_exec_channel *dst,
99 const union tgsi_exec_channel *src)
100 {
101 dst->i[0] = -src->i[0];
102 dst->i[1] = -src->i[1];
103 dst->i[2] = -src->i[2];
104 dst->i[3] = -src->i[3];
105 }
106
107 static void
108 micro_mov(union tgsi_exec_channel *dst,
109 const union tgsi_exec_channel *src)
110 {
111 dst->u[0] = src->u[0];
112 dst->u[1] = src->u[1];
113 dst->u[2] = src->u[2];
114 dst->u[3] = src->u[3];
115 }
116
/* Lane positions within a channel, named by tile corner
 * (used by the derivative micro-ops).
 */
#define TILE_TOP_LEFT      0
#define TILE_TOP_RIGHT     1
#define TILE_BOTTOM_LEFT   2
#define TILE_BOTTOM_RIGHT  3

/* Numeric indices of the X/Y/Z/W register channels. */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3
126
/** Data type tag passed to the source-fetch helpers. */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT = 0,
   TGSI_EXEC_DATA_INT   = 1,
   TGSI_EXEC_DATA_UINT  = 2
};
132
/*
 * Short aliases for the TGSI_EXEC_TEMP_* utility-register locations.
 * A name ending in _I aliases a register index, one ending in _C a channel.
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
#define TEMP_P0            TGSI_EXEC_TEMP_P0
166
/** Non-zero when bit CHAN is set in the write mask of destination 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN) \
   ((1 << (CHAN)) & (INST).Dst[0].Register.WriteMask)

/** Non-zero when bit CHAN is set in the write mask of destination 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN) \
   ((1 << (CHAN)) & (INST).Dst[1].Register.WriteMask)

/** Loop header: runs the following statement for each channel enabled
 *  in destination 0's write mask.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; ++CHAN) \
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Loop header: runs the following statement for each channel enabled
 *  in destination 1's write mask.
 */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; ++CHAN) \
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
180
181
/**
 * Recompute MACH->ExecMask as the bitwise AND of the condition, loop,
 * continue, switch and function masks.
 *
 * MACH is a pointer expression; it is parenthesized at every use so that
 * arguments such as `base + 1` expand correctly.  Note that MACH is still
 * evaluated more than once, so it must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
185
186
187 static const union tgsi_exec_channel ZeroVec =
188 { { 0.0, 0.0, 0.0, 0.0 } };
189
190
/** Assert that none of the four float lanes of chan is Inf or NaN. */
#define CHECK_INF_OR_NAN(chan) \
   do { \
      assert(!util_is_inf_or_nan((chan)->f[0])); \
      assert(!util_is_inf_or_nan((chan)->f[1])); \
      assert(!util_is_inf_or_nan((chan)->f[2])); \
      assert(!util_is_inf_or_nan((chan)->f[3])); \
   } while (0)
197
198
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of a channel, prefixed by msg.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
207
208
#ifdef DEBUG
/**
 * Debug helper: print all four channels (X, Y, Z, W) of temporary
 * register number index, one channel per line.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   const struct tgsi_exec_vector *vec = &mach->Temps[index];
   int comp = 0;

   debug_printf("Temp[%u] =\n", index);
   while (comp < 4) {
      const union tgsi_exec_channel *ch = &vec->xyzw[comp];

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   "XYZW"[comp],
                   ch->f[0], ch->f[1], ch->f[2], ch->f[3]);
      comp++;
   }
}
#endif
226
227
228 /**
229 * Check if there's a potential src/dst register data dependency when
230 * using SOA execution.
231 * Example:
232 * MOV T, T.yxwz;
233 * This would expand into:
234 * MOV t0, t1;
235 * MOV t1, t0;
236 * MOV t2, t3;
237 * MOV t3, t2;
238 * The second instruction will have the wrong value for t0 if executed as-is.
239 */
240 boolean
241 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
242 {
243 uint i, chan;
244
245 uint writemask = inst->Dst[0].Register.WriteMask;
246 if (writemask == TGSI_WRITEMASK_X ||
247 writemask == TGSI_WRITEMASK_Y ||
248 writemask == TGSI_WRITEMASK_Z ||
249 writemask == TGSI_WRITEMASK_W ||
250 writemask == TGSI_WRITEMASK_NONE) {
251 /* no chance of data dependency */
252 return FALSE;
253 }
254
255 /* loop over src regs */
256 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
257 if ((inst->Src[i].Register.File ==
258 inst->Dst[0].Register.File) &&
259 (inst->Src[i].Register.Index ==
260 inst->Dst[0].Register.Index)) {
261 /* loop over dest channels */
262 uint channelsWritten = 0x0;
263 FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
264 /* check if we're reading a channel that's been written */
265 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
266 if (channelsWritten & (1 << swizzle)) {
267 return TRUE;
268 }
269
270 channelsWritten |= (1 << chan);
271 }
272 }
273 }
274 return FALSE;
275 }
276
277
278 /**
279 * Initialize machine state by expanding tokens to full instructions,
280 * allocating temporary storage, setting up constants, etc.
281 * After this, we can call tgsi_exec_machine_run() many times.
282 */
283 void
284 tgsi_exec_machine_bind_shader(
285 struct tgsi_exec_machine *mach,
286 const struct tgsi_token *tokens,
287 uint numSamplers,
288 struct tgsi_sampler **samplers)
289 {
290 uint k;
291 struct tgsi_parse_context parse;
292 struct tgsi_exec_labels *labels = &mach->Labels;
293 struct tgsi_full_instruction *instructions;
294 struct tgsi_full_declaration *declarations;
295 uint maxInstructions = 10, numInstructions = 0;
296 uint maxDeclarations = 10, numDeclarations = 0;
297 uint instno = 0;
298
299 #if 0
300 tgsi_dump(tokens, 0);
301 #endif
302
303 util_init_math();
304
305 mach->Tokens = tokens;
306 mach->Samplers = samplers;
307
308 k = tgsi_parse_init (&parse, mach->Tokens);
309 if (k != TGSI_PARSE_OK) {
310 debug_printf( "Problem parsing!\n" );
311 return;
312 }
313
314 mach->Processor = parse.FullHeader.Processor.Processor;
315 mach->ImmLimit = 0;
316 labels->count = 0;
317
318 declarations = (struct tgsi_full_declaration *)
319 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
320
321 if (!declarations) {
322 return;
323 }
324
325 instructions = (struct tgsi_full_instruction *)
326 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
327
328 if (!instructions) {
329 FREE( declarations );
330 return;
331 }
332
333 while( !tgsi_parse_end_of_tokens( &parse ) ) {
334 uint pointer = parse.Position;
335 uint i;
336
337 tgsi_parse_token( &parse );
338 switch( parse.FullToken.Token.Type ) {
339 case TGSI_TOKEN_TYPE_DECLARATION:
340 /* save expanded declaration */
341 if (numDeclarations == maxDeclarations) {
342 declarations = REALLOC(declarations,
343 maxDeclarations
344 * sizeof(struct tgsi_full_declaration),
345 (maxDeclarations + 10)
346 * sizeof(struct tgsi_full_declaration));
347 maxDeclarations += 10;
348 }
349 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
350 unsigned reg;
351 for (reg = parse.FullToken.FullDeclaration.Range.First;
352 reg <= parse.FullToken.FullDeclaration.Range.Last;
353 ++reg) {
354 ++mach->NumOutputs;
355 }
356 }
357 memcpy(declarations + numDeclarations,
358 &parse.FullToken.FullDeclaration,
359 sizeof(declarations[0]));
360 numDeclarations++;
361 break;
362
363 case TGSI_TOKEN_TYPE_IMMEDIATE:
364 {
365 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
366 assert( size <= 4 );
367 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
368
369 for( i = 0; i < size; i++ ) {
370 mach->Imms[mach->ImmLimit][i] =
371 parse.FullToken.FullImmediate.u[i].Float;
372 }
373 mach->ImmLimit += 1;
374 }
375 break;
376
377 case TGSI_TOKEN_TYPE_INSTRUCTION:
378 assert( labels->count < MAX_LABELS );
379
380 labels->labels[labels->count][0] = instno;
381 labels->labels[labels->count][1] = pointer;
382 labels->count++;
383
384 /* save expanded instruction */
385 if (numInstructions == maxInstructions) {
386 instructions = REALLOC(instructions,
387 maxInstructions
388 * sizeof(struct tgsi_full_instruction),
389 (maxInstructions + 10)
390 * sizeof(struct tgsi_full_instruction));
391 maxInstructions += 10;
392 }
393
394 memcpy(instructions + numInstructions,
395 &parse.FullToken.FullInstruction,
396 sizeof(instructions[0]));
397
398 numInstructions++;
399 break;
400
401 case TGSI_TOKEN_TYPE_PROPERTY:
402 break;
403
404 default:
405 assert( 0 );
406 }
407 }
408 tgsi_parse_free (&parse);
409
410 if (mach->Declarations) {
411 FREE( mach->Declarations );
412 }
413 mach->Declarations = declarations;
414 mach->NumDeclarations = numDeclarations;
415
416 if (mach->Instructions) {
417 FREE( mach->Instructions );
418 }
419 mach->Instructions = instructions;
420 mach->NumInstructions = numInstructions;
421 }
422
423
424 struct tgsi_exec_machine *
425 tgsi_exec_machine_create( void )
426 {
427 struct tgsi_exec_machine *mach;
428 uint i;
429
430 mach = align_malloc( sizeof *mach, 16 );
431 if (!mach)
432 goto fail;
433
434 memset(mach, 0, sizeof(*mach));
435
436 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
437 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
438 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
439
440 /* Setup constants. */
441 for( i = 0; i < 4; i++ ) {
442 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
443 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
444 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
445 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
446 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
447 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
448 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
449 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
450 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
451 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
452 }
453
454 #ifdef DEBUG
455 /* silence warnings */
456 (void) print_chan;
457 (void) print_temp;
458 #endif
459
460 return mach;
461
462 fail:
463 align_free(mach);
464 return NULL;
465 }
466
467
468 void
469 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
470 {
471 if (mach) {
472 FREE(mach->Instructions);
473 FREE(mach->Declarations);
474 }
475
476 align_free(mach);
477 }
478
479
480 static void
481 micro_abs(
482 union tgsi_exec_channel *dst,
483 const union tgsi_exec_channel *src )
484 {
485 dst->f[0] = fabsf( src->f[0] );
486 dst->f[1] = fabsf( src->f[1] );
487 dst->f[2] = fabsf( src->f[2] );
488 dst->f[3] = fabsf( src->f[3] );
489 }
490
491 static void
492 micro_add(
493 union tgsi_exec_channel *dst,
494 const union tgsi_exec_channel *src0,
495 const union tgsi_exec_channel *src1 )
496 {
497 dst->f[0] = src0->f[0] + src1->f[0];
498 dst->f[1] = src0->f[1] + src1->f[1];
499 dst->f[2] = src0->f[2] + src1->f[2];
500 dst->f[3] = src0->f[3] + src1->f[3];
501 }
502
503 static void
504 micro_ceil(
505 union tgsi_exec_channel *dst,
506 const union tgsi_exec_channel *src )
507 {
508 dst->f[0] = ceilf( src->f[0] );
509 dst->f[1] = ceilf( src->f[1] );
510 dst->f[2] = ceilf( src->f[2] );
511 dst->f[3] = ceilf( src->f[3] );
512 }
513
514 static void
515 micro_cos(
516 union tgsi_exec_channel *dst,
517 const union tgsi_exec_channel *src )
518 {
519 dst->f[0] = cosf( src->f[0] );
520 dst->f[1] = cosf( src->f[1] );
521 dst->f[2] = cosf( src->f[2] );
522 dst->f[3] = cosf( src->f[3] );
523 }
524
525 static void
526 micro_ddx(
527 union tgsi_exec_channel *dst,
528 const union tgsi_exec_channel *src )
529 {
530 dst->f[0] =
531 dst->f[1] =
532 dst->f[2] =
533 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
534 }
535
536 static void
537 micro_ddy(
538 union tgsi_exec_channel *dst,
539 const union tgsi_exec_channel *src )
540 {
541 dst->f[0] =
542 dst->f[1] =
543 dst->f[2] =
544 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
545 }
546
547 static void
548 micro_div(
549 union tgsi_exec_channel *dst,
550 const union tgsi_exec_channel *src0,
551 const union tgsi_exec_channel *src1 )
552 {
553 if (src1->f[0] != 0) {
554 dst->f[0] = src0->f[0] / src1->f[0];
555 }
556 if (src1->f[1] != 0) {
557 dst->f[1] = src0->f[1] / src1->f[1];
558 }
559 if (src1->f[2] != 0) {
560 dst->f[2] = src0->f[2] / src1->f[2];
561 }
562 if (src1->f[3] != 0) {
563 dst->f[3] = src0->f[3] / src1->f[3];
564 }
565 }
566
567 static void
568 micro_eq(
569 union tgsi_exec_channel *dst,
570 const union tgsi_exec_channel *src0,
571 const union tgsi_exec_channel *src1,
572 const union tgsi_exec_channel *src2,
573 const union tgsi_exec_channel *src3 )
574 {
575 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
576 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
577 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
578 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
579 }
580
581 static void
582 micro_exp2(
583 union tgsi_exec_channel *dst,
584 const union tgsi_exec_channel *src)
585 {
586 #if FAST_MATH
587 dst->f[0] = util_fast_exp2( src->f[0] );
588 dst->f[1] = util_fast_exp2( src->f[1] );
589 dst->f[2] = util_fast_exp2( src->f[2] );
590 dst->f[3] = util_fast_exp2( src->f[3] );
591 #else
592
593 #if DEBUG
594 /* Inf is okay for this instruction, so clamp it to silence assertions. */
595 uint i;
596 union tgsi_exec_channel clamped;
597
598 for (i = 0; i < 4; i++) {
599 if (src->f[i] > 127.99999f) {
600 clamped.f[i] = 127.99999f;
601 } else if (src->f[i] < -126.99999f) {
602 clamped.f[i] = -126.99999f;
603 } else {
604 clamped.f[i] = src->f[i];
605 }
606 }
607 src = &clamped;
608 #endif
609
610 dst->f[0] = powf( 2.0f, src->f[0] );
611 dst->f[1] = powf( 2.0f, src->f[1] );
612 dst->f[2] = powf( 2.0f, src->f[2] );
613 dst->f[3] = powf( 2.0f, src->f[3] );
614 #endif
615 }
616
617 static void
618 micro_float_clamp(union tgsi_exec_channel *dst,
619 const union tgsi_exec_channel *src)
620 {
621 uint i;
622
623 for (i = 0; i < 4; i++) {
624 if (src->f[i] > 0.0f) {
625 if (src->f[i] > 1.884467e+019f)
626 dst->f[i] = 1.884467e+019f;
627 else if (src->f[i] < 5.42101e-020f)
628 dst->f[i] = 5.42101e-020f;
629 else
630 dst->f[i] = src->f[i];
631 }
632 else {
633 if (src->f[i] < -1.884467e+019f)
634 dst->f[i] = -1.884467e+019f;
635 else if (src->f[i] > -5.42101e-020f)
636 dst->f[i] = -5.42101e-020f;
637 else
638 dst->f[i] = src->f[i];
639 }
640 }
641 }
642
643 static void
644 micro_flr(
645 union tgsi_exec_channel *dst,
646 const union tgsi_exec_channel *src )
647 {
648 dst->f[0] = floorf( src->f[0] );
649 dst->f[1] = floorf( src->f[1] );
650 dst->f[2] = floorf( src->f[2] );
651 dst->f[3] = floorf( src->f[3] );
652 }
653
654 static void
655 micro_frc(
656 union tgsi_exec_channel *dst,
657 const union tgsi_exec_channel *src )
658 {
659 dst->f[0] = src->f[0] - floorf( src->f[0] );
660 dst->f[1] = src->f[1] - floorf( src->f[1] );
661 dst->f[2] = src->f[2] - floorf( src->f[2] );
662 dst->f[3] = src->f[3] - floorf( src->f[3] );
663 }
664
665 static void
666 micro_lg2(
667 union tgsi_exec_channel *dst,
668 const union tgsi_exec_channel *src )
669 {
670 #if FAST_MATH
671 dst->f[0] = util_fast_log2( src->f[0] );
672 dst->f[1] = util_fast_log2( src->f[1] );
673 dst->f[2] = util_fast_log2( src->f[2] );
674 dst->f[3] = util_fast_log2( src->f[3] );
675 #else
676 dst->f[0] = logf( src->f[0] ) * 1.442695f;
677 dst->f[1] = logf( src->f[1] ) * 1.442695f;
678 dst->f[2] = logf( src->f[2] ) * 1.442695f;
679 dst->f[3] = logf( src->f[3] ) * 1.442695f;
680 #endif
681 }
682
683 static void
684 micro_le(
685 union tgsi_exec_channel *dst,
686 const union tgsi_exec_channel *src0,
687 const union tgsi_exec_channel *src1,
688 const union tgsi_exec_channel *src2,
689 const union tgsi_exec_channel *src3 )
690 {
691 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
692 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
693 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
694 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
695 }
696
697 static void
698 micro_lt(
699 union tgsi_exec_channel *dst,
700 const union tgsi_exec_channel *src0,
701 const union tgsi_exec_channel *src1,
702 const union tgsi_exec_channel *src2,
703 const union tgsi_exec_channel *src3 )
704 {
705 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
706 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
707 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
708 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
709 }
710
711 static void
712 micro_max(
713 union tgsi_exec_channel *dst,
714 const union tgsi_exec_channel *src0,
715 const union tgsi_exec_channel *src1 )
716 {
717 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
718 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
719 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
720 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
721 }
722
723 static void
724 micro_min(
725 union tgsi_exec_channel *dst,
726 const union tgsi_exec_channel *src0,
727 const union tgsi_exec_channel *src1 )
728 {
729 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
730 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
731 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
732 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
733 }
734
735 static void
736 micro_mul(
737 union tgsi_exec_channel *dst,
738 const union tgsi_exec_channel *src0,
739 const union tgsi_exec_channel *src1 )
740 {
741 dst->f[0] = src0->f[0] * src1->f[0];
742 dst->f[1] = src0->f[1] * src1->f[1];
743 dst->f[2] = src0->f[2] * src1->f[2];
744 dst->f[3] = src0->f[3] * src1->f[3];
745 }
746
#if 0
/**
 * Disabled.  Per lane: signed product src0 * src1 stored in dst1, with
 * dst0 set to zero.
 */
static void
micro_imul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
765
#if 0
/**
 * Disabled.  Per lane: unsigned product src0 * src1 stored in dst1, with
 * dst0 set to zero.
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->u[lane] = src0->u[lane] * src1->u[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->u[lane] = 0;
   }
}
#endif
784
785
#if 0
/**
 * Disabled.  Per lane: dst = src0 ? src1 : src2, operating on the
 * unsigned bit patterns.
 */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane]) {
         dst->u[lane] = src1->u[lane];
      }
      else {
         dst->u[lane] = src2->u[lane];
      }
   }
}
#endif
800
801 static void
802 micro_neg(
803 union tgsi_exec_channel *dst,
804 const union tgsi_exec_channel *src )
805 {
806 dst->f[0] = -src->f[0];
807 dst->f[1] = -src->f[1];
808 dst->f[2] = -src->f[2];
809 dst->f[3] = -src->f[3];
810 }
811
812 static void
813 micro_pow(
814 union tgsi_exec_channel *dst,
815 const union tgsi_exec_channel *src0,
816 const union tgsi_exec_channel *src1 )
817 {
818 #if FAST_MATH
819 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
820 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
821 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
822 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
823 #else
824 dst->f[0] = powf( src0->f[0], src1->f[0] );
825 dst->f[1] = powf( src0->f[1], src1->f[1] );
826 dst->f[2] = powf( src0->f[2], src1->f[2] );
827 dst->f[3] = powf( src0->f[3], src1->f[3] );
828 #endif
829 }
830
831 static void
832 micro_rnd(
833 union tgsi_exec_channel *dst,
834 const union tgsi_exec_channel *src )
835 {
836 dst->f[0] = floorf( src->f[0] + 0.5f );
837 dst->f[1] = floorf( src->f[1] + 0.5f );
838 dst->f[2] = floorf( src->f[2] + 0.5f );
839 dst->f[3] = floorf( src->f[3] + 0.5f );
840 }
841
842 static void
843 micro_sgn(
844 union tgsi_exec_channel *dst,
845 const union tgsi_exec_channel *src )
846 {
847 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
848 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
849 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
850 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
851 }
852
853 static void
854 micro_trunc(
855 union tgsi_exec_channel *dst,
856 const union tgsi_exec_channel *src0 )
857 {
858 dst->f[0] = (float) (int) src0->f[0];
859 dst->f[1] = (float) (int) src0->f[1];
860 dst->f[2] = (float) (int) src0->f[2];
861 dst->f[3] = (float) (int) src0->f[3];
862 }
863
864 static void
865 micro_sin(
866 union tgsi_exec_channel *dst,
867 const union tgsi_exec_channel *src )
868 {
869 dst->f[0] = sinf( src->f[0] );
870 dst->f[1] = sinf( src->f[1] );
871 dst->f[2] = sinf( src->f[2] );
872 dst->f[3] = sinf( src->f[3] );
873 }
874
875 static void
876 micro_sqrt( union tgsi_exec_channel *dst,
877 const union tgsi_exec_channel *src )
878 {
879 dst->f[0] = sqrtf( src->f[0] );
880 dst->f[1] = sqrtf( src->f[1] );
881 dst->f[2] = sqrtf( src->f[2] );
882 dst->f[3] = sqrtf( src->f[3] );
883 }
884
885 static void
886 micro_sub(
887 union tgsi_exec_channel *dst,
888 const union tgsi_exec_channel *src0,
889 const union tgsi_exec_channel *src1 )
890 {
891 dst->f[0] = src0->f[0] - src1->f[0];
892 dst->f[1] = src0->f[1] - src1->f[1];
893 dst->f[2] = src0->f[2] - src1->f[2];
894 dst->f[3] = src0->f[3] - src1->f[3];
895 }
896
897 static void
898 fetch_src_file_channel(
899 const struct tgsi_exec_machine *mach,
900 const uint file,
901 const uint swizzle,
902 const union tgsi_exec_channel *index,
903 union tgsi_exec_channel *chan )
904 {
905 switch( swizzle ) {
906 case TGSI_SWIZZLE_X:
907 case TGSI_SWIZZLE_Y:
908 case TGSI_SWIZZLE_Z:
909 case TGSI_SWIZZLE_W:
910 switch( file ) {
911 case TGSI_FILE_CONSTANT:
912 assert(mach->Consts);
913 if (index->i[0] < 0)
914 chan->f[0] = 0.0f;
915 else
916 chan->f[0] = mach->Consts[index->i[0]][swizzle];
917 if (index->i[1] < 0)
918 chan->f[1] = 0.0f;
919 else
920 chan->f[1] = mach->Consts[index->i[1]][swizzle];
921 if (index->i[2] < 0)
922 chan->f[2] = 0.0f;
923 else
924 chan->f[2] = mach->Consts[index->i[2]][swizzle];
925 if (index->i[3] < 0)
926 chan->f[3] = 0.0f;
927 else
928 chan->f[3] = mach->Consts[index->i[3]][swizzle];
929 break;
930
931 case TGSI_FILE_INPUT:
932 case TGSI_FILE_SYSTEM_VALUE:
933 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
934 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
935 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
936 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
937 break;
938
939 case TGSI_FILE_TEMPORARY:
940 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
941 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
942 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
943 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
944 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
945 break;
946
947 case TGSI_FILE_IMMEDIATE:
948 assert( index->i[0] < (int) mach->ImmLimit );
949 chan->f[0] = mach->Imms[index->i[0]][swizzle];
950 assert( index->i[1] < (int) mach->ImmLimit );
951 chan->f[1] = mach->Imms[index->i[1]][swizzle];
952 assert( index->i[2] < (int) mach->ImmLimit );
953 chan->f[2] = mach->Imms[index->i[2]][swizzle];
954 assert( index->i[3] < (int) mach->ImmLimit );
955 chan->f[3] = mach->Imms[index->i[3]][swizzle];
956 break;
957
958 case TGSI_FILE_ADDRESS:
959 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
960 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
961 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
962 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
963 break;
964
965 case TGSI_FILE_PREDICATE:
966 assert(index->i[0] < TGSI_EXEC_NUM_PREDS);
967 assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
968 assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
969 assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
970 chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0];
971 chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1];
972 chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2];
973 chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3];
974 break;
975
976 case TGSI_FILE_OUTPUT:
977 /* vertex/fragment output vars can be read too */
978 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
979 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
980 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
981 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
982 break;
983
984 default:
985 assert( 0 );
986 }
987 break;
988
989 default:
990 assert( 0 );
991 }
992 }
993
994 static void
995 fetch_source(const struct tgsi_exec_machine *mach,
996 union tgsi_exec_channel *chan,
997 const struct tgsi_full_src_register *reg,
998 const uint chan_index,
999 enum tgsi_exec_datatype src_datatype)
1000 {
1001 union tgsi_exec_channel index;
1002 uint swizzle;
1003
1004 /* We start with a direct index into a register file.
1005 *
1006 * file[1],
1007 * where:
1008 * file = Register.File
1009 * [1] = Register.Index
1010 */
1011 index.i[0] =
1012 index.i[1] =
1013 index.i[2] =
1014 index.i[3] = reg->Register.Index;
1015
1016 /* There is an extra source register that indirectly subscripts
1017 * a register file. The direct index now becomes an offset
1018 * that is being added to the indirect register.
1019 *
1020 * file[ind[2].x+1],
1021 * where:
1022 * ind = Indirect.File
1023 * [2] = Indirect.Index
1024 * .x = Indirect.SwizzleX
1025 */
1026 if (reg->Register.Indirect) {
1027 union tgsi_exec_channel index2;
1028 union tgsi_exec_channel indir_index;
1029 const uint execmask = mach->ExecMask;
1030 uint i;
1031
1032 /* which address register (always zero now) */
1033 index2.i[0] =
1034 index2.i[1] =
1035 index2.i[2] =
1036 index2.i[3] = reg->Indirect.Index;
1037
1038 /* get current value of address register[swizzle] */
1039 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1040 fetch_src_file_channel(
1041 mach,
1042 reg->Indirect.File,
1043 swizzle,
1044 &index2,
1045 &indir_index );
1046
1047 /* add value of address register to the offset */
1048 index.i[0] += indir_index.i[0];
1049 index.i[1] += indir_index.i[1];
1050 index.i[2] += indir_index.i[2];
1051 index.i[3] += indir_index.i[3];
1052
1053 /* for disabled execution channels, zero-out the index to
1054 * avoid using a potential garbage value.
1055 */
1056 for (i = 0; i < QUAD_SIZE; i++) {
1057 if ((execmask & (1 << i)) == 0)
1058 index.i[i] = 0;
1059 }
1060 }
1061
1062 /* There is an extra source register that is a second
1063 * subscript to a register file. Effectively it means that
1064 * the register file is actually a 2D array of registers.
1065 *
1066 * file[1][3] == file[1*sizeof(file[1])+3],
1067 * where:
1068 * [3] = Dimension.Index
1069 */
1070 if (reg->Register.Dimension) {
1071 /* The size of the first-order array depends on the register file type.
1072 * We need to multiply the index to the first array to get an effective,
1073 * "flat" index that points to the beginning of the second-order array.
1074 */
1075 switch (reg->Register.File) {
1076 case TGSI_FILE_INPUT:
1077 case TGSI_FILE_SYSTEM_VALUE:
1078 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1079 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1080 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1081 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1082 break;
1083 case TGSI_FILE_CONSTANT:
1084 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1085 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1086 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1087 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1088 break;
1089 default:
1090 assert( 0 );
1091 }
1092
1093 index.i[0] += reg->Dimension.Index;
1094 index.i[1] += reg->Dimension.Index;
1095 index.i[2] += reg->Dimension.Index;
1096 index.i[3] += reg->Dimension.Index;
1097
1098 /* Again, the second subscript index can be addressed indirectly
1099 * identically to the first one.
1100 * Nothing stops us from indirectly addressing the indirect register,
1101 * but there is no need for that, so we won't exercise it.
1102 *
1103 * file[1][ind[4].y+3],
1104 * where:
1105 * ind = DimIndirect.File
1106 * [4] = DimIndirect.Index
1107 * .y = DimIndirect.SwizzleX
1108 */
1109 if (reg->Dimension.Indirect) {
1110 union tgsi_exec_channel index2;
1111 union tgsi_exec_channel indir_index;
1112 const uint execmask = mach->ExecMask;
1113 uint i;
1114
1115 index2.i[0] =
1116 index2.i[1] =
1117 index2.i[2] =
1118 index2.i[3] = reg->DimIndirect.Index;
1119
1120 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
1121 fetch_src_file_channel(
1122 mach,
1123 reg->DimIndirect.File,
1124 swizzle,
1125 &index2,
1126 &indir_index );
1127
1128 index.i[0] += indir_index.i[0];
1129 index.i[1] += indir_index.i[1];
1130 index.i[2] += indir_index.i[2];
1131 index.i[3] += indir_index.i[3];
1132
1133 /* for disabled execution channels, zero-out the index to
1134 * avoid using a potential garbage value.
1135 */
1136 for (i = 0; i < QUAD_SIZE; i++) {
1137 if ((execmask & (1 << i)) == 0)
1138 index.i[i] = 0;
1139 }
1140 }
1141
1142 /* If by any chance there was a need for a 3D array of register
1143 * files, we would have to check whether Dimension is followed
1144 * by a dimension register and continue the saga.
1145 */
1146 }
1147
1148 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1149 fetch_src_file_channel(
1150 mach,
1151 reg->Register.File,
1152 swizzle,
1153 &index,
1154 chan );
1155
1156 if (reg->Register.Absolute) {
1157 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1158 micro_abs(chan, chan);
1159 } else {
1160 micro_iabs(chan, chan);
1161 }
1162 }
1163
1164 if (reg->Register.Negate) {
1165 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1166 micro_neg(chan, chan);
1167 } else {
1168 micro_ineg(chan, chan);
1169 }
1170 }
1171 }
1172
1173 static void
1174 store_dest(struct tgsi_exec_machine *mach,
1175 const union tgsi_exec_channel *chan,
1176 const struct tgsi_full_dst_register *reg,
1177 const struct tgsi_full_instruction *inst,
1178 uint chan_index,
1179 enum tgsi_exec_datatype dst_datatype)
1180 {
1181 uint i;
1182 union tgsi_exec_channel null;
1183 union tgsi_exec_channel *dst;
1184 uint execmask = mach->ExecMask;
1185 int offset = 0; /* indirection offset */
1186 int index;
1187
1188 if (dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1189 CHECK_INF_OR_NAN(chan);
1190 }
1191
1192 /* There is an extra source register that indirectly subscripts
1193 * a register file. The direct index now becomes an offset
1194 * that is being added to the indirect register.
1195 *
1196 * file[ind[2].x+1],
1197 * where:
1198 * ind = Indirect.File
1199 * [2] = Indirect.Index
1200 * .x = Indirect.SwizzleX
1201 */
1202 if (reg->Register.Indirect) {
1203 union tgsi_exec_channel index;
1204 union tgsi_exec_channel indir_index;
1205 uint swizzle;
1206
1207 /* which address register (always zero for now) */
1208 index.i[0] =
1209 index.i[1] =
1210 index.i[2] =
1211 index.i[3] = reg->Indirect.Index;
1212
1213 /* get current value of address register[swizzle] */
1214 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1215
1216 /* fetch values from the address/indirection register */
1217 fetch_src_file_channel(
1218 mach,
1219 reg->Indirect.File,
1220 swizzle,
1221 &index,
1222 &indir_index );
1223
1224 /* save indirection offset */
1225 offset = indir_index.i[0];
1226 }
1227
1228 switch (reg->Register.File) {
1229 case TGSI_FILE_NULL:
1230 dst = &null;
1231 break;
1232
1233 case TGSI_FILE_OUTPUT:
1234 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1235 + reg->Register.Index;
1236 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1237 #if 0
1238 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1239 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1240 for (i = 0; i < QUAD_SIZE; i++)
1241 if (execmask & (1 << i))
1242 fprintf(stderr, "%f, ", chan->f[i]);
1243 fprintf(stderr, ")\n");
1244 }
1245 #endif
1246 break;
1247
1248 case TGSI_FILE_TEMPORARY:
1249 index = reg->Register.Index;
1250 assert( index < TGSI_EXEC_NUM_TEMPS );
1251 dst = &mach->Temps[offset + index].xyzw[chan_index];
1252 break;
1253
1254 case TGSI_FILE_ADDRESS:
1255 index = reg->Register.Index;
1256 dst = &mach->Addrs[index].xyzw[chan_index];
1257 break;
1258
1259 case TGSI_FILE_LOOP:
1260 assert(reg->Register.Index == 0);
1261 assert(mach->LoopCounterStackTop > 0);
1262 assert(chan_index == CHAN_X);
1263 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
1264 break;
1265
1266 case TGSI_FILE_PREDICATE:
1267 index = reg->Register.Index;
1268 assert(index < TGSI_EXEC_NUM_PREDS);
1269 dst = &mach->Predicates[index].xyzw[chan_index];
1270 break;
1271
1272 default:
1273 assert( 0 );
1274 return;
1275 }
1276
1277 if (inst->Instruction.Predicate) {
1278 uint swizzle;
1279 union tgsi_exec_channel *pred;
1280
1281 switch (chan_index) {
1282 case CHAN_X:
1283 swizzle = inst->Predicate.SwizzleX;
1284 break;
1285 case CHAN_Y:
1286 swizzle = inst->Predicate.SwizzleY;
1287 break;
1288 case CHAN_Z:
1289 swizzle = inst->Predicate.SwizzleZ;
1290 break;
1291 case CHAN_W:
1292 swizzle = inst->Predicate.SwizzleW;
1293 break;
1294 default:
1295 assert(0);
1296 return;
1297 }
1298
1299 assert(inst->Predicate.Index == 0);
1300
1301 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1302
1303 if (inst->Predicate.Negate) {
1304 for (i = 0; i < QUAD_SIZE; i++) {
1305 if (pred->u[i]) {
1306 execmask &= ~(1 << i);
1307 }
1308 }
1309 } else {
1310 for (i = 0; i < QUAD_SIZE; i++) {
1311 if (!pred->u[i]) {
1312 execmask &= ~(1 << i);
1313 }
1314 }
1315 }
1316 }
1317
1318 switch (inst->Instruction.Saturate) {
1319 case TGSI_SAT_NONE:
1320 for (i = 0; i < QUAD_SIZE; i++)
1321 if (execmask & (1 << i))
1322 dst->i[i] = chan->i[i];
1323 break;
1324
1325 case TGSI_SAT_ZERO_ONE:
1326 for (i = 0; i < QUAD_SIZE; i++)
1327 if (execmask & (1 << i)) {
1328 if (chan->f[i] < 0.0f)
1329 dst->f[i] = 0.0f;
1330 else if (chan->f[i] > 1.0f)
1331 dst->f[i] = 1.0f;
1332 else
1333 dst->i[i] = chan->i[i];
1334 }
1335 break;
1336
1337 case TGSI_SAT_MINUS_PLUS_ONE:
1338 for (i = 0; i < QUAD_SIZE; i++)
1339 if (execmask & (1 << i)) {
1340 if (chan->f[i] < -1.0f)
1341 dst->f[i] = -1.0f;
1342 else if (chan->f[i] > 1.0f)
1343 dst->f[i] = 1.0f;
1344 else
1345 dst->i[i] = chan->i[i];
1346 }
1347 break;
1348
1349 default:
1350 assert( 0 );
1351 }
1352 }
1353
/*
 * Shorthands for float-typed source fetches and destination stores.
 * Both expand to calls that use the `mach` and `inst` variables of the
 * enclosing function.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

#define STORE(VAL, INDEX, CHAN) \
   store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT)
1359
1360
1361 /**
1362 * Execute ARB-style KIL which is predicated by a src register.
1363 * Kill fragment if any of the four values is less than zero.
1364 */
1365 static void
1366 exec_kil(struct tgsi_exec_machine *mach,
1367 const struct tgsi_full_instruction *inst)
1368 {
1369 uint uniquemask;
1370 uint chan_index;
1371 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1372 union tgsi_exec_channel r[1];
1373
1374 /* This mask stores component bits that were already tested. */
1375 uniquemask = 0;
1376
1377 for (chan_index = 0; chan_index < 4; chan_index++)
1378 {
1379 uint swizzle;
1380 uint i;
1381
1382 /* unswizzle channel */
1383 swizzle = tgsi_util_get_full_src_register_swizzle (
1384 &inst->Src[0],
1385 chan_index);
1386
1387 /* check if the component has not been already tested */
1388 if (uniquemask & (1 << swizzle))
1389 continue;
1390 uniquemask |= 1 << swizzle;
1391
1392 FETCH(&r[0], 0, chan_index);
1393 for (i = 0; i < 4; i++)
1394 if (r[0].f[i] < 0.0f)
1395 kilmask |= 1 << i;
1396 }
1397
1398 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1399 }
1400
1401 /**
1402 * Execute NVIDIA-style KIL which is predicated by a condition code.
1403 * Kill fragment if the condition code is TRUE.
1404 */
1405 static void
1406 exec_kilp(struct tgsi_exec_machine *mach,
1407 const struct tgsi_full_instruction *inst)
1408 {
1409 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1410
1411 /* "unconditional" kil */
1412 kilmask = mach->ExecMask;
1413 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1414 }
1415
1416 static void
1417 emit_vertex(struct tgsi_exec_machine *mach)
1418 {
1419 /* FIXME: check for exec mask correctly
1420 unsigned i;
1421 for (i = 0; i < QUAD_SIZE; ++i) {
1422 if ((mach->ExecMask & (1 << i)))
1423 */
1424 if (mach->ExecMask) {
1425 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
1426 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1427 }
1428 }
1429
1430 static void
1431 emit_primitive(struct tgsi_exec_machine *mach)
1432 {
1433 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
1434 /* FIXME: check for exec mask correctly
1435 unsigned i;
1436 for (i = 0; i < QUAD_SIZE; ++i) {
1437 if ((mach->ExecMask & (1 << i)))
1438 */
1439 if (mach->ExecMask) {
1440 ++(*prim_count);
1441 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
1442 mach->Primitives[*prim_count] = 0;
1443 }
1444 }
1445
1446 /*
1447 * Fetch a four texture samples using STR texture coordinates.
1448 */
1449 static void
1450 fetch_texel( struct tgsi_sampler *sampler,
1451 const union tgsi_exec_channel *s,
1452 const union tgsi_exec_channel *t,
1453 const union tgsi_exec_channel *p,
1454 float lodbias, /* XXX should be float[4] */
1455 union tgsi_exec_channel *r,
1456 union tgsi_exec_channel *g,
1457 union tgsi_exec_channel *b,
1458 union tgsi_exec_channel *a )
1459 {
1460 uint j;
1461 float rgba[NUM_CHANNELS][QUAD_SIZE];
1462
1463 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1464
1465 for (j = 0; j < 4; j++) {
1466 r->f[j] = rgba[0][j];
1467 g->f[j] = rgba[1][j];
1468 b->f[j] = rgba[2][j];
1469 a->f[j] = rgba[3][j];
1470 }
1471 }
1472
1473
1474 static void
1475 exec_tex(struct tgsi_exec_machine *mach,
1476 const struct tgsi_full_instruction *inst,
1477 boolean biasLod,
1478 boolean projected)
1479 {
1480 const uint unit = inst->Src[1].Register.Index;
1481 union tgsi_exec_channel r[4];
1482 uint chan_index;
1483 float lodBias;
1484
1485 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1486
1487 switch (inst->Texture.Texture) {
1488 case TGSI_TEXTURE_1D:
1489 case TGSI_TEXTURE_SHADOW1D:
1490
1491 FETCH(&r[0], 0, CHAN_X);
1492
1493 if (projected) {
1494 FETCH(&r[1], 0, CHAN_W);
1495 micro_div( &r[0], &r[0], &r[1] );
1496 }
1497
1498 if (biasLod) {
1499 FETCH(&r[1], 0, CHAN_W);
1500 lodBias = r[2].f[0];
1501 }
1502 else
1503 lodBias = 0.0;
1504
1505 fetch_texel(mach->Samplers[unit],
1506 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */
1507 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1508 break;
1509
1510 case TGSI_TEXTURE_2D:
1511 case TGSI_TEXTURE_RECT:
1512 case TGSI_TEXTURE_SHADOW2D:
1513 case TGSI_TEXTURE_SHADOWRECT:
1514
1515 FETCH(&r[0], 0, CHAN_X);
1516 FETCH(&r[1], 0, CHAN_Y);
1517 FETCH(&r[2], 0, CHAN_Z);
1518
1519 if (projected) {
1520 FETCH(&r[3], 0, CHAN_W);
1521 micro_div( &r[0], &r[0], &r[3] );
1522 micro_div( &r[1], &r[1], &r[3] );
1523 micro_div( &r[2], &r[2], &r[3] );
1524 }
1525
1526 if (biasLod) {
1527 FETCH(&r[3], 0, CHAN_W);
1528 lodBias = r[3].f[0];
1529 }
1530 else
1531 lodBias = 0.0;
1532
1533 fetch_texel(mach->Samplers[unit],
1534 &r[0], &r[1], &r[2], lodBias, /* inputs */
1535 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1536 break;
1537
1538 case TGSI_TEXTURE_3D:
1539 case TGSI_TEXTURE_CUBE:
1540
1541 FETCH(&r[0], 0, CHAN_X);
1542 FETCH(&r[1], 0, CHAN_Y);
1543 FETCH(&r[2], 0, CHAN_Z);
1544
1545 if (projected) {
1546 FETCH(&r[3], 0, CHAN_W);
1547 micro_div( &r[0], &r[0], &r[3] );
1548 micro_div( &r[1], &r[1], &r[3] );
1549 micro_div( &r[2], &r[2], &r[3] );
1550 }
1551
1552 if (biasLod) {
1553 FETCH(&r[3], 0, CHAN_W);
1554 lodBias = r[3].f[0];
1555 }
1556 else
1557 lodBias = 0.0;
1558
1559 fetch_texel(mach->Samplers[unit],
1560 &r[0], &r[1], &r[2], lodBias,
1561 &r[0], &r[1], &r[2], &r[3]);
1562 break;
1563
1564 default:
1565 assert (0);
1566 }
1567
1568 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1569 STORE( &r[chan_index], 0, chan_index );
1570 }
1571 }
1572
1573 static void
1574 exec_txd(struct tgsi_exec_machine *mach,
1575 const struct tgsi_full_instruction *inst)
1576 {
1577 const uint unit = inst->Src[3].Register.Index;
1578 union tgsi_exec_channel r[4];
1579 uint chan_index;
1580
1581 /*
1582 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1583 */
1584
1585 switch (inst->Texture.Texture) {
1586 case TGSI_TEXTURE_1D:
1587 case TGSI_TEXTURE_SHADOW1D:
1588
1589 FETCH(&r[0], 0, CHAN_X);
1590
1591 fetch_texel(mach->Samplers[unit],
1592 &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */
1593 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1594 break;
1595
1596 case TGSI_TEXTURE_2D:
1597 case TGSI_TEXTURE_RECT:
1598 case TGSI_TEXTURE_SHADOW2D:
1599 case TGSI_TEXTURE_SHADOWRECT:
1600
1601 FETCH(&r[0], 0, CHAN_X);
1602 FETCH(&r[1], 0, CHAN_Y);
1603 FETCH(&r[2], 0, CHAN_Z);
1604
1605 fetch_texel(mach->Samplers[unit],
1606 &r[0], &r[1], &r[2], 0.0f, /* inputs */
1607 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1608 break;
1609
1610 case TGSI_TEXTURE_3D:
1611 case TGSI_TEXTURE_CUBE:
1612
1613 FETCH(&r[0], 0, CHAN_X);
1614 FETCH(&r[1], 0, CHAN_Y);
1615 FETCH(&r[2], 0, CHAN_Z);
1616
1617 fetch_texel(mach->Samplers[unit],
1618 &r[0], &r[1], &r[2], 0.0f,
1619 &r[0], &r[1], &r[2], &r[3]);
1620 break;
1621
1622 default:
1623 assert(0);
1624 }
1625
1626 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1627 STORE(&r[chan_index], 0, chan_index);
1628 }
1629 }
1630
1631
1632 /**
1633 * Evaluate a constant-valued coefficient at the position of the
1634 * current quad.
1635 */
1636 static void
1637 eval_constant_coef(
1638 struct tgsi_exec_machine *mach,
1639 unsigned attrib,
1640 unsigned chan )
1641 {
1642 unsigned i;
1643
1644 for( i = 0; i < QUAD_SIZE; i++ ) {
1645 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1646 }
1647 }
1648
1649 /**
1650 * Evaluate a linear-valued coefficient at the position of the
1651 * current quad.
1652 */
1653 static void
1654 eval_linear_coef(
1655 struct tgsi_exec_machine *mach,
1656 unsigned attrib,
1657 unsigned chan )
1658 {
1659 const float x = mach->QuadPos.xyzw[0].f[0];
1660 const float y = mach->QuadPos.xyzw[1].f[0];
1661 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1662 const float dady = mach->InterpCoefs[attrib].dady[chan];
1663 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1664 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1665 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1666 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1667 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1668 }
1669
1670 /**
1671 * Evaluate a perspective-valued coefficient at the position of the
1672 * current quad.
1673 */
1674 static void
1675 eval_perspective_coef(
1676 struct tgsi_exec_machine *mach,
1677 unsigned attrib,
1678 unsigned chan )
1679 {
1680 const float x = mach->QuadPos.xyzw[0].f[0];
1681 const float y = mach->QuadPos.xyzw[1].f[0];
1682 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1683 const float dady = mach->InterpCoefs[attrib].dady[chan];
1684 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1685 const float *w = mach->QuadPos.xyzw[3].f;
1686 /* divide by W here */
1687 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1688 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1689 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1690 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1691 }
1692
1693
/**
 * Signature shared by the eval_*_coef() interpolation routines: evaluate
 * channel `chan` of input attribute `attrib` for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
1698
1699 static void
1700 exec_declaration(struct tgsi_exec_machine *mach,
1701 const struct tgsi_full_declaration *decl)
1702 {
1703 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
1704 if (decl->Declaration.File == TGSI_FILE_INPUT ||
1705 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1706 uint first, last, mask;
1707
1708 first = decl->Range.First;
1709 last = decl->Range.Last;
1710 mask = decl->Declaration.UsageMask;
1711
1712 if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
1713 assert(decl->Semantic.Index == 0);
1714 assert(first == last);
1715 assert(mask == TGSI_WRITEMASK_XYZW);
1716
1717 mach->Inputs[first] = mach->QuadPos;
1718 } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
1719 uint i;
1720
1721 assert(decl->Semantic.Index == 0);
1722 assert(first == last);
1723
1724 for (i = 0; i < QUAD_SIZE; i++) {
1725 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
1726 }
1727 } else {
1728 eval_coef_func eval;
1729 uint i, j;
1730
1731 switch (decl->Declaration.Interpolate) {
1732 case TGSI_INTERPOLATE_CONSTANT:
1733 eval = eval_constant_coef;
1734 break;
1735
1736 case TGSI_INTERPOLATE_LINEAR:
1737 eval = eval_linear_coef;
1738 break;
1739
1740 case TGSI_INTERPOLATE_PERSPECTIVE:
1741 eval = eval_perspective_coef;
1742 break;
1743
1744 default:
1745 assert(0);
1746 return;
1747 }
1748
1749 for (j = 0; j < NUM_CHANNELS; j++) {
1750 if (mask & (1 << j)) {
1751 for (i = first; i <= last; i++) {
1752 eval(mach, i, j);
1753 }
1754 }
1755 }
1756 }
1757 }
1758 }
1759 }
1760
/**
 * Per-channel operation used by the exec_vector_*() helpers.  `src` points
 * to one channel for unary operations, or to an array of two or three
 * channels for binary and trinary operations.
 */
typedef void (*micro_op)(union tgsi_exec_channel *dst,
                         const union tgsi_exec_channel *src);
1763
1764 static void
1765 exec_vector_unary(struct tgsi_exec_machine *mach,
1766 const struct tgsi_full_instruction *inst,
1767 micro_op op,
1768 enum tgsi_exec_datatype dst_datatype,
1769 enum tgsi_exec_datatype src_datatype)
1770 {
1771 unsigned int chan;
1772 struct tgsi_exec_vector dst;
1773
1774 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1775 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1776 union tgsi_exec_channel src;
1777
1778 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
1779 op(&dst.xyzw[chan], &src);
1780 }
1781 }
1782 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1783 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1784 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1785 }
1786 }
1787 }
1788
1789 static void
1790 exec_vector_binary(struct tgsi_exec_machine *mach,
1791 const struct tgsi_full_instruction *inst,
1792 micro_op op,
1793 enum tgsi_exec_datatype dst_datatype,
1794 enum tgsi_exec_datatype src_datatype)
1795 {
1796 unsigned int chan;
1797 struct tgsi_exec_vector dst;
1798
1799 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1800 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1801 union tgsi_exec_channel src[2];
1802
1803 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
1804 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
1805 op(&dst.xyzw[chan], src);
1806 }
1807 }
1808 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1809 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1810 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1811 }
1812 }
1813 }
1814
1815 static void
1816 exec_vector_trinary(struct tgsi_exec_machine *mach,
1817 const struct tgsi_full_instruction *inst,
1818 micro_op op,
1819 enum tgsi_exec_datatype dst_datatype,
1820 enum tgsi_exec_datatype src_datatype)
1821 {
1822 unsigned int chan;
1823 struct tgsi_exec_vector dst;
1824
1825 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1826 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1827 union tgsi_exec_channel src[3];
1828
1829 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
1830 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
1831 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
1832 op(&dst.xyzw[chan], src);
1833 }
1834 }
1835 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1836 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1837 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1838 }
1839 }
1840 }
1841
1842 static void
1843 exec_break(struct tgsi_exec_machine *mach)
1844 {
1845 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
1846 /* turn off loop channels for each enabled exec channel */
1847 mach->LoopMask &= ~mach->ExecMask;
1848 /* Todo: if mach->LoopMask == 0, jump to end of loop */
1849 UPDATE_EXEC_MASK(mach);
1850 } else {
1851 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
1852
1853 mach->Switch.mask = 0x0;
1854
1855 UPDATE_EXEC_MASK(mach);
1856 }
1857 }
1858
1859 static void
1860 exec_switch(struct tgsi_exec_machine *mach,
1861 const struct tgsi_full_instruction *inst)
1862 {
1863 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
1864 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
1865
1866 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
1867 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
1868 mach->Switch.mask = 0x0;
1869 mach->Switch.defaultMask = 0x0;
1870
1871 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
1872 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
1873
1874 UPDATE_EXEC_MASK(mach);
1875 }
1876
1877 static void
1878 exec_case(struct tgsi_exec_machine *mach,
1879 const struct tgsi_full_instruction *inst)
1880 {
1881 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
1882 union tgsi_exec_channel src;
1883 uint mask = 0;
1884
1885 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
1886
1887 if (mach->Switch.selector.u[0] == src.u[0]) {
1888 mask |= 0x1;
1889 }
1890 if (mach->Switch.selector.u[1] == src.u[1]) {
1891 mask |= 0x2;
1892 }
1893 if (mach->Switch.selector.u[2] == src.u[2]) {
1894 mask |= 0x4;
1895 }
1896 if (mach->Switch.selector.u[3] == src.u[3]) {
1897 mask |= 0x8;
1898 }
1899
1900 mach->Switch.defaultMask |= mask;
1901
1902 mach->Switch.mask |= mask & prevMask;
1903
1904 UPDATE_EXEC_MASK(mach);
1905 }
1906
1907 static void
1908 exec_default(struct tgsi_exec_machine *mach)
1909 {
1910 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
1911
1912 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
1913
1914 UPDATE_EXEC_MASK(mach);
1915 }
1916
1917 static void
1918 exec_endswitch(struct tgsi_exec_machine *mach)
1919 {
1920 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
1921 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
1922
1923 UPDATE_EXEC_MASK(mach);
1924 }
1925
1926 static void
1927 micro_i2f(union tgsi_exec_channel *dst,
1928 const union tgsi_exec_channel *src)
1929 {
1930 dst->f[0] = (float)src->i[0];
1931 dst->f[1] = (float)src->i[1];
1932 dst->f[2] = (float)src->i[2];
1933 dst->f[3] = (float)src->i[3];
1934 }
1935
1936 static void
1937 micro_not(union tgsi_exec_channel *dst,
1938 const union tgsi_exec_channel *src)
1939 {
1940 dst->u[0] = ~src->u[0];
1941 dst->u[1] = ~src->u[1];
1942 dst->u[2] = ~src->u[2];
1943 dst->u[3] = ~src->u[3];
1944 }
1945
1946 static void
1947 micro_shl(union tgsi_exec_channel *dst,
1948 const union tgsi_exec_channel *src)
1949 {
1950 dst->u[0] = src[0].u[0] << src[1].u[0];
1951 dst->u[1] = src[0].u[1] << src[1].u[1];
1952 dst->u[2] = src[0].u[2] << src[1].u[2];
1953 dst->u[3] = src[0].u[3] << src[1].u[3];
1954 }
1955
1956 static void
1957 micro_and(union tgsi_exec_channel *dst,
1958 const union tgsi_exec_channel *src)
1959 {
1960 dst->u[0] = src[0].u[0] & src[1].u[0];
1961 dst->u[1] = src[0].u[1] & src[1].u[1];
1962 dst->u[2] = src[0].u[2] & src[1].u[2];
1963 dst->u[3] = src[0].u[3] & src[1].u[3];
1964 }
1965
1966 static void
1967 micro_or(union tgsi_exec_channel *dst,
1968 const union tgsi_exec_channel *src)
1969 {
1970 dst->u[0] = src[0].u[0] | src[1].u[0];
1971 dst->u[1] = src[0].u[1] | src[1].u[1];
1972 dst->u[2] = src[0].u[2] | src[1].u[2];
1973 dst->u[3] = src[0].u[3] | src[1].u[3];
1974 }
1975
1976 static void
1977 micro_xor(union tgsi_exec_channel *dst,
1978 const union tgsi_exec_channel *src)
1979 {
1980 dst->u[0] = src[0].u[0] ^ src[1].u[0];
1981 dst->u[1] = src[0].u[1] ^ src[1].u[1];
1982 dst->u[2] = src[0].u[2] ^ src[1].u[2];
1983 dst->u[3] = src[0].u[3] ^ src[1].u[3];
1984 }
1985
1986 static void
1987 micro_f2i(union tgsi_exec_channel *dst,
1988 const union tgsi_exec_channel *src)
1989 {
1990 dst->i[0] = (int)src->f[0];
1991 dst->i[1] = (int)src->f[1];
1992 dst->i[2] = (int)src->f[2];
1993 dst->i[3] = (int)src->f[3];
1994 }
1995
1996 static void
1997 micro_idiv(union tgsi_exec_channel *dst,
1998 const union tgsi_exec_channel *src)
1999 {
2000 dst->i[0] = src[0].i[0] / src[1].i[0];
2001 dst->i[1] = src[0].i[1] / src[1].i[1];
2002 dst->i[2] = src[0].i[2] / src[1].i[2];
2003 dst->i[3] = src[0].i[3] / src[1].i[3];
2004 }
2005
2006 static void
2007 micro_imax(union tgsi_exec_channel *dst,
2008 const union tgsi_exec_channel *src)
2009 {
2010 dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0];
2011 dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1];
2012 dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2];
2013 dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3];
2014 }
2015
2016 static void
2017 micro_imin(union tgsi_exec_channel *dst,
2018 const union tgsi_exec_channel *src)
2019 {
2020 dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0];
2021 dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1];
2022 dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2];
2023 dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3];
2024 }
2025
2026 static void
2027 micro_isge(union tgsi_exec_channel *dst,
2028 const union tgsi_exec_channel *src)
2029 {
2030 dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0;
2031 dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0;
2032 dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0;
2033 dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0;
2034 }
2035
2036 static void
2037 micro_ishr(union tgsi_exec_channel *dst,
2038 const union tgsi_exec_channel *src)
2039 {
2040 dst->i[0] = src[0].i[0] >> src[1].i[0];
2041 dst->i[1] = src[0].i[1] >> src[1].i[1];
2042 dst->i[2] = src[0].i[2] >> src[1].i[2];
2043 dst->i[3] = src[0].i[3] >> src[1].i[3];
2044 }
2045
2046 static void
2047 micro_islt(union tgsi_exec_channel *dst,
2048 const union tgsi_exec_channel *src)
2049 {
2050 dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0;
2051 dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0;
2052 dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0;
2053 dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0;
2054 }
2055
2056 static void
2057 micro_f2u(union tgsi_exec_channel *dst,
2058 const union tgsi_exec_channel *src)
2059 {
2060 dst->u[0] = (uint)src->f[0];
2061 dst->u[1] = (uint)src->f[1];
2062 dst->u[2] = (uint)src->f[2];
2063 dst->u[3] = (uint)src->f[3];
2064 }
2065
2066 static void
2067 micro_u2f(union tgsi_exec_channel *dst,
2068 const union tgsi_exec_channel *src)
2069 {
2070 dst->f[0] = (float)src->u[0];
2071 dst->f[1] = (float)src->u[1];
2072 dst->f[2] = (float)src->u[2];
2073 dst->f[3] = (float)src->u[3];
2074 }
2075
2076 static void
2077 micro_uadd(union tgsi_exec_channel *dst,
2078 const union tgsi_exec_channel *src)
2079 {
2080 dst->u[0] = src[0].u[0] + src[1].u[0];
2081 dst->u[1] = src[0].u[1] + src[1].u[1];
2082 dst->u[2] = src[0].u[2] + src[1].u[2];
2083 dst->u[3] = src[0].u[3] + src[1].u[3];
2084 }
2085
2086 static void
2087 micro_udiv(union tgsi_exec_channel *dst,
2088 const union tgsi_exec_channel *src)
2089 {
2090 dst->u[0] = src[0].u[0] / src[1].u[0];
2091 dst->u[1] = src[0].u[1] / src[1].u[1];
2092 dst->u[2] = src[0].u[2] / src[1].u[2];
2093 dst->u[3] = src[0].u[3] / src[1].u[3];
2094 }
2095
2096 static void
2097 micro_umad(union tgsi_exec_channel *dst,
2098 const union tgsi_exec_channel *src)
2099 {
2100 dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0];
2101 dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1];
2102 dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2];
2103 dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3];
2104 }
2105
2106 static void
2107 micro_umax(union tgsi_exec_channel *dst,
2108 const union tgsi_exec_channel *src)
2109 {
2110 dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0];
2111 dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1];
2112 dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2];
2113 dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3];
2114 }
2115
2116 static void
2117 micro_umin(union tgsi_exec_channel *dst,
2118 const union tgsi_exec_channel *src)
2119 {
2120 dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0];
2121 dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1];
2122 dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2];
2123 dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3];
2124 }
2125
2126 static void
2127 micro_umod(union tgsi_exec_channel *dst,
2128 const union tgsi_exec_channel *src)
2129 {
2130 dst->u[0] = src[0].u[0] % src[1].u[0];
2131 dst->u[1] = src[0].u[1] % src[1].u[1];
2132 dst->u[2] = src[0].u[2] % src[1].u[2];
2133 dst->u[3] = src[0].u[3] % src[1].u[3];
2134 }
2135
2136 static void
2137 micro_umul(union tgsi_exec_channel *dst,
2138 const union tgsi_exec_channel *src)
2139 {
2140 dst->u[0] = src[0].u[0] * src[1].u[0];
2141 dst->u[1] = src[0].u[1] * src[1].u[1];
2142 dst->u[2] = src[0].u[2] * src[1].u[2];
2143 dst->u[3] = src[0].u[3] * src[1].u[3];
2144 }
2145
2146 static void
2147 micro_useq(union tgsi_exec_channel *dst,
2148 const union tgsi_exec_channel *src)
2149 {
2150 dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0;
2151 dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0;
2152 dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0;
2153 dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0;
2154 }
2155
2156 static void
2157 micro_usge(union tgsi_exec_channel *dst,
2158 const union tgsi_exec_channel *src)
2159 {
2160 dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0;
2161 dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0;
2162 dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0;
2163 dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0;
2164 }
2165
2166 static void
2167 micro_ushr(union tgsi_exec_channel *dst,
2168 const union tgsi_exec_channel *src)
2169 {
2170 dst->u[0] = src[0].u[0] >> src[1].u[0];
2171 dst->u[1] = src[0].u[1] >> src[1].u[1];
2172 dst->u[2] = src[0].u[2] >> src[1].u[2];
2173 dst->u[3] = src[0].u[3] >> src[1].u[3];
2174 }
2175
2176 static void
2177 micro_uslt(union tgsi_exec_channel *dst,
2178 const union tgsi_exec_channel *src)
2179 {
2180 dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0;
2181 dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0;
2182 dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0;
2183 dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0;
2184 }
2185
2186 static void
2187 micro_usne(union tgsi_exec_channel *dst,
2188 const union tgsi_exec_channel *src)
2189 {
2190 dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0;
2191 dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0;
2192 dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0;
2193 dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0;
2194 }
2195
2196 static void
2197 exec_instruction(
2198 struct tgsi_exec_machine *mach,
2199 const struct tgsi_full_instruction *inst,
2200 int *pc )
2201 {
2202 uint chan_index;
2203 union tgsi_exec_channel r[10];
2204 union tgsi_exec_channel d[8];
2205
2206 (*pc)++;
2207
2208 switch (inst->Instruction.Opcode) {
2209 case TGSI_OPCODE_ARL:
2210 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
2211 break;
2212
2213 case TGSI_OPCODE_MOV:
2214 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
2215 break;
2216
2217 case TGSI_OPCODE_LIT:
2218 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2219 FETCH( &r[0], 0, CHAN_X );
2220 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2221 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2222 }
2223
2224 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2225 FETCH( &r[1], 0, CHAN_Y );
2226 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2227
2228 FETCH( &r[2], 0, CHAN_W );
2229 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
2230 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
2231 micro_pow( &r[1], &r[1], &r[2] );
2232 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2233 }
2234
2235 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2236 STORE(&d[CHAN_Y], 0, CHAN_Y);
2237 }
2238 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2239 STORE(&d[CHAN_Z], 0, CHAN_Z);
2240 }
2241 }
2242 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2243 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2244 }
2245 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2246 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2247 }
2248 break;
2249
2250 case TGSI_OPCODE_RCP:
2251 /* TGSI_OPCODE_RECIP */
2252 FETCH( &r[0], 0, CHAN_X );
2253 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2254 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2255 STORE( &r[0], 0, chan_index );
2256 }
2257 break;
2258
2259 case TGSI_OPCODE_RSQ:
2260 /* TGSI_OPCODE_RECIPSQRT */
2261 FETCH( &r[0], 0, CHAN_X );
2262 micro_abs( &r[0], &r[0] );
2263 micro_sqrt( &r[0], &r[0] );
2264 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2265 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2266 STORE( &r[0], 0, chan_index );
2267 }
2268 break;
2269
2270 case TGSI_OPCODE_EXP:
2271 FETCH( &r[0], 0, CHAN_X );
2272 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
2273 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2274 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
2275 STORE( &r[2], 0, CHAN_X ); /* store r2 */
2276 }
2277 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2278 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
2279 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
2280 }
2281 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2282 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
2283 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
2284 }
2285 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2286 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2287 }
2288 break;
2289
2290 case TGSI_OPCODE_LOG:
2291 FETCH( &r[0], 0, CHAN_X );
2292 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
2293 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
2294 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
2295 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2296 STORE( &r[0], 0, CHAN_X );
2297 }
2298 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2299 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
2300 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
2301 STORE( &r[0], 0, CHAN_Y );
2302 }
2303 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2304 STORE( &r[1], 0, CHAN_Z );
2305 }
2306 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2307 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2308 }
2309 break;
2310
2311 case TGSI_OPCODE_MUL:
2312 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2313 FETCH(&r[0], 0, chan_index);
2314 FETCH(&r[1], 1, chan_index);
2315 micro_mul(&d[chan_index], &r[0], &r[1]);
2316 }
2317 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2318 STORE(&d[chan_index], 0, chan_index);
2319 }
2320 break;
2321
2322 case TGSI_OPCODE_ADD:
2323 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2324 FETCH( &r[0], 0, chan_index );
2325 FETCH( &r[1], 1, chan_index );
2326 micro_add(&d[chan_index], &r[0], &r[1]);
2327 }
2328 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2329 STORE(&d[chan_index], 0, chan_index);
2330 }
2331 break;
2332
2333 case TGSI_OPCODE_DP3:
2334 /* TGSI_OPCODE_DOT3 */
2335 FETCH( &r[0], 0, CHAN_X );
2336 FETCH( &r[1], 1, CHAN_X );
2337 micro_mul( &r[0], &r[0], &r[1] );
2338
2339 FETCH( &r[1], 0, CHAN_Y );
2340 FETCH( &r[2], 1, CHAN_Y );
2341 micro_mul( &r[1], &r[1], &r[2] );
2342 micro_add( &r[0], &r[0], &r[1] );
2343
2344 FETCH( &r[1], 0, CHAN_Z );
2345 FETCH( &r[2], 1, CHAN_Z );
2346 micro_mul( &r[1], &r[1], &r[2] );
2347 micro_add( &r[0], &r[0], &r[1] );
2348
2349 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2350 STORE( &r[0], 0, chan_index );
2351 }
2352 break;
2353
2354 case TGSI_OPCODE_DP4:
2355 /* TGSI_OPCODE_DOT4 */
2356 FETCH(&r[0], 0, CHAN_X);
2357 FETCH(&r[1], 1, CHAN_X);
2358
2359 micro_mul( &r[0], &r[0], &r[1] );
2360
2361 FETCH(&r[1], 0, CHAN_Y);
2362 FETCH(&r[2], 1, CHAN_Y);
2363
2364 micro_mul( &r[1], &r[1], &r[2] );
2365 micro_add( &r[0], &r[0], &r[1] );
2366
2367 FETCH(&r[1], 0, CHAN_Z);
2368 FETCH(&r[2], 1, CHAN_Z);
2369
2370 micro_mul( &r[1], &r[1], &r[2] );
2371 micro_add( &r[0], &r[0], &r[1] );
2372
2373 FETCH(&r[1], 0, CHAN_W);
2374 FETCH(&r[2], 1, CHAN_W);
2375
2376 micro_mul( &r[1], &r[1], &r[2] );
2377 micro_add( &r[0], &r[0], &r[1] );
2378
2379 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2380 STORE( &r[0], 0, chan_index );
2381 }
2382 break;
2383
2384 case TGSI_OPCODE_DST:
2385 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2386 FETCH( &r[0], 0, CHAN_Y );
2387 FETCH( &r[1], 1, CHAN_Y);
2388 micro_mul(&d[CHAN_Y], &r[0], &r[1]);
2389 }
2390 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2391 FETCH(&d[CHAN_Z], 0, CHAN_Z);
2392 }
2393 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2394 FETCH(&d[CHAN_W], 1, CHAN_W);
2395 }
2396
2397 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2398 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
2399 }
2400 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2401 STORE(&d[CHAN_Y], 0, CHAN_Y);
2402 }
2403 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2404 STORE(&d[CHAN_Z], 0, CHAN_Z);
2405 }
2406 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2407 STORE(&d[CHAN_W], 0, CHAN_W);
2408 }
2409 break;
2410
2411 case TGSI_OPCODE_MIN:
2412 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2413 FETCH(&r[0], 0, chan_index);
2414 FETCH(&r[1], 1, chan_index);
2415
2416 /* XXX use micro_min()?? */
2417 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
2418 }
2419 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2420 STORE(&d[chan_index], 0, chan_index);
2421 }
2422 break;
2423
2424 case TGSI_OPCODE_MAX:
2425 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2426 FETCH(&r[0], 0, chan_index);
2427 FETCH(&r[1], 1, chan_index);
2428
2429 /* XXX use micro_max()?? */
2430 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
2431 }
2432 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2433 STORE(&d[chan_index], 0, chan_index);
2434 }
2435 break;
2436
2437 case TGSI_OPCODE_SLT:
2438 /* TGSI_OPCODE_SETLT */
2439 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2440 FETCH( &r[0], 0, chan_index );
2441 FETCH( &r[1], 1, chan_index );
2442 micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2443 }
2444 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2445 STORE(&d[chan_index], 0, chan_index);
2446 }
2447 break;
2448
2449 case TGSI_OPCODE_SGE:
2450 /* TGSI_OPCODE_SETGE */
2451 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2452 FETCH( &r[0], 0, chan_index );
2453 FETCH( &r[1], 1, chan_index );
2454 micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2455 }
2456 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2457 STORE(&d[chan_index], 0, chan_index);
2458 }
2459 break;
2460
2461 case TGSI_OPCODE_MAD:
2462 /* TGSI_OPCODE_MADD */
2463 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2464 FETCH( &r[0], 0, chan_index );
2465 FETCH( &r[1], 1, chan_index );
2466 micro_mul( &r[0], &r[0], &r[1] );
2467 FETCH( &r[1], 2, chan_index );
2468 micro_add(&d[chan_index], &r[0], &r[1]);
2469 }
2470 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2471 STORE(&d[chan_index], 0, chan_index);
2472 }
2473 break;
2474
2475 case TGSI_OPCODE_SUB:
2476 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2477 FETCH(&r[0], 0, chan_index);
2478 FETCH(&r[1], 1, chan_index);
2479 micro_sub(&d[chan_index], &r[0], &r[1]);
2480 }
2481 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2482 STORE(&d[chan_index], 0, chan_index);
2483 }
2484 break;
2485
2486 case TGSI_OPCODE_LRP:
2487 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2488 FETCH(&r[0], 0, chan_index);
2489 FETCH(&r[1], 1, chan_index);
2490 FETCH(&r[2], 2, chan_index);
2491 micro_sub( &r[1], &r[1], &r[2] );
2492 micro_mul( &r[0], &r[0], &r[1] );
2493 micro_add(&d[chan_index], &r[0], &r[2]);
2494 }
2495 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2496 STORE(&d[chan_index], 0, chan_index);
2497 }
2498 break;
2499
2500 case TGSI_OPCODE_CND:
2501 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2502 FETCH(&r[0], 0, chan_index);
2503 FETCH(&r[1], 1, chan_index);
2504 FETCH(&r[2], 2, chan_index);
2505 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2506 }
2507 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2508 STORE(&d[chan_index], 0, chan_index);
2509 }
2510 break;
2511
2512 case TGSI_OPCODE_DP2A:
2513 FETCH( &r[0], 0, CHAN_X );
2514 FETCH( &r[1], 1, CHAN_X );
2515 micro_mul( &r[0], &r[0], &r[1] );
2516
2517 FETCH( &r[1], 0, CHAN_Y );
2518 FETCH( &r[2], 1, CHAN_Y );
2519 micro_mul( &r[1], &r[1], &r[2] );
2520 micro_add( &r[0], &r[0], &r[1] );
2521
2522 FETCH( &r[2], 2, CHAN_X );
2523 micro_add( &r[0], &r[0], &r[2] );
2524
2525 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2526 STORE( &r[0], 0, chan_index );
2527 }
2528 break;
2529
2530 case TGSI_OPCODE_FRC:
2531 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2532 FETCH( &r[0], 0, chan_index );
2533 micro_frc(&d[chan_index], &r[0]);
2534 }
2535 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2536 STORE(&d[chan_index], 0, chan_index);
2537 }
2538 break;
2539
2540 case TGSI_OPCODE_CLAMP:
2541 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2542 FETCH(&r[0], 0, chan_index);
2543 FETCH(&r[1], 1, chan_index);
2544 micro_max(&r[0], &r[0], &r[1]);
2545 FETCH(&r[1], 2, chan_index);
2546 micro_min(&d[chan_index], &r[0], &r[1]);
2547 }
2548 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2549 STORE(&d[chan_index], 0, chan_index);
2550 }
2551 break;
2552
2553 case TGSI_OPCODE_FLR:
2554 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2555 FETCH( &r[0], 0, chan_index );
2556 micro_flr(&d[chan_index], &r[0]);
2557 }
2558 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2559 STORE(&d[chan_index], 0, chan_index);
2560 }
2561 break;
2562
2563 case TGSI_OPCODE_ROUND:
2564 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2565 FETCH( &r[0], 0, chan_index );
2566 micro_rnd(&d[chan_index], &r[0]);
2567 }
2568 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2569 STORE(&d[chan_index], 0, chan_index);
2570 }
2571 break;
2572
2573 case TGSI_OPCODE_EX2:
2574 FETCH(&r[0], 0, CHAN_X);
2575
2576 micro_exp2( &r[0], &r[0] );
2577
2578 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2579 STORE( &r[0], 0, chan_index );
2580 }
2581 break;
2582
2583 case TGSI_OPCODE_LG2:
2584 FETCH( &r[0], 0, CHAN_X );
2585 micro_lg2( &r[0], &r[0] );
2586 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2587 STORE( &r[0], 0, chan_index );
2588 }
2589 break;
2590
2591 case TGSI_OPCODE_POW:
2592 FETCH(&r[0], 0, CHAN_X);
2593 FETCH(&r[1], 1, CHAN_X);
2594
2595 micro_pow( &r[0], &r[0], &r[1] );
2596
2597 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2598 STORE( &r[0], 0, chan_index );
2599 }
2600 break;
2601
2602 case TGSI_OPCODE_XPD:
2603 FETCH(&r[0], 0, CHAN_Y);
2604 FETCH(&r[1], 1, CHAN_Z);
2605
2606 micro_mul( &r[2], &r[0], &r[1] );
2607
2608 FETCH(&r[3], 0, CHAN_Z);
2609 FETCH(&r[4], 1, CHAN_Y);
2610
2611 micro_mul( &r[5], &r[3], &r[4] );
2612 micro_sub(&d[CHAN_X], &r[2], &r[5]);
2613
2614 FETCH(&r[2], 1, CHAN_X);
2615
2616 micro_mul( &r[3], &r[3], &r[2] );
2617
2618 FETCH(&r[5], 0, CHAN_X);
2619
2620 micro_mul( &r[1], &r[1], &r[5] );
2621 micro_sub(&d[CHAN_Y], &r[3], &r[1]);
2622
2623 micro_mul( &r[5], &r[5], &r[4] );
2624 micro_mul( &r[0], &r[0], &r[2] );
2625 micro_sub(&d[CHAN_Z], &r[5], &r[0]);
2626
2627 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2628 STORE(&d[CHAN_X], 0, CHAN_X);
2629 }
2630 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2631 STORE(&d[CHAN_Y], 0, CHAN_Y);
2632 }
2633 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2634 STORE(&d[CHAN_Z], 0, CHAN_Z);
2635 }
2636 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2637 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2638 }
2639 break;
2640
2641 case TGSI_OPCODE_ABS:
2642 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2643 FETCH(&r[0], 0, chan_index);
2644 micro_abs(&d[chan_index], &r[0]);
2645 }
2646 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2647 STORE(&d[chan_index], 0, chan_index);
2648 }
2649 break;
2650
2651 case TGSI_OPCODE_RCC:
2652 FETCH(&r[0], 0, CHAN_X);
2653 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2654 micro_float_clamp(&r[0], &r[0]);
2655 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2656 STORE(&r[0], 0, chan_index);
2657 }
2658 break;
2659
2660 case TGSI_OPCODE_DPH:
2661 FETCH(&r[0], 0, CHAN_X);
2662 FETCH(&r[1], 1, CHAN_X);
2663
2664 micro_mul( &r[0], &r[0], &r[1] );
2665
2666 FETCH(&r[1], 0, CHAN_Y);
2667 FETCH(&r[2], 1, CHAN_Y);
2668
2669 micro_mul( &r[1], &r[1], &r[2] );
2670 micro_add( &r[0], &r[0], &r[1] );
2671
2672 FETCH(&r[1], 0, CHAN_Z);
2673 FETCH(&r[2], 1, CHAN_Z);
2674
2675 micro_mul( &r[1], &r[1], &r[2] );
2676 micro_add( &r[0], &r[0], &r[1] );
2677
2678 FETCH(&r[1], 1, CHAN_W);
2679
2680 micro_add( &r[0], &r[0], &r[1] );
2681
2682 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2683 STORE( &r[0], 0, chan_index );
2684 }
2685 break;
2686
2687 case TGSI_OPCODE_COS:
2688 FETCH(&r[0], 0, CHAN_X);
2689
2690 micro_cos( &r[0], &r[0] );
2691
2692 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2693 STORE( &r[0], 0, chan_index );
2694 }
2695 break;
2696
2697 case TGSI_OPCODE_DDX:
2698 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2699 FETCH( &r[0], 0, chan_index );
2700 micro_ddx(&d[chan_index], &r[0]);
2701 }
2702 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2703 STORE(&d[chan_index], 0, chan_index);
2704 }
2705 break;
2706
2707 case TGSI_OPCODE_DDY:
2708 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2709 FETCH( &r[0], 0, chan_index );
2710 micro_ddy(&d[chan_index], &r[0]);
2711 }
2712 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2713 STORE(&d[chan_index], 0, chan_index);
2714 }
2715 break;
2716
2717 case TGSI_OPCODE_KILP:
2718 exec_kilp (mach, inst);
2719 break;
2720
2721 case TGSI_OPCODE_KIL:
2722 exec_kil (mach, inst);
2723 break;
2724
2725 case TGSI_OPCODE_PK2H:
2726 assert (0);
2727 break;
2728
2729 case TGSI_OPCODE_PK2US:
2730 assert (0);
2731 break;
2732
2733 case TGSI_OPCODE_PK4B:
2734 assert (0);
2735 break;
2736
2737 case TGSI_OPCODE_PK4UB:
2738 assert (0);
2739 break;
2740
2741 case TGSI_OPCODE_RFL:
2742 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2743 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2744 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2745 /* r0 = dp3(src0, src0) */
2746 FETCH(&r[2], 0, CHAN_X);
2747 micro_mul(&r[0], &r[2], &r[2]);
2748 FETCH(&r[4], 0, CHAN_Y);
2749 micro_mul(&r[8], &r[4], &r[4]);
2750 micro_add(&r[0], &r[0], &r[8]);
2751 FETCH(&r[6], 0, CHAN_Z);
2752 micro_mul(&r[8], &r[6], &r[6]);
2753 micro_add(&r[0], &r[0], &r[8]);
2754
2755 /* r1 = dp3(src0, src1) */
2756 FETCH(&r[3], 1, CHAN_X);
2757 micro_mul(&r[1], &r[2], &r[3]);
2758 FETCH(&r[5], 1, CHAN_Y);
2759 micro_mul(&r[8], &r[4], &r[5]);
2760 micro_add(&r[1], &r[1], &r[8]);
2761 FETCH(&r[7], 1, CHAN_Z);
2762 micro_mul(&r[8], &r[6], &r[7]);
2763 micro_add(&r[1], &r[1], &r[8]);
2764
2765 /* r1 = 2 * r1 / r0 */
2766 micro_add(&r[1], &r[1], &r[1]);
2767 micro_div(&r[1], &r[1], &r[0]);
2768
2769 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2770 micro_mul(&r[2], &r[2], &r[1]);
2771 micro_sub(&r[2], &r[2], &r[3]);
2772 STORE(&r[2], 0, CHAN_X);
2773 }
2774 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2775 micro_mul(&r[4], &r[4], &r[1]);
2776 micro_sub(&r[4], &r[4], &r[5]);
2777 STORE(&r[4], 0, CHAN_Y);
2778 }
2779 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2780 micro_mul(&r[6], &r[6], &r[1]);
2781 micro_sub(&r[6], &r[6], &r[7]);
2782 STORE(&r[6], 0, CHAN_Z);
2783 }
2784 }
2785 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2786 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2787 }
2788 break;
2789
2790 case TGSI_OPCODE_SEQ:
2791 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2792 FETCH( &r[0], 0, chan_index );
2793 FETCH( &r[1], 1, chan_index );
2794 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2795 }
2796 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2797 STORE(&d[chan_index], 0, chan_index);
2798 }
2799 break;
2800
2801 case TGSI_OPCODE_SFL:
2802 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2803 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2804 }
2805 break;
2806
2807 case TGSI_OPCODE_SGT:
2808 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2809 FETCH( &r[0], 0, chan_index );
2810 FETCH( &r[1], 1, chan_index );
2811 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
2812 }
2813 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2814 STORE(&d[chan_index], 0, chan_index);
2815 }
2816 break;
2817
2818 case TGSI_OPCODE_SIN:
2819 FETCH( &r[0], 0, CHAN_X );
2820 micro_sin( &r[0], &r[0] );
2821 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2822 STORE( &r[0], 0, chan_index );
2823 }
2824 break;
2825
2826 case TGSI_OPCODE_SLE:
2827 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2828 FETCH( &r[0], 0, chan_index );
2829 FETCH( &r[1], 1, chan_index );
2830 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2831 }
2832 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2833 STORE(&d[chan_index], 0, chan_index);
2834 }
2835 break;
2836
2837 case TGSI_OPCODE_SNE:
2838 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2839 FETCH( &r[0], 0, chan_index );
2840 FETCH( &r[1], 1, chan_index );
2841 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
2842 }
2843 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2844 STORE(&d[chan_index], 0, chan_index);
2845 }
2846 break;
2847
2848 case TGSI_OPCODE_STR:
2849 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2850 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2851 }
2852 break;
2853
2854 case TGSI_OPCODE_TEX:
2855 /* simple texture lookup */
2856 /* src[0] = texcoord */
2857 /* src[1] = sampler unit */
2858 exec_tex(mach, inst, FALSE, FALSE);
2859 break;
2860
2861 case TGSI_OPCODE_TXB:
2862 /* Texture lookup with lod bias */
2863 /* src[0] = texcoord (src[0].w = LOD bias) */
2864 /* src[1] = sampler unit */
2865 exec_tex(mach, inst, TRUE, FALSE);
2866 break;
2867
2868 case TGSI_OPCODE_TXD:
2869 /* Texture lookup with explicit partial derivatives */
2870 /* src[0] = texcoord */
2871 /* src[1] = d[strq]/dx */
2872 /* src[2] = d[strq]/dy */
2873 /* src[3] = sampler unit */
2874 exec_txd(mach, inst);
2875 break;
2876
2877 case TGSI_OPCODE_TXL:
2878 /* Texture lookup with explicit LOD */
2879 /* src[0] = texcoord (src[0].w = LOD) */
2880 /* src[1] = sampler unit */
2881 exec_tex(mach, inst, TRUE, FALSE);
2882 break;
2883
2884 case TGSI_OPCODE_TXP:
2885 /* Texture lookup with projection */
2886 /* src[0] = texcoord (src[0].w = projection) */
2887 /* src[1] = sampler unit */
2888 exec_tex(mach, inst, FALSE, TRUE);
2889 break;
2890
2891 case TGSI_OPCODE_UP2H:
2892 assert (0);
2893 break;
2894
2895 case TGSI_OPCODE_UP2US:
2896 assert (0);
2897 break;
2898
2899 case TGSI_OPCODE_UP4B:
2900 assert (0);
2901 break;
2902
2903 case TGSI_OPCODE_UP4UB:
2904 assert (0);
2905 break;
2906
2907 case TGSI_OPCODE_X2D:
2908 FETCH(&r[0], 1, CHAN_X);
2909 FETCH(&r[1], 1, CHAN_Y);
2910 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2911 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2912 FETCH(&r[2], 2, CHAN_X);
2913 micro_mul(&r[2], &r[2], &r[0]);
2914 FETCH(&r[3], 2, CHAN_Y);
2915 micro_mul(&r[3], &r[3], &r[1]);
2916 micro_add(&r[2], &r[2], &r[3]);
2917 FETCH(&r[3], 0, CHAN_X);
2918 micro_add(&d[CHAN_X], &r[2], &r[3]);
2919
2920 }
2921 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2922 IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2923 FETCH(&r[2], 2, CHAN_Z);
2924 micro_mul(&r[2], &r[2], &r[0]);
2925 FETCH(&r[3], 2, CHAN_W);
2926 micro_mul(&r[3], &r[3], &r[1]);
2927 micro_add(&r[2], &r[2], &r[3]);
2928 FETCH(&r[3], 0, CHAN_Y);
2929 micro_add(&d[CHAN_Y], &r[2], &r[3]);
2930
2931 }
2932 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2933 STORE(&d[CHAN_X], 0, CHAN_X);
2934 }
2935 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2936 STORE(&d[CHAN_Y], 0, CHAN_Y);
2937 }
2938 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2939 STORE(&d[CHAN_X], 0, CHAN_Z);
2940 }
2941 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2942 STORE(&d[CHAN_Y], 0, CHAN_W);
2943 }
2944 break;
2945
2946 case TGSI_OPCODE_ARA:
2947 assert (0);
2948 break;
2949
2950 case TGSI_OPCODE_ARR:
2951 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
2952 break;
2953
2954 case TGSI_OPCODE_BRA:
2955 assert (0);
2956 break;
2957
2958 case TGSI_OPCODE_CAL:
2959 /* skip the call if no execution channels are enabled */
2960 if (mach->ExecMask) {
2961 /* do the call */
2962
2963 /* First, record the depths of the execution stacks.
2964 * This is important for deeply nested/looped return statements.
2965 * We have to unwind the stacks by the correct amount. For a
2966 * real code generator, we could determine the number of entries
2967 * to pop off each stack with simple static analysis and avoid
2968 * implementing this data structure at run time.
2969 */
2970 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
2971 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
2972 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
2973 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
2974 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
2975 /* note that PC was already incremented above */
2976 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
2977
2978 mach->CallStackTop++;
2979
2980 /* Second, push the Cond, Loop, Cont, Func stacks */
2981 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2982 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2983 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2984 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
2985 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
2986 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2987
2988 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2989 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2990 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2991 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
2992 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
2993 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2994
2995 /* Finally, jump to the subroutine */
2996 *pc = inst->Label.Label;
2997 }
2998 break;
2999
3000 case TGSI_OPCODE_RET:
3001 mach->FuncMask &= ~mach->ExecMask;
3002 UPDATE_EXEC_MASK(mach);
3003
3004 if (mach->FuncMask == 0x0) {
3005 /* really return now (otherwise, keep executing) */
3006
3007 if (mach->CallStackTop == 0) {
3008 /* returning from main() */
3009 *pc = -1;
3010 return;
3011 }
3012
3013 assert(mach->CallStackTop > 0);
3014 mach->CallStackTop--;
3015
3016 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3017 mach->CondMask = mach->CondStack[mach->CondStackTop];
3018
3019 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3020 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3021
3022 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3023 mach->ContMask = mach->ContStack[mach->ContStackTop];
3024
3025 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3026 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3027
3028 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3029 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3030
3031 assert(mach->FuncStackTop > 0);
3032 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3033
3034 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3035
3036 UPDATE_EXEC_MASK(mach);
3037 }
3038 break;
3039
3040 case TGSI_OPCODE_SSG:
3041 /* TGSI_OPCODE_SGN */
3042 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3043 FETCH( &r[0], 0, chan_index );
3044 micro_sgn(&d[chan_index], &r[0]);
3045 }
3046 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3047 STORE(&d[chan_index], 0, chan_index);
3048 }
3049 break;
3050
3051 case TGSI_OPCODE_CMP:
3052 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3053 FETCH(&r[0], 0, chan_index);
3054 FETCH(&r[1], 1, chan_index);
3055 FETCH(&r[2], 2, chan_index);
3056 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
3057 }
3058 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3059 STORE(&d[chan_index], 0, chan_index);
3060 }
3061 break;
3062
3063 case TGSI_OPCODE_SCS:
3064 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
3065 FETCH( &r[0], 0, CHAN_X );
3066 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
3067 micro_cos(&r[1], &r[0]);
3068 STORE(&r[1], 0, CHAN_X);
3069 }
3070 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
3071 micro_sin(&r[1], &r[0]);
3072 STORE(&r[1], 0, CHAN_Y);
3073 }
3074 }
3075 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
3076 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
3077 }
3078 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
3079 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
3080 }
3081 break;
3082
3083 case TGSI_OPCODE_NRM:
3084 /* 3-component vector normalize */
3085 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
3086 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
3087 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
3088 /* r3 = sqrt(dp3(src0, src0)) */
3089 FETCH(&r[0], 0, CHAN_X);
3090 micro_mul(&r[3], &r[0], &r[0]);
3091 FETCH(&r[1], 0, CHAN_Y);
3092 micro_mul(&r[4], &r[1], &r[1]);
3093 micro_add(&r[3], &r[3], &r[4]);
3094 FETCH(&r[2], 0, CHAN_Z);
3095 micro_mul(&r[4], &r[2], &r[2]);
3096 micro_add(&r[3], &r[3], &r[4]);
3097 micro_sqrt(&r[3], &r[3]);
3098
3099 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
3100 micro_div(&r[0], &r[0], &r[3]);
3101 STORE(&r[0], 0, CHAN_X);
3102 }
3103 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
3104 micro_div(&r[1], &r[1], &r[3]);
3105 STORE(&r[1], 0, CHAN_Y);
3106 }
3107 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
3108 micro_div(&r[2], &r[2], &r[3]);
3109 STORE(&r[2], 0, CHAN_Z);
3110 }
3111 }
3112 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
3113 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
3114 }
3115 break;
3116
3117 case TGSI_OPCODE_NRM4:
3118 /* 4-component vector normalize */
3119 {
3120 union tgsi_exec_channel tmp, dot;
3121
3122 /* tmp = dp4(src0, src0): */
3123 FETCH( &r[0], 0, CHAN_X );
3124 micro_mul( &tmp, &r[0], &r[0] );
3125
3126 FETCH( &r[1], 0, CHAN_Y );
3127 micro_mul( &dot, &r[1], &r[1] );
3128 micro_add( &tmp, &tmp, &dot );
3129
3130 FETCH( &r[2], 0, CHAN_Z );
3131 micro_mul( &dot, &r[2], &r[2] );
3132 micro_add( &tmp, &tmp, &dot );
3133
3134 FETCH( &r[3], 0, CHAN_W );
3135 micro_mul( &dot, &r[3], &r[3] );
3136 micro_add( &tmp, &tmp, &dot );
3137
3138 /* tmp = 1 / sqrt(tmp) */
3139 micro_sqrt( &tmp, &tmp );
3140 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
3141
3142 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3143 /* chan = chan * tmp */
3144 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
3145 STORE( &r[chan_index], 0, chan_index );
3146 }
3147 }
3148 break;
3149
3150 case TGSI_OPCODE_DIV:
3151 assert( 0 );
3152 break;
3153
3154 case TGSI_OPCODE_DP2:
3155 FETCH( &r[0], 0, CHAN_X );
3156 FETCH( &r[1], 1, CHAN_X );
3157 micro_mul( &r[0], &r[0], &r[1] );
3158
3159 FETCH( &r[1], 0, CHAN_Y );
3160 FETCH( &r[2], 1, CHAN_Y );
3161 micro_mul( &r[1], &r[1], &r[2] );
3162 micro_add( &r[0], &r[0], &r[1] );
3163
3164 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3165 STORE( &r[0], 0, chan_index );
3166 }
3167 break;
3168
3169 case TGSI_OPCODE_IF:
3170 /* push CondMask */
3171 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3172 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3173 FETCH( &r[0], 0, CHAN_X );
3174 /* update CondMask */
3175 if( ! r[0].u[0] ) {
3176 mach->CondMask &= ~0x1;
3177 }
3178 if( ! r[0].u[1] ) {
3179 mach->CondMask &= ~0x2;
3180 }
3181 if( ! r[0].u[2] ) {
3182 mach->CondMask &= ~0x4;
3183 }
3184 if( ! r[0].u[3] ) {
3185 mach->CondMask &= ~0x8;
3186 }
3187 UPDATE_EXEC_MASK(mach);
3188 /* Todo: If CondMask==0, jump to ELSE */
3189 break;
3190
3191 case TGSI_OPCODE_ELSE:
3192 /* invert CondMask wrt previous mask */
3193 {
3194 uint prevMask;
3195 assert(mach->CondStackTop > 0);
3196 prevMask = mach->CondStack[mach->CondStackTop - 1];
3197 mach->CondMask = ~mach->CondMask & prevMask;
3198 UPDATE_EXEC_MASK(mach);
3199 /* Todo: If CondMask==0, jump to ENDIF */
3200 }
3201 break;
3202
3203 case TGSI_OPCODE_ENDIF:
3204 /* pop CondMask */
3205 assert(mach->CondStackTop > 0);
3206 mach->CondMask = mach->CondStack[--mach->CondStackTop];
3207 UPDATE_EXEC_MASK(mach);
3208 break;
3209
3210 case TGSI_OPCODE_END:
3211 /* halt execution */
3212 *pc = -1;
3213 break;
3214
3215 case TGSI_OPCODE_REP:
3216 assert (0);
3217 break;
3218
3219 case TGSI_OPCODE_ENDREP:
3220 assert (0);
3221 break;
3222
3223 case TGSI_OPCODE_PUSHA:
3224 assert (0);
3225 break;
3226
3227 case TGSI_OPCODE_POPA:
3228 assert (0);
3229 break;
3230
3231 case TGSI_OPCODE_CEIL:
3232 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3233 FETCH( &r[0], 0, chan_index );
3234 micro_ceil(&d[chan_index], &r[0]);
3235 }
3236 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3237 STORE(&d[chan_index], 0, chan_index);
3238 }
3239 break;
3240
3241 case TGSI_OPCODE_I2F:
3242 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
3243 break;
3244
3245 case TGSI_OPCODE_NOT:
3246 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3247 break;
3248
3249 case TGSI_OPCODE_TRUNC:
3250 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3251 FETCH( &r[0], 0, chan_index );
3252 micro_trunc(&d[chan_index], &r[0]);
3253 }
3254 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3255 STORE(&d[chan_index], 0, chan_index);
3256 }
3257 break;
3258
3259 case TGSI_OPCODE_SHL:
3260 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3261 break;
3262
3263 case TGSI_OPCODE_AND:
3264 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3265 break;
3266
3267 case TGSI_OPCODE_OR:
3268 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3269 break;
3270
3271 case TGSI_OPCODE_MOD:
3272 assert (0);
3273 break;
3274
3275 case TGSI_OPCODE_XOR:
3276 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3277 break;
3278
3279 case TGSI_OPCODE_SAD:
3280 assert (0);
3281 break;
3282
3283 case TGSI_OPCODE_TXF:
3284 assert (0);
3285 break;
3286
3287 case TGSI_OPCODE_TXQ:
3288 assert (0);
3289 break;
3290
3291 case TGSI_OPCODE_EMIT:
3292 emit_vertex(mach);
3293 break;
3294
3295 case TGSI_OPCODE_ENDPRIM:
3296 emit_primitive(mach);
3297 break;
3298
3299 case TGSI_OPCODE_BGNFOR:
3300 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3301 for (chan_index = 0; chan_index < 3; chan_index++) {
3302 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
3303 }
3304 ++mach->LoopCounterStackTop;
3305 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
3306 /* update LoopMask */
3307 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3308 mach->LoopMask &= ~0x1;
3309 }
3310 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3311 mach->LoopMask &= ~0x2;
3312 }
3313 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3314 mach->LoopMask &= ~0x4;
3315 }
3316 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3317 mach->LoopMask &= ~0x8;
3318 }
3319 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3320 UPDATE_EXEC_MASK(mach);
3321 /* fall-through (for now) */
3322 case TGSI_OPCODE_BGNLOOP:
3323 /* push LoopMask and ContMasks */
3324 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3325 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3326 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3327 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3328
3329 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3330 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3331 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
3332 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3333 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
3334 break;
3335
3336 case TGSI_OPCODE_ENDFOR:
3337 assert(mach->LoopCounterStackTop > 0);
3338 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3339 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3340 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
3341 /* update LoopMask */
3342 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3343 mach->LoopMask &= ~0x1;
3344 }
3345 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3346 mach->LoopMask &= ~0x2;
3347 }
3348 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3349 mach->LoopMask &= ~0x4;
3350 }
3351 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3352 mach->LoopMask &= ~0x8;
3353 }
3354 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3355 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3356 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
3357 assert(mach->LoopLabelStackTop > 0);
3358 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
3359 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
3360 /* Restore ContMask, but don't pop */
3361 assert(mach->ContStackTop > 0);
3362 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3363 UPDATE_EXEC_MASK(mach);
3364 if (mach->ExecMask) {
3365 /* repeat loop: jump to instruction just past BGNLOOP */
3366 assert(mach->LoopLabelStackTop > 0);
3367 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3368 }
3369 else {
3370 /* exit loop: pop LoopMask */
3371 assert(mach->LoopStackTop > 0);
3372 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3373 /* pop ContMask */
3374 assert(mach->ContStackTop > 0);
3375 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3376 assert(mach->LoopLabelStackTop > 0);
3377 --mach->LoopLabelStackTop;
3378 assert(mach->LoopCounterStackTop > 0);
3379 --mach->LoopCounterStackTop;
3380
3381 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3382 }
3383 UPDATE_EXEC_MASK(mach);
3384 break;
3385
3386 case TGSI_OPCODE_ENDLOOP:
3387 /* Restore ContMask, but don't pop */
3388 assert(mach->ContStackTop > 0);
3389 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3390 UPDATE_EXEC_MASK(mach);
3391 if (mach->ExecMask) {
3392 /* repeat loop: jump to instruction just past BGNLOOP */
3393 assert(mach->LoopLabelStackTop > 0);
3394 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3395 }
3396 else {
3397 /* exit loop: pop LoopMask */
3398 assert(mach->LoopStackTop > 0);
3399 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3400 /* pop ContMask */
3401 assert(mach->ContStackTop > 0);
3402 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3403 assert(mach->LoopLabelStackTop > 0);
3404 --mach->LoopLabelStackTop;
3405
3406 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3407 }
3408 UPDATE_EXEC_MASK(mach);
3409 break;
3410
3411 case TGSI_OPCODE_BRK:
3412 exec_break(mach);
3413 break;
3414
3415 case TGSI_OPCODE_CONT:
3416 /* turn off cont channels for each enabled exec channel */
3417 mach->ContMask &= ~mach->ExecMask;
3418 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3419 UPDATE_EXEC_MASK(mach);
3420 break;
3421
3422 case TGSI_OPCODE_BGNSUB:
3423 /* no-op */
3424 break;
3425
3426 case TGSI_OPCODE_ENDSUB:
3427 /*
3428 * XXX: This really should be a no-op. We should never reach this opcode.
3429 */
3430
3431 assert(mach->CallStackTop > 0);
3432 mach->CallStackTop--;
3433
3434 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3435 mach->CondMask = mach->CondStack[mach->CondStackTop];
3436
3437 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3438 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3439
3440 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3441 mach->ContMask = mach->ContStack[mach->ContStackTop];
3442
3443 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3444 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3445
3446 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3447 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3448
3449 assert(mach->FuncStackTop > 0);
3450 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3451
3452 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3453
3454 UPDATE_EXEC_MASK(mach);
3455 break;
3456
3457 case TGSI_OPCODE_NOP:
3458 break;
3459
3460 case TGSI_OPCODE_BREAKC:
3461 FETCH(&r[0], 0, CHAN_X);
3462 /* update CondMask */
3463 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
3464 mach->LoopMask &= ~0x1;
3465 }
3466 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
3467 mach->LoopMask &= ~0x2;
3468 }
3469 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
3470 mach->LoopMask &= ~0x4;
3471 }
3472 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
3473 mach->LoopMask &= ~0x8;
3474 }
3475 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3476 UPDATE_EXEC_MASK(mach);
3477 break;
3478
3479 case TGSI_OPCODE_F2I:
3480 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3481 break;
3482
3483 case TGSI_OPCODE_IDIV:
3484 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3485 break;
3486
3487 case TGSI_OPCODE_IMAX:
3488 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3489 break;
3490
3491 case TGSI_OPCODE_IMIN:
3492 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3493 break;
3494
3495 case TGSI_OPCODE_INEG:
3496 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3497 break;
3498
3499 case TGSI_OPCODE_ISGE:
3500 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3501 break;
3502
3503 case TGSI_OPCODE_ISHR:
3504 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3505 break;
3506
3507 case TGSI_OPCODE_ISLT:
3508 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3509 break;
3510
3511 case TGSI_OPCODE_F2U:
3512 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
3513 break;
3514
3515 case TGSI_OPCODE_U2F:
3516 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
3517 break;
3518
3519 case TGSI_OPCODE_UADD:
3520 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3521 break;
3522
3523 case TGSI_OPCODE_UDIV:
3524 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3525 break;
3526
3527 case TGSI_OPCODE_UMAD:
3528 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3529 break;
3530
3531 case TGSI_OPCODE_UMAX:
3532 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3533 break;
3534
3535 case TGSI_OPCODE_UMIN:
3536 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3537 break;
3538
3539 case TGSI_OPCODE_UMOD:
3540 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3541 break;
3542
3543 case TGSI_OPCODE_UMUL:
3544 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3545 break;
3546
3547 case TGSI_OPCODE_USEQ:
3548 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3549 break;
3550
3551 case TGSI_OPCODE_USGE:
3552 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3553 break;
3554
3555 case TGSI_OPCODE_USHR:
3556 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3557 break;
3558
3559 case TGSI_OPCODE_USLT:
3560 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3561 break;
3562
3563 case TGSI_OPCODE_USNE:
3564 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3565 break;
3566
3567 case TGSI_OPCODE_SWITCH:
3568 exec_switch(mach, inst);
3569 break;
3570
3571 case TGSI_OPCODE_CASE:
3572 exec_case(mach, inst);
3573 break;
3574
3575 case TGSI_OPCODE_DEFAULT:
3576 exec_default(mach);
3577 break;
3578
3579 case TGSI_OPCODE_ENDSWITCH:
3580 exec_endswitch(mach);
3581 break;
3582
3583 default:
3584 assert( 0 );
3585 }
3586 }
3587
3588
3589 #define DEBUG_EXECUTION 0
3590
3591
3592 /**
3593 * Run TGSI interpreter.
3594 * \return bitmask of "alive" quad components
3595 */
3596 uint
3597 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3598 {
3599 uint i;
3600 int pc = 0;
3601
3602 mach->CondMask = 0xf;
3603 mach->LoopMask = 0xf;
3604 mach->ContMask = 0xf;
3605 mach->FuncMask = 0xf;
3606 mach->ExecMask = 0xf;
3607
3608 mach->Switch.mask = 0xf;
3609
3610 assert(mach->CondStackTop == 0);
3611 assert(mach->LoopStackTop == 0);
3612 assert(mach->ContStackTop == 0);
3613 assert(mach->SwitchStackTop == 0);
3614 assert(mach->BreakStackTop == 0);
3615 assert(mach->CallStackTop == 0);
3616
3617 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3618 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3619
3620 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3621 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3622 mach->Primitives[0] = 0;
3623 }
3624
3625 for (i = 0; i < QUAD_SIZE; i++) {
3626 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3627 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3628 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3629 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3630 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3631 }
3632
3633 /* execute declarations (interpolants) */
3634 for (i = 0; i < mach->NumDeclarations; i++) {
3635 exec_declaration( mach, mach->Declarations+i );
3636 }
3637
3638 {
3639 #if DEBUG_EXECUTION
3640 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
3641 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
3642 uint inst = 1;
3643
3644 memcpy(temps, mach->Temps, sizeof(temps));
3645 memcpy(outputs, mach->Outputs, sizeof(outputs));
3646 #endif
3647
3648 /* execute instructions, until pc is set to -1 */
3649 while (pc != -1) {
3650
3651 #if DEBUG_EXECUTION
3652 uint i;
3653
3654 tgsi_dump_instruction(&mach->Instructions[pc], inst++);
3655 #endif
3656
3657 assert(pc < (int) mach->NumInstructions);
3658 exec_instruction(mach, mach->Instructions + pc, &pc);
3659
3660 #if DEBUG_EXECUTION
3661 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
3662 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
3663 uint j;
3664
3665 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
3666 debug_printf("TEMP[%2u] = ", i);
3667 for (j = 0; j < 4; j++) {
3668 if (j > 0) {
3669 debug_printf(" ");
3670 }
3671 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3672 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
3673 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
3674 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
3675 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
3676 }
3677 }
3678 }
3679 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
3680 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
3681 uint j;
3682
3683 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
3684 debug_printf("OUT[%2u] = ", i);
3685 for (j = 0; j < 4; j++) {
3686 if (j > 0) {
3687 debug_printf(" ");
3688 }
3689 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3690 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
3691 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
3692 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
3693 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
3694 }
3695 }
3696 }
3697 #endif
3698 }
3699 }
3700
3701 #if 0
3702 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3703 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3704 /*
3705 * Scale back depth component.
3706 */
3707 for (i = 0; i < 4; i++)
3708 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3709 }
3710 #endif
3711
3712 assert(mach->CondStackTop == 0);
3713 assert(mach->LoopStackTop == 0);
3714 assert(mach->ContStackTop == 0);
3715 assert(mach->SwitchStackTop == 0);
3716 assert(mach->BreakStackTop == 0);
3717 assert(mach->CallStackTop == 0);
3718
3719 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3720 }