python/retrace: Use colors on windows console.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask, see UPDATE_EXEC_MASK); the first three are controlled
45 * by the flow control instructions (namely IF/ELSE/ENDIF, LOOP/ENDLOOP, CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_parse.h"
57 #include "tgsi/tgsi_util.h"
58 #include "tgsi_exec.h"
59 #include "util/u_memory.h"
60 #include "util/u_math.h"
61
/* When non-zero, micro_exp2/micro_lg2/micro_pow use the util_fast_*
 * routines instead of the libm calls.
 */
#define FAST_MATH 1

/* Lane of each quad element inside a channel's f[] array
 * (see micro_ddx / micro_ddy).
 */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3

/* Vector component numbers. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
73
/*
 * Short aliases for the utility registers declared as TGSI_EXEC_TEMP_*.
 * Each register is addressed by a pair: _I is the index into the temporary
 * file and _C is the channel within that temporary.
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
106
/**
 * Non-zero when channel CHAN is set in the write mask of the first
 * destination register of instruction INST.
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test against the second destination register of INST. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel that
 * is enabled in the first destination's write mask, with CHAN holding the
 * channel number.  CHAN must be a modifiable lvalue; it is parenthesized
 * at every use so that expressions such as *ptr behave as expected.
 *
 * The expansion ends in a bare `if`, so do not attach an `else` to the
 * loop body.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** As FOR_EACH_ENABLED_CHANNEL, but for the second destination register. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
120
121
/**
 * Recompute the execution mask of the machine pointed to by MACH as the
 * bitwise AND of its condition, loop, continue and function masks.
 * MACH may be any pointer expression (it is parenthesized at each use).
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
125
126
127 static const union tgsi_exec_channel ZeroVec =
128 { { 0.0, 0.0, 0.0, 0.0 } };
129
130
#ifdef DEBUG
/**
 * Debug helper: assert that none of the four float lanes of chan is
 * infinite or NaN.
 */
static void
check_inf_or_nan(const union tgsi_exec_channel *chan)
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      assert(!util_is_inf_or_nan(chan->f[lane]));
   }
}
#endif
141
142
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of chan on one line,
 * prefixed with the label msg.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lane = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lane[0], lane[1], lane[2], lane[3]);
}
#endif
151
152
#ifdef DEBUG
/**
 * Debug helper: dump temporary register number index of the machine,
 * one line per X/Y/Z/W component with its four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char names[] = "XYZW";
   const struct tgsi_exec_vector *vec = mach->Temps + index;
   int comp = 0;

   debug_printf("Temp[%u] =\n", index);
   while (comp < 4) {
      const union tgsi_exec_channel *chan = &vec->xyzw[comp];

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   names[comp],
                   chan->f[0],
                   chan->f[1],
                   chan->f[2],
                   chan->f[3]);
      comp++;
   }
}
#endif
170
171
172
173 /**
174 * Initialize machine state by expanding tokens to full instructions,
175 * allocating temporary storage, setting up constants, etc.
176 * After this, we can call tgsi_exec_machine_run() many times.
177 */
178 void
179 tgsi_exec_machine_bind_shader(
180 struct tgsi_exec_machine *mach,
181 const struct tgsi_token *tokens,
182 uint numSamplers,
183 struct tgsi_sampler **samplers)
184 {
185 uint k;
186 struct tgsi_parse_context parse;
187 struct tgsi_exec_labels *labels = &mach->Labels;
188 struct tgsi_full_instruction *instructions;
189 struct tgsi_full_declaration *declarations;
190 uint maxInstructions = 10, numInstructions = 0;
191 uint maxDeclarations = 10, numDeclarations = 0;
192 uint instno = 0;
193
194 #if 0
195 tgsi_dump(tokens, 0);
196 #endif
197
198 util_init_math();
199
200 mach->Tokens = tokens;
201 mach->Samplers = samplers;
202
203 k = tgsi_parse_init (&parse, mach->Tokens);
204 if (k != TGSI_PARSE_OK) {
205 debug_printf( "Problem parsing!\n" );
206 return;
207 }
208
209 mach->Processor = parse.FullHeader.Processor.Processor;
210 mach->ImmLimit = 0;
211 labels->count = 0;
212
213 declarations = (struct tgsi_full_declaration *)
214 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
215
216 if (!declarations) {
217 return;
218 }
219
220 instructions = (struct tgsi_full_instruction *)
221 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
222
223 if (!instructions) {
224 FREE( declarations );
225 return;
226 }
227
228 while( !tgsi_parse_end_of_tokens( &parse ) ) {
229 uint pointer = parse.Position;
230 uint i;
231
232 tgsi_parse_token( &parse );
233 switch( parse.FullToken.Token.Type ) {
234 case TGSI_TOKEN_TYPE_DECLARATION:
235 /* save expanded declaration */
236 if (numDeclarations == maxDeclarations) {
237 declarations = REALLOC(declarations,
238 maxDeclarations
239 * sizeof(struct tgsi_full_declaration),
240 (maxDeclarations + 10)
241 * sizeof(struct tgsi_full_declaration));
242 maxDeclarations += 10;
243 }
244 memcpy(declarations + numDeclarations,
245 &parse.FullToken.FullDeclaration,
246 sizeof(declarations[0]));
247 numDeclarations++;
248 break;
249
250 case TGSI_TOKEN_TYPE_IMMEDIATE:
251 {
252 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
253 assert( size % 4 == 0 );
254 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
255
256 for( i = 0; i < size; i++ ) {
257 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
258 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
259 }
260 mach->ImmLimit += size / 4;
261 }
262 break;
263
264 case TGSI_TOKEN_TYPE_INSTRUCTION:
265 assert( labels->count < MAX_LABELS );
266
267 labels->labels[labels->count][0] = instno;
268 labels->labels[labels->count][1] = pointer;
269 labels->count++;
270
271 /* save expanded instruction */
272 if (numInstructions == maxInstructions) {
273 instructions = REALLOC(instructions,
274 maxInstructions
275 * sizeof(struct tgsi_full_instruction),
276 (maxInstructions + 10)
277 * sizeof(struct tgsi_full_instruction));
278 maxInstructions += 10;
279 }
280 memcpy(instructions + numInstructions,
281 &parse.FullToken.FullInstruction,
282 sizeof(instructions[0]));
283 numInstructions++;
284 break;
285
286 default:
287 assert( 0 );
288 }
289 }
290 tgsi_parse_free (&parse);
291
292 if (mach->Declarations) {
293 FREE( mach->Declarations );
294 }
295 mach->Declarations = declarations;
296 mach->NumDeclarations = numDeclarations;
297
298 if (mach->Instructions) {
299 FREE( mach->Instructions );
300 }
301 mach->Instructions = instructions;
302 mach->NumInstructions = numInstructions;
303 }
304
305
306 void
307 tgsi_exec_machine_init(
308 struct tgsi_exec_machine *mach )
309 {
310 uint i;
311
312 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
313 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
314
315 /* Setup constants. */
316 for( i = 0; i < 4; i++ ) {
317 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
318 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
319 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
320 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
321 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
322 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
323 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
324 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
325 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
326 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
327 }
328
329 #ifdef DEBUG
330 /* silence warnings */
331 (void) print_chan;
332 (void) print_temp;
333 #endif
334 }
335
336
337 void
338 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
339 {
340 if (mach->Instructions) {
341 FREE(mach->Instructions);
342 mach->Instructions = NULL;
343 mach->NumInstructions = 0;
344 }
345 if (mach->Declarations) {
346 FREE(mach->Declarations);
347 mach->Declarations = NULL;
348 mach->NumDeclarations = 0;
349 }
350 }
351
352
353 static void
354 micro_abs(
355 union tgsi_exec_channel *dst,
356 const union tgsi_exec_channel *src )
357 {
358 dst->f[0] = fabsf( src->f[0] );
359 dst->f[1] = fabsf( src->f[1] );
360 dst->f[2] = fabsf( src->f[2] );
361 dst->f[3] = fabsf( src->f[3] );
362 }
363
364 static void
365 micro_add(
366 union tgsi_exec_channel *dst,
367 const union tgsi_exec_channel *src0,
368 const union tgsi_exec_channel *src1 )
369 {
370 dst->f[0] = src0->f[0] + src1->f[0];
371 dst->f[1] = src0->f[1] + src1->f[1];
372 dst->f[2] = src0->f[2] + src1->f[2];
373 dst->f[3] = src0->f[3] + src1->f[3];
374 }
375
#if 0
/** Per-lane signed integer addition (currently compiled out). */
static void
micro_iadd(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] + src1->i[lane];
   }
}
#endif
389
390 static void
391 micro_and(
392 union tgsi_exec_channel *dst,
393 const union tgsi_exec_channel *src0,
394 const union tgsi_exec_channel *src1 )
395 {
396 dst->u[0] = src0->u[0] & src1->u[0];
397 dst->u[1] = src0->u[1] & src1->u[1];
398 dst->u[2] = src0->u[2] & src1->u[2];
399 dst->u[3] = src0->u[3] & src1->u[3];
400 }
401
402 static void
403 micro_ceil(
404 union tgsi_exec_channel *dst,
405 const union tgsi_exec_channel *src )
406 {
407 dst->f[0] = ceilf( src->f[0] );
408 dst->f[1] = ceilf( src->f[1] );
409 dst->f[2] = ceilf( src->f[2] );
410 dst->f[3] = ceilf( src->f[3] );
411 }
412
413 static void
414 micro_cos(
415 union tgsi_exec_channel *dst,
416 const union tgsi_exec_channel *src )
417 {
418 dst->f[0] = cosf( src->f[0] );
419 dst->f[1] = cosf( src->f[1] );
420 dst->f[2] = cosf( src->f[2] );
421 dst->f[3] = cosf( src->f[3] );
422 }
423
424 static void
425 micro_ddx(
426 union tgsi_exec_channel *dst,
427 const union tgsi_exec_channel *src )
428 {
429 dst->f[0] =
430 dst->f[1] =
431 dst->f[2] =
432 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
433 }
434
435 static void
436 micro_ddy(
437 union tgsi_exec_channel *dst,
438 const union tgsi_exec_channel *src )
439 {
440 dst->f[0] =
441 dst->f[1] =
442 dst->f[2] =
443 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
444 }
445
446 static void
447 micro_div(
448 union tgsi_exec_channel *dst,
449 const union tgsi_exec_channel *src0,
450 const union tgsi_exec_channel *src1 )
451 {
452 if (src1->f[0] != 0) {
453 dst->f[0] = src0->f[0] / src1->f[0];
454 }
455 if (src1->f[1] != 0) {
456 dst->f[1] = src0->f[1] / src1->f[1];
457 }
458 if (src1->f[2] != 0) {
459 dst->f[2] = src0->f[2] / src1->f[2];
460 }
461 if (src1->f[3] != 0) {
462 dst->f[3] = src0->f[3] / src1->f[3];
463 }
464 }
465
#if 0
/**
 * Per-lane unsigned integer division (currently compiled out).
 * No check is made for a zero divisor.
 */
static void
micro_udiv(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] / src1->u[lane];
   }
}
#endif
479
480 static void
481 micro_eq(
482 union tgsi_exec_channel *dst,
483 const union tgsi_exec_channel *src0,
484 const union tgsi_exec_channel *src1,
485 const union tgsi_exec_channel *src2,
486 const union tgsi_exec_channel *src3 )
487 {
488 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
489 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
490 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
491 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
492 }
493
#if 0
/**
 * Per-lane signed integer select: dst = (src0 == src1) ? src2 : src3
 * (currently compiled out).
 */
static void
micro_ieq(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] == src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
509
510 static void
511 micro_exp2(
512 union tgsi_exec_channel *dst,
513 const union tgsi_exec_channel *src)
514 {
515 #if FAST_MATH
516 dst->f[0] = util_fast_exp2( src->f[0] );
517 dst->f[1] = util_fast_exp2( src->f[1] );
518 dst->f[2] = util_fast_exp2( src->f[2] );
519 dst->f[3] = util_fast_exp2( src->f[3] );
520 #else
521 dst->f[0] = powf( 2.0f, src->f[0] );
522 dst->f[1] = powf( 2.0f, src->f[1] );
523 dst->f[2] = powf( 2.0f, src->f[2] );
524 dst->f[3] = powf( 2.0f, src->f[3] );
525 #endif
526 }
527
#if 0
/** Per-lane float to unsigned conversion by C cast (currently compiled out). */
static void
micro_f2ut(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = (uint) src->f[lane];
   }
}
#endif
540
541 static void
542 micro_float_clamp(union tgsi_exec_channel *dst,
543 const union tgsi_exec_channel *src)
544 {
545 uint i;
546
547 for (i = 0; i < 4; i++) {
548 if (src->f[i] > 0.0f) {
549 if (src->f[i] > 1.884467e+019f)
550 dst->f[i] = 1.884467e+019f;
551 else if (src->f[i] < 5.42101e-020f)
552 dst->f[i] = 5.42101e-020f;
553 else
554 dst->f[i] = src->f[i];
555 }
556 else {
557 if (src->f[i] < -1.884467e+019f)
558 dst->f[i] = -1.884467e+019f;
559 else if (src->f[i] > -5.42101e-020f)
560 dst->f[i] = -5.42101e-020f;
561 else
562 dst->f[i] = src->f[i];
563 }
564 }
565 }
566
567 static void
568 micro_flr(
569 union tgsi_exec_channel *dst,
570 const union tgsi_exec_channel *src )
571 {
572 dst->f[0] = floorf( src->f[0] );
573 dst->f[1] = floorf( src->f[1] );
574 dst->f[2] = floorf( src->f[2] );
575 dst->f[3] = floorf( src->f[3] );
576 }
577
578 static void
579 micro_frc(
580 union tgsi_exec_channel *dst,
581 const union tgsi_exec_channel *src )
582 {
583 dst->f[0] = src->f[0] - floorf( src->f[0] );
584 dst->f[1] = src->f[1] - floorf( src->f[1] );
585 dst->f[2] = src->f[2] - floorf( src->f[2] );
586 dst->f[3] = src->f[3] - floorf( src->f[3] );
587 }
588
589 static void
590 micro_i2f(
591 union tgsi_exec_channel *dst,
592 const union tgsi_exec_channel *src )
593 {
594 dst->f[0] = (float) src->i[0];
595 dst->f[1] = (float) src->i[1];
596 dst->f[2] = (float) src->i[2];
597 dst->f[3] = (float) src->i[3];
598 }
599
600 static void
601 micro_lg2(
602 union tgsi_exec_channel *dst,
603 const union tgsi_exec_channel *src )
604 {
605 #if FAST_MATH
606 dst->f[0] = util_fast_log2( src->f[0] );
607 dst->f[1] = util_fast_log2( src->f[1] );
608 dst->f[2] = util_fast_log2( src->f[2] );
609 dst->f[3] = util_fast_log2( src->f[3] );
610 #else
611 dst->f[0] = logf( src->f[0] ) * 1.442695f;
612 dst->f[1] = logf( src->f[1] ) * 1.442695f;
613 dst->f[2] = logf( src->f[2] ) * 1.442695f;
614 dst->f[3] = logf( src->f[3] ) * 1.442695f;
615 #endif
616 }
617
618 static void
619 micro_le(
620 union tgsi_exec_channel *dst,
621 const union tgsi_exec_channel *src0,
622 const union tgsi_exec_channel *src1,
623 const union tgsi_exec_channel *src2,
624 const union tgsi_exec_channel *src3 )
625 {
626 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
627 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
628 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
629 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
630 }
631
632 static void
633 micro_lt(
634 union tgsi_exec_channel *dst,
635 const union tgsi_exec_channel *src0,
636 const union tgsi_exec_channel *src1,
637 const union tgsi_exec_channel *src2,
638 const union tgsi_exec_channel *src3 )
639 {
640 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
641 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
642 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
643 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
644 }
645
#if 0
/**
 * Per-lane signed integer select: dst = (src0 < src1) ? src2 : src3
 * (currently compiled out).
 */
static void
micro_ilt(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
661
#if 0
/**
 * Per-lane unsigned integer select: dst = (src0 < src1) ? src2 : src3
 * (currently compiled out).
 */
static void
micro_ult(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src2->u[lane];
      }
      else {
         dst->u[lane] = src3->u[lane];
      }
   }
}
#endif
677
678 static void
679 micro_max(
680 union tgsi_exec_channel *dst,
681 const union tgsi_exec_channel *src0,
682 const union tgsi_exec_channel *src1 )
683 {
684 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
685 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
686 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
687 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
688 }
689
#if 0
/** Per-lane signed integer maximum (currently compiled out). */
static void
micro_imax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] > src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
703
#if 0
/** Per-lane unsigned integer maximum (currently compiled out). */
static void
micro_umax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] > src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
717
718 static void
719 micro_min(
720 union tgsi_exec_channel *dst,
721 const union tgsi_exec_channel *src0,
722 const union tgsi_exec_channel *src1 )
723 {
724 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
725 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
726 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
727 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
728 }
729
#if 0
/** Per-lane signed integer minimum (currently compiled out). */
static void
micro_imin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
743
#if 0
/** Per-lane unsigned integer minimum (currently compiled out). */
static void
micro_umin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
757
#if 0
/**
 * Per-lane unsigned integer remainder (currently compiled out).
 * No check is made for a zero divisor.
 */
static void
micro_umod(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] % src1->u[lane];
   }
}
#endif
771
772 static void
773 micro_mul(
774 union tgsi_exec_channel *dst,
775 const union tgsi_exec_channel *src0,
776 const union tgsi_exec_channel *src1 )
777 {
778 dst->f[0] = src0->f[0] * src1->f[0];
779 dst->f[1] = src0->f[1] * src1->f[1];
780 dst->f[2] = src0->f[2] * src1->f[2];
781 dst->f[3] = src0->f[3] * src1->f[3];
782 }
783
#if 0
/** Per-lane signed integer multiplication (currently compiled out). */
static void
micro_imul(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] * src1->i[lane];
   }
}
#endif
797
#if 0
/**
 * Signed multiply with a two-channel result (currently compiled out).
 * dst1 receives the ordinary per-lane product; dst0 is set to zero in
 * every lane.
 */
static void
micro_imul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
816
#if 0
/**
 * Unsigned multiply with a two-channel result (currently compiled out).
 * dst1 receives the ordinary per-lane product; dst0 is set to zero in
 * every lane.
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->u[lane] = src0->u[lane] * src1->u[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->u[lane] = 0;
   }
}
#endif
835
836
#if 0
/**
 * Per-lane conditional move on the unsigned view (currently compiled out):
 * dst = (src0 != 0) ? src1 : src2.
 */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane]) {
         dst->u[lane] = src1->u[lane];
      }
      else {
         dst->u[lane] = src2->u[lane];
      }
   }
}
#endif
851
852 static void
853 micro_neg(
854 union tgsi_exec_channel *dst,
855 const union tgsi_exec_channel *src )
856 {
857 dst->f[0] = -src->f[0];
858 dst->f[1] = -src->f[1];
859 dst->f[2] = -src->f[2];
860 dst->f[3] = -src->f[3];
861 }
862
#if 0
/** Per-lane signed integer negation (currently compiled out). */
static void
micro_ineg(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = -src->i[lane];
   }
}
#endif
875
876 static void
877 micro_not(
878 union tgsi_exec_channel *dst,
879 const union tgsi_exec_channel *src )
880 {
881 dst->u[0] = ~src->u[0];
882 dst->u[1] = ~src->u[1];
883 dst->u[2] = ~src->u[2];
884 dst->u[3] = ~src->u[3];
885 }
886
887 static void
888 micro_or(
889 union tgsi_exec_channel *dst,
890 const union tgsi_exec_channel *src0,
891 const union tgsi_exec_channel *src1 )
892 {
893 dst->u[0] = src0->u[0] | src1->u[0];
894 dst->u[1] = src0->u[1] | src1->u[1];
895 dst->u[2] = src0->u[2] | src1->u[2];
896 dst->u[3] = src0->u[3] | src1->u[3];
897 }
898
899 static void
900 micro_pow(
901 union tgsi_exec_channel *dst,
902 const union tgsi_exec_channel *src0,
903 const union tgsi_exec_channel *src1 )
904 {
905 #if FAST_MATH
906 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
907 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
908 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
909 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
910 #else
911 dst->f[0] = powf( src0->f[0], src1->f[0] );
912 dst->f[1] = powf( src0->f[1], src1->f[1] );
913 dst->f[2] = powf( src0->f[2], src1->f[2] );
914 dst->f[3] = powf( src0->f[3], src1->f[3] );
915 #endif
916 }
917
918 static void
919 micro_rnd(
920 union tgsi_exec_channel *dst,
921 const union tgsi_exec_channel *src )
922 {
923 dst->f[0] = floorf( src->f[0] + 0.5f );
924 dst->f[1] = floorf( src->f[1] + 0.5f );
925 dst->f[2] = floorf( src->f[2] + 0.5f );
926 dst->f[3] = floorf( src->f[3] + 0.5f );
927 }
928
929 static void
930 micro_sgn(
931 union tgsi_exec_channel *dst,
932 const union tgsi_exec_channel *src )
933 {
934 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
935 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
936 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
937 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
938 }
939
940 static void
941 micro_shl(
942 union tgsi_exec_channel *dst,
943 const union tgsi_exec_channel *src0,
944 const union tgsi_exec_channel *src1 )
945 {
946 dst->i[0] = src0->i[0] << src1->i[0];
947 dst->i[1] = src0->i[1] << src1->i[1];
948 dst->i[2] = src0->i[2] << src1->i[2];
949 dst->i[3] = src0->i[3] << src1->i[3];
950 }
951
952 static void
953 micro_ishr(
954 union tgsi_exec_channel *dst,
955 const union tgsi_exec_channel *src0,
956 const union tgsi_exec_channel *src1 )
957 {
958 dst->i[0] = src0->i[0] >> src1->i[0];
959 dst->i[1] = src0->i[1] >> src1->i[1];
960 dst->i[2] = src0->i[2] >> src1->i[2];
961 dst->i[3] = src0->i[3] >> src1->i[3];
962 }
963
964 static void
965 micro_trunc(
966 union tgsi_exec_channel *dst,
967 const union tgsi_exec_channel *src0 )
968 {
969 dst->f[0] = (float) (int) src0->f[0];
970 dst->f[1] = (float) (int) src0->f[1];
971 dst->f[2] = (float) (int) src0->f[2];
972 dst->f[3] = (float) (int) src0->f[3];
973 }
974
#if 0
/** Per-lane right shift on the unsigned view (currently compiled out). */
static void
micro_ushr(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] >> src1->u[lane];
   }
}
#endif
988
989 static void
990 micro_sin(
991 union tgsi_exec_channel *dst,
992 const union tgsi_exec_channel *src )
993 {
994 dst->f[0] = sinf( src->f[0] );
995 dst->f[1] = sinf( src->f[1] );
996 dst->f[2] = sinf( src->f[2] );
997 dst->f[3] = sinf( src->f[3] );
998 }
999
1000 static void
1001 micro_sqrt( union tgsi_exec_channel *dst,
1002 const union tgsi_exec_channel *src )
1003 {
1004 dst->f[0] = sqrtf( src->f[0] );
1005 dst->f[1] = sqrtf( src->f[1] );
1006 dst->f[2] = sqrtf( src->f[2] );
1007 dst->f[3] = sqrtf( src->f[3] );
1008 }
1009
1010 static void
1011 micro_sub(
1012 union tgsi_exec_channel *dst,
1013 const union tgsi_exec_channel *src0,
1014 const union tgsi_exec_channel *src1 )
1015 {
1016 dst->f[0] = src0->f[0] - src1->f[0];
1017 dst->f[1] = src0->f[1] - src1->f[1];
1018 dst->f[2] = src0->f[2] - src1->f[2];
1019 dst->f[3] = src0->f[3] - src1->f[3];
1020 }
1021
#if 0
/** Per-lane unsigned integer to float conversion (currently compiled out). */
static void
micro_u2f(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->f[lane] = (float) src->u[lane];
   }
}
#endif
1034
1035 static void
1036 micro_xor(
1037 union tgsi_exec_channel *dst,
1038 const union tgsi_exec_channel *src0,
1039 const union tgsi_exec_channel *src1 )
1040 {
1041 dst->u[0] = src0->u[0] ^ src1->u[0];
1042 dst->u[1] = src0->u[1] ^ src1->u[1];
1043 dst->u[2] = src0->u[2] ^ src1->u[2];
1044 dst->u[3] = src0->u[3] ^ src1->u[3];
1045 }
1046
1047 static void
1048 fetch_src_file_channel(
1049 const struct tgsi_exec_machine *mach,
1050 const uint file,
1051 const uint swizzle,
1052 const union tgsi_exec_channel *index,
1053 union tgsi_exec_channel *chan )
1054 {
1055 switch( swizzle ) {
1056 case TGSI_EXTSWIZZLE_X:
1057 case TGSI_EXTSWIZZLE_Y:
1058 case TGSI_EXTSWIZZLE_Z:
1059 case TGSI_EXTSWIZZLE_W:
1060 switch( file ) {
1061 case TGSI_FILE_CONSTANT:
1062 assert(mach->Consts);
1063 if (index->i[0] < 0)
1064 chan->f[0] = 0.0f;
1065 else
1066 chan->f[0] = mach->Consts[index->i[0]][swizzle];
1067 if (index->i[1] < 0)
1068 chan->f[1] = 0.0f;
1069 else
1070 chan->f[1] = mach->Consts[index->i[1]][swizzle];
1071 if (index->i[2] < 0)
1072 chan->f[2] = 0.0f;
1073 else
1074 chan->f[2] = mach->Consts[index->i[2]][swizzle];
1075 if (index->i[3] < 0)
1076 chan->f[3] = 0.0f;
1077 else
1078 chan->f[3] = mach->Consts[index->i[3]][swizzle];
1079 break;
1080
1081 case TGSI_FILE_INPUT:
1082 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1083 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1084 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1085 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1086 break;
1087
1088 case TGSI_FILE_TEMPORARY:
1089 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1090 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1091 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1092 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1093 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1094 break;
1095
1096 case TGSI_FILE_IMMEDIATE:
1097 assert( index->i[0] < (int) mach->ImmLimit );
1098 chan->f[0] = mach->Imms[index->i[0]][swizzle];
1099 assert( index->i[1] < (int) mach->ImmLimit );
1100 chan->f[1] = mach->Imms[index->i[1]][swizzle];
1101 assert( index->i[2] < (int) mach->ImmLimit );
1102 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1103 assert( index->i[3] < (int) mach->ImmLimit );
1104 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1105 break;
1106
1107 case TGSI_FILE_ADDRESS:
1108 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1109 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1110 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1111 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1112 break;
1113
1114 case TGSI_FILE_OUTPUT:
1115 /* vertex/fragment output vars can be read too */
1116 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1117 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1118 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1119 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1120 break;
1121
1122 default:
1123 assert( 0 );
1124 }
1125 break;
1126
1127 case TGSI_EXTSWIZZLE_ZERO:
1128 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1129 break;
1130
1131 case TGSI_EXTSWIZZLE_ONE:
1132 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1133 break;
1134
1135 default:
1136 assert( 0 );
1137 }
1138 }
1139
1140 static void
1141 fetch_source(
1142 const struct tgsi_exec_machine *mach,
1143 union tgsi_exec_channel *chan,
1144 const struct tgsi_full_src_register *reg,
1145 const uint chan_index )
1146 {
1147 union tgsi_exec_channel index;
1148 uint swizzle;
1149
1150 /* We start with a direct index into a register file.
1151 *
1152 * file[1],
1153 * where:
1154 * file = SrcRegister.File
1155 * [1] = SrcRegister.Index
1156 */
1157 index.i[0] =
1158 index.i[1] =
1159 index.i[2] =
1160 index.i[3] = reg->SrcRegister.Index;
1161
1162 /* There is an extra source register that indirectly subscripts
1163 * a register file. The direct index now becomes an offset
1164 * that is being added to the indirect register.
1165 *
1166 * file[ind[2].x+1],
1167 * where:
1168 * ind = SrcRegisterInd.File
1169 * [2] = SrcRegisterInd.Index
1170 * .x = SrcRegisterInd.SwizzleX
1171 */
1172 if (reg->SrcRegister.Indirect) {
1173 union tgsi_exec_channel index2;
1174 union tgsi_exec_channel indir_index;
1175 const uint execmask = mach->ExecMask;
1176 uint i;
1177
1178 /* which address register (always zero now) */
1179 index2.i[0] =
1180 index2.i[1] =
1181 index2.i[2] =
1182 index2.i[3] = reg->SrcRegisterInd.Index;
1183
1184 /* get current value of address register[swizzle] */
1185 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1186 fetch_src_file_channel(
1187 mach,
1188 reg->SrcRegisterInd.File,
1189 swizzle,
1190 &index2,
1191 &indir_index );
1192
1193 /* add value of address register to the offset */
1194 index.i[0] += (int) indir_index.f[0];
1195 index.i[1] += (int) indir_index.f[1];
1196 index.i[2] += (int) indir_index.f[2];
1197 index.i[3] += (int) indir_index.f[3];
1198
1199 /* for disabled execution channels, zero-out the index to
1200 * avoid using a potential garbage value.
1201 */
1202 for (i = 0; i < QUAD_SIZE; i++) {
1203 if ((execmask & (1 << i)) == 0)
1204 index.i[i] = 0;
1205 }
1206 }
1207
1208 /* There is an extra source register that is a second
1209 * subscript to a register file. Effectively it means that
1210 * the register file is actually a 2D array of registers.
1211 *
1212 * file[1][3] == file[1*sizeof(file[1])+3],
1213 * where:
1214 * [3] = SrcRegisterDim.Index
1215 */
1216 if (reg->SrcRegister.Dimension) {
1217 /* The size of the first-order array depends on the register file type.
1218 * We need to multiply the index to the first array to get an effective,
1219 * "flat" index that points to the beginning of the second-order array.
1220 */
1221 switch (reg->SrcRegister.File) {
1222 case TGSI_FILE_INPUT:
1223 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1224 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1225 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1226 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1227 break;
1228 case TGSI_FILE_CONSTANT:
1229 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1230 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1231 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1232 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1233 break;
1234 default:
1235 assert( 0 );
1236 }
1237
1238 index.i[0] += reg->SrcRegisterDim.Index;
1239 index.i[1] += reg->SrcRegisterDim.Index;
1240 index.i[2] += reg->SrcRegisterDim.Index;
1241 index.i[3] += reg->SrcRegisterDim.Index;
1242
1243 /* Again, the second subscript index can be addressed indirectly
1244 * identically to the first one.
1245 * Nothing stops us from indirectly addressing the indirect register,
1246 * but there is no need for that, so we won't exercise it.
1247 *
1248 * file[1][ind[4].y+3],
1249 * where:
1250 * ind = SrcRegisterDimInd.File
1251 * [4] = SrcRegisterDimInd.Index
1252 * .y = SrcRegisterDimInd.SwizzleX
1253 */
1254 if (reg->SrcRegisterDim.Indirect) {
1255 union tgsi_exec_channel index2;
1256 union tgsi_exec_channel indir_index;
1257 const uint execmask = mach->ExecMask;
1258 uint i;
1259
1260 index2.i[0] =
1261 index2.i[1] =
1262 index2.i[2] =
1263 index2.i[3] = reg->SrcRegisterDimInd.Index;
1264
1265 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1266 fetch_src_file_channel(
1267 mach,
1268 reg->SrcRegisterDimInd.File,
1269 swizzle,
1270 &index2,
1271 &indir_index );
1272
1273 index.i[0] += (int) indir_index.f[0];
1274 index.i[1] += (int) indir_index.f[1];
1275 index.i[2] += (int) indir_index.f[2];
1276 index.i[3] += (int) indir_index.f[3];
1277
1278 /* for disabled execution channels, zero-out the index to
1279 * avoid using a potential garbage value.
1280 */
1281 for (i = 0; i < QUAD_SIZE; i++) {
1282 if ((execmask & (1 << i)) == 0)
1283 index.i[i] = 0;
1284 }
1285 }
1286
1287 /* If by any chance there was a need for a 3D array of register
1288 * files, we would have to check whether SrcRegisterDim is followed
1289 * by a dimension register and continue the saga.
1290 */
1291 }
1292
1293 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1294 fetch_src_file_channel(
1295 mach,
1296 reg->SrcRegister.File,
1297 swizzle,
1298 &index,
1299 chan );
1300
1301 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1302 case TGSI_UTIL_SIGN_CLEAR:
1303 micro_abs( chan, chan );
1304 break;
1305
1306 case TGSI_UTIL_SIGN_SET:
1307 micro_abs( chan, chan );
1308 micro_neg( chan, chan );
1309 break;
1310
1311 case TGSI_UTIL_SIGN_TOGGLE:
1312 micro_neg( chan, chan );
1313 break;
1314
1315 case TGSI_UTIL_SIGN_KEEP:
1316 break;
1317 }
1318
1319 if (reg->SrcRegisterExtMod.Complement) {
1320 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1321 }
1322 }
1323
1324 static void
1325 store_dest(
1326 struct tgsi_exec_machine *mach,
1327 const union tgsi_exec_channel *chan,
1328 const struct tgsi_full_dst_register *reg,
1329 const struct tgsi_full_instruction *inst,
1330 uint chan_index )
1331 {
1332 uint i;
1333 union tgsi_exec_channel null;
1334 union tgsi_exec_channel *dst;
1335 uint execmask = mach->ExecMask;
1336
1337 #ifdef DEBUG
1338 check_inf_or_nan(chan);
1339 #endif
1340
1341 switch (reg->DstRegister.File) {
1342 case TGSI_FILE_NULL:
1343 dst = &null;
1344 break;
1345
1346 case TGSI_FILE_OUTPUT:
1347 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1348 + reg->DstRegister.Index].xyzw[chan_index];
1349 break;
1350
1351 case TGSI_FILE_TEMPORARY:
1352 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1353 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1354 break;
1355
1356 case TGSI_FILE_ADDRESS:
1357 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1358 break;
1359
1360 default:
1361 assert( 0 );
1362 return;
1363 }
1364
1365 if (inst->InstructionExtNv.CondFlowEnable) {
1366 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1367 uint swizzle;
1368 uint shift;
1369 uint mask;
1370 uint test;
1371
1372 /* Only CC0 supported.
1373 */
1374 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1375
1376 switch (chan_index) {
1377 case CHAN_X:
1378 swizzle = inst->InstructionExtNv.CondSwizzleX;
1379 break;
1380 case CHAN_Y:
1381 swizzle = inst->InstructionExtNv.CondSwizzleY;
1382 break;
1383 case CHAN_Z:
1384 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1385 break;
1386 case CHAN_W:
1387 swizzle = inst->InstructionExtNv.CondSwizzleW;
1388 break;
1389 default:
1390 assert( 0 );
1391 return;
1392 }
1393
1394 switch (swizzle) {
1395 case TGSI_SWIZZLE_X:
1396 shift = TGSI_EXEC_CC_X_SHIFT;
1397 mask = TGSI_EXEC_CC_X_MASK;
1398 break;
1399 case TGSI_SWIZZLE_Y:
1400 shift = TGSI_EXEC_CC_Y_SHIFT;
1401 mask = TGSI_EXEC_CC_Y_MASK;
1402 break;
1403 case TGSI_SWIZZLE_Z:
1404 shift = TGSI_EXEC_CC_Z_SHIFT;
1405 mask = TGSI_EXEC_CC_Z_MASK;
1406 break;
1407 case TGSI_SWIZZLE_W:
1408 shift = TGSI_EXEC_CC_W_SHIFT;
1409 mask = TGSI_EXEC_CC_W_MASK;
1410 break;
1411 default:
1412 assert( 0 );
1413 return;
1414 }
1415
1416 switch (inst->InstructionExtNv.CondMask) {
1417 case TGSI_CC_GT:
1418 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1419 for (i = 0; i < QUAD_SIZE; i++)
1420 if (cc->u[i] & test)
1421 execmask &= ~(1 << i);
1422 break;
1423
1424 case TGSI_CC_EQ:
1425 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1426 for (i = 0; i < QUAD_SIZE; i++)
1427 if (cc->u[i] & test)
1428 execmask &= ~(1 << i);
1429 break;
1430
1431 case TGSI_CC_LT:
1432 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1433 for (i = 0; i < QUAD_SIZE; i++)
1434 if (cc->u[i] & test)
1435 execmask &= ~(1 << i);
1436 break;
1437
1438 case TGSI_CC_GE:
1439 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1440 for (i = 0; i < QUAD_SIZE; i++)
1441 if (cc->u[i] & test)
1442 execmask &= ~(1 << i);
1443 break;
1444
1445 case TGSI_CC_LE:
1446 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1447 for (i = 0; i < QUAD_SIZE; i++)
1448 if (cc->u[i] & test)
1449 execmask &= ~(1 << i);
1450 break;
1451
1452 case TGSI_CC_NE:
1453 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1454 for (i = 0; i < QUAD_SIZE; i++)
1455 if (cc->u[i] & test)
1456 execmask &= ~(1 << i);
1457 break;
1458
1459 case TGSI_CC_TR:
1460 break;
1461
1462 case TGSI_CC_FL:
1463 for (i = 0; i < QUAD_SIZE; i++)
1464 execmask &= ~(1 << i);
1465 break;
1466
1467 default:
1468 assert( 0 );
1469 return;
1470 }
1471 }
1472
1473 switch (inst->Instruction.Saturate) {
1474 case TGSI_SAT_NONE:
1475 for (i = 0; i < QUAD_SIZE; i++)
1476 if (execmask & (1 << i))
1477 dst->i[i] = chan->i[i];
1478 break;
1479
1480 case TGSI_SAT_ZERO_ONE:
1481 for (i = 0; i < QUAD_SIZE; i++)
1482 if (execmask & (1 << i)) {
1483 if (chan->f[i] < 0.0f)
1484 dst->f[i] = 0.0f;
1485 else if (chan->f[i] > 1.0f)
1486 dst->f[i] = 1.0f;
1487 else
1488 dst->i[i] = chan->i[i];
1489 }
1490 break;
1491
1492 case TGSI_SAT_MINUS_PLUS_ONE:
1493 for (i = 0; i < QUAD_SIZE; i++)
1494 if (execmask & (1 << i)) {
1495 if (chan->f[i] < -1.0f)
1496 dst->f[i] = -1.0f;
1497 else if (chan->f[i] > 1.0f)
1498 dst->f[i] = 1.0f;
1499 else
1500 dst->i[i] = chan->i[i];
1501 }
1502 break;
1503
1504 default:
1505 assert( 0 );
1506 }
1507
1508 if (inst->InstructionExtNv.CondDstUpdate) {
1509 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1510 uint shift;
1511 uint mask;
1512
1513 /* Only CC0 supported.
1514 */
1515 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1516
1517 switch (chan_index) {
1518 case CHAN_X:
1519 shift = TGSI_EXEC_CC_X_SHIFT;
1520 mask = ~TGSI_EXEC_CC_X_MASK;
1521 break;
1522 case CHAN_Y:
1523 shift = TGSI_EXEC_CC_Y_SHIFT;
1524 mask = ~TGSI_EXEC_CC_Y_MASK;
1525 break;
1526 case CHAN_Z:
1527 shift = TGSI_EXEC_CC_Z_SHIFT;
1528 mask = ~TGSI_EXEC_CC_Z_MASK;
1529 break;
1530 case CHAN_W:
1531 shift = TGSI_EXEC_CC_W_SHIFT;
1532 mask = ~TGSI_EXEC_CC_W_MASK;
1533 break;
1534 default:
1535 assert( 0 );
1536 return;
1537 }
1538
1539 for (i = 0; i < QUAD_SIZE; i++)
1540 if (execmask & (1 << i)) {
1541 cc->u[i] &= mask;
1542 if (dst->f[i] < 0.0f)
1543 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1544 else if (dst->f[i] > 0.0f)
1545 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1546 else if (dst->f[i] == 0.0f)
1547 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1548 else
1549 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1550 }
1551 }
1552 }
1553
/*
 * Shorthands for fetch_source() / store_dest().  Both expect variables
 * named 'mach' and 'inst' to be in scope at the point of use.
 *
 *   FETCH: read channel CHAN of source operand INDEX into *VAL.
 *   STORE: write *VAL to channel CHAN of destination operand INDEX.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->FullSrcRegisters[INDEX], (CHAN))

#define STORE(VAL, INDEX, CHAN) \
   store_dest(mach, (VAL), &inst->FullDstRegisters[INDEX], inst, (CHAN))
1559
1560
1561 /**
1562 * Execute ARB-style KIL which is predicated by a src register.
1563 * Kill fragment if any of the four values is less than zero.
1564 */
1565 static void
1566 exec_kil(struct tgsi_exec_machine *mach,
1567 const struct tgsi_full_instruction *inst)
1568 {
1569 uint uniquemask;
1570 uint chan_index;
1571 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1572 union tgsi_exec_channel r[1];
1573
1574 /* This mask stores component bits that were already tested. Note that
1575 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1576 * tested. */
1577 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1578
1579 for (chan_index = 0; chan_index < 4; chan_index++)
1580 {
1581 uint swizzle;
1582 uint i;
1583
1584 /* unswizzle channel */
1585 swizzle = tgsi_util_get_full_src_register_extswizzle (
1586 &inst->FullSrcRegisters[0],
1587 chan_index);
1588
1589 /* check if the component has not been already tested */
1590 if (uniquemask & (1 << swizzle))
1591 continue;
1592 uniquemask |= 1 << swizzle;
1593
1594 FETCH(&r[0], 0, chan_index);
1595 for (i = 0; i < 4; i++)
1596 if (r[0].f[i] < 0.0f)
1597 kilmask |= 1 << i;
1598 }
1599
1600 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1601 }
1602
1603 /**
1604 * Execute NVIDIA-style KIL which is predicated by a condition code.
1605 * Kill fragment if the condition code is TRUE.
1606 */
1607 static void
1608 exec_kilp(struct tgsi_exec_machine *mach,
1609 const struct tgsi_full_instruction *inst)
1610 {
1611 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1612
1613 if (inst->InstructionExtNv.CondFlowEnable) {
1614 uint swizzle[4];
1615 uint chan_index;
1616
1617 kilmask = 0x0;
1618
1619 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1620 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1621 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1622 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1623
1624 for (chan_index = 0; chan_index < 4; chan_index++)
1625 {
1626 uint i;
1627
1628 for (i = 0; i < 4; i++) {
1629 /* TODO: evaluate the condition code */
1630 if (0)
1631 kilmask |= 1 << i;
1632 }
1633 }
1634 }
1635 else {
1636 /* "unconditional" kil */
1637 kilmask = mach->ExecMask;
1638 }
1639 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1640 }
1641
1642
1643 /*
1644 * Fetch a four texture samples using STR texture coordinates.
1645 */
1646 static void
1647 fetch_texel( struct tgsi_sampler *sampler,
1648 const union tgsi_exec_channel *s,
1649 const union tgsi_exec_channel *t,
1650 const union tgsi_exec_channel *p,
1651 float lodbias, /* XXX should be float[4] */
1652 union tgsi_exec_channel *r,
1653 union tgsi_exec_channel *g,
1654 union tgsi_exec_channel *b,
1655 union tgsi_exec_channel *a )
1656 {
1657 uint j;
1658 float rgba[NUM_CHANNELS][QUAD_SIZE];
1659
1660 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1661
1662 for (j = 0; j < 4; j++) {
1663 r->f[j] = rgba[0][j];
1664 g->f[j] = rgba[1][j];
1665 b->f[j] = rgba[2][j];
1666 a->f[j] = rgba[3][j];
1667 }
1668 }
1669
1670
1671 static void
1672 exec_tex(struct tgsi_exec_machine *mach,
1673 const struct tgsi_full_instruction *inst,
1674 boolean biasLod,
1675 boolean projected)
1676 {
1677 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1678 union tgsi_exec_channel r[4];
1679 uint chan_index;
1680 float lodBias;
1681
1682 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1683
1684 switch (inst->InstructionExtTexture.Texture) {
1685 case TGSI_TEXTURE_1D:
1686 case TGSI_TEXTURE_SHADOW1D:
1687
1688 FETCH(&r[0], 0, CHAN_X);
1689
1690 if (projected) {
1691 FETCH(&r[1], 0, CHAN_W);
1692 micro_div( &r[0], &r[0], &r[1] );
1693 }
1694
1695 if (biasLod) {
1696 FETCH(&r[1], 0, CHAN_W);
1697 lodBias = r[2].f[0];
1698 }
1699 else
1700 lodBias = 0.0;
1701
1702 fetch_texel(mach->Samplers[unit],
1703 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */
1704 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1705 break;
1706
1707 case TGSI_TEXTURE_2D:
1708 case TGSI_TEXTURE_RECT:
1709 case TGSI_TEXTURE_SHADOW2D:
1710 case TGSI_TEXTURE_SHADOWRECT:
1711
1712 FETCH(&r[0], 0, CHAN_X);
1713 FETCH(&r[1], 0, CHAN_Y);
1714 FETCH(&r[2], 0, CHAN_Z);
1715
1716 if (projected) {
1717 FETCH(&r[3], 0, CHAN_W);
1718 micro_div( &r[0], &r[0], &r[3] );
1719 micro_div( &r[1], &r[1], &r[3] );
1720 micro_div( &r[2], &r[2], &r[3] );
1721 }
1722
1723 if (biasLod) {
1724 FETCH(&r[3], 0, CHAN_W);
1725 lodBias = r[3].f[0];
1726 }
1727 else
1728 lodBias = 0.0;
1729
1730 fetch_texel(mach->Samplers[unit],
1731 &r[0], &r[1], &r[2], lodBias, /* inputs */
1732 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1733 break;
1734
1735 case TGSI_TEXTURE_3D:
1736 case TGSI_TEXTURE_CUBE:
1737
1738 FETCH(&r[0], 0, CHAN_X);
1739 FETCH(&r[1], 0, CHAN_Y);
1740 FETCH(&r[2], 0, CHAN_Z);
1741
1742 if (projected) {
1743 FETCH(&r[3], 0, CHAN_W);
1744 micro_div( &r[0], &r[0], &r[3] );
1745 micro_div( &r[1], &r[1], &r[3] );
1746 micro_div( &r[2], &r[2], &r[3] );
1747 }
1748
1749 if (biasLod) {
1750 FETCH(&r[3], 0, CHAN_W);
1751 lodBias = r[3].f[0];
1752 }
1753 else
1754 lodBias = 0.0;
1755
1756 fetch_texel(mach->Samplers[unit],
1757 &r[0], &r[1], &r[2], lodBias,
1758 &r[0], &r[1], &r[2], &r[3]);
1759 break;
1760
1761 default:
1762 assert (0);
1763 }
1764
1765 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1766 STORE( &r[chan_index], 0, chan_index );
1767 }
1768 }
1769
1770
1771 /**
1772 * Evaluate a constant-valued coefficient at the position of the
1773 * current quad.
1774 */
1775 static void
1776 eval_constant_coef(
1777 struct tgsi_exec_machine *mach,
1778 unsigned attrib,
1779 unsigned chan )
1780 {
1781 unsigned i;
1782
1783 for( i = 0; i < QUAD_SIZE; i++ ) {
1784 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1785 }
1786 }
1787
1788 /**
1789 * Evaluate a linear-valued coefficient at the position of the
1790 * current quad.
1791 */
1792 static void
1793 eval_linear_coef(
1794 struct tgsi_exec_machine *mach,
1795 unsigned attrib,
1796 unsigned chan )
1797 {
1798 const float x = mach->QuadPos.xyzw[0].f[0];
1799 const float y = mach->QuadPos.xyzw[1].f[0];
1800 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1801 const float dady = mach->InterpCoefs[attrib].dady[chan];
1802 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1803 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1804 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1805 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1806 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1807 }
1808
1809 /**
1810 * Evaluate a perspective-valued coefficient at the position of the
1811 * current quad.
1812 */
1813 static void
1814 eval_perspective_coef(
1815 struct tgsi_exec_machine *mach,
1816 unsigned attrib,
1817 unsigned chan )
1818 {
1819 const float x = mach->QuadPos.xyzw[0].f[0];
1820 const float y = mach->QuadPos.xyzw[1].f[0];
1821 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1822 const float dady = mach->InterpCoefs[attrib].dady[chan];
1823 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1824 const float *w = mach->QuadPos.xyzw[3].f;
1825 /* divide by W here */
1826 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1827 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1828 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1829 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1830 }
1831
1832
/**
 * Common signature of the eval_*_coef() functions: evaluate channel
 * 'chan' of input attribute 'attrib' for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
1837
1838 static void
1839 exec_declaration(
1840 struct tgsi_exec_machine *mach,
1841 const struct tgsi_full_declaration *decl )
1842 {
1843 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1844 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1845 unsigned first, last, mask;
1846 eval_coef_func eval;
1847
1848 first = decl->DeclarationRange.First;
1849 last = decl->DeclarationRange.Last;
1850 mask = decl->Declaration.UsageMask;
1851
1852 switch( decl->Declaration.Interpolate ) {
1853 case TGSI_INTERPOLATE_CONSTANT:
1854 eval = eval_constant_coef;
1855 break;
1856
1857 case TGSI_INTERPOLATE_LINEAR:
1858 eval = eval_linear_coef;
1859 break;
1860
1861 case TGSI_INTERPOLATE_PERSPECTIVE:
1862 eval = eval_perspective_coef;
1863 break;
1864
1865 default:
1866 eval = NULL;
1867 assert( 0 );
1868 }
1869
1870 if( mask == TGSI_WRITEMASK_XYZW ) {
1871 unsigned i, j;
1872
1873 for( i = first; i <= last; i++ ) {
1874 for( j = 0; j < NUM_CHANNELS; j++ ) {
1875 eval( mach, i, j );
1876 }
1877 }
1878 }
1879 else {
1880 unsigned i, j;
1881
1882 for( j = 0; j < NUM_CHANNELS; j++ ) {
1883 if( mask & (1 << j) ) {
1884 for( i = first; i <= last; i++ ) {
1885 eval( mach, i, j );
1886 }
1887 }
1888 }
1889 }
1890 }
1891 }
1892 }
1893
1894 static void
1895 exec_instruction(
1896 struct tgsi_exec_machine *mach,
1897 const struct tgsi_full_instruction *inst,
1898 int *pc )
1899 {
1900 uint chan_index;
1901 union tgsi_exec_channel r[10];
1902
1903 (*pc)++;
1904
1905 switch (inst->Instruction.Opcode) {
1906 case TGSI_OPCODE_ARL:
1907 case TGSI_OPCODE_FLOOR:
1908 /* TGSI_OPCODE_FLR */
1909 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1910 FETCH( &r[0], 0, chan_index );
1911 micro_flr( &r[0], &r[0] );
1912 STORE( &r[0], 0, chan_index );
1913 }
1914 break;
1915
1916 case TGSI_OPCODE_MOV:
1917 case TGSI_OPCODE_SWZ:
1918 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1919 FETCH( &r[0], 0, chan_index );
1920 STORE( &r[0], 0, chan_index );
1921 }
1922 break;
1923
1924 case TGSI_OPCODE_LIT:
1925 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1926 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1927 }
1928
1929 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1930 FETCH( &r[0], 0, CHAN_X );
1931 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1932 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1933 STORE( &r[0], 0, CHAN_Y );
1934 }
1935
1936 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1937 FETCH( &r[1], 0, CHAN_Y );
1938 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1939
1940 FETCH( &r[2], 0, CHAN_W );
1941 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1942 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1943 micro_pow( &r[1], &r[1], &r[2] );
1944 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1945 STORE( &r[0], 0, CHAN_Z );
1946 }
1947 }
1948
1949 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1950 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1951 }
1952 break;
1953
1954 case TGSI_OPCODE_RCP:
1955 /* TGSI_OPCODE_RECIP */
1956 FETCH( &r[0], 0, CHAN_X );
1957 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1958 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1959 STORE( &r[0], 0, chan_index );
1960 }
1961 break;
1962
1963 case TGSI_OPCODE_RSQ:
1964 /* TGSI_OPCODE_RECIPSQRT */
1965 FETCH( &r[0], 0, CHAN_X );
1966 micro_abs( &r[0], &r[0] );
1967 micro_sqrt( &r[0], &r[0] );
1968 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1969 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1970 STORE( &r[0], 0, chan_index );
1971 }
1972 break;
1973
1974 case TGSI_OPCODE_EXP:
1975 FETCH( &r[0], 0, CHAN_X );
1976 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1977 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1978 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1979 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1980 }
1981 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1982 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1983 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1984 }
1985 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1986 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1987 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1988 }
1989 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1990 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1991 }
1992 break;
1993
1994 case TGSI_OPCODE_LOG:
1995 FETCH( &r[0], 0, CHAN_X );
1996 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1997 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1998 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1999 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2000 STORE( &r[0], 0, CHAN_X );
2001 }
2002 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2003 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
2004 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
2005 STORE( &r[0], 0, CHAN_Y );
2006 }
2007 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2008 STORE( &r[1], 0, CHAN_Z );
2009 }
2010 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2011 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2012 }
2013 break;
2014
2015 case TGSI_OPCODE_MUL:
2016 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
2017 {
2018 FETCH(&r[0], 0, chan_index);
2019 FETCH(&r[1], 1, chan_index);
2020
2021 micro_mul( &r[0], &r[0], &r[1] );
2022
2023 STORE(&r[0], 0, chan_index);
2024 }
2025 break;
2026
2027 case TGSI_OPCODE_ADD:
2028 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2029 FETCH( &r[0], 0, chan_index );
2030 FETCH( &r[1], 1, chan_index );
2031 micro_add( &r[0], &r[0], &r[1] );
2032 STORE( &r[0], 0, chan_index );
2033 }
2034 break;
2035
2036 case TGSI_OPCODE_DP3:
2037 /* TGSI_OPCODE_DOT3 */
2038 FETCH( &r[0], 0, CHAN_X );
2039 FETCH( &r[1], 1, CHAN_X );
2040 micro_mul( &r[0], &r[0], &r[1] );
2041
2042 FETCH( &r[1], 0, CHAN_Y );
2043 FETCH( &r[2], 1, CHAN_Y );
2044 micro_mul( &r[1], &r[1], &r[2] );
2045 micro_add( &r[0], &r[0], &r[1] );
2046
2047 FETCH( &r[1], 0, CHAN_Z );
2048 FETCH( &r[2], 1, CHAN_Z );
2049 micro_mul( &r[1], &r[1], &r[2] );
2050 micro_add( &r[0], &r[0], &r[1] );
2051
2052 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2053 STORE( &r[0], 0, chan_index );
2054 }
2055 break;
2056
2057 case TGSI_OPCODE_DP4:
2058 /* TGSI_OPCODE_DOT4 */
2059 FETCH(&r[0], 0, CHAN_X);
2060 FETCH(&r[1], 1, CHAN_X);
2061
2062 micro_mul( &r[0], &r[0], &r[1] );
2063
2064 FETCH(&r[1], 0, CHAN_Y);
2065 FETCH(&r[2], 1, CHAN_Y);
2066
2067 micro_mul( &r[1], &r[1], &r[2] );
2068 micro_add( &r[0], &r[0], &r[1] );
2069
2070 FETCH(&r[1], 0, CHAN_Z);
2071 FETCH(&r[2], 1, CHAN_Z);
2072
2073 micro_mul( &r[1], &r[1], &r[2] );
2074 micro_add( &r[0], &r[0], &r[1] );
2075
2076 FETCH(&r[1], 0, CHAN_W);
2077 FETCH(&r[2], 1, CHAN_W);
2078
2079 micro_mul( &r[1], &r[1], &r[2] );
2080 micro_add( &r[0], &r[0], &r[1] );
2081
2082 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2083 STORE( &r[0], 0, chan_index );
2084 }
2085 break;
2086
2087 case TGSI_OPCODE_DST:
2088 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2089 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2090 }
2091
2092 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2093 FETCH( &r[0], 0, CHAN_Y );
2094 FETCH( &r[1], 1, CHAN_Y);
2095 micro_mul( &r[0], &r[0], &r[1] );
2096 STORE( &r[0], 0, CHAN_Y );
2097 }
2098
2099 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2100 FETCH( &r[0], 0, CHAN_Z );
2101 STORE( &r[0], 0, CHAN_Z );
2102 }
2103
2104 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2105 FETCH( &r[0], 1, CHAN_W );
2106 STORE( &r[0], 0, CHAN_W );
2107 }
2108 break;
2109
2110 case TGSI_OPCODE_MIN:
2111 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2112 FETCH(&r[0], 0, chan_index);
2113 FETCH(&r[1], 1, chan_index);
2114
2115 /* XXX use micro_min()?? */
2116 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
2117
2118 STORE(&r[0], 0, chan_index);
2119 }
2120 break;
2121
2122 case TGSI_OPCODE_MAX:
2123 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2124 FETCH(&r[0], 0, chan_index);
2125 FETCH(&r[1], 1, chan_index);
2126
2127 /* XXX use micro_max()?? */
2128 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2129
2130 STORE(&r[0], 0, chan_index );
2131 }
2132 break;
2133
2134 case TGSI_OPCODE_SLT:
2135 /* TGSI_OPCODE_SETLT */
2136 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2137 FETCH( &r[0], 0, chan_index );
2138 FETCH( &r[1], 1, chan_index );
2139 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2140 STORE( &r[0], 0, chan_index );
2141 }
2142 break;
2143
2144 case TGSI_OPCODE_SGE:
2145 /* TGSI_OPCODE_SETGE */
2146 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2147 FETCH( &r[0], 0, chan_index );
2148 FETCH( &r[1], 1, chan_index );
2149 micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2150 STORE( &r[0], 0, chan_index );
2151 }
2152 break;
2153
2154 case TGSI_OPCODE_MAD:
2155 /* TGSI_OPCODE_MADD */
2156 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2157 FETCH( &r[0], 0, chan_index );
2158 FETCH( &r[1], 1, chan_index );
2159 micro_mul( &r[0], &r[0], &r[1] );
2160 FETCH( &r[1], 2, chan_index );
2161 micro_add( &r[0], &r[0], &r[1] );
2162 STORE( &r[0], 0, chan_index );
2163 }
2164 break;
2165
2166 case TGSI_OPCODE_SUB:
2167 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2168 FETCH(&r[0], 0, chan_index);
2169 FETCH(&r[1], 1, chan_index);
2170
2171 micro_sub( &r[0], &r[0], &r[1] );
2172
2173 STORE(&r[0], 0, chan_index);
2174 }
2175 break;
2176
2177 case TGSI_OPCODE_LERP:
2178 /* TGSI_OPCODE_LRP */
2179 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2180 FETCH(&r[0], 0, chan_index);
2181 FETCH(&r[1], 1, chan_index);
2182 FETCH(&r[2], 2, chan_index);
2183
2184 micro_sub( &r[1], &r[1], &r[2] );
2185 micro_mul( &r[0], &r[0], &r[1] );
2186 micro_add( &r[0], &r[0], &r[2] );
2187
2188 STORE(&r[0], 0, chan_index);
2189 }
2190 break;
2191
2192 case TGSI_OPCODE_CND:
2193 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2194 FETCH(&r[0], 0, chan_index);
2195 FETCH(&r[1], 1, chan_index);
2196 FETCH(&r[2], 2, chan_index);
2197 micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2198 STORE(&r[0], 0, chan_index);
2199 }
2200 break;
2201
2202 case TGSI_OPCODE_CND0:
2203 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2204 FETCH(&r[0], 0, chan_index);
2205 FETCH(&r[1], 1, chan_index);
2206 FETCH(&r[2], 2, chan_index);
2207 micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
2208 STORE(&r[0], 0, chan_index);
2209 }
2210 break;
2211
2212 case TGSI_OPCODE_DOT2ADD:
2213 /* TGSI_OPCODE_DP2A */
2214 FETCH( &r[0], 0, CHAN_X );
2215 FETCH( &r[1], 1, CHAN_X );
2216 micro_mul( &r[0], &r[0], &r[1] );
2217
2218 FETCH( &r[1], 0, CHAN_Y );
2219 FETCH( &r[2], 1, CHAN_Y );
2220 micro_mul( &r[1], &r[1], &r[2] );
2221 micro_add( &r[0], &r[0], &r[1] );
2222
2223 FETCH( &r[2], 2, CHAN_X );
2224 micro_add( &r[0], &r[0], &r[2] );
2225
2226 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2227 STORE( &r[0], 0, chan_index );
2228 }
2229 break;
2230
2231 case TGSI_OPCODE_INDEX:
2232 /* XXX: considered for removal */
2233 assert (0);
2234 break;
2235
2236 case TGSI_OPCODE_NEGATE:
2237 /* XXX: considered for removal */
2238 assert (0);
2239 break;
2240
2241 case TGSI_OPCODE_FRAC:
2242 /* TGSI_OPCODE_FRC */
2243 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2244 FETCH( &r[0], 0, chan_index );
2245 micro_frc( &r[0], &r[0] );
2246 STORE( &r[0], 0, chan_index );
2247 }
2248 break;
2249
2250 case TGSI_OPCODE_CLAMP:
2251 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2252 FETCH(&r[0], 0, chan_index);
2253 FETCH(&r[1], 1, chan_index);
2254 micro_max(&r[0], &r[0], &r[1]);
2255 FETCH(&r[1], 2, chan_index);
2256 micro_min(&r[0], &r[0], &r[1]);
2257 STORE(&r[0], 0, chan_index);
2258 }
2259 break;
2260
2261 case TGSI_OPCODE_ROUND:
2262 case TGSI_OPCODE_ARR:
2263 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2264 FETCH( &r[0], 0, chan_index );
2265 micro_rnd( &r[0], &r[0] );
2266 STORE( &r[0], 0, chan_index );
2267 }
2268 break;
2269
2270 case TGSI_OPCODE_EXPBASE2:
2271 /* TGSI_OPCODE_EX2 */
2272 FETCH(&r[0], 0, CHAN_X);
2273
2274 #if FAST_MATH
2275 micro_exp2( &r[0], &r[0] );
2276 #else
2277 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2278 #endif
2279
2280 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2281 STORE( &r[0], 0, chan_index );
2282 }
2283 break;
2284
2285 case TGSI_OPCODE_LOGBASE2:
2286 /* TGSI_OPCODE_LG2 */
2287 FETCH( &r[0], 0, CHAN_X );
2288 micro_lg2( &r[0], &r[0] );
2289 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2290 STORE( &r[0], 0, chan_index );
2291 }
2292 break;
2293
2294 case TGSI_OPCODE_POWER:
2295 /* TGSI_OPCODE_POW */
2296 FETCH(&r[0], 0, CHAN_X);
2297 FETCH(&r[1], 1, CHAN_X);
2298
2299 micro_pow( &r[0], &r[0], &r[1] );
2300
2301 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2302 STORE( &r[0], 0, chan_index );
2303 }
2304 break;
2305
2306 case TGSI_OPCODE_CROSSPRODUCT:
2307 /* TGSI_OPCODE_XPD */
2308 FETCH(&r[0], 0, CHAN_Y);
2309 FETCH(&r[1], 1, CHAN_Z);
2310
2311 micro_mul( &r[2], &r[0], &r[1] );
2312
2313 FETCH(&r[3], 0, CHAN_Z);
2314 FETCH(&r[4], 1, CHAN_Y);
2315
2316 micro_mul( &r[5], &r[3], &r[4] );
2317 micro_sub( &r[2], &r[2], &r[5] );
2318
2319 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2320 STORE( &r[2], 0, CHAN_X );
2321 }
2322
2323 FETCH(&r[2], 1, CHAN_X);
2324
2325 micro_mul( &r[3], &r[3], &r[2] );
2326
2327 FETCH(&r[5], 0, CHAN_X);
2328
2329 micro_mul( &r[1], &r[1], &r[5] );
2330 micro_sub( &r[3], &r[3], &r[1] );
2331
2332 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2333 STORE( &r[3], 0, CHAN_Y );
2334 }
2335
2336 micro_mul( &r[5], &r[5], &r[4] );
2337 micro_mul( &r[0], &r[0], &r[2] );
2338 micro_sub( &r[5], &r[5], &r[0] );
2339
2340 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2341 STORE( &r[5], 0, CHAN_Z );
2342 }
2343
2344 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2345 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2346 }
2347 break;
2348
2349 case TGSI_OPCODE_MULTIPLYMATRIX:
2350 /* XXX: considered for removal */
2351 assert (0);
2352 break;
2353
2354 case TGSI_OPCODE_ABS:
2355 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2356 FETCH(&r[0], 0, chan_index);
2357
2358 micro_abs( &r[0], &r[0] );
2359
2360 STORE(&r[0], 0, chan_index);
2361 }
2362 break;
2363
2364 case TGSI_OPCODE_RCC:
2365 FETCH(&r[0], 0, CHAN_X);
2366 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2367 micro_float_clamp(&r[0], &r[0]);
2368 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2369 STORE(&r[0], 0, chan_index);
2370 }
2371 break;
2372
2373 case TGSI_OPCODE_DPH:
2374 FETCH(&r[0], 0, CHAN_X);
2375 FETCH(&r[1], 1, CHAN_X);
2376
2377 micro_mul( &r[0], &r[0], &r[1] );
2378
2379 FETCH(&r[1], 0, CHAN_Y);
2380 FETCH(&r[2], 1, CHAN_Y);
2381
2382 micro_mul( &r[1], &r[1], &r[2] );
2383 micro_add( &r[0], &r[0], &r[1] );
2384
2385 FETCH(&r[1], 0, CHAN_Z);
2386 FETCH(&r[2], 1, CHAN_Z);
2387
2388 micro_mul( &r[1], &r[1], &r[2] );
2389 micro_add( &r[0], &r[0], &r[1] );
2390
2391 FETCH(&r[1], 1, CHAN_W);
2392
2393 micro_add( &r[0], &r[0], &r[1] );
2394
2395 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2396 STORE( &r[0], 0, chan_index );
2397 }
2398 break;
2399
2400 case TGSI_OPCODE_COS:
2401 FETCH(&r[0], 0, CHAN_X);
2402
2403 micro_cos( &r[0], &r[0] );
2404
2405 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2406 STORE( &r[0], 0, chan_index );
2407 }
2408 break;
2409
2410 case TGSI_OPCODE_DDX:
2411 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2412 FETCH( &r[0], 0, chan_index );
2413 micro_ddx( &r[0], &r[0] );
2414 STORE( &r[0], 0, chan_index );
2415 }
2416 break;
2417
2418 case TGSI_OPCODE_DDY:
2419 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2420 FETCH( &r[0], 0, chan_index );
2421 micro_ddy( &r[0], &r[0] );
2422 STORE( &r[0], 0, chan_index );
2423 }
2424 break;
2425
2426 case TGSI_OPCODE_KILP:
2427 exec_kilp (mach, inst);
2428 break;
2429
2430 case TGSI_OPCODE_KIL:
2431 exec_kil (mach, inst);
2432 break;
2433
2434 case TGSI_OPCODE_PK2H:
2435 assert (0);
2436 break;
2437
2438 case TGSI_OPCODE_PK2US:
2439 assert (0);
2440 break;
2441
2442 case TGSI_OPCODE_PK4B:
2443 assert (0);
2444 break;
2445
2446 case TGSI_OPCODE_PK4UB:
2447 assert (0);
2448 break;
2449
2450 case TGSI_OPCODE_RFL:
2451 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2452 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2453 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2454 /* r0 = dp3(src0, src0) */
2455 FETCH(&r[2], 0, CHAN_X);
2456 micro_mul(&r[0], &r[2], &r[2]);
2457 FETCH(&r[4], 0, CHAN_Y);
2458 micro_mul(&r[8], &r[4], &r[4]);
2459 micro_add(&r[0], &r[0], &r[8]);
2460 FETCH(&r[6], 0, CHAN_Z);
2461 micro_mul(&r[8], &r[6], &r[6]);
2462 micro_add(&r[0], &r[0], &r[8]);
2463
2464 /* r1 = dp3(src0, src1) */
2465 FETCH(&r[3], 1, CHAN_X);
2466 micro_mul(&r[1], &r[2], &r[3]);
2467 FETCH(&r[5], 1, CHAN_Y);
2468 micro_mul(&r[8], &r[4], &r[5]);
2469 micro_add(&r[1], &r[1], &r[8]);
2470 FETCH(&r[7], 1, CHAN_Z);
2471 micro_mul(&r[8], &r[6], &r[7]);
2472 micro_add(&r[1], &r[1], &r[8]);
2473
2474 /* r1 = 2 * r1 / r0 */
2475 micro_add(&r[1], &r[1], &r[1]);
2476 micro_div(&r[1], &r[1], &r[0]);
2477
2478 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2479 micro_mul(&r[2], &r[2], &r[1]);
2480 micro_sub(&r[2], &r[2], &r[3]);
2481 STORE(&r[2], 0, CHAN_X);
2482 }
2483 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2484 micro_mul(&r[4], &r[4], &r[1]);
2485 micro_sub(&r[4], &r[4], &r[5]);
2486 STORE(&r[4], 0, CHAN_Y);
2487 }
2488 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2489 micro_mul(&r[6], &r[6], &r[1]);
2490 micro_sub(&r[6], &r[6], &r[7]);
2491 STORE(&r[6], 0, CHAN_Z);
2492 }
2493 }
2494 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2495 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2496 }
2497 break;
2498
2499 case TGSI_OPCODE_SEQ:
2500 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2501 FETCH( &r[0], 0, chan_index );
2502 FETCH( &r[1], 1, chan_index );
2503 micro_eq( &r[0], &r[0], &r[1],
2504 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2505 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2506 STORE( &r[0], 0, chan_index );
2507 }
2508 break;
2509
2510 case TGSI_OPCODE_SFL:
2511 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2512 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2513 }
2514 break;
2515
2516 case TGSI_OPCODE_SGT:
2517 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2518 FETCH( &r[0], 0, chan_index );
2519 FETCH( &r[1], 1, chan_index );
2520 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2521 STORE( &r[0], 0, chan_index );
2522 }
2523 break;
2524
2525 case TGSI_OPCODE_SIN:
2526 FETCH( &r[0], 0, CHAN_X );
2527 micro_sin( &r[0], &r[0] );
2528 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2529 STORE( &r[0], 0, chan_index );
2530 }
2531 break;
2532
2533 case TGSI_OPCODE_SLE:
2534 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2535 FETCH( &r[0], 0, chan_index );
2536 FETCH( &r[1], 1, chan_index );
2537 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2538 STORE( &r[0], 0, chan_index );
2539 }
2540 break;
2541
2542 case TGSI_OPCODE_SNE:
2543 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2544 FETCH( &r[0], 0, chan_index );
2545 FETCH( &r[1], 1, chan_index );
2546 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2547 STORE( &r[0], 0, chan_index );
2548 }
2549 break;
2550
2551 case TGSI_OPCODE_STR:
2552 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2553 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2554 }
2555 break;
2556
2557 case TGSI_OPCODE_TEX:
2558 /* simple texture lookup */
2559 /* src[0] = texcoord */
2560 /* src[1] = sampler unit */
2561 exec_tex(mach, inst, FALSE, FALSE);
2562 break;
2563
2564 case TGSI_OPCODE_TXB:
2565 /* Texture lookup with lod bias */
2566 /* src[0] = texcoord (src[0].w = LOD bias) */
2567 /* src[1] = sampler unit */
2568 exec_tex(mach, inst, TRUE, FALSE);
2569 break;
2570
2571 case TGSI_OPCODE_TXD:
2572 /* Texture lookup with explicit partial derivatives */
2573 /* src[0] = texcoord */
2574 /* src[1] = d[strq]/dx */
2575 /* src[2] = d[strq]/dy */
2576 /* src[3] = sampler unit */
2577 assert (0);
2578 break;
2579
2580 case TGSI_OPCODE_TXL:
2581 /* Texture lookup with explicit LOD */
2582 /* src[0] = texcoord (src[0].w = LOD) */
2583 /* src[1] = sampler unit */
2584 exec_tex(mach, inst, TRUE, FALSE);
2585 break;
2586
2587 case TGSI_OPCODE_TXP:
2588 /* Texture lookup with projection */
2589 /* src[0] = texcoord (src[0].w = projection) */
2590 /* src[1] = sampler unit */
2591 exec_tex(mach, inst, FALSE, TRUE);
2592 break;
2593
2594 case TGSI_OPCODE_UP2H:
2595 assert (0);
2596 break;
2597
2598 case TGSI_OPCODE_UP2US:
2599 assert (0);
2600 break;
2601
2602 case TGSI_OPCODE_UP4B:
2603 assert (0);
2604 break;
2605
2606 case TGSI_OPCODE_UP4UB:
2607 assert (0);
2608 break;
2609
2610 case TGSI_OPCODE_X2D:
2611 FETCH(&r[0], 1, CHAN_X);
2612 FETCH(&r[1], 1, CHAN_Y);
2613 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2614 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2615 FETCH(&r[2], 2, CHAN_X);
2616 micro_mul(&r[2], &r[2], &r[0]);
2617 FETCH(&r[3], 2, CHAN_Y);
2618 micro_mul(&r[3], &r[3], &r[1]);
2619 micro_add(&r[2], &r[2], &r[3]);
2620 FETCH(&r[3], 0, CHAN_X);
2621 micro_add(&r[2], &r[2], &r[3]);
2622 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2623 STORE(&r[2], 0, CHAN_X);
2624 }
2625 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2626 STORE(&r[2], 0, CHAN_Z);
2627 }
2628 }
2629 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2630 IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2631 FETCH(&r[2], 2, CHAN_Z);
2632 micro_mul(&r[2], &r[2], &r[0]);
2633 FETCH(&r[3], 2, CHAN_W);
2634 micro_mul(&r[3], &r[3], &r[1]);
2635 micro_add(&r[2], &r[2], &r[3]);
2636 FETCH(&r[3], 0, CHAN_Y);
2637 micro_add(&r[2], &r[2], &r[3]);
2638 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2639 STORE(&r[2], 0, CHAN_Y);
2640 }
2641 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2642 STORE(&r[2], 0, CHAN_W);
2643 }
2644 }
2645 break;
2646
2647 case TGSI_OPCODE_ARA:
2648 assert (0);
2649 break;
2650
2651 case TGSI_OPCODE_BRA:
2652 assert (0);
2653 break;
2654
2655 case TGSI_OPCODE_CAL:
2656 /* skip the call if no execution channels are enabled */
2657 if (mach->ExecMask) {
2658 /* do the call */
2659
2660 /* push the Cond, Loop, Cont stacks */
2661 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2662 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2663 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2664 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2665 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2666 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2667
2668 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2669 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2670
2671 /* note that PC was already incremented above */
2672 mach->CallStack[mach->CallStackTop++] = *pc;
2673 *pc = inst->InstructionExtLabel.Label;
2674 }
2675 break;
2676
2677 case TGSI_OPCODE_RET:
2678 mach->FuncMask &= ~mach->ExecMask;
2679 UPDATE_EXEC_MASK(mach);
2680
2681 if (mach->FuncMask == 0x0) {
2682 /* really return now (otherwise, keep executing) */
2683
2684 if (mach->CallStackTop == 0) {
2685 /* returning from main() */
2686 *pc = -1;
2687 return;
2688 }
2689 *pc = mach->CallStack[--mach->CallStackTop];
2690
2691 /* pop the Cond, Loop, Cont stacks */
2692 assert(mach->CondStackTop > 0);
2693 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2694 assert(mach->LoopStackTop > 0);
2695 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2696 assert(mach->ContStackTop > 0);
2697 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2698 assert(mach->FuncStackTop > 0);
2699 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2700
2701 UPDATE_EXEC_MASK(mach);
2702 }
2703 break;
2704
2705 case TGSI_OPCODE_SSG:
2706 /* TGSI_OPCODE_SGN */
2707 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2708 FETCH( &r[0], 0, chan_index );
2709 micro_sgn( &r[0], &r[0] );
2710 STORE( &r[0], 0, chan_index );
2711 }
2712 break;
2713
2714 case TGSI_OPCODE_CMP:
2715 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2716 FETCH(&r[0], 0, chan_index);
2717 FETCH(&r[1], 1, chan_index);
2718 FETCH(&r[2], 2, chan_index);
2719
2720 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2721
2722 STORE(&r[0], 0, chan_index);
2723 }
2724 break;
2725
2726 case TGSI_OPCODE_SCS:
2727 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2728 FETCH( &r[0], 0, CHAN_X );
2729 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2730 micro_cos(&r[1], &r[0]);
2731 STORE(&r[1], 0, CHAN_X);
2732 }
2733 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2734 micro_sin(&r[1], &r[0]);
2735 STORE(&r[1], 0, CHAN_Y);
2736 }
2737 }
2738 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2739 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2740 }
2741 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2742 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2743 }
2744 break;
2745
2746 case TGSI_OPCODE_NRM:
2747 /* 3-component vector normalize */
2748 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2749 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2750 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2751 /* r3 = sqrt(dp3(src0, src0)) */
2752 FETCH(&r[0], 0, CHAN_X);
2753 micro_mul(&r[3], &r[0], &r[0]);
2754 FETCH(&r[1], 0, CHAN_Y);
2755 micro_mul(&r[4], &r[1], &r[1]);
2756 micro_add(&r[3], &r[3], &r[4]);
2757 FETCH(&r[2], 0, CHAN_Z);
2758 micro_mul(&r[4], &r[2], &r[2]);
2759 micro_add(&r[3], &r[3], &r[4]);
2760 micro_sqrt(&r[3], &r[3]);
2761
2762 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2763 micro_div(&r[0], &r[0], &r[3]);
2764 STORE(&r[0], 0, CHAN_X);
2765 }
2766 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2767 micro_div(&r[1], &r[1], &r[3]);
2768 STORE(&r[1], 0, CHAN_Y);
2769 }
2770 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2771 micro_div(&r[2], &r[2], &r[3]);
2772 STORE(&r[2], 0, CHAN_Z);
2773 }
2774 }
2775 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2776 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2777 }
2778 break;
2779
2780 case TGSI_OPCODE_NRM4:
2781 /* 4-component vector normalize */
2782 {
2783 union tgsi_exec_channel tmp, dot;
2784
2785 /* tmp = dp4(src0, src0): */
2786 FETCH( &r[0], 0, CHAN_X );
2787 micro_mul( &tmp, &r[0], &r[0] );
2788
2789 FETCH( &r[1], 0, CHAN_Y );
2790 micro_mul( &dot, &r[1], &r[1] );
2791 micro_add( &tmp, &tmp, &dot );
2792
2793 FETCH( &r[2], 0, CHAN_Z );
2794 micro_mul( &dot, &r[2], &r[2] );
2795 micro_add( &tmp, &tmp, &dot );
2796
2797 FETCH( &r[3], 0, CHAN_W );
2798 micro_mul( &dot, &r[3], &r[3] );
2799 micro_add( &tmp, &tmp, &dot );
2800
2801 /* tmp = 1 / sqrt(tmp) */
2802 micro_sqrt( &tmp, &tmp );
2803 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2804
2805 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2806 /* chan = chan * tmp */
2807 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2808 STORE( &r[chan_index], 0, chan_index );
2809 }
2810 }
2811 break;
2812
2813 case TGSI_OPCODE_DIV:
2814 assert( 0 );
2815 break;
2816
2817 case TGSI_OPCODE_DP2:
2818 FETCH( &r[0], 0, CHAN_X );
2819 FETCH( &r[1], 1, CHAN_X );
2820 micro_mul( &r[0], &r[0], &r[1] );
2821
2822 FETCH( &r[1], 0, CHAN_Y );
2823 FETCH( &r[2], 1, CHAN_Y );
2824 micro_mul( &r[1], &r[1], &r[2] );
2825 micro_add( &r[0], &r[0], &r[1] );
2826
2827 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2828 STORE( &r[0], 0, chan_index );
2829 }
2830 break;
2831
2832 case TGSI_OPCODE_IF:
2833 /* push CondMask */
2834 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2835 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2836 FETCH( &r[0], 0, CHAN_X );
2837 /* update CondMask */
2838 if( ! r[0].u[0] ) {
2839 mach->CondMask &= ~0x1;
2840 }
2841 if( ! r[0].u[1] ) {
2842 mach->CondMask &= ~0x2;
2843 }
2844 if( ! r[0].u[2] ) {
2845 mach->CondMask &= ~0x4;
2846 }
2847 if( ! r[0].u[3] ) {
2848 mach->CondMask &= ~0x8;
2849 }
2850 UPDATE_EXEC_MASK(mach);
2851 /* Todo: If CondMask==0, jump to ELSE */
2852 break;
2853
2854 case TGSI_OPCODE_ELSE:
2855 /* invert CondMask wrt previous mask */
2856 {
2857 uint prevMask;
2858 assert(mach->CondStackTop > 0);
2859 prevMask = mach->CondStack[mach->CondStackTop - 1];
2860 mach->CondMask = ~mach->CondMask & prevMask;
2861 UPDATE_EXEC_MASK(mach);
2862 /* Todo: If CondMask==0, jump to ENDIF */
2863 }
2864 break;
2865
2866 case TGSI_OPCODE_ENDIF:
2867 /* pop CondMask */
2868 assert(mach->CondStackTop > 0);
2869 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2870 UPDATE_EXEC_MASK(mach);
2871 break;
2872
2873 case TGSI_OPCODE_END:
2874 /* halt execution */
2875 *pc = -1;
2876 break;
2877
2878 case TGSI_OPCODE_REP:
2879 assert (0);
2880 break;
2881
2882 case TGSI_OPCODE_ENDREP:
2883 assert (0);
2884 break;
2885
2886 case TGSI_OPCODE_PUSHA:
2887 assert (0);
2888 break;
2889
2890 case TGSI_OPCODE_POPA:
2891 assert (0);
2892 break;
2893
2894 case TGSI_OPCODE_CEIL:
2895 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2896 FETCH( &r[0], 0, chan_index );
2897 micro_ceil( &r[0], &r[0] );
2898 STORE( &r[0], 0, chan_index );
2899 }
2900 break;
2901
2902 case TGSI_OPCODE_I2F:
2903 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2904 FETCH( &r[0], 0, chan_index );
2905 micro_i2f( &r[0], &r[0] );
2906 STORE( &r[0], 0, chan_index );
2907 }
2908 break;
2909
2910 case TGSI_OPCODE_NOT:
2911 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2912 FETCH( &r[0], 0, chan_index );
2913 micro_not( &r[0], &r[0] );
2914 STORE( &r[0], 0, chan_index );
2915 }
2916 break;
2917
2918 case TGSI_OPCODE_TRUNC:
2919 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2920 FETCH( &r[0], 0, chan_index );
2921 micro_trunc( &r[0], &r[0] );
2922 STORE( &r[0], 0, chan_index );
2923 }
2924 break;
2925
2926 case TGSI_OPCODE_SHL:
2927 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2928 FETCH( &r[0], 0, chan_index );
2929 FETCH( &r[1], 1, chan_index );
2930 micro_shl( &r[0], &r[0], &r[1] );
2931 STORE( &r[0], 0, chan_index );
2932 }
2933 break;
2934
2935 case TGSI_OPCODE_SHR:
2936 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2937 FETCH( &r[0], 0, chan_index );
2938 FETCH( &r[1], 1, chan_index );
2939 micro_ishr( &r[0], &r[0], &r[1] );
2940 STORE( &r[0], 0, chan_index );
2941 }
2942 break;
2943
2944 case TGSI_OPCODE_AND:
2945 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2946 FETCH( &r[0], 0, chan_index );
2947 FETCH( &r[1], 1, chan_index );
2948 micro_and( &r[0], &r[0], &r[1] );
2949 STORE( &r[0], 0, chan_index );
2950 }
2951 break;
2952
2953 case TGSI_OPCODE_OR:
2954 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2955 FETCH( &r[0], 0, chan_index );
2956 FETCH( &r[1], 1, chan_index );
2957 micro_or( &r[0], &r[0], &r[1] );
2958 STORE( &r[0], 0, chan_index );
2959 }
2960 break;
2961
2962 case TGSI_OPCODE_MOD:
2963 assert (0);
2964 break;
2965
2966 case TGSI_OPCODE_XOR:
2967 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2968 FETCH( &r[0], 0, chan_index );
2969 FETCH( &r[1], 1, chan_index );
2970 micro_xor( &r[0], &r[0], &r[1] );
2971 STORE( &r[0], 0, chan_index );
2972 }
2973 break;
2974
2975 case TGSI_OPCODE_SAD:
2976 assert (0);
2977 break;
2978
2979 case TGSI_OPCODE_TXF:
2980 assert (0);
2981 break;
2982
2983 case TGSI_OPCODE_TXQ:
2984 assert (0);
2985 break;
2986
2987 case TGSI_OPCODE_EMIT:
2988 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2989 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2990 break;
2991
2992 case TGSI_OPCODE_ENDPRIM:
2993 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2994 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2995 break;
2996
2997 case TGSI_OPCODE_LOOP:
2998 /* fall-through (for now) */
2999 case TGSI_OPCODE_BGNLOOP2:
3000 /* push LoopMask and ContMasks */
3001 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3002 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3003 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3004 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3005 break;
3006
3007 case TGSI_OPCODE_ENDLOOP:
3008 /* fall-through (for now at least) */
3009 case TGSI_OPCODE_ENDLOOP2:
3010 /* Restore ContMask, but don't pop */
3011 assert(mach->ContStackTop > 0);
3012 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3013 UPDATE_EXEC_MASK(mach);
3014 if (mach->ExecMask) {
3015 /* repeat loop: jump to instruction just past BGNLOOP */
3016 *pc = inst->InstructionExtLabel.Label + 1;
3017 }
3018 else {
3019 /* exit loop: pop LoopMask */
3020 assert(mach->LoopStackTop > 0);
3021 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3022 /* pop ContMask */
3023 assert(mach->ContStackTop > 0);
3024 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3025 }
3026 UPDATE_EXEC_MASK(mach);
3027 break;
3028
3029 case TGSI_OPCODE_BRK:
3030 /* turn off loop channels for each enabled exec channel */
3031 mach->LoopMask &= ~mach->ExecMask;
3032 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3033 UPDATE_EXEC_MASK(mach);
3034 break;
3035
3036 case TGSI_OPCODE_CONT:
3037 /* turn off cont channels for each enabled exec channel */
3038 mach->ContMask &= ~mach->ExecMask;
3039 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3040 UPDATE_EXEC_MASK(mach);
3041 break;
3042
3043 case TGSI_OPCODE_BGNSUB:
3044 /* no-op */
3045 break;
3046
3047 case TGSI_OPCODE_ENDSUB:
3048 /* no-op */
3049 break;
3050
3051 case TGSI_OPCODE_NOISE1:
3052 assert( 0 );
3053 break;
3054
3055 case TGSI_OPCODE_NOISE2:
3056 assert( 0 );
3057 break;
3058
3059 case TGSI_OPCODE_NOISE3:
3060 assert( 0 );
3061 break;
3062
3063 case TGSI_OPCODE_NOISE4:
3064 assert( 0 );
3065 break;
3066
3067 case TGSI_OPCODE_NOP:
3068 break;
3069
3070 default:
3071 assert( 0 );
3072 }
3073 }
3074
3075
3076 /**
3077 * Run TGSI interpreter.
3078 * \return bitmask of "alive" quad components
3079 */
3080 uint
3081 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3082 {
3083 uint i;
3084 int pc = 0;
3085
3086 mach->CondMask = 0xf;
3087 mach->LoopMask = 0xf;
3088 mach->ContMask = 0xf;
3089 mach->FuncMask = 0xf;
3090 mach->ExecMask = 0xf;
3091
3092 mach->CondStackTop = 0; /* temporarily subvert this assertion */
3093 assert(mach->CondStackTop == 0);
3094 assert(mach->LoopStackTop == 0);
3095 assert(mach->ContStackTop == 0);
3096 assert(mach->CallStackTop == 0);
3097
3098 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3099 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3100
3101 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3102 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3103 mach->Primitives[0] = 0;
3104 }
3105
3106 for (i = 0; i < QUAD_SIZE; i++) {
3107 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3108 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3109 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3110 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3111 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3112 }
3113
3114 /* execute declarations (interpolants) */
3115 for (i = 0; i < mach->NumDeclarations; i++) {
3116 exec_declaration( mach, mach->Declarations+i );
3117 }
3118
3119 /* execute instructions, until pc is set to -1 */
3120 while (pc != -1) {
3121 assert(pc < (int) mach->NumInstructions);
3122 exec_instruction( mach, mach->Instructions + pc, &pc );
3123 }
3124
3125 #if 0
3126 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3127 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3128 /*
3129 * Scale back depth component.
3130 */
3131 for (i = 0; i < 4; i++)
3132 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3133 }
3134 #endif
3135
3136 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3137 }