Merge remote branch 'origin/mesa_7_6_branch'
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from four other masks (CondMask, LoopMask, ContMask
44 * and FuncMask; see UPDATE_EXEC_MASK). The first three are controlled by the
45 * flow control instructions (namely: IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_dump.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
60 #include "util/u_memory.h"
61 #include "util/u_math.h"
62
/*
 * When non-zero, micro_exp2(), micro_lg2() and micro_pow() in this file use
 * the util_fast_* helpers; when zero they use powf()/logf() instead.
 */
63 #define FAST_MATH 1
64
65 /** for tgsi_full_instruction::Flags */
/* Set by tgsi_exec_machine_bind_shader() when tgsi_check_soa_dependencies()
 * reports a hazard for the instruction. */
66 #define SOA_DEPENDENCY_FLAG 0x1
67
/* Lane indices within a channel; micro_ddx() and micro_ddy() use them to
 * pick the operands of their differences. */
68 #define TILE_TOP_LEFT 0
69 #define TILE_TOP_RIGHT 1
70 #define TILE_BOTTOM_LEFT 2
71 #define TILE_BOTTOM_RIGHT 3
72
/* Channel (component) indices. */
73 #define CHAN_X 0
74 #define CHAN_Y 1
75 #define CHAN_Z 2
76 #define CHAN_W 3
77
78 /*
79 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
80 */
/* tgsi_exec_machine_create() seeds the 0, 7F, 80, FF, 1, 2, 128, M128, 3 and
 * HALF slots with the constant their name suggests. */
81 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
82 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
83 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
84 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
85 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
86 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
87 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
88 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
89 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
90 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
91 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
92 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
93 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
94 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
95 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
96 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
97 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
98 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
99 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
100 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
101 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
102 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
103 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
104 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
105 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
106 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
107 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
108 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
109 #define TEMP_R0 TGSI_EXEC_TEMP_R0
110
/** Non-zero if channel CHAN is set in the write mask of INST's destination 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test as IS_CHANNEL_ENABLED, but against destination register 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel index
 * below NUM_CHANNELS that is enabled in destination 0's write mask.
 * CHAN must be an assignable integer variable.
 * The expansion ends in a bare 'if', so do not follow the body with 'else'.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same as FOR_EACH_ENABLED_CHANNEL, but for destination register 1. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))


/**
 * The execution mask is the bitwise AND of the conditional, loop, continue
 * and function masks.
 * The argument and the whole expansion are parenthesized so that any pointer
 * expression (e.g. &machine) can be passed and the macro can be used as an
 * ordinary expression.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
129
130
131 static const union tgsi_exec_channel ZeroVec =
132 { { 0.0, 0.0, 0.0, 0.0 } };
133
134
135 #ifdef DEBUG
/**
 * Debug-only sanity check: asserts that none of the four float lanes of
 * \p chan is flagged by util_is_inf_or_nan().
 * Lanes are checked in order 0..3, so the first offending lane is the one
 * reported by the failing assertion.
 */
136 static void
137 check_inf_or_nan(const union tgsi_exec_channel *chan)
138 {
139 assert(!util_is_inf_or_nan(chan->f[0]));
140 assert(!util_is_inf_or_nan(chan->f[1]));
141 assert(!util_is_inf_or_nan(chan->f[2]));
142 assert(!util_is_inf_or_nan(chan->f[3]));
143 }
144 #endif
145
146
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of \p chan via debug_printf(),
 * prefixed with the caller-supplied label \p msg.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
155
156
#ifdef DEBUG
/**
 * Debug helper: dump temporary register \p index of \p mach.
 * Prints a header line followed by one line per component (X, Y, Z, W),
 * each showing that component's four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   const struct tgsi_exec_vector *tmp = &mach->Temps[index];
   const char *names = "XYZW";
   int comp = 0;

   debug_printf("Temp[%u] =\n", index);
   while (comp < 4) {
      const float *lanes = tmp->xyzw[comp].f;

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   names[comp],
                   lanes[0],
                   lanes[1],
                   lanes[2],
                   lanes[3]);
      comp++;
   }
}
#endif
174
175
176 /**
177 * Check if there's a potential src/dst register data dependency when
178 * using SOA execution.
179 * Example:
180 * MOV T, T.yxwz;
181 * This would expand into:
182 * MOV t0, t1;
183 * MOV t1, t0;
184 * MOV t2, t3;
185 * MOV t3, t2;
186 * The second instruction will have the wrong value for t0 if executed as-is.
187 */
188 boolean
189 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
190 {
191 uint i, chan;
192
193 uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
194 if (writemask == TGSI_WRITEMASK_X ||
195 writemask == TGSI_WRITEMASK_Y ||
196 writemask == TGSI_WRITEMASK_Z ||
197 writemask == TGSI_WRITEMASK_W ||
198 writemask == TGSI_WRITEMASK_NONE) {
199 /* no chance of data dependency */
200 return FALSE;
201 }
202
203 /* loop over src regs */
204 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
205 if ((inst->FullSrcRegisters[i].SrcRegister.File ==
206 inst->FullDstRegisters[0].DstRegister.File) &&
207 (inst->FullSrcRegisters[i].SrcRegister.Index ==
208 inst->FullDstRegisters[0].DstRegister.Index)) {
209 /* loop over dest channels */
210 uint channelsWritten = 0x0;
211 FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
212 /* check if we're reading a channel that's been written */
213 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan);
214 if (channelsWritten & (1 << swizzle)) {
215 return TRUE;
216 }
217
218 channelsWritten |= (1 << chan);
219 }
220 }
221 }
222 return FALSE;
223 }
224
225
226 /**
227 * Initialize machine state by expanding tokens to full instructions,
228 * allocating temporary storage, setting up constants, etc.
229 * After this, we can call tgsi_exec_machine_run() many times.
230 */
231 void
232 tgsi_exec_machine_bind_shader(
233 struct tgsi_exec_machine *mach,
234 const struct tgsi_token *tokens,
235 uint numSamplers,
236 struct tgsi_sampler **samplers)
237 {
238 uint k;
239 struct tgsi_parse_context parse;
240 struct tgsi_exec_labels *labels = &mach->Labels;
241 struct tgsi_full_instruction *instructions;
242 struct tgsi_full_declaration *declarations;
243 uint maxInstructions = 10, numInstructions = 0;
244 uint maxDeclarations = 10, numDeclarations = 0;
245 uint instno = 0;
246
247 #if 0
248 tgsi_dump(tokens, 0);
249 #endif
250
251 util_init_math();
252
253 mach->Tokens = tokens;
254 mach->Samplers = samplers;
255
256 k = tgsi_parse_init (&parse, mach->Tokens);
257 if (k != TGSI_PARSE_OK) {
258 debug_printf( "Problem parsing!\n" );
259 return;
260 }
261
262 mach->Processor = parse.FullHeader.Processor.Processor;
263 mach->ImmLimit = 0;
264 labels->count = 0;
265
266 declarations = (struct tgsi_full_declaration *)
267 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
268
269 if (!declarations) {
270 return;
271 }
272
273 instructions = (struct tgsi_full_instruction *)
274 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
275
276 if (!instructions) {
277 FREE( declarations );
278 return;
279 }
280
281 while( !tgsi_parse_end_of_tokens( &parse ) ) {
282 uint pointer = parse.Position;
283 uint i;
284
285 tgsi_parse_token( &parse );
286 switch( parse.FullToken.Token.Type ) {
287 case TGSI_TOKEN_TYPE_DECLARATION:
288 /* save expanded declaration */
289 if (numDeclarations == maxDeclarations) {
290 declarations = REALLOC(declarations,
291 maxDeclarations
292 * sizeof(struct tgsi_full_declaration),
293 (maxDeclarations + 10)
294 * sizeof(struct tgsi_full_declaration));
295 maxDeclarations += 10;
296 }
297 memcpy(declarations + numDeclarations,
298 &parse.FullToken.FullDeclaration,
299 sizeof(declarations[0]));
300 numDeclarations++;
301 break;
302
303 case TGSI_TOKEN_TYPE_IMMEDIATE:
304 {
305 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
306 assert( size <= 4 );
307 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
308
309 for( i = 0; i < size; i++ ) {
310 mach->Imms[mach->ImmLimit][i] =
311 parse.FullToken.FullImmediate.u[i].Float;
312 }
313 mach->ImmLimit += 1;
314 }
315 break;
316
317 case TGSI_TOKEN_TYPE_INSTRUCTION:
318 assert( labels->count < MAX_LABELS );
319
320 labels->labels[labels->count][0] = instno;
321 labels->labels[labels->count][1] = pointer;
322 labels->count++;
323
324 /* save expanded instruction */
325 if (numInstructions == maxInstructions) {
326 instructions = REALLOC(instructions,
327 maxInstructions
328 * sizeof(struct tgsi_full_instruction),
329 (maxInstructions + 10)
330 * sizeof(struct tgsi_full_instruction));
331 maxInstructions += 10;
332 }
333
334 if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
335 uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
336 parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG;
337 /* XXX we only handle SOA dependencies properly for MOV/SWZ
338 * at this time!
339 */
340 if (opcode != TGSI_OPCODE_MOV) {
341 debug_printf("Warning: SOA dependency in instruction"
342 " is not handled:\n");
343 tgsi_dump_instruction(&parse.FullToken.FullInstruction,
344 numInstructions);
345 }
346 }
347
348 memcpy(instructions + numInstructions,
349 &parse.FullToken.FullInstruction,
350 sizeof(instructions[0]));
351
352 numInstructions++;
353 break;
354
355 default:
356 assert( 0 );
357 }
358 }
359 tgsi_parse_free (&parse);
360
361 if (mach->Declarations) {
362 FREE( mach->Declarations );
363 }
364 mach->Declarations = declarations;
365 mach->NumDeclarations = numDeclarations;
366
367 if (mach->Instructions) {
368 FREE( mach->Instructions );
369 }
370 mach->Instructions = instructions;
371 mach->NumInstructions = numInstructions;
372 }
373
374
375 struct tgsi_exec_machine *
376 tgsi_exec_machine_create( void )
377 {
378 struct tgsi_exec_machine *mach;
379 uint i;
380
381 mach = align_malloc( sizeof *mach, 16 );
382 if (!mach)
383 goto fail;
384
385 memset(mach, 0, sizeof(*mach));
386
387 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
388
389 /* Setup constants. */
390 for( i = 0; i < 4; i++ ) {
391 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
392 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
393 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
394 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
395 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
396 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
397 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
398 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
399 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
400 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
401 }
402
403 #ifdef DEBUG
404 /* silence warnings */
405 (void) print_chan;
406 (void) print_temp;
407 #endif
408
409 return mach;
410
411 fail:
412 align_free(mach);
413 return NULL;
414 }
415
416
417 void
418 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
419 {
420 if (mach) {
421 FREE(mach->Instructions);
422 FREE(mach->Declarations);
423 }
424
425 align_free(mach);
426 }
427
428
429 static void
430 micro_abs(
431 union tgsi_exec_channel *dst,
432 const union tgsi_exec_channel *src )
433 {
434 dst->f[0] = fabsf( src->f[0] );
435 dst->f[1] = fabsf( src->f[1] );
436 dst->f[2] = fabsf( src->f[2] );
437 dst->f[3] = fabsf( src->f[3] );
438 }
439
440 static void
441 micro_add(
442 union tgsi_exec_channel *dst,
443 const union tgsi_exec_channel *src0,
444 const union tgsi_exec_channel *src1 )
445 {
446 dst->f[0] = src0->f[0] + src1->f[0];
447 dst->f[1] = src0->f[1] + src1->f[1];
448 dst->f[2] = src0->f[2] + src1->f[2];
449 dst->f[3] = src0->f[3] + src1->f[3];
450 }
451
#if 0
/** Lane-wise signed integer addition on the .i view (currently disabled). */
static void
micro_iadd(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] + src1->i[lane];
   }
}
#endif
465
466 static void
467 micro_and(
468 union tgsi_exec_channel *dst,
469 const union tgsi_exec_channel *src0,
470 const union tgsi_exec_channel *src1 )
471 {
472 dst->u[0] = src0->u[0] & src1->u[0];
473 dst->u[1] = src0->u[1] & src1->u[1];
474 dst->u[2] = src0->u[2] & src1->u[2];
475 dst->u[3] = src0->u[3] & src1->u[3];
476 }
477
478 static void
479 micro_ceil(
480 union tgsi_exec_channel *dst,
481 const union tgsi_exec_channel *src )
482 {
483 dst->f[0] = ceilf( src->f[0] );
484 dst->f[1] = ceilf( src->f[1] );
485 dst->f[2] = ceilf( src->f[2] );
486 dst->f[3] = ceilf( src->f[3] );
487 }
488
489 static void
490 micro_cos(
491 union tgsi_exec_channel *dst,
492 const union tgsi_exec_channel *src )
493 {
494 dst->f[0] = cosf( src->f[0] );
495 dst->f[1] = cosf( src->f[1] );
496 dst->f[2] = cosf( src->f[2] );
497 dst->f[3] = cosf( src->f[3] );
498 }
499
500 static void
501 micro_ddx(
502 union tgsi_exec_channel *dst,
503 const union tgsi_exec_channel *src )
504 {
505 dst->f[0] =
506 dst->f[1] =
507 dst->f[2] =
508 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
509 }
510
511 static void
512 micro_ddy(
513 union tgsi_exec_channel *dst,
514 const union tgsi_exec_channel *src )
515 {
516 dst->f[0] =
517 dst->f[1] =
518 dst->f[2] =
519 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
520 }
521
522 static void
523 micro_div(
524 union tgsi_exec_channel *dst,
525 const union tgsi_exec_channel *src0,
526 const union tgsi_exec_channel *src1 )
527 {
528 if (src1->f[0] != 0) {
529 dst->f[0] = src0->f[0] / src1->f[0];
530 }
531 if (src1->f[1] != 0) {
532 dst->f[1] = src0->f[1] / src1->f[1];
533 }
534 if (src1->f[2] != 0) {
535 dst->f[2] = src0->f[2] / src1->f[2];
536 }
537 if (src1->f[3] != 0) {
538 dst->f[3] = src0->f[3] / src1->f[3];
539 }
540 }
541
#if 0
/**
 * Lane-wise unsigned division on the .u view (currently disabled).
 * No check is made for a zero divisor.
 */
static void
micro_udiv(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] / src1->u[lane];
   }
}
#endif
555
556 static void
557 micro_eq(
558 union tgsi_exec_channel *dst,
559 const union tgsi_exec_channel *src0,
560 const union tgsi_exec_channel *src1,
561 const union tgsi_exec_channel *src2,
562 const union tgsi_exec_channel *src3 )
563 {
564 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
565 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
566 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
567 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
568 }
569
#if 0
/**
 * Integer variant of micro_eq (currently disabled): per lane, dst gets
 * src2 where src0 == src1 on the .i view, src3 otherwise.
 */
static void
micro_ieq(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] == src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
585
586 static void
587 micro_exp2(
588 union tgsi_exec_channel *dst,
589 const union tgsi_exec_channel *src)
590 {
591 #if FAST_MATH
592 dst->f[0] = util_fast_exp2( src->f[0] );
593 dst->f[1] = util_fast_exp2( src->f[1] );
594 dst->f[2] = util_fast_exp2( src->f[2] );
595 dst->f[3] = util_fast_exp2( src->f[3] );
596 #else
597 dst->f[0] = powf( 2.0f, src->f[0] );
598 dst->f[1] = powf( 2.0f, src->f[1] );
599 dst->f[2] = powf( 2.0f, src->f[2] );
600 dst->f[3] = powf( 2.0f, src->f[3] );
601 #endif
602 }
603
#if 0
/** Lane-wise float to unsigned conversion by C cast (currently disabled). */
static void
micro_f2ut(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = (uint) src->f[lane];
   }
}
#endif
616
617 static void
618 micro_float_clamp(union tgsi_exec_channel *dst,
619 const union tgsi_exec_channel *src)
620 {
621 uint i;
622
623 for (i = 0; i < 4; i++) {
624 if (src->f[i] > 0.0f) {
625 if (src->f[i] > 1.884467e+019f)
626 dst->f[i] = 1.884467e+019f;
627 else if (src->f[i] < 5.42101e-020f)
628 dst->f[i] = 5.42101e-020f;
629 else
630 dst->f[i] = src->f[i];
631 }
632 else {
633 if (src->f[i] < -1.884467e+019f)
634 dst->f[i] = -1.884467e+019f;
635 else if (src->f[i] > -5.42101e-020f)
636 dst->f[i] = -5.42101e-020f;
637 else
638 dst->f[i] = src->f[i];
639 }
640 }
641 }
642
643 static void
644 micro_flr(
645 union tgsi_exec_channel *dst,
646 const union tgsi_exec_channel *src )
647 {
648 dst->f[0] = floorf( src->f[0] );
649 dst->f[1] = floorf( src->f[1] );
650 dst->f[2] = floorf( src->f[2] );
651 dst->f[3] = floorf( src->f[3] );
652 }
653
654 static void
655 micro_frc(
656 union tgsi_exec_channel *dst,
657 const union tgsi_exec_channel *src )
658 {
659 dst->f[0] = src->f[0] - floorf( src->f[0] );
660 dst->f[1] = src->f[1] - floorf( src->f[1] );
661 dst->f[2] = src->f[2] - floorf( src->f[2] );
662 dst->f[3] = src->f[3] - floorf( src->f[3] );
663 }
664
665 static void
666 micro_i2f(
667 union tgsi_exec_channel *dst,
668 const union tgsi_exec_channel *src )
669 {
670 dst->f[0] = (float) src->i[0];
671 dst->f[1] = (float) src->i[1];
672 dst->f[2] = (float) src->i[2];
673 dst->f[3] = (float) src->i[3];
674 }
675
676 static void
677 micro_lg2(
678 union tgsi_exec_channel *dst,
679 const union tgsi_exec_channel *src )
680 {
681 #if FAST_MATH
682 dst->f[0] = util_fast_log2( src->f[0] );
683 dst->f[1] = util_fast_log2( src->f[1] );
684 dst->f[2] = util_fast_log2( src->f[2] );
685 dst->f[3] = util_fast_log2( src->f[3] );
686 #else
687 dst->f[0] = logf( src->f[0] ) * 1.442695f;
688 dst->f[1] = logf( src->f[1] ) * 1.442695f;
689 dst->f[2] = logf( src->f[2] ) * 1.442695f;
690 dst->f[3] = logf( src->f[3] ) * 1.442695f;
691 #endif
692 }
693
694 static void
695 micro_le(
696 union tgsi_exec_channel *dst,
697 const union tgsi_exec_channel *src0,
698 const union tgsi_exec_channel *src1,
699 const union tgsi_exec_channel *src2,
700 const union tgsi_exec_channel *src3 )
701 {
702 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
703 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
704 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
705 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
706 }
707
708 static void
709 micro_lt(
710 union tgsi_exec_channel *dst,
711 const union tgsi_exec_channel *src0,
712 const union tgsi_exec_channel *src1,
713 const union tgsi_exec_channel *src2,
714 const union tgsi_exec_channel *src3 )
715 {
716 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
717 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
718 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
719 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
720 }
721
#if 0
/**
 * Signed integer variant of micro_lt (currently disabled): per lane,
 * dst gets src2 where src0 < src1 on the .i view, src3 otherwise.
 */
static void
micro_ilt(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
737
#if 0
/**
 * Unsigned integer variant of micro_lt (currently disabled): per lane,
 * dst gets src2 where src0 < src1 on the .u view, src3 otherwise.
 */
static void
micro_ult(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src2->u[lane];
      }
      else {
         dst->u[lane] = src3->u[lane];
      }
   }
}
#endif
753
754 static void
755 micro_max(
756 union tgsi_exec_channel *dst,
757 const union tgsi_exec_channel *src0,
758 const union tgsi_exec_channel *src1 )
759 {
760 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
761 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
762 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
763 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
764 }
765
#if 0
/** Lane-wise signed integer maximum on the .i view (currently disabled). */
static void
micro_imax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] > src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
779
#if 0
/** Lane-wise unsigned integer maximum on the .u view (currently disabled). */
static void
micro_umax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] > src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
793
794 static void
795 micro_min(
796 union tgsi_exec_channel *dst,
797 const union tgsi_exec_channel *src0,
798 const union tgsi_exec_channel *src1 )
799 {
800 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
801 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
802 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
803 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
804 }
805
#if 0
/** Lane-wise signed integer minimum on the .i view (currently disabled). */
static void
micro_imin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
819
#if 0
/** Lane-wise unsigned integer minimum on the .u view (currently disabled). */
static void
micro_umin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
833
#if 0
/**
 * Lane-wise unsigned remainder on the .u view (currently disabled).
 * No check is made for a zero divisor.
 */
static void
micro_umod(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] % src1->u[lane];
   }
}
#endif
847
848 static void
849 micro_mul(
850 union tgsi_exec_channel *dst,
851 const union tgsi_exec_channel *src0,
852 const union tgsi_exec_channel *src1 )
853 {
854 dst->f[0] = src0->f[0] * src1->f[0];
855 dst->f[1] = src0->f[1] * src1->f[1];
856 dst->f[2] = src0->f[2] * src1->f[2];
857 dst->f[3] = src0->f[3] * src1->f[3];
858 }
859
#if 0
/** Lane-wise signed integer multiply on the .i view (currently disabled). */
static void
micro_imul(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] * src1->i[lane];
   }
}
#endif
873
#if 0
/**
 * Signed multiply with a two-channel result (currently disabled).
 * dst1 receives the ordinary same-width product of each lane; dst0 is
 * simply cleared to zero rather than holding upper product bits.
 */
static void
micro_imul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   /* All products are stored before dst0 is cleared. */
   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
892
#if 0
/**
 * Unsigned multiply with a two-channel result (currently disabled).
 * dst1 receives the ordinary same-width product of each lane; dst0 is
 * simply cleared to zero rather than holding upper product bits.
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   /* All products are stored before dst0 is cleared. */
   for (lane = 0; lane < 4; lane++) {
      dst1->u[lane] = src0->u[lane] * src1->u[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->u[lane] = 0;
   }
}
#endif
911
912
#if 0
/**
 * Lane-wise conditional move on the .u view (currently disabled):
 * dst gets src1 where src0 is non-zero, src2 otherwise.
 */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane]) {
         dst->u[lane] = src1->u[lane];
      }
      else {
         dst->u[lane] = src2->u[lane];
      }
   }
}
#endif
927
928 static void
929 micro_neg(
930 union tgsi_exec_channel *dst,
931 const union tgsi_exec_channel *src )
932 {
933 dst->f[0] = -src->f[0];
934 dst->f[1] = -src->f[1];
935 dst->f[2] = -src->f[2];
936 dst->f[3] = -src->f[3];
937 }
938
#if 0
/** Lane-wise signed integer negation on the .i view (currently disabled). */
static void
micro_ineg(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = -src->i[lane];
   }
}
#endif
951
952 static void
953 micro_not(
954 union tgsi_exec_channel *dst,
955 const union tgsi_exec_channel *src )
956 {
957 dst->u[0] = ~src->u[0];
958 dst->u[1] = ~src->u[1];
959 dst->u[2] = ~src->u[2];
960 dst->u[3] = ~src->u[3];
961 }
962
963 static void
964 micro_or(
965 union tgsi_exec_channel *dst,
966 const union tgsi_exec_channel *src0,
967 const union tgsi_exec_channel *src1 )
968 {
969 dst->u[0] = src0->u[0] | src1->u[0];
970 dst->u[1] = src0->u[1] | src1->u[1];
971 dst->u[2] = src0->u[2] | src1->u[2];
972 dst->u[3] = src0->u[3] | src1->u[3];
973 }
974
975 static void
976 micro_pow(
977 union tgsi_exec_channel *dst,
978 const union tgsi_exec_channel *src0,
979 const union tgsi_exec_channel *src1 )
980 {
981 #if FAST_MATH
982 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
983 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
984 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
985 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
986 #else
987 dst->f[0] = powf( src0->f[0], src1->f[0] );
988 dst->f[1] = powf( src0->f[1], src1->f[1] );
989 dst->f[2] = powf( src0->f[2], src1->f[2] );
990 dst->f[3] = powf( src0->f[3], src1->f[3] );
991 #endif
992 }
993
994 static void
995 micro_rnd(
996 union tgsi_exec_channel *dst,
997 const union tgsi_exec_channel *src )
998 {
999 dst->f[0] = floorf( src->f[0] + 0.5f );
1000 dst->f[1] = floorf( src->f[1] + 0.5f );
1001 dst->f[2] = floorf( src->f[2] + 0.5f );
1002 dst->f[3] = floorf( src->f[3] + 0.5f );
1003 }
1004
1005 static void
1006 micro_sgn(
1007 union tgsi_exec_channel *dst,
1008 const union tgsi_exec_channel *src )
1009 {
1010 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
1011 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
1012 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
1013 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
1014 }
1015
1016 static void
1017 micro_shl(
1018 union tgsi_exec_channel *dst,
1019 const union tgsi_exec_channel *src0,
1020 const union tgsi_exec_channel *src1 )
1021 {
1022 dst->i[0] = src0->i[0] << src1->i[0];
1023 dst->i[1] = src0->i[1] << src1->i[1];
1024 dst->i[2] = src0->i[2] << src1->i[2];
1025 dst->i[3] = src0->i[3] << src1->i[3];
1026 }
1027
1028 static void
1029 micro_ishr(
1030 union tgsi_exec_channel *dst,
1031 const union tgsi_exec_channel *src0,
1032 const union tgsi_exec_channel *src1 )
1033 {
1034 dst->i[0] = src0->i[0] >> src1->i[0];
1035 dst->i[1] = src0->i[1] >> src1->i[1];
1036 dst->i[2] = src0->i[2] >> src1->i[2];
1037 dst->i[3] = src0->i[3] >> src1->i[3];
1038 }
1039
1040 static void
1041 micro_trunc(
1042 union tgsi_exec_channel *dst,
1043 const union tgsi_exec_channel *src0 )
1044 {
1045 dst->f[0] = (float) (int) src0->f[0];
1046 dst->f[1] = (float) (int) src0->f[1];
1047 dst->f[2] = (float) (int) src0->f[2];
1048 dst->f[3] = (float) (int) src0->f[3];
1049 }
1050
#if 0
/**
 * Lane-wise right shift on the unsigned (.u) view (currently disabled).
 * The shift count is used as-is; no range check is applied.
 */
static void
micro_ushr(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] >> src1->u[lane];
   }
}
#endif
1064
1065 static void
1066 micro_sin(
1067 union tgsi_exec_channel *dst,
1068 const union tgsi_exec_channel *src )
1069 {
1070 dst->f[0] = sinf( src->f[0] );
1071 dst->f[1] = sinf( src->f[1] );
1072 dst->f[2] = sinf( src->f[2] );
1073 dst->f[3] = sinf( src->f[3] );
1074 }
1075
1076 static void
1077 micro_sqrt( union tgsi_exec_channel *dst,
1078 const union tgsi_exec_channel *src )
1079 {
1080 dst->f[0] = sqrtf( src->f[0] );
1081 dst->f[1] = sqrtf( src->f[1] );
1082 dst->f[2] = sqrtf( src->f[2] );
1083 dst->f[3] = sqrtf( src->f[3] );
1084 }
1085
1086 static void
1087 micro_sub(
1088 union tgsi_exec_channel *dst,
1089 const union tgsi_exec_channel *src0,
1090 const union tgsi_exec_channel *src1 )
1091 {
1092 dst->f[0] = src0->f[0] - src1->f[0];
1093 dst->f[1] = src0->f[1] - src1->f[1];
1094 dst->f[2] = src0->f[2] - src1->f[2];
1095 dst->f[3] = src0->f[3] - src1->f[3];
1096 }
1097
#if 0
/** Lane-wise unsigned integer to float conversion (currently disabled). */
static void
micro_u2f(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->f[lane] = (float) src->u[lane];
   }
}
#endif
1110
1111 static void
1112 micro_xor(
1113 union tgsi_exec_channel *dst,
1114 const union tgsi_exec_channel *src0,
1115 const union tgsi_exec_channel *src1 )
1116 {
1117 dst->u[0] = src0->u[0] ^ src1->u[0];
1118 dst->u[1] = src0->u[1] ^ src1->u[1];
1119 dst->u[2] = src0->u[2] ^ src1->u[2];
1120 dst->u[3] = src0->u[3] ^ src1->u[3];
1121 }
1122
1123 static void
1124 fetch_src_file_channel(
1125 const struct tgsi_exec_machine *mach,
1126 const uint file,
1127 const uint swizzle,
1128 const union tgsi_exec_channel *index,
1129 union tgsi_exec_channel *chan )
1130 {
1131 switch( swizzle ) {
1132 case TGSI_SWIZZLE_X:
1133 case TGSI_SWIZZLE_Y:
1134 case TGSI_SWIZZLE_Z:
1135 case TGSI_SWIZZLE_W:
1136 switch( file ) {
1137 case TGSI_FILE_CONSTANT:
1138 assert(mach->Consts);
1139 if (index->i[0] < 0)
1140 chan->f[0] = 0.0f;
1141 else
1142 chan->f[0] = mach->Consts[index->i[0]][swizzle];
1143 if (index->i[1] < 0)
1144 chan->f[1] = 0.0f;
1145 else
1146 chan->f[1] = mach->Consts[index->i[1]][swizzle];
1147 if (index->i[2] < 0)
1148 chan->f[2] = 0.0f;
1149 else
1150 chan->f[2] = mach->Consts[index->i[2]][swizzle];
1151 if (index->i[3] < 0)
1152 chan->f[3] = 0.0f;
1153 else
1154 chan->f[3] = mach->Consts[index->i[3]][swizzle];
1155 break;
1156
1157 case TGSI_FILE_INPUT:
1158 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1159 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1160 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1161 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1162 break;
1163
1164 case TGSI_FILE_TEMPORARY:
1165 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1166 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1167 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1168 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1169 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1170 break;
1171
1172 case TGSI_FILE_IMMEDIATE:
1173 assert( index->i[0] < (int) mach->ImmLimit );
1174 chan->f[0] = mach->Imms[index->i[0]][swizzle];
1175 assert( index->i[1] < (int) mach->ImmLimit );
1176 chan->f[1] = mach->Imms[index->i[1]][swizzle];
1177 assert( index->i[2] < (int) mach->ImmLimit );
1178 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1179 assert( index->i[3] < (int) mach->ImmLimit );
1180 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1181 break;
1182
1183 case TGSI_FILE_ADDRESS:
1184 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1185 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1186 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1187 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1188 break;
1189
1190 case TGSI_FILE_OUTPUT:
1191 /* vertex/fragment output vars can be read too */
1192 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1193 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1194 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1195 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1196 break;
1197
1198 default:
1199 assert( 0 );
1200 }
1201 break;
1202
1203 default:
1204 assert( 0 );
1205 }
1206 }
1207
1208 static void
1209 fetch_source(
1210 const struct tgsi_exec_machine *mach,
1211 union tgsi_exec_channel *chan,
1212 const struct tgsi_full_src_register *reg,
1213 const uint chan_index )
1214 {
1215 union tgsi_exec_channel index;
1216 uint swizzle;
1217
1218 /* We start with a direct index into a register file.
1219 *
1220 * file[1],
1221 * where:
1222 * file = SrcRegister.File
1223 * [1] = SrcRegister.Index
1224 */
1225 index.i[0] =
1226 index.i[1] =
1227 index.i[2] =
1228 index.i[3] = reg->SrcRegister.Index;
1229
1230 /* There is an extra source register that indirectly subscripts
1231 * a register file. The direct index now becomes an offset
1232 * that is being added to the indirect register.
1233 *
1234 * file[ind[2].x+1],
1235 * where:
1236 * ind = SrcRegisterInd.File
1237 * [2] = SrcRegisterInd.Index
1238 * .x = SrcRegisterInd.SwizzleX
1239 */
1240 if (reg->SrcRegister.Indirect) {
1241 union tgsi_exec_channel index2;
1242 union tgsi_exec_channel indir_index;
1243 const uint execmask = mach->ExecMask;
1244 uint i;
1245
1246 /* which address register (always zero now) */
1247 index2.i[0] =
1248 index2.i[1] =
1249 index2.i[2] =
1250 index2.i[3] = reg->SrcRegisterInd.Index;
1251
1252 /* get current value of address register[swizzle] */
1253 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1254 fetch_src_file_channel(
1255 mach,
1256 reg->SrcRegisterInd.File,
1257 swizzle,
1258 &index2,
1259 &indir_index );
1260
1261 /* add value of address register to the offset */
1262 index.i[0] += (int) indir_index.f[0];
1263 index.i[1] += (int) indir_index.f[1];
1264 index.i[2] += (int) indir_index.f[2];
1265 index.i[3] += (int) indir_index.f[3];
1266
1267 /* for disabled execution channels, zero-out the index to
1268 * avoid using a potential garbage value.
1269 */
1270 for (i = 0; i < QUAD_SIZE; i++) {
1271 if ((execmask & (1 << i)) == 0)
1272 index.i[i] = 0;
1273 }
1274 }
1275
1276 /* There is an extra source register that is a second
1277 * subscript to a register file. Effectively it means that
1278 * the register file is actually a 2D array of registers.
1279 *
1280 * file[1][3] == file[1*sizeof(file[1])+3],
1281 * where:
1282 * [3] = SrcRegisterDim.Index
1283 */
1284 if (reg->SrcRegister.Dimension) {
1285 /* The size of the first-order array depends on the register file type.
1286 * We need to multiply the index to the first array to get an effective,
1287 * "flat" index that points to the beginning of the second-order array.
1288 */
1289 switch (reg->SrcRegister.File) {
1290 case TGSI_FILE_INPUT:
1291 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1292 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1293 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1294 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1295 break;
1296 case TGSI_FILE_CONSTANT:
1297 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1298 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1299 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1300 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1301 break;
1302 default:
1303 assert( 0 );
1304 }
1305
1306 index.i[0] += reg->SrcRegisterDim.Index;
1307 index.i[1] += reg->SrcRegisterDim.Index;
1308 index.i[2] += reg->SrcRegisterDim.Index;
1309 index.i[3] += reg->SrcRegisterDim.Index;
1310
1311 /* Again, the second subscript index can be addressed indirectly
1312 * identically to the first one.
1313 * Nothing stops us from indirectly addressing the indirect register,
1314 * but there is no need for that, so we won't exercise it.
1315 *
1316 * file[1][ind[4].y+3],
1317 * where:
1318 * ind = SrcRegisterDimInd.File
1319 * [4] = SrcRegisterDimInd.Index
1320 * .y = SrcRegisterDimInd.SwizzleX
1321 */
1322 if (reg->SrcRegisterDim.Indirect) {
1323 union tgsi_exec_channel index2;
1324 union tgsi_exec_channel indir_index;
1325 const uint execmask = mach->ExecMask;
1326 uint i;
1327
1328 index2.i[0] =
1329 index2.i[1] =
1330 index2.i[2] =
1331 index2.i[3] = reg->SrcRegisterDimInd.Index;
1332
1333 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1334 fetch_src_file_channel(
1335 mach,
1336 reg->SrcRegisterDimInd.File,
1337 swizzle,
1338 &index2,
1339 &indir_index );
1340
1341 index.i[0] += (int) indir_index.f[0];
1342 index.i[1] += (int) indir_index.f[1];
1343 index.i[2] += (int) indir_index.f[2];
1344 index.i[3] += (int) indir_index.f[3];
1345
1346 /* for disabled execution channels, zero-out the index to
1347 * avoid using a potential garbage value.
1348 */
1349 for (i = 0; i < QUAD_SIZE; i++) {
1350 if ((execmask & (1 << i)) == 0)
1351 index.i[i] = 0;
1352 }
1353 }
1354
1355 /* If by any chance there was a need for a 3D array of register
1356 * files, we would have to check whether SrcRegisterDim is followed
1357 * by a dimension register and continue the saga.
1358 */
1359 }
1360
1361 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1362 fetch_src_file_channel(
1363 mach,
1364 reg->SrcRegister.File,
1365 swizzle,
1366 &index,
1367 chan );
1368
1369 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1370 case TGSI_UTIL_SIGN_CLEAR:
1371 micro_abs( chan, chan );
1372 break;
1373
1374 case TGSI_UTIL_SIGN_SET:
1375 micro_abs( chan, chan );
1376 micro_neg( chan, chan );
1377 break;
1378
1379 case TGSI_UTIL_SIGN_TOGGLE:
1380 micro_neg( chan, chan );
1381 break;
1382
1383 case TGSI_UTIL_SIGN_KEEP:
1384 break;
1385 }
1386
1387 if (reg->SrcRegisterExtMod.Complement) {
1388 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1389 }
1390 }
1391
1392 static void
1393 store_dest(
1394 struct tgsi_exec_machine *mach,
1395 const union tgsi_exec_channel *chan,
1396 const struct tgsi_full_dst_register *reg,
1397 const struct tgsi_full_instruction *inst,
1398 uint chan_index )
1399 {
1400 uint i;
1401 union tgsi_exec_channel null;
1402 union tgsi_exec_channel *dst;
1403 uint execmask = mach->ExecMask;
1404 int offset = 0; /* indirection offset */
1405 int index;
1406
1407 #ifdef DEBUG
1408 check_inf_or_nan(chan);
1409 #endif
1410
1411 /* There is an extra source register that indirectly subscripts
1412 * a register file. The direct index now becomes an offset
1413 * that is being added to the indirect register.
1414 *
1415 * file[ind[2].x+1],
1416 * where:
1417 * ind = DstRegisterInd.File
1418 * [2] = DstRegisterInd.Index
1419 * .x = DstRegisterInd.SwizzleX
1420 */
1421 if (reg->DstRegister.Indirect) {
1422 union tgsi_exec_channel index;
1423 union tgsi_exec_channel indir_index;
1424 uint swizzle;
1425
1426 /* which address register (always zero for now) */
1427 index.i[0] =
1428 index.i[1] =
1429 index.i[2] =
1430 index.i[3] = reg->DstRegisterInd.Index;
1431
1432 /* get current value of address register[swizzle] */
1433 swizzle = tgsi_util_get_src_register_swizzle( &reg->DstRegisterInd, CHAN_X );
1434
1435 /* fetch values from the address/indirection register */
1436 fetch_src_file_channel(
1437 mach,
1438 reg->DstRegisterInd.File,
1439 swizzle,
1440 &index,
1441 &indir_index );
1442
1443 /* save indirection offset */
1444 offset = (int) indir_index.f[0];
1445 }
1446
1447 switch (reg->DstRegister.File) {
1448 case TGSI_FILE_NULL:
1449 dst = &null;
1450 break;
1451
1452 case TGSI_FILE_OUTPUT:
1453 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1454 + reg->DstRegister.Index;
1455 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1456 break;
1457
1458 case TGSI_FILE_TEMPORARY:
1459 index = reg->DstRegister.Index;
1460 assert( index < TGSI_EXEC_NUM_TEMPS );
1461 dst = &mach->Temps[offset + index].xyzw[chan_index];
1462 break;
1463
1464 case TGSI_FILE_ADDRESS:
1465 index = reg->DstRegister.Index;
1466 dst = &mach->Addrs[index].xyzw[chan_index];
1467 break;
1468
1469 default:
1470 assert( 0 );
1471 return;
1472 }
1473
1474 if (inst->InstructionExtNv.CondFlowEnable) {
1475 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1476 uint swizzle;
1477 uint shift;
1478 uint mask;
1479 uint test;
1480
1481 /* Only CC0 supported.
1482 */
1483 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1484
1485 switch (chan_index) {
1486 case CHAN_X:
1487 swizzle = inst->InstructionExtNv.CondSwizzleX;
1488 break;
1489 case CHAN_Y:
1490 swizzle = inst->InstructionExtNv.CondSwizzleY;
1491 break;
1492 case CHAN_Z:
1493 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1494 break;
1495 case CHAN_W:
1496 swizzle = inst->InstructionExtNv.CondSwizzleW;
1497 break;
1498 default:
1499 assert( 0 );
1500 return;
1501 }
1502
1503 switch (swizzle) {
1504 case TGSI_SWIZZLE_X:
1505 shift = TGSI_EXEC_CC_X_SHIFT;
1506 mask = TGSI_EXEC_CC_X_MASK;
1507 break;
1508 case TGSI_SWIZZLE_Y:
1509 shift = TGSI_EXEC_CC_Y_SHIFT;
1510 mask = TGSI_EXEC_CC_Y_MASK;
1511 break;
1512 case TGSI_SWIZZLE_Z:
1513 shift = TGSI_EXEC_CC_Z_SHIFT;
1514 mask = TGSI_EXEC_CC_Z_MASK;
1515 break;
1516 case TGSI_SWIZZLE_W:
1517 shift = TGSI_EXEC_CC_W_SHIFT;
1518 mask = TGSI_EXEC_CC_W_MASK;
1519 break;
1520 default:
1521 assert( 0 );
1522 return;
1523 }
1524
1525 switch (inst->InstructionExtNv.CondMask) {
1526 case TGSI_CC_GT:
1527 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1528 for (i = 0; i < QUAD_SIZE; i++)
1529 if (cc->u[i] & test)
1530 execmask &= ~(1 << i);
1531 break;
1532
1533 case TGSI_CC_EQ:
1534 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1535 for (i = 0; i < QUAD_SIZE; i++)
1536 if (cc->u[i] & test)
1537 execmask &= ~(1 << i);
1538 break;
1539
1540 case TGSI_CC_LT:
1541 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1542 for (i = 0; i < QUAD_SIZE; i++)
1543 if (cc->u[i] & test)
1544 execmask &= ~(1 << i);
1545 break;
1546
1547 case TGSI_CC_GE:
1548 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1549 for (i = 0; i < QUAD_SIZE; i++)
1550 if (cc->u[i] & test)
1551 execmask &= ~(1 << i);
1552 break;
1553
1554 case TGSI_CC_LE:
1555 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1556 for (i = 0; i < QUAD_SIZE; i++)
1557 if (cc->u[i] & test)
1558 execmask &= ~(1 << i);
1559 break;
1560
1561 case TGSI_CC_NE:
1562 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1563 for (i = 0; i < QUAD_SIZE; i++)
1564 if (cc->u[i] & test)
1565 execmask &= ~(1 << i);
1566 break;
1567
1568 case TGSI_CC_TR:
1569 break;
1570
1571 case TGSI_CC_FL:
1572 for (i = 0; i < QUAD_SIZE; i++)
1573 execmask &= ~(1 << i);
1574 break;
1575
1576 default:
1577 assert( 0 );
1578 return;
1579 }
1580 }
1581
1582 switch (inst->Instruction.Saturate) {
1583 case TGSI_SAT_NONE:
1584 for (i = 0; i < QUAD_SIZE; i++)
1585 if (execmask & (1 << i))
1586 dst->i[i] = chan->i[i];
1587 break;
1588
1589 case TGSI_SAT_ZERO_ONE:
1590 for (i = 0; i < QUAD_SIZE; i++)
1591 if (execmask & (1 << i)) {
1592 if (chan->f[i] < 0.0f)
1593 dst->f[i] = 0.0f;
1594 else if (chan->f[i] > 1.0f)
1595 dst->f[i] = 1.0f;
1596 else
1597 dst->i[i] = chan->i[i];
1598 }
1599 break;
1600
1601 case TGSI_SAT_MINUS_PLUS_ONE:
1602 for (i = 0; i < QUAD_SIZE; i++)
1603 if (execmask & (1 << i)) {
1604 if (chan->f[i] < -1.0f)
1605 dst->f[i] = -1.0f;
1606 else if (chan->f[i] > 1.0f)
1607 dst->f[i] = 1.0f;
1608 else
1609 dst->i[i] = chan->i[i];
1610 }
1611 break;
1612
1613 default:
1614 assert( 0 );
1615 }
1616
1617 if (inst->InstructionExtNv.CondDstUpdate) {
1618 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1619 uint shift;
1620 uint mask;
1621
1622 /* Only CC0 supported.
1623 */
1624 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1625
1626 switch (chan_index) {
1627 case CHAN_X:
1628 shift = TGSI_EXEC_CC_X_SHIFT;
1629 mask = ~TGSI_EXEC_CC_X_MASK;
1630 break;
1631 case CHAN_Y:
1632 shift = TGSI_EXEC_CC_Y_SHIFT;
1633 mask = ~TGSI_EXEC_CC_Y_MASK;
1634 break;
1635 case CHAN_Z:
1636 shift = TGSI_EXEC_CC_Z_SHIFT;
1637 mask = ~TGSI_EXEC_CC_Z_MASK;
1638 break;
1639 case CHAN_W:
1640 shift = TGSI_EXEC_CC_W_SHIFT;
1641 mask = ~TGSI_EXEC_CC_W_MASK;
1642 break;
1643 default:
1644 assert( 0 );
1645 return;
1646 }
1647
1648 for (i = 0; i < QUAD_SIZE; i++)
1649 if (execmask & (1 << i)) {
1650 cc->u[i] &= mask;
1651 if (dst->f[i] < 0.0f)
1652 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1653 else if (dst->f[i] > 0.0f)
1654 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1655 else if (dst->f[i] == 0.0f)
1656 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1657 else
1658 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1659 }
1660 }
1661 }
1662
/* Fetch channel CHAN of source operand INDEX of the current instruction
 * into VAL.  Relies on 'mach' and 'inst' being in scope at the call site.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source( mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN )

/* Store VAL into channel CHAN of destination operand INDEX of the current
 * instruction.  Relies on 'mach' and 'inst' being in scope at the call site.
 */
#define STORE(VAL, INDEX, CHAN) \
   store_dest( mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1668
1669
1670 /**
1671 * Execute ARB-style KIL which is predicated by a src register.
1672 * Kill fragment if any of the four values is less than zero.
1673 */
1674 static void
1675 exec_kil(struct tgsi_exec_machine *mach,
1676 const struct tgsi_full_instruction *inst)
1677 {
1678 uint uniquemask;
1679 uint chan_index;
1680 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1681 union tgsi_exec_channel r[1];
1682
1683 /* This mask stores component bits that were already tested. */
1684 uniquemask = 0;
1685
1686 for (chan_index = 0; chan_index < 4; chan_index++)
1687 {
1688 uint swizzle;
1689 uint i;
1690
1691 /* unswizzle channel */
1692 swizzle = tgsi_util_get_full_src_register_swizzle (
1693 &inst->FullSrcRegisters[0],
1694 chan_index);
1695
1696 /* check if the component has not been already tested */
1697 if (uniquemask & (1 << swizzle))
1698 continue;
1699 uniquemask |= 1 << swizzle;
1700
1701 FETCH(&r[0], 0, chan_index);
1702 for (i = 0; i < 4; i++)
1703 if (r[0].f[i] < 0.0f)
1704 kilmask |= 1 << i;
1705 }
1706
1707 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1708 }
1709
1710 /**
1711 * Execute NVIDIA-style KIL which is predicated by a condition code.
1712 * Kill fragment if the condition code is TRUE.
1713 */
1714 static void
1715 exec_kilp(struct tgsi_exec_machine *mach,
1716 const struct tgsi_full_instruction *inst)
1717 {
1718 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1719
1720 if (inst->InstructionExtNv.CondFlowEnable) {
1721 uint swizzle[4];
1722 uint chan_index;
1723
1724 kilmask = 0x0;
1725
1726 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1727 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1728 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1729 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1730
1731 for (chan_index = 0; chan_index < 4; chan_index++)
1732 {
1733 uint i;
1734
1735 for (i = 0; i < 4; i++) {
1736 /* TODO: evaluate the condition code */
1737 if (0)
1738 kilmask |= 1 << i;
1739 }
1740 }
1741 }
1742 else {
1743 /* "unconditional" kil */
1744 kilmask = mach->ExecMask;
1745 }
1746 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1747 }
1748
1749
1750 /*
1751 * Fetch a four texture samples using STR texture coordinates.
1752 */
1753 static void
1754 fetch_texel( struct tgsi_sampler *sampler,
1755 const union tgsi_exec_channel *s,
1756 const union tgsi_exec_channel *t,
1757 const union tgsi_exec_channel *p,
1758 float lodbias, /* XXX should be float[4] */
1759 union tgsi_exec_channel *r,
1760 union tgsi_exec_channel *g,
1761 union tgsi_exec_channel *b,
1762 union tgsi_exec_channel *a )
1763 {
1764 uint j;
1765 float rgba[NUM_CHANNELS][QUAD_SIZE];
1766
1767 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1768
1769 for (j = 0; j < 4; j++) {
1770 r->f[j] = rgba[0][j];
1771 g->f[j] = rgba[1][j];
1772 b->f[j] = rgba[2][j];
1773 a->f[j] = rgba[3][j];
1774 }
1775 }
1776
1777
1778 static void
1779 exec_tex(struct tgsi_exec_machine *mach,
1780 const struct tgsi_full_instruction *inst,
1781 boolean biasLod,
1782 boolean projected)
1783 {
1784 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1785 union tgsi_exec_channel r[4];
1786 uint chan_index;
1787 float lodBias;
1788
1789 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1790
1791 switch (inst->InstructionExtTexture.Texture) {
1792 case TGSI_TEXTURE_1D:
1793 case TGSI_TEXTURE_SHADOW1D:
1794
1795 FETCH(&r[0], 0, CHAN_X);
1796
1797 if (projected) {
1798 FETCH(&r[1], 0, CHAN_W);
1799 micro_div( &r[0], &r[0], &r[1] );
1800 }
1801
1802 if (biasLod) {
1803 FETCH(&r[1], 0, CHAN_W);
1804 lodBias = r[2].f[0];
1805 }
1806 else
1807 lodBias = 0.0;
1808
1809 fetch_texel(mach->Samplers[unit],
1810 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */
1811 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1812 break;
1813
1814 case TGSI_TEXTURE_2D:
1815 case TGSI_TEXTURE_RECT:
1816 case TGSI_TEXTURE_SHADOW2D:
1817 case TGSI_TEXTURE_SHADOWRECT:
1818
1819 FETCH(&r[0], 0, CHAN_X);
1820 FETCH(&r[1], 0, CHAN_Y);
1821 FETCH(&r[2], 0, CHAN_Z);
1822
1823 if (projected) {
1824 FETCH(&r[3], 0, CHAN_W);
1825 micro_div( &r[0], &r[0], &r[3] );
1826 micro_div( &r[1], &r[1], &r[3] );
1827 micro_div( &r[2], &r[2], &r[3] );
1828 }
1829
1830 if (biasLod) {
1831 FETCH(&r[3], 0, CHAN_W);
1832 lodBias = r[3].f[0];
1833 }
1834 else
1835 lodBias = 0.0;
1836
1837 fetch_texel(mach->Samplers[unit],
1838 &r[0], &r[1], &r[2], lodBias, /* inputs */
1839 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1840 break;
1841
1842 case TGSI_TEXTURE_3D:
1843 case TGSI_TEXTURE_CUBE:
1844
1845 FETCH(&r[0], 0, CHAN_X);
1846 FETCH(&r[1], 0, CHAN_Y);
1847 FETCH(&r[2], 0, CHAN_Z);
1848
1849 if (projected) {
1850 FETCH(&r[3], 0, CHAN_W);
1851 micro_div( &r[0], &r[0], &r[3] );
1852 micro_div( &r[1], &r[1], &r[3] );
1853 micro_div( &r[2], &r[2], &r[3] );
1854 }
1855
1856 if (biasLod) {
1857 FETCH(&r[3], 0, CHAN_W);
1858 lodBias = r[3].f[0];
1859 }
1860 else
1861 lodBias = 0.0;
1862
1863 fetch_texel(mach->Samplers[unit],
1864 &r[0], &r[1], &r[2], lodBias,
1865 &r[0], &r[1], &r[2], &r[3]);
1866 break;
1867
1868 default:
1869 assert (0);
1870 }
1871
1872 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1873 STORE( &r[chan_index], 0, chan_index );
1874 }
1875 }
1876
1877
1878 /**
1879 * Evaluate a constant-valued coefficient at the position of the
1880 * current quad.
1881 */
1882 static void
1883 eval_constant_coef(
1884 struct tgsi_exec_machine *mach,
1885 unsigned attrib,
1886 unsigned chan )
1887 {
1888 unsigned i;
1889
1890 for( i = 0; i < QUAD_SIZE; i++ ) {
1891 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1892 }
1893 }
1894
1895 /**
1896 * Evaluate a linear-valued coefficient at the position of the
1897 * current quad.
1898 */
1899 static void
1900 eval_linear_coef(
1901 struct tgsi_exec_machine *mach,
1902 unsigned attrib,
1903 unsigned chan )
1904 {
1905 const float x = mach->QuadPos.xyzw[0].f[0];
1906 const float y = mach->QuadPos.xyzw[1].f[0];
1907 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1908 const float dady = mach->InterpCoefs[attrib].dady[chan];
1909 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1910 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1911 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1912 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1913 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1914 }
1915
1916 /**
1917 * Evaluate a perspective-valued coefficient at the position of the
1918 * current quad.
1919 */
1920 static void
1921 eval_perspective_coef(
1922 struct tgsi_exec_machine *mach,
1923 unsigned attrib,
1924 unsigned chan )
1925 {
1926 const float x = mach->QuadPos.xyzw[0].f[0];
1927 const float y = mach->QuadPos.xyzw[1].f[0];
1928 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1929 const float dady = mach->InterpCoefs[attrib].dady[chan];
1930 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1931 const float *w = mach->QuadPos.xyzw[3].f;
1932 /* divide by W here */
1933 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1934 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1935 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1936 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1937 }
1938
1939
/** Common signature of the eval_*_coef() attribute evaluators. */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1944
1945 static void
1946 exec_declaration(
1947 struct tgsi_exec_machine *mach,
1948 const struct tgsi_full_declaration *decl )
1949 {
1950 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1951 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1952 unsigned first, last, mask;
1953 eval_coef_func eval;
1954
1955 first = decl->DeclarationRange.First;
1956 last = decl->DeclarationRange.Last;
1957 mask = decl->Declaration.UsageMask;
1958
1959 switch( decl->Declaration.Interpolate ) {
1960 case TGSI_INTERPOLATE_CONSTANT:
1961 eval = eval_constant_coef;
1962 break;
1963
1964 case TGSI_INTERPOLATE_LINEAR:
1965 eval = eval_linear_coef;
1966 break;
1967
1968 case TGSI_INTERPOLATE_PERSPECTIVE:
1969 eval = eval_perspective_coef;
1970 break;
1971
1972 default:
1973 eval = NULL;
1974 assert( 0 );
1975 }
1976
1977 if( mask == TGSI_WRITEMASK_XYZW ) {
1978 unsigned i, j;
1979
1980 for( i = first; i <= last; i++ ) {
1981 for( j = 0; j < NUM_CHANNELS; j++ ) {
1982 eval( mach, i, j );
1983 }
1984 }
1985 }
1986 else {
1987 unsigned i, j;
1988
1989 for( j = 0; j < NUM_CHANNELS; j++ ) {
1990 if( mask & (1 << j) ) {
1991 for( i = first; i <= last; i++ ) {
1992 eval( mach, i, j );
1993 }
1994 }
1995 }
1996 }
1997 }
1998 }
1999 }
2000
2001 static void
2002 exec_instruction(
2003 struct tgsi_exec_machine *mach,
2004 const struct tgsi_full_instruction *inst,
2005 int *pc )
2006 {
2007 uint chan_index;
2008 union tgsi_exec_channel r[10];
2009
2010 (*pc)++;
2011
2012 switch (inst->Instruction.Opcode) {
2013 case TGSI_OPCODE_ARL:
2014 case TGSI_OPCODE_FLR:
2015 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2016 FETCH( &r[0], 0, chan_index );
2017 micro_flr( &r[0], &r[0] );
2018 STORE( &r[0], 0, chan_index );
2019 }
2020 break;
2021
2022 case TGSI_OPCODE_MOV:
2023 if (inst->Flags & SOA_DEPENDENCY_FLAG) {
2024 /* Do all fetches into temp regs, then do all stores to avoid
2025 * intermediate/accidental clobbering. This could be done all the
2026 * time for MOV but for other instructions we'll need more temps...
2027 */
2028 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2029 FETCH( &r[chan_index], 0, chan_index );
2030 }
2031 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2032 STORE( &r[chan_index], 0, chan_index );
2033 }
2034 }
2035 else {
2036 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2037 FETCH( &r[0], 0, chan_index );
2038 STORE( &r[0], 0, chan_index );
2039 }
2040 }
2041 break;
2042
2043 case TGSI_OPCODE_LIT:
2044 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2045 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2046 }
2047
2048 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2049 FETCH( &r[0], 0, CHAN_X );
2050 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2051 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2052 STORE( &r[0], 0, CHAN_Y );
2053 }
2054
2055 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2056 FETCH( &r[1], 0, CHAN_Y );
2057 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2058
2059 FETCH( &r[2], 0, CHAN_W );
2060 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
2061 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
2062 micro_pow( &r[1], &r[1], &r[2] );
2063 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2064 STORE( &r[0], 0, CHAN_Z );
2065 }
2066 }
2067
2068 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2069 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2070 }
2071 break;
2072
2073 case TGSI_OPCODE_RCP:
2074 /* TGSI_OPCODE_RECIP */
2075 FETCH( &r[0], 0, CHAN_X );
2076 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2077 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2078 STORE( &r[0], 0, chan_index );
2079 }
2080 break;
2081
2082 case TGSI_OPCODE_RSQ:
2083 /* TGSI_OPCODE_RECIPSQRT */
2084 FETCH( &r[0], 0, CHAN_X );
2085 micro_abs( &r[0], &r[0] );
2086 micro_sqrt( &r[0], &r[0] );
2087 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2088 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2089 STORE( &r[0], 0, chan_index );
2090 }
2091 break;
2092
2093 case TGSI_OPCODE_EXP:
2094 FETCH( &r[0], 0, CHAN_X );
2095 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
2096 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2097 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
2098 STORE( &r[2], 0, CHAN_X ); /* store r2 */
2099 }
2100 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2101 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
2102 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
2103 }
2104 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2105 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
2106 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
2107 }
2108 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2109 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2110 }
2111 break;
2112
2113 case TGSI_OPCODE_LOG:
2114 FETCH( &r[0], 0, CHAN_X );
2115 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
2116 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
2117 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
2118 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2119 STORE( &r[0], 0, CHAN_X );
2120 }
2121 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2122 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
2123 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
2124 STORE( &r[0], 0, CHAN_Y );
2125 }
2126 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2127 STORE( &r[1], 0, CHAN_Z );
2128 }
2129 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2130 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2131 }
2132 break;
2133
2134 case TGSI_OPCODE_MUL:
2135 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
2136 {
2137 FETCH(&r[0], 0, chan_index);
2138 FETCH(&r[1], 1, chan_index);
2139
2140 micro_mul( &r[0], &r[0], &r[1] );
2141
2142 STORE(&r[0], 0, chan_index);
2143 }
2144 break;
2145
2146 case TGSI_OPCODE_ADD:
2147 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2148 FETCH( &r[0], 0, chan_index );
2149 FETCH( &r[1], 1, chan_index );
2150 micro_add( &r[0], &r[0], &r[1] );
2151 STORE( &r[0], 0, chan_index );
2152 }
2153 break;
2154
2155 case TGSI_OPCODE_DP3:
2156 /* TGSI_OPCODE_DOT3 */
2157 FETCH( &r[0], 0, CHAN_X );
2158 FETCH( &r[1], 1, CHAN_X );
2159 micro_mul( &r[0], &r[0], &r[1] );
2160
2161 FETCH( &r[1], 0, CHAN_Y );
2162 FETCH( &r[2], 1, CHAN_Y );
2163 micro_mul( &r[1], &r[1], &r[2] );
2164 micro_add( &r[0], &r[0], &r[1] );
2165
2166 FETCH( &r[1], 0, CHAN_Z );
2167 FETCH( &r[2], 1, CHAN_Z );
2168 micro_mul( &r[1], &r[1], &r[2] );
2169 micro_add( &r[0], &r[0], &r[1] );
2170
2171 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2172 STORE( &r[0], 0, chan_index );
2173 }
2174 break;
2175
2176 case TGSI_OPCODE_DP4:
2177 /* TGSI_OPCODE_DOT4 */
2178 FETCH(&r[0], 0, CHAN_X);
2179 FETCH(&r[1], 1, CHAN_X);
2180
2181 micro_mul( &r[0], &r[0], &r[1] );
2182
2183 FETCH(&r[1], 0, CHAN_Y);
2184 FETCH(&r[2], 1, CHAN_Y);
2185
2186 micro_mul( &r[1], &r[1], &r[2] );
2187 micro_add( &r[0], &r[0], &r[1] );
2188
2189 FETCH(&r[1], 0, CHAN_Z);
2190 FETCH(&r[2], 1, CHAN_Z);
2191
2192 micro_mul( &r[1], &r[1], &r[2] );
2193 micro_add( &r[0], &r[0], &r[1] );
2194
2195 FETCH(&r[1], 0, CHAN_W);
2196 FETCH(&r[2], 1, CHAN_W);
2197
2198 micro_mul( &r[1], &r[1], &r[2] );
2199 micro_add( &r[0], &r[0], &r[1] );
2200
2201 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2202 STORE( &r[0], 0, chan_index );
2203 }
2204 break;
2205
2206 case TGSI_OPCODE_DST:
2207 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2208 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2209 }
2210
2211 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2212 FETCH( &r[0], 0, CHAN_Y );
2213 FETCH( &r[1], 1, CHAN_Y);
2214 micro_mul( &r[0], &r[0], &r[1] );
2215 STORE( &r[0], 0, CHAN_Y );
2216 }
2217
2218 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2219 FETCH( &r[0], 0, CHAN_Z );
2220 STORE( &r[0], 0, CHAN_Z );
2221 }
2222
2223 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2224 FETCH( &r[0], 1, CHAN_W );
2225 STORE( &r[0], 0, CHAN_W );
2226 }
2227 break;
2228
2229 case TGSI_OPCODE_MIN:
2230 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2231 FETCH(&r[0], 0, chan_index);
2232 FETCH(&r[1], 1, chan_index);
2233
2234 /* XXX use micro_min()?? */
2235 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
2236
2237 STORE(&r[0], 0, chan_index);
2238 }
2239 break;
2240
2241 case TGSI_OPCODE_MAX:
2242 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2243 FETCH(&r[0], 0, chan_index);
2244 FETCH(&r[1], 1, chan_index);
2245
2246 /* XXX use micro_max()?? */
2247 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2248
2249 STORE(&r[0], 0, chan_index );
2250 }
2251 break;
2252
2253 case TGSI_OPCODE_SLT:
2254 /* TGSI_OPCODE_SETLT */
2255 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2256 FETCH( &r[0], 0, chan_index );
2257 FETCH( &r[1], 1, chan_index );
2258 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2259 STORE( &r[0], 0, chan_index );
2260 }
2261 break;
2262
2263 case TGSI_OPCODE_SGE:
2264 /* TGSI_OPCODE_SETGE */
2265 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2266 FETCH( &r[0], 0, chan_index );
2267 FETCH( &r[1], 1, chan_index );
2268 micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2269 STORE( &r[0], 0, chan_index );
2270 }
2271 break;
2272
2273 case TGSI_OPCODE_MAD:
2274 /* TGSI_OPCODE_MADD */
2275 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2276 FETCH( &r[0], 0, chan_index );
2277 FETCH( &r[1], 1, chan_index );
2278 micro_mul( &r[0], &r[0], &r[1] );
2279 FETCH( &r[1], 2, chan_index );
2280 micro_add( &r[0], &r[0], &r[1] );
2281 STORE( &r[0], 0, chan_index );
2282 }
2283 break;
2284
2285 case TGSI_OPCODE_SUB:
2286 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2287 FETCH(&r[0], 0, chan_index);
2288 FETCH(&r[1], 1, chan_index);
2289
2290 micro_sub( &r[0], &r[0], &r[1] );
2291
2292 STORE(&r[0], 0, chan_index);
2293 }
2294 break;
2295
2296 case TGSI_OPCODE_LRP:
2297 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2298 FETCH(&r[0], 0, chan_index);
2299 FETCH(&r[1], 1, chan_index);
2300 FETCH(&r[2], 2, chan_index);
2301
2302 micro_sub( &r[1], &r[1], &r[2] );
2303 micro_mul( &r[0], &r[0], &r[1] );
2304 micro_add( &r[0], &r[0], &r[2] );
2305
2306 STORE(&r[0], 0, chan_index);
2307 }
2308 break;
2309
2310 case TGSI_OPCODE_CND:
2311 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2312 FETCH(&r[0], 0, chan_index);
2313 FETCH(&r[1], 1, chan_index);
2314 FETCH(&r[2], 2, chan_index);
2315 micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2316 STORE(&r[0], 0, chan_index);
2317 }
2318 break;
2319
2320 case TGSI_OPCODE_DP2A:
2321 FETCH( &r[0], 0, CHAN_X );
2322 FETCH( &r[1], 1, CHAN_X );
2323 micro_mul( &r[0], &r[0], &r[1] );
2324
2325 FETCH( &r[1], 0, CHAN_Y );
2326 FETCH( &r[2], 1, CHAN_Y );
2327 micro_mul( &r[1], &r[1], &r[2] );
2328 micro_add( &r[0], &r[0], &r[1] );
2329
2330 FETCH( &r[2], 2, CHAN_X );
2331 micro_add( &r[0], &r[0], &r[2] );
2332
2333 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2334 STORE( &r[0], 0, chan_index );
2335 }
2336 break;
2337
2338 case TGSI_OPCODE_FRC:
2339 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2340 FETCH( &r[0], 0, chan_index );
2341 micro_frc( &r[0], &r[0] );
2342 STORE( &r[0], 0, chan_index );
2343 }
2344 break;
2345
2346 case TGSI_OPCODE_CLAMP:
2347 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2348 FETCH(&r[0], 0, chan_index);
2349 FETCH(&r[1], 1, chan_index);
2350 micro_max(&r[0], &r[0], &r[1]);
2351 FETCH(&r[1], 2, chan_index);
2352 micro_min(&r[0], &r[0], &r[1]);
2353 STORE(&r[0], 0, chan_index);
2354 }
2355 break;
2356
2357 case TGSI_OPCODE_ROUND:
2358 case TGSI_OPCODE_ARR:
2359 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2360 FETCH( &r[0], 0, chan_index );
2361 micro_rnd( &r[0], &r[0] );
2362 STORE( &r[0], 0, chan_index );
2363 }
2364 break;
2365
2366 case TGSI_OPCODE_EX2:
2367 FETCH(&r[0], 0, CHAN_X);
2368
2369 #if FAST_MATH
2370 micro_exp2( &r[0], &r[0] );
2371 #else
2372 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2373 #endif
2374
2375 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2376 STORE( &r[0], 0, chan_index );
2377 }
2378 break;
2379
2380 case TGSI_OPCODE_LG2:
2381 FETCH( &r[0], 0, CHAN_X );
2382 micro_lg2( &r[0], &r[0] );
2383 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2384 STORE( &r[0], 0, chan_index );
2385 }
2386 break;
2387
2388 case TGSI_OPCODE_POW:
2389 FETCH(&r[0], 0, CHAN_X);
2390 FETCH(&r[1], 1, CHAN_X);
2391
2392 micro_pow( &r[0], &r[0], &r[1] );
2393
2394 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2395 STORE( &r[0], 0, chan_index );
2396 }
2397 break;
2398
2399 case TGSI_OPCODE_XPD:
2400 FETCH(&r[0], 0, CHAN_Y);
2401 FETCH(&r[1], 1, CHAN_Z);
2402
2403 micro_mul( &r[2], &r[0], &r[1] );
2404
2405 FETCH(&r[3], 0, CHAN_Z);
2406 FETCH(&r[4], 1, CHAN_Y);
2407
2408 micro_mul( &r[5], &r[3], &r[4] );
2409 micro_sub( &r[2], &r[2], &r[5] );
2410
2411 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2412 STORE( &r[2], 0, CHAN_X );
2413 }
2414
2415 FETCH(&r[2], 1, CHAN_X);
2416
2417 micro_mul( &r[3], &r[3], &r[2] );
2418
2419 FETCH(&r[5], 0, CHAN_X);
2420
2421 micro_mul( &r[1], &r[1], &r[5] );
2422 micro_sub( &r[3], &r[3], &r[1] );
2423
2424 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2425 STORE( &r[3], 0, CHAN_Y );
2426 }
2427
2428 micro_mul( &r[5], &r[5], &r[4] );
2429 micro_mul( &r[0], &r[0], &r[2] );
2430 micro_sub( &r[5], &r[5], &r[0] );
2431
2432 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2433 STORE( &r[5], 0, CHAN_Z );
2434 }
2435
2436 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2437 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2438 }
2439 break;
2440
2441 case TGSI_OPCODE_ABS:
2442 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2443 FETCH(&r[0], 0, chan_index);
2444
2445 micro_abs( &r[0], &r[0] );
2446
2447 STORE(&r[0], 0, chan_index);
2448 }
2449 break;
2450
2451 case TGSI_OPCODE_RCC:
2452 FETCH(&r[0], 0, CHAN_X);
2453 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2454 micro_float_clamp(&r[0], &r[0]);
2455 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2456 STORE(&r[0], 0, chan_index);
2457 }
2458 break;
2459
2460 case TGSI_OPCODE_DPH:
2461 FETCH(&r[0], 0, CHAN_X);
2462 FETCH(&r[1], 1, CHAN_X);
2463
2464 micro_mul( &r[0], &r[0], &r[1] );
2465
2466 FETCH(&r[1], 0, CHAN_Y);
2467 FETCH(&r[2], 1, CHAN_Y);
2468
2469 micro_mul( &r[1], &r[1], &r[2] );
2470 micro_add( &r[0], &r[0], &r[1] );
2471
2472 FETCH(&r[1], 0, CHAN_Z);
2473 FETCH(&r[2], 1, CHAN_Z);
2474
2475 micro_mul( &r[1], &r[1], &r[2] );
2476 micro_add( &r[0], &r[0], &r[1] );
2477
2478 FETCH(&r[1], 1, CHAN_W);
2479
2480 micro_add( &r[0], &r[0], &r[1] );
2481
2482 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2483 STORE( &r[0], 0, chan_index );
2484 }
2485 break;
2486
2487 case TGSI_OPCODE_COS:
2488 FETCH(&r[0], 0, CHAN_X);
2489
2490 micro_cos( &r[0], &r[0] );
2491
2492 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2493 STORE( &r[0], 0, chan_index );
2494 }
2495 break;
2496
2497 case TGSI_OPCODE_DDX:
2498 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2499 FETCH( &r[0], 0, chan_index );
2500 micro_ddx( &r[0], &r[0] );
2501 STORE( &r[0], 0, chan_index );
2502 }
2503 break;
2504
2505 case TGSI_OPCODE_DDY:
2506 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2507 FETCH( &r[0], 0, chan_index );
2508 micro_ddy( &r[0], &r[0] );
2509 STORE( &r[0], 0, chan_index );
2510 }
2511 break;
2512
2513 case TGSI_OPCODE_KILP:
2514 exec_kilp (mach, inst);
2515 break;
2516
2517 case TGSI_OPCODE_KIL:
2518 exec_kil (mach, inst);
2519 break;
2520
2521 case TGSI_OPCODE_PK2H:
2522 assert (0);
2523 break;
2524
2525 case TGSI_OPCODE_PK2US:
2526 assert (0);
2527 break;
2528
2529 case TGSI_OPCODE_PK4B:
2530 assert (0);
2531 break;
2532
2533 case TGSI_OPCODE_PK4UB:
2534 assert (0);
2535 break;
2536
2537 case TGSI_OPCODE_RFL:
2538 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2539 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2540 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2541 /* r0 = dp3(src0, src0) */
2542 FETCH(&r[2], 0, CHAN_X);
2543 micro_mul(&r[0], &r[2], &r[2]);
2544 FETCH(&r[4], 0, CHAN_Y);
2545 micro_mul(&r[8], &r[4], &r[4]);
2546 micro_add(&r[0], &r[0], &r[8]);
2547 FETCH(&r[6], 0, CHAN_Z);
2548 micro_mul(&r[8], &r[6], &r[6]);
2549 micro_add(&r[0], &r[0], &r[8]);
2550
2551 /* r1 = dp3(src0, src1) */
2552 FETCH(&r[3], 1, CHAN_X);
2553 micro_mul(&r[1], &r[2], &r[3]);
2554 FETCH(&r[5], 1, CHAN_Y);
2555 micro_mul(&r[8], &r[4], &r[5]);
2556 micro_add(&r[1], &r[1], &r[8]);
2557 FETCH(&r[7], 1, CHAN_Z);
2558 micro_mul(&r[8], &r[6], &r[7]);
2559 micro_add(&r[1], &r[1], &r[8]);
2560
2561 /* r1 = 2 * r1 / r0 */
2562 micro_add(&r[1], &r[1], &r[1]);
2563 micro_div(&r[1], &r[1], &r[0]);
2564
2565 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2566 micro_mul(&r[2], &r[2], &r[1]);
2567 micro_sub(&r[2], &r[2], &r[3]);
2568 STORE(&r[2], 0, CHAN_X);
2569 }
2570 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2571 micro_mul(&r[4], &r[4], &r[1]);
2572 micro_sub(&r[4], &r[4], &r[5]);
2573 STORE(&r[4], 0, CHAN_Y);
2574 }
2575 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2576 micro_mul(&r[6], &r[6], &r[1]);
2577 micro_sub(&r[6], &r[6], &r[7]);
2578 STORE(&r[6], 0, CHAN_Z);
2579 }
2580 }
2581 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2582 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2583 }
2584 break;
2585
2586 case TGSI_OPCODE_SEQ:
2587 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2588 FETCH( &r[0], 0, chan_index );
2589 FETCH( &r[1], 1, chan_index );
2590 micro_eq( &r[0], &r[0], &r[1],
2591 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2592 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2593 STORE( &r[0], 0, chan_index );
2594 }
2595 break;
2596
2597 case TGSI_OPCODE_SFL:
2598 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2599 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2600 }
2601 break;
2602
2603 case TGSI_OPCODE_SGT:
2604 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2605 FETCH( &r[0], 0, chan_index );
2606 FETCH( &r[1], 1, chan_index );
2607 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2608 STORE( &r[0], 0, chan_index );
2609 }
2610 break;
2611
2612 case TGSI_OPCODE_SIN:
2613 FETCH( &r[0], 0, CHAN_X );
2614 micro_sin( &r[0], &r[0] );
2615 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2616 STORE( &r[0], 0, chan_index );
2617 }
2618 break;
2619
2620 case TGSI_OPCODE_SLE:
2621 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2622 FETCH( &r[0], 0, chan_index );
2623 FETCH( &r[1], 1, chan_index );
2624 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2625 STORE( &r[0], 0, chan_index );
2626 }
2627 break;
2628
2629 case TGSI_OPCODE_SNE:
2630 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2631 FETCH( &r[0], 0, chan_index );
2632 FETCH( &r[1], 1, chan_index );
2633 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2634 STORE( &r[0], 0, chan_index );
2635 }
2636 break;
2637
2638 case TGSI_OPCODE_STR:
2639 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2640 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2641 }
2642 break;
2643
2644 case TGSI_OPCODE_TEX:
2645 /* simple texture lookup */
2646 /* src[0] = texcoord */
2647 /* src[1] = sampler unit */
2648 exec_tex(mach, inst, FALSE, FALSE);
2649 break;
2650
2651 case TGSI_OPCODE_TXB:
2652 /* Texture lookup with lod bias */
2653 /* src[0] = texcoord (src[0].w = LOD bias) */
2654 /* src[1] = sampler unit */
2655 exec_tex(mach, inst, TRUE, FALSE);
2656 break;
2657
2658 case TGSI_OPCODE_TXD:
2659 /* Texture lookup with explicit partial derivatives */
2660 /* src[0] = texcoord */
2661 /* src[1] = d[strq]/dx */
2662 /* src[2] = d[strq]/dy */
2663 /* src[3] = sampler unit */
2664 assert (0);
2665 break;
2666
2667 case TGSI_OPCODE_TXL:
2668 /* Texture lookup with explicit LOD */
2669 /* src[0] = texcoord (src[0].w = LOD) */
2670 /* src[1] = sampler unit */
2671 exec_tex(mach, inst, TRUE, FALSE);
2672 break;
2673
2674 case TGSI_OPCODE_TXP:
2675 /* Texture lookup with projection */
2676 /* src[0] = texcoord (src[0].w = projection) */
2677 /* src[1] = sampler unit */
2678 exec_tex(mach, inst, FALSE, TRUE);
2679 break;
2680
2681 case TGSI_OPCODE_UP2H:
2682 assert (0);
2683 break;
2684
2685 case TGSI_OPCODE_UP2US:
2686 assert (0);
2687 break;
2688
2689 case TGSI_OPCODE_UP4B:
2690 assert (0);
2691 break;
2692
2693 case TGSI_OPCODE_UP4UB:
2694 assert (0);
2695 break;
2696
2697 case TGSI_OPCODE_X2D:
2698 FETCH(&r[0], 1, CHAN_X);
2699 FETCH(&r[1], 1, CHAN_Y);
2700 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2701 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2702 FETCH(&r[2], 2, CHAN_X);
2703 micro_mul(&r[2], &r[2], &r[0]);
2704 FETCH(&r[3], 2, CHAN_Y);
2705 micro_mul(&r[3], &r[3], &r[1]);
2706 micro_add(&r[2], &r[2], &r[3]);
2707 FETCH(&r[3], 0, CHAN_X);
2708 micro_add(&r[2], &r[2], &r[3]);
2709 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2710 STORE(&r[2], 0, CHAN_X);
2711 }
2712 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2713 STORE(&r[2], 0, CHAN_Z);
2714 }
2715 }
2716 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2717 IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2718 FETCH(&r[2], 2, CHAN_Z);
2719 micro_mul(&r[2], &r[2], &r[0]);
2720 FETCH(&r[3], 2, CHAN_W);
2721 micro_mul(&r[3], &r[3], &r[1]);
2722 micro_add(&r[2], &r[2], &r[3]);
2723 FETCH(&r[3], 0, CHAN_Y);
2724 micro_add(&r[2], &r[2], &r[3]);
2725 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2726 STORE(&r[2], 0, CHAN_Y);
2727 }
2728 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2729 STORE(&r[2], 0, CHAN_W);
2730 }
2731 }
2732 break;
2733
2734 case TGSI_OPCODE_ARA:
2735 assert (0);
2736 break;
2737
2738 case TGSI_OPCODE_BRA:
2739 assert (0);
2740 break;
2741
2742 case TGSI_OPCODE_CAL:
2743 /* skip the call if no execution channels are enabled */
2744 if (mach->ExecMask) {
2745 /* do the call */
2746
2747 /* First, record the depths of the execution stacks.
2748 * This is important for deeply nested/looped return statements.
2749 * We have to unwind the stacks by the correct amount. For a
2750 * real code generator, we could determine the number of entries
2751 * to pop off each stack with simple static analysis and avoid
2752 * implementing this data structure at run time.
2753 */
2754 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
2755 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
2756 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
2757 /* note that PC was already incremented above */
2758 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
2759
2760 mach->CallStackTop++;
2761
2762 /* Second, push the Cond, Loop, Cont, Func stacks */
2763 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2764 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2765 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2766 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2767 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2768 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2769 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2770 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2771
2772 /* Finally, jump to the subroutine */
2773 *pc = inst->InstructionExtLabel.Label;
2774 }
2775 break;
2776
2777 case TGSI_OPCODE_RET:
2778 mach->FuncMask &= ~mach->ExecMask;
2779 UPDATE_EXEC_MASK(mach);
2780
2781 if (mach->FuncMask == 0x0) {
2782 /* really return now (otherwise, keep executing) */
2783
2784 if (mach->CallStackTop == 0) {
2785 /* returning from main() */
2786 *pc = -1;
2787 return;
2788 }
2789
2790 assert(mach->CallStackTop > 0);
2791 mach->CallStackTop--;
2792
2793 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
2794 mach->CondMask = mach->CondStack[mach->CondStackTop];
2795
2796 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
2797 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
2798
2799 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
2800 mach->ContMask = mach->ContStack[mach->ContStackTop];
2801
2802 assert(mach->FuncStackTop > 0);
2803 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2804
2805 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
2806
2807 UPDATE_EXEC_MASK(mach);
2808 }
2809 break;
2810
2811 case TGSI_OPCODE_SSG:
2812 /* TGSI_OPCODE_SGN */
2813 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2814 FETCH( &r[0], 0, chan_index );
2815 micro_sgn( &r[0], &r[0] );
2816 STORE( &r[0], 0, chan_index );
2817 }
2818 break;
2819
2820 case TGSI_OPCODE_CMP:
2821 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2822 FETCH(&r[0], 0, chan_index);
2823 FETCH(&r[1], 1, chan_index);
2824 FETCH(&r[2], 2, chan_index);
2825
2826 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2827
2828 STORE(&r[0], 0, chan_index);
2829 }
2830 break;
2831
2832 case TGSI_OPCODE_SCS:
2833 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2834 FETCH( &r[0], 0, CHAN_X );
2835 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2836 micro_cos(&r[1], &r[0]);
2837 STORE(&r[1], 0, CHAN_X);
2838 }
2839 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2840 micro_sin(&r[1], &r[0]);
2841 STORE(&r[1], 0, CHAN_Y);
2842 }
2843 }
2844 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2845 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2846 }
2847 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2848 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2849 }
2850 break;
2851
2852 case TGSI_OPCODE_NRM:
2853 /* 3-component vector normalize */
2854 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2855 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2856 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2857 /* r3 = sqrt(dp3(src0, src0)) */
2858 FETCH(&r[0], 0, CHAN_X);
2859 micro_mul(&r[3], &r[0], &r[0]);
2860 FETCH(&r[1], 0, CHAN_Y);
2861 micro_mul(&r[4], &r[1], &r[1]);
2862 micro_add(&r[3], &r[3], &r[4]);
2863 FETCH(&r[2], 0, CHAN_Z);
2864 micro_mul(&r[4], &r[2], &r[2]);
2865 micro_add(&r[3], &r[3], &r[4]);
2866 micro_sqrt(&r[3], &r[3]);
2867
2868 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2869 micro_div(&r[0], &r[0], &r[3]);
2870 STORE(&r[0], 0, CHAN_X);
2871 }
2872 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2873 micro_div(&r[1], &r[1], &r[3]);
2874 STORE(&r[1], 0, CHAN_Y);
2875 }
2876 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2877 micro_div(&r[2], &r[2], &r[3]);
2878 STORE(&r[2], 0, CHAN_Z);
2879 }
2880 }
2881 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2882 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2883 }
2884 break;
2885
2886 case TGSI_OPCODE_NRM4:
2887 /* 4-component vector normalize */
2888 {
2889 union tgsi_exec_channel tmp, dot;
2890
2891 /* tmp = dp4(src0, src0): */
2892 FETCH( &r[0], 0, CHAN_X );
2893 micro_mul( &tmp, &r[0], &r[0] );
2894
2895 FETCH( &r[1], 0, CHAN_Y );
2896 micro_mul( &dot, &r[1], &r[1] );
2897 micro_add( &tmp, &tmp, &dot );
2898
2899 FETCH( &r[2], 0, CHAN_Z );
2900 micro_mul( &dot, &r[2], &r[2] );
2901 micro_add( &tmp, &tmp, &dot );
2902
2903 FETCH( &r[3], 0, CHAN_W );
2904 micro_mul( &dot, &r[3], &r[3] );
2905 micro_add( &tmp, &tmp, &dot );
2906
2907 /* tmp = 1 / sqrt(tmp) */
2908 micro_sqrt( &tmp, &tmp );
2909 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2910
2911 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2912 /* chan = chan * tmp */
2913 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2914 STORE( &r[chan_index], 0, chan_index );
2915 }
2916 }
2917 break;
2918
2919 case TGSI_OPCODE_DIV:
2920 assert( 0 );
2921 break;
2922
2923 case TGSI_OPCODE_DP2:
2924 FETCH( &r[0], 0, CHAN_X );
2925 FETCH( &r[1], 1, CHAN_X );
2926 micro_mul( &r[0], &r[0], &r[1] );
2927
2928 FETCH( &r[1], 0, CHAN_Y );
2929 FETCH( &r[2], 1, CHAN_Y );
2930 micro_mul( &r[1], &r[1], &r[2] );
2931 micro_add( &r[0], &r[0], &r[1] );
2932
2933 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2934 STORE( &r[0], 0, chan_index );
2935 }
2936 break;
2937
2938 case TGSI_OPCODE_IF:
2939 /* push CondMask */
2940 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2941 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2942 FETCH( &r[0], 0, CHAN_X );
2943 /* update CondMask */
2944 if( ! r[0].u[0] ) {
2945 mach->CondMask &= ~0x1;
2946 }
2947 if( ! r[0].u[1] ) {
2948 mach->CondMask &= ~0x2;
2949 }
2950 if( ! r[0].u[2] ) {
2951 mach->CondMask &= ~0x4;
2952 }
2953 if( ! r[0].u[3] ) {
2954 mach->CondMask &= ~0x8;
2955 }
2956 UPDATE_EXEC_MASK(mach);
2957 /* Todo: If CondMask==0, jump to ELSE */
2958 break;
2959
2960 case TGSI_OPCODE_ELSE:
2961 /* invert CondMask wrt previous mask */
2962 {
2963 uint prevMask;
2964 assert(mach->CondStackTop > 0);
2965 prevMask = mach->CondStack[mach->CondStackTop - 1];
2966 mach->CondMask = ~mach->CondMask & prevMask;
2967 UPDATE_EXEC_MASK(mach);
2968 /* Todo: If CondMask==0, jump to ENDIF */
2969 }
2970 break;
2971
2972 case TGSI_OPCODE_ENDIF:
2973 /* pop CondMask */
2974 assert(mach->CondStackTop > 0);
2975 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2976 UPDATE_EXEC_MASK(mach);
2977 break;
2978
2979 case TGSI_OPCODE_END:
2980 /* halt execution */
2981 *pc = -1;
2982 break;
2983
2984 case TGSI_OPCODE_REP:
2985 assert (0);
2986 break;
2987
2988 case TGSI_OPCODE_ENDREP:
2989 assert (0);
2990 break;
2991
2992 case TGSI_OPCODE_PUSHA:
2993 assert (0);
2994 break;
2995
2996 case TGSI_OPCODE_POPA:
2997 assert (0);
2998 break;
2999
3000 case TGSI_OPCODE_CEIL:
3001 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3002 FETCH( &r[0], 0, chan_index );
3003 micro_ceil( &r[0], &r[0] );
3004 STORE( &r[0], 0, chan_index );
3005 }
3006 break;
3007
3008 case TGSI_OPCODE_I2F:
3009 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3010 FETCH( &r[0], 0, chan_index );
3011 micro_i2f( &r[0], &r[0] );
3012 STORE( &r[0], 0, chan_index );
3013 }
3014 break;
3015
3016 case TGSI_OPCODE_NOT:
3017 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3018 FETCH( &r[0], 0, chan_index );
3019 micro_not( &r[0], &r[0] );
3020 STORE( &r[0], 0, chan_index );
3021 }
3022 break;
3023
3024 case TGSI_OPCODE_TRUNC:
3025 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3026 FETCH( &r[0], 0, chan_index );
3027 micro_trunc( &r[0], &r[0] );
3028 STORE( &r[0], 0, chan_index );
3029 }
3030 break;
3031
3032 case TGSI_OPCODE_SHL:
3033 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3034 FETCH( &r[0], 0, chan_index );
3035 FETCH( &r[1], 1, chan_index );
3036 micro_shl( &r[0], &r[0], &r[1] );
3037 STORE( &r[0], 0, chan_index );
3038 }
3039 break;
3040
3041 case TGSI_OPCODE_SHR:
3042 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3043 FETCH( &r[0], 0, chan_index );
3044 FETCH( &r[1], 1, chan_index );
3045 micro_ishr( &r[0], &r[0], &r[1] );
3046 STORE( &r[0], 0, chan_index );
3047 }
3048 break;
3049
3050 case TGSI_OPCODE_AND:
3051 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3052 FETCH( &r[0], 0, chan_index );
3053 FETCH( &r[1], 1, chan_index );
3054 micro_and( &r[0], &r[0], &r[1] );
3055 STORE( &r[0], 0, chan_index );
3056 }
3057 break;
3058
3059 case TGSI_OPCODE_OR:
3060 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3061 FETCH( &r[0], 0, chan_index );
3062 FETCH( &r[1], 1, chan_index );
3063 micro_or( &r[0], &r[0], &r[1] );
3064 STORE( &r[0], 0, chan_index );
3065 }
3066 break;
3067
3068 case TGSI_OPCODE_MOD:
3069 assert (0);
3070 break;
3071
3072 case TGSI_OPCODE_XOR:
3073 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3074 FETCH( &r[0], 0, chan_index );
3075 FETCH( &r[1], 1, chan_index );
3076 micro_xor( &r[0], &r[0], &r[1] );
3077 STORE( &r[0], 0, chan_index );
3078 }
3079 break;
3080
3081 case TGSI_OPCODE_SAD:
3082 assert (0);
3083 break;
3084
3085 case TGSI_OPCODE_TXF:
3086 assert (0);
3087 break;
3088
3089 case TGSI_OPCODE_TXQ:
3090 assert (0);
3091 break;
3092
3093 case TGSI_OPCODE_EMIT:
3094 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
3095 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
3096 break;
3097
3098 case TGSI_OPCODE_ENDPRIM:
3099 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
3100 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
3101 break;
3102
3103 case TGSI_OPCODE_BGNFOR:
3104 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3105 for (chan_index = 0; chan_index < 3; chan_index++) {
3106 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
3107 }
3108 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
3109 ++mach->LoopCounterStackTop;
3110 /* fall-through (for now) */
3111 case TGSI_OPCODE_BGNLOOP:
3112 /* push LoopMask and ContMasks */
3113 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3114 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3115 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3116 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3117 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3118 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
3119 break;
3120
3121 case TGSI_OPCODE_ENDFOR:
3122 assert(mach->LoopCounterStackTop > 0);
3123 micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3124 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3125 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
3126 /* update LoopMask */
3127 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) {
3128 mach->LoopMask &= ~0x1;
3129 }
3130 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) {
3131 mach->LoopMask &= ~0x2;
3132 }
3133 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) {
3134 mach->LoopMask &= ~0x4;
3135 }
3136 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) {
3137 mach->LoopMask &= ~0x8;
3138 }
3139 micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3140 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3141 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
3142 assert(mach->LoopLabelStackTop > 0);
3143 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
3144 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
3145 /* Restore ContMask, but don't pop */
3146 assert(mach->ContStackTop > 0);
3147 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3148 UPDATE_EXEC_MASK(mach);
3149 if (mach->ExecMask) {
3150 /* repeat loop: jump to instruction just past BGNLOOP */
3151 assert(mach->LoopLabelStackTop > 0);
3152 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3153 }
3154 else {
3155 /* exit loop: pop LoopMask */
3156 assert(mach->LoopStackTop > 0);
3157 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3158 /* pop ContMask */
3159 assert(mach->ContStackTop > 0);
3160 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3161 assert(mach->LoopLabelStackTop > 0);
3162 --mach->LoopLabelStackTop;
3163 assert(mach->LoopCounterStackTop > 0);
3164 --mach->LoopCounterStackTop;
3165 }
3166 UPDATE_EXEC_MASK(mach);
3167 break;
3168
3169 case TGSI_OPCODE_ENDLOOP:
3170 /* Restore ContMask, but don't pop */
3171 assert(mach->ContStackTop > 0);
3172 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3173 UPDATE_EXEC_MASK(mach);
3174 if (mach->ExecMask) {
3175 /* repeat loop: jump to instruction just past BGNLOOP */
3176 assert(mach->LoopLabelStackTop > 0);
3177 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3178 }
3179 else {
3180 /* exit loop: pop LoopMask */
3181 assert(mach->LoopStackTop > 0);
3182 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3183 /* pop ContMask */
3184 assert(mach->ContStackTop > 0);
3185 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3186 assert(mach->LoopLabelStackTop > 0);
3187 --mach->LoopLabelStackTop;
3188 }
3189 UPDATE_EXEC_MASK(mach);
3190 break;
3191
3192 case TGSI_OPCODE_BRK:
3193 /* turn off loop channels for each enabled exec channel */
3194 mach->LoopMask &= ~mach->ExecMask;
3195 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3196 UPDATE_EXEC_MASK(mach);
3197 break;
3198
3199 case TGSI_OPCODE_CONT:
3200 /* turn off cont channels for each enabled exec channel */
3201 mach->ContMask &= ~mach->ExecMask;
3202 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3203 UPDATE_EXEC_MASK(mach);
3204 break;
3205
3206 case TGSI_OPCODE_BGNSUB:
3207 /* no-op */
3208 break;
3209
3210 case TGSI_OPCODE_ENDSUB:
3211 /* no-op */
3212 break;
3213
3214 case TGSI_OPCODE_NOP:
3215 break;
3216
3217 default:
3218 assert( 0 );
3219 }
3220 }
3221
3222
3223 /**
3224 * Run TGSI interpreter.
3225 * \return bitmask of "alive" quad components
3226 */
3227 uint
3228 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3229 {
3230 uint i;
3231 int pc = 0;
3232
3233 mach->CondMask = 0xf;
3234 mach->LoopMask = 0xf;
3235 mach->ContMask = 0xf;
3236 mach->FuncMask = 0xf;
3237 mach->ExecMask = 0xf;
3238
3239 assert(mach->CondStackTop == 0);
3240 assert(mach->LoopStackTop == 0);
3241 assert(mach->ContStackTop == 0);
3242 assert(mach->CallStackTop == 0);
3243
3244 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3245 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3246
3247 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3248 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3249 mach->Primitives[0] = 0;
3250 }
3251
3252 for (i = 0; i < QUAD_SIZE; i++) {
3253 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3254 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3255 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3256 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3257 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3258 }
3259
3260 /* execute declarations (interpolants) */
3261 for (i = 0; i < mach->NumDeclarations; i++) {
3262 exec_declaration( mach, mach->Declarations+i );
3263 }
3264
3265 /* execute instructions, until pc is set to -1 */
3266 while (pc != -1) {
3267 assert(pc < (int) mach->NumInstructions);
3268 exec_instruction( mach, mach->Instructions + pc, &pc );
3269 }
3270
3271 #if 0
3272 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3273 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3274 /*
3275 * Scale back depth component.
3276 */
3277 for (i = 0; i < 4; i++)
3278 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3279 }
3280 #endif
3281
3282 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3283 }