Merge commit 'origin/draw-vbuf-interface'
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK) which are controlled by the
 * flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_parse.h"
57 #include "tgsi/tgsi_util.h"
58 #include "tgsi_exec.h"
59 #include "util/u_memory.h"
60 #include "util/u_math.h"
61
62 #define FAST_MATH 1
63
64 #define TILE_TOP_LEFT 0
65 #define TILE_TOP_RIGHT 1
66 #define TILE_BOTTOM_LEFT 2
67 #define TILE_BOTTOM_RIGHT 3
68
69 #define CHAN_X 0
70 #define CHAN_Y 1
71 #define CHAN_Z 2
72 #define CHAN_W 3
73
74 /*
75 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
76 */
77 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
78 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
79 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
80 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
81 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
82 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
83 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
84 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
85 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
86 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
87 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
88 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
89 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
90 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
91 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
92 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
93 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
94 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
95 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
96 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
97 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
98 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
99 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
100 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
101 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
102 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
103 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
104 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
105 #define TEMP_R0 TGSI_EXEC_TEMP_R0
106
/*
 * Write-mask helpers.
 *
 * IS_CHANNEL_ENABLED tests bit CHAN of the write mask of INST's first
 * destination register (FullDstRegisters[0]); IS_CHANNEL_ENABLED2 does the
 * same for the second destination register (FullDstRegisters[1]).
 */
#define IS_CHANNEL_ENABLED(INST, CHAN) \
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

#define IS_CHANNEL_ENABLED2(INST, CHAN) \
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/*
 * Loop headers: the statement that follows the macro runs once for every
 * channel in [0, NUM_CHANNELS) whose write-mask bit is set.  CHAN must be
 * an assignable integer variable.  The expansion is "for (...) if (...)",
 * so an "else" placed directly after the body would bind to that "if".
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++) \
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++) \
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
120
121
/**
 * Recompute the execution mask as the bitwise AND of the conditional, loop,
 * continue and function masks.
 *
 * MACH is a pointer to the machine.  It is parenthesized so that any pointer
 * expression (e.g. "&m" or "p + 1") works, but it is evaluated several
 * times, so it must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
125
126 /**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131 void
132 tgsi_exec_machine_bind_shader(
133 struct tgsi_exec_machine *mach,
134 const struct tgsi_token *tokens,
135 uint numSamplers,
136 struct tgsi_sampler **samplers)
137 {
138 uint k;
139 struct tgsi_parse_context parse;
140 struct tgsi_exec_labels *labels = &mach->Labels;
141 struct tgsi_full_instruction *instructions;
142 struct tgsi_full_declaration *declarations;
143 uint maxInstructions = 10, numInstructions = 0;
144 uint maxDeclarations = 10, numDeclarations = 0;
145 uint instno = 0;
146
147 #if 0
148 tgsi_dump(tokens, 0);
149 #endif
150
151 util_init_math();
152
153 mach->Tokens = tokens;
154 mach->Samplers = samplers;
155
156 k = tgsi_parse_init (&parse, mach->Tokens);
157 if (k != TGSI_PARSE_OK) {
158 debug_printf( "Problem parsing!\n" );
159 return;
160 }
161
162 mach->Processor = parse.FullHeader.Processor.Processor;
163 mach->ImmLimit = 0;
164 labels->count = 0;
165
166 declarations = (struct tgsi_full_declaration *)
167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169 if (!declarations) {
170 return;
171 }
172
173 instructions = (struct tgsi_full_instruction *)
174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176 if (!instructions) {
177 FREE( declarations );
178 return;
179 }
180
181 while( !tgsi_parse_end_of_tokens( &parse ) ) {
182 uint pointer = parse.Position;
183 uint i;
184
185 tgsi_parse_token( &parse );
186 switch( parse.FullToken.Token.Type ) {
187 case TGSI_TOKEN_TYPE_DECLARATION:
188 /* save expanded declaration */
189 if (numDeclarations == maxDeclarations) {
190 declarations = REALLOC(declarations,
191 maxDeclarations
192 * sizeof(struct tgsi_full_declaration),
193 (maxDeclarations + 10)
194 * sizeof(struct tgsi_full_declaration));
195 maxDeclarations += 10;
196 }
197 memcpy(declarations + numDeclarations,
198 &parse.FullToken.FullDeclaration,
199 sizeof(declarations[0]));
200 numDeclarations++;
201 break;
202
203 case TGSI_TOKEN_TYPE_IMMEDIATE:
204 {
205 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
206 assert( size % 4 == 0 );
207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209 for( i = 0; i < size; i++ ) {
210 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212 }
213 mach->ImmLimit += size / 4;
214 }
215 break;
216
217 case TGSI_TOKEN_TYPE_INSTRUCTION:
218 assert( labels->count < MAX_LABELS );
219
220 labels->labels[labels->count][0] = instno;
221 labels->labels[labels->count][1] = pointer;
222 labels->count++;
223
224 /* save expanded instruction */
225 if (numInstructions == maxInstructions) {
226 instructions = REALLOC(instructions,
227 maxInstructions
228 * sizeof(struct tgsi_full_instruction),
229 (maxInstructions + 10)
230 * sizeof(struct tgsi_full_instruction));
231 maxInstructions += 10;
232 }
233 memcpy(instructions + numInstructions,
234 &parse.FullToken.FullInstruction,
235 sizeof(instructions[0]));
236 numInstructions++;
237 break;
238
239 default:
240 assert( 0 );
241 }
242 }
243 tgsi_parse_free (&parse);
244
245 if (mach->Declarations) {
246 FREE( mach->Declarations );
247 }
248 mach->Declarations = declarations;
249 mach->NumDeclarations = numDeclarations;
250
251 if (mach->Instructions) {
252 FREE( mach->Instructions );
253 }
254 mach->Instructions = instructions;
255 mach->NumInstructions = numInstructions;
256 }
257
258
259 void
260 tgsi_exec_machine_init(
261 struct tgsi_exec_machine *mach )
262 {
263 uint i;
264
265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268 /* Setup constants. */
269 for( i = 0; i < 4; i++ ) {
270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280 }
281 }
282
283
284 void
285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286 {
287 if (mach->Instructions) {
288 FREE(mach->Instructions);
289 mach->Instructions = NULL;
290 mach->NumInstructions = 0;
291 }
292 if (mach->Declarations) {
293 FREE(mach->Declarations);
294 mach->Declarations = NULL;
295 mach->NumDeclarations = 0;
296 }
297 }
298
299
300 static void
301 micro_abs(
302 union tgsi_exec_channel *dst,
303 const union tgsi_exec_channel *src )
304 {
305 dst->f[0] = fabsf( src->f[0] );
306 dst->f[1] = fabsf( src->f[1] );
307 dst->f[2] = fabsf( src->f[2] );
308 dst->f[3] = fabsf( src->f[3] );
309 }
310
311 static void
312 micro_add(
313 union tgsi_exec_channel *dst,
314 const union tgsi_exec_channel *src0,
315 const union tgsi_exec_channel *src1 )
316 {
317 dst->f[0] = src0->f[0] + src1->f[0];
318 dst->f[1] = src0->f[1] + src1->f[1];
319 dst->f[2] = src0->f[2] + src1->f[2];
320 dst->f[3] = src0->f[3] + src1->f[3];
321 }
322
323 #if 0
324 static void
325 micro_iadd(
326 union tgsi_exec_channel *dst,
327 const union tgsi_exec_channel *src0,
328 const union tgsi_exec_channel *src1 )
329 {
330 dst->i[0] = src0->i[0] + src1->i[0];
331 dst->i[1] = src0->i[1] + src1->i[1];
332 dst->i[2] = src0->i[2] + src1->i[2];
333 dst->i[3] = src0->i[3] + src1->i[3];
334 }
335 #endif
336
337 static void
338 micro_and(
339 union tgsi_exec_channel *dst,
340 const union tgsi_exec_channel *src0,
341 const union tgsi_exec_channel *src1 )
342 {
343 dst->u[0] = src0->u[0] & src1->u[0];
344 dst->u[1] = src0->u[1] & src1->u[1];
345 dst->u[2] = src0->u[2] & src1->u[2];
346 dst->u[3] = src0->u[3] & src1->u[3];
347 }
348
349 static void
350 micro_ceil(
351 union tgsi_exec_channel *dst,
352 const union tgsi_exec_channel *src )
353 {
354 dst->f[0] = ceilf( src->f[0] );
355 dst->f[1] = ceilf( src->f[1] );
356 dst->f[2] = ceilf( src->f[2] );
357 dst->f[3] = ceilf( src->f[3] );
358 }
359
360 static void
361 micro_cos(
362 union tgsi_exec_channel *dst,
363 const union tgsi_exec_channel *src )
364 {
365 dst->f[0] = cosf( src->f[0] );
366 dst->f[1] = cosf( src->f[1] );
367 dst->f[2] = cosf( src->f[2] );
368 dst->f[3] = cosf( src->f[3] );
369 }
370
371 static void
372 micro_ddx(
373 union tgsi_exec_channel *dst,
374 const union tgsi_exec_channel *src )
375 {
376 dst->f[0] =
377 dst->f[1] =
378 dst->f[2] =
379 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
380 }
381
382 static void
383 micro_ddy(
384 union tgsi_exec_channel *dst,
385 const union tgsi_exec_channel *src )
386 {
387 dst->f[0] =
388 dst->f[1] =
389 dst->f[2] =
390 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
391 }
392
393 static void
394 micro_div(
395 union tgsi_exec_channel *dst,
396 const union tgsi_exec_channel *src0,
397 const union tgsi_exec_channel *src1 )
398 {
399 if (src1->f[0] != 0) {
400 dst->f[0] = src0->f[0] / src1->f[0];
401 }
402 if (src1->f[1] != 0) {
403 dst->f[1] = src0->f[1] / src1->f[1];
404 }
405 if (src1->f[2] != 0) {
406 dst->f[2] = src0->f[2] / src1->f[2];
407 }
408 if (src1->f[3] != 0) {
409 dst->f[3] = src0->f[3] / src1->f[3];
410 }
411 }
412
413 #if 0
414 static void
415 micro_udiv(
416 union tgsi_exec_channel *dst,
417 const union tgsi_exec_channel *src0,
418 const union tgsi_exec_channel *src1 )
419 {
420 dst->u[0] = src0->u[0] / src1->u[0];
421 dst->u[1] = src0->u[1] / src1->u[1];
422 dst->u[2] = src0->u[2] / src1->u[2];
423 dst->u[3] = src0->u[3] / src1->u[3];
424 }
425 #endif
426
427 static void
428 micro_eq(
429 union tgsi_exec_channel *dst,
430 const union tgsi_exec_channel *src0,
431 const union tgsi_exec_channel *src1,
432 const union tgsi_exec_channel *src2,
433 const union tgsi_exec_channel *src3 )
434 {
435 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
436 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
437 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
438 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
439 }
440
441 #if 0
442 static void
443 micro_ieq(
444 union tgsi_exec_channel *dst,
445 const union tgsi_exec_channel *src0,
446 const union tgsi_exec_channel *src1,
447 const union tgsi_exec_channel *src2,
448 const union tgsi_exec_channel *src3 )
449 {
450 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
451 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
452 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
453 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
454 }
455 #endif
456
457 static void
458 micro_exp2(
459 union tgsi_exec_channel *dst,
460 const union tgsi_exec_channel *src)
461 {
462 #if FAST_MATH
463 dst->f[0] = util_fast_exp2( src->f[0] );
464 dst->f[1] = util_fast_exp2( src->f[1] );
465 dst->f[2] = util_fast_exp2( src->f[2] );
466 dst->f[3] = util_fast_exp2( src->f[3] );
467 #else
468 dst->f[0] = powf( 2.0f, src->f[0] );
469 dst->f[1] = powf( 2.0f, src->f[1] );
470 dst->f[2] = powf( 2.0f, src->f[2] );
471 dst->f[3] = powf( 2.0f, src->f[3] );
472 #endif
473 }
474
475 #if 0
476 static void
477 micro_f2ut(
478 union tgsi_exec_channel *dst,
479 const union tgsi_exec_channel *src )
480 {
481 dst->u[0] = (uint) src->f[0];
482 dst->u[1] = (uint) src->f[1];
483 dst->u[2] = (uint) src->f[2];
484 dst->u[3] = (uint) src->f[3];
485 }
486 #endif
487
488 static void
489 micro_flr(
490 union tgsi_exec_channel *dst,
491 const union tgsi_exec_channel *src )
492 {
493 dst->f[0] = floorf( src->f[0] );
494 dst->f[1] = floorf( src->f[1] );
495 dst->f[2] = floorf( src->f[2] );
496 dst->f[3] = floorf( src->f[3] );
497 }
498
499 static void
500 micro_frc(
501 union tgsi_exec_channel *dst,
502 const union tgsi_exec_channel *src )
503 {
504 dst->f[0] = src->f[0] - floorf( src->f[0] );
505 dst->f[1] = src->f[1] - floorf( src->f[1] );
506 dst->f[2] = src->f[2] - floorf( src->f[2] );
507 dst->f[3] = src->f[3] - floorf( src->f[3] );
508 }
509
510 static void
511 micro_ge(
512 union tgsi_exec_channel *dst,
513 const union tgsi_exec_channel *src0,
514 const union tgsi_exec_channel *src1,
515 const union tgsi_exec_channel *src2,
516 const union tgsi_exec_channel *src3 )
517 {
518 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
519 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
520 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
521 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
522 }
523
524 static void
525 micro_i2f(
526 union tgsi_exec_channel *dst,
527 const union tgsi_exec_channel *src )
528 {
529 dst->f[0] = (float) src->i[0];
530 dst->f[1] = (float) src->i[1];
531 dst->f[2] = (float) src->i[2];
532 dst->f[3] = (float) src->i[3];
533 }
534
535 static void
536 micro_lg2(
537 union tgsi_exec_channel *dst,
538 const union tgsi_exec_channel *src )
539 {
540 #if FAST_MATH
541 dst->f[0] = util_fast_log2( src->f[0] );
542 dst->f[1] = util_fast_log2( src->f[1] );
543 dst->f[2] = util_fast_log2( src->f[2] );
544 dst->f[3] = util_fast_log2( src->f[3] );
545 #else
546 dst->f[0] = logf( src->f[0] ) * 1.442695f;
547 dst->f[1] = logf( src->f[1] ) * 1.442695f;
548 dst->f[2] = logf( src->f[2] ) * 1.442695f;
549 dst->f[3] = logf( src->f[3] ) * 1.442695f;
550 #endif
551 }
552
553 static void
554 micro_le(
555 union tgsi_exec_channel *dst,
556 const union tgsi_exec_channel *src0,
557 const union tgsi_exec_channel *src1,
558 const union tgsi_exec_channel *src2,
559 const union tgsi_exec_channel *src3 )
560 {
561 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
562 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
563 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
564 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
565 }
566
567 static void
568 micro_lt(
569 union tgsi_exec_channel *dst,
570 const union tgsi_exec_channel *src0,
571 const union tgsi_exec_channel *src1,
572 const union tgsi_exec_channel *src2,
573 const union tgsi_exec_channel *src3 )
574 {
575 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
576 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
577 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
578 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
579 }
580
581 #if 0
582 static void
583 micro_ilt(
584 union tgsi_exec_channel *dst,
585 const union tgsi_exec_channel *src0,
586 const union tgsi_exec_channel *src1,
587 const union tgsi_exec_channel *src2,
588 const union tgsi_exec_channel *src3 )
589 {
590 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
591 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
592 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
593 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
594 }
595 #endif
596
597 #if 0
598 static void
599 micro_ult(
600 union tgsi_exec_channel *dst,
601 const union tgsi_exec_channel *src0,
602 const union tgsi_exec_channel *src1,
603 const union tgsi_exec_channel *src2,
604 const union tgsi_exec_channel *src3 )
605 {
606 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
607 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
608 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
609 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
610 }
611 #endif
612
613 static void
614 micro_max(
615 union tgsi_exec_channel *dst,
616 const union tgsi_exec_channel *src0,
617 const union tgsi_exec_channel *src1 )
618 {
619 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
620 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
621 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
622 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
623 }
624
625 #if 0
626 static void
627 micro_imax(
628 union tgsi_exec_channel *dst,
629 const union tgsi_exec_channel *src0,
630 const union tgsi_exec_channel *src1 )
631 {
632 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
633 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
634 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
635 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
636 }
637 #endif
638
639 #if 0
640 static void
641 micro_umax(
642 union tgsi_exec_channel *dst,
643 const union tgsi_exec_channel *src0,
644 const union tgsi_exec_channel *src1 )
645 {
646 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
647 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
648 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
649 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
650 }
651 #endif
652
653 static void
654 micro_min(
655 union tgsi_exec_channel *dst,
656 const union tgsi_exec_channel *src0,
657 const union tgsi_exec_channel *src1 )
658 {
659 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
660 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
661 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
662 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
663 }
664
665 #if 0
666 static void
667 micro_imin(
668 union tgsi_exec_channel *dst,
669 const union tgsi_exec_channel *src0,
670 const union tgsi_exec_channel *src1 )
671 {
672 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
673 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
674 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
675 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
676 }
677 #endif
678
679 #if 0
680 static void
681 micro_umin(
682 union tgsi_exec_channel *dst,
683 const union tgsi_exec_channel *src0,
684 const union tgsi_exec_channel *src1 )
685 {
686 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
687 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
688 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
689 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
690 }
691 #endif
692
693 #if 0
694 static void
695 micro_umod(
696 union tgsi_exec_channel *dst,
697 const union tgsi_exec_channel *src0,
698 const union tgsi_exec_channel *src1 )
699 {
700 dst->u[0] = src0->u[0] % src1->u[0];
701 dst->u[1] = src0->u[1] % src1->u[1];
702 dst->u[2] = src0->u[2] % src1->u[2];
703 dst->u[3] = src0->u[3] % src1->u[3];
704 }
705 #endif
706
707 static void
708 micro_mul(
709 union tgsi_exec_channel *dst,
710 const union tgsi_exec_channel *src0,
711 const union tgsi_exec_channel *src1 )
712 {
713 dst->f[0] = src0->f[0] * src1->f[0];
714 dst->f[1] = src0->f[1] * src1->f[1];
715 dst->f[2] = src0->f[2] * src1->f[2];
716 dst->f[3] = src0->f[3] * src1->f[3];
717 }
718
719 #if 0
720 static void
721 micro_imul(
722 union tgsi_exec_channel *dst,
723 const union tgsi_exec_channel *src0,
724 const union tgsi_exec_channel *src1 )
725 {
726 dst->i[0] = src0->i[0] * src1->i[0];
727 dst->i[1] = src0->i[1] * src1->i[1];
728 dst->i[2] = src0->i[2] * src1->i[2];
729 dst->i[3] = src0->i[3] * src1->i[3];
730 }
731 #endif
732
733 #if 0
734 static void
735 micro_imul64(
736 union tgsi_exec_channel *dst0,
737 union tgsi_exec_channel *dst1,
738 const union tgsi_exec_channel *src0,
739 const union tgsi_exec_channel *src1 )
740 {
741 dst1->i[0] = src0->i[0] * src1->i[0];
742 dst1->i[1] = src0->i[1] * src1->i[1];
743 dst1->i[2] = src0->i[2] * src1->i[2];
744 dst1->i[3] = src0->i[3] * src1->i[3];
745 dst0->i[0] = 0;
746 dst0->i[1] = 0;
747 dst0->i[2] = 0;
748 dst0->i[3] = 0;
749 }
750 #endif
751
752 #if 0
753 static void
754 micro_umul64(
755 union tgsi_exec_channel *dst0,
756 union tgsi_exec_channel *dst1,
757 const union tgsi_exec_channel *src0,
758 const union tgsi_exec_channel *src1 )
759 {
760 dst1->u[0] = src0->u[0] * src1->u[0];
761 dst1->u[1] = src0->u[1] * src1->u[1];
762 dst1->u[2] = src0->u[2] * src1->u[2];
763 dst1->u[3] = src0->u[3] * src1->u[3];
764 dst0->u[0] = 0;
765 dst0->u[1] = 0;
766 dst0->u[2] = 0;
767 dst0->u[3] = 0;
768 }
769 #endif
770
771
772 #if 0
773 static void
774 micro_movc(
775 union tgsi_exec_channel *dst,
776 const union tgsi_exec_channel *src0,
777 const union tgsi_exec_channel *src1,
778 const union tgsi_exec_channel *src2 )
779 {
780 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
781 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
782 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
783 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
784 }
785 #endif
786
787 static void
788 micro_neg(
789 union tgsi_exec_channel *dst,
790 const union tgsi_exec_channel *src )
791 {
792 dst->f[0] = -src->f[0];
793 dst->f[1] = -src->f[1];
794 dst->f[2] = -src->f[2];
795 dst->f[3] = -src->f[3];
796 }
797
798 #if 0
799 static void
800 micro_ineg(
801 union tgsi_exec_channel *dst,
802 const union tgsi_exec_channel *src )
803 {
804 dst->i[0] = -src->i[0];
805 dst->i[1] = -src->i[1];
806 dst->i[2] = -src->i[2];
807 dst->i[3] = -src->i[3];
808 }
809 #endif
810
811 static void
812 micro_not(
813 union tgsi_exec_channel *dst,
814 const union tgsi_exec_channel *src )
815 {
816 dst->u[0] = ~src->u[0];
817 dst->u[1] = ~src->u[1];
818 dst->u[2] = ~src->u[2];
819 dst->u[3] = ~src->u[3];
820 }
821
822 static void
823 micro_or(
824 union tgsi_exec_channel *dst,
825 const union tgsi_exec_channel *src0,
826 const union tgsi_exec_channel *src1 )
827 {
828 dst->u[0] = src0->u[0] | src1->u[0];
829 dst->u[1] = src0->u[1] | src1->u[1];
830 dst->u[2] = src0->u[2] | src1->u[2];
831 dst->u[3] = src0->u[3] | src1->u[3];
832 }
833
834 static void
835 micro_pow(
836 union tgsi_exec_channel *dst,
837 const union tgsi_exec_channel *src0,
838 const union tgsi_exec_channel *src1 )
839 {
840 #if FAST_MATH
841 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
842 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
843 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
844 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
845 #else
846 dst->f[0] = powf( src0->f[0], src1->f[0] );
847 dst->f[1] = powf( src0->f[1], src1->f[1] );
848 dst->f[2] = powf( src0->f[2], src1->f[2] );
849 dst->f[3] = powf( src0->f[3], src1->f[3] );
850 #endif
851 }
852
853 static void
854 micro_rnd(
855 union tgsi_exec_channel *dst,
856 const union tgsi_exec_channel *src )
857 {
858 dst->f[0] = floorf( src->f[0] + 0.5f );
859 dst->f[1] = floorf( src->f[1] + 0.5f );
860 dst->f[2] = floorf( src->f[2] + 0.5f );
861 dst->f[3] = floorf( src->f[3] + 0.5f );
862 }
863
864 static void
865 micro_sgn(
866 union tgsi_exec_channel *dst,
867 const union tgsi_exec_channel *src )
868 {
869 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
870 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
871 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
872 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
873 }
874
875 static void
876 micro_shl(
877 union tgsi_exec_channel *dst,
878 const union tgsi_exec_channel *src0,
879 const union tgsi_exec_channel *src1 )
880 {
881 dst->i[0] = src0->i[0] << src1->i[0];
882 dst->i[1] = src0->i[1] << src1->i[1];
883 dst->i[2] = src0->i[2] << src1->i[2];
884 dst->i[3] = src0->i[3] << src1->i[3];
885 }
886
887 static void
888 micro_ishr(
889 union tgsi_exec_channel *dst,
890 const union tgsi_exec_channel *src0,
891 const union tgsi_exec_channel *src1 )
892 {
893 dst->i[0] = src0->i[0] >> src1->i[0];
894 dst->i[1] = src0->i[1] >> src1->i[1];
895 dst->i[2] = src0->i[2] >> src1->i[2];
896 dst->i[3] = src0->i[3] >> src1->i[3];
897 }
898
899 static void
900 micro_trunc(
901 union tgsi_exec_channel *dst,
902 const union tgsi_exec_channel *src0 )
903 {
904 dst->f[0] = (float) (int) src0->f[0];
905 dst->f[1] = (float) (int) src0->f[1];
906 dst->f[2] = (float) (int) src0->f[2];
907 dst->f[3] = (float) (int) src0->f[3];
908 }
909
910 #if 0
911 static void
912 micro_ushr(
913 union tgsi_exec_channel *dst,
914 const union tgsi_exec_channel *src0,
915 const union tgsi_exec_channel *src1 )
916 {
917 dst->u[0] = src0->u[0] >> src1->u[0];
918 dst->u[1] = src0->u[1] >> src1->u[1];
919 dst->u[2] = src0->u[2] >> src1->u[2];
920 dst->u[3] = src0->u[3] >> src1->u[3];
921 }
922 #endif
923
924 static void
925 micro_sin(
926 union tgsi_exec_channel *dst,
927 const union tgsi_exec_channel *src )
928 {
929 dst->f[0] = sinf( src->f[0] );
930 dst->f[1] = sinf( src->f[1] );
931 dst->f[2] = sinf( src->f[2] );
932 dst->f[3] = sinf( src->f[3] );
933 }
934
935 static void
936 micro_sqrt( union tgsi_exec_channel *dst,
937 const union tgsi_exec_channel *src )
938 {
939 dst->f[0] = sqrtf( src->f[0] );
940 dst->f[1] = sqrtf( src->f[1] );
941 dst->f[2] = sqrtf( src->f[2] );
942 dst->f[3] = sqrtf( src->f[3] );
943 }
944
945 static void
946 micro_sub(
947 union tgsi_exec_channel *dst,
948 const union tgsi_exec_channel *src0,
949 const union tgsi_exec_channel *src1 )
950 {
951 dst->f[0] = src0->f[0] - src1->f[0];
952 dst->f[1] = src0->f[1] - src1->f[1];
953 dst->f[2] = src0->f[2] - src1->f[2];
954 dst->f[3] = src0->f[3] - src1->f[3];
955 }
956
957 #if 0
958 static void
959 micro_u2f(
960 union tgsi_exec_channel *dst,
961 const union tgsi_exec_channel *src )
962 {
963 dst->f[0] = (float) src->u[0];
964 dst->f[1] = (float) src->u[1];
965 dst->f[2] = (float) src->u[2];
966 dst->f[3] = (float) src->u[3];
967 }
968 #endif
969
970 static void
971 micro_xor(
972 union tgsi_exec_channel *dst,
973 const union tgsi_exec_channel *src0,
974 const union tgsi_exec_channel *src1 )
975 {
976 dst->u[0] = src0->u[0] ^ src1->u[0];
977 dst->u[1] = src0->u[1] ^ src1->u[1];
978 dst->u[2] = src0->u[2] ^ src1->u[2];
979 dst->u[3] = src0->u[3] ^ src1->u[3];
980 }
981
982 static void
983 fetch_src_file_channel(
984 const struct tgsi_exec_machine *mach,
985 const uint file,
986 const uint swizzle,
987 const union tgsi_exec_channel *index,
988 union tgsi_exec_channel *chan )
989 {
990 switch( swizzle ) {
991 case TGSI_EXTSWIZZLE_X:
992 case TGSI_EXTSWIZZLE_Y:
993 case TGSI_EXTSWIZZLE_Z:
994 case TGSI_EXTSWIZZLE_W:
995 switch( file ) {
996 case TGSI_FILE_CONSTANT:
997 assert(mach->Consts);
998 if (index->i[0] < 0)
999 chan->f[0] = 0.0f;
1000 else
1001 chan->f[0] = mach->Consts[index->i[0]][swizzle];
1002 if (index->i[1] < 0)
1003 chan->f[1] = 0.0f;
1004 else
1005 chan->f[1] = mach->Consts[index->i[1]][swizzle];
1006 if (index->i[2] < 0)
1007 chan->f[2] = 0.0f;
1008 else
1009 chan->f[2] = mach->Consts[index->i[2]][swizzle];
1010 if (index->i[3] < 0)
1011 chan->f[3] = 0.0f;
1012 else
1013 chan->f[3] = mach->Consts[index->i[3]][swizzle];
1014 break;
1015
1016 case TGSI_FILE_INPUT:
1017 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1018 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1019 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1020 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1021 break;
1022
1023 case TGSI_FILE_TEMPORARY:
1024 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1025 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1026 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1027 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1028 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1029 break;
1030
1031 case TGSI_FILE_IMMEDIATE:
1032 assert( index->i[0] < (int) mach->ImmLimit );
1033 chan->f[0] = mach->Imms[index->i[0]][swizzle];
1034 assert( index->i[1] < (int) mach->ImmLimit );
1035 chan->f[1] = mach->Imms[index->i[1]][swizzle];
1036 assert( index->i[2] < (int) mach->ImmLimit );
1037 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1038 assert( index->i[3] < (int) mach->ImmLimit );
1039 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1040 break;
1041
1042 case TGSI_FILE_ADDRESS:
1043 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1044 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1045 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1046 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1047 break;
1048
1049 case TGSI_FILE_OUTPUT:
1050 /* vertex/fragment output vars can be read too */
1051 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1052 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1053 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1054 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1055 break;
1056
1057 default:
1058 assert( 0 );
1059 }
1060 break;
1061
1062 case TGSI_EXTSWIZZLE_ZERO:
1063 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1064 break;
1065
1066 case TGSI_EXTSWIZZLE_ONE:
1067 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1068 break;
1069
1070 default:
1071 assert( 0 );
1072 }
1073 }
1074
1075 static void
1076 fetch_source(
1077 const struct tgsi_exec_machine *mach,
1078 union tgsi_exec_channel *chan,
1079 const struct tgsi_full_src_register *reg,
1080 const uint chan_index )
1081 {
1082 union tgsi_exec_channel index;
1083 uint swizzle;
1084
1085 /* We start with a direct index into a register file.
1086 *
1087 * file[1],
1088 * where:
1089 * file = SrcRegister.File
1090 * [1] = SrcRegister.Index
1091 */
1092 index.i[0] =
1093 index.i[1] =
1094 index.i[2] =
1095 index.i[3] = reg->SrcRegister.Index;
1096
1097 /* There is an extra source register that indirectly subscripts
1098 * a register file. The direct index now becomes an offset
1099 * that is being added to the indirect register.
1100 *
1101 * file[ind[2].x+1],
1102 * where:
1103 * ind = SrcRegisterInd.File
1104 * [2] = SrcRegisterInd.Index
1105 * .x = SrcRegisterInd.SwizzleX
1106 */
1107 if (reg->SrcRegister.Indirect) {
1108 union tgsi_exec_channel index2;
1109 union tgsi_exec_channel indir_index;
1110 const uint execmask = mach->ExecMask;
1111 uint i;
1112
1113 /* which address register (always zero now) */
1114 index2.i[0] =
1115 index2.i[1] =
1116 index2.i[2] =
1117 index2.i[3] = reg->SrcRegisterInd.Index;
1118
1119 /* get current value of address register[swizzle] */
1120 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1121 fetch_src_file_channel(
1122 mach,
1123 reg->SrcRegisterInd.File,
1124 swizzle,
1125 &index2,
1126 &indir_index );
1127
1128 /* add value of address register to the offset */
1129 index.i[0] += (int) indir_index.f[0];
1130 index.i[1] += (int) indir_index.f[1];
1131 index.i[2] += (int) indir_index.f[2];
1132 index.i[3] += (int) indir_index.f[3];
1133
1134 /* for disabled execution channels, zero-out the index to
1135 * avoid using a potential garbage value.
1136 */
1137 for (i = 0; i < QUAD_SIZE; i++) {
1138 if ((execmask & (1 << i)) == 0)
1139 index.i[i] = 0;
1140 }
1141 }
1142
1143 /* There is an extra source register that is a second
1144 * subscript to a register file. Effectively it means that
1145 * the register file is actually a 2D array of registers.
1146 *
1147 * file[1][3] == file[1*sizeof(file[1])+3],
1148 * where:
1149 * [3] = SrcRegisterDim.Index
1150 */
1151 if (reg->SrcRegister.Dimension) {
1152 /* The size of the first-order array depends on the register file type.
1153 * We need to multiply the index to the first array to get an effective,
1154 * "flat" index that points to the beginning of the second-order array.
1155 */
1156 switch (reg->SrcRegister.File) {
1157 case TGSI_FILE_INPUT:
1158 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1159 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1160 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1161 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1162 break;
1163 case TGSI_FILE_CONSTANT:
1164 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1165 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1166 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1167 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1168 break;
1169 default:
1170 assert( 0 );
1171 }
1172
1173 index.i[0] += reg->SrcRegisterDim.Index;
1174 index.i[1] += reg->SrcRegisterDim.Index;
1175 index.i[2] += reg->SrcRegisterDim.Index;
1176 index.i[3] += reg->SrcRegisterDim.Index;
1177
1178 /* Again, the second subscript index can be addressed indirectly
1179 * identically to the first one.
1180 * Nothing stops us from indirectly addressing the indirect register,
1181 * but there is no need for that, so we won't exercise it.
1182 *
1183 * file[1][ind[4].y+3],
1184 * where:
1185 * ind = SrcRegisterDimInd.File
1186 * [4] = SrcRegisterDimInd.Index
1187 * .y = SrcRegisterDimInd.SwizzleX
1188 */
1189 if (reg->SrcRegisterDim.Indirect) {
1190 union tgsi_exec_channel index2;
1191 union tgsi_exec_channel indir_index;
1192 const uint execmask = mach->ExecMask;
1193 uint i;
1194
1195 index2.i[0] =
1196 index2.i[1] =
1197 index2.i[2] =
1198 index2.i[3] = reg->SrcRegisterDimInd.Index;
1199
1200 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1201 fetch_src_file_channel(
1202 mach,
1203 reg->SrcRegisterDimInd.File,
1204 swizzle,
1205 &index2,
1206 &indir_index );
1207
1208 index.i[0] += (int) indir_index.f[0];
1209 index.i[1] += (int) indir_index.f[1];
1210 index.i[2] += (int) indir_index.f[2];
1211 index.i[3] += (int) indir_index.f[3];
1212
1213 /* for disabled execution channels, zero-out the index to
1214 * avoid using a potential garbage value.
1215 */
1216 for (i = 0; i < QUAD_SIZE; i++) {
1217 if ((execmask & (1 << i)) == 0)
1218 index.i[i] = 0;
1219 }
1220 }
1221
1222 /* If by any chance there was a need for a 3D array of register
1223 * files, we would have to check whether SrcRegisterDim is followed
1224 * by a dimension register and continue the saga.
1225 */
1226 }
1227
1228 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1229 fetch_src_file_channel(
1230 mach,
1231 reg->SrcRegister.File,
1232 swizzle,
1233 &index,
1234 chan );
1235
1236 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1237 case TGSI_UTIL_SIGN_CLEAR:
1238 micro_abs( chan, chan );
1239 break;
1240
1241 case TGSI_UTIL_SIGN_SET:
1242 micro_abs( chan, chan );
1243 micro_neg( chan, chan );
1244 break;
1245
1246 case TGSI_UTIL_SIGN_TOGGLE:
1247 micro_neg( chan, chan );
1248 break;
1249
1250 case TGSI_UTIL_SIGN_KEEP:
1251 break;
1252 }
1253
1254 if (reg->SrcRegisterExtMod.Complement) {
1255 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1256 }
1257 }
1258
1259 static void
1260 store_dest(
1261 struct tgsi_exec_machine *mach,
1262 const union tgsi_exec_channel *chan,
1263 const struct tgsi_full_dst_register *reg,
1264 const struct tgsi_full_instruction *inst,
1265 uint chan_index )
1266 {
1267 uint i;
1268 union tgsi_exec_channel null;
1269 union tgsi_exec_channel *dst;
1270 uint execmask = mach->ExecMask;
1271
1272 switch (reg->DstRegister.File) {
1273 case TGSI_FILE_NULL:
1274 dst = &null;
1275 break;
1276
1277 case TGSI_FILE_OUTPUT:
1278 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1279 + reg->DstRegister.Index].xyzw[chan_index];
1280 break;
1281
1282 case TGSI_FILE_TEMPORARY:
1283 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1284 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1285 break;
1286
1287 case TGSI_FILE_ADDRESS:
1288 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1289 break;
1290
1291 default:
1292 assert( 0 );
1293 return;
1294 }
1295
1296 if (inst->InstructionExtNv.CondFlowEnable) {
1297 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1298 uint swizzle;
1299 uint shift;
1300 uint mask;
1301 uint test;
1302
1303 /* Only CC0 supported.
1304 */
1305 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1306
1307 switch (chan_index) {
1308 case CHAN_X:
1309 swizzle = inst->InstructionExtNv.CondSwizzleX;
1310 break;
1311 case CHAN_Y:
1312 swizzle = inst->InstructionExtNv.CondSwizzleY;
1313 break;
1314 case CHAN_Z:
1315 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1316 break;
1317 case CHAN_W:
1318 swizzle = inst->InstructionExtNv.CondSwizzleW;
1319 break;
1320 default:
1321 assert( 0 );
1322 return;
1323 }
1324
1325 switch (swizzle) {
1326 case TGSI_SWIZZLE_X:
1327 shift = TGSI_EXEC_CC_X_SHIFT;
1328 mask = TGSI_EXEC_CC_X_MASK;
1329 break;
1330 case TGSI_SWIZZLE_Y:
1331 shift = TGSI_EXEC_CC_Y_SHIFT;
1332 mask = TGSI_EXEC_CC_Y_MASK;
1333 break;
1334 case TGSI_SWIZZLE_Z:
1335 shift = TGSI_EXEC_CC_Z_SHIFT;
1336 mask = TGSI_EXEC_CC_Z_MASK;
1337 break;
1338 case TGSI_SWIZZLE_W:
1339 shift = TGSI_EXEC_CC_W_SHIFT;
1340 mask = TGSI_EXEC_CC_W_MASK;
1341 break;
1342 default:
1343 assert( 0 );
1344 return;
1345 }
1346
1347 switch (inst->InstructionExtNv.CondMask) {
1348 case TGSI_CC_GT:
1349 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1350 for (i = 0; i < QUAD_SIZE; i++)
1351 if (cc->u[i] & test)
1352 execmask &= ~(1 << i);
1353 break;
1354
1355 case TGSI_CC_EQ:
1356 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1357 for (i = 0; i < QUAD_SIZE; i++)
1358 if (cc->u[i] & test)
1359 execmask &= ~(1 << i);
1360 break;
1361
1362 case TGSI_CC_LT:
1363 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1364 for (i = 0; i < QUAD_SIZE; i++)
1365 if (cc->u[i] & test)
1366 execmask &= ~(1 << i);
1367 break;
1368
1369 case TGSI_CC_GE:
1370 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1371 for (i = 0; i < QUAD_SIZE; i++)
1372 if (cc->u[i] & test)
1373 execmask &= ~(1 << i);
1374 break;
1375
1376 case TGSI_CC_LE:
1377 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1378 for (i = 0; i < QUAD_SIZE; i++)
1379 if (cc->u[i] & test)
1380 execmask &= ~(1 << i);
1381 break;
1382
1383 case TGSI_CC_NE:
1384 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1385 for (i = 0; i < QUAD_SIZE; i++)
1386 if (cc->u[i] & test)
1387 execmask &= ~(1 << i);
1388 break;
1389
1390 case TGSI_CC_TR:
1391 break;
1392
1393 case TGSI_CC_FL:
1394 for (i = 0; i < QUAD_SIZE; i++)
1395 execmask &= ~(1 << i);
1396 break;
1397
1398 default:
1399 assert( 0 );
1400 return;
1401 }
1402 }
1403
1404 switch (inst->Instruction.Saturate) {
1405 case TGSI_SAT_NONE:
1406 for (i = 0; i < QUAD_SIZE; i++)
1407 if (execmask & (1 << i))
1408 dst->i[i] = chan->i[i];
1409 break;
1410
1411 case TGSI_SAT_ZERO_ONE:
1412 for (i = 0; i < QUAD_SIZE; i++)
1413 if (execmask & (1 << i)) {
1414 if (chan->f[i] < 0.0f)
1415 dst->f[i] = 0.0f;
1416 else if (chan->f[i] > 1.0f)
1417 dst->f[i] = 1.0f;
1418 else
1419 dst->i[i] = chan->i[i];
1420 }
1421 break;
1422
1423 case TGSI_SAT_MINUS_PLUS_ONE:
1424 for (i = 0; i < QUAD_SIZE; i++)
1425 if (execmask & (1 << i)) {
1426 if (chan->f[i] < -1.0f)
1427 dst->f[i] = -1.0f;
1428 else if (chan->f[i] > 1.0f)
1429 dst->f[i] = 1.0f;
1430 else
1431 dst->i[i] = chan->i[i];
1432 }
1433 break;
1434
1435 default:
1436 assert( 0 );
1437 }
1438
1439 if (inst->InstructionExtNv.CondDstUpdate) {
1440 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1441 uint shift;
1442 uint mask;
1443
1444 /* Only CC0 supported.
1445 */
1446 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1447
1448 switch (chan_index) {
1449 case CHAN_X:
1450 shift = TGSI_EXEC_CC_X_SHIFT;
1451 mask = ~TGSI_EXEC_CC_X_MASK;
1452 break;
1453 case CHAN_Y:
1454 shift = TGSI_EXEC_CC_Y_SHIFT;
1455 mask = ~TGSI_EXEC_CC_Y_MASK;
1456 break;
1457 case CHAN_Z:
1458 shift = TGSI_EXEC_CC_Z_SHIFT;
1459 mask = ~TGSI_EXEC_CC_Z_MASK;
1460 break;
1461 case CHAN_W:
1462 shift = TGSI_EXEC_CC_W_SHIFT;
1463 mask = ~TGSI_EXEC_CC_W_MASK;
1464 break;
1465 default:
1466 assert( 0 );
1467 return;
1468 }
1469
1470 for (i = 0; i < QUAD_SIZE; i++)
1471 if (execmask & (1 << i)) {
1472 cc->u[i] &= mask;
1473 if (dst->f[i] < 0.0f)
1474 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1475 else if (dst->f[i] > 0.0f)
1476 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1477 else if (dst->f[i] == 0.0f)
1478 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1479 else
1480 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1481 }
1482 }
1483 }
1484
/* Shorthands used by the opcode implementations.  Both expect variables
 * named `mach` (the machine) and `inst` (the current instruction) to be
 * in scope where they are expanded.
 *
 * FETCH: read channel CHAN of source operand INDEX into *VAL.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[INDEX], (CHAN) )

/* STORE: write *VAL to channel CHAN of destination operand INDEX. */
#define STORE(VAL, INDEX, CHAN) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[INDEX], inst, (CHAN) )
1490
1491
1492 /**
1493 * Execute ARB-style KIL which is predicated by a src register.
1494 * Kill fragment if any of the four values is less than zero.
1495 */
1496 static void
1497 exec_kil(struct tgsi_exec_machine *mach,
1498 const struct tgsi_full_instruction *inst)
1499 {
1500 uint uniquemask;
1501 uint chan_index;
1502 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1503 union tgsi_exec_channel r[1];
1504
1505 /* This mask stores component bits that were already tested. Note that
1506 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1507 * tested. */
1508 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1509
1510 for (chan_index = 0; chan_index < 4; chan_index++)
1511 {
1512 uint swizzle;
1513 uint i;
1514
1515 /* unswizzle channel */
1516 swizzle = tgsi_util_get_full_src_register_extswizzle (
1517 &inst->FullSrcRegisters[0],
1518 chan_index);
1519
1520 /* check if the component has not been already tested */
1521 if (uniquemask & (1 << swizzle))
1522 continue;
1523 uniquemask |= 1 << swizzle;
1524
1525 FETCH(&r[0], 0, chan_index);
1526 for (i = 0; i < 4; i++)
1527 if (r[0].f[i] < 0.0f)
1528 kilmask |= 1 << i;
1529 }
1530
1531 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1532 }
1533
1534 /**
1535 * Execute NVIDIA-style KIL which is predicated by a condition code.
1536 * Kill fragment if the condition code is TRUE.
1537 */
1538 static void
1539 exec_kilp(struct tgsi_exec_machine *mach,
1540 const struct tgsi_full_instruction *inst)
1541 {
1542 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1543
1544 if (inst->InstructionExtNv.CondFlowEnable) {
1545 uint swizzle[4];
1546 uint chan_index;
1547
1548 kilmask = 0x0;
1549
1550 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1551 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1552 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1553 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1554
1555 for (chan_index = 0; chan_index < 4; chan_index++)
1556 {
1557 uint i;
1558
1559 for (i = 0; i < 4; i++) {
1560 /* TODO: evaluate the condition code */
1561 if (0)
1562 kilmask |= 1 << i;
1563 }
1564 }
1565 }
1566 else {
1567 /* "unconditional" kil */
1568 kilmask = mach->ExecMask;
1569 }
1570 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1571 }
1572
1573
1574 /*
1575 * Fetch a four texture samples using STR texture coordinates.
1576 */
1577 static void
1578 fetch_texel( struct tgsi_sampler *sampler,
1579 const union tgsi_exec_channel *s,
1580 const union tgsi_exec_channel *t,
1581 const union tgsi_exec_channel *p,
1582 float lodbias, /* XXX should be float[4] */
1583 union tgsi_exec_channel *r,
1584 union tgsi_exec_channel *g,
1585 union tgsi_exec_channel *b,
1586 union tgsi_exec_channel *a )
1587 {
1588 uint j;
1589 float rgba[NUM_CHANNELS][QUAD_SIZE];
1590
1591 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1592
1593 for (j = 0; j < 4; j++) {
1594 r->f[j] = rgba[0][j];
1595 g->f[j] = rgba[1][j];
1596 b->f[j] = rgba[2][j];
1597 a->f[j] = rgba[3][j];
1598 }
1599 }
1600
1601
1602 static void
1603 exec_tex(struct tgsi_exec_machine *mach,
1604 const struct tgsi_full_instruction *inst,
1605 boolean biasLod,
1606 boolean projected)
1607 {
1608 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1609 union tgsi_exec_channel r[4];
1610 uint chan_index;
1611 float lodBias;
1612
1613 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1614
1615 switch (inst->InstructionExtTexture.Texture) {
1616 case TGSI_TEXTURE_1D:
1617
1618 FETCH(&r[0], 0, CHAN_X);
1619
1620 if (projected) {
1621 FETCH(&r[1], 0, CHAN_W);
1622 micro_div( &r[0], &r[0], &r[1] );
1623 }
1624
1625 if (biasLod) {
1626 FETCH(&r[1], 0, CHAN_W);
1627 lodBias = r[2].f[0];
1628 }
1629 else
1630 lodBias = 0.0;
1631
1632 fetch_texel(mach->Samplers[unit],
1633 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1634 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1635 break;
1636
1637 case TGSI_TEXTURE_2D:
1638 case TGSI_TEXTURE_RECT:
1639
1640 FETCH(&r[0], 0, CHAN_X);
1641 FETCH(&r[1], 0, CHAN_Y);
1642 FETCH(&r[2], 0, CHAN_Z);
1643
1644 if (projected) {
1645 FETCH(&r[3], 0, CHAN_W);
1646 micro_div( &r[0], &r[0], &r[3] );
1647 micro_div( &r[1], &r[1], &r[3] );
1648 micro_div( &r[2], &r[2], &r[3] );
1649 }
1650
1651 if (biasLod) {
1652 FETCH(&r[3], 0, CHAN_W);
1653 lodBias = r[3].f[0];
1654 }
1655 else
1656 lodBias = 0.0;
1657
1658 fetch_texel(mach->Samplers[unit],
1659 &r[0], &r[1], &r[2], lodBias, /* inputs */
1660 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1661 break;
1662
1663 case TGSI_TEXTURE_3D:
1664 case TGSI_TEXTURE_CUBE:
1665
1666 FETCH(&r[0], 0, CHAN_X);
1667 FETCH(&r[1], 0, CHAN_Y);
1668 FETCH(&r[2], 0, CHAN_Z);
1669
1670 if (projected) {
1671 FETCH(&r[3], 0, CHAN_W);
1672 micro_div( &r[0], &r[0], &r[3] );
1673 micro_div( &r[1], &r[1], &r[3] );
1674 micro_div( &r[2], &r[2], &r[3] );
1675 }
1676
1677 if (biasLod) {
1678 FETCH(&r[3], 0, CHAN_W);
1679 lodBias = r[3].f[0];
1680 }
1681 else
1682 lodBias = 0.0;
1683
1684 fetch_texel(mach->Samplers[unit],
1685 &r[0], &r[1], &r[2], lodBias,
1686 &r[0], &r[1], &r[2], &r[3]);
1687 break;
1688
1689 default:
1690 assert (0);
1691 }
1692
1693 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1694 STORE( &r[chan_index], 0, chan_index );
1695 }
1696 }
1697
1698
1699 /**
1700 * Evaluate a constant-valued coefficient at the position of the
1701 * current quad.
1702 */
1703 static void
1704 eval_constant_coef(
1705 struct tgsi_exec_machine *mach,
1706 unsigned attrib,
1707 unsigned chan )
1708 {
1709 unsigned i;
1710
1711 for( i = 0; i < QUAD_SIZE; i++ ) {
1712 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1713 }
1714 }
1715
1716 /**
1717 * Evaluate a linear-valued coefficient at the position of the
1718 * current quad.
1719 */
1720 static void
1721 eval_linear_coef(
1722 struct tgsi_exec_machine *mach,
1723 unsigned attrib,
1724 unsigned chan )
1725 {
1726 const float x = mach->QuadPos.xyzw[0].f[0];
1727 const float y = mach->QuadPos.xyzw[1].f[0];
1728 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1729 const float dady = mach->InterpCoefs[attrib].dady[chan];
1730 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1731 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1732 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1733 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1734 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1735 }
1736
1737 /**
1738 * Evaluate a perspective-valued coefficient at the position of the
1739 * current quad.
1740 */
1741 static void
1742 eval_perspective_coef(
1743 struct tgsi_exec_machine *mach,
1744 unsigned attrib,
1745 unsigned chan )
1746 {
1747 const float x = mach->QuadPos.xyzw[0].f[0];
1748 const float y = mach->QuadPos.xyzw[1].f[0];
1749 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1750 const float dady = mach->InterpCoefs[attrib].dady[chan];
1751 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1752 const float *w = mach->QuadPos.xyzw[3].f;
1753 /* divide by W here */
1754 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1755 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1756 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1757 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1758 }
1759
1760
/**
 * Common signature of the eval_*_coef() interpolators, so that one of
 * them can be selected at run time and called through a pointer.
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1765
1766 static void
1767 exec_declaration(
1768 struct tgsi_exec_machine *mach,
1769 const struct tgsi_full_declaration *decl )
1770 {
1771 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1772 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1773 unsigned first, last, mask;
1774 eval_coef_func eval;
1775
1776 first = decl->DeclarationRange.First;
1777 last = decl->DeclarationRange.Last;
1778 mask = decl->Declaration.UsageMask;
1779
1780 switch( decl->Declaration.Interpolate ) {
1781 case TGSI_INTERPOLATE_CONSTANT:
1782 eval = eval_constant_coef;
1783 break;
1784
1785 case TGSI_INTERPOLATE_LINEAR:
1786 eval = eval_linear_coef;
1787 break;
1788
1789 case TGSI_INTERPOLATE_PERSPECTIVE:
1790 eval = eval_perspective_coef;
1791 break;
1792
1793 default:
1794 eval = NULL;
1795 assert( 0 );
1796 }
1797
1798 if( mask == TGSI_WRITEMASK_XYZW ) {
1799 unsigned i, j;
1800
1801 for( i = first; i <= last; i++ ) {
1802 for( j = 0; j < NUM_CHANNELS; j++ ) {
1803 eval( mach, i, j );
1804 }
1805 }
1806 }
1807 else {
1808 unsigned i, j;
1809
1810 for( j = 0; j < NUM_CHANNELS; j++ ) {
1811 if( mask & (1 << j) ) {
1812 for( i = first; i <= last; i++ ) {
1813 eval( mach, i, j );
1814 }
1815 }
1816 }
1817 }
1818 }
1819 }
1820 }
1821
1822 static void
1823 exec_instruction(
1824 struct tgsi_exec_machine *mach,
1825 const struct tgsi_full_instruction *inst,
1826 int *pc )
1827 {
1828 uint chan_index;
1829 union tgsi_exec_channel r[8];
1830
1831 (*pc)++;
1832
1833 switch (inst->Instruction.Opcode) {
1834 case TGSI_OPCODE_ARL:
1835 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1836 FETCH( &r[0], 0, chan_index );
1837 micro_flr( &r[0], &r[0] );
1838 STORE( &r[0], 0, chan_index );
1839 }
1840 break;
1841
1842 case TGSI_OPCODE_MOV:
1843 case TGSI_OPCODE_SWZ:
1844 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1845 FETCH( &r[0], 0, chan_index );
1846 STORE( &r[0], 0, chan_index );
1847 }
1848 break;
1849
1850 case TGSI_OPCODE_LIT:
1851 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1852 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1853 }
1854
1855 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1856 FETCH( &r[0], 0, CHAN_X );
1857 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1858 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1859 STORE( &r[0], 0, CHAN_Y );
1860 }
1861
1862 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1863 FETCH( &r[1], 0, CHAN_Y );
1864 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1865
1866 FETCH( &r[2], 0, CHAN_W );
1867 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1868 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1869 micro_pow( &r[1], &r[1], &r[2] );
1870 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1871 STORE( &r[0], 0, CHAN_Z );
1872 }
1873 }
1874
1875 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1876 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1877 }
1878 break;
1879
1880 case TGSI_OPCODE_RCP:
1881 /* TGSI_OPCODE_RECIP */
1882 FETCH( &r[0], 0, CHAN_X );
1883 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1884 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1885 STORE( &r[0], 0, chan_index );
1886 }
1887 break;
1888
1889 case TGSI_OPCODE_RSQ:
1890 /* TGSI_OPCODE_RECIPSQRT */
1891 FETCH( &r[0], 0, CHAN_X );
1892 micro_abs( &r[0], &r[0] );
1893 micro_sqrt( &r[0], &r[0] );
1894 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1895 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1896 STORE( &r[0], 0, chan_index );
1897 }
1898 break;
1899
1900 case TGSI_OPCODE_EXP:
1901 FETCH( &r[0], 0, CHAN_X );
1902 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1903 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1904 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1905 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1906 }
1907 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1908 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1909 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1910 }
1911 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1912 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1913 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1914 }
1915 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1916 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1917 }
1918 break;
1919
1920 case TGSI_OPCODE_LOG:
1921 FETCH( &r[0], 0, CHAN_X );
1922 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1923 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1924 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1925 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1926 STORE( &r[0], 0, CHAN_X );
1927 }
1928 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1929 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1930 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1931 STORE( &r[0], 0, CHAN_Y );
1932 }
1933 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1934 STORE( &r[1], 0, CHAN_Z );
1935 }
1936 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1937 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1938 }
1939 break;
1940
1941 case TGSI_OPCODE_MUL:
1942 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1943 {
1944 FETCH(&r[0], 0, chan_index);
1945 FETCH(&r[1], 1, chan_index);
1946
1947 micro_mul( &r[0], &r[0], &r[1] );
1948
1949 STORE(&r[0], 0, chan_index);
1950 }
1951 break;
1952
1953 case TGSI_OPCODE_ADD:
1954 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1955 FETCH( &r[0], 0, chan_index );
1956 FETCH( &r[1], 1, chan_index );
1957 micro_add( &r[0], &r[0], &r[1] );
1958 STORE( &r[0], 0, chan_index );
1959 }
1960 break;
1961
1962 case TGSI_OPCODE_DP3:
1963 /* TGSI_OPCODE_DOT3 */
1964 FETCH( &r[0], 0, CHAN_X );
1965 FETCH( &r[1], 1, CHAN_X );
1966 micro_mul( &r[0], &r[0], &r[1] );
1967
1968 FETCH( &r[1], 0, CHAN_Y );
1969 FETCH( &r[2], 1, CHAN_Y );
1970 micro_mul( &r[1], &r[1], &r[2] );
1971 micro_add( &r[0], &r[0], &r[1] );
1972
1973 FETCH( &r[1], 0, CHAN_Z );
1974 FETCH( &r[2], 1, CHAN_Z );
1975 micro_mul( &r[1], &r[1], &r[2] );
1976 micro_add( &r[0], &r[0], &r[1] );
1977
1978 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1979 STORE( &r[0], 0, chan_index );
1980 }
1981 break;
1982
1983 case TGSI_OPCODE_DP4:
1984 /* TGSI_OPCODE_DOT4 */
1985 FETCH(&r[0], 0, CHAN_X);
1986 FETCH(&r[1], 1, CHAN_X);
1987
1988 micro_mul( &r[0], &r[0], &r[1] );
1989
1990 FETCH(&r[1], 0, CHAN_Y);
1991 FETCH(&r[2], 1, CHAN_Y);
1992
1993 micro_mul( &r[1], &r[1], &r[2] );
1994 micro_add( &r[0], &r[0], &r[1] );
1995
1996 FETCH(&r[1], 0, CHAN_Z);
1997 FETCH(&r[2], 1, CHAN_Z);
1998
1999 micro_mul( &r[1], &r[1], &r[2] );
2000 micro_add( &r[0], &r[0], &r[1] );
2001
2002 FETCH(&r[1], 0, CHAN_W);
2003 FETCH(&r[2], 1, CHAN_W);
2004
2005 micro_mul( &r[1], &r[1], &r[2] );
2006 micro_add( &r[0], &r[0], &r[1] );
2007
2008 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2009 STORE( &r[0], 0, chan_index );
2010 }
2011 break;
2012
2013 case TGSI_OPCODE_DST:
2014 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2015 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2016 }
2017
2018 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2019 FETCH( &r[0], 0, CHAN_Y );
2020 FETCH( &r[1], 1, CHAN_Y);
2021 micro_mul( &r[0], &r[0], &r[1] );
2022 STORE( &r[0], 0, CHAN_Y );
2023 }
2024
2025 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2026 FETCH( &r[0], 0, CHAN_Z );
2027 STORE( &r[0], 0, CHAN_Z );
2028 }
2029
2030 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2031 FETCH( &r[0], 1, CHAN_W );
2032 STORE( &r[0], 0, CHAN_W );
2033 }
2034 break;
2035
2036 case TGSI_OPCODE_MIN:
2037 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2038 FETCH(&r[0], 0, chan_index);
2039 FETCH(&r[1], 1, chan_index);
2040
2041 /* XXX use micro_min()?? */
2042 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
2043
2044 STORE(&r[0], 0, chan_index);
2045 }
2046 break;
2047
2048 case TGSI_OPCODE_MAX:
2049 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2050 FETCH(&r[0], 0, chan_index);
2051 FETCH(&r[1], 1, chan_index);
2052
2053 /* XXX use micro_max()?? */
2054 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2055
2056 STORE(&r[0], 0, chan_index );
2057 }
2058 break;
2059
2060 case TGSI_OPCODE_SLT:
2061 /* TGSI_OPCODE_SETLT */
2062 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2063 FETCH( &r[0], 0, chan_index );
2064 FETCH( &r[1], 1, chan_index );
2065 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2066 STORE( &r[0], 0, chan_index );
2067 }
2068 break;
2069
2070 case TGSI_OPCODE_SGE:
2071 /* TGSI_OPCODE_SETGE */
2072 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2073 FETCH( &r[0], 0, chan_index );
2074 FETCH( &r[1], 1, chan_index );
2075 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2076 STORE( &r[0], 0, chan_index );
2077 }
2078 break;
2079
2080 case TGSI_OPCODE_MAD:
2081 /* TGSI_OPCODE_MADD */
2082 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2083 FETCH( &r[0], 0, chan_index );
2084 FETCH( &r[1], 1, chan_index );
2085 micro_mul( &r[0], &r[0], &r[1] );
2086 FETCH( &r[1], 2, chan_index );
2087 micro_add( &r[0], &r[0], &r[1] );
2088 STORE( &r[0], 0, chan_index );
2089 }
2090 break;
2091
2092 case TGSI_OPCODE_SUB:
2093 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2094 FETCH(&r[0], 0, chan_index);
2095 FETCH(&r[1], 1, chan_index);
2096
2097 micro_sub( &r[0], &r[0], &r[1] );
2098
2099 STORE(&r[0], 0, chan_index);
2100 }
2101 break;
2102
2103 case TGSI_OPCODE_LERP:
2104 /* TGSI_OPCODE_LRP */
2105 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2106 FETCH(&r[0], 0, chan_index);
2107 FETCH(&r[1], 1, chan_index);
2108 FETCH(&r[2], 2, chan_index);
2109
2110 micro_sub( &r[1], &r[1], &r[2] );
2111 micro_mul( &r[0], &r[0], &r[1] );
2112 micro_add( &r[0], &r[0], &r[2] );
2113
2114 STORE(&r[0], 0, chan_index);
2115 }
2116 break;
2117
2118 case TGSI_OPCODE_CND:
2119 assert (0);
2120 break;
2121
2122 case TGSI_OPCODE_CND0:
2123 assert (0);
2124 break;
2125
2126 case TGSI_OPCODE_DOT2ADD:
2127 /* TGSI_OPCODE_DP2A */
2128 FETCH( &r[0], 0, CHAN_X );
2129 FETCH( &r[1], 1, CHAN_X );
2130 micro_mul( &r[0], &r[0], &r[1] );
2131
2132 FETCH( &r[1], 0, CHAN_Y );
2133 FETCH( &r[2], 1, CHAN_Y );
2134 micro_mul( &r[1], &r[1], &r[2] );
2135 micro_add( &r[0], &r[0], &r[1] );
2136
2137 FETCH( &r[2], 2, CHAN_X );
2138 micro_add( &r[0], &r[0], &r[2] );
2139
2140 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2141 STORE( &r[0], 0, chan_index );
2142 }
2143 break;
2144
2145 case TGSI_OPCODE_INDEX:
2146 assert (0);
2147 break;
2148
2149 case TGSI_OPCODE_NEGATE:
2150 assert (0);
2151 break;
2152
2153 case TGSI_OPCODE_FRAC:
2154 /* TGSI_OPCODE_FRC */
2155 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2156 FETCH( &r[0], 0, chan_index );
2157 micro_frc( &r[0], &r[0] );
2158 STORE( &r[0], 0, chan_index );
2159 }
2160 break;
2161
2162 case TGSI_OPCODE_CLAMP:
2163 assert (0);
2164 break;
2165
2166 case TGSI_OPCODE_FLOOR:
2167 /* TGSI_OPCODE_FLR */
2168 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2169 FETCH( &r[0], 0, chan_index );
2170 micro_flr( &r[0], &r[0] );
2171 STORE( &r[0], 0, chan_index );
2172 }
2173 break;
2174
2175 case TGSI_OPCODE_ROUND:
2176 case TGSI_OPCODE_ARR:
2177 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2178 FETCH( &r[0], 0, chan_index );
2179 micro_rnd( &r[0], &r[0] );
2180 STORE( &r[0], 0, chan_index );
2181 }
2182 break;
2183
2184 case TGSI_OPCODE_EXPBASE2:
2185 /* TGSI_OPCODE_EX2 */
2186 FETCH(&r[0], 0, CHAN_X);
2187
2188 #if FAST_MATH
2189 micro_exp2( &r[0], &r[0] );
2190 #else
2191 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2192 #endif
2193
2194 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2195 STORE( &r[0], 0, chan_index );
2196 }
2197 break;
2198
2199 case TGSI_OPCODE_LOGBASE2:
2200 /* TGSI_OPCODE_LG2 */
2201 FETCH( &r[0], 0, CHAN_X );
2202 micro_lg2( &r[0], &r[0] );
2203 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2204 STORE( &r[0], 0, chan_index );
2205 }
2206 break;
2207
2208 case TGSI_OPCODE_POWER:
2209 /* TGSI_OPCODE_POW */
2210 FETCH(&r[0], 0, CHAN_X);
2211 FETCH(&r[1], 1, CHAN_X);
2212
2213 micro_pow( &r[0], &r[0], &r[1] );
2214
2215 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2216 STORE( &r[0], 0, chan_index );
2217 }
2218 break;
2219
2220 case TGSI_OPCODE_CROSSPRODUCT:
2221 /* TGSI_OPCODE_XPD */
2222 FETCH(&r[0], 0, CHAN_Y);
2223 FETCH(&r[1], 1, CHAN_Z);
2224
2225 micro_mul( &r[2], &r[0], &r[1] );
2226
2227 FETCH(&r[3], 0, CHAN_Z);
2228 FETCH(&r[4], 1, CHAN_Y);
2229
2230 micro_mul( &r[5], &r[3], &r[4] );
2231 micro_sub( &r[2], &r[2], &r[5] );
2232
2233 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2234 STORE( &r[2], 0, CHAN_X );
2235 }
2236
2237 FETCH(&r[2], 1, CHAN_X);
2238
2239 micro_mul( &r[3], &r[3], &r[2] );
2240
2241 FETCH(&r[5], 0, CHAN_X);
2242
2243 micro_mul( &r[1], &r[1], &r[5] );
2244 micro_sub( &r[3], &r[3], &r[1] );
2245
2246 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2247 STORE( &r[3], 0, CHAN_Y );
2248 }
2249
2250 micro_mul( &r[5], &r[5], &r[4] );
2251 micro_mul( &r[0], &r[0], &r[2] );
2252 micro_sub( &r[5], &r[5], &r[0] );
2253
2254 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2255 STORE( &r[5], 0, CHAN_Z );
2256 }
2257
2258 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2259 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2260 }
2261 break;
2262
2263 case TGSI_OPCODE_MULTIPLYMATRIX:
2264 assert (0);
2265 break;
2266
2267 case TGSI_OPCODE_ABS:
2268 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2269 FETCH(&r[0], 0, chan_index);
2270
2271 micro_abs( &r[0], &r[0] );
2272
2273 STORE(&r[0], 0, chan_index);
2274 }
2275 break;
2276
2277 case TGSI_OPCODE_RCC:
2278 assert (0);
2279 break;
2280
2281 case TGSI_OPCODE_DPH:
2282 FETCH(&r[0], 0, CHAN_X);
2283 FETCH(&r[1], 1, CHAN_X);
2284
2285 micro_mul( &r[0], &r[0], &r[1] );
2286
2287 FETCH(&r[1], 0, CHAN_Y);
2288 FETCH(&r[2], 1, CHAN_Y);
2289
2290 micro_mul( &r[1], &r[1], &r[2] );
2291 micro_add( &r[0], &r[0], &r[1] );
2292
2293 FETCH(&r[1], 0, CHAN_Z);
2294 FETCH(&r[2], 1, CHAN_Z);
2295
2296 micro_mul( &r[1], &r[1], &r[2] );
2297 micro_add( &r[0], &r[0], &r[1] );
2298
2299 FETCH(&r[1], 1, CHAN_W);
2300
2301 micro_add( &r[0], &r[0], &r[1] );
2302
2303 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2304 STORE( &r[0], 0, chan_index );
2305 }
2306 break;
2307
2308 case TGSI_OPCODE_COS:
2309 FETCH(&r[0], 0, CHAN_X);
2310
2311 micro_cos( &r[0], &r[0] );
2312
2313 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2314 STORE( &r[0], 0, chan_index );
2315 }
2316 break;
2317
2318 case TGSI_OPCODE_DDX:
2319 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2320 FETCH( &r[0], 0, chan_index );
2321 micro_ddx( &r[0], &r[0] );
2322 STORE( &r[0], 0, chan_index );
2323 }
2324 break;
2325
2326 case TGSI_OPCODE_DDY:
2327 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2328 FETCH( &r[0], 0, chan_index );
2329 micro_ddy( &r[0], &r[0] );
2330 STORE( &r[0], 0, chan_index );
2331 }
2332 break;
2333
2334 case TGSI_OPCODE_KILP:
2335 exec_kilp (mach, inst);
2336 break;
2337
2338 case TGSI_OPCODE_KIL:
2339 exec_kil (mach, inst);
2340 break;
2341
2342 case TGSI_OPCODE_PK2H:
2343 assert (0);
2344 break;
2345
2346 case TGSI_OPCODE_PK2US:
2347 assert (0);
2348 break;
2349
2350 case TGSI_OPCODE_PK4B:
2351 assert (0);
2352 break;
2353
2354 case TGSI_OPCODE_PK4UB:
2355 assert (0);
2356 break;
2357
2358 case TGSI_OPCODE_RFL:
2359 assert (0);
2360 break;
2361
2362 case TGSI_OPCODE_SEQ:
2363 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2364 FETCH( &r[0], 0, chan_index );
2365 FETCH( &r[1], 1, chan_index );
2366 micro_eq( &r[0], &r[0], &r[1],
2367 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2368 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2369 STORE( &r[0], 0, chan_index );
2370 }
2371 break;
2372
2373 case TGSI_OPCODE_SFL:
2374 assert (0);
2375 break;
2376
2377 case TGSI_OPCODE_SGT:
2378 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2379 FETCH( &r[0], 0, chan_index );
2380 FETCH( &r[1], 1, chan_index );
2381 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2382 STORE( &r[0], 0, chan_index );
2383 }
2384 break;
2385
2386 case TGSI_OPCODE_SIN:
2387 FETCH( &r[0], 0, CHAN_X );
2388 micro_sin( &r[0], &r[0] );
2389 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2390 STORE( &r[0], 0, chan_index );
2391 }
2392 break;
2393
2394 case TGSI_OPCODE_SLE:
2395 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2396 FETCH( &r[0], 0, chan_index );
2397 FETCH( &r[1], 1, chan_index );
2398 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2399 STORE( &r[0], 0, chan_index );
2400 }
2401 break;
2402
2403 case TGSI_OPCODE_SNE:
2404 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2405 FETCH( &r[0], 0, chan_index );
2406 FETCH( &r[1], 1, chan_index );
2407 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2408 STORE( &r[0], 0, chan_index );
2409 }
2410 break;
2411
2412 case TGSI_OPCODE_STR:
2413 assert (0);
2414 break;
2415
2416 case TGSI_OPCODE_TEX:
2417 /* simple texture lookup */
2418 /* src[0] = texcoord */
2419 /* src[1] = sampler unit */
2420 exec_tex(mach, inst, FALSE, FALSE);
2421 break;
2422
2423 case TGSI_OPCODE_TXB:
2424 /* Texture lookup with lod bias */
2425 /* src[0] = texcoord (src[0].w = LOD bias) */
2426 /* src[1] = sampler unit */
2427 exec_tex(mach, inst, TRUE, FALSE);
2428 break;
2429
2430 case TGSI_OPCODE_TXD:
2431 /* Texture lookup with explicit partial derivatives */
2432 /* src[0] = texcoord */
2433 /* src[1] = d[strq]/dx */
2434 /* src[2] = d[strq]/dy */
2435 /* src[3] = sampler unit */
2436 assert (0);
2437 break;
2438
2439 case TGSI_OPCODE_TXL:
2440 /* Texture lookup with explicit LOD */
2441 /* src[0] = texcoord (src[0].w = LOD) */
2442 /* src[1] = sampler unit */
2443 exec_tex(mach, inst, TRUE, FALSE);
2444 break;
2445
2446 case TGSI_OPCODE_TXP:
2447 /* Texture lookup with projection */
2448 /* src[0] = texcoord (src[0].w = projection) */
2449 /* src[1] = sampler unit */
2450 exec_tex(mach, inst, FALSE, TRUE);
2451 break;
2452
2453 case TGSI_OPCODE_UP2H:
2454 assert (0);
2455 break;
2456
2457 case TGSI_OPCODE_UP2US:
2458 assert (0);
2459 break;
2460
2461 case TGSI_OPCODE_UP4B:
2462 assert (0);
2463 break;
2464
2465 case TGSI_OPCODE_UP4UB:
2466 assert (0);
2467 break;
2468
2469 case TGSI_OPCODE_X2D:
2470 assert (0);
2471 break;
2472
2473 case TGSI_OPCODE_ARA:
2474 assert (0);
2475 break;
2476
2477 case TGSI_OPCODE_BRA:
2478 assert (0);
2479 break;
2480
2481 case TGSI_OPCODE_CAL:
2482 /* skip the call if no execution channels are enabled */
2483 if (mach->ExecMask) {
2484 /* do the call */
2485
2486 /* push the Cond, Loop, Cont stacks */
2487 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2488 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2489 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2490 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2491 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2492 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2493
2494 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2495 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2496
2497 /* note that PC was already incremented above */
2498 mach->CallStack[mach->CallStackTop++] = *pc;
2499 *pc = inst->InstructionExtLabel.Label;
2500 }
2501 break;
2502
2503 case TGSI_OPCODE_RET:
2504 mach->FuncMask &= ~mach->ExecMask;
2505 UPDATE_EXEC_MASK(mach);
2506
2507 if (mach->FuncMask == 0x0) {
2508 /* really return now (otherwise, keep executing) */
2509
2510 if (mach->CallStackTop == 0) {
2511 /* returning from main() */
2512 *pc = -1;
2513 return;
2514 }
2515 *pc = mach->CallStack[--mach->CallStackTop];
2516
2517 /* pop the Cond, Loop, Cont stacks */
2518 assert(mach->CondStackTop > 0);
2519 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2520 assert(mach->LoopStackTop > 0);
2521 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2522 assert(mach->ContStackTop > 0);
2523 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2524 assert(mach->FuncStackTop > 0);
2525 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2526
2527 UPDATE_EXEC_MASK(mach);
2528 }
2529 break;
2530
2531 case TGSI_OPCODE_SSG:
2532 /* TGSI_OPCODE_SGN */
2533 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2534 FETCH( &r[0], 0, chan_index );
2535 micro_sgn( &r[0], &r[0] );
2536 STORE( &r[0], 0, chan_index );
2537 }
2538 break;
2539
2540 case TGSI_OPCODE_CMP:
2541 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2542 FETCH(&r[0], 0, chan_index);
2543 FETCH(&r[1], 1, chan_index);
2544 FETCH(&r[2], 2, chan_index);
2545
2546 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2547
2548 STORE(&r[0], 0, chan_index);
2549 }
2550 break;
2551
2552 case TGSI_OPCODE_SCS:
2553 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2554 FETCH( &r[0], 0, CHAN_X );
2555 }
2556 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2557 micro_cos( &r[1], &r[0] );
2558 STORE( &r[1], 0, CHAN_X );
2559 }
2560 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2561 micro_sin( &r[1], &r[0] );
2562 STORE( &r[1], 0, CHAN_Y );
2563 }
2564 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2565 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2566 }
2567 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2568 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2569 }
2570 break;
2571
2572 case TGSI_OPCODE_NRM:
2573 /* 3-component vector normalize */
2574 {
2575 union tgsi_exec_channel tmp, dot;
2576
2577 /* tmp = dp3(src0, src0): */
2578 FETCH( &r[0], 0, CHAN_X );
2579 micro_mul( &tmp, &r[0], &r[0] );
2580
2581 FETCH( &r[1], 0, CHAN_Y );
2582 micro_mul( &dot, &r[1], &r[1] );
2583 micro_add( &tmp, &tmp, &dot );
2584
2585 FETCH( &r[2], 0, CHAN_Z );
2586 micro_mul( &dot, &r[2], &r[2] );
2587 micro_add( &tmp, &tmp, &dot );
2588
2589 /* tmp = 1 / sqrt(tmp) */
2590 micro_sqrt( &tmp, &tmp );
2591 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2592
2593 /* note: w channel is undefined */
2594 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2595 /* chan = chan * tmp */
2596 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2597 STORE( &r[chan_index], 0, chan_index );
2598 }
2599 }
2600 break;
2601
2602 case TGSI_OPCODE_NRM4:
2603 /* 4-component vector normalize */
2604 {
2605 union tgsi_exec_channel tmp, dot;
2606
2607 /* tmp = dp4(src0, src0): */
2608 FETCH( &r[0], 0, CHAN_X );
2609 micro_mul( &tmp, &r[0], &r[0] );
2610
2611 FETCH( &r[1], 0, CHAN_Y );
2612 micro_mul( &dot, &r[1], &r[1] );
2613 micro_add( &tmp, &tmp, &dot );
2614
2615 FETCH( &r[2], 0, CHAN_Z );
2616 micro_mul( &dot, &r[2], &r[2] );
2617 micro_add( &tmp, &tmp, &dot );
2618
2619 FETCH( &r[3], 0, CHAN_W );
2620 micro_mul( &dot, &r[3], &r[3] );
2621 micro_add( &tmp, &tmp, &dot );
2622
2623 /* tmp = 1 / sqrt(tmp) */
2624 micro_sqrt( &tmp, &tmp );
2625 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2626
2627 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2628 /* chan = chan * tmp */
2629 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2630 STORE( &r[chan_index], 0, chan_index );
2631 }
2632 }
2633 break;
2634
2635 case TGSI_OPCODE_DIV:
2636 assert( 0 );
2637 break;
2638
2639 case TGSI_OPCODE_DP2:
2640 FETCH( &r[0], 0, CHAN_X );
2641 FETCH( &r[1], 1, CHAN_X );
2642 micro_mul( &r[0], &r[0], &r[1] );
2643
2644 FETCH( &r[1], 0, CHAN_Y );
2645 FETCH( &r[2], 1, CHAN_Y );
2646 micro_mul( &r[1], &r[1], &r[2] );
2647 micro_add( &r[0], &r[0], &r[1] );
2648
2649 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2650 STORE( &r[0], 0, chan_index );
2651 }
2652 break;
2653
2654 case TGSI_OPCODE_IF:
2655 /* push CondMask */
2656 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2657 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2658 FETCH( &r[0], 0, CHAN_X );
2659 /* update CondMask */
2660 if( ! r[0].u[0] ) {
2661 mach->CondMask &= ~0x1;
2662 }
2663 if( ! r[0].u[1] ) {
2664 mach->CondMask &= ~0x2;
2665 }
2666 if( ! r[0].u[2] ) {
2667 mach->CondMask &= ~0x4;
2668 }
2669 if( ! r[0].u[3] ) {
2670 mach->CondMask &= ~0x8;
2671 }
2672 UPDATE_EXEC_MASK(mach);
2673 /* Todo: If CondMask==0, jump to ELSE */
2674 break;
2675
2676 case TGSI_OPCODE_ELSE:
2677 /* invert CondMask wrt previous mask */
2678 {
2679 uint prevMask;
2680 assert(mach->CondStackTop > 0);
2681 prevMask = mach->CondStack[mach->CondStackTop - 1];
2682 mach->CondMask = ~mach->CondMask & prevMask;
2683 UPDATE_EXEC_MASK(mach);
2684 /* Todo: If CondMask==0, jump to ENDIF */
2685 }
2686 break;
2687
2688 case TGSI_OPCODE_ENDIF:
2689 /* pop CondMask */
2690 assert(mach->CondStackTop > 0);
2691 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2692 UPDATE_EXEC_MASK(mach);
2693 break;
2694
2695 case TGSI_OPCODE_END:
2696 /* halt execution */
2697 *pc = -1;
2698 break;
2699
2700 case TGSI_OPCODE_REP:
2701 assert (0);
2702 break;
2703
2704 case TGSI_OPCODE_ENDREP:
2705 assert (0);
2706 break;
2707
2708 case TGSI_OPCODE_PUSHA:
2709 assert (0);
2710 break;
2711
2712 case TGSI_OPCODE_POPA:
2713 assert (0);
2714 break;
2715
2716 case TGSI_OPCODE_CEIL:
2717 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2718 FETCH( &r[0], 0, chan_index );
2719 micro_ceil( &r[0], &r[0] );
2720 STORE( &r[0], 0, chan_index );
2721 }
2722 break;
2723
2724 case TGSI_OPCODE_I2F:
2725 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2726 FETCH( &r[0], 0, chan_index );
2727 micro_i2f( &r[0], &r[0] );
2728 STORE( &r[0], 0, chan_index );
2729 }
2730 break;
2731
2732 case TGSI_OPCODE_NOT:
2733 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2734 FETCH( &r[0], 0, chan_index );
2735 micro_not( &r[0], &r[0] );
2736 STORE( &r[0], 0, chan_index );
2737 }
2738 break;
2739
2740 case TGSI_OPCODE_TRUNC:
2741 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2742 FETCH( &r[0], 0, chan_index );
2743 micro_trunc( &r[0], &r[0] );
2744 STORE( &r[0], 0, chan_index );
2745 }
2746 break;
2747
2748 case TGSI_OPCODE_SHL:
2749 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2750 FETCH( &r[0], 0, chan_index );
2751 FETCH( &r[1], 1, chan_index );
2752 micro_shl( &r[0], &r[0], &r[1] );
2753 STORE( &r[0], 0, chan_index );
2754 }
2755 break;
2756
2757 case TGSI_OPCODE_SHR:
2758 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2759 FETCH( &r[0], 0, chan_index );
2760 FETCH( &r[1], 1, chan_index );
2761 micro_ishr( &r[0], &r[0], &r[1] );
2762 STORE( &r[0], 0, chan_index );
2763 }
2764 break;
2765
2766 case TGSI_OPCODE_AND:
2767 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2768 FETCH( &r[0], 0, chan_index );
2769 FETCH( &r[1], 1, chan_index );
2770 micro_and( &r[0], &r[0], &r[1] );
2771 STORE( &r[0], 0, chan_index );
2772 }
2773 break;
2774
2775 case TGSI_OPCODE_OR:
2776 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2777 FETCH( &r[0], 0, chan_index );
2778 FETCH( &r[1], 1, chan_index );
2779 micro_or( &r[0], &r[0], &r[1] );
2780 STORE( &r[0], 0, chan_index );
2781 }
2782 break;
2783
2784 case TGSI_OPCODE_MOD:
2785 assert (0);
2786 break;
2787
2788 case TGSI_OPCODE_XOR:
2789 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2790 FETCH( &r[0], 0, chan_index );
2791 FETCH( &r[1], 1, chan_index );
2792 micro_xor( &r[0], &r[0], &r[1] );
2793 STORE( &r[0], 0, chan_index );
2794 }
2795 break;
2796
2797 case TGSI_OPCODE_SAD:
2798 assert (0);
2799 break;
2800
2801 case TGSI_OPCODE_TXF:
2802 assert (0);
2803 break;
2804
2805 case TGSI_OPCODE_TXQ:
2806 assert (0);
2807 break;
2808
2809 case TGSI_OPCODE_EMIT:
2810 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2811 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2812 break;
2813
2814 case TGSI_OPCODE_ENDPRIM:
2815 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2816 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2817 break;
2818
2819 case TGSI_OPCODE_LOOP:
2820 /* fall-through (for now) */
2821 case TGSI_OPCODE_BGNLOOP2:
2822 /* push LoopMask and ContMasks */
2823 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2824 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2825 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2826 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2827 break;
2828
2829 case TGSI_OPCODE_ENDLOOP:
2830 /* fall-through (for now at least) */
2831 case TGSI_OPCODE_ENDLOOP2:
2832 /* Restore ContMask, but don't pop */
2833 assert(mach->ContStackTop > 0);
2834 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2835 UPDATE_EXEC_MASK(mach);
2836 if (mach->ExecMask) {
2837 /* repeat loop: jump to instruction just past BGNLOOP */
2838 *pc = inst->InstructionExtLabel.Label + 1;
2839 }
2840 else {
2841 /* exit loop: pop LoopMask */
2842 assert(mach->LoopStackTop > 0);
2843 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2844 /* pop ContMask */
2845 assert(mach->ContStackTop > 0);
2846 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2847 }
2848 UPDATE_EXEC_MASK(mach);
2849 break;
2850
2851 case TGSI_OPCODE_BRK:
2852 /* turn off loop channels for each enabled exec channel */
2853 mach->LoopMask &= ~mach->ExecMask;
2854 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2855 UPDATE_EXEC_MASK(mach);
2856 break;
2857
2858 case TGSI_OPCODE_CONT:
2859 /* turn off cont channels for each enabled exec channel */
2860 mach->ContMask &= ~mach->ExecMask;
2861 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2862 UPDATE_EXEC_MASK(mach);
2863 break;
2864
2865 case TGSI_OPCODE_BGNSUB:
2866 /* no-op */
2867 break;
2868
2869 case TGSI_OPCODE_ENDSUB:
2870 /* no-op */
2871 break;
2872
2873 case TGSI_OPCODE_NOISE1:
2874 assert( 0 );
2875 break;
2876
2877 case TGSI_OPCODE_NOISE2:
2878 assert( 0 );
2879 break;
2880
2881 case TGSI_OPCODE_NOISE3:
2882 assert( 0 );
2883 break;
2884
2885 case TGSI_OPCODE_NOISE4:
2886 assert( 0 );
2887 break;
2888
2889 case TGSI_OPCODE_NOP:
2890 break;
2891
2892 default:
2893 assert( 0 );
2894 }
2895 }
2896
2897
2898 /**
2899 * Run TGSI interpreter.
2900 * \return bitmask of "alive" quad components
2901 */
2902 uint
2903 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2904 {
2905 uint i;
2906 int pc = 0;
2907
2908 mach->CondMask = 0xf;
2909 mach->LoopMask = 0xf;
2910 mach->ContMask = 0xf;
2911 mach->FuncMask = 0xf;
2912 mach->ExecMask = 0xf;
2913
2914 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2915 assert(mach->CondStackTop == 0);
2916 assert(mach->LoopStackTop == 0);
2917 assert(mach->ContStackTop == 0);
2918 assert(mach->CallStackTop == 0);
2919
2920 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2921 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2922
2923 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2924 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2925 mach->Primitives[0] = 0;
2926 }
2927
2928 for (i = 0; i < QUAD_SIZE; i++) {
2929 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2930 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2931 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2932 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2933 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2934 }
2935
2936 /* execute declarations (interpolants) */
2937 for (i = 0; i < mach->NumDeclarations; i++) {
2938 exec_declaration( mach, mach->Declarations+i );
2939 }
2940
2941 /* execute instructions, until pc is set to -1 */
2942 while (pc != -1) {
2943 assert(pc < (int) mach->NumInstructions);
2944 exec_instruction( mach, mach->Instructions + pc, &pc );
2945 }
2946
2947 #if 0
2948 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2949 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2950 /*
2951 * Scale back depth component.
2952 */
2953 for (i = 0; i < 4; i++)
2954 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2955 }
2956 #endif
2957
2958 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2959 }
2960
2961