Merge commit 'origin/gallium-0.1'
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK) which are controlled by
 * the flow control instructions (such as IF/ELSE/ENDIF, LOOP/ENDLOOP
 * and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_parse.h"
57 #include "tgsi/tgsi_util.h"
58 #include "tgsi_exec.h"
59 #include "util/u_memory.h"
60 #include "util/u_math.h"
61
62 #define FAST_MATH 1
63
64 #define TILE_TOP_LEFT 0
65 #define TILE_TOP_RIGHT 1
66 #define TILE_BOTTOM_LEFT 2
67 #define TILE_BOTTOM_RIGHT 3
68
69 #define CHAN_X 0
70 #define CHAN_Y 1
71 #define CHAN_Z 2
72 #define CHAN_W 3
73
74 /*
75 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
76 */
77 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
78 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
79 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
80 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
81 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
82 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
83 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
84 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
85 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
86 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
87 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
88 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
89 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
90 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
91 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
92 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
93 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
94 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
95 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
96 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
97 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
98 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
99 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
100 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
101 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
102 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
103 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
104 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
105 #define TEMP_R0 TGSI_EXEC_TEMP_R0
106
/**
 * Test bit CHAN of the write mask of destination register DST of INST.
 * Evaluates to non-zero (the mask bit itself) when the channel is written.
 */
#define IS_DST_CHANNEL_ENABLED(INST, DST, CHAN)\
   ((INST).FullDstRegisters[DST].DstRegister.WriteMask & (1 << (CHAN)))

/** Write-mask test for the first destination register. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   IS_DST_CHANNEL_ENABLED(INST, 0, CHAN)

/** Write-mask test for the second destination register. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   IS_DST_CHANNEL_ENABLED(INST, 1, CHAN)

/**
 * Run the statement that follows once for every channel enabled in the
 * write mask of the first destination register.  CHAN must be an lvalue;
 * it is used as the loop counter.
 *
 * The "if (!...) { } else" form makes sure that an "else" written after the
 * loop body binds to the caller's own "if", not to the test hidden in here.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (!IS_CHANNEL_ENABLED( INST, CHAN )) { } else

/** Same as FOR_EACH_ENABLED_CHANNEL, for the second destination register. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (!IS_CHANNEL_ENABLED2( INST, CHAN )) { } else
120
121
/**
 * Recompute the execution mask as the bitwise AND of the conditional, loop,
 * continue and function masks.
 * MACH is a pointer expression; it is evaluated several times, so it must
 * not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
125
126 /**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131 void
132 tgsi_exec_machine_bind_shader(
133 struct tgsi_exec_machine *mach,
134 const struct tgsi_token *tokens,
135 uint numSamplers,
136 struct tgsi_sampler **samplers)
137 {
138 uint k;
139 struct tgsi_parse_context parse;
140 struct tgsi_exec_labels *labels = &mach->Labels;
141 struct tgsi_full_instruction *instructions;
142 struct tgsi_full_declaration *declarations;
143 uint maxInstructions = 10, numInstructions = 0;
144 uint maxDeclarations = 10, numDeclarations = 0;
145 uint instno = 0;
146
147 #if 0
148 tgsi_dump(tokens, 0);
149 #endif
150
151 util_init_math();
152
153 mach->Tokens = tokens;
154 mach->Samplers = samplers;
155
156 k = tgsi_parse_init (&parse, mach->Tokens);
157 if (k != TGSI_PARSE_OK) {
158 debug_printf( "Problem parsing!\n" );
159 return;
160 }
161
162 mach->Processor = parse.FullHeader.Processor.Processor;
163 mach->ImmLimit = 0;
164 labels->count = 0;
165
166 declarations = (struct tgsi_full_declaration *)
167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169 if (!declarations) {
170 return;
171 }
172
173 instructions = (struct tgsi_full_instruction *)
174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176 if (!instructions) {
177 FREE( declarations );
178 return;
179 }
180
181 while( !tgsi_parse_end_of_tokens( &parse ) ) {
182 uint pointer = parse.Position;
183 uint i;
184
185 tgsi_parse_token( &parse );
186 switch( parse.FullToken.Token.Type ) {
187 case TGSI_TOKEN_TYPE_DECLARATION:
188 /* save expanded declaration */
189 if (numDeclarations == maxDeclarations) {
190 declarations = REALLOC(declarations,
191 maxDeclarations
192 * sizeof(struct tgsi_full_declaration),
193 (maxDeclarations + 10)
194 * sizeof(struct tgsi_full_declaration));
195 maxDeclarations += 10;
196 }
197 memcpy(declarations + numDeclarations,
198 &parse.FullToken.FullDeclaration,
199 sizeof(declarations[0]));
200 numDeclarations++;
201 break;
202
203 case TGSI_TOKEN_TYPE_IMMEDIATE:
204 {
205 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
206 assert( size % 4 == 0 );
207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209 for( i = 0; i < size; i++ ) {
210 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212 }
213 mach->ImmLimit += size / 4;
214 }
215 break;
216
217 case TGSI_TOKEN_TYPE_INSTRUCTION:
218 assert( labels->count < MAX_LABELS );
219
220 labels->labels[labels->count][0] = instno;
221 labels->labels[labels->count][1] = pointer;
222 labels->count++;
223
224 /* save expanded instruction */
225 if (numInstructions == maxInstructions) {
226 instructions = REALLOC(instructions,
227 maxInstructions
228 * sizeof(struct tgsi_full_instruction),
229 (maxInstructions + 10)
230 * sizeof(struct tgsi_full_instruction));
231 maxInstructions += 10;
232 }
233 memcpy(instructions + numInstructions,
234 &parse.FullToken.FullInstruction,
235 sizeof(instructions[0]));
236 numInstructions++;
237 break;
238
239 default:
240 assert( 0 );
241 }
242 }
243 tgsi_parse_free (&parse);
244
245 if (mach->Declarations) {
246 FREE( mach->Declarations );
247 }
248 mach->Declarations = declarations;
249 mach->NumDeclarations = numDeclarations;
250
251 if (mach->Instructions) {
252 FREE( mach->Instructions );
253 }
254 mach->Instructions = instructions;
255 mach->NumInstructions = numInstructions;
256 }
257
258
259 void
260 tgsi_exec_machine_init(
261 struct tgsi_exec_machine *mach )
262 {
263 uint i;
264
265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268 /* Setup constants. */
269 for( i = 0; i < 4; i++ ) {
270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280 }
281 }
282
283
284 void
285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286 {
287 if (mach->Instructions) {
288 FREE(mach->Instructions);
289 mach->Instructions = NULL;
290 mach->NumInstructions = 0;
291 }
292 if (mach->Declarations) {
293 FREE(mach->Declarations);
294 mach->Declarations = NULL;
295 mach->NumDeclarations = 0;
296 }
297 }
298
299
300 static void
301 micro_abs(
302 union tgsi_exec_channel *dst,
303 const union tgsi_exec_channel *src )
304 {
305 dst->f[0] = fabsf( src->f[0] );
306 dst->f[1] = fabsf( src->f[1] );
307 dst->f[2] = fabsf( src->f[2] );
308 dst->f[3] = fabsf( src->f[3] );
309 }
310
311 static void
312 micro_add(
313 union tgsi_exec_channel *dst,
314 const union tgsi_exec_channel *src0,
315 const union tgsi_exec_channel *src1 )
316 {
317 dst->f[0] = src0->f[0] + src1->f[0];
318 dst->f[1] = src0->f[1] + src1->f[1];
319 dst->f[2] = src0->f[2] + src1->f[2];
320 dst->f[3] = src0->f[3] + src1->f[3];
321 }
322
#if 0
/** dst = src0 + src1 (signed integer) for each of the four lanes. */
static void
micro_iadd(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] + src1->i[lane];
   }
}
#endif
336
337 static void
338 micro_and(
339 union tgsi_exec_channel *dst,
340 const union tgsi_exec_channel *src0,
341 const union tgsi_exec_channel *src1 )
342 {
343 dst->u[0] = src0->u[0] & src1->u[0];
344 dst->u[1] = src0->u[1] & src1->u[1];
345 dst->u[2] = src0->u[2] & src1->u[2];
346 dst->u[3] = src0->u[3] & src1->u[3];
347 }
348
349 static void
350 micro_ceil(
351 union tgsi_exec_channel *dst,
352 const union tgsi_exec_channel *src )
353 {
354 dst->f[0] = ceilf( src->f[0] );
355 dst->f[1] = ceilf( src->f[1] );
356 dst->f[2] = ceilf( src->f[2] );
357 dst->f[3] = ceilf( src->f[3] );
358 }
359
360 static void
361 micro_cos(
362 union tgsi_exec_channel *dst,
363 const union tgsi_exec_channel *src )
364 {
365 dst->f[0] = cosf( src->f[0] );
366 dst->f[1] = cosf( src->f[1] );
367 dst->f[2] = cosf( src->f[2] );
368 dst->f[3] = cosf( src->f[3] );
369 }
370
371 static void
372 micro_ddx(
373 union tgsi_exec_channel *dst,
374 const union tgsi_exec_channel *src )
375 {
376 dst->f[0] =
377 dst->f[1] =
378 dst->f[2] =
379 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
380 }
381
382 static void
383 micro_ddy(
384 union tgsi_exec_channel *dst,
385 const union tgsi_exec_channel *src )
386 {
387 dst->f[0] =
388 dst->f[1] =
389 dst->f[2] =
390 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
391 }
392
393 static void
394 micro_div(
395 union tgsi_exec_channel *dst,
396 const union tgsi_exec_channel *src0,
397 const union tgsi_exec_channel *src1 )
398 {
399 if (src1->f[0] != 0) {
400 dst->f[0] = src0->f[0] / src1->f[0];
401 }
402 if (src1->f[1] != 0) {
403 dst->f[1] = src0->f[1] / src1->f[1];
404 }
405 if (src1->f[2] != 0) {
406 dst->f[2] = src0->f[2] / src1->f[2];
407 }
408 if (src1->f[3] != 0) {
409 dst->f[3] = src0->f[3] / src1->f[3];
410 }
411 }
412
#if 0
/** dst = src0 / src1 (unsigned integer) for each of the four lanes. */
static void
micro_udiv(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] / src1->u[lane];
   }
}
#endif
426
427 static void
428 micro_eq(
429 union tgsi_exec_channel *dst,
430 const union tgsi_exec_channel *src0,
431 const union tgsi_exec_channel *src1,
432 const union tgsi_exec_channel *src2,
433 const union tgsi_exec_channel *src3 )
434 {
435 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
436 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
437 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
438 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
439 }
440
#if 0
/** Per lane: dst = (src0 == src1) ? src2 : src3, on signed integers. */
static void
micro_ieq(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] == src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
456
457 static void
458 micro_exp2(
459 union tgsi_exec_channel *dst,
460 const union tgsi_exec_channel *src)
461 {
462 #if FAST_MATH
463 dst->f[0] = util_fast_exp2( src->f[0] );
464 dst->f[1] = util_fast_exp2( src->f[1] );
465 dst->f[2] = util_fast_exp2( src->f[2] );
466 dst->f[3] = util_fast_exp2( src->f[3] );
467 #else
468 dst->f[0] = powf( 2.0f, src->f[0] );
469 dst->f[1] = powf( 2.0f, src->f[1] );
470 dst->f[2] = powf( 2.0f, src->f[2] );
471 dst->f[3] = powf( 2.0f, src->f[3] );
472 #endif
473 }
474
#if 0
/** dst = (uint) src: float to unsigned conversion for each lane. */
static void
micro_f2ut(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = (uint) src->f[lane];
   }
}
#endif
487
488 static void
489 micro_flr(
490 union tgsi_exec_channel *dst,
491 const union tgsi_exec_channel *src )
492 {
493 dst->f[0] = floorf( src->f[0] );
494 dst->f[1] = floorf( src->f[1] );
495 dst->f[2] = floorf( src->f[2] );
496 dst->f[3] = floorf( src->f[3] );
497 }
498
499 static void
500 micro_frc(
501 union tgsi_exec_channel *dst,
502 const union tgsi_exec_channel *src )
503 {
504 dst->f[0] = src->f[0] - floorf( src->f[0] );
505 dst->f[1] = src->f[1] - floorf( src->f[1] );
506 dst->f[2] = src->f[2] - floorf( src->f[2] );
507 dst->f[3] = src->f[3] - floorf( src->f[3] );
508 }
509
510 static void
511 micro_ge(
512 union tgsi_exec_channel *dst,
513 const union tgsi_exec_channel *src0,
514 const union tgsi_exec_channel *src1,
515 const union tgsi_exec_channel *src2,
516 const union tgsi_exec_channel *src3 )
517 {
518 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
519 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
520 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
521 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
522 }
523
524 static void
525 micro_i2f(
526 union tgsi_exec_channel *dst,
527 const union tgsi_exec_channel *src )
528 {
529 dst->f[0] = (float) src->i[0];
530 dst->f[1] = (float) src->i[1];
531 dst->f[2] = (float) src->i[2];
532 dst->f[3] = (float) src->i[3];
533 }
534
535 static void
536 micro_lg2(
537 union tgsi_exec_channel *dst,
538 const union tgsi_exec_channel *src )
539 {
540 #if FAST_MATH
541 dst->f[0] = util_fast_log2( src->f[0] );
542 dst->f[1] = util_fast_log2( src->f[1] );
543 dst->f[2] = util_fast_log2( src->f[2] );
544 dst->f[3] = util_fast_log2( src->f[3] );
545 #else
546 dst->f[0] = logf( src->f[0] ) * 1.442695f;
547 dst->f[1] = logf( src->f[1] ) * 1.442695f;
548 dst->f[2] = logf( src->f[2] ) * 1.442695f;
549 dst->f[3] = logf( src->f[3] ) * 1.442695f;
550 #endif
551 }
552
553 static void
554 micro_le(
555 union tgsi_exec_channel *dst,
556 const union tgsi_exec_channel *src0,
557 const union tgsi_exec_channel *src1,
558 const union tgsi_exec_channel *src2,
559 const union tgsi_exec_channel *src3 )
560 {
561 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
562 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
563 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
564 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
565 }
566
567 static void
568 micro_lt(
569 union tgsi_exec_channel *dst,
570 const union tgsi_exec_channel *src0,
571 const union tgsi_exec_channel *src1,
572 const union tgsi_exec_channel *src2,
573 const union tgsi_exec_channel *src3 )
574 {
575 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
576 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
577 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
578 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
579 }
580
#if 0
/** Per lane: dst = (src0 < src1) ? src2 : src3, on signed integers. */
static void
micro_ilt(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
596
#if 0
/** Per lane: dst = (src0 < src1) ? src2 : src3, on unsigned integers. */
static void
micro_ult(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src2->u[lane];
      }
      else {
         dst->u[lane] = src3->u[lane];
      }
   }
}
#endif
612
613 static void
614 micro_max(
615 union tgsi_exec_channel *dst,
616 const union tgsi_exec_channel *src0,
617 const union tgsi_exec_channel *src1 )
618 {
619 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
620 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
621 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
622 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
623 }
624
#if 0
/** Per lane signed integer maximum of src0 and src1. */
static void
micro_imax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      const int a = src0->i[lane];
      const int b = src1->i[lane];

      dst->i[lane] = (a > b) ? a : b;
   }
}
#endif
638
#if 0
/** Per lane unsigned integer maximum of src0 and src1. */
static void
micro_umax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      const unsigned a = src0->u[lane];
      const unsigned b = src1->u[lane];

      dst->u[lane] = (a > b) ? a : b;
   }
}
#endif
652
653 static void
654 micro_min(
655 union tgsi_exec_channel *dst,
656 const union tgsi_exec_channel *src0,
657 const union tgsi_exec_channel *src1 )
658 {
659 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
660 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
661 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
662 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
663 }
664
#if 0
/** Per lane signed integer minimum of src0 and src1. */
static void
micro_imin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      const int a = src0->i[lane];
      const int b = src1->i[lane];

      dst->i[lane] = (a < b) ? a : b;
   }
}
#endif
678
#if 0
/** Per lane unsigned integer minimum of src0 and src1. */
static void
micro_umin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      const unsigned a = src0->u[lane];
      const unsigned b = src1->u[lane];

      dst->u[lane] = (a < b) ? a : b;
   }
}
#endif
692
#if 0
/** dst = src0 % src1 (unsigned integer) for each of the four lanes. */
static void
micro_umod(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] % src1->u[lane];
   }
}
#endif
706
707 static void
708 micro_mul(
709 union tgsi_exec_channel *dst,
710 const union tgsi_exec_channel *src0,
711 const union tgsi_exec_channel *src1 )
712 {
713 dst->f[0] = src0->f[0] * src1->f[0];
714 dst->f[1] = src0->f[1] * src1->f[1];
715 dst->f[2] = src0->f[2] * src1->f[2];
716 dst->f[3] = src0->f[3] * src1->f[3];
717 }
718
#if 0
/** dst = src0 * src1 (signed integer) for each of the four lanes. */
static void
micro_imul(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] * src1->i[lane];
   }
}
#endif
732
#if 0
/**
 * Signed multiply with two destinations: dst1 receives the ordinary
 * per-lane product of src0 and src1, dst0 is set to zero in every lane.
 */
static void
micro_imul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
751
#if 0
/**
 * Unsigned multiply with two destinations: dst1 receives the ordinary
 * per-lane product of src0 and src1, dst0 is set to zero in every lane.
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->u[lane] = src0->u[lane] * src1->u[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->u[lane] = 0;
   }
}
#endif
770
771
#if 0
/** Per lane: dst = src1 if src0 is non-zero, otherwise src2. */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane]) {
         dst->u[lane] = src1->u[lane];
      }
      else {
         dst->u[lane] = src2->u[lane];
      }
   }
}
#endif
786
787 static void
788 micro_neg(
789 union tgsi_exec_channel *dst,
790 const union tgsi_exec_channel *src )
791 {
792 dst->f[0] = -src->f[0];
793 dst->f[1] = -src->f[1];
794 dst->f[2] = -src->f[2];
795 dst->f[3] = -src->f[3];
796 }
797
#if 0
/** dst = -src (signed integer) for each of the four lanes. */
static void
micro_ineg(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = -src->i[lane];
   }
}
#endif
810
811 static void
812 micro_not(
813 union tgsi_exec_channel *dst,
814 const union tgsi_exec_channel *src )
815 {
816 dst->u[0] = ~src->u[0];
817 dst->u[1] = ~src->u[1];
818 dst->u[2] = ~src->u[2];
819 dst->u[3] = ~src->u[3];
820 }
821
822 static void
823 micro_or(
824 union tgsi_exec_channel *dst,
825 const union tgsi_exec_channel *src0,
826 const union tgsi_exec_channel *src1 )
827 {
828 dst->u[0] = src0->u[0] | src1->u[0];
829 dst->u[1] = src0->u[1] | src1->u[1];
830 dst->u[2] = src0->u[2] | src1->u[2];
831 dst->u[3] = src0->u[3] | src1->u[3];
832 }
833
834 static void
835 micro_pow(
836 union tgsi_exec_channel *dst,
837 const union tgsi_exec_channel *src0,
838 const union tgsi_exec_channel *src1 )
839 {
840 #if FAST_MATH
841 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
842 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
843 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
844 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
845 #else
846 dst->f[0] = powf( src0->f[0], src1->f[0] );
847 dst->f[1] = powf( src0->f[1], src1->f[1] );
848 dst->f[2] = powf( src0->f[2], src1->f[2] );
849 dst->f[3] = powf( src0->f[3], src1->f[3] );
850 #endif
851 }
852
853 static void
854 micro_rnd(
855 union tgsi_exec_channel *dst,
856 const union tgsi_exec_channel *src )
857 {
858 dst->f[0] = floorf( src->f[0] + 0.5f );
859 dst->f[1] = floorf( src->f[1] + 0.5f );
860 dst->f[2] = floorf( src->f[2] + 0.5f );
861 dst->f[3] = floorf( src->f[3] + 0.5f );
862 }
863
864 static void
865 micro_sgn(
866 union tgsi_exec_channel *dst,
867 const union tgsi_exec_channel *src )
868 {
869 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
870 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
871 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
872 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
873 }
874
875 static void
876 micro_shl(
877 union tgsi_exec_channel *dst,
878 const union tgsi_exec_channel *src0,
879 const union tgsi_exec_channel *src1 )
880 {
881 dst->i[0] = src0->i[0] << src1->i[0];
882 dst->i[1] = src0->i[1] << src1->i[1];
883 dst->i[2] = src0->i[2] << src1->i[2];
884 dst->i[3] = src0->i[3] << src1->i[3];
885 }
886
887 static void
888 micro_ishr(
889 union tgsi_exec_channel *dst,
890 const union tgsi_exec_channel *src0,
891 const union tgsi_exec_channel *src1 )
892 {
893 dst->i[0] = src0->i[0] >> src1->i[0];
894 dst->i[1] = src0->i[1] >> src1->i[1];
895 dst->i[2] = src0->i[2] >> src1->i[2];
896 dst->i[3] = src0->i[3] >> src1->i[3];
897 }
898
899 static void
900 micro_trunc(
901 union tgsi_exec_channel *dst,
902 const union tgsi_exec_channel *src0 )
903 {
904 dst->f[0] = (float) (int) src0->f[0];
905 dst->f[1] = (float) (int) src0->f[1];
906 dst->f[2] = (float) (int) src0->f[2];
907 dst->f[3] = (float) (int) src0->f[3];
908 }
909
#if 0
/** dst = src0 >> src1 on unsigned integers for each of the four lanes. */
static void
micro_ushr(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] >> src1->u[lane];
   }
}
#endif
923
924 static void
925 micro_sin(
926 union tgsi_exec_channel *dst,
927 const union tgsi_exec_channel *src )
928 {
929 dst->f[0] = sinf( src->f[0] );
930 dst->f[1] = sinf( src->f[1] );
931 dst->f[2] = sinf( src->f[2] );
932 dst->f[3] = sinf( src->f[3] );
933 }
934
935 static void
936 micro_sqrt( union tgsi_exec_channel *dst,
937 const union tgsi_exec_channel *src )
938 {
939 dst->f[0] = sqrtf( src->f[0] );
940 dst->f[1] = sqrtf( src->f[1] );
941 dst->f[2] = sqrtf( src->f[2] );
942 dst->f[3] = sqrtf( src->f[3] );
943 }
944
945 static void
946 micro_sub(
947 union tgsi_exec_channel *dst,
948 const union tgsi_exec_channel *src0,
949 const union tgsi_exec_channel *src1 )
950 {
951 dst->f[0] = src0->f[0] - src1->f[0];
952 dst->f[1] = src0->f[1] - src1->f[1];
953 dst->f[2] = src0->f[2] - src1->f[2];
954 dst->f[3] = src0->f[3] - src1->f[3];
955 }
956
#if 0
/** dst = (float) src: unsigned integer to float conversion for each lane. */
static void
micro_u2f(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->f[lane] = (float) src->u[lane];
   }
}
#endif
969
970 static void
971 micro_xor(
972 union tgsi_exec_channel *dst,
973 const union tgsi_exec_channel *src0,
974 const union tgsi_exec_channel *src1 )
975 {
976 dst->u[0] = src0->u[0] ^ src1->u[0];
977 dst->u[1] = src0->u[1] ^ src1->u[1];
978 dst->u[2] = src0->u[2] ^ src1->u[2];
979 dst->u[3] = src0->u[3] ^ src1->u[3];
980 }
981
982 static void
983 fetch_src_file_channel(
984 const struct tgsi_exec_machine *mach,
985 const uint file,
986 const uint swizzle,
987 const union tgsi_exec_channel *index,
988 union tgsi_exec_channel *chan )
989 {
990 switch( swizzle ) {
991 case TGSI_EXTSWIZZLE_X:
992 case TGSI_EXTSWIZZLE_Y:
993 case TGSI_EXTSWIZZLE_Z:
994 case TGSI_EXTSWIZZLE_W:
995 switch( file ) {
996 case TGSI_FILE_CONSTANT:
997 assert(mach->Consts);
998 if (index->i[0] < 0)
999 chan->f[0] = 0.0f;
1000 else
1001 chan->f[0] = mach->Consts[index->i[0]][swizzle];
1002 if (index->i[1] < 0)
1003 chan->f[1] = 0.0f;
1004 else
1005 chan->f[1] = mach->Consts[index->i[1]][swizzle];
1006 if (index->i[2] < 0)
1007 chan->f[2] = 0.0f;
1008 else
1009 chan->f[2] = mach->Consts[index->i[2]][swizzle];
1010 if (index->i[3] < 0)
1011 chan->f[3] = 0.0f;
1012 else
1013 chan->f[3] = mach->Consts[index->i[3]][swizzle];
1014 break;
1015
1016 case TGSI_FILE_INPUT:
1017 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1018 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1019 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1020 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1021 break;
1022
1023 case TGSI_FILE_TEMPORARY:
1024 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1025 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1026 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1027 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1028 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1029 break;
1030
1031 case TGSI_FILE_IMMEDIATE:
1032 assert( index->i[0] < (int) mach->ImmLimit );
1033 chan->f[0] = mach->Imms[index->i[0]][swizzle];
1034 assert( index->i[1] < (int) mach->ImmLimit );
1035 chan->f[1] = mach->Imms[index->i[1]][swizzle];
1036 assert( index->i[2] < (int) mach->ImmLimit );
1037 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1038 assert( index->i[3] < (int) mach->ImmLimit );
1039 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1040 break;
1041
1042 case TGSI_FILE_ADDRESS:
1043 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1044 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1045 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1046 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1047 break;
1048
1049 case TGSI_FILE_OUTPUT:
1050 /* vertex/fragment output vars can be read too */
1051 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1052 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1053 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1054 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1055 break;
1056
1057 default:
1058 assert( 0 );
1059 }
1060 break;
1061
1062 case TGSI_EXTSWIZZLE_ZERO:
1063 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1064 break;
1065
1066 case TGSI_EXTSWIZZLE_ONE:
1067 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1068 break;
1069
1070 default:
1071 assert( 0 );
1072 }
1073 }
1074
1075 static void
1076 fetch_source(
1077 const struct tgsi_exec_machine *mach,
1078 union tgsi_exec_channel *chan,
1079 const struct tgsi_full_src_register *reg,
1080 const uint chan_index )
1081 {
1082 union tgsi_exec_channel index;
1083 uint swizzle;
1084
1085 /* We start with a direct index into a register file.
1086 *
1087 * file[1],
1088 * where:
1089 * file = SrcRegister.File
1090 * [1] = SrcRegister.Index
1091 */
1092 index.i[0] =
1093 index.i[1] =
1094 index.i[2] =
1095 index.i[3] = reg->SrcRegister.Index;
1096
1097 /* There is an extra source register that indirectly subscripts
1098 * a register file. The direct index now becomes an offset
1099 * that is being added to the indirect register.
1100 *
1101 * file[ind[2].x+1],
1102 * where:
1103 * ind = SrcRegisterInd.File
1104 * [2] = SrcRegisterInd.Index
1105 * .x = SrcRegisterInd.SwizzleX
1106 */
1107 if (reg->SrcRegister.Indirect) {
1108 union tgsi_exec_channel index2;
1109 union tgsi_exec_channel indir_index;
1110 const uint execmask = mach->ExecMask;
1111 uint i;
1112
1113 /* which address register (always zero now) */
1114 index2.i[0] =
1115 index2.i[1] =
1116 index2.i[2] =
1117 index2.i[3] = reg->SrcRegisterInd.Index;
1118
1119 /* get current value of address register[swizzle] */
1120 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1121 fetch_src_file_channel(
1122 mach,
1123 reg->SrcRegisterInd.File,
1124 swizzle,
1125 &index2,
1126 &indir_index );
1127
1128 /* add value of address register to the offset */
1129 index.i[0] += (int) indir_index.f[0];
1130 index.i[1] += (int) indir_index.f[1];
1131 index.i[2] += (int) indir_index.f[2];
1132 index.i[3] += (int) indir_index.f[3];
1133
1134 /* for disabled execution channels, zero-out the index to
1135 * avoid using a potential garbage value.
1136 */
1137 for (i = 0; i < QUAD_SIZE; i++) {
1138 if ((execmask & (1 << i)) == 0)
1139 index.i[i] = 0;
1140 }
1141 }
1142
1143 /* There is an extra source register that is a second
1144 * subscript to a register file. Effectively it means that
1145 * the register file is actually a 2D array of registers.
1146 *
1147 * file[1][3] == file[1*sizeof(file[1])+3],
1148 * where:
1149 * [3] = SrcRegisterDim.Index
1150 */
1151 if (reg->SrcRegister.Dimension) {
1152 /* The size of the first-order array depends on the register file type.
1153 * We need to multiply the index to the first array to get an effective,
1154 * "flat" index that points to the beginning of the second-order array.
1155 */
1156 switch (reg->SrcRegister.File) {
1157 case TGSI_FILE_INPUT:
1158 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1159 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1160 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1161 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1162 break;
1163 case TGSI_FILE_CONSTANT:
1164 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1165 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1166 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1167 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1168 break;
1169 default:
1170 assert( 0 );
1171 }
1172
1173 index.i[0] += reg->SrcRegisterDim.Index;
1174 index.i[1] += reg->SrcRegisterDim.Index;
1175 index.i[2] += reg->SrcRegisterDim.Index;
1176 index.i[3] += reg->SrcRegisterDim.Index;
1177
1178 /* Again, the second subscript index can be addressed indirectly
1179 * identically to the first one.
1180 * Nothing stops us from indirectly addressing the indirect register,
1181 * but there is no need for that, so we won't exercise it.
1182 *
1183 * file[1][ind[4].y+3],
1184 * where:
1185 * ind = SrcRegisterDimInd.File
1186 * [4] = SrcRegisterDimInd.Index
1187 * .y = SrcRegisterDimInd.SwizzleX
1188 */
1189 if (reg->SrcRegisterDim.Indirect) {
1190 union tgsi_exec_channel index2;
1191 union tgsi_exec_channel indir_index;
1192 const uint execmask = mach->ExecMask;
1193 uint i;
1194
1195 index2.i[0] =
1196 index2.i[1] =
1197 index2.i[2] =
1198 index2.i[3] = reg->SrcRegisterDimInd.Index;
1199
1200 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1201 fetch_src_file_channel(
1202 mach,
1203 reg->SrcRegisterDimInd.File,
1204 swizzle,
1205 &index2,
1206 &indir_index );
1207
1208 index.i[0] += (int) indir_index.f[0];
1209 index.i[1] += (int) indir_index.f[1];
1210 index.i[2] += (int) indir_index.f[2];
1211 index.i[3] += (int) indir_index.f[3];
1212
1213 /* for disabled execution channels, zero-out the index to
1214 * avoid using a potential garbage value.
1215 */
1216 for (i = 0; i < QUAD_SIZE; i++) {
1217 if ((execmask & (1 << i)) == 0)
1218 index.i[i] = 0;
1219 }
1220 }
1221
1222 /* If by any chance there was a need for a 3D array of register
1223 * files, we would have to check whether SrcRegisterDim is followed
1224 * by a dimension register and continue the saga.
1225 */
1226 }
1227
1228 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1229 fetch_src_file_channel(
1230 mach,
1231 reg->SrcRegister.File,
1232 swizzle,
1233 &index,
1234 chan );
1235
1236 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1237 case TGSI_UTIL_SIGN_CLEAR:
1238 micro_abs( chan, chan );
1239 break;
1240
1241 case TGSI_UTIL_SIGN_SET:
1242 micro_abs( chan, chan );
1243 micro_neg( chan, chan );
1244 break;
1245
1246 case TGSI_UTIL_SIGN_TOGGLE:
1247 micro_neg( chan, chan );
1248 break;
1249
1250 case TGSI_UTIL_SIGN_KEEP:
1251 break;
1252 }
1253
1254 if (reg->SrcRegisterExtMod.Complement) {
1255 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1256 }
1257 }
1258
1259 static void
1260 store_dest(
1261 struct tgsi_exec_machine *mach,
1262 const union tgsi_exec_channel *chan,
1263 const struct tgsi_full_dst_register *reg,
1264 const struct tgsi_full_instruction *inst,
1265 uint chan_index )
1266 {
1267 uint i;
1268 union tgsi_exec_channel null;
1269 union tgsi_exec_channel *dst;
1270 uint execmask = mach->ExecMask;
1271
1272 switch (reg->DstRegister.File) {
1273 case TGSI_FILE_NULL:
1274 dst = &null;
1275 break;
1276
1277 case TGSI_FILE_OUTPUT:
1278 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1279 + reg->DstRegister.Index].xyzw[chan_index];
1280 break;
1281
1282 case TGSI_FILE_TEMPORARY:
1283 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1284 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1285 break;
1286
1287 case TGSI_FILE_ADDRESS:
1288 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1289 break;
1290
1291 default:
1292 assert( 0 );
1293 return;
1294 }
1295
1296 if (inst->InstructionExtNv.CondFlowEnable) {
1297 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1298 uint swizzle;
1299 uint shift;
1300 uint mask;
1301 uint test;
1302
1303 /* Only CC0 supported.
1304 */
1305 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1306
1307 switch (chan_index) {
1308 case CHAN_X:
1309 swizzle = inst->InstructionExtNv.CondSwizzleX;
1310 break;
1311 case CHAN_Y:
1312 swizzle = inst->InstructionExtNv.CondSwizzleY;
1313 break;
1314 case CHAN_Z:
1315 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1316 break;
1317 case CHAN_W:
1318 swizzle = inst->InstructionExtNv.CondSwizzleW;
1319 break;
1320 default:
1321 assert( 0 );
1322 return;
1323 }
1324
1325 switch (swizzle) {
1326 case TGSI_SWIZZLE_X:
1327 shift = TGSI_EXEC_CC_X_SHIFT;
1328 mask = TGSI_EXEC_CC_X_MASK;
1329 break;
1330 case TGSI_SWIZZLE_Y:
1331 shift = TGSI_EXEC_CC_Y_SHIFT;
1332 mask = TGSI_EXEC_CC_Y_MASK;
1333 break;
1334 case TGSI_SWIZZLE_Z:
1335 shift = TGSI_EXEC_CC_Z_SHIFT;
1336 mask = TGSI_EXEC_CC_Z_MASK;
1337 break;
1338 case TGSI_SWIZZLE_W:
1339 shift = TGSI_EXEC_CC_W_SHIFT;
1340 mask = TGSI_EXEC_CC_W_MASK;
1341 break;
1342 default:
1343 assert( 0 );
1344 return;
1345 }
1346
1347 switch (inst->InstructionExtNv.CondMask) {
1348 case TGSI_CC_GT:
1349 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1350 for (i = 0; i < QUAD_SIZE; i++)
1351 if (cc->u[i] & test)
1352 execmask &= ~(1 << i);
1353 break;
1354
1355 case TGSI_CC_EQ:
1356 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1357 for (i = 0; i < QUAD_SIZE; i++)
1358 if (cc->u[i] & test)
1359 execmask &= ~(1 << i);
1360 break;
1361
1362 case TGSI_CC_LT:
1363 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1364 for (i = 0; i < QUAD_SIZE; i++)
1365 if (cc->u[i] & test)
1366 execmask &= ~(1 << i);
1367 break;
1368
1369 case TGSI_CC_GE:
1370 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1371 for (i = 0; i < QUAD_SIZE; i++)
1372 if (cc->u[i] & test)
1373 execmask &= ~(1 << i);
1374 break;
1375
1376 case TGSI_CC_LE:
1377 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1378 for (i = 0; i < QUAD_SIZE; i++)
1379 if (cc->u[i] & test)
1380 execmask &= ~(1 << i);
1381 break;
1382
1383 case TGSI_CC_NE:
1384 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1385 for (i = 0; i < QUAD_SIZE; i++)
1386 if (cc->u[i] & test)
1387 execmask &= ~(1 << i);
1388 break;
1389
1390 case TGSI_CC_TR:
1391 break;
1392
1393 case TGSI_CC_FL:
1394 for (i = 0; i < QUAD_SIZE; i++)
1395 execmask &= ~(1 << i);
1396 break;
1397
1398 default:
1399 assert( 0 );
1400 return;
1401 }
1402 }
1403
1404 switch (inst->Instruction.Saturate) {
1405 case TGSI_SAT_NONE:
1406 for (i = 0; i < QUAD_SIZE; i++)
1407 if (execmask & (1 << i))
1408 dst->i[i] = chan->i[i];
1409 break;
1410
1411 case TGSI_SAT_ZERO_ONE:
1412 for (i = 0; i < QUAD_SIZE; i++)
1413 if (execmask & (1 << i)) {
1414 if (chan->f[i] < 0.0f)
1415 dst->f[i] = 0.0f;
1416 else if (chan->f[i] > 1.0f)
1417 dst->f[i] = 1.0f;
1418 else
1419 dst->i[i] = chan->i[i];
1420 }
1421 break;
1422
1423 case TGSI_SAT_MINUS_PLUS_ONE:
1424 for (i = 0; i < QUAD_SIZE; i++)
1425 if (execmask & (1 << i)) {
1426 if (chan->f[i] < -1.0f)
1427 dst->f[i] = -1.0f;
1428 else if (chan->f[i] > 1.0f)
1429 dst->f[i] = 1.0f;
1430 else
1431 dst->i[i] = chan->i[i];
1432 }
1433 break;
1434
1435 default:
1436 assert( 0 );
1437 }
1438
1439 if (inst->InstructionExtNv.CondDstUpdate) {
1440 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1441 uint shift;
1442 uint mask;
1443
1444 /* Only CC0 supported.
1445 */
1446 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1447
1448 switch (chan_index) {
1449 case CHAN_X:
1450 shift = TGSI_EXEC_CC_X_SHIFT;
1451 mask = ~TGSI_EXEC_CC_X_MASK;
1452 break;
1453 case CHAN_Y:
1454 shift = TGSI_EXEC_CC_Y_SHIFT;
1455 mask = ~TGSI_EXEC_CC_Y_MASK;
1456 break;
1457 case CHAN_Z:
1458 shift = TGSI_EXEC_CC_Z_SHIFT;
1459 mask = ~TGSI_EXEC_CC_Z_MASK;
1460 break;
1461 case CHAN_W:
1462 shift = TGSI_EXEC_CC_W_SHIFT;
1463 mask = ~TGSI_EXEC_CC_W_MASK;
1464 break;
1465 default:
1466 assert( 0 );
1467 return;
1468 }
1469
1470 for (i = 0; i < QUAD_SIZE; i++)
1471 if (execmask & (1 << i)) {
1472 cc->u[i] &= mask;
1473 if (dst->f[i] < 0.0f)
1474 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1475 else if (dst->f[i] > 0.0f)
1476 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1477 else if (dst->f[i] == 0.0f)
1478 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1479 else
1480 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1481 }
1482 }
1483 }
1484
/* Fetch channel CHAN of source operand INDEX of the current instruction
 * into VAL.  Relies on variables named `mach' and `inst' being in scope
 * at the point of use.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source( mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN )
1487
/* Store VAL into channel CHAN of destination operand INDEX of the current
 * instruction.  Relies on variables named `mach' and `inst' being in scope
 * at the point of use.
 */
#define STORE(VAL, INDEX, CHAN) \
   store_dest( mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1490
1491
1492 /**
1493 * Execute ARB-style KIL which is predicated by a src register.
1494 * Kill fragment if any of the four values is less than zero.
1495 */
1496 static void
1497 exec_kil(struct tgsi_exec_machine *mach,
1498 const struct tgsi_full_instruction *inst)
1499 {
1500 uint uniquemask;
1501 uint chan_index;
1502 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1503 union tgsi_exec_channel r[1];
1504
1505 /* This mask stores component bits that were already tested. Note that
1506 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1507 * tested. */
1508 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1509
1510 for (chan_index = 0; chan_index < 4; chan_index++)
1511 {
1512 uint swizzle;
1513 uint i;
1514
1515 /* unswizzle channel */
1516 swizzle = tgsi_util_get_full_src_register_extswizzle (
1517 &inst->FullSrcRegisters[0],
1518 chan_index);
1519
1520 /* check if the component has not been already tested */
1521 if (uniquemask & (1 << swizzle))
1522 continue;
1523 uniquemask |= 1 << swizzle;
1524
1525 FETCH(&r[0], 0, chan_index);
1526 for (i = 0; i < 4; i++)
1527 if (r[0].f[i] < 0.0f)
1528 kilmask |= 1 << i;
1529 }
1530
1531 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1532 }
1533
1534 /**
1535 * Execute NVIDIA-style KIL which is predicated by a condition code.
1536 * Kill fragment if the condition code is TRUE.
1537 */
1538 static void
1539 exec_kilp(struct tgsi_exec_machine *mach,
1540 const struct tgsi_full_instruction *inst)
1541 {
1542 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1543
1544 if (inst->InstructionExtNv.CondFlowEnable) {
1545 uint swizzle[4];
1546 uint chan_index;
1547
1548 kilmask = 0x0;
1549
1550 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1551 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1552 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1553 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1554
1555 for (chan_index = 0; chan_index < 4; chan_index++)
1556 {
1557 uint i;
1558
1559 for (i = 0; i < 4; i++) {
1560 /* TODO: evaluate the condition code */
1561 if (0)
1562 kilmask |= 1 << i;
1563 }
1564 }
1565 }
1566 else {
1567 /* "unconditional" kil */
1568 kilmask = mach->ExecMask;
1569 }
1570 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1571 }
1572
1573
1574 /*
1575 * Fetch a four texture samples using STR texture coordinates.
1576 */
1577 static void
1578 fetch_texel( struct tgsi_sampler *sampler,
1579 const union tgsi_exec_channel *s,
1580 const union tgsi_exec_channel *t,
1581 const union tgsi_exec_channel *p,
1582 float lodbias, /* XXX should be float[4] */
1583 union tgsi_exec_channel *r,
1584 union tgsi_exec_channel *g,
1585 union tgsi_exec_channel *b,
1586 union tgsi_exec_channel *a )
1587 {
1588 uint j;
1589 float rgba[NUM_CHANNELS][QUAD_SIZE];
1590
1591 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1592
1593 for (j = 0; j < 4; j++) {
1594 r->f[j] = rgba[0][j];
1595 g->f[j] = rgba[1][j];
1596 b->f[j] = rgba[2][j];
1597 a->f[j] = rgba[3][j];
1598 }
1599 }
1600
1601
1602 static void
1603 exec_tex(struct tgsi_exec_machine *mach,
1604 const struct tgsi_full_instruction *inst,
1605 boolean biasLod,
1606 boolean projected)
1607 {
1608 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1609 union tgsi_exec_channel r[4];
1610 uint chan_index;
1611 float lodBias;
1612
1613 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1614
1615 switch (inst->InstructionExtTexture.Texture) {
1616 case TGSI_TEXTURE_1D:
1617 case TGSI_TEXTURE_SHADOW1D:
1618
1619 FETCH(&r[0], 0, CHAN_X);
1620
1621 if (projected) {
1622 FETCH(&r[1], 0, CHAN_W);
1623 micro_div( &r[0], &r[0], &r[1] );
1624 }
1625
1626 if (biasLod) {
1627 FETCH(&r[1], 0, CHAN_W);
1628 lodBias = r[2].f[0];
1629 }
1630 else
1631 lodBias = 0.0;
1632
1633 fetch_texel(mach->Samplers[unit],
1634 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1635 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1636 break;
1637
1638 case TGSI_TEXTURE_2D:
1639 case TGSI_TEXTURE_RECT:
1640 case TGSI_TEXTURE_SHADOW2D:
1641 case TGSI_TEXTURE_SHADOWRECT:
1642
1643 FETCH(&r[0], 0, CHAN_X);
1644 FETCH(&r[1], 0, CHAN_Y);
1645 FETCH(&r[2], 0, CHAN_Z);
1646
1647 if (projected) {
1648 FETCH(&r[3], 0, CHAN_W);
1649 micro_div( &r[0], &r[0], &r[3] );
1650 micro_div( &r[1], &r[1], &r[3] );
1651 micro_div( &r[2], &r[2], &r[3] );
1652 }
1653
1654 if (biasLod) {
1655 FETCH(&r[3], 0, CHAN_W);
1656 lodBias = r[3].f[0];
1657 }
1658 else
1659 lodBias = 0.0;
1660
1661 fetch_texel(mach->Samplers[unit],
1662 &r[0], &r[1], &r[2], lodBias, /* inputs */
1663 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1664 break;
1665
1666 case TGSI_TEXTURE_3D:
1667 case TGSI_TEXTURE_CUBE:
1668
1669 FETCH(&r[0], 0, CHAN_X);
1670 FETCH(&r[1], 0, CHAN_Y);
1671 FETCH(&r[2], 0, CHAN_Z);
1672
1673 if (projected) {
1674 FETCH(&r[3], 0, CHAN_W);
1675 micro_div( &r[0], &r[0], &r[3] );
1676 micro_div( &r[1], &r[1], &r[3] );
1677 micro_div( &r[2], &r[2], &r[3] );
1678 }
1679
1680 if (biasLod) {
1681 FETCH(&r[3], 0, CHAN_W);
1682 lodBias = r[3].f[0];
1683 }
1684 else
1685 lodBias = 0.0;
1686
1687 fetch_texel(mach->Samplers[unit],
1688 &r[0], &r[1], &r[2], lodBias,
1689 &r[0], &r[1], &r[2], &r[3]);
1690 break;
1691
1692 default:
1693 assert (0);
1694 }
1695
1696 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1697 STORE( &r[chan_index], 0, chan_index );
1698 }
1699 }
1700
1701
1702 /**
1703 * Evaluate a constant-valued coefficient at the position of the
1704 * current quad.
1705 */
1706 static void
1707 eval_constant_coef(
1708 struct tgsi_exec_machine *mach,
1709 unsigned attrib,
1710 unsigned chan )
1711 {
1712 unsigned i;
1713
1714 for( i = 0; i < QUAD_SIZE; i++ ) {
1715 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1716 }
1717 }
1718
1719 /**
1720 * Evaluate a linear-valued coefficient at the position of the
1721 * current quad.
1722 */
1723 static void
1724 eval_linear_coef(
1725 struct tgsi_exec_machine *mach,
1726 unsigned attrib,
1727 unsigned chan )
1728 {
1729 const float x = mach->QuadPos.xyzw[0].f[0];
1730 const float y = mach->QuadPos.xyzw[1].f[0];
1731 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1732 const float dady = mach->InterpCoefs[attrib].dady[chan];
1733 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1734 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1735 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1736 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1737 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1738 }
1739
1740 /**
1741 * Evaluate a perspective-valued coefficient at the position of the
1742 * current quad.
1743 */
1744 static void
1745 eval_perspective_coef(
1746 struct tgsi_exec_machine *mach,
1747 unsigned attrib,
1748 unsigned chan )
1749 {
1750 const float x = mach->QuadPos.xyzw[0].f[0];
1751 const float y = mach->QuadPos.xyzw[1].f[0];
1752 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1753 const float dady = mach->InterpCoefs[attrib].dady[chan];
1754 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1755 const float *w = mach->QuadPos.xyzw[3].f;
1756 /* divide by W here */
1757 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1758 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1759 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1760 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1761 }
1762
1763
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel `chan'
 * of input attribute `attrib' for the current quad.
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1768
1769 static void
1770 exec_declaration(
1771 struct tgsi_exec_machine *mach,
1772 const struct tgsi_full_declaration *decl )
1773 {
1774 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1775 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1776 unsigned first, last, mask;
1777 eval_coef_func eval;
1778
1779 first = decl->DeclarationRange.First;
1780 last = decl->DeclarationRange.Last;
1781 mask = decl->Declaration.UsageMask;
1782
1783 switch( decl->Declaration.Interpolate ) {
1784 case TGSI_INTERPOLATE_CONSTANT:
1785 eval = eval_constant_coef;
1786 break;
1787
1788 case TGSI_INTERPOLATE_LINEAR:
1789 eval = eval_linear_coef;
1790 break;
1791
1792 case TGSI_INTERPOLATE_PERSPECTIVE:
1793 eval = eval_perspective_coef;
1794 break;
1795
1796 default:
1797 eval = NULL;
1798 assert( 0 );
1799 }
1800
1801 if( mask == TGSI_WRITEMASK_XYZW ) {
1802 unsigned i, j;
1803
1804 for( i = first; i <= last; i++ ) {
1805 for( j = 0; j < NUM_CHANNELS; j++ ) {
1806 eval( mach, i, j );
1807 }
1808 }
1809 }
1810 else {
1811 unsigned i, j;
1812
1813 for( j = 0; j < NUM_CHANNELS; j++ ) {
1814 if( mask & (1 << j) ) {
1815 for( i = first; i <= last; i++ ) {
1816 eval( mach, i, j );
1817 }
1818 }
1819 }
1820 }
1821 }
1822 }
1823 }
1824
1825 static void
1826 exec_instruction(
1827 struct tgsi_exec_machine *mach,
1828 const struct tgsi_full_instruction *inst,
1829 int *pc )
1830 {
1831 uint chan_index;
1832 union tgsi_exec_channel r[8];
1833
1834 (*pc)++;
1835
1836 switch (inst->Instruction.Opcode) {
1837 case TGSI_OPCODE_ARL:
1838 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1839 FETCH( &r[0], 0, chan_index );
1840 micro_flr( &r[0], &r[0] );
1841 STORE( &r[0], 0, chan_index );
1842 }
1843 break;
1844
1845 case TGSI_OPCODE_MOV:
1846 case TGSI_OPCODE_SWZ:
1847 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1848 FETCH( &r[0], 0, chan_index );
1849 STORE( &r[0], 0, chan_index );
1850 }
1851 break;
1852
1853 case TGSI_OPCODE_LIT:
1854 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1855 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1856 }
1857
1858 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1859 FETCH( &r[0], 0, CHAN_X );
1860 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1861 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1862 STORE( &r[0], 0, CHAN_Y );
1863 }
1864
1865 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1866 FETCH( &r[1], 0, CHAN_Y );
1867 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1868
1869 FETCH( &r[2], 0, CHAN_W );
1870 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1871 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1872 micro_pow( &r[1], &r[1], &r[2] );
1873 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1874 STORE( &r[0], 0, CHAN_Z );
1875 }
1876 }
1877
1878 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1879 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1880 }
1881 break;
1882
1883 case TGSI_OPCODE_RCP:
1884 /* TGSI_OPCODE_RECIP */
1885 FETCH( &r[0], 0, CHAN_X );
1886 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1887 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1888 STORE( &r[0], 0, chan_index );
1889 }
1890 break;
1891
1892 case TGSI_OPCODE_RSQ:
1893 /* TGSI_OPCODE_RECIPSQRT */
1894 FETCH( &r[0], 0, CHAN_X );
1895 micro_abs( &r[0], &r[0] );
1896 micro_sqrt( &r[0], &r[0] );
1897 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1898 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1899 STORE( &r[0], 0, chan_index );
1900 }
1901 break;
1902
1903 case TGSI_OPCODE_EXP:
1904 FETCH( &r[0], 0, CHAN_X );
1905 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1906 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1907 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1908 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1909 }
1910 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1911 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1912 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1913 }
1914 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1915 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1916 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1917 }
1918 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1919 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1920 }
1921 break;
1922
1923 case TGSI_OPCODE_LOG:
1924 FETCH( &r[0], 0, CHAN_X );
1925 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1926 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1927 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1928 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1929 STORE( &r[0], 0, CHAN_X );
1930 }
1931 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1932 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1933 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1934 STORE( &r[0], 0, CHAN_Y );
1935 }
1936 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1937 STORE( &r[1], 0, CHAN_Z );
1938 }
1939 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1940 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1941 }
1942 break;
1943
1944 case TGSI_OPCODE_MUL:
1945 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1946 {
1947 FETCH(&r[0], 0, chan_index);
1948 FETCH(&r[1], 1, chan_index);
1949
1950 micro_mul( &r[0], &r[0], &r[1] );
1951
1952 STORE(&r[0], 0, chan_index);
1953 }
1954 break;
1955
1956 case TGSI_OPCODE_ADD:
1957 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1958 FETCH( &r[0], 0, chan_index );
1959 FETCH( &r[1], 1, chan_index );
1960 micro_add( &r[0], &r[0], &r[1] );
1961 STORE( &r[0], 0, chan_index );
1962 }
1963 break;
1964
1965 case TGSI_OPCODE_DP3:
1966 /* TGSI_OPCODE_DOT3 */
1967 FETCH( &r[0], 0, CHAN_X );
1968 FETCH( &r[1], 1, CHAN_X );
1969 micro_mul( &r[0], &r[0], &r[1] );
1970
1971 FETCH( &r[1], 0, CHAN_Y );
1972 FETCH( &r[2], 1, CHAN_Y );
1973 micro_mul( &r[1], &r[1], &r[2] );
1974 micro_add( &r[0], &r[0], &r[1] );
1975
1976 FETCH( &r[1], 0, CHAN_Z );
1977 FETCH( &r[2], 1, CHAN_Z );
1978 micro_mul( &r[1], &r[1], &r[2] );
1979 micro_add( &r[0], &r[0], &r[1] );
1980
1981 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1982 STORE( &r[0], 0, chan_index );
1983 }
1984 break;
1985
1986 case TGSI_OPCODE_DP4:
1987 /* TGSI_OPCODE_DOT4 */
1988 FETCH(&r[0], 0, CHAN_X);
1989 FETCH(&r[1], 1, CHAN_X);
1990
1991 micro_mul( &r[0], &r[0], &r[1] );
1992
1993 FETCH(&r[1], 0, CHAN_Y);
1994 FETCH(&r[2], 1, CHAN_Y);
1995
1996 micro_mul( &r[1], &r[1], &r[2] );
1997 micro_add( &r[0], &r[0], &r[1] );
1998
1999 FETCH(&r[1], 0, CHAN_Z);
2000 FETCH(&r[2], 1, CHAN_Z);
2001
2002 micro_mul( &r[1], &r[1], &r[2] );
2003 micro_add( &r[0], &r[0], &r[1] );
2004
2005 FETCH(&r[1], 0, CHAN_W);
2006 FETCH(&r[2], 1, CHAN_W);
2007
2008 micro_mul( &r[1], &r[1], &r[2] );
2009 micro_add( &r[0], &r[0], &r[1] );
2010
2011 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2012 STORE( &r[0], 0, chan_index );
2013 }
2014 break;
2015
2016 case TGSI_OPCODE_DST:
2017 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2018 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2019 }
2020
2021 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2022 FETCH( &r[0], 0, CHAN_Y );
2023 FETCH( &r[1], 1, CHAN_Y);
2024 micro_mul( &r[0], &r[0], &r[1] );
2025 STORE( &r[0], 0, CHAN_Y );
2026 }
2027
2028 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2029 FETCH( &r[0], 0, CHAN_Z );
2030 STORE( &r[0], 0, CHAN_Z );
2031 }
2032
2033 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2034 FETCH( &r[0], 1, CHAN_W );
2035 STORE( &r[0], 0, CHAN_W );
2036 }
2037 break;
2038
2039 case TGSI_OPCODE_MIN:
2040 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2041 FETCH(&r[0], 0, chan_index);
2042 FETCH(&r[1], 1, chan_index);
2043
2044 /* XXX use micro_min()?? */
2045 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
2046
2047 STORE(&r[0], 0, chan_index);
2048 }
2049 break;
2050
2051 case TGSI_OPCODE_MAX:
2052 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2053 FETCH(&r[0], 0, chan_index);
2054 FETCH(&r[1], 1, chan_index);
2055
2056 /* XXX use micro_max()?? */
2057 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2058
2059 STORE(&r[0], 0, chan_index );
2060 }
2061 break;
2062
2063 case TGSI_OPCODE_SLT:
2064 /* TGSI_OPCODE_SETLT */
2065 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2066 FETCH( &r[0], 0, chan_index );
2067 FETCH( &r[1], 1, chan_index );
2068 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2069 STORE( &r[0], 0, chan_index );
2070 }
2071 break;
2072
2073 case TGSI_OPCODE_SGE:
2074 /* TGSI_OPCODE_SETGE */
2075 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2076 FETCH( &r[0], 0, chan_index );
2077 FETCH( &r[1], 1, chan_index );
2078 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2079 STORE( &r[0], 0, chan_index );
2080 }
2081 break;
2082
2083 case TGSI_OPCODE_MAD:
2084 /* TGSI_OPCODE_MADD */
2085 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2086 FETCH( &r[0], 0, chan_index );
2087 FETCH( &r[1], 1, chan_index );
2088 micro_mul( &r[0], &r[0], &r[1] );
2089 FETCH( &r[1], 2, chan_index );
2090 micro_add( &r[0], &r[0], &r[1] );
2091 STORE( &r[0], 0, chan_index );
2092 }
2093 break;
2094
2095 case TGSI_OPCODE_SUB:
2096 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2097 FETCH(&r[0], 0, chan_index);
2098 FETCH(&r[1], 1, chan_index);
2099
2100 micro_sub( &r[0], &r[0], &r[1] );
2101
2102 STORE(&r[0], 0, chan_index);
2103 }
2104 break;
2105
2106 case TGSI_OPCODE_LERP:
2107 /* TGSI_OPCODE_LRP */
2108 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2109 FETCH(&r[0], 0, chan_index);
2110 FETCH(&r[1], 1, chan_index);
2111 FETCH(&r[2], 2, chan_index);
2112
2113 micro_sub( &r[1], &r[1], &r[2] );
2114 micro_mul( &r[0], &r[0], &r[1] );
2115 micro_add( &r[0], &r[0], &r[2] );
2116
2117 STORE(&r[0], 0, chan_index);
2118 }
2119 break;
2120
2121 case TGSI_OPCODE_CND:
2122 assert (0);
2123 break;
2124
2125 case TGSI_OPCODE_CND0:
2126 assert (0);
2127 break;
2128
2129 case TGSI_OPCODE_DOT2ADD:
2130 /* TGSI_OPCODE_DP2A */
2131 FETCH( &r[0], 0, CHAN_X );
2132 FETCH( &r[1], 1, CHAN_X );
2133 micro_mul( &r[0], &r[0], &r[1] );
2134
2135 FETCH( &r[1], 0, CHAN_Y );
2136 FETCH( &r[2], 1, CHAN_Y );
2137 micro_mul( &r[1], &r[1], &r[2] );
2138 micro_add( &r[0], &r[0], &r[1] );
2139
2140 FETCH( &r[2], 2, CHAN_X );
2141 micro_add( &r[0], &r[0], &r[2] );
2142
2143 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2144 STORE( &r[0], 0, chan_index );
2145 }
2146 break;
2147
2148 case TGSI_OPCODE_INDEX:
2149 assert (0);
2150 break;
2151
2152 case TGSI_OPCODE_NEGATE:
2153 assert (0);
2154 break;
2155
2156 case TGSI_OPCODE_FRAC:
2157 /* TGSI_OPCODE_FRC */
2158 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2159 FETCH( &r[0], 0, chan_index );
2160 micro_frc( &r[0], &r[0] );
2161 STORE( &r[0], 0, chan_index );
2162 }
2163 break;
2164
2165 case TGSI_OPCODE_CLAMP:
2166 assert (0);
2167 break;
2168
2169 case TGSI_OPCODE_FLOOR:
2170 /* TGSI_OPCODE_FLR */
2171 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2172 FETCH( &r[0], 0, chan_index );
2173 micro_flr( &r[0], &r[0] );
2174 STORE( &r[0], 0, chan_index );
2175 }
2176 break;
2177
2178 case TGSI_OPCODE_ROUND:
2179 case TGSI_OPCODE_ARR:
2180 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2181 FETCH( &r[0], 0, chan_index );
2182 micro_rnd( &r[0], &r[0] );
2183 STORE( &r[0], 0, chan_index );
2184 }
2185 break;
2186
2187 case TGSI_OPCODE_EXPBASE2:
2188 /* TGSI_OPCODE_EX2 */
2189 FETCH(&r[0], 0, CHAN_X);
2190
2191 #if FAST_MATH
2192 micro_exp2( &r[0], &r[0] );
2193 #else
2194 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2195 #endif
2196
2197 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2198 STORE( &r[0], 0, chan_index );
2199 }
2200 break;
2201
2202 case TGSI_OPCODE_LOGBASE2:
2203 /* TGSI_OPCODE_LG2 */
2204 FETCH( &r[0], 0, CHAN_X );
2205 micro_lg2( &r[0], &r[0] );
2206 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2207 STORE( &r[0], 0, chan_index );
2208 }
2209 break;
2210
2211 case TGSI_OPCODE_POWER:
2212 /* TGSI_OPCODE_POW */
2213 FETCH(&r[0], 0, CHAN_X);
2214 FETCH(&r[1], 1, CHAN_X);
2215
2216 micro_pow( &r[0], &r[0], &r[1] );
2217
2218 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2219 STORE( &r[0], 0, chan_index );
2220 }
2221 break;
2222
2223 case TGSI_OPCODE_CROSSPRODUCT:
2224 /* TGSI_OPCODE_XPD */
2225 FETCH(&r[0], 0, CHAN_Y);
2226 FETCH(&r[1], 1, CHAN_Z);
2227
2228 micro_mul( &r[2], &r[0], &r[1] );
2229
2230 FETCH(&r[3], 0, CHAN_Z);
2231 FETCH(&r[4], 1, CHAN_Y);
2232
2233 micro_mul( &r[5], &r[3], &r[4] );
2234 micro_sub( &r[2], &r[2], &r[5] );
2235
2236 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2237 STORE( &r[2], 0, CHAN_X );
2238 }
2239
2240 FETCH(&r[2], 1, CHAN_X);
2241
2242 micro_mul( &r[3], &r[3], &r[2] );
2243
2244 FETCH(&r[5], 0, CHAN_X);
2245
2246 micro_mul( &r[1], &r[1], &r[5] );
2247 micro_sub( &r[3], &r[3], &r[1] );
2248
2249 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2250 STORE( &r[3], 0, CHAN_Y );
2251 }
2252
2253 micro_mul( &r[5], &r[5], &r[4] );
2254 micro_mul( &r[0], &r[0], &r[2] );
2255 micro_sub( &r[5], &r[5], &r[0] );
2256
2257 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2258 STORE( &r[5], 0, CHAN_Z );
2259 }
2260
2261 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2262 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2263 }
2264 break;
2265
2266 case TGSI_OPCODE_MULTIPLYMATRIX:
2267 assert (0);
2268 break;
2269
2270 case TGSI_OPCODE_ABS:
2271 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2272 FETCH(&r[0], 0, chan_index);
2273
2274 micro_abs( &r[0], &r[0] );
2275
2276 STORE(&r[0], 0, chan_index);
2277 }
2278 break;
2279
2280 case TGSI_OPCODE_RCC:
2281 assert (0);
2282 break;
2283
2284 case TGSI_OPCODE_DPH:
2285 FETCH(&r[0], 0, CHAN_X);
2286 FETCH(&r[1], 1, CHAN_X);
2287
2288 micro_mul( &r[0], &r[0], &r[1] );
2289
2290 FETCH(&r[1], 0, CHAN_Y);
2291 FETCH(&r[2], 1, CHAN_Y);
2292
2293 micro_mul( &r[1], &r[1], &r[2] );
2294 micro_add( &r[0], &r[0], &r[1] );
2295
2296 FETCH(&r[1], 0, CHAN_Z);
2297 FETCH(&r[2], 1, CHAN_Z);
2298
2299 micro_mul( &r[1], &r[1], &r[2] );
2300 micro_add( &r[0], &r[0], &r[1] );
2301
2302 FETCH(&r[1], 1, CHAN_W);
2303
2304 micro_add( &r[0], &r[0], &r[1] );
2305
2306 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2307 STORE( &r[0], 0, chan_index );
2308 }
2309 break;
2310
2311 case TGSI_OPCODE_COS:
2312 FETCH(&r[0], 0, CHAN_X);
2313
2314 micro_cos( &r[0], &r[0] );
2315
2316 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2317 STORE( &r[0], 0, chan_index );
2318 }
2319 break;
2320
2321 case TGSI_OPCODE_DDX:
2322 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2323 FETCH( &r[0], 0, chan_index );
2324 micro_ddx( &r[0], &r[0] );
2325 STORE( &r[0], 0, chan_index );
2326 }
2327 break;
2328
2329 case TGSI_OPCODE_DDY:
2330 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2331 FETCH( &r[0], 0, chan_index );
2332 micro_ddy( &r[0], &r[0] );
2333 STORE( &r[0], 0, chan_index );
2334 }
2335 break;
2336
2337 case TGSI_OPCODE_KILP:
2338 exec_kilp (mach, inst);
2339 break;
2340
2341 case TGSI_OPCODE_KIL:
2342 exec_kil (mach, inst);
2343 break;
2344
2345 case TGSI_OPCODE_PK2H:
2346 assert (0);
2347 break;
2348
2349 case TGSI_OPCODE_PK2US:
2350 assert (0);
2351 break;
2352
2353 case TGSI_OPCODE_PK4B:
2354 assert (0);
2355 break;
2356
2357 case TGSI_OPCODE_PK4UB:
2358 assert (0);
2359 break;
2360
2361 case TGSI_OPCODE_RFL:
2362 assert (0);
2363 break;
2364
2365 case TGSI_OPCODE_SEQ:
2366 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2367 FETCH( &r[0], 0, chan_index );
2368 FETCH( &r[1], 1, chan_index );
2369 micro_eq( &r[0], &r[0], &r[1],
2370 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2371 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2372 STORE( &r[0], 0, chan_index );
2373 }
2374 break;
2375
2376 case TGSI_OPCODE_SFL:
2377 assert (0);
2378 break;
2379
2380 case TGSI_OPCODE_SGT:
2381 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2382 FETCH( &r[0], 0, chan_index );
2383 FETCH( &r[1], 1, chan_index );
2384 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2385 STORE( &r[0], 0, chan_index );
2386 }
2387 break;
2388
2389 case TGSI_OPCODE_SIN:
2390 FETCH( &r[0], 0, CHAN_X );
2391 micro_sin( &r[0], &r[0] );
2392 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2393 STORE( &r[0], 0, chan_index );
2394 }
2395 break;
2396
2397 case TGSI_OPCODE_SLE:
2398 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2399 FETCH( &r[0], 0, chan_index );
2400 FETCH( &r[1], 1, chan_index );
2401 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2402 STORE( &r[0], 0, chan_index );
2403 }
2404 break;
2405
2406 case TGSI_OPCODE_SNE:
2407 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2408 FETCH( &r[0], 0, chan_index );
2409 FETCH( &r[1], 1, chan_index );
2410 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2411 STORE( &r[0], 0, chan_index );
2412 }
2413 break;
2414
2415 case TGSI_OPCODE_STR:
2416 assert (0);
2417 break;
2418
2419 case TGSI_OPCODE_TEX:
2420 /* simple texture lookup */
2421 /* src[0] = texcoord */
2422 /* src[1] = sampler unit */
2423 exec_tex(mach, inst, FALSE, FALSE);
2424 break;
2425
2426 case TGSI_OPCODE_TXB:
2427 /* Texture lookup with lod bias */
2428 /* src[0] = texcoord (src[0].w = LOD bias) */
2429 /* src[1] = sampler unit */
2430 exec_tex(mach, inst, TRUE, FALSE);
2431 break;
2432
2433 case TGSI_OPCODE_TXD:
2434 /* Texture lookup with explicit partial derivatives */
2435 /* src[0] = texcoord */
2436 /* src[1] = d[strq]/dx */
2437 /* src[2] = d[strq]/dy */
2438 /* src[3] = sampler unit */
2439 assert (0);
2440 break;
2441
2442 case TGSI_OPCODE_TXL:
2443 /* Texture lookup with explicit LOD */
2444 /* src[0] = texcoord (src[0].w = LOD) */
2445 /* src[1] = sampler unit */
2446 exec_tex(mach, inst, TRUE, FALSE);
2447 break;
2448
2449 case TGSI_OPCODE_TXP:
2450 /* Texture lookup with projection */
2451 /* src[0] = texcoord (src[0].w = projection) */
2452 /* src[1] = sampler unit */
2453 exec_tex(mach, inst, FALSE, TRUE);
2454 break;
2455
2456 case TGSI_OPCODE_UP2H:
2457 assert (0);
2458 break;
2459
2460 case TGSI_OPCODE_UP2US:
2461 assert (0);
2462 break;
2463
2464 case TGSI_OPCODE_UP4B:
2465 assert (0);
2466 break;
2467
2468 case TGSI_OPCODE_UP4UB:
2469 assert (0);
2470 break;
2471
2472 case TGSI_OPCODE_X2D:
2473 assert (0);
2474 break;
2475
2476 case TGSI_OPCODE_ARA:
2477 assert (0);
2478 break;
2479
2480 case TGSI_OPCODE_BRA:
2481 assert (0);
2482 break;
2483
2484 case TGSI_OPCODE_CAL:
2485 /* skip the call if no execution channels are enabled */
2486 if (mach->ExecMask) {
2487 /* do the call */
2488
2489 /* push the Cond, Loop, Cont stacks */
2490 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2491 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2492 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2493 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2494 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2495 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2496
2497 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2498 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2499
2500 /* note that PC was already incremented above */
2501 mach->CallStack[mach->CallStackTop++] = *pc;
2502 *pc = inst->InstructionExtLabel.Label;
2503 }
2504 break;
2505
2506 case TGSI_OPCODE_RET:
2507 mach->FuncMask &= ~mach->ExecMask;
2508 UPDATE_EXEC_MASK(mach);
2509
2510 if (mach->FuncMask == 0x0) {
2511 /* really return now (otherwise, keep executing) */
2512
2513 if (mach->CallStackTop == 0) {
2514 /* returning from main() */
2515 *pc = -1;
2516 return;
2517 }
2518 *pc = mach->CallStack[--mach->CallStackTop];
2519
2520 /* pop the Cond, Loop, Cont stacks */
2521 assert(mach->CondStackTop > 0);
2522 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2523 assert(mach->LoopStackTop > 0);
2524 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2525 assert(mach->ContStackTop > 0);
2526 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2527 assert(mach->FuncStackTop > 0);
2528 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2529
2530 UPDATE_EXEC_MASK(mach);
2531 }
2532 break;
2533
2534 case TGSI_OPCODE_SSG:
2535 /* TGSI_OPCODE_SGN */
2536 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2537 FETCH( &r[0], 0, chan_index );
2538 micro_sgn( &r[0], &r[0] );
2539 STORE( &r[0], 0, chan_index );
2540 }
2541 break;
2542
2543 case TGSI_OPCODE_CMP:
2544 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2545 FETCH(&r[0], 0, chan_index);
2546 FETCH(&r[1], 1, chan_index);
2547 FETCH(&r[2], 2, chan_index);
2548
2549 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2550
2551 STORE(&r[0], 0, chan_index);
2552 }
2553 break;
2554
2555 case TGSI_OPCODE_SCS:
2556 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2557 FETCH( &r[0], 0, CHAN_X );
2558 }
2559 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2560 micro_cos( &r[1], &r[0] );
2561 STORE( &r[1], 0, CHAN_X );
2562 }
2563 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2564 micro_sin( &r[1], &r[0] );
2565 STORE( &r[1], 0, CHAN_Y );
2566 }
2567 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2568 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2569 }
2570 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2571 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2572 }
2573 break;
2574
2575 case TGSI_OPCODE_NRM:
2576 /* 3-component vector normalize */
2577 {
2578 union tgsi_exec_channel tmp, dot;
2579
2580 /* tmp = dp3(src0, src0): */
2581 FETCH( &r[0], 0, CHAN_X );
2582 micro_mul( &tmp, &r[0], &r[0] );
2583
2584 FETCH( &r[1], 0, CHAN_Y );
2585 micro_mul( &dot, &r[1], &r[1] );
2586 micro_add( &tmp, &tmp, &dot );
2587
2588 FETCH( &r[2], 0, CHAN_Z );
2589 micro_mul( &dot, &r[2], &r[2] );
2590 micro_add( &tmp, &tmp, &dot );
2591
2592 /* tmp = 1 / sqrt(tmp) */
2593 micro_sqrt( &tmp, &tmp );
2594 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2595
2596 /* note: w channel is undefined */
2597 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2598 /* chan = chan * tmp */
2599 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2600 STORE( &r[chan_index], 0, chan_index );
2601 }
2602 }
2603 break;
2604
2605 case TGSI_OPCODE_NRM4:
2606 /* 4-component vector normalize */
2607 {
2608 union tgsi_exec_channel tmp, dot;
2609
2610 /* tmp = dp4(src0, src0): */
2611 FETCH( &r[0], 0, CHAN_X );
2612 micro_mul( &tmp, &r[0], &r[0] );
2613
2614 FETCH( &r[1], 0, CHAN_Y );
2615 micro_mul( &dot, &r[1], &r[1] );
2616 micro_add( &tmp, &tmp, &dot );
2617
2618 FETCH( &r[2], 0, CHAN_Z );
2619 micro_mul( &dot, &r[2], &r[2] );
2620 micro_add( &tmp, &tmp, &dot );
2621
2622 FETCH( &r[3], 0, CHAN_W );
2623 micro_mul( &dot, &r[3], &r[3] );
2624 micro_add( &tmp, &tmp, &dot );
2625
2626 /* tmp = 1 / sqrt(tmp) */
2627 micro_sqrt( &tmp, &tmp );
2628 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2629
2630 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2631 /* chan = chan * tmp */
2632 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2633 STORE( &r[chan_index], 0, chan_index );
2634 }
2635 }
2636 break;
2637
2638 case TGSI_OPCODE_DIV:
2639 assert( 0 );
2640 break;
2641
2642 case TGSI_OPCODE_DP2:
2643 FETCH( &r[0], 0, CHAN_X );
2644 FETCH( &r[1], 1, CHAN_X );
2645 micro_mul( &r[0], &r[0], &r[1] );
2646
2647 FETCH( &r[1], 0, CHAN_Y );
2648 FETCH( &r[2], 1, CHAN_Y );
2649 micro_mul( &r[1], &r[1], &r[2] );
2650 micro_add( &r[0], &r[0], &r[1] );
2651
2652 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2653 STORE( &r[0], 0, chan_index );
2654 }
2655 break;
2656
2657 case TGSI_OPCODE_IF:
2658 /* push CondMask */
2659 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2660 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2661 FETCH( &r[0], 0, CHAN_X );
2662 /* update CondMask */
2663 if( ! r[0].u[0] ) {
2664 mach->CondMask &= ~0x1;
2665 }
2666 if( ! r[0].u[1] ) {
2667 mach->CondMask &= ~0x2;
2668 }
2669 if( ! r[0].u[2] ) {
2670 mach->CondMask &= ~0x4;
2671 }
2672 if( ! r[0].u[3] ) {
2673 mach->CondMask &= ~0x8;
2674 }
2675 UPDATE_EXEC_MASK(mach);
2676 /* Todo: If CondMask==0, jump to ELSE */
2677 break;
2678
2679 case TGSI_OPCODE_ELSE:
2680 /* invert CondMask wrt previous mask */
2681 {
2682 uint prevMask;
2683 assert(mach->CondStackTop > 0);
2684 prevMask = mach->CondStack[mach->CondStackTop - 1];
2685 mach->CondMask = ~mach->CondMask & prevMask;
2686 UPDATE_EXEC_MASK(mach);
2687 /* Todo: If CondMask==0, jump to ENDIF */
2688 }
2689 break;
2690
2691 case TGSI_OPCODE_ENDIF:
2692 /* pop CondMask */
2693 assert(mach->CondStackTop > 0);
2694 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2695 UPDATE_EXEC_MASK(mach);
2696 break;
2697
2698 case TGSI_OPCODE_END:
2699 /* halt execution */
2700 *pc = -1;
2701 break;
2702
2703 case TGSI_OPCODE_REP:
2704 assert (0);
2705 break;
2706
2707 case TGSI_OPCODE_ENDREP:
2708 assert (0);
2709 break;
2710
2711 case TGSI_OPCODE_PUSHA:
2712 assert (0);
2713 break;
2714
2715 case TGSI_OPCODE_POPA:
2716 assert (0);
2717 break;
2718
2719 case TGSI_OPCODE_CEIL:
2720 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2721 FETCH( &r[0], 0, chan_index );
2722 micro_ceil( &r[0], &r[0] );
2723 STORE( &r[0], 0, chan_index );
2724 }
2725 break;
2726
2727 case TGSI_OPCODE_I2F:
2728 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2729 FETCH( &r[0], 0, chan_index );
2730 micro_i2f( &r[0], &r[0] );
2731 STORE( &r[0], 0, chan_index );
2732 }
2733 break;
2734
2735 case TGSI_OPCODE_NOT:
2736 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2737 FETCH( &r[0], 0, chan_index );
2738 micro_not( &r[0], &r[0] );
2739 STORE( &r[0], 0, chan_index );
2740 }
2741 break;
2742
2743 case TGSI_OPCODE_TRUNC:
2744 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2745 FETCH( &r[0], 0, chan_index );
2746 micro_trunc( &r[0], &r[0] );
2747 STORE( &r[0], 0, chan_index );
2748 }
2749 break;
2750
2751 case TGSI_OPCODE_SHL:
2752 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2753 FETCH( &r[0], 0, chan_index );
2754 FETCH( &r[1], 1, chan_index );
2755 micro_shl( &r[0], &r[0], &r[1] );
2756 STORE( &r[0], 0, chan_index );
2757 }
2758 break;
2759
2760 case TGSI_OPCODE_SHR:
2761 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2762 FETCH( &r[0], 0, chan_index );
2763 FETCH( &r[1], 1, chan_index );
2764 micro_ishr( &r[0], &r[0], &r[1] );
2765 STORE( &r[0], 0, chan_index );
2766 }
2767 break;
2768
2769 case TGSI_OPCODE_AND:
2770 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2771 FETCH( &r[0], 0, chan_index );
2772 FETCH( &r[1], 1, chan_index );
2773 micro_and( &r[0], &r[0], &r[1] );
2774 STORE( &r[0], 0, chan_index );
2775 }
2776 break;
2777
2778 case TGSI_OPCODE_OR:
2779 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2780 FETCH( &r[0], 0, chan_index );
2781 FETCH( &r[1], 1, chan_index );
2782 micro_or( &r[0], &r[0], &r[1] );
2783 STORE( &r[0], 0, chan_index );
2784 }
2785 break;
2786
2787 case TGSI_OPCODE_MOD:
2788 assert (0);
2789 break;
2790
2791 case TGSI_OPCODE_XOR:
2792 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2793 FETCH( &r[0], 0, chan_index );
2794 FETCH( &r[1], 1, chan_index );
2795 micro_xor( &r[0], &r[0], &r[1] );
2796 STORE( &r[0], 0, chan_index );
2797 }
2798 break;
2799
2800 case TGSI_OPCODE_SAD:
2801 assert (0);
2802 break;
2803
2804 case TGSI_OPCODE_TXF:
2805 assert (0);
2806 break;
2807
2808 case TGSI_OPCODE_TXQ:
2809 assert (0);
2810 break;
2811
2812 case TGSI_OPCODE_EMIT:
2813 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2814 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2815 break;
2816
2817 case TGSI_OPCODE_ENDPRIM:
2818 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2819 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2820 break;
2821
2822 case TGSI_OPCODE_LOOP:
2823 /* fall-through (for now) */
2824 case TGSI_OPCODE_BGNLOOP2:
2825 /* push LoopMask and ContMasks */
2826 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2827 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2828 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2829 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2830 break;
2831
2832 case TGSI_OPCODE_ENDLOOP:
2833 /* fall-through (for now at least) */
2834 case TGSI_OPCODE_ENDLOOP2:
2835 /* Restore ContMask, but don't pop */
2836 assert(mach->ContStackTop > 0);
2837 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2838 UPDATE_EXEC_MASK(mach);
2839 if (mach->ExecMask) {
2840 /* repeat loop: jump to instruction just past BGNLOOP */
2841 *pc = inst->InstructionExtLabel.Label + 1;
2842 }
2843 else {
2844 /* exit loop: pop LoopMask */
2845 assert(mach->LoopStackTop > 0);
2846 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2847 /* pop ContMask */
2848 assert(mach->ContStackTop > 0);
2849 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2850 }
2851 UPDATE_EXEC_MASK(mach);
2852 break;
2853
2854 case TGSI_OPCODE_BRK:
2855 /* turn off loop channels for each enabled exec channel */
2856 mach->LoopMask &= ~mach->ExecMask;
2857 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2858 UPDATE_EXEC_MASK(mach);
2859 break;
2860
2861 case TGSI_OPCODE_CONT:
2862 /* turn off cont channels for each enabled exec channel */
2863 mach->ContMask &= ~mach->ExecMask;
2864 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2865 UPDATE_EXEC_MASK(mach);
2866 break;
2867
2868 case TGSI_OPCODE_BGNSUB:
2869 /* no-op */
2870 break;
2871
2872 case TGSI_OPCODE_ENDSUB:
2873 /* no-op */
2874 break;
2875
2876 case TGSI_OPCODE_NOISE1:
2877 assert( 0 );
2878 break;
2879
2880 case TGSI_OPCODE_NOISE2:
2881 assert( 0 );
2882 break;
2883
2884 case TGSI_OPCODE_NOISE3:
2885 assert( 0 );
2886 break;
2887
2888 case TGSI_OPCODE_NOISE4:
2889 assert( 0 );
2890 break;
2891
2892 case TGSI_OPCODE_NOP:
2893 break;
2894
2895 default:
2896 assert( 0 );
2897 }
2898 }
2899
2900
2901 /**
2902 * Run TGSI interpreter.
2903 * \return bitmask of "alive" quad components
2904 */
2905 uint
2906 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2907 {
2908 uint i;
2909 int pc = 0;
2910
2911 mach->CondMask = 0xf;
2912 mach->LoopMask = 0xf;
2913 mach->ContMask = 0xf;
2914 mach->FuncMask = 0xf;
2915 mach->ExecMask = 0xf;
2916
2917 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2918 assert(mach->CondStackTop == 0);
2919 assert(mach->LoopStackTop == 0);
2920 assert(mach->ContStackTop == 0);
2921 assert(mach->CallStackTop == 0);
2922
2923 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2924 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2925
2926 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2927 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2928 mach->Primitives[0] = 0;
2929 }
2930
2931 for (i = 0; i < QUAD_SIZE; i++) {
2932 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2933 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2934 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2935 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2936 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2937 }
2938
2939 /* execute declarations (interpolants) */
2940 for (i = 0; i < mach->NumDeclarations; i++) {
2941 exec_declaration( mach, mach->Declarations+i );
2942 }
2943
2944 /* execute instructions, until pc is set to -1 */
2945 while (pc != -1) {
2946 assert(pc < (int) mach->NumInstructions);
2947 exec_instruction( mach, mach->Instructions + pc, &pc );
2948 }
2949
2950 #if 0
2951 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2952 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2953 /*
2954 * Scale back depth component.
2955 */
2956 for (i = 0; i < 4; i++)
2957 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2958 }
2959 #endif
2960
2961 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2962 }
2963
2964