Merge commit 'origin/gallium-master-merge'
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask; see UPDATE_EXEC_MASK), which are controlled by
45 * the flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_parse.h"
57 #include "tgsi/tgsi_util.h"
58 #include "tgsi_exec.h"
59 #include "util/u_memory.h"
60 #include "util/u_math.h"
61
62 #define FAST_MATH 1
63
64 #define TILE_TOP_LEFT 0
65 #define TILE_TOP_RIGHT 1
66 #define TILE_BOTTOM_LEFT 2
67 #define TILE_BOTTOM_RIGHT 3
68
69 #define CHAN_X 0
70 #define CHAN_Y 1
71 #define CHAN_Z 2
72 #define CHAN_W 3
73
74 /*
75 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
76 */
77 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
78 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
79 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
80 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
81 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
82 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
83 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
84 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
85 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
86 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
87 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
88 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
89 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
90 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
91 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
92 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
93 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
94 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
95 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
96 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
97 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
98 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
99 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
100 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
101 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
102 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
103 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
104 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
105 #define TEMP_R0 TGSI_EXEC_TEMP_R0
106
/**
 * Non-zero if channel CHAN is set in the write mask of the instruction's
 * first destination register.
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same as IS_CHANNEL_ENABLED, but for the second destination register. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the statement that follows once for every channel
 * (0 .. NUM_CHANNELS-1) enabled in the first destination's write mask.
 * CHAN must be a modifiable lvalue; it is parenthesized so that
 * expressions such as *p are incremented correctly.
 * The expansion ends in a bare "if", so do not follow the body with "else".
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same as FOR_EACH_ENABLED_CHANNEL, but for the second destination register. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
120
121
/**
 * Recompute the execution mask as the bitwise AND of the conditional,
 * loop, continue and function masks.
 * MACH is a pointer expression; it is evaluated several times, so it
 * must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
125
126 /**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131 void
132 tgsi_exec_machine_bind_shader(
133 struct tgsi_exec_machine *mach,
134 const struct tgsi_token *tokens,
135 uint numSamplers,
136 struct tgsi_sampler **samplers)
137 {
138 uint k;
139 struct tgsi_parse_context parse;
140 struct tgsi_exec_labels *labels = &mach->Labels;
141 struct tgsi_full_instruction *instructions;
142 struct tgsi_full_declaration *declarations;
143 uint maxInstructions = 10, numInstructions = 0;
144 uint maxDeclarations = 10, numDeclarations = 0;
145 uint instno = 0;
146
147 #if 0
148 tgsi_dump(tokens, 0);
149 #endif
150
151 util_init_math();
152
153 mach->Tokens = tokens;
154 mach->Samplers = samplers;
155
156 k = tgsi_parse_init (&parse, mach->Tokens);
157 if (k != TGSI_PARSE_OK) {
158 debug_printf( "Problem parsing!\n" );
159 return;
160 }
161
162 mach->Processor = parse.FullHeader.Processor.Processor;
163 mach->ImmLimit = 0;
164 labels->count = 0;
165
166 declarations = (struct tgsi_full_declaration *)
167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169 if (!declarations) {
170 return;
171 }
172
173 instructions = (struct tgsi_full_instruction *)
174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176 if (!instructions) {
177 FREE( declarations );
178 return;
179 }
180
181 while( !tgsi_parse_end_of_tokens( &parse ) ) {
182 uint pointer = parse.Position;
183 uint i;
184
185 tgsi_parse_token( &parse );
186 switch( parse.FullToken.Token.Type ) {
187 case TGSI_TOKEN_TYPE_DECLARATION:
188 /* save expanded declaration */
189 if (numDeclarations == maxDeclarations) {
190 declarations = REALLOC(declarations,
191 maxDeclarations
192 * sizeof(struct tgsi_full_declaration),
193 (maxDeclarations + 10)
194 * sizeof(struct tgsi_full_declaration));
195 maxDeclarations += 10;
196 }
197 memcpy(declarations + numDeclarations,
198 &parse.FullToken.FullDeclaration,
199 sizeof(declarations[0]));
200 numDeclarations++;
201 break;
202
203 case TGSI_TOKEN_TYPE_IMMEDIATE:
204 {
205 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
206 assert( size % 4 == 0 );
207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209 for( i = 0; i < size; i++ ) {
210 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212 }
213 mach->ImmLimit += size / 4;
214 }
215 break;
216
217 case TGSI_TOKEN_TYPE_INSTRUCTION:
218 assert( labels->count < MAX_LABELS );
219
220 labels->labels[labels->count][0] = instno;
221 labels->labels[labels->count][1] = pointer;
222 labels->count++;
223
224 /* save expanded instruction */
225 if (numInstructions == maxInstructions) {
226 instructions = REALLOC(instructions,
227 maxInstructions
228 * sizeof(struct tgsi_full_instruction),
229 (maxInstructions + 10)
230 * sizeof(struct tgsi_full_instruction));
231 maxInstructions += 10;
232 }
233 memcpy(instructions + numInstructions,
234 &parse.FullToken.FullInstruction,
235 sizeof(instructions[0]));
236 numInstructions++;
237 break;
238
239 default:
240 assert( 0 );
241 }
242 }
243 tgsi_parse_free (&parse);
244
245 if (mach->Declarations) {
246 FREE( mach->Declarations );
247 }
248 mach->Declarations = declarations;
249 mach->NumDeclarations = numDeclarations;
250
251 if (mach->Instructions) {
252 FREE( mach->Instructions );
253 }
254 mach->Instructions = instructions;
255 mach->NumInstructions = numInstructions;
256 }
257
258
259 void
260 tgsi_exec_machine_init(
261 struct tgsi_exec_machine *mach )
262 {
263 uint i;
264
265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268 /* Setup constants. */
269 for( i = 0; i < 4; i++ ) {
270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280 }
281 }
282
283
284 void
285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286 {
287 if (mach->Instructions) {
288 FREE(mach->Instructions);
289 mach->Instructions = NULL;
290 mach->NumInstructions = 0;
291 }
292 if (mach->Declarations) {
293 FREE(mach->Declarations);
294 mach->Declarations = NULL;
295 mach->NumDeclarations = 0;
296 }
297 }
298
299
300 static void
301 micro_abs(
302 union tgsi_exec_channel *dst,
303 const union tgsi_exec_channel *src )
304 {
305 dst->f[0] = fabsf( src->f[0] );
306 dst->f[1] = fabsf( src->f[1] );
307 dst->f[2] = fabsf( src->f[2] );
308 dst->f[3] = fabsf( src->f[3] );
309 }
310
311 static void
312 micro_add(
313 union tgsi_exec_channel *dst,
314 const union tgsi_exec_channel *src0,
315 const union tgsi_exec_channel *src1 )
316 {
317 dst->f[0] = src0->f[0] + src1->f[0];
318 dst->f[1] = src0->f[1] + src1->f[1];
319 dst->f[2] = src0->f[2] + src1->f[2];
320 dst->f[3] = src0->f[3] + src1->f[3];
321 }
322
#if 0
/** Per-lane signed integer addition: dst = src0 + src1 (compiled out). */
static void
micro_iadd(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] + src1->i[lane];
   }
}
#endif
336
337 static void
338 micro_and(
339 union tgsi_exec_channel *dst,
340 const union tgsi_exec_channel *src0,
341 const union tgsi_exec_channel *src1 )
342 {
343 dst->u[0] = src0->u[0] & src1->u[0];
344 dst->u[1] = src0->u[1] & src1->u[1];
345 dst->u[2] = src0->u[2] & src1->u[2];
346 dst->u[3] = src0->u[3] & src1->u[3];
347 }
348
349 static void
350 micro_ceil(
351 union tgsi_exec_channel *dst,
352 const union tgsi_exec_channel *src )
353 {
354 dst->f[0] = ceilf( src->f[0] );
355 dst->f[1] = ceilf( src->f[1] );
356 dst->f[2] = ceilf( src->f[2] );
357 dst->f[3] = ceilf( src->f[3] );
358 }
359
360 static void
361 micro_cos(
362 union tgsi_exec_channel *dst,
363 const union tgsi_exec_channel *src )
364 {
365 dst->f[0] = cosf( src->f[0] );
366 dst->f[1] = cosf( src->f[1] );
367 dst->f[2] = cosf( src->f[2] );
368 dst->f[3] = cosf( src->f[3] );
369 }
370
371 static void
372 micro_ddx(
373 union tgsi_exec_channel *dst,
374 const union tgsi_exec_channel *src )
375 {
376 dst->f[0] =
377 dst->f[1] =
378 dst->f[2] =
379 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
380 }
381
382 static void
383 micro_ddy(
384 union tgsi_exec_channel *dst,
385 const union tgsi_exec_channel *src )
386 {
387 dst->f[0] =
388 dst->f[1] =
389 dst->f[2] =
390 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
391 }
392
393 static void
394 micro_div(
395 union tgsi_exec_channel *dst,
396 const union tgsi_exec_channel *src0,
397 const union tgsi_exec_channel *src1 )
398 {
399 if (src1->f[0] != 0) {
400 dst->f[0] = src0->f[0] / src1->f[0];
401 }
402 if (src1->f[1] != 0) {
403 dst->f[1] = src0->f[1] / src1->f[1];
404 }
405 if (src1->f[2] != 0) {
406 dst->f[2] = src0->f[2] / src1->f[2];
407 }
408 if (src1->f[3] != 0) {
409 dst->f[3] = src0->f[3] / src1->f[3];
410 }
411 }
412
#if 0
/**
 * Per-lane unsigned integer division: dst = src0 / src1 (compiled out).
 * There is no check for a zero divisor.
 */
static void
micro_udiv(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] / src1->u[lane];
   }
}
#endif
426
427 static void
428 micro_eq(
429 union tgsi_exec_channel *dst,
430 const union tgsi_exec_channel *src0,
431 const union tgsi_exec_channel *src1,
432 const union tgsi_exec_channel *src2,
433 const union tgsi_exec_channel *src3 )
434 {
435 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
436 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
437 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
438 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
439 }
440
#if 0
/**
 * Per-lane signed integer select on equality (compiled out):
 * dst = (src0 == src1) ? src2 : src3.
 */
static void
micro_ieq(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] == src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
456
457 static void
458 micro_exp2(
459 union tgsi_exec_channel *dst,
460 const union tgsi_exec_channel *src)
461 {
462 #if FAST_MATH
463 dst->f[0] = util_fast_exp2( src->f[0] );
464 dst->f[1] = util_fast_exp2( src->f[1] );
465 dst->f[2] = util_fast_exp2( src->f[2] );
466 dst->f[3] = util_fast_exp2( src->f[3] );
467 #else
468 dst->f[0] = powf( 2.0f, src->f[0] );
469 dst->f[1] = powf( 2.0f, src->f[1] );
470 dst->f[2] = powf( 2.0f, src->f[2] );
471 dst->f[3] = powf( 2.0f, src->f[3] );
472 #endif
473 }
474
#if 0
/** Per-lane float to unsigned conversion by C cast (compiled out). */
static void
micro_f2ut(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = (uint) src->f[lane];
   }
}
#endif
487
488 static void
489 micro_flr(
490 union tgsi_exec_channel *dst,
491 const union tgsi_exec_channel *src )
492 {
493 dst->f[0] = floorf( src->f[0] );
494 dst->f[1] = floorf( src->f[1] );
495 dst->f[2] = floorf( src->f[2] );
496 dst->f[3] = floorf( src->f[3] );
497 }
498
499 static void
500 micro_frc(
501 union tgsi_exec_channel *dst,
502 const union tgsi_exec_channel *src )
503 {
504 dst->f[0] = src->f[0] - floorf( src->f[0] );
505 dst->f[1] = src->f[1] - floorf( src->f[1] );
506 dst->f[2] = src->f[2] - floorf( src->f[2] );
507 dst->f[3] = src->f[3] - floorf( src->f[3] );
508 }
509
510 static void
511 micro_ge(
512 union tgsi_exec_channel *dst,
513 const union tgsi_exec_channel *src0,
514 const union tgsi_exec_channel *src1,
515 const union tgsi_exec_channel *src2,
516 const union tgsi_exec_channel *src3 )
517 {
518 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
519 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
520 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
521 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
522 }
523
524 static void
525 micro_i2f(
526 union tgsi_exec_channel *dst,
527 const union tgsi_exec_channel *src )
528 {
529 dst->f[0] = (float) src->i[0];
530 dst->f[1] = (float) src->i[1];
531 dst->f[2] = (float) src->i[2];
532 dst->f[3] = (float) src->i[3];
533 }
534
535 static void
536 micro_lg2(
537 union tgsi_exec_channel *dst,
538 const union tgsi_exec_channel *src )
539 {
540 #if FAST_MATH
541 dst->f[0] = util_fast_log2( src->f[0] );
542 dst->f[1] = util_fast_log2( src->f[1] );
543 dst->f[2] = util_fast_log2( src->f[2] );
544 dst->f[3] = util_fast_log2( src->f[3] );
545 #else
546 dst->f[0] = logf( src->f[0] ) * 1.442695f;
547 dst->f[1] = logf( src->f[1] ) * 1.442695f;
548 dst->f[2] = logf( src->f[2] ) * 1.442695f;
549 dst->f[3] = logf( src->f[3] ) * 1.442695f;
550 #endif
551 }
552
553 static void
554 micro_le(
555 union tgsi_exec_channel *dst,
556 const union tgsi_exec_channel *src0,
557 const union tgsi_exec_channel *src1,
558 const union tgsi_exec_channel *src2,
559 const union tgsi_exec_channel *src3 )
560 {
561 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
562 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
563 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
564 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
565 }
566
567 static void
568 micro_lt(
569 union tgsi_exec_channel *dst,
570 const union tgsi_exec_channel *src0,
571 const union tgsi_exec_channel *src1,
572 const union tgsi_exec_channel *src2,
573 const union tgsi_exec_channel *src3 )
574 {
575 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
576 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
577 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
578 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
579 }
580
#if 0
/**
 * Per-lane signed integer select on less-than (compiled out):
 * dst = (src0 < src1) ? src2 : src3.
 */
static void
micro_ilt(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
596
#if 0
/**
 * Per-lane unsigned integer select on less-than (compiled out):
 * dst = (src0 < src1) ? src2 : src3.
 */
static void
micro_ult(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src2->u[lane];
      }
      else {
         dst->u[lane] = src3->u[lane];
      }
   }
}
#endif
612
613 static void
614 micro_max(
615 union tgsi_exec_channel *dst,
616 const union tgsi_exec_channel *src0,
617 const union tgsi_exec_channel *src1 )
618 {
619 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
620 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
621 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
622 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
623 }
624
#if 0
/** Per-lane signed integer maximum (compiled out). */
static void
micro_imax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] > src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
638
#if 0
/** Per-lane unsigned integer maximum (compiled out). */
static void
micro_umax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] > src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
652
653 static void
654 micro_min(
655 union tgsi_exec_channel *dst,
656 const union tgsi_exec_channel *src0,
657 const union tgsi_exec_channel *src1 )
658 {
659 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
660 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
661 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
662 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
663 }
664
#if 0
/** Per-lane signed integer minimum (compiled out). */
static void
micro_imin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
678
#if 0
/** Per-lane unsigned integer minimum (compiled out). */
static void
micro_umin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
692
#if 0
/**
 * Per-lane unsigned integer remainder: dst = src0 % src1 (compiled out).
 * There is no check for a zero divisor.
 */
static void
micro_umod(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] % src1->u[lane];
   }
}
#endif
706
707 static void
708 micro_mul(
709 union tgsi_exec_channel *dst,
710 const union tgsi_exec_channel *src0,
711 const union tgsi_exec_channel *src1 )
712 {
713 dst->f[0] = src0->f[0] * src1->f[0];
714 dst->f[1] = src0->f[1] * src1->f[1];
715 dst->f[2] = src0->f[2] * src1->f[2];
716 dst->f[3] = src0->f[3] * src1->f[3];
717 }
718
#if 0
/** Per-lane signed integer multiplication: dst = src0 * src1 (compiled out). */
static void
micro_imul(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] * src1->i[lane];
   }
}
#endif
732
#if 0
/**
 * Per-lane signed integer multiplication with two destinations
 * (compiled out).  dst1 receives src0 * src1 computed in the lane's
 * integer type; dst0 is set to zero.
 * NOTE(review): despite the name no 64-bit product is formed here -
 * this looks like a placeholder; confirm before enabling.
 */
static void
micro_imul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
751
#if 0
/**
 * Per-lane unsigned integer multiplication with two destinations
 * (compiled out).  dst1 receives src0 * src1 computed in the lane's
 * unsigned type; dst0 is set to zero.
 * NOTE(review): despite the name no 64-bit product is formed here -
 * this looks like a placeholder; confirm before enabling.
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->u[lane] = src0->u[lane] * src1->u[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->u[lane] = 0;
   }
}
#endif
770
771
#if 0
/**
 * Per-lane conditional move on the unsigned view (compiled out):
 * dst = src0 ? src1 : src2.
 */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane]) {
         dst->u[lane] = src1->u[lane];
      }
      else {
         dst->u[lane] = src2->u[lane];
      }
   }
}
#endif
786
787 static void
788 micro_neg(
789 union tgsi_exec_channel *dst,
790 const union tgsi_exec_channel *src )
791 {
792 dst->f[0] = -src->f[0];
793 dst->f[1] = -src->f[1];
794 dst->f[2] = -src->f[2];
795 dst->f[3] = -src->f[3];
796 }
797
#if 0
/** Per-lane signed integer negation: dst = -src (compiled out). */
static void
micro_ineg(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = -src->i[lane];
   }
}
#endif
810
811 static void
812 micro_not(
813 union tgsi_exec_channel *dst,
814 const union tgsi_exec_channel *src )
815 {
816 dst->u[0] = ~src->u[0];
817 dst->u[1] = ~src->u[1];
818 dst->u[2] = ~src->u[2];
819 dst->u[3] = ~src->u[3];
820 }
821
822 static void
823 micro_or(
824 union tgsi_exec_channel *dst,
825 const union tgsi_exec_channel *src0,
826 const union tgsi_exec_channel *src1 )
827 {
828 dst->u[0] = src0->u[0] | src1->u[0];
829 dst->u[1] = src0->u[1] | src1->u[1];
830 dst->u[2] = src0->u[2] | src1->u[2];
831 dst->u[3] = src0->u[3] | src1->u[3];
832 }
833
834 static void
835 micro_pow(
836 union tgsi_exec_channel *dst,
837 const union tgsi_exec_channel *src0,
838 const union tgsi_exec_channel *src1 )
839 {
840 #if FAST_MATH
841 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
842 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
843 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
844 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
845 #else
846 dst->f[0] = powf( src0->f[0], src1->f[0] );
847 dst->f[1] = powf( src0->f[1], src1->f[1] );
848 dst->f[2] = powf( src0->f[2], src1->f[2] );
849 dst->f[3] = powf( src0->f[3], src1->f[3] );
850 #endif
851 }
852
853 static void
854 micro_rnd(
855 union tgsi_exec_channel *dst,
856 const union tgsi_exec_channel *src )
857 {
858 dst->f[0] = floorf( src->f[0] + 0.5f );
859 dst->f[1] = floorf( src->f[1] + 0.5f );
860 dst->f[2] = floorf( src->f[2] + 0.5f );
861 dst->f[3] = floorf( src->f[3] + 0.5f );
862 }
863
864 static void
865 micro_sgn(
866 union tgsi_exec_channel *dst,
867 const union tgsi_exec_channel *src )
868 {
869 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
870 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
871 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
872 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
873 }
874
875 static void
876 micro_shl(
877 union tgsi_exec_channel *dst,
878 const union tgsi_exec_channel *src0,
879 const union tgsi_exec_channel *src1 )
880 {
881 dst->i[0] = src0->i[0] << src1->i[0];
882 dst->i[1] = src0->i[1] << src1->i[1];
883 dst->i[2] = src0->i[2] << src1->i[2];
884 dst->i[3] = src0->i[3] << src1->i[3];
885 }
886
887 static void
888 micro_ishr(
889 union tgsi_exec_channel *dst,
890 const union tgsi_exec_channel *src0,
891 const union tgsi_exec_channel *src1 )
892 {
893 dst->i[0] = src0->i[0] >> src1->i[0];
894 dst->i[1] = src0->i[1] >> src1->i[1];
895 dst->i[2] = src0->i[2] >> src1->i[2];
896 dst->i[3] = src0->i[3] >> src1->i[3];
897 }
898
899 static void
900 micro_trunc(
901 union tgsi_exec_channel *dst,
902 const union tgsi_exec_channel *src0 )
903 {
904 dst->f[0] = (float) (int) src0->f[0];
905 dst->f[1] = (float) (int) src0->f[1];
906 dst->f[2] = (float) (int) src0->f[2];
907 dst->f[3] = (float) (int) src0->f[3];
908 }
909
#if 0
/** Per-lane right shift on the unsigned view: dst = src0 >> src1 (compiled out). */
static void
micro_ushr(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] >> src1->u[lane];
   }
}
#endif
923
924 static void
925 micro_sin(
926 union tgsi_exec_channel *dst,
927 const union tgsi_exec_channel *src )
928 {
929 dst->f[0] = sinf( src->f[0] );
930 dst->f[1] = sinf( src->f[1] );
931 dst->f[2] = sinf( src->f[2] );
932 dst->f[3] = sinf( src->f[3] );
933 }
934
935 static void
936 micro_sqrt( union tgsi_exec_channel *dst,
937 const union tgsi_exec_channel *src )
938 {
939 dst->f[0] = sqrtf( src->f[0] );
940 dst->f[1] = sqrtf( src->f[1] );
941 dst->f[2] = sqrtf( src->f[2] );
942 dst->f[3] = sqrtf( src->f[3] );
943 }
944
945 static void
946 micro_sub(
947 union tgsi_exec_channel *dst,
948 const union tgsi_exec_channel *src0,
949 const union tgsi_exec_channel *src1 )
950 {
951 dst->f[0] = src0->f[0] - src1->f[0];
952 dst->f[1] = src0->f[1] - src1->f[1];
953 dst->f[2] = src0->f[2] - src1->f[2];
954 dst->f[3] = src0->f[3] - src1->f[3];
955 }
956
#if 0
/** Per-lane unsigned integer to float conversion (compiled out). */
static void
micro_u2f(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->f[lane] = (float) src->u[lane];
   }
}
#endif
969
970 static void
971 micro_xor(
972 union tgsi_exec_channel *dst,
973 const union tgsi_exec_channel *src0,
974 const union tgsi_exec_channel *src1 )
975 {
976 dst->u[0] = src0->u[0] ^ src1->u[0];
977 dst->u[1] = src0->u[1] ^ src1->u[1];
978 dst->u[2] = src0->u[2] ^ src1->u[2];
979 dst->u[3] = src0->u[3] ^ src1->u[3];
980 }
981
982 static void
983 fetch_src_file_channel(
984 const struct tgsi_exec_machine *mach,
985 const uint file,
986 const uint swizzle,
987 const union tgsi_exec_channel *index,
988 union tgsi_exec_channel *chan )
989 {
990 switch( swizzle ) {
991 case TGSI_EXTSWIZZLE_X:
992 case TGSI_EXTSWIZZLE_Y:
993 case TGSI_EXTSWIZZLE_Z:
994 case TGSI_EXTSWIZZLE_W:
995 switch( file ) {
996 case TGSI_FILE_CONSTANT:
997 assert(mach->Consts);
998 if (index->i[0] < 0)
999 chan->f[0] = 0.0f;
1000 else
1001 chan->f[0] = mach->Consts[index->i[0]][swizzle];
1002 if (index->i[1] < 0)
1003 chan->f[1] = 0.0f;
1004 else
1005 chan->f[1] = mach->Consts[index->i[1]][swizzle];
1006 if (index->i[2] < 0)
1007 chan->f[2] = 0.0f;
1008 else
1009 chan->f[2] = mach->Consts[index->i[2]][swizzle];
1010 if (index->i[3] < 0)
1011 chan->f[3] = 0.0f;
1012 else
1013 chan->f[3] = mach->Consts[index->i[3]][swizzle];
1014 break;
1015
1016 case TGSI_FILE_INPUT:
1017 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1018 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1019 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1020 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1021 break;
1022
1023 case TGSI_FILE_TEMPORARY:
1024 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1025 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1026 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1027 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1028 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1029 break;
1030
1031 case TGSI_FILE_IMMEDIATE:
1032 assert( index->i[0] < (int) mach->ImmLimit );
1033 chan->f[0] = mach->Imms[index->i[0]][swizzle];
1034 assert( index->i[1] < (int) mach->ImmLimit );
1035 chan->f[1] = mach->Imms[index->i[1]][swizzle];
1036 assert( index->i[2] < (int) mach->ImmLimit );
1037 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1038 assert( index->i[3] < (int) mach->ImmLimit );
1039 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1040 break;
1041
1042 case TGSI_FILE_ADDRESS:
1043 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1044 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1045 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1046 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1047 break;
1048
1049 case TGSI_FILE_OUTPUT:
1050 /* vertex/fragment output vars can be read too */
1051 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1052 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1053 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1054 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1055 break;
1056
1057 default:
1058 assert( 0 );
1059 }
1060 break;
1061
1062 case TGSI_EXTSWIZZLE_ZERO:
1063 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1064 break;
1065
1066 case TGSI_EXTSWIZZLE_ONE:
1067 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1068 break;
1069
1070 default:
1071 assert( 0 );
1072 }
1073 }
1074
1075 static void
1076 fetch_source(
1077 const struct tgsi_exec_machine *mach,
1078 union tgsi_exec_channel *chan,
1079 const struct tgsi_full_src_register *reg,
1080 const uint chan_index )
1081 {
1082 union tgsi_exec_channel index;
1083 uint swizzle;
1084
1085 /* We start with a direct index into a register file.
1086 *
1087 * file[1],
1088 * where:
1089 * file = SrcRegister.File
1090 * [1] = SrcRegister.Index
1091 */
1092 index.i[0] =
1093 index.i[1] =
1094 index.i[2] =
1095 index.i[3] = reg->SrcRegister.Index;
1096
1097 /* There is an extra source register that indirectly subscripts
1098 * a register file. The direct index now becomes an offset
1099 * that is being added to the indirect register.
1100 *
1101 * file[ind[2].x+1],
1102 * where:
1103 * ind = SrcRegisterInd.File
1104 * [2] = SrcRegisterInd.Index
1105 * .x = SrcRegisterInd.SwizzleX
1106 */
1107 if (reg->SrcRegister.Indirect) {
1108 union tgsi_exec_channel index2;
1109 union tgsi_exec_channel indir_index;
1110 const uint execmask = mach->ExecMask;
1111 uint i;
1112
1113 /* which address register (always zero now) */
1114 index2.i[0] =
1115 index2.i[1] =
1116 index2.i[2] =
1117 index2.i[3] = reg->SrcRegisterInd.Index;
1118
1119 /* get current value of address register[swizzle] */
1120 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1121 fetch_src_file_channel(
1122 mach,
1123 reg->SrcRegisterInd.File,
1124 swizzle,
1125 &index2,
1126 &indir_index );
1127
1128 /* add value of address register to the offset */
1129 index.i[0] += (int) indir_index.f[0];
1130 index.i[1] += (int) indir_index.f[1];
1131 index.i[2] += (int) indir_index.f[2];
1132 index.i[3] += (int) indir_index.f[3];
1133
1134 /* for disabled execution channels, zero-out the index to
1135 * avoid using a potential garbage value.
1136 */
1137 for (i = 0; i < QUAD_SIZE; i++) {
1138 if ((execmask & (1 << i)) == 0)
1139 index.i[i] = 0;
1140 }
1141 }
1142
1143 /* There is an extra source register that is a second
1144 * subscript to a register file. Effectively it means that
1145 * the register file is actually a 2D array of registers.
1146 *
1147 * file[1][3] == file[1*sizeof(file[1])+3],
1148 * where:
1149 * [3] = SrcRegisterDim.Index
1150 */
1151 if (reg->SrcRegister.Dimension) {
1152 /* The size of the first-order array depends on the register file type.
1153 * We need to multiply the index to the first array to get an effective,
1154 * "flat" index that points to the beginning of the second-order array.
1155 */
1156 switch (reg->SrcRegister.File) {
1157 case TGSI_FILE_INPUT:
1158 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1159 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1160 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1161 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1162 break;
1163 case TGSI_FILE_CONSTANT:
1164 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1165 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1166 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1167 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1168 break;
1169 default:
1170 assert( 0 );
1171 }
1172
1173 index.i[0] += reg->SrcRegisterDim.Index;
1174 index.i[1] += reg->SrcRegisterDim.Index;
1175 index.i[2] += reg->SrcRegisterDim.Index;
1176 index.i[3] += reg->SrcRegisterDim.Index;
1177
1178 /* Again, the second subscript index can be addressed indirectly
1179 * identically to the first one.
1180 * Nothing stops us from indirectly addressing the indirect register,
1181 * but there is no need for that, so we won't exercise it.
1182 *
1183 * file[1][ind[4].y+3],
1184 * where:
1185 * ind = SrcRegisterDimInd.File
1186 * [4] = SrcRegisterDimInd.Index
1187 * .y = SrcRegisterDimInd.SwizzleX
1188 */
1189 if (reg->SrcRegisterDim.Indirect) {
1190 union tgsi_exec_channel index2;
1191 union tgsi_exec_channel indir_index;
1192 const uint execmask = mach->ExecMask;
1193 uint i;
1194
1195 index2.i[0] =
1196 index2.i[1] =
1197 index2.i[2] =
1198 index2.i[3] = reg->SrcRegisterDimInd.Index;
1199
1200 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1201 fetch_src_file_channel(
1202 mach,
1203 reg->SrcRegisterDimInd.File,
1204 swizzle,
1205 &index2,
1206 &indir_index );
1207
1208 index.i[0] += (int) indir_index.f[0];
1209 index.i[1] += (int) indir_index.f[1];
1210 index.i[2] += (int) indir_index.f[2];
1211 index.i[3] += (int) indir_index.f[3];
1212
1213 /* for disabled execution channels, zero-out the index to
1214 * avoid using a potential garbage value.
1215 */
1216 for (i = 0; i < QUAD_SIZE; i++) {
1217 if ((execmask & (1 << i)) == 0)
1218 index.i[i] = 0;
1219 }
1220 }
1221
1222 /* If by any chance there was a need for a 3D array of register
1223 * files, we would have to check whether SrcRegisterDim is followed
1224 * by a dimension register and continue the saga.
1225 */
1226 }
1227
1228 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1229 fetch_src_file_channel(
1230 mach,
1231 reg->SrcRegister.File,
1232 swizzle,
1233 &index,
1234 chan );
1235
1236 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1237 case TGSI_UTIL_SIGN_CLEAR:
1238 micro_abs( chan, chan );
1239 break;
1240
1241 case TGSI_UTIL_SIGN_SET:
1242 micro_abs( chan, chan );
1243 micro_neg( chan, chan );
1244 break;
1245
1246 case TGSI_UTIL_SIGN_TOGGLE:
1247 micro_neg( chan, chan );
1248 break;
1249
1250 case TGSI_UTIL_SIGN_KEEP:
1251 break;
1252 }
1253
1254 if (reg->SrcRegisterExtMod.Complement) {
1255 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1256 }
1257 }
1258
1259 static void
1260 store_dest(
1261 struct tgsi_exec_machine *mach,
1262 const union tgsi_exec_channel *chan,
1263 const struct tgsi_full_dst_register *reg,
1264 const struct tgsi_full_instruction *inst,
1265 uint chan_index )
1266 {
1267 uint i;
1268 union tgsi_exec_channel null;
1269 union tgsi_exec_channel *dst;
1270 uint execmask = mach->ExecMask;
1271
1272 switch (reg->DstRegister.File) {
1273 case TGSI_FILE_NULL:
1274 dst = &null;
1275 break;
1276
1277 case TGSI_FILE_OUTPUT:
1278 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1279 + reg->DstRegister.Index].xyzw[chan_index];
1280 break;
1281
1282 case TGSI_FILE_TEMPORARY:
1283 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1284 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1285 break;
1286
1287 case TGSI_FILE_ADDRESS:
1288 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1289 break;
1290
1291 default:
1292 assert( 0 );
1293 return;
1294 }
1295
1296 if (inst->InstructionExtNv.CondFlowEnable) {
1297 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1298 uint swizzle;
1299 uint shift;
1300 uint mask;
1301 uint test;
1302
1303 /* Only CC0 supported.
1304 */
1305 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1306
1307 switch (chan_index) {
1308 case CHAN_X:
1309 swizzle = inst->InstructionExtNv.CondSwizzleX;
1310 break;
1311 case CHAN_Y:
1312 swizzle = inst->InstructionExtNv.CondSwizzleY;
1313 break;
1314 case CHAN_Z:
1315 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1316 break;
1317 case CHAN_W:
1318 swizzle = inst->InstructionExtNv.CondSwizzleW;
1319 break;
1320 default:
1321 assert( 0 );
1322 return;
1323 }
1324
1325 switch (swizzle) {
1326 case TGSI_SWIZZLE_X:
1327 shift = TGSI_EXEC_CC_X_SHIFT;
1328 mask = TGSI_EXEC_CC_X_MASK;
1329 break;
1330 case TGSI_SWIZZLE_Y:
1331 shift = TGSI_EXEC_CC_Y_SHIFT;
1332 mask = TGSI_EXEC_CC_Y_MASK;
1333 break;
1334 case TGSI_SWIZZLE_Z:
1335 shift = TGSI_EXEC_CC_Z_SHIFT;
1336 mask = TGSI_EXEC_CC_Z_MASK;
1337 break;
1338 case TGSI_SWIZZLE_W:
1339 shift = TGSI_EXEC_CC_W_SHIFT;
1340 mask = TGSI_EXEC_CC_W_MASK;
1341 break;
1342 default:
1343 assert( 0 );
1344 return;
1345 }
1346
1347 switch (inst->InstructionExtNv.CondMask) {
1348 case TGSI_CC_GT:
1349 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1350 for (i = 0; i < QUAD_SIZE; i++)
1351 if (cc->u[i] & test)
1352 execmask &= ~(1 << i);
1353 break;
1354
1355 case TGSI_CC_EQ:
1356 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1357 for (i = 0; i < QUAD_SIZE; i++)
1358 if (cc->u[i] & test)
1359 execmask &= ~(1 << i);
1360 break;
1361
1362 case TGSI_CC_LT:
1363 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1364 for (i = 0; i < QUAD_SIZE; i++)
1365 if (cc->u[i] & test)
1366 execmask &= ~(1 << i);
1367 break;
1368
1369 case TGSI_CC_GE:
1370 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1371 for (i = 0; i < QUAD_SIZE; i++)
1372 if (cc->u[i] & test)
1373 execmask &= ~(1 << i);
1374 break;
1375
1376 case TGSI_CC_LE:
1377 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1378 for (i = 0; i < QUAD_SIZE; i++)
1379 if (cc->u[i] & test)
1380 execmask &= ~(1 << i);
1381 break;
1382
1383 case TGSI_CC_NE:
1384 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1385 for (i = 0; i < QUAD_SIZE; i++)
1386 if (cc->u[i] & test)
1387 execmask &= ~(1 << i);
1388 break;
1389
1390 case TGSI_CC_TR:
1391 break;
1392
1393 case TGSI_CC_FL:
1394 for (i = 0; i < QUAD_SIZE; i++)
1395 execmask &= ~(1 << i);
1396 break;
1397
1398 default:
1399 assert( 0 );
1400 return;
1401 }
1402 }
1403
1404 switch (inst->Instruction.Saturate) {
1405 case TGSI_SAT_NONE:
1406 for (i = 0; i < QUAD_SIZE; i++)
1407 if (execmask & (1 << i))
1408 dst->i[i] = chan->i[i];
1409 break;
1410
1411 case TGSI_SAT_ZERO_ONE:
1412 for (i = 0; i < QUAD_SIZE; i++)
1413 if (execmask & (1 << i)) {
1414 if (chan->f[i] < 0.0f)
1415 dst->f[i] = 0.0f;
1416 else if (chan->f[i] > 1.0f)
1417 dst->f[i] = 1.0f;
1418 else
1419 dst->i[i] = chan->i[i];
1420 }
1421 break;
1422
1423 case TGSI_SAT_MINUS_PLUS_ONE:
1424 for (i = 0; i < QUAD_SIZE; i++)
1425 if (execmask & (1 << i)) {
1426 if (chan->f[i] < -1.0f)
1427 dst->f[i] = -1.0f;
1428 else if (chan->f[i] > 1.0f)
1429 dst->f[i] = 1.0f;
1430 else
1431 dst->i[i] = chan->i[i];
1432 }
1433 break;
1434
1435 default:
1436 assert( 0 );
1437 }
1438
1439 if (inst->InstructionExtNv.CondDstUpdate) {
1440 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1441 uint shift;
1442 uint mask;
1443
1444 /* Only CC0 supported.
1445 */
1446 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1447
1448 switch (chan_index) {
1449 case CHAN_X:
1450 shift = TGSI_EXEC_CC_X_SHIFT;
1451 mask = ~TGSI_EXEC_CC_X_MASK;
1452 break;
1453 case CHAN_Y:
1454 shift = TGSI_EXEC_CC_Y_SHIFT;
1455 mask = ~TGSI_EXEC_CC_Y_MASK;
1456 break;
1457 case CHAN_Z:
1458 shift = TGSI_EXEC_CC_Z_SHIFT;
1459 mask = ~TGSI_EXEC_CC_Z_MASK;
1460 break;
1461 case CHAN_W:
1462 shift = TGSI_EXEC_CC_W_SHIFT;
1463 mask = ~TGSI_EXEC_CC_W_MASK;
1464 break;
1465 default:
1466 assert( 0 );
1467 return;
1468 }
1469
1470 for (i = 0; i < QUAD_SIZE; i++)
1471 if (execmask & (1 << i)) {
1472 cc->u[i] &= mask;
1473 if (dst->f[i] < 0.0f)
1474 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1475 else if (dst->f[i] > 0.0f)
1476 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1477 else if (dst->f[i] == 0.0f)
1478 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1479 else
1480 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1481 }
1482 }
1483 }
1484
/* Fetch channel CHAN of source operand number INDEX of the current
 * instruction into VAL.  Expands to a fetch_source() call and relies on
 * variables named `mach` and `inst` being in scope at the point of use.
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)

/* Store VAL into channel CHAN of destination operand number INDEX of the
 * current instruction.  Expands to a store_dest() call and, like FETCH,
 * relies on `mach` and `inst` being in scope at the point of use.
 */
#define STORE(VAL,INDEX,CHAN)\
    store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1490
1491
1492 /**
1493 * Execute ARB-style KIL which is predicated by a src register.
1494 * Kill fragment if any of the four values is less than zero.
1495 */
1496 static void
1497 exec_kil(struct tgsi_exec_machine *mach,
1498 const struct tgsi_full_instruction *inst)
1499 {
1500 uint uniquemask;
1501 uint chan_index;
1502 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1503 union tgsi_exec_channel r[1];
1504
1505 /* This mask stores component bits that were already tested. Note that
1506 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1507 * tested. */
1508 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1509
1510 for (chan_index = 0; chan_index < 4; chan_index++)
1511 {
1512 uint swizzle;
1513 uint i;
1514
1515 /* unswizzle channel */
1516 swizzle = tgsi_util_get_full_src_register_extswizzle (
1517 &inst->FullSrcRegisters[0],
1518 chan_index);
1519
1520 /* check if the component has not been already tested */
1521 if (uniquemask & (1 << swizzle))
1522 continue;
1523 uniquemask |= 1 << swizzle;
1524
1525 FETCH(&r[0], 0, chan_index);
1526 for (i = 0; i < 4; i++)
1527 if (r[0].f[i] < 0.0f)
1528 kilmask |= 1 << i;
1529 }
1530
1531 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1532 }
1533
1534 /**
1535 * Execute NVIDIA-style KIL which is predicated by a condition code.
1536 * Kill fragment if the condition code is TRUE.
1537 */
1538 static void
1539 exec_kilp(struct tgsi_exec_machine *mach,
1540 const struct tgsi_full_instruction *inst)
1541 {
1542 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1543
1544 if (inst->InstructionExtNv.CondFlowEnable) {
1545 uint swizzle[4];
1546 uint chan_index;
1547
1548 kilmask = 0x0;
1549
1550 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1551 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1552 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1553 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1554
1555 for (chan_index = 0; chan_index < 4; chan_index++)
1556 {
1557 uint i;
1558
1559 for (i = 0; i < 4; i++) {
1560 /* TODO: evaluate the condition code */
1561 if (0)
1562 kilmask |= 1 << i;
1563 }
1564 }
1565 }
1566 else {
1567 /* "unconditional" kil */
1568 kilmask = mach->ExecMask;
1569 }
1570 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1571 }
1572
1573
1574 /*
1575 * Fetch a four texture samples using STR texture coordinates.
1576 */
1577 static void
1578 fetch_texel( struct tgsi_sampler *sampler,
1579 const union tgsi_exec_channel *s,
1580 const union tgsi_exec_channel *t,
1581 const union tgsi_exec_channel *p,
1582 float lodbias, /* XXX should be float[4] */
1583 union tgsi_exec_channel *r,
1584 union tgsi_exec_channel *g,
1585 union tgsi_exec_channel *b,
1586 union tgsi_exec_channel *a )
1587 {
1588 uint j;
1589 float rgba[NUM_CHANNELS][QUAD_SIZE];
1590
1591 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1592
1593 for (j = 0; j < 4; j++) {
1594 r->f[j] = rgba[0][j];
1595 g->f[j] = rgba[1][j];
1596 b->f[j] = rgba[2][j];
1597 a->f[j] = rgba[3][j];
1598 }
1599 }
1600
1601
1602 static void
1603 exec_tex(struct tgsi_exec_machine *mach,
1604 const struct tgsi_full_instruction *inst,
1605 boolean biasLod,
1606 boolean projected)
1607 {
1608 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1609 union tgsi_exec_channel r[4];
1610 uint chan_index;
1611 float lodBias;
1612
1613 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1614
1615 switch (inst->InstructionExtTexture.Texture) {
1616 case TGSI_TEXTURE_1D:
1617
1618 FETCH(&r[0], 0, CHAN_X);
1619
1620 if (projected) {
1621 FETCH(&r[1], 0, CHAN_W);
1622 micro_div( &r[0], &r[0], &r[1] );
1623 }
1624
1625 if (biasLod) {
1626 FETCH(&r[1], 0, CHAN_W);
1627 lodBias = r[2].f[0];
1628 }
1629 else
1630 lodBias = 0.0;
1631
1632 fetch_texel(mach->Samplers[unit],
1633 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1634 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1635 break;
1636
1637 case TGSI_TEXTURE_2D:
1638 case TGSI_TEXTURE_RECT:
1639
1640 FETCH(&r[0], 0, CHAN_X);
1641 FETCH(&r[1], 0, CHAN_Y);
1642 FETCH(&r[2], 0, CHAN_Z);
1643
1644 if (projected) {
1645 FETCH(&r[3], 0, CHAN_W);
1646 micro_div( &r[0], &r[0], &r[3] );
1647 micro_div( &r[1], &r[1], &r[3] );
1648 micro_div( &r[2], &r[2], &r[3] );
1649 }
1650
1651 if (biasLod) {
1652 FETCH(&r[3], 0, CHAN_W);
1653 lodBias = r[3].f[0];
1654 }
1655 else
1656 lodBias = 0.0;
1657
1658 fetch_texel(mach->Samplers[unit],
1659 &r[0], &r[1], &r[2], lodBias, /* inputs */
1660 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1661 break;
1662
1663 case TGSI_TEXTURE_3D:
1664 case TGSI_TEXTURE_CUBE:
1665
1666 FETCH(&r[0], 0, CHAN_X);
1667 FETCH(&r[1], 0, CHAN_Y);
1668 FETCH(&r[2], 0, CHAN_Z);
1669
1670 if (projected) {
1671 FETCH(&r[3], 0, CHAN_W);
1672 micro_div( &r[0], &r[0], &r[3] );
1673 micro_div( &r[1], &r[1], &r[3] );
1674 micro_div( &r[2], &r[2], &r[3] );
1675 }
1676
1677 if (biasLod) {
1678 FETCH(&r[3], 0, CHAN_W);
1679 lodBias = r[3].f[0];
1680 }
1681 else
1682 lodBias = 0.0;
1683
1684 fetch_texel(mach->Samplers[unit],
1685 &r[0], &r[1], &r[2], lodBias,
1686 &r[0], &r[1], &r[2], &r[3]);
1687 break;
1688
1689 default:
1690 assert (0);
1691 }
1692
1693 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1694 STORE( &r[chan_index], 0, chan_index );
1695 }
1696 }
1697
1698
1699 /**
1700 * Evaluate a constant-valued coefficient at the position of the
1701 * current quad.
1702 */
1703 static void
1704 eval_constant_coef(
1705 struct tgsi_exec_machine *mach,
1706 unsigned attrib,
1707 unsigned chan )
1708 {
1709 unsigned i;
1710
1711 for( i = 0; i < QUAD_SIZE; i++ ) {
1712 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1713 }
1714 }
1715
1716 /**
1717 * Evaluate a linear-valued coefficient at the position of the
1718 * current quad.
1719 */
1720 static void
1721 eval_linear_coef(
1722 struct tgsi_exec_machine *mach,
1723 unsigned attrib,
1724 unsigned chan )
1725 {
1726 const float x = mach->QuadPos.xyzw[0].f[0];
1727 const float y = mach->QuadPos.xyzw[1].f[0];
1728 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1729 const float dady = mach->InterpCoefs[attrib].dady[chan];
1730 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1731 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1732 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1733 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1734 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1735 }
1736
1737 /**
1738 * Evaluate a perspective-valued coefficient at the position of the
1739 * current quad.
1740 */
1741 static void
1742 eval_perspective_coef(
1743 struct tgsi_exec_machine *mach,
1744 unsigned attrib,
1745 unsigned chan )
1746 {
1747 const float x = mach->QuadPos.xyzw[0].f[0];
1748 const float y = mach->QuadPos.xyzw[1].f[0];
1749 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1750 const float dady = mach->InterpCoefs[attrib].dady[chan];
1751 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1752 const float *w = mach->QuadPos.xyzw[3].f;
1753 /* divide by W here */
1754 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1755 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1756 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1757 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1758 }
1759
1760
/**
 * Signature shared by eval_constant_coef(), eval_linear_coef() and
 * eval_perspective_coef(): evaluate channel `chan` of input attribute
 * `attrib` for the current quad.  exec_declaration() picks one of them
 * through a pointer of this type.
 */
typedef void (* eval_coef_func)(
   struct tgsi_exec_machine *mach,
   unsigned attrib,
   unsigned chan );
1765
1766 static void
1767 exec_declaration(
1768 struct tgsi_exec_machine *mach,
1769 const struct tgsi_full_declaration *decl )
1770 {
1771 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1772 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1773 unsigned first, last, mask;
1774 eval_coef_func eval;
1775
1776 first = decl->DeclarationRange.First;
1777 last = decl->DeclarationRange.Last;
1778 mask = decl->Declaration.UsageMask;
1779
1780 switch( decl->Declaration.Interpolate ) {
1781 case TGSI_INTERPOLATE_CONSTANT:
1782 eval = eval_constant_coef;
1783 break;
1784
1785 case TGSI_INTERPOLATE_LINEAR:
1786 eval = eval_linear_coef;
1787 break;
1788
1789 case TGSI_INTERPOLATE_PERSPECTIVE:
1790 eval = eval_perspective_coef;
1791 break;
1792
1793 default:
1794 eval = NULL;
1795 assert( 0 );
1796 }
1797
1798 if( mask == TGSI_WRITEMASK_XYZW ) {
1799 unsigned i, j;
1800
1801 for( i = first; i <= last; i++ ) {
1802 for( j = 0; j < NUM_CHANNELS; j++ ) {
1803 eval( mach, i, j );
1804 }
1805 }
1806 }
1807 else {
1808 unsigned i, j;
1809
1810 for( j = 0; j < NUM_CHANNELS; j++ ) {
1811 if( mask & (1 << j) ) {
1812 for( i = first; i <= last; i++ ) {
1813 eval( mach, i, j );
1814 }
1815 }
1816 }
1817 }
1818 }
1819 }
1820 }
1821
1822 static void
1823 exec_instruction(
1824 struct tgsi_exec_machine *mach,
1825 const struct tgsi_full_instruction *inst,
1826 int *pc )
1827 {
1828 uint chan_index;
1829 union tgsi_exec_channel r[8];
1830
1831 (*pc)++;
1832
1833 switch (inst->Instruction.Opcode) {
1834 case TGSI_OPCODE_ARL:
1835 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1836 FETCH( &r[0], 0, chan_index );
1837 micro_flr( &r[0], &r[0] );
1838 STORE( &r[0], 0, chan_index );
1839 }
1840 break;
1841
1842 case TGSI_OPCODE_MOV:
1843 case TGSI_OPCODE_SWZ:
1844 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1845 FETCH( &r[0], 0, chan_index );
1846 STORE( &r[0], 0, chan_index );
1847 }
1848 break;
1849
1850 case TGSI_OPCODE_LIT:
1851 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1852 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1853 }
1854
1855 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1856 FETCH( &r[0], 0, CHAN_X );
1857 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1858 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1859 STORE( &r[0], 0, CHAN_Y );
1860 }
1861
1862 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1863 FETCH( &r[1], 0, CHAN_Y );
1864 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1865
1866 FETCH( &r[2], 0, CHAN_W );
1867 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1868 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1869 micro_pow( &r[1], &r[1], &r[2] );
1870 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1871 STORE( &r[0], 0, CHAN_Z );
1872 }
1873 }
1874
1875 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1876 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1877 }
1878 break;
1879
1880 case TGSI_OPCODE_RCP:
1881 /* TGSI_OPCODE_RECIP */
1882 FETCH( &r[0], 0, CHAN_X );
1883 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1884 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1885 STORE( &r[0], 0, chan_index );
1886 }
1887 break;
1888
1889 case TGSI_OPCODE_RSQ:
1890 /* TGSI_OPCODE_RECIPSQRT */
1891 FETCH( &r[0], 0, CHAN_X );
1892 micro_sqrt( &r[0], &r[0] );
1893 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1894 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1895 STORE( &r[0], 0, chan_index );
1896 }
1897 break;
1898
1899 case TGSI_OPCODE_EXP:
1900 FETCH( &r[0], 0, CHAN_X );
1901 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1902 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1903 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1904 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1905 }
1906 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1907 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1908 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1909 }
1910 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1911 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1912 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1913 }
1914 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1915 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1916 }
1917 break;
1918
1919 case TGSI_OPCODE_LOG:
1920 FETCH( &r[0], 0, CHAN_X );
1921 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1922 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1923 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1924 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1925 STORE( &r[0], 0, CHAN_X );
1926 }
1927 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1928 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1929 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1930 STORE( &r[0], 0, CHAN_Y );
1931 }
1932 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1933 STORE( &r[1], 0, CHAN_Z );
1934 }
1935 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1936 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1937 }
1938 break;
1939
1940 case TGSI_OPCODE_MUL:
1941 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1942 {
1943 FETCH(&r[0], 0, chan_index);
1944 FETCH(&r[1], 1, chan_index);
1945
1946 micro_mul( &r[0], &r[0], &r[1] );
1947
1948 STORE(&r[0], 0, chan_index);
1949 }
1950 break;
1951
1952 case TGSI_OPCODE_ADD:
1953 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1954 FETCH( &r[0], 0, chan_index );
1955 FETCH( &r[1], 1, chan_index );
1956 micro_add( &r[0], &r[0], &r[1] );
1957 STORE( &r[0], 0, chan_index );
1958 }
1959 break;
1960
1961 case TGSI_OPCODE_DP3:
1962 /* TGSI_OPCODE_DOT3 */
1963 FETCH( &r[0], 0, CHAN_X );
1964 FETCH( &r[1], 1, CHAN_X );
1965 micro_mul( &r[0], &r[0], &r[1] );
1966
1967 FETCH( &r[1], 0, CHAN_Y );
1968 FETCH( &r[2], 1, CHAN_Y );
1969 micro_mul( &r[1], &r[1], &r[2] );
1970 micro_add( &r[0], &r[0], &r[1] );
1971
1972 FETCH( &r[1], 0, CHAN_Z );
1973 FETCH( &r[2], 1, CHAN_Z );
1974 micro_mul( &r[1], &r[1], &r[2] );
1975 micro_add( &r[0], &r[0], &r[1] );
1976
1977 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1978 STORE( &r[0], 0, chan_index );
1979 }
1980 break;
1981
1982 case TGSI_OPCODE_DP4:
1983 /* TGSI_OPCODE_DOT4 */
1984 FETCH(&r[0], 0, CHAN_X);
1985 FETCH(&r[1], 1, CHAN_X);
1986
1987 micro_mul( &r[0], &r[0], &r[1] );
1988
1989 FETCH(&r[1], 0, CHAN_Y);
1990 FETCH(&r[2], 1, CHAN_Y);
1991
1992 micro_mul( &r[1], &r[1], &r[2] );
1993 micro_add( &r[0], &r[0], &r[1] );
1994
1995 FETCH(&r[1], 0, CHAN_Z);
1996 FETCH(&r[2], 1, CHAN_Z);
1997
1998 micro_mul( &r[1], &r[1], &r[2] );
1999 micro_add( &r[0], &r[0], &r[1] );
2000
2001 FETCH(&r[1], 0, CHAN_W);
2002 FETCH(&r[2], 1, CHAN_W);
2003
2004 micro_mul( &r[1], &r[1], &r[2] );
2005 micro_add( &r[0], &r[0], &r[1] );
2006
2007 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2008 STORE( &r[0], 0, chan_index );
2009 }
2010 break;
2011
2012 case TGSI_OPCODE_DST:
2013 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2014 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2015 }
2016
2017 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2018 FETCH( &r[0], 0, CHAN_Y );
2019 FETCH( &r[1], 1, CHAN_Y);
2020 micro_mul( &r[0], &r[0], &r[1] );
2021 STORE( &r[0], 0, CHAN_Y );
2022 }
2023
2024 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2025 FETCH( &r[0], 0, CHAN_Z );
2026 STORE( &r[0], 0, CHAN_Z );
2027 }
2028
2029 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2030 FETCH( &r[0], 1, CHAN_W );
2031 STORE( &r[0], 0, CHAN_W );
2032 }
2033 break;
2034
2035 case TGSI_OPCODE_MIN:
2036 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2037 FETCH(&r[0], 0, chan_index);
2038 FETCH(&r[1], 1, chan_index);
2039
2040 /* XXX use micro_min()?? */
2041 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
2042
2043 STORE(&r[0], 0, chan_index);
2044 }
2045 break;
2046
2047 case TGSI_OPCODE_MAX:
2048 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2049 FETCH(&r[0], 0, chan_index);
2050 FETCH(&r[1], 1, chan_index);
2051
2052 /* XXX use micro_max()?? */
2053 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2054
2055 STORE(&r[0], 0, chan_index );
2056 }
2057 break;
2058
2059 case TGSI_OPCODE_SLT:
2060 /* TGSI_OPCODE_SETLT */
2061 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2062 FETCH( &r[0], 0, chan_index );
2063 FETCH( &r[1], 1, chan_index );
2064 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2065 STORE( &r[0], 0, chan_index );
2066 }
2067 break;
2068
2069 case TGSI_OPCODE_SGE:
2070 /* TGSI_OPCODE_SETGE */
2071 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2072 FETCH( &r[0], 0, chan_index );
2073 FETCH( &r[1], 1, chan_index );
2074 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2075 STORE( &r[0], 0, chan_index );
2076 }
2077 break;
2078
2079 case TGSI_OPCODE_MAD:
2080 /* TGSI_OPCODE_MADD */
2081 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2082 FETCH( &r[0], 0, chan_index );
2083 FETCH( &r[1], 1, chan_index );
2084 micro_mul( &r[0], &r[0], &r[1] );
2085 FETCH( &r[1], 2, chan_index );
2086 micro_add( &r[0], &r[0], &r[1] );
2087 STORE( &r[0], 0, chan_index );
2088 }
2089 break;
2090
2091 case TGSI_OPCODE_SUB:
2092 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2093 FETCH(&r[0], 0, chan_index);
2094 FETCH(&r[1], 1, chan_index);
2095
2096 micro_sub( &r[0], &r[0], &r[1] );
2097
2098 STORE(&r[0], 0, chan_index);
2099 }
2100 break;
2101
2102 case TGSI_OPCODE_LERP:
2103 /* TGSI_OPCODE_LRP */
2104 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2105 FETCH(&r[0], 0, chan_index);
2106 FETCH(&r[1], 1, chan_index);
2107 FETCH(&r[2], 2, chan_index);
2108
2109 micro_sub( &r[1], &r[1], &r[2] );
2110 micro_mul( &r[0], &r[0], &r[1] );
2111 micro_add( &r[0], &r[0], &r[2] );
2112
2113 STORE(&r[0], 0, chan_index);
2114 }
2115 break;
2116
2117 case TGSI_OPCODE_CND:
2118 assert (0);
2119 break;
2120
2121 case TGSI_OPCODE_CND0:
2122 assert (0);
2123 break;
2124
2125 case TGSI_OPCODE_DOT2ADD:
2126 /* TGSI_OPCODE_DP2A */
2127 FETCH( &r[0], 0, CHAN_X );
2128 FETCH( &r[1], 1, CHAN_X );
2129 micro_mul( &r[0], &r[0], &r[1] );
2130
2131 FETCH( &r[1], 0, CHAN_Y );
2132 FETCH( &r[2], 1, CHAN_Y );
2133 micro_mul( &r[1], &r[1], &r[2] );
2134 micro_add( &r[0], &r[0], &r[1] );
2135
2136 FETCH( &r[2], 2, CHAN_X );
2137 micro_add( &r[0], &r[0], &r[2] );
2138
2139 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2140 STORE( &r[0], 0, chan_index );
2141 }
2142 break;
2143
2144 case TGSI_OPCODE_INDEX:
2145 assert (0);
2146 break;
2147
2148 case TGSI_OPCODE_NEGATE:
2149 assert (0);
2150 break;
2151
2152 case TGSI_OPCODE_FRAC:
2153 /* TGSI_OPCODE_FRC */
2154 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2155 FETCH( &r[0], 0, chan_index );
2156 micro_frc( &r[0], &r[0] );
2157 STORE( &r[0], 0, chan_index );
2158 }
2159 break;
2160
2161 case TGSI_OPCODE_CLAMP:
2162 assert (0);
2163 break;
2164
2165 case TGSI_OPCODE_FLOOR:
2166 /* TGSI_OPCODE_FLR */
2167 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2168 FETCH( &r[0], 0, chan_index );
2169 micro_flr( &r[0], &r[0] );
2170 STORE( &r[0], 0, chan_index );
2171 }
2172 break;
2173
2174 case TGSI_OPCODE_ROUND:
2175 case TGSI_OPCODE_ARR:
2176 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2177 FETCH( &r[0], 0, chan_index );
2178 micro_rnd( &r[0], &r[0] );
2179 STORE( &r[0], 0, chan_index );
2180 }
2181 break;
2182
2183 case TGSI_OPCODE_EXPBASE2:
2184 /* TGSI_OPCODE_EX2 */
2185 FETCH(&r[0], 0, CHAN_X);
2186
2187 #if FAST_MATH
2188 micro_exp2( &r[0], &r[0] );
2189 #else
2190 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2191 #endif
2192
2193 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2194 STORE( &r[0], 0, chan_index );
2195 }
2196 break;
2197
2198 case TGSI_OPCODE_LOGBASE2:
2199 /* TGSI_OPCODE_LG2 */
2200 FETCH( &r[0], 0, CHAN_X );
2201 micro_lg2( &r[0], &r[0] );
2202 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2203 STORE( &r[0], 0, chan_index );
2204 }
2205 break;
2206
2207 case TGSI_OPCODE_POWER:
2208 /* TGSI_OPCODE_POW */
2209 FETCH(&r[0], 0, CHAN_X);
2210 FETCH(&r[1], 1, CHAN_X);
2211
2212 micro_pow( &r[0], &r[0], &r[1] );
2213
2214 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2215 STORE( &r[0], 0, chan_index );
2216 }
2217 break;
2218
2219 case TGSI_OPCODE_CROSSPRODUCT:
2220 /* TGSI_OPCODE_XPD */
2221 FETCH(&r[0], 0, CHAN_Y);
2222 FETCH(&r[1], 1, CHAN_Z);
2223
2224 micro_mul( &r[2], &r[0], &r[1] );
2225
2226 FETCH(&r[3], 0, CHAN_Z);
2227 FETCH(&r[4], 1, CHAN_Y);
2228
2229 micro_mul( &r[5], &r[3], &r[4] );
2230 micro_sub( &r[2], &r[2], &r[5] );
2231
2232 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2233 STORE( &r[2], 0, CHAN_X );
2234 }
2235
2236 FETCH(&r[2], 1, CHAN_X);
2237
2238 micro_mul( &r[3], &r[3], &r[2] );
2239
2240 FETCH(&r[5], 0, CHAN_X);
2241
2242 micro_mul( &r[1], &r[1], &r[5] );
2243 micro_sub( &r[3], &r[3], &r[1] );
2244
2245 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2246 STORE( &r[3], 0, CHAN_Y );
2247 }
2248
2249 micro_mul( &r[5], &r[5], &r[4] );
2250 micro_mul( &r[0], &r[0], &r[2] );
2251 micro_sub( &r[5], &r[5], &r[0] );
2252
2253 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2254 STORE( &r[5], 0, CHAN_Z );
2255 }
2256
2257 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2258 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2259 }
2260 break;
2261
2262 case TGSI_OPCODE_MULTIPLYMATRIX:
2263 assert (0);
2264 break;
2265
2266 case TGSI_OPCODE_ABS:
2267 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2268 FETCH(&r[0], 0, chan_index);
2269
2270 micro_abs( &r[0], &r[0] );
2271
2272 STORE(&r[0], 0, chan_index);
2273 }
2274 break;
2275
2276 case TGSI_OPCODE_RCC:
2277 assert (0);
2278 break;
2279
2280 case TGSI_OPCODE_DPH:
2281 FETCH(&r[0], 0, CHAN_X);
2282 FETCH(&r[1], 1, CHAN_X);
2283
2284 micro_mul( &r[0], &r[0], &r[1] );
2285
2286 FETCH(&r[1], 0, CHAN_Y);
2287 FETCH(&r[2], 1, CHAN_Y);
2288
2289 micro_mul( &r[1], &r[1], &r[2] );
2290 micro_add( &r[0], &r[0], &r[1] );
2291
2292 FETCH(&r[1], 0, CHAN_Z);
2293 FETCH(&r[2], 1, CHAN_Z);
2294
2295 micro_mul( &r[1], &r[1], &r[2] );
2296 micro_add( &r[0], &r[0], &r[1] );
2297
2298 FETCH(&r[1], 1, CHAN_W);
2299
2300 micro_add( &r[0], &r[0], &r[1] );
2301
2302 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2303 STORE( &r[0], 0, chan_index );
2304 }
2305 break;
2306
2307 case TGSI_OPCODE_COS:
2308 FETCH(&r[0], 0, CHAN_X);
2309
2310 micro_cos( &r[0], &r[0] );
2311
2312 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2313 STORE( &r[0], 0, chan_index );
2314 }
2315 break;
2316
2317 case TGSI_OPCODE_DDX:
2318 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2319 FETCH( &r[0], 0, chan_index );
2320 micro_ddx( &r[0], &r[0] );
2321 STORE( &r[0], 0, chan_index );
2322 }
2323 break;
2324
2325 case TGSI_OPCODE_DDY:
2326 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2327 FETCH( &r[0], 0, chan_index );
2328 micro_ddy( &r[0], &r[0] );
2329 STORE( &r[0], 0, chan_index );
2330 }
2331 break;
2332
2333 case TGSI_OPCODE_KILP:
2334 exec_kilp (mach, inst);
2335 break;
2336
2337 case TGSI_OPCODE_KIL:
2338 exec_kil (mach, inst);
2339 break;
2340
2341 case TGSI_OPCODE_PK2H:
2342 assert (0);
2343 break;
2344
2345 case TGSI_OPCODE_PK2US:
2346 assert (0);
2347 break;
2348
2349 case TGSI_OPCODE_PK4B:
2350 assert (0);
2351 break;
2352
2353 case TGSI_OPCODE_PK4UB:
2354 assert (0);
2355 break;
2356
2357 case TGSI_OPCODE_RFL:
2358 assert (0);
2359 break;
2360
2361 case TGSI_OPCODE_SEQ:
2362 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2363 FETCH( &r[0], 0, chan_index );
2364 FETCH( &r[1], 1, chan_index );
2365 micro_eq( &r[0], &r[0], &r[1],
2366 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2367 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2368 STORE( &r[0], 0, chan_index );
2369 }
2370 break;
2371
2372 case TGSI_OPCODE_SFL:
2373 assert (0);
2374 break;
2375
2376 case TGSI_OPCODE_SGT:
2377 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2378 FETCH( &r[0], 0, chan_index );
2379 FETCH( &r[1], 1, chan_index );
2380 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2381 STORE( &r[0], 0, chan_index );
2382 }
2383 break;
2384
2385 case TGSI_OPCODE_SIN:
2386 FETCH( &r[0], 0, CHAN_X );
2387 micro_sin( &r[0], &r[0] );
2388 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2389 STORE( &r[0], 0, chan_index );
2390 }
2391 break;
2392
2393 case TGSI_OPCODE_SLE:
2394 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2395 FETCH( &r[0], 0, chan_index );
2396 FETCH( &r[1], 1, chan_index );
2397 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2398 STORE( &r[0], 0, chan_index );
2399 }
2400 break;
2401
2402 case TGSI_OPCODE_SNE:
2403 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2404 FETCH( &r[0], 0, chan_index );
2405 FETCH( &r[1], 1, chan_index );
2406 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2407 STORE( &r[0], 0, chan_index );
2408 }
2409 break;
2410
2411 case TGSI_OPCODE_STR:
2412 assert (0);
2413 break;
2414
2415 case TGSI_OPCODE_TEX:
2416 /* simple texture lookup */
2417 /* src[0] = texcoord */
2418 /* src[1] = sampler unit */
2419 exec_tex(mach, inst, FALSE, FALSE);
2420 break;
2421
2422 case TGSI_OPCODE_TXB:
2423 /* Texture lookup with lod bias */
2424 /* src[0] = texcoord (src[0].w = LOD bias) */
2425 /* src[1] = sampler unit */
2426 exec_tex(mach, inst, TRUE, FALSE);
2427 break;
2428
2429 case TGSI_OPCODE_TXD:
2430 /* Texture lookup with explicit partial derivatives */
2431 /* src[0] = texcoord */
2432 /* src[1] = d[strq]/dx */
2433 /* src[2] = d[strq]/dy */
2434 /* src[3] = sampler unit */
2435 assert (0);
2436 break;
2437
2438 case TGSI_OPCODE_TXL:
2439 /* Texture lookup with explicit LOD */
2440 /* src[0] = texcoord (src[0].w = LOD) */
2441 /* src[1] = sampler unit */
2442 exec_tex(mach, inst, TRUE, FALSE);
2443 break;
2444
2445 case TGSI_OPCODE_TXP:
2446 /* Texture lookup with projection */
2447 /* src[0] = texcoord (src[0].w = projection) */
2448 /* src[1] = sampler unit */
2449 exec_tex(mach, inst, FALSE, TRUE);
2450 break;
2451
2452 case TGSI_OPCODE_UP2H:
2453 assert (0);
2454 break;
2455
2456 case TGSI_OPCODE_UP2US:
2457 assert (0);
2458 break;
2459
2460 case TGSI_OPCODE_UP4B:
2461 assert (0);
2462 break;
2463
2464 case TGSI_OPCODE_UP4UB:
2465 assert (0);
2466 break;
2467
2468 case TGSI_OPCODE_X2D:
2469 assert (0);
2470 break;
2471
2472 case TGSI_OPCODE_ARA:
2473 assert (0);
2474 break;
2475
2476 case TGSI_OPCODE_BRA:
2477 assert (0);
2478 break;
2479
2480 case TGSI_OPCODE_CAL:
2481 /* skip the call if no execution channels are enabled */
2482 if (mach->ExecMask) {
2483 /* do the call */
2484
2485 /* push the Cond, Loop, Cont stacks */
2486 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2487 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2488 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2489 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2490 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2491 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2492
2493 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2494 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2495
2496 /* note that PC was already incremented above */
2497 mach->CallStack[mach->CallStackTop++] = *pc;
2498 *pc = inst->InstructionExtLabel.Label;
2499 }
2500 break;
2501
2502 case TGSI_OPCODE_RET:
2503 mach->FuncMask &= ~mach->ExecMask;
2504 UPDATE_EXEC_MASK(mach);
2505
2506 if (mach->FuncMask == 0x0) {
2507 /* really return now (otherwise, keep executing) */
2508
2509 if (mach->CallStackTop == 0) {
2510 /* returning from main() */
2511 *pc = -1;
2512 return;
2513 }
2514 *pc = mach->CallStack[--mach->CallStackTop];
2515
2516 /* pop the Cond, Loop, Cont stacks */
2517 assert(mach->CondStackTop > 0);
2518 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2519 assert(mach->LoopStackTop > 0);
2520 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2521 assert(mach->ContStackTop > 0);
2522 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2523 assert(mach->FuncStackTop > 0);
2524 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2525
2526 UPDATE_EXEC_MASK(mach);
2527 }
2528 break;
2529
2530 case TGSI_OPCODE_SSG:
2531 /* TGSI_OPCODE_SGN */
2532 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2533 FETCH( &r[0], 0, chan_index );
2534 micro_sgn( &r[0], &r[0] );
2535 STORE( &r[0], 0, chan_index );
2536 }
2537 break;
2538
2539 case TGSI_OPCODE_CMP:
2540 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2541 FETCH(&r[0], 0, chan_index);
2542 FETCH(&r[1], 1, chan_index);
2543 FETCH(&r[2], 2, chan_index);
2544
2545 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2546
2547 STORE(&r[0], 0, chan_index);
2548 }
2549 break;
2550
2551 case TGSI_OPCODE_SCS:
2552 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2553 FETCH( &r[0], 0, CHAN_X );
2554 }
2555 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2556 micro_cos( &r[1], &r[0] );
2557 STORE( &r[1], 0, CHAN_X );
2558 }
2559 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2560 micro_sin( &r[1], &r[0] );
2561 STORE( &r[1], 0, CHAN_Y );
2562 }
2563 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2564 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2565 }
2566 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2567 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2568 }
2569 break;
2570
2571 case TGSI_OPCODE_NRM:
2572 /* 3-component vector normalize */
2573 {
2574 union tgsi_exec_channel tmp, dot;
2575
2576 /* tmp = dp3(src0, src0): */
2577 FETCH( &r[0], 0, CHAN_X );
2578 micro_mul( &tmp, &r[0], &r[0] );
2579
2580 FETCH( &r[1], 0, CHAN_Y );
2581 micro_mul( &dot, &r[1], &r[1] );
2582 micro_add( &tmp, &tmp, &dot );
2583
2584 FETCH( &r[2], 0, CHAN_Z );
2585 micro_mul( &dot, &r[2], &r[2] );
2586 micro_add( &tmp, &tmp, &dot );
2587
2588 /* tmp = 1 / sqrt(tmp) */
2589 micro_sqrt( &tmp, &tmp );
2590 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2591
2592 /* note: w channel is undefined */
2593 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2594 /* chan = chan * tmp */
2595 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2596 STORE( &r[chan_index], 0, chan_index );
2597 }
2598 }
2599 break;
2600
2601 case TGSI_OPCODE_NRM4:
2602 /* 4-component vector normalize */
2603 {
2604 union tgsi_exec_channel tmp, dot;
2605
2606 /* tmp = dp4(src0, src0): */
2607 FETCH( &r[0], 0, CHAN_X );
2608 micro_mul( &tmp, &r[0], &r[0] );
2609
2610 FETCH( &r[1], 0, CHAN_Y );
2611 micro_mul( &dot, &r[1], &r[1] );
2612 micro_add( &tmp, &tmp, &dot );
2613
2614 FETCH( &r[2], 0, CHAN_Z );
2615 micro_mul( &dot, &r[2], &r[2] );
2616 micro_add( &tmp, &tmp, &dot );
2617
2618 FETCH( &r[3], 0, CHAN_W );
2619 micro_mul( &dot, &r[3], &r[3] );
2620 micro_add( &tmp, &tmp, &dot );
2621
2622 /* tmp = 1 / sqrt(tmp) */
2623 micro_sqrt( &tmp, &tmp );
2624 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2625
2626 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2627 /* chan = chan * tmp */
2628 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2629 STORE( &r[chan_index], 0, chan_index );
2630 }
2631 }
2632 break;
2633
2634 case TGSI_OPCODE_DIV:
2635 assert( 0 );
2636 break;
2637
2638 case TGSI_OPCODE_DP2:
2639 FETCH( &r[0], 0, CHAN_X );
2640 FETCH( &r[1], 1, CHAN_X );
2641 micro_mul( &r[0], &r[0], &r[1] );
2642
2643 FETCH( &r[1], 0, CHAN_Y );
2644 FETCH( &r[2], 1, CHAN_Y );
2645 micro_mul( &r[1], &r[1], &r[2] );
2646 micro_add( &r[0], &r[0], &r[1] );
2647
2648 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2649 STORE( &r[0], 0, chan_index );
2650 }
2651 break;
2652
2653 case TGSI_OPCODE_IF:
2654 /* push CondMask */
2655 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2656 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2657 FETCH( &r[0], 0, CHAN_X );
2658 /* update CondMask */
2659 if( ! r[0].u[0] ) {
2660 mach->CondMask &= ~0x1;
2661 }
2662 if( ! r[0].u[1] ) {
2663 mach->CondMask &= ~0x2;
2664 }
2665 if( ! r[0].u[2] ) {
2666 mach->CondMask &= ~0x4;
2667 }
2668 if( ! r[0].u[3] ) {
2669 mach->CondMask &= ~0x8;
2670 }
2671 UPDATE_EXEC_MASK(mach);
2672 /* Todo: If CondMask==0, jump to ELSE */
2673 break;
2674
2675 case TGSI_OPCODE_ELSE:
2676 /* invert CondMask wrt previous mask */
2677 {
2678 uint prevMask;
2679 assert(mach->CondStackTop > 0);
2680 prevMask = mach->CondStack[mach->CondStackTop - 1];
2681 mach->CondMask = ~mach->CondMask & prevMask;
2682 UPDATE_EXEC_MASK(mach);
2683 /* Todo: If CondMask==0, jump to ENDIF */
2684 }
2685 break;
2686
2687 case TGSI_OPCODE_ENDIF:
2688 /* pop CondMask */
2689 assert(mach->CondStackTop > 0);
2690 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2691 UPDATE_EXEC_MASK(mach);
2692 break;
2693
2694 case TGSI_OPCODE_END:
2695 /* halt execution */
2696 *pc = -1;
2697 break;
2698
2699 case TGSI_OPCODE_REP:
2700 assert (0);
2701 break;
2702
2703 case TGSI_OPCODE_ENDREP:
2704 assert (0);
2705 break;
2706
2707 case TGSI_OPCODE_PUSHA:
2708 assert (0);
2709 break;
2710
2711 case TGSI_OPCODE_POPA:
2712 assert (0);
2713 break;
2714
2715 case TGSI_OPCODE_CEIL:
2716 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2717 FETCH( &r[0], 0, chan_index );
2718 micro_ceil( &r[0], &r[0] );
2719 STORE( &r[0], 0, chan_index );
2720 }
2721 break;
2722
2723 case TGSI_OPCODE_I2F:
2724 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2725 FETCH( &r[0], 0, chan_index );
2726 micro_i2f( &r[0], &r[0] );
2727 STORE( &r[0], 0, chan_index );
2728 }
2729 break;
2730
2731 case TGSI_OPCODE_NOT:
2732 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2733 FETCH( &r[0], 0, chan_index );
2734 micro_not( &r[0], &r[0] );
2735 STORE( &r[0], 0, chan_index );
2736 }
2737 break;
2738
2739 case TGSI_OPCODE_TRUNC:
2740 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2741 FETCH( &r[0], 0, chan_index );
2742 micro_trunc( &r[0], &r[0] );
2743 STORE( &r[0], 0, chan_index );
2744 }
2745 break;
2746
2747 case TGSI_OPCODE_SHL:
2748 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2749 FETCH( &r[0], 0, chan_index );
2750 FETCH( &r[1], 1, chan_index );
2751 micro_shl( &r[0], &r[0], &r[1] );
2752 STORE( &r[0], 0, chan_index );
2753 }
2754 break;
2755
2756 case TGSI_OPCODE_SHR:
2757 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2758 FETCH( &r[0], 0, chan_index );
2759 FETCH( &r[1], 1, chan_index );
2760 micro_ishr( &r[0], &r[0], &r[1] );
2761 STORE( &r[0], 0, chan_index );
2762 }
2763 break;
2764
2765 case TGSI_OPCODE_AND:
2766 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2767 FETCH( &r[0], 0, chan_index );
2768 FETCH( &r[1], 1, chan_index );
2769 micro_and( &r[0], &r[0], &r[1] );
2770 STORE( &r[0], 0, chan_index );
2771 }
2772 break;
2773
2774 case TGSI_OPCODE_OR:
2775 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2776 FETCH( &r[0], 0, chan_index );
2777 FETCH( &r[1], 1, chan_index );
2778 micro_or( &r[0], &r[0], &r[1] );
2779 STORE( &r[0], 0, chan_index );
2780 }
2781 break;
2782
2783 case TGSI_OPCODE_MOD:
2784 assert (0);
2785 break;
2786
2787 case TGSI_OPCODE_XOR:
2788 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2789 FETCH( &r[0], 0, chan_index );
2790 FETCH( &r[1], 1, chan_index );
2791 micro_xor( &r[0], &r[0], &r[1] );
2792 STORE( &r[0], 0, chan_index );
2793 }
2794 break;
2795
2796 case TGSI_OPCODE_SAD:
2797 assert (0);
2798 break;
2799
2800 case TGSI_OPCODE_TXF:
2801 assert (0);
2802 break;
2803
2804 case TGSI_OPCODE_TXQ:
2805 assert (0);
2806 break;
2807
2808 case TGSI_OPCODE_EMIT:
2809 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2810 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2811 break;
2812
2813 case TGSI_OPCODE_ENDPRIM:
2814 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2815 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2816 break;
2817
2818 case TGSI_OPCODE_LOOP:
2819 /* fall-through (for now) */
2820 case TGSI_OPCODE_BGNLOOP2:
2821 /* push LoopMask and ContMasks */
2822 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2823 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2824 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2825 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2826 break;
2827
2828 case TGSI_OPCODE_ENDLOOP:
2829 /* fall-through (for now at least) */
2830 case TGSI_OPCODE_ENDLOOP2:
2831 /* Restore ContMask, but don't pop */
2832 assert(mach->ContStackTop > 0);
2833 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2834 UPDATE_EXEC_MASK(mach);
2835 if (mach->ExecMask) {
2836 /* repeat loop: jump to instruction just past BGNLOOP */
2837 *pc = inst->InstructionExtLabel.Label + 1;
2838 }
2839 else {
2840 /* exit loop: pop LoopMask */
2841 assert(mach->LoopStackTop > 0);
2842 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2843 /* pop ContMask */
2844 assert(mach->ContStackTop > 0);
2845 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2846 }
2847 UPDATE_EXEC_MASK(mach);
2848 break;
2849
2850 case TGSI_OPCODE_BRK:
2851 /* turn off loop channels for each enabled exec channel */
2852 mach->LoopMask &= ~mach->ExecMask;
2853 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2854 UPDATE_EXEC_MASK(mach);
2855 break;
2856
2857 case TGSI_OPCODE_CONT:
2858 /* turn off cont channels for each enabled exec channel */
2859 mach->ContMask &= ~mach->ExecMask;
2860 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2861 UPDATE_EXEC_MASK(mach);
2862 break;
2863
2864 case TGSI_OPCODE_BGNSUB:
2865 /* no-op */
2866 break;
2867
2868 case TGSI_OPCODE_ENDSUB:
2869 /* no-op */
2870 break;
2871
2872 case TGSI_OPCODE_NOISE1:
2873 assert( 0 );
2874 break;
2875
2876 case TGSI_OPCODE_NOISE2:
2877 assert( 0 );
2878 break;
2879
2880 case TGSI_OPCODE_NOISE3:
2881 assert( 0 );
2882 break;
2883
2884 case TGSI_OPCODE_NOISE4:
2885 assert( 0 );
2886 break;
2887
2888 case TGSI_OPCODE_NOP:
2889 break;
2890
2891 default:
2892 assert( 0 );
2893 }
2894 }
2895
2896
2897 /**
2898 * Run TGSI interpreter.
2899 * \return bitmask of "alive" quad components
2900 */
2901 uint
2902 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2903 {
2904 uint i;
2905 int pc = 0;
2906
2907 mach->CondMask = 0xf;
2908 mach->LoopMask = 0xf;
2909 mach->ContMask = 0xf;
2910 mach->FuncMask = 0xf;
2911 mach->ExecMask = 0xf;
2912
2913 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2914 assert(mach->CondStackTop == 0);
2915 assert(mach->LoopStackTop == 0);
2916 assert(mach->ContStackTop == 0);
2917 assert(mach->CallStackTop == 0);
2918
2919 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2920 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2921
2922 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2923 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2924 mach->Primitives[0] = 0;
2925 }
2926
2927 for (i = 0; i < QUAD_SIZE; i++) {
2928 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2929 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2930 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2931 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2932 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2933 }
2934
2935 /* execute declarations (interpolants) */
2936 for (i = 0; i < mach->NumDeclarations; i++) {
2937 exec_declaration( mach, mach->Declarations+i );
2938 }
2939
2940 /* execute instructions, until pc is set to -1 */
2941 while (pc != -1) {
2942 assert(pc < (int) mach->NumInstructions);
2943 exec_instruction( mach, mach->Instructions + pc, &pc );
2944 }
2945
2946 #if 0
2947 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2948 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2949 /*
2950 * Scale back depth component.
2951 */
2952 for (i = 0; i < 4; i++)
2953 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2954 }
2955 #endif
2956
2957 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2958 }
2959
2960