tgsi: Implement RCC opcode.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask) which are controlled by the flow control
 * instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_parse.h"
57 #include "tgsi/tgsi_util.h"
58 #include "tgsi_exec.h"
59 #include "util/u_memory.h"
60 #include "util/u_math.h"
61
/*
 * Non-zero selects the util_fast_*() approximations in micro_exp2(),
 * micro_lg2() and micro_pow(); zero selects the powf()/logf() paths.
 */
#define FAST_MATH 1

/*
 * Lane indices within a quad; micro_ddx() and micro_ddy() use them to pick
 * the lanes they difference.
 */
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3

/* Channel (component) indices: x, y, z, w. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
 *
 * The constant-valued ones (0, 7F, 80, FF, 1, 2, 128, M128, 3, HALF) are
 * filled in by tgsi_exec_machine_init().
 */
#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I TGSI_EXEC_TEMP_128_I
#define TEMP_128_C TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0 TGSI_EXEC_TEMP_R0
106
/** Non-zero if channel CHAN is set in the write mask of destination 0 of INST. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same as IS_CHANNEL_ENABLED, but for destination register 1 of INST. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel that is
 * enabled in destination 0's write mask, with CHAN holding the channel index.
 * CHAN must be a modifiable lvalue.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same as FOR_EACH_ENABLED_CHANNEL, but for destination register 1. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))


/**
 * The execution mask is the intersection of the conditional, loop,
 * continue and function masks.  MACH is any expression yielding a pointer
 * to the machine; it is evaluated several times, so it must be free of
 * side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
125
126 /**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131 void
132 tgsi_exec_machine_bind_shader(
133 struct tgsi_exec_machine *mach,
134 const struct tgsi_token *tokens,
135 uint numSamplers,
136 struct tgsi_sampler **samplers)
137 {
138 uint k;
139 struct tgsi_parse_context parse;
140 struct tgsi_exec_labels *labels = &mach->Labels;
141 struct tgsi_full_instruction *instructions;
142 struct tgsi_full_declaration *declarations;
143 uint maxInstructions = 10, numInstructions = 0;
144 uint maxDeclarations = 10, numDeclarations = 0;
145 uint instno = 0;
146
147 #if 0
148 tgsi_dump(tokens, 0);
149 #endif
150
151 util_init_math();
152
153 mach->Tokens = tokens;
154 mach->Samplers = samplers;
155
156 k = tgsi_parse_init (&parse, mach->Tokens);
157 if (k != TGSI_PARSE_OK) {
158 debug_printf( "Problem parsing!\n" );
159 return;
160 }
161
162 mach->Processor = parse.FullHeader.Processor.Processor;
163 mach->ImmLimit = 0;
164 labels->count = 0;
165
166 declarations = (struct tgsi_full_declaration *)
167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169 if (!declarations) {
170 return;
171 }
172
173 instructions = (struct tgsi_full_instruction *)
174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176 if (!instructions) {
177 FREE( declarations );
178 return;
179 }
180
181 while( !tgsi_parse_end_of_tokens( &parse ) ) {
182 uint pointer = parse.Position;
183 uint i;
184
185 tgsi_parse_token( &parse );
186 switch( parse.FullToken.Token.Type ) {
187 case TGSI_TOKEN_TYPE_DECLARATION:
188 /* save expanded declaration */
189 if (numDeclarations == maxDeclarations) {
190 declarations = REALLOC(declarations,
191 maxDeclarations
192 * sizeof(struct tgsi_full_declaration),
193 (maxDeclarations + 10)
194 * sizeof(struct tgsi_full_declaration));
195 maxDeclarations += 10;
196 }
197 memcpy(declarations + numDeclarations,
198 &parse.FullToken.FullDeclaration,
199 sizeof(declarations[0]));
200 numDeclarations++;
201 break;
202
203 case TGSI_TOKEN_TYPE_IMMEDIATE:
204 {
205 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
206 assert( size % 4 == 0 );
207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209 for( i = 0; i < size; i++ ) {
210 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212 }
213 mach->ImmLimit += size / 4;
214 }
215 break;
216
217 case TGSI_TOKEN_TYPE_INSTRUCTION:
218 assert( labels->count < MAX_LABELS );
219
220 labels->labels[labels->count][0] = instno;
221 labels->labels[labels->count][1] = pointer;
222 labels->count++;
223
224 /* save expanded instruction */
225 if (numInstructions == maxInstructions) {
226 instructions = REALLOC(instructions,
227 maxInstructions
228 * sizeof(struct tgsi_full_instruction),
229 (maxInstructions + 10)
230 * sizeof(struct tgsi_full_instruction));
231 maxInstructions += 10;
232 }
233 memcpy(instructions + numInstructions,
234 &parse.FullToken.FullInstruction,
235 sizeof(instructions[0]));
236 numInstructions++;
237 break;
238
239 default:
240 assert( 0 );
241 }
242 }
243 tgsi_parse_free (&parse);
244
245 if (mach->Declarations) {
246 FREE( mach->Declarations );
247 }
248 mach->Declarations = declarations;
249 mach->NumDeclarations = numDeclarations;
250
251 if (mach->Instructions) {
252 FREE( mach->Instructions );
253 }
254 mach->Instructions = instructions;
255 mach->NumInstructions = numInstructions;
256 }
257
258
259 void
260 tgsi_exec_machine_init(
261 struct tgsi_exec_machine *mach )
262 {
263 uint i;
264
265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268 /* Setup constants. */
269 for( i = 0; i < 4; i++ ) {
270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280 }
281 }
282
283
284 void
285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286 {
287 if (mach->Instructions) {
288 FREE(mach->Instructions);
289 mach->Instructions = NULL;
290 mach->NumInstructions = 0;
291 }
292 if (mach->Declarations) {
293 FREE(mach->Declarations);
294 mach->Declarations = NULL;
295 mach->NumDeclarations = 0;
296 }
297 }
298
299
300 static void
301 micro_abs(
302 union tgsi_exec_channel *dst,
303 const union tgsi_exec_channel *src )
304 {
305 dst->f[0] = fabsf( src->f[0] );
306 dst->f[1] = fabsf( src->f[1] );
307 dst->f[2] = fabsf( src->f[2] );
308 dst->f[3] = fabsf( src->f[3] );
309 }
310
311 static void
312 micro_add(
313 union tgsi_exec_channel *dst,
314 const union tgsi_exec_channel *src0,
315 const union tgsi_exec_channel *src1 )
316 {
317 dst->f[0] = src0->f[0] + src1->f[0];
318 dst->f[1] = src0->f[1] + src1->f[1];
319 dst->f[2] = src0->f[2] + src1->f[2];
320 dst->f[3] = src0->f[3] + src1->f[3];
321 }
322
#if 0
/** dst = src0 + src1 (signed integer), lane by lane.  Currently disabled. */
static void
micro_iadd(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] + src1->i[lane];
   }
}
#endif
336
337 static void
338 micro_and(
339 union tgsi_exec_channel *dst,
340 const union tgsi_exec_channel *src0,
341 const union tgsi_exec_channel *src1 )
342 {
343 dst->u[0] = src0->u[0] & src1->u[0];
344 dst->u[1] = src0->u[1] & src1->u[1];
345 dst->u[2] = src0->u[2] & src1->u[2];
346 dst->u[3] = src0->u[3] & src1->u[3];
347 }
348
349 static void
350 micro_ceil(
351 union tgsi_exec_channel *dst,
352 const union tgsi_exec_channel *src )
353 {
354 dst->f[0] = ceilf( src->f[0] );
355 dst->f[1] = ceilf( src->f[1] );
356 dst->f[2] = ceilf( src->f[2] );
357 dst->f[3] = ceilf( src->f[3] );
358 }
359
360 static void
361 micro_cos(
362 union tgsi_exec_channel *dst,
363 const union tgsi_exec_channel *src )
364 {
365 dst->f[0] = cosf( src->f[0] );
366 dst->f[1] = cosf( src->f[1] );
367 dst->f[2] = cosf( src->f[2] );
368 dst->f[3] = cosf( src->f[3] );
369 }
370
371 static void
372 micro_ddx(
373 union tgsi_exec_channel *dst,
374 const union tgsi_exec_channel *src )
375 {
376 dst->f[0] =
377 dst->f[1] =
378 dst->f[2] =
379 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
380 }
381
382 static void
383 micro_ddy(
384 union tgsi_exec_channel *dst,
385 const union tgsi_exec_channel *src )
386 {
387 dst->f[0] =
388 dst->f[1] =
389 dst->f[2] =
390 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
391 }
392
393 static void
394 micro_div(
395 union tgsi_exec_channel *dst,
396 const union tgsi_exec_channel *src0,
397 const union tgsi_exec_channel *src1 )
398 {
399 if (src1->f[0] != 0) {
400 dst->f[0] = src0->f[0] / src1->f[0];
401 }
402 if (src1->f[1] != 0) {
403 dst->f[1] = src0->f[1] / src1->f[1];
404 }
405 if (src1->f[2] != 0) {
406 dst->f[2] = src0->f[2] / src1->f[2];
407 }
408 if (src1->f[3] != 0) {
409 dst->f[3] = src0->f[3] / src1->f[3];
410 }
411 }
412
#if 0
/** dst = src0 / src1 (unsigned integer), lane by lane.  Currently disabled. */
static void
micro_udiv(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] / src1->u[lane];
   }
}
#endif
426
427 static void
428 micro_eq(
429 union tgsi_exec_channel *dst,
430 const union tgsi_exec_channel *src0,
431 const union tgsi_exec_channel *src1,
432 const union tgsi_exec_channel *src2,
433 const union tgsi_exec_channel *src3 )
434 {
435 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
436 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
437 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
438 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
439 }
440
#if 0
/** Per lane: dst = (src0 == src1) ? src2 : src3, on signed integers.  Currently disabled. */
static void
micro_ieq(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] == src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
456
457 static void
458 micro_exp2(
459 union tgsi_exec_channel *dst,
460 const union tgsi_exec_channel *src)
461 {
462 #if FAST_MATH
463 dst->f[0] = util_fast_exp2( src->f[0] );
464 dst->f[1] = util_fast_exp2( src->f[1] );
465 dst->f[2] = util_fast_exp2( src->f[2] );
466 dst->f[3] = util_fast_exp2( src->f[3] );
467 #else
468 dst->f[0] = powf( 2.0f, src->f[0] );
469 dst->f[1] = powf( 2.0f, src->f[1] );
470 dst->f[2] = powf( 2.0f, src->f[2] );
471 dst->f[3] = powf( 2.0f, src->f[3] );
472 #endif
473 }
474
#if 0
/** dst = (uint) src, converting float to unsigned per lane.  Currently disabled. */
static void
micro_f2ut(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = (uint) src->f[lane];
   }
}
#endif
487
488 static void
489 micro_float_clamp(union tgsi_exec_channel *dst,
490 const union tgsi_exec_channel *src)
491 {
492 uint i;
493
494 for (i = 0; i < 4; i++) {
495 if (src->f[i] > 0.0f) {
496 if (src->f[i] > 1.884467e+019f)
497 dst->f[i] = 1.884467e+019f;
498 else if (src->f[i] < 5.42101e-020f)
499 dst->f[i] = 5.42101e-020f;
500 else
501 dst->f[i] = src->f[i];
502 }
503 else {
504 if (src->f[i] < -1.884467e+019f)
505 dst->f[i] = -1.884467e+019f;
506 else if (src->f[i] > -5.42101e-020f)
507 dst->f[i] = -5.42101e-020f;
508 else
509 dst->f[i] = src->f[i];
510 }
511 }
512 }
513
514 static void
515 micro_flr(
516 union tgsi_exec_channel *dst,
517 const union tgsi_exec_channel *src )
518 {
519 dst->f[0] = floorf( src->f[0] );
520 dst->f[1] = floorf( src->f[1] );
521 dst->f[2] = floorf( src->f[2] );
522 dst->f[3] = floorf( src->f[3] );
523 }
524
525 static void
526 micro_frc(
527 union tgsi_exec_channel *dst,
528 const union tgsi_exec_channel *src )
529 {
530 dst->f[0] = src->f[0] - floorf( src->f[0] );
531 dst->f[1] = src->f[1] - floorf( src->f[1] );
532 dst->f[2] = src->f[2] - floorf( src->f[2] );
533 dst->f[3] = src->f[3] - floorf( src->f[3] );
534 }
535
536 static void
537 micro_i2f(
538 union tgsi_exec_channel *dst,
539 const union tgsi_exec_channel *src )
540 {
541 dst->f[0] = (float) src->i[0];
542 dst->f[1] = (float) src->i[1];
543 dst->f[2] = (float) src->i[2];
544 dst->f[3] = (float) src->i[3];
545 }
546
547 static void
548 micro_lg2(
549 union tgsi_exec_channel *dst,
550 const union tgsi_exec_channel *src )
551 {
552 #if FAST_MATH
553 dst->f[0] = util_fast_log2( src->f[0] );
554 dst->f[1] = util_fast_log2( src->f[1] );
555 dst->f[2] = util_fast_log2( src->f[2] );
556 dst->f[3] = util_fast_log2( src->f[3] );
557 #else
558 dst->f[0] = logf( src->f[0] ) * 1.442695f;
559 dst->f[1] = logf( src->f[1] ) * 1.442695f;
560 dst->f[2] = logf( src->f[2] ) * 1.442695f;
561 dst->f[3] = logf( src->f[3] ) * 1.442695f;
562 #endif
563 }
564
565 static void
566 micro_le(
567 union tgsi_exec_channel *dst,
568 const union tgsi_exec_channel *src0,
569 const union tgsi_exec_channel *src1,
570 const union tgsi_exec_channel *src2,
571 const union tgsi_exec_channel *src3 )
572 {
573 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
574 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
575 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
576 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
577 }
578
579 static void
580 micro_lt(
581 union tgsi_exec_channel *dst,
582 const union tgsi_exec_channel *src0,
583 const union tgsi_exec_channel *src1,
584 const union tgsi_exec_channel *src2,
585 const union tgsi_exec_channel *src3 )
586 {
587 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
588 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
589 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
590 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
591 }
592
#if 0
/** Per lane: dst = (src0 < src1) ? src2 : src3, on signed integers.  Currently disabled. */
static void
micro_ilt(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
608
#if 0
/** Per lane: dst = (src0 < src1) ? src2 : src3, on unsigned integers.  Currently disabled. */
static void
micro_ult(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src2->u[lane];
      }
      else {
         dst->u[lane] = src3->u[lane];
      }
   }
}
#endif
624
625 static void
626 micro_max(
627 union tgsi_exec_channel *dst,
628 const union tgsi_exec_channel *src0,
629 const union tgsi_exec_channel *src1 )
630 {
631 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
632 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
633 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
634 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
635 }
636
#if 0
/** Per lane: dst = larger of src0 and src1 as signed integers.  Currently disabled. */
static void
micro_imax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] > src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
650
#if 0
/** Per lane: dst = larger of src0 and src1 as unsigned integers.  Currently disabled. */
static void
micro_umax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] > src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
664
665 static void
666 micro_min(
667 union tgsi_exec_channel *dst,
668 const union tgsi_exec_channel *src0,
669 const union tgsi_exec_channel *src1 )
670 {
671 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
672 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
673 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
674 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
675 }
676
#if 0
/** Per lane: dst = smaller of src0 and src1 as signed integers.  Currently disabled. */
static void
micro_imin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
690
#if 0
/** Per lane: dst = smaller of src0 and src1 as unsigned integers.  Currently disabled. */
static void
micro_umin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
704
#if 0
/** dst = src0 % src1 (unsigned integer), lane by lane.  Currently disabled. */
static void
micro_umod(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] % src1->u[lane];
   }
}
#endif
718
719 static void
720 micro_mul(
721 union tgsi_exec_channel *dst,
722 const union tgsi_exec_channel *src0,
723 const union tgsi_exec_channel *src1 )
724 {
725 dst->f[0] = src0->f[0] * src1->f[0];
726 dst->f[1] = src0->f[1] * src1->f[1];
727 dst->f[2] = src0->f[2] * src1->f[2];
728 dst->f[3] = src0->f[3] * src1->f[3];
729 }
730
#if 0
/** dst = src0 * src1 (signed integer), lane by lane.  Currently disabled. */
static void
micro_imul(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] * src1->i[lane];
   }
}
#endif
744
#if 0
/**
 * Signed multiply with two destinations: dst1 receives the plain product
 * of src0 and src1 per lane, dst0 is set to zero.  Currently disabled.
 */
static void
micro_imul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   /* products first, then the zero fill, as two separate passes */
   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
763
#if 0
/**
 * Unsigned multiply with two destinations: dst1 receives the plain product
 * of src0 and src1 per lane, dst0 is set to zero.  Currently disabled.
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   /* products first, then the zero fill, as two separate passes */
   for (lane = 0; lane < 4; lane++) {
      dst1->u[lane] = src0->u[lane] * src1->u[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->u[lane] = 0;
   }
}
#endif
782
783
#if 0
/** Per lane: dst = src0 is non-zero ? src1 : src2, on the unsigned view.  Currently disabled. */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane]) {
         dst->u[lane] = src1->u[lane];
      }
      else {
         dst->u[lane] = src2->u[lane];
      }
   }
}
#endif
798
799 static void
800 micro_neg(
801 union tgsi_exec_channel *dst,
802 const union tgsi_exec_channel *src )
803 {
804 dst->f[0] = -src->f[0];
805 dst->f[1] = -src->f[1];
806 dst->f[2] = -src->f[2];
807 dst->f[3] = -src->f[3];
808 }
809
#if 0
/** dst = -src (signed integer), lane by lane.  Currently disabled. */
static void
micro_ineg(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = -src->i[lane];
   }
}
#endif
822
823 static void
824 micro_not(
825 union tgsi_exec_channel *dst,
826 const union tgsi_exec_channel *src )
827 {
828 dst->u[0] = ~src->u[0];
829 dst->u[1] = ~src->u[1];
830 dst->u[2] = ~src->u[2];
831 dst->u[3] = ~src->u[3];
832 }
833
834 static void
835 micro_or(
836 union tgsi_exec_channel *dst,
837 const union tgsi_exec_channel *src0,
838 const union tgsi_exec_channel *src1 )
839 {
840 dst->u[0] = src0->u[0] | src1->u[0];
841 dst->u[1] = src0->u[1] | src1->u[1];
842 dst->u[2] = src0->u[2] | src1->u[2];
843 dst->u[3] = src0->u[3] | src1->u[3];
844 }
845
846 static void
847 micro_pow(
848 union tgsi_exec_channel *dst,
849 const union tgsi_exec_channel *src0,
850 const union tgsi_exec_channel *src1 )
851 {
852 #if FAST_MATH
853 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
854 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
855 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
856 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
857 #else
858 dst->f[0] = powf( src0->f[0], src1->f[0] );
859 dst->f[1] = powf( src0->f[1], src1->f[1] );
860 dst->f[2] = powf( src0->f[2], src1->f[2] );
861 dst->f[3] = powf( src0->f[3], src1->f[3] );
862 #endif
863 }
864
865 static void
866 micro_rnd(
867 union tgsi_exec_channel *dst,
868 const union tgsi_exec_channel *src )
869 {
870 dst->f[0] = floorf( src->f[0] + 0.5f );
871 dst->f[1] = floorf( src->f[1] + 0.5f );
872 dst->f[2] = floorf( src->f[2] + 0.5f );
873 dst->f[3] = floorf( src->f[3] + 0.5f );
874 }
875
876 static void
877 micro_sgn(
878 union tgsi_exec_channel *dst,
879 const union tgsi_exec_channel *src )
880 {
881 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
882 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
883 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
884 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
885 }
886
887 static void
888 micro_shl(
889 union tgsi_exec_channel *dst,
890 const union tgsi_exec_channel *src0,
891 const union tgsi_exec_channel *src1 )
892 {
893 dst->i[0] = src0->i[0] << src1->i[0];
894 dst->i[1] = src0->i[1] << src1->i[1];
895 dst->i[2] = src0->i[2] << src1->i[2];
896 dst->i[3] = src0->i[3] << src1->i[3];
897 }
898
899 static void
900 micro_ishr(
901 union tgsi_exec_channel *dst,
902 const union tgsi_exec_channel *src0,
903 const union tgsi_exec_channel *src1 )
904 {
905 dst->i[0] = src0->i[0] >> src1->i[0];
906 dst->i[1] = src0->i[1] >> src1->i[1];
907 dst->i[2] = src0->i[2] >> src1->i[2];
908 dst->i[3] = src0->i[3] >> src1->i[3];
909 }
910
911 static void
912 micro_trunc(
913 union tgsi_exec_channel *dst,
914 const union tgsi_exec_channel *src0 )
915 {
916 dst->f[0] = (float) (int) src0->f[0];
917 dst->f[1] = (float) (int) src0->f[1];
918 dst->f[2] = (float) (int) src0->f[2];
919 dst->f[3] = (float) (int) src0->f[3];
920 }
921
#if 0
/** dst = src0 >> src1 on the unsigned view, lane by lane.  Currently disabled. */
static void
micro_ushr(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] >> src1->u[lane];
   }
}
#endif
935
936 static void
937 micro_sin(
938 union tgsi_exec_channel *dst,
939 const union tgsi_exec_channel *src )
940 {
941 dst->f[0] = sinf( src->f[0] );
942 dst->f[1] = sinf( src->f[1] );
943 dst->f[2] = sinf( src->f[2] );
944 dst->f[3] = sinf( src->f[3] );
945 }
946
947 static void
948 micro_sqrt( union tgsi_exec_channel *dst,
949 const union tgsi_exec_channel *src )
950 {
951 dst->f[0] = sqrtf( src->f[0] );
952 dst->f[1] = sqrtf( src->f[1] );
953 dst->f[2] = sqrtf( src->f[2] );
954 dst->f[3] = sqrtf( src->f[3] );
955 }
956
957 static void
958 micro_sub(
959 union tgsi_exec_channel *dst,
960 const union tgsi_exec_channel *src0,
961 const union tgsi_exec_channel *src1 )
962 {
963 dst->f[0] = src0->f[0] - src1->f[0];
964 dst->f[1] = src0->f[1] - src1->f[1];
965 dst->f[2] = src0->f[2] - src1->f[2];
966 dst->f[3] = src0->f[3] - src1->f[3];
967 }
968
#if 0
/** dst = (float) src, converting the unsigned view per lane.  Currently disabled. */
static void
micro_u2f(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->f[lane] = (float) src->u[lane];
   }
}
#endif
981
982 static void
983 micro_xor(
984 union tgsi_exec_channel *dst,
985 const union tgsi_exec_channel *src0,
986 const union tgsi_exec_channel *src1 )
987 {
988 dst->u[0] = src0->u[0] ^ src1->u[0];
989 dst->u[1] = src0->u[1] ^ src1->u[1];
990 dst->u[2] = src0->u[2] ^ src1->u[2];
991 dst->u[3] = src0->u[3] ^ src1->u[3];
992 }
993
994 static void
995 fetch_src_file_channel(
996 const struct tgsi_exec_machine *mach,
997 const uint file,
998 const uint swizzle,
999 const union tgsi_exec_channel *index,
1000 union tgsi_exec_channel *chan )
1001 {
1002 switch( swizzle ) {
1003 case TGSI_EXTSWIZZLE_X:
1004 case TGSI_EXTSWIZZLE_Y:
1005 case TGSI_EXTSWIZZLE_Z:
1006 case TGSI_EXTSWIZZLE_W:
1007 switch( file ) {
1008 case TGSI_FILE_CONSTANT:
1009 assert(mach->Consts);
1010 if (index->i[0] < 0)
1011 chan->f[0] = 0.0f;
1012 else
1013 chan->f[0] = mach->Consts[index->i[0]][swizzle];
1014 if (index->i[1] < 0)
1015 chan->f[1] = 0.0f;
1016 else
1017 chan->f[1] = mach->Consts[index->i[1]][swizzle];
1018 if (index->i[2] < 0)
1019 chan->f[2] = 0.0f;
1020 else
1021 chan->f[2] = mach->Consts[index->i[2]][swizzle];
1022 if (index->i[3] < 0)
1023 chan->f[3] = 0.0f;
1024 else
1025 chan->f[3] = mach->Consts[index->i[3]][swizzle];
1026 break;
1027
1028 case TGSI_FILE_INPUT:
1029 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1030 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1031 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1032 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1033 break;
1034
1035 case TGSI_FILE_TEMPORARY:
1036 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1037 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1038 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1039 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1040 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1041 break;
1042
1043 case TGSI_FILE_IMMEDIATE:
1044 assert( index->i[0] < (int) mach->ImmLimit );
1045 chan->f[0] = mach->Imms[index->i[0]][swizzle];
1046 assert( index->i[1] < (int) mach->ImmLimit );
1047 chan->f[1] = mach->Imms[index->i[1]][swizzle];
1048 assert( index->i[2] < (int) mach->ImmLimit );
1049 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1050 assert( index->i[3] < (int) mach->ImmLimit );
1051 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1052 break;
1053
1054 case TGSI_FILE_ADDRESS:
1055 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1056 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1057 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1058 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1059 break;
1060
1061 case TGSI_FILE_OUTPUT:
1062 /* vertex/fragment output vars can be read too */
1063 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1064 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1065 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1066 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1067 break;
1068
1069 default:
1070 assert( 0 );
1071 }
1072 break;
1073
1074 case TGSI_EXTSWIZZLE_ZERO:
1075 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1076 break;
1077
1078 case TGSI_EXTSWIZZLE_ONE:
1079 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1080 break;
1081
1082 default:
1083 assert( 0 );
1084 }
1085 }
1086
1087 static void
1088 fetch_source(
1089 const struct tgsi_exec_machine *mach,
1090 union tgsi_exec_channel *chan,
1091 const struct tgsi_full_src_register *reg,
1092 const uint chan_index )
1093 {
1094 union tgsi_exec_channel index;
1095 uint swizzle;
1096
1097 /* We start with a direct index into a register file.
1098 *
1099 * file[1],
1100 * where:
1101 * file = SrcRegister.File
1102 * [1] = SrcRegister.Index
1103 */
1104 index.i[0] =
1105 index.i[1] =
1106 index.i[2] =
1107 index.i[3] = reg->SrcRegister.Index;
1108
1109 /* There is an extra source register that indirectly subscripts
1110 * a register file. The direct index now becomes an offset
1111 * that is being added to the indirect register.
1112 *
1113 * file[ind[2].x+1],
1114 * where:
1115 * ind = SrcRegisterInd.File
1116 * [2] = SrcRegisterInd.Index
1117 * .x = SrcRegisterInd.SwizzleX
1118 */
1119 if (reg->SrcRegister.Indirect) {
1120 union tgsi_exec_channel index2;
1121 union tgsi_exec_channel indir_index;
1122 const uint execmask = mach->ExecMask;
1123 uint i;
1124
1125 /* which address register (always zero now) */
1126 index2.i[0] =
1127 index2.i[1] =
1128 index2.i[2] =
1129 index2.i[3] = reg->SrcRegisterInd.Index;
1130
1131 /* get current value of address register[swizzle] */
1132 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1133 fetch_src_file_channel(
1134 mach,
1135 reg->SrcRegisterInd.File,
1136 swizzle,
1137 &index2,
1138 &indir_index );
1139
1140 /* add value of address register to the offset */
1141 index.i[0] += (int) indir_index.f[0];
1142 index.i[1] += (int) indir_index.f[1];
1143 index.i[2] += (int) indir_index.f[2];
1144 index.i[3] += (int) indir_index.f[3];
1145
1146 /* for disabled execution channels, zero-out the index to
1147 * avoid using a potential garbage value.
1148 */
1149 for (i = 0; i < QUAD_SIZE; i++) {
1150 if ((execmask & (1 << i)) == 0)
1151 index.i[i] = 0;
1152 }
1153 }
1154
1155 /* There is an extra source register that is a second
1156 * subscript to a register file. Effectively it means that
1157 * the register file is actually a 2D array of registers.
1158 *
1159 * file[1][3] == file[1*sizeof(file[1])+3],
1160 * where:
1161 * [3] = SrcRegisterDim.Index
1162 */
1163 if (reg->SrcRegister.Dimension) {
1164 /* The size of the first-order array depends on the register file type.
1165 * We need to multiply the index to the first array to get an effective,
1166 * "flat" index that points to the beginning of the second-order array.
1167 */
1168 switch (reg->SrcRegister.File) {
1169 case TGSI_FILE_INPUT:
1170 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1171 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1172 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1173 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1174 break;
1175 case TGSI_FILE_CONSTANT:
1176 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1177 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1178 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1179 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1180 break;
1181 default:
1182 assert( 0 );
1183 }
1184
1185 index.i[0] += reg->SrcRegisterDim.Index;
1186 index.i[1] += reg->SrcRegisterDim.Index;
1187 index.i[2] += reg->SrcRegisterDim.Index;
1188 index.i[3] += reg->SrcRegisterDim.Index;
1189
1190 /* Again, the second subscript index can be addressed indirectly
1191 * identically to the first one.
1192 * Nothing stops us from indirectly addressing the indirect register,
1193 * but there is no need for that, so we won't exercise it.
1194 *
1195 * file[1][ind[4].y+3],
1196 * where:
1197 * ind = SrcRegisterDimInd.File
1198 * [4] = SrcRegisterDimInd.Index
1199 * .y = SrcRegisterDimInd.SwizzleX
1200 */
1201 if (reg->SrcRegisterDim.Indirect) {
1202 union tgsi_exec_channel index2;
1203 union tgsi_exec_channel indir_index;
1204 const uint execmask = mach->ExecMask;
1205 uint i;
1206
1207 index2.i[0] =
1208 index2.i[1] =
1209 index2.i[2] =
1210 index2.i[3] = reg->SrcRegisterDimInd.Index;
1211
1212 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1213 fetch_src_file_channel(
1214 mach,
1215 reg->SrcRegisterDimInd.File,
1216 swizzle,
1217 &index2,
1218 &indir_index );
1219
1220 index.i[0] += (int) indir_index.f[0];
1221 index.i[1] += (int) indir_index.f[1];
1222 index.i[2] += (int) indir_index.f[2];
1223 index.i[3] += (int) indir_index.f[3];
1224
1225 /* for disabled execution channels, zero-out the index to
1226 * avoid using a potential garbage value.
1227 */
1228 for (i = 0; i < QUAD_SIZE; i++) {
1229 if ((execmask & (1 << i)) == 0)
1230 index.i[i] = 0;
1231 }
1232 }
1233
1234 /* If by any chance there was a need for a 3D array of register
1235 * files, we would have to check whether SrcRegisterDim is followed
1236 * by a dimension register and continue the saga.
1237 */
1238 }
1239
1240 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1241 fetch_src_file_channel(
1242 mach,
1243 reg->SrcRegister.File,
1244 swizzle,
1245 &index,
1246 chan );
1247
1248 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1249 case TGSI_UTIL_SIGN_CLEAR:
1250 micro_abs( chan, chan );
1251 break;
1252
1253 case TGSI_UTIL_SIGN_SET:
1254 micro_abs( chan, chan );
1255 micro_neg( chan, chan );
1256 break;
1257
1258 case TGSI_UTIL_SIGN_TOGGLE:
1259 micro_neg( chan, chan );
1260 break;
1261
1262 case TGSI_UTIL_SIGN_KEEP:
1263 break;
1264 }
1265
1266 if (reg->SrcRegisterExtMod.Complement) {
1267 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1268 }
1269 }
1270
1271 static void
1272 store_dest(
1273 struct tgsi_exec_machine *mach,
1274 const union tgsi_exec_channel *chan,
1275 const struct tgsi_full_dst_register *reg,
1276 const struct tgsi_full_instruction *inst,
1277 uint chan_index )
1278 {
1279 uint i;
1280 union tgsi_exec_channel null;
1281 union tgsi_exec_channel *dst;
1282 uint execmask = mach->ExecMask;
1283
1284 switch (reg->DstRegister.File) {
1285 case TGSI_FILE_NULL:
1286 dst = &null;
1287 break;
1288
1289 case TGSI_FILE_OUTPUT:
1290 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1291 + reg->DstRegister.Index].xyzw[chan_index];
1292 break;
1293
1294 case TGSI_FILE_TEMPORARY:
1295 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1296 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1297 break;
1298
1299 case TGSI_FILE_ADDRESS:
1300 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1301 break;
1302
1303 default:
1304 assert( 0 );
1305 return;
1306 }
1307
1308 if (inst->InstructionExtNv.CondFlowEnable) {
1309 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1310 uint swizzle;
1311 uint shift;
1312 uint mask;
1313 uint test;
1314
1315 /* Only CC0 supported.
1316 */
1317 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1318
1319 switch (chan_index) {
1320 case CHAN_X:
1321 swizzle = inst->InstructionExtNv.CondSwizzleX;
1322 break;
1323 case CHAN_Y:
1324 swizzle = inst->InstructionExtNv.CondSwizzleY;
1325 break;
1326 case CHAN_Z:
1327 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1328 break;
1329 case CHAN_W:
1330 swizzle = inst->InstructionExtNv.CondSwizzleW;
1331 break;
1332 default:
1333 assert( 0 );
1334 return;
1335 }
1336
1337 switch (swizzle) {
1338 case TGSI_SWIZZLE_X:
1339 shift = TGSI_EXEC_CC_X_SHIFT;
1340 mask = TGSI_EXEC_CC_X_MASK;
1341 break;
1342 case TGSI_SWIZZLE_Y:
1343 shift = TGSI_EXEC_CC_Y_SHIFT;
1344 mask = TGSI_EXEC_CC_Y_MASK;
1345 break;
1346 case TGSI_SWIZZLE_Z:
1347 shift = TGSI_EXEC_CC_Z_SHIFT;
1348 mask = TGSI_EXEC_CC_Z_MASK;
1349 break;
1350 case TGSI_SWIZZLE_W:
1351 shift = TGSI_EXEC_CC_W_SHIFT;
1352 mask = TGSI_EXEC_CC_W_MASK;
1353 break;
1354 default:
1355 assert( 0 );
1356 return;
1357 }
1358
1359 switch (inst->InstructionExtNv.CondMask) {
1360 case TGSI_CC_GT:
1361 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1362 for (i = 0; i < QUAD_SIZE; i++)
1363 if (cc->u[i] & test)
1364 execmask &= ~(1 << i);
1365 break;
1366
1367 case TGSI_CC_EQ:
1368 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1369 for (i = 0; i < QUAD_SIZE; i++)
1370 if (cc->u[i] & test)
1371 execmask &= ~(1 << i);
1372 break;
1373
1374 case TGSI_CC_LT:
1375 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1376 for (i = 0; i < QUAD_SIZE; i++)
1377 if (cc->u[i] & test)
1378 execmask &= ~(1 << i);
1379 break;
1380
1381 case TGSI_CC_GE:
1382 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1383 for (i = 0; i < QUAD_SIZE; i++)
1384 if (cc->u[i] & test)
1385 execmask &= ~(1 << i);
1386 break;
1387
1388 case TGSI_CC_LE:
1389 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1390 for (i = 0; i < QUAD_SIZE; i++)
1391 if (cc->u[i] & test)
1392 execmask &= ~(1 << i);
1393 break;
1394
1395 case TGSI_CC_NE:
1396 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1397 for (i = 0; i < QUAD_SIZE; i++)
1398 if (cc->u[i] & test)
1399 execmask &= ~(1 << i);
1400 break;
1401
1402 case TGSI_CC_TR:
1403 break;
1404
1405 case TGSI_CC_FL:
1406 for (i = 0; i < QUAD_SIZE; i++)
1407 execmask &= ~(1 << i);
1408 break;
1409
1410 default:
1411 assert( 0 );
1412 return;
1413 }
1414 }
1415
1416 switch (inst->Instruction.Saturate) {
1417 case TGSI_SAT_NONE:
1418 for (i = 0; i < QUAD_SIZE; i++)
1419 if (execmask & (1 << i))
1420 dst->i[i] = chan->i[i];
1421 break;
1422
1423 case TGSI_SAT_ZERO_ONE:
1424 for (i = 0; i < QUAD_SIZE; i++)
1425 if (execmask & (1 << i)) {
1426 if (chan->f[i] < 0.0f)
1427 dst->f[i] = 0.0f;
1428 else if (chan->f[i] > 1.0f)
1429 dst->f[i] = 1.0f;
1430 else
1431 dst->i[i] = chan->i[i];
1432 }
1433 break;
1434
1435 case TGSI_SAT_MINUS_PLUS_ONE:
1436 for (i = 0; i < QUAD_SIZE; i++)
1437 if (execmask & (1 << i)) {
1438 if (chan->f[i] < -1.0f)
1439 dst->f[i] = -1.0f;
1440 else if (chan->f[i] > 1.0f)
1441 dst->f[i] = 1.0f;
1442 else
1443 dst->i[i] = chan->i[i];
1444 }
1445 break;
1446
1447 default:
1448 assert( 0 );
1449 }
1450
1451 if (inst->InstructionExtNv.CondDstUpdate) {
1452 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1453 uint shift;
1454 uint mask;
1455
1456 /* Only CC0 supported.
1457 */
1458 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1459
1460 switch (chan_index) {
1461 case CHAN_X:
1462 shift = TGSI_EXEC_CC_X_SHIFT;
1463 mask = ~TGSI_EXEC_CC_X_MASK;
1464 break;
1465 case CHAN_Y:
1466 shift = TGSI_EXEC_CC_Y_SHIFT;
1467 mask = ~TGSI_EXEC_CC_Y_MASK;
1468 break;
1469 case CHAN_Z:
1470 shift = TGSI_EXEC_CC_Z_SHIFT;
1471 mask = ~TGSI_EXEC_CC_Z_MASK;
1472 break;
1473 case CHAN_W:
1474 shift = TGSI_EXEC_CC_W_SHIFT;
1475 mask = ~TGSI_EXEC_CC_W_MASK;
1476 break;
1477 default:
1478 assert( 0 );
1479 return;
1480 }
1481
1482 for (i = 0; i < QUAD_SIZE; i++)
1483 if (execmask & (1 << i)) {
1484 cc->u[i] &= mask;
1485 if (dst->f[i] < 0.0f)
1486 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1487 else if (dst->f[i] > 0.0f)
1488 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1489 else if (dst->f[i] == 0.0f)
1490 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1491 else
1492 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1493 }
1494 }
1495 }
1496
/*
 * Shorthands for fetch_source() / store_dest().  Both expand to code
 * that refers to variables named `mach' and `inst', which must be in
 * scope at the point of use.
 *
 *    VAL    pointer to the channel to fill (FETCH) or to write (STORE)
 *    INDEX  which source / destination operand of the instruction
 *    CHAN   which channel of that operand
 */
#define FETCH(VAL,INDEX,CHAN) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN) )

#define STORE(VAL,INDEX,CHAN) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1502
1503
1504 /**
1505 * Execute ARB-style KIL which is predicated by a src register.
1506 * Kill fragment if any of the four values is less than zero.
1507 */
1508 static void
1509 exec_kil(struct tgsi_exec_machine *mach,
1510 const struct tgsi_full_instruction *inst)
1511 {
1512 uint uniquemask;
1513 uint chan_index;
1514 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1515 union tgsi_exec_channel r[1];
1516
1517 /* This mask stores component bits that were already tested. Note that
1518 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1519 * tested. */
1520 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1521
1522 for (chan_index = 0; chan_index < 4; chan_index++)
1523 {
1524 uint swizzle;
1525 uint i;
1526
1527 /* unswizzle channel */
1528 swizzle = tgsi_util_get_full_src_register_extswizzle (
1529 &inst->FullSrcRegisters[0],
1530 chan_index);
1531
1532 /* check if the component has not been already tested */
1533 if (uniquemask & (1 << swizzle))
1534 continue;
1535 uniquemask |= 1 << swizzle;
1536
1537 FETCH(&r[0], 0, chan_index);
1538 for (i = 0; i < 4; i++)
1539 if (r[0].f[i] < 0.0f)
1540 kilmask |= 1 << i;
1541 }
1542
1543 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1544 }
1545
1546 /**
1547 * Execute NVIDIA-style KIL which is predicated by a condition code.
1548 * Kill fragment if the condition code is TRUE.
1549 */
1550 static void
1551 exec_kilp(struct tgsi_exec_machine *mach,
1552 const struct tgsi_full_instruction *inst)
1553 {
1554 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1555
1556 if (inst->InstructionExtNv.CondFlowEnable) {
1557 uint swizzle[4];
1558 uint chan_index;
1559
1560 kilmask = 0x0;
1561
1562 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1563 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1564 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1565 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1566
1567 for (chan_index = 0; chan_index < 4; chan_index++)
1568 {
1569 uint i;
1570
1571 for (i = 0; i < 4; i++) {
1572 /* TODO: evaluate the condition code */
1573 if (0)
1574 kilmask |= 1 << i;
1575 }
1576 }
1577 }
1578 else {
1579 /* "unconditional" kil */
1580 kilmask = mach->ExecMask;
1581 }
1582 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1583 }
1584
1585
1586 /*
1587 * Fetch a four texture samples using STR texture coordinates.
1588 */
1589 static void
1590 fetch_texel( struct tgsi_sampler *sampler,
1591 const union tgsi_exec_channel *s,
1592 const union tgsi_exec_channel *t,
1593 const union tgsi_exec_channel *p,
1594 float lodbias, /* XXX should be float[4] */
1595 union tgsi_exec_channel *r,
1596 union tgsi_exec_channel *g,
1597 union tgsi_exec_channel *b,
1598 union tgsi_exec_channel *a )
1599 {
1600 uint j;
1601 float rgba[NUM_CHANNELS][QUAD_SIZE];
1602
1603 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1604
1605 for (j = 0; j < 4; j++) {
1606 r->f[j] = rgba[0][j];
1607 g->f[j] = rgba[1][j];
1608 b->f[j] = rgba[2][j];
1609 a->f[j] = rgba[3][j];
1610 }
1611 }
1612
1613
1614 static void
1615 exec_tex(struct tgsi_exec_machine *mach,
1616 const struct tgsi_full_instruction *inst,
1617 boolean biasLod,
1618 boolean projected)
1619 {
1620 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1621 union tgsi_exec_channel r[4];
1622 uint chan_index;
1623 float lodBias;
1624
1625 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1626
1627 switch (inst->InstructionExtTexture.Texture) {
1628 case TGSI_TEXTURE_1D:
1629 case TGSI_TEXTURE_SHADOW1D:
1630
1631 FETCH(&r[0], 0, CHAN_X);
1632
1633 if (projected) {
1634 FETCH(&r[1], 0, CHAN_W);
1635 micro_div( &r[0], &r[0], &r[1] );
1636 }
1637
1638 if (biasLod) {
1639 FETCH(&r[1], 0, CHAN_W);
1640 lodBias = r[2].f[0];
1641 }
1642 else
1643 lodBias = 0.0;
1644
1645 fetch_texel(mach->Samplers[unit],
1646 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1647 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1648 break;
1649
1650 case TGSI_TEXTURE_2D:
1651 case TGSI_TEXTURE_RECT:
1652 case TGSI_TEXTURE_SHADOW2D:
1653 case TGSI_TEXTURE_SHADOWRECT:
1654
1655 FETCH(&r[0], 0, CHAN_X);
1656 FETCH(&r[1], 0, CHAN_Y);
1657 FETCH(&r[2], 0, CHAN_Z);
1658
1659 if (projected) {
1660 FETCH(&r[3], 0, CHAN_W);
1661 micro_div( &r[0], &r[0], &r[3] );
1662 micro_div( &r[1], &r[1], &r[3] );
1663 micro_div( &r[2], &r[2], &r[3] );
1664 }
1665
1666 if (biasLod) {
1667 FETCH(&r[3], 0, CHAN_W);
1668 lodBias = r[3].f[0];
1669 }
1670 else
1671 lodBias = 0.0;
1672
1673 fetch_texel(mach->Samplers[unit],
1674 &r[0], &r[1], &r[2], lodBias, /* inputs */
1675 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1676 break;
1677
1678 case TGSI_TEXTURE_3D:
1679 case TGSI_TEXTURE_CUBE:
1680
1681 FETCH(&r[0], 0, CHAN_X);
1682 FETCH(&r[1], 0, CHAN_Y);
1683 FETCH(&r[2], 0, CHAN_Z);
1684
1685 if (projected) {
1686 FETCH(&r[3], 0, CHAN_W);
1687 micro_div( &r[0], &r[0], &r[3] );
1688 micro_div( &r[1], &r[1], &r[3] );
1689 micro_div( &r[2], &r[2], &r[3] );
1690 }
1691
1692 if (biasLod) {
1693 FETCH(&r[3], 0, CHAN_W);
1694 lodBias = r[3].f[0];
1695 }
1696 else
1697 lodBias = 0.0;
1698
1699 fetch_texel(mach->Samplers[unit],
1700 &r[0], &r[1], &r[2], lodBias,
1701 &r[0], &r[1], &r[2], &r[3]);
1702 break;
1703
1704 default:
1705 assert (0);
1706 }
1707
1708 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1709 STORE( &r[chan_index], 0, chan_index );
1710 }
1711 }
1712
1713
1714 /**
1715 * Evaluate a constant-valued coefficient at the position of the
1716 * current quad.
1717 */
1718 static void
1719 eval_constant_coef(
1720 struct tgsi_exec_machine *mach,
1721 unsigned attrib,
1722 unsigned chan )
1723 {
1724 unsigned i;
1725
1726 for( i = 0; i < QUAD_SIZE; i++ ) {
1727 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1728 }
1729 }
1730
1731 /**
1732 * Evaluate a linear-valued coefficient at the position of the
1733 * current quad.
1734 */
1735 static void
1736 eval_linear_coef(
1737 struct tgsi_exec_machine *mach,
1738 unsigned attrib,
1739 unsigned chan )
1740 {
1741 const float x = mach->QuadPos.xyzw[0].f[0];
1742 const float y = mach->QuadPos.xyzw[1].f[0];
1743 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1744 const float dady = mach->InterpCoefs[attrib].dady[chan];
1745 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1746 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1747 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1748 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1749 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1750 }
1751
1752 /**
1753 * Evaluate a perspective-valued coefficient at the position of the
1754 * current quad.
1755 */
1756 static void
1757 eval_perspective_coef(
1758 struct tgsi_exec_machine *mach,
1759 unsigned attrib,
1760 unsigned chan )
1761 {
1762 const float x = mach->QuadPos.xyzw[0].f[0];
1763 const float y = mach->QuadPos.xyzw[1].f[0];
1764 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1765 const float dady = mach->InterpCoefs[attrib].dady[chan];
1766 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1767 const float *w = mach->QuadPos.xyzw[3].f;
1768 /* divide by W here */
1769 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1770 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1771 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1772 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1773 }
1774
1775
/**
 * Pointer to a routine that evaluates one channel of one input
 * attribute for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
1780
1781 static void
1782 exec_declaration(
1783 struct tgsi_exec_machine *mach,
1784 const struct tgsi_full_declaration *decl )
1785 {
1786 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1787 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1788 unsigned first, last, mask;
1789 eval_coef_func eval;
1790
1791 first = decl->DeclarationRange.First;
1792 last = decl->DeclarationRange.Last;
1793 mask = decl->Declaration.UsageMask;
1794
1795 switch( decl->Declaration.Interpolate ) {
1796 case TGSI_INTERPOLATE_CONSTANT:
1797 eval = eval_constant_coef;
1798 break;
1799
1800 case TGSI_INTERPOLATE_LINEAR:
1801 eval = eval_linear_coef;
1802 break;
1803
1804 case TGSI_INTERPOLATE_PERSPECTIVE:
1805 eval = eval_perspective_coef;
1806 break;
1807
1808 default:
1809 eval = NULL;
1810 assert( 0 );
1811 }
1812
1813 if( mask == TGSI_WRITEMASK_XYZW ) {
1814 unsigned i, j;
1815
1816 for( i = first; i <= last; i++ ) {
1817 for( j = 0; j < NUM_CHANNELS; j++ ) {
1818 eval( mach, i, j );
1819 }
1820 }
1821 }
1822 else {
1823 unsigned i, j;
1824
1825 for( j = 0; j < NUM_CHANNELS; j++ ) {
1826 if( mask & (1 << j) ) {
1827 for( i = first; i <= last; i++ ) {
1828 eval( mach, i, j );
1829 }
1830 }
1831 }
1832 }
1833 }
1834 }
1835 }
1836
1837 static void
1838 exec_instruction(
1839 struct tgsi_exec_machine *mach,
1840 const struct tgsi_full_instruction *inst,
1841 int *pc )
1842 {
1843 uint chan_index;
1844 union tgsi_exec_channel r[8];
1845
1846 (*pc)++;
1847
1848 switch (inst->Instruction.Opcode) {
1849 case TGSI_OPCODE_ARL:
1850 /* TGSI_OPCODE_FLOOR */
1851 /* TGSI_OPCODE_FLR */
1852 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1853 FETCH( &r[0], 0, chan_index );
1854 micro_flr( &r[0], &r[0] );
1855 STORE( &r[0], 0, chan_index );
1856 }
1857 break;
1858
1859 case TGSI_OPCODE_MOV:
1860 case TGSI_OPCODE_SWZ:
1861 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1862 FETCH( &r[0], 0, chan_index );
1863 STORE( &r[0], 0, chan_index );
1864 }
1865 break;
1866
1867 case TGSI_OPCODE_LIT:
1868 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1869 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1870 }
1871
1872 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1873 FETCH( &r[0], 0, CHAN_X );
1874 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1875 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1876 STORE( &r[0], 0, CHAN_Y );
1877 }
1878
1879 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1880 FETCH( &r[1], 0, CHAN_Y );
1881 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1882
1883 FETCH( &r[2], 0, CHAN_W );
1884 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1885 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1886 micro_pow( &r[1], &r[1], &r[2] );
1887 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1888 STORE( &r[0], 0, CHAN_Z );
1889 }
1890 }
1891
1892 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1893 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1894 }
1895 break;
1896
1897 case TGSI_OPCODE_RCP:
1898 /* TGSI_OPCODE_RECIP */
1899 FETCH( &r[0], 0, CHAN_X );
1900 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1901 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1902 STORE( &r[0], 0, chan_index );
1903 }
1904 break;
1905
1906 case TGSI_OPCODE_RSQ:
1907 /* TGSI_OPCODE_RECIPSQRT */
1908 FETCH( &r[0], 0, CHAN_X );
1909 micro_abs( &r[0], &r[0] );
1910 micro_sqrt( &r[0], &r[0] );
1911 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1912 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1913 STORE( &r[0], 0, chan_index );
1914 }
1915 break;
1916
1917 case TGSI_OPCODE_EXP:
1918 FETCH( &r[0], 0, CHAN_X );
1919 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1920 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1921 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1922 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1923 }
1924 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1925 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1926 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1927 }
1928 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1929 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1930 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1931 }
1932 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1933 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1934 }
1935 break;
1936
1937 case TGSI_OPCODE_LOG:
1938 FETCH( &r[0], 0, CHAN_X );
1939 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1940 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1941 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1942 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1943 STORE( &r[0], 0, CHAN_X );
1944 }
1945 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1946 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1947 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1948 STORE( &r[0], 0, CHAN_Y );
1949 }
1950 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1951 STORE( &r[1], 0, CHAN_Z );
1952 }
1953 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1954 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1955 }
1956 break;
1957
1958 case TGSI_OPCODE_MUL:
1959 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1960 {
1961 FETCH(&r[0], 0, chan_index);
1962 FETCH(&r[1], 1, chan_index);
1963
1964 micro_mul( &r[0], &r[0], &r[1] );
1965
1966 STORE(&r[0], 0, chan_index);
1967 }
1968 break;
1969
1970 case TGSI_OPCODE_ADD:
1971 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1972 FETCH( &r[0], 0, chan_index );
1973 FETCH( &r[1], 1, chan_index );
1974 micro_add( &r[0], &r[0], &r[1] );
1975 STORE( &r[0], 0, chan_index );
1976 }
1977 break;
1978
1979 case TGSI_OPCODE_DP3:
1980 /* TGSI_OPCODE_DOT3 */
1981 FETCH( &r[0], 0, CHAN_X );
1982 FETCH( &r[1], 1, CHAN_X );
1983 micro_mul( &r[0], &r[0], &r[1] );
1984
1985 FETCH( &r[1], 0, CHAN_Y );
1986 FETCH( &r[2], 1, CHAN_Y );
1987 micro_mul( &r[1], &r[1], &r[2] );
1988 micro_add( &r[0], &r[0], &r[1] );
1989
1990 FETCH( &r[1], 0, CHAN_Z );
1991 FETCH( &r[2], 1, CHAN_Z );
1992 micro_mul( &r[1], &r[1], &r[2] );
1993 micro_add( &r[0], &r[0], &r[1] );
1994
1995 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1996 STORE( &r[0], 0, chan_index );
1997 }
1998 break;
1999
2000 case TGSI_OPCODE_DP4:
2001 /* TGSI_OPCODE_DOT4 */
2002 FETCH(&r[0], 0, CHAN_X);
2003 FETCH(&r[1], 1, CHAN_X);
2004
2005 micro_mul( &r[0], &r[0], &r[1] );
2006
2007 FETCH(&r[1], 0, CHAN_Y);
2008 FETCH(&r[2], 1, CHAN_Y);
2009
2010 micro_mul( &r[1], &r[1], &r[2] );
2011 micro_add( &r[0], &r[0], &r[1] );
2012
2013 FETCH(&r[1], 0, CHAN_Z);
2014 FETCH(&r[2], 1, CHAN_Z);
2015
2016 micro_mul( &r[1], &r[1], &r[2] );
2017 micro_add( &r[0], &r[0], &r[1] );
2018
2019 FETCH(&r[1], 0, CHAN_W);
2020 FETCH(&r[2], 1, CHAN_W);
2021
2022 micro_mul( &r[1], &r[1], &r[2] );
2023 micro_add( &r[0], &r[0], &r[1] );
2024
2025 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2026 STORE( &r[0], 0, chan_index );
2027 }
2028 break;
2029
2030 case TGSI_OPCODE_DST:
2031 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2032 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2033 }
2034
2035 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2036 FETCH( &r[0], 0, CHAN_Y );
2037 FETCH( &r[1], 1, CHAN_Y);
2038 micro_mul( &r[0], &r[0], &r[1] );
2039 STORE( &r[0], 0, CHAN_Y );
2040 }
2041
2042 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2043 FETCH( &r[0], 0, CHAN_Z );
2044 STORE( &r[0], 0, CHAN_Z );
2045 }
2046
2047 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2048 FETCH( &r[0], 1, CHAN_W );
2049 STORE( &r[0], 0, CHAN_W );
2050 }
2051 break;
2052
2053 case TGSI_OPCODE_MIN:
2054 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2055 FETCH(&r[0], 0, chan_index);
2056 FETCH(&r[1], 1, chan_index);
2057
2058 /* XXX use micro_min()?? */
2059 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
2060
2061 STORE(&r[0], 0, chan_index);
2062 }
2063 break;
2064
2065 case TGSI_OPCODE_MAX:
2066 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2067 FETCH(&r[0], 0, chan_index);
2068 FETCH(&r[1], 1, chan_index);
2069
2070 /* XXX use micro_max()?? */
2071 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2072
2073 STORE(&r[0], 0, chan_index );
2074 }
2075 break;
2076
2077 case TGSI_OPCODE_SLT:
2078 /* TGSI_OPCODE_SETLT */
2079 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2080 FETCH( &r[0], 0, chan_index );
2081 FETCH( &r[1], 1, chan_index );
2082 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2083 STORE( &r[0], 0, chan_index );
2084 }
2085 break;
2086
2087 case TGSI_OPCODE_SGE:
2088 /* TGSI_OPCODE_SETGE */
2089 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2090 FETCH( &r[0], 0, chan_index );
2091 FETCH( &r[1], 1, chan_index );
2092 micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2093 STORE( &r[0], 0, chan_index );
2094 }
2095 break;
2096
2097 case TGSI_OPCODE_MAD:
2098 /* TGSI_OPCODE_MADD */
2099 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2100 FETCH( &r[0], 0, chan_index );
2101 FETCH( &r[1], 1, chan_index );
2102 micro_mul( &r[0], &r[0], &r[1] );
2103 FETCH( &r[1], 2, chan_index );
2104 micro_add( &r[0], &r[0], &r[1] );
2105 STORE( &r[0], 0, chan_index );
2106 }
2107 break;
2108
2109 case TGSI_OPCODE_SUB:
2110 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2111 FETCH(&r[0], 0, chan_index);
2112 FETCH(&r[1], 1, chan_index);
2113
2114 micro_sub( &r[0], &r[0], &r[1] );
2115
2116 STORE(&r[0], 0, chan_index);
2117 }
2118 break;
2119
2120 case TGSI_OPCODE_LERP:
2121 /* TGSI_OPCODE_LRP */
2122 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2123 FETCH(&r[0], 0, chan_index);
2124 FETCH(&r[1], 1, chan_index);
2125 FETCH(&r[2], 2, chan_index);
2126
2127 micro_sub( &r[1], &r[1], &r[2] );
2128 micro_mul( &r[0], &r[0], &r[1] );
2129 micro_add( &r[0], &r[0], &r[2] );
2130
2131 STORE(&r[0], 0, chan_index);
2132 }
2133 break;
2134
2135 case TGSI_OPCODE_CND:
2136 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2137 FETCH(&r[0], 0, chan_index);
2138 FETCH(&r[1], 1, chan_index);
2139 FETCH(&r[2], 2, chan_index);
2140 micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2141 STORE(&r[0], 0, chan_index);
2142 }
2143 break;
2144
2145 case TGSI_OPCODE_CND0:
2146 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2147 FETCH(&r[0], 0, chan_index);
2148 FETCH(&r[1], 1, chan_index);
2149 FETCH(&r[2], 2, chan_index);
2150 micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
2151 STORE(&r[0], 0, chan_index);
2152 }
2153 break;
2154
2155 case TGSI_OPCODE_DOT2ADD:
2156 /* TGSI_OPCODE_DP2A */
2157 FETCH( &r[0], 0, CHAN_X );
2158 FETCH( &r[1], 1, CHAN_X );
2159 micro_mul( &r[0], &r[0], &r[1] );
2160
2161 FETCH( &r[1], 0, CHAN_Y );
2162 FETCH( &r[2], 1, CHAN_Y );
2163 micro_mul( &r[1], &r[1], &r[2] );
2164 micro_add( &r[0], &r[0], &r[1] );
2165
2166 FETCH( &r[2], 2, CHAN_X );
2167 micro_add( &r[0], &r[0], &r[2] );
2168
2169 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2170 STORE( &r[0], 0, chan_index );
2171 }
2172 break;
2173
2174 case TGSI_OPCODE_INDEX:
2175 /* XXX: considered for removal */
2176 assert (0);
2177 break;
2178
2179 case TGSI_OPCODE_NEGATE:
2180 /* XXX: considered for removal */
2181 assert (0);
2182 break;
2183
2184 case TGSI_OPCODE_FRAC:
2185 /* TGSI_OPCODE_FRC */
2186 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2187 FETCH( &r[0], 0, chan_index );
2188 micro_frc( &r[0], &r[0] );
2189 STORE( &r[0], 0, chan_index );
2190 }
2191 break;
2192
2193 case TGSI_OPCODE_CLAMP:
2194 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2195 FETCH(&r[0], 0, chan_index);
2196 FETCH(&r[1], 1, chan_index);
2197 micro_max(&r[0], &r[0], &r[1]);
2198 FETCH(&r[1], 2, chan_index);
2199 micro_min(&r[0], &r[0], &r[1]);
2200 STORE(&r[0], 0, chan_index);
2201 }
2202 break;
2203
2204 case TGSI_OPCODE_ROUND:
2205 case TGSI_OPCODE_ARR:
2206 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2207 FETCH( &r[0], 0, chan_index );
2208 micro_rnd( &r[0], &r[0] );
2209 STORE( &r[0], 0, chan_index );
2210 }
2211 break;
2212
2213 case TGSI_OPCODE_EXPBASE2:
2214 /* TGSI_OPCODE_EX2 */
2215 FETCH(&r[0], 0, CHAN_X);
2216
2217 #if FAST_MATH
2218 micro_exp2( &r[0], &r[0] );
2219 #else
2220 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2221 #endif
2222
2223 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2224 STORE( &r[0], 0, chan_index );
2225 }
2226 break;
2227
2228 case TGSI_OPCODE_LOGBASE2:
2229 /* TGSI_OPCODE_LG2 */
2230 FETCH( &r[0], 0, CHAN_X );
2231 micro_lg2( &r[0], &r[0] );
2232 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2233 STORE( &r[0], 0, chan_index );
2234 }
2235 break;
2236
2237 case TGSI_OPCODE_POWER:
2238 /* TGSI_OPCODE_POW */
2239 FETCH(&r[0], 0, CHAN_X);
2240 FETCH(&r[1], 1, CHAN_X);
2241
2242 micro_pow( &r[0], &r[0], &r[1] );
2243
2244 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2245 STORE( &r[0], 0, chan_index );
2246 }
2247 break;
2248
2249 case TGSI_OPCODE_CROSSPRODUCT:
2250 /* TGSI_OPCODE_XPD */
2251 FETCH(&r[0], 0, CHAN_Y);
2252 FETCH(&r[1], 1, CHAN_Z);
2253
2254 micro_mul( &r[2], &r[0], &r[1] );
2255
2256 FETCH(&r[3], 0, CHAN_Z);
2257 FETCH(&r[4], 1, CHAN_Y);
2258
2259 micro_mul( &r[5], &r[3], &r[4] );
2260 micro_sub( &r[2], &r[2], &r[5] );
2261
2262 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2263 STORE( &r[2], 0, CHAN_X );
2264 }
2265
2266 FETCH(&r[2], 1, CHAN_X);
2267
2268 micro_mul( &r[3], &r[3], &r[2] );
2269
2270 FETCH(&r[5], 0, CHAN_X);
2271
2272 micro_mul( &r[1], &r[1], &r[5] );
2273 micro_sub( &r[3], &r[3], &r[1] );
2274
2275 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2276 STORE( &r[3], 0, CHAN_Y );
2277 }
2278
2279 micro_mul( &r[5], &r[5], &r[4] );
2280 micro_mul( &r[0], &r[0], &r[2] );
2281 micro_sub( &r[5], &r[5], &r[0] );
2282
2283 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2284 STORE( &r[5], 0, CHAN_Z );
2285 }
2286
2287 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2288 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2289 }
2290 break;
2291
2292 case TGSI_OPCODE_MULTIPLYMATRIX:
2293 /* XXX: considered for removal */
2294 assert (0);
2295 break;
2296
2297 case TGSI_OPCODE_ABS:
2298 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2299 FETCH(&r[0], 0, chan_index);
2300
2301 micro_abs( &r[0], &r[0] );
2302
2303 STORE(&r[0], 0, chan_index);
2304 }
2305 break;
2306
2307 case TGSI_OPCODE_RCC:
2308 FETCH(&r[0], 0, CHAN_X);
2309 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2310 micro_float_clamp(&r[0], &r[0]);
2311 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2312 STORE(&r[0], 0, chan_index);
2313 }
2314 break;
2315
2316 case TGSI_OPCODE_DPH:
2317 FETCH(&r[0], 0, CHAN_X);
2318 FETCH(&r[1], 1, CHAN_X);
2319
2320 micro_mul( &r[0], &r[0], &r[1] );
2321
2322 FETCH(&r[1], 0, CHAN_Y);
2323 FETCH(&r[2], 1, CHAN_Y);
2324
2325 micro_mul( &r[1], &r[1], &r[2] );
2326 micro_add( &r[0], &r[0], &r[1] );
2327
2328 FETCH(&r[1], 0, CHAN_Z);
2329 FETCH(&r[2], 1, CHAN_Z);
2330
2331 micro_mul( &r[1], &r[1], &r[2] );
2332 micro_add( &r[0], &r[0], &r[1] );
2333
2334 FETCH(&r[1], 1, CHAN_W);
2335
2336 micro_add( &r[0], &r[0], &r[1] );
2337
2338 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2339 STORE( &r[0], 0, chan_index );
2340 }
2341 break;
2342
2343 case TGSI_OPCODE_COS:
2344 FETCH(&r[0], 0, CHAN_X);
2345
2346 micro_cos( &r[0], &r[0] );
2347
2348 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2349 STORE( &r[0], 0, chan_index );
2350 }
2351 break;
2352
2353 case TGSI_OPCODE_DDX:
2354 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2355 FETCH( &r[0], 0, chan_index );
2356 micro_ddx( &r[0], &r[0] );
2357 STORE( &r[0], 0, chan_index );
2358 }
2359 break;
2360
2361 case TGSI_OPCODE_DDY:
2362 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2363 FETCH( &r[0], 0, chan_index );
2364 micro_ddy( &r[0], &r[0] );
2365 STORE( &r[0], 0, chan_index );
2366 }
2367 break;
2368
2369 case TGSI_OPCODE_KILP:
2370 exec_kilp (mach, inst);
2371 break;
2372
2373 case TGSI_OPCODE_KIL:
2374 exec_kil (mach, inst);
2375 break;
2376
2377 case TGSI_OPCODE_PK2H:
2378 assert (0);
2379 break;
2380
2381 case TGSI_OPCODE_PK2US:
2382 assert (0);
2383 break;
2384
2385 case TGSI_OPCODE_PK4B:
2386 assert (0);
2387 break;
2388
2389 case TGSI_OPCODE_PK4UB:
2390 assert (0);
2391 break;
2392
2393 case TGSI_OPCODE_RFL:
2394 assert (0);
2395 break;
2396
2397 case TGSI_OPCODE_SEQ:
2398 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2399 FETCH( &r[0], 0, chan_index );
2400 FETCH( &r[1], 1, chan_index );
2401 micro_eq( &r[0], &r[0], &r[1],
2402 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2403 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2404 STORE( &r[0], 0, chan_index );
2405 }
2406 break;
2407
2408 case TGSI_OPCODE_SFL:
2409 assert (0);
2410 break;
2411
2412 case TGSI_OPCODE_SGT:
2413 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2414 FETCH( &r[0], 0, chan_index );
2415 FETCH( &r[1], 1, chan_index );
2416 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2417 STORE( &r[0], 0, chan_index );
2418 }
2419 break;
2420
2421 case TGSI_OPCODE_SIN:
2422 FETCH( &r[0], 0, CHAN_X );
2423 micro_sin( &r[0], &r[0] );
2424 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2425 STORE( &r[0], 0, chan_index );
2426 }
2427 break;
2428
2429 case TGSI_OPCODE_SLE:
2430 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2431 FETCH( &r[0], 0, chan_index );
2432 FETCH( &r[1], 1, chan_index );
2433 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2434 STORE( &r[0], 0, chan_index );
2435 }
2436 break;
2437
2438 case TGSI_OPCODE_SNE:
2439 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2440 FETCH( &r[0], 0, chan_index );
2441 FETCH( &r[1], 1, chan_index );
2442 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2443 STORE( &r[0], 0, chan_index );
2444 }
2445 break;
2446
2447 case TGSI_OPCODE_STR:
2448 assert (0);
2449 break;
2450
2451 case TGSI_OPCODE_TEX:
2452 /* simple texture lookup */
2453 /* src[0] = texcoord */
2454 /* src[1] = sampler unit */
2455 exec_tex(mach, inst, FALSE, FALSE);
2456 break;
2457
2458 case TGSI_OPCODE_TXB:
2459 /* Texture lookup with lod bias */
2460 /* src[0] = texcoord (src[0].w = LOD bias) */
2461 /* src[1] = sampler unit */
2462 exec_tex(mach, inst, TRUE, FALSE);
2463 break;
2464
2465 case TGSI_OPCODE_TXD:
2466 /* Texture lookup with explicit partial derivatives */
2467 /* src[0] = texcoord */
2468 /* src[1] = d[strq]/dx */
2469 /* src[2] = d[strq]/dy */
2470 /* src[3] = sampler unit */
2471 assert (0);
2472 break;
2473
2474 case TGSI_OPCODE_TXL:
2475 /* Texture lookup with explicit LOD */
2476 /* src[0] = texcoord (src[0].w = LOD) */
2477 /* src[1] = sampler unit */
2478 exec_tex(mach, inst, TRUE, FALSE);
2479 break;
2480
2481 case TGSI_OPCODE_TXP:
2482 /* Texture lookup with projection */
2483 /* src[0] = texcoord (src[0].w = projection) */
2484 /* src[1] = sampler unit */
2485 exec_tex(mach, inst, FALSE, TRUE);
2486 break;
2487
2488 case TGSI_OPCODE_UP2H:
2489 assert (0);
2490 break;
2491
2492 case TGSI_OPCODE_UP2US:
2493 assert (0);
2494 break;
2495
2496 case TGSI_OPCODE_UP4B:
2497 assert (0);
2498 break;
2499
2500 case TGSI_OPCODE_UP4UB:
2501 assert (0);
2502 break;
2503
2504 case TGSI_OPCODE_X2D:
2505 assert (0);
2506 break;
2507
2508 case TGSI_OPCODE_ARA:
2509 assert (0);
2510 break;
2511
2512 case TGSI_OPCODE_BRA:
2513 assert (0);
2514 break;
2515
2516 case TGSI_OPCODE_CAL:
2517 /* skip the call if no execution channels are enabled */
2518 if (mach->ExecMask) {
2519 /* do the call */
2520
2521 /* push the Cond, Loop, Cont stacks */
2522 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2523 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2524 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2525 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2526 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2527 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2528
2529 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2530 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2531
2532 /* note that PC was already incremented above */
2533 mach->CallStack[mach->CallStackTop++] = *pc;
2534 *pc = inst->InstructionExtLabel.Label;
2535 }
2536 break;
2537
2538 case TGSI_OPCODE_RET:
2539 mach->FuncMask &= ~mach->ExecMask;
2540 UPDATE_EXEC_MASK(mach);
2541
2542 if (mach->FuncMask == 0x0) {
2543 /* really return now (otherwise, keep executing) */
2544
2545 if (mach->CallStackTop == 0) {
2546 /* returning from main() */
2547 *pc = -1;
2548 return;
2549 }
2550 *pc = mach->CallStack[--mach->CallStackTop];
2551
2552 /* pop the Cond, Loop, Cont stacks */
2553 assert(mach->CondStackTop > 0);
2554 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2555 assert(mach->LoopStackTop > 0);
2556 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2557 assert(mach->ContStackTop > 0);
2558 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2559 assert(mach->FuncStackTop > 0);
2560 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2561
2562 UPDATE_EXEC_MASK(mach);
2563 }
2564 break;
2565
2566 case TGSI_OPCODE_SSG:
2567 /* TGSI_OPCODE_SGN */
2568 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2569 FETCH( &r[0], 0, chan_index );
2570 micro_sgn( &r[0], &r[0] );
2571 STORE( &r[0], 0, chan_index );
2572 }
2573 break;
2574
2575 case TGSI_OPCODE_CMP:
2576 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2577 FETCH(&r[0], 0, chan_index);
2578 FETCH(&r[1], 1, chan_index);
2579 FETCH(&r[2], 2, chan_index);
2580
2581 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2582
2583 STORE(&r[0], 0, chan_index);
2584 }
2585 break;
2586
2587 case TGSI_OPCODE_SCS:
2588 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2589 FETCH( &r[0], 0, CHAN_X );
2590 }
2591 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2592 micro_cos( &r[1], &r[0] );
2593 STORE( &r[1], 0, CHAN_X );
2594 }
2595 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2596 micro_sin( &r[1], &r[0] );
2597 STORE( &r[1], 0, CHAN_Y );
2598 }
2599 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2600 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2601 }
2602 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2603 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2604 }
2605 break;
2606
2607 case TGSI_OPCODE_NRM:
2608 /* 3-component vector normalize */
2609 {
2610 union tgsi_exec_channel tmp, dot;
2611
2612 /* tmp = dp3(src0, src0): */
2613 FETCH( &r[0], 0, CHAN_X );
2614 micro_mul( &tmp, &r[0], &r[0] );
2615
2616 FETCH( &r[1], 0, CHAN_Y );
2617 micro_mul( &dot, &r[1], &r[1] );
2618 micro_add( &tmp, &tmp, &dot );
2619
2620 FETCH( &r[2], 0, CHAN_Z );
2621 micro_mul( &dot, &r[2], &r[2] );
2622 micro_add( &tmp, &tmp, &dot );
2623
2624 /* tmp = 1 / sqrt(tmp) */
2625 micro_sqrt( &tmp, &tmp );
2626 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2627
2628 /* note: w channel is undefined */
2629 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2630 /* chan = chan * tmp */
2631 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2632 STORE( &r[chan_index], 0, chan_index );
2633 }
2634 }
2635 break;
2636
2637 case TGSI_OPCODE_NRM4:
2638 /* 4-component vector normalize */
2639 {
2640 union tgsi_exec_channel tmp, dot;
2641
2642 /* tmp = dp4(src0, src0): */
2643 FETCH( &r[0], 0, CHAN_X );
2644 micro_mul( &tmp, &r[0], &r[0] );
2645
2646 FETCH( &r[1], 0, CHAN_Y );
2647 micro_mul( &dot, &r[1], &r[1] );
2648 micro_add( &tmp, &tmp, &dot );
2649
2650 FETCH( &r[2], 0, CHAN_Z );
2651 micro_mul( &dot, &r[2], &r[2] );
2652 micro_add( &tmp, &tmp, &dot );
2653
2654 FETCH( &r[3], 0, CHAN_W );
2655 micro_mul( &dot, &r[3], &r[3] );
2656 micro_add( &tmp, &tmp, &dot );
2657
2658 /* tmp = 1 / sqrt(tmp) */
2659 micro_sqrt( &tmp, &tmp );
2660 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2661
2662 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2663 /* chan = chan * tmp */
2664 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2665 STORE( &r[chan_index], 0, chan_index );
2666 }
2667 }
2668 break;
2669
2670 case TGSI_OPCODE_DIV:
2671 assert( 0 );
2672 break;
2673
2674 case TGSI_OPCODE_DP2:
2675 FETCH( &r[0], 0, CHAN_X );
2676 FETCH( &r[1], 1, CHAN_X );
2677 micro_mul( &r[0], &r[0], &r[1] );
2678
2679 FETCH( &r[1], 0, CHAN_Y );
2680 FETCH( &r[2], 1, CHAN_Y );
2681 micro_mul( &r[1], &r[1], &r[2] );
2682 micro_add( &r[0], &r[0], &r[1] );
2683
2684 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2685 STORE( &r[0], 0, chan_index );
2686 }
2687 break;
2688
2689 case TGSI_OPCODE_IF:
2690 /* push CondMask */
2691 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2692 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2693 FETCH( &r[0], 0, CHAN_X );
2694 /* update CondMask */
2695 if( ! r[0].u[0] ) {
2696 mach->CondMask &= ~0x1;
2697 }
2698 if( ! r[0].u[1] ) {
2699 mach->CondMask &= ~0x2;
2700 }
2701 if( ! r[0].u[2] ) {
2702 mach->CondMask &= ~0x4;
2703 }
2704 if( ! r[0].u[3] ) {
2705 mach->CondMask &= ~0x8;
2706 }
2707 UPDATE_EXEC_MASK(mach);
2708 /* Todo: If CondMask==0, jump to ELSE */
2709 break;
2710
2711 case TGSI_OPCODE_ELSE:
2712 /* invert CondMask wrt previous mask */
2713 {
2714 uint prevMask;
2715 assert(mach->CondStackTop > 0);
2716 prevMask = mach->CondStack[mach->CondStackTop - 1];
2717 mach->CondMask = ~mach->CondMask & prevMask;
2718 UPDATE_EXEC_MASK(mach);
2719 /* Todo: If CondMask==0, jump to ENDIF */
2720 }
2721 break;
2722
2723 case TGSI_OPCODE_ENDIF:
2724 /* pop CondMask */
2725 assert(mach->CondStackTop > 0);
2726 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2727 UPDATE_EXEC_MASK(mach);
2728 break;
2729
2730 case TGSI_OPCODE_END:
2731 /* halt execution */
2732 *pc = -1;
2733 break;
2734
2735 case TGSI_OPCODE_REP:
2736 assert (0);
2737 break;
2738
2739 case TGSI_OPCODE_ENDREP:
2740 assert (0);
2741 break;
2742
2743 case TGSI_OPCODE_PUSHA:
2744 assert (0);
2745 break;
2746
2747 case TGSI_OPCODE_POPA:
2748 assert (0);
2749 break;
2750
2751 case TGSI_OPCODE_CEIL:
2752 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2753 FETCH( &r[0], 0, chan_index );
2754 micro_ceil( &r[0], &r[0] );
2755 STORE( &r[0], 0, chan_index );
2756 }
2757 break;
2758
2759 case TGSI_OPCODE_I2F:
2760 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2761 FETCH( &r[0], 0, chan_index );
2762 micro_i2f( &r[0], &r[0] );
2763 STORE( &r[0], 0, chan_index );
2764 }
2765 break;
2766
2767 case TGSI_OPCODE_NOT:
2768 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2769 FETCH( &r[0], 0, chan_index );
2770 micro_not( &r[0], &r[0] );
2771 STORE( &r[0], 0, chan_index );
2772 }
2773 break;
2774
2775 case TGSI_OPCODE_TRUNC:
2776 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2777 FETCH( &r[0], 0, chan_index );
2778 micro_trunc( &r[0], &r[0] );
2779 STORE( &r[0], 0, chan_index );
2780 }
2781 break;
2782
2783 case TGSI_OPCODE_SHL:
2784 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2785 FETCH( &r[0], 0, chan_index );
2786 FETCH( &r[1], 1, chan_index );
2787 micro_shl( &r[0], &r[0], &r[1] );
2788 STORE( &r[0], 0, chan_index );
2789 }
2790 break;
2791
2792 case TGSI_OPCODE_SHR:
2793 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2794 FETCH( &r[0], 0, chan_index );
2795 FETCH( &r[1], 1, chan_index );
2796 micro_ishr( &r[0], &r[0], &r[1] );
2797 STORE( &r[0], 0, chan_index );
2798 }
2799 break;
2800
2801 case TGSI_OPCODE_AND:
2802 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2803 FETCH( &r[0], 0, chan_index );
2804 FETCH( &r[1], 1, chan_index );
2805 micro_and( &r[0], &r[0], &r[1] );
2806 STORE( &r[0], 0, chan_index );
2807 }
2808 break;
2809
2810 case TGSI_OPCODE_OR:
2811 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2812 FETCH( &r[0], 0, chan_index );
2813 FETCH( &r[1], 1, chan_index );
2814 micro_or( &r[0], &r[0], &r[1] );
2815 STORE( &r[0], 0, chan_index );
2816 }
2817 break;
2818
2819 case TGSI_OPCODE_MOD:
2820 assert (0);
2821 break;
2822
2823 case TGSI_OPCODE_XOR:
2824 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2825 FETCH( &r[0], 0, chan_index );
2826 FETCH( &r[1], 1, chan_index );
2827 micro_xor( &r[0], &r[0], &r[1] );
2828 STORE( &r[0], 0, chan_index );
2829 }
2830 break;
2831
2832 case TGSI_OPCODE_SAD:
2833 assert (0);
2834 break;
2835
2836 case TGSI_OPCODE_TXF:
2837 assert (0);
2838 break;
2839
2840 case TGSI_OPCODE_TXQ:
2841 assert (0);
2842 break;
2843
2844 case TGSI_OPCODE_EMIT:
2845 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2846 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2847 break;
2848
2849 case TGSI_OPCODE_ENDPRIM:
2850 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2851 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2852 break;
2853
2854 case TGSI_OPCODE_LOOP:
2855 /* fall-through (for now) */
2856 case TGSI_OPCODE_BGNLOOP2:
2857 /* push LoopMask and ContMasks */
2858 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2859 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2860 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2861 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2862 break;
2863
2864 case TGSI_OPCODE_ENDLOOP:
2865 /* fall-through (for now at least) */
2866 case TGSI_OPCODE_ENDLOOP2:
2867 /* Restore ContMask, but don't pop */
2868 assert(mach->ContStackTop > 0);
2869 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2870 UPDATE_EXEC_MASK(mach);
2871 if (mach->ExecMask) {
2872 /* repeat loop: jump to instruction just past BGNLOOP */
2873 *pc = inst->InstructionExtLabel.Label + 1;
2874 }
2875 else {
2876 /* exit loop: pop LoopMask */
2877 assert(mach->LoopStackTop > 0);
2878 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2879 /* pop ContMask */
2880 assert(mach->ContStackTop > 0);
2881 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2882 }
2883 UPDATE_EXEC_MASK(mach);
2884 break;
2885
2886 case TGSI_OPCODE_BRK:
2887 /* turn off loop channels for each enabled exec channel */
2888 mach->LoopMask &= ~mach->ExecMask;
2889 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2890 UPDATE_EXEC_MASK(mach);
2891 break;
2892
2893 case TGSI_OPCODE_CONT:
2894 /* turn off cont channels for each enabled exec channel */
2895 mach->ContMask &= ~mach->ExecMask;
2896 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2897 UPDATE_EXEC_MASK(mach);
2898 break;
2899
2900 case TGSI_OPCODE_BGNSUB:
2901 /* no-op */
2902 break;
2903
2904 case TGSI_OPCODE_ENDSUB:
2905 /* no-op */
2906 break;
2907
2908 case TGSI_OPCODE_NOISE1:
2909 assert( 0 );
2910 break;
2911
2912 case TGSI_OPCODE_NOISE2:
2913 assert( 0 );
2914 break;
2915
2916 case TGSI_OPCODE_NOISE3:
2917 assert( 0 );
2918 break;
2919
2920 case TGSI_OPCODE_NOISE4:
2921 assert( 0 );
2922 break;
2923
2924 case TGSI_OPCODE_NOP:
2925 break;
2926
2927 default:
2928 assert( 0 );
2929 }
2930 }
2931
2932
2933 /**
2934 * Run TGSI interpreter.
2935 * \return bitmask of "alive" quad components
2936 */
2937 uint
2938 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2939 {
2940 uint i;
2941 int pc = 0;
2942
2943 mach->CondMask = 0xf;
2944 mach->LoopMask = 0xf;
2945 mach->ContMask = 0xf;
2946 mach->FuncMask = 0xf;
2947 mach->ExecMask = 0xf;
2948
2949 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2950 assert(mach->CondStackTop == 0);
2951 assert(mach->LoopStackTop == 0);
2952 assert(mach->ContStackTop == 0);
2953 assert(mach->CallStackTop == 0);
2954
2955 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2956 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2957
2958 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2959 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2960 mach->Primitives[0] = 0;
2961 }
2962
2963 for (i = 0; i < QUAD_SIZE; i++) {
2964 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2965 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2966 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2967 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2968 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2969 }
2970
2971 /* execute declarations (interpolants) */
2972 for (i = 0; i < mach->NumDeclarations; i++) {
2973 exec_declaration( mach, mach->Declarations+i );
2974 }
2975
2976 /* execute instructions, until pc is set to -1 */
2977 while (pc != -1) {
2978 assert(pc < (int) mach->NumInstructions);
2979 exec_instruction( mach, mach->Instructions + pc, &pc );
2980 }
2981
2982 #if 0
2983 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2984 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2985 /*
2986 * Scale back depth component.
2987 */
2988 for (i = 0; i < 4; i++)
2989 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2990 }
2991 #endif
2992
2993 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2994 }