tgsi: Implement CLAMP opcode.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is the AND of four other masks (CondMask, LoopMask, ContMask
44 * and FuncMask; see UPDATE_EXEC_MASK). The first three are controlled by the
45 * flow control instructions (namely IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_parse.h"
57 #include "tgsi/tgsi_util.h"
58 #include "tgsi_exec.h"
59 #include "util/u_memory.h"
60 #include "util/u_math.h"
61
/*
 * When non-zero, micro_exp2(), micro_lg2() and micro_pow() call the
 * util_fast_*() helpers instead of the libm functions.
 */
#define FAST_MATH 1

/*
 * Lane numbers inside a quad, named after the position they occupy.
 * micro_ddx() and micro_ddy() use them to pick the lanes to subtract.
 */
#define TILE_TOP_LEFT      0
#define TILE_TOP_RIGHT     1
#define TILE_BOTTOM_LEFT   2
#define TILE_BOTTOM_RIGHT  3

/* Channel (vector component) numbers. */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3

/*
 * Short aliases for the utility registers declared as TGSI_EXEC_TEMP_*.
 * The _I suffix is the register index, the _C suffix is the channel.
 */
#define TEMP_0_I            TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C            TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I           TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C           TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I           TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C           TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I           TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C           TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I            TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C            TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I            TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C            TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I          TGSI_EXEC_TEMP_128_I
#define TEMP_128_C          TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I         TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C         TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I      TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C      TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I       TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C       TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I    TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C    TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I           TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C           TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I            TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C            TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I         TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C         TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0             TGSI_EXEC_TEMP_R0
106
/**
 * Non-zero when channel CHAN is set in the write mask of the first
 * destination register of instruction INST.
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same as IS_CHANNEL_ENABLED, but for the second destination register. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel that
 * is enabled in the write mask of INST's first destination register.
 * CHAN must be an assignable integer variable.
 *
 * The expansion ends in a bare "if", so do not follow the loop body with
 * an "else".
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same as FOR_EACH_ENABLED_CHANNEL, but for the second destination. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))


/**
 * Recompute the execution mask as the AND of the condition, loop,
 * continue and function masks.
 *
 * MACH is a pointer expression; it is parenthesized at every use and the
 * whole expansion is wrapped so the macro behaves like a single
 * expression whatever argument is passed (e.g. "&machine").
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
125
126 /**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131 void
132 tgsi_exec_machine_bind_shader(
133 struct tgsi_exec_machine *mach,
134 const struct tgsi_token *tokens,
135 uint numSamplers,
136 struct tgsi_sampler **samplers)
137 {
138 uint k;
139 struct tgsi_parse_context parse;
140 struct tgsi_exec_labels *labels = &mach->Labels;
141 struct tgsi_full_instruction *instructions;
142 struct tgsi_full_declaration *declarations;
143 uint maxInstructions = 10, numInstructions = 0;
144 uint maxDeclarations = 10, numDeclarations = 0;
145 uint instno = 0;
146
147 #if 0
148 tgsi_dump(tokens, 0);
149 #endif
150
151 util_init_math();
152
153 mach->Tokens = tokens;
154 mach->Samplers = samplers;
155
156 k = tgsi_parse_init (&parse, mach->Tokens);
157 if (k != TGSI_PARSE_OK) {
158 debug_printf( "Problem parsing!\n" );
159 return;
160 }
161
162 mach->Processor = parse.FullHeader.Processor.Processor;
163 mach->ImmLimit = 0;
164 labels->count = 0;
165
166 declarations = (struct tgsi_full_declaration *)
167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169 if (!declarations) {
170 return;
171 }
172
173 instructions = (struct tgsi_full_instruction *)
174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176 if (!instructions) {
177 FREE( declarations );
178 return;
179 }
180
181 while( !tgsi_parse_end_of_tokens( &parse ) ) {
182 uint pointer = parse.Position;
183 uint i;
184
185 tgsi_parse_token( &parse );
186 switch( parse.FullToken.Token.Type ) {
187 case TGSI_TOKEN_TYPE_DECLARATION:
188 /* save expanded declaration */
189 if (numDeclarations == maxDeclarations) {
190 declarations = REALLOC(declarations,
191 maxDeclarations
192 * sizeof(struct tgsi_full_declaration),
193 (maxDeclarations + 10)
194 * sizeof(struct tgsi_full_declaration));
195 maxDeclarations += 10;
196 }
197 memcpy(declarations + numDeclarations,
198 &parse.FullToken.FullDeclaration,
199 sizeof(declarations[0]));
200 numDeclarations++;
201 break;
202
203 case TGSI_TOKEN_TYPE_IMMEDIATE:
204 {
205 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
206 assert( size % 4 == 0 );
207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209 for( i = 0; i < size; i++ ) {
210 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212 }
213 mach->ImmLimit += size / 4;
214 }
215 break;
216
217 case TGSI_TOKEN_TYPE_INSTRUCTION:
218 assert( labels->count < MAX_LABELS );
219
220 labels->labels[labels->count][0] = instno;
221 labels->labels[labels->count][1] = pointer;
222 labels->count++;
223
224 /* save expanded instruction */
225 if (numInstructions == maxInstructions) {
226 instructions = REALLOC(instructions,
227 maxInstructions
228 * sizeof(struct tgsi_full_instruction),
229 (maxInstructions + 10)
230 * sizeof(struct tgsi_full_instruction));
231 maxInstructions += 10;
232 }
233 memcpy(instructions + numInstructions,
234 &parse.FullToken.FullInstruction,
235 sizeof(instructions[0]));
236 numInstructions++;
237 break;
238
239 default:
240 assert( 0 );
241 }
242 }
243 tgsi_parse_free (&parse);
244
245 if (mach->Declarations) {
246 FREE( mach->Declarations );
247 }
248 mach->Declarations = declarations;
249 mach->NumDeclarations = numDeclarations;
250
251 if (mach->Instructions) {
252 FREE( mach->Instructions );
253 }
254 mach->Instructions = instructions;
255 mach->NumInstructions = numInstructions;
256 }
257
258
259 void
260 tgsi_exec_machine_init(
261 struct tgsi_exec_machine *mach )
262 {
263 uint i;
264
265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268 /* Setup constants. */
269 for( i = 0; i < 4; i++ ) {
270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280 }
281 }
282
283
284 void
285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286 {
287 if (mach->Instructions) {
288 FREE(mach->Instructions);
289 mach->Instructions = NULL;
290 mach->NumInstructions = 0;
291 }
292 if (mach->Declarations) {
293 FREE(mach->Declarations);
294 mach->Declarations = NULL;
295 mach->NumDeclarations = 0;
296 }
297 }
298
299
300 static void
301 micro_abs(
302 union tgsi_exec_channel *dst,
303 const union tgsi_exec_channel *src )
304 {
305 dst->f[0] = fabsf( src->f[0] );
306 dst->f[1] = fabsf( src->f[1] );
307 dst->f[2] = fabsf( src->f[2] );
308 dst->f[3] = fabsf( src->f[3] );
309 }
310
311 static void
312 micro_add(
313 union tgsi_exec_channel *dst,
314 const union tgsi_exec_channel *src0,
315 const union tgsi_exec_channel *src1 )
316 {
317 dst->f[0] = src0->f[0] + src1->f[0];
318 dst->f[1] = src0->f[1] + src1->f[1];
319 dst->f[2] = src0->f[2] + src1->f[2];
320 dst->f[3] = src0->f[3] + src1->f[3];
321 }
322
323 #if 0
324 static void
325 micro_iadd(
326 union tgsi_exec_channel *dst,
327 const union tgsi_exec_channel *src0,
328 const union tgsi_exec_channel *src1 )
329 {
330 dst->i[0] = src0->i[0] + src1->i[0];
331 dst->i[1] = src0->i[1] + src1->i[1];
332 dst->i[2] = src0->i[2] + src1->i[2];
333 dst->i[3] = src0->i[3] + src1->i[3];
334 }
335 #endif
336
337 static void
338 micro_and(
339 union tgsi_exec_channel *dst,
340 const union tgsi_exec_channel *src0,
341 const union tgsi_exec_channel *src1 )
342 {
343 dst->u[0] = src0->u[0] & src1->u[0];
344 dst->u[1] = src0->u[1] & src1->u[1];
345 dst->u[2] = src0->u[2] & src1->u[2];
346 dst->u[3] = src0->u[3] & src1->u[3];
347 }
348
349 static void
350 micro_ceil(
351 union tgsi_exec_channel *dst,
352 const union tgsi_exec_channel *src )
353 {
354 dst->f[0] = ceilf( src->f[0] );
355 dst->f[1] = ceilf( src->f[1] );
356 dst->f[2] = ceilf( src->f[2] );
357 dst->f[3] = ceilf( src->f[3] );
358 }
359
360 static void
361 micro_cos(
362 union tgsi_exec_channel *dst,
363 const union tgsi_exec_channel *src )
364 {
365 dst->f[0] = cosf( src->f[0] );
366 dst->f[1] = cosf( src->f[1] );
367 dst->f[2] = cosf( src->f[2] );
368 dst->f[3] = cosf( src->f[3] );
369 }
370
371 static void
372 micro_ddx(
373 union tgsi_exec_channel *dst,
374 const union tgsi_exec_channel *src )
375 {
376 dst->f[0] =
377 dst->f[1] =
378 dst->f[2] =
379 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
380 }
381
382 static void
383 micro_ddy(
384 union tgsi_exec_channel *dst,
385 const union tgsi_exec_channel *src )
386 {
387 dst->f[0] =
388 dst->f[1] =
389 dst->f[2] =
390 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
391 }
392
393 static void
394 micro_div(
395 union tgsi_exec_channel *dst,
396 const union tgsi_exec_channel *src0,
397 const union tgsi_exec_channel *src1 )
398 {
399 if (src1->f[0] != 0) {
400 dst->f[0] = src0->f[0] / src1->f[0];
401 }
402 if (src1->f[1] != 0) {
403 dst->f[1] = src0->f[1] / src1->f[1];
404 }
405 if (src1->f[2] != 0) {
406 dst->f[2] = src0->f[2] / src1->f[2];
407 }
408 if (src1->f[3] != 0) {
409 dst->f[3] = src0->f[3] / src1->f[3];
410 }
411 }
412
413 #if 0
414 static void
415 micro_udiv(
416 union tgsi_exec_channel *dst,
417 const union tgsi_exec_channel *src0,
418 const union tgsi_exec_channel *src1 )
419 {
420 dst->u[0] = src0->u[0] / src1->u[0];
421 dst->u[1] = src0->u[1] / src1->u[1];
422 dst->u[2] = src0->u[2] / src1->u[2];
423 dst->u[3] = src0->u[3] / src1->u[3];
424 }
425 #endif
426
427 static void
428 micro_eq(
429 union tgsi_exec_channel *dst,
430 const union tgsi_exec_channel *src0,
431 const union tgsi_exec_channel *src1,
432 const union tgsi_exec_channel *src2,
433 const union tgsi_exec_channel *src3 )
434 {
435 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
436 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
437 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
438 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
439 }
440
441 #if 0
442 static void
443 micro_ieq(
444 union tgsi_exec_channel *dst,
445 const union tgsi_exec_channel *src0,
446 const union tgsi_exec_channel *src1,
447 const union tgsi_exec_channel *src2,
448 const union tgsi_exec_channel *src3 )
449 {
450 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
451 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
452 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
453 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
454 }
455 #endif
456
457 static void
458 micro_exp2(
459 union tgsi_exec_channel *dst,
460 const union tgsi_exec_channel *src)
461 {
462 #if FAST_MATH
463 dst->f[0] = util_fast_exp2( src->f[0] );
464 dst->f[1] = util_fast_exp2( src->f[1] );
465 dst->f[2] = util_fast_exp2( src->f[2] );
466 dst->f[3] = util_fast_exp2( src->f[3] );
467 #else
468 dst->f[0] = powf( 2.0f, src->f[0] );
469 dst->f[1] = powf( 2.0f, src->f[1] );
470 dst->f[2] = powf( 2.0f, src->f[2] );
471 dst->f[3] = powf( 2.0f, src->f[3] );
472 #endif
473 }
474
475 #if 0
476 static void
477 micro_f2ut(
478 union tgsi_exec_channel *dst,
479 const union tgsi_exec_channel *src )
480 {
481 dst->u[0] = (uint) src->f[0];
482 dst->u[1] = (uint) src->f[1];
483 dst->u[2] = (uint) src->f[2];
484 dst->u[3] = (uint) src->f[3];
485 }
486 #endif
487
488 static void
489 micro_flr(
490 union tgsi_exec_channel *dst,
491 const union tgsi_exec_channel *src )
492 {
493 dst->f[0] = floorf( src->f[0] );
494 dst->f[1] = floorf( src->f[1] );
495 dst->f[2] = floorf( src->f[2] );
496 dst->f[3] = floorf( src->f[3] );
497 }
498
499 static void
500 micro_frc(
501 union tgsi_exec_channel *dst,
502 const union tgsi_exec_channel *src )
503 {
504 dst->f[0] = src->f[0] - floorf( src->f[0] );
505 dst->f[1] = src->f[1] - floorf( src->f[1] );
506 dst->f[2] = src->f[2] - floorf( src->f[2] );
507 dst->f[3] = src->f[3] - floorf( src->f[3] );
508 }
509
510 static void
511 micro_i2f(
512 union tgsi_exec_channel *dst,
513 const union tgsi_exec_channel *src )
514 {
515 dst->f[0] = (float) src->i[0];
516 dst->f[1] = (float) src->i[1];
517 dst->f[2] = (float) src->i[2];
518 dst->f[3] = (float) src->i[3];
519 }
520
521 static void
522 micro_lg2(
523 union tgsi_exec_channel *dst,
524 const union tgsi_exec_channel *src )
525 {
526 #if FAST_MATH
527 dst->f[0] = util_fast_log2( src->f[0] );
528 dst->f[1] = util_fast_log2( src->f[1] );
529 dst->f[2] = util_fast_log2( src->f[2] );
530 dst->f[3] = util_fast_log2( src->f[3] );
531 #else
532 dst->f[0] = logf( src->f[0] ) * 1.442695f;
533 dst->f[1] = logf( src->f[1] ) * 1.442695f;
534 dst->f[2] = logf( src->f[2] ) * 1.442695f;
535 dst->f[3] = logf( src->f[3] ) * 1.442695f;
536 #endif
537 }
538
539 static void
540 micro_le(
541 union tgsi_exec_channel *dst,
542 const union tgsi_exec_channel *src0,
543 const union tgsi_exec_channel *src1,
544 const union tgsi_exec_channel *src2,
545 const union tgsi_exec_channel *src3 )
546 {
547 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
548 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
549 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
550 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
551 }
552
553 static void
554 micro_lt(
555 union tgsi_exec_channel *dst,
556 const union tgsi_exec_channel *src0,
557 const union tgsi_exec_channel *src1,
558 const union tgsi_exec_channel *src2,
559 const union tgsi_exec_channel *src3 )
560 {
561 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
562 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
563 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
564 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
565 }
566
567 #if 0
568 static void
569 micro_ilt(
570 union tgsi_exec_channel *dst,
571 const union tgsi_exec_channel *src0,
572 const union tgsi_exec_channel *src1,
573 const union tgsi_exec_channel *src2,
574 const union tgsi_exec_channel *src3 )
575 {
576 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
577 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
578 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
579 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
580 }
581 #endif
582
583 #if 0
584 static void
585 micro_ult(
586 union tgsi_exec_channel *dst,
587 const union tgsi_exec_channel *src0,
588 const union tgsi_exec_channel *src1,
589 const union tgsi_exec_channel *src2,
590 const union tgsi_exec_channel *src3 )
591 {
592 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
593 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
594 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
595 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
596 }
597 #endif
598
599 static void
600 micro_max(
601 union tgsi_exec_channel *dst,
602 const union tgsi_exec_channel *src0,
603 const union tgsi_exec_channel *src1 )
604 {
605 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
606 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
607 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
608 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
609 }
610
611 #if 0
612 static void
613 micro_imax(
614 union tgsi_exec_channel *dst,
615 const union tgsi_exec_channel *src0,
616 const union tgsi_exec_channel *src1 )
617 {
618 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
619 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
620 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
621 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
622 }
623 #endif
624
625 #if 0
626 static void
627 micro_umax(
628 union tgsi_exec_channel *dst,
629 const union tgsi_exec_channel *src0,
630 const union tgsi_exec_channel *src1 )
631 {
632 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
633 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
634 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
635 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
636 }
637 #endif
638
639 static void
640 micro_min(
641 union tgsi_exec_channel *dst,
642 const union tgsi_exec_channel *src0,
643 const union tgsi_exec_channel *src1 )
644 {
645 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
646 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
647 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
648 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
649 }
650
651 #if 0
652 static void
653 micro_imin(
654 union tgsi_exec_channel *dst,
655 const union tgsi_exec_channel *src0,
656 const union tgsi_exec_channel *src1 )
657 {
658 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
659 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
660 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
661 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
662 }
663 #endif
664
665 #if 0
666 static void
667 micro_umin(
668 union tgsi_exec_channel *dst,
669 const union tgsi_exec_channel *src0,
670 const union tgsi_exec_channel *src1 )
671 {
672 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
673 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
674 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
675 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
676 }
677 #endif
678
679 #if 0
680 static void
681 micro_umod(
682 union tgsi_exec_channel *dst,
683 const union tgsi_exec_channel *src0,
684 const union tgsi_exec_channel *src1 )
685 {
686 dst->u[0] = src0->u[0] % src1->u[0];
687 dst->u[1] = src0->u[1] % src1->u[1];
688 dst->u[2] = src0->u[2] % src1->u[2];
689 dst->u[3] = src0->u[3] % src1->u[3];
690 }
691 #endif
692
693 static void
694 micro_mul(
695 union tgsi_exec_channel *dst,
696 const union tgsi_exec_channel *src0,
697 const union tgsi_exec_channel *src1 )
698 {
699 dst->f[0] = src0->f[0] * src1->f[0];
700 dst->f[1] = src0->f[1] * src1->f[1];
701 dst->f[2] = src0->f[2] * src1->f[2];
702 dst->f[3] = src0->f[3] * src1->f[3];
703 }
704
705 #if 0
706 static void
707 micro_imul(
708 union tgsi_exec_channel *dst,
709 const union tgsi_exec_channel *src0,
710 const union tgsi_exec_channel *src1 )
711 {
712 dst->i[0] = src0->i[0] * src1->i[0];
713 dst->i[1] = src0->i[1] * src1->i[1];
714 dst->i[2] = src0->i[2] * src1->i[2];
715 dst->i[3] = src0->i[3] * src1->i[3];
716 }
717 #endif
718
719 #if 0
720 static void
721 micro_imul64(
722 union tgsi_exec_channel *dst0,
723 union tgsi_exec_channel *dst1,
724 const union tgsi_exec_channel *src0,
725 const union tgsi_exec_channel *src1 )
726 {
727 dst1->i[0] = src0->i[0] * src1->i[0];
728 dst1->i[1] = src0->i[1] * src1->i[1];
729 dst1->i[2] = src0->i[2] * src1->i[2];
730 dst1->i[3] = src0->i[3] * src1->i[3];
731 dst0->i[0] = 0;
732 dst0->i[1] = 0;
733 dst0->i[2] = 0;
734 dst0->i[3] = 0;
735 }
736 #endif
737
738 #if 0
739 static void
740 micro_umul64(
741 union tgsi_exec_channel *dst0,
742 union tgsi_exec_channel *dst1,
743 const union tgsi_exec_channel *src0,
744 const union tgsi_exec_channel *src1 )
745 {
746 dst1->u[0] = src0->u[0] * src1->u[0];
747 dst1->u[1] = src0->u[1] * src1->u[1];
748 dst1->u[2] = src0->u[2] * src1->u[2];
749 dst1->u[3] = src0->u[3] * src1->u[3];
750 dst0->u[0] = 0;
751 dst0->u[1] = 0;
752 dst0->u[2] = 0;
753 dst0->u[3] = 0;
754 }
755 #endif
756
757
758 #if 0
759 static void
760 micro_movc(
761 union tgsi_exec_channel *dst,
762 const union tgsi_exec_channel *src0,
763 const union tgsi_exec_channel *src1,
764 const union tgsi_exec_channel *src2 )
765 {
766 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
767 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
768 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
769 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
770 }
771 #endif
772
773 static void
774 micro_neg(
775 union tgsi_exec_channel *dst,
776 const union tgsi_exec_channel *src )
777 {
778 dst->f[0] = -src->f[0];
779 dst->f[1] = -src->f[1];
780 dst->f[2] = -src->f[2];
781 dst->f[3] = -src->f[3];
782 }
783
784 #if 0
785 static void
786 micro_ineg(
787 union tgsi_exec_channel *dst,
788 const union tgsi_exec_channel *src )
789 {
790 dst->i[0] = -src->i[0];
791 dst->i[1] = -src->i[1];
792 dst->i[2] = -src->i[2];
793 dst->i[3] = -src->i[3];
794 }
795 #endif
796
797 static void
798 micro_not(
799 union tgsi_exec_channel *dst,
800 const union tgsi_exec_channel *src )
801 {
802 dst->u[0] = ~src->u[0];
803 dst->u[1] = ~src->u[1];
804 dst->u[2] = ~src->u[2];
805 dst->u[3] = ~src->u[3];
806 }
807
808 static void
809 micro_or(
810 union tgsi_exec_channel *dst,
811 const union tgsi_exec_channel *src0,
812 const union tgsi_exec_channel *src1 )
813 {
814 dst->u[0] = src0->u[0] | src1->u[0];
815 dst->u[1] = src0->u[1] | src1->u[1];
816 dst->u[2] = src0->u[2] | src1->u[2];
817 dst->u[3] = src0->u[3] | src1->u[3];
818 }
819
820 static void
821 micro_pow(
822 union tgsi_exec_channel *dst,
823 const union tgsi_exec_channel *src0,
824 const union tgsi_exec_channel *src1 )
825 {
826 #if FAST_MATH
827 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
828 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
829 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
830 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
831 #else
832 dst->f[0] = powf( src0->f[0], src1->f[0] );
833 dst->f[1] = powf( src0->f[1], src1->f[1] );
834 dst->f[2] = powf( src0->f[2], src1->f[2] );
835 dst->f[3] = powf( src0->f[3], src1->f[3] );
836 #endif
837 }
838
839 static void
840 micro_rnd(
841 union tgsi_exec_channel *dst,
842 const union tgsi_exec_channel *src )
843 {
844 dst->f[0] = floorf( src->f[0] + 0.5f );
845 dst->f[1] = floorf( src->f[1] + 0.5f );
846 dst->f[2] = floorf( src->f[2] + 0.5f );
847 dst->f[3] = floorf( src->f[3] + 0.5f );
848 }
849
850 static void
851 micro_sgn(
852 union tgsi_exec_channel *dst,
853 const union tgsi_exec_channel *src )
854 {
855 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
856 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
857 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
858 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
859 }
860
861 static void
862 micro_shl(
863 union tgsi_exec_channel *dst,
864 const union tgsi_exec_channel *src0,
865 const union tgsi_exec_channel *src1 )
866 {
867 dst->i[0] = src0->i[0] << src1->i[0];
868 dst->i[1] = src0->i[1] << src1->i[1];
869 dst->i[2] = src0->i[2] << src1->i[2];
870 dst->i[3] = src0->i[3] << src1->i[3];
871 }
872
873 static void
874 micro_ishr(
875 union tgsi_exec_channel *dst,
876 const union tgsi_exec_channel *src0,
877 const union tgsi_exec_channel *src1 )
878 {
879 dst->i[0] = src0->i[0] >> src1->i[0];
880 dst->i[1] = src0->i[1] >> src1->i[1];
881 dst->i[2] = src0->i[2] >> src1->i[2];
882 dst->i[3] = src0->i[3] >> src1->i[3];
883 }
884
885 static void
886 micro_trunc(
887 union tgsi_exec_channel *dst,
888 const union tgsi_exec_channel *src0 )
889 {
890 dst->f[0] = (float) (int) src0->f[0];
891 dst->f[1] = (float) (int) src0->f[1];
892 dst->f[2] = (float) (int) src0->f[2];
893 dst->f[3] = (float) (int) src0->f[3];
894 }
895
896 #if 0
897 static void
898 micro_ushr(
899 union tgsi_exec_channel *dst,
900 const union tgsi_exec_channel *src0,
901 const union tgsi_exec_channel *src1 )
902 {
903 dst->u[0] = src0->u[0] >> src1->u[0];
904 dst->u[1] = src0->u[1] >> src1->u[1];
905 dst->u[2] = src0->u[2] >> src1->u[2];
906 dst->u[3] = src0->u[3] >> src1->u[3];
907 }
908 #endif
909
910 static void
911 micro_sin(
912 union tgsi_exec_channel *dst,
913 const union tgsi_exec_channel *src )
914 {
915 dst->f[0] = sinf( src->f[0] );
916 dst->f[1] = sinf( src->f[1] );
917 dst->f[2] = sinf( src->f[2] );
918 dst->f[3] = sinf( src->f[3] );
919 }
920
921 static void
922 micro_sqrt( union tgsi_exec_channel *dst,
923 const union tgsi_exec_channel *src )
924 {
925 dst->f[0] = sqrtf( src->f[0] );
926 dst->f[1] = sqrtf( src->f[1] );
927 dst->f[2] = sqrtf( src->f[2] );
928 dst->f[3] = sqrtf( src->f[3] );
929 }
930
931 static void
932 micro_sub(
933 union tgsi_exec_channel *dst,
934 const union tgsi_exec_channel *src0,
935 const union tgsi_exec_channel *src1 )
936 {
937 dst->f[0] = src0->f[0] - src1->f[0];
938 dst->f[1] = src0->f[1] - src1->f[1];
939 dst->f[2] = src0->f[2] - src1->f[2];
940 dst->f[3] = src0->f[3] - src1->f[3];
941 }
942
943 #if 0
944 static void
945 micro_u2f(
946 union tgsi_exec_channel *dst,
947 const union tgsi_exec_channel *src )
948 {
949 dst->f[0] = (float) src->u[0];
950 dst->f[1] = (float) src->u[1];
951 dst->f[2] = (float) src->u[2];
952 dst->f[3] = (float) src->u[3];
953 }
954 #endif
955
956 static void
957 micro_xor(
958 union tgsi_exec_channel *dst,
959 const union tgsi_exec_channel *src0,
960 const union tgsi_exec_channel *src1 )
961 {
962 dst->u[0] = src0->u[0] ^ src1->u[0];
963 dst->u[1] = src0->u[1] ^ src1->u[1];
964 dst->u[2] = src0->u[2] ^ src1->u[2];
965 dst->u[3] = src0->u[3] ^ src1->u[3];
966 }
967
968 static void
969 fetch_src_file_channel(
970 const struct tgsi_exec_machine *mach,
971 const uint file,
972 const uint swizzle,
973 const union tgsi_exec_channel *index,
974 union tgsi_exec_channel *chan )
975 {
976 switch( swizzle ) {
977 case TGSI_EXTSWIZZLE_X:
978 case TGSI_EXTSWIZZLE_Y:
979 case TGSI_EXTSWIZZLE_Z:
980 case TGSI_EXTSWIZZLE_W:
981 switch( file ) {
982 case TGSI_FILE_CONSTANT:
983 assert(mach->Consts);
984 if (index->i[0] < 0)
985 chan->f[0] = 0.0f;
986 else
987 chan->f[0] = mach->Consts[index->i[0]][swizzle];
988 if (index->i[1] < 0)
989 chan->f[1] = 0.0f;
990 else
991 chan->f[1] = mach->Consts[index->i[1]][swizzle];
992 if (index->i[2] < 0)
993 chan->f[2] = 0.0f;
994 else
995 chan->f[2] = mach->Consts[index->i[2]][swizzle];
996 if (index->i[3] < 0)
997 chan->f[3] = 0.0f;
998 else
999 chan->f[3] = mach->Consts[index->i[3]][swizzle];
1000 break;
1001
1002 case TGSI_FILE_INPUT:
1003 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1004 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1005 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1006 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1007 break;
1008
1009 case TGSI_FILE_TEMPORARY:
1010 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1011 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1012 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1013 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1014 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1015 break;
1016
1017 case TGSI_FILE_IMMEDIATE:
1018 assert( index->i[0] < (int) mach->ImmLimit );
1019 chan->f[0] = mach->Imms[index->i[0]][swizzle];
1020 assert( index->i[1] < (int) mach->ImmLimit );
1021 chan->f[1] = mach->Imms[index->i[1]][swizzle];
1022 assert( index->i[2] < (int) mach->ImmLimit );
1023 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1024 assert( index->i[3] < (int) mach->ImmLimit );
1025 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1026 break;
1027
1028 case TGSI_FILE_ADDRESS:
1029 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1030 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1031 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1032 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1033 break;
1034
1035 case TGSI_FILE_OUTPUT:
1036 /* vertex/fragment output vars can be read too */
1037 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1038 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1039 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1040 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1041 break;
1042
1043 default:
1044 assert( 0 );
1045 }
1046 break;
1047
1048 case TGSI_EXTSWIZZLE_ZERO:
1049 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1050 break;
1051
1052 case TGSI_EXTSWIZZLE_ONE:
1053 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1054 break;
1055
1056 default:
1057 assert( 0 );
1058 }
1059 }
1060
1061 static void
1062 fetch_source(
1063 const struct tgsi_exec_machine *mach,
1064 union tgsi_exec_channel *chan,
1065 const struct tgsi_full_src_register *reg,
1066 const uint chan_index )
1067 {
1068 union tgsi_exec_channel index;
1069 uint swizzle;
1070
1071 /* We start with a direct index into a register file.
1072 *
1073 * file[1],
1074 * where:
1075 * file = SrcRegister.File
1076 * [1] = SrcRegister.Index
1077 */
1078 index.i[0] =
1079 index.i[1] =
1080 index.i[2] =
1081 index.i[3] = reg->SrcRegister.Index;
1082
1083 /* There is an extra source register that indirectly subscripts
1084 * a register file. The direct index now becomes an offset
1085 * that is being added to the indirect register.
1086 *
1087 * file[ind[2].x+1],
1088 * where:
1089 * ind = SrcRegisterInd.File
1090 * [2] = SrcRegisterInd.Index
1091 * .x = SrcRegisterInd.SwizzleX
1092 */
1093 if (reg->SrcRegister.Indirect) {
1094 union tgsi_exec_channel index2;
1095 union tgsi_exec_channel indir_index;
1096 const uint execmask = mach->ExecMask;
1097 uint i;
1098
1099 /* which address register (always zero now) */
1100 index2.i[0] =
1101 index2.i[1] =
1102 index2.i[2] =
1103 index2.i[3] = reg->SrcRegisterInd.Index;
1104
1105 /* get current value of address register[swizzle] */
1106 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1107 fetch_src_file_channel(
1108 mach,
1109 reg->SrcRegisterInd.File,
1110 swizzle,
1111 &index2,
1112 &indir_index );
1113
1114 /* add value of address register to the offset */
1115 index.i[0] += (int) indir_index.f[0];
1116 index.i[1] += (int) indir_index.f[1];
1117 index.i[2] += (int) indir_index.f[2];
1118 index.i[3] += (int) indir_index.f[3];
1119
1120 /* for disabled execution channels, zero-out the index to
1121 * avoid using a potential garbage value.
1122 */
1123 for (i = 0; i < QUAD_SIZE; i++) {
1124 if ((execmask & (1 << i)) == 0)
1125 index.i[i] = 0;
1126 }
1127 }
1128
1129 /* There is an extra source register that is a second
1130 * subscript to a register file. Effectively it means that
1131 * the register file is actually a 2D array of registers.
1132 *
1133 * file[1][3] == file[1*sizeof(file[1])+3],
1134 * where:
1135 * [3] = SrcRegisterDim.Index
1136 */
1137 if (reg->SrcRegister.Dimension) {
1138 /* The size of the first-order array depends on the register file type.
1139 * We need to multiply the index to the first array to get an effective,
1140 * "flat" index that points to the beginning of the second-order array.
1141 */
1142 switch (reg->SrcRegister.File) {
1143 case TGSI_FILE_INPUT:
1144 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1145 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1146 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1147 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1148 break;
1149 case TGSI_FILE_CONSTANT:
1150 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1151 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1152 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1153 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1154 break;
1155 default:
1156 assert( 0 );
1157 }
1158
1159 index.i[0] += reg->SrcRegisterDim.Index;
1160 index.i[1] += reg->SrcRegisterDim.Index;
1161 index.i[2] += reg->SrcRegisterDim.Index;
1162 index.i[3] += reg->SrcRegisterDim.Index;
1163
1164 /* Again, the second subscript index can be addressed indirectly
1165 * identically to the first one.
1166 * Nothing stops us from indirectly addressing the indirect register,
1167 * but there is no need for that, so we won't exercise it.
1168 *
1169 * file[1][ind[4].y+3],
1170 * where:
1171 * ind = SrcRegisterDimInd.File
1172 * [4] = SrcRegisterDimInd.Index
1173 * .y = SrcRegisterDimInd.SwizzleX
1174 */
1175 if (reg->SrcRegisterDim.Indirect) {
1176 union tgsi_exec_channel index2;
1177 union tgsi_exec_channel indir_index;
1178 const uint execmask = mach->ExecMask;
1179 uint i;
1180
1181 index2.i[0] =
1182 index2.i[1] =
1183 index2.i[2] =
1184 index2.i[3] = reg->SrcRegisterDimInd.Index;
1185
1186 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1187 fetch_src_file_channel(
1188 mach,
1189 reg->SrcRegisterDimInd.File,
1190 swizzle,
1191 &index2,
1192 &indir_index );
1193
1194 index.i[0] += (int) indir_index.f[0];
1195 index.i[1] += (int) indir_index.f[1];
1196 index.i[2] += (int) indir_index.f[2];
1197 index.i[3] += (int) indir_index.f[3];
1198
1199 /* for disabled execution channels, zero-out the index to
1200 * avoid using a potential garbage value.
1201 */
1202 for (i = 0; i < QUAD_SIZE; i++) {
1203 if ((execmask & (1 << i)) == 0)
1204 index.i[i] = 0;
1205 }
1206 }
1207
1208 /* If by any chance there was a need for a 3D array of register
1209 * files, we would have to check whether SrcRegisterDim is followed
1210 * by a dimension register and continue the saga.
1211 */
1212 }
1213
1214 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1215 fetch_src_file_channel(
1216 mach,
1217 reg->SrcRegister.File,
1218 swizzle,
1219 &index,
1220 chan );
1221
1222 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1223 case TGSI_UTIL_SIGN_CLEAR:
1224 micro_abs( chan, chan );
1225 break;
1226
1227 case TGSI_UTIL_SIGN_SET:
1228 micro_abs( chan, chan );
1229 micro_neg( chan, chan );
1230 break;
1231
1232 case TGSI_UTIL_SIGN_TOGGLE:
1233 micro_neg( chan, chan );
1234 break;
1235
1236 case TGSI_UTIL_SIGN_KEEP:
1237 break;
1238 }
1239
1240 if (reg->SrcRegisterExtMod.Complement) {
1241 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1242 }
1243 }
1244
1245 static void
1246 store_dest(
1247 struct tgsi_exec_machine *mach,
1248 const union tgsi_exec_channel *chan,
1249 const struct tgsi_full_dst_register *reg,
1250 const struct tgsi_full_instruction *inst,
1251 uint chan_index )
1252 {
1253 uint i;
1254 union tgsi_exec_channel null;
1255 union tgsi_exec_channel *dst;
1256 uint execmask = mach->ExecMask;
1257
1258 switch (reg->DstRegister.File) {
1259 case TGSI_FILE_NULL:
1260 dst = &null;
1261 break;
1262
1263 case TGSI_FILE_OUTPUT:
1264 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1265 + reg->DstRegister.Index].xyzw[chan_index];
1266 break;
1267
1268 case TGSI_FILE_TEMPORARY:
1269 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1270 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1271 break;
1272
1273 case TGSI_FILE_ADDRESS:
1274 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1275 break;
1276
1277 default:
1278 assert( 0 );
1279 return;
1280 }
1281
1282 if (inst->InstructionExtNv.CondFlowEnable) {
1283 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1284 uint swizzle;
1285 uint shift;
1286 uint mask;
1287 uint test;
1288
1289 /* Only CC0 supported.
1290 */
1291 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1292
1293 switch (chan_index) {
1294 case CHAN_X:
1295 swizzle = inst->InstructionExtNv.CondSwizzleX;
1296 break;
1297 case CHAN_Y:
1298 swizzle = inst->InstructionExtNv.CondSwizzleY;
1299 break;
1300 case CHAN_Z:
1301 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1302 break;
1303 case CHAN_W:
1304 swizzle = inst->InstructionExtNv.CondSwizzleW;
1305 break;
1306 default:
1307 assert( 0 );
1308 return;
1309 }
1310
1311 switch (swizzle) {
1312 case TGSI_SWIZZLE_X:
1313 shift = TGSI_EXEC_CC_X_SHIFT;
1314 mask = TGSI_EXEC_CC_X_MASK;
1315 break;
1316 case TGSI_SWIZZLE_Y:
1317 shift = TGSI_EXEC_CC_Y_SHIFT;
1318 mask = TGSI_EXEC_CC_Y_MASK;
1319 break;
1320 case TGSI_SWIZZLE_Z:
1321 shift = TGSI_EXEC_CC_Z_SHIFT;
1322 mask = TGSI_EXEC_CC_Z_MASK;
1323 break;
1324 case TGSI_SWIZZLE_W:
1325 shift = TGSI_EXEC_CC_W_SHIFT;
1326 mask = TGSI_EXEC_CC_W_MASK;
1327 break;
1328 default:
1329 assert( 0 );
1330 return;
1331 }
1332
1333 switch (inst->InstructionExtNv.CondMask) {
1334 case TGSI_CC_GT:
1335 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1336 for (i = 0; i < QUAD_SIZE; i++)
1337 if (cc->u[i] & test)
1338 execmask &= ~(1 << i);
1339 break;
1340
1341 case TGSI_CC_EQ:
1342 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1343 for (i = 0; i < QUAD_SIZE; i++)
1344 if (cc->u[i] & test)
1345 execmask &= ~(1 << i);
1346 break;
1347
1348 case TGSI_CC_LT:
1349 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1350 for (i = 0; i < QUAD_SIZE; i++)
1351 if (cc->u[i] & test)
1352 execmask &= ~(1 << i);
1353 break;
1354
1355 case TGSI_CC_GE:
1356 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1357 for (i = 0; i < QUAD_SIZE; i++)
1358 if (cc->u[i] & test)
1359 execmask &= ~(1 << i);
1360 break;
1361
1362 case TGSI_CC_LE:
1363 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1364 for (i = 0; i < QUAD_SIZE; i++)
1365 if (cc->u[i] & test)
1366 execmask &= ~(1 << i);
1367 break;
1368
1369 case TGSI_CC_NE:
1370 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1371 for (i = 0; i < QUAD_SIZE; i++)
1372 if (cc->u[i] & test)
1373 execmask &= ~(1 << i);
1374 break;
1375
1376 case TGSI_CC_TR:
1377 break;
1378
1379 case TGSI_CC_FL:
1380 for (i = 0; i < QUAD_SIZE; i++)
1381 execmask &= ~(1 << i);
1382 break;
1383
1384 default:
1385 assert( 0 );
1386 return;
1387 }
1388 }
1389
1390 switch (inst->Instruction.Saturate) {
1391 case TGSI_SAT_NONE:
1392 for (i = 0; i < QUAD_SIZE; i++)
1393 if (execmask & (1 << i))
1394 dst->i[i] = chan->i[i];
1395 break;
1396
1397 case TGSI_SAT_ZERO_ONE:
1398 for (i = 0; i < QUAD_SIZE; i++)
1399 if (execmask & (1 << i)) {
1400 if (chan->f[i] < 0.0f)
1401 dst->f[i] = 0.0f;
1402 else if (chan->f[i] > 1.0f)
1403 dst->f[i] = 1.0f;
1404 else
1405 dst->i[i] = chan->i[i];
1406 }
1407 break;
1408
1409 case TGSI_SAT_MINUS_PLUS_ONE:
1410 for (i = 0; i < QUAD_SIZE; i++)
1411 if (execmask & (1 << i)) {
1412 if (chan->f[i] < -1.0f)
1413 dst->f[i] = -1.0f;
1414 else if (chan->f[i] > 1.0f)
1415 dst->f[i] = 1.0f;
1416 else
1417 dst->i[i] = chan->i[i];
1418 }
1419 break;
1420
1421 default:
1422 assert( 0 );
1423 }
1424
1425 if (inst->InstructionExtNv.CondDstUpdate) {
1426 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1427 uint shift;
1428 uint mask;
1429
1430 /* Only CC0 supported.
1431 */
1432 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1433
1434 switch (chan_index) {
1435 case CHAN_X:
1436 shift = TGSI_EXEC_CC_X_SHIFT;
1437 mask = ~TGSI_EXEC_CC_X_MASK;
1438 break;
1439 case CHAN_Y:
1440 shift = TGSI_EXEC_CC_Y_SHIFT;
1441 mask = ~TGSI_EXEC_CC_Y_MASK;
1442 break;
1443 case CHAN_Z:
1444 shift = TGSI_EXEC_CC_Z_SHIFT;
1445 mask = ~TGSI_EXEC_CC_Z_MASK;
1446 break;
1447 case CHAN_W:
1448 shift = TGSI_EXEC_CC_W_SHIFT;
1449 mask = ~TGSI_EXEC_CC_W_MASK;
1450 break;
1451 default:
1452 assert( 0 );
1453 return;
1454 }
1455
1456 for (i = 0; i < QUAD_SIZE; i++)
1457 if (execmask & (1 << i)) {
1458 cc->u[i] &= mask;
1459 if (dst->f[i] < 0.0f)
1460 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1461 else if (dst->f[i] > 0.0f)
1462 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1463 else if (dst->f[i] == 0.0f)
1464 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1465 else
1466 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1467 }
1468 }
1469 }
1470
/* Fetch channel CHAN of source operand INDEX of the current instruction
 * into VAL.  Expects `mach` and `inst` to be in scope at the point of use.
 */
#define FETCH(VAL,INDEX,CHAN) \
   fetch_source( mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN )

/* Store VAL into channel CHAN of destination operand INDEX of the current
 * instruction.  Expects `mach` and `inst` to be in scope at the point of use.
 */
#define STORE(VAL,INDEX,CHAN) \
   store_dest( mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1476
1477
1478 /**
1479 * Execute ARB-style KIL which is predicated by a src register.
1480 * Kill fragment if any of the four values is less than zero.
1481 */
1482 static void
1483 exec_kil(struct tgsi_exec_machine *mach,
1484 const struct tgsi_full_instruction *inst)
1485 {
1486 uint uniquemask;
1487 uint chan_index;
1488 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1489 union tgsi_exec_channel r[1];
1490
1491 /* This mask stores component bits that were already tested. Note that
1492 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1493 * tested. */
1494 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1495
1496 for (chan_index = 0; chan_index < 4; chan_index++)
1497 {
1498 uint swizzle;
1499 uint i;
1500
1501 /* unswizzle channel */
1502 swizzle = tgsi_util_get_full_src_register_extswizzle (
1503 &inst->FullSrcRegisters[0],
1504 chan_index);
1505
1506 /* check if the component has not been already tested */
1507 if (uniquemask & (1 << swizzle))
1508 continue;
1509 uniquemask |= 1 << swizzle;
1510
1511 FETCH(&r[0], 0, chan_index);
1512 for (i = 0; i < 4; i++)
1513 if (r[0].f[i] < 0.0f)
1514 kilmask |= 1 << i;
1515 }
1516
1517 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1518 }
1519
1520 /**
1521 * Execute NVIDIA-style KIL which is predicated by a condition code.
1522 * Kill fragment if the condition code is TRUE.
1523 */
1524 static void
1525 exec_kilp(struct tgsi_exec_machine *mach,
1526 const struct tgsi_full_instruction *inst)
1527 {
1528 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1529
1530 if (inst->InstructionExtNv.CondFlowEnable) {
1531 uint swizzle[4];
1532 uint chan_index;
1533
1534 kilmask = 0x0;
1535
1536 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1537 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1538 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1539 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1540
1541 for (chan_index = 0; chan_index < 4; chan_index++)
1542 {
1543 uint i;
1544
1545 for (i = 0; i < 4; i++) {
1546 /* TODO: evaluate the condition code */
1547 if (0)
1548 kilmask |= 1 << i;
1549 }
1550 }
1551 }
1552 else {
1553 /* "unconditional" kil */
1554 kilmask = mach->ExecMask;
1555 }
1556 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1557 }
1558
1559
1560 /*
1561 * Fetch a four texture samples using STR texture coordinates.
1562 */
1563 static void
1564 fetch_texel( struct tgsi_sampler *sampler,
1565 const union tgsi_exec_channel *s,
1566 const union tgsi_exec_channel *t,
1567 const union tgsi_exec_channel *p,
1568 float lodbias, /* XXX should be float[4] */
1569 union tgsi_exec_channel *r,
1570 union tgsi_exec_channel *g,
1571 union tgsi_exec_channel *b,
1572 union tgsi_exec_channel *a )
1573 {
1574 uint j;
1575 float rgba[NUM_CHANNELS][QUAD_SIZE];
1576
1577 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1578
1579 for (j = 0; j < 4; j++) {
1580 r->f[j] = rgba[0][j];
1581 g->f[j] = rgba[1][j];
1582 b->f[j] = rgba[2][j];
1583 a->f[j] = rgba[3][j];
1584 }
1585 }
1586
1587
1588 static void
1589 exec_tex(struct tgsi_exec_machine *mach,
1590 const struct tgsi_full_instruction *inst,
1591 boolean biasLod,
1592 boolean projected)
1593 {
1594 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1595 union tgsi_exec_channel r[4];
1596 uint chan_index;
1597 float lodBias;
1598
1599 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1600
1601 switch (inst->InstructionExtTexture.Texture) {
1602 case TGSI_TEXTURE_1D:
1603 case TGSI_TEXTURE_SHADOW1D:
1604
1605 FETCH(&r[0], 0, CHAN_X);
1606
1607 if (projected) {
1608 FETCH(&r[1], 0, CHAN_W);
1609 micro_div( &r[0], &r[0], &r[1] );
1610 }
1611
1612 if (biasLod) {
1613 FETCH(&r[1], 0, CHAN_W);
1614 lodBias = r[2].f[0];
1615 }
1616 else
1617 lodBias = 0.0;
1618
1619 fetch_texel(mach->Samplers[unit],
1620 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1621 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1622 break;
1623
1624 case TGSI_TEXTURE_2D:
1625 case TGSI_TEXTURE_RECT:
1626 case TGSI_TEXTURE_SHADOW2D:
1627 case TGSI_TEXTURE_SHADOWRECT:
1628
1629 FETCH(&r[0], 0, CHAN_X);
1630 FETCH(&r[1], 0, CHAN_Y);
1631 FETCH(&r[2], 0, CHAN_Z);
1632
1633 if (projected) {
1634 FETCH(&r[3], 0, CHAN_W);
1635 micro_div( &r[0], &r[0], &r[3] );
1636 micro_div( &r[1], &r[1], &r[3] );
1637 micro_div( &r[2], &r[2], &r[3] );
1638 }
1639
1640 if (biasLod) {
1641 FETCH(&r[3], 0, CHAN_W);
1642 lodBias = r[3].f[0];
1643 }
1644 else
1645 lodBias = 0.0;
1646
1647 fetch_texel(mach->Samplers[unit],
1648 &r[0], &r[1], &r[2], lodBias, /* inputs */
1649 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1650 break;
1651
1652 case TGSI_TEXTURE_3D:
1653 case TGSI_TEXTURE_CUBE:
1654
1655 FETCH(&r[0], 0, CHAN_X);
1656 FETCH(&r[1], 0, CHAN_Y);
1657 FETCH(&r[2], 0, CHAN_Z);
1658
1659 if (projected) {
1660 FETCH(&r[3], 0, CHAN_W);
1661 micro_div( &r[0], &r[0], &r[3] );
1662 micro_div( &r[1], &r[1], &r[3] );
1663 micro_div( &r[2], &r[2], &r[3] );
1664 }
1665
1666 if (biasLod) {
1667 FETCH(&r[3], 0, CHAN_W);
1668 lodBias = r[3].f[0];
1669 }
1670 else
1671 lodBias = 0.0;
1672
1673 fetch_texel(mach->Samplers[unit],
1674 &r[0], &r[1], &r[2], lodBias,
1675 &r[0], &r[1], &r[2], &r[3]);
1676 break;
1677
1678 default:
1679 assert (0);
1680 }
1681
1682 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1683 STORE( &r[chan_index], 0, chan_index );
1684 }
1685 }
1686
1687
1688 /**
1689 * Evaluate a constant-valued coefficient at the position of the
1690 * current quad.
1691 */
1692 static void
1693 eval_constant_coef(
1694 struct tgsi_exec_machine *mach,
1695 unsigned attrib,
1696 unsigned chan )
1697 {
1698 unsigned i;
1699
1700 for( i = 0; i < QUAD_SIZE; i++ ) {
1701 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1702 }
1703 }
1704
1705 /**
1706 * Evaluate a linear-valued coefficient at the position of the
1707 * current quad.
1708 */
1709 static void
1710 eval_linear_coef(
1711 struct tgsi_exec_machine *mach,
1712 unsigned attrib,
1713 unsigned chan )
1714 {
1715 const float x = mach->QuadPos.xyzw[0].f[0];
1716 const float y = mach->QuadPos.xyzw[1].f[0];
1717 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1718 const float dady = mach->InterpCoefs[attrib].dady[chan];
1719 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1720 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1721 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1722 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1723 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1724 }
1725
1726 /**
1727 * Evaluate a perspective-valued coefficient at the position of the
1728 * current quad.
1729 */
1730 static void
1731 eval_perspective_coef(
1732 struct tgsi_exec_machine *mach,
1733 unsigned attrib,
1734 unsigned chan )
1735 {
1736 const float x = mach->QuadPos.xyzw[0].f[0];
1737 const float y = mach->QuadPos.xyzw[1].f[0];
1738 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1739 const float dady = mach->InterpCoefs[attrib].dady[chan];
1740 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1741 const float *w = mach->QuadPos.xyzw[3].f;
1742 /* divide by W here */
1743 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1744 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1745 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1746 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1747 }
1748
1749
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel
 * `chan` of input attribute `attrib` for the current quad.
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1754
1755 static void
1756 exec_declaration(
1757 struct tgsi_exec_machine *mach,
1758 const struct tgsi_full_declaration *decl )
1759 {
1760 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1761 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1762 unsigned first, last, mask;
1763 eval_coef_func eval;
1764
1765 first = decl->DeclarationRange.First;
1766 last = decl->DeclarationRange.Last;
1767 mask = decl->Declaration.UsageMask;
1768
1769 switch( decl->Declaration.Interpolate ) {
1770 case TGSI_INTERPOLATE_CONSTANT:
1771 eval = eval_constant_coef;
1772 break;
1773
1774 case TGSI_INTERPOLATE_LINEAR:
1775 eval = eval_linear_coef;
1776 break;
1777
1778 case TGSI_INTERPOLATE_PERSPECTIVE:
1779 eval = eval_perspective_coef;
1780 break;
1781
1782 default:
1783 eval = NULL;
1784 assert( 0 );
1785 }
1786
1787 if( mask == TGSI_WRITEMASK_XYZW ) {
1788 unsigned i, j;
1789
1790 for( i = first; i <= last; i++ ) {
1791 for( j = 0; j < NUM_CHANNELS; j++ ) {
1792 eval( mach, i, j );
1793 }
1794 }
1795 }
1796 else {
1797 unsigned i, j;
1798
1799 for( j = 0; j < NUM_CHANNELS; j++ ) {
1800 if( mask & (1 << j) ) {
1801 for( i = first; i <= last; i++ ) {
1802 eval( mach, i, j );
1803 }
1804 }
1805 }
1806 }
1807 }
1808 }
1809 }
1810
1811 static void
1812 exec_instruction(
1813 struct tgsi_exec_machine *mach,
1814 const struct tgsi_full_instruction *inst,
1815 int *pc )
1816 {
1817 uint chan_index;
1818 union tgsi_exec_channel r[8];
1819
1820 (*pc)++;
1821
1822 switch (inst->Instruction.Opcode) {
1823 case TGSI_OPCODE_ARL:
1824 /* TGSI_OPCODE_FLOOR */
1825 /* TGSI_OPCODE_FLR */
1826 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1827 FETCH( &r[0], 0, chan_index );
1828 micro_flr( &r[0], &r[0] );
1829 STORE( &r[0], 0, chan_index );
1830 }
1831 break;
1832
1833 case TGSI_OPCODE_MOV:
1834 case TGSI_OPCODE_SWZ:
1835 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1836 FETCH( &r[0], 0, chan_index );
1837 STORE( &r[0], 0, chan_index );
1838 }
1839 break;
1840
1841 case TGSI_OPCODE_LIT:
1842 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1843 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1844 }
1845
1846 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1847 FETCH( &r[0], 0, CHAN_X );
1848 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1849 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1850 STORE( &r[0], 0, CHAN_Y );
1851 }
1852
1853 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1854 FETCH( &r[1], 0, CHAN_Y );
1855 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1856
1857 FETCH( &r[2], 0, CHAN_W );
1858 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1859 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1860 micro_pow( &r[1], &r[1], &r[2] );
1861 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1862 STORE( &r[0], 0, CHAN_Z );
1863 }
1864 }
1865
1866 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1867 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1868 }
1869 break;
1870
1871 case TGSI_OPCODE_RCP:
1872 /* TGSI_OPCODE_RECIP */
1873 FETCH( &r[0], 0, CHAN_X );
1874 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1875 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1876 STORE( &r[0], 0, chan_index );
1877 }
1878 break;
1879
1880 case TGSI_OPCODE_RSQ:
1881 /* TGSI_OPCODE_RECIPSQRT */
1882 FETCH( &r[0], 0, CHAN_X );
1883 micro_abs( &r[0], &r[0] );
1884 micro_sqrt( &r[0], &r[0] );
1885 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1886 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1887 STORE( &r[0], 0, chan_index );
1888 }
1889 break;
1890
1891 case TGSI_OPCODE_EXP:
1892 FETCH( &r[0], 0, CHAN_X );
1893 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1894 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1895 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1896 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1897 }
1898 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1899 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1900 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1901 }
1902 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1903 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1904 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1905 }
1906 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1907 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1908 }
1909 break;
1910
1911 case TGSI_OPCODE_LOG:
1912 FETCH( &r[0], 0, CHAN_X );
1913 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1914 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1915 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1916 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1917 STORE( &r[0], 0, CHAN_X );
1918 }
1919 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1920 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1921 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1922 STORE( &r[0], 0, CHAN_Y );
1923 }
1924 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1925 STORE( &r[1], 0, CHAN_Z );
1926 }
1927 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1928 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1929 }
1930 break;
1931
1932 case TGSI_OPCODE_MUL:
1933 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1934 {
1935 FETCH(&r[0], 0, chan_index);
1936 FETCH(&r[1], 1, chan_index);
1937
1938 micro_mul( &r[0], &r[0], &r[1] );
1939
1940 STORE(&r[0], 0, chan_index);
1941 }
1942 break;
1943
1944 case TGSI_OPCODE_ADD:
1945 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1946 FETCH( &r[0], 0, chan_index );
1947 FETCH( &r[1], 1, chan_index );
1948 micro_add( &r[0], &r[0], &r[1] );
1949 STORE( &r[0], 0, chan_index );
1950 }
1951 break;
1952
1953 case TGSI_OPCODE_DP3:
1954 /* TGSI_OPCODE_DOT3 */
1955 FETCH( &r[0], 0, CHAN_X );
1956 FETCH( &r[1], 1, CHAN_X );
1957 micro_mul( &r[0], &r[0], &r[1] );
1958
1959 FETCH( &r[1], 0, CHAN_Y );
1960 FETCH( &r[2], 1, CHAN_Y );
1961 micro_mul( &r[1], &r[1], &r[2] );
1962 micro_add( &r[0], &r[0], &r[1] );
1963
1964 FETCH( &r[1], 0, CHAN_Z );
1965 FETCH( &r[2], 1, CHAN_Z );
1966 micro_mul( &r[1], &r[1], &r[2] );
1967 micro_add( &r[0], &r[0], &r[1] );
1968
1969 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1970 STORE( &r[0], 0, chan_index );
1971 }
1972 break;
1973
1974 case TGSI_OPCODE_DP4:
1975 /* TGSI_OPCODE_DOT4 */
1976 FETCH(&r[0], 0, CHAN_X);
1977 FETCH(&r[1], 1, CHAN_X);
1978
1979 micro_mul( &r[0], &r[0], &r[1] );
1980
1981 FETCH(&r[1], 0, CHAN_Y);
1982 FETCH(&r[2], 1, CHAN_Y);
1983
1984 micro_mul( &r[1], &r[1], &r[2] );
1985 micro_add( &r[0], &r[0], &r[1] );
1986
1987 FETCH(&r[1], 0, CHAN_Z);
1988 FETCH(&r[2], 1, CHAN_Z);
1989
1990 micro_mul( &r[1], &r[1], &r[2] );
1991 micro_add( &r[0], &r[0], &r[1] );
1992
1993 FETCH(&r[1], 0, CHAN_W);
1994 FETCH(&r[2], 1, CHAN_W);
1995
1996 micro_mul( &r[1], &r[1], &r[2] );
1997 micro_add( &r[0], &r[0], &r[1] );
1998
1999 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2000 STORE( &r[0], 0, chan_index );
2001 }
2002 break;
2003
2004 case TGSI_OPCODE_DST:
2005 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2006 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2007 }
2008
2009 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2010 FETCH( &r[0], 0, CHAN_Y );
2011 FETCH( &r[1], 1, CHAN_Y);
2012 micro_mul( &r[0], &r[0], &r[1] );
2013 STORE( &r[0], 0, CHAN_Y );
2014 }
2015
2016 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2017 FETCH( &r[0], 0, CHAN_Z );
2018 STORE( &r[0], 0, CHAN_Z );
2019 }
2020
2021 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2022 FETCH( &r[0], 1, CHAN_W );
2023 STORE( &r[0], 0, CHAN_W );
2024 }
2025 break;
2026
2027 case TGSI_OPCODE_MIN:
2028 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2029 FETCH(&r[0], 0, chan_index);
2030 FETCH(&r[1], 1, chan_index);
2031
2032 /* XXX use micro_min()?? */
2033 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
2034
2035 STORE(&r[0], 0, chan_index);
2036 }
2037 break;
2038
2039 case TGSI_OPCODE_MAX:
2040 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2041 FETCH(&r[0], 0, chan_index);
2042 FETCH(&r[1], 1, chan_index);
2043
2044 /* XXX use micro_max()?? */
2045 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2046
2047 STORE(&r[0], 0, chan_index );
2048 }
2049 break;
2050
2051 case TGSI_OPCODE_SLT:
2052 /* TGSI_OPCODE_SETLT */
2053 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2054 FETCH( &r[0], 0, chan_index );
2055 FETCH( &r[1], 1, chan_index );
2056 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2057 STORE( &r[0], 0, chan_index );
2058 }
2059 break;
2060
2061 case TGSI_OPCODE_SGE:
2062 /* TGSI_OPCODE_SETGE */
2063 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2064 FETCH( &r[0], 0, chan_index );
2065 FETCH( &r[1], 1, chan_index );
2066 micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2067 STORE( &r[0], 0, chan_index );
2068 }
2069 break;
2070
2071 case TGSI_OPCODE_MAD:
2072 /* TGSI_OPCODE_MADD */
2073 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2074 FETCH( &r[0], 0, chan_index );
2075 FETCH( &r[1], 1, chan_index );
2076 micro_mul( &r[0], &r[0], &r[1] );
2077 FETCH( &r[1], 2, chan_index );
2078 micro_add( &r[0], &r[0], &r[1] );
2079 STORE( &r[0], 0, chan_index );
2080 }
2081 break;
2082
2083 case TGSI_OPCODE_SUB:
2084 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2085 FETCH(&r[0], 0, chan_index);
2086 FETCH(&r[1], 1, chan_index);
2087
2088 micro_sub( &r[0], &r[0], &r[1] );
2089
2090 STORE(&r[0], 0, chan_index);
2091 }
2092 break;
2093
2094 case TGSI_OPCODE_LERP:
2095 /* TGSI_OPCODE_LRP */
2096 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2097 FETCH(&r[0], 0, chan_index);
2098 FETCH(&r[1], 1, chan_index);
2099 FETCH(&r[2], 2, chan_index);
2100
2101 micro_sub( &r[1], &r[1], &r[2] );
2102 micro_mul( &r[0], &r[0], &r[1] );
2103 micro_add( &r[0], &r[0], &r[2] );
2104
2105 STORE(&r[0], 0, chan_index);
2106 }
2107 break;
2108
2109 case TGSI_OPCODE_CND:
2110 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2111 FETCH(&r[0], 0, chan_index);
2112 FETCH(&r[1], 1, chan_index);
2113 FETCH(&r[2], 2, chan_index);
2114 micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2115 STORE(&r[0], 0, chan_index);
2116 }
2117 break;
2118
2119 case TGSI_OPCODE_CND0:
2120 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2121 FETCH(&r[0], 0, chan_index);
2122 FETCH(&r[1], 1, chan_index);
2123 FETCH(&r[2], 2, chan_index);
2124 micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
2125 STORE(&r[0], 0, chan_index);
2126 }
2127 break;
2128
2129 case TGSI_OPCODE_DOT2ADD:
2130 /* TGSI_OPCODE_DP2A */
2131 FETCH( &r[0], 0, CHAN_X );
2132 FETCH( &r[1], 1, CHAN_X );
2133 micro_mul( &r[0], &r[0], &r[1] );
2134
2135 FETCH( &r[1], 0, CHAN_Y );
2136 FETCH( &r[2], 1, CHAN_Y );
2137 micro_mul( &r[1], &r[1], &r[2] );
2138 micro_add( &r[0], &r[0], &r[1] );
2139
2140 FETCH( &r[2], 2, CHAN_X );
2141 micro_add( &r[0], &r[0], &r[2] );
2142
2143 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2144 STORE( &r[0], 0, chan_index );
2145 }
2146 break;
2147
2148 case TGSI_OPCODE_INDEX:
2149 /* XXX: considered for removal */
2150 assert (0);
2151 break;
2152
2153 case TGSI_OPCODE_NEGATE:
2154 /* XXX: considered for removal */
2155 assert (0);
2156 break;
2157
2158 case TGSI_OPCODE_FRAC:
2159 /* TGSI_OPCODE_FRC */
2160 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2161 FETCH( &r[0], 0, chan_index );
2162 micro_frc( &r[0], &r[0] );
2163 STORE( &r[0], 0, chan_index );
2164 }
2165 break;
2166
2167 case TGSI_OPCODE_CLAMP:
2168 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2169 FETCH(&r[0], 0, chan_index);
2170 FETCH(&r[1], 1, chan_index);
2171 micro_max(&r[0], &r[0], &r[1]);
2172 FETCH(&r[1], 2, chan_index);
2173 micro_min(&r[0], &r[0], &r[1]);
2174 STORE(&r[0], 0, chan_index);
2175 }
2176 break;
2177
2178 case TGSI_OPCODE_ROUND:
2179 case TGSI_OPCODE_ARR:
2180 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2181 FETCH( &r[0], 0, chan_index );
2182 micro_rnd( &r[0], &r[0] );
2183 STORE( &r[0], 0, chan_index );
2184 }
2185 break;
2186
2187 case TGSI_OPCODE_EXPBASE2:
2188 /* TGSI_OPCODE_EX2 */
2189 FETCH(&r[0], 0, CHAN_X);
2190
2191 #if FAST_MATH
2192 micro_exp2( &r[0], &r[0] );
2193 #else
2194 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2195 #endif
2196
2197 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2198 STORE( &r[0], 0, chan_index );
2199 }
2200 break;
2201
2202 case TGSI_OPCODE_LOGBASE2:
2203 /* TGSI_OPCODE_LG2 */
2204 FETCH( &r[0], 0, CHAN_X );
2205 micro_lg2( &r[0], &r[0] );
2206 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2207 STORE( &r[0], 0, chan_index );
2208 }
2209 break;
2210
2211 case TGSI_OPCODE_POWER:
2212 /* TGSI_OPCODE_POW */
2213 FETCH(&r[0], 0, CHAN_X);
2214 FETCH(&r[1], 1, CHAN_X);
2215
2216 micro_pow( &r[0], &r[0], &r[1] );
2217
2218 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2219 STORE( &r[0], 0, chan_index );
2220 }
2221 break;
2222
2223 case TGSI_OPCODE_CROSSPRODUCT:
2224 /* TGSI_OPCODE_XPD */
2225 FETCH(&r[0], 0, CHAN_Y);
2226 FETCH(&r[1], 1, CHAN_Z);
2227
2228 micro_mul( &r[2], &r[0], &r[1] );
2229
2230 FETCH(&r[3], 0, CHAN_Z);
2231 FETCH(&r[4], 1, CHAN_Y);
2232
2233 micro_mul( &r[5], &r[3], &r[4] );
2234 micro_sub( &r[2], &r[2], &r[5] );
2235
2236 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2237 STORE( &r[2], 0, CHAN_X );
2238 }
2239
2240 FETCH(&r[2], 1, CHAN_X);
2241
2242 micro_mul( &r[3], &r[3], &r[2] );
2243
2244 FETCH(&r[5], 0, CHAN_X);
2245
2246 micro_mul( &r[1], &r[1], &r[5] );
2247 micro_sub( &r[3], &r[3], &r[1] );
2248
2249 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2250 STORE( &r[3], 0, CHAN_Y );
2251 }
2252
2253 micro_mul( &r[5], &r[5], &r[4] );
2254 micro_mul( &r[0], &r[0], &r[2] );
2255 micro_sub( &r[5], &r[5], &r[0] );
2256
2257 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2258 STORE( &r[5], 0, CHAN_Z );
2259 }
2260
2261 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2262 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2263 }
2264 break;
2265
2266 case TGSI_OPCODE_MULTIPLYMATRIX:
2267 /* XXX: considered for removal */
2268 assert (0);
2269 break;
2270
2271 case TGSI_OPCODE_ABS:
2272 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2273 FETCH(&r[0], 0, chan_index);
2274
2275 micro_abs( &r[0], &r[0] );
2276
2277 STORE(&r[0], 0, chan_index);
2278 }
2279 break;
2280
2281 case TGSI_OPCODE_RCC:
2282 assert (0);
2283 break;
2284
2285 case TGSI_OPCODE_DPH:
2286 FETCH(&r[0], 0, CHAN_X);
2287 FETCH(&r[1], 1, CHAN_X);
2288
2289 micro_mul( &r[0], &r[0], &r[1] );
2290
2291 FETCH(&r[1], 0, CHAN_Y);
2292 FETCH(&r[2], 1, CHAN_Y);
2293
2294 micro_mul( &r[1], &r[1], &r[2] );
2295 micro_add( &r[0], &r[0], &r[1] );
2296
2297 FETCH(&r[1], 0, CHAN_Z);
2298 FETCH(&r[2], 1, CHAN_Z);
2299
2300 micro_mul( &r[1], &r[1], &r[2] );
2301 micro_add( &r[0], &r[0], &r[1] );
2302
2303 FETCH(&r[1], 1, CHAN_W);
2304
2305 micro_add( &r[0], &r[0], &r[1] );
2306
2307 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2308 STORE( &r[0], 0, chan_index );
2309 }
2310 break;
2311
2312 case TGSI_OPCODE_COS:
2313 FETCH(&r[0], 0, CHAN_X);
2314
2315 micro_cos( &r[0], &r[0] );
2316
2317 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2318 STORE( &r[0], 0, chan_index );
2319 }
2320 break;
2321
2322 case TGSI_OPCODE_DDX:
2323 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2324 FETCH( &r[0], 0, chan_index );
2325 micro_ddx( &r[0], &r[0] );
2326 STORE( &r[0], 0, chan_index );
2327 }
2328 break;
2329
2330 case TGSI_OPCODE_DDY:
2331 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2332 FETCH( &r[0], 0, chan_index );
2333 micro_ddy( &r[0], &r[0] );
2334 STORE( &r[0], 0, chan_index );
2335 }
2336 break;
2337
2338 case TGSI_OPCODE_KILP:
2339 exec_kilp (mach, inst);
2340 break;
2341
2342 case TGSI_OPCODE_KIL:
2343 exec_kil (mach, inst);
2344 break;
2345
2346 case TGSI_OPCODE_PK2H:
2347 assert (0);
2348 break;
2349
2350 case TGSI_OPCODE_PK2US:
2351 assert (0);
2352 break;
2353
2354 case TGSI_OPCODE_PK4B:
2355 assert (0);
2356 break;
2357
2358 case TGSI_OPCODE_PK4UB:
2359 assert (0);
2360 break;
2361
2362 case TGSI_OPCODE_RFL:
2363 assert (0);
2364 break;
2365
2366 case TGSI_OPCODE_SEQ:
2367 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2368 FETCH( &r[0], 0, chan_index );
2369 FETCH( &r[1], 1, chan_index );
2370 micro_eq( &r[0], &r[0], &r[1],
2371 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2372 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2373 STORE( &r[0], 0, chan_index );
2374 }
2375 break;
2376
2377 case TGSI_OPCODE_SFL:
2378 assert (0);
2379 break;
2380
2381 case TGSI_OPCODE_SGT:
2382 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2383 FETCH( &r[0], 0, chan_index );
2384 FETCH( &r[1], 1, chan_index );
2385 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2386 STORE( &r[0], 0, chan_index );
2387 }
2388 break;
2389
2390 case TGSI_OPCODE_SIN:
2391 FETCH( &r[0], 0, CHAN_X );
2392 micro_sin( &r[0], &r[0] );
2393 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2394 STORE( &r[0], 0, chan_index );
2395 }
2396 break;
2397
2398 case TGSI_OPCODE_SLE:
2399 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2400 FETCH( &r[0], 0, chan_index );
2401 FETCH( &r[1], 1, chan_index );
2402 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2403 STORE( &r[0], 0, chan_index );
2404 }
2405 break;
2406
2407 case TGSI_OPCODE_SNE:
2408 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2409 FETCH( &r[0], 0, chan_index );
2410 FETCH( &r[1], 1, chan_index );
2411 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2412 STORE( &r[0], 0, chan_index );
2413 }
2414 break;
2415
2416 case TGSI_OPCODE_STR:
2417 assert (0);
2418 break;
2419
2420 case TGSI_OPCODE_TEX:
2421 /* simple texture lookup */
2422 /* src[0] = texcoord */
2423 /* src[1] = sampler unit */
2424 exec_tex(mach, inst, FALSE, FALSE);
2425 break;
2426
2427 case TGSI_OPCODE_TXB:
2428 /* Texture lookup with lod bias */
2429 /* src[0] = texcoord (src[0].w = LOD bias) */
2430 /* src[1] = sampler unit */
2431 exec_tex(mach, inst, TRUE, FALSE);
2432 break;
2433
2434 case TGSI_OPCODE_TXD:
2435 /* Texture lookup with explict partial derivatives */
2436 /* src[0] = texcoord */
2437 /* src[1] = d[strq]/dx */
2438 /* src[2] = d[strq]/dy */
2439 /* src[3] = sampler unit */
2440 assert (0);
2441 break;
2442
2443 case TGSI_OPCODE_TXL:
2444 /* Texture lookup with explit LOD */
2445 /* src[0] = texcoord (src[0].w = LOD) */
2446 /* src[1] = sampler unit */
2447 exec_tex(mach, inst, TRUE, FALSE);
2448 break;
2449
2450 case TGSI_OPCODE_TXP:
2451 /* Texture lookup with projection */
2452 /* src[0] = texcoord (src[0].w = projection) */
2453 /* src[1] = sampler unit */
2454 exec_tex(mach, inst, FALSE, TRUE);
2455 break;
2456
2457 case TGSI_OPCODE_UP2H:
2458 assert (0);
2459 break;
2460
2461 case TGSI_OPCODE_UP2US:
2462 assert (0);
2463 break;
2464
2465 case TGSI_OPCODE_UP4B:
2466 assert (0);
2467 break;
2468
2469 case TGSI_OPCODE_UP4UB:
2470 assert (0);
2471 break;
2472
2473 case TGSI_OPCODE_X2D:
2474 assert (0);
2475 break;
2476
2477 case TGSI_OPCODE_ARA:
2478 assert (0);
2479 break;
2480
2481 case TGSI_OPCODE_BRA:
2482 assert (0);
2483 break;
2484
2485 case TGSI_OPCODE_CAL:
2486 /* skip the call if no execution channels are enabled */
2487 if (mach->ExecMask) {
2488 /* do the call */
2489
2490 /* push the Cond, Loop, Cont stacks */
2491 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2492 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2493 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2494 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2495 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2496 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2497
2498 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2499 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2500
2501 /* note that PC was already incremented above */
2502 mach->CallStack[mach->CallStackTop++] = *pc;
2503 *pc = inst->InstructionExtLabel.Label;
2504 }
2505 break;
2506
2507 case TGSI_OPCODE_RET:
2508 mach->FuncMask &= ~mach->ExecMask;
2509 UPDATE_EXEC_MASK(mach);
2510
2511 if (mach->FuncMask == 0x0) {
2512 /* really return now (otherwise, keep executing */
2513
2514 if (mach->CallStackTop == 0) {
2515 /* returning from main() */
2516 *pc = -1;
2517 return;
2518 }
2519 *pc = mach->CallStack[--mach->CallStackTop];
2520
2521 /* pop the Cond, Loop, Cont stacks */
2522 assert(mach->CondStackTop > 0);
2523 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2524 assert(mach->LoopStackTop > 0);
2525 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2526 assert(mach->ContStackTop > 0);
2527 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2528 assert(mach->FuncStackTop > 0);
2529 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2530
2531 UPDATE_EXEC_MASK(mach);
2532 }
2533 break;
2534
2535 case TGSI_OPCODE_SSG:
2536 /* TGSI_OPCODE_SGN */
2537 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2538 FETCH( &r[0], 0, chan_index );
2539 micro_sgn( &r[0], &r[0] );
2540 STORE( &r[0], 0, chan_index );
2541 }
2542 break;
2543
2544 case TGSI_OPCODE_CMP:
2545 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2546 FETCH(&r[0], 0, chan_index);
2547 FETCH(&r[1], 1, chan_index);
2548 FETCH(&r[2], 2, chan_index);
2549
2550 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2551
2552 STORE(&r[0], 0, chan_index);
2553 }
2554 break;
2555
2556 case TGSI_OPCODE_SCS:
2557 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2558 FETCH( &r[0], 0, CHAN_X );
2559 }
2560 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2561 micro_cos( &r[1], &r[0] );
2562 STORE( &r[1], 0, CHAN_X );
2563 }
2564 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2565 micro_sin( &r[1], &r[0] );
2566 STORE( &r[1], 0, CHAN_Y );
2567 }
2568 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2569 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2570 }
2571 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2572 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2573 }
2574 break;
2575
2576 case TGSI_OPCODE_NRM:
2577 /* 3-component vector normalize */
2578 {
2579 union tgsi_exec_channel tmp, dot;
2580
2581 /* tmp = dp3(src0, src0): */
2582 FETCH( &r[0], 0, CHAN_X );
2583 micro_mul( &tmp, &r[0], &r[0] );
2584
2585 FETCH( &r[1], 0, CHAN_Y );
2586 micro_mul( &dot, &r[1], &r[1] );
2587 micro_add( &tmp, &tmp, &dot );
2588
2589 FETCH( &r[2], 0, CHAN_Z );
2590 micro_mul( &dot, &r[2], &r[2] );
2591 micro_add( &tmp, &tmp, &dot );
2592
2593 /* tmp = 1 / sqrt(tmp) */
2594 micro_sqrt( &tmp, &tmp );
2595 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2596
2597 /* note: w channel is undefined */
2598 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2599 /* chan = chan * tmp */
2600 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2601 STORE( &r[chan_index], 0, chan_index );
2602 }
2603 }
2604 break;
2605
2606 case TGSI_OPCODE_NRM4:
2607 /* 4-component vector normalize */
2608 {
2609 union tgsi_exec_channel tmp, dot;
2610
2611 /* tmp = dp4(src0, src0): */
2612 FETCH( &r[0], 0, CHAN_X );
2613 micro_mul( &tmp, &r[0], &r[0] );
2614
2615 FETCH( &r[1], 0, CHAN_Y );
2616 micro_mul( &dot, &r[1], &r[1] );
2617 micro_add( &tmp, &tmp, &dot );
2618
2619 FETCH( &r[2], 0, CHAN_Z );
2620 micro_mul( &dot, &r[2], &r[2] );
2621 micro_add( &tmp, &tmp, &dot );
2622
2623 FETCH( &r[3], 0, CHAN_W );
2624 micro_mul( &dot, &r[3], &r[3] );
2625 micro_add( &tmp, &tmp, &dot );
2626
2627 /* tmp = 1 / sqrt(tmp) */
2628 micro_sqrt( &tmp, &tmp );
2629 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2630
2631 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2632 /* chan = chan * tmp */
2633 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2634 STORE( &r[chan_index], 0, chan_index );
2635 }
2636 }
2637 break;
2638
2639 case TGSI_OPCODE_DIV:
2640 assert( 0 );
2641 break;
2642
2643 case TGSI_OPCODE_DP2:
2644 FETCH( &r[0], 0, CHAN_X );
2645 FETCH( &r[1], 1, CHAN_X );
2646 micro_mul( &r[0], &r[0], &r[1] );
2647
2648 FETCH( &r[1], 0, CHAN_Y );
2649 FETCH( &r[2], 1, CHAN_Y );
2650 micro_mul( &r[1], &r[1], &r[2] );
2651 micro_add( &r[0], &r[0], &r[1] );
2652
2653 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2654 STORE( &r[0], 0, chan_index );
2655 }
2656 break;
2657
2658 case TGSI_OPCODE_IF:
2659 /* push CondMask */
2660 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2661 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2662 FETCH( &r[0], 0, CHAN_X );
2663 /* update CondMask */
2664 if( ! r[0].u[0] ) {
2665 mach->CondMask &= ~0x1;
2666 }
2667 if( ! r[0].u[1] ) {
2668 mach->CondMask &= ~0x2;
2669 }
2670 if( ! r[0].u[2] ) {
2671 mach->CondMask &= ~0x4;
2672 }
2673 if( ! r[0].u[3] ) {
2674 mach->CondMask &= ~0x8;
2675 }
2676 UPDATE_EXEC_MASK(mach);
2677 /* Todo: If CondMask==0, jump to ELSE */
2678 break;
2679
2680 case TGSI_OPCODE_ELSE:
2681 /* invert CondMask wrt previous mask */
2682 {
2683 uint prevMask;
2684 assert(mach->CondStackTop > 0);
2685 prevMask = mach->CondStack[mach->CondStackTop - 1];
2686 mach->CondMask = ~mach->CondMask & prevMask;
2687 UPDATE_EXEC_MASK(mach);
2688 /* Todo: If CondMask==0, jump to ENDIF */
2689 }
2690 break;
2691
2692 case TGSI_OPCODE_ENDIF:
2693 /* pop CondMask */
2694 assert(mach->CondStackTop > 0);
2695 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2696 UPDATE_EXEC_MASK(mach);
2697 break;
2698
2699 case TGSI_OPCODE_END:
2700 /* halt execution */
2701 *pc = -1;
2702 break;
2703
2704 case TGSI_OPCODE_REP:
2705 assert (0);
2706 break;
2707
2708 case TGSI_OPCODE_ENDREP:
2709 assert (0);
2710 break;
2711
2712 case TGSI_OPCODE_PUSHA:
2713 assert (0);
2714 break;
2715
2716 case TGSI_OPCODE_POPA:
2717 assert (0);
2718 break;
2719
2720 case TGSI_OPCODE_CEIL:
2721 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2722 FETCH( &r[0], 0, chan_index );
2723 micro_ceil( &r[0], &r[0] );
2724 STORE( &r[0], 0, chan_index );
2725 }
2726 break;
2727
2728 case TGSI_OPCODE_I2F:
2729 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2730 FETCH( &r[0], 0, chan_index );
2731 micro_i2f( &r[0], &r[0] );
2732 STORE( &r[0], 0, chan_index );
2733 }
2734 break;
2735
2736 case TGSI_OPCODE_NOT:
2737 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2738 FETCH( &r[0], 0, chan_index );
2739 micro_not( &r[0], &r[0] );
2740 STORE( &r[0], 0, chan_index );
2741 }
2742 break;
2743
2744 case TGSI_OPCODE_TRUNC:
2745 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2746 FETCH( &r[0], 0, chan_index );
2747 micro_trunc( &r[0], &r[0] );
2748 STORE( &r[0], 0, chan_index );
2749 }
2750 break;
2751
2752 case TGSI_OPCODE_SHL:
2753 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2754 FETCH( &r[0], 0, chan_index );
2755 FETCH( &r[1], 1, chan_index );
2756 micro_shl( &r[0], &r[0], &r[1] );
2757 STORE( &r[0], 0, chan_index );
2758 }
2759 break;
2760
2761 case TGSI_OPCODE_SHR:
2762 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2763 FETCH( &r[0], 0, chan_index );
2764 FETCH( &r[1], 1, chan_index );
2765 micro_ishr( &r[0], &r[0], &r[1] );
2766 STORE( &r[0], 0, chan_index );
2767 }
2768 break;
2769
2770 case TGSI_OPCODE_AND:
2771 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2772 FETCH( &r[0], 0, chan_index );
2773 FETCH( &r[1], 1, chan_index );
2774 micro_and( &r[0], &r[0], &r[1] );
2775 STORE( &r[0], 0, chan_index );
2776 }
2777 break;
2778
2779 case TGSI_OPCODE_OR:
2780 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2781 FETCH( &r[0], 0, chan_index );
2782 FETCH( &r[1], 1, chan_index );
2783 micro_or( &r[0], &r[0], &r[1] );
2784 STORE( &r[0], 0, chan_index );
2785 }
2786 break;
2787
2788 case TGSI_OPCODE_MOD:
2789 assert (0);
2790 break;
2791
2792 case TGSI_OPCODE_XOR:
2793 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2794 FETCH( &r[0], 0, chan_index );
2795 FETCH( &r[1], 1, chan_index );
2796 micro_xor( &r[0], &r[0], &r[1] );
2797 STORE( &r[0], 0, chan_index );
2798 }
2799 break;
2800
2801 case TGSI_OPCODE_SAD:
2802 assert (0);
2803 break;
2804
2805 case TGSI_OPCODE_TXF:
2806 assert (0);
2807 break;
2808
2809 case TGSI_OPCODE_TXQ:
2810 assert (0);
2811 break;
2812
2813 case TGSI_OPCODE_EMIT:
2814 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2815 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2816 break;
2817
2818 case TGSI_OPCODE_ENDPRIM:
2819 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2820 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2821 break;
2822
2823 case TGSI_OPCODE_LOOP:
2824 /* fall-through (for now) */
2825 case TGSI_OPCODE_BGNLOOP2:
2826 /* push LoopMask and ContMasks */
2827 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2828 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2829 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2830 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2831 break;
2832
2833 case TGSI_OPCODE_ENDLOOP:
2834 /* fall-through (for now at least) */
2835 case TGSI_OPCODE_ENDLOOP2:
2836 /* Restore ContMask, but don't pop */
2837 assert(mach->ContStackTop > 0);
2838 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2839 UPDATE_EXEC_MASK(mach);
2840 if (mach->ExecMask) {
2841 /* repeat loop: jump to instruction just past BGNLOOP */
2842 *pc = inst->InstructionExtLabel.Label + 1;
2843 }
2844 else {
2845 /* exit loop: pop LoopMask */
2846 assert(mach->LoopStackTop > 0);
2847 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2848 /* pop ContMask */
2849 assert(mach->ContStackTop > 0);
2850 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2851 }
2852 UPDATE_EXEC_MASK(mach);
2853 break;
2854
2855 case TGSI_OPCODE_BRK:
2856 /* turn off loop channels for each enabled exec channel */
2857 mach->LoopMask &= ~mach->ExecMask;
2858 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2859 UPDATE_EXEC_MASK(mach);
2860 break;
2861
2862 case TGSI_OPCODE_CONT:
2863 /* turn off cont channels for each enabled exec channel */
2864 mach->ContMask &= ~mach->ExecMask;
2865 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2866 UPDATE_EXEC_MASK(mach);
2867 break;
2868
2869 case TGSI_OPCODE_BGNSUB:
2870 /* no-op */
2871 break;
2872
2873 case TGSI_OPCODE_ENDSUB:
2874 /* no-op */
2875 break;
2876
2877 case TGSI_OPCODE_NOISE1:
2878 assert( 0 );
2879 break;
2880
2881 case TGSI_OPCODE_NOISE2:
2882 assert( 0 );
2883 break;
2884
2885 case TGSI_OPCODE_NOISE3:
2886 assert( 0 );
2887 break;
2888
2889 case TGSI_OPCODE_NOISE4:
2890 assert( 0 );
2891 break;
2892
2893 case TGSI_OPCODE_NOP:
2894 break;
2895
2896 default:
2897 assert( 0 );
2898 }
2899 }
2900
2901
2902 /**
2903 * Run TGSI interpreter.
2904 * \return bitmask of "alive" quad components
2905 */
2906 uint
2907 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2908 {
2909 uint i;
2910 int pc = 0;
2911
2912 mach->CondMask = 0xf;
2913 mach->LoopMask = 0xf;
2914 mach->ContMask = 0xf;
2915 mach->FuncMask = 0xf;
2916 mach->ExecMask = 0xf;
2917
2918 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2919 assert(mach->CondStackTop == 0);
2920 assert(mach->LoopStackTop == 0);
2921 assert(mach->ContStackTop == 0);
2922 assert(mach->CallStackTop == 0);
2923
2924 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2925 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2926
2927 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2928 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2929 mach->Primitives[0] = 0;
2930 }
2931
2932 for (i = 0; i < QUAD_SIZE; i++) {
2933 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2934 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2935 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2936 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2937 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2938 }
2939
2940 /* execute declarations (interpolants) */
2941 for (i = 0; i < mach->NumDeclarations; i++) {
2942 exec_declaration( mach, mach->Declarations+i );
2943 }
2944
2945 /* execute instructions, until pc is set to -1 */
2946 while (pc != -1) {
2947 assert(pc < (int) mach->NumInstructions);
2948 exec_instruction( mach, mach->Instructions + pc, &pc );
2949 }
2950
2951 #if 0
2952 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2953 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2954 /*
2955 * Scale back depth component.
2956 */
2957 for (i = 0; i < 4; i++)
2958 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2959 }
2960 #endif
2961
2962 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2963 }