0fdfb91d39333955408215edadade46a11eeb12f
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask; see UPDATE_EXEC_MASK). The first three are controlled
45 * by the flow control instructions (namely: IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_parse.h"
57 #include "tgsi/tgsi_util.h"
58 #include "tgsi_exec.h"
59 #include "util/u_memory.h"
60 #include "util/u_math.h"
61
62 #define FAST_MATH 1
63
64 #define TILE_TOP_LEFT 0
65 #define TILE_TOP_RIGHT 1
66 #define TILE_BOTTOM_LEFT 2
67 #define TILE_BOTTOM_RIGHT 3
68
69 #define CHAN_X 0
70 #define CHAN_Y 1
71 #define CHAN_Z 2
72 #define CHAN_W 3
73
74 /*
75 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
76 */
77 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
78 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
79 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
80 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
81 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
82 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
83 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
84 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
85 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
86 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
87 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
88 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
89 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
90 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
91 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
92 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
93 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
94 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
95 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
96 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
97 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
98 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
99 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
100 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
101 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
102 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
103 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
104 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
105 #define TEMP_R0 TGSI_EXEC_TEMP_R0
106
107 #define IS_CHANNEL_ENABLED(INST, CHAN)\
108 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
109
110 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
111 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
112
113 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
114 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
115 if (IS_CHANNEL_ENABLED( INST, CHAN ))
116
117 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
118 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
119 if (IS_CHANNEL_ENABLED2( INST, CHAN ))
120
121
/**
 * Recompute MACH->ExecMask as the bitwise AND of the machine's CondMask,
 * LoopMask, ContMask and FuncMask: a bit stays set in the execution mask
 * only when it is set in all four.
 *
 * MACH is a pointer expression; it is parenthesized (and evaluated more
 * than once), so pass an expression without side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->FuncMask)
125
126 /**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131 void
132 tgsi_exec_machine_bind_shader(
133 struct tgsi_exec_machine *mach,
134 const struct tgsi_token *tokens,
135 uint numSamplers,
136 struct tgsi_sampler **samplers)
137 {
138 uint k;
139 struct tgsi_parse_context parse;
140 struct tgsi_exec_labels *labels = &mach->Labels;
141 struct tgsi_full_instruction *instructions;
142 struct tgsi_full_declaration *declarations;
143 uint maxInstructions = 10, numInstructions = 0;
144 uint maxDeclarations = 10, numDeclarations = 0;
145 uint instno = 0;
146
147 #if 0
148 tgsi_dump(tokens, 0);
149 #endif
150
151 util_init_math();
152
153 mach->Tokens = tokens;
154 mach->Samplers = samplers;
155
156 k = tgsi_parse_init (&parse, mach->Tokens);
157 if (k != TGSI_PARSE_OK) {
158 debug_printf( "Problem parsing!\n" );
159 return;
160 }
161
162 mach->Processor = parse.FullHeader.Processor.Processor;
163 mach->ImmLimit = 0;
164 labels->count = 0;
165
166 declarations = (struct tgsi_full_declaration *)
167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169 if (!declarations) {
170 return;
171 }
172
173 instructions = (struct tgsi_full_instruction *)
174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176 if (!instructions) {
177 FREE( declarations );
178 return;
179 }
180
181 while( !tgsi_parse_end_of_tokens( &parse ) ) {
182 uint pointer = parse.Position;
183 uint i;
184
185 tgsi_parse_token( &parse );
186 switch( parse.FullToken.Token.Type ) {
187 case TGSI_TOKEN_TYPE_DECLARATION:
188 /* save expanded declaration */
189 if (numDeclarations == maxDeclarations) {
190 declarations = REALLOC(declarations,
191 maxDeclarations
192 * sizeof(struct tgsi_full_declaration),
193 (maxDeclarations + 10)
194 * sizeof(struct tgsi_full_declaration));
195 maxDeclarations += 10;
196 }
197 memcpy(declarations + numDeclarations,
198 &parse.FullToken.FullDeclaration,
199 sizeof(declarations[0]));
200 numDeclarations++;
201 break;
202
203 case TGSI_TOKEN_TYPE_IMMEDIATE:
204 {
205 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
206 assert( size % 4 == 0 );
207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209 for( i = 0; i < size; i++ ) {
210 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212 }
213 mach->ImmLimit += size / 4;
214 }
215 break;
216
217 case TGSI_TOKEN_TYPE_INSTRUCTION:
218 assert( labels->count < MAX_LABELS );
219
220 labels->labels[labels->count][0] = instno;
221 labels->labels[labels->count][1] = pointer;
222 labels->count++;
223
224 /* save expanded instruction */
225 if (numInstructions == maxInstructions) {
226 instructions = REALLOC(instructions,
227 maxInstructions
228 * sizeof(struct tgsi_full_instruction),
229 (maxInstructions + 10)
230 * sizeof(struct tgsi_full_instruction));
231 maxInstructions += 10;
232 }
233 memcpy(instructions + numInstructions,
234 &parse.FullToken.FullInstruction,
235 sizeof(instructions[0]));
236 numInstructions++;
237 break;
238
239 default:
240 assert( 0 );
241 }
242 }
243 tgsi_parse_free (&parse);
244
245 if (mach->Declarations) {
246 FREE( mach->Declarations );
247 }
248 mach->Declarations = declarations;
249 mach->NumDeclarations = numDeclarations;
250
251 if (mach->Instructions) {
252 FREE( mach->Instructions );
253 }
254 mach->Instructions = instructions;
255 mach->NumInstructions = numInstructions;
256 }
257
258
259 void
260 tgsi_exec_machine_init(
261 struct tgsi_exec_machine *mach )
262 {
263 uint i;
264
265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268 /* Setup constants. */
269 for( i = 0; i < 4; i++ ) {
270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280 }
281 }
282
283
284 void
285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286 {
287 if (mach->Instructions) {
288 FREE(mach->Instructions);
289 mach->Instructions = NULL;
290 mach->NumInstructions = 0;
291 }
292 if (mach->Declarations) {
293 FREE(mach->Declarations);
294 mach->Declarations = NULL;
295 mach->NumDeclarations = 0;
296 }
297 }
298
299
300 static void
301 micro_abs(
302 union tgsi_exec_channel *dst,
303 const union tgsi_exec_channel *src )
304 {
305 dst->f[0] = fabsf( src->f[0] );
306 dst->f[1] = fabsf( src->f[1] );
307 dst->f[2] = fabsf( src->f[2] );
308 dst->f[3] = fabsf( src->f[3] );
309 }
310
311 static void
312 micro_add(
313 union tgsi_exec_channel *dst,
314 const union tgsi_exec_channel *src0,
315 const union tgsi_exec_channel *src1 )
316 {
317 dst->f[0] = src0->f[0] + src1->f[0];
318 dst->f[1] = src0->f[1] + src1->f[1];
319 dst->f[2] = src0->f[2] + src1->f[2];
320 dst->f[3] = src0->f[3] + src1->f[3];
321 }
322
323 static void
324 micro_iadd(
325 union tgsi_exec_channel *dst,
326 const union tgsi_exec_channel *src0,
327 const union tgsi_exec_channel *src1 )
328 {
329 dst->i[0] = src0->i[0] + src1->i[0];
330 dst->i[1] = src0->i[1] + src1->i[1];
331 dst->i[2] = src0->i[2] + src1->i[2];
332 dst->i[3] = src0->i[3] + src1->i[3];
333 }
334
335 static void
336 micro_and(
337 union tgsi_exec_channel *dst,
338 const union tgsi_exec_channel *src0,
339 const union tgsi_exec_channel *src1 )
340 {
341 dst->u[0] = src0->u[0] & src1->u[0];
342 dst->u[1] = src0->u[1] & src1->u[1];
343 dst->u[2] = src0->u[2] & src1->u[2];
344 dst->u[3] = src0->u[3] & src1->u[3];
345 }
346
347 static void
348 micro_ceil(
349 union tgsi_exec_channel *dst,
350 const union tgsi_exec_channel *src )
351 {
352 dst->f[0] = ceilf( src->f[0] );
353 dst->f[1] = ceilf( src->f[1] );
354 dst->f[2] = ceilf( src->f[2] );
355 dst->f[3] = ceilf( src->f[3] );
356 }
357
358 static void
359 micro_cos(
360 union tgsi_exec_channel *dst,
361 const union tgsi_exec_channel *src )
362 {
363 dst->f[0] = cosf( src->f[0] );
364 dst->f[1] = cosf( src->f[1] );
365 dst->f[2] = cosf( src->f[2] );
366 dst->f[3] = cosf( src->f[3] );
367 }
368
369 static void
370 micro_ddx(
371 union tgsi_exec_channel *dst,
372 const union tgsi_exec_channel *src )
373 {
374 dst->f[0] =
375 dst->f[1] =
376 dst->f[2] =
377 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
378 }
379
380 static void
381 micro_ddy(
382 union tgsi_exec_channel *dst,
383 const union tgsi_exec_channel *src )
384 {
385 dst->f[0] =
386 dst->f[1] =
387 dst->f[2] =
388 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
389 }
390
391 static void
392 micro_div(
393 union tgsi_exec_channel *dst,
394 const union tgsi_exec_channel *src0,
395 const union tgsi_exec_channel *src1 )
396 {
397 if (src1->f[0] != 0) {
398 dst->f[0] = src0->f[0] / src1->f[0];
399 }
400 if (src1->f[1] != 0) {
401 dst->f[1] = src0->f[1] / src1->f[1];
402 }
403 if (src1->f[2] != 0) {
404 dst->f[2] = src0->f[2] / src1->f[2];
405 }
406 if (src1->f[3] != 0) {
407 dst->f[3] = src0->f[3] / src1->f[3];
408 }
409 }
410
411 static void
412 micro_udiv(
413 union tgsi_exec_channel *dst,
414 const union tgsi_exec_channel *src0,
415 const union tgsi_exec_channel *src1 )
416 {
417 dst->u[0] = src0->u[0] / src1->u[0];
418 dst->u[1] = src0->u[1] / src1->u[1];
419 dst->u[2] = src0->u[2] / src1->u[2];
420 dst->u[3] = src0->u[3] / src1->u[3];
421 }
422
423 static void
424 micro_eq(
425 union tgsi_exec_channel *dst,
426 const union tgsi_exec_channel *src0,
427 const union tgsi_exec_channel *src1,
428 const union tgsi_exec_channel *src2,
429 const union tgsi_exec_channel *src3 )
430 {
431 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
432 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
433 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
434 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
435 }
436
437 static void
438 micro_ieq(
439 union tgsi_exec_channel *dst,
440 const union tgsi_exec_channel *src0,
441 const union tgsi_exec_channel *src1,
442 const union tgsi_exec_channel *src2,
443 const union tgsi_exec_channel *src3 )
444 {
445 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
446 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
447 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
448 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
449 }
450
451 static void
452 micro_exp2(
453 union tgsi_exec_channel *dst,
454 const union tgsi_exec_channel *src)
455 {
456 #if FAST_MATH
457 dst->f[0] = util_fast_exp2( src->f[0] );
458 dst->f[1] = util_fast_exp2( src->f[1] );
459 dst->f[2] = util_fast_exp2( src->f[2] );
460 dst->f[3] = util_fast_exp2( src->f[3] );
461 #else
462 dst->f[0] = powf( 2.0f, src->f[0] );
463 dst->f[1] = powf( 2.0f, src->f[1] );
464 dst->f[2] = powf( 2.0f, src->f[2] );
465 dst->f[3] = powf( 2.0f, src->f[3] );
466 #endif
467 }
468
469 static void
470 micro_f2ut(
471 union tgsi_exec_channel *dst,
472 const union tgsi_exec_channel *src )
473 {
474 dst->u[0] = (uint) src->f[0];
475 dst->u[1] = (uint) src->f[1];
476 dst->u[2] = (uint) src->f[2];
477 dst->u[3] = (uint) src->f[3];
478 }
479
480 static void
481 micro_flr(
482 union tgsi_exec_channel *dst,
483 const union tgsi_exec_channel *src )
484 {
485 dst->f[0] = floorf( src->f[0] );
486 dst->f[1] = floorf( src->f[1] );
487 dst->f[2] = floorf( src->f[2] );
488 dst->f[3] = floorf( src->f[3] );
489 }
490
491 static void
492 micro_frc(
493 union tgsi_exec_channel *dst,
494 const union tgsi_exec_channel *src )
495 {
496 dst->f[0] = src->f[0] - floorf( src->f[0] );
497 dst->f[1] = src->f[1] - floorf( src->f[1] );
498 dst->f[2] = src->f[2] - floorf( src->f[2] );
499 dst->f[3] = src->f[3] - floorf( src->f[3] );
500 }
501
502 static void
503 micro_ge(
504 union tgsi_exec_channel *dst,
505 const union tgsi_exec_channel *src0,
506 const union tgsi_exec_channel *src1,
507 const union tgsi_exec_channel *src2,
508 const union tgsi_exec_channel *src3 )
509 {
510 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
511 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
512 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
513 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
514 }
515
516 static void
517 micro_i2f(
518 union tgsi_exec_channel *dst,
519 const union tgsi_exec_channel *src )
520 {
521 dst->f[0] = (float) src->i[0];
522 dst->f[1] = (float) src->i[1];
523 dst->f[2] = (float) src->i[2];
524 dst->f[3] = (float) src->i[3];
525 }
526
527 static void
528 micro_lg2(
529 union tgsi_exec_channel *dst,
530 const union tgsi_exec_channel *src )
531 {
532 #if FAST_MATH
533 dst->f[0] = util_fast_log2( src->f[0] );
534 dst->f[1] = util_fast_log2( src->f[1] );
535 dst->f[2] = util_fast_log2( src->f[2] );
536 dst->f[3] = util_fast_log2( src->f[3] );
537 #else
538 dst->f[0] = logf( src->f[0] ) * 1.442695f;
539 dst->f[1] = logf( src->f[1] ) * 1.442695f;
540 dst->f[2] = logf( src->f[2] ) * 1.442695f;
541 dst->f[3] = logf( src->f[3] ) * 1.442695f;
542 #endif
543 }
544
545 static void
546 micro_le(
547 union tgsi_exec_channel *dst,
548 const union tgsi_exec_channel *src0,
549 const union tgsi_exec_channel *src1,
550 const union tgsi_exec_channel *src2,
551 const union tgsi_exec_channel *src3 )
552 {
553 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
554 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
555 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
556 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
557 }
558
559 static void
560 micro_lt(
561 union tgsi_exec_channel *dst,
562 const union tgsi_exec_channel *src0,
563 const union tgsi_exec_channel *src1,
564 const union tgsi_exec_channel *src2,
565 const union tgsi_exec_channel *src3 )
566 {
567 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
568 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
569 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
570 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
571 }
572
573 static void
574 micro_ilt(
575 union tgsi_exec_channel *dst,
576 const union tgsi_exec_channel *src0,
577 const union tgsi_exec_channel *src1,
578 const union tgsi_exec_channel *src2,
579 const union tgsi_exec_channel *src3 )
580 {
581 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
582 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
583 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
584 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
585 }
586
587 static void
588 micro_ult(
589 union tgsi_exec_channel *dst,
590 const union tgsi_exec_channel *src0,
591 const union tgsi_exec_channel *src1,
592 const union tgsi_exec_channel *src2,
593 const union tgsi_exec_channel *src3 )
594 {
595 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
596 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
597 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
598 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
599 }
600
601 static void
602 micro_max(
603 union tgsi_exec_channel *dst,
604 const union tgsi_exec_channel *src0,
605 const union tgsi_exec_channel *src1 )
606 {
607 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
608 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
609 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
610 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
611 }
612
613 static void
614 micro_imax(
615 union tgsi_exec_channel *dst,
616 const union tgsi_exec_channel *src0,
617 const union tgsi_exec_channel *src1 )
618 {
619 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
620 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
621 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
622 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
623 }
624
625 static void
626 micro_umax(
627 union tgsi_exec_channel *dst,
628 const union tgsi_exec_channel *src0,
629 const union tgsi_exec_channel *src1 )
630 {
631 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
632 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
633 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
634 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
635 }
636
637 static void
638 micro_min(
639 union tgsi_exec_channel *dst,
640 const union tgsi_exec_channel *src0,
641 const union tgsi_exec_channel *src1 )
642 {
643 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
644 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
645 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
646 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
647 }
648
649 static void
650 micro_imin(
651 union tgsi_exec_channel *dst,
652 const union tgsi_exec_channel *src0,
653 const union tgsi_exec_channel *src1 )
654 {
655 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
656 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
657 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
658 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
659 }
660
661 static void
662 micro_umin(
663 union tgsi_exec_channel *dst,
664 const union tgsi_exec_channel *src0,
665 const union tgsi_exec_channel *src1 )
666 {
667 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
668 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
669 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
670 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
671 }
672
673 static void
674 micro_umod(
675 union tgsi_exec_channel *dst,
676 const union tgsi_exec_channel *src0,
677 const union tgsi_exec_channel *src1 )
678 {
679 dst->u[0] = src0->u[0] % src1->u[0];
680 dst->u[1] = src0->u[1] % src1->u[1];
681 dst->u[2] = src0->u[2] % src1->u[2];
682 dst->u[3] = src0->u[3] % src1->u[3];
683 }
684
685 static void
686 micro_mul(
687 union tgsi_exec_channel *dst,
688 const union tgsi_exec_channel *src0,
689 const union tgsi_exec_channel *src1 )
690 {
691 dst->f[0] = src0->f[0] * src1->f[0];
692 dst->f[1] = src0->f[1] * src1->f[1];
693 dst->f[2] = src0->f[2] * src1->f[2];
694 dst->f[3] = src0->f[3] * src1->f[3];
695 }
696
697 static void
698 micro_imul(
699 union tgsi_exec_channel *dst,
700 const union tgsi_exec_channel *src0,
701 const union tgsi_exec_channel *src1 )
702 {
703 dst->i[0] = src0->i[0] * src1->i[0];
704 dst->i[1] = src0->i[1] * src1->i[1];
705 dst->i[2] = src0->i[2] * src1->i[2];
706 dst->i[3] = src0->i[3] * src1->i[3];
707 }
708
709 static void
710 micro_imul64(
711 union tgsi_exec_channel *dst0,
712 union tgsi_exec_channel *dst1,
713 const union tgsi_exec_channel *src0,
714 const union tgsi_exec_channel *src1 )
715 {
716 dst1->i[0] = src0->i[0] * src1->i[0];
717 dst1->i[1] = src0->i[1] * src1->i[1];
718 dst1->i[2] = src0->i[2] * src1->i[2];
719 dst1->i[3] = src0->i[3] * src1->i[3];
720 dst0->i[0] = 0;
721 dst0->i[1] = 0;
722 dst0->i[2] = 0;
723 dst0->i[3] = 0;
724 }
725
726 static void
727 micro_umul64(
728 union tgsi_exec_channel *dst0,
729 union tgsi_exec_channel *dst1,
730 const union tgsi_exec_channel *src0,
731 const union tgsi_exec_channel *src1 )
732 {
733 dst1->u[0] = src0->u[0] * src1->u[0];
734 dst1->u[1] = src0->u[1] * src1->u[1];
735 dst1->u[2] = src0->u[2] * src1->u[2];
736 dst1->u[3] = src0->u[3] * src1->u[3];
737 dst0->u[0] = 0;
738 dst0->u[1] = 0;
739 dst0->u[2] = 0;
740 dst0->u[3] = 0;
741 }
742
743 static void
744 micro_movc(
745 union tgsi_exec_channel *dst,
746 const union tgsi_exec_channel *src0,
747 const union tgsi_exec_channel *src1,
748 const union tgsi_exec_channel *src2 )
749 {
750 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
751 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
752 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
753 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
754 }
755
756 static void
757 micro_neg(
758 union tgsi_exec_channel *dst,
759 const union tgsi_exec_channel *src )
760 {
761 dst->f[0] = -src->f[0];
762 dst->f[1] = -src->f[1];
763 dst->f[2] = -src->f[2];
764 dst->f[3] = -src->f[3];
765 }
766
767 static void
768 micro_ineg(
769 union tgsi_exec_channel *dst,
770 const union tgsi_exec_channel *src )
771 {
772 dst->i[0] = -src->i[0];
773 dst->i[1] = -src->i[1];
774 dst->i[2] = -src->i[2];
775 dst->i[3] = -src->i[3];
776 }
777
778 static void
779 micro_not(
780 union tgsi_exec_channel *dst,
781 const union tgsi_exec_channel *src )
782 {
783 dst->u[0] = ~src->u[0];
784 dst->u[1] = ~src->u[1];
785 dst->u[2] = ~src->u[2];
786 dst->u[3] = ~src->u[3];
787 }
788
789 static void
790 micro_or(
791 union tgsi_exec_channel *dst,
792 const union tgsi_exec_channel *src0,
793 const union tgsi_exec_channel *src1 )
794 {
795 dst->u[0] = src0->u[0] | src1->u[0];
796 dst->u[1] = src0->u[1] | src1->u[1];
797 dst->u[2] = src0->u[2] | src1->u[2];
798 dst->u[3] = src0->u[3] | src1->u[3];
799 }
800
801 static void
802 micro_pow(
803 union tgsi_exec_channel *dst,
804 const union tgsi_exec_channel *src0,
805 const union tgsi_exec_channel *src1 )
806 {
807 #if FAST_MATH
808 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
809 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
810 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
811 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
812 #else
813 dst->f[0] = powf( src0->f[0], src1->f[0] );
814 dst->f[1] = powf( src0->f[1], src1->f[1] );
815 dst->f[2] = powf( src0->f[2], src1->f[2] );
816 dst->f[3] = powf( src0->f[3], src1->f[3] );
817 #endif
818 }
819
820 static void
821 micro_rnd(
822 union tgsi_exec_channel *dst,
823 const union tgsi_exec_channel *src )
824 {
825 dst->f[0] = floorf( src->f[0] + 0.5f );
826 dst->f[1] = floorf( src->f[1] + 0.5f );
827 dst->f[2] = floorf( src->f[2] + 0.5f );
828 dst->f[3] = floorf( src->f[3] + 0.5f );
829 }
830
831 static void
832 micro_shl(
833 union tgsi_exec_channel *dst,
834 const union tgsi_exec_channel *src0,
835 const union tgsi_exec_channel *src1 )
836 {
837 dst->i[0] = src0->i[0] << src1->i[0];
838 dst->i[1] = src0->i[1] << src1->i[1];
839 dst->i[2] = src0->i[2] << src1->i[2];
840 dst->i[3] = src0->i[3] << src1->i[3];
841 }
842
843 static void
844 micro_ishr(
845 union tgsi_exec_channel *dst,
846 const union tgsi_exec_channel *src0,
847 const union tgsi_exec_channel *src1 )
848 {
849 dst->i[0] = src0->i[0] >> src1->i[0];
850 dst->i[1] = src0->i[1] >> src1->i[1];
851 dst->i[2] = src0->i[2] >> src1->i[2];
852 dst->i[3] = src0->i[3] >> src1->i[3];
853 }
854
855 static void
856 micro_trunc(
857 union tgsi_exec_channel *dst,
858 const union tgsi_exec_channel *src0 )
859 {
860 dst->f[0] = (float) (int) src0->f[0];
861 dst->f[1] = (float) (int) src0->f[1];
862 dst->f[2] = (float) (int) src0->f[2];
863 dst->f[3] = (float) (int) src0->f[3];
864 }
865
866 static void
867 micro_ushr(
868 union tgsi_exec_channel *dst,
869 const union tgsi_exec_channel *src0,
870 const union tgsi_exec_channel *src1 )
871 {
872 dst->u[0] = src0->u[0] >> src1->u[0];
873 dst->u[1] = src0->u[1] >> src1->u[1];
874 dst->u[2] = src0->u[2] >> src1->u[2];
875 dst->u[3] = src0->u[3] >> src1->u[3];
876 }
877
878 static void
879 micro_sin(
880 union tgsi_exec_channel *dst,
881 const union tgsi_exec_channel *src )
882 {
883 dst->f[0] = sinf( src->f[0] );
884 dst->f[1] = sinf( src->f[1] );
885 dst->f[2] = sinf( src->f[2] );
886 dst->f[3] = sinf( src->f[3] );
887 }
888
889 static void
890 micro_sqrt( union tgsi_exec_channel *dst,
891 const union tgsi_exec_channel *src )
892 {
893 dst->f[0] = sqrtf( src->f[0] );
894 dst->f[1] = sqrtf( src->f[1] );
895 dst->f[2] = sqrtf( src->f[2] );
896 dst->f[3] = sqrtf( src->f[3] );
897 }
898
899 static void
900 micro_sub(
901 union tgsi_exec_channel *dst,
902 const union tgsi_exec_channel *src0,
903 const union tgsi_exec_channel *src1 )
904 {
905 dst->f[0] = src0->f[0] - src1->f[0];
906 dst->f[1] = src0->f[1] - src1->f[1];
907 dst->f[2] = src0->f[2] - src1->f[2];
908 dst->f[3] = src0->f[3] - src1->f[3];
909 }
910
911 static void
912 micro_u2f(
913 union tgsi_exec_channel *dst,
914 const union tgsi_exec_channel *src )
915 {
916 dst->f[0] = (float) src->u[0];
917 dst->f[1] = (float) src->u[1];
918 dst->f[2] = (float) src->u[2];
919 dst->f[3] = (float) src->u[3];
920 }
921
922 static void
923 micro_xor(
924 union tgsi_exec_channel *dst,
925 const union tgsi_exec_channel *src0,
926 const union tgsi_exec_channel *src1 )
927 {
928 dst->u[0] = src0->u[0] ^ src1->u[0];
929 dst->u[1] = src0->u[1] ^ src1->u[1];
930 dst->u[2] = src0->u[2] ^ src1->u[2];
931 dst->u[3] = src0->u[3] ^ src1->u[3];
932 }
933
934 static void
935 fetch_src_file_channel(
936 const struct tgsi_exec_machine *mach,
937 const uint file,
938 const uint swizzle,
939 const union tgsi_exec_channel *index,
940 union tgsi_exec_channel *chan )
941 {
942 switch( swizzle ) {
943 case TGSI_EXTSWIZZLE_X:
944 case TGSI_EXTSWIZZLE_Y:
945 case TGSI_EXTSWIZZLE_Z:
946 case TGSI_EXTSWIZZLE_W:
947 switch( file ) {
948 case TGSI_FILE_CONSTANT:
949 assert(mach->Consts);
950 if (index->i[0] < 0)
951 chan->f[0] = 0.0f;
952 else
953 chan->f[0] = mach->Consts[index->i[0]][swizzle];
954 if (index->i[1] < 0)
955 chan->f[1] = 0.0f;
956 else
957 chan->f[1] = mach->Consts[index->i[1]][swizzle];
958 if (index->i[2] < 0)
959 chan->f[2] = 0.0f;
960 else
961 chan->f[2] = mach->Consts[index->i[2]][swizzle];
962 if (index->i[3] < 0)
963 chan->f[3] = 0.0f;
964 else
965 chan->f[3] = mach->Consts[index->i[3]][swizzle];
966 break;
967
968 case TGSI_FILE_INPUT:
969 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
970 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
971 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
972 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
973 break;
974
975 case TGSI_FILE_TEMPORARY:
976 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
977 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
978 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
979 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
980 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
981 break;
982
983 case TGSI_FILE_IMMEDIATE:
984 assert( index->i[0] < (int) mach->ImmLimit );
985 chan->f[0] = mach->Imms[index->i[0]][swizzle];
986 assert( index->i[1] < (int) mach->ImmLimit );
987 chan->f[1] = mach->Imms[index->i[1]][swizzle];
988 assert( index->i[2] < (int) mach->ImmLimit );
989 chan->f[2] = mach->Imms[index->i[2]][swizzle];
990 assert( index->i[3] < (int) mach->ImmLimit );
991 chan->f[3] = mach->Imms[index->i[3]][swizzle];
992 break;
993
994 case TGSI_FILE_ADDRESS:
995 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
996 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
997 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
998 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
999 break;
1000
1001 case TGSI_FILE_OUTPUT:
1002 /* vertex/fragment output vars can be read too */
1003 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1004 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1005 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1006 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1007 break;
1008
1009 default:
1010 assert( 0 );
1011 }
1012 break;
1013
1014 case TGSI_EXTSWIZZLE_ZERO:
1015 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1016 break;
1017
1018 case TGSI_EXTSWIZZLE_ONE:
1019 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1020 break;
1021
1022 default:
1023 assert( 0 );
1024 }
1025 }
1026
1027 static void
1028 fetch_source(
1029 const struct tgsi_exec_machine *mach,
1030 union tgsi_exec_channel *chan,
1031 const struct tgsi_full_src_register *reg,
1032 const uint chan_index )
1033 {
1034 union tgsi_exec_channel index;
1035 uint swizzle;
1036
1037 /* We start with a direct index into a register file.
1038 *
1039 * file[1],
1040 * where:
1041 * file = SrcRegister.File
1042 * [1] = SrcRegister.Index
1043 */
1044 index.i[0] =
1045 index.i[1] =
1046 index.i[2] =
1047 index.i[3] = reg->SrcRegister.Index;
1048
1049 /* There is an extra source register that indirectly subscripts
1050 * a register file. The direct index now becomes an offset
1051 * that is being added to the indirect register.
1052 *
1053 * file[ind[2].x+1],
1054 * where:
1055 * ind = SrcRegisterInd.File
1056 * [2] = SrcRegisterInd.Index
1057 * .x = SrcRegisterInd.SwizzleX
1058 */
1059 if (reg->SrcRegister.Indirect) {
1060 union tgsi_exec_channel index2;
1061 union tgsi_exec_channel indir_index;
1062 const uint execmask = mach->ExecMask;
1063 uint i;
1064
1065 /* which address register (always zero now) */
1066 index2.i[0] =
1067 index2.i[1] =
1068 index2.i[2] =
1069 index2.i[3] = reg->SrcRegisterInd.Index;
1070
1071 /* get current value of address register[swizzle] */
1072 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1073 fetch_src_file_channel(
1074 mach,
1075 reg->SrcRegisterInd.File,
1076 swizzle,
1077 &index2,
1078 &indir_index );
1079
1080 /* add value of address register to the offset */
1081 index.i[0] += (int) indir_index.f[0];
1082 index.i[1] += (int) indir_index.f[1];
1083 index.i[2] += (int) indir_index.f[2];
1084 index.i[3] += (int) indir_index.f[3];
1085
1086 /* for disabled execution channels, zero-out the index to
1087 * avoid using a potential garbage value.
1088 */
1089 for (i = 0; i < QUAD_SIZE; i++) {
1090 if ((execmask & (1 << i)) == 0)
1091 index.i[i] = 0;
1092 }
1093 }
1094
1095 /* There is an extra source register that is a second
1096 * subscript to a register file. Effectively it means that
1097 * the register file is actually a 2D array of registers.
1098 *
1099 * file[1][3] == file[1*sizeof(file[1])+3],
1100 * where:
1101 * [3] = SrcRegisterDim.Index
1102 */
1103 if (reg->SrcRegister.Dimension) {
1104 /* The size of the first-order array depends on the register file type.
1105 * We need to multiply the index to the first array to get an effective,
1106 * "flat" index that points to the beginning of the second-order array.
1107 */
1108 switch (reg->SrcRegister.File) {
1109 case TGSI_FILE_INPUT:
1110 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1111 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1112 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1113 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1114 break;
1115 case TGSI_FILE_CONSTANT:
1116 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1117 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1118 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1119 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1120 break;
1121 default:
1122 assert( 0 );
1123 }
1124
1125 index.i[0] += reg->SrcRegisterDim.Index;
1126 index.i[1] += reg->SrcRegisterDim.Index;
1127 index.i[2] += reg->SrcRegisterDim.Index;
1128 index.i[3] += reg->SrcRegisterDim.Index;
1129
1130 /* Again, the second subscript index can be addressed indirectly
1131 * identically to the first one.
1132 * Nothing stops us from indirectly addressing the indirect register,
1133 * but there is no need for that, so we won't exercise it.
1134 *
1135 * file[1][ind[4].y+3],
1136 * where:
1137 * ind = SrcRegisterDimInd.File
1138 * [4] = SrcRegisterDimInd.Index
1139 * .y = SrcRegisterDimInd.SwizzleX
1140 */
1141 if (reg->SrcRegisterDim.Indirect) {
1142 union tgsi_exec_channel index2;
1143 union tgsi_exec_channel indir_index;
1144 const uint execmask = mach->ExecMask;
1145 uint i;
1146
1147 index2.i[0] =
1148 index2.i[1] =
1149 index2.i[2] =
1150 index2.i[3] = reg->SrcRegisterDimInd.Index;
1151
1152 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1153 fetch_src_file_channel(
1154 mach,
1155 reg->SrcRegisterDimInd.File,
1156 swizzle,
1157 &index2,
1158 &indir_index );
1159
1160 index.i[0] += (int) indir_index.f[0];
1161 index.i[1] += (int) indir_index.f[1];
1162 index.i[2] += (int) indir_index.f[2];
1163 index.i[3] += (int) indir_index.f[3];
1164
1165 /* for disabled execution channels, zero-out the index to
1166 * avoid using a potential garbage value.
1167 */
1168 for (i = 0; i < QUAD_SIZE; i++) {
1169 if ((execmask & (1 << i)) == 0)
1170 index.i[i] = 0;
1171 }
1172 }
1173
1174 /* If by any chance there was a need for a 3D array of register
1175 * files, we would have to check whether SrcRegisterDim is followed
1176 * by a dimension register and continue the saga.
1177 */
1178 }
1179
1180 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1181 fetch_src_file_channel(
1182 mach,
1183 reg->SrcRegister.File,
1184 swizzle,
1185 &index,
1186 chan );
1187
1188 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1189 case TGSI_UTIL_SIGN_CLEAR:
1190 micro_abs( chan, chan );
1191 break;
1192
1193 case TGSI_UTIL_SIGN_SET:
1194 micro_abs( chan, chan );
1195 micro_neg( chan, chan );
1196 break;
1197
1198 case TGSI_UTIL_SIGN_TOGGLE:
1199 micro_neg( chan, chan );
1200 break;
1201
1202 case TGSI_UTIL_SIGN_KEEP:
1203 break;
1204 }
1205
1206 if (reg->SrcRegisterExtMod.Complement) {
1207 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1208 }
1209 }
1210
1211 static void
1212 store_dest(
1213 struct tgsi_exec_machine *mach,
1214 const union tgsi_exec_channel *chan,
1215 const struct tgsi_full_dst_register *reg,
1216 const struct tgsi_full_instruction *inst,
1217 uint chan_index )
1218 {
1219 uint i;
1220 union tgsi_exec_channel null;
1221 union tgsi_exec_channel *dst;
1222 uint execmask = mach->ExecMask;
1223
1224 switch (reg->DstRegister.File) {
1225 case TGSI_FILE_NULL:
1226 dst = &null;
1227 break;
1228
1229 case TGSI_FILE_OUTPUT:
1230 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1231 + reg->DstRegister.Index].xyzw[chan_index];
1232 break;
1233
1234 case TGSI_FILE_TEMPORARY:
1235 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1236 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1237 break;
1238
1239 case TGSI_FILE_ADDRESS:
1240 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1241 break;
1242
1243 default:
1244 assert( 0 );
1245 return;
1246 }
1247
1248 if (inst->InstructionExtNv.CondFlowEnable) {
1249 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1250 uint swizzle;
1251 uint shift;
1252 uint mask;
1253 uint test;
1254
1255 /* Only CC0 supported.
1256 */
1257 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1258
1259 switch (chan_index) {
1260 case CHAN_X:
1261 swizzle = inst->InstructionExtNv.CondSwizzleX;
1262 break;
1263 case CHAN_Y:
1264 swizzle = inst->InstructionExtNv.CondSwizzleY;
1265 break;
1266 case CHAN_Z:
1267 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1268 break;
1269 case CHAN_W:
1270 swizzle = inst->InstructionExtNv.CondSwizzleW;
1271 break;
1272 default:
1273 assert( 0 );
1274 return;
1275 }
1276
1277 switch (swizzle) {
1278 case TGSI_SWIZZLE_X:
1279 shift = TGSI_EXEC_CC_X_SHIFT;
1280 mask = TGSI_EXEC_CC_X_MASK;
1281 break;
1282 case TGSI_SWIZZLE_Y:
1283 shift = TGSI_EXEC_CC_Y_SHIFT;
1284 mask = TGSI_EXEC_CC_Y_MASK;
1285 break;
1286 case TGSI_SWIZZLE_Z:
1287 shift = TGSI_EXEC_CC_Z_SHIFT;
1288 mask = TGSI_EXEC_CC_Z_MASK;
1289 break;
1290 case TGSI_SWIZZLE_W:
1291 shift = TGSI_EXEC_CC_W_SHIFT;
1292 mask = TGSI_EXEC_CC_W_MASK;
1293 break;
1294 default:
1295 assert( 0 );
1296 return;
1297 }
1298
1299 switch (inst->InstructionExtNv.CondMask) {
1300 case TGSI_CC_GT:
1301 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1302 for (i = 0; i < QUAD_SIZE; i++)
1303 if (cc->u[i] & test)
1304 execmask &= ~(1 << i);
1305 break;
1306
1307 case TGSI_CC_EQ:
1308 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1309 for (i = 0; i < QUAD_SIZE; i++)
1310 if (cc->u[i] & test)
1311 execmask &= ~(1 << i);
1312 break;
1313
1314 case TGSI_CC_LT:
1315 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1316 for (i = 0; i < QUAD_SIZE; i++)
1317 if (cc->u[i] & test)
1318 execmask &= ~(1 << i);
1319 break;
1320
1321 case TGSI_CC_GE:
1322 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1323 for (i = 0; i < QUAD_SIZE; i++)
1324 if (cc->u[i] & test)
1325 execmask &= ~(1 << i);
1326 break;
1327
1328 case TGSI_CC_LE:
1329 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1330 for (i = 0; i < QUAD_SIZE; i++)
1331 if (cc->u[i] & test)
1332 execmask &= ~(1 << i);
1333 break;
1334
1335 case TGSI_CC_NE:
1336 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1337 for (i = 0; i < QUAD_SIZE; i++)
1338 if (cc->u[i] & test)
1339 execmask &= ~(1 << i);
1340 break;
1341
1342 case TGSI_CC_TR:
1343 break;
1344
1345 case TGSI_CC_FL:
1346 for (i = 0; i < QUAD_SIZE; i++)
1347 execmask &= ~(1 << i);
1348 break;
1349
1350 default:
1351 assert( 0 );
1352 return;
1353 }
1354 }
1355
1356 switch (inst->Instruction.Saturate) {
1357 case TGSI_SAT_NONE:
1358 for (i = 0; i < QUAD_SIZE; i++)
1359 if (execmask & (1 << i))
1360 dst->i[i] = chan->i[i];
1361 break;
1362
1363 case TGSI_SAT_ZERO_ONE:
1364 for (i = 0; i < QUAD_SIZE; i++)
1365 if (execmask & (1 << i)) {
1366 if (chan->f[i] < 0.0f)
1367 dst->f[i] = 0.0f;
1368 else if (chan->f[i] > 1.0f)
1369 dst->f[i] = 1.0f;
1370 else
1371 dst->i[i] = chan->i[i];
1372 }
1373 break;
1374
1375 case TGSI_SAT_MINUS_PLUS_ONE:
1376 for (i = 0; i < QUAD_SIZE; i++)
1377 if (execmask & (1 << i)) {
1378 if (chan->f[i] < -1.0f)
1379 dst->f[i] = -1.0f;
1380 else if (chan->f[i] > 1.0f)
1381 dst->f[i] = 1.0f;
1382 else
1383 dst->i[i] = chan->i[i];
1384 }
1385 break;
1386
1387 default:
1388 assert( 0 );
1389 }
1390
1391 if (inst->InstructionExtNv.CondDstUpdate) {
1392 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1393 uint shift;
1394 uint mask;
1395
1396 /* Only CC0 supported.
1397 */
1398 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1399
1400 switch (chan_index) {
1401 case CHAN_X:
1402 shift = TGSI_EXEC_CC_X_SHIFT;
1403 mask = ~TGSI_EXEC_CC_X_MASK;
1404 break;
1405 case CHAN_Y:
1406 shift = TGSI_EXEC_CC_Y_SHIFT;
1407 mask = ~TGSI_EXEC_CC_Y_MASK;
1408 break;
1409 case CHAN_Z:
1410 shift = TGSI_EXEC_CC_Z_SHIFT;
1411 mask = ~TGSI_EXEC_CC_Z_MASK;
1412 break;
1413 case CHAN_W:
1414 shift = TGSI_EXEC_CC_W_SHIFT;
1415 mask = ~TGSI_EXEC_CC_W_MASK;
1416 break;
1417 default:
1418 assert( 0 );
1419 return;
1420 }
1421
1422 for (i = 0; i < QUAD_SIZE; i++)
1423 if (execmask & (1 << i)) {
1424 cc->u[i] &= mask;
1425 if (dst->f[i] < 0.0f)
1426 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1427 else if (dst->f[i] > 0.0f)
1428 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1429 else if (dst->f[i] == 0.0f)
1430 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1431 else
1432 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1433 }
1434 }
1435 }
1436
/* Convenience wrappers around fetch_source() / store_dest().  Both expand to
 * code that uses `mach` and `inst`, which must be in scope where the macro
 * is used.
 *
 * FETCH: read channel CHAN of source operand INDEX into *VAL.
 * STORE: write *VAL to channel CHAN of destination operand INDEX.
 */
#define FETCH(VAL,INDEX,CHAN) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN) )

#define STORE(VAL,INDEX,CHAN) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1442
1443
1444 /**
1445 * Execute ARB-style KIL which is predicated by a src register.
1446 * Kill fragment if any of the four values is less than zero.
1447 */
1448 static void
1449 exec_kil(struct tgsi_exec_machine *mach,
1450 const struct tgsi_full_instruction *inst)
1451 {
1452 uint uniquemask;
1453 uint chan_index;
1454 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1455 union tgsi_exec_channel r[1];
1456
1457 /* This mask stores component bits that were already tested. Note that
1458 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1459 * tested. */
1460 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1461
1462 for (chan_index = 0; chan_index < 4; chan_index++)
1463 {
1464 uint swizzle;
1465 uint i;
1466
1467 /* unswizzle channel */
1468 swizzle = tgsi_util_get_full_src_register_extswizzle (
1469 &inst->FullSrcRegisters[0],
1470 chan_index);
1471
1472 /* check if the component has not been already tested */
1473 if (uniquemask & (1 << swizzle))
1474 continue;
1475 uniquemask |= 1 << swizzle;
1476
1477 FETCH(&r[0], 0, chan_index);
1478 for (i = 0; i < 4; i++)
1479 if (r[0].f[i] < 0.0f)
1480 kilmask |= 1 << i;
1481 }
1482
1483 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1484 }
1485
1486 /**
1487 * Execute NVIDIA-style KIL which is predicated by a condition code.
1488 * Kill fragment if the condition code is TRUE.
1489 */
1490 static void
1491 exec_kilp(struct tgsi_exec_machine *mach,
1492 const struct tgsi_full_instruction *inst)
1493 {
1494 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1495
1496 if (inst->InstructionExtNv.CondFlowEnable) {
1497 uint swizzle[4];
1498 uint chan_index;
1499
1500 kilmask = 0x0;
1501
1502 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1503 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1504 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1505 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1506
1507 for (chan_index = 0; chan_index < 4; chan_index++)
1508 {
1509 uint i;
1510
1511 for (i = 0; i < 4; i++) {
1512 /* TODO: evaluate the condition code */
1513 if (0)
1514 kilmask |= 1 << i;
1515 }
1516 }
1517 }
1518 else {
1519 /* "unconditional" kil */
1520 kilmask = mach->ExecMask;
1521 }
1522 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1523 }
1524
1525
1526 /*
1527 * Fetch a texel using STR texture coordinates.
1528 */
1529 static void
1530 fetch_texel( struct tgsi_sampler *sampler,
1531 const union tgsi_exec_channel *s,
1532 const union tgsi_exec_channel *t,
1533 const union tgsi_exec_channel *p,
1534 float lodbias, /* XXX should be float[4] */
1535 union tgsi_exec_channel *r,
1536 union tgsi_exec_channel *g,
1537 union tgsi_exec_channel *b,
1538 union tgsi_exec_channel *a )
1539 {
1540 uint j;
1541 float rgba[NUM_CHANNELS][QUAD_SIZE];
1542
1543 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1544
1545 for (j = 0; j < 4; j++) {
1546 r->f[j] = rgba[0][j];
1547 g->f[j] = rgba[1][j];
1548 b->f[j] = rgba[2][j];
1549 a->f[j] = rgba[3][j];
1550 }
1551 }
1552
1553
1554 static void
1555 exec_tex(struct tgsi_exec_machine *mach,
1556 const struct tgsi_full_instruction *inst,
1557 boolean biasLod,
1558 boolean projected)
1559 {
1560 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1561 union tgsi_exec_channel r[8];
1562 uint chan_index;
1563 float lodBias;
1564
1565 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1566
1567 switch (inst->InstructionExtTexture.Texture) {
1568 case TGSI_TEXTURE_1D:
1569
1570 FETCH(&r[0], 0, CHAN_X);
1571
1572 if (projected) {
1573 FETCH(&r[1], 0, CHAN_W);
1574 micro_div( &r[0], &r[0], &r[1] );
1575 }
1576
1577 if (biasLod) {
1578 FETCH(&r[1], 0, CHAN_W);
1579 lodBias = r[2].f[0];
1580 }
1581 else
1582 lodBias = 0.0;
1583
1584 fetch_texel(mach->Samplers[unit],
1585 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1586 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1587 break;
1588
1589 case TGSI_TEXTURE_2D:
1590 case TGSI_TEXTURE_RECT:
1591
1592 FETCH(&r[0], 0, CHAN_X);
1593 FETCH(&r[1], 0, CHAN_Y);
1594 FETCH(&r[2], 0, CHAN_Z);
1595
1596 if (projected) {
1597 FETCH(&r[3], 0, CHAN_W);
1598 micro_div( &r[0], &r[0], &r[3] );
1599 micro_div( &r[1], &r[1], &r[3] );
1600 micro_div( &r[2], &r[2], &r[3] );
1601 }
1602
1603 if (biasLod) {
1604 FETCH(&r[3], 0, CHAN_W);
1605 lodBias = r[3].f[0];
1606 }
1607 else
1608 lodBias = 0.0;
1609
1610 fetch_texel(mach->Samplers[unit],
1611 &r[0], &r[1], &r[2], lodBias, /* inputs */
1612 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1613 break;
1614
1615 case TGSI_TEXTURE_3D:
1616 case TGSI_TEXTURE_CUBE:
1617
1618 FETCH(&r[0], 0, CHAN_X);
1619 FETCH(&r[1], 0, CHAN_Y);
1620 FETCH(&r[2], 0, CHAN_Z);
1621
1622 if (projected) {
1623 FETCH(&r[3], 0, CHAN_W);
1624 micro_div( &r[0], &r[0], &r[3] );
1625 micro_div( &r[1], &r[1], &r[3] );
1626 micro_div( &r[2], &r[2], &r[3] );
1627 }
1628
1629 if (biasLod) {
1630 FETCH(&r[3], 0, CHAN_W);
1631 lodBias = r[3].f[0];
1632 }
1633 else
1634 lodBias = 0.0;
1635
1636 fetch_texel(mach->Samplers[unit],
1637 &r[0], &r[1], &r[2], lodBias,
1638 &r[0], &r[1], &r[2], &r[3]);
1639 break;
1640
1641 default:
1642 assert (0);
1643 }
1644
1645 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1646 STORE( &r[chan_index], 0, chan_index );
1647 }
1648 }
1649
1650
1651 /**
1652 * Evaluate a constant-valued coefficient at the position of the
1653 * current quad.
1654 */
1655 static void
1656 eval_constant_coef(
1657 struct tgsi_exec_machine *mach,
1658 unsigned attrib,
1659 unsigned chan )
1660 {
1661 unsigned i;
1662
1663 for( i = 0; i < QUAD_SIZE; i++ ) {
1664 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1665 }
1666 }
1667
1668 /**
1669 * Evaluate a linear-valued coefficient at the position of the
1670 * current quad.
1671 */
1672 static void
1673 eval_linear_coef(
1674 struct tgsi_exec_machine *mach,
1675 unsigned attrib,
1676 unsigned chan )
1677 {
1678 const float x = mach->QuadPos.xyzw[0].f[0];
1679 const float y = mach->QuadPos.xyzw[1].f[0];
1680 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1681 const float dady = mach->InterpCoefs[attrib].dady[chan];
1682 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1683 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1684 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1685 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1686 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1687 }
1688
1689 /**
1690 * Evaluate a perspective-valued coefficient at the position of the
1691 * current quad.
1692 */
1693 static void
1694 eval_perspective_coef(
1695 struct tgsi_exec_machine *mach,
1696 unsigned attrib,
1697 unsigned chan )
1698 {
1699 const float x = mach->QuadPos.xyzw[0].f[0];
1700 const float y = mach->QuadPos.xyzw[1].f[0];
1701 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1702 const float dady = mach->InterpCoefs[attrib].dady[chan];
1703 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1704 const float *w = mach->QuadPos.xyzw[3].f;
1705 /* divide by W here */
1706 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1707 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1708 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1709 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1710 }
1711
1712
/**
 * Signature of the eval_*_coef() functions: evaluate one channel of one
 * input attribute for the current quad.
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1717
1718 static void
1719 exec_declaration(
1720 struct tgsi_exec_machine *mach,
1721 const struct tgsi_full_declaration *decl )
1722 {
1723 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1724 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1725 unsigned first, last, mask;
1726 eval_coef_func eval;
1727
1728 first = decl->DeclarationRange.First;
1729 last = decl->DeclarationRange.Last;
1730 mask = decl->Declaration.UsageMask;
1731
1732 switch( decl->Declaration.Interpolate ) {
1733 case TGSI_INTERPOLATE_CONSTANT:
1734 eval = eval_constant_coef;
1735 break;
1736
1737 case TGSI_INTERPOLATE_LINEAR:
1738 eval = eval_linear_coef;
1739 break;
1740
1741 case TGSI_INTERPOLATE_PERSPECTIVE:
1742 eval = eval_perspective_coef;
1743 break;
1744
1745 default:
1746 eval = NULL;
1747 assert( 0 );
1748 }
1749
1750 if( mask == TGSI_WRITEMASK_XYZW ) {
1751 unsigned i, j;
1752
1753 for( i = first; i <= last; i++ ) {
1754 for( j = 0; j < NUM_CHANNELS; j++ ) {
1755 eval( mach, i, j );
1756 }
1757 }
1758 }
1759 else {
1760 unsigned i, j;
1761
1762 for( j = 0; j < NUM_CHANNELS; j++ ) {
1763 if( mask & (1 << j) ) {
1764 for( i = first; i <= last; i++ ) {
1765 eval( mach, i, j );
1766 }
1767 }
1768 }
1769 }
1770 }
1771 }
1772 }
1773
1774 static void
1775 exec_instruction(
1776 struct tgsi_exec_machine *mach,
1777 const struct tgsi_full_instruction *inst,
1778 int *pc )
1779 {
1780 uint chan_index;
1781 union tgsi_exec_channel r[8];
1782
1783 (*pc)++;
1784
1785 switch (inst->Instruction.Opcode) {
1786 case TGSI_OPCODE_ARL:
1787 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1788 FETCH( &r[0], 0, chan_index );
1789 micro_trunc( &r[0], &r[0] );
1790 STORE( &r[0], 0, chan_index );
1791 }
1792 break;
1793
1794 case TGSI_OPCODE_MOV:
1795 case TGSI_OPCODE_SWZ:
1796 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1797 FETCH( &r[0], 0, chan_index );
1798 STORE( &r[0], 0, chan_index );
1799 }
1800 break;
1801
1802 case TGSI_OPCODE_LIT:
1803 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1804 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1805 }
1806
1807 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1808 FETCH( &r[0], 0, CHAN_X );
1809 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1810 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1811 STORE( &r[0], 0, CHAN_Y );
1812 }
1813
1814 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1815 FETCH( &r[1], 0, CHAN_Y );
1816 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1817
1818 FETCH( &r[2], 0, CHAN_W );
1819 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1820 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1821 micro_pow( &r[1], &r[1], &r[2] );
1822 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1823 STORE( &r[0], 0, CHAN_Z );
1824 }
1825 }
1826
1827 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1828 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1829 }
1830 break;
1831
1832 case TGSI_OPCODE_RCP:
1833 /* TGSI_OPCODE_RECIP */
1834 FETCH( &r[0], 0, CHAN_X );
1835 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1836 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1837 STORE( &r[0], 0, chan_index );
1838 }
1839 break;
1840
1841 case TGSI_OPCODE_RSQ:
1842 /* TGSI_OPCODE_RECIPSQRT */
1843 FETCH( &r[0], 0, CHAN_X );
1844 micro_sqrt( &r[0], &r[0] );
1845 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1846 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1847 STORE( &r[0], 0, chan_index );
1848 }
1849 break;
1850
1851 case TGSI_OPCODE_EXP:
1852 FETCH( &r[0], 0, CHAN_X );
1853 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1854 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1855 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1856 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1857 }
1858 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1859 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1860 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1861 }
1862 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1863 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1864 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1865 }
1866 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1867 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1868 }
1869 break;
1870
1871 case TGSI_OPCODE_LOG:
1872 FETCH( &r[0], 0, CHAN_X );
1873 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1874 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1875 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1876 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1877 STORE( &r[0], 0, CHAN_X );
1878 }
1879 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1880 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1881 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1882 STORE( &r[0], 0, CHAN_Y );
1883 }
1884 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1885 STORE( &r[1], 0, CHAN_Z );
1886 }
1887 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1888 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1889 }
1890 break;
1891
1892 case TGSI_OPCODE_MUL:
1893 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1894 {
1895 FETCH(&r[0], 0, chan_index);
1896 FETCH(&r[1], 1, chan_index);
1897
1898 micro_mul( &r[0], &r[0], &r[1] );
1899
1900 STORE(&r[0], 0, chan_index);
1901 }
1902 break;
1903
1904 case TGSI_OPCODE_ADD:
1905 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1906 FETCH( &r[0], 0, chan_index );
1907 FETCH( &r[1], 1, chan_index );
1908 micro_add( &r[0], &r[0], &r[1] );
1909 STORE( &r[0], 0, chan_index );
1910 }
1911 break;
1912
1913 case TGSI_OPCODE_DP3:
1914 /* TGSI_OPCODE_DOT3 */
1915 FETCH( &r[0], 0, CHAN_X );
1916 FETCH( &r[1], 1, CHAN_X );
1917 micro_mul( &r[0], &r[0], &r[1] );
1918
1919 FETCH( &r[1], 0, CHAN_Y );
1920 FETCH( &r[2], 1, CHAN_Y );
1921 micro_mul( &r[1], &r[1], &r[2] );
1922 micro_add( &r[0], &r[0], &r[1] );
1923
1924 FETCH( &r[1], 0, CHAN_Z );
1925 FETCH( &r[2], 1, CHAN_Z );
1926 micro_mul( &r[1], &r[1], &r[2] );
1927 micro_add( &r[0], &r[0], &r[1] );
1928
1929 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1930 STORE( &r[0], 0, chan_index );
1931 }
1932 break;
1933
1934 case TGSI_OPCODE_DP4:
1935 /* TGSI_OPCODE_DOT4 */
1936 FETCH(&r[0], 0, CHAN_X);
1937 FETCH(&r[1], 1, CHAN_X);
1938
1939 micro_mul( &r[0], &r[0], &r[1] );
1940
1941 FETCH(&r[1], 0, CHAN_Y);
1942 FETCH(&r[2], 1, CHAN_Y);
1943
1944 micro_mul( &r[1], &r[1], &r[2] );
1945 micro_add( &r[0], &r[0], &r[1] );
1946
1947 FETCH(&r[1], 0, CHAN_Z);
1948 FETCH(&r[2], 1, CHAN_Z);
1949
1950 micro_mul( &r[1], &r[1], &r[2] );
1951 micro_add( &r[0], &r[0], &r[1] );
1952
1953 FETCH(&r[1], 0, CHAN_W);
1954 FETCH(&r[2], 1, CHAN_W);
1955
1956 micro_mul( &r[1], &r[1], &r[2] );
1957 micro_add( &r[0], &r[0], &r[1] );
1958
1959 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1960 STORE( &r[0], 0, chan_index );
1961 }
1962 break;
1963
1964 case TGSI_OPCODE_DST:
1965 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1966 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1967 }
1968
1969 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1970 FETCH( &r[0], 0, CHAN_Y );
1971 FETCH( &r[1], 1, CHAN_Y);
1972 micro_mul( &r[0], &r[0], &r[1] );
1973 STORE( &r[0], 0, CHAN_Y );
1974 }
1975
1976 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1977 FETCH( &r[0], 0, CHAN_Z );
1978 STORE( &r[0], 0, CHAN_Z );
1979 }
1980
1981 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1982 FETCH( &r[0], 1, CHAN_W );
1983 STORE( &r[0], 0, CHAN_W );
1984 }
1985 break;
1986
1987 case TGSI_OPCODE_MIN:
1988 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1989 FETCH(&r[0], 0, chan_index);
1990 FETCH(&r[1], 1, chan_index);
1991
1992 /* XXX use micro_min()?? */
1993 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1994
1995 STORE(&r[0], 0, chan_index);
1996 }
1997 break;
1998
1999 case TGSI_OPCODE_MAX:
2000 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2001 FETCH(&r[0], 0, chan_index);
2002 FETCH(&r[1], 1, chan_index);
2003
2004 /* XXX use micro_max()?? */
2005 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2006
2007 STORE(&r[0], 0, chan_index );
2008 }
2009 break;
2010
2011 case TGSI_OPCODE_SLT:
2012 /* TGSI_OPCODE_SETLT */
2013 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2014 FETCH( &r[0], 0, chan_index );
2015 FETCH( &r[1], 1, chan_index );
2016 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2017 STORE( &r[0], 0, chan_index );
2018 }
2019 break;
2020
2021 case TGSI_OPCODE_SGE:
2022 /* TGSI_OPCODE_SETGE */
2023 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2024 FETCH( &r[0], 0, chan_index );
2025 FETCH( &r[1], 1, chan_index );
2026 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2027 STORE( &r[0], 0, chan_index );
2028 }
2029 break;
2030
2031 case TGSI_OPCODE_MAD:
2032 /* TGSI_OPCODE_MADD */
2033 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2034 FETCH( &r[0], 0, chan_index );
2035 FETCH( &r[1], 1, chan_index );
2036 micro_mul( &r[0], &r[0], &r[1] );
2037 FETCH( &r[1], 2, chan_index );
2038 micro_add( &r[0], &r[0], &r[1] );
2039 STORE( &r[0], 0, chan_index );
2040 }
2041 break;
2042
2043 case TGSI_OPCODE_SUB:
2044 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2045 FETCH(&r[0], 0, chan_index);
2046 FETCH(&r[1], 1, chan_index);
2047
2048 micro_sub( &r[0], &r[0], &r[1] );
2049
2050 STORE(&r[0], 0, chan_index);
2051 }
2052 break;
2053
2054 case TGSI_OPCODE_LERP:
2055 /* TGSI_OPCODE_LRP */
2056 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2057 FETCH(&r[0], 0, chan_index);
2058 FETCH(&r[1], 1, chan_index);
2059 FETCH(&r[2], 2, chan_index);
2060
2061 micro_sub( &r[1], &r[1], &r[2] );
2062 micro_mul( &r[0], &r[0], &r[1] );
2063 micro_add( &r[0], &r[0], &r[2] );
2064
2065 STORE(&r[0], 0, chan_index);
2066 }
2067 break;
2068
2069 case TGSI_OPCODE_CND:
2070 assert (0);
2071 break;
2072
2073 case TGSI_OPCODE_CND0:
2074 assert (0);
2075 break;
2076
2077 case TGSI_OPCODE_DOT2ADD:
2078 /* TGSI_OPCODE_DP2A */
2079 FETCH( &r[0], 0, CHAN_X );
2080 FETCH( &r[1], 1, CHAN_X );
2081 micro_mul( &r[0], &r[0], &r[1] );
2082
2083 FETCH( &r[1], 0, CHAN_Y );
2084 FETCH( &r[2], 1, CHAN_Y );
2085 micro_mul( &r[1], &r[1], &r[2] );
2086 micro_add( &r[0], &r[0], &r[1] );
2087
2088 FETCH( &r[2], 2, CHAN_X );
2089 micro_add( &r[0], &r[0], &r[2] );
2090
2091 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2092 STORE( &r[0], 0, chan_index );
2093 }
2094 break;
2095
2096 case TGSI_OPCODE_INDEX:
2097 assert (0);
2098 break;
2099
2100 case TGSI_OPCODE_NEGATE:
2101 assert (0);
2102 break;
2103
2104 case TGSI_OPCODE_FRAC:
2105 /* TGSI_OPCODE_FRC */
2106 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2107 FETCH( &r[0], 0, chan_index );
2108 micro_frc( &r[0], &r[0] );
2109 STORE( &r[0], 0, chan_index );
2110 }
2111 break;
2112
2113 case TGSI_OPCODE_CLAMP:
2114 assert (0);
2115 break;
2116
2117 case TGSI_OPCODE_FLOOR:
2118 /* TGSI_OPCODE_FLR */
2119 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2120 FETCH( &r[0], 0, chan_index );
2121 micro_flr( &r[0], &r[0] );
2122 STORE( &r[0], 0, chan_index );
2123 }
2124 break;
2125
2126 case TGSI_OPCODE_ROUND:
2127 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2128 FETCH( &r[0], 0, chan_index );
2129 micro_rnd( &r[0], &r[0] );
2130 STORE( &r[0], 0, chan_index );
2131 }
2132 break;
2133
2134 case TGSI_OPCODE_EXPBASE2:
2135 /* TGSI_OPCODE_EX2 */
2136 FETCH(&r[0], 0, CHAN_X);
2137
2138 #if FAST_MATH
2139 micro_exp2( &r[0], &r[0] );
2140 #else
2141 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2142 #endif
2143
2144 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2145 STORE( &r[0], 0, chan_index );
2146 }
2147 break;
2148
2149 case TGSI_OPCODE_LOGBASE2:
2150 /* TGSI_OPCODE_LG2 */
2151 FETCH( &r[0], 0, CHAN_X );
2152 micro_lg2( &r[0], &r[0] );
2153 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2154 STORE( &r[0], 0, chan_index );
2155 }
2156 break;
2157
2158 case TGSI_OPCODE_POWER:
2159 /* TGSI_OPCODE_POW */
2160 FETCH(&r[0], 0, CHAN_X);
2161 FETCH(&r[1], 1, CHAN_X);
2162
2163 micro_pow( &r[0], &r[0], &r[1] );
2164
2165 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2166 STORE( &r[0], 0, chan_index );
2167 }
2168 break;
2169
2170 case TGSI_OPCODE_CROSSPRODUCT:
2171 /* TGSI_OPCODE_XPD */
2172 FETCH(&r[0], 0, CHAN_Y);
2173 FETCH(&r[1], 1, CHAN_Z);
2174
2175 micro_mul( &r[2], &r[0], &r[1] );
2176
2177 FETCH(&r[3], 0, CHAN_Z);
2178 FETCH(&r[4], 1, CHAN_Y);
2179
2180 micro_mul( &r[5], &r[3], &r[4] );
2181 micro_sub( &r[2], &r[2], &r[5] );
2182
2183 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2184 STORE( &r[2], 0, CHAN_X );
2185 }
2186
2187 FETCH(&r[2], 1, CHAN_X);
2188
2189 micro_mul( &r[3], &r[3], &r[2] );
2190
2191 FETCH(&r[5], 0, CHAN_X);
2192
2193 micro_mul( &r[1], &r[1], &r[5] );
2194 micro_sub( &r[3], &r[3], &r[1] );
2195
2196 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2197 STORE( &r[3], 0, CHAN_Y );
2198 }
2199
2200 micro_mul( &r[5], &r[5], &r[4] );
2201 micro_mul( &r[0], &r[0], &r[2] );
2202 micro_sub( &r[5], &r[5], &r[0] );
2203
2204 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2205 STORE( &r[5], 0, CHAN_Z );
2206 }
2207
2208 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2209 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2210 }
2211 break;
2212
2213 case TGSI_OPCODE_MULTIPLYMATRIX:
2214 assert (0);
2215 break;
2216
2217 case TGSI_OPCODE_ABS:
2218 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2219 FETCH(&r[0], 0, chan_index);
2220
2221 micro_abs( &r[0], &r[0] );
2222
2223 STORE(&r[0], 0, chan_index);
2224 }
2225 break;
2226
2227 case TGSI_OPCODE_RCC:
2228 assert (0);
2229 break;
2230
2231 case TGSI_OPCODE_DPH:
2232 FETCH(&r[0], 0, CHAN_X);
2233 FETCH(&r[1], 1, CHAN_X);
2234
2235 micro_mul( &r[0], &r[0], &r[1] );
2236
2237 FETCH(&r[1], 0, CHAN_Y);
2238 FETCH(&r[2], 1, CHAN_Y);
2239
2240 micro_mul( &r[1], &r[1], &r[2] );
2241 micro_add( &r[0], &r[0], &r[1] );
2242
2243 FETCH(&r[1], 0, CHAN_Z);
2244 FETCH(&r[2], 1, CHAN_Z);
2245
2246 micro_mul( &r[1], &r[1], &r[2] );
2247 micro_add( &r[0], &r[0], &r[1] );
2248
2249 FETCH(&r[1], 1, CHAN_W);
2250
2251 micro_add( &r[0], &r[0], &r[1] );
2252
2253 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2254 STORE( &r[0], 0, chan_index );
2255 }
2256 break;
2257
2258 case TGSI_OPCODE_COS:
2259 FETCH(&r[0], 0, CHAN_X);
2260
2261 micro_cos( &r[0], &r[0] );
2262
2263 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2264 STORE( &r[0], 0, chan_index );
2265 }
2266 break;
2267
2268 case TGSI_OPCODE_DDX:
2269 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2270 FETCH( &r[0], 0, chan_index );
2271 micro_ddx( &r[0], &r[0] );
2272 STORE( &r[0], 0, chan_index );
2273 }
2274 break;
2275
2276 case TGSI_OPCODE_DDY:
2277 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2278 FETCH( &r[0], 0, chan_index );
2279 micro_ddy( &r[0], &r[0] );
2280 STORE( &r[0], 0, chan_index );
2281 }
2282 break;
2283
2284 case TGSI_OPCODE_KILP:
2285 exec_kilp (mach, inst);
2286 break;
2287
2288 case TGSI_OPCODE_KIL:
2289 exec_kil (mach, inst);
2290 break;
2291
2292 case TGSI_OPCODE_PK2H:
2293 assert (0);
2294 break;
2295
2296 case TGSI_OPCODE_PK2US:
2297 assert (0);
2298 break;
2299
2300 case TGSI_OPCODE_PK4B:
2301 assert (0);
2302 break;
2303
2304 case TGSI_OPCODE_PK4UB:
2305 assert (0);
2306 break;
2307
2308 case TGSI_OPCODE_RFL:
2309 assert (0);
2310 break;
2311
2312 case TGSI_OPCODE_SEQ:
2313 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2314 FETCH( &r[0], 0, chan_index );
2315 FETCH( &r[1], 1, chan_index );
2316 micro_eq( &r[0], &r[0], &r[1],
2317 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2318 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2319 STORE( &r[0], 0, chan_index );
2320 }
2321 break;
2322
2323 case TGSI_OPCODE_SFL:
2324 assert (0);
2325 break;
2326
2327 case TGSI_OPCODE_SGT:
2328 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2329 FETCH( &r[0], 0, chan_index );
2330 FETCH( &r[1], 1, chan_index );
2331 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2332 STORE( &r[0], 0, chan_index );
2333 }
2334 break;
2335
2336 case TGSI_OPCODE_SIN:
2337 FETCH( &r[0], 0, CHAN_X );
2338 micro_sin( &r[0], &r[0] );
2339 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2340 STORE( &r[0], 0, chan_index );
2341 }
2342 break;
2343
2344 case TGSI_OPCODE_SLE:
2345 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2346 FETCH( &r[0], 0, chan_index );
2347 FETCH( &r[1], 1, chan_index );
2348 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2349 STORE( &r[0], 0, chan_index );
2350 }
2351 break;
2352
2353 case TGSI_OPCODE_SNE:
2354 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2355 FETCH( &r[0], 0, chan_index );
2356 FETCH( &r[1], 1, chan_index );
2357 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2358 STORE( &r[0], 0, chan_index );
2359 }
2360 break;
2361
2362 case TGSI_OPCODE_STR:
2363 assert (0);
2364 break;
2365
2366 case TGSI_OPCODE_TEX:
2367 /* simple texture lookup */
2368 /* src[0] = texcoord */
2369 /* src[1] = sampler unit */
2370 exec_tex(mach, inst, FALSE, FALSE);
2371 break;
2372
2373 case TGSI_OPCODE_TXB:
2374 /* Texture lookup with lod bias */
2375 /* src[0] = texcoord (src[0].w = LOD bias) */
2376 /* src[1] = sampler unit */
2377 exec_tex(mach, inst, TRUE, FALSE);
2378 break;
2379
2380 case TGSI_OPCODE_TXD:
2381 /* Texture lookup with explicit partial derivatives */
2382 /* src[0] = texcoord */
2383 /* src[1] = d[strq]/dx */
2384 /* src[2] = d[strq]/dy */
2385 /* src[3] = sampler unit */
2386 assert (0);
2387 break;
2388
2389 case TGSI_OPCODE_TXL:
2390 /* Texture lookup with explicit LOD */
2391 /* src[0] = texcoord (src[0].w = LOD) */
2392 /* src[1] = sampler unit */
2393 exec_tex(mach, inst, TRUE, FALSE);
2394 break;
2395
2396 case TGSI_OPCODE_TXP:
2397 /* Texture lookup with projection */
2398 /* src[0] = texcoord (src[0].w = projection) */
2399 /* src[1] = sampler unit */
2400 exec_tex(mach, inst, FALSE, TRUE);
2401 break;
2402
2403 case TGSI_OPCODE_UP2H:
2404 assert (0);
2405 break;
2406
2407 case TGSI_OPCODE_UP2US:
2408 assert (0);
2409 break;
2410
2411 case TGSI_OPCODE_UP4B:
2412 assert (0);
2413 break;
2414
2415 case TGSI_OPCODE_UP4UB:
2416 assert (0);
2417 break;
2418
2419 case TGSI_OPCODE_X2D:
2420 assert (0);
2421 break;
2422
2423 case TGSI_OPCODE_ARA:
2424 assert (0);
2425 break;
2426
2427 case TGSI_OPCODE_ARR:
2428 assert (0);
2429 break;
2430
2431 case TGSI_OPCODE_BRA:
2432 assert (0);
2433 break;
2434
2435 case TGSI_OPCODE_CAL:
2436 /* skip the call if no execution channels are enabled */
2437 if (mach->ExecMask) {
2438 /* do the call */
2439
2440 /* push the Cond, Loop, Cont stacks */
2441 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2442 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2443 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2444 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2445 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2446 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2447
2448 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2449 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2450
2451 /* note that PC was already incremented above */
2452 mach->CallStack[mach->CallStackTop++] = *pc;
2453 *pc = inst->InstructionExtLabel.Label;
2454 }
2455 break;
2456
2457 case TGSI_OPCODE_RET:
2458 mach->FuncMask &= ~mach->ExecMask;
2459 UPDATE_EXEC_MASK(mach);
2460
2461 if (mach->FuncMask == 0x0) {
2462 /* really return now (otherwise, keep executing) */
2463
2464 if (mach->CallStackTop == 0) {
2465 /* returning from main() */
2466 *pc = -1;
2467 return;
2468 }
2469 *pc = mach->CallStack[--mach->CallStackTop];
2470
2471 /* pop the Cond, Loop, Cont stacks */
2472 assert(mach->CondStackTop > 0);
2473 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2474 assert(mach->LoopStackTop > 0);
2475 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2476 assert(mach->ContStackTop > 0);
2477 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2478 assert(mach->FuncStackTop > 0);
2479 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2480
2481 UPDATE_EXEC_MASK(mach);
2482 }
2483 break;
2484
2485 case TGSI_OPCODE_SSG:
2486 assert (0);
2487 break;
2488
2489 case TGSI_OPCODE_CMP:
2490 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2491 FETCH(&r[0], 0, chan_index);
2492 FETCH(&r[1], 1, chan_index);
2493 FETCH(&r[2], 2, chan_index);
2494
2495 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2496
2497 STORE(&r[0], 0, chan_index);
2498 }
2499 break;
2500
2501 case TGSI_OPCODE_SCS:
2502 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2503 FETCH( &r[0], 0, CHAN_X );
2504 }
2505 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2506 micro_cos( &r[1], &r[0] );
2507 STORE( &r[1], 0, CHAN_X );
2508 }
2509 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2510 micro_sin( &r[1], &r[0] );
2511 STORE( &r[1], 0, CHAN_Y );
2512 }
2513 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2514 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2515 }
2516 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2517 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2518 }
2519 break;
2520
2521 case TGSI_OPCODE_NRM:
2522 /* 3-component vector normalize */
2523 {
2524 union tgsi_exec_channel tmp, dot;
2525
2526 /* tmp = dp3(src0, src0): */
2527 FETCH( &r[0], 0, CHAN_X );
2528 micro_mul( &tmp, &r[0], &r[0] );
2529
2530 FETCH( &r[1], 0, CHAN_Y );
2531 micro_mul( &dot, &r[1], &r[1] );
2532 micro_add( &tmp, &tmp, &dot );
2533
2534 FETCH( &r[2], 0, CHAN_Z );
2535 micro_mul( &dot, &r[2], &r[2] );
2536 micro_add( &tmp, &tmp, &dot );
2537
2538 /* tmp = 1 / sqrt(tmp) */
2539 micro_sqrt( &tmp, &tmp );
2540 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2541
2542 /* note: w channel is undefined */
2543 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2544 /* chan = chan * tmp */
2545 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2546 STORE( &r[chan_index], 0, chan_index );
2547 }
2548 }
2549 break;
2550
2551 case TGSI_OPCODE_NRM4:
2552 /* 4-component vector normalize */
2553 {
2554 union tgsi_exec_channel tmp, dot;
2555
2556 /* tmp = dp4(src0, src0): */
2557 FETCH( &r[0], 0, CHAN_X );
2558 micro_mul( &tmp, &r[0], &r[0] );
2559
2560 FETCH( &r[1], 0, CHAN_Y );
2561 micro_mul( &dot, &r[1], &r[1] );
2562 micro_add( &tmp, &tmp, &dot );
2563
2564 FETCH( &r[2], 0, CHAN_Z );
2565 micro_mul( &dot, &r[2], &r[2] );
2566 micro_add( &tmp, &tmp, &dot );
2567
2568 FETCH( &r[3], 0, CHAN_W );
2569 micro_mul( &dot, &r[3], &r[3] );
2570 micro_add( &tmp, &tmp, &dot );
2571
2572 /* tmp = 1 / sqrt(tmp) */
2573 micro_sqrt( &tmp, &tmp );
2574 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2575
2576 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2577 /* chan = chan * tmp */
2578 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2579 STORE( &r[chan_index], 0, chan_index );
2580 }
2581 }
2582 break;
2583
2584 case TGSI_OPCODE_DIV:
2585 assert( 0 );
2586 break;
2587
2588 case TGSI_OPCODE_DP2:
2589 FETCH( &r[0], 0, CHAN_X );
2590 FETCH( &r[1], 1, CHAN_X );
2591 micro_mul( &r[0], &r[0], &r[1] );
2592
2593 FETCH( &r[1], 0, CHAN_Y );
2594 FETCH( &r[2], 1, CHAN_Y );
2595 micro_mul( &r[1], &r[1], &r[2] );
2596 micro_add( &r[0], &r[0], &r[1] );
2597
2598 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2599 STORE( &r[0], 0, chan_index );
2600 }
2601 break;
2602
2603 case TGSI_OPCODE_IF:
2604 /* push CondMask */
2605 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2606 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2607 FETCH( &r[0], 0, CHAN_X );
2608 /* update CondMask */
2609 if( ! r[0].u[0] ) {
2610 mach->CondMask &= ~0x1;
2611 }
2612 if( ! r[0].u[1] ) {
2613 mach->CondMask &= ~0x2;
2614 }
2615 if( ! r[0].u[2] ) {
2616 mach->CondMask &= ~0x4;
2617 }
2618 if( ! r[0].u[3] ) {
2619 mach->CondMask &= ~0x8;
2620 }
2621 UPDATE_EXEC_MASK(mach);
2622 /* Todo: If CondMask==0, jump to ELSE */
2623 break;
2624
2625 case TGSI_OPCODE_ELSE:
2626 /* invert CondMask wrt previous mask */
2627 {
2628 uint prevMask;
2629 assert(mach->CondStackTop > 0);
2630 prevMask = mach->CondStack[mach->CondStackTop - 1];
2631 mach->CondMask = ~mach->CondMask & prevMask;
2632 UPDATE_EXEC_MASK(mach);
2633 /* Todo: If CondMask==0, jump to ENDIF */
2634 }
2635 break;
2636
2637 case TGSI_OPCODE_ENDIF:
2638 /* pop CondMask */
2639 assert(mach->CondStackTop > 0);
2640 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2641 UPDATE_EXEC_MASK(mach);
2642 break;
2643
2644 case TGSI_OPCODE_END:
2645 /* halt execution */
2646 *pc = -1;
2647 break;
2648
2649 case TGSI_OPCODE_REP:
2650 assert (0);
2651 break;
2652
2653 case TGSI_OPCODE_ENDREP:
2654 assert (0);
2655 break;
2656
2657 case TGSI_OPCODE_PUSHA:
2658 assert (0);
2659 break;
2660
2661 case TGSI_OPCODE_POPA:
2662 assert (0);
2663 break;
2664
2665 case TGSI_OPCODE_CEIL:
2666 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2667 FETCH( &r[0], 0, chan_index );
2668 micro_ceil( &r[0], &r[0] );
2669 STORE( &r[0], 0, chan_index );
2670 }
2671 break;
2672
2673 case TGSI_OPCODE_I2F:
2674 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2675 FETCH( &r[0], 0, chan_index );
2676 micro_i2f( &r[0], &r[0] );
2677 STORE( &r[0], 0, chan_index );
2678 }
2679 break;
2680
2681 case TGSI_OPCODE_NOT:
2682 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2683 FETCH( &r[0], 0, chan_index );
2684 micro_not( &r[0], &r[0] );
2685 STORE( &r[0], 0, chan_index );
2686 }
2687 break;
2688
2689 case TGSI_OPCODE_TRUNC:
2690 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2691 FETCH( &r[0], 0, chan_index );
2692 micro_trunc( &r[0], &r[0] );
2693 STORE( &r[0], 0, chan_index );
2694 }
2695 break;
2696
2697 case TGSI_OPCODE_SHL:
2698 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2699 FETCH( &r[0], 0, chan_index );
2700 FETCH( &r[1], 1, chan_index );
2701 micro_shl( &r[0], &r[0], &r[1] );
2702 STORE( &r[0], 0, chan_index );
2703 }
2704 break;
2705
2706 case TGSI_OPCODE_SHR:
2707 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2708 FETCH( &r[0], 0, chan_index );
2709 FETCH( &r[1], 1, chan_index );
2710 micro_ishr( &r[0], &r[0], &r[1] );
2711 STORE( &r[0], 0, chan_index );
2712 }
2713 break;
2714
2715 case TGSI_OPCODE_AND:
2716 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2717 FETCH( &r[0], 0, chan_index );
2718 FETCH( &r[1], 1, chan_index );
2719 micro_and( &r[0], &r[0], &r[1] );
2720 STORE( &r[0], 0, chan_index );
2721 }
2722 break;
2723
2724 case TGSI_OPCODE_OR:
2725 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2726 FETCH( &r[0], 0, chan_index );
2727 FETCH( &r[1], 1, chan_index );
2728 micro_or( &r[0], &r[0], &r[1] );
2729 STORE( &r[0], 0, chan_index );
2730 }
2731 break;
2732
2733 case TGSI_OPCODE_MOD:
2734 assert (0);
2735 break;
2736
2737 case TGSI_OPCODE_XOR:
2738 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2739 FETCH( &r[0], 0, chan_index );
2740 FETCH( &r[1], 1, chan_index );
2741 micro_xor( &r[0], &r[0], &r[1] );
2742 STORE( &r[0], 0, chan_index );
2743 }
2744 break;
2745
2746 case TGSI_OPCODE_SAD:
2747 assert (0);
2748 break;
2749
2750 case TGSI_OPCODE_TXF:
2751 assert (0);
2752 break;
2753
2754 case TGSI_OPCODE_TXQ:
2755 assert (0);
2756 break;
2757
2758 case TGSI_OPCODE_EMIT:
2759 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2760 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2761 break;
2762
2763 case TGSI_OPCODE_ENDPRIM:
2764 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2765 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2766 break;
2767
2768 case TGSI_OPCODE_LOOP:
2769 /* fall-through (for now) */
2770 case TGSI_OPCODE_BGNLOOP2:
2771 /* push LoopMask and ContMasks */
2772 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2773 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2774 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2775 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2776 break;
2777
2778 case TGSI_OPCODE_ENDLOOP:
2779 /* fall-through (for now at least) */
2780 case TGSI_OPCODE_ENDLOOP2:
2781 /* Restore ContMask, but don't pop */
2782 assert(mach->ContStackTop > 0);
2783 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2784 UPDATE_EXEC_MASK(mach);
2785 if (mach->ExecMask) {
2786 /* repeat loop: jump to instruction just past BGNLOOP */
2787 *pc = inst->InstructionExtLabel.Label + 1;
2788 }
2789 else {
2790 /* exit loop: pop LoopMask */
2791 assert(mach->LoopStackTop > 0);
2792 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2793 /* pop ContMask */
2794 assert(mach->ContStackTop > 0);
2795 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2796 }
2797 UPDATE_EXEC_MASK(mach);
2798 break;
2799
2800 case TGSI_OPCODE_BRK:
2801 /* turn off loop channels for each enabled exec channel */
2802 mach->LoopMask &= ~mach->ExecMask;
2803 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2804 UPDATE_EXEC_MASK(mach);
2805 break;
2806
2807 case TGSI_OPCODE_CONT:
2808 /* turn off cont channels for each enabled exec channel */
2809 mach->ContMask &= ~mach->ExecMask;
2810 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2811 UPDATE_EXEC_MASK(mach);
2812 break;
2813
2814 case TGSI_OPCODE_BGNSUB:
2815 /* no-op */
2816 break;
2817
2818 case TGSI_OPCODE_ENDSUB:
2819 /* no-op */
2820 break;
2821
2822 case TGSI_OPCODE_NOISE1:
2823 assert( 0 );
2824 break;
2825
2826 case TGSI_OPCODE_NOISE2:
2827 assert( 0 );
2828 break;
2829
2830 case TGSI_OPCODE_NOISE3:
2831 assert( 0 );
2832 break;
2833
2834 case TGSI_OPCODE_NOISE4:
2835 assert( 0 );
2836 break;
2837
2838 case TGSI_OPCODE_NOP:
2839 break;
2840
2841 default:
2842 assert( 0 );
2843 }
2844 }
2845
2846
2847 /**
2848 * Run TGSI interpreter.
2849 * \return bitmask of "alive" quad components
2850 */
2851 uint
2852 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2853 {
2854 uint i;
2855 int pc = 0;
2856
2857 mach->CondMask = 0xf;
2858 mach->LoopMask = 0xf;
2859 mach->ContMask = 0xf;
2860 mach->FuncMask = 0xf;
2861 mach->ExecMask = 0xf;
2862
2863 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2864 assert(mach->CondStackTop == 0);
2865 assert(mach->LoopStackTop == 0);
2866 assert(mach->ContStackTop == 0);
2867 assert(mach->CallStackTop == 0);
2868
2869 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2870 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2871
2872 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2873 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2874 mach->Primitives[0] = 0;
2875 }
2876
2877 for (i = 0; i < QUAD_SIZE; i++) {
2878 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2879 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2880 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2881 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2882 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2883 }
2884
2885 /* execute declarations (interpolants) */
2886 for (i = 0; i < mach->NumDeclarations; i++) {
2887 exec_declaration( mach, mach->Declarations+i );
2888 }
2889
2890 /* execute instructions, until pc is set to -1 */
2891 while (pc != -1) {
2892 assert(pc < (int) mach->NumInstructions);
2893 exec_instruction( mach, mach->Instructions + pc, &pc );
2894 }
2895
2896 #if 0
2897 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2898 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2899 /*
2900 * Scale back depth component.
2901 */
2902 for (i = 0; i < 4; i++)
2903 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2904 }
2905 #endif
2906
2907 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2908 }
2909
2910