Merge commit 'origin/gallium-0.1' into gallium-0.2
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is the AND of four other masks (CondMask, LoopMask, ContMask
44 * and FuncMask, see UPDATE_EXEC_MASK); the first three are controlled by the
45 * flow control instructions (namely: IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_parse.h"
57 #include "tgsi/tgsi_util.h"
58 #include "tgsi_exec.h"
59 #include "util/u_memory.h"
60 #include "util/u_math.h"
61
/*
 * Non-zero selects the util_fast_* approximations in micro_exp2(),
 * micro_lg2() and micro_pow(); zero selects the libm based versions.
 */
#define FAST_MATH          1

/* Element indices within a channel, as used by micro_ddx()/micro_ddy(). */
#define TILE_TOP_LEFT      0
#define TILE_TOP_RIGHT     1
#define TILE_BOTTOM_LEFT   2
#define TILE_BOTTOM_RIGHT  3

/* Numeric channel identifiers for X, Y, Z and W. */
#define CHAN_X             0
#define CHAN_Y             1
#define CHAN_Z             2
#define CHAN_W             3
73
/*
 * Short aliases for the TGSI_EXEC_TEMP_* locations of the utility registers.
 * A name ending in _I is a register index, a name ending in _C is the
 * channel inside that register.
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
106
/** Non-zero when bit CHAN is set in the write mask of destination 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Non-zero when bit CHAN is set in the write mask of destination 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/*
 * Loop headers: the statement that follows is executed once for every
 * channel (0 .. NUM_CHANNELS-1) enabled in the relevant write mask, with
 * CHAN (an lvalue) holding the channel number.
 *
 * They are written as "if (!enabled) { } else" rather than "if (enabled)"
 * so that an `else` in the caller's code can never attach to the macro's
 * hidden `if`.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (!IS_CHANNEL_ENABLED( INST, CHAN )) { } else

#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (!IS_CHANNEL_ENABLED2( INST, CHAN )) { } else
120
121
/**
 * Recompute MACH->ExecMask as the bitwise AND of the conditional, loop,
 * continue and function masks.  MACH may be any pointer-valued expression.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
125
126 /**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131 void
132 tgsi_exec_machine_bind_shader(
133 struct tgsi_exec_machine *mach,
134 const struct tgsi_token *tokens,
135 uint numSamplers,
136 struct tgsi_sampler *samplers)
137 {
138 uint k;
139 struct tgsi_parse_context parse;
140 struct tgsi_exec_labels *labels = &mach->Labels;
141 struct tgsi_full_instruction *instructions;
142 struct tgsi_full_declaration *declarations;
143 uint maxInstructions = 10, numInstructions = 0;
144 uint maxDeclarations = 10, numDeclarations = 0;
145 uint instno = 0;
146
147 #if 0
148 tgsi_dump(tokens, 0);
149 #endif
150
151 util_init_math();
152
153 mach->Tokens = tokens;
154 mach->Samplers = samplers;
155
156 k = tgsi_parse_init (&parse, mach->Tokens);
157 if (k != TGSI_PARSE_OK) {
158 debug_printf( "Problem parsing!\n" );
159 return;
160 }
161
162 mach->Processor = parse.FullHeader.Processor.Processor;
163 mach->ImmLimit = 0;
164 labels->count = 0;
165
166 declarations = (struct tgsi_full_declaration *)
167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169 if (!declarations) {
170 return;
171 }
172
173 instructions = (struct tgsi_full_instruction *)
174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176 if (!instructions) {
177 FREE( declarations );
178 return;
179 }
180
181 while( !tgsi_parse_end_of_tokens( &parse ) ) {
182 uint pointer = parse.Position;
183 uint i;
184
185 tgsi_parse_token( &parse );
186 switch( parse.FullToken.Token.Type ) {
187 case TGSI_TOKEN_TYPE_DECLARATION:
188 /* save expanded declaration */
189 if (numDeclarations == maxDeclarations) {
190 declarations = REALLOC(declarations,
191 maxDeclarations
192 * sizeof(struct tgsi_full_declaration),
193 (maxDeclarations + 10)
194 * sizeof(struct tgsi_full_declaration));
195 maxDeclarations += 10;
196 }
197 memcpy(declarations + numDeclarations,
198 &parse.FullToken.FullDeclaration,
199 sizeof(declarations[0]));
200 numDeclarations++;
201 break;
202
203 case TGSI_TOKEN_TYPE_IMMEDIATE:
204 {
205 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
206 assert( size % 4 == 0 );
207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209 for( i = 0; i < size; i++ ) {
210 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212 }
213 mach->ImmLimit += size / 4;
214 }
215 break;
216
217 case TGSI_TOKEN_TYPE_INSTRUCTION:
218 assert( labels->count < MAX_LABELS );
219
220 labels->labels[labels->count][0] = instno;
221 labels->labels[labels->count][1] = pointer;
222 labels->count++;
223
224 /* save expanded instruction */
225 if (numInstructions == maxInstructions) {
226 instructions = REALLOC(instructions,
227 maxInstructions
228 * sizeof(struct tgsi_full_instruction),
229 (maxInstructions + 10)
230 * sizeof(struct tgsi_full_instruction));
231 maxInstructions += 10;
232 }
233 memcpy(instructions + numInstructions,
234 &parse.FullToken.FullInstruction,
235 sizeof(instructions[0]));
236 numInstructions++;
237 break;
238
239 default:
240 assert( 0 );
241 }
242 }
243 tgsi_parse_free (&parse);
244
245 if (mach->Declarations) {
246 FREE( mach->Declarations );
247 }
248 mach->Declarations = declarations;
249 mach->NumDeclarations = numDeclarations;
250
251 if (mach->Instructions) {
252 FREE( mach->Instructions );
253 }
254 mach->Instructions = instructions;
255 mach->NumInstructions = numInstructions;
256 }
257
258
259 void
260 tgsi_exec_machine_init(
261 struct tgsi_exec_machine *mach )
262 {
263 uint i;
264
265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268 /* Setup constants. */
269 for( i = 0; i < 4; i++ ) {
270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280 }
281 }
282
283
284 void
285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286 {
287 if (mach->Instructions) {
288 FREE(mach->Instructions);
289 mach->Instructions = NULL;
290 mach->NumInstructions = 0;
291 }
292 if (mach->Declarations) {
293 FREE(mach->Declarations);
294 mach->Declarations = NULL;
295 mach->NumDeclarations = 0;
296 }
297 }
298
299
300 static void
301 micro_abs(
302 union tgsi_exec_channel *dst,
303 const union tgsi_exec_channel *src )
304 {
305 dst->f[0] = fabsf( src->f[0] );
306 dst->f[1] = fabsf( src->f[1] );
307 dst->f[2] = fabsf( src->f[2] );
308 dst->f[3] = fabsf( src->f[3] );
309 }
310
311 static void
312 micro_add(
313 union tgsi_exec_channel *dst,
314 const union tgsi_exec_channel *src0,
315 const union tgsi_exec_channel *src1 )
316 {
317 dst->f[0] = src0->f[0] + src1->f[0];
318 dst->f[1] = src0->f[1] + src1->f[1];
319 dst->f[2] = src0->f[2] + src1->f[2];
320 dst->f[3] = src0->f[3] + src1->f[3];
321 }
322
323 static void
324 micro_iadd(
325 union tgsi_exec_channel *dst,
326 const union tgsi_exec_channel *src0,
327 const union tgsi_exec_channel *src1 )
328 {
329 dst->i[0] = src0->i[0] + src1->i[0];
330 dst->i[1] = src0->i[1] + src1->i[1];
331 dst->i[2] = src0->i[2] + src1->i[2];
332 dst->i[3] = src0->i[3] + src1->i[3];
333 }
334
335 static void
336 micro_and(
337 union tgsi_exec_channel *dst,
338 const union tgsi_exec_channel *src0,
339 const union tgsi_exec_channel *src1 )
340 {
341 dst->u[0] = src0->u[0] & src1->u[0];
342 dst->u[1] = src0->u[1] & src1->u[1];
343 dst->u[2] = src0->u[2] & src1->u[2];
344 dst->u[3] = src0->u[3] & src1->u[3];
345 }
346
347 static void
348 micro_ceil(
349 union tgsi_exec_channel *dst,
350 const union tgsi_exec_channel *src )
351 {
352 dst->f[0] = ceilf( src->f[0] );
353 dst->f[1] = ceilf( src->f[1] );
354 dst->f[2] = ceilf( src->f[2] );
355 dst->f[3] = ceilf( src->f[3] );
356 }
357
358 static void
359 micro_cos(
360 union tgsi_exec_channel *dst,
361 const union tgsi_exec_channel *src )
362 {
363 dst->f[0] = cosf( src->f[0] );
364 dst->f[1] = cosf( src->f[1] );
365 dst->f[2] = cosf( src->f[2] );
366 dst->f[3] = cosf( src->f[3] );
367 }
368
369 static void
370 micro_ddx(
371 union tgsi_exec_channel *dst,
372 const union tgsi_exec_channel *src )
373 {
374 dst->f[0] =
375 dst->f[1] =
376 dst->f[2] =
377 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
378 }
379
380 static void
381 micro_ddy(
382 union tgsi_exec_channel *dst,
383 const union tgsi_exec_channel *src )
384 {
385 dst->f[0] =
386 dst->f[1] =
387 dst->f[2] =
388 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
389 }
390
391 static void
392 micro_div(
393 union tgsi_exec_channel *dst,
394 const union tgsi_exec_channel *src0,
395 const union tgsi_exec_channel *src1 )
396 {
397 if (src1->f[0] != 0) {
398 dst->f[0] = src0->f[0] / src1->f[0];
399 }
400 if (src1->f[1] != 0) {
401 dst->f[1] = src0->f[1] / src1->f[1];
402 }
403 if (src1->f[2] != 0) {
404 dst->f[2] = src0->f[2] / src1->f[2];
405 }
406 if (src1->f[3] != 0) {
407 dst->f[3] = src0->f[3] / src1->f[3];
408 }
409 }
410
411 static void
412 micro_udiv(
413 union tgsi_exec_channel *dst,
414 const union tgsi_exec_channel *src0,
415 const union tgsi_exec_channel *src1 )
416 {
417 dst->u[0] = src0->u[0] / src1->u[0];
418 dst->u[1] = src0->u[1] / src1->u[1];
419 dst->u[2] = src0->u[2] / src1->u[2];
420 dst->u[3] = src0->u[3] / src1->u[3];
421 }
422
423 static void
424 micro_eq(
425 union tgsi_exec_channel *dst,
426 const union tgsi_exec_channel *src0,
427 const union tgsi_exec_channel *src1,
428 const union tgsi_exec_channel *src2,
429 const union tgsi_exec_channel *src3 )
430 {
431 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
432 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
433 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
434 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
435 }
436
437 static void
438 micro_ieq(
439 union tgsi_exec_channel *dst,
440 const union tgsi_exec_channel *src0,
441 const union tgsi_exec_channel *src1,
442 const union tgsi_exec_channel *src2,
443 const union tgsi_exec_channel *src3 )
444 {
445 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
446 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
447 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
448 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
449 }
450
451 static void
452 micro_exp2(
453 union tgsi_exec_channel *dst,
454 const union tgsi_exec_channel *src)
455 {
456 #if FAST_MATH
457 dst->f[0] = util_fast_exp2( src->f[0] );
458 dst->f[1] = util_fast_exp2( src->f[1] );
459 dst->f[2] = util_fast_exp2( src->f[2] );
460 dst->f[3] = util_fast_exp2( src->f[3] );
461 #else
462 dst->f[0] = powf( 2.0f, src->f[0] );
463 dst->f[1] = powf( 2.0f, src->f[1] );
464 dst->f[2] = powf( 2.0f, src->f[2] );
465 dst->f[3] = powf( 2.0f, src->f[3] );
466 #endif
467 }
468
469 static void
470 micro_f2it(
471 union tgsi_exec_channel *dst,
472 const union tgsi_exec_channel *src )
473 {
474 dst->i[0] = (int) src->f[0];
475 dst->i[1] = (int) src->f[1];
476 dst->i[2] = (int) src->f[2];
477 dst->i[3] = (int) src->f[3];
478 }
479
480 static void
481 micro_f2ut(
482 union tgsi_exec_channel *dst,
483 const union tgsi_exec_channel *src )
484 {
485 dst->u[0] = (uint) src->f[0];
486 dst->u[1] = (uint) src->f[1];
487 dst->u[2] = (uint) src->f[2];
488 dst->u[3] = (uint) src->f[3];
489 }
490
491 static void
492 micro_flr(
493 union tgsi_exec_channel *dst,
494 const union tgsi_exec_channel *src )
495 {
496 dst->f[0] = floorf( src->f[0] );
497 dst->f[1] = floorf( src->f[1] );
498 dst->f[2] = floorf( src->f[2] );
499 dst->f[3] = floorf( src->f[3] );
500 }
501
502 static void
503 micro_frc(
504 union tgsi_exec_channel *dst,
505 const union tgsi_exec_channel *src )
506 {
507 dst->f[0] = src->f[0] - floorf( src->f[0] );
508 dst->f[1] = src->f[1] - floorf( src->f[1] );
509 dst->f[2] = src->f[2] - floorf( src->f[2] );
510 dst->f[3] = src->f[3] - floorf( src->f[3] );
511 }
512
513 static void
514 micro_ge(
515 union tgsi_exec_channel *dst,
516 const union tgsi_exec_channel *src0,
517 const union tgsi_exec_channel *src1,
518 const union tgsi_exec_channel *src2,
519 const union tgsi_exec_channel *src3 )
520 {
521 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
522 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
523 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
524 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
525 }
526
527 static void
528 micro_i2f(
529 union tgsi_exec_channel *dst,
530 const union tgsi_exec_channel *src )
531 {
532 dst->f[0] = (float) src->i[0];
533 dst->f[1] = (float) src->i[1];
534 dst->f[2] = (float) src->i[2];
535 dst->f[3] = (float) src->i[3];
536 }
537
538 static void
539 micro_lg2(
540 union tgsi_exec_channel *dst,
541 const union tgsi_exec_channel *src )
542 {
543 #if FAST_MATH
544 dst->f[0] = util_fast_log2( src->f[0] );
545 dst->f[1] = util_fast_log2( src->f[1] );
546 dst->f[2] = util_fast_log2( src->f[2] );
547 dst->f[3] = util_fast_log2( src->f[3] );
548 #else
549 dst->f[0] = logf( src->f[0] ) * 1.442695f;
550 dst->f[1] = logf( src->f[1] ) * 1.442695f;
551 dst->f[2] = logf( src->f[2] ) * 1.442695f;
552 dst->f[3] = logf( src->f[3] ) * 1.442695f;
553 #endif
554 }
555
556 static void
557 micro_le(
558 union tgsi_exec_channel *dst,
559 const union tgsi_exec_channel *src0,
560 const union tgsi_exec_channel *src1,
561 const union tgsi_exec_channel *src2,
562 const union tgsi_exec_channel *src3 )
563 {
564 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
565 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
566 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
567 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
568 }
569
570 static void
571 micro_lt(
572 union tgsi_exec_channel *dst,
573 const union tgsi_exec_channel *src0,
574 const union tgsi_exec_channel *src1,
575 const union tgsi_exec_channel *src2,
576 const union tgsi_exec_channel *src3 )
577 {
578 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
579 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
580 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
581 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
582 }
583
584 static void
585 micro_ilt(
586 union tgsi_exec_channel *dst,
587 const union tgsi_exec_channel *src0,
588 const union tgsi_exec_channel *src1,
589 const union tgsi_exec_channel *src2,
590 const union tgsi_exec_channel *src3 )
591 {
592 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
593 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
594 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
595 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
596 }
597
598 static void
599 micro_ult(
600 union tgsi_exec_channel *dst,
601 const union tgsi_exec_channel *src0,
602 const union tgsi_exec_channel *src1,
603 const union tgsi_exec_channel *src2,
604 const union tgsi_exec_channel *src3 )
605 {
606 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
607 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
608 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
609 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
610 }
611
612 static void
613 micro_max(
614 union tgsi_exec_channel *dst,
615 const union tgsi_exec_channel *src0,
616 const union tgsi_exec_channel *src1 )
617 {
618 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
619 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
620 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
621 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
622 }
623
624 static void
625 micro_imax(
626 union tgsi_exec_channel *dst,
627 const union tgsi_exec_channel *src0,
628 const union tgsi_exec_channel *src1 )
629 {
630 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
631 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
632 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
633 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
634 }
635
636 static void
637 micro_umax(
638 union tgsi_exec_channel *dst,
639 const union tgsi_exec_channel *src0,
640 const union tgsi_exec_channel *src1 )
641 {
642 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
643 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
644 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
645 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
646 }
647
648 static void
649 micro_min(
650 union tgsi_exec_channel *dst,
651 const union tgsi_exec_channel *src0,
652 const union tgsi_exec_channel *src1 )
653 {
654 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
655 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
656 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
657 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
658 }
659
660 static void
661 micro_imin(
662 union tgsi_exec_channel *dst,
663 const union tgsi_exec_channel *src0,
664 const union tgsi_exec_channel *src1 )
665 {
666 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
667 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
668 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
669 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
670 }
671
672 static void
673 micro_umin(
674 union tgsi_exec_channel *dst,
675 const union tgsi_exec_channel *src0,
676 const union tgsi_exec_channel *src1 )
677 {
678 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
679 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
680 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
681 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
682 }
683
684 static void
685 micro_umod(
686 union tgsi_exec_channel *dst,
687 const union tgsi_exec_channel *src0,
688 const union tgsi_exec_channel *src1 )
689 {
690 dst->u[0] = src0->u[0] % src1->u[0];
691 dst->u[1] = src0->u[1] % src1->u[1];
692 dst->u[2] = src0->u[2] % src1->u[2];
693 dst->u[3] = src0->u[3] % src1->u[3];
694 }
695
696 static void
697 micro_mul(
698 union tgsi_exec_channel *dst,
699 const union tgsi_exec_channel *src0,
700 const union tgsi_exec_channel *src1 )
701 {
702 dst->f[0] = src0->f[0] * src1->f[0];
703 dst->f[1] = src0->f[1] * src1->f[1];
704 dst->f[2] = src0->f[2] * src1->f[2];
705 dst->f[3] = src0->f[3] * src1->f[3];
706 }
707
708 static void
709 micro_imul(
710 union tgsi_exec_channel *dst,
711 const union tgsi_exec_channel *src0,
712 const union tgsi_exec_channel *src1 )
713 {
714 dst->i[0] = src0->i[0] * src1->i[0];
715 dst->i[1] = src0->i[1] * src1->i[1];
716 dst->i[2] = src0->i[2] * src1->i[2];
717 dst->i[3] = src0->i[3] * src1->i[3];
718 }
719
720 static void
721 micro_imul64(
722 union tgsi_exec_channel *dst0,
723 union tgsi_exec_channel *dst1,
724 const union tgsi_exec_channel *src0,
725 const union tgsi_exec_channel *src1 )
726 {
727 dst1->i[0] = src0->i[0] * src1->i[0];
728 dst1->i[1] = src0->i[1] * src1->i[1];
729 dst1->i[2] = src0->i[2] * src1->i[2];
730 dst1->i[3] = src0->i[3] * src1->i[3];
731 dst0->i[0] = 0;
732 dst0->i[1] = 0;
733 dst0->i[2] = 0;
734 dst0->i[3] = 0;
735 }
736
737 static void
738 micro_umul64(
739 union tgsi_exec_channel *dst0,
740 union tgsi_exec_channel *dst1,
741 const union tgsi_exec_channel *src0,
742 const union tgsi_exec_channel *src1 )
743 {
744 dst1->u[0] = src0->u[0] * src1->u[0];
745 dst1->u[1] = src0->u[1] * src1->u[1];
746 dst1->u[2] = src0->u[2] * src1->u[2];
747 dst1->u[3] = src0->u[3] * src1->u[3];
748 dst0->u[0] = 0;
749 dst0->u[1] = 0;
750 dst0->u[2] = 0;
751 dst0->u[3] = 0;
752 }
753
754 static void
755 micro_movc(
756 union tgsi_exec_channel *dst,
757 const union tgsi_exec_channel *src0,
758 const union tgsi_exec_channel *src1,
759 const union tgsi_exec_channel *src2 )
760 {
761 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
762 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
763 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
764 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
765 }
766
767 static void
768 micro_neg(
769 union tgsi_exec_channel *dst,
770 const union tgsi_exec_channel *src )
771 {
772 dst->f[0] = -src->f[0];
773 dst->f[1] = -src->f[1];
774 dst->f[2] = -src->f[2];
775 dst->f[3] = -src->f[3];
776 }
777
778 static void
779 micro_ineg(
780 union tgsi_exec_channel *dst,
781 const union tgsi_exec_channel *src )
782 {
783 dst->i[0] = -src->i[0];
784 dst->i[1] = -src->i[1];
785 dst->i[2] = -src->i[2];
786 dst->i[3] = -src->i[3];
787 }
788
789 static void
790 micro_not(
791 union tgsi_exec_channel *dst,
792 const union tgsi_exec_channel *src )
793 {
794 dst->u[0] = ~src->u[0];
795 dst->u[1] = ~src->u[1];
796 dst->u[2] = ~src->u[2];
797 dst->u[3] = ~src->u[3];
798 }
799
800 static void
801 micro_or(
802 union tgsi_exec_channel *dst,
803 const union tgsi_exec_channel *src0,
804 const union tgsi_exec_channel *src1 )
805 {
806 dst->u[0] = src0->u[0] | src1->u[0];
807 dst->u[1] = src0->u[1] | src1->u[1];
808 dst->u[2] = src0->u[2] | src1->u[2];
809 dst->u[3] = src0->u[3] | src1->u[3];
810 }
811
812 static void
813 micro_pow(
814 union tgsi_exec_channel *dst,
815 const union tgsi_exec_channel *src0,
816 const union tgsi_exec_channel *src1 )
817 {
818 #if FAST_MATH
819 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
820 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
821 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
822 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
823 #else
824 dst->f[0] = powf( src0->f[0], src1->f[0] );
825 dst->f[1] = powf( src0->f[1], src1->f[1] );
826 dst->f[2] = powf( src0->f[2], src1->f[2] );
827 dst->f[3] = powf( src0->f[3], src1->f[3] );
828 #endif
829 }
830
831 static void
832 micro_rnd(
833 union tgsi_exec_channel *dst,
834 const union tgsi_exec_channel *src )
835 {
836 dst->f[0] = floorf( src->f[0] + 0.5f );
837 dst->f[1] = floorf( src->f[1] + 0.5f );
838 dst->f[2] = floorf( src->f[2] + 0.5f );
839 dst->f[3] = floorf( src->f[3] + 0.5f );
840 }
841
842 static void
843 micro_shl(
844 union tgsi_exec_channel *dst,
845 const union tgsi_exec_channel *src0,
846 const union tgsi_exec_channel *src1 )
847 {
848 dst->i[0] = src0->i[0] << src1->i[0];
849 dst->i[1] = src0->i[1] << src1->i[1];
850 dst->i[2] = src0->i[2] << src1->i[2];
851 dst->i[3] = src0->i[3] << src1->i[3];
852 }
853
854 static void
855 micro_ishr(
856 union tgsi_exec_channel *dst,
857 const union tgsi_exec_channel *src0,
858 const union tgsi_exec_channel *src1 )
859 {
860 dst->i[0] = src0->i[0] >> src1->i[0];
861 dst->i[1] = src0->i[1] >> src1->i[1];
862 dst->i[2] = src0->i[2] >> src1->i[2];
863 dst->i[3] = src0->i[3] >> src1->i[3];
864 }
865
866 static void
867 micro_trunc(
868 union tgsi_exec_channel *dst,
869 const union tgsi_exec_channel *src0 )
870 {
871 dst->f[0] = (float) (int) src0->f[0];
872 dst->f[1] = (float) (int) src0->f[1];
873 dst->f[2] = (float) (int) src0->f[2];
874 dst->f[3] = (float) (int) src0->f[3];
875 }
876
877 static void
878 micro_ushr(
879 union tgsi_exec_channel *dst,
880 const union tgsi_exec_channel *src0,
881 const union tgsi_exec_channel *src1 )
882 {
883 dst->u[0] = src0->u[0] >> src1->u[0];
884 dst->u[1] = src0->u[1] >> src1->u[1];
885 dst->u[2] = src0->u[2] >> src1->u[2];
886 dst->u[3] = src0->u[3] >> src1->u[3];
887 }
888
889 static void
890 micro_sin(
891 union tgsi_exec_channel *dst,
892 const union tgsi_exec_channel *src )
893 {
894 dst->f[0] = sinf( src->f[0] );
895 dst->f[1] = sinf( src->f[1] );
896 dst->f[2] = sinf( src->f[2] );
897 dst->f[3] = sinf( src->f[3] );
898 }
899
900 static void
901 micro_sqrt( union tgsi_exec_channel *dst,
902 const union tgsi_exec_channel *src )
903 {
904 dst->f[0] = sqrtf( src->f[0] );
905 dst->f[1] = sqrtf( src->f[1] );
906 dst->f[2] = sqrtf( src->f[2] );
907 dst->f[3] = sqrtf( src->f[3] );
908 }
909
910 static void
911 micro_sub(
912 union tgsi_exec_channel *dst,
913 const union tgsi_exec_channel *src0,
914 const union tgsi_exec_channel *src1 )
915 {
916 dst->f[0] = src0->f[0] - src1->f[0];
917 dst->f[1] = src0->f[1] - src1->f[1];
918 dst->f[2] = src0->f[2] - src1->f[2];
919 dst->f[3] = src0->f[3] - src1->f[3];
920 }
921
922 static void
923 micro_u2f(
924 union tgsi_exec_channel *dst,
925 const union tgsi_exec_channel *src )
926 {
927 dst->f[0] = (float) src->u[0];
928 dst->f[1] = (float) src->u[1];
929 dst->f[2] = (float) src->u[2];
930 dst->f[3] = (float) src->u[3];
931 }
932
933 static void
934 micro_xor(
935 union tgsi_exec_channel *dst,
936 const union tgsi_exec_channel *src0,
937 const union tgsi_exec_channel *src1 )
938 {
939 dst->u[0] = src0->u[0] ^ src1->u[0];
940 dst->u[1] = src0->u[1] ^ src1->u[1];
941 dst->u[2] = src0->u[2] ^ src1->u[2];
942 dst->u[3] = src0->u[3] ^ src1->u[3];
943 }
944
945 static void
946 fetch_src_file_channel(
947 const struct tgsi_exec_machine *mach,
948 const uint file,
949 const uint swizzle,
950 const union tgsi_exec_channel *index,
951 union tgsi_exec_channel *chan )
952 {
953 switch( swizzle ) {
954 case TGSI_EXTSWIZZLE_X:
955 case TGSI_EXTSWIZZLE_Y:
956 case TGSI_EXTSWIZZLE_Z:
957 case TGSI_EXTSWIZZLE_W:
958 switch( file ) {
959 case TGSI_FILE_CONSTANT:
960 assert(mach->Consts);
961 chan->f[0] = mach->Consts[index->i[0]][swizzle];
962 chan->f[1] = mach->Consts[index->i[1]][swizzle];
963 chan->f[2] = mach->Consts[index->i[2]][swizzle];
964 chan->f[3] = mach->Consts[index->i[3]][swizzle];
965 break;
966
967 case TGSI_FILE_INPUT:
968 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
969 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
970 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
971 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
972 break;
973
974 case TGSI_FILE_TEMPORARY:
975 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
976 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
977 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
978 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
979 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
980 break;
981
982 case TGSI_FILE_IMMEDIATE:
983 assert( index->i[0] < (int) mach->ImmLimit );
984 chan->f[0] = mach->Imms[index->i[0]][swizzle];
985 assert( index->i[1] < (int) mach->ImmLimit );
986 chan->f[1] = mach->Imms[index->i[1]][swizzle];
987 assert( index->i[2] < (int) mach->ImmLimit );
988 chan->f[2] = mach->Imms[index->i[2]][swizzle];
989 assert( index->i[3] < (int) mach->ImmLimit );
990 chan->f[3] = mach->Imms[index->i[3]][swizzle];
991 break;
992
993 case TGSI_FILE_ADDRESS:
994 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
995 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
996 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
997 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
998 break;
999
1000 case TGSI_FILE_OUTPUT:
1001 /* vertex/fragment output vars can be read too */
1002 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1003 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1004 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1005 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1006 break;
1007
1008 default:
1009 assert( 0 );
1010 }
1011 break;
1012
1013 case TGSI_EXTSWIZZLE_ZERO:
1014 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1015 break;
1016
1017 case TGSI_EXTSWIZZLE_ONE:
1018 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1019 break;
1020
1021 default:
1022 assert( 0 );
1023 }
1024 }
1025
1026 static void
1027 fetch_source(
1028 const struct tgsi_exec_machine *mach,
1029 union tgsi_exec_channel *chan,
1030 const struct tgsi_full_src_register *reg,
1031 const uint chan_index )
1032 {
1033 union tgsi_exec_channel index;
1034 uint swizzle;
1035
1036 index.i[0] =
1037 index.i[1] =
1038 index.i[2] =
1039 index.i[3] = reg->SrcRegister.Index;
1040
1041 if (reg->SrcRegister.Indirect) {
1042 union tgsi_exec_channel index2;
1043 union tgsi_exec_channel indir_index;
1044
1045 index2.i[0] =
1046 index2.i[1] =
1047 index2.i[2] =
1048 index2.i[3] = reg->SrcRegisterInd.Index;
1049
1050 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1051 fetch_src_file_channel(
1052 mach,
1053 reg->SrcRegisterInd.File,
1054 swizzle,
1055 &index2,
1056 &indir_index );
1057
1058 index.i[0] += indir_index.i[0];
1059 index.i[1] += indir_index.i[1];
1060 index.i[2] += indir_index.i[2];
1061 index.i[3] += indir_index.i[3];
1062 }
1063
1064 if( reg->SrcRegister.Dimension ) {
1065 switch( reg->SrcRegister.File ) {
1066 case TGSI_FILE_INPUT:
1067 index.i[0] *= 17;
1068 index.i[1] *= 17;
1069 index.i[2] *= 17;
1070 index.i[3] *= 17;
1071 break;
1072 case TGSI_FILE_CONSTANT:
1073 index.i[0] *= 4096;
1074 index.i[1] *= 4096;
1075 index.i[2] *= 4096;
1076 index.i[3] *= 4096;
1077 break;
1078 default:
1079 assert( 0 );
1080 }
1081
1082 index.i[0] += reg->SrcRegisterDim.Index;
1083 index.i[1] += reg->SrcRegisterDim.Index;
1084 index.i[2] += reg->SrcRegisterDim.Index;
1085 index.i[3] += reg->SrcRegisterDim.Index;
1086
1087 if (reg->SrcRegisterDim.Indirect) {
1088 union tgsi_exec_channel index2;
1089 union tgsi_exec_channel indir_index;
1090
1091 index2.i[0] =
1092 index2.i[1] =
1093 index2.i[2] =
1094 index2.i[3] = reg->SrcRegisterDimInd.Index;
1095
1096 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1097 fetch_src_file_channel(
1098 mach,
1099 reg->SrcRegisterDimInd.File,
1100 swizzle,
1101 &index2,
1102 &indir_index );
1103
1104 index.i[0] += indir_index.i[0];
1105 index.i[1] += indir_index.i[1];
1106 index.i[2] += indir_index.i[2];
1107 index.i[3] += indir_index.i[3];
1108 }
1109 }
1110
1111 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1112 fetch_src_file_channel(
1113 mach,
1114 reg->SrcRegister.File,
1115 swizzle,
1116 &index,
1117 chan );
1118
1119 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1120 case TGSI_UTIL_SIGN_CLEAR:
1121 micro_abs( chan, chan );
1122 break;
1123
1124 case TGSI_UTIL_SIGN_SET:
1125 micro_abs( chan, chan );
1126 micro_neg( chan, chan );
1127 break;
1128
1129 case TGSI_UTIL_SIGN_TOGGLE:
1130 micro_neg( chan, chan );
1131 break;
1132
1133 case TGSI_UTIL_SIGN_KEEP:
1134 break;
1135 }
1136
1137 if (reg->SrcRegisterExtMod.Complement) {
1138 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1139 }
1140 }
1141
1142 static void
1143 store_dest(
1144 struct tgsi_exec_machine *mach,
1145 const union tgsi_exec_channel *chan,
1146 const struct tgsi_full_dst_register *reg,
1147 const struct tgsi_full_instruction *inst,
1148 uint chan_index )
1149 {
1150 uint i;
1151 union tgsi_exec_channel null;
1152 union tgsi_exec_channel *dst;
1153 uint execmask = mach->ExecMask;
1154
1155 switch (reg->DstRegister.File) {
1156 case TGSI_FILE_NULL:
1157 dst = &null;
1158 break;
1159
1160 case TGSI_FILE_OUTPUT:
1161 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1162 + reg->DstRegister.Index].xyzw[chan_index];
1163 break;
1164
1165 case TGSI_FILE_TEMPORARY:
1166 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1167 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1168 break;
1169
1170 case TGSI_FILE_ADDRESS:
1171 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1172 break;
1173
1174 default:
1175 assert( 0 );
1176 return;
1177 }
1178
1179 if (inst->InstructionExtNv.CondFlowEnable) {
1180 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1181 uint swizzle;
1182 uint shift;
1183 uint mask;
1184 uint test;
1185
1186 /* Only CC0 supported.
1187 */
1188 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1189
1190 switch (chan_index) {
1191 case CHAN_X:
1192 swizzle = inst->InstructionExtNv.CondSwizzleX;
1193 break;
1194 case CHAN_Y:
1195 swizzle = inst->InstructionExtNv.CondSwizzleY;
1196 break;
1197 case CHAN_Z:
1198 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1199 break;
1200 case CHAN_W:
1201 swizzle = inst->InstructionExtNv.CondSwizzleW;
1202 break;
1203 default:
1204 assert( 0 );
1205 return;
1206 }
1207
1208 switch (swizzle) {
1209 case TGSI_SWIZZLE_X:
1210 shift = TGSI_EXEC_CC_X_SHIFT;
1211 mask = TGSI_EXEC_CC_X_MASK;
1212 break;
1213 case TGSI_SWIZZLE_Y:
1214 shift = TGSI_EXEC_CC_Y_SHIFT;
1215 mask = TGSI_EXEC_CC_Y_MASK;
1216 break;
1217 case TGSI_SWIZZLE_Z:
1218 shift = TGSI_EXEC_CC_Z_SHIFT;
1219 mask = TGSI_EXEC_CC_Z_MASK;
1220 break;
1221 case TGSI_SWIZZLE_W:
1222 shift = TGSI_EXEC_CC_W_SHIFT;
1223 mask = TGSI_EXEC_CC_W_MASK;
1224 break;
1225 default:
1226 assert( 0 );
1227 return;
1228 }
1229
1230 switch (inst->InstructionExtNv.CondMask) {
1231 case TGSI_CC_GT:
1232 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1233 for (i = 0; i < QUAD_SIZE; i++)
1234 if (cc->u[i] & test)
1235 execmask &= ~(1 << i);
1236 break;
1237
1238 case TGSI_CC_EQ:
1239 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1240 for (i = 0; i < QUAD_SIZE; i++)
1241 if (cc->u[i] & test)
1242 execmask &= ~(1 << i);
1243 break;
1244
1245 case TGSI_CC_LT:
1246 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1247 for (i = 0; i < QUAD_SIZE; i++)
1248 if (cc->u[i] & test)
1249 execmask &= ~(1 << i);
1250 break;
1251
1252 case TGSI_CC_GE:
1253 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1254 for (i = 0; i < QUAD_SIZE; i++)
1255 if (cc->u[i] & test)
1256 execmask &= ~(1 << i);
1257 break;
1258
1259 case TGSI_CC_LE:
1260 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1261 for (i = 0; i < QUAD_SIZE; i++)
1262 if (cc->u[i] & test)
1263 execmask &= ~(1 << i);
1264 break;
1265
1266 case TGSI_CC_NE:
1267 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1268 for (i = 0; i < QUAD_SIZE; i++)
1269 if (cc->u[i] & test)
1270 execmask &= ~(1 << i);
1271 break;
1272
1273 case TGSI_CC_TR:
1274 break;
1275
1276 case TGSI_CC_FL:
1277 for (i = 0; i < QUAD_SIZE; i++)
1278 execmask &= ~(1 << i);
1279 break;
1280
1281 default:
1282 assert( 0 );
1283 return;
1284 }
1285 }
1286
1287 switch (inst->Instruction.Saturate) {
1288 case TGSI_SAT_NONE:
1289 for (i = 0; i < QUAD_SIZE; i++)
1290 if (execmask & (1 << i))
1291 dst->i[i] = chan->i[i];
1292 break;
1293
1294 case TGSI_SAT_ZERO_ONE:
1295 for (i = 0; i < QUAD_SIZE; i++)
1296 if (execmask & (1 << i)) {
1297 if (chan->f[i] < 0.0f)
1298 dst->f[i] = 0.0f;
1299 else if (chan->f[i] > 1.0f)
1300 dst->f[i] = 1.0f;
1301 else
1302 dst->i[i] = chan->i[i];
1303 }
1304 break;
1305
1306 case TGSI_SAT_MINUS_PLUS_ONE:
1307 for (i = 0; i < QUAD_SIZE; i++)
1308 if (execmask & (1 << i)) {
1309 if (chan->f[i] < -1.0f)
1310 dst->f[i] = -1.0f;
1311 else if (chan->f[i] > 1.0f)
1312 dst->f[i] = 1.0f;
1313 else
1314 dst->i[i] = chan->i[i];
1315 }
1316 break;
1317
1318 default:
1319 assert( 0 );
1320 }
1321
1322 if (inst->InstructionExtNv.CondDstUpdate) {
1323 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1324 uint shift;
1325 uint mask;
1326
1327 /* Only CC0 supported.
1328 */
1329 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1330
1331 switch (chan_index) {
1332 case CHAN_X:
1333 shift = TGSI_EXEC_CC_X_SHIFT;
1334 mask = ~TGSI_EXEC_CC_X_MASK;
1335 break;
1336 case CHAN_Y:
1337 shift = TGSI_EXEC_CC_Y_SHIFT;
1338 mask = ~TGSI_EXEC_CC_Y_MASK;
1339 break;
1340 case CHAN_Z:
1341 shift = TGSI_EXEC_CC_Z_SHIFT;
1342 mask = ~TGSI_EXEC_CC_Z_MASK;
1343 break;
1344 case CHAN_W:
1345 shift = TGSI_EXEC_CC_W_SHIFT;
1346 mask = ~TGSI_EXEC_CC_W_MASK;
1347 break;
1348 default:
1349 assert( 0 );
1350 return;
1351 }
1352
1353 for (i = 0; i < QUAD_SIZE; i++)
1354 if (execmask & (1 << i)) {
1355 cc->u[i] &= mask;
1356 if (dst->f[i] < 0.0f)
1357 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1358 else if (dst->f[i] > 0.0f)
1359 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1360 else if (dst->f[i] == 0.0f)
1361 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1362 else
1363 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1364 }
1365 }
1366 }
1367
/*
 * Shorthands for reading a source operand / writing a destination operand
 * of the current instruction.  Both expect variables named `mach' and
 * `inst' to be in scope at the point of use.
 *
 *    VAL    pointer to the union tgsi_exec_channel to fill / to store
 *    INDEX  which source (FETCH) or destination (STORE) operand
 *    CHAN   which channel of that operand
 */
#define FETCH(VAL,INDEX,CHAN) \
   fetch_source( mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN )

#define STORE(VAL,INDEX,CHAN) \
   store_dest( mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1373
1374
1375 /**
1376 * Execute ARB-style KIL which is predicated by a src register.
1377 * Kill fragment if any of the four values is less than zero.
1378 */
1379 static void
1380 exec_kil(struct tgsi_exec_machine *mach,
1381 const struct tgsi_full_instruction *inst)
1382 {
1383 uint uniquemask;
1384 uint chan_index;
1385 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1386 union tgsi_exec_channel r[1];
1387
1388 /* This mask stores component bits that were already tested. Note that
1389 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1390 * tested. */
1391 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1392
1393 for (chan_index = 0; chan_index < 4; chan_index++)
1394 {
1395 uint swizzle;
1396 uint i;
1397
1398 /* unswizzle channel */
1399 swizzle = tgsi_util_get_full_src_register_extswizzle (
1400 &inst->FullSrcRegisters[0],
1401 chan_index);
1402
1403 /* check if the component has not been already tested */
1404 if (uniquemask & (1 << swizzle))
1405 continue;
1406 uniquemask |= 1 << swizzle;
1407
1408 FETCH(&r[0], 0, chan_index);
1409 for (i = 0; i < 4; i++)
1410 if (r[0].f[i] < 0.0f)
1411 kilmask |= 1 << i;
1412 }
1413
1414 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1415 }
1416
1417 /**
1418 * Execute NVIDIA-style KIL which is predicated by a condition code.
1419 * Kill fragment if the condition code is TRUE.
1420 */
1421 static void
1422 exec_kilp(struct tgsi_exec_machine *mach,
1423 const struct tgsi_full_instruction *inst)
1424 {
1425 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1426
1427 if (inst->InstructionExtNv.CondFlowEnable) {
1428 uint swizzle[4];
1429 uint chan_index;
1430
1431 kilmask = 0x0;
1432
1433 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1434 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1435 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1436 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1437
1438 for (chan_index = 0; chan_index < 4; chan_index++)
1439 {
1440 uint i;
1441
1442 for (i = 0; i < 4; i++) {
1443 /* TODO: evaluate the condition code */
1444 if (0)
1445 kilmask |= 1 << i;
1446 }
1447 }
1448 }
1449 else {
1450 /* "unconditional" kil */
1451 kilmask = mach->ExecMask;
1452 }
1453 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1454 }
1455
1456
1457 /*
1458 * Fetch a texel using STR texture coordinates.
1459 */
1460 static void
1461 fetch_texel( struct tgsi_sampler *sampler,
1462 const union tgsi_exec_channel *s,
1463 const union tgsi_exec_channel *t,
1464 const union tgsi_exec_channel *p,
1465 float lodbias, /* XXX should be float[4] */
1466 union tgsi_exec_channel *r,
1467 union tgsi_exec_channel *g,
1468 union tgsi_exec_channel *b,
1469 union tgsi_exec_channel *a )
1470 {
1471 uint j;
1472 float rgba[NUM_CHANNELS][QUAD_SIZE];
1473
1474 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1475
1476 for (j = 0; j < 4; j++) {
1477 r->f[j] = rgba[0][j];
1478 g->f[j] = rgba[1][j];
1479 b->f[j] = rgba[2][j];
1480 a->f[j] = rgba[3][j];
1481 }
1482 }
1483
1484
1485 static void
1486 exec_tex(struct tgsi_exec_machine *mach,
1487 const struct tgsi_full_instruction *inst,
1488 boolean biasLod,
1489 boolean projected)
1490 {
1491 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1492 union tgsi_exec_channel r[8];
1493 uint chan_index;
1494 float lodBias;
1495
1496 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1497
1498 switch (inst->InstructionExtTexture.Texture) {
1499 case TGSI_TEXTURE_1D:
1500
1501 FETCH(&r[0], 0, CHAN_X);
1502
1503 if (projected) {
1504 FETCH(&r[1], 0, CHAN_W);
1505 micro_div( &r[0], &r[0], &r[1] );
1506 }
1507
1508 if (biasLod) {
1509 FETCH(&r[1], 0, CHAN_W);
1510 lodBias = r[2].f[0];
1511 }
1512 else
1513 lodBias = 0.0;
1514
1515 fetch_texel(&mach->Samplers[unit],
1516 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1517 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1518 break;
1519
1520 case TGSI_TEXTURE_2D:
1521 case TGSI_TEXTURE_RECT:
1522
1523 FETCH(&r[0], 0, CHAN_X);
1524 FETCH(&r[1], 0, CHAN_Y);
1525 FETCH(&r[2], 0, CHAN_Z);
1526
1527 if (projected) {
1528 FETCH(&r[3], 0, CHAN_W);
1529 micro_div( &r[0], &r[0], &r[3] );
1530 micro_div( &r[1], &r[1], &r[3] );
1531 micro_div( &r[2], &r[2], &r[3] );
1532 }
1533
1534 if (biasLod) {
1535 FETCH(&r[3], 0, CHAN_W);
1536 lodBias = r[3].f[0];
1537 }
1538 else
1539 lodBias = 0.0;
1540
1541 fetch_texel(&mach->Samplers[unit],
1542 &r[0], &r[1], &r[2], lodBias, /* inputs */
1543 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1544 break;
1545
1546 case TGSI_TEXTURE_3D:
1547 case TGSI_TEXTURE_CUBE:
1548
1549 FETCH(&r[0], 0, CHAN_X);
1550 FETCH(&r[1], 0, CHAN_Y);
1551 FETCH(&r[2], 0, CHAN_Z);
1552
1553 if (projected) {
1554 FETCH(&r[3], 0, CHAN_W);
1555 micro_div( &r[0], &r[0], &r[3] );
1556 micro_div( &r[1], &r[1], &r[3] );
1557 micro_div( &r[2], &r[2], &r[3] );
1558 }
1559
1560 if (biasLod) {
1561 FETCH(&r[3], 0, CHAN_W);
1562 lodBias = r[3].f[0];
1563 }
1564 else
1565 lodBias = 0.0;
1566
1567 fetch_texel(&mach->Samplers[unit],
1568 &r[0], &r[1], &r[2], lodBias,
1569 &r[0], &r[1], &r[2], &r[3]);
1570 break;
1571
1572 default:
1573 assert (0);
1574 }
1575
1576 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1577 STORE( &r[chan_index], 0, chan_index );
1578 }
1579 }
1580
1581
1582 /**
1583 * Evaluate a constant-valued coefficient at the position of the
1584 * current quad.
1585 */
1586 static void
1587 eval_constant_coef(
1588 struct tgsi_exec_machine *mach,
1589 unsigned attrib,
1590 unsigned chan )
1591 {
1592 unsigned i;
1593
1594 for( i = 0; i < QUAD_SIZE; i++ ) {
1595 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1596 }
1597 }
1598
1599 /**
1600 * Evaluate a linear-valued coefficient at the position of the
1601 * current quad.
1602 */
1603 static void
1604 eval_linear_coef(
1605 struct tgsi_exec_machine *mach,
1606 unsigned attrib,
1607 unsigned chan )
1608 {
1609 const float x = mach->QuadPos.xyzw[0].f[0];
1610 const float y = mach->QuadPos.xyzw[1].f[0];
1611 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1612 const float dady = mach->InterpCoefs[attrib].dady[chan];
1613 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1614 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1615 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1616 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1617 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1618 }
1619
1620 /**
1621 * Evaluate a perspective-valued coefficient at the position of the
1622 * current quad.
1623 */
1624 static void
1625 eval_perspective_coef(
1626 struct tgsi_exec_machine *mach,
1627 unsigned attrib,
1628 unsigned chan )
1629 {
1630 const float x = mach->QuadPos.xyzw[0].f[0];
1631 const float y = mach->QuadPos.xyzw[1].f[0];
1632 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1633 const float dady = mach->InterpCoefs[attrib].dady[chan];
1634 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1635 const float *w = mach->QuadPos.xyzw[3].f;
1636 /* divide by W here */
1637 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1638 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1639 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1640 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1641 }
1642
1643
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel `chan'
 * of input attribute `attrib' for the current quad.
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1648
1649 static void
1650 exec_declaration(
1651 struct tgsi_exec_machine *mach,
1652 const struct tgsi_full_declaration *decl )
1653 {
1654 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1655 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1656 unsigned first, last, mask;
1657 eval_coef_func eval;
1658
1659 first = decl->DeclarationRange.First;
1660 last = decl->DeclarationRange.Last;
1661 mask = decl->Declaration.UsageMask;
1662
1663 switch( decl->Declaration.Interpolate ) {
1664 case TGSI_INTERPOLATE_CONSTANT:
1665 eval = eval_constant_coef;
1666 break;
1667
1668 case TGSI_INTERPOLATE_LINEAR:
1669 eval = eval_linear_coef;
1670 break;
1671
1672 case TGSI_INTERPOLATE_PERSPECTIVE:
1673 eval = eval_perspective_coef;
1674 break;
1675
1676 default:
1677 assert( 0 );
1678 }
1679
1680 if( mask == TGSI_WRITEMASK_XYZW ) {
1681 unsigned i, j;
1682
1683 for( i = first; i <= last; i++ ) {
1684 for( j = 0; j < NUM_CHANNELS; j++ ) {
1685 eval( mach, i, j );
1686 }
1687 }
1688 }
1689 else {
1690 unsigned i, j;
1691
1692 for( j = 0; j < NUM_CHANNELS; j++ ) {
1693 if( mask & (1 << j) ) {
1694 for( i = first; i <= last; i++ ) {
1695 eval( mach, i, j );
1696 }
1697 }
1698 }
1699 }
1700 }
1701 }
1702 }
1703
1704 static void
1705 exec_instruction(
1706 struct tgsi_exec_machine *mach,
1707 const struct tgsi_full_instruction *inst,
1708 int *pc )
1709 {
1710 uint chan_index;
1711 union tgsi_exec_channel r[8];
1712
1713 (*pc)++;
1714
1715 switch (inst->Instruction.Opcode) {
1716 case TGSI_OPCODE_ARL:
1717 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1718 FETCH( &r[0], 0, chan_index );
1719 micro_f2it( &r[0], &r[0] );
1720 STORE( &r[0], 0, chan_index );
1721 }
1722 break;
1723
1724 case TGSI_OPCODE_MOV:
1725 case TGSI_OPCODE_SWZ:
1726 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1727 FETCH( &r[0], 0, chan_index );
1728 STORE( &r[0], 0, chan_index );
1729 }
1730 break;
1731
1732 case TGSI_OPCODE_LIT:
1733 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1734 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1735 }
1736
1737 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1738 FETCH( &r[0], 0, CHAN_X );
1739 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1740 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1741 STORE( &r[0], 0, CHAN_Y );
1742 }
1743
1744 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1745 FETCH( &r[1], 0, CHAN_Y );
1746 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1747
1748 FETCH( &r[2], 0, CHAN_W );
1749 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1750 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1751 micro_pow( &r[1], &r[1], &r[2] );
1752 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1753 STORE( &r[0], 0, CHAN_Z );
1754 }
1755 }
1756
1757 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1758 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1759 }
1760 break;
1761
1762 case TGSI_OPCODE_RCP:
1763 /* TGSI_OPCODE_RECIP */
1764 FETCH( &r[0], 0, CHAN_X );
1765 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1766 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1767 STORE( &r[0], 0, chan_index );
1768 }
1769 break;
1770
1771 case TGSI_OPCODE_RSQ:
1772 /* TGSI_OPCODE_RECIPSQRT */
1773 FETCH( &r[0], 0, CHAN_X );
1774 micro_sqrt( &r[0], &r[0] );
1775 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1776 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1777 STORE( &r[0], 0, chan_index );
1778 }
1779 break;
1780
1781 case TGSI_OPCODE_EXP:
1782 FETCH( &r[0], 0, CHAN_X );
1783 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1784 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1785 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1786 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1787 }
1788 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1789 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1790 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1791 }
1792 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1793 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1794 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1795 }
1796 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1797 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1798 }
1799 break;
1800
1801 case TGSI_OPCODE_LOG:
1802 FETCH( &r[0], 0, CHAN_X );
1803 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1804 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1805 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1806 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1807 STORE( &r[0], 0, CHAN_X );
1808 }
1809 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1810 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1811 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1812 STORE( &r[0], 0, CHAN_Y );
1813 }
1814 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1815 STORE( &r[1], 0, CHAN_Z );
1816 }
1817 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1818 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1819 }
1820 break;
1821
1822 case TGSI_OPCODE_MUL:
1823 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1824 {
1825 FETCH(&r[0], 0, chan_index);
1826 FETCH(&r[1], 1, chan_index);
1827
1828 micro_mul( &r[0], &r[0], &r[1] );
1829
1830 STORE(&r[0], 0, chan_index);
1831 }
1832 break;
1833
1834 case TGSI_OPCODE_ADD:
1835 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1836 FETCH( &r[0], 0, chan_index );
1837 FETCH( &r[1], 1, chan_index );
1838 micro_add( &r[0], &r[0], &r[1] );
1839 STORE( &r[0], 0, chan_index );
1840 }
1841 break;
1842
1843 case TGSI_OPCODE_DP3:
1844 /* TGSI_OPCODE_DOT3 */
1845 FETCH( &r[0], 0, CHAN_X );
1846 FETCH( &r[1], 1, CHAN_X );
1847 micro_mul( &r[0], &r[0], &r[1] );
1848
1849 FETCH( &r[1], 0, CHAN_Y );
1850 FETCH( &r[2], 1, CHAN_Y );
1851 micro_mul( &r[1], &r[1], &r[2] );
1852 micro_add( &r[0], &r[0], &r[1] );
1853
1854 FETCH( &r[1], 0, CHAN_Z );
1855 FETCH( &r[2], 1, CHAN_Z );
1856 micro_mul( &r[1], &r[1], &r[2] );
1857 micro_add( &r[0], &r[0], &r[1] );
1858
1859 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1860 STORE( &r[0], 0, chan_index );
1861 }
1862 break;
1863
1864 case TGSI_OPCODE_DP4:
1865 /* TGSI_OPCODE_DOT4 */
1866 FETCH(&r[0], 0, CHAN_X);
1867 FETCH(&r[1], 1, CHAN_X);
1868
1869 micro_mul( &r[0], &r[0], &r[1] );
1870
1871 FETCH(&r[1], 0, CHAN_Y);
1872 FETCH(&r[2], 1, CHAN_Y);
1873
1874 micro_mul( &r[1], &r[1], &r[2] );
1875 micro_add( &r[0], &r[0], &r[1] );
1876
1877 FETCH(&r[1], 0, CHAN_Z);
1878 FETCH(&r[2], 1, CHAN_Z);
1879
1880 micro_mul( &r[1], &r[1], &r[2] );
1881 micro_add( &r[0], &r[0], &r[1] );
1882
1883 FETCH(&r[1], 0, CHAN_W);
1884 FETCH(&r[2], 1, CHAN_W);
1885
1886 micro_mul( &r[1], &r[1], &r[2] );
1887 micro_add( &r[0], &r[0], &r[1] );
1888
1889 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1890 STORE( &r[0], 0, chan_index );
1891 }
1892 break;
1893
1894 case TGSI_OPCODE_DST:
1895 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1896 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1897 }
1898
1899 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1900 FETCH( &r[0], 0, CHAN_Y );
1901 FETCH( &r[1], 1, CHAN_Y);
1902 micro_mul( &r[0], &r[0], &r[1] );
1903 STORE( &r[0], 0, CHAN_Y );
1904 }
1905
1906 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1907 FETCH( &r[0], 0, CHAN_Z );
1908 STORE( &r[0], 0, CHAN_Z );
1909 }
1910
1911 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1912 FETCH( &r[0], 1, CHAN_W );
1913 STORE( &r[0], 0, CHAN_W );
1914 }
1915 break;
1916
1917 case TGSI_OPCODE_MIN:
1918 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1919 FETCH(&r[0], 0, chan_index);
1920 FETCH(&r[1], 1, chan_index);
1921
1922 /* XXX use micro_min()?? */
1923 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1924
1925 STORE(&r[0], 0, chan_index);
1926 }
1927 break;
1928
1929 case TGSI_OPCODE_MAX:
1930 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1931 FETCH(&r[0], 0, chan_index);
1932 FETCH(&r[1], 1, chan_index);
1933
1934 /* XXX use micro_max()?? */
1935 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1936
1937 STORE(&r[0], 0, chan_index );
1938 }
1939 break;
1940
1941 case TGSI_OPCODE_SLT:
1942 /* TGSI_OPCODE_SETLT */
1943 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1944 FETCH( &r[0], 0, chan_index );
1945 FETCH( &r[1], 1, chan_index );
1946 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1947 STORE( &r[0], 0, chan_index );
1948 }
1949 break;
1950
1951 case TGSI_OPCODE_SGE:
1952 /* TGSI_OPCODE_SETGE */
1953 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1954 FETCH( &r[0], 0, chan_index );
1955 FETCH( &r[1], 1, chan_index );
1956 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1957 STORE( &r[0], 0, chan_index );
1958 }
1959 break;
1960
1961 case TGSI_OPCODE_MAD:
1962 /* TGSI_OPCODE_MADD */
1963 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1964 FETCH( &r[0], 0, chan_index );
1965 FETCH( &r[1], 1, chan_index );
1966 micro_mul( &r[0], &r[0], &r[1] );
1967 FETCH( &r[1], 2, chan_index );
1968 micro_add( &r[0], &r[0], &r[1] );
1969 STORE( &r[0], 0, chan_index );
1970 }
1971 break;
1972
1973 case TGSI_OPCODE_SUB:
1974 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1975 FETCH(&r[0], 0, chan_index);
1976 FETCH(&r[1], 1, chan_index);
1977
1978 micro_sub( &r[0], &r[0], &r[1] );
1979
1980 STORE(&r[0], 0, chan_index);
1981 }
1982 break;
1983
1984 case TGSI_OPCODE_LERP:
1985 /* TGSI_OPCODE_LRP */
1986 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1987 FETCH(&r[0], 0, chan_index);
1988 FETCH(&r[1], 1, chan_index);
1989 FETCH(&r[2], 2, chan_index);
1990
1991 micro_sub( &r[1], &r[1], &r[2] );
1992 micro_mul( &r[0], &r[0], &r[1] );
1993 micro_add( &r[0], &r[0], &r[2] );
1994
1995 STORE(&r[0], 0, chan_index);
1996 }
1997 break;
1998
1999 case TGSI_OPCODE_CND:
2000 assert (0);
2001 break;
2002
2003 case TGSI_OPCODE_CND0:
2004 assert (0);
2005 break;
2006
2007 case TGSI_OPCODE_DOT2ADD:
2008 /* TGSI_OPCODE_DP2A */
2009 assert (0);
2010 break;
2011
2012 case TGSI_OPCODE_INDEX:
2013 assert (0);
2014 break;
2015
2016 case TGSI_OPCODE_NEGATE:
2017 assert (0);
2018 break;
2019
2020 case TGSI_OPCODE_FRAC:
2021 /* TGSI_OPCODE_FRC */
2022 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2023 FETCH( &r[0], 0, chan_index );
2024 micro_frc( &r[0], &r[0] );
2025 STORE( &r[0], 0, chan_index );
2026 }
2027 break;
2028
2029 case TGSI_OPCODE_CLAMP:
2030 assert (0);
2031 break;
2032
2033 case TGSI_OPCODE_FLOOR:
2034 /* TGSI_OPCODE_FLR */
2035 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2036 FETCH( &r[0], 0, chan_index );
2037 micro_flr( &r[0], &r[0] );
2038 STORE( &r[0], 0, chan_index );
2039 }
2040 break;
2041
2042 case TGSI_OPCODE_ROUND:
2043 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2044 FETCH( &r[0], 0, chan_index );
2045 micro_rnd( &r[0], &r[0] );
2046 STORE( &r[0], 0, chan_index );
2047 }
2048 break;
2049
2050 case TGSI_OPCODE_EXPBASE2:
2051 /* TGSI_OPCODE_EX2 */
2052 FETCH(&r[0], 0, CHAN_X);
2053
2054 #if FAST_MATH
2055 micro_exp2( &r[0], &r[0] );
2056 #else
2057 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2058 #endif
2059
2060 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2061 STORE( &r[0], 0, chan_index );
2062 }
2063 break;
2064
2065 case TGSI_OPCODE_LOGBASE2:
2066 /* TGSI_OPCODE_LG2 */
2067 FETCH( &r[0], 0, CHAN_X );
2068 micro_lg2( &r[0], &r[0] );
2069 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2070 STORE( &r[0], 0, chan_index );
2071 }
2072 break;
2073
2074 case TGSI_OPCODE_POWER:
2075 /* TGSI_OPCODE_POW */
2076 FETCH(&r[0], 0, CHAN_X);
2077 FETCH(&r[1], 1, CHAN_X);
2078
2079 micro_pow( &r[0], &r[0], &r[1] );
2080
2081 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2082 STORE( &r[0], 0, chan_index );
2083 }
2084 break;
2085
2086 case TGSI_OPCODE_CROSSPRODUCT:
2087 /* TGSI_OPCODE_XPD */
2088 FETCH(&r[0], 0, CHAN_Y);
2089 FETCH(&r[1], 1, CHAN_Z);
2090
2091 micro_mul( &r[2], &r[0], &r[1] );
2092
2093 FETCH(&r[3], 0, CHAN_Z);
2094 FETCH(&r[4], 1, CHAN_Y);
2095
2096 micro_mul( &r[5], &r[3], &r[4] );
2097 micro_sub( &r[2], &r[2], &r[5] );
2098
2099 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2100 STORE( &r[2], 0, CHAN_X );
2101 }
2102
2103 FETCH(&r[2], 1, CHAN_X);
2104
2105 micro_mul( &r[3], &r[3], &r[2] );
2106
2107 FETCH(&r[5], 0, CHAN_X);
2108
2109 micro_mul( &r[1], &r[1], &r[5] );
2110 micro_sub( &r[3], &r[3], &r[1] );
2111
2112 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2113 STORE( &r[3], 0, CHAN_Y );
2114 }
2115
2116 micro_mul( &r[5], &r[5], &r[4] );
2117 micro_mul( &r[0], &r[0], &r[2] );
2118 micro_sub( &r[5], &r[5], &r[0] );
2119
2120 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2121 STORE( &r[5], 0, CHAN_Z );
2122 }
2123
2124 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2125 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2126 }
2127 break;
2128
2129 case TGSI_OPCODE_MULTIPLYMATRIX:
2130 assert (0);
2131 break;
2132
2133 case TGSI_OPCODE_ABS:
2134 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2135 FETCH(&r[0], 0, chan_index);
2136
2137 micro_abs( &r[0], &r[0] );
2138
2139 STORE(&r[0], 0, chan_index);
2140 }
2141 break;
2142
2143 case TGSI_OPCODE_RCC:
2144 assert (0);
2145 break;
2146
2147 case TGSI_OPCODE_DPH:
2148 FETCH(&r[0], 0, CHAN_X);
2149 FETCH(&r[1], 1, CHAN_X);
2150
2151 micro_mul( &r[0], &r[0], &r[1] );
2152
2153 FETCH(&r[1], 0, CHAN_Y);
2154 FETCH(&r[2], 1, CHAN_Y);
2155
2156 micro_mul( &r[1], &r[1], &r[2] );
2157 micro_add( &r[0], &r[0], &r[1] );
2158
2159 FETCH(&r[1], 0, CHAN_Z);
2160 FETCH(&r[2], 1, CHAN_Z);
2161
2162 micro_mul( &r[1], &r[1], &r[2] );
2163 micro_add( &r[0], &r[0], &r[1] );
2164
2165 FETCH(&r[1], 1, CHAN_W);
2166
2167 micro_add( &r[0], &r[0], &r[1] );
2168
2169 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2170 STORE( &r[0], 0, chan_index );
2171 }
2172 break;
2173
2174 case TGSI_OPCODE_COS:
2175 FETCH(&r[0], 0, CHAN_X);
2176
2177 micro_cos( &r[0], &r[0] );
2178
2179 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2180 STORE( &r[0], 0, chan_index );
2181 }
2182 break;
2183
2184 case TGSI_OPCODE_DDX:
2185 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2186 FETCH( &r[0], 0, chan_index );
2187 micro_ddx( &r[0], &r[0] );
2188 STORE( &r[0], 0, chan_index );
2189 }
2190 break;
2191
2192 case TGSI_OPCODE_DDY:
2193 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2194 FETCH( &r[0], 0, chan_index );
2195 micro_ddy( &r[0], &r[0] );
2196 STORE( &r[0], 0, chan_index );
2197 }
2198 break;
2199
2200 case TGSI_OPCODE_KILP:
2201 exec_kilp (mach, inst);
2202 break;
2203
2204 case TGSI_OPCODE_KIL:
2205 exec_kil (mach, inst);
2206 break;
2207
2208 case TGSI_OPCODE_PK2H:
2209 assert (0);
2210 break;
2211
2212 case TGSI_OPCODE_PK2US:
2213 assert (0);
2214 break;
2215
2216 case TGSI_OPCODE_PK4B:
2217 assert (0);
2218 break;
2219
2220 case TGSI_OPCODE_PK4UB:
2221 assert (0);
2222 break;
2223
2224 case TGSI_OPCODE_RFL:
2225 assert (0);
2226 break;
2227
2228 case TGSI_OPCODE_SEQ:
2229 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2230 FETCH( &r[0], 0, chan_index );
2231 FETCH( &r[1], 1, chan_index );
2232 micro_eq( &r[0], &r[0], &r[1],
2233 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2234 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2235 STORE( &r[0], 0, chan_index );
2236 }
2237 break;
2238
2239 case TGSI_OPCODE_SFL:
2240 assert (0);
2241 break;
2242
2243 case TGSI_OPCODE_SGT:
2244 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2245 FETCH( &r[0], 0, chan_index );
2246 FETCH( &r[1], 1, chan_index );
2247 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2248 STORE( &r[0], 0, chan_index );
2249 }
2250 break;
2251
2252 case TGSI_OPCODE_SIN:
2253 FETCH( &r[0], 0, CHAN_X );
2254 micro_sin( &r[0], &r[0] );
2255 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2256 STORE( &r[0], 0, chan_index );
2257 }
2258 break;
2259
2260 case TGSI_OPCODE_SLE:
2261 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2262 FETCH( &r[0], 0, chan_index );
2263 FETCH( &r[1], 1, chan_index );
2264 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2265 STORE( &r[0], 0, chan_index );
2266 }
2267 break;
2268
2269 case TGSI_OPCODE_SNE:
2270 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2271 FETCH( &r[0], 0, chan_index );
2272 FETCH( &r[1], 1, chan_index );
2273 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2274 STORE( &r[0], 0, chan_index );
2275 }
2276 break;
2277
2278 case TGSI_OPCODE_STR:
2279 assert (0);
2280 break;
2281
2282 case TGSI_OPCODE_TEX:
2283 /* simple texture lookup */
2284 /* src[0] = texcoord */
2285 /* src[1] = sampler unit */
2286 exec_tex(mach, inst, FALSE, FALSE);
2287 break;
2288
2289 case TGSI_OPCODE_TXB:
2290 /* Texture lookup with lod bias */
2291 /* src[0] = texcoord (src[0].w = LOD bias) */
2292 /* src[1] = sampler unit */
2293 exec_tex(mach, inst, TRUE, FALSE);
2294 break;
2295
2296 case TGSI_OPCODE_TXD:
2297 /* Texture lookup with explicit partial derivatives */
2298 /* src[0] = texcoord */
2299 /* src[1] = d[strq]/dx */
2300 /* src[2] = d[strq]/dy */
2301 /* src[3] = sampler unit */
2302 assert (0);
2303 break;
2304
2305 case TGSI_OPCODE_TXL:
2306 /* Texture lookup with explicit LOD */
2307 /* src[0] = texcoord (src[0].w = LOD) */
2308 /* src[1] = sampler unit */
2309 exec_tex(mach, inst, TRUE, FALSE);
2310 break;
2311
2312 case TGSI_OPCODE_TXP:
2313 /* Texture lookup with projection */
2314 /* src[0] = texcoord (src[0].w = projection) */
2315 /* src[1] = sampler unit */
2316 exec_tex(mach, inst, FALSE, TRUE);
2317 break;
2318
2319 case TGSI_OPCODE_UP2H:
2320 assert (0);
2321 break;
2322
2323 case TGSI_OPCODE_UP2US:
2324 assert (0);
2325 break;
2326
2327 case TGSI_OPCODE_UP4B:
2328 assert (0);
2329 break;
2330
2331 case TGSI_OPCODE_UP4UB:
2332 assert (0);
2333 break;
2334
2335 case TGSI_OPCODE_X2D:
2336 assert (0);
2337 break;
2338
2339 case TGSI_OPCODE_ARA:
2340 assert (0);
2341 break;
2342
2343 case TGSI_OPCODE_ARR:
2344 assert (0);
2345 break;
2346
2347 case TGSI_OPCODE_BRA:
2348 assert (0);
2349 break;
2350
2351 case TGSI_OPCODE_CAL:
2352 /* skip the call if no execution channels are enabled */
2353 if (mach->ExecMask) {
2354 /* do the call */
2355
2356 /* push the Cond, Loop, Cont stacks */
2357 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2358 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2359 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2360 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2361 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2362 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2363
2364 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2365 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2366
2367 /* note that PC was already incremented above */
2368 mach->CallStack[mach->CallStackTop++] = *pc;
2369 *pc = inst->InstructionExtLabel.Label;
2370 }
2371 break;
2372
2373 case TGSI_OPCODE_RET:
2374 mach->FuncMask &= ~mach->ExecMask;
2375 UPDATE_EXEC_MASK(mach);
2376
2377 if (mach->FuncMask == 0x0) {
2378 /* really return now (otherwise, keep executing) */
2379
2380 if (mach->CallStackTop == 0) {
2381 /* returning from main() */
2382 *pc = -1;
2383 return;
2384 }
2385 *pc = mach->CallStack[--mach->CallStackTop];
2386
2387 /* pop the Cond, Loop, Cont stacks */
2388 assert(mach->CondStackTop > 0);
2389 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2390 assert(mach->LoopStackTop > 0);
2391 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2392 assert(mach->ContStackTop > 0);
2393 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2394 assert(mach->FuncStackTop > 0);
2395 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2396
2397 UPDATE_EXEC_MASK(mach);
2398 }
2399 break;
2400
2401 case TGSI_OPCODE_SSG:
2402 assert (0);
2403 break;
2404
2405 case TGSI_OPCODE_CMP:
2406 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2407 FETCH(&r[0], 0, chan_index);
2408 FETCH(&r[1], 1, chan_index);
2409 FETCH(&r[2], 2, chan_index);
2410
2411 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2412
2413 STORE(&r[0], 0, chan_index);
2414 }
2415 break;
2416
2417 case TGSI_OPCODE_SCS:
2418 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2419 FETCH( &r[0], 0, CHAN_X );
2420 }
2421 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2422 micro_cos( &r[1], &r[0] );
2423 STORE( &r[1], 0, CHAN_X );
2424 }
2425 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2426 micro_sin( &r[1], &r[0] );
2427 STORE( &r[1], 0, CHAN_Y );
2428 }
2429 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2430 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2431 }
2432 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2433 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2434 }
2435 break;
2436
2437 case TGSI_OPCODE_NRM:
2438 assert (0);
2439 break;
2440
2441 case TGSI_OPCODE_DIV:
2442 assert( 0 );
2443 break;
2444
2445 case TGSI_OPCODE_DP2:
2446 FETCH( &r[0], 0, CHAN_X );
2447 FETCH( &r[1], 1, CHAN_X );
2448 micro_mul( &r[0], &r[0], &r[1] );
2449
2450 FETCH( &r[1], 0, CHAN_Y );
2451 FETCH( &r[2], 1, CHAN_Y );
2452 micro_mul( &r[1], &r[1], &r[2] );
2453 micro_add( &r[0], &r[0], &r[1] );
2454
2455 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2456 STORE( &r[0], 0, chan_index );
2457 }
2458 break;
2459
2460 case TGSI_OPCODE_IF:
2461 /* push CondMask */
2462 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2463 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2464 FETCH( &r[0], 0, CHAN_X );
2465 /* update CondMask */
2466 if( ! r[0].u[0] ) {
2467 mach->CondMask &= ~0x1;
2468 }
2469 if( ! r[0].u[1] ) {
2470 mach->CondMask &= ~0x2;
2471 }
2472 if( ! r[0].u[2] ) {
2473 mach->CondMask &= ~0x4;
2474 }
2475 if( ! r[0].u[3] ) {
2476 mach->CondMask &= ~0x8;
2477 }
2478 UPDATE_EXEC_MASK(mach);
2479 /* Todo: If CondMask==0, jump to ELSE */
2480 break;
2481
2482 case TGSI_OPCODE_ELSE:
2483 /* invert CondMask wrt previous mask */
2484 {
2485 uint prevMask;
2486 assert(mach->CondStackTop > 0);
2487 prevMask = mach->CondStack[mach->CondStackTop - 1];
2488 mach->CondMask = ~mach->CondMask & prevMask;
2489 UPDATE_EXEC_MASK(mach);
2490 /* Todo: If CondMask==0, jump to ENDIF */
2491 }
2492 break;
2493
2494 case TGSI_OPCODE_ENDIF:
2495 /* pop CondMask */
2496 assert(mach->CondStackTop > 0);
2497 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2498 UPDATE_EXEC_MASK(mach);
2499 break;
2500
2501 case TGSI_OPCODE_END:
2502 /* halt execution */
2503 *pc = -1;
2504 break;
2505
2506 case TGSI_OPCODE_REP:
2507 assert (0);
2508 break;
2509
2510 case TGSI_OPCODE_ENDREP:
2511 assert (0);
2512 break;
2513
2514 case TGSI_OPCODE_PUSHA:
2515 assert (0);
2516 break;
2517
2518 case TGSI_OPCODE_POPA:
2519 assert (0);
2520 break;
2521
2522 case TGSI_OPCODE_CEIL:
2523 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2524 FETCH( &r[0], 0, chan_index );
2525 micro_ceil( &r[0], &r[0] );
2526 STORE( &r[0], 0, chan_index );
2527 }
2528 break;
2529
2530 case TGSI_OPCODE_I2F:
2531 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2532 FETCH( &r[0], 0, chan_index );
2533 micro_i2f( &r[0], &r[0] );
2534 STORE( &r[0], 0, chan_index );
2535 }
2536 break;
2537
2538 case TGSI_OPCODE_NOT:
2539 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2540 FETCH( &r[0], 0, chan_index );
2541 micro_not( &r[0], &r[0] );
2542 STORE( &r[0], 0, chan_index );
2543 }
2544 break;
2545
2546 case TGSI_OPCODE_TRUNC:
2547 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2548 FETCH( &r[0], 0, chan_index );
2549 micro_trunc( &r[0], &r[0] );
2550 STORE( &r[0], 0, chan_index );
2551 }
2552 break;
2553
2554 case TGSI_OPCODE_SHL:
2555 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2556 FETCH( &r[0], 0, chan_index );
2557 FETCH( &r[1], 1, chan_index );
2558 micro_shl( &r[0], &r[0], &r[1] );
2559 STORE( &r[0], 0, chan_index );
2560 }
2561 break;
2562
2563 case TGSI_OPCODE_SHR:
2564 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2565 FETCH( &r[0], 0, chan_index );
2566 FETCH( &r[1], 1, chan_index );
2567 micro_ishr( &r[0], &r[0], &r[1] );
2568 STORE( &r[0], 0, chan_index );
2569 }
2570 break;
2571
2572 case TGSI_OPCODE_AND:
2573 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2574 FETCH( &r[0], 0, chan_index );
2575 FETCH( &r[1], 1, chan_index );
2576 micro_and( &r[0], &r[0], &r[1] );
2577 STORE( &r[0], 0, chan_index );
2578 }
2579 break;
2580
2581 case TGSI_OPCODE_OR:
2582 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2583 FETCH( &r[0], 0, chan_index );
2584 FETCH( &r[1], 1, chan_index );
2585 micro_or( &r[0], &r[0], &r[1] );
2586 STORE( &r[0], 0, chan_index );
2587 }
2588 break;
2589
2590 case TGSI_OPCODE_MOD:
2591 assert (0);
2592 break;
2593
2594 case TGSI_OPCODE_XOR:
2595 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2596 FETCH( &r[0], 0, chan_index );
2597 FETCH( &r[1], 1, chan_index );
2598 micro_xor( &r[0], &r[0], &r[1] );
2599 STORE( &r[0], 0, chan_index );
2600 }
2601 break;
2602
2603 case TGSI_OPCODE_SAD:
2604 assert (0);
2605 break;
2606
2607 case TGSI_OPCODE_TXF:
2608 assert (0);
2609 break;
2610
2611 case TGSI_OPCODE_TXQ:
2612 assert (0);
2613 break;
2614
2615 case TGSI_OPCODE_EMIT:
2616 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2617 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2618 break;
2619
2620 case TGSI_OPCODE_ENDPRIM:
2621 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2622 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2623 break;
2624
2625 case TGSI_OPCODE_LOOP:
2626 /* fall-through (for now) */
2627 case TGSI_OPCODE_BGNLOOP2:
2628 /* push LoopMask and ContMasks */
2629 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2630 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2631 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2632 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2633 break;
2634
2635 case TGSI_OPCODE_ENDLOOP:
2636 /* fall-through (for now at least) */
2637 case TGSI_OPCODE_ENDLOOP2:
2638 /* Restore ContMask, but don't pop */
2639 assert(mach->ContStackTop > 0);
2640 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2641 UPDATE_EXEC_MASK(mach);
2642 if (mach->ExecMask) {
2643 /* repeat loop: jump to instruction just past BGNLOOP */
2644 *pc = inst->InstructionExtLabel.Label + 1;
2645 }
2646 else {
2647 /* exit loop: pop LoopMask */
2648 assert(mach->LoopStackTop > 0);
2649 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2650 /* pop ContMask */
2651 assert(mach->ContStackTop > 0);
2652 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2653 }
2654 UPDATE_EXEC_MASK(mach);
2655 break;
2656
2657 case TGSI_OPCODE_BRK:
2658 /* turn off loop channels for each enabled exec channel */
2659 mach->LoopMask &= ~mach->ExecMask;
2660 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2661 UPDATE_EXEC_MASK(mach);
2662 break;
2663
2664 case TGSI_OPCODE_CONT:
2665 /* turn off cont channels for each enabled exec channel */
2666 mach->ContMask &= ~mach->ExecMask;
2667 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2668 UPDATE_EXEC_MASK(mach);
2669 break;
2670
2671 case TGSI_OPCODE_BGNSUB:
2672 /* no-op */
2673 break;
2674
2675 case TGSI_OPCODE_ENDSUB:
2676 /* no-op */
2677 break;
2678
2679 case TGSI_OPCODE_NOISE1:
2680 assert( 0 );
2681 break;
2682
2683 case TGSI_OPCODE_NOISE2:
2684 assert( 0 );
2685 break;
2686
2687 case TGSI_OPCODE_NOISE3:
2688 assert( 0 );
2689 break;
2690
2691 case TGSI_OPCODE_NOISE4:
2692 assert( 0 );
2693 break;
2694
2695 case TGSI_OPCODE_NOP:
2696 break;
2697
2698 default:
2699 assert( 0 );
2700 }
2701 }
2702
2703
2704 /**
2705 * Run TGSI interpreter.
2706 * \return bitmask of "alive" quad components
2707 */
2708 uint
2709 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2710 {
2711 uint i;
2712 int pc = 0;
2713
2714 mach->CondMask = 0xf;
2715 mach->LoopMask = 0xf;
2716 mach->ContMask = 0xf;
2717 mach->FuncMask = 0xf;
2718 mach->ExecMask = 0xf;
2719
2720 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2721 assert(mach->CondStackTop == 0);
2722 assert(mach->LoopStackTop == 0);
2723 assert(mach->ContStackTop == 0);
2724 assert(mach->CallStackTop == 0);
2725
2726 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2727 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2728
2729 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2730 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2731 mach->Primitives[0] = 0;
2732 }
2733
2734 for (i = 0; i < QUAD_SIZE; i++) {
2735 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2736 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2737 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2738 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2739 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2740 }
2741
2742 /* execute declarations (interpolants) */
2743 for (i = 0; i < mach->NumDeclarations; i++) {
2744 exec_declaration( mach, mach->Declarations+i );
2745 }
2746
2747 /* execute instructions, until pc is set to -1 */
2748 while (pc != -1) {
2749 assert(pc < (int) mach->NumInstructions);
2750 exec_instruction( mach, mach->Instructions + pc, &pc );
2751 }
2752
2753 #if 0
2754 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2755 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2756 /*
2757 * Scale back depth component.
2758 */
2759 for (i = 0; i < 4; i++)
2760 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2761 }
2762 #endif
2763
2764 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2765 }
2766
2767