Merge branch 'master' into gallium-0.2
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask; see UPDATE_EXEC_MASK). The first three are controlled
45 * by the flow control instructions (namely: IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_parse.h"
57 #include "tgsi/tgsi_util.h"
58 #include "tgsi_exec.h"
59 #include "util/u_memory.h"
60 #include "util/u_math.h"
61
/*
 * When non-zero, micro_exp2(), micro_lg2() and micro_pow() use the
 * util_fast_*() routines instead of the libm functions.
 */
#define FAST_MATH 1

/*
 * Element indices within a 4-wide channel, named by position in the quad.
 * micro_ddx() and micro_ddy() use these to pick the elements they subtract.
 */
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3

/* Indices of the x, y, z and w channels of a register. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
 *
 * Each pair names a slot mach->Temps[<name>_I].xyzw[<name>_C].
 * tgsi_exec_machine_init() preloads the 0, 7F, 80, FF, 1, 2, 128, M128, 3
 * and HALF slots with the constant their name suggests.
 */
#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I TGSI_EXEC_TEMP_128_I
#define TEMP_128_C TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0 TGSI_EXEC_TEMP_R0
106
/**
 * Non-zero when bit CHAN is set in the write mask of the first destination
 * register (FullDstRegisters[0]) of instruction INST.
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test as IS_CHANNEL_ENABLED, for the second destination register. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the statement that follows once for every channel index
 * in [0, NUM_CHANNELS) that is enabled in INST's first destination write
 * mask.  CHAN must be an assignable integer variable.
 *
 * The expansion ends in a bare 'if', so an 'else' written directly after the
 * controlled statement attaches to that hidden 'if'.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same loop as FOR_EACH_ENABLED_CHANNEL, for the second destination register. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
120
121
/**
 * Recompute MACH->ExecMask as the bitwise AND of the condition, loop,
 * continue and function masks: a bit stays set in the execution mask only
 * if it is set in all four.
 *
 * MACH is a pointer expression; it is parenthesized at every use so that
 * arguments such as 'machines + 1' expand correctly.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
125
126 /**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131 void
132 tgsi_exec_machine_bind_shader(
133 struct tgsi_exec_machine *mach,
134 const struct tgsi_token *tokens,
135 uint numSamplers,
136 struct tgsi_sampler *samplers)
137 {
138 uint k;
139 struct tgsi_parse_context parse;
140 struct tgsi_exec_labels *labels = &mach->Labels;
141 struct tgsi_full_instruction *instructions;
142 struct tgsi_full_declaration *declarations;
143 uint maxInstructions = 10, numInstructions = 0;
144 uint maxDeclarations = 10, numDeclarations = 0;
145 uint instno = 0;
146
147 #if 0
148 tgsi_dump(tokens, 0);
149 #endif
150
151 util_init_math();
152
153 mach->Tokens = tokens;
154 mach->Samplers = samplers;
155
156 k = tgsi_parse_init (&parse, mach->Tokens);
157 if (k != TGSI_PARSE_OK) {
158 debug_printf( "Problem parsing!\n" );
159 return;
160 }
161
162 mach->Processor = parse.FullHeader.Processor.Processor;
163 mach->ImmLimit = 0;
164 labels->count = 0;
165
166 declarations = (struct tgsi_full_declaration *)
167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169 if (!declarations) {
170 return;
171 }
172
173 instructions = (struct tgsi_full_instruction *)
174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176 if (!instructions) {
177 FREE( declarations );
178 return;
179 }
180
181 while( !tgsi_parse_end_of_tokens( &parse ) ) {
182 uint pointer = parse.Position;
183 uint i;
184
185 tgsi_parse_token( &parse );
186 switch( parse.FullToken.Token.Type ) {
187 case TGSI_TOKEN_TYPE_DECLARATION:
188 /* save expanded declaration */
189 if (numDeclarations == maxDeclarations) {
190 declarations = REALLOC(declarations,
191 maxDeclarations
192 * sizeof(struct tgsi_full_declaration),
193 (maxDeclarations + 10)
194 * sizeof(struct tgsi_full_declaration));
195 maxDeclarations += 10;
196 }
197 memcpy(declarations + numDeclarations,
198 &parse.FullToken.FullDeclaration,
199 sizeof(declarations[0]));
200 numDeclarations++;
201 break;
202
203 case TGSI_TOKEN_TYPE_IMMEDIATE:
204 {
205 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
206 assert( size % 4 == 0 );
207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209 for( i = 0; i < size; i++ ) {
210 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212 }
213 mach->ImmLimit += size / 4;
214 }
215 break;
216
217 case TGSI_TOKEN_TYPE_INSTRUCTION:
218 assert( labels->count < MAX_LABELS );
219
220 labels->labels[labels->count][0] = instno;
221 labels->labels[labels->count][1] = pointer;
222 labels->count++;
223
224 /* save expanded instruction */
225 if (numInstructions == maxInstructions) {
226 instructions = REALLOC(instructions,
227 maxInstructions
228 * sizeof(struct tgsi_full_instruction),
229 (maxInstructions + 10)
230 * sizeof(struct tgsi_full_instruction));
231 maxInstructions += 10;
232 }
233 memcpy(instructions + numInstructions,
234 &parse.FullToken.FullInstruction,
235 sizeof(instructions[0]));
236 numInstructions++;
237 break;
238
239 default:
240 assert( 0 );
241 }
242 }
243 tgsi_parse_free (&parse);
244
245 if (mach->Declarations) {
246 FREE( mach->Declarations );
247 }
248 mach->Declarations = declarations;
249 mach->NumDeclarations = numDeclarations;
250
251 if (mach->Instructions) {
252 FREE( mach->Instructions );
253 }
254 mach->Instructions = instructions;
255 mach->NumInstructions = numInstructions;
256 }
257
258
259 void
260 tgsi_exec_machine_init(
261 struct tgsi_exec_machine *mach )
262 {
263 uint i;
264
265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268 /* Setup constants. */
269 for( i = 0; i < 4; i++ ) {
270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280 }
281 }
282
283
284 void
285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286 {
287 if (mach->Instructions) {
288 FREE(mach->Instructions);
289 mach->Instructions = NULL;
290 mach->NumInstructions = 0;
291 }
292 if (mach->Declarations) {
293 FREE(mach->Declarations);
294 mach->Declarations = NULL;
295 mach->NumDeclarations = 0;
296 }
297 }
298
299
300 static void
301 micro_abs(
302 union tgsi_exec_channel *dst,
303 const union tgsi_exec_channel *src )
304 {
305 dst->f[0] = fabsf( src->f[0] );
306 dst->f[1] = fabsf( src->f[1] );
307 dst->f[2] = fabsf( src->f[2] );
308 dst->f[3] = fabsf( src->f[3] );
309 }
310
311 static void
312 micro_add(
313 union tgsi_exec_channel *dst,
314 const union tgsi_exec_channel *src0,
315 const union tgsi_exec_channel *src1 )
316 {
317 dst->f[0] = src0->f[0] + src1->f[0];
318 dst->f[1] = src0->f[1] + src1->f[1];
319 dst->f[2] = src0->f[2] + src1->f[2];
320 dst->f[3] = src0->f[3] + src1->f[3];
321 }
322
323 static void
324 micro_iadd(
325 union tgsi_exec_channel *dst,
326 const union tgsi_exec_channel *src0,
327 const union tgsi_exec_channel *src1 )
328 {
329 dst->i[0] = src0->i[0] + src1->i[0];
330 dst->i[1] = src0->i[1] + src1->i[1];
331 dst->i[2] = src0->i[2] + src1->i[2];
332 dst->i[3] = src0->i[3] + src1->i[3];
333 }
334
335 static void
336 micro_and(
337 union tgsi_exec_channel *dst,
338 const union tgsi_exec_channel *src0,
339 const union tgsi_exec_channel *src1 )
340 {
341 dst->u[0] = src0->u[0] & src1->u[0];
342 dst->u[1] = src0->u[1] & src1->u[1];
343 dst->u[2] = src0->u[2] & src1->u[2];
344 dst->u[3] = src0->u[3] & src1->u[3];
345 }
346
347 static void
348 micro_ceil(
349 union tgsi_exec_channel *dst,
350 const union tgsi_exec_channel *src )
351 {
352 dst->f[0] = ceilf( src->f[0] );
353 dst->f[1] = ceilf( src->f[1] );
354 dst->f[2] = ceilf( src->f[2] );
355 dst->f[3] = ceilf( src->f[3] );
356 }
357
358 static void
359 micro_cos(
360 union tgsi_exec_channel *dst,
361 const union tgsi_exec_channel *src )
362 {
363 dst->f[0] = cosf( src->f[0] );
364 dst->f[1] = cosf( src->f[1] );
365 dst->f[2] = cosf( src->f[2] );
366 dst->f[3] = cosf( src->f[3] );
367 }
368
369 static void
370 micro_ddx(
371 union tgsi_exec_channel *dst,
372 const union tgsi_exec_channel *src )
373 {
374 dst->f[0] =
375 dst->f[1] =
376 dst->f[2] =
377 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
378 }
379
380 static void
381 micro_ddy(
382 union tgsi_exec_channel *dst,
383 const union tgsi_exec_channel *src )
384 {
385 dst->f[0] =
386 dst->f[1] =
387 dst->f[2] =
388 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
389 }
390
391 static void
392 micro_div(
393 union tgsi_exec_channel *dst,
394 const union tgsi_exec_channel *src0,
395 const union tgsi_exec_channel *src1 )
396 {
397 if (src1->f[0] != 0) {
398 dst->f[0] = src0->f[0] / src1->f[0];
399 }
400 if (src1->f[1] != 0) {
401 dst->f[1] = src0->f[1] / src1->f[1];
402 }
403 if (src1->f[2] != 0) {
404 dst->f[2] = src0->f[2] / src1->f[2];
405 }
406 if (src1->f[3] != 0) {
407 dst->f[3] = src0->f[3] / src1->f[3];
408 }
409 }
410
411 static void
412 micro_udiv(
413 union tgsi_exec_channel *dst,
414 const union tgsi_exec_channel *src0,
415 const union tgsi_exec_channel *src1 )
416 {
417 dst->u[0] = src0->u[0] / src1->u[0];
418 dst->u[1] = src0->u[1] / src1->u[1];
419 dst->u[2] = src0->u[2] / src1->u[2];
420 dst->u[3] = src0->u[3] / src1->u[3];
421 }
422
423 static void
424 micro_eq(
425 union tgsi_exec_channel *dst,
426 const union tgsi_exec_channel *src0,
427 const union tgsi_exec_channel *src1,
428 const union tgsi_exec_channel *src2,
429 const union tgsi_exec_channel *src3 )
430 {
431 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
432 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
433 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
434 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
435 }
436
437 static void
438 micro_ieq(
439 union tgsi_exec_channel *dst,
440 const union tgsi_exec_channel *src0,
441 const union tgsi_exec_channel *src1,
442 const union tgsi_exec_channel *src2,
443 const union tgsi_exec_channel *src3 )
444 {
445 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
446 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
447 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
448 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
449 }
450
451 static void
452 micro_exp2(
453 union tgsi_exec_channel *dst,
454 const union tgsi_exec_channel *src)
455 {
456 #if FAST_MATH
457 dst->f[0] = util_fast_exp2( src->f[0] );
458 dst->f[1] = util_fast_exp2( src->f[1] );
459 dst->f[2] = util_fast_exp2( src->f[2] );
460 dst->f[3] = util_fast_exp2( src->f[3] );
461 #else
462 dst->f[0] = powf( 2.0f, src->f[0] );
463 dst->f[1] = powf( 2.0f, src->f[1] );
464 dst->f[2] = powf( 2.0f, src->f[2] );
465 dst->f[3] = powf( 2.0f, src->f[3] );
466 #endif
467 }
468
469 static void
470 micro_f2it(
471 union tgsi_exec_channel *dst,
472 const union tgsi_exec_channel *src )
473 {
474 dst->i[0] = (int) src->f[0];
475 dst->i[1] = (int) src->f[1];
476 dst->i[2] = (int) src->f[2];
477 dst->i[3] = (int) src->f[3];
478 }
479
480 static void
481 micro_f2ut(
482 union tgsi_exec_channel *dst,
483 const union tgsi_exec_channel *src )
484 {
485 dst->u[0] = (uint) src->f[0];
486 dst->u[1] = (uint) src->f[1];
487 dst->u[2] = (uint) src->f[2];
488 dst->u[3] = (uint) src->f[3];
489 }
490
491 static void
492 micro_flr(
493 union tgsi_exec_channel *dst,
494 const union tgsi_exec_channel *src )
495 {
496 dst->f[0] = floorf( src->f[0] );
497 dst->f[1] = floorf( src->f[1] );
498 dst->f[2] = floorf( src->f[2] );
499 dst->f[3] = floorf( src->f[3] );
500 }
501
502 static void
503 micro_frc(
504 union tgsi_exec_channel *dst,
505 const union tgsi_exec_channel *src )
506 {
507 dst->f[0] = src->f[0] - floorf( src->f[0] );
508 dst->f[1] = src->f[1] - floorf( src->f[1] );
509 dst->f[2] = src->f[2] - floorf( src->f[2] );
510 dst->f[3] = src->f[3] - floorf( src->f[3] );
511 }
512
513 static void
514 micro_ge(
515 union tgsi_exec_channel *dst,
516 const union tgsi_exec_channel *src0,
517 const union tgsi_exec_channel *src1,
518 const union tgsi_exec_channel *src2,
519 const union tgsi_exec_channel *src3 )
520 {
521 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
522 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
523 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
524 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
525 }
526
527 static void
528 micro_i2f(
529 union tgsi_exec_channel *dst,
530 const union tgsi_exec_channel *src )
531 {
532 dst->f[0] = (float) src->i[0];
533 dst->f[1] = (float) src->i[1];
534 dst->f[2] = (float) src->i[2];
535 dst->f[3] = (float) src->i[3];
536 }
537
538 static void
539 micro_lg2(
540 union tgsi_exec_channel *dst,
541 const union tgsi_exec_channel *src )
542 {
543 #if FAST_MATH
544 dst->f[0] = util_fast_log2( src->f[0] );
545 dst->f[1] = util_fast_log2( src->f[1] );
546 dst->f[2] = util_fast_log2( src->f[2] );
547 dst->f[3] = util_fast_log2( src->f[3] );
548 #else
549 dst->f[0] = logf( src->f[0] ) * 1.442695f;
550 dst->f[1] = logf( src->f[1] ) * 1.442695f;
551 dst->f[2] = logf( src->f[2] ) * 1.442695f;
552 dst->f[3] = logf( src->f[3] ) * 1.442695f;
553 #endif
554 }
555
556 static void
557 micro_le(
558 union tgsi_exec_channel *dst,
559 const union tgsi_exec_channel *src0,
560 const union tgsi_exec_channel *src1,
561 const union tgsi_exec_channel *src2,
562 const union tgsi_exec_channel *src3 )
563 {
564 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
565 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
566 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
567 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
568 }
569
570 static void
571 micro_lt(
572 union tgsi_exec_channel *dst,
573 const union tgsi_exec_channel *src0,
574 const union tgsi_exec_channel *src1,
575 const union tgsi_exec_channel *src2,
576 const union tgsi_exec_channel *src3 )
577 {
578 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
579 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
580 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
581 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
582 }
583
584 static void
585 micro_ilt(
586 union tgsi_exec_channel *dst,
587 const union tgsi_exec_channel *src0,
588 const union tgsi_exec_channel *src1,
589 const union tgsi_exec_channel *src2,
590 const union tgsi_exec_channel *src3 )
591 {
592 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
593 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
594 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
595 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
596 }
597
598 static void
599 micro_ult(
600 union tgsi_exec_channel *dst,
601 const union tgsi_exec_channel *src0,
602 const union tgsi_exec_channel *src1,
603 const union tgsi_exec_channel *src2,
604 const union tgsi_exec_channel *src3 )
605 {
606 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
607 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
608 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
609 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
610 }
611
612 static void
613 micro_max(
614 union tgsi_exec_channel *dst,
615 const union tgsi_exec_channel *src0,
616 const union tgsi_exec_channel *src1 )
617 {
618 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
619 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
620 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
621 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
622 }
623
624 static void
625 micro_imax(
626 union tgsi_exec_channel *dst,
627 const union tgsi_exec_channel *src0,
628 const union tgsi_exec_channel *src1 )
629 {
630 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
631 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
632 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
633 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
634 }
635
636 static void
637 micro_umax(
638 union tgsi_exec_channel *dst,
639 const union tgsi_exec_channel *src0,
640 const union tgsi_exec_channel *src1 )
641 {
642 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
643 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
644 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
645 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
646 }
647
648 static void
649 micro_min(
650 union tgsi_exec_channel *dst,
651 const union tgsi_exec_channel *src0,
652 const union tgsi_exec_channel *src1 )
653 {
654 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
655 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
656 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
657 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
658 }
659
660 static void
661 micro_imin(
662 union tgsi_exec_channel *dst,
663 const union tgsi_exec_channel *src0,
664 const union tgsi_exec_channel *src1 )
665 {
666 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
667 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
668 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
669 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
670 }
671
672 static void
673 micro_umin(
674 union tgsi_exec_channel *dst,
675 const union tgsi_exec_channel *src0,
676 const union tgsi_exec_channel *src1 )
677 {
678 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
679 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
680 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
681 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
682 }
683
684 static void
685 micro_umod(
686 union tgsi_exec_channel *dst,
687 const union tgsi_exec_channel *src0,
688 const union tgsi_exec_channel *src1 )
689 {
690 dst->u[0] = src0->u[0] % src1->u[0];
691 dst->u[1] = src0->u[1] % src1->u[1];
692 dst->u[2] = src0->u[2] % src1->u[2];
693 dst->u[3] = src0->u[3] % src1->u[3];
694 }
695
696 static void
697 micro_mul(
698 union tgsi_exec_channel *dst,
699 const union tgsi_exec_channel *src0,
700 const union tgsi_exec_channel *src1 )
701 {
702 dst->f[0] = src0->f[0] * src1->f[0];
703 dst->f[1] = src0->f[1] * src1->f[1];
704 dst->f[2] = src0->f[2] * src1->f[2];
705 dst->f[3] = src0->f[3] * src1->f[3];
706 }
707
708 static void
709 micro_imul(
710 union tgsi_exec_channel *dst,
711 const union tgsi_exec_channel *src0,
712 const union tgsi_exec_channel *src1 )
713 {
714 dst->i[0] = src0->i[0] * src1->i[0];
715 dst->i[1] = src0->i[1] * src1->i[1];
716 dst->i[2] = src0->i[2] * src1->i[2];
717 dst->i[3] = src0->i[3] * src1->i[3];
718 }
719
720 static void
721 micro_imul64(
722 union tgsi_exec_channel *dst0,
723 union tgsi_exec_channel *dst1,
724 const union tgsi_exec_channel *src0,
725 const union tgsi_exec_channel *src1 )
726 {
727 dst1->i[0] = src0->i[0] * src1->i[0];
728 dst1->i[1] = src0->i[1] * src1->i[1];
729 dst1->i[2] = src0->i[2] * src1->i[2];
730 dst1->i[3] = src0->i[3] * src1->i[3];
731 dst0->i[0] = 0;
732 dst0->i[1] = 0;
733 dst0->i[2] = 0;
734 dst0->i[3] = 0;
735 }
736
737 static void
738 micro_umul64(
739 union tgsi_exec_channel *dst0,
740 union tgsi_exec_channel *dst1,
741 const union tgsi_exec_channel *src0,
742 const union tgsi_exec_channel *src1 )
743 {
744 dst1->u[0] = src0->u[0] * src1->u[0];
745 dst1->u[1] = src0->u[1] * src1->u[1];
746 dst1->u[2] = src0->u[2] * src1->u[2];
747 dst1->u[3] = src0->u[3] * src1->u[3];
748 dst0->u[0] = 0;
749 dst0->u[1] = 0;
750 dst0->u[2] = 0;
751 dst0->u[3] = 0;
752 }
753
754 static void
755 micro_movc(
756 union tgsi_exec_channel *dst,
757 const union tgsi_exec_channel *src0,
758 const union tgsi_exec_channel *src1,
759 const union tgsi_exec_channel *src2 )
760 {
761 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
762 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
763 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
764 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
765 }
766
767 static void
768 micro_neg(
769 union tgsi_exec_channel *dst,
770 const union tgsi_exec_channel *src )
771 {
772 dst->f[0] = -src->f[0];
773 dst->f[1] = -src->f[1];
774 dst->f[2] = -src->f[2];
775 dst->f[3] = -src->f[3];
776 }
777
778 static void
779 micro_ineg(
780 union tgsi_exec_channel *dst,
781 const union tgsi_exec_channel *src )
782 {
783 dst->i[0] = -src->i[0];
784 dst->i[1] = -src->i[1];
785 dst->i[2] = -src->i[2];
786 dst->i[3] = -src->i[3];
787 }
788
789 static void
790 micro_not(
791 union tgsi_exec_channel *dst,
792 const union tgsi_exec_channel *src )
793 {
794 dst->u[0] = ~src->u[0];
795 dst->u[1] = ~src->u[1];
796 dst->u[2] = ~src->u[2];
797 dst->u[3] = ~src->u[3];
798 }
799
800 static void
801 micro_or(
802 union tgsi_exec_channel *dst,
803 const union tgsi_exec_channel *src0,
804 const union tgsi_exec_channel *src1 )
805 {
806 dst->u[0] = src0->u[0] | src1->u[0];
807 dst->u[1] = src0->u[1] | src1->u[1];
808 dst->u[2] = src0->u[2] | src1->u[2];
809 dst->u[3] = src0->u[3] | src1->u[3];
810 }
811
812 static void
813 micro_pow(
814 union tgsi_exec_channel *dst,
815 const union tgsi_exec_channel *src0,
816 const union tgsi_exec_channel *src1 )
817 {
818 #if FAST_MATH
819 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
820 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
821 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
822 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
823 #else
824 dst->f[0] = powf( src0->f[0], src1->f[0] );
825 dst->f[1] = powf( src0->f[1], src1->f[1] );
826 dst->f[2] = powf( src0->f[2], src1->f[2] );
827 dst->f[3] = powf( src0->f[3], src1->f[3] );
828 #endif
829 }
830
831 static void
832 micro_rnd(
833 union tgsi_exec_channel *dst,
834 const union tgsi_exec_channel *src )
835 {
836 dst->f[0] = floorf( src->f[0] + 0.5f );
837 dst->f[1] = floorf( src->f[1] + 0.5f );
838 dst->f[2] = floorf( src->f[2] + 0.5f );
839 dst->f[3] = floorf( src->f[3] + 0.5f );
840 }
841
842 static void
843 micro_shl(
844 union tgsi_exec_channel *dst,
845 const union tgsi_exec_channel *src0,
846 const union tgsi_exec_channel *src1 )
847 {
848 dst->i[0] = src0->i[0] << src1->i[0];
849 dst->i[1] = src0->i[1] << src1->i[1];
850 dst->i[2] = src0->i[2] << src1->i[2];
851 dst->i[3] = src0->i[3] << src1->i[3];
852 }
853
854 static void
855 micro_ishr(
856 union tgsi_exec_channel *dst,
857 const union tgsi_exec_channel *src0,
858 const union tgsi_exec_channel *src1 )
859 {
860 dst->i[0] = src0->i[0] >> src1->i[0];
861 dst->i[1] = src0->i[1] >> src1->i[1];
862 dst->i[2] = src0->i[2] >> src1->i[2];
863 dst->i[3] = src0->i[3] >> src1->i[3];
864 }
865
866 static void
867 micro_trunc(
868 union tgsi_exec_channel *dst,
869 const union tgsi_exec_channel *src0 )
870 {
871 dst->f[0] = (float) (int) src0->f[0];
872 dst->f[1] = (float) (int) src0->f[1];
873 dst->f[2] = (float) (int) src0->f[2];
874 dst->f[3] = (float) (int) src0->f[3];
875 }
876
877 static void
878 micro_ushr(
879 union tgsi_exec_channel *dst,
880 const union tgsi_exec_channel *src0,
881 const union tgsi_exec_channel *src1 )
882 {
883 dst->u[0] = src0->u[0] >> src1->u[0];
884 dst->u[1] = src0->u[1] >> src1->u[1];
885 dst->u[2] = src0->u[2] >> src1->u[2];
886 dst->u[3] = src0->u[3] >> src1->u[3];
887 }
888
889 static void
890 micro_sin(
891 union tgsi_exec_channel *dst,
892 const union tgsi_exec_channel *src )
893 {
894 dst->f[0] = sinf( src->f[0] );
895 dst->f[1] = sinf( src->f[1] );
896 dst->f[2] = sinf( src->f[2] );
897 dst->f[3] = sinf( src->f[3] );
898 }
899
900 static void
901 micro_sqrt( union tgsi_exec_channel *dst,
902 const union tgsi_exec_channel *src )
903 {
904 dst->f[0] = sqrtf( src->f[0] );
905 dst->f[1] = sqrtf( src->f[1] );
906 dst->f[2] = sqrtf( src->f[2] );
907 dst->f[3] = sqrtf( src->f[3] );
908 }
909
910 static void
911 micro_sub(
912 union tgsi_exec_channel *dst,
913 const union tgsi_exec_channel *src0,
914 const union tgsi_exec_channel *src1 )
915 {
916 dst->f[0] = src0->f[0] - src1->f[0];
917 dst->f[1] = src0->f[1] - src1->f[1];
918 dst->f[2] = src0->f[2] - src1->f[2];
919 dst->f[3] = src0->f[3] - src1->f[3];
920 }
921
922 static void
923 micro_u2f(
924 union tgsi_exec_channel *dst,
925 const union tgsi_exec_channel *src )
926 {
927 dst->f[0] = (float) src->u[0];
928 dst->f[1] = (float) src->u[1];
929 dst->f[2] = (float) src->u[2];
930 dst->f[3] = (float) src->u[3];
931 }
932
933 static void
934 micro_xor(
935 union tgsi_exec_channel *dst,
936 const union tgsi_exec_channel *src0,
937 const union tgsi_exec_channel *src1 )
938 {
939 dst->u[0] = src0->u[0] ^ src1->u[0];
940 dst->u[1] = src0->u[1] ^ src1->u[1];
941 dst->u[2] = src0->u[2] ^ src1->u[2];
942 dst->u[3] = src0->u[3] ^ src1->u[3];
943 }
944
945 static void
946 fetch_src_file_channel(
947 const struct tgsi_exec_machine *mach,
948 const uint file,
949 const uint swizzle,
950 const union tgsi_exec_channel *index,
951 union tgsi_exec_channel *chan )
952 {
953 switch( swizzle ) {
954 case TGSI_EXTSWIZZLE_X:
955 case TGSI_EXTSWIZZLE_Y:
956 case TGSI_EXTSWIZZLE_Z:
957 case TGSI_EXTSWIZZLE_W:
958 switch( file ) {
959 case TGSI_FILE_CONSTANT:
960 assert(mach->Consts);
961 chan->f[0] = mach->Consts[index->i[0]][swizzle];
962 chan->f[1] = mach->Consts[index->i[1]][swizzle];
963 chan->f[2] = mach->Consts[index->i[2]][swizzle];
964 chan->f[3] = mach->Consts[index->i[3]][swizzle];
965 break;
966
967 case TGSI_FILE_INPUT:
968 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
969 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
970 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
971 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
972 break;
973
974 case TGSI_FILE_TEMPORARY:
975 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
976 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
977 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
978 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
979 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
980 break;
981
982 case TGSI_FILE_IMMEDIATE:
983 assert( index->i[0] < (int) mach->ImmLimit );
984 chan->f[0] = mach->Imms[index->i[0]][swizzle];
985 assert( index->i[1] < (int) mach->ImmLimit );
986 chan->f[1] = mach->Imms[index->i[1]][swizzle];
987 assert( index->i[2] < (int) mach->ImmLimit );
988 chan->f[2] = mach->Imms[index->i[2]][swizzle];
989 assert( index->i[3] < (int) mach->ImmLimit );
990 chan->f[3] = mach->Imms[index->i[3]][swizzle];
991 break;
992
993 case TGSI_FILE_ADDRESS:
994 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
995 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
996 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
997 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
998 break;
999
1000 case TGSI_FILE_OUTPUT:
1001 /* vertex/fragment output vars can be read too */
1002 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1003 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1004 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1005 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1006 break;
1007
1008 default:
1009 assert( 0 );
1010 }
1011 break;
1012
1013 case TGSI_EXTSWIZZLE_ZERO:
1014 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1015 break;
1016
1017 case TGSI_EXTSWIZZLE_ONE:
1018 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1019 break;
1020
1021 default:
1022 assert( 0 );
1023 }
1024 }
1025
1026 static void
1027 fetch_source(
1028 const struct tgsi_exec_machine *mach,
1029 union tgsi_exec_channel *chan,
1030 const struct tgsi_full_src_register *reg,
1031 const uint chan_index )
1032 {
1033 union tgsi_exec_channel index;
1034 uint swizzle;
1035
1036 index.i[0] =
1037 index.i[1] =
1038 index.i[2] =
1039 index.i[3] = reg->SrcRegister.Index;
1040
1041 if (reg->SrcRegister.Indirect) {
1042 union tgsi_exec_channel index2;
1043 union tgsi_exec_channel indir_index;
1044
1045 index2.i[0] =
1046 index2.i[1] =
1047 index2.i[2] =
1048 index2.i[3] = reg->SrcRegisterInd.Index;
1049
1050 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1051 fetch_src_file_channel(
1052 mach,
1053 reg->SrcRegisterInd.File,
1054 swizzle,
1055 &index2,
1056 &indir_index );
1057
1058 index.i[0] += indir_index.i[0];
1059 index.i[1] += indir_index.i[1];
1060 index.i[2] += indir_index.i[2];
1061 index.i[3] += indir_index.i[3];
1062 }
1063
1064 if( reg->SrcRegister.Dimension ) {
1065 switch( reg->SrcRegister.File ) {
1066 case TGSI_FILE_INPUT:
1067 index.i[0] *= 17;
1068 index.i[1] *= 17;
1069 index.i[2] *= 17;
1070 index.i[3] *= 17;
1071 break;
1072 case TGSI_FILE_CONSTANT:
1073 index.i[0] *= 4096;
1074 index.i[1] *= 4096;
1075 index.i[2] *= 4096;
1076 index.i[3] *= 4096;
1077 break;
1078 default:
1079 assert( 0 );
1080 }
1081
1082 index.i[0] += reg->SrcRegisterDim.Index;
1083 index.i[1] += reg->SrcRegisterDim.Index;
1084 index.i[2] += reg->SrcRegisterDim.Index;
1085 index.i[3] += reg->SrcRegisterDim.Index;
1086
1087 if (reg->SrcRegisterDim.Indirect) {
1088 union tgsi_exec_channel index2;
1089 union tgsi_exec_channel indir_index;
1090
1091 index2.i[0] =
1092 index2.i[1] =
1093 index2.i[2] =
1094 index2.i[3] = reg->SrcRegisterDimInd.Index;
1095
1096 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1097 fetch_src_file_channel(
1098 mach,
1099 reg->SrcRegisterDimInd.File,
1100 swizzle,
1101 &index2,
1102 &indir_index );
1103
1104 index.i[0] += indir_index.i[0];
1105 index.i[1] += indir_index.i[1];
1106 index.i[2] += indir_index.i[2];
1107 index.i[3] += indir_index.i[3];
1108 }
1109 }
1110
1111 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1112 fetch_src_file_channel(
1113 mach,
1114 reg->SrcRegister.File,
1115 swizzle,
1116 &index,
1117 chan );
1118
1119 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1120 case TGSI_UTIL_SIGN_CLEAR:
1121 micro_abs( chan, chan );
1122 break;
1123
1124 case TGSI_UTIL_SIGN_SET:
1125 micro_abs( chan, chan );
1126 micro_neg( chan, chan );
1127 break;
1128
1129 case TGSI_UTIL_SIGN_TOGGLE:
1130 micro_neg( chan, chan );
1131 break;
1132
1133 case TGSI_UTIL_SIGN_KEEP:
1134 break;
1135 }
1136
1137 if (reg->SrcRegisterExtMod.Complement) {
1138 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1139 }
1140 }
1141
1142 static void
1143 store_dest(
1144 struct tgsi_exec_machine *mach,
1145 const union tgsi_exec_channel *chan,
1146 const struct tgsi_full_dst_register *reg,
1147 const struct tgsi_full_instruction *inst,
1148 uint chan_index )
1149 {
1150 uint i;
1151 union tgsi_exec_channel null;
1152 union tgsi_exec_channel *dst;
1153 uint execmask = mach->ExecMask;
1154
1155 switch (reg->DstRegister.File) {
1156 case TGSI_FILE_NULL:
1157 dst = &null;
1158 break;
1159
1160 case TGSI_FILE_OUTPUT:
1161 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1162 + reg->DstRegister.Index].xyzw[chan_index];
1163 break;
1164
1165 case TGSI_FILE_TEMPORARY:
1166 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1167 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1168 break;
1169
1170 case TGSI_FILE_ADDRESS:
1171 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1172 break;
1173
1174 default:
1175 assert( 0 );
1176 return;
1177 }
1178
1179 if (inst->InstructionExtNv.CondFlowEnable) {
1180 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1181 uint swizzle;
1182 uint shift;
1183 uint mask;
1184 uint test;
1185
1186 /* Only CC0 supported.
1187 */
1188 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1189
1190 switch (chan_index) {
1191 case CHAN_X:
1192 swizzle = inst->InstructionExtNv.CondSwizzleX;
1193 break;
1194 case CHAN_Y:
1195 swizzle = inst->InstructionExtNv.CondSwizzleY;
1196 break;
1197 case CHAN_Z:
1198 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1199 break;
1200 case CHAN_W:
1201 swizzle = inst->InstructionExtNv.CondSwizzleW;
1202 break;
1203 default:
1204 assert( 0 );
1205 return;
1206 }
1207
1208 switch (swizzle) {
1209 case TGSI_SWIZZLE_X:
1210 shift = TGSI_EXEC_CC_X_SHIFT;
1211 mask = TGSI_EXEC_CC_X_MASK;
1212 break;
1213 case TGSI_SWIZZLE_Y:
1214 shift = TGSI_EXEC_CC_Y_SHIFT;
1215 mask = TGSI_EXEC_CC_Y_MASK;
1216 break;
1217 case TGSI_SWIZZLE_Z:
1218 shift = TGSI_EXEC_CC_Z_SHIFT;
1219 mask = TGSI_EXEC_CC_Z_MASK;
1220 break;
1221 case TGSI_SWIZZLE_W:
1222 shift = TGSI_EXEC_CC_W_SHIFT;
1223 mask = TGSI_EXEC_CC_W_MASK;
1224 break;
1225 default:
1226 assert( 0 );
1227 return;
1228 }
1229
1230 switch (inst->InstructionExtNv.CondMask) {
1231 case TGSI_CC_GT:
1232 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1233 for (i = 0; i < QUAD_SIZE; i++)
1234 if (cc->u[i] & test)
1235 execmask &= ~(1 << i);
1236 break;
1237
1238 case TGSI_CC_EQ:
1239 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1240 for (i = 0; i < QUAD_SIZE; i++)
1241 if (cc->u[i] & test)
1242 execmask &= ~(1 << i);
1243 break;
1244
1245 case TGSI_CC_LT:
1246 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1247 for (i = 0; i < QUAD_SIZE; i++)
1248 if (cc->u[i] & test)
1249 execmask &= ~(1 << i);
1250 break;
1251
1252 case TGSI_CC_GE:
1253 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1254 for (i = 0; i < QUAD_SIZE; i++)
1255 if (cc->u[i] & test)
1256 execmask &= ~(1 << i);
1257 break;
1258
1259 case TGSI_CC_LE:
1260 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1261 for (i = 0; i < QUAD_SIZE; i++)
1262 if (cc->u[i] & test)
1263 execmask &= ~(1 << i);
1264 break;
1265
1266 case TGSI_CC_NE:
1267 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1268 for (i = 0; i < QUAD_SIZE; i++)
1269 if (cc->u[i] & test)
1270 execmask &= ~(1 << i);
1271 break;
1272
1273 case TGSI_CC_TR:
1274 break;
1275
1276 case TGSI_CC_FL:
1277 for (i = 0; i < QUAD_SIZE; i++)
1278 execmask &= ~(1 << i);
1279 break;
1280
1281 default:
1282 assert( 0 );
1283 return;
1284 }
1285 }
1286
1287 switch (inst->Instruction.Saturate) {
1288 case TGSI_SAT_NONE:
1289 for (i = 0; i < QUAD_SIZE; i++)
1290 if (execmask & (1 << i))
1291 dst->i[i] = chan->i[i];
1292 break;
1293
1294 case TGSI_SAT_ZERO_ONE:
1295 for (i = 0; i < QUAD_SIZE; i++)
1296 if (execmask & (1 << i)) {
1297 if (chan->f[i] < 0.0f)
1298 dst->f[i] = 0.0f;
1299 else if (chan->f[i] > 1.0f)
1300 dst->f[i] = 1.0f;
1301 else
1302 dst->i[i] = chan->i[i];
1303 }
1304 break;
1305
1306 case TGSI_SAT_MINUS_PLUS_ONE:
1307 for (i = 0; i < QUAD_SIZE; i++)
1308 if (execmask & (1 << i)) {
1309 if (chan->f[i] < -1.0f)
1310 dst->f[i] = -1.0f;
1311 else if (chan->f[i] > 1.0f)
1312 dst->f[i] = 1.0f;
1313 else
1314 dst->i[i] = chan->i[i];
1315 }
1316 break;
1317
1318 default:
1319 assert( 0 );
1320 }
1321
1322 if (inst->InstructionExtNv.CondDstUpdate) {
1323 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1324 uint shift;
1325 uint mask;
1326
1327 /* Only CC0 supported.
1328 */
1329 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1330
1331 switch (chan_index) {
1332 case CHAN_X:
1333 shift = TGSI_EXEC_CC_X_SHIFT;
1334 mask = ~TGSI_EXEC_CC_X_MASK;
1335 break;
1336 case CHAN_Y:
1337 shift = TGSI_EXEC_CC_Y_SHIFT;
1338 mask = ~TGSI_EXEC_CC_Y_MASK;
1339 break;
1340 case CHAN_Z:
1341 shift = TGSI_EXEC_CC_Z_SHIFT;
1342 mask = ~TGSI_EXEC_CC_Z_MASK;
1343 break;
1344 case CHAN_W:
1345 shift = TGSI_EXEC_CC_W_SHIFT;
1346 mask = ~TGSI_EXEC_CC_W_MASK;
1347 break;
1348 default:
1349 assert( 0 );
1350 return;
1351 }
1352
1353 for (i = 0; i < QUAD_SIZE; i++)
1354 if (execmask & (1 << i)) {
1355 cc->u[i] &= mask;
1356 if (dst->f[i] < 0.0f)
1357 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1358 else if (dst->f[i] > 0.0f)
1359 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1360 else if (dst->f[i] == 0.0f)
1361 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1362 else
1363 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1364 }
1365 }
1366 }
1367
/*
 * Shorthands for fetch_source() and store_dest().  Both macros pick up the
 * variables 'mach' and 'inst' from the scope in which they are expanded.
 *
 * FETCH reads channel CHAN of source operand INDEX into VAL;
 * STORE writes VAL to channel CHAN of destination operand INDEX.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN) )

#define STORE(VAL, INDEX, CHAN) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1373
1374
1375 /**
1376 * Execute ARB-style KIL which is predicated by a src register.
1377 * Kill fragment if any of the four values is less than zero.
1378 */
1379 static void
1380 exec_kil(struct tgsi_exec_machine *mach,
1381 const struct tgsi_full_instruction *inst)
1382 {
1383 uint uniquemask;
1384 uint chan_index;
1385 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1386 union tgsi_exec_channel r[1];
1387
1388 /* This mask stores component bits that were already tested. Note that
1389 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1390 * tested. */
1391 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1392
1393 for (chan_index = 0; chan_index < 4; chan_index++)
1394 {
1395 uint swizzle;
1396 uint i;
1397
1398 /* unswizzle channel */
1399 swizzle = tgsi_util_get_full_src_register_extswizzle (
1400 &inst->FullSrcRegisters[0],
1401 chan_index);
1402
1403 /* check if the component has not been already tested */
1404 if (uniquemask & (1 << swizzle))
1405 continue;
1406 uniquemask |= 1 << swizzle;
1407
1408 FETCH(&r[0], 0, chan_index);
1409 for (i = 0; i < 4; i++)
1410 if (r[0].f[i] < 0.0f)
1411 kilmask |= 1 << i;
1412 }
1413
1414 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1415 }
1416
1417 /**
1418 * Execute NVIDIA-style KIL which is predicated by a condition code.
1419 * Kill fragment if the condition code is TRUE.
1420 */
1421 static void
1422 exec_kilp(struct tgsi_exec_machine *mach,
1423 const struct tgsi_full_instruction *inst)
1424 {
1425 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1426
1427 if (inst->InstructionExtNv.CondFlowEnable) {
1428 uint swizzle[4];
1429 uint chan_index;
1430
1431 kilmask = 0x0;
1432
1433 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1434 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1435 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1436 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1437
1438 for (chan_index = 0; chan_index < 4; chan_index++)
1439 {
1440 uint i;
1441
1442 for (i = 0; i < 4; i++) {
1443 /* TODO: evaluate the condition code */
1444 if (0)
1445 kilmask |= 1 << i;
1446 }
1447 }
1448 }
1449 else {
1450 /* "unconditional" kil */
1451 kilmask = mach->ExecMask;
1452 }
1453 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1454 }
1455
1456
1457 /*
1458 * Fetch a texel using STR texture coordinates.
1459 */
1460 static void
1461 fetch_texel( struct tgsi_sampler *sampler,
1462 const union tgsi_exec_channel *s,
1463 const union tgsi_exec_channel *t,
1464 const union tgsi_exec_channel *p,
1465 float lodbias, /* XXX should be float[4] */
1466 union tgsi_exec_channel *r,
1467 union tgsi_exec_channel *g,
1468 union tgsi_exec_channel *b,
1469 union tgsi_exec_channel *a )
1470 {
1471 uint j;
1472 float rgba[NUM_CHANNELS][QUAD_SIZE];
1473
1474 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1475
1476 for (j = 0; j < 4; j++) {
1477 r->f[j] = rgba[0][j];
1478 g->f[j] = rgba[1][j];
1479 b->f[j] = rgba[2][j];
1480 a->f[j] = rgba[3][j];
1481 }
1482 }
1483
1484
1485 static void
1486 exec_tex(struct tgsi_exec_machine *mach,
1487 const struct tgsi_full_instruction *inst,
1488 boolean biasLod,
1489 boolean projected)
1490 {
1491 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1492 union tgsi_exec_channel r[8];
1493 uint chan_index;
1494 float lodBias;
1495
1496 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1497
1498 switch (inst->InstructionExtTexture.Texture) {
1499 case TGSI_TEXTURE_1D:
1500
1501 FETCH(&r[0], 0, CHAN_X);
1502
1503 if (projected) {
1504 FETCH(&r[1], 0, CHAN_W);
1505 micro_div( &r[0], &r[0], &r[1] );
1506 }
1507
1508 if (biasLod) {
1509 FETCH(&r[1], 0, CHAN_W);
1510 lodBias = r[2].f[0];
1511 }
1512 else
1513 lodBias = 0.0;
1514
1515 fetch_texel(&mach->Samplers[unit],
1516 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1517 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1518 break;
1519
1520 case TGSI_TEXTURE_2D:
1521 case TGSI_TEXTURE_RECT:
1522
1523 FETCH(&r[0], 0, CHAN_X);
1524 FETCH(&r[1], 0, CHAN_Y);
1525 FETCH(&r[2], 0, CHAN_Z);
1526
1527 if (projected) {
1528 FETCH(&r[3], 0, CHAN_W);
1529 micro_div( &r[0], &r[0], &r[3] );
1530 micro_div( &r[1], &r[1], &r[3] );
1531 micro_div( &r[2], &r[2], &r[3] );
1532 }
1533
1534 if (biasLod) {
1535 FETCH(&r[3], 0, CHAN_W);
1536 lodBias = r[3].f[0];
1537 }
1538 else
1539 lodBias = 0.0;
1540
1541 fetch_texel(&mach->Samplers[unit],
1542 &r[0], &r[1], &r[2], lodBias, /* inputs */
1543 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1544 break;
1545
1546 case TGSI_TEXTURE_3D:
1547 case TGSI_TEXTURE_CUBE:
1548
1549 FETCH(&r[0], 0, CHAN_X);
1550 FETCH(&r[1], 0, CHAN_Y);
1551 FETCH(&r[2], 0, CHAN_Z);
1552
1553 if (projected) {
1554 FETCH(&r[3], 0, CHAN_W);
1555 micro_div( &r[0], &r[0], &r[3] );
1556 micro_div( &r[1], &r[1], &r[3] );
1557 micro_div( &r[2], &r[2], &r[3] );
1558 }
1559
1560 if (biasLod) {
1561 FETCH(&r[3], 0, CHAN_W);
1562 lodBias = r[3].f[0];
1563 }
1564 else
1565 lodBias = 0.0;
1566
1567 fetch_texel(&mach->Samplers[unit],
1568 &r[0], &r[1], &r[2], lodBias,
1569 &r[0], &r[1], &r[2], &r[3]);
1570 break;
1571
1572 default:
1573 assert (0);
1574 }
1575
1576 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1577 STORE( &r[chan_index], 0, chan_index );
1578 }
1579 }
1580
1581
1582 /**
1583 * Evaluate a constant-valued coefficient at the position of the
1584 * current quad.
1585 */
1586 static void
1587 eval_constant_coef(
1588 struct tgsi_exec_machine *mach,
1589 unsigned attrib,
1590 unsigned chan )
1591 {
1592 unsigned i;
1593
1594 for( i = 0; i < QUAD_SIZE; i++ ) {
1595 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1596 }
1597 }
1598
1599 /**
1600 * Evaluate a linear-valued coefficient at the position of the
1601 * current quad.
1602 */
1603 static void
1604 eval_linear_coef(
1605 struct tgsi_exec_machine *mach,
1606 unsigned attrib,
1607 unsigned chan )
1608 {
1609 const float x = mach->QuadPos.xyzw[0].f[0];
1610 const float y = mach->QuadPos.xyzw[1].f[0];
1611 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1612 const float dady = mach->InterpCoefs[attrib].dady[chan];
1613 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1614 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1615 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1616 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1617 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1618 }
1619
1620 /**
1621 * Evaluate a perspective-valued coefficient at the position of the
1622 * current quad.
1623 */
1624 static void
1625 eval_perspective_coef(
1626 struct tgsi_exec_machine *mach,
1627 unsigned attrib,
1628 unsigned chan )
1629 {
1630 const float x = mach->QuadPos.xyzw[0].f[0];
1631 const float y = mach->QuadPos.xyzw[1].f[0];
1632 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1633 const float dady = mach->InterpCoefs[attrib].dady[chan];
1634 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1635 const float *w = mach->QuadPos.xyzw[3].f;
1636 /* divide by W here */
1637 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1638 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1639 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1640 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1641 }
1642
1643
/**
 * Signature of a function that evaluates one channel ('chan') of one
 * input attribute ('attrib') for the current quad.
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1648
1649 static void
1650 exec_declaration(
1651 struct tgsi_exec_machine *mach,
1652 const struct tgsi_full_declaration *decl )
1653 {
1654 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1655 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1656 unsigned first, last, mask;
1657 eval_coef_func eval;
1658
1659 first = decl->DeclarationRange.First;
1660 last = decl->DeclarationRange.Last;
1661 mask = decl->Declaration.UsageMask;
1662
1663 switch( decl->Declaration.Interpolate ) {
1664 case TGSI_INTERPOLATE_CONSTANT:
1665 eval = eval_constant_coef;
1666 break;
1667
1668 case TGSI_INTERPOLATE_LINEAR:
1669 eval = eval_linear_coef;
1670 break;
1671
1672 case TGSI_INTERPOLATE_PERSPECTIVE:
1673 eval = eval_perspective_coef;
1674 break;
1675
1676 default:
1677 eval = NULL;
1678 assert( 0 );
1679 }
1680
1681 if( mask == TGSI_WRITEMASK_XYZW ) {
1682 unsigned i, j;
1683
1684 for( i = first; i <= last; i++ ) {
1685 for( j = 0; j < NUM_CHANNELS; j++ ) {
1686 eval( mach, i, j );
1687 }
1688 }
1689 }
1690 else {
1691 unsigned i, j;
1692
1693 for( j = 0; j < NUM_CHANNELS; j++ ) {
1694 if( mask & (1 << j) ) {
1695 for( i = first; i <= last; i++ ) {
1696 eval( mach, i, j );
1697 }
1698 }
1699 }
1700 }
1701 }
1702 }
1703 }
1704
1705 static void
1706 exec_instruction(
1707 struct tgsi_exec_machine *mach,
1708 const struct tgsi_full_instruction *inst,
1709 int *pc )
1710 {
1711 uint chan_index;
1712 union tgsi_exec_channel r[8];
1713
1714 (*pc)++;
1715
1716 switch (inst->Instruction.Opcode) {
1717 case TGSI_OPCODE_ARL:
1718 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1719 FETCH( &r[0], 0, chan_index );
1720 micro_f2it( &r[0], &r[0] );
1721 STORE( &r[0], 0, chan_index );
1722 }
1723 break;
1724
1725 case TGSI_OPCODE_MOV:
1726 case TGSI_OPCODE_SWZ:
1727 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1728 FETCH( &r[0], 0, chan_index );
1729 STORE( &r[0], 0, chan_index );
1730 }
1731 break;
1732
1733 case TGSI_OPCODE_LIT:
1734 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1735 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1736 }
1737
1738 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1739 FETCH( &r[0], 0, CHAN_X );
1740 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1741 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1742 STORE( &r[0], 0, CHAN_Y );
1743 }
1744
1745 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1746 FETCH( &r[1], 0, CHAN_Y );
1747 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1748
1749 FETCH( &r[2], 0, CHAN_W );
1750 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1751 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1752 micro_pow( &r[1], &r[1], &r[2] );
1753 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1754 STORE( &r[0], 0, CHAN_Z );
1755 }
1756 }
1757
1758 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1759 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1760 }
1761 break;
1762
1763 case TGSI_OPCODE_RCP:
1764 /* TGSI_OPCODE_RECIP */
1765 FETCH( &r[0], 0, CHAN_X );
1766 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1767 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1768 STORE( &r[0], 0, chan_index );
1769 }
1770 break;
1771
1772 case TGSI_OPCODE_RSQ:
1773 /* TGSI_OPCODE_RECIPSQRT */
1774 FETCH( &r[0], 0, CHAN_X );
1775 micro_sqrt( &r[0], &r[0] );
1776 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1777 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1778 STORE( &r[0], 0, chan_index );
1779 }
1780 break;
1781
1782 case TGSI_OPCODE_EXP:
1783 FETCH( &r[0], 0, CHAN_X );
1784 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1785 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1786 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1787 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1788 }
1789 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1790 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1791 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1792 }
1793 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1794 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1795 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1796 }
1797 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1798 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1799 }
1800 break;
1801
1802 case TGSI_OPCODE_LOG:
1803 FETCH( &r[0], 0, CHAN_X );
1804 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1805 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1806 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1807 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1808 STORE( &r[0], 0, CHAN_X );
1809 }
1810 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1811 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1812 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1813 STORE( &r[0], 0, CHAN_Y );
1814 }
1815 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1816 STORE( &r[1], 0, CHAN_Z );
1817 }
1818 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1819 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1820 }
1821 break;
1822
1823 case TGSI_OPCODE_MUL:
1824 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1825 {
1826 FETCH(&r[0], 0, chan_index);
1827 FETCH(&r[1], 1, chan_index);
1828
1829 micro_mul( &r[0], &r[0], &r[1] );
1830
1831 STORE(&r[0], 0, chan_index);
1832 }
1833 break;
1834
1835 case TGSI_OPCODE_ADD:
1836 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1837 FETCH( &r[0], 0, chan_index );
1838 FETCH( &r[1], 1, chan_index );
1839 micro_add( &r[0], &r[0], &r[1] );
1840 STORE( &r[0], 0, chan_index );
1841 }
1842 break;
1843
1844 case TGSI_OPCODE_DP3:
1845 /* TGSI_OPCODE_DOT3 */
1846 FETCH( &r[0], 0, CHAN_X );
1847 FETCH( &r[1], 1, CHAN_X );
1848 micro_mul( &r[0], &r[0], &r[1] );
1849
1850 FETCH( &r[1], 0, CHAN_Y );
1851 FETCH( &r[2], 1, CHAN_Y );
1852 micro_mul( &r[1], &r[1], &r[2] );
1853 micro_add( &r[0], &r[0], &r[1] );
1854
1855 FETCH( &r[1], 0, CHAN_Z );
1856 FETCH( &r[2], 1, CHAN_Z );
1857 micro_mul( &r[1], &r[1], &r[2] );
1858 micro_add( &r[0], &r[0], &r[1] );
1859
1860 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1861 STORE( &r[0], 0, chan_index );
1862 }
1863 break;
1864
1865 case TGSI_OPCODE_DP4:
1866 /* TGSI_OPCODE_DOT4 */
1867 FETCH(&r[0], 0, CHAN_X);
1868 FETCH(&r[1], 1, CHAN_X);
1869
1870 micro_mul( &r[0], &r[0], &r[1] );
1871
1872 FETCH(&r[1], 0, CHAN_Y);
1873 FETCH(&r[2], 1, CHAN_Y);
1874
1875 micro_mul( &r[1], &r[1], &r[2] );
1876 micro_add( &r[0], &r[0], &r[1] );
1877
1878 FETCH(&r[1], 0, CHAN_Z);
1879 FETCH(&r[2], 1, CHAN_Z);
1880
1881 micro_mul( &r[1], &r[1], &r[2] );
1882 micro_add( &r[0], &r[0], &r[1] );
1883
1884 FETCH(&r[1], 0, CHAN_W);
1885 FETCH(&r[2], 1, CHAN_W);
1886
1887 micro_mul( &r[1], &r[1], &r[2] );
1888 micro_add( &r[0], &r[0], &r[1] );
1889
1890 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1891 STORE( &r[0], 0, chan_index );
1892 }
1893 break;
1894
1895 case TGSI_OPCODE_DST:
1896 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1897 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1898 }
1899
1900 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1901 FETCH( &r[0], 0, CHAN_Y );
1902 FETCH( &r[1], 1, CHAN_Y);
1903 micro_mul( &r[0], &r[0], &r[1] );
1904 STORE( &r[0], 0, CHAN_Y );
1905 }
1906
1907 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1908 FETCH( &r[0], 0, CHAN_Z );
1909 STORE( &r[0], 0, CHAN_Z );
1910 }
1911
1912 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1913 FETCH( &r[0], 1, CHAN_W );
1914 STORE( &r[0], 0, CHAN_W );
1915 }
1916 break;
1917
1918 case TGSI_OPCODE_MIN:
1919 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1920 FETCH(&r[0], 0, chan_index);
1921 FETCH(&r[1], 1, chan_index);
1922
1923 /* XXX use micro_min()?? */
1924 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1925
1926 STORE(&r[0], 0, chan_index);
1927 }
1928 break;
1929
1930 case TGSI_OPCODE_MAX:
1931 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1932 FETCH(&r[0], 0, chan_index);
1933 FETCH(&r[1], 1, chan_index);
1934
1935 /* XXX use micro_max()?? */
1936 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1937
1938 STORE(&r[0], 0, chan_index );
1939 }
1940 break;
1941
1942 case TGSI_OPCODE_SLT:
1943 /* TGSI_OPCODE_SETLT */
1944 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1945 FETCH( &r[0], 0, chan_index );
1946 FETCH( &r[1], 1, chan_index );
1947 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1948 STORE( &r[0], 0, chan_index );
1949 }
1950 break;
1951
1952 case TGSI_OPCODE_SGE:
1953 /* TGSI_OPCODE_SETGE */
1954 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1955 FETCH( &r[0], 0, chan_index );
1956 FETCH( &r[1], 1, chan_index );
1957 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1958 STORE( &r[0], 0, chan_index );
1959 }
1960 break;
1961
1962 case TGSI_OPCODE_MAD:
1963 /* TGSI_OPCODE_MADD */
1964 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1965 FETCH( &r[0], 0, chan_index );
1966 FETCH( &r[1], 1, chan_index );
1967 micro_mul( &r[0], &r[0], &r[1] );
1968 FETCH( &r[1], 2, chan_index );
1969 micro_add( &r[0], &r[0], &r[1] );
1970 STORE( &r[0], 0, chan_index );
1971 }
1972 break;
1973
1974 case TGSI_OPCODE_SUB:
1975 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1976 FETCH(&r[0], 0, chan_index);
1977 FETCH(&r[1], 1, chan_index);
1978
1979 micro_sub( &r[0], &r[0], &r[1] );
1980
1981 STORE(&r[0], 0, chan_index);
1982 }
1983 break;
1984
1985 case TGSI_OPCODE_LERP:
1986 /* TGSI_OPCODE_LRP */
1987 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1988 FETCH(&r[0], 0, chan_index);
1989 FETCH(&r[1], 1, chan_index);
1990 FETCH(&r[2], 2, chan_index);
1991
1992 micro_sub( &r[1], &r[1], &r[2] );
1993 micro_mul( &r[0], &r[0], &r[1] );
1994 micro_add( &r[0], &r[0], &r[2] );
1995
1996 STORE(&r[0], 0, chan_index);
1997 }
1998 break;
1999
2000 case TGSI_OPCODE_CND:
2001 assert (0);
2002 break;
2003
2004 case TGSI_OPCODE_CND0:
2005 assert (0);
2006 break;
2007
2008 case TGSI_OPCODE_DOT2ADD:
2009 /* TGSI_OPCODE_DP2A */
2010 assert (0);
2011 break;
2012
2013 case TGSI_OPCODE_INDEX:
2014 assert (0);
2015 break;
2016
2017 case TGSI_OPCODE_NEGATE:
2018 assert (0);
2019 break;
2020
2021 case TGSI_OPCODE_FRAC:
2022 /* TGSI_OPCODE_FRC */
2023 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2024 FETCH( &r[0], 0, chan_index );
2025 micro_frc( &r[0], &r[0] );
2026 STORE( &r[0], 0, chan_index );
2027 }
2028 break;
2029
2030 case TGSI_OPCODE_CLAMP:
2031 assert (0);
2032 break;
2033
2034 case TGSI_OPCODE_FLOOR:
2035 /* TGSI_OPCODE_FLR */
2036 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2037 FETCH( &r[0], 0, chan_index );
2038 micro_flr( &r[0], &r[0] );
2039 STORE( &r[0], 0, chan_index );
2040 }
2041 break;
2042
2043 case TGSI_OPCODE_ROUND:
2044 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2045 FETCH( &r[0], 0, chan_index );
2046 micro_rnd( &r[0], &r[0] );
2047 STORE( &r[0], 0, chan_index );
2048 }
2049 break;
2050
2051 case TGSI_OPCODE_EXPBASE2:
2052 /* TGSI_OPCODE_EX2 */
2053 FETCH(&r[0], 0, CHAN_X);
2054
2055 #if FAST_MATH
2056 micro_exp2( &r[0], &r[0] );
2057 #else
2058 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2059 #endif
2060
2061 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2062 STORE( &r[0], 0, chan_index );
2063 }
2064 break;
2065
2066 case TGSI_OPCODE_LOGBASE2:
2067 /* TGSI_OPCODE_LG2 */
2068 FETCH( &r[0], 0, CHAN_X );
2069 micro_lg2( &r[0], &r[0] );
2070 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2071 STORE( &r[0], 0, chan_index );
2072 }
2073 break;
2074
2075 case TGSI_OPCODE_POWER:
2076 /* TGSI_OPCODE_POW */
2077 FETCH(&r[0], 0, CHAN_X);
2078 FETCH(&r[1], 1, CHAN_X);
2079
2080 micro_pow( &r[0], &r[0], &r[1] );
2081
2082 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2083 STORE( &r[0], 0, chan_index );
2084 }
2085 break;
2086
2087 case TGSI_OPCODE_CROSSPRODUCT:
2088 /* TGSI_OPCODE_XPD */
2089 FETCH(&r[0], 0, CHAN_Y);
2090 FETCH(&r[1], 1, CHAN_Z);
2091
2092 micro_mul( &r[2], &r[0], &r[1] );
2093
2094 FETCH(&r[3], 0, CHAN_Z);
2095 FETCH(&r[4], 1, CHAN_Y);
2096
2097 micro_mul( &r[5], &r[3], &r[4] );
2098 micro_sub( &r[2], &r[2], &r[5] );
2099
2100 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2101 STORE( &r[2], 0, CHAN_X );
2102 }
2103
2104 FETCH(&r[2], 1, CHAN_X);
2105
2106 micro_mul( &r[3], &r[3], &r[2] );
2107
2108 FETCH(&r[5], 0, CHAN_X);
2109
2110 micro_mul( &r[1], &r[1], &r[5] );
2111 micro_sub( &r[3], &r[3], &r[1] );
2112
2113 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2114 STORE( &r[3], 0, CHAN_Y );
2115 }
2116
2117 micro_mul( &r[5], &r[5], &r[4] );
2118 micro_mul( &r[0], &r[0], &r[2] );
2119 micro_sub( &r[5], &r[5], &r[0] );
2120
2121 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2122 STORE( &r[5], 0, CHAN_Z );
2123 }
2124
2125 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2126 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2127 }
2128 break;
2129
2130 case TGSI_OPCODE_MULTIPLYMATRIX:
2131 assert (0);
2132 break;
2133
2134 case TGSI_OPCODE_ABS:
2135 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2136 FETCH(&r[0], 0, chan_index);
2137
2138 micro_abs( &r[0], &r[0] );
2139
2140 STORE(&r[0], 0, chan_index);
2141 }
2142 break;
2143
2144 case TGSI_OPCODE_RCC:
2145 assert (0);
2146 break;
2147
2148 case TGSI_OPCODE_DPH:
2149 FETCH(&r[0], 0, CHAN_X);
2150 FETCH(&r[1], 1, CHAN_X);
2151
2152 micro_mul( &r[0], &r[0], &r[1] );
2153
2154 FETCH(&r[1], 0, CHAN_Y);
2155 FETCH(&r[2], 1, CHAN_Y);
2156
2157 micro_mul( &r[1], &r[1], &r[2] );
2158 micro_add( &r[0], &r[0], &r[1] );
2159
2160 FETCH(&r[1], 0, CHAN_Z);
2161 FETCH(&r[2], 1, CHAN_Z);
2162
2163 micro_mul( &r[1], &r[1], &r[2] );
2164 micro_add( &r[0], &r[0], &r[1] );
2165
2166 FETCH(&r[1], 1, CHAN_W);
2167
2168 micro_add( &r[0], &r[0], &r[1] );
2169
2170 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2171 STORE( &r[0], 0, chan_index );
2172 }
2173 break;
2174
2175 case TGSI_OPCODE_COS:
2176 FETCH(&r[0], 0, CHAN_X);
2177
2178 micro_cos( &r[0], &r[0] );
2179
2180 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2181 STORE( &r[0], 0, chan_index );
2182 }
2183 break;
2184
2185 case TGSI_OPCODE_DDX:
2186 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2187 FETCH( &r[0], 0, chan_index );
2188 micro_ddx( &r[0], &r[0] );
2189 STORE( &r[0], 0, chan_index );
2190 }
2191 break;
2192
2193 case TGSI_OPCODE_DDY:
2194 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2195 FETCH( &r[0], 0, chan_index );
2196 micro_ddy( &r[0], &r[0] );
2197 STORE( &r[0], 0, chan_index );
2198 }
2199 break;
2200
2201 case TGSI_OPCODE_KILP:
2202 exec_kilp (mach, inst);
2203 break;
2204
2205 case TGSI_OPCODE_KIL:
2206 exec_kil (mach, inst);
2207 break;
2208
2209 case TGSI_OPCODE_PK2H:
2210 assert (0);
2211 break;
2212
2213 case TGSI_OPCODE_PK2US:
2214 assert (0);
2215 break;
2216
2217 case TGSI_OPCODE_PK4B:
2218 assert (0);
2219 break;
2220
2221 case TGSI_OPCODE_PK4UB:
2222 assert (0);
2223 break;
2224
2225 case TGSI_OPCODE_RFL:
2226 assert (0);
2227 break;
2228
2229 case TGSI_OPCODE_SEQ:
2230 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2231 FETCH( &r[0], 0, chan_index );
2232 FETCH( &r[1], 1, chan_index );
2233 micro_eq( &r[0], &r[0], &r[1],
2234 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2235 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2236 STORE( &r[0], 0, chan_index );
2237 }
2238 break;
2239
2240 case TGSI_OPCODE_SFL:
2241 assert (0);
2242 break;
2243
2244 case TGSI_OPCODE_SGT:
2245 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2246 FETCH( &r[0], 0, chan_index );
2247 FETCH( &r[1], 1, chan_index );
2248 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2249 STORE( &r[0], 0, chan_index );
2250 }
2251 break;
2252
2253 case TGSI_OPCODE_SIN:
2254 FETCH( &r[0], 0, CHAN_X );
2255 micro_sin( &r[0], &r[0] );
2256 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2257 STORE( &r[0], 0, chan_index );
2258 }
2259 break;
2260
2261 case TGSI_OPCODE_SLE:
2262 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2263 FETCH( &r[0], 0, chan_index );
2264 FETCH( &r[1], 1, chan_index );
2265 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2266 STORE( &r[0], 0, chan_index );
2267 }
2268 break;
2269
2270 case TGSI_OPCODE_SNE:
2271 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2272 FETCH( &r[0], 0, chan_index );
2273 FETCH( &r[1], 1, chan_index );
2274 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2275 STORE( &r[0], 0, chan_index );
2276 }
2277 break;
2278
2279 case TGSI_OPCODE_STR:
2280 assert (0);
2281 break;
2282
2283 case TGSI_OPCODE_TEX:
2284 /* simple texture lookup */
2285 /* src[0] = texcoord */
2286 /* src[1] = sampler unit */
2287 exec_tex(mach, inst, FALSE, FALSE);
2288 break;
2289
2290 case TGSI_OPCODE_TXB:
2291 /* Texture lookup with lod bias */
2292 /* src[0] = texcoord (src[0].w = LOD bias) */
2293 /* src[1] = sampler unit */
2294 exec_tex(mach, inst, TRUE, FALSE);
2295 break;
2296
2297 case TGSI_OPCODE_TXD:
2298 /* Texture lookup with explicit partial derivatives */
2299 /* src[0] = texcoord */
2300 /* src[1] = d[strq]/dx */
2301 /* src[2] = d[strq]/dy */
2302 /* src[3] = sampler unit */
2303 assert (0);
2304 break;
2305
2306 case TGSI_OPCODE_TXL:
2307 /* Texture lookup with explicit LOD */
2308 /* src[0] = texcoord (src[0].w = LOD) */
2309 /* src[1] = sampler unit */
2310 exec_tex(mach, inst, TRUE, FALSE);
2311 break;
2312
2313 case TGSI_OPCODE_TXP:
2314 /* Texture lookup with projection */
2315 /* src[0] = texcoord (src[0].w = projection) */
2316 /* src[1] = sampler unit */
2317 exec_tex(mach, inst, FALSE, TRUE);
2318 break;
2319
2320 case TGSI_OPCODE_UP2H:
2321 assert (0);
2322 break;
2323
2324 case TGSI_OPCODE_UP2US:
2325 assert (0);
2326 break;
2327
2328 case TGSI_OPCODE_UP4B:
2329 assert (0);
2330 break;
2331
2332 case TGSI_OPCODE_UP4UB:
2333 assert (0);
2334 break;
2335
2336 case TGSI_OPCODE_X2D:
2337 assert (0);
2338 break;
2339
2340 case TGSI_OPCODE_ARA:
2341 assert (0);
2342 break;
2343
2344 case TGSI_OPCODE_ARR:
2345 assert (0);
2346 break;
2347
2348 case TGSI_OPCODE_BRA:
2349 assert (0);
2350 break;
2351
2352 case TGSI_OPCODE_CAL:
2353 /* skip the call if no execution channels are enabled */
2354 if (mach->ExecMask) {
2355 /* do the call */
2356
2357 /* push the Cond, Loop, Cont stacks */
2358 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2359 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2360 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2361 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2362 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2363 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2364
2365 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2366 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2367
2368 /* note that PC was already incremented above */
2369 mach->CallStack[mach->CallStackTop++] = *pc;
2370 *pc = inst->InstructionExtLabel.Label;
2371 }
2372 break;
2373
2374 case TGSI_OPCODE_RET:
2375 mach->FuncMask &= ~mach->ExecMask;
2376 UPDATE_EXEC_MASK(mach);
2377
2378 if (mach->FuncMask == 0x0) {
2379 /* really return now (otherwise, keep executing) */
2380
2381 if (mach->CallStackTop == 0) {
2382 /* returning from main() */
2383 *pc = -1;
2384 return;
2385 }
2386 *pc = mach->CallStack[--mach->CallStackTop];
2387
2388 /* pop the Cond, Loop, Cont stacks */
2389 assert(mach->CondStackTop > 0);
2390 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2391 assert(mach->LoopStackTop > 0);
2392 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2393 assert(mach->ContStackTop > 0);
2394 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2395 assert(mach->FuncStackTop > 0);
2396 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2397
2398 UPDATE_EXEC_MASK(mach);
2399 }
2400 break;
2401
2402 case TGSI_OPCODE_SSG:
2403 assert (0);
2404 break;
2405
2406 case TGSI_OPCODE_CMP:
2407 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2408 FETCH(&r[0], 0, chan_index);
2409 FETCH(&r[1], 1, chan_index);
2410 FETCH(&r[2], 2, chan_index);
2411
2412 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2413
2414 STORE(&r[0], 0, chan_index);
2415 }
2416 break;
2417
2418 case TGSI_OPCODE_SCS:
2419 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2420 FETCH( &r[0], 0, CHAN_X );
2421 }
2422 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2423 micro_cos( &r[1], &r[0] );
2424 STORE( &r[1], 0, CHAN_X );
2425 }
2426 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2427 micro_sin( &r[1], &r[0] );
2428 STORE( &r[1], 0, CHAN_Y );
2429 }
2430 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2431 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2432 }
2433 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2434 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2435 }
2436 break;
2437
2438 case TGSI_OPCODE_NRM:
2439 assert (0);
2440 break;
2441
2442 case TGSI_OPCODE_DIV:
2443 assert( 0 );
2444 break;
2445
2446 case TGSI_OPCODE_DP2:
2447 FETCH( &r[0], 0, CHAN_X );
2448 FETCH( &r[1], 1, CHAN_X );
2449 micro_mul( &r[0], &r[0], &r[1] );
2450
2451 FETCH( &r[1], 0, CHAN_Y );
2452 FETCH( &r[2], 1, CHAN_Y );
2453 micro_mul( &r[1], &r[1], &r[2] );
2454 micro_add( &r[0], &r[0], &r[1] );
2455
2456 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2457 STORE( &r[0], 0, chan_index );
2458 }
2459 break;
2460
2461 case TGSI_OPCODE_IF:
2462 /* push CondMask */
2463 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2464 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2465 FETCH( &r[0], 0, CHAN_X );
2466 /* update CondMask */
2467 if( ! r[0].u[0] ) {
2468 mach->CondMask &= ~0x1;
2469 }
2470 if( ! r[0].u[1] ) {
2471 mach->CondMask &= ~0x2;
2472 }
2473 if( ! r[0].u[2] ) {
2474 mach->CondMask &= ~0x4;
2475 }
2476 if( ! r[0].u[3] ) {
2477 mach->CondMask &= ~0x8;
2478 }
2479 UPDATE_EXEC_MASK(mach);
2480 /* Todo: If CondMask==0, jump to ELSE */
2481 break;
2482
2483 case TGSI_OPCODE_ELSE:
2484 /* invert CondMask wrt previous mask */
2485 {
2486 uint prevMask;
2487 assert(mach->CondStackTop > 0);
2488 prevMask = mach->CondStack[mach->CondStackTop - 1];
2489 mach->CondMask = ~mach->CondMask & prevMask;
2490 UPDATE_EXEC_MASK(mach);
2491 /* Todo: If CondMask==0, jump to ENDIF */
2492 }
2493 break;
2494
2495 case TGSI_OPCODE_ENDIF:
2496 /* pop CondMask */
2497 assert(mach->CondStackTop > 0);
2498 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2499 UPDATE_EXEC_MASK(mach);
2500 break;
2501
2502 case TGSI_OPCODE_END:
2503 /* halt execution */
2504 *pc = -1;
2505 break;
2506
2507 case TGSI_OPCODE_REP:
2508 assert (0);
2509 break;
2510
2511 case TGSI_OPCODE_ENDREP:
2512 assert (0);
2513 break;
2514
2515 case TGSI_OPCODE_PUSHA:
2516 assert (0);
2517 break;
2518
2519 case TGSI_OPCODE_POPA:
2520 assert (0);
2521 break;
2522
2523 case TGSI_OPCODE_CEIL:
2524 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2525 FETCH( &r[0], 0, chan_index );
2526 micro_ceil( &r[0], &r[0] );
2527 STORE( &r[0], 0, chan_index );
2528 }
2529 break;
2530
2531 case TGSI_OPCODE_I2F:
2532 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2533 FETCH( &r[0], 0, chan_index );
2534 micro_i2f( &r[0], &r[0] );
2535 STORE( &r[0], 0, chan_index );
2536 }
2537 break;
2538
2539 case TGSI_OPCODE_NOT:
2540 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2541 FETCH( &r[0], 0, chan_index );
2542 micro_not( &r[0], &r[0] );
2543 STORE( &r[0], 0, chan_index );
2544 }
2545 break;
2546
2547 case TGSI_OPCODE_TRUNC:
2548 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2549 FETCH( &r[0], 0, chan_index );
2550 micro_trunc( &r[0], &r[0] );
2551 STORE( &r[0], 0, chan_index );
2552 }
2553 break;
2554
2555 case TGSI_OPCODE_SHL:
2556 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2557 FETCH( &r[0], 0, chan_index );
2558 FETCH( &r[1], 1, chan_index );
2559 micro_shl( &r[0], &r[0], &r[1] );
2560 STORE( &r[0], 0, chan_index );
2561 }
2562 break;
2563
2564 case TGSI_OPCODE_SHR:
2565 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2566 FETCH( &r[0], 0, chan_index );
2567 FETCH( &r[1], 1, chan_index );
2568 micro_ishr( &r[0], &r[0], &r[1] );
2569 STORE( &r[0], 0, chan_index );
2570 }
2571 break;
2572
2573 case TGSI_OPCODE_AND:
2574 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2575 FETCH( &r[0], 0, chan_index );
2576 FETCH( &r[1], 1, chan_index );
2577 micro_and( &r[0], &r[0], &r[1] );
2578 STORE( &r[0], 0, chan_index );
2579 }
2580 break;
2581
2582 case TGSI_OPCODE_OR:
2583 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2584 FETCH( &r[0], 0, chan_index );
2585 FETCH( &r[1], 1, chan_index );
2586 micro_or( &r[0], &r[0], &r[1] );
2587 STORE( &r[0], 0, chan_index );
2588 }
2589 break;
2590
2591 case TGSI_OPCODE_MOD:
2592 assert (0);
2593 break;
2594
2595 case TGSI_OPCODE_XOR:
2596 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2597 FETCH( &r[0], 0, chan_index );
2598 FETCH( &r[1], 1, chan_index );
2599 micro_xor( &r[0], &r[0], &r[1] );
2600 STORE( &r[0], 0, chan_index );
2601 }
2602 break;
2603
2604 case TGSI_OPCODE_SAD:
2605 assert (0);
2606 break;
2607
2608 case TGSI_OPCODE_TXF:
2609 assert (0);
2610 break;
2611
2612 case TGSI_OPCODE_TXQ:
2613 assert (0);
2614 break;
2615
2616 case TGSI_OPCODE_EMIT:
2617 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2618 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2619 break;
2620
2621 case TGSI_OPCODE_ENDPRIM:
2622 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2623 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2624 break;
2625
2626 case TGSI_OPCODE_LOOP:
2627 /* fall-through (for now) */
2628 case TGSI_OPCODE_BGNLOOP2:
2629 /* push LoopMask and ContMasks */
2630 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2631 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2632 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2633 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2634 break;
2635
2636 case TGSI_OPCODE_ENDLOOP:
2637 /* fall-through (for now at least) */
2638 case TGSI_OPCODE_ENDLOOP2:
2639 /* Restore ContMask, but don't pop */
2640 assert(mach->ContStackTop > 0);
2641 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2642 UPDATE_EXEC_MASK(mach);
2643 if (mach->ExecMask) {
2644 /* repeat loop: jump to instruction just past BGNLOOP */
2645 *pc = inst->InstructionExtLabel.Label + 1;
2646 }
2647 else {
2648 /* exit loop: pop LoopMask */
2649 assert(mach->LoopStackTop > 0);
2650 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2651 /* pop ContMask */
2652 assert(mach->ContStackTop > 0);
2653 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2654 }
2655 UPDATE_EXEC_MASK(mach);
2656 break;
2657
2658 case TGSI_OPCODE_BRK:
2659 /* turn off loop channels for each enabled exec channel */
2660 mach->LoopMask &= ~mach->ExecMask;
2661 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2662 UPDATE_EXEC_MASK(mach);
2663 break;
2664
2665 case TGSI_OPCODE_CONT:
2666 /* turn off cont channels for each enabled exec channel */
2667 mach->ContMask &= ~mach->ExecMask;
2668 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2669 UPDATE_EXEC_MASK(mach);
2670 break;
2671
2672 case TGSI_OPCODE_BGNSUB:
2673 /* no-op */
2674 break;
2675
2676 case TGSI_OPCODE_ENDSUB:
2677 /* no-op */
2678 break;
2679
2680 case TGSI_OPCODE_NOISE1:
2681 assert( 0 );
2682 break;
2683
2684 case TGSI_OPCODE_NOISE2:
2685 assert( 0 );
2686 break;
2687
2688 case TGSI_OPCODE_NOISE3:
2689 assert( 0 );
2690 break;
2691
2692 case TGSI_OPCODE_NOISE4:
2693 assert( 0 );
2694 break;
2695
2696 case TGSI_OPCODE_NOP:
2697 break;
2698
2699 default:
2700 assert( 0 );
2701 }
2702 }
2703
2704
2705 /**
2706 * Run TGSI interpreter.
2707 * \return bitmask of "alive" quad components
2708 */
2709 uint
2710 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2711 {
2712 uint i;
2713 int pc = 0;
2714
2715 mach->CondMask = 0xf;
2716 mach->LoopMask = 0xf;
2717 mach->ContMask = 0xf;
2718 mach->FuncMask = 0xf;
2719 mach->ExecMask = 0xf;
2720
2721 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2722 assert(mach->CondStackTop == 0);
2723 assert(mach->LoopStackTop == 0);
2724 assert(mach->ContStackTop == 0);
2725 assert(mach->CallStackTop == 0);
2726
2727 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2728 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2729
2730 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2731 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2732 mach->Primitives[0] = 0;
2733 }
2734
2735 for (i = 0; i < QUAD_SIZE; i++) {
2736 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2737 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2738 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2739 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2740 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2741 }
2742
2743 /* execute declarations (interpolants) */
2744 for (i = 0; i < mach->NumDeclarations; i++) {
2745 exec_declaration( mach, mach->Declarations+i );
2746 }
2747
2748 /* execute instructions, until pc is set to -1 */
2749 while (pc != -1) {
2750 assert(pc < (int) mach->NumInstructions);
2751 exec_instruction( mach, mach->Instructions + pc, &pc );
2752 }
2753
2754 #if 0
2755 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2756 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2757 /*
2758 * Scale back depth component.
2759 */
2760 for (i = 0; i < 4; i++)
2761 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2762 }
2763 #endif
2764
2765 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2766 }
2767
2768