Merge commit 'origin/gallium-0.1' into gallium-0.2
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK), which are controlled by
 * the flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_parse.h"
57 #include "tgsi/tgsi_util.h"
58 #include "tgsi_exec.h"
59 #include "util/u_memory.h"
60 #include "util/u_math.h"
61
62 #define FAST_MATH 1
63
64 #define TILE_TOP_LEFT 0
65 #define TILE_TOP_RIGHT 1
66 #define TILE_BOTTOM_LEFT 2
67 #define TILE_BOTTOM_RIGHT 3
68
69 #define CHAN_X 0
70 #define CHAN_Y 1
71 #define CHAN_Z 2
72 #define CHAN_W 3
73
74 /*
75 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
76 */
77 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
78 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
79 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
80 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
81 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
82 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
83 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
84 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
85 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
86 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
87 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
88 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
89 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
90 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
91 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
92 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
93 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
94 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
95 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
96 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
97 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
98 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
99 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
100 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
101 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
102 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
103 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
104 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
105 #define TEMP_R0 TGSI_EXEC_TEMP_R0
106
/**
 * Test whether channel CHAN is set in the write mask of destination
 * register 0 of instruction INST.  Evaluates to non-zero when enabled.
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test as IS_CHANNEL_ENABLED, but for destination register 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the statement that follows once for every channel
 * index in [0, NUM_CHANNELS) that is enabled in destination register 0.
 * CHAN must be an assignable integer lvalue; it is parenthesized so that
 * any lvalue expression can be passed safely.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same as FOR_EACH_ENABLED_CHANNEL, but for destination register 1. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
120
121
/**
 * Recompute MACH->ExecMask as the bitwise AND of CondMask, LoopMask,
 * ContMask and FuncMask.  The argument and the whole expansion are
 * parenthesized so the macro is safe with any pointer expression and
 * behaves as a single expression.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & (MACH)->ContMask & (MACH)->FuncMask)
125
126 /**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131 void
132 tgsi_exec_machine_bind_shader(
133 struct tgsi_exec_machine *mach,
134 const struct tgsi_token *tokens,
135 uint numSamplers,
136 struct tgsi_sampler **samplers)
137 {
138 uint k;
139 struct tgsi_parse_context parse;
140 struct tgsi_exec_labels *labels = &mach->Labels;
141 struct tgsi_full_instruction *instructions;
142 struct tgsi_full_declaration *declarations;
143 uint maxInstructions = 10, numInstructions = 0;
144 uint maxDeclarations = 10, numDeclarations = 0;
145 uint instno = 0;
146
147 #if 0
148 tgsi_dump(tokens, 0);
149 #endif
150
151 util_init_math();
152
153 mach->Tokens = tokens;
154 mach->Samplers = samplers;
155
156 k = tgsi_parse_init (&parse, mach->Tokens);
157 if (k != TGSI_PARSE_OK) {
158 debug_printf( "Problem parsing!\n" );
159 return;
160 }
161
162 mach->Processor = parse.FullHeader.Processor.Processor;
163 mach->ImmLimit = 0;
164 labels->count = 0;
165
166 declarations = (struct tgsi_full_declaration *)
167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169 if (!declarations) {
170 return;
171 }
172
173 instructions = (struct tgsi_full_instruction *)
174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176 if (!instructions) {
177 FREE( declarations );
178 return;
179 }
180
181 while( !tgsi_parse_end_of_tokens( &parse ) ) {
182 uint pointer = parse.Position;
183 uint i;
184
185 tgsi_parse_token( &parse );
186 switch( parse.FullToken.Token.Type ) {
187 case TGSI_TOKEN_TYPE_DECLARATION:
188 /* save expanded declaration */
189 if (numDeclarations == maxDeclarations) {
190 declarations = REALLOC(declarations,
191 maxDeclarations
192 * sizeof(struct tgsi_full_declaration),
193 (maxDeclarations + 10)
194 * sizeof(struct tgsi_full_declaration));
195 maxDeclarations += 10;
196 }
197 memcpy(declarations + numDeclarations,
198 &parse.FullToken.FullDeclaration,
199 sizeof(declarations[0]));
200 numDeclarations++;
201 break;
202
203 case TGSI_TOKEN_TYPE_IMMEDIATE:
204 {
205 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
206 assert( size % 4 == 0 );
207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209 for( i = 0; i < size; i++ ) {
210 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212 }
213 mach->ImmLimit += size / 4;
214 }
215 break;
216
217 case TGSI_TOKEN_TYPE_INSTRUCTION:
218 assert( labels->count < MAX_LABELS );
219
220 labels->labels[labels->count][0] = instno;
221 labels->labels[labels->count][1] = pointer;
222 labels->count++;
223
224 /* save expanded instruction */
225 if (numInstructions == maxInstructions) {
226 instructions = REALLOC(instructions,
227 maxInstructions
228 * sizeof(struct tgsi_full_instruction),
229 (maxInstructions + 10)
230 * sizeof(struct tgsi_full_instruction));
231 maxInstructions += 10;
232 }
233 memcpy(instructions + numInstructions,
234 &parse.FullToken.FullInstruction,
235 sizeof(instructions[0]));
236 numInstructions++;
237 break;
238
239 default:
240 assert( 0 );
241 }
242 }
243 tgsi_parse_free (&parse);
244
245 if (mach->Declarations) {
246 FREE( mach->Declarations );
247 }
248 mach->Declarations = declarations;
249 mach->NumDeclarations = numDeclarations;
250
251 if (mach->Instructions) {
252 FREE( mach->Instructions );
253 }
254 mach->Instructions = instructions;
255 mach->NumInstructions = numInstructions;
256 }
257
258
259 void
260 tgsi_exec_machine_init(
261 struct tgsi_exec_machine *mach )
262 {
263 uint i;
264
265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268 /* Setup constants. */
269 for( i = 0; i < 4; i++ ) {
270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280 }
281 }
282
283
284 void
285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286 {
287 if (mach->Instructions) {
288 FREE(mach->Instructions);
289 mach->Instructions = NULL;
290 mach->NumInstructions = 0;
291 }
292 if (mach->Declarations) {
293 FREE(mach->Declarations);
294 mach->Declarations = NULL;
295 mach->NumDeclarations = 0;
296 }
297 }
298
299
300 static void
301 micro_abs(
302 union tgsi_exec_channel *dst,
303 const union tgsi_exec_channel *src )
304 {
305 dst->f[0] = fabsf( src->f[0] );
306 dst->f[1] = fabsf( src->f[1] );
307 dst->f[2] = fabsf( src->f[2] );
308 dst->f[3] = fabsf( src->f[3] );
309 }
310
311 static void
312 micro_add(
313 union tgsi_exec_channel *dst,
314 const union tgsi_exec_channel *src0,
315 const union tgsi_exec_channel *src1 )
316 {
317 dst->f[0] = src0->f[0] + src1->f[0];
318 dst->f[1] = src0->f[1] + src1->f[1];
319 dst->f[2] = src0->f[2] + src1->f[2];
320 dst->f[3] = src0->f[3] + src1->f[3];
321 }
322
323 static void
324 micro_iadd(
325 union tgsi_exec_channel *dst,
326 const union tgsi_exec_channel *src0,
327 const union tgsi_exec_channel *src1 )
328 {
329 dst->i[0] = src0->i[0] + src1->i[0];
330 dst->i[1] = src0->i[1] + src1->i[1];
331 dst->i[2] = src0->i[2] + src1->i[2];
332 dst->i[3] = src0->i[3] + src1->i[3];
333 }
334
335 static void
336 micro_and(
337 union tgsi_exec_channel *dst,
338 const union tgsi_exec_channel *src0,
339 const union tgsi_exec_channel *src1 )
340 {
341 dst->u[0] = src0->u[0] & src1->u[0];
342 dst->u[1] = src0->u[1] & src1->u[1];
343 dst->u[2] = src0->u[2] & src1->u[2];
344 dst->u[3] = src0->u[3] & src1->u[3];
345 }
346
347 static void
348 micro_ceil(
349 union tgsi_exec_channel *dst,
350 const union tgsi_exec_channel *src )
351 {
352 dst->f[0] = ceilf( src->f[0] );
353 dst->f[1] = ceilf( src->f[1] );
354 dst->f[2] = ceilf( src->f[2] );
355 dst->f[3] = ceilf( src->f[3] );
356 }
357
358 static void
359 micro_cos(
360 union tgsi_exec_channel *dst,
361 const union tgsi_exec_channel *src )
362 {
363 dst->f[0] = cosf( src->f[0] );
364 dst->f[1] = cosf( src->f[1] );
365 dst->f[2] = cosf( src->f[2] );
366 dst->f[3] = cosf( src->f[3] );
367 }
368
369 static void
370 micro_ddx(
371 union tgsi_exec_channel *dst,
372 const union tgsi_exec_channel *src )
373 {
374 dst->f[0] =
375 dst->f[1] =
376 dst->f[2] =
377 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
378 }
379
380 static void
381 micro_ddy(
382 union tgsi_exec_channel *dst,
383 const union tgsi_exec_channel *src )
384 {
385 dst->f[0] =
386 dst->f[1] =
387 dst->f[2] =
388 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
389 }
390
391 static void
392 micro_div(
393 union tgsi_exec_channel *dst,
394 const union tgsi_exec_channel *src0,
395 const union tgsi_exec_channel *src1 )
396 {
397 if (src1->f[0] != 0) {
398 dst->f[0] = src0->f[0] / src1->f[0];
399 }
400 if (src1->f[1] != 0) {
401 dst->f[1] = src0->f[1] / src1->f[1];
402 }
403 if (src1->f[2] != 0) {
404 dst->f[2] = src0->f[2] / src1->f[2];
405 }
406 if (src1->f[3] != 0) {
407 dst->f[3] = src0->f[3] / src1->f[3];
408 }
409 }
410
411 static void
412 micro_udiv(
413 union tgsi_exec_channel *dst,
414 const union tgsi_exec_channel *src0,
415 const union tgsi_exec_channel *src1 )
416 {
417 dst->u[0] = src0->u[0] / src1->u[0];
418 dst->u[1] = src0->u[1] / src1->u[1];
419 dst->u[2] = src0->u[2] / src1->u[2];
420 dst->u[3] = src0->u[3] / src1->u[3];
421 }
422
423 static void
424 micro_eq(
425 union tgsi_exec_channel *dst,
426 const union tgsi_exec_channel *src0,
427 const union tgsi_exec_channel *src1,
428 const union tgsi_exec_channel *src2,
429 const union tgsi_exec_channel *src3 )
430 {
431 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
432 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
433 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
434 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
435 }
436
437 static void
438 micro_ieq(
439 union tgsi_exec_channel *dst,
440 const union tgsi_exec_channel *src0,
441 const union tgsi_exec_channel *src1,
442 const union tgsi_exec_channel *src2,
443 const union tgsi_exec_channel *src3 )
444 {
445 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
446 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
447 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
448 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
449 }
450
451 static void
452 micro_exp2(
453 union tgsi_exec_channel *dst,
454 const union tgsi_exec_channel *src)
455 {
456 #if FAST_MATH
457 dst->f[0] = util_fast_exp2( src->f[0] );
458 dst->f[1] = util_fast_exp2( src->f[1] );
459 dst->f[2] = util_fast_exp2( src->f[2] );
460 dst->f[3] = util_fast_exp2( src->f[3] );
461 #else
462 dst->f[0] = powf( 2.0f, src->f[0] );
463 dst->f[1] = powf( 2.0f, src->f[1] );
464 dst->f[2] = powf( 2.0f, src->f[2] );
465 dst->f[3] = powf( 2.0f, src->f[3] );
466 #endif
467 }
468
469 static void
470 micro_f2ut(
471 union tgsi_exec_channel *dst,
472 const union tgsi_exec_channel *src )
473 {
474 dst->u[0] = (uint) src->f[0];
475 dst->u[1] = (uint) src->f[1];
476 dst->u[2] = (uint) src->f[2];
477 dst->u[3] = (uint) src->f[3];
478 }
479
480 static void
481 micro_flr(
482 union tgsi_exec_channel *dst,
483 const union tgsi_exec_channel *src )
484 {
485 dst->f[0] = floorf( src->f[0] );
486 dst->f[1] = floorf( src->f[1] );
487 dst->f[2] = floorf( src->f[2] );
488 dst->f[3] = floorf( src->f[3] );
489 }
490
491 static void
492 micro_frc(
493 union tgsi_exec_channel *dst,
494 const union tgsi_exec_channel *src )
495 {
496 dst->f[0] = src->f[0] - floorf( src->f[0] );
497 dst->f[1] = src->f[1] - floorf( src->f[1] );
498 dst->f[2] = src->f[2] - floorf( src->f[2] );
499 dst->f[3] = src->f[3] - floorf( src->f[3] );
500 }
501
502 static void
503 micro_ge(
504 union tgsi_exec_channel *dst,
505 const union tgsi_exec_channel *src0,
506 const union tgsi_exec_channel *src1,
507 const union tgsi_exec_channel *src2,
508 const union tgsi_exec_channel *src3 )
509 {
510 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
511 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
512 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
513 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
514 }
515
516 static void
517 micro_i2f(
518 union tgsi_exec_channel *dst,
519 const union tgsi_exec_channel *src )
520 {
521 dst->f[0] = (float) src->i[0];
522 dst->f[1] = (float) src->i[1];
523 dst->f[2] = (float) src->i[2];
524 dst->f[3] = (float) src->i[3];
525 }
526
527 static void
528 micro_lg2(
529 union tgsi_exec_channel *dst,
530 const union tgsi_exec_channel *src )
531 {
532 #if FAST_MATH
533 dst->f[0] = util_fast_log2( src->f[0] );
534 dst->f[1] = util_fast_log2( src->f[1] );
535 dst->f[2] = util_fast_log2( src->f[2] );
536 dst->f[3] = util_fast_log2( src->f[3] );
537 #else
538 dst->f[0] = logf( src->f[0] ) * 1.442695f;
539 dst->f[1] = logf( src->f[1] ) * 1.442695f;
540 dst->f[2] = logf( src->f[2] ) * 1.442695f;
541 dst->f[3] = logf( src->f[3] ) * 1.442695f;
542 #endif
543 }
544
545 static void
546 micro_le(
547 union tgsi_exec_channel *dst,
548 const union tgsi_exec_channel *src0,
549 const union tgsi_exec_channel *src1,
550 const union tgsi_exec_channel *src2,
551 const union tgsi_exec_channel *src3 )
552 {
553 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
554 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
555 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
556 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
557 }
558
559 static void
560 micro_lt(
561 union tgsi_exec_channel *dst,
562 const union tgsi_exec_channel *src0,
563 const union tgsi_exec_channel *src1,
564 const union tgsi_exec_channel *src2,
565 const union tgsi_exec_channel *src3 )
566 {
567 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
568 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
569 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
570 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
571 }
572
573 static void
574 micro_ilt(
575 union tgsi_exec_channel *dst,
576 const union tgsi_exec_channel *src0,
577 const union tgsi_exec_channel *src1,
578 const union tgsi_exec_channel *src2,
579 const union tgsi_exec_channel *src3 )
580 {
581 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
582 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
583 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
584 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
585 }
586
587 static void
588 micro_ult(
589 union tgsi_exec_channel *dst,
590 const union tgsi_exec_channel *src0,
591 const union tgsi_exec_channel *src1,
592 const union tgsi_exec_channel *src2,
593 const union tgsi_exec_channel *src3 )
594 {
595 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
596 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
597 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
598 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
599 }
600
601 static void
602 micro_max(
603 union tgsi_exec_channel *dst,
604 const union tgsi_exec_channel *src0,
605 const union tgsi_exec_channel *src1 )
606 {
607 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
608 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
609 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
610 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
611 }
612
613 static void
614 micro_imax(
615 union tgsi_exec_channel *dst,
616 const union tgsi_exec_channel *src0,
617 const union tgsi_exec_channel *src1 )
618 {
619 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
620 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
621 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
622 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
623 }
624
625 static void
626 micro_umax(
627 union tgsi_exec_channel *dst,
628 const union tgsi_exec_channel *src0,
629 const union tgsi_exec_channel *src1 )
630 {
631 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
632 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
633 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
634 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
635 }
636
637 static void
638 micro_min(
639 union tgsi_exec_channel *dst,
640 const union tgsi_exec_channel *src0,
641 const union tgsi_exec_channel *src1 )
642 {
643 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
644 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
645 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
646 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
647 }
648
649 static void
650 micro_imin(
651 union tgsi_exec_channel *dst,
652 const union tgsi_exec_channel *src0,
653 const union tgsi_exec_channel *src1 )
654 {
655 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
656 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
657 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
658 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
659 }
660
661 static void
662 micro_umin(
663 union tgsi_exec_channel *dst,
664 const union tgsi_exec_channel *src0,
665 const union tgsi_exec_channel *src1 )
666 {
667 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
668 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
669 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
670 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
671 }
672
673 static void
674 micro_umod(
675 union tgsi_exec_channel *dst,
676 const union tgsi_exec_channel *src0,
677 const union tgsi_exec_channel *src1 )
678 {
679 dst->u[0] = src0->u[0] % src1->u[0];
680 dst->u[1] = src0->u[1] % src1->u[1];
681 dst->u[2] = src0->u[2] % src1->u[2];
682 dst->u[3] = src0->u[3] % src1->u[3];
683 }
684
685 static void
686 micro_mul(
687 union tgsi_exec_channel *dst,
688 const union tgsi_exec_channel *src0,
689 const union tgsi_exec_channel *src1 )
690 {
691 dst->f[0] = src0->f[0] * src1->f[0];
692 dst->f[1] = src0->f[1] * src1->f[1];
693 dst->f[2] = src0->f[2] * src1->f[2];
694 dst->f[3] = src0->f[3] * src1->f[3];
695 }
696
697 static void
698 micro_imul(
699 union tgsi_exec_channel *dst,
700 const union tgsi_exec_channel *src0,
701 const union tgsi_exec_channel *src1 )
702 {
703 dst->i[0] = src0->i[0] * src1->i[0];
704 dst->i[1] = src0->i[1] * src1->i[1];
705 dst->i[2] = src0->i[2] * src1->i[2];
706 dst->i[3] = src0->i[3] * src1->i[3];
707 }
708
709 static void
710 micro_imul64(
711 union tgsi_exec_channel *dst0,
712 union tgsi_exec_channel *dst1,
713 const union tgsi_exec_channel *src0,
714 const union tgsi_exec_channel *src1 )
715 {
716 dst1->i[0] = src0->i[0] * src1->i[0];
717 dst1->i[1] = src0->i[1] * src1->i[1];
718 dst1->i[2] = src0->i[2] * src1->i[2];
719 dst1->i[3] = src0->i[3] * src1->i[3];
720 dst0->i[0] = 0;
721 dst0->i[1] = 0;
722 dst0->i[2] = 0;
723 dst0->i[3] = 0;
724 }
725
726 static void
727 micro_umul64(
728 union tgsi_exec_channel *dst0,
729 union tgsi_exec_channel *dst1,
730 const union tgsi_exec_channel *src0,
731 const union tgsi_exec_channel *src1 )
732 {
733 dst1->u[0] = src0->u[0] * src1->u[0];
734 dst1->u[1] = src0->u[1] * src1->u[1];
735 dst1->u[2] = src0->u[2] * src1->u[2];
736 dst1->u[3] = src0->u[3] * src1->u[3];
737 dst0->u[0] = 0;
738 dst0->u[1] = 0;
739 dst0->u[2] = 0;
740 dst0->u[3] = 0;
741 }
742
743 static void
744 micro_movc(
745 union tgsi_exec_channel *dst,
746 const union tgsi_exec_channel *src0,
747 const union tgsi_exec_channel *src1,
748 const union tgsi_exec_channel *src2 )
749 {
750 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
751 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
752 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
753 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
754 }
755
756 static void
757 micro_neg(
758 union tgsi_exec_channel *dst,
759 const union tgsi_exec_channel *src )
760 {
761 dst->f[0] = -src->f[0];
762 dst->f[1] = -src->f[1];
763 dst->f[2] = -src->f[2];
764 dst->f[3] = -src->f[3];
765 }
766
767 static void
768 micro_ineg(
769 union tgsi_exec_channel *dst,
770 const union tgsi_exec_channel *src )
771 {
772 dst->i[0] = -src->i[0];
773 dst->i[1] = -src->i[1];
774 dst->i[2] = -src->i[2];
775 dst->i[3] = -src->i[3];
776 }
777
778 static void
779 micro_not(
780 union tgsi_exec_channel *dst,
781 const union tgsi_exec_channel *src )
782 {
783 dst->u[0] = ~src->u[0];
784 dst->u[1] = ~src->u[1];
785 dst->u[2] = ~src->u[2];
786 dst->u[3] = ~src->u[3];
787 }
788
789 static void
790 micro_or(
791 union tgsi_exec_channel *dst,
792 const union tgsi_exec_channel *src0,
793 const union tgsi_exec_channel *src1 )
794 {
795 dst->u[0] = src0->u[0] | src1->u[0];
796 dst->u[1] = src0->u[1] | src1->u[1];
797 dst->u[2] = src0->u[2] | src1->u[2];
798 dst->u[3] = src0->u[3] | src1->u[3];
799 }
800
801 static void
802 micro_pow(
803 union tgsi_exec_channel *dst,
804 const union tgsi_exec_channel *src0,
805 const union tgsi_exec_channel *src1 )
806 {
807 #if FAST_MATH
808 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
809 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
810 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
811 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
812 #else
813 dst->f[0] = powf( src0->f[0], src1->f[0] );
814 dst->f[1] = powf( src0->f[1], src1->f[1] );
815 dst->f[2] = powf( src0->f[2], src1->f[2] );
816 dst->f[3] = powf( src0->f[3], src1->f[3] );
817 #endif
818 }
819
820 static void
821 micro_rnd(
822 union tgsi_exec_channel *dst,
823 const union tgsi_exec_channel *src )
824 {
825 dst->f[0] = floorf( src->f[0] + 0.5f );
826 dst->f[1] = floorf( src->f[1] + 0.5f );
827 dst->f[2] = floorf( src->f[2] + 0.5f );
828 dst->f[3] = floorf( src->f[3] + 0.5f );
829 }
830
831 static void
832 micro_sgn(
833 union tgsi_exec_channel *dst,
834 const union tgsi_exec_channel *src )
835 {
836 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
837 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
838 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
839 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
840 }
841
842 static void
843 micro_shl(
844 union tgsi_exec_channel *dst,
845 const union tgsi_exec_channel *src0,
846 const union tgsi_exec_channel *src1 )
847 {
848 dst->i[0] = src0->i[0] << src1->i[0];
849 dst->i[1] = src0->i[1] << src1->i[1];
850 dst->i[2] = src0->i[2] << src1->i[2];
851 dst->i[3] = src0->i[3] << src1->i[3];
852 }
853
854 static void
855 micro_ishr(
856 union tgsi_exec_channel *dst,
857 const union tgsi_exec_channel *src0,
858 const union tgsi_exec_channel *src1 )
859 {
860 dst->i[0] = src0->i[0] >> src1->i[0];
861 dst->i[1] = src0->i[1] >> src1->i[1];
862 dst->i[2] = src0->i[2] >> src1->i[2];
863 dst->i[3] = src0->i[3] >> src1->i[3];
864 }
865
866 static void
867 micro_trunc(
868 union tgsi_exec_channel *dst,
869 const union tgsi_exec_channel *src0 )
870 {
871 dst->f[0] = (float) (int) src0->f[0];
872 dst->f[1] = (float) (int) src0->f[1];
873 dst->f[2] = (float) (int) src0->f[2];
874 dst->f[3] = (float) (int) src0->f[3];
875 }
876
877 static void
878 micro_ushr(
879 union tgsi_exec_channel *dst,
880 const union tgsi_exec_channel *src0,
881 const union tgsi_exec_channel *src1 )
882 {
883 dst->u[0] = src0->u[0] >> src1->u[0];
884 dst->u[1] = src0->u[1] >> src1->u[1];
885 dst->u[2] = src0->u[2] >> src1->u[2];
886 dst->u[3] = src0->u[3] >> src1->u[3];
887 }
888
889 static void
890 micro_sin(
891 union tgsi_exec_channel *dst,
892 const union tgsi_exec_channel *src )
893 {
894 dst->f[0] = sinf( src->f[0] );
895 dst->f[1] = sinf( src->f[1] );
896 dst->f[2] = sinf( src->f[2] );
897 dst->f[3] = sinf( src->f[3] );
898 }
899
900 static void
901 micro_sqrt( union tgsi_exec_channel *dst,
902 const union tgsi_exec_channel *src )
903 {
904 dst->f[0] = sqrtf( src->f[0] );
905 dst->f[1] = sqrtf( src->f[1] );
906 dst->f[2] = sqrtf( src->f[2] );
907 dst->f[3] = sqrtf( src->f[3] );
908 }
909
910 static void
911 micro_sub(
912 union tgsi_exec_channel *dst,
913 const union tgsi_exec_channel *src0,
914 const union tgsi_exec_channel *src1 )
915 {
916 dst->f[0] = src0->f[0] - src1->f[0];
917 dst->f[1] = src0->f[1] - src1->f[1];
918 dst->f[2] = src0->f[2] - src1->f[2];
919 dst->f[3] = src0->f[3] - src1->f[3];
920 }
921
922 static void
923 micro_u2f(
924 union tgsi_exec_channel *dst,
925 const union tgsi_exec_channel *src )
926 {
927 dst->f[0] = (float) src->u[0];
928 dst->f[1] = (float) src->u[1];
929 dst->f[2] = (float) src->u[2];
930 dst->f[3] = (float) src->u[3];
931 }
932
933 static void
934 micro_xor(
935 union tgsi_exec_channel *dst,
936 const union tgsi_exec_channel *src0,
937 const union tgsi_exec_channel *src1 )
938 {
939 dst->u[0] = src0->u[0] ^ src1->u[0];
940 dst->u[1] = src0->u[1] ^ src1->u[1];
941 dst->u[2] = src0->u[2] ^ src1->u[2];
942 dst->u[3] = src0->u[3] ^ src1->u[3];
943 }
944
945 static void
946 fetch_src_file_channel(
947 const struct tgsi_exec_machine *mach,
948 const uint file,
949 const uint swizzle,
950 const union tgsi_exec_channel *index,
951 union tgsi_exec_channel *chan )
952 {
953 switch( swizzle ) {
954 case TGSI_EXTSWIZZLE_X:
955 case TGSI_EXTSWIZZLE_Y:
956 case TGSI_EXTSWIZZLE_Z:
957 case TGSI_EXTSWIZZLE_W:
958 switch( file ) {
959 case TGSI_FILE_CONSTANT:
960 assert(mach->Consts);
961 if (index->i[0] < 0)
962 chan->f[0] = 0.0f;
963 else
964 chan->f[0] = mach->Consts[index->i[0]][swizzle];
965 if (index->i[1] < 0)
966 chan->f[1] = 0.0f;
967 else
968 chan->f[1] = mach->Consts[index->i[1]][swizzle];
969 if (index->i[2] < 0)
970 chan->f[2] = 0.0f;
971 else
972 chan->f[2] = mach->Consts[index->i[2]][swizzle];
973 if (index->i[3] < 0)
974 chan->f[3] = 0.0f;
975 else
976 chan->f[3] = mach->Consts[index->i[3]][swizzle];
977 break;
978
979 case TGSI_FILE_INPUT:
980 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
981 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
982 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
983 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
984 break;
985
986 case TGSI_FILE_TEMPORARY:
987 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
988 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
989 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
990 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
991 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
992 break;
993
994 case TGSI_FILE_IMMEDIATE:
995 assert( index->i[0] < (int) mach->ImmLimit );
996 chan->f[0] = mach->Imms[index->i[0]][swizzle];
997 assert( index->i[1] < (int) mach->ImmLimit );
998 chan->f[1] = mach->Imms[index->i[1]][swizzle];
999 assert( index->i[2] < (int) mach->ImmLimit );
1000 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1001 assert( index->i[3] < (int) mach->ImmLimit );
1002 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1003 break;
1004
1005 case TGSI_FILE_ADDRESS:
1006 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1007 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1008 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1009 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1010 break;
1011
1012 case TGSI_FILE_OUTPUT:
1013 /* vertex/fragment output vars can be read too */
1014 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1015 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1016 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1017 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1018 break;
1019
1020 default:
1021 assert( 0 );
1022 }
1023 break;
1024
1025 case TGSI_EXTSWIZZLE_ZERO:
1026 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1027 break;
1028
1029 case TGSI_EXTSWIZZLE_ONE:
1030 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1031 break;
1032
1033 default:
1034 assert( 0 );
1035 }
1036 }
1037
1038 static void
1039 fetch_source(
1040 const struct tgsi_exec_machine *mach,
1041 union tgsi_exec_channel *chan,
1042 const struct tgsi_full_src_register *reg,
1043 const uint chan_index )
1044 {
1045 union tgsi_exec_channel index;
1046 uint swizzle;
1047
1048 /* We start with a direct index into a register file.
1049 *
1050 * file[1],
1051 * where:
1052 * file = SrcRegister.File
1053 * [1] = SrcRegister.Index
1054 */
1055 index.i[0] =
1056 index.i[1] =
1057 index.i[2] =
1058 index.i[3] = reg->SrcRegister.Index;
1059
1060 /* There is an extra source register that indirectly subscripts
1061 * a register file. The direct index now becomes an offset
1062 * that is being added to the indirect register.
1063 *
1064 * file[ind[2].x+1],
1065 * where:
1066 * ind = SrcRegisterInd.File
1067 * [2] = SrcRegisterInd.Index
1068 * .x = SrcRegisterInd.SwizzleX
1069 */
1070 if (reg->SrcRegister.Indirect) {
1071 union tgsi_exec_channel index2;
1072 union tgsi_exec_channel indir_index;
1073 const uint execmask = mach->ExecMask;
1074 uint i;
1075
1076 /* which address register (always zero now) */
1077 index2.i[0] =
1078 index2.i[1] =
1079 index2.i[2] =
1080 index2.i[3] = reg->SrcRegisterInd.Index;
1081
1082 /* get current value of address register[swizzle] */
1083 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1084 fetch_src_file_channel(
1085 mach,
1086 reg->SrcRegisterInd.File,
1087 swizzle,
1088 &index2,
1089 &indir_index );
1090
1091 /* add value of address register to the offset */
1092 index.i[0] += (int) indir_index.f[0];
1093 index.i[1] += (int) indir_index.f[1];
1094 index.i[2] += (int) indir_index.f[2];
1095 index.i[3] += (int) indir_index.f[3];
1096
1097 /* for disabled execution channels, zero-out the index to
1098 * avoid using a potential garbage value.
1099 */
1100 for (i = 0; i < QUAD_SIZE; i++) {
1101 if ((execmask & (1 << i)) == 0)
1102 index.i[i] = 0;
1103 }
1104 }
1105
1106 /* There is an extra source register that is a second
1107 * subscript to a register file. Effectively it means that
1108 * the register file is actually a 2D array of registers.
1109 *
1110 * file[1][3] == file[1*sizeof(file[1])+3],
1111 * where:
1112 * [3] = SrcRegisterDim.Index
1113 */
1114 if (reg->SrcRegister.Dimension) {
1115 /* The size of the first-order array depends on the register file type.
1116 * We need to multiply the index to the first array to get an effective,
1117 * "flat" index that points to the beginning of the second-order array.
1118 */
1119 switch (reg->SrcRegister.File) {
1120 case TGSI_FILE_INPUT:
1121 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1122 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1123 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1124 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1125 break;
1126 case TGSI_FILE_CONSTANT:
1127 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1128 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1129 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1130 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1131 break;
1132 default:
1133 assert( 0 );
1134 }
1135
1136 index.i[0] += reg->SrcRegisterDim.Index;
1137 index.i[1] += reg->SrcRegisterDim.Index;
1138 index.i[2] += reg->SrcRegisterDim.Index;
1139 index.i[3] += reg->SrcRegisterDim.Index;
1140
1141 /* Again, the second subscript index can be addressed indirectly
1142 * identically to the first one.
1143 * Nothing stops us from indirectly addressing the indirect register,
1144 * but there is no need for that, so we won't exercise it.
1145 *
1146 * file[1][ind[4].y+3],
1147 * where:
1148 * ind = SrcRegisterDimInd.File
1149 * [4] = SrcRegisterDimInd.Index
1150 * .y = SrcRegisterDimInd.SwizzleX
1151 */
1152 if (reg->SrcRegisterDim.Indirect) {
1153 union tgsi_exec_channel index2;
1154 union tgsi_exec_channel indir_index;
1155 const uint execmask = mach->ExecMask;
1156 uint i;
1157
1158 index2.i[0] =
1159 index2.i[1] =
1160 index2.i[2] =
1161 index2.i[3] = reg->SrcRegisterDimInd.Index;
1162
1163 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1164 fetch_src_file_channel(
1165 mach,
1166 reg->SrcRegisterDimInd.File,
1167 swizzle,
1168 &index2,
1169 &indir_index );
1170
1171 index.i[0] += (int) indir_index.f[0];
1172 index.i[1] += (int) indir_index.f[1];
1173 index.i[2] += (int) indir_index.f[2];
1174 index.i[3] += (int) indir_index.f[3];
1175
1176 /* for disabled execution channels, zero-out the index to
1177 * avoid using a potential garbage value.
1178 */
1179 for (i = 0; i < QUAD_SIZE; i++) {
1180 if ((execmask & (1 << i)) == 0)
1181 index.i[i] = 0;
1182 }
1183 }
1184
1185 /* If by any chance there was a need for a 3D array of register
1186 * files, we would have to check whether SrcRegisterDim is followed
1187 * by a dimension register and continue the saga.
1188 */
1189 }
1190
1191 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1192 fetch_src_file_channel(
1193 mach,
1194 reg->SrcRegister.File,
1195 swizzle,
1196 &index,
1197 chan );
1198
1199 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1200 case TGSI_UTIL_SIGN_CLEAR:
1201 micro_abs( chan, chan );
1202 break;
1203
1204 case TGSI_UTIL_SIGN_SET:
1205 micro_abs( chan, chan );
1206 micro_neg( chan, chan );
1207 break;
1208
1209 case TGSI_UTIL_SIGN_TOGGLE:
1210 micro_neg( chan, chan );
1211 break;
1212
1213 case TGSI_UTIL_SIGN_KEEP:
1214 break;
1215 }
1216
1217 if (reg->SrcRegisterExtMod.Complement) {
1218 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1219 }
1220 }
1221
1222 static void
1223 store_dest(
1224 struct tgsi_exec_machine *mach,
1225 const union tgsi_exec_channel *chan,
1226 const struct tgsi_full_dst_register *reg,
1227 const struct tgsi_full_instruction *inst,
1228 uint chan_index )
1229 {
1230 uint i;
1231 union tgsi_exec_channel null;
1232 union tgsi_exec_channel *dst;
1233 uint execmask = mach->ExecMask;
1234
1235 switch (reg->DstRegister.File) {
1236 case TGSI_FILE_NULL:
1237 dst = &null;
1238 break;
1239
1240 case TGSI_FILE_OUTPUT:
1241 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1242 + reg->DstRegister.Index].xyzw[chan_index];
1243 break;
1244
1245 case TGSI_FILE_TEMPORARY:
1246 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1247 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1248 break;
1249
1250 case TGSI_FILE_ADDRESS:
1251 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1252 break;
1253
1254 default:
1255 assert( 0 );
1256 return;
1257 }
1258
1259 if (inst->InstructionExtNv.CondFlowEnable) {
1260 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1261 uint swizzle;
1262 uint shift;
1263 uint mask;
1264 uint test;
1265
1266 /* Only CC0 supported.
1267 */
1268 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1269
1270 switch (chan_index) {
1271 case CHAN_X:
1272 swizzle = inst->InstructionExtNv.CondSwizzleX;
1273 break;
1274 case CHAN_Y:
1275 swizzle = inst->InstructionExtNv.CondSwizzleY;
1276 break;
1277 case CHAN_Z:
1278 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1279 break;
1280 case CHAN_W:
1281 swizzle = inst->InstructionExtNv.CondSwizzleW;
1282 break;
1283 default:
1284 assert( 0 );
1285 return;
1286 }
1287
1288 switch (swizzle) {
1289 case TGSI_SWIZZLE_X:
1290 shift = TGSI_EXEC_CC_X_SHIFT;
1291 mask = TGSI_EXEC_CC_X_MASK;
1292 break;
1293 case TGSI_SWIZZLE_Y:
1294 shift = TGSI_EXEC_CC_Y_SHIFT;
1295 mask = TGSI_EXEC_CC_Y_MASK;
1296 break;
1297 case TGSI_SWIZZLE_Z:
1298 shift = TGSI_EXEC_CC_Z_SHIFT;
1299 mask = TGSI_EXEC_CC_Z_MASK;
1300 break;
1301 case TGSI_SWIZZLE_W:
1302 shift = TGSI_EXEC_CC_W_SHIFT;
1303 mask = TGSI_EXEC_CC_W_MASK;
1304 break;
1305 default:
1306 assert( 0 );
1307 return;
1308 }
1309
1310 switch (inst->InstructionExtNv.CondMask) {
1311 case TGSI_CC_GT:
1312 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1313 for (i = 0; i < QUAD_SIZE; i++)
1314 if (cc->u[i] & test)
1315 execmask &= ~(1 << i);
1316 break;
1317
1318 case TGSI_CC_EQ:
1319 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1320 for (i = 0; i < QUAD_SIZE; i++)
1321 if (cc->u[i] & test)
1322 execmask &= ~(1 << i);
1323 break;
1324
1325 case TGSI_CC_LT:
1326 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1327 for (i = 0; i < QUAD_SIZE; i++)
1328 if (cc->u[i] & test)
1329 execmask &= ~(1 << i);
1330 break;
1331
1332 case TGSI_CC_GE:
1333 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1334 for (i = 0; i < QUAD_SIZE; i++)
1335 if (cc->u[i] & test)
1336 execmask &= ~(1 << i);
1337 break;
1338
1339 case TGSI_CC_LE:
1340 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1341 for (i = 0; i < QUAD_SIZE; i++)
1342 if (cc->u[i] & test)
1343 execmask &= ~(1 << i);
1344 break;
1345
1346 case TGSI_CC_NE:
1347 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1348 for (i = 0; i < QUAD_SIZE; i++)
1349 if (cc->u[i] & test)
1350 execmask &= ~(1 << i);
1351 break;
1352
1353 case TGSI_CC_TR:
1354 break;
1355
1356 case TGSI_CC_FL:
1357 for (i = 0; i < QUAD_SIZE; i++)
1358 execmask &= ~(1 << i);
1359 break;
1360
1361 default:
1362 assert( 0 );
1363 return;
1364 }
1365 }
1366
1367 switch (inst->Instruction.Saturate) {
1368 case TGSI_SAT_NONE:
1369 for (i = 0; i < QUAD_SIZE; i++)
1370 if (execmask & (1 << i))
1371 dst->i[i] = chan->i[i];
1372 break;
1373
1374 case TGSI_SAT_ZERO_ONE:
1375 for (i = 0; i < QUAD_SIZE; i++)
1376 if (execmask & (1 << i)) {
1377 if (chan->f[i] < 0.0f)
1378 dst->f[i] = 0.0f;
1379 else if (chan->f[i] > 1.0f)
1380 dst->f[i] = 1.0f;
1381 else
1382 dst->i[i] = chan->i[i];
1383 }
1384 break;
1385
1386 case TGSI_SAT_MINUS_PLUS_ONE:
1387 for (i = 0; i < QUAD_SIZE; i++)
1388 if (execmask & (1 << i)) {
1389 if (chan->f[i] < -1.0f)
1390 dst->f[i] = -1.0f;
1391 else if (chan->f[i] > 1.0f)
1392 dst->f[i] = 1.0f;
1393 else
1394 dst->i[i] = chan->i[i];
1395 }
1396 break;
1397
1398 default:
1399 assert( 0 );
1400 }
1401
1402 if (inst->InstructionExtNv.CondDstUpdate) {
1403 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1404 uint shift;
1405 uint mask;
1406
1407 /* Only CC0 supported.
1408 */
1409 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1410
1411 switch (chan_index) {
1412 case CHAN_X:
1413 shift = TGSI_EXEC_CC_X_SHIFT;
1414 mask = ~TGSI_EXEC_CC_X_MASK;
1415 break;
1416 case CHAN_Y:
1417 shift = TGSI_EXEC_CC_Y_SHIFT;
1418 mask = ~TGSI_EXEC_CC_Y_MASK;
1419 break;
1420 case CHAN_Z:
1421 shift = TGSI_EXEC_CC_Z_SHIFT;
1422 mask = ~TGSI_EXEC_CC_Z_MASK;
1423 break;
1424 case CHAN_W:
1425 shift = TGSI_EXEC_CC_W_SHIFT;
1426 mask = ~TGSI_EXEC_CC_W_MASK;
1427 break;
1428 default:
1429 assert( 0 );
1430 return;
1431 }
1432
1433 for (i = 0; i < QUAD_SIZE; i++)
1434 if (execmask & (1 << i)) {
1435 cc->u[i] &= mask;
1436 if (dst->f[i] < 0.0f)
1437 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1438 else if (dst->f[i] > 0.0f)
1439 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1440 else if (dst->f[i] == 0.0f)
1441 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1442 else
1443 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1444 }
1445 }
1446 }
1447
/* Shorthands used by the opcode handlers.  Both expand to code that
 * refers to variables named 'mach' and 'inst', which must be in scope
 * at the point of use.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN) )

#define STORE(VAL, INDEX, CHAN) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1453
1454
1455 /**
1456 * Execute ARB-style KIL which is predicated by a src register.
1457 * Kill fragment if any of the four values is less than zero.
1458 */
1459 static void
1460 exec_kil(struct tgsi_exec_machine *mach,
1461 const struct tgsi_full_instruction *inst)
1462 {
1463 uint uniquemask;
1464 uint chan_index;
1465 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1466 union tgsi_exec_channel r[1];
1467
1468 /* This mask stores component bits that were already tested. Note that
1469 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1470 * tested. */
1471 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1472
1473 for (chan_index = 0; chan_index < 4; chan_index++)
1474 {
1475 uint swizzle;
1476 uint i;
1477
1478 /* unswizzle channel */
1479 swizzle = tgsi_util_get_full_src_register_extswizzle (
1480 &inst->FullSrcRegisters[0],
1481 chan_index);
1482
1483 /* check if the component has not been already tested */
1484 if (uniquemask & (1 << swizzle))
1485 continue;
1486 uniquemask |= 1 << swizzle;
1487
1488 FETCH(&r[0], 0, chan_index);
1489 for (i = 0; i < 4; i++)
1490 if (r[0].f[i] < 0.0f)
1491 kilmask |= 1 << i;
1492 }
1493
1494 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1495 }
1496
1497 /**
1498 * Execute NVIDIA-style KIL which is predicated by a condition code.
1499 * Kill fragment if the condition code is TRUE.
1500 */
1501 static void
1502 exec_kilp(struct tgsi_exec_machine *mach,
1503 const struct tgsi_full_instruction *inst)
1504 {
1505 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1506
1507 if (inst->InstructionExtNv.CondFlowEnable) {
1508 uint swizzle[4];
1509 uint chan_index;
1510
1511 kilmask = 0x0;
1512
1513 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1514 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1515 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1516 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1517
1518 for (chan_index = 0; chan_index < 4; chan_index++)
1519 {
1520 uint i;
1521
1522 for (i = 0; i < 4; i++) {
1523 /* TODO: evaluate the condition code */
1524 if (0)
1525 kilmask |= 1 << i;
1526 }
1527 }
1528 }
1529 else {
1530 /* "unconditional" kil */
1531 kilmask = mach->ExecMask;
1532 }
1533 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1534 }
1535
1536
1537 /*
1538 * Fetch a four texture samples using STR texture coordinates.
1539 */
1540 static void
1541 fetch_texel( struct tgsi_sampler *sampler,
1542 const union tgsi_exec_channel *s,
1543 const union tgsi_exec_channel *t,
1544 const union tgsi_exec_channel *p,
1545 float lodbias, /* XXX should be float[4] */
1546 union tgsi_exec_channel *r,
1547 union tgsi_exec_channel *g,
1548 union tgsi_exec_channel *b,
1549 union tgsi_exec_channel *a )
1550 {
1551 uint j;
1552 float rgba[NUM_CHANNELS][QUAD_SIZE];
1553
1554 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1555
1556 for (j = 0; j < 4; j++) {
1557 r->f[j] = rgba[0][j];
1558 g->f[j] = rgba[1][j];
1559 b->f[j] = rgba[2][j];
1560 a->f[j] = rgba[3][j];
1561 }
1562 }
1563
1564
1565 static void
1566 exec_tex(struct tgsi_exec_machine *mach,
1567 const struct tgsi_full_instruction *inst,
1568 boolean biasLod,
1569 boolean projected)
1570 {
1571 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1572 union tgsi_exec_channel r[4];
1573 uint chan_index;
1574 float lodBias;
1575
1576 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1577
1578 switch (inst->InstructionExtTexture.Texture) {
1579 case TGSI_TEXTURE_1D:
1580
1581 FETCH(&r[0], 0, CHAN_X);
1582
1583 if (projected) {
1584 FETCH(&r[1], 0, CHAN_W);
1585 micro_div( &r[0], &r[0], &r[1] );
1586 }
1587
1588 if (biasLod) {
1589 FETCH(&r[1], 0, CHAN_W);
1590 lodBias = r[2].f[0];
1591 }
1592 else
1593 lodBias = 0.0;
1594
1595 fetch_texel(mach->Samplers[unit],
1596 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1597 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1598 break;
1599
1600 case TGSI_TEXTURE_2D:
1601 case TGSI_TEXTURE_RECT:
1602
1603 FETCH(&r[0], 0, CHAN_X);
1604 FETCH(&r[1], 0, CHAN_Y);
1605 FETCH(&r[2], 0, CHAN_Z);
1606
1607 if (projected) {
1608 FETCH(&r[3], 0, CHAN_W);
1609 micro_div( &r[0], &r[0], &r[3] );
1610 micro_div( &r[1], &r[1], &r[3] );
1611 micro_div( &r[2], &r[2], &r[3] );
1612 }
1613
1614 if (biasLod) {
1615 FETCH(&r[3], 0, CHAN_W);
1616 lodBias = r[3].f[0];
1617 }
1618 else
1619 lodBias = 0.0;
1620
1621 fetch_texel(mach->Samplers[unit],
1622 &r[0], &r[1], &r[2], lodBias, /* inputs */
1623 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1624 break;
1625
1626 case TGSI_TEXTURE_3D:
1627 case TGSI_TEXTURE_CUBE:
1628
1629 FETCH(&r[0], 0, CHAN_X);
1630 FETCH(&r[1], 0, CHAN_Y);
1631 FETCH(&r[2], 0, CHAN_Z);
1632
1633 if (projected) {
1634 FETCH(&r[3], 0, CHAN_W);
1635 micro_div( &r[0], &r[0], &r[3] );
1636 micro_div( &r[1], &r[1], &r[3] );
1637 micro_div( &r[2], &r[2], &r[3] );
1638 }
1639
1640 if (biasLod) {
1641 FETCH(&r[3], 0, CHAN_W);
1642 lodBias = r[3].f[0];
1643 }
1644 else
1645 lodBias = 0.0;
1646
1647 fetch_texel(mach->Samplers[unit],
1648 &r[0], &r[1], &r[2], lodBias,
1649 &r[0], &r[1], &r[2], &r[3]);
1650 break;
1651
1652 default:
1653 assert (0);
1654 }
1655
1656 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1657 STORE( &r[chan_index], 0, chan_index );
1658 }
1659 }
1660
1661
1662 /**
1663 * Evaluate a constant-valued coefficient at the position of the
1664 * current quad.
1665 */
1666 static void
1667 eval_constant_coef(
1668 struct tgsi_exec_machine *mach,
1669 unsigned attrib,
1670 unsigned chan )
1671 {
1672 unsigned i;
1673
1674 for( i = 0; i < QUAD_SIZE; i++ ) {
1675 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1676 }
1677 }
1678
1679 /**
1680 * Evaluate a linear-valued coefficient at the position of the
1681 * current quad.
1682 */
1683 static void
1684 eval_linear_coef(
1685 struct tgsi_exec_machine *mach,
1686 unsigned attrib,
1687 unsigned chan )
1688 {
1689 const float x = mach->QuadPos.xyzw[0].f[0];
1690 const float y = mach->QuadPos.xyzw[1].f[0];
1691 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1692 const float dady = mach->InterpCoefs[attrib].dady[chan];
1693 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1694 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1695 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1696 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1697 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1698 }
1699
1700 /**
1701 * Evaluate a perspective-valued coefficient at the position of the
1702 * current quad.
1703 */
1704 static void
1705 eval_perspective_coef(
1706 struct tgsi_exec_machine *mach,
1707 unsigned attrib,
1708 unsigned chan )
1709 {
1710 const float x = mach->QuadPos.xyzw[0].f[0];
1711 const float y = mach->QuadPos.xyzw[1].f[0];
1712 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1713 const float dady = mach->InterpCoefs[attrib].dady[chan];
1714 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1715 const float *w = mach->QuadPos.xyzw[3].f;
1716 /* divide by W here */
1717 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1718 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1719 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1720 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1721 }
1722
1723
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel
 * 'chan' of input attribute 'attrib' for the current quad.
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1728
1729 static void
1730 exec_declaration(
1731 struct tgsi_exec_machine *mach,
1732 const struct tgsi_full_declaration *decl )
1733 {
1734 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1735 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1736 unsigned first, last, mask;
1737 eval_coef_func eval;
1738
1739 first = decl->DeclarationRange.First;
1740 last = decl->DeclarationRange.Last;
1741 mask = decl->Declaration.UsageMask;
1742
1743 switch( decl->Declaration.Interpolate ) {
1744 case TGSI_INTERPOLATE_CONSTANT:
1745 eval = eval_constant_coef;
1746 break;
1747
1748 case TGSI_INTERPOLATE_LINEAR:
1749 eval = eval_linear_coef;
1750 break;
1751
1752 case TGSI_INTERPOLATE_PERSPECTIVE:
1753 eval = eval_perspective_coef;
1754 break;
1755
1756 default:
1757 eval = NULL;
1758 assert( 0 );
1759 }
1760
1761 if( mask == TGSI_WRITEMASK_XYZW ) {
1762 unsigned i, j;
1763
1764 for( i = first; i <= last; i++ ) {
1765 for( j = 0; j < NUM_CHANNELS; j++ ) {
1766 eval( mach, i, j );
1767 }
1768 }
1769 }
1770 else {
1771 unsigned i, j;
1772
1773 for( j = 0; j < NUM_CHANNELS; j++ ) {
1774 if( mask & (1 << j) ) {
1775 for( i = first; i <= last; i++ ) {
1776 eval( mach, i, j );
1777 }
1778 }
1779 }
1780 }
1781 }
1782 }
1783 }
1784
1785 static void
1786 exec_instruction(
1787 struct tgsi_exec_machine *mach,
1788 const struct tgsi_full_instruction *inst,
1789 int *pc )
1790 {
1791 uint chan_index;
1792 union tgsi_exec_channel r[8];
1793
1794 (*pc)++;
1795
1796 switch (inst->Instruction.Opcode) {
1797 case TGSI_OPCODE_ARL:
1798 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1799 FETCH( &r[0], 0, chan_index );
1800 micro_trunc( &r[0], &r[0] );
1801 STORE( &r[0], 0, chan_index );
1802 }
1803 break;
1804
1805 case TGSI_OPCODE_MOV:
1806 case TGSI_OPCODE_SWZ:
1807 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1808 FETCH( &r[0], 0, chan_index );
1809 STORE( &r[0], 0, chan_index );
1810 }
1811 break;
1812
1813 case TGSI_OPCODE_LIT:
1814 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1815 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1816 }
1817
1818 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1819 FETCH( &r[0], 0, CHAN_X );
1820 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1821 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1822 STORE( &r[0], 0, CHAN_Y );
1823 }
1824
1825 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1826 FETCH( &r[1], 0, CHAN_Y );
1827 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1828
1829 FETCH( &r[2], 0, CHAN_W );
1830 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1831 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1832 micro_pow( &r[1], &r[1], &r[2] );
1833 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1834 STORE( &r[0], 0, CHAN_Z );
1835 }
1836 }
1837
1838 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1839 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1840 }
1841 break;
1842
1843 case TGSI_OPCODE_RCP:
1844 /* TGSI_OPCODE_RECIP */
1845 FETCH( &r[0], 0, CHAN_X );
1846 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1847 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1848 STORE( &r[0], 0, chan_index );
1849 }
1850 break;
1851
1852 case TGSI_OPCODE_RSQ:
1853 /* TGSI_OPCODE_RECIPSQRT */
1854 FETCH( &r[0], 0, CHAN_X );
1855 micro_sqrt( &r[0], &r[0] );
1856 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1857 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1858 STORE( &r[0], 0, chan_index );
1859 }
1860 break;
1861
1862 case TGSI_OPCODE_EXP:
1863 FETCH( &r[0], 0, CHAN_X );
1864 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1865 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1866 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1867 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1868 }
1869 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1870 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1871 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1872 }
1873 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1874 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1875 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1876 }
1877 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1878 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1879 }
1880 break;
1881
1882 case TGSI_OPCODE_LOG:
1883 FETCH( &r[0], 0, CHAN_X );
1884 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1885 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1886 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1887 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1888 STORE( &r[0], 0, CHAN_X );
1889 }
1890 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1891 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1892 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1893 STORE( &r[0], 0, CHAN_Y );
1894 }
1895 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1896 STORE( &r[1], 0, CHAN_Z );
1897 }
1898 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1899 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1900 }
1901 break;
1902
1903 case TGSI_OPCODE_MUL:
1904 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1905 {
1906 FETCH(&r[0], 0, chan_index);
1907 FETCH(&r[1], 1, chan_index);
1908
1909 micro_mul( &r[0], &r[0], &r[1] );
1910
1911 STORE(&r[0], 0, chan_index);
1912 }
1913 break;
1914
1915 case TGSI_OPCODE_ADD:
1916 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1917 FETCH( &r[0], 0, chan_index );
1918 FETCH( &r[1], 1, chan_index );
1919 micro_add( &r[0], &r[0], &r[1] );
1920 STORE( &r[0], 0, chan_index );
1921 }
1922 break;
1923
1924 case TGSI_OPCODE_DP3:
1925 /* TGSI_OPCODE_DOT3 */
1926 FETCH( &r[0], 0, CHAN_X );
1927 FETCH( &r[1], 1, CHAN_X );
1928 micro_mul( &r[0], &r[0], &r[1] );
1929
1930 FETCH( &r[1], 0, CHAN_Y );
1931 FETCH( &r[2], 1, CHAN_Y );
1932 micro_mul( &r[1], &r[1], &r[2] );
1933 micro_add( &r[0], &r[0], &r[1] );
1934
1935 FETCH( &r[1], 0, CHAN_Z );
1936 FETCH( &r[2], 1, CHAN_Z );
1937 micro_mul( &r[1], &r[1], &r[2] );
1938 micro_add( &r[0], &r[0], &r[1] );
1939
1940 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1941 STORE( &r[0], 0, chan_index );
1942 }
1943 break;
1944
1945 case TGSI_OPCODE_DP4:
1946 /* TGSI_OPCODE_DOT4 */
1947 FETCH(&r[0], 0, CHAN_X);
1948 FETCH(&r[1], 1, CHAN_X);
1949
1950 micro_mul( &r[0], &r[0], &r[1] );
1951
1952 FETCH(&r[1], 0, CHAN_Y);
1953 FETCH(&r[2], 1, CHAN_Y);
1954
1955 micro_mul( &r[1], &r[1], &r[2] );
1956 micro_add( &r[0], &r[0], &r[1] );
1957
1958 FETCH(&r[1], 0, CHAN_Z);
1959 FETCH(&r[2], 1, CHAN_Z);
1960
1961 micro_mul( &r[1], &r[1], &r[2] );
1962 micro_add( &r[0], &r[0], &r[1] );
1963
1964 FETCH(&r[1], 0, CHAN_W);
1965 FETCH(&r[2], 1, CHAN_W);
1966
1967 micro_mul( &r[1], &r[1], &r[2] );
1968 micro_add( &r[0], &r[0], &r[1] );
1969
1970 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1971 STORE( &r[0], 0, chan_index );
1972 }
1973 break;
1974
1975 case TGSI_OPCODE_DST:
1976 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1977 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1978 }
1979
1980 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1981 FETCH( &r[0], 0, CHAN_Y );
1982 FETCH( &r[1], 1, CHAN_Y);
1983 micro_mul( &r[0], &r[0], &r[1] );
1984 STORE( &r[0], 0, CHAN_Y );
1985 }
1986
1987 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1988 FETCH( &r[0], 0, CHAN_Z );
1989 STORE( &r[0], 0, CHAN_Z );
1990 }
1991
1992 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1993 FETCH( &r[0], 1, CHAN_W );
1994 STORE( &r[0], 0, CHAN_W );
1995 }
1996 break;
1997
1998 case TGSI_OPCODE_MIN:
1999 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2000 FETCH(&r[0], 0, chan_index);
2001 FETCH(&r[1], 1, chan_index);
2002
2003 /* XXX use micro_min()?? */
2004 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
2005
2006 STORE(&r[0], 0, chan_index);
2007 }
2008 break;
2009
2010 case TGSI_OPCODE_MAX:
2011 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2012 FETCH(&r[0], 0, chan_index);
2013 FETCH(&r[1], 1, chan_index);
2014
2015 /* XXX use micro_max()?? */
2016 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2017
2018 STORE(&r[0], 0, chan_index );
2019 }
2020 break;
2021
2022 case TGSI_OPCODE_SLT:
2023 /* TGSI_OPCODE_SETLT */
2024 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2025 FETCH( &r[0], 0, chan_index );
2026 FETCH( &r[1], 1, chan_index );
2027 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2028 STORE( &r[0], 0, chan_index );
2029 }
2030 break;
2031
2032 case TGSI_OPCODE_SGE:
2033 /* TGSI_OPCODE_SETGE */
2034 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2035 FETCH( &r[0], 0, chan_index );
2036 FETCH( &r[1], 1, chan_index );
2037 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2038 STORE( &r[0], 0, chan_index );
2039 }
2040 break;
2041
2042 case TGSI_OPCODE_MAD:
2043 /* TGSI_OPCODE_MADD */
2044 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2045 FETCH( &r[0], 0, chan_index );
2046 FETCH( &r[1], 1, chan_index );
2047 micro_mul( &r[0], &r[0], &r[1] );
2048 FETCH( &r[1], 2, chan_index );
2049 micro_add( &r[0], &r[0], &r[1] );
2050 STORE( &r[0], 0, chan_index );
2051 }
2052 break;
2053
2054 case TGSI_OPCODE_SUB:
2055 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2056 FETCH(&r[0], 0, chan_index);
2057 FETCH(&r[1], 1, chan_index);
2058
2059 micro_sub( &r[0], &r[0], &r[1] );
2060
2061 STORE(&r[0], 0, chan_index);
2062 }
2063 break;
2064
2065 case TGSI_OPCODE_LERP:
2066 /* TGSI_OPCODE_LRP */
2067 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2068 FETCH(&r[0], 0, chan_index);
2069 FETCH(&r[1], 1, chan_index);
2070 FETCH(&r[2], 2, chan_index);
2071
2072 micro_sub( &r[1], &r[1], &r[2] );
2073 micro_mul( &r[0], &r[0], &r[1] );
2074 micro_add( &r[0], &r[0], &r[2] );
2075
2076 STORE(&r[0], 0, chan_index);
2077 }
2078 break;
2079
2080 case TGSI_OPCODE_CND:
2081 assert (0);
2082 break;
2083
2084 case TGSI_OPCODE_CND0:
2085 assert (0);
2086 break;
2087
2088 case TGSI_OPCODE_DOT2ADD:
2089 /* TGSI_OPCODE_DP2A */
2090 FETCH( &r[0], 0, CHAN_X );
2091 FETCH( &r[1], 1, CHAN_X );
2092 micro_mul( &r[0], &r[0], &r[1] );
2093
2094 FETCH( &r[1], 0, CHAN_Y );
2095 FETCH( &r[2], 1, CHAN_Y );
2096 micro_mul( &r[1], &r[1], &r[2] );
2097 micro_add( &r[0], &r[0], &r[1] );
2098
2099 FETCH( &r[2], 2, CHAN_X );
2100 micro_add( &r[0], &r[0], &r[2] );
2101
2102 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2103 STORE( &r[0], 0, chan_index );
2104 }
2105 break;
2106
2107 case TGSI_OPCODE_INDEX:
2108 assert (0);
2109 break;
2110
2111 case TGSI_OPCODE_NEGATE:
2112 assert (0);
2113 break;
2114
2115 case TGSI_OPCODE_FRAC:
2116 /* TGSI_OPCODE_FRC */
2117 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2118 FETCH( &r[0], 0, chan_index );
2119 micro_frc( &r[0], &r[0] );
2120 STORE( &r[0], 0, chan_index );
2121 }
2122 break;
2123
2124 case TGSI_OPCODE_CLAMP:
2125 assert (0);
2126 break;
2127
2128 case TGSI_OPCODE_FLOOR:
2129 /* TGSI_OPCODE_FLR */
2130 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2131 FETCH( &r[0], 0, chan_index );
2132 micro_flr( &r[0], &r[0] );
2133 STORE( &r[0], 0, chan_index );
2134 }
2135 break;
2136
2137 case TGSI_OPCODE_ROUND:
2138 case TGSI_OPCODE_ARR:
2139 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2140 FETCH( &r[0], 0, chan_index );
2141 micro_rnd( &r[0], &r[0] );
2142 STORE( &r[0], 0, chan_index );
2143 }
2144 break;
2145
2146 case TGSI_OPCODE_EXPBASE2:
2147 /* TGSI_OPCODE_EX2 */
2148 FETCH(&r[0], 0, CHAN_X);
2149
2150 #if FAST_MATH
2151 micro_exp2( &r[0], &r[0] );
2152 #else
2153 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2154 #endif
2155
2156 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2157 STORE( &r[0], 0, chan_index );
2158 }
2159 break;
2160
2161 case TGSI_OPCODE_LOGBASE2:
2162 /* TGSI_OPCODE_LG2 */
2163 FETCH( &r[0], 0, CHAN_X );
2164 micro_lg2( &r[0], &r[0] );
2165 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2166 STORE( &r[0], 0, chan_index );
2167 }
2168 break;
2169
2170 case TGSI_OPCODE_POWER:
2171 /* TGSI_OPCODE_POW */
2172 FETCH(&r[0], 0, CHAN_X);
2173 FETCH(&r[1], 1, CHAN_X);
2174
2175 micro_pow( &r[0], &r[0], &r[1] );
2176
2177 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2178 STORE( &r[0], 0, chan_index );
2179 }
2180 break;
2181
2182 case TGSI_OPCODE_CROSSPRODUCT:
2183 /* TGSI_OPCODE_XPD */
2184 FETCH(&r[0], 0, CHAN_Y);
2185 FETCH(&r[1], 1, CHAN_Z);
2186
2187 micro_mul( &r[2], &r[0], &r[1] );
2188
2189 FETCH(&r[3], 0, CHAN_Z);
2190 FETCH(&r[4], 1, CHAN_Y);
2191
2192 micro_mul( &r[5], &r[3], &r[4] );
2193 micro_sub( &r[2], &r[2], &r[5] );
2194
2195 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2196 STORE( &r[2], 0, CHAN_X );
2197 }
2198
2199 FETCH(&r[2], 1, CHAN_X);
2200
2201 micro_mul( &r[3], &r[3], &r[2] );
2202
2203 FETCH(&r[5], 0, CHAN_X);
2204
2205 micro_mul( &r[1], &r[1], &r[5] );
2206 micro_sub( &r[3], &r[3], &r[1] );
2207
2208 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2209 STORE( &r[3], 0, CHAN_Y );
2210 }
2211
2212 micro_mul( &r[5], &r[5], &r[4] );
2213 micro_mul( &r[0], &r[0], &r[2] );
2214 micro_sub( &r[5], &r[5], &r[0] );
2215
2216 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2217 STORE( &r[5], 0, CHAN_Z );
2218 }
2219
2220 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2221 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2222 }
2223 break;
2224
2225 case TGSI_OPCODE_MULTIPLYMATRIX:
2226 assert (0);
2227 break;
2228
2229 case TGSI_OPCODE_ABS:
2230 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2231 FETCH(&r[0], 0, chan_index);
2232
2233 micro_abs( &r[0], &r[0] );
2234
2235 STORE(&r[0], 0, chan_index);
2236 }
2237 break;
2238
2239 case TGSI_OPCODE_RCC:
2240 assert (0);
2241 break;
2242
2243 case TGSI_OPCODE_DPH:
2244 FETCH(&r[0], 0, CHAN_X);
2245 FETCH(&r[1], 1, CHAN_X);
2246
2247 micro_mul( &r[0], &r[0], &r[1] );
2248
2249 FETCH(&r[1], 0, CHAN_Y);
2250 FETCH(&r[2], 1, CHAN_Y);
2251
2252 micro_mul( &r[1], &r[1], &r[2] );
2253 micro_add( &r[0], &r[0], &r[1] );
2254
2255 FETCH(&r[1], 0, CHAN_Z);
2256 FETCH(&r[2], 1, CHAN_Z);
2257
2258 micro_mul( &r[1], &r[1], &r[2] );
2259 micro_add( &r[0], &r[0], &r[1] );
2260
2261 FETCH(&r[1], 1, CHAN_W);
2262
2263 micro_add( &r[0], &r[0], &r[1] );
2264
2265 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2266 STORE( &r[0], 0, chan_index );
2267 }
2268 break;
2269
2270 case TGSI_OPCODE_COS:
2271 FETCH(&r[0], 0, CHAN_X);
2272
2273 micro_cos( &r[0], &r[0] );
2274
2275 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2276 STORE( &r[0], 0, chan_index );
2277 }
2278 break;
2279
2280 case TGSI_OPCODE_DDX:
2281 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2282 FETCH( &r[0], 0, chan_index );
2283 micro_ddx( &r[0], &r[0] );
2284 STORE( &r[0], 0, chan_index );
2285 }
2286 break;
2287
2288 case TGSI_OPCODE_DDY:
2289 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2290 FETCH( &r[0], 0, chan_index );
2291 micro_ddy( &r[0], &r[0] );
2292 STORE( &r[0], 0, chan_index );
2293 }
2294 break;
2295
2296 case TGSI_OPCODE_KILP:
2297 exec_kilp (mach, inst);
2298 break;
2299
2300 case TGSI_OPCODE_KIL:
2301 exec_kil (mach, inst);
2302 break;
2303
2304 case TGSI_OPCODE_PK2H:
2305 assert (0);
2306 break;
2307
2308 case TGSI_OPCODE_PK2US:
2309 assert (0);
2310 break;
2311
2312 case TGSI_OPCODE_PK4B:
2313 assert (0);
2314 break;
2315
2316 case TGSI_OPCODE_PK4UB:
2317 assert (0);
2318 break;
2319
2320 case TGSI_OPCODE_RFL:
2321 assert (0);
2322 break;
2323
2324 case TGSI_OPCODE_SEQ:
2325 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2326 FETCH( &r[0], 0, chan_index );
2327 FETCH( &r[1], 1, chan_index );
2328 micro_eq( &r[0], &r[0], &r[1],
2329 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2330 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2331 STORE( &r[0], 0, chan_index );
2332 }
2333 break;
2334
2335 case TGSI_OPCODE_SFL:
2336 assert (0);
2337 break;
2338
2339 case TGSI_OPCODE_SGT:
2340 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2341 FETCH( &r[0], 0, chan_index );
2342 FETCH( &r[1], 1, chan_index );
2343 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2344 STORE( &r[0], 0, chan_index );
2345 }
2346 break;
2347
2348 case TGSI_OPCODE_SIN:
2349 FETCH( &r[0], 0, CHAN_X );
2350 micro_sin( &r[0], &r[0] );
2351 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2352 STORE( &r[0], 0, chan_index );
2353 }
2354 break;
2355
2356 case TGSI_OPCODE_SLE:
2357 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2358 FETCH( &r[0], 0, chan_index );
2359 FETCH( &r[1], 1, chan_index );
2360 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2361 STORE( &r[0], 0, chan_index );
2362 }
2363 break;
2364
2365 case TGSI_OPCODE_SNE:
2366 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2367 FETCH( &r[0], 0, chan_index );
2368 FETCH( &r[1], 1, chan_index );
2369 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2370 STORE( &r[0], 0, chan_index );
2371 }
2372 break;
2373
2374 case TGSI_OPCODE_STR:
2375 assert (0);
2376 break;
2377
2378 case TGSI_OPCODE_TEX:
2379 /* simple texture lookup */
2380 /* src[0] = texcoord */
2381 /* src[1] = sampler unit */
2382 exec_tex(mach, inst, FALSE, FALSE);
2383 break;
2384
2385 case TGSI_OPCODE_TXB:
2386 /* Texture lookup with lod bias */
2387 /* src[0] = texcoord (src[0].w = LOD bias) */
2388 /* src[1] = sampler unit */
2389 exec_tex(mach, inst, TRUE, FALSE);
2390 break;
2391
2392 case TGSI_OPCODE_TXD:
2393 /* Texture lookup with explicit partial derivatives */
2394 /* src[0] = texcoord */
2395 /* src[1] = d[strq]/dx */
2396 /* src[2] = d[strq]/dy */
2397 /* src[3] = sampler unit */
2398 assert (0);
2399 break;
2400
2401 case TGSI_OPCODE_TXL:
2402 /* Texture lookup with explicit LOD */
2403 /* src[0] = texcoord (src[0].w = LOD) */
2404 /* src[1] = sampler unit */
2405 exec_tex(mach, inst, TRUE, FALSE);
2406 break;
2407
2408 case TGSI_OPCODE_TXP:
2409 /* Texture lookup with projection */
2410 /* src[0] = texcoord (src[0].w = projection) */
2411 /* src[1] = sampler unit */
2412 exec_tex(mach, inst, FALSE, TRUE);
2413 break;
2414
2415 case TGSI_OPCODE_UP2H:
2416 assert (0);
2417 break;
2418
2419 case TGSI_OPCODE_UP2US:
2420 assert (0);
2421 break;
2422
2423 case TGSI_OPCODE_UP4B:
2424 assert (0);
2425 break;
2426
2427 case TGSI_OPCODE_UP4UB:
2428 assert (0);
2429 break;
2430
2431 case TGSI_OPCODE_X2D:
2432 assert (0);
2433 break;
2434
2435 case TGSI_OPCODE_ARA:
2436 assert (0);
2437 break;
2438
2439 case TGSI_OPCODE_BRA:
2440 assert (0);
2441 break;
2442
2443 case TGSI_OPCODE_CAL:
2444 /* skip the call if no execution channels are enabled */
2445 if (mach->ExecMask) {
2446 /* do the call */
2447
2448 /* push the Cond, Loop, Cont stacks */
2449 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2450 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2451 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2452 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2453 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2454 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2455
2456 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2457 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2458
2459 /* note that PC was already incremented above */
2460 mach->CallStack[mach->CallStackTop++] = *pc;
2461 *pc = inst->InstructionExtLabel.Label;
2462 }
2463 break;
2464
2465 case TGSI_OPCODE_RET:
2466 mach->FuncMask &= ~mach->ExecMask;
2467 UPDATE_EXEC_MASK(mach);
2468
2469 if (mach->FuncMask == 0x0) {
2470 /* really return now (otherwise, keep executing) */
2471
2472 if (mach->CallStackTop == 0) {
2473 /* returning from main() */
2474 *pc = -1;
2475 return;
2476 }
2477 *pc = mach->CallStack[--mach->CallStackTop];
2478
2479 /* pop the Cond, Loop, Cont stacks */
2480 assert(mach->CondStackTop > 0);
2481 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2482 assert(mach->LoopStackTop > 0);
2483 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2484 assert(mach->ContStackTop > 0);
2485 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2486 assert(mach->FuncStackTop > 0);
2487 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2488
2489 UPDATE_EXEC_MASK(mach);
2490 }
2491 break;
2492
2493 case TGSI_OPCODE_SSG:
2494 /* TGSI_OPCODE_SGN */
2495 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2496 FETCH( &r[0], 0, chan_index );
2497 micro_sgn( &r[0], &r[0] );
2498 STORE( &r[0], 0, chan_index );
2499 }
2500 break;
2501
2502 case TGSI_OPCODE_CMP:
2503 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2504 FETCH(&r[0], 0, chan_index);
2505 FETCH(&r[1], 1, chan_index);
2506 FETCH(&r[2], 2, chan_index);
2507
2508 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2509
2510 STORE(&r[0], 0, chan_index);
2511 }
2512 break;
2513
2514 case TGSI_OPCODE_SCS:
2515 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2516 FETCH( &r[0], 0, CHAN_X );
2517 }
2518 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2519 micro_cos( &r[1], &r[0] );
2520 STORE( &r[1], 0, CHAN_X );
2521 }
2522 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2523 micro_sin( &r[1], &r[0] );
2524 STORE( &r[1], 0, CHAN_Y );
2525 }
2526 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2527 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2528 }
2529 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2530 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2531 }
2532 break;
2533
2534 case TGSI_OPCODE_NRM:
2535 /* 3-component vector normalize */
2536 {
2537 union tgsi_exec_channel tmp, dot;
2538
2539 /* tmp = dp3(src0, src0): */
2540 FETCH( &r[0], 0, CHAN_X );
2541 micro_mul( &tmp, &r[0], &r[0] );
2542
2543 FETCH( &r[1], 0, CHAN_Y );
2544 micro_mul( &dot, &r[1], &r[1] );
2545 micro_add( &tmp, &tmp, &dot );
2546
2547 FETCH( &r[2], 0, CHAN_Z );
2548 micro_mul( &dot, &r[2], &r[2] );
2549 micro_add( &tmp, &tmp, &dot );
2550
2551 /* tmp = 1 / sqrt(tmp) */
2552 micro_sqrt( &tmp, &tmp );
2553 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2554
2555 /* note: w channel is undefined */
2556 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2557 /* chan = chan * tmp */
2558 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2559 STORE( &r[chan_index], 0, chan_index );
2560 }
2561 }
2562 break;
2563
2564 case TGSI_OPCODE_NRM4:
2565 /* 4-component vector normalize */
2566 {
2567 union tgsi_exec_channel tmp, dot;
2568
2569 /* tmp = dp4(src0, src0): */
2570 FETCH( &r[0], 0, CHAN_X );
2571 micro_mul( &tmp, &r[0], &r[0] );
2572
2573 FETCH( &r[1], 0, CHAN_Y );
2574 micro_mul( &dot, &r[1], &r[1] );
2575 micro_add( &tmp, &tmp, &dot );
2576
2577 FETCH( &r[2], 0, CHAN_Z );
2578 micro_mul( &dot, &r[2], &r[2] );
2579 micro_add( &tmp, &tmp, &dot );
2580
2581 FETCH( &r[3], 0, CHAN_W );
2582 micro_mul( &dot, &r[3], &r[3] );
2583 micro_add( &tmp, &tmp, &dot );
2584
2585 /* tmp = 1 / sqrt(tmp) */
2586 micro_sqrt( &tmp, &tmp );
2587 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2588
2589 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2590 /* chan = chan * tmp */
2591 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2592 STORE( &r[chan_index], 0, chan_index );
2593 }
2594 }
2595 break;
2596
2597 case TGSI_OPCODE_DIV:
2598 assert( 0 );
2599 break;
2600
2601 case TGSI_OPCODE_DP2:
2602 FETCH( &r[0], 0, CHAN_X );
2603 FETCH( &r[1], 1, CHAN_X );
2604 micro_mul( &r[0], &r[0], &r[1] );
2605
2606 FETCH( &r[1], 0, CHAN_Y );
2607 FETCH( &r[2], 1, CHAN_Y );
2608 micro_mul( &r[1], &r[1], &r[2] );
2609 micro_add( &r[0], &r[0], &r[1] );
2610
2611 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2612 STORE( &r[0], 0, chan_index );
2613 }
2614 break;
2615
2616 case TGSI_OPCODE_IF:
2617 /* push CondMask */
2618 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2619 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2620 FETCH( &r[0], 0, CHAN_X );
2621 /* update CondMask */
2622 if( ! r[0].u[0] ) {
2623 mach->CondMask &= ~0x1;
2624 }
2625 if( ! r[0].u[1] ) {
2626 mach->CondMask &= ~0x2;
2627 }
2628 if( ! r[0].u[2] ) {
2629 mach->CondMask &= ~0x4;
2630 }
2631 if( ! r[0].u[3] ) {
2632 mach->CondMask &= ~0x8;
2633 }
2634 UPDATE_EXEC_MASK(mach);
2635 /* Todo: If CondMask==0, jump to ELSE */
2636 break;
2637
2638 case TGSI_OPCODE_ELSE:
2639 /* invert CondMask wrt previous mask */
2640 {
2641 uint prevMask;
2642 assert(mach->CondStackTop > 0);
2643 prevMask = mach->CondStack[mach->CondStackTop - 1];
2644 mach->CondMask = ~mach->CondMask & prevMask;
2645 UPDATE_EXEC_MASK(mach);
2646 /* Todo: If CondMask==0, jump to ENDIF */
2647 }
2648 break;
2649
2650 case TGSI_OPCODE_ENDIF:
2651 /* pop CondMask */
2652 assert(mach->CondStackTop > 0);
2653 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2654 UPDATE_EXEC_MASK(mach);
2655 break;
2656
2657 case TGSI_OPCODE_END:
2658 /* halt execution */
2659 *pc = -1;
2660 break;
2661
2662 case TGSI_OPCODE_REP:
2663 assert (0);
2664 break;
2665
2666 case TGSI_OPCODE_ENDREP:
2667 assert (0);
2668 break;
2669
2670 case TGSI_OPCODE_PUSHA:
2671 assert (0);
2672 break;
2673
2674 case TGSI_OPCODE_POPA:
2675 assert (0);
2676 break;
2677
2678 case TGSI_OPCODE_CEIL:
2679 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2680 FETCH( &r[0], 0, chan_index );
2681 micro_ceil( &r[0], &r[0] );
2682 STORE( &r[0], 0, chan_index );
2683 }
2684 break;
2685
2686 case TGSI_OPCODE_I2F:
2687 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2688 FETCH( &r[0], 0, chan_index );
2689 micro_i2f( &r[0], &r[0] );
2690 STORE( &r[0], 0, chan_index );
2691 }
2692 break;
2693
2694 case TGSI_OPCODE_NOT:
2695 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2696 FETCH( &r[0], 0, chan_index );
2697 micro_not( &r[0], &r[0] );
2698 STORE( &r[0], 0, chan_index );
2699 }
2700 break;
2701
2702 case TGSI_OPCODE_TRUNC:
2703 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2704 FETCH( &r[0], 0, chan_index );
2705 micro_trunc( &r[0], &r[0] );
2706 STORE( &r[0], 0, chan_index );
2707 }
2708 break;
2709
2710 case TGSI_OPCODE_SHL:
2711 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2712 FETCH( &r[0], 0, chan_index );
2713 FETCH( &r[1], 1, chan_index );
2714 micro_shl( &r[0], &r[0], &r[1] );
2715 STORE( &r[0], 0, chan_index );
2716 }
2717 break;
2718
2719 case TGSI_OPCODE_SHR:
2720 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2721 FETCH( &r[0], 0, chan_index );
2722 FETCH( &r[1], 1, chan_index );
2723 micro_ishr( &r[0], &r[0], &r[1] );
2724 STORE( &r[0], 0, chan_index );
2725 }
2726 break;
2727
2728 case TGSI_OPCODE_AND:
2729 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2730 FETCH( &r[0], 0, chan_index );
2731 FETCH( &r[1], 1, chan_index );
2732 micro_and( &r[0], &r[0], &r[1] );
2733 STORE( &r[0], 0, chan_index );
2734 }
2735 break;
2736
2737 case TGSI_OPCODE_OR:
2738 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2739 FETCH( &r[0], 0, chan_index );
2740 FETCH( &r[1], 1, chan_index );
2741 micro_or( &r[0], &r[0], &r[1] );
2742 STORE( &r[0], 0, chan_index );
2743 }
2744 break;
2745
2746 case TGSI_OPCODE_MOD:
2747 assert (0);
2748 break;
2749
2750 case TGSI_OPCODE_XOR:
2751 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2752 FETCH( &r[0], 0, chan_index );
2753 FETCH( &r[1], 1, chan_index );
2754 micro_xor( &r[0], &r[0], &r[1] );
2755 STORE( &r[0], 0, chan_index );
2756 }
2757 break;
2758
2759 case TGSI_OPCODE_SAD:
2760 assert (0);
2761 break;
2762
2763 case TGSI_OPCODE_TXF:
2764 assert (0);
2765 break;
2766
2767 case TGSI_OPCODE_TXQ:
2768 assert (0);
2769 break;
2770
2771 case TGSI_OPCODE_EMIT:
2772 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2773 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2774 break;
2775
2776 case TGSI_OPCODE_ENDPRIM:
2777 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2778 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2779 break;
2780
2781 case TGSI_OPCODE_LOOP:
2782 /* fall-through (for now) */
2783 case TGSI_OPCODE_BGNLOOP2:
2784 /* push LoopMask and ContMasks */
2785 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2786 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2787 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2788 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2789 break;
2790
2791 case TGSI_OPCODE_ENDLOOP:
2792 /* fall-through (for now at least) */
2793 case TGSI_OPCODE_ENDLOOP2:
2794 /* Restore ContMask, but don't pop */
2795 assert(mach->ContStackTop > 0);
2796 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2797 UPDATE_EXEC_MASK(mach);
2798 if (mach->ExecMask) {
2799 /* repeat loop: jump to instruction just past BGNLOOP */
2800 *pc = inst->InstructionExtLabel.Label + 1;
2801 }
2802 else {
2803 /* exit loop: pop LoopMask */
2804 assert(mach->LoopStackTop > 0);
2805 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2806 /* pop ContMask */
2807 assert(mach->ContStackTop > 0);
2808 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2809 }
2810 UPDATE_EXEC_MASK(mach);
2811 break;
2812
2813 case TGSI_OPCODE_BRK:
2814 /* turn off loop channels for each enabled exec channel */
2815 mach->LoopMask &= ~mach->ExecMask;
2816 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2817 UPDATE_EXEC_MASK(mach);
2818 break;
2819
2820 case TGSI_OPCODE_CONT:
2821 /* turn off cont channels for each enabled exec channel */
2822 mach->ContMask &= ~mach->ExecMask;
2823 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2824 UPDATE_EXEC_MASK(mach);
2825 break;
2826
2827 case TGSI_OPCODE_BGNSUB:
2828 /* no-op */
2829 break;
2830
2831 case TGSI_OPCODE_ENDSUB:
2832 /* no-op */
2833 break;
2834
2835 case TGSI_OPCODE_NOISE1:
2836 assert( 0 );
2837 break;
2838
2839 case TGSI_OPCODE_NOISE2:
2840 assert( 0 );
2841 break;
2842
2843 case TGSI_OPCODE_NOISE3:
2844 assert( 0 );
2845 break;
2846
2847 case TGSI_OPCODE_NOISE4:
2848 assert( 0 );
2849 break;
2850
2851 case TGSI_OPCODE_NOP:
2852 break;
2853
2854 default:
2855 assert( 0 );
2856 }
2857 }
2858
2859
2860 /**
2861 * Run TGSI interpreter.
2862 * \return bitmask of "alive" quad components
2863 */
2864 uint
2865 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2866 {
2867 uint i;
2868 int pc = 0;
2869
2870 mach->CondMask = 0xf;
2871 mach->LoopMask = 0xf;
2872 mach->ContMask = 0xf;
2873 mach->FuncMask = 0xf;
2874 mach->ExecMask = 0xf;
2875
2876 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2877 assert(mach->CondStackTop == 0);
2878 assert(mach->LoopStackTop == 0);
2879 assert(mach->ContStackTop == 0);
2880 assert(mach->CallStackTop == 0);
2881
2882 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2883 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2884
2885 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2886 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2887 mach->Primitives[0] = 0;
2888 }
2889
2890 for (i = 0; i < QUAD_SIZE; i++) {
2891 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2892 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2893 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2894 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2895 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2896 }
2897
2898 /* execute declarations (interpolants) */
2899 for (i = 0; i < mach->NumDeclarations; i++) {
2900 exec_declaration( mach, mach->Declarations+i );
2901 }
2902
2903 /* execute instructions, until pc is set to -1 */
2904 while (pc != -1) {
2905 assert(pc < (int) mach->NumInstructions);
2906 exec_instruction( mach, mach->Instructions + pc, &pc );
2907 }
2908
2909 #if 0
2910 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2911 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2912 /*
2913 * Scale back depth component.
2914 */
2915 for (i = 0; i < 4; i++)
2916 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2917 }
2918 #endif
2919
2920 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2921 }
2922
2923