tgsi: Keep address register as a floating point.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from three other masks (CondMask, LoopMask and
44 * ContMask) which are controlled by the flow control instructions (namely:
45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_parse.h"
57 #include "tgsi/tgsi_util.h"
58 #include "tgsi_exec.h"
59 #include "util/u_memory.h"
60 #include "util/u_math.h"
61
/*
 * When non-zero, micro_exp2(), micro_lg2() and micro_pow() use the
 * util_fast_*() approximations instead of the libm functions.
 */
#define FAST_MATH          1

/* Element positions inside a channel, as used by micro_ddx()/micro_ddy(). */
#define TILE_TOP_LEFT      0
#define TILE_TOP_RIGHT     1
#define TILE_BOTTOM_LEFT   2
#define TILE_BOTTOM_RIGHT  3

/* Channel indices. */
#define CHAN_X             0
#define CHAN_Y             1
#define CHAN_Z             2
#define CHAN_W             3
73
/*
 * Shorthand locations of various utility registers
 * (_I = register index, _C = channel within that register).
 */

/* Constants filled in by tgsi_exec_machine_init(). */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C

/* Other utility register locations. */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C

/* More constants filled in by tgsi_exec_machine_init(). */
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C

#define TEMP_R0            TGSI_EXEC_TEMP_R0
106
/**
 * Non-zero when bit CHAN is set in the write mask of the instruction's
 * first destination register.
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test as IS_CHANNEL_ENABLED, for the second destination register. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: run the following statement once for every channel enabled
 * in the first destination's write mask.  CHAN must be a modifiable lvalue;
 * it is parenthesized so that any lvalue expression can be passed.
 *
 * The expansion ends in a bare 'if', so the statement that follows must not
 * be paired with an 'else' unless it is wrapped in braces.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same loop as FOR_EACH_ENABLED_CHANNEL, for the second destination. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
120
121
/**
 * Recompute the execution mask as the intersection of the conditional,
 * loop, continue and function masks.  MACH is a pointer to the machine;
 * it is parenthesized so that any pointer expression can be passed, and
 * the whole expansion is parenthesized so it behaves as one expression.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
125
126 /**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131 void
132 tgsi_exec_machine_bind_shader(
133 struct tgsi_exec_machine *mach,
134 const struct tgsi_token *tokens,
135 uint numSamplers,
136 struct tgsi_sampler *samplers)
137 {
138 uint k;
139 struct tgsi_parse_context parse;
140 struct tgsi_exec_labels *labels = &mach->Labels;
141 struct tgsi_full_instruction *instructions;
142 struct tgsi_full_declaration *declarations;
143 uint maxInstructions = 10, numInstructions = 0;
144 uint maxDeclarations = 10, numDeclarations = 0;
145 uint instno = 0;
146
147 #if 0
148 tgsi_dump(tokens, 0);
149 #endif
150
151 util_init_math();
152
153 mach->Tokens = tokens;
154 mach->Samplers = samplers;
155
156 k = tgsi_parse_init (&parse, mach->Tokens);
157 if (k != TGSI_PARSE_OK) {
158 debug_printf( "Problem parsing!\n" );
159 return;
160 }
161
162 mach->Processor = parse.FullHeader.Processor.Processor;
163 mach->ImmLimit = 0;
164 labels->count = 0;
165
166 declarations = (struct tgsi_full_declaration *)
167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169 if (!declarations) {
170 return;
171 }
172
173 instructions = (struct tgsi_full_instruction *)
174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176 if (!instructions) {
177 FREE( declarations );
178 return;
179 }
180
181 while( !tgsi_parse_end_of_tokens( &parse ) ) {
182 uint pointer = parse.Position;
183 uint i;
184
185 tgsi_parse_token( &parse );
186 switch( parse.FullToken.Token.Type ) {
187 case TGSI_TOKEN_TYPE_DECLARATION:
188 /* save expanded declaration */
189 if (numDeclarations == maxDeclarations) {
190 declarations = REALLOC(declarations,
191 maxDeclarations
192 * sizeof(struct tgsi_full_declaration),
193 (maxDeclarations + 10)
194 * sizeof(struct tgsi_full_declaration));
195 maxDeclarations += 10;
196 }
197 memcpy(declarations + numDeclarations,
198 &parse.FullToken.FullDeclaration,
199 sizeof(declarations[0]));
200 numDeclarations++;
201 break;
202
203 case TGSI_TOKEN_TYPE_IMMEDIATE:
204 {
205 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
206 assert( size % 4 == 0 );
207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209 for( i = 0; i < size; i++ ) {
210 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212 }
213 mach->ImmLimit += size / 4;
214 }
215 break;
216
217 case TGSI_TOKEN_TYPE_INSTRUCTION:
218 assert( labels->count < MAX_LABELS );
219
220 labels->labels[labels->count][0] = instno;
221 labels->labels[labels->count][1] = pointer;
222 labels->count++;
223
224 /* save expanded instruction */
225 if (numInstructions == maxInstructions) {
226 instructions = REALLOC(instructions,
227 maxInstructions
228 * sizeof(struct tgsi_full_instruction),
229 (maxInstructions + 10)
230 * sizeof(struct tgsi_full_instruction));
231 maxInstructions += 10;
232 }
233 memcpy(instructions + numInstructions,
234 &parse.FullToken.FullInstruction,
235 sizeof(instructions[0]));
236 numInstructions++;
237 break;
238
239 default:
240 assert( 0 );
241 }
242 }
243 tgsi_parse_free (&parse);
244
245 if (mach->Declarations) {
246 FREE( mach->Declarations );
247 }
248 mach->Declarations = declarations;
249 mach->NumDeclarations = numDeclarations;
250
251 if (mach->Instructions) {
252 FREE( mach->Instructions );
253 }
254 mach->Instructions = instructions;
255 mach->NumInstructions = numInstructions;
256 }
257
258
259 void
260 tgsi_exec_machine_init(
261 struct tgsi_exec_machine *mach )
262 {
263 uint i;
264
265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268 /* Setup constants. */
269 for( i = 0; i < 4; i++ ) {
270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280 }
281 }
282
283
284 void
285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286 {
287 if (mach->Instructions) {
288 FREE(mach->Instructions);
289 mach->Instructions = NULL;
290 mach->NumInstructions = 0;
291 }
292 if (mach->Declarations) {
293 FREE(mach->Declarations);
294 mach->Declarations = NULL;
295 mach->NumDeclarations = 0;
296 }
297 }
298
299
300 static void
301 micro_abs(
302 union tgsi_exec_channel *dst,
303 const union tgsi_exec_channel *src )
304 {
305 dst->f[0] = fabsf( src->f[0] );
306 dst->f[1] = fabsf( src->f[1] );
307 dst->f[2] = fabsf( src->f[2] );
308 dst->f[3] = fabsf( src->f[3] );
309 }
310
311 static void
312 micro_add(
313 union tgsi_exec_channel *dst,
314 const union tgsi_exec_channel *src0,
315 const union tgsi_exec_channel *src1 )
316 {
317 dst->f[0] = src0->f[0] + src1->f[0];
318 dst->f[1] = src0->f[1] + src1->f[1];
319 dst->f[2] = src0->f[2] + src1->f[2];
320 dst->f[3] = src0->f[3] + src1->f[3];
321 }
322
323 static void
324 micro_iadd(
325 union tgsi_exec_channel *dst,
326 const union tgsi_exec_channel *src0,
327 const union tgsi_exec_channel *src1 )
328 {
329 dst->i[0] = src0->i[0] + src1->i[0];
330 dst->i[1] = src0->i[1] + src1->i[1];
331 dst->i[2] = src0->i[2] + src1->i[2];
332 dst->i[3] = src0->i[3] + src1->i[3];
333 }
334
335 static void
336 micro_and(
337 union tgsi_exec_channel *dst,
338 const union tgsi_exec_channel *src0,
339 const union tgsi_exec_channel *src1 )
340 {
341 dst->u[0] = src0->u[0] & src1->u[0];
342 dst->u[1] = src0->u[1] & src1->u[1];
343 dst->u[2] = src0->u[2] & src1->u[2];
344 dst->u[3] = src0->u[3] & src1->u[3];
345 }
346
347 static void
348 micro_ceil(
349 union tgsi_exec_channel *dst,
350 const union tgsi_exec_channel *src )
351 {
352 dst->f[0] = ceilf( src->f[0] );
353 dst->f[1] = ceilf( src->f[1] );
354 dst->f[2] = ceilf( src->f[2] );
355 dst->f[3] = ceilf( src->f[3] );
356 }
357
358 static void
359 micro_cos(
360 union tgsi_exec_channel *dst,
361 const union tgsi_exec_channel *src )
362 {
363 dst->f[0] = cosf( src->f[0] );
364 dst->f[1] = cosf( src->f[1] );
365 dst->f[2] = cosf( src->f[2] );
366 dst->f[3] = cosf( src->f[3] );
367 }
368
369 static void
370 micro_ddx(
371 union tgsi_exec_channel *dst,
372 const union tgsi_exec_channel *src )
373 {
374 dst->f[0] =
375 dst->f[1] =
376 dst->f[2] =
377 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
378 }
379
380 static void
381 micro_ddy(
382 union tgsi_exec_channel *dst,
383 const union tgsi_exec_channel *src )
384 {
385 dst->f[0] =
386 dst->f[1] =
387 dst->f[2] =
388 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
389 }
390
391 static void
392 micro_div(
393 union tgsi_exec_channel *dst,
394 const union tgsi_exec_channel *src0,
395 const union tgsi_exec_channel *src1 )
396 {
397 if (src1->f[0] != 0) {
398 dst->f[0] = src0->f[0] / src1->f[0];
399 }
400 if (src1->f[1] != 0) {
401 dst->f[1] = src0->f[1] / src1->f[1];
402 }
403 if (src1->f[2] != 0) {
404 dst->f[2] = src0->f[2] / src1->f[2];
405 }
406 if (src1->f[3] != 0) {
407 dst->f[3] = src0->f[3] / src1->f[3];
408 }
409 }
410
411 static void
412 micro_udiv(
413 union tgsi_exec_channel *dst,
414 const union tgsi_exec_channel *src0,
415 const union tgsi_exec_channel *src1 )
416 {
417 dst->u[0] = src0->u[0] / src1->u[0];
418 dst->u[1] = src0->u[1] / src1->u[1];
419 dst->u[2] = src0->u[2] / src1->u[2];
420 dst->u[3] = src0->u[3] / src1->u[3];
421 }
422
423 static void
424 micro_eq(
425 union tgsi_exec_channel *dst,
426 const union tgsi_exec_channel *src0,
427 const union tgsi_exec_channel *src1,
428 const union tgsi_exec_channel *src2,
429 const union tgsi_exec_channel *src3 )
430 {
431 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
432 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
433 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
434 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
435 }
436
437 static void
438 micro_ieq(
439 union tgsi_exec_channel *dst,
440 const union tgsi_exec_channel *src0,
441 const union tgsi_exec_channel *src1,
442 const union tgsi_exec_channel *src2,
443 const union tgsi_exec_channel *src3 )
444 {
445 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
446 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
447 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
448 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
449 }
450
451 static void
452 micro_exp2(
453 union tgsi_exec_channel *dst,
454 const union tgsi_exec_channel *src)
455 {
456 #if FAST_MATH
457 dst->f[0] = util_fast_exp2( src->f[0] );
458 dst->f[1] = util_fast_exp2( src->f[1] );
459 dst->f[2] = util_fast_exp2( src->f[2] );
460 dst->f[3] = util_fast_exp2( src->f[3] );
461 #else
462 dst->f[0] = powf( 2.0f, src->f[0] );
463 dst->f[1] = powf( 2.0f, src->f[1] );
464 dst->f[2] = powf( 2.0f, src->f[2] );
465 dst->f[3] = powf( 2.0f, src->f[3] );
466 #endif
467 }
468
469 static void
470 micro_f2ut(
471 union tgsi_exec_channel *dst,
472 const union tgsi_exec_channel *src )
473 {
474 dst->u[0] = (uint) src->f[0];
475 dst->u[1] = (uint) src->f[1];
476 dst->u[2] = (uint) src->f[2];
477 dst->u[3] = (uint) src->f[3];
478 }
479
480 static void
481 micro_flr(
482 union tgsi_exec_channel *dst,
483 const union tgsi_exec_channel *src )
484 {
485 dst->f[0] = floorf( src->f[0] );
486 dst->f[1] = floorf( src->f[1] );
487 dst->f[2] = floorf( src->f[2] );
488 dst->f[3] = floorf( src->f[3] );
489 }
490
491 static void
492 micro_frc(
493 union tgsi_exec_channel *dst,
494 const union tgsi_exec_channel *src )
495 {
496 dst->f[0] = src->f[0] - floorf( src->f[0] );
497 dst->f[1] = src->f[1] - floorf( src->f[1] );
498 dst->f[2] = src->f[2] - floorf( src->f[2] );
499 dst->f[3] = src->f[3] - floorf( src->f[3] );
500 }
501
502 static void
503 micro_ge(
504 union tgsi_exec_channel *dst,
505 const union tgsi_exec_channel *src0,
506 const union tgsi_exec_channel *src1,
507 const union tgsi_exec_channel *src2,
508 const union tgsi_exec_channel *src3 )
509 {
510 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
511 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
512 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
513 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
514 }
515
516 static void
517 micro_i2f(
518 union tgsi_exec_channel *dst,
519 const union tgsi_exec_channel *src )
520 {
521 dst->f[0] = (float) src->i[0];
522 dst->f[1] = (float) src->i[1];
523 dst->f[2] = (float) src->i[2];
524 dst->f[3] = (float) src->i[3];
525 }
526
527 static void
528 micro_lg2(
529 union tgsi_exec_channel *dst,
530 const union tgsi_exec_channel *src )
531 {
532 #if FAST_MATH
533 dst->f[0] = util_fast_log2( src->f[0] );
534 dst->f[1] = util_fast_log2( src->f[1] );
535 dst->f[2] = util_fast_log2( src->f[2] );
536 dst->f[3] = util_fast_log2( src->f[3] );
537 #else
538 dst->f[0] = logf( src->f[0] ) * 1.442695f;
539 dst->f[1] = logf( src->f[1] ) * 1.442695f;
540 dst->f[2] = logf( src->f[2] ) * 1.442695f;
541 dst->f[3] = logf( src->f[3] ) * 1.442695f;
542 #endif
543 }
544
545 static void
546 micro_le(
547 union tgsi_exec_channel *dst,
548 const union tgsi_exec_channel *src0,
549 const union tgsi_exec_channel *src1,
550 const union tgsi_exec_channel *src2,
551 const union tgsi_exec_channel *src3 )
552 {
553 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
554 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
555 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
556 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
557 }
558
559 static void
560 micro_lt(
561 union tgsi_exec_channel *dst,
562 const union tgsi_exec_channel *src0,
563 const union tgsi_exec_channel *src1,
564 const union tgsi_exec_channel *src2,
565 const union tgsi_exec_channel *src3 )
566 {
567 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
568 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
569 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
570 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
571 }
572
573 static void
574 micro_ilt(
575 union tgsi_exec_channel *dst,
576 const union tgsi_exec_channel *src0,
577 const union tgsi_exec_channel *src1,
578 const union tgsi_exec_channel *src2,
579 const union tgsi_exec_channel *src3 )
580 {
581 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
582 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
583 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
584 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
585 }
586
587 static void
588 micro_ult(
589 union tgsi_exec_channel *dst,
590 const union tgsi_exec_channel *src0,
591 const union tgsi_exec_channel *src1,
592 const union tgsi_exec_channel *src2,
593 const union tgsi_exec_channel *src3 )
594 {
595 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
596 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
597 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
598 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
599 }
600
601 static void
602 micro_max(
603 union tgsi_exec_channel *dst,
604 const union tgsi_exec_channel *src0,
605 const union tgsi_exec_channel *src1 )
606 {
607 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
608 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
609 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
610 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
611 }
612
613 static void
614 micro_imax(
615 union tgsi_exec_channel *dst,
616 const union tgsi_exec_channel *src0,
617 const union tgsi_exec_channel *src1 )
618 {
619 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
620 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
621 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
622 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
623 }
624
625 static void
626 micro_umax(
627 union tgsi_exec_channel *dst,
628 const union tgsi_exec_channel *src0,
629 const union tgsi_exec_channel *src1 )
630 {
631 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
632 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
633 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
634 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
635 }
636
637 static void
638 micro_min(
639 union tgsi_exec_channel *dst,
640 const union tgsi_exec_channel *src0,
641 const union tgsi_exec_channel *src1 )
642 {
643 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
644 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
645 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
646 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
647 }
648
649 static void
650 micro_imin(
651 union tgsi_exec_channel *dst,
652 const union tgsi_exec_channel *src0,
653 const union tgsi_exec_channel *src1 )
654 {
655 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
656 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
657 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
658 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
659 }
660
661 static void
662 micro_umin(
663 union tgsi_exec_channel *dst,
664 const union tgsi_exec_channel *src0,
665 const union tgsi_exec_channel *src1 )
666 {
667 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
668 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
669 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
670 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
671 }
672
673 static void
674 micro_umod(
675 union tgsi_exec_channel *dst,
676 const union tgsi_exec_channel *src0,
677 const union tgsi_exec_channel *src1 )
678 {
679 dst->u[0] = src0->u[0] % src1->u[0];
680 dst->u[1] = src0->u[1] % src1->u[1];
681 dst->u[2] = src0->u[2] % src1->u[2];
682 dst->u[3] = src0->u[3] % src1->u[3];
683 }
684
685 static void
686 micro_mul(
687 union tgsi_exec_channel *dst,
688 const union tgsi_exec_channel *src0,
689 const union tgsi_exec_channel *src1 )
690 {
691 dst->f[0] = src0->f[0] * src1->f[0];
692 dst->f[1] = src0->f[1] * src1->f[1];
693 dst->f[2] = src0->f[2] * src1->f[2];
694 dst->f[3] = src0->f[3] * src1->f[3];
695 }
696
697 static void
698 micro_imul(
699 union tgsi_exec_channel *dst,
700 const union tgsi_exec_channel *src0,
701 const union tgsi_exec_channel *src1 )
702 {
703 dst->i[0] = src0->i[0] * src1->i[0];
704 dst->i[1] = src0->i[1] * src1->i[1];
705 dst->i[2] = src0->i[2] * src1->i[2];
706 dst->i[3] = src0->i[3] * src1->i[3];
707 }
708
709 static void
710 micro_imul64(
711 union tgsi_exec_channel *dst0,
712 union tgsi_exec_channel *dst1,
713 const union tgsi_exec_channel *src0,
714 const union tgsi_exec_channel *src1 )
715 {
716 dst1->i[0] = src0->i[0] * src1->i[0];
717 dst1->i[1] = src0->i[1] * src1->i[1];
718 dst1->i[2] = src0->i[2] * src1->i[2];
719 dst1->i[3] = src0->i[3] * src1->i[3];
720 dst0->i[0] = 0;
721 dst0->i[1] = 0;
722 dst0->i[2] = 0;
723 dst0->i[3] = 0;
724 }
725
726 static void
727 micro_umul64(
728 union tgsi_exec_channel *dst0,
729 union tgsi_exec_channel *dst1,
730 const union tgsi_exec_channel *src0,
731 const union tgsi_exec_channel *src1 )
732 {
733 dst1->u[0] = src0->u[0] * src1->u[0];
734 dst1->u[1] = src0->u[1] * src1->u[1];
735 dst1->u[2] = src0->u[2] * src1->u[2];
736 dst1->u[3] = src0->u[3] * src1->u[3];
737 dst0->u[0] = 0;
738 dst0->u[1] = 0;
739 dst0->u[2] = 0;
740 dst0->u[3] = 0;
741 }
742
743 static void
744 micro_movc(
745 union tgsi_exec_channel *dst,
746 const union tgsi_exec_channel *src0,
747 const union tgsi_exec_channel *src1,
748 const union tgsi_exec_channel *src2 )
749 {
750 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
751 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
752 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
753 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
754 }
755
756 static void
757 micro_neg(
758 union tgsi_exec_channel *dst,
759 const union tgsi_exec_channel *src )
760 {
761 dst->f[0] = -src->f[0];
762 dst->f[1] = -src->f[1];
763 dst->f[2] = -src->f[2];
764 dst->f[3] = -src->f[3];
765 }
766
767 static void
768 micro_ineg(
769 union tgsi_exec_channel *dst,
770 const union tgsi_exec_channel *src )
771 {
772 dst->i[0] = -src->i[0];
773 dst->i[1] = -src->i[1];
774 dst->i[2] = -src->i[2];
775 dst->i[3] = -src->i[3];
776 }
777
778 static void
779 micro_not(
780 union tgsi_exec_channel *dst,
781 const union tgsi_exec_channel *src )
782 {
783 dst->u[0] = ~src->u[0];
784 dst->u[1] = ~src->u[1];
785 dst->u[2] = ~src->u[2];
786 dst->u[3] = ~src->u[3];
787 }
788
789 static void
790 micro_or(
791 union tgsi_exec_channel *dst,
792 const union tgsi_exec_channel *src0,
793 const union tgsi_exec_channel *src1 )
794 {
795 dst->u[0] = src0->u[0] | src1->u[0];
796 dst->u[1] = src0->u[1] | src1->u[1];
797 dst->u[2] = src0->u[2] | src1->u[2];
798 dst->u[3] = src0->u[3] | src1->u[3];
799 }
800
801 static void
802 micro_pow(
803 union tgsi_exec_channel *dst,
804 const union tgsi_exec_channel *src0,
805 const union tgsi_exec_channel *src1 )
806 {
807 #if FAST_MATH
808 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
809 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
810 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
811 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
812 #else
813 dst->f[0] = powf( src0->f[0], src1->f[0] );
814 dst->f[1] = powf( src0->f[1], src1->f[1] );
815 dst->f[2] = powf( src0->f[2], src1->f[2] );
816 dst->f[3] = powf( src0->f[3], src1->f[3] );
817 #endif
818 }
819
820 static void
821 micro_rnd(
822 union tgsi_exec_channel *dst,
823 const union tgsi_exec_channel *src )
824 {
825 dst->f[0] = floorf( src->f[0] + 0.5f );
826 dst->f[1] = floorf( src->f[1] + 0.5f );
827 dst->f[2] = floorf( src->f[2] + 0.5f );
828 dst->f[3] = floorf( src->f[3] + 0.5f );
829 }
830
831 static void
832 micro_sgn(
833 union tgsi_exec_channel *dst,
834 const union tgsi_exec_channel *src )
835 {
836 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
837 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
838 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
839 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
840 }
841
842 static void
843 micro_shl(
844 union tgsi_exec_channel *dst,
845 const union tgsi_exec_channel *src0,
846 const union tgsi_exec_channel *src1 )
847 {
848 dst->i[0] = src0->i[0] << src1->i[0];
849 dst->i[1] = src0->i[1] << src1->i[1];
850 dst->i[2] = src0->i[2] << src1->i[2];
851 dst->i[3] = src0->i[3] << src1->i[3];
852 }
853
854 static void
855 micro_ishr(
856 union tgsi_exec_channel *dst,
857 const union tgsi_exec_channel *src0,
858 const union tgsi_exec_channel *src1 )
859 {
860 dst->i[0] = src0->i[0] >> src1->i[0];
861 dst->i[1] = src0->i[1] >> src1->i[1];
862 dst->i[2] = src0->i[2] >> src1->i[2];
863 dst->i[3] = src0->i[3] >> src1->i[3];
864 }
865
866 static void
867 micro_trunc(
868 union tgsi_exec_channel *dst,
869 const union tgsi_exec_channel *src0 )
870 {
871 dst->f[0] = (float) (int) src0->f[0];
872 dst->f[1] = (float) (int) src0->f[1];
873 dst->f[2] = (float) (int) src0->f[2];
874 dst->f[3] = (float) (int) src0->f[3];
875 }
876
877 static void
878 micro_ushr(
879 union tgsi_exec_channel *dst,
880 const union tgsi_exec_channel *src0,
881 const union tgsi_exec_channel *src1 )
882 {
883 dst->u[0] = src0->u[0] >> src1->u[0];
884 dst->u[1] = src0->u[1] >> src1->u[1];
885 dst->u[2] = src0->u[2] >> src1->u[2];
886 dst->u[3] = src0->u[3] >> src1->u[3];
887 }
888
889 static void
890 micro_sin(
891 union tgsi_exec_channel *dst,
892 const union tgsi_exec_channel *src )
893 {
894 dst->f[0] = sinf( src->f[0] );
895 dst->f[1] = sinf( src->f[1] );
896 dst->f[2] = sinf( src->f[2] );
897 dst->f[3] = sinf( src->f[3] );
898 }
899
900 static void
901 micro_sqrt( union tgsi_exec_channel *dst,
902 const union tgsi_exec_channel *src )
903 {
904 dst->f[0] = sqrtf( src->f[0] );
905 dst->f[1] = sqrtf( src->f[1] );
906 dst->f[2] = sqrtf( src->f[2] );
907 dst->f[3] = sqrtf( src->f[3] );
908 }
909
910 static void
911 micro_sub(
912 union tgsi_exec_channel *dst,
913 const union tgsi_exec_channel *src0,
914 const union tgsi_exec_channel *src1 )
915 {
916 dst->f[0] = src0->f[0] - src1->f[0];
917 dst->f[1] = src0->f[1] - src1->f[1];
918 dst->f[2] = src0->f[2] - src1->f[2];
919 dst->f[3] = src0->f[3] - src1->f[3];
920 }
921
922 static void
923 micro_u2f(
924 union tgsi_exec_channel *dst,
925 const union tgsi_exec_channel *src )
926 {
927 dst->f[0] = (float) src->u[0];
928 dst->f[1] = (float) src->u[1];
929 dst->f[2] = (float) src->u[2];
930 dst->f[3] = (float) src->u[3];
931 }
932
933 static void
934 micro_xor(
935 union tgsi_exec_channel *dst,
936 const union tgsi_exec_channel *src0,
937 const union tgsi_exec_channel *src1 )
938 {
939 dst->u[0] = src0->u[0] ^ src1->u[0];
940 dst->u[1] = src0->u[1] ^ src1->u[1];
941 dst->u[2] = src0->u[2] ^ src1->u[2];
942 dst->u[3] = src0->u[3] ^ src1->u[3];
943 }
944
945 static void
946 fetch_src_file_channel(
947 const struct tgsi_exec_machine *mach,
948 const uint file,
949 const uint swizzle,
950 const union tgsi_exec_channel *index,
951 union tgsi_exec_channel *chan )
952 {
953 switch( swizzle ) {
954 case TGSI_EXTSWIZZLE_X:
955 case TGSI_EXTSWIZZLE_Y:
956 case TGSI_EXTSWIZZLE_Z:
957 case TGSI_EXTSWIZZLE_W:
958 switch( file ) {
959 case TGSI_FILE_CONSTANT:
960 assert(mach->Consts);
961 assert(index->i[0] >= 0);
962 assert(index->i[1] >= 0);
963 assert(index->i[2] >= 0);
964 assert(index->i[3] >= 0);
965 chan->f[0] = mach->Consts[index->i[0]][swizzle];
966 chan->f[1] = mach->Consts[index->i[1]][swizzle];
967 chan->f[2] = mach->Consts[index->i[2]][swizzle];
968 chan->f[3] = mach->Consts[index->i[3]][swizzle];
969 break;
970
971 case TGSI_FILE_INPUT:
972 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
973 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
974 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
975 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
976 break;
977
978 case TGSI_FILE_TEMPORARY:
979 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
980 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
981 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
982 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
983 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
984 break;
985
986 case TGSI_FILE_IMMEDIATE:
987 assert( index->i[0] < (int) mach->ImmLimit );
988 chan->f[0] = mach->Imms[index->i[0]][swizzle];
989 assert( index->i[1] < (int) mach->ImmLimit );
990 chan->f[1] = mach->Imms[index->i[1]][swizzle];
991 assert( index->i[2] < (int) mach->ImmLimit );
992 chan->f[2] = mach->Imms[index->i[2]][swizzle];
993 assert( index->i[3] < (int) mach->ImmLimit );
994 chan->f[3] = mach->Imms[index->i[3]][swizzle];
995 break;
996
997 case TGSI_FILE_ADDRESS:
998 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
999 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1000 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1001 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1002 break;
1003
1004 case TGSI_FILE_OUTPUT:
1005 /* vertex/fragment output vars can be read too */
1006 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1007 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1008 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1009 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1010 break;
1011
1012 default:
1013 assert( 0 );
1014 }
1015 break;
1016
1017 case TGSI_EXTSWIZZLE_ZERO:
1018 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1019 break;
1020
1021 case TGSI_EXTSWIZZLE_ONE:
1022 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1023 break;
1024
1025 default:
1026 assert( 0 );
1027 }
1028 }
1029
1030 static void
1031 fetch_source(
1032 const struct tgsi_exec_machine *mach,
1033 union tgsi_exec_channel *chan,
1034 const struct tgsi_full_src_register *reg,
1035 const uint chan_index )
1036 {
1037 union tgsi_exec_channel index;
1038 uint swizzle;
1039
1040 index.i[0] =
1041 index.i[1] =
1042 index.i[2] =
1043 index.i[3] = reg->SrcRegister.Index;
1044
1045 if (reg->SrcRegister.Indirect) {
1046 union tgsi_exec_channel index2;
1047 union tgsi_exec_channel indir_index;
1048 const uint execmask = mach->ExecMask;
1049 uint i;
1050
1051 /* which address register (always zero now) */
1052 index2.i[0] =
1053 index2.i[1] =
1054 index2.i[2] =
1055 index2.i[3] = reg->SrcRegisterInd.Index;
1056
1057 /* get current value of address register[swizzle] */
1058 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1059 fetch_src_file_channel(
1060 mach,
1061 reg->SrcRegisterInd.File,
1062 swizzle,
1063 &index2,
1064 &indir_index );
1065
1066 /* add value of address register to the offset */
1067 index.i[0] += (int) indir_index.f[0];
1068 index.i[1] += (int) indir_index.f[1];
1069 index.i[2] += (int) indir_index.f[2];
1070 index.i[3] += (int) indir_index.f[3];
1071
1072 /* for disabled execution channels, zero-out the index to
1073 * avoid using a potential garbage value.
1074 */
1075 for (i = 0; i < QUAD_SIZE; i++) {
1076 if ((execmask & (1 << i)) == 0)
1077 index.i[i] = 0;
1078 }
1079 }
1080
1081 if( reg->SrcRegister.Dimension ) {
1082 switch( reg->SrcRegister.File ) {
1083 case TGSI_FILE_INPUT:
1084 index.i[0] *= 17;
1085 index.i[1] *= 17;
1086 index.i[2] *= 17;
1087 index.i[3] *= 17;
1088 break;
1089 case TGSI_FILE_CONSTANT:
1090 index.i[0] *= 4096;
1091 index.i[1] *= 4096;
1092 index.i[2] *= 4096;
1093 index.i[3] *= 4096;
1094 break;
1095 default:
1096 assert( 0 );
1097 }
1098
1099 index.i[0] += reg->SrcRegisterDim.Index;
1100 index.i[1] += reg->SrcRegisterDim.Index;
1101 index.i[2] += reg->SrcRegisterDim.Index;
1102 index.i[3] += reg->SrcRegisterDim.Index;
1103
1104 if (reg->SrcRegisterDim.Indirect) {
1105 union tgsi_exec_channel index2;
1106 union tgsi_exec_channel indir_index;
1107 const uint execmask = mach->ExecMask;
1108 uint i;
1109
1110 index2.i[0] =
1111 index2.i[1] =
1112 index2.i[2] =
1113 index2.i[3] = reg->SrcRegisterDimInd.Index;
1114
1115 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1116 fetch_src_file_channel(
1117 mach,
1118 reg->SrcRegisterDimInd.File,
1119 swizzle,
1120 &index2,
1121 &indir_index );
1122
1123 index.i[0] += (int) indir_index.f[0];
1124 index.i[1] += (int) indir_index.f[1];
1125 index.i[2] += (int) indir_index.f[2];
1126 index.i[3] += (int) indir_index.f[3];
1127
1128 /* for disabled execution channels, zero-out the index to
1129 * avoid using a potential garbage value.
1130 */
1131 for (i = 0; i < QUAD_SIZE; i++) {
1132 if ((execmask & (1 << i)) == 0)
1133 index.i[i] = 0;
1134 }
1135 }
1136 }
1137
1138 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1139 fetch_src_file_channel(
1140 mach,
1141 reg->SrcRegister.File,
1142 swizzle,
1143 &index,
1144 chan );
1145
1146 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1147 case TGSI_UTIL_SIGN_CLEAR:
1148 micro_abs( chan, chan );
1149 break;
1150
1151 case TGSI_UTIL_SIGN_SET:
1152 micro_abs( chan, chan );
1153 micro_neg( chan, chan );
1154 break;
1155
1156 case TGSI_UTIL_SIGN_TOGGLE:
1157 micro_neg( chan, chan );
1158 break;
1159
1160 case TGSI_UTIL_SIGN_KEEP:
1161 break;
1162 }
1163
1164 if (reg->SrcRegisterExtMod.Complement) {
1165 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1166 }
1167 }
1168
1169 static void
1170 store_dest(
1171 struct tgsi_exec_machine *mach,
1172 const union tgsi_exec_channel *chan,
1173 const struct tgsi_full_dst_register *reg,
1174 const struct tgsi_full_instruction *inst,
1175 uint chan_index )
1176 {
1177 uint i;
1178 union tgsi_exec_channel null;
1179 union tgsi_exec_channel *dst;
1180 uint execmask = mach->ExecMask;
1181
1182 switch (reg->DstRegister.File) {
1183 case TGSI_FILE_NULL:
1184 dst = &null;
1185 break;
1186
1187 case TGSI_FILE_OUTPUT:
1188 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1189 + reg->DstRegister.Index].xyzw[chan_index];
1190 break;
1191
1192 case TGSI_FILE_TEMPORARY:
1193 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1194 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1195 break;
1196
1197 case TGSI_FILE_ADDRESS:
1198 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1199 break;
1200
1201 default:
1202 assert( 0 );
1203 return;
1204 }
1205
1206 if (inst->InstructionExtNv.CondFlowEnable) {
1207 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1208 uint swizzle;
1209 uint shift;
1210 uint mask;
1211 uint test;
1212
1213 /* Only CC0 supported.
1214 */
1215 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1216
1217 switch (chan_index) {
1218 case CHAN_X:
1219 swizzle = inst->InstructionExtNv.CondSwizzleX;
1220 break;
1221 case CHAN_Y:
1222 swizzle = inst->InstructionExtNv.CondSwizzleY;
1223 break;
1224 case CHAN_Z:
1225 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1226 break;
1227 case CHAN_W:
1228 swizzle = inst->InstructionExtNv.CondSwizzleW;
1229 break;
1230 default:
1231 assert( 0 );
1232 return;
1233 }
1234
1235 switch (swizzle) {
1236 case TGSI_SWIZZLE_X:
1237 shift = TGSI_EXEC_CC_X_SHIFT;
1238 mask = TGSI_EXEC_CC_X_MASK;
1239 break;
1240 case TGSI_SWIZZLE_Y:
1241 shift = TGSI_EXEC_CC_Y_SHIFT;
1242 mask = TGSI_EXEC_CC_Y_MASK;
1243 break;
1244 case TGSI_SWIZZLE_Z:
1245 shift = TGSI_EXEC_CC_Z_SHIFT;
1246 mask = TGSI_EXEC_CC_Z_MASK;
1247 break;
1248 case TGSI_SWIZZLE_W:
1249 shift = TGSI_EXEC_CC_W_SHIFT;
1250 mask = TGSI_EXEC_CC_W_MASK;
1251 break;
1252 default:
1253 assert( 0 );
1254 return;
1255 }
1256
1257 switch (inst->InstructionExtNv.CondMask) {
1258 case TGSI_CC_GT:
1259 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1260 for (i = 0; i < QUAD_SIZE; i++)
1261 if (cc->u[i] & test)
1262 execmask &= ~(1 << i);
1263 break;
1264
1265 case TGSI_CC_EQ:
1266 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1267 for (i = 0; i < QUAD_SIZE; i++)
1268 if (cc->u[i] & test)
1269 execmask &= ~(1 << i);
1270 break;
1271
1272 case TGSI_CC_LT:
1273 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1274 for (i = 0; i < QUAD_SIZE; i++)
1275 if (cc->u[i] & test)
1276 execmask &= ~(1 << i);
1277 break;
1278
1279 case TGSI_CC_GE:
1280 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1281 for (i = 0; i < QUAD_SIZE; i++)
1282 if (cc->u[i] & test)
1283 execmask &= ~(1 << i);
1284 break;
1285
1286 case TGSI_CC_LE:
1287 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1288 for (i = 0; i < QUAD_SIZE; i++)
1289 if (cc->u[i] & test)
1290 execmask &= ~(1 << i);
1291 break;
1292
1293 case TGSI_CC_NE:
1294 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1295 for (i = 0; i < QUAD_SIZE; i++)
1296 if (cc->u[i] & test)
1297 execmask &= ~(1 << i);
1298 break;
1299
1300 case TGSI_CC_TR:
1301 break;
1302
1303 case TGSI_CC_FL:
1304 for (i = 0; i < QUAD_SIZE; i++)
1305 execmask &= ~(1 << i);
1306 break;
1307
1308 default:
1309 assert( 0 );
1310 return;
1311 }
1312 }
1313
1314 switch (inst->Instruction.Saturate) {
1315 case TGSI_SAT_NONE:
1316 for (i = 0; i < QUAD_SIZE; i++)
1317 if (execmask & (1 << i))
1318 dst->i[i] = chan->i[i];
1319 break;
1320
1321 case TGSI_SAT_ZERO_ONE:
1322 for (i = 0; i < QUAD_SIZE; i++)
1323 if (execmask & (1 << i)) {
1324 if (chan->f[i] < 0.0f)
1325 dst->f[i] = 0.0f;
1326 else if (chan->f[i] > 1.0f)
1327 dst->f[i] = 1.0f;
1328 else
1329 dst->i[i] = chan->i[i];
1330 }
1331 break;
1332
1333 case TGSI_SAT_MINUS_PLUS_ONE:
1334 for (i = 0; i < QUAD_SIZE; i++)
1335 if (execmask & (1 << i)) {
1336 if (chan->f[i] < -1.0f)
1337 dst->f[i] = -1.0f;
1338 else if (chan->f[i] > 1.0f)
1339 dst->f[i] = 1.0f;
1340 else
1341 dst->i[i] = chan->i[i];
1342 }
1343 break;
1344
1345 default:
1346 assert( 0 );
1347 }
1348
1349 if (inst->InstructionExtNv.CondDstUpdate) {
1350 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1351 uint shift;
1352 uint mask;
1353
1354 /* Only CC0 supported.
1355 */
1356 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1357
1358 switch (chan_index) {
1359 case CHAN_X:
1360 shift = TGSI_EXEC_CC_X_SHIFT;
1361 mask = ~TGSI_EXEC_CC_X_MASK;
1362 break;
1363 case CHAN_Y:
1364 shift = TGSI_EXEC_CC_Y_SHIFT;
1365 mask = ~TGSI_EXEC_CC_Y_MASK;
1366 break;
1367 case CHAN_Z:
1368 shift = TGSI_EXEC_CC_Z_SHIFT;
1369 mask = ~TGSI_EXEC_CC_Z_MASK;
1370 break;
1371 case CHAN_W:
1372 shift = TGSI_EXEC_CC_W_SHIFT;
1373 mask = ~TGSI_EXEC_CC_W_MASK;
1374 break;
1375 default:
1376 assert( 0 );
1377 return;
1378 }
1379
1380 for (i = 0; i < QUAD_SIZE; i++)
1381 if (execmask & (1 << i)) {
1382 cc->u[i] &= mask;
1383 if (dst->f[i] < 0.0f)
1384 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1385 else if (dst->f[i] > 0.0f)
1386 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1387 else if (dst->f[i] == 0.0f)
1388 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1389 else
1390 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1391 }
1392 }
1393 }
1394
/*
 * Shorthands for reading a source operand and writing a destination
 * operand of the current instruction.  Both expand to code that refers to
 * variables named `mach' and `inst', which must be in scope at the point
 * of use.
 */
#define FETCH(dst, src_index, channel) \
   fetch_source(mach, dst, &inst->FullSrcRegisters[src_index], channel)

#define STORE(value, dst_index, channel) \
   store_dest(mach, value, &inst->FullDstRegisters[dst_index], inst, channel)
1400
1401
1402 /**
1403 * Execute ARB-style KIL which is predicated by a src register.
1404 * Kill fragment if any of the four values is less than zero.
1405 */
1406 static void
1407 exec_kil(struct tgsi_exec_machine *mach,
1408 const struct tgsi_full_instruction *inst)
1409 {
1410 uint uniquemask;
1411 uint chan_index;
1412 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1413 union tgsi_exec_channel r[1];
1414
1415 /* This mask stores component bits that were already tested. Note that
1416 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1417 * tested. */
1418 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1419
1420 for (chan_index = 0; chan_index < 4; chan_index++)
1421 {
1422 uint swizzle;
1423 uint i;
1424
1425 /* unswizzle channel */
1426 swizzle = tgsi_util_get_full_src_register_extswizzle (
1427 &inst->FullSrcRegisters[0],
1428 chan_index);
1429
1430 /* check if the component has not been already tested */
1431 if (uniquemask & (1 << swizzle))
1432 continue;
1433 uniquemask |= 1 << swizzle;
1434
1435 FETCH(&r[0], 0, chan_index);
1436 for (i = 0; i < 4; i++)
1437 if (r[0].f[i] < 0.0f)
1438 kilmask |= 1 << i;
1439 }
1440
1441 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1442 }
1443
1444 /**
1445 * Execute NVIDIA-style KIL which is predicated by a condition code.
1446 * Kill fragment if the condition code is TRUE.
1447 */
1448 static void
1449 exec_kilp(struct tgsi_exec_machine *mach,
1450 const struct tgsi_full_instruction *inst)
1451 {
1452 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1453
1454 if (inst->InstructionExtNv.CondFlowEnable) {
1455 uint swizzle[4];
1456 uint chan_index;
1457
1458 kilmask = 0x0;
1459
1460 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1461 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1462 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1463 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1464
1465 for (chan_index = 0; chan_index < 4; chan_index++)
1466 {
1467 uint i;
1468
1469 for (i = 0; i < 4; i++) {
1470 /* TODO: evaluate the condition code */
1471 if (0)
1472 kilmask |= 1 << i;
1473 }
1474 }
1475 }
1476 else {
1477 /* "unconditional" kil */
1478 kilmask = mach->ExecMask;
1479 }
1480 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1481 }
1482
1483
1484 /*
1485 * Fetch a texel using STR texture coordinates.
1486 */
1487 static void
1488 fetch_texel( struct tgsi_sampler *sampler,
1489 const union tgsi_exec_channel *s,
1490 const union tgsi_exec_channel *t,
1491 const union tgsi_exec_channel *p,
1492 float lodbias, /* XXX should be float[4] */
1493 union tgsi_exec_channel *r,
1494 union tgsi_exec_channel *g,
1495 union tgsi_exec_channel *b,
1496 union tgsi_exec_channel *a )
1497 {
1498 uint j;
1499 float rgba[NUM_CHANNELS][QUAD_SIZE];
1500
1501 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1502
1503 for (j = 0; j < 4; j++) {
1504 r->f[j] = rgba[0][j];
1505 g->f[j] = rgba[1][j];
1506 b->f[j] = rgba[2][j];
1507 a->f[j] = rgba[3][j];
1508 }
1509 }
1510
1511
1512 static void
1513 exec_tex(struct tgsi_exec_machine *mach,
1514 const struct tgsi_full_instruction *inst,
1515 boolean biasLod,
1516 boolean projected)
1517 {
1518 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1519 union tgsi_exec_channel r[8];
1520 uint chan_index;
1521 float lodBias;
1522
1523 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1524
1525 switch (inst->InstructionExtTexture.Texture) {
1526 case TGSI_TEXTURE_1D:
1527
1528 FETCH(&r[0], 0, CHAN_X);
1529
1530 if (projected) {
1531 FETCH(&r[1], 0, CHAN_W);
1532 micro_div( &r[0], &r[0], &r[1] );
1533 }
1534
1535 if (biasLod) {
1536 FETCH(&r[1], 0, CHAN_W);
1537 lodBias = r[2].f[0];
1538 }
1539 else
1540 lodBias = 0.0;
1541
1542 fetch_texel(&mach->Samplers[unit],
1543 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1544 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1545 break;
1546
1547 case TGSI_TEXTURE_2D:
1548 case TGSI_TEXTURE_RECT:
1549
1550 FETCH(&r[0], 0, CHAN_X);
1551 FETCH(&r[1], 0, CHAN_Y);
1552 FETCH(&r[2], 0, CHAN_Z);
1553
1554 if (projected) {
1555 FETCH(&r[3], 0, CHAN_W);
1556 micro_div( &r[0], &r[0], &r[3] );
1557 micro_div( &r[1], &r[1], &r[3] );
1558 micro_div( &r[2], &r[2], &r[3] );
1559 }
1560
1561 if (biasLod) {
1562 FETCH(&r[3], 0, CHAN_W);
1563 lodBias = r[3].f[0];
1564 }
1565 else
1566 lodBias = 0.0;
1567
1568 fetch_texel(&mach->Samplers[unit],
1569 &r[0], &r[1], &r[2], lodBias, /* inputs */
1570 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1571 break;
1572
1573 case TGSI_TEXTURE_3D:
1574 case TGSI_TEXTURE_CUBE:
1575
1576 FETCH(&r[0], 0, CHAN_X);
1577 FETCH(&r[1], 0, CHAN_Y);
1578 FETCH(&r[2], 0, CHAN_Z);
1579
1580 if (projected) {
1581 FETCH(&r[3], 0, CHAN_W);
1582 micro_div( &r[0], &r[0], &r[3] );
1583 micro_div( &r[1], &r[1], &r[3] );
1584 micro_div( &r[2], &r[2], &r[3] );
1585 }
1586
1587 if (biasLod) {
1588 FETCH(&r[3], 0, CHAN_W);
1589 lodBias = r[3].f[0];
1590 }
1591 else
1592 lodBias = 0.0;
1593
1594 fetch_texel(&mach->Samplers[unit],
1595 &r[0], &r[1], &r[2], lodBias,
1596 &r[0], &r[1], &r[2], &r[3]);
1597 break;
1598
1599 default:
1600 assert (0);
1601 }
1602
1603 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1604 STORE( &r[chan_index], 0, chan_index );
1605 }
1606 }
1607
1608
1609 /**
1610 * Evaluate a constant-valued coefficient at the position of the
1611 * current quad.
1612 */
1613 static void
1614 eval_constant_coef(
1615 struct tgsi_exec_machine *mach,
1616 unsigned attrib,
1617 unsigned chan )
1618 {
1619 unsigned i;
1620
1621 for( i = 0; i < QUAD_SIZE; i++ ) {
1622 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1623 }
1624 }
1625
1626 /**
1627 * Evaluate a linear-valued coefficient at the position of the
1628 * current quad.
1629 */
1630 static void
1631 eval_linear_coef(
1632 struct tgsi_exec_machine *mach,
1633 unsigned attrib,
1634 unsigned chan )
1635 {
1636 const float x = mach->QuadPos.xyzw[0].f[0];
1637 const float y = mach->QuadPos.xyzw[1].f[0];
1638 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1639 const float dady = mach->InterpCoefs[attrib].dady[chan];
1640 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1641 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1642 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1643 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1644 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1645 }
1646
1647 /**
1648 * Evaluate a perspective-valued coefficient at the position of the
1649 * current quad.
1650 */
1651 static void
1652 eval_perspective_coef(
1653 struct tgsi_exec_machine *mach,
1654 unsigned attrib,
1655 unsigned chan )
1656 {
1657 const float x = mach->QuadPos.xyzw[0].f[0];
1658 const float y = mach->QuadPos.xyzw[1].f[0];
1659 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1660 const float dady = mach->InterpCoefs[attrib].dady[chan];
1661 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1662 const float *w = mach->QuadPos.xyzw[3].f;
1663 /* divide by W here */
1664 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1665 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1666 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1667 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1668 }
1669
1670
/** Signature shared by the eval_*_coef() input interpolation routines. */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
1675
1676 static void
1677 exec_declaration(
1678 struct tgsi_exec_machine *mach,
1679 const struct tgsi_full_declaration *decl )
1680 {
1681 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1682 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1683 unsigned first, last, mask;
1684 eval_coef_func eval;
1685
1686 first = decl->DeclarationRange.First;
1687 last = decl->DeclarationRange.Last;
1688 mask = decl->Declaration.UsageMask;
1689
1690 switch( decl->Declaration.Interpolate ) {
1691 case TGSI_INTERPOLATE_CONSTANT:
1692 eval = eval_constant_coef;
1693 break;
1694
1695 case TGSI_INTERPOLATE_LINEAR:
1696 eval = eval_linear_coef;
1697 break;
1698
1699 case TGSI_INTERPOLATE_PERSPECTIVE:
1700 eval = eval_perspective_coef;
1701 break;
1702
1703 default:
1704 assert( 0 );
1705 }
1706
1707 if( mask == TGSI_WRITEMASK_XYZW ) {
1708 unsigned i, j;
1709
1710 for( i = first; i <= last; i++ ) {
1711 for( j = 0; j < NUM_CHANNELS; j++ ) {
1712 eval( mach, i, j );
1713 }
1714 }
1715 }
1716 else {
1717 unsigned i, j;
1718
1719 for( j = 0; j < NUM_CHANNELS; j++ ) {
1720 if( mask & (1 << j) ) {
1721 for( i = first; i <= last; i++ ) {
1722 eval( mach, i, j );
1723 }
1724 }
1725 }
1726 }
1727 }
1728 }
1729 }
1730
1731 static void
1732 exec_instruction(
1733 struct tgsi_exec_machine *mach,
1734 const struct tgsi_full_instruction *inst,
1735 int *pc )
1736 {
1737 uint chan_index;
1738 union tgsi_exec_channel r[8];
1739
1740 (*pc)++;
1741
1742 switch (inst->Instruction.Opcode) {
1743 case TGSI_OPCODE_ARL:
1744 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1745 FETCH( &r[0], 0, chan_index );
1746 micro_trunc( &r[0], &r[0] );
1747 STORE( &r[0], 0, chan_index );
1748 }
1749 break;
1750
1751 case TGSI_OPCODE_MOV:
1752 case TGSI_OPCODE_SWZ:
1753 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1754 FETCH( &r[0], 0, chan_index );
1755 STORE( &r[0], 0, chan_index );
1756 }
1757 break;
1758
1759 case TGSI_OPCODE_LIT:
1760 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1761 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1762 }
1763
1764 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1765 FETCH( &r[0], 0, CHAN_X );
1766 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1767 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1768 STORE( &r[0], 0, CHAN_Y );
1769 }
1770
1771 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1772 FETCH( &r[1], 0, CHAN_Y );
1773 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1774
1775 FETCH( &r[2], 0, CHAN_W );
1776 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1777 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1778 micro_pow( &r[1], &r[1], &r[2] );
1779 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1780 STORE( &r[0], 0, CHAN_Z );
1781 }
1782 }
1783
1784 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1785 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1786 }
1787 break;
1788
1789 case TGSI_OPCODE_RCP:
1790 /* TGSI_OPCODE_RECIP */
1791 FETCH( &r[0], 0, CHAN_X );
1792 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1793 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1794 STORE( &r[0], 0, chan_index );
1795 }
1796 break;
1797
1798 case TGSI_OPCODE_RSQ:
1799 /* TGSI_OPCODE_RECIPSQRT */
1800 FETCH( &r[0], 0, CHAN_X );
1801 micro_sqrt( &r[0], &r[0] );
1802 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1803 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1804 STORE( &r[0], 0, chan_index );
1805 }
1806 break;
1807
1808 case TGSI_OPCODE_EXP:
1809 FETCH( &r[0], 0, CHAN_X );
1810 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1811 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1812 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1813 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1814 }
1815 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1816 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1817 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1818 }
1819 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1820 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1821 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1822 }
1823 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1824 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1825 }
1826 break;
1827
1828 case TGSI_OPCODE_LOG:
1829 FETCH( &r[0], 0, CHAN_X );
1830 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1831 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1832 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1833 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1834 STORE( &r[0], 0, CHAN_X );
1835 }
1836 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1837 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1838 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1839 STORE( &r[0], 0, CHAN_Y );
1840 }
1841 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1842 STORE( &r[1], 0, CHAN_Z );
1843 }
1844 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1845 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1846 }
1847 break;
1848
1849 case TGSI_OPCODE_MUL:
1850 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1851 {
1852 FETCH(&r[0], 0, chan_index);
1853 FETCH(&r[1], 1, chan_index);
1854
1855 micro_mul( &r[0], &r[0], &r[1] );
1856
1857 STORE(&r[0], 0, chan_index);
1858 }
1859 break;
1860
1861 case TGSI_OPCODE_ADD:
1862 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1863 FETCH( &r[0], 0, chan_index );
1864 FETCH( &r[1], 1, chan_index );
1865 micro_add( &r[0], &r[0], &r[1] );
1866 STORE( &r[0], 0, chan_index );
1867 }
1868 break;
1869
1870 case TGSI_OPCODE_DP3:
1871 /* TGSI_OPCODE_DOT3 */
1872 FETCH( &r[0], 0, CHAN_X );
1873 FETCH( &r[1], 1, CHAN_X );
1874 micro_mul( &r[0], &r[0], &r[1] );
1875
1876 FETCH( &r[1], 0, CHAN_Y );
1877 FETCH( &r[2], 1, CHAN_Y );
1878 micro_mul( &r[1], &r[1], &r[2] );
1879 micro_add( &r[0], &r[0], &r[1] );
1880
1881 FETCH( &r[1], 0, CHAN_Z );
1882 FETCH( &r[2], 1, CHAN_Z );
1883 micro_mul( &r[1], &r[1], &r[2] );
1884 micro_add( &r[0], &r[0], &r[1] );
1885
1886 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1887 STORE( &r[0], 0, chan_index );
1888 }
1889 break;
1890
1891 case TGSI_OPCODE_DP4:
1892 /* TGSI_OPCODE_DOT4 */
1893 FETCH(&r[0], 0, CHAN_X);
1894 FETCH(&r[1], 1, CHAN_X);
1895
1896 micro_mul( &r[0], &r[0], &r[1] );
1897
1898 FETCH(&r[1], 0, CHAN_Y);
1899 FETCH(&r[2], 1, CHAN_Y);
1900
1901 micro_mul( &r[1], &r[1], &r[2] );
1902 micro_add( &r[0], &r[0], &r[1] );
1903
1904 FETCH(&r[1], 0, CHAN_Z);
1905 FETCH(&r[2], 1, CHAN_Z);
1906
1907 micro_mul( &r[1], &r[1], &r[2] );
1908 micro_add( &r[0], &r[0], &r[1] );
1909
1910 FETCH(&r[1], 0, CHAN_W);
1911 FETCH(&r[2], 1, CHAN_W);
1912
1913 micro_mul( &r[1], &r[1], &r[2] );
1914 micro_add( &r[0], &r[0], &r[1] );
1915
1916 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1917 STORE( &r[0], 0, chan_index );
1918 }
1919 break;
1920
1921 case TGSI_OPCODE_DST:
1922 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1923 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1924 }
1925
1926 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1927 FETCH( &r[0], 0, CHAN_Y );
1928 FETCH( &r[1], 1, CHAN_Y);
1929 micro_mul( &r[0], &r[0], &r[1] );
1930 STORE( &r[0], 0, CHAN_Y );
1931 }
1932
1933 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1934 FETCH( &r[0], 0, CHAN_Z );
1935 STORE( &r[0], 0, CHAN_Z );
1936 }
1937
1938 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1939 FETCH( &r[0], 1, CHAN_W );
1940 STORE( &r[0], 0, CHAN_W );
1941 }
1942 break;
1943
1944 case TGSI_OPCODE_MIN:
1945 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1946 FETCH(&r[0], 0, chan_index);
1947 FETCH(&r[1], 1, chan_index);
1948
1949 /* XXX use micro_min()?? */
1950 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1951
1952 STORE(&r[0], 0, chan_index);
1953 }
1954 break;
1955
1956 case TGSI_OPCODE_MAX:
1957 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1958 FETCH(&r[0], 0, chan_index);
1959 FETCH(&r[1], 1, chan_index);
1960
1961 /* XXX use micro_max()?? */
1962 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1963
1964 STORE(&r[0], 0, chan_index );
1965 }
1966 break;
1967
1968 case TGSI_OPCODE_SLT:
1969 /* TGSI_OPCODE_SETLT */
1970 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1971 FETCH( &r[0], 0, chan_index );
1972 FETCH( &r[1], 1, chan_index );
1973 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1974 STORE( &r[0], 0, chan_index );
1975 }
1976 break;
1977
1978 case TGSI_OPCODE_SGE:
1979 /* TGSI_OPCODE_SETGE */
1980 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1981 FETCH( &r[0], 0, chan_index );
1982 FETCH( &r[1], 1, chan_index );
1983 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1984 STORE( &r[0], 0, chan_index );
1985 }
1986 break;
1987
1988 case TGSI_OPCODE_MAD:
1989 /* TGSI_OPCODE_MADD */
1990 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1991 FETCH( &r[0], 0, chan_index );
1992 FETCH( &r[1], 1, chan_index );
1993 micro_mul( &r[0], &r[0], &r[1] );
1994 FETCH( &r[1], 2, chan_index );
1995 micro_add( &r[0], &r[0], &r[1] );
1996 STORE( &r[0], 0, chan_index );
1997 }
1998 break;
1999
2000 case TGSI_OPCODE_SUB:
2001 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2002 FETCH(&r[0], 0, chan_index);
2003 FETCH(&r[1], 1, chan_index);
2004
2005 micro_sub( &r[0], &r[0], &r[1] );
2006
2007 STORE(&r[0], 0, chan_index);
2008 }
2009 break;
2010
2011 case TGSI_OPCODE_LERP:
2012 /* TGSI_OPCODE_LRP */
2013 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2014 FETCH(&r[0], 0, chan_index);
2015 FETCH(&r[1], 1, chan_index);
2016 FETCH(&r[2], 2, chan_index);
2017
2018 micro_sub( &r[1], &r[1], &r[2] );
2019 micro_mul( &r[0], &r[0], &r[1] );
2020 micro_add( &r[0], &r[0], &r[2] );
2021
2022 STORE(&r[0], 0, chan_index);
2023 }
2024 break;
2025
2026 case TGSI_OPCODE_CND:
2027 assert (0);
2028 break;
2029
2030 case TGSI_OPCODE_CND0:
2031 assert (0);
2032 break;
2033
2034 case TGSI_OPCODE_DOT2ADD:
2035 /* TGSI_OPCODE_DP2A */
2036 assert (0);
2037 break;
2038
2039 case TGSI_OPCODE_INDEX:
2040 assert (0);
2041 break;
2042
2043 case TGSI_OPCODE_NEGATE:
2044 assert (0);
2045 break;
2046
2047 case TGSI_OPCODE_FRAC:
2048 /* TGSI_OPCODE_FRC */
2049 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2050 FETCH( &r[0], 0, chan_index );
2051 micro_frc( &r[0], &r[0] );
2052 STORE( &r[0], 0, chan_index );
2053 }
2054 break;
2055
2056 case TGSI_OPCODE_CLAMP:
2057 assert (0);
2058 break;
2059
2060 case TGSI_OPCODE_FLOOR:
2061 /* TGSI_OPCODE_FLR */
2062 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2063 FETCH( &r[0], 0, chan_index );
2064 micro_flr( &r[0], &r[0] );
2065 STORE( &r[0], 0, chan_index );
2066 }
2067 break;
2068
2069 case TGSI_OPCODE_ROUND:
2070 case TGSI_OPCODE_ARR:
2071 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2072 FETCH( &r[0], 0, chan_index );
2073 micro_rnd( &r[0], &r[0] );
2074 STORE( &r[0], 0, chan_index );
2075 }
2076 break;
2077
2078 case TGSI_OPCODE_EXPBASE2:
2079 /* TGSI_OPCODE_EX2 */
2080 FETCH(&r[0], 0, CHAN_X);
2081
2082 #if FAST_MATH
2083 micro_exp2( &r[0], &r[0] );
2084 #else
2085 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2086 #endif
2087
2088 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2089 STORE( &r[0], 0, chan_index );
2090 }
2091 break;
2092
2093 case TGSI_OPCODE_LOGBASE2:
2094 /* TGSI_OPCODE_LG2 */
2095 FETCH( &r[0], 0, CHAN_X );
2096 micro_lg2( &r[0], &r[0] );
2097 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2098 STORE( &r[0], 0, chan_index );
2099 }
2100 break;
2101
2102 case TGSI_OPCODE_POWER:
2103 /* TGSI_OPCODE_POW */
2104 FETCH(&r[0], 0, CHAN_X);
2105 FETCH(&r[1], 1, CHAN_X);
2106
2107 micro_pow( &r[0], &r[0], &r[1] );
2108
2109 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2110 STORE( &r[0], 0, chan_index );
2111 }
2112 break;
2113
2114 case TGSI_OPCODE_CROSSPRODUCT:
2115 /* TGSI_OPCODE_XPD */
2116 FETCH(&r[0], 0, CHAN_Y);
2117 FETCH(&r[1], 1, CHAN_Z);
2118
2119 micro_mul( &r[2], &r[0], &r[1] );
2120
2121 FETCH(&r[3], 0, CHAN_Z);
2122 FETCH(&r[4], 1, CHAN_Y);
2123
2124 micro_mul( &r[5], &r[3], &r[4] );
2125 micro_sub( &r[2], &r[2], &r[5] );
2126
2127 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2128 STORE( &r[2], 0, CHAN_X );
2129 }
2130
2131 FETCH(&r[2], 1, CHAN_X);
2132
2133 micro_mul( &r[3], &r[3], &r[2] );
2134
2135 FETCH(&r[5], 0, CHAN_X);
2136
2137 micro_mul( &r[1], &r[1], &r[5] );
2138 micro_sub( &r[3], &r[3], &r[1] );
2139
2140 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2141 STORE( &r[3], 0, CHAN_Y );
2142 }
2143
2144 micro_mul( &r[5], &r[5], &r[4] );
2145 micro_mul( &r[0], &r[0], &r[2] );
2146 micro_sub( &r[5], &r[5], &r[0] );
2147
2148 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2149 STORE( &r[5], 0, CHAN_Z );
2150 }
2151
2152 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2153 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2154 }
2155 break;
2156
2157 case TGSI_OPCODE_MULTIPLYMATRIX:
2158 assert (0);
2159 break;
2160
2161 case TGSI_OPCODE_ABS:
2162 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2163 FETCH(&r[0], 0, chan_index);
2164
2165 micro_abs( &r[0], &r[0] );
2166
2167 STORE(&r[0], 0, chan_index);
2168 }
2169 break;
2170
2171 case TGSI_OPCODE_RCC:
2172 assert (0);
2173 break;
2174
2175 case TGSI_OPCODE_DPH:
2176 FETCH(&r[0], 0, CHAN_X);
2177 FETCH(&r[1], 1, CHAN_X);
2178
2179 micro_mul( &r[0], &r[0], &r[1] );
2180
2181 FETCH(&r[1], 0, CHAN_Y);
2182 FETCH(&r[2], 1, CHAN_Y);
2183
2184 micro_mul( &r[1], &r[1], &r[2] );
2185 micro_add( &r[0], &r[0], &r[1] );
2186
2187 FETCH(&r[1], 0, CHAN_Z);
2188 FETCH(&r[2], 1, CHAN_Z);
2189
2190 micro_mul( &r[1], &r[1], &r[2] );
2191 micro_add( &r[0], &r[0], &r[1] );
2192
2193 FETCH(&r[1], 1, CHAN_W);
2194
2195 micro_add( &r[0], &r[0], &r[1] );
2196
2197 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2198 STORE( &r[0], 0, chan_index );
2199 }
2200 break;
2201
2202 case TGSI_OPCODE_COS:
2203 FETCH(&r[0], 0, CHAN_X);
2204
2205 micro_cos( &r[0], &r[0] );
2206
2207 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2208 STORE( &r[0], 0, chan_index );
2209 }
2210 break;
2211
2212 case TGSI_OPCODE_DDX:
2213 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2214 FETCH( &r[0], 0, chan_index );
2215 micro_ddx( &r[0], &r[0] );
2216 STORE( &r[0], 0, chan_index );
2217 }
2218 break;
2219
2220 case TGSI_OPCODE_DDY:
2221 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2222 FETCH( &r[0], 0, chan_index );
2223 micro_ddy( &r[0], &r[0] );
2224 STORE( &r[0], 0, chan_index );
2225 }
2226 break;
2227
2228 case TGSI_OPCODE_KILP:
2229 exec_kilp (mach, inst);
2230 break;
2231
2232 case TGSI_OPCODE_KIL:
2233 exec_kil (mach, inst);
2234 break;
2235
2236 case TGSI_OPCODE_PK2H:
2237 assert (0);
2238 break;
2239
2240 case TGSI_OPCODE_PK2US:
2241 assert (0);
2242 break;
2243
2244 case TGSI_OPCODE_PK4B:
2245 assert (0);
2246 break;
2247
2248 case TGSI_OPCODE_PK4UB:
2249 assert (0);
2250 break;
2251
2252 case TGSI_OPCODE_RFL:
2253 assert (0);
2254 break;
2255
2256 case TGSI_OPCODE_SEQ:
2257 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2258 FETCH( &r[0], 0, chan_index );
2259 FETCH( &r[1], 1, chan_index );
2260 micro_eq( &r[0], &r[0], &r[1],
2261 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2262 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2263 STORE( &r[0], 0, chan_index );
2264 }
2265 break;
2266
2267 case TGSI_OPCODE_SFL:
2268 assert (0);
2269 break;
2270
2271 case TGSI_OPCODE_SGT:
2272 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2273 FETCH( &r[0], 0, chan_index );
2274 FETCH( &r[1], 1, chan_index );
2275 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2276 STORE( &r[0], 0, chan_index );
2277 }
2278 break;
2279
2280 case TGSI_OPCODE_SIN:
2281 FETCH( &r[0], 0, CHAN_X );
2282 micro_sin( &r[0], &r[0] );
2283 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2284 STORE( &r[0], 0, chan_index );
2285 }
2286 break;
2287
2288 case TGSI_OPCODE_SLE:
2289 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2290 FETCH( &r[0], 0, chan_index );
2291 FETCH( &r[1], 1, chan_index );
2292 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2293 STORE( &r[0], 0, chan_index );
2294 }
2295 break;
2296
2297 case TGSI_OPCODE_SNE:
2298 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2299 FETCH( &r[0], 0, chan_index );
2300 FETCH( &r[1], 1, chan_index );
2301 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2302 STORE( &r[0], 0, chan_index );
2303 }
2304 break;
2305
2306 case TGSI_OPCODE_STR:
2307 assert (0);
2308 break;
2309
2310 case TGSI_OPCODE_TEX:
2311 /* simple texture lookup */
2312 /* src[0] = texcoord */
2313 /* src[1] = sampler unit */
2314 exec_tex(mach, inst, FALSE, FALSE);
2315 break;
2316
2317 case TGSI_OPCODE_TXB:
2318 /* Texture lookup with lod bias */
2319 /* src[0] = texcoord (src[0].w = LOD bias) */
2320 /* src[1] = sampler unit */
2321 exec_tex(mach, inst, TRUE, FALSE);
2322 break;
2323
2324 case TGSI_OPCODE_TXD:
2325 /* Texture lookup with explict partial derivatives */
2326 /* src[0] = texcoord */
2327 /* src[1] = d[strq]/dx */
2328 /* src[2] = d[strq]/dy */
2329 /* src[3] = sampler unit */
2330 assert (0);
2331 break;
2332
2333 case TGSI_OPCODE_TXL:
2334 /* Texture lookup with explit LOD */
2335 /* src[0] = texcoord (src[0].w = LOD) */
2336 /* src[1] = sampler unit */
2337 exec_tex(mach, inst, TRUE, FALSE);
2338 break;
2339
2340 case TGSI_OPCODE_TXP:
2341 /* Texture lookup with projection */
2342 /* src[0] = texcoord (src[0].w = projection) */
2343 /* src[1] = sampler unit */
2344 exec_tex(mach, inst, FALSE, TRUE);
2345 break;
2346
2347 case TGSI_OPCODE_UP2H:
2348 assert (0);
2349 break;
2350
2351 case TGSI_OPCODE_UP2US:
2352 assert (0);
2353 break;
2354
2355 case TGSI_OPCODE_UP4B:
2356 assert (0);
2357 break;
2358
2359 case TGSI_OPCODE_UP4UB:
2360 assert (0);
2361 break;
2362
2363 case TGSI_OPCODE_X2D:
2364 assert (0);
2365 break;
2366
2367 case TGSI_OPCODE_ARA:
2368 assert (0);
2369 break;
2370
2371 case TGSI_OPCODE_BRA:
2372 assert (0);
2373 break;
2374
2375 case TGSI_OPCODE_CAL:
2376 /* skip the call if no execution channels are enabled */
2377 if (mach->ExecMask) {
2378 /* do the call */
2379
2380 /* push the Cond, Loop, Cont stacks */
2381 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2382 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2383 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2384 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2385 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2386 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2387
2388 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2389 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2390
2391 /* note that PC was already incremented above */
2392 mach->CallStack[mach->CallStackTop++] = *pc;
2393 *pc = inst->InstructionExtLabel.Label;
2394 }
2395 break;
2396
2397 case TGSI_OPCODE_RET:
2398 mach->FuncMask &= ~mach->ExecMask;
2399 UPDATE_EXEC_MASK(mach);
2400
2401 if (mach->FuncMask == 0x0) {
2402 /* really return now (otherwise, keep executing */
2403
2404 if (mach->CallStackTop == 0) {
2405 /* returning from main() */
2406 *pc = -1;
2407 return;
2408 }
2409 *pc = mach->CallStack[--mach->CallStackTop];
2410
2411 /* pop the Cond, Loop, Cont stacks */
2412 assert(mach->CondStackTop > 0);
2413 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2414 assert(mach->LoopStackTop > 0);
2415 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2416 assert(mach->ContStackTop > 0);
2417 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2418 assert(mach->FuncStackTop > 0);
2419 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2420
2421 UPDATE_EXEC_MASK(mach);
2422 }
2423 break;
2424
2425 case TGSI_OPCODE_SSG:
2426 /* TGSI_OPCODE_SGN */
2427 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2428 FETCH( &r[0], 0, chan_index );
2429 micro_sgn( &r[0], &r[0] );
2430 STORE( &r[0], 0, chan_index );
2431 }
2432 break;
2433
2434 case TGSI_OPCODE_CMP:
2435 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2436 FETCH(&r[0], 0, chan_index);
2437 FETCH(&r[1], 1, chan_index);
2438 FETCH(&r[2], 2, chan_index);
2439
2440 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2441
2442 STORE(&r[0], 0, chan_index);
2443 }
2444 break;
2445
2446 case TGSI_OPCODE_SCS:
2447 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2448 FETCH( &r[0], 0, CHAN_X );
2449 }
2450 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2451 micro_cos( &r[1], &r[0] );
2452 STORE( &r[1], 0, CHAN_X );
2453 }
2454 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2455 micro_sin( &r[1], &r[0] );
2456 STORE( &r[1], 0, CHAN_Y );
2457 }
2458 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2459 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2460 }
2461 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2462 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2463 }
2464 break;
2465
2466 case TGSI_OPCODE_NRM:
2467 /* 3-component vector normalize */
2468 {
2469 union tgsi_exec_channel tmp, dot;
2470
2471 /* tmp = dp3(src0, src0): */
2472 FETCH( &r[0], 0, CHAN_X );
2473 micro_mul( &tmp, &r[0], &r[0] );
2474
2475 FETCH( &r[1], 0, CHAN_Y );
2476 micro_mul( &dot, &r[1], &r[1] );
2477 micro_add( &tmp, &tmp, &dot );
2478
2479 FETCH( &r[2], 0, CHAN_Z );
2480 micro_mul( &dot, &r[2], &r[2] );
2481 micro_add( &tmp, &tmp, &dot );
2482
2483 /* tmp = 1 / tmp */
2484 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2485
2486 /* note: w channel is undefined */
2487 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2488 /* chan = chan * tmp */
2489 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2490 STORE( &r[chan_index], 0, chan_index );
2491 }
2492 }
2493 break;
2494
2495 case TGSI_OPCODE_NRM4:
2496 /* 4-component vector normalize */
2497 {
2498 union tgsi_exec_channel tmp, dot;
2499
2500 /* tmp = dp4(src0, src0): */
2501 FETCH( &r[0], 0, CHAN_X );
2502 micro_mul( &tmp, &r[0], &r[0] );
2503
2504 FETCH( &r[1], 0, CHAN_Y );
2505 micro_mul( &dot, &r[1], &r[1] );
2506 micro_add( &tmp, &tmp, &dot );
2507
2508 FETCH( &r[2], 0, CHAN_Z );
2509 micro_mul( &dot, &r[2], &r[2] );
2510 micro_add( &tmp, &tmp, &dot );
2511
2512 FETCH( &r[3], 0, CHAN_W );
2513 micro_mul( &dot, &r[3], &r[3] );
2514 micro_add( &tmp, &tmp, &dot );
2515
2516 /* tmp = 1 / tmp */
2517 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2518
2519 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2520 /* chan = chan * tmp */
2521 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2522 STORE( &r[chan_index], 0, chan_index );
2523 }
2524 }
2525 break;
2526
2527 case TGSI_OPCODE_DIV:
2528 assert( 0 );
2529 break;
2530
2531 case TGSI_OPCODE_DP2:
2532 FETCH( &r[0], 0, CHAN_X );
2533 FETCH( &r[1], 1, CHAN_X );
2534 micro_mul( &r[0], &r[0], &r[1] );
2535
2536 FETCH( &r[1], 0, CHAN_Y );
2537 FETCH( &r[2], 1, CHAN_Y );
2538 micro_mul( &r[1], &r[1], &r[2] );
2539 micro_add( &r[0], &r[0], &r[1] );
2540
2541 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2542 STORE( &r[0], 0, chan_index );
2543 }
2544 break;
2545
2546 case TGSI_OPCODE_IF:
2547 /* push CondMask */
2548 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2549 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2550 FETCH( &r[0], 0, CHAN_X );
2551 /* update CondMask */
2552 if( ! r[0].u[0] ) {
2553 mach->CondMask &= ~0x1;
2554 }
2555 if( ! r[0].u[1] ) {
2556 mach->CondMask &= ~0x2;
2557 }
2558 if( ! r[0].u[2] ) {
2559 mach->CondMask &= ~0x4;
2560 }
2561 if( ! r[0].u[3] ) {
2562 mach->CondMask &= ~0x8;
2563 }
2564 UPDATE_EXEC_MASK(mach);
2565 /* Todo: If CondMask==0, jump to ELSE */
2566 break;
2567
2568 case TGSI_OPCODE_ELSE:
2569 /* invert CondMask wrt previous mask */
2570 {
2571 uint prevMask;
2572 assert(mach->CondStackTop > 0);
2573 prevMask = mach->CondStack[mach->CondStackTop - 1];
2574 mach->CondMask = ~mach->CondMask & prevMask;
2575 UPDATE_EXEC_MASK(mach);
2576 /* Todo: If CondMask==0, jump to ENDIF */
2577 }
2578 break;
2579
2580 case TGSI_OPCODE_ENDIF:
2581 /* pop CondMask */
2582 assert(mach->CondStackTop > 0);
2583 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2584 UPDATE_EXEC_MASK(mach);
2585 break;
2586
2587 case TGSI_OPCODE_END:
2588 /* halt execution */
2589 *pc = -1;
2590 break;
2591
2592 case TGSI_OPCODE_REP:
2593 assert (0);
2594 break;
2595
2596 case TGSI_OPCODE_ENDREP:
2597 assert (0);
2598 break;
2599
2600 case TGSI_OPCODE_PUSHA:
2601 assert (0);
2602 break;
2603
2604 case TGSI_OPCODE_POPA:
2605 assert (0);
2606 break;
2607
2608 case TGSI_OPCODE_CEIL:
2609 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2610 FETCH( &r[0], 0, chan_index );
2611 micro_ceil( &r[0], &r[0] );
2612 STORE( &r[0], 0, chan_index );
2613 }
2614 break;
2615
2616 case TGSI_OPCODE_I2F:
2617 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2618 FETCH( &r[0], 0, chan_index );
2619 micro_i2f( &r[0], &r[0] );
2620 STORE( &r[0], 0, chan_index );
2621 }
2622 break;
2623
2624 case TGSI_OPCODE_NOT:
2625 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2626 FETCH( &r[0], 0, chan_index );
2627 micro_not( &r[0], &r[0] );
2628 STORE( &r[0], 0, chan_index );
2629 }
2630 break;
2631
2632 case TGSI_OPCODE_TRUNC:
2633 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2634 FETCH( &r[0], 0, chan_index );
2635 micro_trunc( &r[0], &r[0] );
2636 STORE( &r[0], 0, chan_index );
2637 }
2638 break;
2639
2640 case TGSI_OPCODE_SHL:
2641 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2642 FETCH( &r[0], 0, chan_index );
2643 FETCH( &r[1], 1, chan_index );
2644 micro_shl( &r[0], &r[0], &r[1] );
2645 STORE( &r[0], 0, chan_index );
2646 }
2647 break;
2648
2649 case TGSI_OPCODE_SHR:
2650 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2651 FETCH( &r[0], 0, chan_index );
2652 FETCH( &r[1], 1, chan_index );
2653 micro_ishr( &r[0], &r[0], &r[1] );
2654 STORE( &r[0], 0, chan_index );
2655 }
2656 break;
2657
2658 case TGSI_OPCODE_AND:
2659 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2660 FETCH( &r[0], 0, chan_index );
2661 FETCH( &r[1], 1, chan_index );
2662 micro_and( &r[0], &r[0], &r[1] );
2663 STORE( &r[0], 0, chan_index );
2664 }
2665 break;
2666
2667 case TGSI_OPCODE_OR:
2668 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2669 FETCH( &r[0], 0, chan_index );
2670 FETCH( &r[1], 1, chan_index );
2671 micro_or( &r[0], &r[0], &r[1] );
2672 STORE( &r[0], 0, chan_index );
2673 }
2674 break;
2675
2676 case TGSI_OPCODE_MOD:
2677 assert (0);
2678 break;
2679
2680 case TGSI_OPCODE_XOR:
2681 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2682 FETCH( &r[0], 0, chan_index );
2683 FETCH( &r[1], 1, chan_index );
2684 micro_xor( &r[0], &r[0], &r[1] );
2685 STORE( &r[0], 0, chan_index );
2686 }
2687 break;
2688
2689 case TGSI_OPCODE_SAD:
2690 assert (0);
2691 break;
2692
2693 case TGSI_OPCODE_TXF:
2694 assert (0);
2695 break;
2696
2697 case TGSI_OPCODE_TXQ:
2698 assert (0);
2699 break;
2700
2701 case TGSI_OPCODE_EMIT:
2702 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2703 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2704 break;
2705
2706 case TGSI_OPCODE_ENDPRIM:
2707 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2708 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2709 break;
2710
2711 case TGSI_OPCODE_LOOP:
2712 /* fall-through (for now) */
2713 case TGSI_OPCODE_BGNLOOP2:
2714 /* push LoopMask and ContMasks */
2715 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2716 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2717 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2718 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2719 break;
2720
2721 case TGSI_OPCODE_ENDLOOP:
2722 /* fall-through (for now at least) */
2723 case TGSI_OPCODE_ENDLOOP2:
2724 /* Restore ContMask, but don't pop */
2725 assert(mach->ContStackTop > 0);
2726 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2727 UPDATE_EXEC_MASK(mach);
2728 if (mach->ExecMask) {
2729 /* repeat loop: jump to instruction just past BGNLOOP */
2730 *pc = inst->InstructionExtLabel.Label + 1;
2731 }
2732 else {
2733 /* exit loop: pop LoopMask */
2734 assert(mach->LoopStackTop > 0);
2735 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2736 /* pop ContMask */
2737 assert(mach->ContStackTop > 0);
2738 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2739 }
2740 UPDATE_EXEC_MASK(mach);
2741 break;
2742
2743 case TGSI_OPCODE_BRK:
2744 /* turn off loop channels for each enabled exec channel */
2745 mach->LoopMask &= ~mach->ExecMask;
2746 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2747 UPDATE_EXEC_MASK(mach);
2748 break;
2749
2750 case TGSI_OPCODE_CONT:
2751 /* turn off cont channels for each enabled exec channel */
2752 mach->ContMask &= ~mach->ExecMask;
2753 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2754 UPDATE_EXEC_MASK(mach);
2755 break;
2756
2757 case TGSI_OPCODE_BGNSUB:
2758 /* no-op */
2759 break;
2760
2761 case TGSI_OPCODE_ENDSUB:
2762 /* no-op */
2763 break;
2764
2765 case TGSI_OPCODE_NOISE1:
2766 assert( 0 );
2767 break;
2768
2769 case TGSI_OPCODE_NOISE2:
2770 assert( 0 );
2771 break;
2772
2773 case TGSI_OPCODE_NOISE3:
2774 assert( 0 );
2775 break;
2776
2777 case TGSI_OPCODE_NOISE4:
2778 assert( 0 );
2779 break;
2780
2781 case TGSI_OPCODE_NOP:
2782 break;
2783
2784 default:
2785 assert( 0 );
2786 }
2787 }
2788
2789
2790 /**
2791 * Run TGSI interpreter.
2792 * \return bitmask of "alive" quad components
2793 */
2794 uint
2795 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2796 {
2797 uint i;
2798 int pc = 0;
2799
2800 mach->CondMask = 0xf;
2801 mach->LoopMask = 0xf;
2802 mach->ContMask = 0xf;
2803 mach->FuncMask = 0xf;
2804 mach->ExecMask = 0xf;
2805
2806 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2807 assert(mach->CondStackTop == 0);
2808 assert(mach->LoopStackTop == 0);
2809 assert(mach->ContStackTop == 0);
2810 assert(mach->CallStackTop == 0);
2811
2812 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2813 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2814
2815 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2816 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2817 mach->Primitives[0] = 0;
2818 }
2819
2820 for (i = 0; i < QUAD_SIZE; i++) {
2821 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2822 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2823 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2824 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2825 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2826 }
2827
2828 /* execute declarations (interpolants) */
2829 for (i = 0; i < mach->NumDeclarations; i++) {
2830 exec_declaration( mach, mach->Declarations+i );
2831 }
2832
2833 /* execute instructions, until pc is set to -1 */
2834 while (pc != -1) {
2835 assert(pc < (int) mach->NumInstructions);
2836 exec_instruction( mach, mach->Instructions + pc, &pc );
2837 }
2838
2839 #if 0
2840 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2841 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2842 /*
2843 * Scale back depth component.
2844 */
2845 for (i = 0; i < 4; i++)
2846 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2847 }
2848 #endif
2849
2850 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2851 }
2852
2853