tgsi: More comments on source register indirect and 2D indexing.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask; see UPDATE_EXEC_MASK) which are controlled by the
45 * flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_shader_tokens.h"
56 #include "tgsi/tgsi_parse.h"
57 #include "tgsi/tgsi_util.h"
58 #include "tgsi_exec.h"
59 #include "util/u_memory.h"
60 #include "util/u_math.h"
61
62 #define FAST_MATH 1
63
64 #define TILE_TOP_LEFT 0
65 #define TILE_TOP_RIGHT 1
66 #define TILE_BOTTOM_LEFT 2
67 #define TILE_BOTTOM_RIGHT 3
68
69 #define CHAN_X 0
70 #define CHAN_Y 1
71 #define CHAN_Z 2
72 #define CHAN_W 3
73
74 /*
75 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
76 */
77 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
78 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
79 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
80 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
81 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
82 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
83 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
84 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
85 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
86 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
87 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
88 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
89 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
90 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
91 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
92 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
93 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
94 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
95 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
96 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
97 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
98 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
99 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
100 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
101 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
102 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
103 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
104 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
105 #define TEMP_R0 TGSI_EXEC_TEMP_R0
106
/** Non-zero when channel CHAN is set in the write mask of destination 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Non-zero when channel CHAN is set in the write mask of destination 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel that is
 * enabled in the write mask of destination 0.  CHAN must be an lvalue; it is
 * evaluated several times.
 *
 * The "if (!...) {} else" form gives the macro's own `if` an `else`, so an
 * `else` written by the caller after the loop cannot be captured by it.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (!IS_CHANNEL_ENABLED( INST, CHAN )) {} else

/** Same as FOR_EACH_ENABLED_CHANNEL, but for destination 1. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (!IS_CHANNEL_ENABLED2( INST, CHAN )) {} else
120
121
/**
 * Recompute the execution mask as the bitwise AND of the condition, loop,
 * continue and function masks.  MACH is a pointer expression; it is
 * evaluated several times, so it must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
125
126 /**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131 void
132 tgsi_exec_machine_bind_shader(
133 struct tgsi_exec_machine *mach,
134 const struct tgsi_token *tokens,
135 uint numSamplers,
136 struct tgsi_sampler *samplers)
137 {
138 uint k;
139 struct tgsi_parse_context parse;
140 struct tgsi_exec_labels *labels = &mach->Labels;
141 struct tgsi_full_instruction *instructions;
142 struct tgsi_full_declaration *declarations;
143 uint maxInstructions = 10, numInstructions = 0;
144 uint maxDeclarations = 10, numDeclarations = 0;
145 uint instno = 0;
146
147 #if 0
148 tgsi_dump(tokens, 0);
149 #endif
150
151 util_init_math();
152
153 mach->Tokens = tokens;
154 mach->Samplers = samplers;
155
156 k = tgsi_parse_init (&parse, mach->Tokens);
157 if (k != TGSI_PARSE_OK) {
158 debug_printf( "Problem parsing!\n" );
159 return;
160 }
161
162 mach->Processor = parse.FullHeader.Processor.Processor;
163 mach->ImmLimit = 0;
164 labels->count = 0;
165
166 declarations = (struct tgsi_full_declaration *)
167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169 if (!declarations) {
170 return;
171 }
172
173 instructions = (struct tgsi_full_instruction *)
174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176 if (!instructions) {
177 FREE( declarations );
178 return;
179 }
180
181 while( !tgsi_parse_end_of_tokens( &parse ) ) {
182 uint pointer = parse.Position;
183 uint i;
184
185 tgsi_parse_token( &parse );
186 switch( parse.FullToken.Token.Type ) {
187 case TGSI_TOKEN_TYPE_DECLARATION:
188 /* save expanded declaration */
189 if (numDeclarations == maxDeclarations) {
190 declarations = REALLOC(declarations,
191 maxDeclarations
192 * sizeof(struct tgsi_full_declaration),
193 (maxDeclarations + 10)
194 * sizeof(struct tgsi_full_declaration));
195 maxDeclarations += 10;
196 }
197 memcpy(declarations + numDeclarations,
198 &parse.FullToken.FullDeclaration,
199 sizeof(declarations[0]));
200 numDeclarations++;
201 break;
202
203 case TGSI_TOKEN_TYPE_IMMEDIATE:
204 {
205 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
206 assert( size % 4 == 0 );
207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209 for( i = 0; i < size; i++ ) {
210 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212 }
213 mach->ImmLimit += size / 4;
214 }
215 break;
216
217 case TGSI_TOKEN_TYPE_INSTRUCTION:
218 assert( labels->count < MAX_LABELS );
219
220 labels->labels[labels->count][0] = instno;
221 labels->labels[labels->count][1] = pointer;
222 labels->count++;
223
224 /* save expanded instruction */
225 if (numInstructions == maxInstructions) {
226 instructions = REALLOC(instructions,
227 maxInstructions
228 * sizeof(struct tgsi_full_instruction),
229 (maxInstructions + 10)
230 * sizeof(struct tgsi_full_instruction));
231 maxInstructions += 10;
232 }
233 memcpy(instructions + numInstructions,
234 &parse.FullToken.FullInstruction,
235 sizeof(instructions[0]));
236 numInstructions++;
237 break;
238
239 default:
240 assert( 0 );
241 }
242 }
243 tgsi_parse_free (&parse);
244
245 if (mach->Declarations) {
246 FREE( mach->Declarations );
247 }
248 mach->Declarations = declarations;
249 mach->NumDeclarations = numDeclarations;
250
251 if (mach->Instructions) {
252 FREE( mach->Instructions );
253 }
254 mach->Instructions = instructions;
255 mach->NumInstructions = numInstructions;
256 }
257
258
259 void
260 tgsi_exec_machine_init(
261 struct tgsi_exec_machine *mach )
262 {
263 uint i;
264
265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268 /* Setup constants. */
269 for( i = 0; i < 4; i++ ) {
270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280 }
281 }
282
283
284 void
285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286 {
287 if (mach->Instructions) {
288 FREE(mach->Instructions);
289 mach->Instructions = NULL;
290 mach->NumInstructions = 0;
291 }
292 if (mach->Declarations) {
293 FREE(mach->Declarations);
294 mach->Declarations = NULL;
295 mach->NumDeclarations = 0;
296 }
297 }
298
299
300 static void
301 micro_abs(
302 union tgsi_exec_channel *dst,
303 const union tgsi_exec_channel *src )
304 {
305 dst->f[0] = fabsf( src->f[0] );
306 dst->f[1] = fabsf( src->f[1] );
307 dst->f[2] = fabsf( src->f[2] );
308 dst->f[3] = fabsf( src->f[3] );
309 }
310
311 static void
312 micro_add(
313 union tgsi_exec_channel *dst,
314 const union tgsi_exec_channel *src0,
315 const union tgsi_exec_channel *src1 )
316 {
317 dst->f[0] = src0->f[0] + src1->f[0];
318 dst->f[1] = src0->f[1] + src1->f[1];
319 dst->f[2] = src0->f[2] + src1->f[2];
320 dst->f[3] = src0->f[3] + src1->f[3];
321 }
322
323 static void
324 micro_iadd(
325 union tgsi_exec_channel *dst,
326 const union tgsi_exec_channel *src0,
327 const union tgsi_exec_channel *src1 )
328 {
329 dst->i[0] = src0->i[0] + src1->i[0];
330 dst->i[1] = src0->i[1] + src1->i[1];
331 dst->i[2] = src0->i[2] + src1->i[2];
332 dst->i[3] = src0->i[3] + src1->i[3];
333 }
334
335 static void
336 micro_and(
337 union tgsi_exec_channel *dst,
338 const union tgsi_exec_channel *src0,
339 const union tgsi_exec_channel *src1 )
340 {
341 dst->u[0] = src0->u[0] & src1->u[0];
342 dst->u[1] = src0->u[1] & src1->u[1];
343 dst->u[2] = src0->u[2] & src1->u[2];
344 dst->u[3] = src0->u[3] & src1->u[3];
345 }
346
347 static void
348 micro_ceil(
349 union tgsi_exec_channel *dst,
350 const union tgsi_exec_channel *src )
351 {
352 dst->f[0] = ceilf( src->f[0] );
353 dst->f[1] = ceilf( src->f[1] );
354 dst->f[2] = ceilf( src->f[2] );
355 dst->f[3] = ceilf( src->f[3] );
356 }
357
358 static void
359 micro_cos(
360 union tgsi_exec_channel *dst,
361 const union tgsi_exec_channel *src )
362 {
363 dst->f[0] = cosf( src->f[0] );
364 dst->f[1] = cosf( src->f[1] );
365 dst->f[2] = cosf( src->f[2] );
366 dst->f[3] = cosf( src->f[3] );
367 }
368
369 static void
370 micro_ddx(
371 union tgsi_exec_channel *dst,
372 const union tgsi_exec_channel *src )
373 {
374 dst->f[0] =
375 dst->f[1] =
376 dst->f[2] =
377 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
378 }
379
380 static void
381 micro_ddy(
382 union tgsi_exec_channel *dst,
383 const union tgsi_exec_channel *src )
384 {
385 dst->f[0] =
386 dst->f[1] =
387 dst->f[2] =
388 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
389 }
390
391 static void
392 micro_div(
393 union tgsi_exec_channel *dst,
394 const union tgsi_exec_channel *src0,
395 const union tgsi_exec_channel *src1 )
396 {
397 if (src1->f[0] != 0) {
398 dst->f[0] = src0->f[0] / src1->f[0];
399 }
400 if (src1->f[1] != 0) {
401 dst->f[1] = src0->f[1] / src1->f[1];
402 }
403 if (src1->f[2] != 0) {
404 dst->f[2] = src0->f[2] / src1->f[2];
405 }
406 if (src1->f[3] != 0) {
407 dst->f[3] = src0->f[3] / src1->f[3];
408 }
409 }
410
411 static void
412 micro_udiv(
413 union tgsi_exec_channel *dst,
414 const union tgsi_exec_channel *src0,
415 const union tgsi_exec_channel *src1 )
416 {
417 dst->u[0] = src0->u[0] / src1->u[0];
418 dst->u[1] = src0->u[1] / src1->u[1];
419 dst->u[2] = src0->u[2] / src1->u[2];
420 dst->u[3] = src0->u[3] / src1->u[3];
421 }
422
423 static void
424 micro_eq(
425 union tgsi_exec_channel *dst,
426 const union tgsi_exec_channel *src0,
427 const union tgsi_exec_channel *src1,
428 const union tgsi_exec_channel *src2,
429 const union tgsi_exec_channel *src3 )
430 {
431 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
432 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
433 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
434 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
435 }
436
437 static void
438 micro_ieq(
439 union tgsi_exec_channel *dst,
440 const union tgsi_exec_channel *src0,
441 const union tgsi_exec_channel *src1,
442 const union tgsi_exec_channel *src2,
443 const union tgsi_exec_channel *src3 )
444 {
445 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
446 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
447 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
448 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
449 }
450
451 static void
452 micro_exp2(
453 union tgsi_exec_channel *dst,
454 const union tgsi_exec_channel *src)
455 {
456 #if FAST_MATH
457 dst->f[0] = util_fast_exp2( src->f[0] );
458 dst->f[1] = util_fast_exp2( src->f[1] );
459 dst->f[2] = util_fast_exp2( src->f[2] );
460 dst->f[3] = util_fast_exp2( src->f[3] );
461 #else
462 dst->f[0] = powf( 2.0f, src->f[0] );
463 dst->f[1] = powf( 2.0f, src->f[1] );
464 dst->f[2] = powf( 2.0f, src->f[2] );
465 dst->f[3] = powf( 2.0f, src->f[3] );
466 #endif
467 }
468
469 static void
470 micro_f2it(
471 union tgsi_exec_channel *dst,
472 const union tgsi_exec_channel *src )
473 {
474 dst->i[0] = (int) src->f[0];
475 dst->i[1] = (int) src->f[1];
476 dst->i[2] = (int) src->f[2];
477 dst->i[3] = (int) src->f[3];
478 }
479
480 static void
481 micro_f2ut(
482 union tgsi_exec_channel *dst,
483 const union tgsi_exec_channel *src )
484 {
485 dst->u[0] = (uint) src->f[0];
486 dst->u[1] = (uint) src->f[1];
487 dst->u[2] = (uint) src->f[2];
488 dst->u[3] = (uint) src->f[3];
489 }
490
491 static void
492 micro_flr(
493 union tgsi_exec_channel *dst,
494 const union tgsi_exec_channel *src )
495 {
496 dst->f[0] = floorf( src->f[0] );
497 dst->f[1] = floorf( src->f[1] );
498 dst->f[2] = floorf( src->f[2] );
499 dst->f[3] = floorf( src->f[3] );
500 }
501
502 static void
503 micro_frc(
504 union tgsi_exec_channel *dst,
505 const union tgsi_exec_channel *src )
506 {
507 dst->f[0] = src->f[0] - floorf( src->f[0] );
508 dst->f[1] = src->f[1] - floorf( src->f[1] );
509 dst->f[2] = src->f[2] - floorf( src->f[2] );
510 dst->f[3] = src->f[3] - floorf( src->f[3] );
511 }
512
513 static void
514 micro_ge(
515 union tgsi_exec_channel *dst,
516 const union tgsi_exec_channel *src0,
517 const union tgsi_exec_channel *src1,
518 const union tgsi_exec_channel *src2,
519 const union tgsi_exec_channel *src3 )
520 {
521 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
522 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
523 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
524 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
525 }
526
527 static void
528 micro_i2f(
529 union tgsi_exec_channel *dst,
530 const union tgsi_exec_channel *src )
531 {
532 dst->f[0] = (float) src->i[0];
533 dst->f[1] = (float) src->i[1];
534 dst->f[2] = (float) src->i[2];
535 dst->f[3] = (float) src->i[3];
536 }
537
538 static void
539 micro_lg2(
540 union tgsi_exec_channel *dst,
541 const union tgsi_exec_channel *src )
542 {
543 #if FAST_MATH
544 dst->f[0] = util_fast_log2( src->f[0] );
545 dst->f[1] = util_fast_log2( src->f[1] );
546 dst->f[2] = util_fast_log2( src->f[2] );
547 dst->f[3] = util_fast_log2( src->f[3] );
548 #else
549 dst->f[0] = logf( src->f[0] ) * 1.442695f;
550 dst->f[1] = logf( src->f[1] ) * 1.442695f;
551 dst->f[2] = logf( src->f[2] ) * 1.442695f;
552 dst->f[3] = logf( src->f[3] ) * 1.442695f;
553 #endif
554 }
555
556 static void
557 micro_le(
558 union tgsi_exec_channel *dst,
559 const union tgsi_exec_channel *src0,
560 const union tgsi_exec_channel *src1,
561 const union tgsi_exec_channel *src2,
562 const union tgsi_exec_channel *src3 )
563 {
564 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
565 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
566 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
567 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
568 }
569
570 static void
571 micro_lt(
572 union tgsi_exec_channel *dst,
573 const union tgsi_exec_channel *src0,
574 const union tgsi_exec_channel *src1,
575 const union tgsi_exec_channel *src2,
576 const union tgsi_exec_channel *src3 )
577 {
578 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
579 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
580 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
581 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
582 }
583
584 static void
585 micro_ilt(
586 union tgsi_exec_channel *dst,
587 const union tgsi_exec_channel *src0,
588 const union tgsi_exec_channel *src1,
589 const union tgsi_exec_channel *src2,
590 const union tgsi_exec_channel *src3 )
591 {
592 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
593 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
594 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
595 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
596 }
597
598 static void
599 micro_ult(
600 union tgsi_exec_channel *dst,
601 const union tgsi_exec_channel *src0,
602 const union tgsi_exec_channel *src1,
603 const union tgsi_exec_channel *src2,
604 const union tgsi_exec_channel *src3 )
605 {
606 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
607 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
608 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
609 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
610 }
611
612 static void
613 micro_max(
614 union tgsi_exec_channel *dst,
615 const union tgsi_exec_channel *src0,
616 const union tgsi_exec_channel *src1 )
617 {
618 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
619 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
620 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
621 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
622 }
623
624 static void
625 micro_imax(
626 union tgsi_exec_channel *dst,
627 const union tgsi_exec_channel *src0,
628 const union tgsi_exec_channel *src1 )
629 {
630 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
631 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
632 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
633 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
634 }
635
636 static void
637 micro_umax(
638 union tgsi_exec_channel *dst,
639 const union tgsi_exec_channel *src0,
640 const union tgsi_exec_channel *src1 )
641 {
642 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
643 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
644 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
645 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
646 }
647
648 static void
649 micro_min(
650 union tgsi_exec_channel *dst,
651 const union tgsi_exec_channel *src0,
652 const union tgsi_exec_channel *src1 )
653 {
654 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
655 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
656 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
657 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
658 }
659
660 static void
661 micro_imin(
662 union tgsi_exec_channel *dst,
663 const union tgsi_exec_channel *src0,
664 const union tgsi_exec_channel *src1 )
665 {
666 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
667 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
668 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
669 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
670 }
671
672 static void
673 micro_umin(
674 union tgsi_exec_channel *dst,
675 const union tgsi_exec_channel *src0,
676 const union tgsi_exec_channel *src1 )
677 {
678 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
679 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
680 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
681 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
682 }
683
684 static void
685 micro_umod(
686 union tgsi_exec_channel *dst,
687 const union tgsi_exec_channel *src0,
688 const union tgsi_exec_channel *src1 )
689 {
690 dst->u[0] = src0->u[0] % src1->u[0];
691 dst->u[1] = src0->u[1] % src1->u[1];
692 dst->u[2] = src0->u[2] % src1->u[2];
693 dst->u[3] = src0->u[3] % src1->u[3];
694 }
695
696 static void
697 micro_mul(
698 union tgsi_exec_channel *dst,
699 const union tgsi_exec_channel *src0,
700 const union tgsi_exec_channel *src1 )
701 {
702 dst->f[0] = src0->f[0] * src1->f[0];
703 dst->f[1] = src0->f[1] * src1->f[1];
704 dst->f[2] = src0->f[2] * src1->f[2];
705 dst->f[3] = src0->f[3] * src1->f[3];
706 }
707
708 static void
709 micro_imul(
710 union tgsi_exec_channel *dst,
711 const union tgsi_exec_channel *src0,
712 const union tgsi_exec_channel *src1 )
713 {
714 dst->i[0] = src0->i[0] * src1->i[0];
715 dst->i[1] = src0->i[1] * src1->i[1];
716 dst->i[2] = src0->i[2] * src1->i[2];
717 dst->i[3] = src0->i[3] * src1->i[3];
718 }
719
720 static void
721 micro_imul64(
722 union tgsi_exec_channel *dst0,
723 union tgsi_exec_channel *dst1,
724 const union tgsi_exec_channel *src0,
725 const union tgsi_exec_channel *src1 )
726 {
727 dst1->i[0] = src0->i[0] * src1->i[0];
728 dst1->i[1] = src0->i[1] * src1->i[1];
729 dst1->i[2] = src0->i[2] * src1->i[2];
730 dst1->i[3] = src0->i[3] * src1->i[3];
731 dst0->i[0] = 0;
732 dst0->i[1] = 0;
733 dst0->i[2] = 0;
734 dst0->i[3] = 0;
735 }
736
737 static void
738 micro_umul64(
739 union tgsi_exec_channel *dst0,
740 union tgsi_exec_channel *dst1,
741 const union tgsi_exec_channel *src0,
742 const union tgsi_exec_channel *src1 )
743 {
744 dst1->u[0] = src0->u[0] * src1->u[0];
745 dst1->u[1] = src0->u[1] * src1->u[1];
746 dst1->u[2] = src0->u[2] * src1->u[2];
747 dst1->u[3] = src0->u[3] * src1->u[3];
748 dst0->u[0] = 0;
749 dst0->u[1] = 0;
750 dst0->u[2] = 0;
751 dst0->u[3] = 0;
752 }
753
754 static void
755 micro_movc(
756 union tgsi_exec_channel *dst,
757 const union tgsi_exec_channel *src0,
758 const union tgsi_exec_channel *src1,
759 const union tgsi_exec_channel *src2 )
760 {
761 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
762 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
763 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
764 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
765 }
766
767 static void
768 micro_neg(
769 union tgsi_exec_channel *dst,
770 const union tgsi_exec_channel *src )
771 {
772 dst->f[0] = -src->f[0];
773 dst->f[1] = -src->f[1];
774 dst->f[2] = -src->f[2];
775 dst->f[3] = -src->f[3];
776 }
777
778 static void
779 micro_ineg(
780 union tgsi_exec_channel *dst,
781 const union tgsi_exec_channel *src )
782 {
783 dst->i[0] = -src->i[0];
784 dst->i[1] = -src->i[1];
785 dst->i[2] = -src->i[2];
786 dst->i[3] = -src->i[3];
787 }
788
789 static void
790 micro_not(
791 union tgsi_exec_channel *dst,
792 const union tgsi_exec_channel *src )
793 {
794 dst->u[0] = ~src->u[0];
795 dst->u[1] = ~src->u[1];
796 dst->u[2] = ~src->u[2];
797 dst->u[3] = ~src->u[3];
798 }
799
800 static void
801 micro_or(
802 union tgsi_exec_channel *dst,
803 const union tgsi_exec_channel *src0,
804 const union tgsi_exec_channel *src1 )
805 {
806 dst->u[0] = src0->u[0] | src1->u[0];
807 dst->u[1] = src0->u[1] | src1->u[1];
808 dst->u[2] = src0->u[2] | src1->u[2];
809 dst->u[3] = src0->u[3] | src1->u[3];
810 }
811
812 static void
813 micro_pow(
814 union tgsi_exec_channel *dst,
815 const union tgsi_exec_channel *src0,
816 const union tgsi_exec_channel *src1 )
817 {
818 #if FAST_MATH
819 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
820 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
821 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
822 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
823 #else
824 dst->f[0] = powf( src0->f[0], src1->f[0] );
825 dst->f[1] = powf( src0->f[1], src1->f[1] );
826 dst->f[2] = powf( src0->f[2], src1->f[2] );
827 dst->f[3] = powf( src0->f[3], src1->f[3] );
828 #endif
829 }
830
831 static void
832 micro_rnd(
833 union tgsi_exec_channel *dst,
834 const union tgsi_exec_channel *src )
835 {
836 dst->f[0] = floorf( src->f[0] + 0.5f );
837 dst->f[1] = floorf( src->f[1] + 0.5f );
838 dst->f[2] = floorf( src->f[2] + 0.5f );
839 dst->f[3] = floorf( src->f[3] + 0.5f );
840 }
841
842 static void
843 micro_shl(
844 union tgsi_exec_channel *dst,
845 const union tgsi_exec_channel *src0,
846 const union tgsi_exec_channel *src1 )
847 {
848 dst->i[0] = src0->i[0] << src1->i[0];
849 dst->i[1] = src0->i[1] << src1->i[1];
850 dst->i[2] = src0->i[2] << src1->i[2];
851 dst->i[3] = src0->i[3] << src1->i[3];
852 }
853
854 static void
855 micro_ishr(
856 union tgsi_exec_channel *dst,
857 const union tgsi_exec_channel *src0,
858 const union tgsi_exec_channel *src1 )
859 {
860 dst->i[0] = src0->i[0] >> src1->i[0];
861 dst->i[1] = src0->i[1] >> src1->i[1];
862 dst->i[2] = src0->i[2] >> src1->i[2];
863 dst->i[3] = src0->i[3] >> src1->i[3];
864 }
865
866 static void
867 micro_trunc(
868 union tgsi_exec_channel *dst,
869 const union tgsi_exec_channel *src0 )
870 {
871 dst->f[0] = (float) (int) src0->f[0];
872 dst->f[1] = (float) (int) src0->f[1];
873 dst->f[2] = (float) (int) src0->f[2];
874 dst->f[3] = (float) (int) src0->f[3];
875 }
876
877 static void
878 micro_ushr(
879 union tgsi_exec_channel *dst,
880 const union tgsi_exec_channel *src0,
881 const union tgsi_exec_channel *src1 )
882 {
883 dst->u[0] = src0->u[0] >> src1->u[0];
884 dst->u[1] = src0->u[1] >> src1->u[1];
885 dst->u[2] = src0->u[2] >> src1->u[2];
886 dst->u[3] = src0->u[3] >> src1->u[3];
887 }
888
889 static void
890 micro_sin(
891 union tgsi_exec_channel *dst,
892 const union tgsi_exec_channel *src )
893 {
894 dst->f[0] = sinf( src->f[0] );
895 dst->f[1] = sinf( src->f[1] );
896 dst->f[2] = sinf( src->f[2] );
897 dst->f[3] = sinf( src->f[3] );
898 }
899
900 static void
901 micro_sqrt( union tgsi_exec_channel *dst,
902 const union tgsi_exec_channel *src )
903 {
904 dst->f[0] = sqrtf( src->f[0] );
905 dst->f[1] = sqrtf( src->f[1] );
906 dst->f[2] = sqrtf( src->f[2] );
907 dst->f[3] = sqrtf( src->f[3] );
908 }
909
910 static void
911 micro_sub(
912 union tgsi_exec_channel *dst,
913 const union tgsi_exec_channel *src0,
914 const union tgsi_exec_channel *src1 )
915 {
916 dst->f[0] = src0->f[0] - src1->f[0];
917 dst->f[1] = src0->f[1] - src1->f[1];
918 dst->f[2] = src0->f[2] - src1->f[2];
919 dst->f[3] = src0->f[3] - src1->f[3];
920 }
921
922 static void
923 micro_u2f(
924 union tgsi_exec_channel *dst,
925 const union tgsi_exec_channel *src )
926 {
927 dst->f[0] = (float) src->u[0];
928 dst->f[1] = (float) src->u[1];
929 dst->f[2] = (float) src->u[2];
930 dst->f[3] = (float) src->u[3];
931 }
932
933 static void
934 micro_xor(
935 union tgsi_exec_channel *dst,
936 const union tgsi_exec_channel *src0,
937 const union tgsi_exec_channel *src1 )
938 {
939 dst->u[0] = src0->u[0] ^ src1->u[0];
940 dst->u[1] = src0->u[1] ^ src1->u[1];
941 dst->u[2] = src0->u[2] ^ src1->u[2];
942 dst->u[3] = src0->u[3] ^ src1->u[3];
943 }
944
945 static void
946 fetch_src_file_channel(
947 const struct tgsi_exec_machine *mach,
948 const uint file,
949 const uint swizzle,
950 const union tgsi_exec_channel *index,
951 union tgsi_exec_channel *chan )
952 {
953 switch( swizzle ) {
954 case TGSI_EXTSWIZZLE_X:
955 case TGSI_EXTSWIZZLE_Y:
956 case TGSI_EXTSWIZZLE_Z:
957 case TGSI_EXTSWIZZLE_W:
958 switch( file ) {
959 case TGSI_FILE_CONSTANT:
960 assert(mach->Consts);
961 assert(index->i[0] >= 0);
962 assert(index->i[1] >= 0);
963 assert(index->i[2] >= 0);
964 assert(index->i[3] >= 0);
965 chan->f[0] = mach->Consts[index->i[0]][swizzle];
966 chan->f[1] = mach->Consts[index->i[1]][swizzle];
967 chan->f[2] = mach->Consts[index->i[2]][swizzle];
968 chan->f[3] = mach->Consts[index->i[3]][swizzle];
969 break;
970
971 case TGSI_FILE_INPUT:
972 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
973 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
974 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
975 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
976 break;
977
978 case TGSI_FILE_TEMPORARY:
979 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
980 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
981 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
982 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
983 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
984 break;
985
986 case TGSI_FILE_IMMEDIATE:
987 assert( index->i[0] < (int) mach->ImmLimit );
988 chan->f[0] = mach->Imms[index->i[0]][swizzle];
989 assert( index->i[1] < (int) mach->ImmLimit );
990 chan->f[1] = mach->Imms[index->i[1]][swizzle];
991 assert( index->i[2] < (int) mach->ImmLimit );
992 chan->f[2] = mach->Imms[index->i[2]][swizzle];
993 assert( index->i[3] < (int) mach->ImmLimit );
994 chan->f[3] = mach->Imms[index->i[3]][swizzle];
995 break;
996
997 case TGSI_FILE_ADDRESS:
998 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
999 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1000 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1001 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1002 break;
1003
1004 case TGSI_FILE_OUTPUT:
1005 /* vertex/fragment output vars can be read too */
1006 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1007 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1008 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1009 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1010 break;
1011
1012 default:
1013 assert( 0 );
1014 }
1015 break;
1016
1017 case TGSI_EXTSWIZZLE_ZERO:
1018 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1019 break;
1020
1021 case TGSI_EXTSWIZZLE_ONE:
1022 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1023 break;
1024
1025 default:
1026 assert( 0 );
1027 }
1028 }
1029
1030 static void
1031 fetch_source(
1032 const struct tgsi_exec_machine *mach,
1033 union tgsi_exec_channel *chan,
1034 const struct tgsi_full_src_register *reg,
1035 const uint chan_index )
1036 {
1037 union tgsi_exec_channel index;
1038 uint swizzle;
1039
1040 /* We start with a direct index into a register file.
1041 *
1042 * file[1],
1043 * where:
1044 * file = SrcRegister.File
1045 * [1] = SrcRegister.Index
1046 */
1047 index.i[0] =
1048 index.i[1] =
1049 index.i[2] =
1050 index.i[3] = reg->SrcRegister.Index;
1051
1052 /* There is an extra source register that indirectly subscripts
1053 * a register file. The direct index now becomes an offset
1054 * that is being added to the indirect register.
1055 *
1056 * file[ind[2].x+1],
1057 * where:
1058 * ind = SrcRegisterInd.File
1059 * [2] = SrcRegisterInd.Index
1060 * .x = SrcRegisterInd.SwizzleX
1061 */
1062 if (reg->SrcRegister.Indirect) {
1063 union tgsi_exec_channel index2;
1064 union tgsi_exec_channel indir_index;
1065 const uint execmask = mach->ExecMask;
1066 uint i;
1067
1068 /* which address register (always zero now) */
1069 index2.i[0] =
1070 index2.i[1] =
1071 index2.i[2] =
1072 index2.i[3] = reg->SrcRegisterInd.Index;
1073
1074 /* get current value of address register[swizzle] */
1075 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1076 fetch_src_file_channel(
1077 mach,
1078 reg->SrcRegisterInd.File,
1079 swizzle,
1080 &index2,
1081 &indir_index );
1082
1083 /* add value of address register to the offset */
1084 index.i[0] += indir_index.i[0];
1085 index.i[1] += indir_index.i[1];
1086 index.i[2] += indir_index.i[2];
1087 index.i[3] += indir_index.i[3];
1088
1089 /* for disabled execution channels, zero-out the index to
1090 * avoid using a potential garbage value.
1091 */
1092 for (i = 0; i < QUAD_SIZE; i++) {
1093 if ((execmask & (1 << i)) == 0)
1094 index.i[i] = 0;
1095 }
1096 }
1097
1098 /* There is an extra source register that is a second
1099 * subscript to a register file. Effectively it means that
1100 * the register file is actually a 2D array of registers.
1101 *
1102 * file[1][3] == file[1*sizeof(file[1])+3],
1103 * where:
1104 * [3] = SrcRegisterDim.Index
1105 */
1106 if (reg->SrcRegister.Dimension) {
1107 /* The size of the first-order array depends on the register file type.
1108 * We need to multiply the index to the first array to get an effective,
1109 * "flat" index that points to the beginning of the second-order array.
1110 */
1111 switch (reg->SrcRegister.File) {
1112 case TGSI_FILE_INPUT:
1113 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1114 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1115 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1116 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1117 break;
1118 case TGSI_FILE_CONSTANT:
1119 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1120 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1121 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1122 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1123 break;
1124 default:
1125 assert( 0 );
1126 }
1127
1128 index.i[0] += reg->SrcRegisterDim.Index;
1129 index.i[1] += reg->SrcRegisterDim.Index;
1130 index.i[2] += reg->SrcRegisterDim.Index;
1131 index.i[3] += reg->SrcRegisterDim.Index;
1132
1133 /* Again, the second subscript index can be addressed indirectly
1134 * identically to the first one.
1135 * Nothing stops us from indirectly addressing the indirect register,
1136 * but there is no need for that, so we won't exercise it.
1137 *
1138 * file[1][ind[4].y+3],
1139 * where:
1140 * ind = SrcRegisterDimInd.File
1141 * [4] = SrcRegisterDimInd.Index
1142 * .y = SrcRegisterDimInd.SwizzleX
1143 */
1144 if (reg->SrcRegisterDim.Indirect) {
1145 union tgsi_exec_channel index2;
1146 union tgsi_exec_channel indir_index;
1147 const uint execmask = mach->ExecMask;
1148 uint i;
1149
1150 index2.i[0] =
1151 index2.i[1] =
1152 index2.i[2] =
1153 index2.i[3] = reg->SrcRegisterDimInd.Index;
1154
1155 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1156 fetch_src_file_channel(
1157 mach,
1158 reg->SrcRegisterDimInd.File,
1159 swizzle,
1160 &index2,
1161 &indir_index );
1162
1163 index.i[0] += indir_index.i[0];
1164 index.i[1] += indir_index.i[1];
1165 index.i[2] += indir_index.i[2];
1166 index.i[3] += indir_index.i[3];
1167
1168 /* for disabled execution channels, zero-out the index to
1169 * avoid using a potential garbage value.
1170 */
1171 for (i = 0; i < QUAD_SIZE; i++) {
1172 if ((execmask & (1 << i)) == 0)
1173 index.i[i] = 0;
1174 }
1175 }
1176
1177 /* If by any chance there was a need for a 3D array of register
1178 * files, we would have to check whether SrcRegisterDim is followed
1179 * by a dimension register and continue the saga.
1180 */
1181 }
1182
1183 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1184 fetch_src_file_channel(
1185 mach,
1186 reg->SrcRegister.File,
1187 swizzle,
1188 &index,
1189 chan );
1190
1191 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1192 case TGSI_UTIL_SIGN_CLEAR:
1193 micro_abs( chan, chan );
1194 break;
1195
1196 case TGSI_UTIL_SIGN_SET:
1197 micro_abs( chan, chan );
1198 micro_neg( chan, chan );
1199 break;
1200
1201 case TGSI_UTIL_SIGN_TOGGLE:
1202 micro_neg( chan, chan );
1203 break;
1204
1205 case TGSI_UTIL_SIGN_KEEP:
1206 break;
1207 }
1208
1209 if (reg->SrcRegisterExtMod.Complement) {
1210 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1211 }
1212 }
1213
1214 static void
1215 store_dest(
1216 struct tgsi_exec_machine *mach,
1217 const union tgsi_exec_channel *chan,
1218 const struct tgsi_full_dst_register *reg,
1219 const struct tgsi_full_instruction *inst,
1220 uint chan_index )
1221 {
1222 uint i;
1223 union tgsi_exec_channel null;
1224 union tgsi_exec_channel *dst;
1225 uint execmask = mach->ExecMask;
1226
1227 switch (reg->DstRegister.File) {
1228 case TGSI_FILE_NULL:
1229 dst = &null;
1230 break;
1231
1232 case TGSI_FILE_OUTPUT:
1233 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1234 + reg->DstRegister.Index].xyzw[chan_index];
1235 break;
1236
1237 case TGSI_FILE_TEMPORARY:
1238 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1239 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1240 break;
1241
1242 case TGSI_FILE_ADDRESS:
1243 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1244 break;
1245
1246 default:
1247 assert( 0 );
1248 return;
1249 }
1250
1251 if (inst->InstructionExtNv.CondFlowEnable) {
1252 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1253 uint swizzle;
1254 uint shift;
1255 uint mask;
1256 uint test;
1257
1258 /* Only CC0 supported.
1259 */
1260 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1261
1262 switch (chan_index) {
1263 case CHAN_X:
1264 swizzle = inst->InstructionExtNv.CondSwizzleX;
1265 break;
1266 case CHAN_Y:
1267 swizzle = inst->InstructionExtNv.CondSwizzleY;
1268 break;
1269 case CHAN_Z:
1270 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1271 break;
1272 case CHAN_W:
1273 swizzle = inst->InstructionExtNv.CondSwizzleW;
1274 break;
1275 default:
1276 assert( 0 );
1277 return;
1278 }
1279
1280 switch (swizzle) {
1281 case TGSI_SWIZZLE_X:
1282 shift = TGSI_EXEC_CC_X_SHIFT;
1283 mask = TGSI_EXEC_CC_X_MASK;
1284 break;
1285 case TGSI_SWIZZLE_Y:
1286 shift = TGSI_EXEC_CC_Y_SHIFT;
1287 mask = TGSI_EXEC_CC_Y_MASK;
1288 break;
1289 case TGSI_SWIZZLE_Z:
1290 shift = TGSI_EXEC_CC_Z_SHIFT;
1291 mask = TGSI_EXEC_CC_Z_MASK;
1292 break;
1293 case TGSI_SWIZZLE_W:
1294 shift = TGSI_EXEC_CC_W_SHIFT;
1295 mask = TGSI_EXEC_CC_W_MASK;
1296 break;
1297 default:
1298 assert( 0 );
1299 return;
1300 }
1301
1302 switch (inst->InstructionExtNv.CondMask) {
1303 case TGSI_CC_GT:
1304 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1305 for (i = 0; i < QUAD_SIZE; i++)
1306 if (cc->u[i] & test)
1307 execmask &= ~(1 << i);
1308 break;
1309
1310 case TGSI_CC_EQ:
1311 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1312 for (i = 0; i < QUAD_SIZE; i++)
1313 if (cc->u[i] & test)
1314 execmask &= ~(1 << i);
1315 break;
1316
1317 case TGSI_CC_LT:
1318 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1319 for (i = 0; i < QUAD_SIZE; i++)
1320 if (cc->u[i] & test)
1321 execmask &= ~(1 << i);
1322 break;
1323
1324 case TGSI_CC_GE:
1325 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1326 for (i = 0; i < QUAD_SIZE; i++)
1327 if (cc->u[i] & test)
1328 execmask &= ~(1 << i);
1329 break;
1330
1331 case TGSI_CC_LE:
1332 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1333 for (i = 0; i < QUAD_SIZE; i++)
1334 if (cc->u[i] & test)
1335 execmask &= ~(1 << i);
1336 break;
1337
1338 case TGSI_CC_NE:
1339 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1340 for (i = 0; i < QUAD_SIZE; i++)
1341 if (cc->u[i] & test)
1342 execmask &= ~(1 << i);
1343 break;
1344
1345 case TGSI_CC_TR:
1346 break;
1347
1348 case TGSI_CC_FL:
1349 for (i = 0; i < QUAD_SIZE; i++)
1350 execmask &= ~(1 << i);
1351 break;
1352
1353 default:
1354 assert( 0 );
1355 return;
1356 }
1357 }
1358
1359 switch (inst->Instruction.Saturate) {
1360 case TGSI_SAT_NONE:
1361 for (i = 0; i < QUAD_SIZE; i++)
1362 if (execmask & (1 << i))
1363 dst->i[i] = chan->i[i];
1364 break;
1365
1366 case TGSI_SAT_ZERO_ONE:
1367 for (i = 0; i < QUAD_SIZE; i++)
1368 if (execmask & (1 << i)) {
1369 if (chan->f[i] < 0.0f)
1370 dst->f[i] = 0.0f;
1371 else if (chan->f[i] > 1.0f)
1372 dst->f[i] = 1.0f;
1373 else
1374 dst->i[i] = chan->i[i];
1375 }
1376 break;
1377
1378 case TGSI_SAT_MINUS_PLUS_ONE:
1379 for (i = 0; i < QUAD_SIZE; i++)
1380 if (execmask & (1 << i)) {
1381 if (chan->f[i] < -1.0f)
1382 dst->f[i] = -1.0f;
1383 else if (chan->f[i] > 1.0f)
1384 dst->f[i] = 1.0f;
1385 else
1386 dst->i[i] = chan->i[i];
1387 }
1388 break;
1389
1390 default:
1391 assert( 0 );
1392 }
1393
1394 if (inst->InstructionExtNv.CondDstUpdate) {
1395 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1396 uint shift;
1397 uint mask;
1398
1399 /* Only CC0 supported.
1400 */
1401 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1402
1403 switch (chan_index) {
1404 case CHAN_X:
1405 shift = TGSI_EXEC_CC_X_SHIFT;
1406 mask = ~TGSI_EXEC_CC_X_MASK;
1407 break;
1408 case CHAN_Y:
1409 shift = TGSI_EXEC_CC_Y_SHIFT;
1410 mask = ~TGSI_EXEC_CC_Y_MASK;
1411 break;
1412 case CHAN_Z:
1413 shift = TGSI_EXEC_CC_Z_SHIFT;
1414 mask = ~TGSI_EXEC_CC_Z_MASK;
1415 break;
1416 case CHAN_W:
1417 shift = TGSI_EXEC_CC_W_SHIFT;
1418 mask = ~TGSI_EXEC_CC_W_MASK;
1419 break;
1420 default:
1421 assert( 0 );
1422 return;
1423 }
1424
1425 for (i = 0; i < QUAD_SIZE; i++)
1426 if (execmask & (1 << i)) {
1427 cc->u[i] &= mask;
1428 if (dst->f[i] < 0.0f)
1429 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1430 else if (dst->f[i] > 0.0f)
1431 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1432 else if (dst->f[i] == 0.0f)
1433 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1434 else
1435 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1436 }
1437 }
1438 }
1439
/*
 * Shorthands for fetch_source() / store_dest() on the current instruction.
 *
 * Both macros expand to code that refers to identifiers named `mach` and
 * `inst`, so they can only be used where those names are in scope.
 *
 *    VAL   - pointer to the union tgsi_exec_channel to fill (FETCH) or to
 *            store from (STORE)
 *    INDEX - index into inst->FullSrcRegisters[] (FETCH) or
 *            inst->FullDstRegisters[] (STORE)
 *    CHAN  - channel index, passed through as chan_index
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)

#define STORE(VAL,INDEX,CHAN)\
    store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1445
1446
1447 /**
1448 * Execute ARB-style KIL which is predicated by a src register.
1449 * Kill fragment if any of the four values is less than zero.
1450 */
1451 static void
1452 exec_kil(struct tgsi_exec_machine *mach,
1453 const struct tgsi_full_instruction *inst)
1454 {
1455 uint uniquemask;
1456 uint chan_index;
1457 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1458 union tgsi_exec_channel r[1];
1459
1460 /* This mask stores component bits that were already tested. Note that
1461 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1462 * tested. */
1463 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1464
1465 for (chan_index = 0; chan_index < 4; chan_index++)
1466 {
1467 uint swizzle;
1468 uint i;
1469
1470 /* unswizzle channel */
1471 swizzle = tgsi_util_get_full_src_register_extswizzle (
1472 &inst->FullSrcRegisters[0],
1473 chan_index);
1474
1475 /* check if the component has not been already tested */
1476 if (uniquemask & (1 << swizzle))
1477 continue;
1478 uniquemask |= 1 << swizzle;
1479
1480 FETCH(&r[0], 0, chan_index);
1481 for (i = 0; i < 4; i++)
1482 if (r[0].f[i] < 0.0f)
1483 kilmask |= 1 << i;
1484 }
1485
1486 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1487 }
1488
1489 /**
1490 * Execute NVIDIA-style KIL which is predicated by a condition code.
1491 * Kill fragment if the condition code is TRUE.
1492 */
1493 static void
1494 exec_kilp(struct tgsi_exec_machine *mach,
1495 const struct tgsi_full_instruction *inst)
1496 {
1497 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1498
1499 if (inst->InstructionExtNv.CondFlowEnable) {
1500 uint swizzle[4];
1501 uint chan_index;
1502
1503 kilmask = 0x0;
1504
1505 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1506 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1507 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1508 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1509
1510 for (chan_index = 0; chan_index < 4; chan_index++)
1511 {
1512 uint i;
1513
1514 for (i = 0; i < 4; i++) {
1515 /* TODO: evaluate the condition code */
1516 if (0)
1517 kilmask |= 1 << i;
1518 }
1519 }
1520 }
1521 else {
1522 /* "unconditional" kil */
1523 kilmask = mach->ExecMask;
1524 }
1525 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1526 }
1527
1528
1529 /*
1530 * Fetch a texel using STR texture coordinates.
1531 */
1532 static void
1533 fetch_texel( struct tgsi_sampler *sampler,
1534 const union tgsi_exec_channel *s,
1535 const union tgsi_exec_channel *t,
1536 const union tgsi_exec_channel *p,
1537 float lodbias, /* XXX should be float[4] */
1538 union tgsi_exec_channel *r,
1539 union tgsi_exec_channel *g,
1540 union tgsi_exec_channel *b,
1541 union tgsi_exec_channel *a )
1542 {
1543 uint j;
1544 float rgba[NUM_CHANNELS][QUAD_SIZE];
1545
1546 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1547
1548 for (j = 0; j < 4; j++) {
1549 r->f[j] = rgba[0][j];
1550 g->f[j] = rgba[1][j];
1551 b->f[j] = rgba[2][j];
1552 a->f[j] = rgba[3][j];
1553 }
1554 }
1555
1556
1557 static void
1558 exec_tex(struct tgsi_exec_machine *mach,
1559 const struct tgsi_full_instruction *inst,
1560 boolean biasLod,
1561 boolean projected)
1562 {
1563 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1564 union tgsi_exec_channel r[8];
1565 uint chan_index;
1566 float lodBias;
1567
1568 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1569
1570 switch (inst->InstructionExtTexture.Texture) {
1571 case TGSI_TEXTURE_1D:
1572
1573 FETCH(&r[0], 0, CHAN_X);
1574
1575 if (projected) {
1576 FETCH(&r[1], 0, CHAN_W);
1577 micro_div( &r[0], &r[0], &r[1] );
1578 }
1579
1580 if (biasLod) {
1581 FETCH(&r[1], 0, CHAN_W);
1582 lodBias = r[2].f[0];
1583 }
1584 else
1585 lodBias = 0.0;
1586
1587 fetch_texel(&mach->Samplers[unit],
1588 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1589 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1590 break;
1591
1592 case TGSI_TEXTURE_2D:
1593 case TGSI_TEXTURE_RECT:
1594
1595 FETCH(&r[0], 0, CHAN_X);
1596 FETCH(&r[1], 0, CHAN_Y);
1597 FETCH(&r[2], 0, CHAN_Z);
1598
1599 if (projected) {
1600 FETCH(&r[3], 0, CHAN_W);
1601 micro_div( &r[0], &r[0], &r[3] );
1602 micro_div( &r[1], &r[1], &r[3] );
1603 micro_div( &r[2], &r[2], &r[3] );
1604 }
1605
1606 if (biasLod) {
1607 FETCH(&r[3], 0, CHAN_W);
1608 lodBias = r[3].f[0];
1609 }
1610 else
1611 lodBias = 0.0;
1612
1613 fetch_texel(&mach->Samplers[unit],
1614 &r[0], &r[1], &r[2], lodBias, /* inputs */
1615 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1616 break;
1617
1618 case TGSI_TEXTURE_3D:
1619 case TGSI_TEXTURE_CUBE:
1620
1621 FETCH(&r[0], 0, CHAN_X);
1622 FETCH(&r[1], 0, CHAN_Y);
1623 FETCH(&r[2], 0, CHAN_Z);
1624
1625 if (projected) {
1626 FETCH(&r[3], 0, CHAN_W);
1627 micro_div( &r[0], &r[0], &r[3] );
1628 micro_div( &r[1], &r[1], &r[3] );
1629 micro_div( &r[2], &r[2], &r[3] );
1630 }
1631
1632 if (biasLod) {
1633 FETCH(&r[3], 0, CHAN_W);
1634 lodBias = r[3].f[0];
1635 }
1636 else
1637 lodBias = 0.0;
1638
1639 fetch_texel(&mach->Samplers[unit],
1640 &r[0], &r[1], &r[2], lodBias,
1641 &r[0], &r[1], &r[2], &r[3]);
1642 break;
1643
1644 default:
1645 assert (0);
1646 }
1647
1648 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1649 STORE( &r[chan_index], 0, chan_index );
1650 }
1651 }
1652
1653
1654 /**
1655 * Evaluate a constant-valued coefficient at the position of the
1656 * current quad.
1657 */
1658 static void
1659 eval_constant_coef(
1660 struct tgsi_exec_machine *mach,
1661 unsigned attrib,
1662 unsigned chan )
1663 {
1664 unsigned i;
1665
1666 for( i = 0; i < QUAD_SIZE; i++ ) {
1667 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1668 }
1669 }
1670
1671 /**
1672 * Evaluate a linear-valued coefficient at the position of the
1673 * current quad.
1674 */
1675 static void
1676 eval_linear_coef(
1677 struct tgsi_exec_machine *mach,
1678 unsigned attrib,
1679 unsigned chan )
1680 {
1681 const float x = mach->QuadPos.xyzw[0].f[0];
1682 const float y = mach->QuadPos.xyzw[1].f[0];
1683 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1684 const float dady = mach->InterpCoefs[attrib].dady[chan];
1685 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1686 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1687 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1688 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1689 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1690 }
1691
1692 /**
1693 * Evaluate a perspective-valued coefficient at the position of the
1694 * current quad.
1695 */
1696 static void
1697 eval_perspective_coef(
1698 struct tgsi_exec_machine *mach,
1699 unsigned attrib,
1700 unsigned chan )
1701 {
1702 const float x = mach->QuadPos.xyzw[0].f[0];
1703 const float y = mach->QuadPos.xyzw[1].f[0];
1704 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1705 const float dady = mach->InterpCoefs[attrib].dady[chan];
1706 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1707 const float *w = mach->QuadPos.xyzw[3].f;
1708 /* divide by W here */
1709 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1710 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1711 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1712 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1713 }
1714
1715
/**
 * Signature shared by eval_constant_coef(), eval_linear_coef() and
 * eval_perspective_coef(): evaluate channel `chan` of input attribute
 * `attrib` for the current quad and write the result into mach->Inputs[].
 */
typedef void (* eval_coef_func)(
   struct tgsi_exec_machine *mach,
   unsigned attrib,
   unsigned chan );
1720
1721 static void
1722 exec_declaration(
1723 struct tgsi_exec_machine *mach,
1724 const struct tgsi_full_declaration *decl )
1725 {
1726 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1727 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1728 unsigned first, last, mask;
1729 eval_coef_func eval;
1730
1731 first = decl->DeclarationRange.First;
1732 last = decl->DeclarationRange.Last;
1733 mask = decl->Declaration.UsageMask;
1734
1735 switch( decl->Declaration.Interpolate ) {
1736 case TGSI_INTERPOLATE_CONSTANT:
1737 eval = eval_constant_coef;
1738 break;
1739
1740 case TGSI_INTERPOLATE_LINEAR:
1741 eval = eval_linear_coef;
1742 break;
1743
1744 case TGSI_INTERPOLATE_PERSPECTIVE:
1745 eval = eval_perspective_coef;
1746 break;
1747
1748 default:
1749 eval = NULL;
1750 assert( 0 );
1751 }
1752
1753 if( mask == TGSI_WRITEMASK_XYZW ) {
1754 unsigned i, j;
1755
1756 for( i = first; i <= last; i++ ) {
1757 for( j = 0; j < NUM_CHANNELS; j++ ) {
1758 eval( mach, i, j );
1759 }
1760 }
1761 }
1762 else {
1763 unsigned i, j;
1764
1765 for( j = 0; j < NUM_CHANNELS; j++ ) {
1766 if( mask & (1 << j) ) {
1767 for( i = first; i <= last; i++ ) {
1768 eval( mach, i, j );
1769 }
1770 }
1771 }
1772 }
1773 }
1774 }
1775 }
1776
1777 static void
1778 exec_instruction(
1779 struct tgsi_exec_machine *mach,
1780 const struct tgsi_full_instruction *inst,
1781 int *pc )
1782 {
1783 uint chan_index;
1784 union tgsi_exec_channel r[8];
1785
1786 (*pc)++;
1787
1788 switch (inst->Instruction.Opcode) {
1789 case TGSI_OPCODE_ARL:
1790 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1791 FETCH( &r[0], 0, chan_index );
1792 micro_f2it( &r[0], &r[0] );
1793 STORE( &r[0], 0, chan_index );
1794 }
1795 break;
1796
1797 case TGSI_OPCODE_MOV:
1798 case TGSI_OPCODE_SWZ:
1799 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1800 FETCH( &r[0], 0, chan_index );
1801 STORE( &r[0], 0, chan_index );
1802 }
1803 break;
1804
1805 case TGSI_OPCODE_LIT:
1806 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1807 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1808 }
1809
1810 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1811 FETCH( &r[0], 0, CHAN_X );
1812 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1813 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1814 STORE( &r[0], 0, CHAN_Y );
1815 }
1816
1817 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1818 FETCH( &r[1], 0, CHAN_Y );
1819 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1820
1821 FETCH( &r[2], 0, CHAN_W );
1822 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1823 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1824 micro_pow( &r[1], &r[1], &r[2] );
1825 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1826 STORE( &r[0], 0, CHAN_Z );
1827 }
1828 }
1829
1830 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1831 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1832 }
1833 break;
1834
1835 case TGSI_OPCODE_RCP:
1836 /* TGSI_OPCODE_RECIP */
1837 FETCH( &r[0], 0, CHAN_X );
1838 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1839 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1840 STORE( &r[0], 0, chan_index );
1841 }
1842 break;
1843
1844 case TGSI_OPCODE_RSQ:
1845 /* TGSI_OPCODE_RECIPSQRT */
1846 FETCH( &r[0], 0, CHAN_X );
1847 micro_sqrt( &r[0], &r[0] );
1848 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1849 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1850 STORE( &r[0], 0, chan_index );
1851 }
1852 break;
1853
1854 case TGSI_OPCODE_EXP:
1855 FETCH( &r[0], 0, CHAN_X );
1856 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1857 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1858 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1859 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1860 }
1861 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1862 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1863 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1864 }
1865 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1866 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1867 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1868 }
1869 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1870 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1871 }
1872 break;
1873
1874 case TGSI_OPCODE_LOG:
1875 FETCH( &r[0], 0, CHAN_X );
1876 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1877 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1878 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1879 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1880 STORE( &r[0], 0, CHAN_X );
1881 }
1882 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1883 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1884 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1885 STORE( &r[0], 0, CHAN_Y );
1886 }
1887 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1888 STORE( &r[1], 0, CHAN_Z );
1889 }
1890 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1891 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1892 }
1893 break;
1894
1895 case TGSI_OPCODE_MUL:
1896 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1897 {
1898 FETCH(&r[0], 0, chan_index);
1899 FETCH(&r[1], 1, chan_index);
1900
1901 micro_mul( &r[0], &r[0], &r[1] );
1902
1903 STORE(&r[0], 0, chan_index);
1904 }
1905 break;
1906
1907 case TGSI_OPCODE_ADD:
1908 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1909 FETCH( &r[0], 0, chan_index );
1910 FETCH( &r[1], 1, chan_index );
1911 micro_add( &r[0], &r[0], &r[1] );
1912 STORE( &r[0], 0, chan_index );
1913 }
1914 break;
1915
1916 case TGSI_OPCODE_DP3:
1917 /* TGSI_OPCODE_DOT3 */
1918 FETCH( &r[0], 0, CHAN_X );
1919 FETCH( &r[1], 1, CHAN_X );
1920 micro_mul( &r[0], &r[0], &r[1] );
1921
1922 FETCH( &r[1], 0, CHAN_Y );
1923 FETCH( &r[2], 1, CHAN_Y );
1924 micro_mul( &r[1], &r[1], &r[2] );
1925 micro_add( &r[0], &r[0], &r[1] );
1926
1927 FETCH( &r[1], 0, CHAN_Z );
1928 FETCH( &r[2], 1, CHAN_Z );
1929 micro_mul( &r[1], &r[1], &r[2] );
1930 micro_add( &r[0], &r[0], &r[1] );
1931
1932 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1933 STORE( &r[0], 0, chan_index );
1934 }
1935 break;
1936
1937 case TGSI_OPCODE_DP4:
1938 /* TGSI_OPCODE_DOT4 */
1939 FETCH(&r[0], 0, CHAN_X);
1940 FETCH(&r[1], 1, CHAN_X);
1941
1942 micro_mul( &r[0], &r[0], &r[1] );
1943
1944 FETCH(&r[1], 0, CHAN_Y);
1945 FETCH(&r[2], 1, CHAN_Y);
1946
1947 micro_mul( &r[1], &r[1], &r[2] );
1948 micro_add( &r[0], &r[0], &r[1] );
1949
1950 FETCH(&r[1], 0, CHAN_Z);
1951 FETCH(&r[2], 1, CHAN_Z);
1952
1953 micro_mul( &r[1], &r[1], &r[2] );
1954 micro_add( &r[0], &r[0], &r[1] );
1955
1956 FETCH(&r[1], 0, CHAN_W);
1957 FETCH(&r[2], 1, CHAN_W);
1958
1959 micro_mul( &r[1], &r[1], &r[2] );
1960 micro_add( &r[0], &r[0], &r[1] );
1961
1962 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1963 STORE( &r[0], 0, chan_index );
1964 }
1965 break;
1966
1967 case TGSI_OPCODE_DST:
1968 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1969 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1970 }
1971
1972 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1973 FETCH( &r[0], 0, CHAN_Y );
1974 FETCH( &r[1], 1, CHAN_Y);
1975 micro_mul( &r[0], &r[0], &r[1] );
1976 STORE( &r[0], 0, CHAN_Y );
1977 }
1978
1979 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1980 FETCH( &r[0], 0, CHAN_Z );
1981 STORE( &r[0], 0, CHAN_Z );
1982 }
1983
1984 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1985 FETCH( &r[0], 1, CHAN_W );
1986 STORE( &r[0], 0, CHAN_W );
1987 }
1988 break;
1989
1990 case TGSI_OPCODE_MIN:
1991 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1992 FETCH(&r[0], 0, chan_index);
1993 FETCH(&r[1], 1, chan_index);
1994
1995 /* XXX use micro_min()?? */
1996 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1997
1998 STORE(&r[0], 0, chan_index);
1999 }
2000 break;
2001
2002 case TGSI_OPCODE_MAX:
2003 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2004 FETCH(&r[0], 0, chan_index);
2005 FETCH(&r[1], 1, chan_index);
2006
2007 /* XXX use micro_max()?? */
2008 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2009
2010 STORE(&r[0], 0, chan_index );
2011 }
2012 break;
2013
2014 case TGSI_OPCODE_SLT:
2015 /* TGSI_OPCODE_SETLT */
2016 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2017 FETCH( &r[0], 0, chan_index );
2018 FETCH( &r[1], 1, chan_index );
2019 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2020 STORE( &r[0], 0, chan_index );
2021 }
2022 break;
2023
2024 case TGSI_OPCODE_SGE:
2025 /* TGSI_OPCODE_SETGE */
2026 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2027 FETCH( &r[0], 0, chan_index );
2028 FETCH( &r[1], 1, chan_index );
2029 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2030 STORE( &r[0], 0, chan_index );
2031 }
2032 break;
2033
2034 case TGSI_OPCODE_MAD:
2035 /* TGSI_OPCODE_MADD */
2036 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2037 FETCH( &r[0], 0, chan_index );
2038 FETCH( &r[1], 1, chan_index );
2039 micro_mul( &r[0], &r[0], &r[1] );
2040 FETCH( &r[1], 2, chan_index );
2041 micro_add( &r[0], &r[0], &r[1] );
2042 STORE( &r[0], 0, chan_index );
2043 }
2044 break;
2045
2046 case TGSI_OPCODE_SUB:
2047 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2048 FETCH(&r[0], 0, chan_index);
2049 FETCH(&r[1], 1, chan_index);
2050
2051 micro_sub( &r[0], &r[0], &r[1] );
2052
2053 STORE(&r[0], 0, chan_index);
2054 }
2055 break;
2056
2057 case TGSI_OPCODE_LERP:
2058 /* TGSI_OPCODE_LRP */
2059 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2060 FETCH(&r[0], 0, chan_index);
2061 FETCH(&r[1], 1, chan_index);
2062 FETCH(&r[2], 2, chan_index);
2063
2064 micro_sub( &r[1], &r[1], &r[2] );
2065 micro_mul( &r[0], &r[0], &r[1] );
2066 micro_add( &r[0], &r[0], &r[2] );
2067
2068 STORE(&r[0], 0, chan_index);
2069 }
2070 break;
2071
2072 case TGSI_OPCODE_CND:
2073 assert (0);
2074 break;
2075
2076 case TGSI_OPCODE_CND0:
2077 assert (0);
2078 break;
2079
2080 case TGSI_OPCODE_DOT2ADD:
2081 /* TGSI_OPCODE_DP2A */
2082 FETCH( &r[0], 0, CHAN_X );
2083 FETCH( &r[1], 1, CHAN_X );
2084 micro_mul( &r[0], &r[0], &r[1] );
2085
2086 FETCH( &r[1], 0, CHAN_Y );
2087 FETCH( &r[2], 1, CHAN_Y );
2088 micro_mul( &r[1], &r[1], &r[2] );
2089 micro_add( &r[0], &r[0], &r[1] );
2090
2091 FETCH( &r[2], 2, CHAN_X );
2092 micro_add( &r[0], &r[0], &r[2] );
2093
2094 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2095 STORE( &r[0], 0, chan_index );
2096 }
2097 break;
2098
2099 case TGSI_OPCODE_INDEX:
2100 assert (0);
2101 break;
2102
2103 case TGSI_OPCODE_NEGATE:
2104 assert (0);
2105 break;
2106
2107 case TGSI_OPCODE_FRAC:
2108 /* TGSI_OPCODE_FRC */
2109 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2110 FETCH( &r[0], 0, chan_index );
2111 micro_frc( &r[0], &r[0] );
2112 STORE( &r[0], 0, chan_index );
2113 }
2114 break;
2115
2116 case TGSI_OPCODE_CLAMP:
2117 assert (0);
2118 break;
2119
2120 case TGSI_OPCODE_FLOOR:
2121 /* TGSI_OPCODE_FLR */
2122 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2123 FETCH( &r[0], 0, chan_index );
2124 micro_flr( &r[0], &r[0] );
2125 STORE( &r[0], 0, chan_index );
2126 }
2127 break;
2128
2129 case TGSI_OPCODE_ROUND:
2130 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2131 FETCH( &r[0], 0, chan_index );
2132 micro_rnd( &r[0], &r[0] );
2133 STORE( &r[0], 0, chan_index );
2134 }
2135 break;
2136
2137 case TGSI_OPCODE_EXPBASE2:
2138 /* TGSI_OPCODE_EX2 */
2139 FETCH(&r[0], 0, CHAN_X);
2140
2141 #if FAST_MATH
2142 micro_exp2( &r[0], &r[0] );
2143 #else
2144 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2145 #endif
2146
2147 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2148 STORE( &r[0], 0, chan_index );
2149 }
2150 break;
2151
2152 case TGSI_OPCODE_LOGBASE2:
2153 /* TGSI_OPCODE_LG2 */
2154 FETCH( &r[0], 0, CHAN_X );
2155 micro_lg2( &r[0], &r[0] );
2156 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2157 STORE( &r[0], 0, chan_index );
2158 }
2159 break;
2160
2161 case TGSI_OPCODE_POWER:
2162 /* TGSI_OPCODE_POW */
2163 FETCH(&r[0], 0, CHAN_X);
2164 FETCH(&r[1], 1, CHAN_X);
2165
2166 micro_pow( &r[0], &r[0], &r[1] );
2167
2168 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2169 STORE( &r[0], 0, chan_index );
2170 }
2171 break;
2172
2173 case TGSI_OPCODE_CROSSPRODUCT:
2174 /* TGSI_OPCODE_XPD */
2175 FETCH(&r[0], 0, CHAN_Y);
2176 FETCH(&r[1], 1, CHAN_Z);
2177
2178 micro_mul( &r[2], &r[0], &r[1] );
2179
2180 FETCH(&r[3], 0, CHAN_Z);
2181 FETCH(&r[4], 1, CHAN_Y);
2182
2183 micro_mul( &r[5], &r[3], &r[4] );
2184 micro_sub( &r[2], &r[2], &r[5] );
2185
2186 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2187 STORE( &r[2], 0, CHAN_X );
2188 }
2189
2190 FETCH(&r[2], 1, CHAN_X);
2191
2192 micro_mul( &r[3], &r[3], &r[2] );
2193
2194 FETCH(&r[5], 0, CHAN_X);
2195
2196 micro_mul( &r[1], &r[1], &r[5] );
2197 micro_sub( &r[3], &r[3], &r[1] );
2198
2199 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2200 STORE( &r[3], 0, CHAN_Y );
2201 }
2202
2203 micro_mul( &r[5], &r[5], &r[4] );
2204 micro_mul( &r[0], &r[0], &r[2] );
2205 micro_sub( &r[5], &r[5], &r[0] );
2206
2207 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2208 STORE( &r[5], 0, CHAN_Z );
2209 }
2210
2211 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2212 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2213 }
2214 break;
2215
2216 case TGSI_OPCODE_MULTIPLYMATRIX:
2217 assert (0);
2218 break;
2219
2220 case TGSI_OPCODE_ABS:
2221 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2222 FETCH(&r[0], 0, chan_index);
2223
2224 micro_abs( &r[0], &r[0] );
2225
2226 STORE(&r[0], 0, chan_index);
2227 }
2228 break;
2229
2230 case TGSI_OPCODE_RCC:
2231 assert (0);
2232 break;
2233
2234 case TGSI_OPCODE_DPH:
2235 FETCH(&r[0], 0, CHAN_X);
2236 FETCH(&r[1], 1, CHAN_X);
2237
2238 micro_mul( &r[0], &r[0], &r[1] );
2239
2240 FETCH(&r[1], 0, CHAN_Y);
2241 FETCH(&r[2], 1, CHAN_Y);
2242
2243 micro_mul( &r[1], &r[1], &r[2] );
2244 micro_add( &r[0], &r[0], &r[1] );
2245
2246 FETCH(&r[1], 0, CHAN_Z);
2247 FETCH(&r[2], 1, CHAN_Z);
2248
2249 micro_mul( &r[1], &r[1], &r[2] );
2250 micro_add( &r[0], &r[0], &r[1] );
2251
2252 FETCH(&r[1], 1, CHAN_W);
2253
2254 micro_add( &r[0], &r[0], &r[1] );
2255
2256 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2257 STORE( &r[0], 0, chan_index );
2258 }
2259 break;
2260
2261 case TGSI_OPCODE_COS:
2262 FETCH(&r[0], 0, CHAN_X);
2263
2264 micro_cos( &r[0], &r[0] );
2265
2266 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2267 STORE( &r[0], 0, chan_index );
2268 }
2269 break;
2270
2271 case TGSI_OPCODE_DDX:
2272 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2273 FETCH( &r[0], 0, chan_index );
2274 micro_ddx( &r[0], &r[0] );
2275 STORE( &r[0], 0, chan_index );
2276 }
2277 break;
2278
2279 case TGSI_OPCODE_DDY:
2280 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2281 FETCH( &r[0], 0, chan_index );
2282 micro_ddy( &r[0], &r[0] );
2283 STORE( &r[0], 0, chan_index );
2284 }
2285 break;
2286
2287 case TGSI_OPCODE_KILP:
2288 exec_kilp (mach, inst);
2289 break;
2290
2291 case TGSI_OPCODE_KIL:
2292 exec_kil (mach, inst);
2293 break;
2294
2295 case TGSI_OPCODE_PK2H:
2296 assert (0);
2297 break;
2298
2299 case TGSI_OPCODE_PK2US:
2300 assert (0);
2301 break;
2302
2303 case TGSI_OPCODE_PK4B:
2304 assert (0);
2305 break;
2306
2307 case TGSI_OPCODE_PK4UB:
2308 assert (0);
2309 break;
2310
2311 case TGSI_OPCODE_RFL:
2312 assert (0);
2313 break;
2314
2315 case TGSI_OPCODE_SEQ:
2316 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2317 FETCH( &r[0], 0, chan_index );
2318 FETCH( &r[1], 1, chan_index );
2319 micro_eq( &r[0], &r[0], &r[1],
2320 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2321 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2322 STORE( &r[0], 0, chan_index );
2323 }
2324 break;
2325
2326 case TGSI_OPCODE_SFL:
2327 assert (0);
2328 break;
2329
2330 case TGSI_OPCODE_SGT:
2331 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2332 FETCH( &r[0], 0, chan_index );
2333 FETCH( &r[1], 1, chan_index );
2334 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2335 STORE( &r[0], 0, chan_index );
2336 }
2337 break;
2338
2339 case TGSI_OPCODE_SIN:
2340 FETCH( &r[0], 0, CHAN_X );
2341 micro_sin( &r[0], &r[0] );
2342 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2343 STORE( &r[0], 0, chan_index );
2344 }
2345 break;
2346
2347 case TGSI_OPCODE_SLE:
2348 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2349 FETCH( &r[0], 0, chan_index );
2350 FETCH( &r[1], 1, chan_index );
2351 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2352 STORE( &r[0], 0, chan_index );
2353 }
2354 break;
2355
2356 case TGSI_OPCODE_SNE:
2357 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2358 FETCH( &r[0], 0, chan_index );
2359 FETCH( &r[1], 1, chan_index );
2360 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2361 STORE( &r[0], 0, chan_index );
2362 }
2363 break;
2364
2365 case TGSI_OPCODE_STR:
2366 assert (0);
2367 break;
2368
2369 case TGSI_OPCODE_TEX:
2370 /* simple texture lookup */
2371 /* src[0] = texcoord */
2372 /* src[1] = sampler unit */
2373 exec_tex(mach, inst, FALSE, FALSE);
2374 break;
2375
2376 case TGSI_OPCODE_TXB:
2377 /* Texture lookup with lod bias */
2378 /* src[0] = texcoord (src[0].w = LOD bias) */
2379 /* src[1] = sampler unit */
2380 exec_tex(mach, inst, TRUE, FALSE);
2381 break;
2382
2383 case TGSI_OPCODE_TXD:
2384 /* Texture lookup with explict partial derivatives */
2385 /* src[0] = texcoord */
2386 /* src[1] = d[strq]/dx */
2387 /* src[2] = d[strq]/dy */
2388 /* src[3] = sampler unit */
2389 assert (0);
2390 break;
2391
2392 case TGSI_OPCODE_TXL:
2393 /* Texture lookup with explit LOD */
2394 /* src[0] = texcoord (src[0].w = LOD) */
2395 /* src[1] = sampler unit */
2396 exec_tex(mach, inst, TRUE, FALSE);
2397 break;
2398
2399 case TGSI_OPCODE_TXP:
2400 /* Texture lookup with projection */
2401 /* src[0] = texcoord (src[0].w = projection) */
2402 /* src[1] = sampler unit */
2403 exec_tex(mach, inst, FALSE, TRUE);
2404 break;
2405
2406 case TGSI_OPCODE_UP2H:
2407 assert (0);
2408 break;
2409
2410 case TGSI_OPCODE_UP2US:
2411 assert (0);
2412 break;
2413
2414 case TGSI_OPCODE_UP4B:
2415 assert (0);
2416 break;
2417
2418 case TGSI_OPCODE_UP4UB:
2419 assert (0);
2420 break;
2421
2422 case TGSI_OPCODE_X2D:
2423 assert (0);
2424 break;
2425
2426 case TGSI_OPCODE_ARA:
2427 assert (0);
2428 break;
2429
2430 case TGSI_OPCODE_ARR:
2431 assert (0);
2432 break;
2433
2434 case TGSI_OPCODE_BRA:
2435 assert (0);
2436 break;
2437
2438 case TGSI_OPCODE_CAL:
2439 /* skip the call if no execution channels are enabled */
2440 if (mach->ExecMask) {
2441 /* do the call */
2442
2443 /* push the Cond, Loop, Cont stacks */
2444 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2445 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2446 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2447 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2448 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2449 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2450
2451 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2452 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2453
2454 /* note that PC was already incremented above */
2455 mach->CallStack[mach->CallStackTop++] = *pc;
2456 *pc = inst->InstructionExtLabel.Label;
2457 }
2458 break;
2459
2460 case TGSI_OPCODE_RET:
2461 mach->FuncMask &= ~mach->ExecMask;
2462 UPDATE_EXEC_MASK(mach);
2463
2464 if (mach->FuncMask == 0x0) {
2465 /* really return now (otherwise, keep executing */
2466
2467 if (mach->CallStackTop == 0) {
2468 /* returning from main() */
2469 *pc = -1;
2470 return;
2471 }
2472 *pc = mach->CallStack[--mach->CallStackTop];
2473
2474 /* pop the Cond, Loop, Cont stacks */
2475 assert(mach->CondStackTop > 0);
2476 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2477 assert(mach->LoopStackTop > 0);
2478 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2479 assert(mach->ContStackTop > 0);
2480 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2481 assert(mach->FuncStackTop > 0);
2482 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2483
2484 UPDATE_EXEC_MASK(mach);
2485 }
2486 break;
2487
2488 case TGSI_OPCODE_SSG:
2489 assert (0);
2490 break;
2491
2492 case TGSI_OPCODE_CMP:
2493 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2494 FETCH(&r[0], 0, chan_index);
2495 FETCH(&r[1], 1, chan_index);
2496 FETCH(&r[2], 2, chan_index);
2497
2498 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2499
2500 STORE(&r[0], 0, chan_index);
2501 }
2502 break;
2503
2504 case TGSI_OPCODE_SCS:
2505 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2506 FETCH( &r[0], 0, CHAN_X );
2507 }
2508 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2509 micro_cos( &r[1], &r[0] );
2510 STORE( &r[1], 0, CHAN_X );
2511 }
2512 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2513 micro_sin( &r[1], &r[0] );
2514 STORE( &r[1], 0, CHAN_Y );
2515 }
2516 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2517 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2518 }
2519 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2520 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2521 }
2522 break;
2523
2524 case TGSI_OPCODE_NRM:
2525 /* 3-component vector normalize */
2526 {
2527 union tgsi_exec_channel tmp, dot;
2528
2529 /* tmp = dp3(src0, src0): */
2530 FETCH( &r[0], 0, CHAN_X );
2531 micro_mul( &tmp, &r[0], &r[0] );
2532
2533 FETCH( &r[1], 0, CHAN_Y );
2534 micro_mul( &dot, &r[1], &r[1] );
2535 micro_add( &tmp, &tmp, &dot );
2536
2537 FETCH( &r[2], 0, CHAN_Z );
2538 micro_mul( &dot, &r[2], &r[2] );
2539 micro_add( &tmp, &tmp, &dot );
2540
2541 /* tmp = 1 / sqrt(tmp) */
2542 micro_sqrt( &tmp, &tmp );
2543 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2544
2545 /* note: w channel is undefined */
2546 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2547 /* chan = chan * tmp */
2548 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2549 STORE( &r[chan_index], 0, chan_index );
2550 }
2551 }
2552 break;
2553
2554 case TGSI_OPCODE_NRM4:
2555 /* 4-component vector normalize */
2556 {
2557 union tgsi_exec_channel tmp, dot;
2558
2559 /* tmp = dp4(src0, src0): */
2560 FETCH( &r[0], 0, CHAN_X );
2561 micro_mul( &tmp, &r[0], &r[0] );
2562
2563 FETCH( &r[1], 0, CHAN_Y );
2564 micro_mul( &dot, &r[1], &r[1] );
2565 micro_add( &tmp, &tmp, &dot );
2566
2567 FETCH( &r[2], 0, CHAN_Z );
2568 micro_mul( &dot, &r[2], &r[2] );
2569 micro_add( &tmp, &tmp, &dot );
2570
2571 FETCH( &r[3], 0, CHAN_W );
2572 micro_mul( &dot, &r[3], &r[3] );
2573 micro_add( &tmp, &tmp, &dot );
2574
2575 /* tmp = 1 / sqrt(tmp) */
2576 micro_sqrt( &tmp, &tmp );
2577 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2578
2579 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2580 /* chan = chan * tmp */
2581 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2582 STORE( &r[chan_index], 0, chan_index );
2583 }
2584 }
2585 break;
2586
2587 case TGSI_OPCODE_DIV:
2588 assert( 0 );
2589 break;
2590
2591 case TGSI_OPCODE_DP2:
2592 FETCH( &r[0], 0, CHAN_X );
2593 FETCH( &r[1], 1, CHAN_X );
2594 micro_mul( &r[0], &r[0], &r[1] );
2595
2596 FETCH( &r[1], 0, CHAN_Y );
2597 FETCH( &r[2], 1, CHAN_Y );
2598 micro_mul( &r[1], &r[1], &r[2] );
2599 micro_add( &r[0], &r[0], &r[1] );
2600
2601 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2602 STORE( &r[0], 0, chan_index );
2603 }
2604 break;
2605
2606 case TGSI_OPCODE_IF:
2607 /* push CondMask */
2608 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2609 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2610 FETCH( &r[0], 0, CHAN_X );
2611 /* update CondMask */
2612 if( ! r[0].u[0] ) {
2613 mach->CondMask &= ~0x1;
2614 }
2615 if( ! r[0].u[1] ) {
2616 mach->CondMask &= ~0x2;
2617 }
2618 if( ! r[0].u[2] ) {
2619 mach->CondMask &= ~0x4;
2620 }
2621 if( ! r[0].u[3] ) {
2622 mach->CondMask &= ~0x8;
2623 }
2624 UPDATE_EXEC_MASK(mach);
2625 /* Todo: If CondMask==0, jump to ELSE */
2626 break;
2627
2628 case TGSI_OPCODE_ELSE:
2629 /* invert CondMask wrt previous mask */
2630 {
2631 uint prevMask;
2632 assert(mach->CondStackTop > 0);
2633 prevMask = mach->CondStack[mach->CondStackTop - 1];
2634 mach->CondMask = ~mach->CondMask & prevMask;
2635 UPDATE_EXEC_MASK(mach);
2636 /* Todo: If CondMask==0, jump to ENDIF */
2637 }
2638 break;
2639
2640 case TGSI_OPCODE_ENDIF:
2641 /* pop CondMask */
2642 assert(mach->CondStackTop > 0);
2643 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2644 UPDATE_EXEC_MASK(mach);
2645 break;
2646
2647 case TGSI_OPCODE_END:
2648 /* halt execution */
2649 *pc = -1;
2650 break;
2651
2652 case TGSI_OPCODE_REP:
2653 assert (0);
2654 break;
2655
2656 case TGSI_OPCODE_ENDREP:
2657 assert (0);
2658 break;
2659
2660 case TGSI_OPCODE_PUSHA:
2661 assert (0);
2662 break;
2663
2664 case TGSI_OPCODE_POPA:
2665 assert (0);
2666 break;
2667
2668 case TGSI_OPCODE_CEIL:
2669 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2670 FETCH( &r[0], 0, chan_index );
2671 micro_ceil( &r[0], &r[0] );
2672 STORE( &r[0], 0, chan_index );
2673 }
2674 break;
2675
2676 case TGSI_OPCODE_I2F:
2677 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2678 FETCH( &r[0], 0, chan_index );
2679 micro_i2f( &r[0], &r[0] );
2680 STORE( &r[0], 0, chan_index );
2681 }
2682 break;
2683
2684 case TGSI_OPCODE_NOT:
2685 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2686 FETCH( &r[0], 0, chan_index );
2687 micro_not( &r[0], &r[0] );
2688 STORE( &r[0], 0, chan_index );
2689 }
2690 break;
2691
2692 case TGSI_OPCODE_TRUNC:
2693 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2694 FETCH( &r[0], 0, chan_index );
2695 micro_trunc( &r[0], &r[0] );
2696 STORE( &r[0], 0, chan_index );
2697 }
2698 break;
2699
2700 case TGSI_OPCODE_SHL:
2701 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2702 FETCH( &r[0], 0, chan_index );
2703 FETCH( &r[1], 1, chan_index );
2704 micro_shl( &r[0], &r[0], &r[1] );
2705 STORE( &r[0], 0, chan_index );
2706 }
2707 break;
2708
2709 case TGSI_OPCODE_SHR:
2710 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2711 FETCH( &r[0], 0, chan_index );
2712 FETCH( &r[1], 1, chan_index );
2713 micro_ishr( &r[0], &r[0], &r[1] );
2714 STORE( &r[0], 0, chan_index );
2715 }
2716 break;
2717
2718 case TGSI_OPCODE_AND:
2719 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2720 FETCH( &r[0], 0, chan_index );
2721 FETCH( &r[1], 1, chan_index );
2722 micro_and( &r[0], &r[0], &r[1] );
2723 STORE( &r[0], 0, chan_index );
2724 }
2725 break;
2726
2727 case TGSI_OPCODE_OR:
2728 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2729 FETCH( &r[0], 0, chan_index );
2730 FETCH( &r[1], 1, chan_index );
2731 micro_or( &r[0], &r[0], &r[1] );
2732 STORE( &r[0], 0, chan_index );
2733 }
2734 break;
2735
2736 case TGSI_OPCODE_MOD:
2737 assert (0);
2738 break;
2739
2740 case TGSI_OPCODE_XOR:
2741 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2742 FETCH( &r[0], 0, chan_index );
2743 FETCH( &r[1], 1, chan_index );
2744 micro_xor( &r[0], &r[0], &r[1] );
2745 STORE( &r[0], 0, chan_index );
2746 }
2747 break;
2748
2749 case TGSI_OPCODE_SAD:
2750 assert (0);
2751 break;
2752
2753 case TGSI_OPCODE_TXF:
2754 assert (0);
2755 break;
2756
2757 case TGSI_OPCODE_TXQ:
2758 assert (0);
2759 break;
2760
2761 case TGSI_OPCODE_EMIT:
2762 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2763 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2764 break;
2765
2766 case TGSI_OPCODE_ENDPRIM:
2767 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2768 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2769 break;
2770
2771 case TGSI_OPCODE_LOOP:
2772 /* fall-through (for now) */
2773 case TGSI_OPCODE_BGNLOOP2:
2774 /* push LoopMask and ContMasks */
2775 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2776 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2777 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2778 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2779 break;
2780
2781 case TGSI_OPCODE_ENDLOOP:
2782 /* fall-through (for now at least) */
2783 case TGSI_OPCODE_ENDLOOP2:
2784 /* Restore ContMask, but don't pop */
2785 assert(mach->ContStackTop > 0);
2786 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2787 UPDATE_EXEC_MASK(mach);
2788 if (mach->ExecMask) {
2789 /* repeat loop: jump to instruction just past BGNLOOP */
2790 *pc = inst->InstructionExtLabel.Label + 1;
2791 }
2792 else {
2793 /* exit loop: pop LoopMask */
2794 assert(mach->LoopStackTop > 0);
2795 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2796 /* pop ContMask */
2797 assert(mach->ContStackTop > 0);
2798 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2799 }
2800 UPDATE_EXEC_MASK(mach);
2801 break;
2802
2803 case TGSI_OPCODE_BRK:
2804 /* turn off loop channels for each enabled exec channel */
2805 mach->LoopMask &= ~mach->ExecMask;
2806 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2807 UPDATE_EXEC_MASK(mach);
2808 break;
2809
2810 case TGSI_OPCODE_CONT:
2811 /* turn off cont channels for each enabled exec channel */
2812 mach->ContMask &= ~mach->ExecMask;
2813 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2814 UPDATE_EXEC_MASK(mach);
2815 break;
2816
2817 case TGSI_OPCODE_BGNSUB:
2818 /* no-op */
2819 break;
2820
2821 case TGSI_OPCODE_ENDSUB:
2822 /* no-op */
2823 break;
2824
2825 case TGSI_OPCODE_NOISE1:
2826 assert( 0 );
2827 break;
2828
2829 case TGSI_OPCODE_NOISE2:
2830 assert( 0 );
2831 break;
2832
2833 case TGSI_OPCODE_NOISE3:
2834 assert( 0 );
2835 break;
2836
2837 case TGSI_OPCODE_NOISE4:
2838 assert( 0 );
2839 break;
2840
2841 case TGSI_OPCODE_NOP:
2842 break;
2843
2844 default:
2845 assert( 0 );
2846 }
2847 }
2848
2849
2850 /**
2851 * Run TGSI interpreter.
2852 * \return bitmask of "alive" quad components
2853 */
2854 uint
2855 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2856 {
2857 uint i;
2858 int pc = 0;
2859
2860 mach->CondMask = 0xf;
2861 mach->LoopMask = 0xf;
2862 mach->ContMask = 0xf;
2863 mach->FuncMask = 0xf;
2864 mach->ExecMask = 0xf;
2865
2866 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2867 assert(mach->CondStackTop == 0);
2868 assert(mach->LoopStackTop == 0);
2869 assert(mach->ContStackTop == 0);
2870 assert(mach->CallStackTop == 0);
2871
2872 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2873 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2874
2875 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2876 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2877 mach->Primitives[0] = 0;
2878 }
2879
2880 for (i = 0; i < QUAD_SIZE; i++) {
2881 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2882 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2883 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2884 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2885 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2886 }
2887
2888 /* execute declarations (interpolants) */
2889 for (i = 0; i < mach->NumDeclarations; i++) {
2890 exec_declaration( mach, mach->Declarations+i );
2891 }
2892
2893 /* execute instructions, until pc is set to -1 */
2894 while (pc != -1) {
2895 assert(pc < (int) mach->NumInstructions);
2896 exec_instruction( mach, mach->Instructions + pc, &pc );
2897 }
2898
2899 #if 0
2900 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2901 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2902 /*
2903 * Scale back depth component.
2904 */
2905 for (i = 0; i < 4; i++)
2906 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2907 }
2908 #endif
2909
2910 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2911 }
2912
2913