tgsi: Implement LOG opcode for SSE2 codegen.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask; see UPDATE_EXEC_MASK), which are updated by flow
45 * control instructions such as IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT.
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_util.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
60
/*
 * Lane indices of the four pixels of a quad inside an execution channel
 * (used by the derivative helpers).
 */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3

/* Component indices of a four-component register. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0

/** Non-zero if channel CHAN is set in the write mask of INST's first destination. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test, against the write mask of INST's second destination. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel that is
 * enabled in the first destination's write mask.  CHAN must be an lvalue.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** As FOR_EACH_ENABLED_CHANNEL, but for the second destination register. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))


/**
 * Recompute the execution mask as the intersection of the conditional,
 * loop, continue and function masks.
 * MACH is parenthesized so that any pointer expression may be passed.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
122
123 /**
124 * Initialize machine state by expanding tokens to full instructions,
125 * allocating temporary storage, setting up constants, etc.
126 * After this, we can call tgsi_exec_machine_run() many times.
127 */
128 void
129 tgsi_exec_machine_bind_shader(
130 struct tgsi_exec_machine *mach,
131 const struct tgsi_token *tokens,
132 uint numSamplers,
133 struct tgsi_sampler *samplers)
134 {
135 uint k;
136 struct tgsi_parse_context parse;
137 struct tgsi_exec_labels *labels = &mach->Labels;
138 struct tgsi_full_instruction *instructions;
139 struct tgsi_full_declaration *declarations;
140 uint maxInstructions = 10, numInstructions = 0;
141 uint maxDeclarations = 10, numDeclarations = 0;
142 uint instno = 0;
143
144 #if 0
145 tgsi_dump(tokens, 0);
146 #endif
147
148 mach->Tokens = tokens;
149 mach->Samplers = samplers;
150
151 k = tgsi_parse_init (&parse, mach->Tokens);
152 if (k != TGSI_PARSE_OK) {
153 debug_printf( "Problem parsing!\n" );
154 return;
155 }
156
157 mach->Processor = parse.FullHeader.Processor.Processor;
158 mach->ImmLimit = 0;
159 labels->count = 0;
160
161 declarations = (struct tgsi_full_declaration *)
162 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
163
164 if (!declarations) {
165 return;
166 }
167
168 instructions = (struct tgsi_full_instruction *)
169 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
170
171 if (!instructions) {
172 FREE( declarations );
173 return;
174 }
175
176 while( !tgsi_parse_end_of_tokens( &parse ) ) {
177 uint pointer = parse.Position;
178 uint i;
179
180 tgsi_parse_token( &parse );
181 switch( parse.FullToken.Token.Type ) {
182 case TGSI_TOKEN_TYPE_DECLARATION:
183 /* save expanded declaration */
184 if (numDeclarations == maxDeclarations) {
185 declarations = REALLOC(declarations,
186 maxDeclarations
187 * sizeof(struct tgsi_full_declaration),
188 (maxDeclarations + 10)
189 * sizeof(struct tgsi_full_declaration));
190 maxDeclarations += 10;
191 }
192 memcpy(declarations + numDeclarations,
193 &parse.FullToken.FullDeclaration,
194 sizeof(declarations[0]));
195 numDeclarations++;
196 break;
197
198 case TGSI_TOKEN_TYPE_IMMEDIATE:
199 {
200 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
201 assert( size % 4 == 0 );
202 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
203
204 for( i = 0; i < size; i++ ) {
205 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
206 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
207 }
208 mach->ImmLimit += size / 4;
209 }
210 break;
211
212 case TGSI_TOKEN_TYPE_INSTRUCTION:
213 assert( labels->count < MAX_LABELS );
214
215 labels->labels[labels->count][0] = instno;
216 labels->labels[labels->count][1] = pointer;
217 labels->count++;
218
219 /* save expanded instruction */
220 if (numInstructions == maxInstructions) {
221 instructions = REALLOC(instructions,
222 maxInstructions
223 * sizeof(struct tgsi_full_instruction),
224 (maxInstructions + 10)
225 * sizeof(struct tgsi_full_instruction));
226 maxInstructions += 10;
227 }
228 memcpy(instructions + numInstructions,
229 &parse.FullToken.FullInstruction,
230 sizeof(instructions[0]));
231 numInstructions++;
232 break;
233
234 default:
235 assert( 0 );
236 }
237 }
238 tgsi_parse_free (&parse);
239
240 if (mach->Declarations) {
241 FREE( mach->Declarations );
242 }
243 mach->Declarations = declarations;
244 mach->NumDeclarations = numDeclarations;
245
246 if (mach->Instructions) {
247 FREE( mach->Instructions );
248 }
249 mach->Instructions = instructions;
250 mach->NumInstructions = numInstructions;
251 }
252
253
254 void
255 tgsi_exec_machine_init(
256 struct tgsi_exec_machine *mach )
257 {
258 uint i;
259
260 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
261 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
262
263 /* Setup constants. */
264 for( i = 0; i < 4; i++ ) {
265 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
266 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
267 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
268 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
269 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
270 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
271 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
272 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
273 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
274 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
275 }
276 }
277
278
279 void
280 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
281 {
282 if (mach->Instructions) {
283 FREE(mach->Instructions);
284 mach->Instructions = NULL;
285 mach->NumInstructions = 0;
286 }
287 if (mach->Declarations) {
288 FREE(mach->Declarations);
289 mach->Declarations = NULL;
290 mach->NumDeclarations = 0;
291 }
292 }
293
294
295 static void
296 micro_abs(
297 union tgsi_exec_channel *dst,
298 const union tgsi_exec_channel *src )
299 {
300 dst->f[0] = fabsf( src->f[0] );
301 dst->f[1] = fabsf( src->f[1] );
302 dst->f[2] = fabsf( src->f[2] );
303 dst->f[3] = fabsf( src->f[3] );
304 }
305
306 static void
307 micro_add(
308 union tgsi_exec_channel *dst,
309 const union tgsi_exec_channel *src0,
310 const union tgsi_exec_channel *src1 )
311 {
312 dst->f[0] = src0->f[0] + src1->f[0];
313 dst->f[1] = src0->f[1] + src1->f[1];
314 dst->f[2] = src0->f[2] + src1->f[2];
315 dst->f[3] = src0->f[3] + src1->f[3];
316 }
317
318 static void
319 micro_iadd(
320 union tgsi_exec_channel *dst,
321 const union tgsi_exec_channel *src0,
322 const union tgsi_exec_channel *src1 )
323 {
324 dst->i[0] = src0->i[0] + src1->i[0];
325 dst->i[1] = src0->i[1] + src1->i[1];
326 dst->i[2] = src0->i[2] + src1->i[2];
327 dst->i[3] = src0->i[3] + src1->i[3];
328 }
329
330 static void
331 micro_and(
332 union tgsi_exec_channel *dst,
333 const union tgsi_exec_channel *src0,
334 const union tgsi_exec_channel *src1 )
335 {
336 dst->u[0] = src0->u[0] & src1->u[0];
337 dst->u[1] = src0->u[1] & src1->u[1];
338 dst->u[2] = src0->u[2] & src1->u[2];
339 dst->u[3] = src0->u[3] & src1->u[3];
340 }
341
342 static void
343 micro_ceil(
344 union tgsi_exec_channel *dst,
345 const union tgsi_exec_channel *src )
346 {
347 dst->f[0] = ceilf( src->f[0] );
348 dst->f[1] = ceilf( src->f[1] );
349 dst->f[2] = ceilf( src->f[2] );
350 dst->f[3] = ceilf( src->f[3] );
351 }
352
353 static void
354 micro_cos(
355 union tgsi_exec_channel *dst,
356 const union tgsi_exec_channel *src )
357 {
358 dst->f[0] = cosf( src->f[0] );
359 dst->f[1] = cosf( src->f[1] );
360 dst->f[2] = cosf( src->f[2] );
361 dst->f[3] = cosf( src->f[3] );
362 }
363
364 static void
365 micro_ddx(
366 union tgsi_exec_channel *dst,
367 const union tgsi_exec_channel *src )
368 {
369 dst->f[0] =
370 dst->f[1] =
371 dst->f[2] =
372 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
373 }
374
375 static void
376 micro_ddy(
377 union tgsi_exec_channel *dst,
378 const union tgsi_exec_channel *src )
379 {
380 dst->f[0] =
381 dst->f[1] =
382 dst->f[2] =
383 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
384 }
385
386 static void
387 micro_div(
388 union tgsi_exec_channel *dst,
389 const union tgsi_exec_channel *src0,
390 const union tgsi_exec_channel *src1 )
391 {
392 if (src1->f[0] != 0) {
393 dst->f[0] = src0->f[0] / src1->f[0];
394 }
395 if (src1->f[1] != 0) {
396 dst->f[1] = src0->f[1] / src1->f[1];
397 }
398 if (src1->f[2] != 0) {
399 dst->f[2] = src0->f[2] / src1->f[2];
400 }
401 if (src1->f[3] != 0) {
402 dst->f[3] = src0->f[3] / src1->f[3];
403 }
404 }
405
406 static void
407 micro_udiv(
408 union tgsi_exec_channel *dst,
409 const union tgsi_exec_channel *src0,
410 const union tgsi_exec_channel *src1 )
411 {
412 dst->u[0] = src0->u[0] / src1->u[0];
413 dst->u[1] = src0->u[1] / src1->u[1];
414 dst->u[2] = src0->u[2] / src1->u[2];
415 dst->u[3] = src0->u[3] / src1->u[3];
416 }
417
418 static void
419 micro_eq(
420 union tgsi_exec_channel *dst,
421 const union tgsi_exec_channel *src0,
422 const union tgsi_exec_channel *src1,
423 const union tgsi_exec_channel *src2,
424 const union tgsi_exec_channel *src3 )
425 {
426 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
427 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
428 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
429 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
430 }
431
432 static void
433 micro_ieq(
434 union tgsi_exec_channel *dst,
435 const union tgsi_exec_channel *src0,
436 const union tgsi_exec_channel *src1,
437 const union tgsi_exec_channel *src2,
438 const union tgsi_exec_channel *src3 )
439 {
440 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
441 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
442 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
443 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
444 }
445
446 static void
447 micro_exp2(
448 union tgsi_exec_channel *dst,
449 const union tgsi_exec_channel *src)
450 {
451 dst->f[0] = powf( 2.0f, src->f[0] );
452 dst->f[1] = powf( 2.0f, src->f[1] );
453 dst->f[2] = powf( 2.0f, src->f[2] );
454 dst->f[3] = powf( 2.0f, src->f[3] );
455 }
456
457 static void
458 micro_f2it(
459 union tgsi_exec_channel *dst,
460 const union tgsi_exec_channel *src )
461 {
462 dst->i[0] = (int) src->f[0];
463 dst->i[1] = (int) src->f[1];
464 dst->i[2] = (int) src->f[2];
465 dst->i[3] = (int) src->f[3];
466 }
467
468 static void
469 micro_f2ut(
470 union tgsi_exec_channel *dst,
471 const union tgsi_exec_channel *src )
472 {
473 dst->u[0] = (uint) src->f[0];
474 dst->u[1] = (uint) src->f[1];
475 dst->u[2] = (uint) src->f[2];
476 dst->u[3] = (uint) src->f[3];
477 }
478
479 static void
480 micro_flr(
481 union tgsi_exec_channel *dst,
482 const union tgsi_exec_channel *src )
483 {
484 dst->f[0] = floorf( src->f[0] );
485 dst->f[1] = floorf( src->f[1] );
486 dst->f[2] = floorf( src->f[2] );
487 dst->f[3] = floorf( src->f[3] );
488 }
489
490 static void
491 micro_frc(
492 union tgsi_exec_channel *dst,
493 const union tgsi_exec_channel *src )
494 {
495 dst->f[0] = src->f[0] - floorf( src->f[0] );
496 dst->f[1] = src->f[1] - floorf( src->f[1] );
497 dst->f[2] = src->f[2] - floorf( src->f[2] );
498 dst->f[3] = src->f[3] - floorf( src->f[3] );
499 }
500
501 static void
502 micro_ge(
503 union tgsi_exec_channel *dst,
504 const union tgsi_exec_channel *src0,
505 const union tgsi_exec_channel *src1,
506 const union tgsi_exec_channel *src2,
507 const union tgsi_exec_channel *src3 )
508 {
509 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
510 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
511 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
512 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
513 }
514
515 static void
516 micro_i2f(
517 union tgsi_exec_channel *dst,
518 const union tgsi_exec_channel *src )
519 {
520 dst->f[0] = (float) src->i[0];
521 dst->f[1] = (float) src->i[1];
522 dst->f[2] = (float) src->i[2];
523 dst->f[3] = (float) src->i[3];
524 }
525
526 static void
527 micro_lg2(
528 union tgsi_exec_channel *dst,
529 const union tgsi_exec_channel *src )
530 {
531 dst->f[0] = logf( src->f[0] ) * 1.442695f;
532 dst->f[1] = logf( src->f[1] ) * 1.442695f;
533 dst->f[2] = logf( src->f[2] ) * 1.442695f;
534 dst->f[3] = logf( src->f[3] ) * 1.442695f;
535 }
536
537 static void
538 micro_le(
539 union tgsi_exec_channel *dst,
540 const union tgsi_exec_channel *src0,
541 const union tgsi_exec_channel *src1,
542 const union tgsi_exec_channel *src2,
543 const union tgsi_exec_channel *src3 )
544 {
545 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
546 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
547 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
548 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
549 }
550
551 static void
552 micro_lt(
553 union tgsi_exec_channel *dst,
554 const union tgsi_exec_channel *src0,
555 const union tgsi_exec_channel *src1,
556 const union tgsi_exec_channel *src2,
557 const union tgsi_exec_channel *src3 )
558 {
559 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
560 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
561 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
562 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
563 }
564
565 static void
566 micro_ilt(
567 union tgsi_exec_channel *dst,
568 const union tgsi_exec_channel *src0,
569 const union tgsi_exec_channel *src1,
570 const union tgsi_exec_channel *src2,
571 const union tgsi_exec_channel *src3 )
572 {
573 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
574 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
575 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
576 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
577 }
578
579 static void
580 micro_ult(
581 union tgsi_exec_channel *dst,
582 const union tgsi_exec_channel *src0,
583 const union tgsi_exec_channel *src1,
584 const union tgsi_exec_channel *src2,
585 const union tgsi_exec_channel *src3 )
586 {
587 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
588 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
589 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
590 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
591 }
592
593 static void
594 micro_max(
595 union tgsi_exec_channel *dst,
596 const union tgsi_exec_channel *src0,
597 const union tgsi_exec_channel *src1 )
598 {
599 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
600 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
601 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
602 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
603 }
604
605 static void
606 micro_imax(
607 union tgsi_exec_channel *dst,
608 const union tgsi_exec_channel *src0,
609 const union tgsi_exec_channel *src1 )
610 {
611 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
612 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
613 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
614 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
615 }
616
617 static void
618 micro_umax(
619 union tgsi_exec_channel *dst,
620 const union tgsi_exec_channel *src0,
621 const union tgsi_exec_channel *src1 )
622 {
623 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
624 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
625 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
626 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
627 }
628
629 static void
630 micro_min(
631 union tgsi_exec_channel *dst,
632 const union tgsi_exec_channel *src0,
633 const union tgsi_exec_channel *src1 )
634 {
635 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
636 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
637 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
638 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
639 }
640
641 static void
642 micro_imin(
643 union tgsi_exec_channel *dst,
644 const union tgsi_exec_channel *src0,
645 const union tgsi_exec_channel *src1 )
646 {
647 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
648 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
649 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
650 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
651 }
652
653 static void
654 micro_umin(
655 union tgsi_exec_channel *dst,
656 const union tgsi_exec_channel *src0,
657 const union tgsi_exec_channel *src1 )
658 {
659 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
660 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
661 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
662 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
663 }
664
665 static void
666 micro_umod(
667 union tgsi_exec_channel *dst,
668 const union tgsi_exec_channel *src0,
669 const union tgsi_exec_channel *src1 )
670 {
671 dst->u[0] = src0->u[0] % src1->u[0];
672 dst->u[1] = src0->u[1] % src1->u[1];
673 dst->u[2] = src0->u[2] % src1->u[2];
674 dst->u[3] = src0->u[3] % src1->u[3];
675 }
676
677 static void
678 micro_mul(
679 union tgsi_exec_channel *dst,
680 const union tgsi_exec_channel *src0,
681 const union tgsi_exec_channel *src1 )
682 {
683 dst->f[0] = src0->f[0] * src1->f[0];
684 dst->f[1] = src0->f[1] * src1->f[1];
685 dst->f[2] = src0->f[2] * src1->f[2];
686 dst->f[3] = src0->f[3] * src1->f[3];
687 }
688
689 static void
690 micro_imul(
691 union tgsi_exec_channel *dst,
692 const union tgsi_exec_channel *src0,
693 const union tgsi_exec_channel *src1 )
694 {
695 dst->i[0] = src0->i[0] * src1->i[0];
696 dst->i[1] = src0->i[1] * src1->i[1];
697 dst->i[2] = src0->i[2] * src1->i[2];
698 dst->i[3] = src0->i[3] * src1->i[3];
699 }
700
701 static void
702 micro_imul64(
703 union tgsi_exec_channel *dst0,
704 union tgsi_exec_channel *dst1,
705 const union tgsi_exec_channel *src0,
706 const union tgsi_exec_channel *src1 )
707 {
708 dst1->i[0] = src0->i[0] * src1->i[0];
709 dst1->i[1] = src0->i[1] * src1->i[1];
710 dst1->i[2] = src0->i[2] * src1->i[2];
711 dst1->i[3] = src0->i[3] * src1->i[3];
712 dst0->i[0] = 0;
713 dst0->i[1] = 0;
714 dst0->i[2] = 0;
715 dst0->i[3] = 0;
716 }
717
718 static void
719 micro_umul64(
720 union tgsi_exec_channel *dst0,
721 union tgsi_exec_channel *dst1,
722 const union tgsi_exec_channel *src0,
723 const union tgsi_exec_channel *src1 )
724 {
725 dst1->u[0] = src0->u[0] * src1->u[0];
726 dst1->u[1] = src0->u[1] * src1->u[1];
727 dst1->u[2] = src0->u[2] * src1->u[2];
728 dst1->u[3] = src0->u[3] * src1->u[3];
729 dst0->u[0] = 0;
730 dst0->u[1] = 0;
731 dst0->u[2] = 0;
732 dst0->u[3] = 0;
733 }
734
735 static void
736 micro_movc(
737 union tgsi_exec_channel *dst,
738 const union tgsi_exec_channel *src0,
739 const union tgsi_exec_channel *src1,
740 const union tgsi_exec_channel *src2 )
741 {
742 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
743 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
744 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
745 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
746 }
747
748 static void
749 micro_neg(
750 union tgsi_exec_channel *dst,
751 const union tgsi_exec_channel *src )
752 {
753 dst->f[0] = -src->f[0];
754 dst->f[1] = -src->f[1];
755 dst->f[2] = -src->f[2];
756 dst->f[3] = -src->f[3];
757 }
758
759 static void
760 micro_ineg(
761 union tgsi_exec_channel *dst,
762 const union tgsi_exec_channel *src )
763 {
764 dst->i[0] = -src->i[0];
765 dst->i[1] = -src->i[1];
766 dst->i[2] = -src->i[2];
767 dst->i[3] = -src->i[3];
768 }
769
770 static void
771 micro_not(
772 union tgsi_exec_channel *dst,
773 const union tgsi_exec_channel *src )
774 {
775 dst->u[0] = ~src->u[0];
776 dst->u[1] = ~src->u[1];
777 dst->u[2] = ~src->u[2];
778 dst->u[3] = ~src->u[3];
779 }
780
781 static void
782 micro_or(
783 union tgsi_exec_channel *dst,
784 const union tgsi_exec_channel *src0,
785 const union tgsi_exec_channel *src1 )
786 {
787 dst->u[0] = src0->u[0] | src1->u[0];
788 dst->u[1] = src0->u[1] | src1->u[1];
789 dst->u[2] = src0->u[2] | src1->u[2];
790 dst->u[3] = src0->u[3] | src1->u[3];
791 }
792
793 static void
794 micro_pow(
795 union tgsi_exec_channel *dst,
796 const union tgsi_exec_channel *src0,
797 const union tgsi_exec_channel *src1 )
798 {
799 dst->f[0] = powf( src0->f[0], src1->f[0] );
800 dst->f[1] = powf( src0->f[1], src1->f[1] );
801 dst->f[2] = powf( src0->f[2], src1->f[2] );
802 dst->f[3] = powf( src0->f[3], src1->f[3] );
803 }
804
805 static void
806 micro_rnd(
807 union tgsi_exec_channel *dst,
808 const union tgsi_exec_channel *src )
809 {
810 dst->f[0] = floorf( src->f[0] + 0.5f );
811 dst->f[1] = floorf( src->f[1] + 0.5f );
812 dst->f[2] = floorf( src->f[2] + 0.5f );
813 dst->f[3] = floorf( src->f[3] + 0.5f );
814 }
815
816 static void
817 micro_shl(
818 union tgsi_exec_channel *dst,
819 const union tgsi_exec_channel *src0,
820 const union tgsi_exec_channel *src1 )
821 {
822 dst->i[0] = src0->i[0] << src1->i[0];
823 dst->i[1] = src0->i[1] << src1->i[1];
824 dst->i[2] = src0->i[2] << src1->i[2];
825 dst->i[3] = src0->i[3] << src1->i[3];
826 }
827
828 static void
829 micro_ishr(
830 union tgsi_exec_channel *dst,
831 const union tgsi_exec_channel *src0,
832 const union tgsi_exec_channel *src1 )
833 {
834 dst->i[0] = src0->i[0] >> src1->i[0];
835 dst->i[1] = src0->i[1] >> src1->i[1];
836 dst->i[2] = src0->i[2] >> src1->i[2];
837 dst->i[3] = src0->i[3] >> src1->i[3];
838 }
839
840 static void
841 micro_trunc(
842 union tgsi_exec_channel *dst,
843 const union tgsi_exec_channel *src0 )
844 {
845 dst->f[0] = (float) (int) src0->f[0];
846 dst->f[1] = (float) (int) src0->f[1];
847 dst->f[2] = (float) (int) src0->f[2];
848 dst->f[3] = (float) (int) src0->f[3];
849 }
850
851 static void
852 micro_ushr(
853 union tgsi_exec_channel *dst,
854 const union tgsi_exec_channel *src0,
855 const union tgsi_exec_channel *src1 )
856 {
857 dst->u[0] = src0->u[0] >> src1->u[0];
858 dst->u[1] = src0->u[1] >> src1->u[1];
859 dst->u[2] = src0->u[2] >> src1->u[2];
860 dst->u[3] = src0->u[3] >> src1->u[3];
861 }
862
863 static void
864 micro_sin(
865 union tgsi_exec_channel *dst,
866 const union tgsi_exec_channel *src )
867 {
868 dst->f[0] = sinf( src->f[0] );
869 dst->f[1] = sinf( src->f[1] );
870 dst->f[2] = sinf( src->f[2] );
871 dst->f[3] = sinf( src->f[3] );
872 }
873
874 static void
875 micro_sqrt( union tgsi_exec_channel *dst,
876 const union tgsi_exec_channel *src )
877 {
878 dst->f[0] = sqrtf( src->f[0] );
879 dst->f[1] = sqrtf( src->f[1] );
880 dst->f[2] = sqrtf( src->f[2] );
881 dst->f[3] = sqrtf( src->f[3] );
882 }
883
884 static void
885 micro_sub(
886 union tgsi_exec_channel *dst,
887 const union tgsi_exec_channel *src0,
888 const union tgsi_exec_channel *src1 )
889 {
890 dst->f[0] = src0->f[0] - src1->f[0];
891 dst->f[1] = src0->f[1] - src1->f[1];
892 dst->f[2] = src0->f[2] - src1->f[2];
893 dst->f[3] = src0->f[3] - src1->f[3];
894 }
895
896 static void
897 micro_u2f(
898 union tgsi_exec_channel *dst,
899 const union tgsi_exec_channel *src )
900 {
901 dst->f[0] = (float) src->u[0];
902 dst->f[1] = (float) src->u[1];
903 dst->f[2] = (float) src->u[2];
904 dst->f[3] = (float) src->u[3];
905 }
906
907 static void
908 micro_xor(
909 union tgsi_exec_channel *dst,
910 const union tgsi_exec_channel *src0,
911 const union tgsi_exec_channel *src1 )
912 {
913 dst->u[0] = src0->u[0] ^ src1->u[0];
914 dst->u[1] = src0->u[1] ^ src1->u[1];
915 dst->u[2] = src0->u[2] ^ src1->u[2];
916 dst->u[3] = src0->u[3] ^ src1->u[3];
917 }
918
919 static void
920 fetch_src_file_channel(
921 const struct tgsi_exec_machine *mach,
922 const uint file,
923 const uint swizzle,
924 const union tgsi_exec_channel *index,
925 union tgsi_exec_channel *chan )
926 {
927 switch( swizzle ) {
928 case TGSI_EXTSWIZZLE_X:
929 case TGSI_EXTSWIZZLE_Y:
930 case TGSI_EXTSWIZZLE_Z:
931 case TGSI_EXTSWIZZLE_W:
932 switch( file ) {
933 case TGSI_FILE_CONSTANT:
934 chan->f[0] = mach->Consts[index->i[0]][swizzle];
935 chan->f[1] = mach->Consts[index->i[1]][swizzle];
936 chan->f[2] = mach->Consts[index->i[2]][swizzle];
937 chan->f[3] = mach->Consts[index->i[3]][swizzle];
938 break;
939
940 case TGSI_FILE_INPUT:
941 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
942 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
943 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
944 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
945 break;
946
947 case TGSI_FILE_TEMPORARY:
948 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
949 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
950 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
951 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
952 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
953 break;
954
955 case TGSI_FILE_IMMEDIATE:
956 assert( index->i[0] < (int) mach->ImmLimit );
957 chan->f[0] = mach->Imms[index->i[0]][swizzle];
958 assert( index->i[1] < (int) mach->ImmLimit );
959 chan->f[1] = mach->Imms[index->i[1]][swizzle];
960 assert( index->i[2] < (int) mach->ImmLimit );
961 chan->f[2] = mach->Imms[index->i[2]][swizzle];
962 assert( index->i[3] < (int) mach->ImmLimit );
963 chan->f[3] = mach->Imms[index->i[3]][swizzle];
964 break;
965
966 case TGSI_FILE_ADDRESS:
967 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
968 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
969 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
970 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
971 break;
972
973 case TGSI_FILE_OUTPUT:
974 /* vertex/fragment output vars can be read too */
975 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
976 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
977 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
978 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
979 break;
980
981 default:
982 assert( 0 );
983 }
984 break;
985
986 case TGSI_EXTSWIZZLE_ZERO:
987 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
988 break;
989
990 case TGSI_EXTSWIZZLE_ONE:
991 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
992 break;
993
994 default:
995 assert( 0 );
996 }
997 }
998
999 static void
1000 fetch_source(
1001 const struct tgsi_exec_machine *mach,
1002 union tgsi_exec_channel *chan,
1003 const struct tgsi_full_src_register *reg,
1004 const uint chan_index )
1005 {
1006 union tgsi_exec_channel index;
1007 uint swizzle;
1008
1009 index.i[0] =
1010 index.i[1] =
1011 index.i[2] =
1012 index.i[3] = reg->SrcRegister.Index;
1013
1014 if (reg->SrcRegister.Indirect) {
1015 union tgsi_exec_channel index2;
1016 union tgsi_exec_channel indir_index;
1017
1018 index2.i[0] =
1019 index2.i[1] =
1020 index2.i[2] =
1021 index2.i[3] = reg->SrcRegisterInd.Index;
1022
1023 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1024 fetch_src_file_channel(
1025 mach,
1026 reg->SrcRegisterInd.File,
1027 swizzle,
1028 &index2,
1029 &indir_index );
1030
1031 index.i[0] += indir_index.i[0];
1032 index.i[1] += indir_index.i[1];
1033 index.i[2] += indir_index.i[2];
1034 index.i[3] += indir_index.i[3];
1035 }
1036
1037 if( reg->SrcRegister.Dimension ) {
1038 switch( reg->SrcRegister.File ) {
1039 case TGSI_FILE_INPUT:
1040 index.i[0] *= 17;
1041 index.i[1] *= 17;
1042 index.i[2] *= 17;
1043 index.i[3] *= 17;
1044 break;
1045 case TGSI_FILE_CONSTANT:
1046 index.i[0] *= 4096;
1047 index.i[1] *= 4096;
1048 index.i[2] *= 4096;
1049 index.i[3] *= 4096;
1050 break;
1051 default:
1052 assert( 0 );
1053 }
1054
1055 index.i[0] += reg->SrcRegisterDim.Index;
1056 index.i[1] += reg->SrcRegisterDim.Index;
1057 index.i[2] += reg->SrcRegisterDim.Index;
1058 index.i[3] += reg->SrcRegisterDim.Index;
1059
1060 if (reg->SrcRegisterDim.Indirect) {
1061 union tgsi_exec_channel index2;
1062 union tgsi_exec_channel indir_index;
1063
1064 index2.i[0] =
1065 index2.i[1] =
1066 index2.i[2] =
1067 index2.i[3] = reg->SrcRegisterDimInd.Index;
1068
1069 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1070 fetch_src_file_channel(
1071 mach,
1072 reg->SrcRegisterDimInd.File,
1073 swizzle,
1074 &index2,
1075 &indir_index );
1076
1077 index.i[0] += indir_index.i[0];
1078 index.i[1] += indir_index.i[1];
1079 index.i[2] += indir_index.i[2];
1080 index.i[3] += indir_index.i[3];
1081 }
1082 }
1083
1084 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1085 fetch_src_file_channel(
1086 mach,
1087 reg->SrcRegister.File,
1088 swizzle,
1089 &index,
1090 chan );
1091
1092 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1093 case TGSI_UTIL_SIGN_CLEAR:
1094 micro_abs( chan, chan );
1095 break;
1096
1097 case TGSI_UTIL_SIGN_SET:
1098 micro_abs( chan, chan );
1099 micro_neg( chan, chan );
1100 break;
1101
1102 case TGSI_UTIL_SIGN_TOGGLE:
1103 micro_neg( chan, chan );
1104 break;
1105
1106 case TGSI_UTIL_SIGN_KEEP:
1107 break;
1108 }
1109
1110 if (reg->SrcRegisterExtMod.Complement) {
1111 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1112 }
1113 }
1114
1115 static void
1116 store_dest(
1117 struct tgsi_exec_machine *mach,
1118 const union tgsi_exec_channel *chan,
1119 const struct tgsi_full_dst_register *reg,
1120 const struct tgsi_full_instruction *inst,
1121 uint chan_index )
1122 {
1123 uint i;
1124 union tgsi_exec_channel null;
1125 union tgsi_exec_channel *dst;
1126 uint execmask = mach->ExecMask;
1127
1128 switch (reg->DstRegister.File) {
1129 case TGSI_FILE_NULL:
1130 dst = &null;
1131 break;
1132
1133 case TGSI_FILE_OUTPUT:
1134 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1135 + reg->DstRegister.Index].xyzw[chan_index];
1136 break;
1137
1138 case TGSI_FILE_TEMPORARY:
1139 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1140 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1141 break;
1142
1143 case TGSI_FILE_ADDRESS:
1144 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1145 break;
1146
1147 default:
1148 assert( 0 );
1149 return;
1150 }
1151
1152 if (inst->InstructionExtNv.CondFlowEnable) {
1153 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1154 uint swizzle;
1155 uint shift;
1156 uint mask;
1157 uint test;
1158
1159 /* Only CC0 supported.
1160 */
1161 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1162
1163 switch (chan_index) {
1164 case CHAN_X:
1165 swizzle = inst->InstructionExtNv.CondSwizzleX;
1166 break;
1167 case CHAN_Y:
1168 swizzle = inst->InstructionExtNv.CondSwizzleY;
1169 break;
1170 case CHAN_Z:
1171 swizzle = inst->InstructionExtNv.CondSwizzleZ;
1172 break;
1173 case CHAN_W:
1174 swizzle = inst->InstructionExtNv.CondSwizzleW;
1175 break;
1176 default:
1177 assert( 0 );
1178 return;
1179 }
1180
1181 switch (swizzle) {
1182 case TGSI_SWIZZLE_X:
1183 shift = TGSI_EXEC_CC_X_SHIFT;
1184 mask = TGSI_EXEC_CC_X_MASK;
1185 break;
1186 case TGSI_SWIZZLE_Y:
1187 shift = TGSI_EXEC_CC_Y_SHIFT;
1188 mask = TGSI_EXEC_CC_Y_MASK;
1189 break;
1190 case TGSI_SWIZZLE_Z:
1191 shift = TGSI_EXEC_CC_Z_SHIFT;
1192 mask = TGSI_EXEC_CC_Z_MASK;
1193 break;
1194 case TGSI_SWIZZLE_W:
1195 shift = TGSI_EXEC_CC_W_SHIFT;
1196 mask = TGSI_EXEC_CC_W_MASK;
1197 break;
1198 default:
1199 assert( 0 );
1200 return;
1201 }
1202
1203 switch (inst->InstructionExtNv.CondMask) {
1204 case TGSI_CC_GT:
1205 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1206 for (i = 0; i < QUAD_SIZE; i++)
1207 if (cc->u[i] & test)
1208 execmask &= ~(1 << i);
1209 break;
1210
1211 case TGSI_CC_EQ:
1212 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1213 for (i = 0; i < QUAD_SIZE; i++)
1214 if (cc->u[i] & test)
1215 execmask &= ~(1 << i);
1216 break;
1217
1218 case TGSI_CC_LT:
1219 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1220 for (i = 0; i < QUAD_SIZE; i++)
1221 if (cc->u[i] & test)
1222 execmask &= ~(1 << i);
1223 break;
1224
1225 case TGSI_CC_GE:
1226 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1227 for (i = 0; i < QUAD_SIZE; i++)
1228 if (cc->u[i] & test)
1229 execmask &= ~(1 << i);
1230 break;
1231
1232 case TGSI_CC_LE:
1233 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1234 for (i = 0; i < QUAD_SIZE; i++)
1235 if (cc->u[i] & test)
1236 execmask &= ~(1 << i);
1237 break;
1238
1239 case TGSI_CC_NE:
1240 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1241 for (i = 0; i < QUAD_SIZE; i++)
1242 if (cc->u[i] & test)
1243 execmask &= ~(1 << i);
1244 break;
1245
1246 case TGSI_CC_TR:
1247 break;
1248
1249 case TGSI_CC_FL:
1250 for (i = 0; i < QUAD_SIZE; i++)
1251 execmask &= ~(1 << i);
1252 break;
1253
1254 default:
1255 assert( 0 );
1256 return;
1257 }
1258 }
1259
1260 switch (inst->Instruction.Saturate) {
1261 case TGSI_SAT_NONE:
1262 for (i = 0; i < QUAD_SIZE; i++)
1263 if (execmask & (1 << i))
1264 dst->i[i] = chan->i[i];
1265 break;
1266
1267 case TGSI_SAT_ZERO_ONE:
1268 for (i = 0; i < QUAD_SIZE; i++)
1269 if (execmask & (1 << i)) {
1270 if (chan->f[i] < 0.0f)
1271 dst->f[i] = 0.0f;
1272 else if (chan->f[i] > 1.0f)
1273 dst->f[i] = 1.0f;
1274 else
1275 dst->i[i] = chan->i[i];
1276 }
1277 break;
1278
1279 case TGSI_SAT_MINUS_PLUS_ONE:
1280 for (i = 0; i < QUAD_SIZE; i++)
1281 if (execmask & (1 << i)) {
1282 if (chan->f[i] < -1.0f)
1283 dst->f[i] = -1.0f;
1284 else if (chan->f[i] > 1.0f)
1285 dst->f[i] = 1.0f;
1286 else
1287 dst->i[i] = chan->i[i];
1288 }
1289 break;
1290
1291 default:
1292 assert( 0 );
1293 }
1294
1295 if (inst->InstructionExtNv.CondDstUpdate) {
1296 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1297 uint shift;
1298 uint mask;
1299
1300 /* Only CC0 supported.
1301 */
1302 assert( inst->InstructionExtNv.CondDstIndex < 1 );
1303
1304 switch (chan_index) {
1305 case CHAN_X:
1306 shift = TGSI_EXEC_CC_X_SHIFT;
1307 mask = ~TGSI_EXEC_CC_X_MASK;
1308 break;
1309 case CHAN_Y:
1310 shift = TGSI_EXEC_CC_Y_SHIFT;
1311 mask = ~TGSI_EXEC_CC_Y_MASK;
1312 break;
1313 case CHAN_Z:
1314 shift = TGSI_EXEC_CC_Z_SHIFT;
1315 mask = ~TGSI_EXEC_CC_Z_MASK;
1316 break;
1317 case CHAN_W:
1318 shift = TGSI_EXEC_CC_W_SHIFT;
1319 mask = ~TGSI_EXEC_CC_W_MASK;
1320 break;
1321 default:
1322 assert( 0 );
1323 return;
1324 }
1325
1326 for (i = 0; i < QUAD_SIZE; i++)
1327 if (execmask & (1 << i)) {
1328 cc->u[i] &= mask;
1329 if (dst->f[i] < 0.0f)
1330 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1331 else if (dst->f[i] > 0.0f)
1332 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1333 else if (dst->f[i] == 0.0f)
1334 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1335 else
1336 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1337 }
1338 }
1339 }
1340
/**
 * Shorthands for fetch_source() / store_dest().
 *
 * Both macros expect variables named `mach` (the interpreter state) and
 * `inst` (the instruction being executed) to be in scope at the point
 * of use.
 *
 * VAL    pointer to the channel to fill (FETCH) or to write out (STORE)
 * INDEX  index into the instruction's source / destination register list
 * CHAN   channel being fetched or stored
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source (mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN))

#define STORE(VAL,INDEX,CHAN)\
    store_dest (mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1346
1347
1348 /**
1349 * Execute ARB-style KIL which is predicated by a src register.
1350 * Kill fragment if any of the four values is less than zero.
1351 */
1352 static void
1353 exec_kil(struct tgsi_exec_machine *mach,
1354 const struct tgsi_full_instruction *inst)
1355 {
1356 uint uniquemask;
1357 uint chan_index;
1358 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1359 union tgsi_exec_channel r[1];
1360
1361 /* This mask stores component bits that were already tested. Note that
1362 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1363 * tested. */
1364 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1365
1366 for (chan_index = 0; chan_index < 4; chan_index++)
1367 {
1368 uint swizzle;
1369 uint i;
1370
1371 /* unswizzle channel */
1372 swizzle = tgsi_util_get_full_src_register_extswizzle (
1373 &inst->FullSrcRegisters[0],
1374 chan_index);
1375
1376 /* check if the component has not been already tested */
1377 if (uniquemask & (1 << swizzle))
1378 continue;
1379 uniquemask |= 1 << swizzle;
1380
1381 FETCH(&r[0], 0, chan_index);
1382 for (i = 0; i < 4; i++)
1383 if (r[0].f[i] < 0.0f)
1384 kilmask |= 1 << i;
1385 }
1386
1387 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1388 }
1389
1390 /**
1391 * Execute NVIDIA-style KIL which is predicated by a condition code.
1392 * Kill fragment if the condition code is TRUE.
1393 */
1394 static void
1395 exec_kilp(struct tgsi_exec_machine *mach,
1396 const struct tgsi_full_instruction *inst)
1397 {
1398 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1399
1400 if (inst->InstructionExtNv.CondFlowEnable) {
1401 uint swizzle[4];
1402 uint chan_index;
1403
1404 kilmask = 0x0;
1405
1406 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1407 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1408 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1409 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1410
1411 for (chan_index = 0; chan_index < 4; chan_index++)
1412 {
1413 uint i;
1414
1415 for (i = 0; i < 4; i++) {
1416 /* TODO: evaluate the condition code */
1417 if (0)
1418 kilmask |= 1 << i;
1419 }
1420 }
1421 }
1422 else {
1423 /* "unconditional" kil */
1424 kilmask = mach->ExecMask;
1425 }
1426 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1427 }
1428
1429
1430 /*
1431 * Fetch a texel using STR texture coordinates.
1432 */
1433 static void
1434 fetch_texel( struct tgsi_sampler *sampler,
1435 const union tgsi_exec_channel *s,
1436 const union tgsi_exec_channel *t,
1437 const union tgsi_exec_channel *p,
1438 float lodbias, /* XXX should be float[4] */
1439 union tgsi_exec_channel *r,
1440 union tgsi_exec_channel *g,
1441 union tgsi_exec_channel *b,
1442 union tgsi_exec_channel *a )
1443 {
1444 uint j;
1445 float rgba[NUM_CHANNELS][QUAD_SIZE];
1446
1447 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1448
1449 for (j = 0; j < 4; j++) {
1450 r->f[j] = rgba[0][j];
1451 g->f[j] = rgba[1][j];
1452 b->f[j] = rgba[2][j];
1453 a->f[j] = rgba[3][j];
1454 }
1455 }
1456
1457
1458 static void
1459 exec_tex(struct tgsi_exec_machine *mach,
1460 const struct tgsi_full_instruction *inst,
1461 boolean biasLod,
1462 boolean projected)
1463 {
1464 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1465 union tgsi_exec_channel r[8];
1466 uint chan_index;
1467 float lodBias;
1468
1469 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1470
1471 switch (inst->InstructionExtTexture.Texture) {
1472 case TGSI_TEXTURE_1D:
1473
1474 FETCH(&r[0], 0, CHAN_X);
1475
1476 if (projected) {
1477 FETCH(&r[1], 0, CHAN_W);
1478 micro_div( &r[0], &r[0], &r[1] );
1479 }
1480
1481 if (biasLod) {
1482 FETCH(&r[1], 0, CHAN_W);
1483 lodBias = r[2].f[0];
1484 }
1485 else
1486 lodBias = 0.0;
1487
1488 fetch_texel(&mach->Samplers[unit],
1489 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1490 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1491 break;
1492
1493 case TGSI_TEXTURE_2D:
1494 case TGSI_TEXTURE_RECT:
1495
1496 FETCH(&r[0], 0, CHAN_X);
1497 FETCH(&r[1], 0, CHAN_Y);
1498 FETCH(&r[2], 0, CHAN_Z);
1499
1500 if (projected) {
1501 FETCH(&r[3], 0, CHAN_W);
1502 micro_div( &r[0], &r[0], &r[3] );
1503 micro_div( &r[1], &r[1], &r[3] );
1504 micro_div( &r[2], &r[2], &r[3] );
1505 }
1506
1507 if (biasLod) {
1508 FETCH(&r[3], 0, CHAN_W);
1509 lodBias = r[3].f[0];
1510 }
1511 else
1512 lodBias = 0.0;
1513
1514 fetch_texel(&mach->Samplers[unit],
1515 &r[0], &r[1], &r[2], lodBias, /* inputs */
1516 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1517 break;
1518
1519 case TGSI_TEXTURE_3D:
1520 case TGSI_TEXTURE_CUBE:
1521
1522 FETCH(&r[0], 0, CHAN_X);
1523 FETCH(&r[1], 0, CHAN_Y);
1524 FETCH(&r[2], 0, CHAN_Z);
1525
1526 if (projected) {
1527 FETCH(&r[3], 0, CHAN_W);
1528 micro_div( &r[0], &r[0], &r[3] );
1529 micro_div( &r[1], &r[1], &r[3] );
1530 micro_div( &r[2], &r[2], &r[3] );
1531 }
1532
1533 if (biasLod) {
1534 FETCH(&r[3], 0, CHAN_W);
1535 lodBias = r[3].f[0];
1536 }
1537 else
1538 lodBias = 0.0;
1539
1540 fetch_texel(&mach->Samplers[unit],
1541 &r[0], &r[1], &r[2], lodBias,
1542 &r[0], &r[1], &r[2], &r[3]);
1543 break;
1544
1545 default:
1546 assert (0);
1547 }
1548
1549 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1550 STORE( &r[chan_index], 0, chan_index );
1551 }
1552 }
1553
1554
1555 /**
1556 * Evaluate a constant-valued coefficient at the position of the
1557 * current quad.
1558 */
1559 static void
1560 eval_constant_coef(
1561 struct tgsi_exec_machine *mach,
1562 unsigned attrib,
1563 unsigned chan )
1564 {
1565 unsigned i;
1566
1567 for( i = 0; i < QUAD_SIZE; i++ ) {
1568 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1569 }
1570 }
1571
1572 /**
1573 * Evaluate a linear-valued coefficient at the position of the
1574 * current quad.
1575 */
1576 static void
1577 eval_linear_coef(
1578 struct tgsi_exec_machine *mach,
1579 unsigned attrib,
1580 unsigned chan )
1581 {
1582 const float x = mach->QuadPos.xyzw[0].f[0];
1583 const float y = mach->QuadPos.xyzw[1].f[0];
1584 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1585 const float dady = mach->InterpCoefs[attrib].dady[chan];
1586 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1587 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1588 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1589 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1590 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1591 }
1592
1593 /**
1594 * Evaluate a perspective-valued coefficient at the position of the
1595 * current quad.
1596 */
1597 static void
1598 eval_perspective_coef(
1599 struct tgsi_exec_machine *mach,
1600 unsigned attrib,
1601 unsigned chan )
1602 {
1603 const float x = mach->QuadPos.xyzw[0].f[0];
1604 const float y = mach->QuadPos.xyzw[1].f[0];
1605 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1606 const float dady = mach->InterpCoefs[attrib].dady[chan];
1607 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1608 const float *w = mach->QuadPos.xyzw[3].f;
1609 /* divide by W here */
1610 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1611 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1612 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1613 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1614 }
1615
1616
1617 typedef void (* eval_coef_func)(
1618 struct tgsi_exec_machine *mach,
1619 unsigned attrib,
1620 unsigned chan );
1621
1622 static void
1623 exec_declaration(
1624 struct tgsi_exec_machine *mach,
1625 const struct tgsi_full_declaration *decl )
1626 {
1627 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1628 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1629 unsigned first, last, mask;
1630 eval_coef_func eval;
1631
1632 first = decl->DeclarationRange.First;
1633 last = decl->DeclarationRange.Last;
1634 mask = decl->Declaration.UsageMask;
1635
1636 switch( decl->Declaration.Interpolate ) {
1637 case TGSI_INTERPOLATE_CONSTANT:
1638 eval = eval_constant_coef;
1639 break;
1640
1641 case TGSI_INTERPOLATE_LINEAR:
1642 eval = eval_linear_coef;
1643 break;
1644
1645 case TGSI_INTERPOLATE_PERSPECTIVE:
1646 eval = eval_perspective_coef;
1647 break;
1648
1649 default:
1650 assert( 0 );
1651 }
1652
1653 if( mask == TGSI_WRITEMASK_XYZW ) {
1654 unsigned i, j;
1655
1656 for( i = first; i <= last; i++ ) {
1657 for( j = 0; j < NUM_CHANNELS; j++ ) {
1658 eval( mach, i, j );
1659 }
1660 }
1661 }
1662 else {
1663 unsigned i, j;
1664
1665 for( j = 0; j < NUM_CHANNELS; j++ ) {
1666 if( mask & (1 << j) ) {
1667 for( i = first; i <= last; i++ ) {
1668 eval( mach, i, j );
1669 }
1670 }
1671 }
1672 }
1673 }
1674 }
1675 }
1676
1677 static void
1678 exec_instruction(
1679 struct tgsi_exec_machine *mach,
1680 const struct tgsi_full_instruction *inst,
1681 int *pc )
1682 {
1683 uint chan_index;
1684 union tgsi_exec_channel r[8];
1685
1686 (*pc)++;
1687
1688 switch (inst->Instruction.Opcode) {
1689 case TGSI_OPCODE_ARL:
1690 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1691 FETCH( &r[0], 0, chan_index );
1692 micro_f2it( &r[0], &r[0] );
1693 STORE( &r[0], 0, chan_index );
1694 }
1695 break;
1696
1697 case TGSI_OPCODE_MOV:
1698 case TGSI_OPCODE_SWZ:
1699 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1700 FETCH( &r[0], 0, chan_index );
1701 STORE( &r[0], 0, chan_index );
1702 }
1703 break;
1704
1705 case TGSI_OPCODE_LIT:
1706 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1707 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1708 }
1709
1710 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1711 FETCH( &r[0], 0, CHAN_X );
1712 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1713 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1714 STORE( &r[0], 0, CHAN_Y );
1715 }
1716
1717 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1718 FETCH( &r[1], 0, CHAN_Y );
1719 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1720
1721 FETCH( &r[2], 0, CHAN_W );
1722 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1723 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1724 micro_pow( &r[1], &r[1], &r[2] );
1725 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1726 STORE( &r[0], 0, CHAN_Z );
1727 }
1728 }
1729
1730 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1731 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1732 }
1733 break;
1734
1735 case TGSI_OPCODE_RCP:
1736 /* TGSI_OPCODE_RECIP */
1737 FETCH( &r[0], 0, CHAN_X );
1738 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1739 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1740 STORE( &r[0], 0, chan_index );
1741 }
1742 break;
1743
1744 case TGSI_OPCODE_RSQ:
1745 /* TGSI_OPCODE_RECIPSQRT */
1746 FETCH( &r[0], 0, CHAN_X );
1747 micro_sqrt( &r[0], &r[0] );
1748 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1749 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1750 STORE( &r[0], 0, chan_index );
1751 }
1752 break;
1753
1754 case TGSI_OPCODE_EXP:
1755 FETCH( &r[0], 0, CHAN_X );
1756 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1757 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1758 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1759 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1760 }
1761 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1762 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1763 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1764 }
1765 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1766 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1767 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1768 }
1769 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1770 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1771 }
1772 break;
1773
1774 case TGSI_OPCODE_LOG:
1775 FETCH( &r[0], 0, CHAN_X );
1776 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1777 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1778 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1779 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1780 STORE( &r[0], 0, CHAN_X );
1781 }
1782 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1783 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1784 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1785 STORE( &r[0], 0, CHAN_Y );
1786 }
1787 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1788 STORE( &r[1], 0, CHAN_Z );
1789 }
1790 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1791 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1792 }
1793 break;
1794
1795 case TGSI_OPCODE_MUL:
1796 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1797 {
1798 FETCH(&r[0], 0, chan_index);
1799 FETCH(&r[1], 1, chan_index);
1800
1801 micro_mul( &r[0], &r[0], &r[1] );
1802
1803 STORE(&r[0], 0, chan_index);
1804 }
1805 break;
1806
1807 case TGSI_OPCODE_ADD:
1808 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1809 FETCH( &r[0], 0, chan_index );
1810 FETCH( &r[1], 1, chan_index );
1811 micro_add( &r[0], &r[0], &r[1] );
1812 STORE( &r[0], 0, chan_index );
1813 }
1814 break;
1815
1816 case TGSI_OPCODE_DP3:
1817 /* TGSI_OPCODE_DOT3 */
1818 FETCH( &r[0], 0, CHAN_X );
1819 FETCH( &r[1], 1, CHAN_X );
1820 micro_mul( &r[0], &r[0], &r[1] );
1821
1822 FETCH( &r[1], 0, CHAN_Y );
1823 FETCH( &r[2], 1, CHAN_Y );
1824 micro_mul( &r[1], &r[1], &r[2] );
1825 micro_add( &r[0], &r[0], &r[1] );
1826
1827 FETCH( &r[1], 0, CHAN_Z );
1828 FETCH( &r[2], 1, CHAN_Z );
1829 micro_mul( &r[1], &r[1], &r[2] );
1830 micro_add( &r[0], &r[0], &r[1] );
1831
1832 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1833 STORE( &r[0], 0, chan_index );
1834 }
1835 break;
1836
1837 case TGSI_OPCODE_DP4:
1838 /* TGSI_OPCODE_DOT4 */
1839 FETCH(&r[0], 0, CHAN_X);
1840 FETCH(&r[1], 1, CHAN_X);
1841
1842 micro_mul( &r[0], &r[0], &r[1] );
1843
1844 FETCH(&r[1], 0, CHAN_Y);
1845 FETCH(&r[2], 1, CHAN_Y);
1846
1847 micro_mul( &r[1], &r[1], &r[2] );
1848 micro_add( &r[0], &r[0], &r[1] );
1849
1850 FETCH(&r[1], 0, CHAN_Z);
1851 FETCH(&r[2], 1, CHAN_Z);
1852
1853 micro_mul( &r[1], &r[1], &r[2] );
1854 micro_add( &r[0], &r[0], &r[1] );
1855
1856 FETCH(&r[1], 0, CHAN_W);
1857 FETCH(&r[2], 1, CHAN_W);
1858
1859 micro_mul( &r[1], &r[1], &r[2] );
1860 micro_add( &r[0], &r[0], &r[1] );
1861
1862 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1863 STORE( &r[0], 0, chan_index );
1864 }
1865 break;
1866
1867 case TGSI_OPCODE_DST:
1868 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1869 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1870 }
1871
1872 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1873 FETCH( &r[0], 0, CHAN_Y );
1874 FETCH( &r[1], 1, CHAN_Y);
1875 micro_mul( &r[0], &r[0], &r[1] );
1876 STORE( &r[0], 0, CHAN_Y );
1877 }
1878
1879 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1880 FETCH( &r[0], 0, CHAN_Z );
1881 STORE( &r[0], 0, CHAN_Z );
1882 }
1883
1884 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1885 FETCH( &r[0], 1, CHAN_W );
1886 STORE( &r[0], 0, CHAN_W );
1887 }
1888 break;
1889
1890 case TGSI_OPCODE_MIN:
1891 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1892 FETCH(&r[0], 0, chan_index);
1893 FETCH(&r[1], 1, chan_index);
1894
1895 /* XXX use micro_min()?? */
1896 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1897
1898 STORE(&r[0], 0, chan_index);
1899 }
1900 break;
1901
1902 case TGSI_OPCODE_MAX:
1903 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1904 FETCH(&r[0], 0, chan_index);
1905 FETCH(&r[1], 1, chan_index);
1906
1907 /* XXX use micro_max()?? */
1908 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1909
1910 STORE(&r[0], 0, chan_index );
1911 }
1912 break;
1913
1914 case TGSI_OPCODE_SLT:
1915 /* TGSI_OPCODE_SETLT */
1916 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1917 FETCH( &r[0], 0, chan_index );
1918 FETCH( &r[1], 1, chan_index );
1919 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1920 STORE( &r[0], 0, chan_index );
1921 }
1922 break;
1923
1924 case TGSI_OPCODE_SGE:
1925 /* TGSI_OPCODE_SETGE */
1926 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1927 FETCH( &r[0], 0, chan_index );
1928 FETCH( &r[1], 1, chan_index );
1929 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1930 STORE( &r[0], 0, chan_index );
1931 }
1932 break;
1933
1934 case TGSI_OPCODE_MAD:
1935 /* TGSI_OPCODE_MADD */
1936 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1937 FETCH( &r[0], 0, chan_index );
1938 FETCH( &r[1], 1, chan_index );
1939 micro_mul( &r[0], &r[0], &r[1] );
1940 FETCH( &r[1], 2, chan_index );
1941 micro_add( &r[0], &r[0], &r[1] );
1942 STORE( &r[0], 0, chan_index );
1943 }
1944 break;
1945
1946 case TGSI_OPCODE_SUB:
1947 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1948 FETCH(&r[0], 0, chan_index);
1949 FETCH(&r[1], 1, chan_index);
1950
1951 micro_sub( &r[0], &r[0], &r[1] );
1952
1953 STORE(&r[0], 0, chan_index);
1954 }
1955 break;
1956
1957 case TGSI_OPCODE_LERP:
1958 /* TGSI_OPCODE_LRP */
1959 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1960 FETCH(&r[0], 0, chan_index);
1961 FETCH(&r[1], 1, chan_index);
1962 FETCH(&r[2], 2, chan_index);
1963
1964 micro_sub( &r[1], &r[1], &r[2] );
1965 micro_mul( &r[0], &r[0], &r[1] );
1966 micro_add( &r[0], &r[0], &r[2] );
1967
1968 STORE(&r[0], 0, chan_index);
1969 }
1970 break;
1971
1972 case TGSI_OPCODE_CND:
1973 assert (0);
1974 break;
1975
1976 case TGSI_OPCODE_CND0:
1977 assert (0);
1978 break;
1979
1980 case TGSI_OPCODE_DOT2ADD:
1981 /* TGSI_OPCODE_DP2A */
1982 assert (0);
1983 break;
1984
1985 case TGSI_OPCODE_INDEX:
1986 assert (0);
1987 break;
1988
1989 case TGSI_OPCODE_NEGATE:
1990 assert (0);
1991 break;
1992
1993 case TGSI_OPCODE_FRAC:
1994 /* TGSI_OPCODE_FRC */
1995 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1996 FETCH( &r[0], 0, chan_index );
1997 micro_frc( &r[0], &r[0] );
1998 STORE( &r[0], 0, chan_index );
1999 }
2000 break;
2001
2002 case TGSI_OPCODE_CLAMP:
2003 assert (0);
2004 break;
2005
2006 case TGSI_OPCODE_FLOOR:
2007 /* TGSI_OPCODE_FLR */
2008 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2009 FETCH( &r[0], 0, chan_index );
2010 micro_flr( &r[0], &r[0] );
2011 STORE( &r[0], 0, chan_index );
2012 }
2013 break;
2014
2015 case TGSI_OPCODE_ROUND:
2016 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2017 FETCH( &r[0], 0, chan_index );
2018 micro_rnd( &r[0], &r[0] );
2019 STORE( &r[0], 0, chan_index );
2020 }
2021 break;
2022
2023 case TGSI_OPCODE_EXPBASE2:
2024 /* TGSI_OPCODE_EX2 */
2025 FETCH(&r[0], 0, CHAN_X);
2026
2027 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2028
2029 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2030 STORE( &r[0], 0, chan_index );
2031 }
2032 break;
2033
2034 case TGSI_OPCODE_LOGBASE2:
2035 /* TGSI_OPCODE_LG2 */
2036 FETCH( &r[0], 0, CHAN_X );
2037 micro_lg2( &r[0], &r[0] );
2038 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2039 STORE( &r[0], 0, chan_index );
2040 }
2041 break;
2042
2043 case TGSI_OPCODE_POWER:
2044 /* TGSI_OPCODE_POW */
2045 FETCH(&r[0], 0, CHAN_X);
2046 FETCH(&r[1], 1, CHAN_X);
2047
2048 micro_pow( &r[0], &r[0], &r[1] );
2049
2050 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2051 STORE( &r[0], 0, chan_index );
2052 }
2053 break;
2054
2055 case TGSI_OPCODE_CROSSPRODUCT:
2056 /* TGSI_OPCODE_XPD */
2057 FETCH(&r[0], 0, CHAN_Y);
2058 FETCH(&r[1], 1, CHAN_Z);
2059
2060 micro_mul( &r[2], &r[0], &r[1] );
2061
2062 FETCH(&r[3], 0, CHAN_Z);
2063 FETCH(&r[4], 1, CHAN_Y);
2064
2065 micro_mul( &r[5], &r[3], &r[4] );
2066 micro_sub( &r[2], &r[2], &r[5] );
2067
2068 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2069 STORE( &r[2], 0, CHAN_X );
2070 }
2071
2072 FETCH(&r[2], 1, CHAN_X);
2073
2074 micro_mul( &r[3], &r[3], &r[2] );
2075
2076 FETCH(&r[5], 0, CHAN_X);
2077
2078 micro_mul( &r[1], &r[1], &r[5] );
2079 micro_sub( &r[3], &r[3], &r[1] );
2080
2081 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2082 STORE( &r[3], 0, CHAN_Y );
2083 }
2084
2085 micro_mul( &r[5], &r[5], &r[4] );
2086 micro_mul( &r[0], &r[0], &r[2] );
2087 micro_sub( &r[5], &r[5], &r[0] );
2088
2089 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2090 STORE( &r[5], 0, CHAN_Z );
2091 }
2092
2093 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2094 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2095 }
2096 break;
2097
2098 case TGSI_OPCODE_MULTIPLYMATRIX:
2099 assert (0);
2100 break;
2101
2102 case TGSI_OPCODE_ABS:
2103 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2104 FETCH(&r[0], 0, chan_index);
2105
2106 micro_abs( &r[0], &r[0] );
2107
2108 STORE(&r[0], 0, chan_index);
2109 }
2110 break;
2111
2112 case TGSI_OPCODE_RCC:
2113 assert (0);
2114 break;
2115
2116 case TGSI_OPCODE_DPH:
2117 FETCH(&r[0], 0, CHAN_X);
2118 FETCH(&r[1], 1, CHAN_X);
2119
2120 micro_mul( &r[0], &r[0], &r[1] );
2121
2122 FETCH(&r[1], 0, CHAN_Y);
2123 FETCH(&r[2], 1, CHAN_Y);
2124
2125 micro_mul( &r[1], &r[1], &r[2] );
2126 micro_add( &r[0], &r[0], &r[1] );
2127
2128 FETCH(&r[1], 0, CHAN_Z);
2129 FETCH(&r[2], 1, CHAN_Z);
2130
2131 micro_mul( &r[1], &r[1], &r[2] );
2132 micro_add( &r[0], &r[0], &r[1] );
2133
2134 FETCH(&r[1], 1, CHAN_W);
2135
2136 micro_add( &r[0], &r[0], &r[1] );
2137
2138 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2139 STORE( &r[0], 0, chan_index );
2140 }
2141 break;
2142
2143 case TGSI_OPCODE_COS:
2144 FETCH(&r[0], 0, CHAN_X);
2145
2146 micro_cos( &r[0], &r[0] );
2147
2148 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2149 STORE( &r[0], 0, chan_index );
2150 }
2151 break;
2152
2153 case TGSI_OPCODE_DDX:
2154 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2155 FETCH( &r[0], 0, chan_index );
2156 micro_ddx( &r[0], &r[0] );
2157 STORE( &r[0], 0, chan_index );
2158 }
2159 break;
2160
2161 case TGSI_OPCODE_DDY:
2162 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2163 FETCH( &r[0], 0, chan_index );
2164 micro_ddy( &r[0], &r[0] );
2165 STORE( &r[0], 0, chan_index );
2166 }
2167 break;
2168
2169 case TGSI_OPCODE_KILP:
2170 exec_kilp (mach, inst);
2171 break;
2172
2173 case TGSI_OPCODE_KIL:
2174 exec_kil (mach, inst);
2175 break;
2176
2177 case TGSI_OPCODE_PK2H:
2178 assert (0);
2179 break;
2180
2181 case TGSI_OPCODE_PK2US:
2182 assert (0);
2183 break;
2184
2185 case TGSI_OPCODE_PK4B:
2186 assert (0);
2187 break;
2188
2189 case TGSI_OPCODE_PK4UB:
2190 assert (0);
2191 break;
2192
2193 case TGSI_OPCODE_RFL:
2194 assert (0);
2195 break;
2196
2197 case TGSI_OPCODE_SEQ:
2198 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2199 FETCH( &r[0], 0, chan_index );
2200 FETCH( &r[1], 1, chan_index );
2201 micro_eq( &r[0], &r[0], &r[1],
2202 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2203 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2204 STORE( &r[0], 0, chan_index );
2205 }
2206 break;
2207
2208 case TGSI_OPCODE_SFL:
2209 assert (0);
2210 break;
2211
2212 case TGSI_OPCODE_SGT:
2213 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2214 FETCH( &r[0], 0, chan_index );
2215 FETCH( &r[1], 1, chan_index );
2216 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2217 STORE( &r[0], 0, chan_index );
2218 }
2219 break;
2220
2221 case TGSI_OPCODE_SIN:
2222 FETCH( &r[0], 0, CHAN_X );
2223 micro_sin( &r[0], &r[0] );
2224 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2225 STORE( &r[0], 0, chan_index );
2226 }
2227 break;
2228
2229 case TGSI_OPCODE_SLE:
2230 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2231 FETCH( &r[0], 0, chan_index );
2232 FETCH( &r[1], 1, chan_index );
2233 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2234 STORE( &r[0], 0, chan_index );
2235 }
2236 break;
2237
2238 case TGSI_OPCODE_SNE:
2239 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2240 FETCH( &r[0], 0, chan_index );
2241 FETCH( &r[1], 1, chan_index );
2242 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2243 STORE( &r[0], 0, chan_index );
2244 }
2245 break;
2246
2247 case TGSI_OPCODE_STR:
2248 assert (0);
2249 break;
2250
2251 case TGSI_OPCODE_TEX:
2252 /* simple texture lookup */
2253 /* src[0] = texcoord */
2254 /* src[1] = sampler unit */
2255 exec_tex(mach, inst, FALSE, FALSE);
2256 break;
2257
2258 case TGSI_OPCODE_TXB:
2259 /* Texture lookup with lod bias */
2260 /* src[0] = texcoord (src[0].w = LOD bias) */
2261 /* src[1] = sampler unit */
2262 exec_tex(mach, inst, TRUE, FALSE);
2263 break;
2264
2265 case TGSI_OPCODE_TXD:
2266 /* Texture lookup with explicit partial derivatives */
2267 /* src[0] = texcoord */
2268 /* src[1] = d[strq]/dx */
2269 /* src[2] = d[strq]/dy */
2270 /* src[3] = sampler unit */
2271 assert (0);
2272 break;
2273
2274 case TGSI_OPCODE_TXL:
2275 /* Texture lookup with explicit LOD */
2276 /* src[0] = texcoord (src[0].w = LOD) */
2277 /* src[1] = sampler unit */
2278 exec_tex(mach, inst, TRUE, FALSE);
2279 break;
2280
2281 case TGSI_OPCODE_TXP:
2282 /* Texture lookup with projection */
2283 /* src[0] = texcoord (src[0].w = projection) */
2284 /* src[1] = sampler unit */
2285 exec_tex(mach, inst, FALSE, TRUE);
2286 break;
2287
2288 case TGSI_OPCODE_UP2H:
2289 assert (0);
2290 break;
2291
2292 case TGSI_OPCODE_UP2US:
2293 assert (0);
2294 break;
2295
2296 case TGSI_OPCODE_UP4B:
2297 assert (0);
2298 break;
2299
2300 case TGSI_OPCODE_UP4UB:
2301 assert (0);
2302 break;
2303
2304 case TGSI_OPCODE_X2D:
2305 assert (0);
2306 break;
2307
2308 case TGSI_OPCODE_ARA:
2309 assert (0);
2310 break;
2311
2312 case TGSI_OPCODE_ARR:
2313 assert (0);
2314 break;
2315
2316 case TGSI_OPCODE_BRA:
2317 assert (0);
2318 break;
2319
2320 case TGSI_OPCODE_CAL:
2321 /* skip the call if no execution channels are enabled */
2322 if (mach->ExecMask) {
2323 /* do the call */
2324
2325 /* push the Cond, Loop, Cont stacks */
2326 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2327 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2328 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2329 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2330 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2331 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2332
2333 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2334 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2335
2336 /* note that PC was already incremented above */
2337 mach->CallStack[mach->CallStackTop++] = *pc;
2338 *pc = inst->InstructionExtLabel.Label;
2339 }
2340 break;
2341
2342 case TGSI_OPCODE_RET:
2343 mach->FuncMask &= ~mach->ExecMask;
2344 UPDATE_EXEC_MASK(mach);
2345
2346 if (mach->FuncMask == 0x0) {
2347 /* really return now (otherwise, keep executing */
2348
2349 if (mach->CallStackTop == 0) {
2350 /* returning from main() */
2351 *pc = -1;
2352 return;
2353 }
2354 *pc = mach->CallStack[--mach->CallStackTop];
2355
2356 /* pop the Cond, Loop, Cont stacks */
2357 assert(mach->CondStackTop > 0);
2358 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2359 assert(mach->LoopStackTop > 0);
2360 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2361 assert(mach->ContStackTop > 0);
2362 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2363 assert(mach->FuncStackTop > 0);
2364 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2365
2366 UPDATE_EXEC_MASK(mach);
2367 }
2368 break;
2369
2370 case TGSI_OPCODE_SSG:
2371 assert (0);
2372 break;
2373
2374 case TGSI_OPCODE_CMP:
2375 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2376 FETCH(&r[0], 0, chan_index);
2377 FETCH(&r[1], 1, chan_index);
2378 FETCH(&r[2], 2, chan_index);
2379
2380 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2381
2382 STORE(&r[0], 0, chan_index);
2383 }
2384 break;
2385
2386 case TGSI_OPCODE_SCS:
2387 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2388 FETCH( &r[0], 0, CHAN_X );
2389 }
2390 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2391 micro_cos( &r[1], &r[0] );
2392 STORE( &r[1], 0, CHAN_X );
2393 }
2394 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2395 micro_sin( &r[1], &r[0] );
2396 STORE( &r[1], 0, CHAN_Y );
2397 }
2398 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2399 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2400 }
2401 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2402 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2403 }
2404 break;
2405
2406 case TGSI_OPCODE_NRM:
2407 assert (0);
2408 break;
2409
2410 case TGSI_OPCODE_DIV:
2411 assert( 0 );
2412 break;
2413
2414 case TGSI_OPCODE_DP2:
2415 FETCH( &r[0], 0, CHAN_X );
2416 FETCH( &r[1], 1, CHAN_X );
2417 micro_mul( &r[0], &r[0], &r[1] );
2418
2419 FETCH( &r[1], 0, CHAN_Y );
2420 FETCH( &r[2], 1, CHAN_Y );
2421 micro_mul( &r[1], &r[1], &r[2] );
2422 micro_add( &r[0], &r[0], &r[1] );
2423
2424 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2425 STORE( &r[0], 0, chan_index );
2426 }
2427 break;
2428
2429 case TGSI_OPCODE_IF:
2430 /* push CondMask */
2431 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2432 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2433 FETCH( &r[0], 0, CHAN_X );
2434 /* update CondMask */
2435 if( ! r[0].u[0] ) {
2436 mach->CondMask &= ~0x1;
2437 }
2438 if( ! r[0].u[1] ) {
2439 mach->CondMask &= ~0x2;
2440 }
2441 if( ! r[0].u[2] ) {
2442 mach->CondMask &= ~0x4;
2443 }
2444 if( ! r[0].u[3] ) {
2445 mach->CondMask &= ~0x8;
2446 }
2447 UPDATE_EXEC_MASK(mach);
2448 /* Todo: If CondMask==0, jump to ELSE */
2449 break;
2450
2451 case TGSI_OPCODE_ELSE:
2452 /* invert CondMask wrt previous mask */
2453 {
2454 uint prevMask;
2455 assert(mach->CondStackTop > 0);
2456 prevMask = mach->CondStack[mach->CondStackTop - 1];
2457 mach->CondMask = ~mach->CondMask & prevMask;
2458 UPDATE_EXEC_MASK(mach);
2459 /* Todo: If CondMask==0, jump to ENDIF */
2460 }
2461 break;
2462
2463 case TGSI_OPCODE_ENDIF:
2464 /* pop CondMask */
2465 assert(mach->CondStackTop > 0);
2466 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2467 UPDATE_EXEC_MASK(mach);
2468 break;
2469
2470 case TGSI_OPCODE_END:
2471 /* halt execution */
2472 *pc = -1;
2473 break;
2474
2475 case TGSI_OPCODE_REP:
2476 assert (0);
2477 break;
2478
2479 case TGSI_OPCODE_ENDREP:
2480 assert (0);
2481 break;
2482
2483 case TGSI_OPCODE_PUSHA:
2484 assert (0);
2485 break;
2486
2487 case TGSI_OPCODE_POPA:
2488 assert (0);
2489 break;
2490
2491 case TGSI_OPCODE_CEIL:
2492 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2493 FETCH( &r[0], 0, chan_index );
2494 micro_ceil( &r[0], &r[0] );
2495 STORE( &r[0], 0, chan_index );
2496 }
2497 break;
2498
2499 case TGSI_OPCODE_I2F:
2500 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2501 FETCH( &r[0], 0, chan_index );
2502 micro_i2f( &r[0], &r[0] );
2503 STORE( &r[0], 0, chan_index );
2504 }
2505 break;
2506
2507 case TGSI_OPCODE_NOT:
2508 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2509 FETCH( &r[0], 0, chan_index );
2510 micro_not( &r[0], &r[0] );
2511 STORE( &r[0], 0, chan_index );
2512 }
2513 break;
2514
2515 case TGSI_OPCODE_TRUNC:
2516 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2517 FETCH( &r[0], 0, chan_index );
2518 micro_trunc( &r[0], &r[0] );
2519 STORE( &r[0], 0, chan_index );
2520 }
2521 break;
2522
2523 case TGSI_OPCODE_SHL:
2524 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2525 FETCH( &r[0], 0, chan_index );
2526 FETCH( &r[1], 1, chan_index );
2527 micro_shl( &r[0], &r[0], &r[1] );
2528 STORE( &r[0], 0, chan_index );
2529 }
2530 break;
2531
2532 case TGSI_OPCODE_SHR:
2533 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2534 FETCH( &r[0], 0, chan_index );
2535 FETCH( &r[1], 1, chan_index );
2536 micro_ishr( &r[0], &r[0], &r[1] );
2537 STORE( &r[0], 0, chan_index );
2538 }
2539 break;
2540
2541 case TGSI_OPCODE_AND:
2542 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2543 FETCH( &r[0], 0, chan_index );
2544 FETCH( &r[1], 1, chan_index );
2545 micro_and( &r[0], &r[0], &r[1] );
2546 STORE( &r[0], 0, chan_index );
2547 }
2548 break;
2549
2550 case TGSI_OPCODE_OR:
2551 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2552 FETCH( &r[0], 0, chan_index );
2553 FETCH( &r[1], 1, chan_index );
2554 micro_or( &r[0], &r[0], &r[1] );
2555 STORE( &r[0], 0, chan_index );
2556 }
2557 break;
2558
2559 case TGSI_OPCODE_MOD:
2560 assert (0);
2561 break;
2562
2563 case TGSI_OPCODE_XOR:
2564 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2565 FETCH( &r[0], 0, chan_index );
2566 FETCH( &r[1], 1, chan_index );
2567 micro_xor( &r[0], &r[0], &r[1] );
2568 STORE( &r[0], 0, chan_index );
2569 }
2570 break;
2571
2572 case TGSI_OPCODE_SAD:
2573 assert (0);
2574 break;
2575
2576 case TGSI_OPCODE_TXF:
2577 assert (0);
2578 break;
2579
2580 case TGSI_OPCODE_TXQ:
2581 assert (0);
2582 break;
2583
2584 case TGSI_OPCODE_EMIT:
2585 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2586 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2587 break;
2588
2589 case TGSI_OPCODE_ENDPRIM:
2590 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2591 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2592 break;
2593
2594 case TGSI_OPCODE_LOOP:
2595 /* fall-through (for now) */
2596 case TGSI_OPCODE_BGNLOOP2:
2597 /* push LoopMask and ContMasks */
2598 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2599 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2600 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2601 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2602 break;
2603
2604 case TGSI_OPCODE_ENDLOOP:
2605 /* fall-through (for now at least) */
2606 case TGSI_OPCODE_ENDLOOP2:
2607 /* Restore ContMask, but don't pop */
2608 assert(mach->ContStackTop > 0);
2609 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2610 UPDATE_EXEC_MASK(mach);
2611 if (mach->ExecMask) {
2612 /* repeat loop: jump to instruction just past BGNLOOP */
2613 *pc = inst->InstructionExtLabel.Label + 1;
2614 }
2615 else {
2616 /* exit loop: pop LoopMask */
2617 assert(mach->LoopStackTop > 0);
2618 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2619 /* pop ContMask */
2620 assert(mach->ContStackTop > 0);
2621 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2622 }
2623 UPDATE_EXEC_MASK(mach);
2624 break;
2625
2626 case TGSI_OPCODE_BRK:
2627 /* turn off loop channels for each enabled exec channel */
2628 mach->LoopMask &= ~mach->ExecMask;
2629 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2630 UPDATE_EXEC_MASK(mach);
2631 break;
2632
2633 case TGSI_OPCODE_CONT:
2634 /* turn off cont channels for each enabled exec channel */
2635 mach->ContMask &= ~mach->ExecMask;
2636 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2637 UPDATE_EXEC_MASK(mach);
2638 break;
2639
2640 case TGSI_OPCODE_BGNSUB:
2641 /* no-op */
2642 break;
2643
2644 case TGSI_OPCODE_ENDSUB:
2645 /* no-op */
2646 break;
2647
2648 case TGSI_OPCODE_NOISE1:
2649 assert( 0 );
2650 break;
2651
2652 case TGSI_OPCODE_NOISE2:
2653 assert( 0 );
2654 break;
2655
2656 case TGSI_OPCODE_NOISE3:
2657 assert( 0 );
2658 break;
2659
2660 case TGSI_OPCODE_NOISE4:
2661 assert( 0 );
2662 break;
2663
2664 case TGSI_OPCODE_NOP:
2665 break;
2666
2667 default:
2668 assert( 0 );
2669 }
2670 }
2671
2672
2673 /**
2674 * Run TGSI interpreter.
2675 * \return bitmask of "alive" quad components
2676 */
2677 uint
2678 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2679 {
2680 uint i;
2681 int pc = 0;
2682
2683 mach->CondMask = 0xf;
2684 mach->LoopMask = 0xf;
2685 mach->ContMask = 0xf;
2686 mach->FuncMask = 0xf;
2687 mach->ExecMask = 0xf;
2688
2689 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2690 assert(mach->CondStackTop == 0);
2691 assert(mach->LoopStackTop == 0);
2692 assert(mach->ContStackTop == 0);
2693 assert(mach->CallStackTop == 0);
2694
2695 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2696 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2697
2698 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2699 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2700 mach->Primitives[0] = 0;
2701 }
2702
2703 for (i = 0; i < QUAD_SIZE; i++) {
2704 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2705 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2706 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2707 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2708 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2709 }
2710
2711 /* execute declarations (interpolants) */
2712 for (i = 0; i < mach->NumDeclarations; i++) {
2713 exec_declaration( mach, mach->Declarations+i );
2714 }
2715
2716 /* execute instructions, until pc is set to -1 */
2717 while (pc != -1) {
2718 assert(pc < (int) mach->NumInstructions);
2719 exec_instruction( mach, mach->Instructions + pc, &pc );
2720 }
2721
2722 #if 0
2723 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2724 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2725 /*
2726 * Scale back depth component.
2727 */
2728 for (i = 0; i < 4; i++)
2729 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2730 }
2731 #endif
2732
2733 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2734 }
2735
2736