tgsi: Cosmetic changes.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK). The first three are
 * controlled by the flow control instructions (namely: IF/ELSE/ENDIF,
 * LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_util.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
60
/* Lane positions inside a channel (used by micro_ddx / micro_ddy). */
#define TILE_TOP_LEFT      0
#define TILE_TOP_RIGHT     1
#define TILE_BOTTOM_LEFT   2
#define TILE_BOTTOM_RIGHT  3

/* Component selectors. */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3
70
/*
 * Short aliases for the utility registers.  Each register is addressed by a
 * pair of macros: the _I suffix is the register index, the _C suffix is the
 * channel within that register.
 */

/* Bit-pattern constants (filled in by tgsi_exec_machine_init). */
#define TEMP_0_I          TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C          TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I         TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C         TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I         TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C         TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I         TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C         TGSI_EXEC_TEMP_FFFFFFFF_C

/* Floating-point constants (filled in by tgsi_exec_machine_init). */
#define TEMP_1_I          TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C          TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I          TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C          TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I        TGSI_EXEC_TEMP_128_I
#define TEMP_128_C        TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I       TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C       TGSI_EXEC_TEMP_MINUS_128_C

/* Other utility registers. */
#define TEMP_KILMASK_I    TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C    TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I     TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C     TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I  TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C  TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I         TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C         TGSI_EXEC_TEMP_CC_C

/* More floating-point constants (filled in by tgsi_exec_machine_init). */
#define TEMP_3_I          TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C          TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I       TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C       TGSI_EXEC_TEMP_HALF_C

#define TEMP_R0           TGSI_EXEC_TEMP_R0
103
/*
 * Non-zero when channel CHAN is set in the write mask of destination
 * register number DST of instruction INST.
 */
#define IS_DST_CHANNEL_ENABLED(INST, DST, CHAN) \
   ((INST).FullDstRegisters[DST].DstRegister.WriteMask & (1 << (CHAN)))

/* Write-mask test for the first destination register. */
#define IS_CHANNEL_ENABLED(INST, CHAN) \
   IS_DST_CHANNEL_ENABLED(INST, 0, CHAN)

/* Write-mask test for the second destination register. */
#define IS_CHANNEL_ENABLED2(INST, CHAN) \
   IS_DST_CHANNEL_ENABLED(INST, 1, CHAN)

/*
 * Loop headers: the statement that follows runs once for every channel in
 * [0, NUM_CHANNELS) enabled in the first / second destination's write mask.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) \
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) \
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
117
118
/**
 * Recompute the execution mask of machine MACH as the bitwise AND of its
 * conditional, loop, continue and function masks.
 *
 * MACH is a pointer expression; it is parenthesized so that arguments such
 * as `&m` or `p + 1` expand correctly.  It is evaluated more than once, so
 * it must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
122
123 /**
124 * Initialize machine state by expanding tokens to full instructions,
125 * allocating temporary storage, setting up constants, etc.
126 * After this, we can call tgsi_exec_machine_run() many times.
127 */
128 void
129 tgsi_exec_machine_bind_shader(
130 struct tgsi_exec_machine *mach,
131 const struct tgsi_token *tokens,
132 uint numSamplers,
133 struct tgsi_sampler *samplers)
134 {
135 uint k;
136 struct tgsi_parse_context parse;
137 struct tgsi_exec_labels *labels = &mach->Labels;
138 struct tgsi_full_instruction *instructions;
139 struct tgsi_full_declaration *declarations;
140 uint maxInstructions = 10, numInstructions = 0;
141 uint maxDeclarations = 10, numDeclarations = 0;
142 uint instno = 0;
143
144 #if 0
145 tgsi_dump(tokens, 0);
146 #endif
147
148 mach->Tokens = tokens;
149 mach->Samplers = samplers;
150
151 k = tgsi_parse_init (&parse, mach->Tokens);
152 if (k != TGSI_PARSE_OK) {
153 debug_printf( "Problem parsing!\n" );
154 return;
155 }
156
157 mach->Processor = parse.FullHeader.Processor.Processor;
158 mach->ImmLimit = 0;
159 labels->count = 0;
160
161 declarations = (struct tgsi_full_declaration *)
162 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
163
164 if (!declarations) {
165 return;
166 }
167
168 instructions = (struct tgsi_full_instruction *)
169 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
170
171 if (!instructions) {
172 FREE( declarations );
173 return;
174 }
175
176 while( !tgsi_parse_end_of_tokens( &parse ) ) {
177 uint pointer = parse.Position;
178 uint i;
179
180 tgsi_parse_token( &parse );
181 switch( parse.FullToken.Token.Type ) {
182 case TGSI_TOKEN_TYPE_DECLARATION:
183 /* save expanded declaration */
184 if (numDeclarations == maxDeclarations) {
185 declarations = REALLOC(declarations,
186 maxDeclarations
187 * sizeof(struct tgsi_full_declaration),
188 (maxDeclarations + 10)
189 * sizeof(struct tgsi_full_declaration));
190 maxDeclarations += 10;
191 }
192 memcpy(declarations + numDeclarations,
193 &parse.FullToken.FullDeclaration,
194 sizeof(declarations[0]));
195 numDeclarations++;
196 break;
197
198 case TGSI_TOKEN_TYPE_IMMEDIATE:
199 {
200 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
201 assert( size % 4 == 0 );
202 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
203
204 for( i = 0; i < size; i++ ) {
205 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
206 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
207 }
208 mach->ImmLimit += size / 4;
209 }
210 break;
211
212 case TGSI_TOKEN_TYPE_INSTRUCTION:
213 assert( labels->count < MAX_LABELS );
214
215 labels->labels[labels->count][0] = instno;
216 labels->labels[labels->count][1] = pointer;
217 labels->count++;
218
219 /* save expanded instruction */
220 if (numInstructions == maxInstructions) {
221 instructions = REALLOC(instructions,
222 maxInstructions
223 * sizeof(struct tgsi_full_instruction),
224 (maxInstructions + 10)
225 * sizeof(struct tgsi_full_instruction));
226 maxInstructions += 10;
227 }
228 memcpy(instructions + numInstructions,
229 &parse.FullToken.FullInstruction,
230 sizeof(instructions[0]));
231 numInstructions++;
232 break;
233
234 default:
235 assert( 0 );
236 }
237 }
238 tgsi_parse_free (&parse);
239
240 if (mach->Declarations) {
241 FREE( mach->Declarations );
242 }
243 mach->Declarations = declarations;
244 mach->NumDeclarations = numDeclarations;
245
246 if (mach->Instructions) {
247 FREE( mach->Instructions );
248 }
249 mach->Instructions = instructions;
250 mach->NumInstructions = numInstructions;
251 }
252
253
254 void
255 tgsi_exec_machine_init(
256 struct tgsi_exec_machine *mach )
257 {
258 uint i;
259
260 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
261 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
262
263 /* Setup constants. */
264 for( i = 0; i < 4; i++ ) {
265 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
266 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
267 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
268 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
269 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
270 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
271 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
272 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
273 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
274 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
275 }
276 }
277
278
279 void
280 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
281 {
282 if (mach->Instructions) {
283 FREE(mach->Instructions);
284 mach->Instructions = NULL;
285 mach->NumInstructions = 0;
286 }
287 if (mach->Declarations) {
288 FREE(mach->Declarations);
289 mach->Declarations = NULL;
290 mach->NumDeclarations = 0;
291 }
292 }
293
294
295 static void
296 micro_abs(
297 union tgsi_exec_channel *dst,
298 const union tgsi_exec_channel *src )
299 {
300 dst->f[0] = fabsf( src->f[0] );
301 dst->f[1] = fabsf( src->f[1] );
302 dst->f[2] = fabsf( src->f[2] );
303 dst->f[3] = fabsf( src->f[3] );
304 }
305
306 static void
307 micro_add(
308 union tgsi_exec_channel *dst,
309 const union tgsi_exec_channel *src0,
310 const union tgsi_exec_channel *src1 )
311 {
312 dst->f[0] = src0->f[0] + src1->f[0];
313 dst->f[1] = src0->f[1] + src1->f[1];
314 dst->f[2] = src0->f[2] + src1->f[2];
315 dst->f[3] = src0->f[3] + src1->f[3];
316 }
317
318 static void
319 micro_iadd(
320 union tgsi_exec_channel *dst,
321 const union tgsi_exec_channel *src0,
322 const union tgsi_exec_channel *src1 )
323 {
324 dst->i[0] = src0->i[0] + src1->i[0];
325 dst->i[1] = src0->i[1] + src1->i[1];
326 dst->i[2] = src0->i[2] + src1->i[2];
327 dst->i[3] = src0->i[3] + src1->i[3];
328 }
329
330 static void
331 micro_and(
332 union tgsi_exec_channel *dst,
333 const union tgsi_exec_channel *src0,
334 const union tgsi_exec_channel *src1 )
335 {
336 dst->u[0] = src0->u[0] & src1->u[0];
337 dst->u[1] = src0->u[1] & src1->u[1];
338 dst->u[2] = src0->u[2] & src1->u[2];
339 dst->u[3] = src0->u[3] & src1->u[3];
340 }
341
342 static void
343 micro_ceil(
344 union tgsi_exec_channel *dst,
345 const union tgsi_exec_channel *src )
346 {
347 dst->f[0] = ceilf( src->f[0] );
348 dst->f[1] = ceilf( src->f[1] );
349 dst->f[2] = ceilf( src->f[2] );
350 dst->f[3] = ceilf( src->f[3] );
351 }
352
353 static void
354 micro_cos(
355 union tgsi_exec_channel *dst,
356 const union tgsi_exec_channel *src )
357 {
358 dst->f[0] = cosf( src->f[0] );
359 dst->f[1] = cosf( src->f[1] );
360 dst->f[2] = cosf( src->f[2] );
361 dst->f[3] = cosf( src->f[3] );
362 }
363
364 static void
365 micro_ddx(
366 union tgsi_exec_channel *dst,
367 const union tgsi_exec_channel *src )
368 {
369 dst->f[0] =
370 dst->f[1] =
371 dst->f[2] =
372 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
373 }
374
375 static void
376 micro_ddy(
377 union tgsi_exec_channel *dst,
378 const union tgsi_exec_channel *src )
379 {
380 dst->f[0] =
381 dst->f[1] =
382 dst->f[2] =
383 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
384 }
385
386 static void
387 micro_div(
388 union tgsi_exec_channel *dst,
389 const union tgsi_exec_channel *src0,
390 const union tgsi_exec_channel *src1 )
391 {
392 if (src1->f[0] != 0) {
393 dst->f[0] = src0->f[0] / src1->f[0];
394 }
395 if (src1->f[1] != 0) {
396 dst->f[1] = src0->f[1] / src1->f[1];
397 }
398 if (src1->f[2] != 0) {
399 dst->f[2] = src0->f[2] / src1->f[2];
400 }
401 if (src1->f[3] != 0) {
402 dst->f[3] = src0->f[3] / src1->f[3];
403 }
404 }
405
406 static void
407 micro_udiv(
408 union tgsi_exec_channel *dst,
409 const union tgsi_exec_channel *src0,
410 const union tgsi_exec_channel *src1 )
411 {
412 dst->u[0] = src0->u[0] / src1->u[0];
413 dst->u[1] = src0->u[1] / src1->u[1];
414 dst->u[2] = src0->u[2] / src1->u[2];
415 dst->u[3] = src0->u[3] / src1->u[3];
416 }
417
418 static void
419 micro_eq(
420 union tgsi_exec_channel *dst,
421 const union tgsi_exec_channel *src0,
422 const union tgsi_exec_channel *src1,
423 const union tgsi_exec_channel *src2,
424 const union tgsi_exec_channel *src3 )
425 {
426 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
427 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
428 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
429 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
430 }
431
432 static void
433 micro_ieq(
434 union tgsi_exec_channel *dst,
435 const union tgsi_exec_channel *src0,
436 const union tgsi_exec_channel *src1,
437 const union tgsi_exec_channel *src2,
438 const union tgsi_exec_channel *src3 )
439 {
440 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
441 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
442 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
443 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
444 }
445
446 static void
447 micro_exp2(
448 union tgsi_exec_channel *dst,
449 const union tgsi_exec_channel *src)
450 {
451 dst->f[0] = powf( 2.0f, src->f[0] );
452 dst->f[1] = powf( 2.0f, src->f[1] );
453 dst->f[2] = powf( 2.0f, src->f[2] );
454 dst->f[3] = powf( 2.0f, src->f[3] );
455 }
456
457 static void
458 micro_f2it(
459 union tgsi_exec_channel *dst,
460 const union tgsi_exec_channel *src )
461 {
462 dst->i[0] = (int) src->f[0];
463 dst->i[1] = (int) src->f[1];
464 dst->i[2] = (int) src->f[2];
465 dst->i[3] = (int) src->f[3];
466 }
467
468 static void
469 micro_f2ut(
470 union tgsi_exec_channel *dst,
471 const union tgsi_exec_channel *src )
472 {
473 dst->u[0] = (uint) src->f[0];
474 dst->u[1] = (uint) src->f[1];
475 dst->u[2] = (uint) src->f[2];
476 dst->u[3] = (uint) src->f[3];
477 }
478
479 static void
480 micro_flr(
481 union tgsi_exec_channel *dst,
482 const union tgsi_exec_channel *src )
483 {
484 dst->f[0] = floorf( src->f[0] );
485 dst->f[1] = floorf( src->f[1] );
486 dst->f[2] = floorf( src->f[2] );
487 dst->f[3] = floorf( src->f[3] );
488 }
489
490 static void
491 micro_frc(
492 union tgsi_exec_channel *dst,
493 const union tgsi_exec_channel *src )
494 {
495 dst->f[0] = src->f[0] - floorf( src->f[0] );
496 dst->f[1] = src->f[1] - floorf( src->f[1] );
497 dst->f[2] = src->f[2] - floorf( src->f[2] );
498 dst->f[3] = src->f[3] - floorf( src->f[3] );
499 }
500
501 static void
502 micro_ge(
503 union tgsi_exec_channel *dst,
504 const union tgsi_exec_channel *src0,
505 const union tgsi_exec_channel *src1,
506 const union tgsi_exec_channel *src2,
507 const union tgsi_exec_channel *src3 )
508 {
509 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
510 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
511 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
512 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
513 }
514
515 static void
516 micro_i2f(
517 union tgsi_exec_channel *dst,
518 const union tgsi_exec_channel *src )
519 {
520 dst->f[0] = (float) src->i[0];
521 dst->f[1] = (float) src->i[1];
522 dst->f[2] = (float) src->i[2];
523 dst->f[3] = (float) src->i[3];
524 }
525
526 static void
527 micro_lg2(
528 union tgsi_exec_channel *dst,
529 const union tgsi_exec_channel *src )
530 {
531 dst->f[0] = logf( src->f[0] ) * 1.442695f;
532 dst->f[1] = logf( src->f[1] ) * 1.442695f;
533 dst->f[2] = logf( src->f[2] ) * 1.442695f;
534 dst->f[3] = logf( src->f[3] ) * 1.442695f;
535 }
536
537 static void
538 micro_le(
539 union tgsi_exec_channel *dst,
540 const union tgsi_exec_channel *src0,
541 const union tgsi_exec_channel *src1,
542 const union tgsi_exec_channel *src2,
543 const union tgsi_exec_channel *src3 )
544 {
545 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
546 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
547 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
548 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
549 }
550
551 static void
552 micro_lt(
553 union tgsi_exec_channel *dst,
554 const union tgsi_exec_channel *src0,
555 const union tgsi_exec_channel *src1,
556 const union tgsi_exec_channel *src2,
557 const union tgsi_exec_channel *src3 )
558 {
559 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
560 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
561 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
562 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
563 }
564
565 static void
566 micro_ilt(
567 union tgsi_exec_channel *dst,
568 const union tgsi_exec_channel *src0,
569 const union tgsi_exec_channel *src1,
570 const union tgsi_exec_channel *src2,
571 const union tgsi_exec_channel *src3 )
572 {
573 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
574 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
575 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
576 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
577 }
578
579 static void
580 micro_ult(
581 union tgsi_exec_channel *dst,
582 const union tgsi_exec_channel *src0,
583 const union tgsi_exec_channel *src1,
584 const union tgsi_exec_channel *src2,
585 const union tgsi_exec_channel *src3 )
586 {
587 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
588 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
589 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
590 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
591 }
592
593 static void
594 micro_max(
595 union tgsi_exec_channel *dst,
596 const union tgsi_exec_channel *src0,
597 const union tgsi_exec_channel *src1 )
598 {
599 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
600 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
601 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
602 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
603 }
604
605 static void
606 micro_imax(
607 union tgsi_exec_channel *dst,
608 const union tgsi_exec_channel *src0,
609 const union tgsi_exec_channel *src1 )
610 {
611 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
612 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
613 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
614 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
615 }
616
617 static void
618 micro_umax(
619 union tgsi_exec_channel *dst,
620 const union tgsi_exec_channel *src0,
621 const union tgsi_exec_channel *src1 )
622 {
623 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
624 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
625 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
626 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
627 }
628
629 static void
630 micro_min(
631 union tgsi_exec_channel *dst,
632 const union tgsi_exec_channel *src0,
633 const union tgsi_exec_channel *src1 )
634 {
635 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
636 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
637 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
638 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
639 }
640
641 static void
642 micro_imin(
643 union tgsi_exec_channel *dst,
644 const union tgsi_exec_channel *src0,
645 const union tgsi_exec_channel *src1 )
646 {
647 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
648 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
649 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
650 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
651 }
652
653 static void
654 micro_umin(
655 union tgsi_exec_channel *dst,
656 const union tgsi_exec_channel *src0,
657 const union tgsi_exec_channel *src1 )
658 {
659 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
660 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
661 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
662 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
663 }
664
665 static void
666 micro_umod(
667 union tgsi_exec_channel *dst,
668 const union tgsi_exec_channel *src0,
669 const union tgsi_exec_channel *src1 )
670 {
671 dst->u[0] = src0->u[0] % src1->u[0];
672 dst->u[1] = src0->u[1] % src1->u[1];
673 dst->u[2] = src0->u[2] % src1->u[2];
674 dst->u[3] = src0->u[3] % src1->u[3];
675 }
676
677 static void
678 micro_mul(
679 union tgsi_exec_channel *dst,
680 const union tgsi_exec_channel *src0,
681 const union tgsi_exec_channel *src1 )
682 {
683 dst->f[0] = src0->f[0] * src1->f[0];
684 dst->f[1] = src0->f[1] * src1->f[1];
685 dst->f[2] = src0->f[2] * src1->f[2];
686 dst->f[3] = src0->f[3] * src1->f[3];
687 }
688
689 static void
690 micro_imul(
691 union tgsi_exec_channel *dst,
692 const union tgsi_exec_channel *src0,
693 const union tgsi_exec_channel *src1 )
694 {
695 dst->i[0] = src0->i[0] * src1->i[0];
696 dst->i[1] = src0->i[1] * src1->i[1];
697 dst->i[2] = src0->i[2] * src1->i[2];
698 dst->i[3] = src0->i[3] * src1->i[3];
699 }
700
701 static void
702 micro_imul64(
703 union tgsi_exec_channel *dst0,
704 union tgsi_exec_channel *dst1,
705 const union tgsi_exec_channel *src0,
706 const union tgsi_exec_channel *src1 )
707 {
708 dst1->i[0] = src0->i[0] * src1->i[0];
709 dst1->i[1] = src0->i[1] * src1->i[1];
710 dst1->i[2] = src0->i[2] * src1->i[2];
711 dst1->i[3] = src0->i[3] * src1->i[3];
712 dst0->i[0] = 0;
713 dst0->i[1] = 0;
714 dst0->i[2] = 0;
715 dst0->i[3] = 0;
716 }
717
718 static void
719 micro_umul64(
720 union tgsi_exec_channel *dst0,
721 union tgsi_exec_channel *dst1,
722 const union tgsi_exec_channel *src0,
723 const union tgsi_exec_channel *src1 )
724 {
725 dst1->u[0] = src0->u[0] * src1->u[0];
726 dst1->u[1] = src0->u[1] * src1->u[1];
727 dst1->u[2] = src0->u[2] * src1->u[2];
728 dst1->u[3] = src0->u[3] * src1->u[3];
729 dst0->u[0] = 0;
730 dst0->u[1] = 0;
731 dst0->u[2] = 0;
732 dst0->u[3] = 0;
733 }
734
735 static void
736 micro_movc(
737 union tgsi_exec_channel *dst,
738 const union tgsi_exec_channel *src0,
739 const union tgsi_exec_channel *src1,
740 const union tgsi_exec_channel *src2 )
741 {
742 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
743 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
744 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
745 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
746 }
747
748 static void
749 micro_neg(
750 union tgsi_exec_channel *dst,
751 const union tgsi_exec_channel *src )
752 {
753 dst->f[0] = -src->f[0];
754 dst->f[1] = -src->f[1];
755 dst->f[2] = -src->f[2];
756 dst->f[3] = -src->f[3];
757 }
758
759 static void
760 micro_ineg(
761 union tgsi_exec_channel *dst,
762 const union tgsi_exec_channel *src )
763 {
764 dst->i[0] = -src->i[0];
765 dst->i[1] = -src->i[1];
766 dst->i[2] = -src->i[2];
767 dst->i[3] = -src->i[3];
768 }
769
770 static void
771 micro_not(
772 union tgsi_exec_channel *dst,
773 const union tgsi_exec_channel *src )
774 {
775 dst->u[0] = ~src->u[0];
776 dst->u[1] = ~src->u[1];
777 dst->u[2] = ~src->u[2];
778 dst->u[3] = ~src->u[3];
779 }
780
781 static void
782 micro_or(
783 union tgsi_exec_channel *dst,
784 const union tgsi_exec_channel *src0,
785 const union tgsi_exec_channel *src1 )
786 {
787 dst->u[0] = src0->u[0] | src1->u[0];
788 dst->u[1] = src0->u[1] | src1->u[1];
789 dst->u[2] = src0->u[2] | src1->u[2];
790 dst->u[3] = src0->u[3] | src1->u[3];
791 }
792
793 static void
794 micro_pow(
795 union tgsi_exec_channel *dst,
796 const union tgsi_exec_channel *src0,
797 const union tgsi_exec_channel *src1 )
798 {
799 dst->f[0] = powf( src0->f[0], src1->f[0] );
800 dst->f[1] = powf( src0->f[1], src1->f[1] );
801 dst->f[2] = powf( src0->f[2], src1->f[2] );
802 dst->f[3] = powf( src0->f[3], src1->f[3] );
803 }
804
805 static void
806 micro_rnd(
807 union tgsi_exec_channel *dst,
808 const union tgsi_exec_channel *src )
809 {
810 dst->f[0] = floorf( src->f[0] + 0.5f );
811 dst->f[1] = floorf( src->f[1] + 0.5f );
812 dst->f[2] = floorf( src->f[2] + 0.5f );
813 dst->f[3] = floorf( src->f[3] + 0.5f );
814 }
815
816 static void
817 micro_shl(
818 union tgsi_exec_channel *dst,
819 const union tgsi_exec_channel *src0,
820 const union tgsi_exec_channel *src1 )
821 {
822 dst->i[0] = src0->i[0] << src1->i[0];
823 dst->i[1] = src0->i[1] << src1->i[1];
824 dst->i[2] = src0->i[2] << src1->i[2];
825 dst->i[3] = src0->i[3] << src1->i[3];
826 }
827
828 static void
829 micro_ishr(
830 union tgsi_exec_channel *dst,
831 const union tgsi_exec_channel *src0,
832 const union tgsi_exec_channel *src1 )
833 {
834 dst->i[0] = src0->i[0] >> src1->i[0];
835 dst->i[1] = src0->i[1] >> src1->i[1];
836 dst->i[2] = src0->i[2] >> src1->i[2];
837 dst->i[3] = src0->i[3] >> src1->i[3];
838 }
839
840 static void
841 micro_trunc(
842 union tgsi_exec_channel *dst,
843 const union tgsi_exec_channel *src0 )
844 {
845 dst->f[0] = (float) (int) src0->f[0];
846 dst->f[1] = (float) (int) src0->f[1];
847 dst->f[2] = (float) (int) src0->f[2];
848 dst->f[3] = (float) (int) src0->f[3];
849 }
850
851 static void
852 micro_ushr(
853 union tgsi_exec_channel *dst,
854 const union tgsi_exec_channel *src0,
855 const union tgsi_exec_channel *src1 )
856 {
857 dst->u[0] = src0->u[0] >> src1->u[0];
858 dst->u[1] = src0->u[1] >> src1->u[1];
859 dst->u[2] = src0->u[2] >> src1->u[2];
860 dst->u[3] = src0->u[3] >> src1->u[3];
861 }
862
863 static void
864 micro_sin(
865 union tgsi_exec_channel *dst,
866 const union tgsi_exec_channel *src )
867 {
868 dst->f[0] = sinf( src->f[0] );
869 dst->f[1] = sinf( src->f[1] );
870 dst->f[2] = sinf( src->f[2] );
871 dst->f[3] = sinf( src->f[3] );
872 }
873
874 static void
875 micro_sqrt( union tgsi_exec_channel *dst,
876 const union tgsi_exec_channel *src )
877 {
878 dst->f[0] = sqrtf( src->f[0] );
879 dst->f[1] = sqrtf( src->f[1] );
880 dst->f[2] = sqrtf( src->f[2] );
881 dst->f[3] = sqrtf( src->f[3] );
882 }
883
884 static void
885 micro_sub(
886 union tgsi_exec_channel *dst,
887 const union tgsi_exec_channel *src0,
888 const union tgsi_exec_channel *src1 )
889 {
890 dst->f[0] = src0->f[0] - src1->f[0];
891 dst->f[1] = src0->f[1] - src1->f[1];
892 dst->f[2] = src0->f[2] - src1->f[2];
893 dst->f[3] = src0->f[3] - src1->f[3];
894 }
895
896 static void
897 micro_u2f(
898 union tgsi_exec_channel *dst,
899 const union tgsi_exec_channel *src )
900 {
901 dst->f[0] = (float) src->u[0];
902 dst->f[1] = (float) src->u[1];
903 dst->f[2] = (float) src->u[2];
904 dst->f[3] = (float) src->u[3];
905 }
906
907 static void
908 micro_xor(
909 union tgsi_exec_channel *dst,
910 const union tgsi_exec_channel *src0,
911 const union tgsi_exec_channel *src1 )
912 {
913 dst->u[0] = src0->u[0] ^ src1->u[0];
914 dst->u[1] = src0->u[1] ^ src1->u[1];
915 dst->u[2] = src0->u[2] ^ src1->u[2];
916 dst->u[3] = src0->u[3] ^ src1->u[3];
917 }
918
919 static void
920 fetch_src_file_channel(
921 const struct tgsi_exec_machine *mach,
922 const uint file,
923 const uint swizzle,
924 const union tgsi_exec_channel *index,
925 union tgsi_exec_channel *chan )
926 {
927 switch( swizzle ) {
928 case TGSI_EXTSWIZZLE_X:
929 case TGSI_EXTSWIZZLE_Y:
930 case TGSI_EXTSWIZZLE_Z:
931 case TGSI_EXTSWIZZLE_W:
932 switch( file ) {
933 case TGSI_FILE_CONSTANT:
934 chan->f[0] = mach->Consts[index->i[0]][swizzle];
935 chan->f[1] = mach->Consts[index->i[1]][swizzle];
936 chan->f[2] = mach->Consts[index->i[2]][swizzle];
937 chan->f[3] = mach->Consts[index->i[3]][swizzle];
938 break;
939
940 case TGSI_FILE_INPUT:
941 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
942 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
943 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
944 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
945 break;
946
947 case TGSI_FILE_TEMPORARY:
948 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
949 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
950 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
951 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
952 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
953 break;
954
955 case TGSI_FILE_IMMEDIATE:
956 assert( index->i[0] < (int) mach->ImmLimit );
957 chan->f[0] = mach->Imms[index->i[0]][swizzle];
958 assert( index->i[1] < (int) mach->ImmLimit );
959 chan->f[1] = mach->Imms[index->i[1]][swizzle];
960 assert( index->i[2] < (int) mach->ImmLimit );
961 chan->f[2] = mach->Imms[index->i[2]][swizzle];
962 assert( index->i[3] < (int) mach->ImmLimit );
963 chan->f[3] = mach->Imms[index->i[3]][swizzle];
964 break;
965
966 case TGSI_FILE_ADDRESS:
967 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
968 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
969 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
970 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
971 break;
972
973 case TGSI_FILE_OUTPUT:
974 /* vertex/fragment output vars can be read too */
975 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
976 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
977 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
978 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
979 break;
980
981 default:
982 assert( 0 );
983 }
984 break;
985
986 case TGSI_EXTSWIZZLE_ZERO:
987 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
988 break;
989
990 case TGSI_EXTSWIZZLE_ONE:
991 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
992 break;
993
994 default:
995 assert( 0 );
996 }
997 }
998
999 static void
1000 fetch_source(
1001 const struct tgsi_exec_machine *mach,
1002 union tgsi_exec_channel *chan,
1003 const struct tgsi_full_src_register *reg,
1004 const uint chan_index )
1005 {
1006 union tgsi_exec_channel index;
1007 uint swizzle;
1008
1009 index.i[0] =
1010 index.i[1] =
1011 index.i[2] =
1012 index.i[3] = reg->SrcRegister.Index;
1013
1014 if (reg->SrcRegister.Indirect) {
1015 union tgsi_exec_channel index2;
1016 union tgsi_exec_channel indir_index;
1017
1018 index2.i[0] =
1019 index2.i[1] =
1020 index2.i[2] =
1021 index2.i[3] = reg->SrcRegisterInd.Index;
1022
1023 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1024 fetch_src_file_channel(
1025 mach,
1026 reg->SrcRegisterInd.File,
1027 swizzle,
1028 &index2,
1029 &indir_index );
1030
1031 index.i[0] += indir_index.i[0];
1032 index.i[1] += indir_index.i[1];
1033 index.i[2] += indir_index.i[2];
1034 index.i[3] += indir_index.i[3];
1035 }
1036
1037 if( reg->SrcRegister.Dimension ) {
1038 switch( reg->SrcRegister.File ) {
1039 case TGSI_FILE_INPUT:
1040 index.i[0] *= 17;
1041 index.i[1] *= 17;
1042 index.i[2] *= 17;
1043 index.i[3] *= 17;
1044 break;
1045 case TGSI_FILE_CONSTANT:
1046 index.i[0] *= 4096;
1047 index.i[1] *= 4096;
1048 index.i[2] *= 4096;
1049 index.i[3] *= 4096;
1050 break;
1051 default:
1052 assert( 0 );
1053 }
1054
1055 index.i[0] += reg->SrcRegisterDim.Index;
1056 index.i[1] += reg->SrcRegisterDim.Index;
1057 index.i[2] += reg->SrcRegisterDim.Index;
1058 index.i[3] += reg->SrcRegisterDim.Index;
1059
1060 if (reg->SrcRegisterDim.Indirect) {
1061 union tgsi_exec_channel index2;
1062 union tgsi_exec_channel indir_index;
1063
1064 index2.i[0] =
1065 index2.i[1] =
1066 index2.i[2] =
1067 index2.i[3] = reg->SrcRegisterDimInd.Index;
1068
1069 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1070 fetch_src_file_channel(
1071 mach,
1072 reg->SrcRegisterDimInd.File,
1073 swizzle,
1074 &index2,
1075 &indir_index );
1076
1077 index.i[0] += indir_index.i[0];
1078 index.i[1] += indir_index.i[1];
1079 index.i[2] += indir_index.i[2];
1080 index.i[3] += indir_index.i[3];
1081 }
1082 }
1083
1084 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1085 fetch_src_file_channel(
1086 mach,
1087 reg->SrcRegister.File,
1088 swizzle,
1089 &index,
1090 chan );
1091
1092 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1093 case TGSI_UTIL_SIGN_CLEAR:
1094 micro_abs( chan, chan );
1095 break;
1096
1097 case TGSI_UTIL_SIGN_SET:
1098 micro_abs( chan, chan );
1099 micro_neg( chan, chan );
1100 break;
1101
1102 case TGSI_UTIL_SIGN_TOGGLE:
1103 micro_neg( chan, chan );
1104 break;
1105
1106 case TGSI_UTIL_SIGN_KEEP:
1107 break;
1108 }
1109
1110 if (reg->SrcRegisterExtMod.Complement) {
1111 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1112 }
1113 }
1114
1115 static void
1116 store_dest(
1117 struct tgsi_exec_machine *mach,
1118 const union tgsi_exec_channel *chan,
1119 const struct tgsi_full_dst_register *reg,
1120 const struct tgsi_full_instruction *inst,
1121 uint chan_index )
1122 {
1123 union tgsi_exec_channel *dst;
1124
1125 switch (reg->DstRegister.File) {
1126 case TGSI_FILE_NULL:
1127 return;
1128
1129 case TGSI_FILE_OUTPUT:
1130 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1131 + reg->DstRegister.Index].xyzw[chan_index];
1132 break;
1133
1134 case TGSI_FILE_TEMPORARY:
1135 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1136 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1137 break;
1138
1139 case TGSI_FILE_ADDRESS:
1140 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1141 break;
1142
1143 default:
1144 assert( 0 );
1145 return;
1146 }
1147
1148 switch (inst->Instruction.Saturate) {
1149 case TGSI_SAT_NONE:
1150 if (mach->ExecMask & 0x1)
1151 dst->i[0] = chan->i[0];
1152 if (mach->ExecMask & 0x2)
1153 dst->i[1] = chan->i[1];
1154 if (mach->ExecMask & 0x4)
1155 dst->i[2] = chan->i[2];
1156 if (mach->ExecMask & 0x8)
1157 dst->i[3] = chan->i[3];
1158 break;
1159
1160 case TGSI_SAT_ZERO_ONE:
1161 /* XXX need to obey ExecMask here */
1162 micro_max( dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1163 micro_min( dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1164 break;
1165
1166 case TGSI_SAT_MINUS_PLUS_ONE:
1167 assert( 0 );
1168 break;
1169
1170 default:
1171 assert( 0 );
1172 }
1173 }
1174
/*
 * Shorthands for reading a source operand and writing a destination operand
 * of the current instruction.  Both expand to code that refers to variables
 * named `mach` and `inst`, which must be in scope at the point of use.
 */
#define FETCH(VAL,INDEX,CHAN) \
   fetch_source( mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN )

#define STORE(VAL,INDEX,CHAN) \
   store_dest( mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1180
1181
1182 /**
1183 * Execute ARB-style KIL which is predicated by a src register.
1184 * Kill fragment if any of the four values is less than zero.
1185 */
1186 static void
1187 exec_kil(struct tgsi_exec_machine *mach,
1188 const struct tgsi_full_instruction *inst)
1189 {
1190 uint uniquemask;
1191 uint chan_index;
1192 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1193 union tgsi_exec_channel r[1];
1194
1195 /* This mask stores component bits that were already tested. Note that
1196 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1197 * tested. */
1198 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1199
1200 for (chan_index = 0; chan_index < 4; chan_index++)
1201 {
1202 uint swizzle;
1203 uint i;
1204
1205 /* unswizzle channel */
1206 swizzle = tgsi_util_get_full_src_register_extswizzle (
1207 &inst->FullSrcRegisters[0],
1208 chan_index);
1209
1210 /* check if the component has not been already tested */
1211 if (uniquemask & (1 << swizzle))
1212 continue;
1213 uniquemask |= 1 << swizzle;
1214
1215 FETCH(&r[0], 0, chan_index);
1216 for (i = 0; i < 4; i++)
1217 if (r[0].f[i] < 0.0f)
1218 kilmask |= 1 << i;
1219 }
1220
1221 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1222 }
1223
1224 /**
1225 * Execute NVIDIA-style KIL which is predicated by a condition code.
1226 * Kill fragment if the condition code is TRUE.
1227 */
1228 static void
1229 exec_kilp(struct tgsi_exec_machine *mach,
1230 const struct tgsi_full_instruction *inst)
1231 {
1232 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1233
1234 if (inst->InstructionExtNv.CondFlowEnable) {
1235 uint swizzle[4];
1236 uint chan_index;
1237
1238 kilmask = 0x0;
1239
1240 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1241 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1242 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1243 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1244
1245 for (chan_index = 0; chan_index < 4; chan_index++)
1246 {
1247 uint i;
1248
1249 for (i = 0; i < 4; i++) {
1250 /* TODO: evaluate the condition code */
1251 if (0)
1252 kilmask |= 1 << i;
1253 }
1254 }
1255 }
1256 else {
1257 /* "unconditional" kil */
1258 kilmask = mach->ExecMask;
1259 }
1260 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1261 }
1262
1263
1264 /*
1265 * Fetch a texel using STR texture coordinates.
1266 */
1267 static void
1268 fetch_texel( struct tgsi_sampler *sampler,
1269 const union tgsi_exec_channel *s,
1270 const union tgsi_exec_channel *t,
1271 const union tgsi_exec_channel *p,
1272 float lodbias, /* XXX should be float[4] */
1273 union tgsi_exec_channel *r,
1274 union tgsi_exec_channel *g,
1275 union tgsi_exec_channel *b,
1276 union tgsi_exec_channel *a )
1277 {
1278 uint j;
1279 float rgba[NUM_CHANNELS][QUAD_SIZE];
1280
1281 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1282
1283 for (j = 0; j < 4; j++) {
1284 r->f[j] = rgba[0][j];
1285 g->f[j] = rgba[1][j];
1286 b->f[j] = rgba[2][j];
1287 a->f[j] = rgba[3][j];
1288 }
1289 }
1290
1291
1292 static void
1293 exec_tex(struct tgsi_exec_machine *mach,
1294 const struct tgsi_full_instruction *inst,
1295 boolean biasLod,
1296 boolean projected)
1297 {
1298 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1299 union tgsi_exec_channel r[8];
1300 uint chan_index;
1301 float lodBias;
1302
1303 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1304
1305 switch (inst->InstructionExtTexture.Texture) {
1306 case TGSI_TEXTURE_1D:
1307
1308 FETCH(&r[0], 0, CHAN_X);
1309
1310 if (projected) {
1311 FETCH(&r[1], 0, CHAN_W);
1312 micro_div( &r[0], &r[0], &r[1] );
1313 }
1314
1315 if (biasLod) {
1316 FETCH(&r[1], 0, CHAN_W);
1317 lodBias = r[2].f[0];
1318 }
1319 else
1320 lodBias = 0.0;
1321
1322 fetch_texel(&mach->Samplers[unit],
1323 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1324 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1325 break;
1326
1327 case TGSI_TEXTURE_2D:
1328 case TGSI_TEXTURE_RECT:
1329
1330 FETCH(&r[0], 0, CHAN_X);
1331 FETCH(&r[1], 0, CHAN_Y);
1332 FETCH(&r[2], 0, CHAN_Z);
1333
1334 if (projected) {
1335 FETCH(&r[3], 0, CHAN_W);
1336 micro_div( &r[0], &r[0], &r[3] );
1337 micro_div( &r[1], &r[1], &r[3] );
1338 micro_div( &r[2], &r[2], &r[3] );
1339 }
1340
1341 if (biasLod) {
1342 FETCH(&r[3], 0, CHAN_W);
1343 lodBias = r[3].f[0];
1344 }
1345 else
1346 lodBias = 0.0;
1347
1348 fetch_texel(&mach->Samplers[unit],
1349 &r[0], &r[1], &r[2], lodBias, /* inputs */
1350 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1351 break;
1352
1353 case TGSI_TEXTURE_3D:
1354 case TGSI_TEXTURE_CUBE:
1355
1356 FETCH(&r[0], 0, CHAN_X);
1357 FETCH(&r[1], 0, CHAN_Y);
1358 FETCH(&r[2], 0, CHAN_Z);
1359
1360 if (projected) {
1361 FETCH(&r[3], 0, CHAN_W);
1362 micro_div( &r[0], &r[0], &r[3] );
1363 micro_div( &r[1], &r[1], &r[3] );
1364 micro_div( &r[2], &r[2], &r[3] );
1365 }
1366
1367 if (biasLod) {
1368 FETCH(&r[3], 0, CHAN_W);
1369 lodBias = r[3].f[0];
1370 }
1371 else
1372 lodBias = 0.0;
1373
1374 fetch_texel(&mach->Samplers[unit],
1375 &r[0], &r[1], &r[2], lodBias,
1376 &r[0], &r[1], &r[2], &r[3]);
1377 break;
1378
1379 default:
1380 assert (0);
1381 }
1382
1383 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1384 STORE( &r[chan_index], 0, chan_index );
1385 }
1386 }
1387
1388
1389 /**
1390 * Evaluate a constant-valued coefficient at the position of the
1391 * current quad.
1392 */
1393 static void
1394 eval_constant_coef(
1395 struct tgsi_exec_machine *mach,
1396 unsigned attrib,
1397 unsigned chan )
1398 {
1399 unsigned i;
1400
1401 for( i = 0; i < QUAD_SIZE; i++ ) {
1402 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1403 }
1404 }
1405
1406 /**
1407 * Evaluate a linear-valued coefficient at the position of the
1408 * current quad.
1409 */
1410 static void
1411 eval_linear_coef(
1412 struct tgsi_exec_machine *mach,
1413 unsigned attrib,
1414 unsigned chan )
1415 {
1416 const float x = mach->QuadPos.xyzw[0].f[0];
1417 const float y = mach->QuadPos.xyzw[1].f[0];
1418 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1419 const float dady = mach->InterpCoefs[attrib].dady[chan];
1420 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1421 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1422 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1423 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1424 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1425 }
1426
1427 /**
1428 * Evaluate a perspective-valued coefficient at the position of the
1429 * current quad.
1430 */
1431 static void
1432 eval_perspective_coef(
1433 struct tgsi_exec_machine *mach,
1434 unsigned attrib,
1435 unsigned chan )
1436 {
1437 const float x = mach->QuadPos.xyzw[0].f[0];
1438 const float y = mach->QuadPos.xyzw[1].f[0];
1439 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1440 const float dady = mach->InterpCoefs[attrib].dady[chan];
1441 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1442 const float *w = mach->QuadPos.xyzw[3].f;
1443 /* divide by W here */
1444 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1445 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1446 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1447 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1448 }
1449
1450
/** Common signature of the eval_*_coef() input interpolation routines. */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1455
1456 static void
1457 exec_declaration(
1458 struct tgsi_exec_machine *mach,
1459 const struct tgsi_full_declaration *decl )
1460 {
1461 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1462 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1463 unsigned first, last, mask;
1464 eval_coef_func eval;
1465
1466 first = decl->DeclarationRange.First;
1467 last = decl->DeclarationRange.Last;
1468 mask = decl->Declaration.UsageMask;
1469
1470 switch( decl->Declaration.Interpolate ) {
1471 case TGSI_INTERPOLATE_CONSTANT:
1472 eval = eval_constant_coef;
1473 break;
1474
1475 case TGSI_INTERPOLATE_LINEAR:
1476 eval = eval_linear_coef;
1477 break;
1478
1479 case TGSI_INTERPOLATE_PERSPECTIVE:
1480 eval = eval_perspective_coef;
1481 break;
1482
1483 default:
1484 assert( 0 );
1485 }
1486
1487 if( mask == TGSI_WRITEMASK_XYZW ) {
1488 unsigned i, j;
1489
1490 for( i = first; i <= last; i++ ) {
1491 for( j = 0; j < NUM_CHANNELS; j++ ) {
1492 eval( mach, i, j );
1493 }
1494 }
1495 }
1496 else {
1497 unsigned i, j;
1498
1499 for( j = 0; j < NUM_CHANNELS; j++ ) {
1500 if( mask & (1 << j) ) {
1501 for( i = first; i <= last; i++ ) {
1502 eval( mach, i, j );
1503 }
1504 }
1505 }
1506 }
1507 }
1508 }
1509 }
1510
1511 static void
1512 exec_instruction(
1513 struct tgsi_exec_machine *mach,
1514 const struct tgsi_full_instruction *inst,
1515 int *pc )
1516 {
1517 uint chan_index;
1518 union tgsi_exec_channel r[8];
1519
1520 (*pc)++;
1521
1522 switch (inst->Instruction.Opcode) {
1523 case TGSI_OPCODE_ARL:
1524 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1525 FETCH( &r[0], 0, chan_index );
1526 micro_f2it( &r[0], &r[0] );
1527 STORE( &r[0], 0, chan_index );
1528 }
1529 break;
1530
1531 case TGSI_OPCODE_MOV:
1532 case TGSI_OPCODE_SWZ:
1533 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1534 FETCH( &r[0], 0, chan_index );
1535 STORE( &r[0], 0, chan_index );
1536 }
1537 break;
1538
1539 case TGSI_OPCODE_LIT:
1540 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1541 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1542 }
1543
1544 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1545 FETCH( &r[0], 0, CHAN_X );
1546 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1547 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1548 STORE( &r[0], 0, CHAN_Y );
1549 }
1550
1551 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1552 FETCH( &r[1], 0, CHAN_Y );
1553 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1554
1555 FETCH( &r[2], 0, CHAN_W );
1556 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1557 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1558 micro_pow( &r[1], &r[1], &r[2] );
1559 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1560 STORE( &r[0], 0, CHAN_Z );
1561 }
1562 }
1563
1564 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1565 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1566 }
1567 break;
1568
1569 case TGSI_OPCODE_RCP:
1570 /* TGSI_OPCODE_RECIP */
1571 FETCH( &r[0], 0, CHAN_X );
1572 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1573 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1574 STORE( &r[0], 0, chan_index );
1575 }
1576 break;
1577
1578 case TGSI_OPCODE_RSQ:
1579 /* TGSI_OPCODE_RECIPSQRT */
1580 FETCH( &r[0], 0, CHAN_X );
1581 micro_sqrt( &r[0], &r[0] );
1582 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1583 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1584 STORE( &r[0], 0, chan_index );
1585 }
1586 break;
1587
1588 case TGSI_OPCODE_EXP:
1589 FETCH( &r[0], 0, CHAN_X );
1590 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1591 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1592 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1593 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1594 }
1595 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1596 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1597 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1598 }
1599 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1600 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1601 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1602 }
1603 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1604 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1605 }
1606 break;
1607
1608 case TGSI_OPCODE_LOG:
1609 FETCH( &r[0], 0, CHAN_X );
1610 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1611 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1612 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1613 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1614 STORE( &r[0], 0, CHAN_X );
1615 }
1616 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1617 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1618 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1619 STORE( &r[0], 0, CHAN_Y );
1620 }
1621 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1622 STORE( &r[1], 0, CHAN_Z );
1623 }
1624 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1625 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1626 }
1627 break;
1628
1629 case TGSI_OPCODE_MUL:
1630 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1631 {
1632 FETCH(&r[0], 0, chan_index);
1633 FETCH(&r[1], 1, chan_index);
1634
1635 micro_mul( &r[0], &r[0], &r[1] );
1636
1637 STORE(&r[0], 0, chan_index);
1638 }
1639 break;
1640
1641 case TGSI_OPCODE_ADD:
1642 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1643 FETCH( &r[0], 0, chan_index );
1644 FETCH( &r[1], 1, chan_index );
1645 micro_add( &r[0], &r[0], &r[1] );
1646 STORE( &r[0], 0, chan_index );
1647 }
1648 break;
1649
1650 case TGSI_OPCODE_DP3:
1651 /* TGSI_OPCODE_DOT3 */
1652 FETCH( &r[0], 0, CHAN_X );
1653 FETCH( &r[1], 1, CHAN_X );
1654 micro_mul( &r[0], &r[0], &r[1] );
1655
1656 FETCH( &r[1], 0, CHAN_Y );
1657 FETCH( &r[2], 1, CHAN_Y );
1658 micro_mul( &r[1], &r[1], &r[2] );
1659 micro_add( &r[0], &r[0], &r[1] );
1660
1661 FETCH( &r[1], 0, CHAN_Z );
1662 FETCH( &r[2], 1, CHAN_Z );
1663 micro_mul( &r[1], &r[1], &r[2] );
1664 micro_add( &r[0], &r[0], &r[1] );
1665
1666 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1667 STORE( &r[0], 0, chan_index );
1668 }
1669 break;
1670
1671 case TGSI_OPCODE_DP4:
1672 /* TGSI_OPCODE_DOT4 */
1673 FETCH(&r[0], 0, CHAN_X);
1674 FETCH(&r[1], 1, CHAN_X);
1675
1676 micro_mul( &r[0], &r[0], &r[1] );
1677
1678 FETCH(&r[1], 0, CHAN_Y);
1679 FETCH(&r[2], 1, CHAN_Y);
1680
1681 micro_mul( &r[1], &r[1], &r[2] );
1682 micro_add( &r[0], &r[0], &r[1] );
1683
1684 FETCH(&r[1], 0, CHAN_Z);
1685 FETCH(&r[2], 1, CHAN_Z);
1686
1687 micro_mul( &r[1], &r[1], &r[2] );
1688 micro_add( &r[0], &r[0], &r[1] );
1689
1690 FETCH(&r[1], 0, CHAN_W);
1691 FETCH(&r[2], 1, CHAN_W);
1692
1693 micro_mul( &r[1], &r[1], &r[2] );
1694 micro_add( &r[0], &r[0], &r[1] );
1695
1696 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1697 STORE( &r[0], 0, chan_index );
1698 }
1699 break;
1700
1701 case TGSI_OPCODE_DST:
1702 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1703 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1704 }
1705
1706 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1707 FETCH( &r[0], 0, CHAN_Y );
1708 FETCH( &r[1], 1, CHAN_Y);
1709 micro_mul( &r[0], &r[0], &r[1] );
1710 STORE( &r[0], 0, CHAN_Y );
1711 }
1712
1713 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1714 FETCH( &r[0], 0, CHAN_Z );
1715 STORE( &r[0], 0, CHAN_Z );
1716 }
1717
1718 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1719 FETCH( &r[0], 1, CHAN_W );
1720 STORE( &r[0], 0, CHAN_W );
1721 }
1722 break;
1723
1724 case TGSI_OPCODE_MIN:
1725 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1726 FETCH(&r[0], 0, chan_index);
1727 FETCH(&r[1], 1, chan_index);
1728
1729 /* XXX use micro_min()?? */
1730 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1731
1732 STORE(&r[0], 0, chan_index);
1733 }
1734 break;
1735
1736 case TGSI_OPCODE_MAX:
1737 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1738 FETCH(&r[0], 0, chan_index);
1739 FETCH(&r[1], 1, chan_index);
1740
1741 /* XXX use micro_max()?? */
1742 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1743
1744 STORE(&r[0], 0, chan_index );
1745 }
1746 break;
1747
1748 case TGSI_OPCODE_SLT:
1749 /* TGSI_OPCODE_SETLT */
1750 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1751 FETCH( &r[0], 0, chan_index );
1752 FETCH( &r[1], 1, chan_index );
1753 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1754 STORE( &r[0], 0, chan_index );
1755 }
1756 break;
1757
1758 case TGSI_OPCODE_SGE:
1759 /* TGSI_OPCODE_SETGE */
1760 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1761 FETCH( &r[0], 0, chan_index );
1762 FETCH( &r[1], 1, chan_index );
1763 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1764 STORE( &r[0], 0, chan_index );
1765 }
1766 break;
1767
1768 case TGSI_OPCODE_MAD:
1769 /* TGSI_OPCODE_MADD */
1770 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1771 FETCH( &r[0], 0, chan_index );
1772 FETCH( &r[1], 1, chan_index );
1773 micro_mul( &r[0], &r[0], &r[1] );
1774 FETCH( &r[1], 2, chan_index );
1775 micro_add( &r[0], &r[0], &r[1] );
1776 STORE( &r[0], 0, chan_index );
1777 }
1778 break;
1779
1780 case TGSI_OPCODE_SUB:
1781 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1782 FETCH(&r[0], 0, chan_index);
1783 FETCH(&r[1], 1, chan_index);
1784
1785 micro_sub( &r[0], &r[0], &r[1] );
1786
1787 STORE(&r[0], 0, chan_index);
1788 }
1789 break;
1790
1791 case TGSI_OPCODE_LERP:
1792 /* TGSI_OPCODE_LRP */
1793 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1794 FETCH(&r[0], 0, chan_index);
1795 FETCH(&r[1], 1, chan_index);
1796 FETCH(&r[2], 2, chan_index);
1797
1798 micro_sub( &r[1], &r[1], &r[2] );
1799 micro_mul( &r[0], &r[0], &r[1] );
1800 micro_add( &r[0], &r[0], &r[2] );
1801
1802 STORE(&r[0], 0, chan_index);
1803 }
1804 break;
1805
1806 case TGSI_OPCODE_CND:
1807 assert (0);
1808 break;
1809
1810 case TGSI_OPCODE_CND0:
1811 assert (0);
1812 break;
1813
1814 case TGSI_OPCODE_DOT2ADD:
1815 /* TGSI_OPCODE_DP2A */
1816 assert (0);
1817 break;
1818
1819 case TGSI_OPCODE_INDEX:
1820 assert (0);
1821 break;
1822
1823 case TGSI_OPCODE_NEGATE:
1824 assert (0);
1825 break;
1826
1827 case TGSI_OPCODE_FRAC:
1828 /* TGSI_OPCODE_FRC */
1829 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1830 FETCH( &r[0], 0, chan_index );
1831 micro_frc( &r[0], &r[0] );
1832 STORE( &r[0], 0, chan_index );
1833 }
1834 break;
1835
1836 case TGSI_OPCODE_CLAMP:
1837 assert (0);
1838 break;
1839
1840 case TGSI_OPCODE_FLOOR:
1841 /* TGSI_OPCODE_FLR */
1842 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1843 FETCH( &r[0], 0, chan_index );
1844 micro_flr( &r[0], &r[0] );
1845 STORE( &r[0], 0, chan_index );
1846 }
1847 break;
1848
1849 case TGSI_OPCODE_ROUND:
1850 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1851 FETCH( &r[0], 0, chan_index );
1852 micro_rnd( &r[0], &r[0] );
1853 STORE( &r[0], 0, chan_index );
1854 }
1855 break;
1856
1857 case TGSI_OPCODE_EXPBASE2:
1858 /* TGSI_OPCODE_EX2 */
1859 FETCH(&r[0], 0, CHAN_X);
1860
1861 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
1862
1863 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1864 STORE( &r[0], 0, chan_index );
1865 }
1866 break;
1867
1868 case TGSI_OPCODE_LOGBASE2:
1869 /* TGSI_OPCODE_LG2 */
1870 FETCH( &r[0], 0, CHAN_X );
1871 micro_lg2( &r[0], &r[0] );
1872 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1873 STORE( &r[0], 0, chan_index );
1874 }
1875 break;
1876
1877 case TGSI_OPCODE_POWER:
1878 /* TGSI_OPCODE_POW */
1879 FETCH(&r[0], 0, CHAN_X);
1880 FETCH(&r[1], 1, CHAN_X);
1881
1882 micro_pow( &r[0], &r[0], &r[1] );
1883
1884 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1885 STORE( &r[0], 0, chan_index );
1886 }
1887 break;
1888
1889 case TGSI_OPCODE_CROSSPRODUCT:
1890 /* TGSI_OPCODE_XPD */
1891 FETCH(&r[0], 0, CHAN_Y);
1892 FETCH(&r[1], 1, CHAN_Z);
1893
1894 micro_mul( &r[2], &r[0], &r[1] );
1895
1896 FETCH(&r[3], 0, CHAN_Z);
1897 FETCH(&r[4], 1, CHAN_Y);
1898
1899 micro_mul( &r[5], &r[3], &r[4] );
1900 micro_sub( &r[2], &r[2], &r[5] );
1901
1902 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1903 STORE( &r[2], 0, CHAN_X );
1904 }
1905
1906 FETCH(&r[2], 1, CHAN_X);
1907
1908 micro_mul( &r[3], &r[3], &r[2] );
1909
1910 FETCH(&r[5], 0, CHAN_X);
1911
1912 micro_mul( &r[1], &r[1], &r[5] );
1913 micro_sub( &r[3], &r[3], &r[1] );
1914
1915 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1916 STORE( &r[3], 0, CHAN_Y );
1917 }
1918
1919 micro_mul( &r[5], &r[5], &r[4] );
1920 micro_mul( &r[0], &r[0], &r[2] );
1921 micro_sub( &r[5], &r[5], &r[0] );
1922
1923 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1924 STORE( &r[5], 0, CHAN_Z );
1925 }
1926
1927 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1928 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1929 }
1930 break;
1931
1932 case TGSI_OPCODE_MULTIPLYMATRIX:
1933 assert (0);
1934 break;
1935
1936 case TGSI_OPCODE_ABS:
1937 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1938 FETCH(&r[0], 0, chan_index);
1939
1940 micro_abs( &r[0], &r[0] );
1941
1942 STORE(&r[0], 0, chan_index);
1943 }
1944 break;
1945
1946 case TGSI_OPCODE_RCC:
1947 assert (0);
1948 break;
1949
1950 case TGSI_OPCODE_DPH:
1951 FETCH(&r[0], 0, CHAN_X);
1952 FETCH(&r[1], 1, CHAN_X);
1953
1954 micro_mul( &r[0], &r[0], &r[1] );
1955
1956 FETCH(&r[1], 0, CHAN_Y);
1957 FETCH(&r[2], 1, CHAN_Y);
1958
1959 micro_mul( &r[1], &r[1], &r[2] );
1960 micro_add( &r[0], &r[0], &r[1] );
1961
1962 FETCH(&r[1], 0, CHAN_Z);
1963 FETCH(&r[2], 1, CHAN_Z);
1964
1965 micro_mul( &r[1], &r[1], &r[2] );
1966 micro_add( &r[0], &r[0], &r[1] );
1967
1968 FETCH(&r[1], 1, CHAN_W);
1969
1970 micro_add( &r[0], &r[0], &r[1] );
1971
1972 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1973 STORE( &r[0], 0, chan_index );
1974 }
1975 break;
1976
1977 case TGSI_OPCODE_COS:
1978 FETCH(&r[0], 0, CHAN_X);
1979
1980 micro_cos( &r[0], &r[0] );
1981
1982 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1983 STORE( &r[0], 0, chan_index );
1984 }
1985 break;
1986
1987 case TGSI_OPCODE_DDX:
1988 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1989 FETCH( &r[0], 0, chan_index );
1990 micro_ddx( &r[0], &r[0] );
1991 STORE( &r[0], 0, chan_index );
1992 }
1993 break;
1994
1995 case TGSI_OPCODE_DDY:
1996 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1997 FETCH( &r[0], 0, chan_index );
1998 micro_ddy( &r[0], &r[0] );
1999 STORE( &r[0], 0, chan_index );
2000 }
2001 break;
2002
2003 case TGSI_OPCODE_KILP:
2004 exec_kilp (mach, inst);
2005 break;
2006
2007 case TGSI_OPCODE_KIL:
2008 exec_kil (mach, inst);
2009 break;
2010
2011 case TGSI_OPCODE_PK2H:
2012 assert (0);
2013 break;
2014
2015 case TGSI_OPCODE_PK2US:
2016 assert (0);
2017 break;
2018
2019 case TGSI_OPCODE_PK4B:
2020 assert (0);
2021 break;
2022
2023 case TGSI_OPCODE_PK4UB:
2024 assert (0);
2025 break;
2026
2027 case TGSI_OPCODE_RFL:
2028 assert (0);
2029 break;
2030
2031 case TGSI_OPCODE_SEQ:
2032 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2033 FETCH( &r[0], 0, chan_index );
2034 FETCH( &r[1], 1, chan_index );
2035 micro_eq( &r[0], &r[0], &r[1],
2036 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2037 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2038 STORE( &r[0], 0, chan_index );
2039 }
2040 break;
2041
2042 case TGSI_OPCODE_SFL:
2043 assert (0);
2044 break;
2045
2046 case TGSI_OPCODE_SGT:
2047 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2048 FETCH( &r[0], 0, chan_index );
2049 FETCH( &r[1], 1, chan_index );
2050 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2051 STORE( &r[0], 0, chan_index );
2052 }
2053 break;
2054
2055 case TGSI_OPCODE_SIN:
2056 FETCH( &r[0], 0, CHAN_X );
2057 micro_sin( &r[0], &r[0] );
2058 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2059 STORE( &r[0], 0, chan_index );
2060 }
2061 break;
2062
2063 case TGSI_OPCODE_SLE:
2064 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2065 FETCH( &r[0], 0, chan_index );
2066 FETCH( &r[1], 1, chan_index );
2067 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2068 STORE( &r[0], 0, chan_index );
2069 }
2070 break;
2071
2072 case TGSI_OPCODE_SNE:
2073 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2074 FETCH( &r[0], 0, chan_index );
2075 FETCH( &r[1], 1, chan_index );
2076 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2077 STORE( &r[0], 0, chan_index );
2078 }
2079 break;
2080
2081 case TGSI_OPCODE_STR:
2082 assert (0);
2083 break;
2084
2085 case TGSI_OPCODE_TEX:
2086 /* simple texture lookup */
2087 /* src[0] = texcoord */
2088 /* src[1] = sampler unit */
2089 exec_tex(mach, inst, FALSE, FALSE);
2090 break;
2091
2092 case TGSI_OPCODE_TXB:
2093 /* Texture lookup with lod bias */
2094 /* src[0] = texcoord (src[0].w = LOD bias) */
2095 /* src[1] = sampler unit */
2096 exec_tex(mach, inst, TRUE, FALSE);
2097 break;
2098
2099 case TGSI_OPCODE_TXD:
2100 /* Texture lookup with explicit partial derivatives */
2101 /* src[0] = texcoord */
2102 /* src[1] = d[strq]/dx */
2103 /* src[2] = d[strq]/dy */
2104 /* src[3] = sampler unit */
2105 assert (0);
2106 break;
2107
2108 case TGSI_OPCODE_TXL:
2109 /* Texture lookup with explicit LOD */
2110 /* src[0] = texcoord (src[0].w = LOD) */
2111 /* src[1] = sampler unit */
2112 exec_tex(mach, inst, TRUE, FALSE);
2113 break;
2114
2115 case TGSI_OPCODE_TXP:
2116 /* Texture lookup with projection */
2117 /* src[0] = texcoord (src[0].w = projection) */
2118 /* src[1] = sampler unit */
2119 exec_tex(mach, inst, FALSE, TRUE);
2120 break;
2121
2122 case TGSI_OPCODE_UP2H:
2123 assert (0);
2124 break;
2125
2126 case TGSI_OPCODE_UP2US:
2127 assert (0);
2128 break;
2129
2130 case TGSI_OPCODE_UP4B:
2131 assert (0);
2132 break;
2133
2134 case TGSI_OPCODE_UP4UB:
2135 assert (0);
2136 break;
2137
2138 case TGSI_OPCODE_X2D:
2139 assert (0);
2140 break;
2141
2142 case TGSI_OPCODE_ARA:
2143 assert (0);
2144 break;
2145
2146 case TGSI_OPCODE_ARR:
2147 assert (0);
2148 break;
2149
2150 case TGSI_OPCODE_BRA:
2151 assert (0);
2152 break;
2153
2154 case TGSI_OPCODE_CAL:
2155 /* skip the call if no execution channels are enabled */
2156 if (mach->ExecMask) {
2157 /* do the call */
2158
2159 /* push the Cond, Loop, Cont stacks */
2160 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2161 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2162 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2163 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2164 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2165 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2166
2167 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2168 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2169
2170 /* note that PC was already incremented above */
2171 mach->CallStack[mach->CallStackTop++] = *pc;
2172 *pc = inst->InstructionExtLabel.Label;
2173 }
2174 break;
2175
2176 case TGSI_OPCODE_RET:
2177 mach->FuncMask &= ~mach->ExecMask;
2178 UPDATE_EXEC_MASK(mach);
2179
2180 if (mach->FuncMask == 0x0) {
2181 /* really return now (otherwise, keep executing */
2182
2183 if (mach->CallStackTop == 0) {
2184 /* returning from main() */
2185 *pc = -1;
2186 return;
2187 }
2188 *pc = mach->CallStack[--mach->CallStackTop];
2189
2190 /* pop the Cond, Loop, Cont stacks */
2191 assert(mach->CondStackTop > 0);
2192 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2193 assert(mach->LoopStackTop > 0);
2194 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2195 assert(mach->ContStackTop > 0);
2196 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2197 assert(mach->FuncStackTop > 0);
2198 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2199
2200 UPDATE_EXEC_MASK(mach);
2201 }
2202 break;
2203
2204 case TGSI_OPCODE_SSG:
2205 assert (0);
2206 break;
2207
2208 case TGSI_OPCODE_CMP:
2209 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2210 FETCH(&r[0], 0, chan_index);
2211 FETCH(&r[1], 1, chan_index);
2212 FETCH(&r[2], 2, chan_index);
2213
2214 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2215
2216 STORE(&r[0], 0, chan_index);
2217 }
2218 break;
2219
2220 case TGSI_OPCODE_SCS:
2221 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2222 FETCH( &r[0], 0, CHAN_X );
2223 }
2224 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2225 micro_cos( &r[1], &r[0] );
2226 STORE( &r[1], 0, CHAN_X );
2227 }
2228 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2229 micro_sin( &r[1], &r[0] );
2230 STORE( &r[1], 0, CHAN_Y );
2231 }
2232 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2233 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2234 }
2235 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2236 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2237 }
2238 break;
2239
2240 case TGSI_OPCODE_NRM:
2241 assert (0);
2242 break;
2243
2244 case TGSI_OPCODE_DIV:
2245 assert( 0 );
2246 break;
2247
2248 case TGSI_OPCODE_DP2:
2249 FETCH( &r[0], 0, CHAN_X );
2250 FETCH( &r[1], 1, CHAN_X );
2251 micro_mul( &r[0], &r[0], &r[1] );
2252
2253 FETCH( &r[1], 0, CHAN_Y );
2254 FETCH( &r[2], 1, CHAN_Y );
2255 micro_mul( &r[1], &r[1], &r[2] );
2256 micro_add( &r[0], &r[0], &r[1] );
2257
2258 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2259 STORE( &r[0], 0, chan_index );
2260 }
2261 break;
2262
2263 case TGSI_OPCODE_IF:
2264 /* push CondMask */
2265 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2266 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2267 FETCH( &r[0], 0, CHAN_X );
2268 /* update CondMask */
2269 if( ! r[0].u[0] ) {
2270 mach->CondMask &= ~0x1;
2271 }
2272 if( ! r[0].u[1] ) {
2273 mach->CondMask &= ~0x2;
2274 }
2275 if( ! r[0].u[2] ) {
2276 mach->CondMask &= ~0x4;
2277 }
2278 if( ! r[0].u[3] ) {
2279 mach->CondMask &= ~0x8;
2280 }
2281 UPDATE_EXEC_MASK(mach);
2282 /* Todo: If CondMask==0, jump to ELSE */
2283 break;
2284
2285 case TGSI_OPCODE_ELSE:
2286 /* invert CondMask wrt previous mask */
2287 {
2288 uint prevMask;
2289 assert(mach->CondStackTop > 0);
2290 prevMask = mach->CondStack[mach->CondStackTop - 1];
2291 mach->CondMask = ~mach->CondMask & prevMask;
2292 UPDATE_EXEC_MASK(mach);
2293 /* Todo: If CondMask==0, jump to ENDIF */
2294 }
2295 break;
2296
2297 case TGSI_OPCODE_ENDIF:
2298 /* pop CondMask */
2299 assert(mach->CondStackTop > 0);
2300 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2301 UPDATE_EXEC_MASK(mach);
2302 break;
2303
2304 case TGSI_OPCODE_END:
2305 /* halt execution */
2306 *pc = -1;
2307 break;
2308
2309 case TGSI_OPCODE_REP:
2310 assert (0);
2311 break;
2312
2313 case TGSI_OPCODE_ENDREP:
2314 assert (0);
2315 break;
2316
2317 case TGSI_OPCODE_PUSHA:
2318 assert (0);
2319 break;
2320
2321 case TGSI_OPCODE_POPA:
2322 assert (0);
2323 break;
2324
2325 case TGSI_OPCODE_CEIL:
2326 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2327 FETCH( &r[0], 0, chan_index );
2328 micro_ceil( &r[0], &r[0] );
2329 STORE( &r[0], 0, chan_index );
2330 }
2331 break;
2332
2333 case TGSI_OPCODE_I2F:
2334 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2335 FETCH( &r[0], 0, chan_index );
2336 micro_i2f( &r[0], &r[0] );
2337 STORE( &r[0], 0, chan_index );
2338 }
2339 break;
2340
2341 case TGSI_OPCODE_NOT:
2342 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2343 FETCH( &r[0], 0, chan_index );
2344 micro_not( &r[0], &r[0] );
2345 STORE( &r[0], 0, chan_index );
2346 }
2347 break;
2348
2349 case TGSI_OPCODE_TRUNC:
2350 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2351 FETCH( &r[0], 0, chan_index );
2352 micro_trunc( &r[0], &r[0] );
2353 STORE( &r[0], 0, chan_index );
2354 }
2355 break;
2356
2357 case TGSI_OPCODE_SHL:
2358 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2359 FETCH( &r[0], 0, chan_index );
2360 FETCH( &r[1], 1, chan_index );
2361 micro_shl( &r[0], &r[0], &r[1] );
2362 STORE( &r[0], 0, chan_index );
2363 }
2364 break;
2365
2366 case TGSI_OPCODE_SHR:
2367 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2368 FETCH( &r[0], 0, chan_index );
2369 FETCH( &r[1], 1, chan_index );
2370 micro_ishr( &r[0], &r[0], &r[1] );
2371 STORE( &r[0], 0, chan_index );
2372 }
2373 break;
2374
2375 case TGSI_OPCODE_AND:
2376 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2377 FETCH( &r[0], 0, chan_index );
2378 FETCH( &r[1], 1, chan_index );
2379 micro_and( &r[0], &r[0], &r[1] );
2380 STORE( &r[0], 0, chan_index );
2381 }
2382 break;
2383
2384 case TGSI_OPCODE_OR:
2385 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2386 FETCH( &r[0], 0, chan_index );
2387 FETCH( &r[1], 1, chan_index );
2388 micro_or( &r[0], &r[0], &r[1] );
2389 STORE( &r[0], 0, chan_index );
2390 }
2391 break;
2392
2393 case TGSI_OPCODE_MOD:
2394 assert (0);
2395 break;
2396
2397 case TGSI_OPCODE_XOR:
2398 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2399 FETCH( &r[0], 0, chan_index );
2400 FETCH( &r[1], 1, chan_index );
2401 micro_xor( &r[0], &r[0], &r[1] );
2402 STORE( &r[0], 0, chan_index );
2403 }
2404 break;
2405
2406 case TGSI_OPCODE_SAD:
2407 assert (0);
2408 break;
2409
2410 case TGSI_OPCODE_TXF:
2411 assert (0);
2412 break;
2413
2414 case TGSI_OPCODE_TXQ:
2415 assert (0);
2416 break;
2417
2418 case TGSI_OPCODE_EMIT:
2419 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2420 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2421 break;
2422
2423 case TGSI_OPCODE_ENDPRIM:
2424 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2425 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2426 break;
2427
2428 case TGSI_OPCODE_LOOP:
2429 /* fall-through (for now) */
2430 case TGSI_OPCODE_BGNLOOP2:
2431 /* push LoopMask and ContMasks */
2432 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2433 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2434 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2435 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2436 break;
2437
2438 case TGSI_OPCODE_ENDLOOP:
2439 /* fall-through (for now at least) */
2440 case TGSI_OPCODE_ENDLOOP2:
2441 /* Restore ContMask, but don't pop */
2442 assert(mach->ContStackTop > 0);
2443 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2444 UPDATE_EXEC_MASK(mach);
2445 if (mach->ExecMask) {
2446 /* repeat loop: jump to instruction just past BGNLOOP */
2447 *pc = inst->InstructionExtLabel.Label + 1;
2448 }
2449 else {
2450 /* exit loop: pop LoopMask */
2451 assert(mach->LoopStackTop > 0);
2452 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2453 /* pop ContMask */
2454 assert(mach->ContStackTop > 0);
2455 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2456 }
2457 UPDATE_EXEC_MASK(mach);
2458 break;
2459
2460 case TGSI_OPCODE_BRK:
2461 /* turn off loop channels for each enabled exec channel */
2462 mach->LoopMask &= ~mach->ExecMask;
2463 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2464 UPDATE_EXEC_MASK(mach);
2465 break;
2466
2467 case TGSI_OPCODE_CONT:
2468 /* turn off cont channels for each enabled exec channel */
2469 mach->ContMask &= ~mach->ExecMask;
2470 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2471 UPDATE_EXEC_MASK(mach);
2472 break;
2473
2474 case TGSI_OPCODE_BGNSUB:
2475 /* no-op */
2476 break;
2477
2478 case TGSI_OPCODE_ENDSUB:
2479 /* no-op */
2480 break;
2481
2482 case TGSI_OPCODE_NOISE1:
2483 assert( 0 );
2484 break;
2485
2486 case TGSI_OPCODE_NOISE2:
2487 assert( 0 );
2488 break;
2489
2490 case TGSI_OPCODE_NOISE3:
2491 assert( 0 );
2492 break;
2493
2494 case TGSI_OPCODE_NOISE4:
2495 assert( 0 );
2496 break;
2497
2498 case TGSI_OPCODE_NOP:
2499 break;
2500
2501 default:
2502 assert( 0 );
2503 }
2504 }
2505
2506
2507 /**
2508 * Run TGSI interpreter.
2509 * \return bitmask of "alive" quad components
2510 */
2511 uint
2512 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2513 {
2514 uint i;
2515 int pc = 0;
2516
2517 mach->CondMask = 0xf;
2518 mach->LoopMask = 0xf;
2519 mach->ContMask = 0xf;
2520 mach->FuncMask = 0xf;
2521 mach->ExecMask = 0xf;
2522
2523 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2524 assert(mach->CondStackTop == 0);
2525 assert(mach->LoopStackTop == 0);
2526 assert(mach->ContStackTop == 0);
2527 assert(mach->CallStackTop == 0);
2528
2529 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2530 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2531
2532 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2533 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2534 mach->Primitives[0] = 0;
2535 }
2536
2537 for (i = 0; i < QUAD_SIZE; i++) {
2538 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2539 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2540 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2541 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2542 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2543 }
2544
2545 /* execute declarations (interpolants) */
2546 for (i = 0; i < mach->NumDeclarations; i++) {
2547 exec_declaration( mach, mach->Declarations+i );
2548 }
2549
2550 /* execute instructions, until pc is set to -1 */
2551 while (pc != -1) {
2552 assert(pc < (int) mach->NumInstructions);
2553 exec_instruction( mach, mach->Instructions + pc, &pc );
2554 }
2555
2556 #if 0
2557 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2558 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2559 /*
2560 * Scale back depth component.
2561 */
2562 for (i = 0; i < 4; i++)
2563 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2564 }
2565 #endif
2566
2567 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2568 }
2569
2570