Merge tgsi/exec and tgsi/util directories.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask; see UPDATE_EXEC_MASK) which are controlled by the
45 * flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_util.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_parse.h"
58 #include "tgsi/tgsi_util.h"
59 #include "tgsi_exec.h"
60
/* Lane index of each pixel position within a quad (see micro_ddx/micro_ddy). */
#define TILE_TOP_LEFT      0
#define TILE_TOP_RIGHT     1
#define TILE_BOTTOM_LEFT   2
#define TILE_BOTTOM_RIGHT  3
65
/*
 * Short aliases for the utility-register locations declared as
 * TGSI_EXEC_TEMP_* (suffix _I = register Index, suffix _C = Channel).
 */

/* bit-pattern constants */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C

/* floating-point constants */
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C

/* executor bookkeeping registers */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C

/* more floating-point constants */
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C

#define TEMP_R0            TGSI_EXEC_TEMP_R0
96
/** Loop CHAN (an integer lvalue) over the four channel indices 0..3. */
#define FOR_EACH_CHANNEL(CHAN)\
   for ((CHAN) = 0; (CHAN) < 4; (CHAN)++)

/** Non-zero if bit CHAN is set in the write mask of destination register 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Non-zero if bit CHAN is set in the write mask of destination register 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Execute the statement that follows once for every channel enabled in the
 * write mask of destination register 0.
 *
 * The test is spelled "if (!enabled) { } else" so that the macro's hidden
 * `if` already owns an `else`; an `else` written after the macro's body can
 * therefore no longer silently attach to it.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (!IS_CHANNEL_ENABLED( INST, CHAN )) { } else

/** Same as FOR_EACH_ENABLED_CHANNEL, but for destination register 1. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (!IS_CHANNEL_ENABLED2( INST, CHAN )) { } else
113
114
/**
 * Recompute MACH->ExecMask as the bitwise AND of the condition, loop,
 * continue and function masks.  MACH is a pointer expression; it is
 * evaluated more than once, so it must be free of side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
118
119
/* Numeric indices of the four register channels. */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3
124
125
126
127 /**
128 * Initialize machine state by expanding tokens to full instructions,
129 * allocating temporary storage, setting up constants, etc.
130 * After this, we can call tgsi_exec_machine_run() many times.
131 */
132 void
133 tgsi_exec_machine_bind_shader(
134 struct tgsi_exec_machine *mach,
135 const struct tgsi_token *tokens,
136 uint numSamplers,
137 struct tgsi_sampler *samplers)
138 {
139 uint k;
140 struct tgsi_parse_context parse;
141 struct tgsi_exec_labels *labels = &mach->Labels;
142 struct tgsi_full_instruction *instructions;
143 struct tgsi_full_declaration *declarations;
144 uint maxInstructions = 10, numInstructions = 0;
145 uint maxDeclarations = 10, numDeclarations = 0;
146 uint instno = 0;
147
148 #if 0
149 tgsi_dump(tokens, 0);
150 #endif
151
152 mach->Tokens = tokens;
153 mach->Samplers = samplers;
154
155 k = tgsi_parse_init (&parse, mach->Tokens);
156 if (k != TGSI_PARSE_OK) {
157 debug_printf( "Problem parsing!\n" );
158 return;
159 }
160
161 mach->Processor = parse.FullHeader.Processor.Processor;
162 mach->ImmLimit = 0;
163 labels->count = 0;
164
165 declarations = (struct tgsi_full_declaration *)
166 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
167
168 if (!declarations) {
169 return;
170 }
171
172 instructions = (struct tgsi_full_instruction *)
173 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
174
175 if (!instructions) {
176 FREE( declarations );
177 return;
178 }
179
180 while( !tgsi_parse_end_of_tokens( &parse ) ) {
181 uint pointer = parse.Position;
182 uint i;
183
184 tgsi_parse_token( &parse );
185 switch( parse.FullToken.Token.Type ) {
186 case TGSI_TOKEN_TYPE_DECLARATION:
187 /* save expanded declaration */
188 if (numDeclarations == maxDeclarations) {
189 declarations = REALLOC(declarations,
190 maxDeclarations
191 * sizeof(struct tgsi_full_declaration),
192 (maxDeclarations + 10)
193 * sizeof(struct tgsi_full_declaration));
194 maxDeclarations += 10;
195 }
196 memcpy(declarations + numDeclarations,
197 &parse.FullToken.FullDeclaration,
198 sizeof(declarations[0]));
199 numDeclarations++;
200 break;
201
202 case TGSI_TOKEN_TYPE_IMMEDIATE:
203 {
204 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
205 assert( size % 4 == 0 );
206 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
207
208 for( i = 0; i < size; i++ ) {
209 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
210 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
211 }
212 mach->ImmLimit += size / 4;
213 }
214 break;
215
216 case TGSI_TOKEN_TYPE_INSTRUCTION:
217 assert( labels->count < MAX_LABELS );
218
219 labels->labels[labels->count][0] = instno;
220 labels->labels[labels->count][1] = pointer;
221 labels->count++;
222
223 /* save expanded instruction */
224 if (numInstructions == maxInstructions) {
225 instructions = REALLOC(instructions,
226 maxInstructions
227 * sizeof(struct tgsi_full_instruction),
228 (maxInstructions + 10)
229 * sizeof(struct tgsi_full_instruction));
230 maxInstructions += 10;
231 }
232 memcpy(instructions + numInstructions,
233 &parse.FullToken.FullInstruction,
234 sizeof(instructions[0]));
235 numInstructions++;
236 break;
237
238 default:
239 assert( 0 );
240 }
241 }
242 tgsi_parse_free (&parse);
243
244 if (mach->Declarations) {
245 FREE( mach->Declarations );
246 }
247 mach->Declarations = declarations;
248 mach->NumDeclarations = numDeclarations;
249
250 if (mach->Instructions) {
251 FREE( mach->Instructions );
252 }
253 mach->Instructions = instructions;
254 mach->NumInstructions = numInstructions;
255 }
256
257
258 void
259 tgsi_exec_machine_init(
260 struct tgsi_exec_machine *mach )
261 {
262 uint i;
263
264 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
265 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
266
267 /* Setup constants. */
268 for( i = 0; i < 4; i++ ) {
269 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
270 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
271 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
272 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
273 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
274 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
275 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
276 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
277 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
278 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
279 }
280 }
281
282
283 void
284 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
285 {
286 if (mach->Instructions) {
287 FREE(mach->Instructions);
288 mach->Instructions = NULL;
289 mach->NumInstructions = 0;
290 }
291 if (mach->Declarations) {
292 FREE(mach->Declarations);
293 mach->Declarations = NULL;
294 mach->NumDeclarations = 0;
295 }
296 }
297
298
299 static void
300 micro_abs(
301 union tgsi_exec_channel *dst,
302 const union tgsi_exec_channel *src )
303 {
304 dst->f[0] = fabsf( src->f[0] );
305 dst->f[1] = fabsf( src->f[1] );
306 dst->f[2] = fabsf( src->f[2] );
307 dst->f[3] = fabsf( src->f[3] );
308 }
309
310 static void
311 micro_add(
312 union tgsi_exec_channel *dst,
313 const union tgsi_exec_channel *src0,
314 const union tgsi_exec_channel *src1 )
315 {
316 dst->f[0] = src0->f[0] + src1->f[0];
317 dst->f[1] = src0->f[1] + src1->f[1];
318 dst->f[2] = src0->f[2] + src1->f[2];
319 dst->f[3] = src0->f[3] + src1->f[3];
320 }
321
322 static void
323 micro_iadd(
324 union tgsi_exec_channel *dst,
325 const union tgsi_exec_channel *src0,
326 const union tgsi_exec_channel *src1 )
327 {
328 dst->i[0] = src0->i[0] + src1->i[0];
329 dst->i[1] = src0->i[1] + src1->i[1];
330 dst->i[2] = src0->i[2] + src1->i[2];
331 dst->i[3] = src0->i[3] + src1->i[3];
332 }
333
334 static void
335 micro_and(
336 union tgsi_exec_channel *dst,
337 const union tgsi_exec_channel *src0,
338 const union tgsi_exec_channel *src1 )
339 {
340 dst->u[0] = src0->u[0] & src1->u[0];
341 dst->u[1] = src0->u[1] & src1->u[1];
342 dst->u[2] = src0->u[2] & src1->u[2];
343 dst->u[3] = src0->u[3] & src1->u[3];
344 }
345
346 static void
347 micro_ceil(
348 union tgsi_exec_channel *dst,
349 const union tgsi_exec_channel *src )
350 {
351 dst->f[0] = ceilf( src->f[0] );
352 dst->f[1] = ceilf( src->f[1] );
353 dst->f[2] = ceilf( src->f[2] );
354 dst->f[3] = ceilf( src->f[3] );
355 }
356
357 static void
358 micro_cos(
359 union tgsi_exec_channel *dst,
360 const union tgsi_exec_channel *src )
361 {
362 dst->f[0] = cosf( src->f[0] );
363 dst->f[1] = cosf( src->f[1] );
364 dst->f[2] = cosf( src->f[2] );
365 dst->f[3] = cosf( src->f[3] );
366 }
367
368 static void
369 micro_ddx(
370 union tgsi_exec_channel *dst,
371 const union tgsi_exec_channel *src )
372 {
373 dst->f[0] =
374 dst->f[1] =
375 dst->f[2] =
376 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
377 }
378
379 static void
380 micro_ddy(
381 union tgsi_exec_channel *dst,
382 const union tgsi_exec_channel *src )
383 {
384 dst->f[0] =
385 dst->f[1] =
386 dst->f[2] =
387 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
388 }
389
390 static void
391 micro_div(
392 union tgsi_exec_channel *dst,
393 const union tgsi_exec_channel *src0,
394 const union tgsi_exec_channel *src1 )
395 {
396 dst->f[0] = src0->f[0] / src1->f[0];
397 dst->f[1] = src0->f[1] / src1->f[1];
398 dst->f[2] = src0->f[2] / src1->f[2];
399 dst->f[3] = src0->f[3] / src1->f[3];
400 }
401
402 static void
403 micro_udiv(
404 union tgsi_exec_channel *dst,
405 const union tgsi_exec_channel *src0,
406 const union tgsi_exec_channel *src1 )
407 {
408 dst->u[0] = src0->u[0] / src1->u[0];
409 dst->u[1] = src0->u[1] / src1->u[1];
410 dst->u[2] = src0->u[2] / src1->u[2];
411 dst->u[3] = src0->u[3] / src1->u[3];
412 }
413
414 static void
415 micro_eq(
416 union tgsi_exec_channel *dst,
417 const union tgsi_exec_channel *src0,
418 const union tgsi_exec_channel *src1,
419 const union tgsi_exec_channel *src2,
420 const union tgsi_exec_channel *src3 )
421 {
422 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
423 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
424 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
425 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
426 }
427
428 static void
429 micro_ieq(
430 union tgsi_exec_channel *dst,
431 const union tgsi_exec_channel *src0,
432 const union tgsi_exec_channel *src1,
433 const union tgsi_exec_channel *src2,
434 const union tgsi_exec_channel *src3 )
435 {
436 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
437 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
438 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
439 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
440 }
441
442 static void
443 micro_exp2(
444 union tgsi_exec_channel *dst,
445 const union tgsi_exec_channel *src)
446 {
447 dst->f[0] = powf( 2.0f, src->f[0] );
448 dst->f[1] = powf( 2.0f, src->f[1] );
449 dst->f[2] = powf( 2.0f, src->f[2] );
450 dst->f[3] = powf( 2.0f, src->f[3] );
451 }
452
453 static void
454 micro_f2it(
455 union tgsi_exec_channel *dst,
456 const union tgsi_exec_channel *src )
457 {
458 dst->i[0] = (int) src->f[0];
459 dst->i[1] = (int) src->f[1];
460 dst->i[2] = (int) src->f[2];
461 dst->i[3] = (int) src->f[3];
462 }
463
464 static void
465 micro_f2ut(
466 union tgsi_exec_channel *dst,
467 const union tgsi_exec_channel *src )
468 {
469 dst->u[0] = (uint) src->f[0];
470 dst->u[1] = (uint) src->f[1];
471 dst->u[2] = (uint) src->f[2];
472 dst->u[3] = (uint) src->f[3];
473 }
474
475 static void
476 micro_flr(
477 union tgsi_exec_channel *dst,
478 const union tgsi_exec_channel *src )
479 {
480 dst->f[0] = floorf( src->f[0] );
481 dst->f[1] = floorf( src->f[1] );
482 dst->f[2] = floorf( src->f[2] );
483 dst->f[3] = floorf( src->f[3] );
484 }
485
486 static void
487 micro_frc(
488 union tgsi_exec_channel *dst,
489 const union tgsi_exec_channel *src )
490 {
491 dst->f[0] = src->f[0] - floorf( src->f[0] );
492 dst->f[1] = src->f[1] - floorf( src->f[1] );
493 dst->f[2] = src->f[2] - floorf( src->f[2] );
494 dst->f[3] = src->f[3] - floorf( src->f[3] );
495 }
496
497 static void
498 micro_ge(
499 union tgsi_exec_channel *dst,
500 const union tgsi_exec_channel *src0,
501 const union tgsi_exec_channel *src1,
502 const union tgsi_exec_channel *src2,
503 const union tgsi_exec_channel *src3 )
504 {
505 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
506 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
507 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
508 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
509 }
510
511 static void
512 micro_i2f(
513 union tgsi_exec_channel *dst,
514 const union tgsi_exec_channel *src )
515 {
516 dst->f[0] = (float) src->i[0];
517 dst->f[1] = (float) src->i[1];
518 dst->f[2] = (float) src->i[2];
519 dst->f[3] = (float) src->i[3];
520 }
521
522 static void
523 micro_lg2(
524 union tgsi_exec_channel *dst,
525 const union tgsi_exec_channel *src )
526 {
527 dst->f[0] = logf( src->f[0] ) * 1.442695f;
528 dst->f[1] = logf( src->f[1] ) * 1.442695f;
529 dst->f[2] = logf( src->f[2] ) * 1.442695f;
530 dst->f[3] = logf( src->f[3] ) * 1.442695f;
531 }
532
533 static void
534 micro_le(
535 union tgsi_exec_channel *dst,
536 const union tgsi_exec_channel *src0,
537 const union tgsi_exec_channel *src1,
538 const union tgsi_exec_channel *src2,
539 const union tgsi_exec_channel *src3 )
540 {
541 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
542 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
543 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
544 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
545 }
546
547 static void
548 micro_lt(
549 union tgsi_exec_channel *dst,
550 const union tgsi_exec_channel *src0,
551 const union tgsi_exec_channel *src1,
552 const union tgsi_exec_channel *src2,
553 const union tgsi_exec_channel *src3 )
554 {
555 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
556 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
557 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
558 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
559 }
560
561 static void
562 micro_ilt(
563 union tgsi_exec_channel *dst,
564 const union tgsi_exec_channel *src0,
565 const union tgsi_exec_channel *src1,
566 const union tgsi_exec_channel *src2,
567 const union tgsi_exec_channel *src3 )
568 {
569 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
570 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
571 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
572 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
573 }
574
575 static void
576 micro_ult(
577 union tgsi_exec_channel *dst,
578 const union tgsi_exec_channel *src0,
579 const union tgsi_exec_channel *src1,
580 const union tgsi_exec_channel *src2,
581 const union tgsi_exec_channel *src3 )
582 {
583 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
584 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
585 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
586 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
587 }
588
589 static void
590 micro_max(
591 union tgsi_exec_channel *dst,
592 const union tgsi_exec_channel *src0,
593 const union tgsi_exec_channel *src1 )
594 {
595 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
596 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
597 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
598 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
599 }
600
601 static void
602 micro_imax(
603 union tgsi_exec_channel *dst,
604 const union tgsi_exec_channel *src0,
605 const union tgsi_exec_channel *src1 )
606 {
607 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
608 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
609 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
610 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
611 }
612
613 static void
614 micro_umax(
615 union tgsi_exec_channel *dst,
616 const union tgsi_exec_channel *src0,
617 const union tgsi_exec_channel *src1 )
618 {
619 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
620 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
621 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
622 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
623 }
624
625 static void
626 micro_min(
627 union tgsi_exec_channel *dst,
628 const union tgsi_exec_channel *src0,
629 const union tgsi_exec_channel *src1 )
630 {
631 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
632 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
633 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
634 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
635 }
636
637 static void
638 micro_imin(
639 union tgsi_exec_channel *dst,
640 const union tgsi_exec_channel *src0,
641 const union tgsi_exec_channel *src1 )
642 {
643 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
644 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
645 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
646 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
647 }
648
649 static void
650 micro_umin(
651 union tgsi_exec_channel *dst,
652 const union tgsi_exec_channel *src0,
653 const union tgsi_exec_channel *src1 )
654 {
655 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
656 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
657 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
658 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
659 }
660
661 static void
662 micro_umod(
663 union tgsi_exec_channel *dst,
664 const union tgsi_exec_channel *src0,
665 const union tgsi_exec_channel *src1 )
666 {
667 dst->u[0] = src0->u[0] % src1->u[0];
668 dst->u[1] = src0->u[1] % src1->u[1];
669 dst->u[2] = src0->u[2] % src1->u[2];
670 dst->u[3] = src0->u[3] % src1->u[3];
671 }
672
673 static void
674 micro_mul(
675 union tgsi_exec_channel *dst,
676 const union tgsi_exec_channel *src0,
677 const union tgsi_exec_channel *src1 )
678 {
679 dst->f[0] = src0->f[0] * src1->f[0];
680 dst->f[1] = src0->f[1] * src1->f[1];
681 dst->f[2] = src0->f[2] * src1->f[2];
682 dst->f[3] = src0->f[3] * src1->f[3];
683 }
684
685 static void
686 micro_imul(
687 union tgsi_exec_channel *dst,
688 const union tgsi_exec_channel *src0,
689 const union tgsi_exec_channel *src1 )
690 {
691 dst->i[0] = src0->i[0] * src1->i[0];
692 dst->i[1] = src0->i[1] * src1->i[1];
693 dst->i[2] = src0->i[2] * src1->i[2];
694 dst->i[3] = src0->i[3] * src1->i[3];
695 }
696
697 static void
698 micro_imul64(
699 union tgsi_exec_channel *dst0,
700 union tgsi_exec_channel *dst1,
701 const union tgsi_exec_channel *src0,
702 const union tgsi_exec_channel *src1 )
703 {
704 dst1->i[0] = src0->i[0] * src1->i[0];
705 dst1->i[1] = src0->i[1] * src1->i[1];
706 dst1->i[2] = src0->i[2] * src1->i[2];
707 dst1->i[3] = src0->i[3] * src1->i[3];
708 dst0->i[0] = 0;
709 dst0->i[1] = 0;
710 dst0->i[2] = 0;
711 dst0->i[3] = 0;
712 }
713
714 static void
715 micro_umul64(
716 union tgsi_exec_channel *dst0,
717 union tgsi_exec_channel *dst1,
718 const union tgsi_exec_channel *src0,
719 const union tgsi_exec_channel *src1 )
720 {
721 dst1->u[0] = src0->u[0] * src1->u[0];
722 dst1->u[1] = src0->u[1] * src1->u[1];
723 dst1->u[2] = src0->u[2] * src1->u[2];
724 dst1->u[3] = src0->u[3] * src1->u[3];
725 dst0->u[0] = 0;
726 dst0->u[1] = 0;
727 dst0->u[2] = 0;
728 dst0->u[3] = 0;
729 }
730
731 static void
732 micro_movc(
733 union tgsi_exec_channel *dst,
734 const union tgsi_exec_channel *src0,
735 const union tgsi_exec_channel *src1,
736 const union tgsi_exec_channel *src2 )
737 {
738 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
739 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
740 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
741 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
742 }
743
744 static void
745 micro_neg(
746 union tgsi_exec_channel *dst,
747 const union tgsi_exec_channel *src )
748 {
749 dst->f[0] = -src->f[0];
750 dst->f[1] = -src->f[1];
751 dst->f[2] = -src->f[2];
752 dst->f[3] = -src->f[3];
753 }
754
755 static void
756 micro_ineg(
757 union tgsi_exec_channel *dst,
758 const union tgsi_exec_channel *src )
759 {
760 dst->i[0] = -src->i[0];
761 dst->i[1] = -src->i[1];
762 dst->i[2] = -src->i[2];
763 dst->i[3] = -src->i[3];
764 }
765
766 static void
767 micro_not(
768 union tgsi_exec_channel *dst,
769 const union tgsi_exec_channel *src )
770 {
771 dst->u[0] = ~src->u[0];
772 dst->u[1] = ~src->u[1];
773 dst->u[2] = ~src->u[2];
774 dst->u[3] = ~src->u[3];
775 }
776
777 static void
778 micro_or(
779 union tgsi_exec_channel *dst,
780 const union tgsi_exec_channel *src0,
781 const union tgsi_exec_channel *src1 )
782 {
783 dst->u[0] = src0->u[0] | src1->u[0];
784 dst->u[1] = src0->u[1] | src1->u[1];
785 dst->u[2] = src0->u[2] | src1->u[2];
786 dst->u[3] = src0->u[3] | src1->u[3];
787 }
788
789 static void
790 micro_pow(
791 union tgsi_exec_channel *dst,
792 const union tgsi_exec_channel *src0,
793 const union tgsi_exec_channel *src1 )
794 {
795 dst->f[0] = powf( src0->f[0], src1->f[0] );
796 dst->f[1] = powf( src0->f[1], src1->f[1] );
797 dst->f[2] = powf( src0->f[2], src1->f[2] );
798 dst->f[3] = powf( src0->f[3], src1->f[3] );
799 }
800
801 static void
802 micro_rnd(
803 union tgsi_exec_channel *dst,
804 const union tgsi_exec_channel *src )
805 {
806 dst->f[0] = floorf( src->f[0] + 0.5f );
807 dst->f[1] = floorf( src->f[1] + 0.5f );
808 dst->f[2] = floorf( src->f[2] + 0.5f );
809 dst->f[3] = floorf( src->f[3] + 0.5f );
810 }
811
812 static void
813 micro_shl(
814 union tgsi_exec_channel *dst,
815 const union tgsi_exec_channel *src0,
816 const union tgsi_exec_channel *src1 )
817 {
818 dst->i[0] = src0->i[0] << src1->i[0];
819 dst->i[1] = src0->i[1] << src1->i[1];
820 dst->i[2] = src0->i[2] << src1->i[2];
821 dst->i[3] = src0->i[3] << src1->i[3];
822 }
823
824 static void
825 micro_ishr(
826 union tgsi_exec_channel *dst,
827 const union tgsi_exec_channel *src0,
828 const union tgsi_exec_channel *src1 )
829 {
830 dst->i[0] = src0->i[0] >> src1->i[0];
831 dst->i[1] = src0->i[1] >> src1->i[1];
832 dst->i[2] = src0->i[2] >> src1->i[2];
833 dst->i[3] = src0->i[3] >> src1->i[3];
834 }
835
836 static void
837 micro_trunc(
838 union tgsi_exec_channel *dst,
839 const union tgsi_exec_channel *src0 )
840 {
841 dst->f[0] = (float) (int) src0->f[0];
842 dst->f[1] = (float) (int) src0->f[1];
843 dst->f[2] = (float) (int) src0->f[2];
844 dst->f[3] = (float) (int) src0->f[3];
845 }
846
847 static void
848 micro_ushr(
849 union tgsi_exec_channel *dst,
850 const union tgsi_exec_channel *src0,
851 const union tgsi_exec_channel *src1 )
852 {
853 dst->u[0] = src0->u[0] >> src1->u[0];
854 dst->u[1] = src0->u[1] >> src1->u[1];
855 dst->u[2] = src0->u[2] >> src1->u[2];
856 dst->u[3] = src0->u[3] >> src1->u[3];
857 }
858
859 static void
860 micro_sin(
861 union tgsi_exec_channel *dst,
862 const union tgsi_exec_channel *src )
863 {
864 dst->f[0] = sinf( src->f[0] );
865 dst->f[1] = sinf( src->f[1] );
866 dst->f[2] = sinf( src->f[2] );
867 dst->f[3] = sinf( src->f[3] );
868 }
869
870 static void
871 micro_sqrt( union tgsi_exec_channel *dst,
872 const union tgsi_exec_channel *src )
873 {
874 dst->f[0] = sqrtf( src->f[0] );
875 dst->f[1] = sqrtf( src->f[1] );
876 dst->f[2] = sqrtf( src->f[2] );
877 dst->f[3] = sqrtf( src->f[3] );
878 }
879
880 static void
881 micro_sub(
882 union tgsi_exec_channel *dst,
883 const union tgsi_exec_channel *src0,
884 const union tgsi_exec_channel *src1 )
885 {
886 dst->f[0] = src0->f[0] - src1->f[0];
887 dst->f[1] = src0->f[1] - src1->f[1];
888 dst->f[2] = src0->f[2] - src1->f[2];
889 dst->f[3] = src0->f[3] - src1->f[3];
890 }
891
892 static void
893 micro_u2f(
894 union tgsi_exec_channel *dst,
895 const union tgsi_exec_channel *src )
896 {
897 dst->f[0] = (float) src->u[0];
898 dst->f[1] = (float) src->u[1];
899 dst->f[2] = (float) src->u[2];
900 dst->f[3] = (float) src->u[3];
901 }
902
903 static void
904 micro_xor(
905 union tgsi_exec_channel *dst,
906 const union tgsi_exec_channel *src0,
907 const union tgsi_exec_channel *src1 )
908 {
909 dst->u[0] = src0->u[0] ^ src1->u[0];
910 dst->u[1] = src0->u[1] ^ src1->u[1];
911 dst->u[2] = src0->u[2] ^ src1->u[2];
912 dst->u[3] = src0->u[3] ^ src1->u[3];
913 }
914
915 static void
916 fetch_src_file_channel(
917 const struct tgsi_exec_machine *mach,
918 const uint file,
919 const uint swizzle,
920 const union tgsi_exec_channel *index,
921 union tgsi_exec_channel *chan )
922 {
923 switch( swizzle ) {
924 case TGSI_EXTSWIZZLE_X:
925 case TGSI_EXTSWIZZLE_Y:
926 case TGSI_EXTSWIZZLE_Z:
927 case TGSI_EXTSWIZZLE_W:
928 switch( file ) {
929 case TGSI_FILE_CONSTANT:
930 chan->f[0] = mach->Consts[index->i[0]][swizzle];
931 chan->f[1] = mach->Consts[index->i[1]][swizzle];
932 chan->f[2] = mach->Consts[index->i[2]][swizzle];
933 chan->f[3] = mach->Consts[index->i[3]][swizzle];
934 break;
935
936 case TGSI_FILE_INPUT:
937 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
938 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
939 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
940 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
941 break;
942
943 case TGSI_FILE_TEMPORARY:
944 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
945 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
946 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
947 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
948 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
949 break;
950
951 case TGSI_FILE_IMMEDIATE:
952 assert( index->i[0] < (int) mach->ImmLimit );
953 chan->f[0] = mach->Imms[index->i[0]][swizzle];
954 assert( index->i[1] < (int) mach->ImmLimit );
955 chan->f[1] = mach->Imms[index->i[1]][swizzle];
956 assert( index->i[2] < (int) mach->ImmLimit );
957 chan->f[2] = mach->Imms[index->i[2]][swizzle];
958 assert( index->i[3] < (int) mach->ImmLimit );
959 chan->f[3] = mach->Imms[index->i[3]][swizzle];
960 break;
961
962 case TGSI_FILE_ADDRESS:
963 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
964 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
965 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
966 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
967 break;
968
969 case TGSI_FILE_OUTPUT:
970 /* vertex/fragment output vars can be read too */
971 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
972 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
973 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
974 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
975 break;
976
977 default:
978 assert( 0 );
979 }
980 break;
981
982 case TGSI_EXTSWIZZLE_ZERO:
983 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
984 break;
985
986 case TGSI_EXTSWIZZLE_ONE:
987 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
988 break;
989
990 default:
991 assert( 0 );
992 }
993 }
994
995 static void
996 fetch_source(
997 const struct tgsi_exec_machine *mach,
998 union tgsi_exec_channel *chan,
999 const struct tgsi_full_src_register *reg,
1000 const uint chan_index )
1001 {
1002 union tgsi_exec_channel index;
1003 uint swizzle;
1004
1005 index.i[0] =
1006 index.i[1] =
1007 index.i[2] =
1008 index.i[3] = reg->SrcRegister.Index;
1009
1010 if (reg->SrcRegister.Indirect) {
1011 union tgsi_exec_channel index2;
1012 union tgsi_exec_channel indir_index;
1013
1014 index2.i[0] =
1015 index2.i[1] =
1016 index2.i[2] =
1017 index2.i[3] = reg->SrcRegisterInd.Index;
1018
1019 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1020 fetch_src_file_channel(
1021 mach,
1022 reg->SrcRegisterInd.File,
1023 swizzle,
1024 &index2,
1025 &indir_index );
1026
1027 index.i[0] += indir_index.i[0];
1028 index.i[1] += indir_index.i[1];
1029 index.i[2] += indir_index.i[2];
1030 index.i[3] += indir_index.i[3];
1031 }
1032
1033 if( reg->SrcRegister.Dimension ) {
1034 switch( reg->SrcRegister.File ) {
1035 case TGSI_FILE_INPUT:
1036 index.i[0] *= 17;
1037 index.i[1] *= 17;
1038 index.i[2] *= 17;
1039 index.i[3] *= 17;
1040 break;
1041 case TGSI_FILE_CONSTANT:
1042 index.i[0] *= 4096;
1043 index.i[1] *= 4096;
1044 index.i[2] *= 4096;
1045 index.i[3] *= 4096;
1046 break;
1047 default:
1048 assert( 0 );
1049 }
1050
1051 index.i[0] += reg->SrcRegisterDim.Index;
1052 index.i[1] += reg->SrcRegisterDim.Index;
1053 index.i[2] += reg->SrcRegisterDim.Index;
1054 index.i[3] += reg->SrcRegisterDim.Index;
1055
1056 if (reg->SrcRegisterDim.Indirect) {
1057 union tgsi_exec_channel index2;
1058 union tgsi_exec_channel indir_index;
1059
1060 index2.i[0] =
1061 index2.i[1] =
1062 index2.i[2] =
1063 index2.i[3] = reg->SrcRegisterDimInd.Index;
1064
1065 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1066 fetch_src_file_channel(
1067 mach,
1068 reg->SrcRegisterDimInd.File,
1069 swizzle,
1070 &index2,
1071 &indir_index );
1072
1073 index.i[0] += indir_index.i[0];
1074 index.i[1] += indir_index.i[1];
1075 index.i[2] += indir_index.i[2];
1076 index.i[3] += indir_index.i[3];
1077 }
1078 }
1079
1080 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1081 fetch_src_file_channel(
1082 mach,
1083 reg->SrcRegister.File,
1084 swizzle,
1085 &index,
1086 chan );
1087
1088 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1089 case TGSI_UTIL_SIGN_CLEAR:
1090 micro_abs( chan, chan );
1091 break;
1092
1093 case TGSI_UTIL_SIGN_SET:
1094 micro_abs( chan, chan );
1095 micro_neg( chan, chan );
1096 break;
1097
1098 case TGSI_UTIL_SIGN_TOGGLE:
1099 micro_neg( chan, chan );
1100 break;
1101
1102 case TGSI_UTIL_SIGN_KEEP:
1103 break;
1104 }
1105
1106 if (reg->SrcRegisterExtMod.Complement) {
1107 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1108 }
1109 }
1110
1111 static void
1112 store_dest(
1113 struct tgsi_exec_machine *mach,
1114 const union tgsi_exec_channel *chan,
1115 const struct tgsi_full_dst_register *reg,
1116 const struct tgsi_full_instruction *inst,
1117 uint chan_index )
1118 {
1119 union tgsi_exec_channel *dst;
1120
1121 switch( reg->DstRegister.File ) {
1122 case TGSI_FILE_NULL:
1123 return;
1124
1125 case TGSI_FILE_OUTPUT:
1126 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1127 + reg->DstRegister.Index].xyzw[chan_index];
1128 break;
1129
1130 case TGSI_FILE_TEMPORARY:
1131 assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS);
1132 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1133 break;
1134
1135 case TGSI_FILE_ADDRESS:
1136 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1137 break;
1138
1139 default:
1140 assert( 0 );
1141 return;
1142 }
1143
1144 switch (inst->Instruction.Saturate)
1145 {
1146 case TGSI_SAT_NONE:
1147 if (mach->ExecMask & 0x1)
1148 dst->i[0] = chan->i[0];
1149 if (mach->ExecMask & 0x2)
1150 dst->i[1] = chan->i[1];
1151 if (mach->ExecMask & 0x4)
1152 dst->i[2] = chan->i[2];
1153 if (mach->ExecMask & 0x8)
1154 dst->i[3] = chan->i[3];
1155 break;
1156
1157 case TGSI_SAT_ZERO_ONE:
1158 /* XXX need to obey ExecMask here */
1159 micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
1160 micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
1161 break;
1162
1163 case TGSI_SAT_MINUS_PLUS_ONE:
1164 assert( 0 );
1165 break;
1166
1167 default:
1168 assert( 0 );
1169 }
1170 }
1171
/*
 * Shorthands for the opcode implementations.  Both expand to code that
 * refers to variables named `mach` and `inst`, which must be in scope at
 * the point of use.
 *
 * FETCH: read channel CHAN of source operand INDEX into *VAL.
 * STORE: write *VAL to channel CHAN of destination operand INDEX.
 */
#define FETCH(VAL,INDEX,CHAN) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN) )

#define STORE(VAL,INDEX,CHAN) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1177
1178
1179 /**
1180 * Execute ARB-style KIL which is predicated by a src register.
1181 * Kill fragment if any of the four values is less than zero.
1182 */
1183 static void
1184 exec_kilp(struct tgsi_exec_machine *mach,
1185 const struct tgsi_full_instruction *inst)
1186 {
1187 uint uniquemask;
1188 uint chan_index;
1189 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1190 union tgsi_exec_channel r[1];
1191
1192 /* This mask stores component bits that were already tested. Note that
1193 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1194 * tested. */
1195 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1196
1197 for (chan_index = 0; chan_index < 4; chan_index++)
1198 {
1199 uint swizzle;
1200 uint i;
1201
1202 /* unswizzle channel */
1203 swizzle = tgsi_util_get_full_src_register_extswizzle (
1204 &inst->FullSrcRegisters[0],
1205 chan_index);
1206
1207 /* check if the component has not been already tested */
1208 if (uniquemask & (1 << swizzle))
1209 continue;
1210 uniquemask |= 1 << swizzle;
1211
1212 FETCH(&r[0], 0, chan_index);
1213 for (i = 0; i < 4; i++)
1214 if (r[0].f[i] < 0.0f)
1215 kilmask |= 1 << i;
1216 }
1217
1218 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1219 }
1220
1221
1222 /*
1223 * Fetch a texel using STR texture coordinates.
1224 */
1225 static void
1226 fetch_texel( struct tgsi_sampler *sampler,
1227 const union tgsi_exec_channel *s,
1228 const union tgsi_exec_channel *t,
1229 const union tgsi_exec_channel *p,
1230 float lodbias, /* XXX should be float[4] */
1231 union tgsi_exec_channel *r,
1232 union tgsi_exec_channel *g,
1233 union tgsi_exec_channel *b,
1234 union tgsi_exec_channel *a )
1235 {
1236 uint j;
1237 float rgba[NUM_CHANNELS][QUAD_SIZE];
1238
1239 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1240
1241 for (j = 0; j < 4; j++) {
1242 r->f[j] = rgba[0][j];
1243 g->f[j] = rgba[1][j];
1244 b->f[j] = rgba[2][j];
1245 a->f[j] = rgba[3][j];
1246 }
1247 }
1248
1249
1250 static void
1251 exec_tex(struct tgsi_exec_machine *mach,
1252 const struct tgsi_full_instruction *inst,
1253 boolean biasLod,
1254 boolean projected)
1255 {
1256 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1257 union tgsi_exec_channel r[8];
1258 uint chan_index;
1259 float lodBias;
1260
1261 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1262
1263 switch (inst->InstructionExtTexture.Texture) {
1264 case TGSI_TEXTURE_1D:
1265
1266 FETCH(&r[0], 0, CHAN_X);
1267
1268 if (projected) {
1269 FETCH(&r[1], 0, CHAN_W);
1270 micro_div( &r[0], &r[0], &r[1] );
1271 }
1272
1273 if (biasLod) {
1274 FETCH(&r[1], 0, CHAN_W);
1275 lodBias = r[2].f[0];
1276 }
1277 else
1278 lodBias = 0.0;
1279
1280 fetch_texel(&mach->Samplers[unit],
1281 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1282 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1283 break;
1284
1285 case TGSI_TEXTURE_2D:
1286 case TGSI_TEXTURE_RECT:
1287
1288 FETCH(&r[0], 0, CHAN_X);
1289 FETCH(&r[1], 0, CHAN_Y);
1290 FETCH(&r[2], 0, CHAN_Z);
1291
1292 if (projected) {
1293 FETCH(&r[3], 0, CHAN_W);
1294 micro_div( &r[0], &r[0], &r[3] );
1295 micro_div( &r[1], &r[1], &r[3] );
1296 micro_div( &r[2], &r[2], &r[3] );
1297 }
1298
1299 if (biasLod) {
1300 FETCH(&r[3], 0, CHAN_W);
1301 lodBias = r[3].f[0];
1302 }
1303 else
1304 lodBias = 0.0;
1305
1306 fetch_texel(&mach->Samplers[unit],
1307 &r[0], &r[1], &r[2], lodBias, /* inputs */
1308 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1309 break;
1310
1311 case TGSI_TEXTURE_3D:
1312 case TGSI_TEXTURE_CUBE:
1313
1314 FETCH(&r[0], 0, CHAN_X);
1315 FETCH(&r[1], 0, CHAN_Y);
1316 FETCH(&r[2], 0, CHAN_Z);
1317
1318 if (projected) {
1319 FETCH(&r[3], 0, CHAN_W);
1320 micro_div( &r[0], &r[0], &r[3] );
1321 micro_div( &r[1], &r[1], &r[3] );
1322 micro_div( &r[2], &r[2], &r[3] );
1323 }
1324
1325 if (biasLod) {
1326 FETCH(&r[3], 0, CHAN_W);
1327 lodBias = r[3].f[0];
1328 }
1329 else
1330 lodBias = 0.0;
1331
1332 fetch_texel(&mach->Samplers[unit],
1333 &r[0], &r[1], &r[2], lodBias,
1334 &r[0], &r[1], &r[2], &r[3]);
1335 break;
1336
1337 default:
1338 assert (0);
1339 }
1340
1341 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1342 STORE( &r[chan_index], 0, chan_index );
1343 }
1344 }
1345
1346
1347 /**
1348 * Evaluate a constant-valued coefficient at the position of the
1349 * current quad.
1350 */
1351 static void
1352 eval_constant_coef(
1353 struct tgsi_exec_machine *mach,
1354 unsigned attrib,
1355 unsigned chan )
1356 {
1357 unsigned i;
1358
1359 for( i = 0; i < QUAD_SIZE; i++ ) {
1360 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1361 }
1362 }
1363
1364 /**
1365 * Evaluate a linear-valued coefficient at the position of the
1366 * current quad.
1367 */
1368 static void
1369 eval_linear_coef(
1370 struct tgsi_exec_machine *mach,
1371 unsigned attrib,
1372 unsigned chan )
1373 {
1374 const float x = mach->QuadPos.xyzw[0].f[0];
1375 const float y = mach->QuadPos.xyzw[1].f[0];
1376 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1377 const float dady = mach->InterpCoefs[attrib].dady[chan];
1378 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1379 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1380 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1381 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1382 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1383 }
1384
1385 /**
1386 * Evaluate a perspective-valued coefficient at the position of the
1387 * current quad.
1388 */
1389 static void
1390 eval_perspective_coef(
1391 struct tgsi_exec_machine *mach,
1392 unsigned attrib,
1393 unsigned chan )
1394 {
1395 const float x = mach->QuadPos.xyzw[0].f[0];
1396 const float y = mach->QuadPos.xyzw[1].f[0];
1397 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1398 const float dady = mach->InterpCoefs[attrib].dady[chan];
1399 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1400 const float *w = mach->QuadPos.xyzw[3].f;
1401 /* divide by W here */
1402 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1403 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1404 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1405 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1406 }
1407
1408
/* Common signature of the eval_*_coef() attribute interpolation routines. */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1413
1414 static void
1415 exec_declaration(
1416 struct tgsi_exec_machine *mach,
1417 const struct tgsi_full_declaration *decl )
1418 {
1419 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1420 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1421 unsigned first, last, mask;
1422 eval_coef_func eval;
1423
1424 first = decl->DeclarationRange.First;
1425 last = decl->DeclarationRange.Last;
1426 mask = decl->Declaration.UsageMask;
1427
1428 switch( decl->Declaration.Interpolate ) {
1429 case TGSI_INTERPOLATE_CONSTANT:
1430 eval = eval_constant_coef;
1431 break;
1432
1433 case TGSI_INTERPOLATE_LINEAR:
1434 eval = eval_linear_coef;
1435 break;
1436
1437 case TGSI_INTERPOLATE_PERSPECTIVE:
1438 eval = eval_perspective_coef;
1439 break;
1440
1441 default:
1442 assert( 0 );
1443 }
1444
1445 if( mask == TGSI_WRITEMASK_XYZW ) {
1446 unsigned i, j;
1447
1448 for( i = first; i <= last; i++ ) {
1449 for( j = 0; j < NUM_CHANNELS; j++ ) {
1450 eval( mach, i, j );
1451 }
1452 }
1453 }
1454 else {
1455 unsigned i, j;
1456
1457 for( j = 0; j < NUM_CHANNELS; j++ ) {
1458 if( mask & (1 << j) ) {
1459 for( i = first; i <= last; i++ ) {
1460 eval( mach, i, j );
1461 }
1462 }
1463 }
1464 }
1465 }
1466 }
1467 }
1468
1469 static void
1470 exec_instruction(
1471 struct tgsi_exec_machine *mach,
1472 const struct tgsi_full_instruction *inst,
1473 int *pc )
1474 {
1475 uint chan_index;
1476 union tgsi_exec_channel r[8];
1477
1478 (*pc)++;
1479
1480 switch (inst->Instruction.Opcode) {
1481 case TGSI_OPCODE_ARL:
1482 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1483 FETCH( &r[0], 0, chan_index );
1484 micro_f2it( &r[0], &r[0] );
1485 STORE( &r[0], 0, chan_index );
1486 }
1487 break;
1488
1489 case TGSI_OPCODE_MOV:
1490 case TGSI_OPCODE_SWZ:
1491 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1492 FETCH( &r[0], 0, chan_index );
1493 STORE( &r[0], 0, chan_index );
1494 }
1495 break;
1496
1497 case TGSI_OPCODE_LIT:
1498 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1499 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1500 }
1501
1502 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1503 FETCH( &r[0], 0, CHAN_X );
1504 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1505 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1506 STORE( &r[0], 0, CHAN_Y );
1507 }
1508
1509 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1510 FETCH( &r[1], 0, CHAN_Y );
1511 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1512
1513 FETCH( &r[2], 0, CHAN_W );
1514 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1515 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1516 micro_pow( &r[1], &r[1], &r[2] );
1517 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1518 STORE( &r[0], 0, CHAN_Z );
1519 }
1520 }
1521
1522 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1523 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1524 }
1525 break;
1526
1527 case TGSI_OPCODE_RCP:
1528 /* TGSI_OPCODE_RECIP */
1529 FETCH( &r[0], 0, CHAN_X );
1530 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1531 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1532 STORE( &r[0], 0, chan_index );
1533 }
1534 break;
1535
1536 case TGSI_OPCODE_RSQ:
1537 /* TGSI_OPCODE_RECIPSQRT */
1538 FETCH( &r[0], 0, CHAN_X );
1539 micro_sqrt( &r[0], &r[0] );
1540 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1541 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1542 STORE( &r[0], 0, chan_index );
1543 }
1544 break;
1545
1546 case TGSI_OPCODE_EXP:
1547 FETCH( &r[0], 0, CHAN_X );
1548 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1549 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1550 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1551 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1552 }
1553 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1554 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1555 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1556 }
1557 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1558 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1559 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1560 }
1561 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1562 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1563 }
1564 break;
1565
1566 case TGSI_OPCODE_LOG:
1567 FETCH( &r[0], 0, CHAN_X );
1568 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1569 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1570 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1571 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1572 STORE( &r[0], 0, CHAN_X );
1573 }
1574 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1575 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1576 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1577 STORE( &r[0], 0, CHAN_Y );
1578 }
1579 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1580 STORE( &r[1], 0, CHAN_Z );
1581 }
1582 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1583 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1584 }
1585 break;
1586
1587 case TGSI_OPCODE_MUL:
1588 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1589 {
1590 FETCH(&r[0], 0, chan_index);
1591 FETCH(&r[1], 1, chan_index);
1592
1593 micro_mul( &r[0], &r[0], &r[1] );
1594
1595 STORE(&r[0], 0, chan_index);
1596 }
1597 break;
1598
1599 case TGSI_OPCODE_ADD:
1600 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1601 FETCH( &r[0], 0, chan_index );
1602 FETCH( &r[1], 1, chan_index );
1603 micro_add( &r[0], &r[0], &r[1] );
1604 STORE( &r[0], 0, chan_index );
1605 }
1606 break;
1607
1608 case TGSI_OPCODE_DP3:
1609 /* TGSI_OPCODE_DOT3 */
1610 FETCH( &r[0], 0, CHAN_X );
1611 FETCH( &r[1], 1, CHAN_X );
1612 micro_mul( &r[0], &r[0], &r[1] );
1613
1614 FETCH( &r[1], 0, CHAN_Y );
1615 FETCH( &r[2], 1, CHAN_Y );
1616 micro_mul( &r[1], &r[1], &r[2] );
1617 micro_add( &r[0], &r[0], &r[1] );
1618
1619 FETCH( &r[1], 0, CHAN_Z );
1620 FETCH( &r[2], 1, CHAN_Z );
1621 micro_mul( &r[1], &r[1], &r[2] );
1622 micro_add( &r[0], &r[0], &r[1] );
1623
1624 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1625 STORE( &r[0], 0, chan_index );
1626 }
1627 break;
1628
1629 case TGSI_OPCODE_DP4:
1630 /* TGSI_OPCODE_DOT4 */
1631 FETCH(&r[0], 0, CHAN_X);
1632 FETCH(&r[1], 1, CHAN_X);
1633
1634 micro_mul( &r[0], &r[0], &r[1] );
1635
1636 FETCH(&r[1], 0, CHAN_Y);
1637 FETCH(&r[2], 1, CHAN_Y);
1638
1639 micro_mul( &r[1], &r[1], &r[2] );
1640 micro_add( &r[0], &r[0], &r[1] );
1641
1642 FETCH(&r[1], 0, CHAN_Z);
1643 FETCH(&r[2], 1, CHAN_Z);
1644
1645 micro_mul( &r[1], &r[1], &r[2] );
1646 micro_add( &r[0], &r[0], &r[1] );
1647
1648 FETCH(&r[1], 0, CHAN_W);
1649 FETCH(&r[2], 1, CHAN_W);
1650
1651 micro_mul( &r[1], &r[1], &r[2] );
1652 micro_add( &r[0], &r[0], &r[1] );
1653
1654 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1655 STORE( &r[0], 0, chan_index );
1656 }
1657 break;
1658
1659 case TGSI_OPCODE_DST:
1660 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1661 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1662 }
1663
1664 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1665 FETCH( &r[0], 0, CHAN_Y );
1666 FETCH( &r[1], 1, CHAN_Y);
1667 micro_mul( &r[0], &r[0], &r[1] );
1668 STORE( &r[0], 0, CHAN_Y );
1669 }
1670
1671 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1672 FETCH( &r[0], 0, CHAN_Z );
1673 STORE( &r[0], 0, CHAN_Z );
1674 }
1675
1676 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1677 FETCH( &r[0], 1, CHAN_W );
1678 STORE( &r[0], 0, CHAN_W );
1679 }
1680 break;
1681
1682 case TGSI_OPCODE_MIN:
1683 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1684 FETCH(&r[0], 0, chan_index);
1685 FETCH(&r[1], 1, chan_index);
1686
1687 /* XXX use micro_min()?? */
1688 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1689
1690 STORE(&r[0], 0, chan_index);
1691 }
1692 break;
1693
1694 case TGSI_OPCODE_MAX:
1695 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1696 FETCH(&r[0], 0, chan_index);
1697 FETCH(&r[1], 1, chan_index);
1698
1699 /* XXX use micro_max()?? */
1700 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1701
1702 STORE(&r[0], 0, chan_index );
1703 }
1704 break;
1705
1706 case TGSI_OPCODE_SLT:
1707 /* TGSI_OPCODE_SETLT */
1708 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1709 FETCH( &r[0], 0, chan_index );
1710 FETCH( &r[1], 1, chan_index );
1711 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1712 STORE( &r[0], 0, chan_index );
1713 }
1714 break;
1715
1716 case TGSI_OPCODE_SGE:
1717 /* TGSI_OPCODE_SETGE */
1718 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1719 FETCH( &r[0], 0, chan_index );
1720 FETCH( &r[1], 1, chan_index );
1721 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1722 STORE( &r[0], 0, chan_index );
1723 }
1724 break;
1725
1726 case TGSI_OPCODE_MAD:
1727 /* TGSI_OPCODE_MADD */
1728 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1729 FETCH( &r[0], 0, chan_index );
1730 FETCH( &r[1], 1, chan_index );
1731 micro_mul( &r[0], &r[0], &r[1] );
1732 FETCH( &r[1], 2, chan_index );
1733 micro_add( &r[0], &r[0], &r[1] );
1734 STORE( &r[0], 0, chan_index );
1735 }
1736 break;
1737
1738 case TGSI_OPCODE_SUB:
1739 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1740 FETCH(&r[0], 0, chan_index);
1741 FETCH(&r[1], 1, chan_index);
1742
1743 micro_sub( &r[0], &r[0], &r[1] );
1744
1745 STORE(&r[0], 0, chan_index);
1746 }
1747 break;
1748
1749 case TGSI_OPCODE_LERP:
1750 /* TGSI_OPCODE_LRP */
1751 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1752 FETCH(&r[0], 0, chan_index);
1753 FETCH(&r[1], 1, chan_index);
1754 FETCH(&r[2], 2, chan_index);
1755
1756 micro_sub( &r[1], &r[1], &r[2] );
1757 micro_mul( &r[0], &r[0], &r[1] );
1758 micro_add( &r[0], &r[0], &r[2] );
1759
1760 STORE(&r[0], 0, chan_index);
1761 }
1762 break;
1763
1764 case TGSI_OPCODE_CND:
1765 assert (0);
1766 break;
1767
1768 case TGSI_OPCODE_CND0:
1769 assert (0);
1770 break;
1771
1772 case TGSI_OPCODE_DOT2ADD:
1773 /* TGSI_OPCODE_DP2A */
1774 assert (0);
1775 break;
1776
1777 case TGSI_OPCODE_INDEX:
1778 assert (0);
1779 break;
1780
1781 case TGSI_OPCODE_NEGATE:
1782 assert (0);
1783 break;
1784
1785 case TGSI_OPCODE_FRAC:
1786 /* TGSI_OPCODE_FRC */
1787 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1788 FETCH( &r[0], 0, chan_index );
1789 micro_frc( &r[0], &r[0] );
1790 STORE( &r[0], 0, chan_index );
1791 }
1792 break;
1793
1794 case TGSI_OPCODE_CLAMP:
1795 assert (0);
1796 break;
1797
1798 case TGSI_OPCODE_FLOOR:
1799 /* TGSI_OPCODE_FLR */
1800 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1801 FETCH( &r[0], 0, chan_index );
1802 micro_flr( &r[0], &r[0] );
1803 STORE( &r[0], 0, chan_index );
1804 }
1805 break;
1806
1807 case TGSI_OPCODE_ROUND:
1808 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1809 FETCH( &r[0], 0, chan_index );
1810 micro_rnd( &r[0], &r[0] );
1811 STORE( &r[0], 0, chan_index );
1812 }
1813 break;
1814
1815 case TGSI_OPCODE_EXPBASE2:
1816 /* TGSI_OPCODE_EX2 */
1817 FETCH(&r[0], 0, CHAN_X);
1818
1819 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
1820
1821 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1822 STORE( &r[0], 0, chan_index );
1823 }
1824 break;
1825
1826 case TGSI_OPCODE_LOGBASE2:
1827 /* TGSI_OPCODE_LG2 */
1828 FETCH( &r[0], 0, CHAN_X );
1829 micro_lg2( &r[0], &r[0] );
1830 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1831 STORE( &r[0], 0, chan_index );
1832 }
1833 break;
1834
1835 case TGSI_OPCODE_POWER:
1836 /* TGSI_OPCODE_POW */
1837 FETCH(&r[0], 0, CHAN_X);
1838 FETCH(&r[1], 1, CHAN_X);
1839
1840 micro_pow( &r[0], &r[0], &r[1] );
1841
1842 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1843 STORE( &r[0], 0, chan_index );
1844 }
1845 break;
1846
1847 case TGSI_OPCODE_CROSSPRODUCT:
1848 /* TGSI_OPCODE_XPD */
1849 FETCH(&r[0], 0, CHAN_Y);
1850 FETCH(&r[1], 1, CHAN_Z);
1851
1852 micro_mul( &r[2], &r[0], &r[1] );
1853
1854 FETCH(&r[3], 0, CHAN_Z);
1855 FETCH(&r[4], 1, CHAN_Y);
1856
1857 micro_mul( &r[5], &r[3], &r[4] );
1858 micro_sub( &r[2], &r[2], &r[5] );
1859
1860 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1861 STORE( &r[2], 0, CHAN_X );
1862 }
1863
1864 FETCH(&r[2], 1, CHAN_X);
1865
1866 micro_mul( &r[3], &r[3], &r[2] );
1867
1868 FETCH(&r[5], 0, CHAN_X);
1869
1870 micro_mul( &r[1], &r[1], &r[5] );
1871 micro_sub( &r[3], &r[3], &r[1] );
1872
1873 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1874 STORE( &r[3], 0, CHAN_Y );
1875 }
1876
1877 micro_mul( &r[5], &r[5], &r[4] );
1878 micro_mul( &r[0], &r[0], &r[2] );
1879 micro_sub( &r[5], &r[5], &r[0] );
1880
1881 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1882 STORE( &r[5], 0, CHAN_Z );
1883 }
1884
1885 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1886 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1887 }
1888 break;
1889
1890 case TGSI_OPCODE_MULTIPLYMATRIX:
1891 assert (0);
1892 break;
1893
1894 case TGSI_OPCODE_ABS:
1895 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1896 FETCH(&r[0], 0, chan_index);
1897
1898 micro_abs( &r[0], &r[0] );
1899
1900 STORE(&r[0], 0, chan_index);
1901 }
1902 break;
1903
1904 case TGSI_OPCODE_RCC:
1905 assert (0);
1906 break;
1907
1908 case TGSI_OPCODE_DPH:
1909 FETCH(&r[0], 0, CHAN_X);
1910 FETCH(&r[1], 1, CHAN_X);
1911
1912 micro_mul( &r[0], &r[0], &r[1] );
1913
1914 FETCH(&r[1], 0, CHAN_Y);
1915 FETCH(&r[2], 1, CHAN_Y);
1916
1917 micro_mul( &r[1], &r[1], &r[2] );
1918 micro_add( &r[0], &r[0], &r[1] );
1919
1920 FETCH(&r[1], 0, CHAN_Z);
1921 FETCH(&r[2], 1, CHAN_Z);
1922
1923 micro_mul( &r[1], &r[1], &r[2] );
1924 micro_add( &r[0], &r[0], &r[1] );
1925
1926 FETCH(&r[1], 1, CHAN_W);
1927
1928 micro_add( &r[0], &r[0], &r[1] );
1929
1930 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1931 STORE( &r[0], 0, chan_index );
1932 }
1933 break;
1934
1935 case TGSI_OPCODE_COS:
1936 FETCH(&r[0], 0, CHAN_X);
1937
1938 micro_cos( &r[0], &r[0] );
1939
1940 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1941 STORE( &r[0], 0, chan_index );
1942 }
1943 break;
1944
1945 case TGSI_OPCODE_DDX:
1946 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1947 FETCH( &r[0], 0, chan_index );
1948 micro_ddx( &r[0], &r[0] );
1949 STORE( &r[0], 0, chan_index );
1950 }
1951 break;
1952
1953 case TGSI_OPCODE_DDY:
1954 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1955 FETCH( &r[0], 0, chan_index );
1956 micro_ddy( &r[0], &r[0] );
1957 STORE( &r[0], 0, chan_index );
1958 }
1959 break;
1960
1961 case TGSI_OPCODE_KILP:
1962 exec_kilp (mach, inst);
1963 break;
1964
1965 case TGSI_OPCODE_KIL:
1966 /* for enabled ExecMask bits, set the killed bit */
1967 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
1968 break;
1969
1970 case TGSI_OPCODE_PK2H:
1971 assert (0);
1972 break;
1973
1974 case TGSI_OPCODE_PK2US:
1975 assert (0);
1976 break;
1977
1978 case TGSI_OPCODE_PK4B:
1979 assert (0);
1980 break;
1981
1982 case TGSI_OPCODE_PK4UB:
1983 assert (0);
1984 break;
1985
1986 case TGSI_OPCODE_RFL:
1987 assert (0);
1988 break;
1989
1990 case TGSI_OPCODE_SEQ:
1991 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1992 FETCH( &r[0], 0, chan_index );
1993 FETCH( &r[1], 1, chan_index );
1994 micro_eq( &r[0], &r[0], &r[1],
1995 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
1996 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1997 STORE( &r[0], 0, chan_index );
1998 }
1999 break;
2000
2001 case TGSI_OPCODE_SFL:
2002 assert (0);
2003 break;
2004
2005 case TGSI_OPCODE_SGT:
2006 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2007 FETCH( &r[0], 0, chan_index );
2008 FETCH( &r[1], 1, chan_index );
2009 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2010 STORE( &r[0], 0, chan_index );
2011 }
2012 break;
2013
2014 case TGSI_OPCODE_SIN:
2015 FETCH( &r[0], 0, CHAN_X );
2016 micro_sin( &r[0], &r[0] );
2017 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2018 STORE( &r[0], 0, chan_index );
2019 }
2020 break;
2021
2022 case TGSI_OPCODE_SLE:
2023 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2024 FETCH( &r[0], 0, chan_index );
2025 FETCH( &r[1], 1, chan_index );
2026 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2027 STORE( &r[0], 0, chan_index );
2028 }
2029 break;
2030
2031 case TGSI_OPCODE_SNE:
2032 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2033 FETCH( &r[0], 0, chan_index );
2034 FETCH( &r[1], 1, chan_index );
2035 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2036 STORE( &r[0], 0, chan_index );
2037 }
2038 break;
2039
2040 case TGSI_OPCODE_STR:
2041 assert (0);
2042 break;
2043
2044 case TGSI_OPCODE_TEX:
2045 /* simple texture lookup */
2046 /* src[0] = texcoord */
2047 /* src[1] = sampler unit */
2048 exec_tex(mach, inst, FALSE, FALSE);
2049 break;
2050
2051 case TGSI_OPCODE_TXB:
2052 /* Texture lookup with lod bias */
2053 /* src[0] = texcoord (src[0].w = LOD bias) */
2054 /* src[1] = sampler unit */
2055 exec_tex(mach, inst, TRUE, FALSE);
2056 break;
2057
2058 case TGSI_OPCODE_TXD:
2059 /* Texture lookup with explicit partial derivatives */
2060 /* src[0] = texcoord */
2061 /* src[1] = d[strq]/dx */
2062 /* src[2] = d[strq]/dy */
2063 /* src[3] = sampler unit */
2064 assert (0);
2065 break;
2066
2067 case TGSI_OPCODE_TXL:
2068 /* Texture lookup with explicit LOD */
2069 /* src[0] = texcoord (src[0].w = LOD) */
2070 /* src[1] = sampler unit */
2071 exec_tex(mach, inst, TRUE, FALSE);
2072 break;
2073
2074 case TGSI_OPCODE_TXP:
2075 /* Texture lookup with projection */
2076 /* src[0] = texcoord (src[0].w = projection) */
2077 /* src[1] = sampler unit */
2078 exec_tex(mach, inst, FALSE, TRUE);
2079 break;
2080
2081 case TGSI_OPCODE_UP2H:
2082 assert (0);
2083 break;
2084
2085 case TGSI_OPCODE_UP2US:
2086 assert (0);
2087 break;
2088
2089 case TGSI_OPCODE_UP4B:
2090 assert (0);
2091 break;
2092
2093 case TGSI_OPCODE_UP4UB:
2094 assert (0);
2095 break;
2096
2097 case TGSI_OPCODE_X2D:
2098 assert (0);
2099 break;
2100
2101 case TGSI_OPCODE_ARA:
2102 assert (0);
2103 break;
2104
2105 case TGSI_OPCODE_ARR:
2106 assert (0);
2107 break;
2108
2109 case TGSI_OPCODE_BRA:
2110 assert (0);
2111 break;
2112
2113 case TGSI_OPCODE_CAL:
2114 /* skip the call if no execution channels are enabled */
2115 if (mach->ExecMask) {
2116 /* do the call */
2117
2118 /* push the Cond, Loop, Cont stacks */
2119 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2120 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2121 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2122 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2123 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2124 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2125
2126 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2127 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2128
2129 /* note that PC was already incremented above */
2130 mach->CallStack[mach->CallStackTop++] = *pc;
2131 *pc = inst->InstructionExtLabel.Label;
2132 }
2133 break;
2134
2135 case TGSI_OPCODE_RET:
2136 mach->FuncMask &= ~mach->ExecMask;
2137 UPDATE_EXEC_MASK(mach);
2138
2139 if (mach->ExecMask == 0x0) {
2140 /* really return now (otherwise, keep executing) */
2141
2142 if (mach->CallStackTop == 0) {
2143 /* returning from main() */
2144 *pc = -1;
2145 return;
2146 }
2147 *pc = mach->CallStack[--mach->CallStackTop];
2148
2149 /* pop the Cond, Loop, Cont stacks */
2150 assert(mach->CondStackTop > 0);
2151 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2152 assert(mach->LoopStackTop > 0);
2153 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2154 assert(mach->ContStackTop > 0);
2155 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2156 assert(mach->FuncStackTop > 0);
2157 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2158
2159 UPDATE_EXEC_MASK(mach);
2160 }
2161 break;
2162
2163 case TGSI_OPCODE_SSG:
2164 assert (0);
2165 break;
2166
2167 case TGSI_OPCODE_CMP:
2168 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2169 FETCH(&r[0], 0, chan_index);
2170 FETCH(&r[1], 1, chan_index);
2171 FETCH(&r[2], 2, chan_index);
2172
2173 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2174
2175 STORE(&r[0], 0, chan_index);
2176 }
2177 break;
2178
2179 case TGSI_OPCODE_SCS:
2180 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2181 FETCH( &r[0], 0, CHAN_X );
2182 }
2183 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2184 micro_cos( &r[1], &r[0] );
2185 STORE( &r[1], 0, CHAN_X );
2186 }
2187 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2188 micro_sin( &r[1], &r[0] );
2189 STORE( &r[1], 0, CHAN_Y );
2190 }
2191 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2192 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2193 }
2194 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2195 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2196 }
2197 break;
2198
2199 case TGSI_OPCODE_NRM:
2200 assert (0);
2201 break;
2202
2203 case TGSI_OPCODE_DIV:
2204 assert( 0 );
2205 break;
2206
2207 case TGSI_OPCODE_DP2:
2208 FETCH( &r[0], 0, CHAN_X );
2209 FETCH( &r[1], 1, CHAN_X );
2210 micro_mul( &r[0], &r[0], &r[1] );
2211
2212 FETCH( &r[1], 0, CHAN_Y );
2213 FETCH( &r[2], 1, CHAN_Y );
2214 micro_mul( &r[1], &r[1], &r[2] );
2215 micro_add( &r[0], &r[0], &r[1] );
2216
2217 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2218 STORE( &r[0], 0, chan_index );
2219 }
2220 break;
2221
2222 case TGSI_OPCODE_IF:
2223 /* push CondMask */
2224 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2225 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2226 FETCH( &r[0], 0, CHAN_X );
2227 /* update CondMask */
2228 if( ! r[0].u[0] ) {
2229 mach->CondMask &= ~0x1;
2230 }
2231 if( ! r[0].u[1] ) {
2232 mach->CondMask &= ~0x2;
2233 }
2234 if( ! r[0].u[2] ) {
2235 mach->CondMask &= ~0x4;
2236 }
2237 if( ! r[0].u[3] ) {
2238 mach->CondMask &= ~0x8;
2239 }
2240 UPDATE_EXEC_MASK(mach);
2241 /* Todo: If CondMask==0, jump to ELSE */
2242 break;
2243
2244 case TGSI_OPCODE_ELSE:
2245 /* invert CondMask wrt previous mask */
2246 {
2247 uint prevMask;
2248 assert(mach->CondStackTop > 0);
2249 prevMask = mach->CondStack[mach->CondStackTop - 1];
2250 mach->CondMask = ~mach->CondMask & prevMask;
2251 UPDATE_EXEC_MASK(mach);
2252 /* Todo: If CondMask==0, jump to ENDIF */
2253 }
2254 break;
2255
2256 case TGSI_OPCODE_ENDIF:
2257 /* pop CondMask */
2258 assert(mach->CondStackTop > 0);
2259 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2260 UPDATE_EXEC_MASK(mach);
2261 break;
2262
2263 case TGSI_OPCODE_END:
2264 /* halt execution */
2265 *pc = -1;
2266 break;
2267
2268 case TGSI_OPCODE_REP:
2269 assert (0);
2270 break;
2271
2272 case TGSI_OPCODE_ENDREP:
2273 assert (0);
2274 break;
2275
2276 case TGSI_OPCODE_PUSHA:
2277 assert (0);
2278 break;
2279
2280 case TGSI_OPCODE_POPA:
2281 assert (0);
2282 break;
2283
2284 case TGSI_OPCODE_CEIL:
2285 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2286 FETCH( &r[0], 0, chan_index );
2287 micro_ceil( &r[0], &r[0] );
2288 STORE( &r[0], 0, chan_index );
2289 }
2290 break;
2291
2292 case TGSI_OPCODE_I2F:
2293 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2294 FETCH( &r[0], 0, chan_index );
2295 micro_i2f( &r[0], &r[0] );
2296 STORE( &r[0], 0, chan_index );
2297 }
2298 break;
2299
2300 case TGSI_OPCODE_NOT:
2301 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2302 FETCH( &r[0], 0, chan_index );
2303 micro_not( &r[0], &r[0] );
2304 STORE( &r[0], 0, chan_index );
2305 }
2306 break;
2307
2308 case TGSI_OPCODE_TRUNC:
2309 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2310 FETCH( &r[0], 0, chan_index );
2311 micro_trunc( &r[0], &r[0] );
2312 STORE( &r[0], 0, chan_index );
2313 }
2314 break;
2315
2316 case TGSI_OPCODE_SHL:
2317 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2318 FETCH( &r[0], 0, chan_index );
2319 FETCH( &r[1], 1, chan_index );
2320 micro_shl( &r[0], &r[0], &r[1] );
2321 STORE( &r[0], 0, chan_index );
2322 }
2323 break;
2324
2325 case TGSI_OPCODE_SHR:
2326 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2327 FETCH( &r[0], 0, chan_index );
2328 FETCH( &r[1], 1, chan_index );
2329 micro_ishr( &r[0], &r[0], &r[1] );
2330 STORE( &r[0], 0, chan_index );
2331 }
2332 break;
2333
2334 case TGSI_OPCODE_AND:
2335 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2336 FETCH( &r[0], 0, chan_index );
2337 FETCH( &r[1], 1, chan_index );
2338 micro_and( &r[0], &r[0], &r[1] );
2339 STORE( &r[0], 0, chan_index );
2340 }
2341 break;
2342
2343 case TGSI_OPCODE_OR:
2344 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2345 FETCH( &r[0], 0, chan_index );
2346 FETCH( &r[1], 1, chan_index );
2347 micro_or( &r[0], &r[0], &r[1] );
2348 STORE( &r[0], 0, chan_index );
2349 }
2350 break;
2351
2352 case TGSI_OPCODE_MOD:
2353 assert (0);
2354 break;
2355
2356 case TGSI_OPCODE_XOR:
2357 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2358 FETCH( &r[0], 0, chan_index );
2359 FETCH( &r[1], 1, chan_index );
2360 micro_xor( &r[0], &r[0], &r[1] );
2361 STORE( &r[0], 0, chan_index );
2362 }
2363 break;
2364
2365 case TGSI_OPCODE_SAD:
2366 assert (0);
2367 break;
2368
2369 case TGSI_OPCODE_TXF:
2370 assert (0);
2371 break;
2372
2373 case TGSI_OPCODE_TXQ:
2374 assert (0);
2375 break;
2376
2377 case TGSI_OPCODE_EMIT:
2378 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2379 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2380 break;
2381
2382 case TGSI_OPCODE_ENDPRIM:
2383 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2384 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2385 break;
2386
2387 case TGSI_OPCODE_LOOP:
2388 /* fall-through (for now) */
2389 case TGSI_OPCODE_BGNLOOP2:
2390 /* push LoopMask and ContMasks */
2391 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2392 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2393 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2394 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2395 break;
2396
2397 case TGSI_OPCODE_ENDLOOP:
2398 /* fall-through (for now at least) */
2399 case TGSI_OPCODE_ENDLOOP2:
2400 /* Restore ContMask, but don't pop */
2401 assert(mach->ContStackTop > 0);
2402 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2403 UPDATE_EXEC_MASK(mach);
2404 if (mach->ExecMask) {
2405 /* repeat loop: jump to instruction just past BGNLOOP */
2406 *pc = inst->InstructionExtLabel.Label + 1;
2407 }
2408 else {
2409 /* exit loop: pop LoopMask */
2410 assert(mach->LoopStackTop > 0);
2411 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2412 /* pop ContMask */
2413 assert(mach->ContStackTop > 0);
2414 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2415 }
2416 UPDATE_EXEC_MASK(mach);
2417 break;
2418
2419 case TGSI_OPCODE_BRK:
2420 /* turn off loop channels for each enabled exec channel */
2421 mach->LoopMask &= ~mach->ExecMask;
2422 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2423 UPDATE_EXEC_MASK(mach);
2424 break;
2425
2426 case TGSI_OPCODE_CONT:
2427 /* turn off cont channels for each enabled exec channel */
2428 mach->ContMask &= ~mach->ExecMask;
2429 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2430 UPDATE_EXEC_MASK(mach);
2431 break;
2432
2433 case TGSI_OPCODE_BGNSUB:
2434 /* no-op */
2435 break;
2436
2437 case TGSI_OPCODE_ENDSUB:
2438 /* no-op */
2439 break;
2440
2441 case TGSI_OPCODE_NOISE1:
2442 assert( 0 );
2443 break;
2444
2445 case TGSI_OPCODE_NOISE2:
2446 assert( 0 );
2447 break;
2448
2449 case TGSI_OPCODE_NOISE3:
2450 assert( 0 );
2451 break;
2452
2453 case TGSI_OPCODE_NOISE4:
2454 assert( 0 );
2455 break;
2456
2457 case TGSI_OPCODE_NOP:
2458 break;
2459
2460 default:
2461 assert( 0 );
2462 }
2463 }
2464
2465
2466 /**
2467 * Run TGSI interpreter.
2468 * \return bitmask of "alive" quad components
2469 */
2470 uint
2471 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2472 {
2473 uint i;
2474 int pc = 0;
2475
2476 mach->CondMask = 0xf;
2477 mach->LoopMask = 0xf;
2478 mach->ContMask = 0xf;
2479 mach->FuncMask = 0xf;
2480 mach->ExecMask = 0xf;
2481
2482 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2483 assert(mach->CondStackTop == 0);
2484 assert(mach->LoopStackTop == 0);
2485 assert(mach->ContStackTop == 0);
2486 assert(mach->CallStackTop == 0);
2487
2488 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2489 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2490
2491 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2492 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2493 mach->Primitives[0] = 0;
2494 }
2495
2496
2497 /* execute declarations (interpolants) */
2498 for (i = 0; i < mach->NumDeclarations; i++) {
2499 exec_declaration( mach, mach->Declarations+i );
2500 }
2501
2502 /* execute instructions, until pc is set to -1 */
2503 while (pc != -1) {
2504 assert(pc < (int) mach->NumInstructions);
2505 exec_instruction( mach, mach->Instructions + pc, &pc );
2506 }
2507
2508 #if 0
2509 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2510 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2511 /*
2512 * Scale back depth component.
2513 */
2514 for (i = 0; i < 4; i++)
2515 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2516 }
2517 #endif
2518
2519 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2520 }
2521
2522