336ae1c8b6d73abb5b7bc7f4470cfd7ba00a6f5f
[mesa.git] / src / mesa / pipe / tgsi / exec / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK), which are updated as flow
 * control instructions such as IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT execute.
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_util.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "pipe/tgsi/util/tgsi_parse.h"
58 #include "pipe/tgsi/util/tgsi_util.h"
59 #include "tgsi_exec.h"
60
/*
 * Lane indices into a channel's four-element arrays, named by position
 * within a 2x2 quad.  micro_ddx() and micro_ddy() use them to pick the
 * lanes whose difference forms the derivative.
 */
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3
65
66 /*
67 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
68 */
69 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
70 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
71 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
72 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
73 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
74 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
75 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
76 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
77 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
78 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
79 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
80 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
81 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
82 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
83 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
84 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
85 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
86 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
87 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
88 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
89 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
90 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
91 #define TEMP_R0 TGSI_EXEC_TEMP_R0
92
/**
 * Iterate CHAN over the four channel indices 0..3.
 * CHAN must be a modifiable integer lvalue; it is evaluated several times.
 */
#define FOR_EACH_CHANNEL(CHAN)\
   for ((CHAN) = 0; (CHAN) < 4; (CHAN)++)

/** Non-zero if bit CHAN is set in the write mask of INST's first destination. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Non-zero if bit CHAN is set in the write mask of INST's second destination. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Run the following statement once for every channel enabled in the first
 * destination's write mask.  The expansion ends in an unbraced "if", so do
 * not follow the statement with an "else".
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same as FOR_EACH_ENABLED_CHANNEL, but for the second destination. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
109
110
/**
 * Recompute the execution mask as the AND of the conditional, loop,
 * continue and function masks.  MACH is a pointer to the machine; it is
 * evaluated more than once, so it must be free of side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & (MACH)->ContMask & (MACH)->FuncMask)
114
115
/* Indices of the X, Y, Z and W channels of a register. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
120
121
122
123 static void
124 tgsi_exec_prepare( struct tgsi_exec_machine *mach )
125 {
126 struct tgsi_exec_labels *labels = &mach->Labels;
127 struct tgsi_parse_context parse;
128 struct tgsi_full_instruction *instructions;
129 struct tgsi_full_declaration *declarations;
130 uint maxInstructions = 10, numInstructions = 0;
131 uint maxDeclarations = 10, numDeclarations = 0;
132 uint k;
133 uint instno = 0;
134
135 mach->ImmLimit = 0;
136 labels->count = 0;
137
138 declarations = (struct tgsi_full_declaration *)
139 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
140
141 instructions = (struct tgsi_full_instruction *)
142 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
143
144 k = tgsi_parse_init( &parse, mach->Tokens );
145 if (k != TGSI_PARSE_OK) {
146 debug_printf("Problem parsing!\n");
147 return;
148 }
149
150 while( !tgsi_parse_end_of_tokens( &parse ) ) {
151 uint pointer = parse.Position;
152 uint i;
153
154 tgsi_parse_token( &parse );
155 switch( parse.FullToken.Token.Type ) {
156 case TGSI_TOKEN_TYPE_DECLARATION:
157 /* save expanded declaration */
158 if (numDeclarations == maxDeclarations) {
159 declarations = REALLOC(declarations,
160 maxDeclarations
161 * sizeof(struct tgsi_full_instruction),
162 (maxDeclarations + 10)
163 * sizeof(struct tgsi_full_instruction));
164 maxDeclarations += 10;
165 }
166 memcpy(declarations + numDeclarations,
167 &parse.FullToken.FullInstruction,
168 sizeof(declarations[0]));
169 numDeclarations++;
170 break;
171
172 case TGSI_TOKEN_TYPE_IMMEDIATE:
173 {
174 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
175 assert( size % 4 == 0 );
176 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
177
178 for( i = 0; i < size; i++ ) {
179 mach->Imms[mach->ImmLimit + i / 4][i % 4] = parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
180 }
181 mach->ImmLimit += size / 4;
182 }
183 break;
184
185 case TGSI_TOKEN_TYPE_INSTRUCTION:
186 assert( labels->count < 128 );
187
188 labels->labels[labels->count][0] = instno;
189 labels->labels[labels->count][1] = pointer;
190 labels->count++;
191
192 /* save expanded instruction */
193 if (numInstructions == maxInstructions) {
194 instructions = REALLOC(instructions,
195 maxInstructions
196 * sizeof(struct tgsi_full_instruction),
197 (maxInstructions + 10)
198 * sizeof(struct tgsi_full_instruction));
199 maxInstructions += 10;
200 }
201 memcpy(instructions + numInstructions,
202 &parse.FullToken.FullInstruction,
203 sizeof(instructions[0]));
204 numInstructions++;
205 break;
206
207 default:
208 assert( 0 );
209 }
210 }
211 tgsi_parse_free (&parse);
212
213 if (mach->Declarations) {
214 FREE( mach->Declarations );
215 }
216 mach->Declarations = declarations;
217 mach->NumDeclarations = numDeclarations;
218
219 if (mach->Instructions) {
220 FREE( mach->Instructions );
221 }
222 mach->Instructions = instructions;
223 mach->NumInstructions = numInstructions;
224 }
225
226
227 /**
228 * Initialize machine state by expanding tokens to full instructions,
229 * allocating temporary storage, setting up constants, etc.
230 * After this, we can call tgsi_exec_machine_run() many times.
231 */
232 void
233 tgsi_exec_machine_init(
234 struct tgsi_exec_machine *mach,
235 const struct tgsi_token *tokens,
236 uint numSamplers,
237 struct tgsi_sampler *samplers)
238 {
239 uint i, k;
240 struct tgsi_parse_context parse;
241
242 #if 0
243 tgsi_dump(tokens, 0);
244 #endif
245
246 mach->Tokens = tokens;
247
248 mach->Samplers = samplers;
249
250 k = tgsi_parse_init (&parse, mach->Tokens);
251 if (k != TGSI_PARSE_OK) {
252 debug_printf( "Problem parsing!\n" );
253 return;
254 }
255
256 mach->Processor = parse.FullHeader.Processor.Processor;
257 tgsi_parse_free (&parse);
258
259 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
260 mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
261
262 /* Setup constants. */
263 for( i = 0; i < 4; i++ ) {
264 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
265 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
266 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
267 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
268 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
269 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
270 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
271 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
272 }
273
274 tgsi_exec_prepare( mach );
275 }
276
277
278 void
279 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
280 {
281 if (mach->Instructions) {
282 FREE(mach->Instructions);
283 mach->Instructions = NULL;
284 mach->NumInstructions = 0;
285 }
286 if (mach->Declarations) {
287 FREE(mach->Declarations);
288 mach->Declarations = NULL;
289 mach->NumDeclarations = 0;
290 }
291 }
292
293
294 static void
295 micro_abs(
296 union tgsi_exec_channel *dst,
297 const union tgsi_exec_channel *src )
298 {
299 dst->f[0] = (float) fabs( (double) src->f[0] );
300 dst->f[1] = (float) fabs( (double) src->f[1] );
301 dst->f[2] = (float) fabs( (double) src->f[2] );
302 dst->f[3] = (float) fabs( (double) src->f[3] );
303 }
304
305 static void
306 micro_add(
307 union tgsi_exec_channel *dst,
308 const union tgsi_exec_channel *src0,
309 const union tgsi_exec_channel *src1 )
310 {
311 dst->f[0] = src0->f[0] + src1->f[0];
312 dst->f[1] = src0->f[1] + src1->f[1];
313 dst->f[2] = src0->f[2] + src1->f[2];
314 dst->f[3] = src0->f[3] + src1->f[3];
315 }
316
317 static void
318 micro_iadd(
319 union tgsi_exec_channel *dst,
320 const union tgsi_exec_channel *src0,
321 const union tgsi_exec_channel *src1 )
322 {
323 dst->i[0] = src0->i[0] + src1->i[0];
324 dst->i[1] = src0->i[1] + src1->i[1];
325 dst->i[2] = src0->i[2] + src1->i[2];
326 dst->i[3] = src0->i[3] + src1->i[3];
327 }
328
329 static void
330 micro_and(
331 union tgsi_exec_channel *dst,
332 const union tgsi_exec_channel *src0,
333 const union tgsi_exec_channel *src1 )
334 {
335 dst->u[0] = src0->u[0] & src1->u[0];
336 dst->u[1] = src0->u[1] & src1->u[1];
337 dst->u[2] = src0->u[2] & src1->u[2];
338 dst->u[3] = src0->u[3] & src1->u[3];
339 }
340
341 static void
342 micro_ceil(
343 union tgsi_exec_channel *dst,
344 const union tgsi_exec_channel *src )
345 {
346 dst->f[0] = (float) ceil( (double) src->f[0] );
347 dst->f[1] = (float) ceil( (double) src->f[1] );
348 dst->f[2] = (float) ceil( (double) src->f[2] );
349 dst->f[3] = (float) ceil( (double) src->f[3] );
350 }
351
352 static void
353 micro_cos(
354 union tgsi_exec_channel *dst,
355 const union tgsi_exec_channel *src )
356 {
357 dst->f[0] = (float) cos( (double) src->f[0] );
358 dst->f[1] = (float) cos( (double) src->f[1] );
359 dst->f[2] = (float) cos( (double) src->f[2] );
360 dst->f[3] = (float) cos( (double) src->f[3] );
361 }
362
363 static void
364 micro_ddx(
365 union tgsi_exec_channel *dst,
366 const union tgsi_exec_channel *src )
367 {
368 dst->f[0] =
369 dst->f[1] =
370 dst->f[2] =
371 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
372 }
373
374 static void
375 micro_ddy(
376 union tgsi_exec_channel *dst,
377 const union tgsi_exec_channel *src )
378 {
379 dst->f[0] =
380 dst->f[1] =
381 dst->f[2] =
382 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
383 }
384
385 static void
386 micro_div(
387 union tgsi_exec_channel *dst,
388 const union tgsi_exec_channel *src0,
389 const union tgsi_exec_channel *src1 )
390 {
391 dst->f[0] = src0->f[0] / src1->f[0];
392 dst->f[1] = src0->f[1] / src1->f[1];
393 dst->f[2] = src0->f[2] / src1->f[2];
394 dst->f[3] = src0->f[3] / src1->f[3];
395 }
396
397 static void
398 micro_udiv(
399 union tgsi_exec_channel *dst,
400 const union tgsi_exec_channel *src0,
401 const union tgsi_exec_channel *src1 )
402 {
403 dst->u[0] = src0->u[0] / src1->u[0];
404 dst->u[1] = src0->u[1] / src1->u[1];
405 dst->u[2] = src0->u[2] / src1->u[2];
406 dst->u[3] = src0->u[3] / src1->u[3];
407 }
408
409 static void
410 micro_eq(
411 union tgsi_exec_channel *dst,
412 const union tgsi_exec_channel *src0,
413 const union tgsi_exec_channel *src1,
414 const union tgsi_exec_channel *src2,
415 const union tgsi_exec_channel *src3 )
416 {
417 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
418 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
419 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
420 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
421 }
422
423 static void
424 micro_ieq(
425 union tgsi_exec_channel *dst,
426 const union tgsi_exec_channel *src0,
427 const union tgsi_exec_channel *src1,
428 const union tgsi_exec_channel *src2,
429 const union tgsi_exec_channel *src3 )
430 {
431 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
432 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
433 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
434 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
435 }
436
437 static void
438 micro_exp2(
439 union tgsi_exec_channel *dst,
440 const union tgsi_exec_channel *src)
441 {
442 dst->f[0] = (float) pow( 2.0, (double) src->f[0] );
443 dst->f[1] = (float) pow( 2.0, (double) src->f[1] );
444 dst->f[2] = (float) pow( 2.0, (double) src->f[2] );
445 dst->f[3] = (float) pow( 2.0, (double) src->f[3] );
446 }
447
448 static void
449 micro_f2it(
450 union tgsi_exec_channel *dst,
451 const union tgsi_exec_channel *src )
452 {
453 dst->i[0] = (int) src->f[0];
454 dst->i[1] = (int) src->f[1];
455 dst->i[2] = (int) src->f[2];
456 dst->i[3] = (int) src->f[3];
457 }
458
459 static void
460 micro_f2ut(
461 union tgsi_exec_channel *dst,
462 const union tgsi_exec_channel *src )
463 {
464 dst->u[0] = (uint) src->f[0];
465 dst->u[1] = (uint) src->f[1];
466 dst->u[2] = (uint) src->f[2];
467 dst->u[3] = (uint) src->f[3];
468 }
469
470 static void
471 micro_flr(
472 union tgsi_exec_channel *dst,
473 const union tgsi_exec_channel *src )
474 {
475 dst->f[0] = (float) floor( (double) src->f[0] );
476 dst->f[1] = (float) floor( (double) src->f[1] );
477 dst->f[2] = (float) floor( (double) src->f[2] );
478 dst->f[3] = (float) floor( (double) src->f[3] );
479 }
480
481 static void
482 micro_frc(
483 union tgsi_exec_channel *dst,
484 const union tgsi_exec_channel *src )
485 {
486 dst->f[0] = src->f[0] - (float) floor( (double) src->f[0] );
487 dst->f[1] = src->f[1] - (float) floor( (double) src->f[1] );
488 dst->f[2] = src->f[2] - (float) floor( (double) src->f[2] );
489 dst->f[3] = src->f[3] - (float) floor( (double) src->f[3] );
490 }
491
492 static void
493 micro_ge(
494 union tgsi_exec_channel *dst,
495 const union tgsi_exec_channel *src0,
496 const union tgsi_exec_channel *src1,
497 const union tgsi_exec_channel *src2,
498 const union tgsi_exec_channel *src3 )
499 {
500 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
501 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
502 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
503 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
504 }
505
506 static void
507 micro_i2f(
508 union tgsi_exec_channel *dst,
509 const union tgsi_exec_channel *src )
510 {
511 dst->f[0] = (float) src->i[0];
512 dst->f[1] = (float) src->i[1];
513 dst->f[2] = (float) src->i[2];
514 dst->f[3] = (float) src->i[3];
515 }
516
517 static void
518 micro_lg2(
519 union tgsi_exec_channel *dst,
520 const union tgsi_exec_channel *src )
521 {
522 dst->f[0] = (float) log( (double) src->f[0] ) * 1.442695f;
523 dst->f[1] = (float) log( (double) src->f[1] ) * 1.442695f;
524 dst->f[2] = (float) log( (double) src->f[2] ) * 1.442695f;
525 dst->f[3] = (float) log( (double) src->f[3] ) * 1.442695f;
526 }
527
528 static void
529 micro_lt(
530 union tgsi_exec_channel *dst,
531 const union tgsi_exec_channel *src0,
532 const union tgsi_exec_channel *src1,
533 const union tgsi_exec_channel *src2,
534 const union tgsi_exec_channel *src3 )
535 {
536 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
537 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
538 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
539 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
540 }
541
542 static void
543 micro_ilt(
544 union tgsi_exec_channel *dst,
545 const union tgsi_exec_channel *src0,
546 const union tgsi_exec_channel *src1,
547 const union tgsi_exec_channel *src2,
548 const union tgsi_exec_channel *src3 )
549 {
550 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
551 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
552 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
553 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
554 }
555
556 static void
557 micro_ult(
558 union tgsi_exec_channel *dst,
559 const union tgsi_exec_channel *src0,
560 const union tgsi_exec_channel *src1,
561 const union tgsi_exec_channel *src2,
562 const union tgsi_exec_channel *src3 )
563 {
564 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
565 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
566 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
567 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
568 }
569
570 static void
571 micro_max(
572 union tgsi_exec_channel *dst,
573 const union tgsi_exec_channel *src0,
574 const union tgsi_exec_channel *src1 )
575 {
576 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
577 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
578 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
579 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
580 }
581
582 static void
583 micro_imax(
584 union tgsi_exec_channel *dst,
585 const union tgsi_exec_channel *src0,
586 const union tgsi_exec_channel *src1 )
587 {
588 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
589 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
590 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
591 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
592 }
593
594 static void
595 micro_umax(
596 union tgsi_exec_channel *dst,
597 const union tgsi_exec_channel *src0,
598 const union tgsi_exec_channel *src1 )
599 {
600 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
601 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
602 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
603 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
604 }
605
606 static void
607 micro_min(
608 union tgsi_exec_channel *dst,
609 const union tgsi_exec_channel *src0,
610 const union tgsi_exec_channel *src1 )
611 {
612 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
613 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
614 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
615 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
616 }
617
618 static void
619 micro_imin(
620 union tgsi_exec_channel *dst,
621 const union tgsi_exec_channel *src0,
622 const union tgsi_exec_channel *src1 )
623 {
624 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
625 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
626 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
627 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
628 }
629
630 static void
631 micro_umin(
632 union tgsi_exec_channel *dst,
633 const union tgsi_exec_channel *src0,
634 const union tgsi_exec_channel *src1 )
635 {
636 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
637 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
638 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
639 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
640 }
641
642 static void
643 micro_umod(
644 union tgsi_exec_channel *dst,
645 const union tgsi_exec_channel *src0,
646 const union tgsi_exec_channel *src1 )
647 {
648 dst->u[0] = src0->u[0] % src1->u[0];
649 dst->u[1] = src0->u[1] % src1->u[1];
650 dst->u[2] = src0->u[2] % src1->u[2];
651 dst->u[3] = src0->u[3] % src1->u[3];
652 }
653
654 static void
655 micro_mul(
656 union tgsi_exec_channel *dst,
657 const union tgsi_exec_channel *src0,
658 const union tgsi_exec_channel *src1 )
659 {
660 dst->f[0] = src0->f[0] * src1->f[0];
661 dst->f[1] = src0->f[1] * src1->f[1];
662 dst->f[2] = src0->f[2] * src1->f[2];
663 dst->f[3] = src0->f[3] * src1->f[3];
664 }
665
666 static void
667 micro_imul(
668 union tgsi_exec_channel *dst,
669 const union tgsi_exec_channel *src0,
670 const union tgsi_exec_channel *src1 )
671 {
672 dst->i[0] = src0->i[0] * src1->i[0];
673 dst->i[1] = src0->i[1] * src1->i[1];
674 dst->i[2] = src0->i[2] * src1->i[2];
675 dst->i[3] = src0->i[3] * src1->i[3];
676 }
677
678 static void
679 micro_imul64(
680 union tgsi_exec_channel *dst0,
681 union tgsi_exec_channel *dst1,
682 const union tgsi_exec_channel *src0,
683 const union tgsi_exec_channel *src1 )
684 {
685 dst1->i[0] = src0->i[0] * src1->i[0];
686 dst1->i[1] = src0->i[1] * src1->i[1];
687 dst1->i[2] = src0->i[2] * src1->i[2];
688 dst1->i[3] = src0->i[3] * src1->i[3];
689 dst0->i[0] = 0;
690 dst0->i[1] = 0;
691 dst0->i[2] = 0;
692 dst0->i[3] = 0;
693 }
694
695 static void
696 micro_umul64(
697 union tgsi_exec_channel *dst0,
698 union tgsi_exec_channel *dst1,
699 const union tgsi_exec_channel *src0,
700 const union tgsi_exec_channel *src1 )
701 {
702 dst1->u[0] = src0->u[0] * src1->u[0];
703 dst1->u[1] = src0->u[1] * src1->u[1];
704 dst1->u[2] = src0->u[2] * src1->u[2];
705 dst1->u[3] = src0->u[3] * src1->u[3];
706 dst0->u[0] = 0;
707 dst0->u[1] = 0;
708 dst0->u[2] = 0;
709 dst0->u[3] = 0;
710 }
711
712 static void
713 micro_movc(
714 union tgsi_exec_channel *dst,
715 const union tgsi_exec_channel *src0,
716 const union tgsi_exec_channel *src1,
717 const union tgsi_exec_channel *src2 )
718 {
719 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
720 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
721 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
722 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
723 }
724
725 static void
726 micro_neg(
727 union tgsi_exec_channel *dst,
728 const union tgsi_exec_channel *src )
729 {
730 dst->f[0] = -src->f[0];
731 dst->f[1] = -src->f[1];
732 dst->f[2] = -src->f[2];
733 dst->f[3] = -src->f[3];
734 }
735
736 static void
737 micro_ineg(
738 union tgsi_exec_channel *dst,
739 const union tgsi_exec_channel *src )
740 {
741 dst->i[0] = -src->i[0];
742 dst->i[1] = -src->i[1];
743 dst->i[2] = -src->i[2];
744 dst->i[3] = -src->i[3];
745 }
746
747 static void
748 micro_not(
749 union tgsi_exec_channel *dst,
750 const union tgsi_exec_channel *src )
751 {
752 dst->u[0] = ~src->u[0];
753 dst->u[1] = ~src->u[1];
754 dst->u[2] = ~src->u[2];
755 dst->u[3] = ~src->u[3];
756 }
757
758 static void
759 micro_or(
760 union tgsi_exec_channel *dst,
761 const union tgsi_exec_channel *src0,
762 const union tgsi_exec_channel *src1 )
763 {
764 dst->u[0] = src0->u[0] | src1->u[0];
765 dst->u[1] = src0->u[1] | src1->u[1];
766 dst->u[2] = src0->u[2] | src1->u[2];
767 dst->u[3] = src0->u[3] | src1->u[3];
768 }
769
770 static void
771 micro_pow(
772 union tgsi_exec_channel *dst,
773 const union tgsi_exec_channel *src0,
774 const union tgsi_exec_channel *src1 )
775 {
776 dst->f[0] = (float) pow( (double) src0->f[0], (double) src1->f[0] );
777 dst->f[1] = (float) pow( (double) src0->f[1], (double) src1->f[1] );
778 dst->f[2] = (float) pow( (double) src0->f[2], (double) src1->f[2] );
779 dst->f[3] = (float) pow( (double) src0->f[3], (double) src1->f[3] );
780 }
781
782 static void
783 micro_rnd(
784 union tgsi_exec_channel *dst,
785 const union tgsi_exec_channel *src )
786 {
787 dst->f[0] = (float) floor( (double) (src->f[0] + 0.5f) );
788 dst->f[1] = (float) floor( (double) (src->f[1] + 0.5f) );
789 dst->f[2] = (float) floor( (double) (src->f[2] + 0.5f) );
790 dst->f[3] = (float) floor( (double) (src->f[3] + 0.5f) );
791 }
792
793 static void
794 micro_shl(
795 union tgsi_exec_channel *dst,
796 const union tgsi_exec_channel *src0,
797 const union tgsi_exec_channel *src1 )
798 {
799 dst->i[0] = src0->i[0] << src1->i[0];
800 dst->i[1] = src0->i[1] << src1->i[1];
801 dst->i[2] = src0->i[2] << src1->i[2];
802 dst->i[3] = src0->i[3] << src1->i[3];
803 }
804
805 static void
806 micro_ishr(
807 union tgsi_exec_channel *dst,
808 const union tgsi_exec_channel *src0,
809 const union tgsi_exec_channel *src1 )
810 {
811 dst->i[0] = src0->i[0] >> src1->i[0];
812 dst->i[1] = src0->i[1] >> src1->i[1];
813 dst->i[2] = src0->i[2] >> src1->i[2];
814 dst->i[3] = src0->i[3] >> src1->i[3];
815 }
816
817 static void
818 micro_trunc(
819 union tgsi_exec_channel *dst,
820 const union tgsi_exec_channel *src0 )
821 {
822 dst->f[0] = (float) (int) src0->f[0];
823 dst->f[1] = (float) (int) src0->f[1];
824 dst->f[2] = (float) (int) src0->f[2];
825 dst->f[3] = (float) (int) src0->f[3];
826 }
827
828 static void
829 micro_ushr(
830 union tgsi_exec_channel *dst,
831 const union tgsi_exec_channel *src0,
832 const union tgsi_exec_channel *src1 )
833 {
834 dst->u[0] = src0->u[0] >> src1->u[0];
835 dst->u[1] = src0->u[1] >> src1->u[1];
836 dst->u[2] = src0->u[2] >> src1->u[2];
837 dst->u[3] = src0->u[3] >> src1->u[3];
838 }
839
840 static void
841 micro_sin(
842 union tgsi_exec_channel *dst,
843 const union tgsi_exec_channel *src )
844 {
845 dst->f[0] = (float) sin( (double) src->f[0] );
846 dst->f[1] = (float) sin( (double) src->f[1] );
847 dst->f[2] = (float) sin( (double) src->f[2] );
848 dst->f[3] = (float) sin( (double) src->f[3] );
849 }
850
851 static void
852 micro_sqrt( union tgsi_exec_channel *dst,
853 const union tgsi_exec_channel *src )
854 {
855 dst->f[0] = (float) sqrt( (double) src->f[0] );
856 dst->f[1] = (float) sqrt( (double) src->f[1] );
857 dst->f[2] = (float) sqrt( (double) src->f[2] );
858 dst->f[3] = (float) sqrt( (double) src->f[3] );
859 }
860
861 static void
862 micro_sub(
863 union tgsi_exec_channel *dst,
864 const union tgsi_exec_channel *src0,
865 const union tgsi_exec_channel *src1 )
866 {
867 dst->f[0] = src0->f[0] - src1->f[0];
868 dst->f[1] = src0->f[1] - src1->f[1];
869 dst->f[2] = src0->f[2] - src1->f[2];
870 dst->f[3] = src0->f[3] - src1->f[3];
871 }
872
873 static void
874 micro_u2f(
875 union tgsi_exec_channel *dst,
876 const union tgsi_exec_channel *src )
877 {
878 dst->f[0] = (float) src->u[0];
879 dst->f[1] = (float) src->u[1];
880 dst->f[2] = (float) src->u[2];
881 dst->f[3] = (float) src->u[3];
882 }
883
884 static void
885 micro_xor(
886 union tgsi_exec_channel *dst,
887 const union tgsi_exec_channel *src0,
888 const union tgsi_exec_channel *src1 )
889 {
890 dst->u[0] = src0->u[0] ^ src1->u[0];
891 dst->u[1] = src0->u[1] ^ src1->u[1];
892 dst->u[2] = src0->u[2] ^ src1->u[2];
893 dst->u[3] = src0->u[3] ^ src1->u[3];
894 }
895
896 static void
897 fetch_src_file_channel(
898 const struct tgsi_exec_machine *mach,
899 const uint file,
900 const uint swizzle,
901 const union tgsi_exec_channel *index,
902 union tgsi_exec_channel *chan )
903 {
904 switch( swizzle ) {
905 case TGSI_EXTSWIZZLE_X:
906 case TGSI_EXTSWIZZLE_Y:
907 case TGSI_EXTSWIZZLE_Z:
908 case TGSI_EXTSWIZZLE_W:
909 switch( file ) {
910 case TGSI_FILE_CONSTANT:
911 chan->f[0] = mach->Consts[index->i[0]][swizzle];
912 chan->f[1] = mach->Consts[index->i[1]][swizzle];
913 chan->f[2] = mach->Consts[index->i[2]][swizzle];
914 chan->f[3] = mach->Consts[index->i[3]][swizzle];
915 break;
916
917 case TGSI_FILE_INPUT:
918 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
919 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
920 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
921 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
922 break;
923
924 case TGSI_FILE_TEMPORARY:
925 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
926 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
927 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
928 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
929 break;
930
931 case TGSI_FILE_IMMEDIATE:
932 assert( index->i[0] < (int) mach->ImmLimit );
933 chan->f[0] = mach->Imms[index->i[0]][swizzle];
934 assert( index->i[1] < (int) mach->ImmLimit );
935 chan->f[1] = mach->Imms[index->i[1]][swizzle];
936 assert( index->i[2] < (int) mach->ImmLimit );
937 chan->f[2] = mach->Imms[index->i[2]][swizzle];
938 assert( index->i[3] < (int) mach->ImmLimit );
939 chan->f[3] = mach->Imms[index->i[3]][swizzle];
940 break;
941
942 case TGSI_FILE_ADDRESS:
943 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
944 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
945 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
946 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
947 break;
948
949 case TGSI_FILE_OUTPUT:
950 /* vertex/fragment output vars can be read too */
951 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
952 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
953 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
954 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
955 break;
956
957 default:
958 assert( 0 );
959 }
960 break;
961
962 case TGSI_EXTSWIZZLE_ZERO:
963 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
964 break;
965
966 case TGSI_EXTSWIZZLE_ONE:
967 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
968 break;
969
970 default:
971 assert( 0 );
972 }
973 }
974
975 static void
976 fetch_source(
977 const struct tgsi_exec_machine *mach,
978 union tgsi_exec_channel *chan,
979 const struct tgsi_full_src_register *reg,
980 const uint chan_index )
981 {
982 union tgsi_exec_channel index;
983 uint swizzle;
984
985 index.i[0] =
986 index.i[1] =
987 index.i[2] =
988 index.i[3] = reg->SrcRegister.Index;
989
990 if (reg->SrcRegister.Indirect) {
991 union tgsi_exec_channel index2;
992 union tgsi_exec_channel indir_index;
993
994 index2.i[0] =
995 index2.i[1] =
996 index2.i[2] =
997 index2.i[3] = reg->SrcRegisterInd.Index;
998
999 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1000 fetch_src_file_channel(
1001 mach,
1002 reg->SrcRegisterInd.File,
1003 swizzle,
1004 &index2,
1005 &indir_index );
1006
1007 index.i[0] += indir_index.i[0];
1008 index.i[1] += indir_index.i[1];
1009 index.i[2] += indir_index.i[2];
1010 index.i[3] += indir_index.i[3];
1011 }
1012
1013 if( reg->SrcRegister.Dimension ) {
1014 switch( reg->SrcRegister.File ) {
1015 case TGSI_FILE_INPUT:
1016 index.i[0] *= 17;
1017 index.i[1] *= 17;
1018 index.i[2] *= 17;
1019 index.i[3] *= 17;
1020 break;
1021 case TGSI_FILE_CONSTANT:
1022 index.i[0] *= 4096;
1023 index.i[1] *= 4096;
1024 index.i[2] *= 4096;
1025 index.i[3] *= 4096;
1026 break;
1027 default:
1028 assert( 0 );
1029 }
1030
1031 index.i[0] += reg->SrcRegisterDim.Index;
1032 index.i[1] += reg->SrcRegisterDim.Index;
1033 index.i[2] += reg->SrcRegisterDim.Index;
1034 index.i[3] += reg->SrcRegisterDim.Index;
1035
1036 if (reg->SrcRegisterDim.Indirect) {
1037 union tgsi_exec_channel index2;
1038 union tgsi_exec_channel indir_index;
1039
1040 index2.i[0] =
1041 index2.i[1] =
1042 index2.i[2] =
1043 index2.i[3] = reg->SrcRegisterDimInd.Index;
1044
1045 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1046 fetch_src_file_channel(
1047 mach,
1048 reg->SrcRegisterDimInd.File,
1049 swizzle,
1050 &index2,
1051 &indir_index );
1052
1053 index.i[0] += indir_index.i[0];
1054 index.i[1] += indir_index.i[1];
1055 index.i[2] += indir_index.i[2];
1056 index.i[3] += indir_index.i[3];
1057 }
1058 }
1059
1060 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1061 fetch_src_file_channel(
1062 mach,
1063 reg->SrcRegister.File,
1064 swizzle,
1065 &index,
1066 chan );
1067
1068 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1069 case TGSI_UTIL_SIGN_CLEAR:
1070 micro_abs( chan, chan );
1071 break;
1072
1073 case TGSI_UTIL_SIGN_SET:
1074 micro_abs( chan, chan );
1075 micro_neg( chan, chan );
1076 break;
1077
1078 case TGSI_UTIL_SIGN_TOGGLE:
1079 micro_neg( chan, chan );
1080 break;
1081
1082 case TGSI_UTIL_SIGN_KEEP:
1083 break;
1084 }
1085
1086 if (reg->SrcRegisterExtMod.Complement) {
1087 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1088 }
1089 }
1090
1091 static void
1092 store_dest(
1093 struct tgsi_exec_machine *mach,
1094 const union tgsi_exec_channel *chan,
1095 const struct tgsi_full_dst_register *reg,
1096 const struct tgsi_full_instruction *inst,
1097 uint chan_index )
1098 {
1099 union tgsi_exec_channel *dst;
1100
1101 switch( reg->DstRegister.File ) {
1102 case TGSI_FILE_NULL:
1103 return;
1104
1105 case TGSI_FILE_OUTPUT:
1106 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1107 + reg->DstRegister.Index].xyzw[chan_index];
1108 break;
1109
1110 case TGSI_FILE_TEMPORARY:
1111 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1112 break;
1113
1114 case TGSI_FILE_ADDRESS:
1115 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1116 break;
1117
1118 default:
1119 assert( 0 );
1120 return;
1121 }
1122
1123 switch (inst->Instruction.Saturate)
1124 {
1125 case TGSI_SAT_NONE:
1126 if (mach->ExecMask & 0x1)
1127 dst->i[0] = chan->i[0];
1128 if (mach->ExecMask & 0x2)
1129 dst->i[1] = chan->i[1];
1130 if (mach->ExecMask & 0x4)
1131 dst->i[2] = chan->i[2];
1132 if (mach->ExecMask & 0x8)
1133 dst->i[3] = chan->i[3];
1134 break;
1135
1136 case TGSI_SAT_ZERO_ONE:
1137 /* XXX need to obey ExecMask here */
1138 micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
1139 micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
1140 break;
1141
1142 case TGSI_SAT_MINUS_PLUS_ONE:
1143 assert( 0 );
1144 break;
1145
1146 default:
1147 assert( 0 );
1148 }
1149 }
1150
/*
 * Shorthands for the instruction handlers below.  Both expand to code
 * that uses the local variables `mach' and `inst' of the calling function.
 *
 * FETCH: read channel CHAN of source operand INDEX into VAL.
 */
#define FETCH(VAL,INDEX,CHAN) \
   fetch_source( mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN )

/* STORE: write VAL to channel CHAN of destination operand INDEX. */
#define STORE(VAL,INDEX,CHAN) \
   store_dest( mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1156
1157
1158 /**
1159 * Execute ARB-style KIL which is predicated by a src register.
1160 * Kill fragment if any of the four values is less than zero.
1161 */
1162 static void
1163 exec_kilp(struct tgsi_exec_machine *mach,
1164 const struct tgsi_full_instruction *inst)
1165 {
1166 uint uniquemask;
1167 uint chan_index;
1168 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1169 union tgsi_exec_channel r[1];
1170
1171 /* This mask stores component bits that were already tested. Note that
1172 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1173 * tested. */
1174 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1175
1176 for (chan_index = 0; chan_index < 4; chan_index++)
1177 {
1178 uint swizzle;
1179 uint i;
1180
1181 /* unswizzle channel */
1182 swizzle = tgsi_util_get_full_src_register_extswizzle (
1183 &inst->FullSrcRegisters[0],
1184 chan_index);
1185
1186 /* check if the component has not been already tested */
1187 if (uniquemask & (1 << swizzle))
1188 continue;
1189 uniquemask |= 1 << swizzle;
1190
1191 FETCH(&r[0], 0, chan_index);
1192 for (i = 0; i < 4; i++)
1193 if (r[0].f[i] < 0.0f)
1194 kilmask |= 1 << i;
1195 }
1196
1197 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1198 }
1199
1200
1201 /*
1202 * Fetch a texel using STR texture coordinates.
1203 */
1204 static void
1205 fetch_texel( struct tgsi_sampler *sampler,
1206 const union tgsi_exec_channel *s,
1207 const union tgsi_exec_channel *t,
1208 const union tgsi_exec_channel *p,
1209 float lodbias, /* XXX should be float[4] */
1210 union tgsi_exec_channel *r,
1211 union tgsi_exec_channel *g,
1212 union tgsi_exec_channel *b,
1213 union tgsi_exec_channel *a )
1214 {
1215 uint j;
1216 float rgba[NUM_CHANNELS][QUAD_SIZE];
1217
1218 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1219
1220 for (j = 0; j < 4; j++) {
1221 r->f[j] = rgba[0][j];
1222 g->f[j] = rgba[1][j];
1223 b->f[j] = rgba[2][j];
1224 a->f[j] = rgba[3][j];
1225 }
1226 }
1227
1228
1229 static void
1230 exec_tex(struct tgsi_exec_machine *mach,
1231 const struct tgsi_full_instruction *inst,
1232 boolean biasLod)
1233 {
1234 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1235 union tgsi_exec_channel r[8];
1236 uint chan_index;
1237 float lodBias;
1238
1239 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1240
1241 switch (inst->InstructionExtTexture.Texture) {
1242 case TGSI_TEXTURE_1D:
1243
1244 FETCH(&r[0], 0, CHAN_X);
1245
1246 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
1247 case TGSI_EXTSWIZZLE_W:
1248 FETCH(&r[1], 0, CHAN_W);
1249 micro_div( &r[0], &r[0], &r[1] );
1250 break;
1251
1252 case TGSI_EXTSWIZZLE_ONE:
1253 break;
1254
1255 default:
1256 assert (0);
1257 }
1258
1259 if (biasLod) {
1260 FETCH(&r[1], 0, CHAN_W);
1261 lodBias = r[2].f[0];
1262 }
1263 else
1264 lodBias = 0.0;
1265
1266 fetch_texel(&mach->Samplers[unit],
1267 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1268 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1269 break;
1270
1271 case TGSI_TEXTURE_2D:
1272 case TGSI_TEXTURE_RECT:
1273
1274 FETCH(&r[0], 0, CHAN_X);
1275 FETCH(&r[1], 0, CHAN_Y);
1276 FETCH(&r[2], 0, CHAN_Z);
1277
1278 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
1279 case TGSI_EXTSWIZZLE_W:
1280 FETCH(&r[3], 0, CHAN_W);
1281 micro_div( &r[0], &r[0], &r[3] );
1282 micro_div( &r[1], &r[1], &r[3] );
1283 micro_div( &r[2], &r[2], &r[3] );
1284 break;
1285
1286 case TGSI_EXTSWIZZLE_ONE:
1287 break;
1288
1289 default:
1290 assert (0);
1291 }
1292
1293 if (biasLod) {
1294 FETCH(&r[3], 0, CHAN_W);
1295 lodBias = r[3].f[0];
1296 }
1297 else
1298 lodBias = 0.0;
1299
1300 fetch_texel(&mach->Samplers[unit],
1301 &r[0], &r[1], &r[2], lodBias, /* inputs */
1302 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1303 break;
1304
1305 case TGSI_TEXTURE_3D:
1306 case TGSI_TEXTURE_CUBE:
1307
1308 FETCH(&r[0], 0, CHAN_X);
1309 FETCH(&r[1], 0, CHAN_Y);
1310 FETCH(&r[2], 0, CHAN_Z);
1311
1312 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
1313 case TGSI_EXTSWIZZLE_W:
1314 FETCH(&r[3], 0, CHAN_W);
1315 micro_div( &r[0], &r[0], &r[3] );
1316 micro_div( &r[1], &r[1], &r[3] );
1317 micro_div( &r[2], &r[2], &r[3] );
1318 break;
1319
1320 case TGSI_EXTSWIZZLE_ONE:
1321 break;
1322
1323 default:
1324 assert (0);
1325 }
1326
1327 if (biasLod) {
1328 FETCH(&r[3], 0, CHAN_W);
1329 lodBias = r[3].f[0];
1330 }
1331 else
1332 lodBias = 0.0;
1333
1334 fetch_texel(&mach->Samplers[unit],
1335 &r[0], &r[1], &r[2], lodBias,
1336 &r[0], &r[1], &r[2], &r[3]);
1337 break;
1338
1339 default:
1340 assert (0);
1341 }
1342
1343 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1344 STORE( &r[chan_index], 0, chan_index );
1345 }
1346 }
1347
1348
1349
1350 static void
1351 constant_interpolation(
1352 struct tgsi_exec_machine *mach,
1353 unsigned attrib,
1354 unsigned chan )
1355 {
1356 unsigned i;
1357
1358 for( i = 0; i < QUAD_SIZE; i++ ) {
1359 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1360 }
1361 }
1362
1363 static void
1364 linear_interpolation(
1365 struct tgsi_exec_machine *mach,
1366 unsigned attrib,
1367 unsigned chan )
1368 {
1369 const float x = mach->QuadPos.xyzw[0].f[0];
1370 const float y = mach->QuadPos.xyzw[1].f[0];
1371 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1372 const float dady = mach->InterpCoefs[attrib].dady[chan];
1373 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1374 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1375 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1376 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1377 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1378 }
1379
1380 static void
1381 perspective_interpolation(
1382 struct tgsi_exec_machine *mach,
1383 unsigned attrib,
1384 unsigned chan )
1385 {
1386 const float x = mach->QuadPos.xyzw[0].f[0];
1387 const float y = mach->QuadPos.xyzw[1].f[0];
1388 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1389 const float dady = mach->InterpCoefs[attrib].dady[chan];
1390 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1391 const float *w = mach->QuadPos.xyzw[3].f;
1392 /* divide by W here */
1393 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1394 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1395 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1396 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1397 }
1398
1399
/**
 * Signature shared by the interpolation helpers: compute channel `chan'
 * of input attribute `attrib' for the quad held in `mach'.
 */
typedef void (*interpolation_func)( struct tgsi_exec_machine *mach,
                                    unsigned attrib,
                                    unsigned chan );
1404
1405 static void
1406 exec_declaration(
1407 struct tgsi_exec_machine *mach,
1408 const struct tgsi_full_declaration *decl )
1409 {
1410 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1411 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1412 unsigned first, last, mask;
1413 interpolation_func interp;
1414
1415 assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
1416
1417 first = decl->u.DeclarationRange.First;
1418 last = decl->u.DeclarationRange.Last;
1419 mask = decl->Declaration.UsageMask;
1420
1421 switch( decl->Interpolation.Interpolate ) {
1422 case TGSI_INTERPOLATE_CONSTANT:
1423 interp = constant_interpolation;
1424 break;
1425
1426 case TGSI_INTERPOLATE_LINEAR:
1427 interp = linear_interpolation;
1428 break;
1429
1430 case TGSI_INTERPOLATE_PERSPECTIVE:
1431 interp = perspective_interpolation;
1432 break;
1433
1434 default:
1435 assert( 0 );
1436 }
1437
1438 if( mask == TGSI_WRITEMASK_XYZW ) {
1439 unsigned i, j;
1440
1441 for( i = first; i <= last; i++ ) {
1442 for( j = 0; j < NUM_CHANNELS; j++ ) {
1443 interp( mach, i, j );
1444 }
1445 }
1446 }
1447 else {
1448 unsigned i, j;
1449
1450 for( j = 0; j < NUM_CHANNELS; j++ ) {
1451 if( mask & (1 << j) ) {
1452 for( i = first; i <= last; i++ ) {
1453 interp( mach, i, j );
1454 }
1455 }
1456 }
1457 }
1458 }
1459 }
1460 }
1461
1462 static void
1463 exec_instruction(
1464 struct tgsi_exec_machine *mach,
1465 const struct tgsi_full_instruction *inst,
1466 int *pc )
1467 {
1468 uint chan_index;
1469 union tgsi_exec_channel r[8];
1470
1471 (*pc)++;
1472
1473 switch (inst->Instruction.Opcode) {
1474 case TGSI_OPCODE_ARL:
1475 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1476 FETCH( &r[0], 0, chan_index );
1477 micro_f2it( &r[0], &r[0] );
1478 STORE( &r[0], 0, chan_index );
1479 }
1480 break;
1481
1482 case TGSI_OPCODE_MOV:
1483 /* TGSI_OPCODE_SWZ */
1484 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1485 FETCH( &r[0], 0, chan_index );
1486 STORE( &r[0], 0, chan_index );
1487 }
1488 break;
1489
1490 case TGSI_OPCODE_LIT:
1491 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1492 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1493 }
1494
1495 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1496 FETCH( &r[0], 0, CHAN_X );
1497 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1498 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1499 STORE( &r[0], 0, CHAN_Y );
1500 }
1501
1502 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1503 FETCH( &r[1], 0, CHAN_Y );
1504 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1505
1506 FETCH( &r[2], 0, CHAN_W );
1507 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1508 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1509 micro_pow( &r[1], &r[1], &r[2] );
1510 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1511 STORE( &r[0], 0, CHAN_Z );
1512 }
1513 }
1514
1515 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1516 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1517 }
1518 break;
1519
1520 case TGSI_OPCODE_RCP:
1521 /* TGSI_OPCODE_RECIP */
1522 FETCH( &r[0], 0, CHAN_X );
1523 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1524 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1525 STORE( &r[0], 0, chan_index );
1526 }
1527 break;
1528
1529 case TGSI_OPCODE_RSQ:
1530 /* TGSI_OPCODE_RECIPSQRT */
1531 FETCH( &r[0], 0, CHAN_X );
1532 micro_sqrt( &r[0], &r[0] );
1533 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1534 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1535 STORE( &r[0], 0, chan_index );
1536 }
1537 break;
1538
1539 case TGSI_OPCODE_EXP:
1540 assert (0);
1541 break;
1542
1543 case TGSI_OPCODE_LOG:
1544 assert (0);
1545 break;
1546
1547 case TGSI_OPCODE_MUL:
1548 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1549 {
1550 FETCH(&r[0], 0, chan_index);
1551 FETCH(&r[1], 1, chan_index);
1552
1553 micro_mul( &r[0], &r[0], &r[1] );
1554
1555 STORE(&r[0], 0, chan_index);
1556 }
1557 break;
1558
1559 case TGSI_OPCODE_ADD:
1560 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1561 FETCH( &r[0], 0, chan_index );
1562 FETCH( &r[1], 1, chan_index );
1563 micro_add( &r[0], &r[0], &r[1] );
1564 STORE( &r[0], 0, chan_index );
1565 }
1566 break;
1567
1568 case TGSI_OPCODE_DP3:
1569 /* TGSI_OPCODE_DOT3 */
1570 FETCH( &r[0], 0, CHAN_X );
1571 FETCH( &r[1], 1, CHAN_X );
1572 micro_mul( &r[0], &r[0], &r[1] );
1573
1574 FETCH( &r[1], 0, CHAN_Y );
1575 FETCH( &r[2], 1, CHAN_Y );
1576 micro_mul( &r[1], &r[1], &r[2] );
1577 micro_add( &r[0], &r[0], &r[1] );
1578
1579 FETCH( &r[1], 0, CHAN_Z );
1580 FETCH( &r[2], 1, CHAN_Z );
1581 micro_mul( &r[1], &r[1], &r[2] );
1582 micro_add( &r[0], &r[0], &r[1] );
1583
1584 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1585 STORE( &r[0], 0, chan_index );
1586 }
1587 break;
1588
1589 case TGSI_OPCODE_DP4:
1590 /* TGSI_OPCODE_DOT4 */
1591 FETCH(&r[0], 0, CHAN_X);
1592 FETCH(&r[1], 1, CHAN_X);
1593
1594 micro_mul( &r[0], &r[0], &r[1] );
1595
1596 FETCH(&r[1], 0, CHAN_Y);
1597 FETCH(&r[2], 1, CHAN_Y);
1598
1599 micro_mul( &r[1], &r[1], &r[2] );
1600 micro_add( &r[0], &r[0], &r[1] );
1601
1602 FETCH(&r[1], 0, CHAN_Z);
1603 FETCH(&r[2], 1, CHAN_Z);
1604
1605 micro_mul( &r[1], &r[1], &r[2] );
1606 micro_add( &r[0], &r[0], &r[1] );
1607
1608 FETCH(&r[1], 0, CHAN_W);
1609 FETCH(&r[2], 1, CHAN_W);
1610
1611 micro_mul( &r[1], &r[1], &r[2] );
1612 micro_add( &r[0], &r[0], &r[1] );
1613
1614 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1615 STORE( &r[0], 0, chan_index );
1616 }
1617 break;
1618
1619 case TGSI_OPCODE_DST:
1620 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1621 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1622 }
1623
1624 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1625 FETCH( &r[0], 0, CHAN_Y );
1626 FETCH( &r[1], 1, CHAN_Y);
1627 micro_mul( &r[0], &r[0], &r[1] );
1628 STORE( &r[0], 0, CHAN_Y );
1629 }
1630
1631 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1632 FETCH( &r[0], 0, CHAN_Z );
1633 STORE( &r[0], 0, CHAN_Z );
1634 }
1635
1636 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1637 FETCH( &r[0], 1, CHAN_W );
1638 STORE( &r[0], 0, CHAN_W );
1639 }
1640 break;
1641
1642 case TGSI_OPCODE_MIN:
1643 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1644 FETCH(&r[0], 0, chan_index);
1645 FETCH(&r[1], 1, chan_index);
1646
1647 /* XXX use micro_min()?? */
1648 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1649
1650 STORE(&r[0], 0, chan_index);
1651 }
1652 break;
1653
1654 case TGSI_OPCODE_MAX:
1655 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1656 FETCH(&r[0], 0, chan_index);
1657 FETCH(&r[1], 1, chan_index);
1658
1659 /* XXX use micro_max()?? */
1660 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1661
1662 STORE(&r[0], 0, chan_index );
1663 }
1664 break;
1665
1666 case TGSI_OPCODE_SLT:
1667 /* TGSI_OPCODE_SETLT */
1668 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1669 FETCH( &r[0], 0, chan_index );
1670 FETCH( &r[1], 1, chan_index );
1671 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1672 STORE( &r[0], 0, chan_index );
1673 }
1674 break;
1675
1676 case TGSI_OPCODE_SGE:
1677 /* TGSI_OPCODE_SETGE */
1678 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1679 FETCH( &r[0], 0, chan_index );
1680 FETCH( &r[1], 1, chan_index );
1681 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1682 STORE( &r[0], 0, chan_index );
1683 }
1684 break;
1685
1686 case TGSI_OPCODE_MAD:
1687 /* TGSI_OPCODE_MADD */
1688 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1689 FETCH( &r[0], 0, chan_index );
1690 FETCH( &r[1], 1, chan_index );
1691 micro_mul( &r[0], &r[0], &r[1] );
1692 FETCH( &r[1], 2, chan_index );
1693 micro_add( &r[0], &r[0], &r[1] );
1694 STORE( &r[0], 0, chan_index );
1695 }
1696 break;
1697
1698 case TGSI_OPCODE_SUB:
1699 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1700 FETCH(&r[0], 0, chan_index);
1701 FETCH(&r[1], 1, chan_index);
1702
1703 micro_sub( &r[0], &r[0], &r[1] );
1704
1705 STORE(&r[0], 0, chan_index);
1706 }
1707 break;
1708
1709 case TGSI_OPCODE_LERP:
1710 /* TGSI_OPCODE_LRP */
1711 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1712 FETCH(&r[0], 0, chan_index);
1713 FETCH(&r[1], 1, chan_index);
1714 FETCH(&r[2], 2, chan_index);
1715
1716 micro_sub( &r[1], &r[1], &r[2] );
1717 micro_mul( &r[0], &r[0], &r[1] );
1718 micro_add( &r[0], &r[0], &r[2] );
1719
1720 STORE(&r[0], 0, chan_index);
1721 }
1722 break;
1723
1724 case TGSI_OPCODE_CND:
1725 assert (0);
1726 break;
1727
1728 case TGSI_OPCODE_CND0:
1729 assert (0);
1730 break;
1731
1732 case TGSI_OPCODE_DOT2ADD:
1733 /* TGSI_OPCODE_DP2A */
1734 assert (0);
1735 break;
1736
1737 case TGSI_OPCODE_INDEX:
1738 assert (0);
1739 break;
1740
1741 case TGSI_OPCODE_NEGATE:
1742 assert (0);
1743 break;
1744
1745 case TGSI_OPCODE_FRAC:
1746 /* TGSI_OPCODE_FRC */
1747 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1748 FETCH( &r[0], 0, chan_index );
1749 micro_frc( &r[0], &r[0] );
1750 STORE( &r[0], 0, chan_index );
1751 }
1752 break;
1753
1754 case TGSI_OPCODE_CLAMP:
1755 assert (0);
1756 break;
1757
1758 case TGSI_OPCODE_FLOOR:
1759 /* TGSI_OPCODE_FLR */
1760 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1761 FETCH( &r[0], 0, chan_index );
1762 micro_flr( &r[0], &r[0] );
1763 STORE( &r[0], 0, chan_index );
1764 }
1765 break;
1766
1767 case TGSI_OPCODE_ROUND:
1768 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1769 FETCH( &r[0], 0, chan_index );
1770 micro_rnd( &r[0], &r[0] );
1771 STORE( &r[0], 0, chan_index );
1772 }
1773 break;
1774
1775 case TGSI_OPCODE_EXPBASE2:
1776 /* TGSI_OPCODE_EX2 */
1777 FETCH(&r[0], 0, CHAN_X);
1778
1779 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
1780
1781 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1782 STORE( &r[0], 0, chan_index );
1783 }
1784 break;
1785
1786 case TGSI_OPCODE_LOGBASE2:
1787 /* TGSI_OPCODE_LG2 */
1788 FETCH( &r[0], 0, CHAN_X );
1789 micro_lg2( &r[0], &r[0] );
1790 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1791 STORE( &r[0], 0, chan_index );
1792 }
1793 break;
1794
1795 case TGSI_OPCODE_POWER:
1796 /* TGSI_OPCODE_POW */
1797 FETCH(&r[0], 0, CHAN_X);
1798 FETCH(&r[1], 1, CHAN_X);
1799
1800 micro_pow( &r[0], &r[0], &r[1] );
1801
1802 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1803 STORE( &r[0], 0, chan_index );
1804 }
1805 break;
1806
1807 case TGSI_OPCODE_CROSSPRODUCT:
1808 /* TGSI_OPCODE_XPD */
1809 FETCH(&r[0], 0, CHAN_Y);
1810 FETCH(&r[1], 1, CHAN_Z);
1811
1812 micro_mul( &r[2], &r[0], &r[1] );
1813
1814 FETCH(&r[3], 0, CHAN_Z);
1815 FETCH(&r[4], 1, CHAN_Y);
1816
1817 micro_mul( &r[5], &r[3], &r[4] );
1818 micro_sub( &r[2], &r[2], &r[5] );
1819
1820 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1821 STORE( &r[2], 0, CHAN_X );
1822 }
1823
1824 FETCH(&r[2], 1, CHAN_X);
1825
1826 micro_mul( &r[3], &r[3], &r[2] );
1827
1828 FETCH(&r[5], 0, CHAN_X);
1829
1830 micro_mul( &r[1], &r[1], &r[5] );
1831 micro_sub( &r[3], &r[3], &r[1] );
1832
1833 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1834 STORE( &r[3], 0, CHAN_Y );
1835 }
1836
1837 micro_mul( &r[5], &r[5], &r[4] );
1838 micro_mul( &r[0], &r[0], &r[2] );
1839 micro_sub( &r[5], &r[5], &r[0] );
1840
1841 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1842 STORE( &r[5], 0, CHAN_Z );
1843 }
1844
1845 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1846 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1847 }
1848 break;
1849
1850 case TGSI_OPCODE_MULTIPLYMATRIX:
1851 assert (0);
1852 break;
1853
1854 case TGSI_OPCODE_ABS:
1855 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1856 FETCH(&r[0], 0, chan_index);
1857
1858 micro_abs( &r[0], &r[0] );
1859
1860 STORE(&r[0], 0, chan_index);
1861 }
1862 break;
1863
1864 case TGSI_OPCODE_RCC:
1865 assert (0);
1866 break;
1867
1868 case TGSI_OPCODE_DPH:
1869 FETCH(&r[0], 0, CHAN_X);
1870 FETCH(&r[1], 1, CHAN_X);
1871
1872 micro_mul( &r[0], &r[0], &r[1] );
1873
1874 FETCH(&r[1], 0, CHAN_Y);
1875 FETCH(&r[2], 1, CHAN_Y);
1876
1877 micro_mul( &r[1], &r[1], &r[2] );
1878 micro_add( &r[0], &r[0], &r[1] );
1879
1880 FETCH(&r[1], 0, CHAN_Z);
1881 FETCH(&r[2], 1, CHAN_Z);
1882
1883 micro_mul( &r[1], &r[1], &r[2] );
1884 micro_add( &r[0], &r[0], &r[1] );
1885
1886 FETCH(&r[1], 1, CHAN_W);
1887
1888 micro_add( &r[0], &r[0], &r[1] );
1889
1890 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1891 STORE( &r[0], 0, chan_index );
1892 }
1893 break;
1894
1895 case TGSI_OPCODE_COS:
1896 FETCH(&r[0], 0, CHAN_X);
1897
1898 micro_cos( &r[0], &r[0] );
1899
1900 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1901 STORE( &r[0], 0, chan_index );
1902 }
1903 break;
1904
1905 case TGSI_OPCODE_DDX:
1906 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1907 FETCH( &r[0], 0, chan_index );
1908 micro_ddx( &r[0], &r[0] );
1909 STORE( &r[0], 0, chan_index );
1910 }
1911 break;
1912
1913 case TGSI_OPCODE_DDY:
1914 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1915 FETCH( &r[0], 0, chan_index );
1916 micro_ddy( &r[0], &r[0] );
1917 STORE( &r[0], 0, chan_index );
1918 }
1919 break;
1920
1921 case TGSI_OPCODE_KILP:
1922 exec_kilp (mach, inst);
1923 break;
1924
1925 case TGSI_OPCODE_KIL:
1926 /* for enabled ExecMask bits, set the killed bit */
1927 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
1928 break;
1929
1930 case TGSI_OPCODE_PK2H:
1931 assert (0);
1932 break;
1933
1934 case TGSI_OPCODE_PK2US:
1935 assert (0);
1936 break;
1937
1938 case TGSI_OPCODE_PK4B:
1939 assert (0);
1940 break;
1941
1942 case TGSI_OPCODE_PK4UB:
1943 assert (0);
1944 break;
1945
1946 case TGSI_OPCODE_RFL:
1947 assert (0);
1948 break;
1949
1950 case TGSI_OPCODE_SEQ:
1951 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1952 FETCH( &r[0], 0, chan_index );
1953 FETCH( &r[1], 1, chan_index );
1954 micro_eq( &r[0], &r[0], &r[1],
1955 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
1956 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1957 STORE( &r[0], 0, chan_index );
1958 }
1959 break;
1960
1961 case TGSI_OPCODE_SFL:
1962 assert (0);
1963 break;
1964
1965 case TGSI_OPCODE_SGT:
1966 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1967 FETCH( &r[0], 0, chan_index );
1968 FETCH( &r[1], 1, chan_index );
1969 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1970 STORE( &r[0], 0, chan_index );
1971 }
1972 break;
1973
1974 case TGSI_OPCODE_SIN:
1975 FETCH( &r[0], 0, CHAN_X );
1976 micro_sin( &r[0], &r[0] );
1977 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1978 STORE( &r[0], 0, chan_index );
1979 }
1980 break;
1981
1982 case TGSI_OPCODE_SLE:
1983 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1984 FETCH( &r[0], 0, chan_index );
1985 FETCH( &r[1], 1, chan_index );
1986 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1987 STORE( &r[0], 0, chan_index );
1988 }
1989 break;
1990
1991 case TGSI_OPCODE_SNE:
1992 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1993 FETCH( &r[0], 0, chan_index );
1994 FETCH( &r[1], 1, chan_index );
1995 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1996 STORE( &r[0], 0, chan_index );
1997 }
1998 break;
1999
2000 case TGSI_OPCODE_STR:
2001 assert (0);
2002 break;
2003
2004 case TGSI_OPCODE_TEX:
2005 /* simple texture lookup */
2006 /* src[0] = texcoord */
2007 /* src[1] = sampler unit */
2008 exec_tex(mach, inst, FALSE);
2009 break;
2010
2011 case TGSI_OPCODE_TXB:
2012 /* Texture lookup with lod bias */
2013 /* src[0] = texcoord (src[0].w = LOD bias) */
2014 /* src[1] = sampler unit */
2015 exec_tex(mach, inst, TRUE);
2016 break;
2017
2018 case TGSI_OPCODE_TXD:
2019 /* Texture lookup with explict partial derivatives */
2020 /* src[0] = texcoord */
2021 /* src[1] = d[strq]/dx */
2022 /* src[2] = d[strq]/dy */
2023 /* src[3] = sampler unit */
2024 assert (0);
2025 break;
2026
2027 case TGSI_OPCODE_TXL:
2028 /* Texture lookup with explit LOD */
2029 /* src[0] = texcoord (src[0].w = LOD) */
2030 /* src[1] = sampler unit */
2031 exec_tex(mach, inst, TRUE);
2032 break;
2033
2034 case TGSI_OPCODE_UP2H:
2035 assert (0);
2036 break;
2037
2038 case TGSI_OPCODE_UP2US:
2039 assert (0);
2040 break;
2041
2042 case TGSI_OPCODE_UP4B:
2043 assert (0);
2044 break;
2045
2046 case TGSI_OPCODE_UP4UB:
2047 assert (0);
2048 break;
2049
2050 case TGSI_OPCODE_X2D:
2051 assert (0);
2052 break;
2053
2054 case TGSI_OPCODE_ARA:
2055 assert (0);
2056 break;
2057
2058 case TGSI_OPCODE_ARR:
2059 assert (0);
2060 break;
2061
2062 case TGSI_OPCODE_BRA:
2063 assert (0);
2064 break;
2065
2066 case TGSI_OPCODE_CAL:
2067 /* skip the call if no execution channels are enabled */
2068 if (mach->ExecMask) {
2069 /* do the call */
2070
2071 /* push the Cond, Loop, Cont stacks */
2072 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2073 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2074 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2075 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2076 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2077 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2078
2079 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2080 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2081
2082 /* note that PC was already incremented above */
2083 mach->CallStack[mach->CallStackTop++] = *pc;
2084 *pc = inst->InstructionExtLabel.Label;
2085 }
2086 break;
2087
2088 case TGSI_OPCODE_RET:
2089 mach->FuncMask &= ~mach->ExecMask;
2090 UPDATE_EXEC_MASK(mach);
2091
2092 if (mach->ExecMask == 0x0) {
2093 /* really return now (otherwise, keep executing */
2094
2095 if (mach->CallStackTop == 0) {
2096 /* returning from main() */
2097 *pc = -1;
2098 return;
2099 }
2100 *pc = mach->CallStack[--mach->CallStackTop];
2101
2102 /* pop the Cond, Loop, Cont stacks */
2103 assert(mach->CondStackTop > 0);
2104 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2105 assert(mach->LoopStackTop > 0);
2106 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2107 assert(mach->ContStackTop > 0);
2108 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2109 assert(mach->FuncStackTop > 0);
2110 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2111
2112 UPDATE_EXEC_MASK(mach);
2113 }
2114 break;
2115
2116 case TGSI_OPCODE_SSG:
2117 assert (0);
2118 break;
2119
2120 case TGSI_OPCODE_CMP:
2121 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2122 FETCH(&r[0], 0, chan_index);
2123 FETCH(&r[1], 1, chan_index);
2124 FETCH(&r[2], 2, chan_index);
2125
2126 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2127
2128 STORE(&r[0], 0, chan_index);
2129 }
2130 break;
2131
2132 case TGSI_OPCODE_SCS:
2133 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2134 FETCH( &r[0], 0, CHAN_X );
2135 }
2136 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2137 micro_cos( &r[1], &r[0] );
2138 STORE( &r[1], 0, CHAN_X );
2139 }
2140 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2141 micro_sin( &r[1], &r[0] );
2142 STORE( &r[1], 0, CHAN_Y );
2143 }
2144 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2145 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2146 }
2147 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2148 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2149 }
2150 break;
2151
2152 case TGSI_OPCODE_NRM:
2153 assert (0);
2154 break;
2155
2156 case TGSI_OPCODE_DIV:
2157 assert( 0 );
2158 break;
2159
2160 case TGSI_OPCODE_DP2:
2161 FETCH( &r[0], 0, CHAN_X );
2162 FETCH( &r[1], 1, CHAN_X );
2163 micro_mul( &r[0], &r[0], &r[1] );
2164
2165 FETCH( &r[1], 0, CHAN_Y );
2166 FETCH( &r[2], 1, CHAN_Y );
2167 micro_mul( &r[1], &r[1], &r[2] );
2168 micro_add( &r[0], &r[0], &r[1] );
2169
2170 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2171 STORE( &r[0], 0, chan_index );
2172 }
2173 break;
2174
2175 case TGSI_OPCODE_IF:
2176 /* push CondMask */
2177 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2178 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2179 FETCH( &r[0], 0, CHAN_X );
2180 /* update CondMask */
2181 if( ! r[0].u[0] ) {
2182 mach->CondMask &= ~0x1;
2183 }
2184 if( ! r[0].u[1] ) {
2185 mach->CondMask &= ~0x2;
2186 }
2187 if( ! r[0].u[2] ) {
2188 mach->CondMask &= ~0x4;
2189 }
2190 if( ! r[0].u[3] ) {
2191 mach->CondMask &= ~0x8;
2192 }
2193 UPDATE_EXEC_MASK(mach);
2194 /* Todo: If CondMask==0, jump to ELSE */
2195 break;
2196
2197 case TGSI_OPCODE_ELSE:
2198 /* invert CondMask wrt previous mask */
2199 {
2200 uint prevMask;
2201 assert(mach->CondStackTop > 0);
2202 prevMask = mach->CondStack[mach->CondStackTop - 1];
2203 mach->CondMask = ~mach->CondMask & prevMask;
2204 UPDATE_EXEC_MASK(mach);
2205 /* Todo: If CondMask==0, jump to ENDIF */
2206 }
2207 break;
2208
2209 case TGSI_OPCODE_ENDIF:
2210 /* pop CondMask */
2211 assert(mach->CondStackTop > 0);
2212 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2213 UPDATE_EXEC_MASK(mach);
2214 break;
2215
2216 case TGSI_OPCODE_END:
2217 /* halt execution */
2218 *pc = -1;
2219 break;
2220
2221 case TGSI_OPCODE_REP:
2222 assert (0);
2223 break;
2224
2225 case TGSI_OPCODE_ENDREP:
2226 assert (0);
2227 break;
2228
2229 case TGSI_OPCODE_PUSHA:
2230 assert (0);
2231 break;
2232
2233 case TGSI_OPCODE_POPA:
2234 assert (0);
2235 break;
2236
2237 case TGSI_OPCODE_CEIL:
2238 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2239 FETCH( &r[0], 0, chan_index );
2240 micro_ceil( &r[0], &r[0] );
2241 STORE( &r[0], 0, chan_index );
2242 }
2243 break;
2244
2245 case TGSI_OPCODE_I2F:
2246 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2247 FETCH( &r[0], 0, chan_index );
2248 micro_i2f( &r[0], &r[0] );
2249 STORE( &r[0], 0, chan_index );
2250 }
2251 break;
2252
2253 case TGSI_OPCODE_NOT:
2254 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2255 FETCH( &r[0], 0, chan_index );
2256 micro_not( &r[0], &r[0] );
2257 STORE( &r[0], 0, chan_index );
2258 }
2259 break;
2260
2261 case TGSI_OPCODE_TRUNC:
2262 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2263 FETCH( &r[0], 0, chan_index );
2264 micro_trunc( &r[0], &r[0] );
2265 STORE( &r[0], 0, chan_index );
2266 }
2267 break;
2268
2269 case TGSI_OPCODE_SHL:
2270 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2271 FETCH( &r[0], 0, chan_index );
2272 FETCH( &r[1], 1, chan_index );
2273 micro_shl( &r[0], &r[0], &r[1] );
2274 STORE( &r[0], 0, chan_index );
2275 }
2276 break;
2277
2278 case TGSI_OPCODE_SHR:
2279 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2280 FETCH( &r[0], 0, chan_index );
2281 FETCH( &r[1], 1, chan_index );
2282 micro_ishr( &r[0], &r[0], &r[1] );
2283 STORE( &r[0], 0, chan_index );
2284 }
2285 break;
2286
2287 case TGSI_OPCODE_AND:
2288 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2289 FETCH( &r[0], 0, chan_index );
2290 FETCH( &r[1], 1, chan_index );
2291 micro_and( &r[0], &r[0], &r[1] );
2292 STORE( &r[0], 0, chan_index );
2293 }
2294 break;
2295
2296 case TGSI_OPCODE_OR:
2297 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2298 FETCH( &r[0], 0, chan_index );
2299 FETCH( &r[1], 1, chan_index );
2300 micro_or( &r[0], &r[0], &r[1] );
2301 STORE( &r[0], 0, chan_index );
2302 }
2303 break;
2304
2305 case TGSI_OPCODE_MOD:
2306 assert (0);
2307 break;
2308
2309 case TGSI_OPCODE_XOR:
2310 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2311 FETCH( &r[0], 0, chan_index );
2312 FETCH( &r[1], 1, chan_index );
2313 micro_xor( &r[0], &r[0], &r[1] );
2314 STORE( &r[0], 0, chan_index );
2315 }
2316 break;
2317
2318 case TGSI_OPCODE_SAD:
2319 assert (0);
2320 break;
2321
2322 case TGSI_OPCODE_TXF:
2323 assert (0);
2324 break;
2325
2326 case TGSI_OPCODE_TXQ:
2327 assert (0);
2328 break;
2329
2330 case TGSI_OPCODE_EMIT:
2331 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2332 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2333 break;
2334
2335 case TGSI_OPCODE_ENDPRIM:
2336 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2337 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2338 break;
2339
2340 case TGSI_OPCODE_LOOP:
2341 /* fall-through (for now) */
2342 case TGSI_OPCODE_BGNLOOP2:
2343 /* push LoopMask and ContMasks */
2344 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2345 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2346 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2347 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2348 break;
2349
2350 case TGSI_OPCODE_ENDLOOP:
2351 /* fall-through (for now at least) */
2352 case TGSI_OPCODE_ENDLOOP2:
2353 /* Restore ContMask, but don't pop */
2354 assert(mach->ContStackTop > 0);
2355 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2356 if (mach->LoopMask) {
2357 /* repeat loop: jump to instruction just past BGNLOOP */
2358 *pc = inst->InstructionExtLabel.Label + 1;
2359 }
2360 else {
2361 /* exit loop: pop LoopMask */
2362 assert(mach->LoopStackTop > 0);
2363 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2364 /* pop ContMask */
2365 assert(mach->ContStackTop > 0);
2366 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2367 }
2368 UPDATE_EXEC_MASK(mach);
2369 break;
2370
2371 case TGSI_OPCODE_BRK:
2372 /* turn off loop channels for each enabled exec channel */
2373 mach->LoopMask &= ~mach->ExecMask;
2374 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2375 UPDATE_EXEC_MASK(mach);
2376 break;
2377
2378 case TGSI_OPCODE_CONT:
2379 /* turn off cont channels for each enabled exec channel */
2380 mach->ContMask &= ~mach->ExecMask;
2381 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2382 UPDATE_EXEC_MASK(mach);
2383 break;
2384
2385 case TGSI_OPCODE_BGNSUB:
2386 /* no-op */
2387 break;
2388
2389 case TGSI_OPCODE_ENDSUB:
2390 /* no-op */
2391 break;
2392
2393 case TGSI_OPCODE_NOISE1:
2394 assert( 0 );
2395 break;
2396
2397 case TGSI_OPCODE_NOISE2:
2398 assert( 0 );
2399 break;
2400
2401 case TGSI_OPCODE_NOISE3:
2402 assert( 0 );
2403 break;
2404
2405 case TGSI_OPCODE_NOISE4:
2406 assert( 0 );
2407 break;
2408
2409 case TGSI_OPCODE_NOP:
2410 break;
2411
2412 default:
2413 assert( 0 );
2414 }
2415 }
2416
2417
2418 /**
2419 * Run TGSI interpreter.
2420 * \return bitmask of "alive" quad components
2421 */
2422 uint
2423 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2424 {
2425 uint i;
2426 int pc = 0;
2427
2428 mach->CondMask = 0xf;
2429 mach->LoopMask = 0xf;
2430 mach->ContMask = 0xf;
2431 mach->FuncMask = 0xf;
2432 mach->ExecMask = 0xf;
2433
2434 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2435 assert(mach->CondStackTop == 0);
2436 assert(mach->LoopStackTop == 0);
2437 assert(mach->ContStackTop == 0);
2438 assert(mach->CallStackTop == 0);
2439
2440 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2441 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2442
2443 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2444 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2445 mach->Primitives[0] = 0;
2446 }
2447
2448
2449 /* execute declarations (interpolants) */
2450 for (i = 0; i < mach->NumDeclarations; i++) {
2451 exec_declaration( mach, mach->Declarations+i );
2452 }
2453
2454 /* execute instructions, until pc is set to -1 */
2455 while (pc != -1) {
2456 assert(pc < mach->NumInstructions);
2457 exec_instruction( mach, mach->Instructions + pc, &pc );
2458 }
2459
2460 #if 0
2461 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2462 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2463 /*
2464 * Scale back depth component.
2465 */
2466 for (i = 0; i < 4; i++)
2467 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2468 }
2469 #endif
2470
2471 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2472 }
2473
2474