Merge commit 'origin/gallium-0.1' into gallium-0.1
[mesa.git] / src / gallium / auxiliary / tgsi / exec / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask; see UPDATE_EXEC_MASK), which are controlled by
45 * the flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_util.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/util/tgsi_parse.h"
58 #include "tgsi/util/tgsi_util.h"
59 #include "tgsi_exec.h"
60
/*
 * Lane indices naming the four members of a quad.  micro_ddx() and
 * micro_ddy() use them to index a channel's per-lane array.
 */
#define TILE_TOP_LEFT      0
#define TILE_TOP_RIGHT     1
#define TILE_BOTTOM_LEFT   2
#define TILE_BOTTOM_RIGHT  3
65
/*
 * Short aliases for the TGSI_EXEC_TEMP_* utility register locations.
 * A name ending in _I is a register index, one ending in _C is a channel.
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
92
/** Step CHAN through the four channel indices 0..3. */
#define FOR_EACH_CHANNEL(CHAN) \
   for (CHAN = 0; CHAN < 4; CHAN++)

/** Non-zero when bit CHAN is set in the write mask of INST's first destination. */
#define IS_CHANNEL_ENABLED(INST, CHAN) \
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test as IS_CHANNEL_ENABLED, but for INST's second destination. */
#define IS_CHANNEL_ENABLED2(INST, CHAN) \
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/** Run the following statement for each channel written by the first destination. */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN) \
   for (CHAN = 0; CHAN < 4; CHAN++) \
      if (((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))))

/** Run the following statement for each channel written by the second destination. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN) \
   for (CHAN = 0; CHAN < 4; CHAN++) \
      if (((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))))
109
110
/**
 * Recompute the execution mask.  A bit stays set in ExecMask only when it
 * is set in all of CondMask, LoopMask, ContMask and FuncMask.
 *
 * MACH is a pointer expression; it is parenthesized so that arguments such
 * as "p + 1" or "&m" expand correctly.  Note that MACH is evaluated more
 * than once, so it must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
114
115
/* Channel indices of a four-component register (x, y, z, w). */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3
120
121
122
123 /**
124 * Initialize machine state by expanding tokens to full instructions,
125 * allocating temporary storage, setting up constants, etc.
126 * After this, we can call tgsi_exec_machine_run() many times.
127 */
128 void
129 tgsi_exec_machine_bind_shader(
130 struct tgsi_exec_machine *mach,
131 const struct tgsi_token *tokens,
132 uint numSamplers,
133 struct tgsi_sampler *samplers)
134 {
135 uint k;
136 struct tgsi_parse_context parse;
137 struct tgsi_exec_labels *labels = &mach->Labels;
138 struct tgsi_full_instruction *instructions;
139 struct tgsi_full_declaration *declarations;
140 uint maxInstructions = 10, numInstructions = 0;
141 uint maxDeclarations = 10, numDeclarations = 0;
142 uint instno = 0;
143
144 #if 0
145 tgsi_dump(tokens, 0);
146 #endif
147
148 mach->Tokens = tokens;
149 mach->Samplers = samplers;
150
151 k = tgsi_parse_init (&parse, mach->Tokens);
152 if (k != TGSI_PARSE_OK) {
153 debug_printf( "Problem parsing!\n" );
154 return;
155 }
156
157 mach->Processor = parse.FullHeader.Processor.Processor;
158 mach->ImmLimit = 0;
159 labels->count = 0;
160
161 declarations = (struct tgsi_full_declaration *)
162 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
163
164 instructions = (struct tgsi_full_instruction *)
165 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
166
167
168 while( !tgsi_parse_end_of_tokens( &parse ) ) {
169 uint pointer = parse.Position;
170 uint i;
171
172 tgsi_parse_token( &parse );
173 switch( parse.FullToken.Token.Type ) {
174 case TGSI_TOKEN_TYPE_DECLARATION:
175 /* save expanded declaration */
176 if (numDeclarations == maxDeclarations) {
177 declarations = REALLOC(declarations,
178 maxDeclarations
179 * sizeof(struct tgsi_full_declaration),
180 (maxDeclarations + 10)
181 * sizeof(struct tgsi_full_declaration));
182 maxDeclarations += 10;
183 }
184 memcpy(declarations + numDeclarations,
185 &parse.FullToken.FullDeclaration,
186 sizeof(declarations[0]));
187 numDeclarations++;
188 break;
189
190 case TGSI_TOKEN_TYPE_IMMEDIATE:
191 {
192 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
193 assert( size % 4 == 0 );
194 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
195
196 for( i = 0; i < size; i++ ) {
197 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
198 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
199 }
200 mach->ImmLimit += size / 4;
201 }
202 break;
203
204 case TGSI_TOKEN_TYPE_INSTRUCTION:
205 assert( labels->count < 128 );
206
207 labels->labels[labels->count][0] = instno;
208 labels->labels[labels->count][1] = pointer;
209 labels->count++;
210
211 /* save expanded instruction */
212 if (numInstructions == maxInstructions) {
213 instructions = REALLOC(instructions,
214 maxInstructions
215 * sizeof(struct tgsi_full_instruction),
216 (maxInstructions + 10)
217 * sizeof(struct tgsi_full_instruction));
218 maxInstructions += 10;
219 }
220 memcpy(instructions + numInstructions,
221 &parse.FullToken.FullInstruction,
222 sizeof(instructions[0]));
223 numInstructions++;
224 break;
225
226 default:
227 assert( 0 );
228 }
229 }
230 tgsi_parse_free (&parse);
231
232 if (mach->Declarations) {
233 FREE( mach->Declarations );
234 }
235 mach->Declarations = declarations;
236 mach->NumDeclarations = numDeclarations;
237
238 if (mach->Instructions) {
239 FREE( mach->Instructions );
240 }
241 mach->Instructions = instructions;
242 mach->NumInstructions = numInstructions;
243 }
244
245
246 void
247 tgsi_exec_machine_init(
248 struct tgsi_exec_machine *mach )
249 {
250 uint i;
251
252 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
253 mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
254
255 /* Setup constants. */
256 for( i = 0; i < 4; i++ ) {
257 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
258 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
259 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
260 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
261 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
262 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
263 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
264 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
265 }
266 }
267
268
269 void
270 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
271 {
272 if (mach->Instructions) {
273 FREE(mach->Instructions);
274 mach->Instructions = NULL;
275 mach->NumInstructions = 0;
276 }
277 if (mach->Declarations) {
278 FREE(mach->Declarations);
279 mach->Declarations = NULL;
280 mach->NumDeclarations = 0;
281 }
282 }
283
284
285 static void
286 micro_abs(
287 union tgsi_exec_channel *dst,
288 const union tgsi_exec_channel *src )
289 {
290 dst->f[0] = (float) fabs( (double) src->f[0] );
291 dst->f[1] = (float) fabs( (double) src->f[1] );
292 dst->f[2] = (float) fabs( (double) src->f[2] );
293 dst->f[3] = (float) fabs( (double) src->f[3] );
294 }
295
296 static void
297 micro_add(
298 union tgsi_exec_channel *dst,
299 const union tgsi_exec_channel *src0,
300 const union tgsi_exec_channel *src1 )
301 {
302 dst->f[0] = src0->f[0] + src1->f[0];
303 dst->f[1] = src0->f[1] + src1->f[1];
304 dst->f[2] = src0->f[2] + src1->f[2];
305 dst->f[3] = src0->f[3] + src1->f[3];
306 }
307
308 static void
309 micro_iadd(
310 union tgsi_exec_channel *dst,
311 const union tgsi_exec_channel *src0,
312 const union tgsi_exec_channel *src1 )
313 {
314 dst->i[0] = src0->i[0] + src1->i[0];
315 dst->i[1] = src0->i[1] + src1->i[1];
316 dst->i[2] = src0->i[2] + src1->i[2];
317 dst->i[3] = src0->i[3] + src1->i[3];
318 }
319
320 static void
321 micro_and(
322 union tgsi_exec_channel *dst,
323 const union tgsi_exec_channel *src0,
324 const union tgsi_exec_channel *src1 )
325 {
326 dst->u[0] = src0->u[0] & src1->u[0];
327 dst->u[1] = src0->u[1] & src1->u[1];
328 dst->u[2] = src0->u[2] & src1->u[2];
329 dst->u[3] = src0->u[3] & src1->u[3];
330 }
331
332 static void
333 micro_ceil(
334 union tgsi_exec_channel *dst,
335 const union tgsi_exec_channel *src )
336 {
337 dst->f[0] = (float) ceil( (double) src->f[0] );
338 dst->f[1] = (float) ceil( (double) src->f[1] );
339 dst->f[2] = (float) ceil( (double) src->f[2] );
340 dst->f[3] = (float) ceil( (double) src->f[3] );
341 }
342
343 static void
344 micro_cos(
345 union tgsi_exec_channel *dst,
346 const union tgsi_exec_channel *src )
347 {
348 dst->f[0] = (float) cos( (double) src->f[0] );
349 dst->f[1] = (float) cos( (double) src->f[1] );
350 dst->f[2] = (float) cos( (double) src->f[2] );
351 dst->f[3] = (float) cos( (double) src->f[3] );
352 }
353
354 static void
355 micro_ddx(
356 union tgsi_exec_channel *dst,
357 const union tgsi_exec_channel *src )
358 {
359 dst->f[0] =
360 dst->f[1] =
361 dst->f[2] =
362 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
363 }
364
365 static void
366 micro_ddy(
367 union tgsi_exec_channel *dst,
368 const union tgsi_exec_channel *src )
369 {
370 dst->f[0] =
371 dst->f[1] =
372 dst->f[2] =
373 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
374 }
375
376 static void
377 micro_div(
378 union tgsi_exec_channel *dst,
379 const union tgsi_exec_channel *src0,
380 const union tgsi_exec_channel *src1 )
381 {
382 dst->f[0] = src0->f[0] / src1->f[0];
383 dst->f[1] = src0->f[1] / src1->f[1];
384 dst->f[2] = src0->f[2] / src1->f[2];
385 dst->f[3] = src0->f[3] / src1->f[3];
386 }
387
388 static void
389 micro_udiv(
390 union tgsi_exec_channel *dst,
391 const union tgsi_exec_channel *src0,
392 const union tgsi_exec_channel *src1 )
393 {
394 dst->u[0] = src0->u[0] / src1->u[0];
395 dst->u[1] = src0->u[1] / src1->u[1];
396 dst->u[2] = src0->u[2] / src1->u[2];
397 dst->u[3] = src0->u[3] / src1->u[3];
398 }
399
400 static void
401 micro_eq(
402 union tgsi_exec_channel *dst,
403 const union tgsi_exec_channel *src0,
404 const union tgsi_exec_channel *src1,
405 const union tgsi_exec_channel *src2,
406 const union tgsi_exec_channel *src3 )
407 {
408 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
409 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
410 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
411 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
412 }
413
414 static void
415 micro_ieq(
416 union tgsi_exec_channel *dst,
417 const union tgsi_exec_channel *src0,
418 const union tgsi_exec_channel *src1,
419 const union tgsi_exec_channel *src2,
420 const union tgsi_exec_channel *src3 )
421 {
422 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
423 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
424 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
425 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
426 }
427
428 static void
429 micro_exp2(
430 union tgsi_exec_channel *dst,
431 const union tgsi_exec_channel *src)
432 {
433 dst->f[0] = (float) pow( 2.0, (double) src->f[0] );
434 dst->f[1] = (float) pow( 2.0, (double) src->f[1] );
435 dst->f[2] = (float) pow( 2.0, (double) src->f[2] );
436 dst->f[3] = (float) pow( 2.0, (double) src->f[3] );
437 }
438
439 static void
440 micro_f2it(
441 union tgsi_exec_channel *dst,
442 const union tgsi_exec_channel *src )
443 {
444 dst->i[0] = (int) src->f[0];
445 dst->i[1] = (int) src->f[1];
446 dst->i[2] = (int) src->f[2];
447 dst->i[3] = (int) src->f[3];
448 }
449
450 static void
451 micro_f2ut(
452 union tgsi_exec_channel *dst,
453 const union tgsi_exec_channel *src )
454 {
455 dst->u[0] = (uint) src->f[0];
456 dst->u[1] = (uint) src->f[1];
457 dst->u[2] = (uint) src->f[2];
458 dst->u[3] = (uint) src->f[3];
459 }
460
461 static void
462 micro_flr(
463 union tgsi_exec_channel *dst,
464 const union tgsi_exec_channel *src )
465 {
466 dst->f[0] = (float) floor( (double) src->f[0] );
467 dst->f[1] = (float) floor( (double) src->f[1] );
468 dst->f[2] = (float) floor( (double) src->f[2] );
469 dst->f[3] = (float) floor( (double) src->f[3] );
470 }
471
472 static void
473 micro_frc(
474 union tgsi_exec_channel *dst,
475 const union tgsi_exec_channel *src )
476 {
477 dst->f[0] = src->f[0] - (float) floor( (double) src->f[0] );
478 dst->f[1] = src->f[1] - (float) floor( (double) src->f[1] );
479 dst->f[2] = src->f[2] - (float) floor( (double) src->f[2] );
480 dst->f[3] = src->f[3] - (float) floor( (double) src->f[3] );
481 }
482
483 static void
484 micro_ge(
485 union tgsi_exec_channel *dst,
486 const union tgsi_exec_channel *src0,
487 const union tgsi_exec_channel *src1,
488 const union tgsi_exec_channel *src2,
489 const union tgsi_exec_channel *src3 )
490 {
491 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
492 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
493 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
494 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
495 }
496
497 static void
498 micro_i2f(
499 union tgsi_exec_channel *dst,
500 const union tgsi_exec_channel *src )
501 {
502 dst->f[0] = (float) src->i[0];
503 dst->f[1] = (float) src->i[1];
504 dst->f[2] = (float) src->i[2];
505 dst->f[3] = (float) src->i[3];
506 }
507
508 static void
509 micro_lg2(
510 union tgsi_exec_channel *dst,
511 const union tgsi_exec_channel *src )
512 {
513 dst->f[0] = (float) log( (double) src->f[0] ) * 1.442695f;
514 dst->f[1] = (float) log( (double) src->f[1] ) * 1.442695f;
515 dst->f[2] = (float) log( (double) src->f[2] ) * 1.442695f;
516 dst->f[3] = (float) log( (double) src->f[3] ) * 1.442695f;
517 }
518
519 static void
520 micro_lt(
521 union tgsi_exec_channel *dst,
522 const union tgsi_exec_channel *src0,
523 const union tgsi_exec_channel *src1,
524 const union tgsi_exec_channel *src2,
525 const union tgsi_exec_channel *src3 )
526 {
527 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
528 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
529 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
530 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
531 }
532
533 static void
534 micro_ilt(
535 union tgsi_exec_channel *dst,
536 const union tgsi_exec_channel *src0,
537 const union tgsi_exec_channel *src1,
538 const union tgsi_exec_channel *src2,
539 const union tgsi_exec_channel *src3 )
540 {
541 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
542 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
543 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
544 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
545 }
546
547 static void
548 micro_ult(
549 union tgsi_exec_channel *dst,
550 const union tgsi_exec_channel *src0,
551 const union tgsi_exec_channel *src1,
552 const union tgsi_exec_channel *src2,
553 const union tgsi_exec_channel *src3 )
554 {
555 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
556 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
557 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
558 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
559 }
560
561 static void
562 micro_max(
563 union tgsi_exec_channel *dst,
564 const union tgsi_exec_channel *src0,
565 const union tgsi_exec_channel *src1 )
566 {
567 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
568 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
569 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
570 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
571 }
572
573 static void
574 micro_imax(
575 union tgsi_exec_channel *dst,
576 const union tgsi_exec_channel *src0,
577 const union tgsi_exec_channel *src1 )
578 {
579 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
580 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
581 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
582 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
583 }
584
585 static void
586 micro_umax(
587 union tgsi_exec_channel *dst,
588 const union tgsi_exec_channel *src0,
589 const union tgsi_exec_channel *src1 )
590 {
591 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
592 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
593 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
594 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
595 }
596
597 static void
598 micro_min(
599 union tgsi_exec_channel *dst,
600 const union tgsi_exec_channel *src0,
601 const union tgsi_exec_channel *src1 )
602 {
603 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
604 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
605 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
606 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
607 }
608
609 static void
610 micro_imin(
611 union tgsi_exec_channel *dst,
612 const union tgsi_exec_channel *src0,
613 const union tgsi_exec_channel *src1 )
614 {
615 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
616 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
617 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
618 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
619 }
620
621 static void
622 micro_umin(
623 union tgsi_exec_channel *dst,
624 const union tgsi_exec_channel *src0,
625 const union tgsi_exec_channel *src1 )
626 {
627 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
628 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
629 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
630 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
631 }
632
633 static void
634 micro_umod(
635 union tgsi_exec_channel *dst,
636 const union tgsi_exec_channel *src0,
637 const union tgsi_exec_channel *src1 )
638 {
639 dst->u[0] = src0->u[0] % src1->u[0];
640 dst->u[1] = src0->u[1] % src1->u[1];
641 dst->u[2] = src0->u[2] % src1->u[2];
642 dst->u[3] = src0->u[3] % src1->u[3];
643 }
644
645 static void
646 micro_mul(
647 union tgsi_exec_channel *dst,
648 const union tgsi_exec_channel *src0,
649 const union tgsi_exec_channel *src1 )
650 {
651 dst->f[0] = src0->f[0] * src1->f[0];
652 dst->f[1] = src0->f[1] * src1->f[1];
653 dst->f[2] = src0->f[2] * src1->f[2];
654 dst->f[3] = src0->f[3] * src1->f[3];
655 }
656
657 static void
658 micro_imul(
659 union tgsi_exec_channel *dst,
660 const union tgsi_exec_channel *src0,
661 const union tgsi_exec_channel *src1 )
662 {
663 dst->i[0] = src0->i[0] * src1->i[0];
664 dst->i[1] = src0->i[1] * src1->i[1];
665 dst->i[2] = src0->i[2] * src1->i[2];
666 dst->i[3] = src0->i[3] * src1->i[3];
667 }
668
669 static void
670 micro_imul64(
671 union tgsi_exec_channel *dst0,
672 union tgsi_exec_channel *dst1,
673 const union tgsi_exec_channel *src0,
674 const union tgsi_exec_channel *src1 )
675 {
676 dst1->i[0] = src0->i[0] * src1->i[0];
677 dst1->i[1] = src0->i[1] * src1->i[1];
678 dst1->i[2] = src0->i[2] * src1->i[2];
679 dst1->i[3] = src0->i[3] * src1->i[3];
680 dst0->i[0] = 0;
681 dst0->i[1] = 0;
682 dst0->i[2] = 0;
683 dst0->i[3] = 0;
684 }
685
686 static void
687 micro_umul64(
688 union tgsi_exec_channel *dst0,
689 union tgsi_exec_channel *dst1,
690 const union tgsi_exec_channel *src0,
691 const union tgsi_exec_channel *src1 )
692 {
693 dst1->u[0] = src0->u[0] * src1->u[0];
694 dst1->u[1] = src0->u[1] * src1->u[1];
695 dst1->u[2] = src0->u[2] * src1->u[2];
696 dst1->u[3] = src0->u[3] * src1->u[3];
697 dst0->u[0] = 0;
698 dst0->u[1] = 0;
699 dst0->u[2] = 0;
700 dst0->u[3] = 0;
701 }
702
703 static void
704 micro_movc(
705 union tgsi_exec_channel *dst,
706 const union tgsi_exec_channel *src0,
707 const union tgsi_exec_channel *src1,
708 const union tgsi_exec_channel *src2 )
709 {
710 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
711 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
712 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
713 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
714 }
715
716 static void
717 micro_neg(
718 union tgsi_exec_channel *dst,
719 const union tgsi_exec_channel *src )
720 {
721 dst->f[0] = -src->f[0];
722 dst->f[1] = -src->f[1];
723 dst->f[2] = -src->f[2];
724 dst->f[3] = -src->f[3];
725 }
726
727 static void
728 micro_ineg(
729 union tgsi_exec_channel *dst,
730 const union tgsi_exec_channel *src )
731 {
732 dst->i[0] = -src->i[0];
733 dst->i[1] = -src->i[1];
734 dst->i[2] = -src->i[2];
735 dst->i[3] = -src->i[3];
736 }
737
738 static void
739 micro_not(
740 union tgsi_exec_channel *dst,
741 const union tgsi_exec_channel *src )
742 {
743 dst->u[0] = ~src->u[0];
744 dst->u[1] = ~src->u[1];
745 dst->u[2] = ~src->u[2];
746 dst->u[3] = ~src->u[3];
747 }
748
749 static void
750 micro_or(
751 union tgsi_exec_channel *dst,
752 const union tgsi_exec_channel *src0,
753 const union tgsi_exec_channel *src1 )
754 {
755 dst->u[0] = src0->u[0] | src1->u[0];
756 dst->u[1] = src0->u[1] | src1->u[1];
757 dst->u[2] = src0->u[2] | src1->u[2];
758 dst->u[3] = src0->u[3] | src1->u[3];
759 }
760
761 static void
762 micro_pow(
763 union tgsi_exec_channel *dst,
764 const union tgsi_exec_channel *src0,
765 const union tgsi_exec_channel *src1 )
766 {
767 dst->f[0] = (float) pow( (double) src0->f[0], (double) src1->f[0] );
768 dst->f[1] = (float) pow( (double) src0->f[1], (double) src1->f[1] );
769 dst->f[2] = (float) pow( (double) src0->f[2], (double) src1->f[2] );
770 dst->f[3] = (float) pow( (double) src0->f[3], (double) src1->f[3] );
771 }
772
773 static void
774 micro_rnd(
775 union tgsi_exec_channel *dst,
776 const union tgsi_exec_channel *src )
777 {
778 dst->f[0] = (float) floor( (double) (src->f[0] + 0.5f) );
779 dst->f[1] = (float) floor( (double) (src->f[1] + 0.5f) );
780 dst->f[2] = (float) floor( (double) (src->f[2] + 0.5f) );
781 dst->f[3] = (float) floor( (double) (src->f[3] + 0.5f) );
782 }
783
784 static void
785 micro_shl(
786 union tgsi_exec_channel *dst,
787 const union tgsi_exec_channel *src0,
788 const union tgsi_exec_channel *src1 )
789 {
790 dst->i[0] = src0->i[0] << src1->i[0];
791 dst->i[1] = src0->i[1] << src1->i[1];
792 dst->i[2] = src0->i[2] << src1->i[2];
793 dst->i[3] = src0->i[3] << src1->i[3];
794 }
795
796 static void
797 micro_ishr(
798 union tgsi_exec_channel *dst,
799 const union tgsi_exec_channel *src0,
800 const union tgsi_exec_channel *src1 )
801 {
802 dst->i[0] = src0->i[0] >> src1->i[0];
803 dst->i[1] = src0->i[1] >> src1->i[1];
804 dst->i[2] = src0->i[2] >> src1->i[2];
805 dst->i[3] = src0->i[3] >> src1->i[3];
806 }
807
808 static void
809 micro_trunc(
810 union tgsi_exec_channel *dst,
811 const union tgsi_exec_channel *src0 )
812 {
813 dst->f[0] = (float) (int) src0->f[0];
814 dst->f[1] = (float) (int) src0->f[1];
815 dst->f[2] = (float) (int) src0->f[2];
816 dst->f[3] = (float) (int) src0->f[3];
817 }
818
819 static void
820 micro_ushr(
821 union tgsi_exec_channel *dst,
822 const union tgsi_exec_channel *src0,
823 const union tgsi_exec_channel *src1 )
824 {
825 dst->u[0] = src0->u[0] >> src1->u[0];
826 dst->u[1] = src0->u[1] >> src1->u[1];
827 dst->u[2] = src0->u[2] >> src1->u[2];
828 dst->u[3] = src0->u[3] >> src1->u[3];
829 }
830
831 static void
832 micro_sin(
833 union tgsi_exec_channel *dst,
834 const union tgsi_exec_channel *src )
835 {
836 dst->f[0] = (float) sin( (double) src->f[0] );
837 dst->f[1] = (float) sin( (double) src->f[1] );
838 dst->f[2] = (float) sin( (double) src->f[2] );
839 dst->f[3] = (float) sin( (double) src->f[3] );
840 }
841
842 static void
843 micro_sqrt( union tgsi_exec_channel *dst,
844 const union tgsi_exec_channel *src )
845 {
846 dst->f[0] = (float) sqrt( (double) src->f[0] );
847 dst->f[1] = (float) sqrt( (double) src->f[1] );
848 dst->f[2] = (float) sqrt( (double) src->f[2] );
849 dst->f[3] = (float) sqrt( (double) src->f[3] );
850 }
851
852 static void
853 micro_sub(
854 union tgsi_exec_channel *dst,
855 const union tgsi_exec_channel *src0,
856 const union tgsi_exec_channel *src1 )
857 {
858 dst->f[0] = src0->f[0] - src1->f[0];
859 dst->f[1] = src0->f[1] - src1->f[1];
860 dst->f[2] = src0->f[2] - src1->f[2];
861 dst->f[3] = src0->f[3] - src1->f[3];
862 }
863
864 static void
865 micro_u2f(
866 union tgsi_exec_channel *dst,
867 const union tgsi_exec_channel *src )
868 {
869 dst->f[0] = (float) src->u[0];
870 dst->f[1] = (float) src->u[1];
871 dst->f[2] = (float) src->u[2];
872 dst->f[3] = (float) src->u[3];
873 }
874
875 static void
876 micro_xor(
877 union tgsi_exec_channel *dst,
878 const union tgsi_exec_channel *src0,
879 const union tgsi_exec_channel *src1 )
880 {
881 dst->u[0] = src0->u[0] ^ src1->u[0];
882 dst->u[1] = src0->u[1] ^ src1->u[1];
883 dst->u[2] = src0->u[2] ^ src1->u[2];
884 dst->u[3] = src0->u[3] ^ src1->u[3];
885 }
886
887 static void
888 fetch_src_file_channel(
889 const struct tgsi_exec_machine *mach,
890 const uint file,
891 const uint swizzle,
892 const union tgsi_exec_channel *index,
893 union tgsi_exec_channel *chan )
894 {
895 switch( swizzle ) {
896 case TGSI_EXTSWIZZLE_X:
897 case TGSI_EXTSWIZZLE_Y:
898 case TGSI_EXTSWIZZLE_Z:
899 case TGSI_EXTSWIZZLE_W:
900 switch( file ) {
901 case TGSI_FILE_CONSTANT:
902 chan->f[0] = mach->Consts[index->i[0]][swizzle];
903 chan->f[1] = mach->Consts[index->i[1]][swizzle];
904 chan->f[2] = mach->Consts[index->i[2]][swizzle];
905 chan->f[3] = mach->Consts[index->i[3]][swizzle];
906 break;
907
908 case TGSI_FILE_INPUT:
909 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
910 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
911 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
912 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
913 break;
914
915 case TGSI_FILE_TEMPORARY:
916 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
917 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
918 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
919 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
920 break;
921
922 case TGSI_FILE_IMMEDIATE:
923 assert( index->i[0] < (int) mach->ImmLimit );
924 chan->f[0] = mach->Imms[index->i[0]][swizzle];
925 assert( index->i[1] < (int) mach->ImmLimit );
926 chan->f[1] = mach->Imms[index->i[1]][swizzle];
927 assert( index->i[2] < (int) mach->ImmLimit );
928 chan->f[2] = mach->Imms[index->i[2]][swizzle];
929 assert( index->i[3] < (int) mach->ImmLimit );
930 chan->f[3] = mach->Imms[index->i[3]][swizzle];
931 break;
932
933 case TGSI_FILE_ADDRESS:
934 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
935 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
936 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
937 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
938 break;
939
940 case TGSI_FILE_OUTPUT:
941 /* vertex/fragment output vars can be read too */
942 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
943 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
944 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
945 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
946 break;
947
948 default:
949 assert( 0 );
950 }
951 break;
952
953 case TGSI_EXTSWIZZLE_ZERO:
954 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
955 break;
956
957 case TGSI_EXTSWIZZLE_ONE:
958 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
959 break;
960
961 default:
962 assert( 0 );
963 }
964 }
965
966 static void
967 fetch_source(
968 const struct tgsi_exec_machine *mach,
969 union tgsi_exec_channel *chan,
970 const struct tgsi_full_src_register *reg,
971 const uint chan_index )
972 {
973 union tgsi_exec_channel index;
974 uint swizzle;
975
976 index.i[0] =
977 index.i[1] =
978 index.i[2] =
979 index.i[3] = reg->SrcRegister.Index;
980
981 if (reg->SrcRegister.Indirect) {
982 union tgsi_exec_channel index2;
983 union tgsi_exec_channel indir_index;
984
985 index2.i[0] =
986 index2.i[1] =
987 index2.i[2] =
988 index2.i[3] = reg->SrcRegisterInd.Index;
989
990 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
991 fetch_src_file_channel(
992 mach,
993 reg->SrcRegisterInd.File,
994 swizzle,
995 &index2,
996 &indir_index );
997
998 index.i[0] += indir_index.i[0];
999 index.i[1] += indir_index.i[1];
1000 index.i[2] += indir_index.i[2];
1001 index.i[3] += indir_index.i[3];
1002 }
1003
1004 if( reg->SrcRegister.Dimension ) {
1005 switch( reg->SrcRegister.File ) {
1006 case TGSI_FILE_INPUT:
1007 index.i[0] *= 17;
1008 index.i[1] *= 17;
1009 index.i[2] *= 17;
1010 index.i[3] *= 17;
1011 break;
1012 case TGSI_FILE_CONSTANT:
1013 index.i[0] *= 4096;
1014 index.i[1] *= 4096;
1015 index.i[2] *= 4096;
1016 index.i[3] *= 4096;
1017 break;
1018 default:
1019 assert( 0 );
1020 }
1021
1022 index.i[0] += reg->SrcRegisterDim.Index;
1023 index.i[1] += reg->SrcRegisterDim.Index;
1024 index.i[2] += reg->SrcRegisterDim.Index;
1025 index.i[3] += reg->SrcRegisterDim.Index;
1026
1027 if (reg->SrcRegisterDim.Indirect) {
1028 union tgsi_exec_channel index2;
1029 union tgsi_exec_channel indir_index;
1030
1031 index2.i[0] =
1032 index2.i[1] =
1033 index2.i[2] =
1034 index2.i[3] = reg->SrcRegisterDimInd.Index;
1035
1036 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1037 fetch_src_file_channel(
1038 mach,
1039 reg->SrcRegisterDimInd.File,
1040 swizzle,
1041 &index2,
1042 &indir_index );
1043
1044 index.i[0] += indir_index.i[0];
1045 index.i[1] += indir_index.i[1];
1046 index.i[2] += indir_index.i[2];
1047 index.i[3] += indir_index.i[3];
1048 }
1049 }
1050
1051 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1052 fetch_src_file_channel(
1053 mach,
1054 reg->SrcRegister.File,
1055 swizzle,
1056 &index,
1057 chan );
1058
1059 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1060 case TGSI_UTIL_SIGN_CLEAR:
1061 micro_abs( chan, chan );
1062 break;
1063
1064 case TGSI_UTIL_SIGN_SET:
1065 micro_abs( chan, chan );
1066 micro_neg( chan, chan );
1067 break;
1068
1069 case TGSI_UTIL_SIGN_TOGGLE:
1070 micro_neg( chan, chan );
1071 break;
1072
1073 case TGSI_UTIL_SIGN_KEEP:
1074 break;
1075 }
1076
1077 if (reg->SrcRegisterExtMod.Complement) {
1078 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1079 }
1080 }
1081
1082 static void
1083 store_dest(
1084 struct tgsi_exec_machine *mach,
1085 const union tgsi_exec_channel *chan,
1086 const struct tgsi_full_dst_register *reg,
1087 const struct tgsi_full_instruction *inst,
1088 uint chan_index )
1089 {
1090 union tgsi_exec_channel *dst;
1091
1092 switch( reg->DstRegister.File ) {
1093 case TGSI_FILE_NULL:
1094 return;
1095
1096 case TGSI_FILE_OUTPUT:
1097 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1098 + reg->DstRegister.Index].xyzw[chan_index];
1099 break;
1100
1101 case TGSI_FILE_TEMPORARY:
1102 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1103 break;
1104
1105 case TGSI_FILE_ADDRESS:
1106 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1107 break;
1108
1109 default:
1110 assert( 0 );
1111 return;
1112 }
1113
1114 switch (inst->Instruction.Saturate)
1115 {
1116 case TGSI_SAT_NONE:
1117 if (mach->ExecMask & 0x1)
1118 dst->i[0] = chan->i[0];
1119 if (mach->ExecMask & 0x2)
1120 dst->i[1] = chan->i[1];
1121 if (mach->ExecMask & 0x4)
1122 dst->i[2] = chan->i[2];
1123 if (mach->ExecMask & 0x8)
1124 dst->i[3] = chan->i[3];
1125 break;
1126
1127 case TGSI_SAT_ZERO_ONE:
1128 /* XXX need to obey ExecMask here */
1129 micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
1130 micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
1131 break;
1132
1133 case TGSI_SAT_MINUS_PLUS_ONE:
1134 assert( 0 );
1135 break;
1136
1137 default:
1138 assert( 0 );
1139 }
1140 }
1141
/* Fetch channel CHAN of source operand INDEX of the current instruction
 * into VAL.  Expects 'mach' and 'inst' to be in scope at the point of use.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)

/* Store VAL into channel CHAN of destination operand INDEX of the current
 * instruction.  Expects 'mach' and 'inst' to be in scope at the point of use.
 */
#define STORE(VAL, INDEX, CHAN) \
   store_dest(mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN)
1147
1148
1149 /**
1150 * Execute ARB-style KIL which is predicated by a src register.
1151 * Kill fragment if any of the four values is less than zero.
1152 */
1153 static void
1154 exec_kilp(struct tgsi_exec_machine *mach,
1155 const struct tgsi_full_instruction *inst)
1156 {
1157 uint uniquemask;
1158 uint chan_index;
1159 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1160 union tgsi_exec_channel r[1];
1161
1162 /* This mask stores component bits that were already tested. Note that
1163 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1164 * tested. */
1165 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1166
1167 for (chan_index = 0; chan_index < 4; chan_index++)
1168 {
1169 uint swizzle;
1170 uint i;
1171
1172 /* unswizzle channel */
1173 swizzle = tgsi_util_get_full_src_register_extswizzle (
1174 &inst->FullSrcRegisters[0],
1175 chan_index);
1176
1177 /* check if the component has not been already tested */
1178 if (uniquemask & (1 << swizzle))
1179 continue;
1180 uniquemask |= 1 << swizzle;
1181
1182 FETCH(&r[0], 0, chan_index);
1183 for (i = 0; i < 4; i++)
1184 if (r[0].f[i] < 0.0f)
1185 kilmask |= 1 << i;
1186 }
1187
1188 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1189 }
1190
1191
1192 /*
1193 * Fetch a texel using STR texture coordinates.
1194 */
1195 static void
1196 fetch_texel( struct tgsi_sampler *sampler,
1197 const union tgsi_exec_channel *s,
1198 const union tgsi_exec_channel *t,
1199 const union tgsi_exec_channel *p,
1200 float lodbias, /* XXX should be float[4] */
1201 union tgsi_exec_channel *r,
1202 union tgsi_exec_channel *g,
1203 union tgsi_exec_channel *b,
1204 union tgsi_exec_channel *a )
1205 {
1206 uint j;
1207 float rgba[NUM_CHANNELS][QUAD_SIZE];
1208
1209 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1210
1211 for (j = 0; j < 4; j++) {
1212 r->f[j] = rgba[0][j];
1213 g->f[j] = rgba[1][j];
1214 b->f[j] = rgba[2][j];
1215 a->f[j] = rgba[3][j];
1216 }
1217 }
1218
1219
1220 static void
1221 exec_tex(struct tgsi_exec_machine *mach,
1222 const struct tgsi_full_instruction *inst,
1223 boolean biasLod)
1224 {
1225 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1226 union tgsi_exec_channel r[8];
1227 uint chan_index;
1228 float lodBias;
1229
1230 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1231
1232 switch (inst->InstructionExtTexture.Texture) {
1233 case TGSI_TEXTURE_1D:
1234
1235 FETCH(&r[0], 0, CHAN_X);
1236
1237 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
1238 case TGSI_EXTSWIZZLE_W:
1239 FETCH(&r[1], 0, CHAN_W);
1240 micro_div( &r[0], &r[0], &r[1] );
1241 break;
1242
1243 case TGSI_EXTSWIZZLE_ONE:
1244 break;
1245
1246 default:
1247 assert (0);
1248 }
1249
1250 if (biasLod) {
1251 FETCH(&r[1], 0, CHAN_W);
1252 lodBias = r[2].f[0];
1253 }
1254 else
1255 lodBias = 0.0;
1256
1257 fetch_texel(&mach->Samplers[unit],
1258 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1259 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1260 break;
1261
1262 case TGSI_TEXTURE_2D:
1263 case TGSI_TEXTURE_RECT:
1264
1265 FETCH(&r[0], 0, CHAN_X);
1266 FETCH(&r[1], 0, CHAN_Y);
1267 FETCH(&r[2], 0, CHAN_Z);
1268
1269 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
1270 case TGSI_EXTSWIZZLE_W:
1271 FETCH(&r[3], 0, CHAN_W);
1272 micro_div( &r[0], &r[0], &r[3] );
1273 micro_div( &r[1], &r[1], &r[3] );
1274 micro_div( &r[2], &r[2], &r[3] );
1275 break;
1276
1277 case TGSI_EXTSWIZZLE_ONE:
1278 break;
1279
1280 default:
1281 assert (0);
1282 }
1283
1284 if (biasLod) {
1285 FETCH(&r[3], 0, CHAN_W);
1286 lodBias = r[3].f[0];
1287 }
1288 else
1289 lodBias = 0.0;
1290
1291 fetch_texel(&mach->Samplers[unit],
1292 &r[0], &r[1], &r[2], lodBias, /* inputs */
1293 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1294 break;
1295
1296 case TGSI_TEXTURE_3D:
1297 case TGSI_TEXTURE_CUBE:
1298
1299 FETCH(&r[0], 0, CHAN_X);
1300 FETCH(&r[1], 0, CHAN_Y);
1301 FETCH(&r[2], 0, CHAN_Z);
1302
1303 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
1304 case TGSI_EXTSWIZZLE_W:
1305 FETCH(&r[3], 0, CHAN_W);
1306 micro_div( &r[0], &r[0], &r[3] );
1307 micro_div( &r[1], &r[1], &r[3] );
1308 micro_div( &r[2], &r[2], &r[3] );
1309 break;
1310
1311 case TGSI_EXTSWIZZLE_ONE:
1312 break;
1313
1314 default:
1315 assert (0);
1316 }
1317
1318 if (biasLod) {
1319 FETCH(&r[3], 0, CHAN_W);
1320 lodBias = r[3].f[0];
1321 }
1322 else
1323 lodBias = 0.0;
1324
1325 fetch_texel(&mach->Samplers[unit],
1326 &r[0], &r[1], &r[2], lodBias,
1327 &r[0], &r[1], &r[2], &r[3]);
1328 break;
1329
1330 default:
1331 assert (0);
1332 }
1333
1334 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1335 STORE( &r[chan_index], 0, chan_index );
1336 }
1337 }
1338
1339
1340 /**
1341 * Evaluate a constant-valued coefficient at the position of the
1342 * current quad.
1343 */
1344 static void
1345 eval_constant_coef(
1346 struct tgsi_exec_machine *mach,
1347 unsigned attrib,
1348 unsigned chan )
1349 {
1350 unsigned i;
1351
1352 for( i = 0; i < QUAD_SIZE; i++ ) {
1353 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1354 }
1355 }
1356
1357 /**
1358 * Evaluate a linear-valued coefficient at the position of the
1359 * current quad.
1360 */
1361 static void
1362 eval_linear_coef(
1363 struct tgsi_exec_machine *mach,
1364 unsigned attrib,
1365 unsigned chan )
1366 {
1367 const float x = mach->QuadPos.xyzw[0].f[0];
1368 const float y = mach->QuadPos.xyzw[1].f[0];
1369 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1370 const float dady = mach->InterpCoefs[attrib].dady[chan];
1371 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1372 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1373 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1374 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1375 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1376 }
1377
1378 /**
1379 * Evaluate a perspective-valued coefficient at the position of the
1380 * current quad.
1381 */
1382 static void
1383 eval_perspective_coef(
1384 struct tgsi_exec_machine *mach,
1385 unsigned attrib,
1386 unsigned chan )
1387 {
1388 const float x = mach->QuadPos.xyzw[0].f[0];
1389 const float y = mach->QuadPos.xyzw[1].f[0];
1390 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1391 const float dady = mach->InterpCoefs[attrib].dady[chan];
1392 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1393 const float *w = mach->QuadPos.xyzw[3].f;
1394 /* divide by W here */
1395 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1396 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1397 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1398 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1399 }
1400
1401
/* Signature shared by the eval_*_coef() functions: interpolate channel
 * 'chan' of input attribute 'attrib' for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
1406
1407 static void
1408 exec_declaration(
1409 struct tgsi_exec_machine *mach,
1410 const struct tgsi_full_declaration *decl )
1411 {
1412 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1413 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1414 unsigned first, last, mask;
1415 eval_coef_func eval;
1416
1417 assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
1418
1419 first = decl->u.DeclarationRange.First;
1420 last = decl->u.DeclarationRange.Last;
1421 mask = decl->Declaration.UsageMask;
1422
1423 switch( decl->Interpolation.Interpolate ) {
1424 case TGSI_INTERPOLATE_CONSTANT:
1425 eval = eval_constant_coef;
1426 break;
1427
1428 case TGSI_INTERPOLATE_LINEAR:
1429 eval = eval_linear_coef;
1430 break;
1431
1432 case TGSI_INTERPOLATE_PERSPECTIVE:
1433 eval = eval_perspective_coef;
1434 break;
1435
1436 default:
1437 assert( 0 );
1438 }
1439
1440 if( mask == TGSI_WRITEMASK_XYZW ) {
1441 unsigned i, j;
1442
1443 for( i = first; i <= last; i++ ) {
1444 for( j = 0; j < NUM_CHANNELS; j++ ) {
1445 eval( mach, i, j );
1446 }
1447 }
1448 }
1449 else {
1450 unsigned i, j;
1451
1452 for( j = 0; j < NUM_CHANNELS; j++ ) {
1453 if( mask & (1 << j) ) {
1454 for( i = first; i <= last; i++ ) {
1455 eval( mach, i, j );
1456 }
1457 }
1458 }
1459 }
1460 }
1461 }
1462 }
1463
1464 static void
1465 exec_instruction(
1466 struct tgsi_exec_machine *mach,
1467 const struct tgsi_full_instruction *inst,
1468 int *pc )
1469 {
1470 uint chan_index;
1471 union tgsi_exec_channel r[8];
1472
1473 (*pc)++;
1474
1475 switch (inst->Instruction.Opcode) {
1476 case TGSI_OPCODE_ARL:
1477 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1478 FETCH( &r[0], 0, chan_index );
1479 micro_f2it( &r[0], &r[0] );
1480 STORE( &r[0], 0, chan_index );
1481 }
1482 break;
1483
1484 case TGSI_OPCODE_MOV:
1485 /* TGSI_OPCODE_SWZ */
1486 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1487 FETCH( &r[0], 0, chan_index );
1488 STORE( &r[0], 0, chan_index );
1489 }
1490 break;
1491
1492 case TGSI_OPCODE_LIT:
1493 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1494 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1495 }
1496
1497 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1498 FETCH( &r[0], 0, CHAN_X );
1499 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1500 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1501 STORE( &r[0], 0, CHAN_Y );
1502 }
1503
1504 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1505 FETCH( &r[1], 0, CHAN_Y );
1506 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1507
1508 FETCH( &r[2], 0, CHAN_W );
1509 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1510 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1511 micro_pow( &r[1], &r[1], &r[2] );
1512 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1513 STORE( &r[0], 0, CHAN_Z );
1514 }
1515 }
1516
1517 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1518 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1519 }
1520 break;
1521
1522 case TGSI_OPCODE_RCP:
1523 /* TGSI_OPCODE_RECIP */
1524 FETCH( &r[0], 0, CHAN_X );
1525 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1526 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1527 STORE( &r[0], 0, chan_index );
1528 }
1529 break;
1530
1531 case TGSI_OPCODE_RSQ:
1532 /* TGSI_OPCODE_RECIPSQRT */
1533 FETCH( &r[0], 0, CHAN_X );
1534 micro_sqrt( &r[0], &r[0] );
1535 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1536 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1537 STORE( &r[0], 0, chan_index );
1538 }
1539 break;
1540
1541 case TGSI_OPCODE_EXP:
1542 assert (0);
1543 break;
1544
1545 case TGSI_OPCODE_LOG:
1546 assert (0);
1547 break;
1548
1549 case TGSI_OPCODE_MUL:
1550 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1551 {
1552 FETCH(&r[0], 0, chan_index);
1553 FETCH(&r[1], 1, chan_index);
1554
1555 micro_mul( &r[0], &r[0], &r[1] );
1556
1557 STORE(&r[0], 0, chan_index);
1558 }
1559 break;
1560
1561 case TGSI_OPCODE_ADD:
1562 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1563 FETCH( &r[0], 0, chan_index );
1564 FETCH( &r[1], 1, chan_index );
1565 micro_add( &r[0], &r[0], &r[1] );
1566 STORE( &r[0], 0, chan_index );
1567 }
1568 break;
1569
1570 case TGSI_OPCODE_DP3:
1571 /* TGSI_OPCODE_DOT3 */
1572 FETCH( &r[0], 0, CHAN_X );
1573 FETCH( &r[1], 1, CHAN_X );
1574 micro_mul( &r[0], &r[0], &r[1] );
1575
1576 FETCH( &r[1], 0, CHAN_Y );
1577 FETCH( &r[2], 1, CHAN_Y );
1578 micro_mul( &r[1], &r[1], &r[2] );
1579 micro_add( &r[0], &r[0], &r[1] );
1580
1581 FETCH( &r[1], 0, CHAN_Z );
1582 FETCH( &r[2], 1, CHAN_Z );
1583 micro_mul( &r[1], &r[1], &r[2] );
1584 micro_add( &r[0], &r[0], &r[1] );
1585
1586 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1587 STORE( &r[0], 0, chan_index );
1588 }
1589 break;
1590
1591 case TGSI_OPCODE_DP4:
1592 /* TGSI_OPCODE_DOT4 */
1593 FETCH(&r[0], 0, CHAN_X);
1594 FETCH(&r[1], 1, CHAN_X);
1595
1596 micro_mul( &r[0], &r[0], &r[1] );
1597
1598 FETCH(&r[1], 0, CHAN_Y);
1599 FETCH(&r[2], 1, CHAN_Y);
1600
1601 micro_mul( &r[1], &r[1], &r[2] );
1602 micro_add( &r[0], &r[0], &r[1] );
1603
1604 FETCH(&r[1], 0, CHAN_Z);
1605 FETCH(&r[2], 1, CHAN_Z);
1606
1607 micro_mul( &r[1], &r[1], &r[2] );
1608 micro_add( &r[0], &r[0], &r[1] );
1609
1610 FETCH(&r[1], 0, CHAN_W);
1611 FETCH(&r[2], 1, CHAN_W);
1612
1613 micro_mul( &r[1], &r[1], &r[2] );
1614 micro_add( &r[0], &r[0], &r[1] );
1615
1616 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1617 STORE( &r[0], 0, chan_index );
1618 }
1619 break;
1620
1621 case TGSI_OPCODE_DST:
1622 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1623 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1624 }
1625
1626 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1627 FETCH( &r[0], 0, CHAN_Y );
1628 FETCH( &r[1], 1, CHAN_Y);
1629 micro_mul( &r[0], &r[0], &r[1] );
1630 STORE( &r[0], 0, CHAN_Y );
1631 }
1632
1633 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1634 FETCH( &r[0], 0, CHAN_Z );
1635 STORE( &r[0], 0, CHAN_Z );
1636 }
1637
1638 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1639 FETCH( &r[0], 1, CHAN_W );
1640 STORE( &r[0], 0, CHAN_W );
1641 }
1642 break;
1643
1644 case TGSI_OPCODE_MIN:
1645 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1646 FETCH(&r[0], 0, chan_index);
1647 FETCH(&r[1], 1, chan_index);
1648
1649 /* XXX use micro_min()?? */
1650 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1651
1652 STORE(&r[0], 0, chan_index);
1653 }
1654 break;
1655
1656 case TGSI_OPCODE_MAX:
1657 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1658 FETCH(&r[0], 0, chan_index);
1659 FETCH(&r[1], 1, chan_index);
1660
1661 /* XXX use micro_max()?? */
1662 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1663
1664 STORE(&r[0], 0, chan_index );
1665 }
1666 break;
1667
1668 case TGSI_OPCODE_SLT:
1669 /* TGSI_OPCODE_SETLT */
1670 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1671 FETCH( &r[0], 0, chan_index );
1672 FETCH( &r[1], 1, chan_index );
1673 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1674 STORE( &r[0], 0, chan_index );
1675 }
1676 break;
1677
1678 case TGSI_OPCODE_SGE:
1679 /* TGSI_OPCODE_SETGE */
1680 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1681 FETCH( &r[0], 0, chan_index );
1682 FETCH( &r[1], 1, chan_index );
1683 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1684 STORE( &r[0], 0, chan_index );
1685 }
1686 break;
1687
1688 case TGSI_OPCODE_MAD:
1689 /* TGSI_OPCODE_MADD */
1690 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1691 FETCH( &r[0], 0, chan_index );
1692 FETCH( &r[1], 1, chan_index );
1693 micro_mul( &r[0], &r[0], &r[1] );
1694 FETCH( &r[1], 2, chan_index );
1695 micro_add( &r[0], &r[0], &r[1] );
1696 STORE( &r[0], 0, chan_index );
1697 }
1698 break;
1699
1700 case TGSI_OPCODE_SUB:
1701 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1702 FETCH(&r[0], 0, chan_index);
1703 FETCH(&r[1], 1, chan_index);
1704
1705 micro_sub( &r[0], &r[0], &r[1] );
1706
1707 STORE(&r[0], 0, chan_index);
1708 }
1709 break;
1710
1711 case TGSI_OPCODE_LERP:
1712 /* TGSI_OPCODE_LRP */
1713 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1714 FETCH(&r[0], 0, chan_index);
1715 FETCH(&r[1], 1, chan_index);
1716 FETCH(&r[2], 2, chan_index);
1717
1718 micro_sub( &r[1], &r[1], &r[2] );
1719 micro_mul( &r[0], &r[0], &r[1] );
1720 micro_add( &r[0], &r[0], &r[2] );
1721
1722 STORE(&r[0], 0, chan_index);
1723 }
1724 break;
1725
1726 case TGSI_OPCODE_CND:
1727 assert (0);
1728 break;
1729
1730 case TGSI_OPCODE_CND0:
1731 assert (0);
1732 break;
1733
1734 case TGSI_OPCODE_DOT2ADD:
1735 /* TGSI_OPCODE_DP2A */
1736 assert (0);
1737 break;
1738
1739 case TGSI_OPCODE_INDEX:
1740 assert (0);
1741 break;
1742
1743 case TGSI_OPCODE_NEGATE:
1744 assert (0);
1745 break;
1746
1747 case TGSI_OPCODE_FRAC:
1748 /* TGSI_OPCODE_FRC */
1749 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1750 FETCH( &r[0], 0, chan_index );
1751 micro_frc( &r[0], &r[0] );
1752 STORE( &r[0], 0, chan_index );
1753 }
1754 break;
1755
1756 case TGSI_OPCODE_CLAMP:
1757 assert (0);
1758 break;
1759
1760 case TGSI_OPCODE_FLOOR:
1761 /* TGSI_OPCODE_FLR */
1762 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1763 FETCH( &r[0], 0, chan_index );
1764 micro_flr( &r[0], &r[0] );
1765 STORE( &r[0], 0, chan_index );
1766 }
1767 break;
1768
1769 case TGSI_OPCODE_ROUND:
1770 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1771 FETCH( &r[0], 0, chan_index );
1772 micro_rnd( &r[0], &r[0] );
1773 STORE( &r[0], 0, chan_index );
1774 }
1775 break;
1776
1777 case TGSI_OPCODE_EXPBASE2:
1778 /* TGSI_OPCODE_EX2 */
1779 FETCH(&r[0], 0, CHAN_X);
1780
1781 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
1782
1783 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1784 STORE( &r[0], 0, chan_index );
1785 }
1786 break;
1787
1788 case TGSI_OPCODE_LOGBASE2:
1789 /* TGSI_OPCODE_LG2 */
1790 FETCH( &r[0], 0, CHAN_X );
1791 micro_lg2( &r[0], &r[0] );
1792 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1793 STORE( &r[0], 0, chan_index );
1794 }
1795 break;
1796
1797 case TGSI_OPCODE_POWER:
1798 /* TGSI_OPCODE_POW */
1799 FETCH(&r[0], 0, CHAN_X);
1800 FETCH(&r[1], 1, CHAN_X);
1801
1802 micro_pow( &r[0], &r[0], &r[1] );
1803
1804 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1805 STORE( &r[0], 0, chan_index );
1806 }
1807 break;
1808
1809 case TGSI_OPCODE_CROSSPRODUCT:
1810 /* TGSI_OPCODE_XPD */
1811 FETCH(&r[0], 0, CHAN_Y);
1812 FETCH(&r[1], 1, CHAN_Z);
1813
1814 micro_mul( &r[2], &r[0], &r[1] );
1815
1816 FETCH(&r[3], 0, CHAN_Z);
1817 FETCH(&r[4], 1, CHAN_Y);
1818
1819 micro_mul( &r[5], &r[3], &r[4] );
1820 micro_sub( &r[2], &r[2], &r[5] );
1821
1822 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1823 STORE( &r[2], 0, CHAN_X );
1824 }
1825
1826 FETCH(&r[2], 1, CHAN_X);
1827
1828 micro_mul( &r[3], &r[3], &r[2] );
1829
1830 FETCH(&r[5], 0, CHAN_X);
1831
1832 micro_mul( &r[1], &r[1], &r[5] );
1833 micro_sub( &r[3], &r[3], &r[1] );
1834
1835 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1836 STORE( &r[3], 0, CHAN_Y );
1837 }
1838
1839 micro_mul( &r[5], &r[5], &r[4] );
1840 micro_mul( &r[0], &r[0], &r[2] );
1841 micro_sub( &r[5], &r[5], &r[0] );
1842
1843 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1844 STORE( &r[5], 0, CHAN_Z );
1845 }
1846
1847 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1848 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1849 }
1850 break;
1851
1852 case TGSI_OPCODE_MULTIPLYMATRIX:
1853 assert (0);
1854 break;
1855
1856 case TGSI_OPCODE_ABS:
1857 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1858 FETCH(&r[0], 0, chan_index);
1859
1860 micro_abs( &r[0], &r[0] );
1861
1862 STORE(&r[0], 0, chan_index);
1863 }
1864 break;
1865
1866 case TGSI_OPCODE_RCC:
1867 assert (0);
1868 break;
1869
1870 case TGSI_OPCODE_DPH:
1871 FETCH(&r[0], 0, CHAN_X);
1872 FETCH(&r[1], 1, CHAN_X);
1873
1874 micro_mul( &r[0], &r[0], &r[1] );
1875
1876 FETCH(&r[1], 0, CHAN_Y);
1877 FETCH(&r[2], 1, CHAN_Y);
1878
1879 micro_mul( &r[1], &r[1], &r[2] );
1880 micro_add( &r[0], &r[0], &r[1] );
1881
1882 FETCH(&r[1], 0, CHAN_Z);
1883 FETCH(&r[2], 1, CHAN_Z);
1884
1885 micro_mul( &r[1], &r[1], &r[2] );
1886 micro_add( &r[0], &r[0], &r[1] );
1887
1888 FETCH(&r[1], 1, CHAN_W);
1889
1890 micro_add( &r[0], &r[0], &r[1] );
1891
1892 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1893 STORE( &r[0], 0, chan_index );
1894 }
1895 break;
1896
1897 case TGSI_OPCODE_COS:
1898 FETCH(&r[0], 0, CHAN_X);
1899
1900 micro_cos( &r[0], &r[0] );
1901
1902 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1903 STORE( &r[0], 0, chan_index );
1904 }
1905 break;
1906
1907 case TGSI_OPCODE_DDX:
1908 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1909 FETCH( &r[0], 0, chan_index );
1910 micro_ddx( &r[0], &r[0] );
1911 STORE( &r[0], 0, chan_index );
1912 }
1913 break;
1914
1915 case TGSI_OPCODE_DDY:
1916 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1917 FETCH( &r[0], 0, chan_index );
1918 micro_ddy( &r[0], &r[0] );
1919 STORE( &r[0], 0, chan_index );
1920 }
1921 break;
1922
1923 case TGSI_OPCODE_KILP:
1924 exec_kilp (mach, inst);
1925 break;
1926
1927 case TGSI_OPCODE_KIL:
1928 /* for enabled ExecMask bits, set the killed bit */
1929 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
1930 break;
1931
1932 case TGSI_OPCODE_PK2H:
1933 assert (0);
1934 break;
1935
1936 case TGSI_OPCODE_PK2US:
1937 assert (0);
1938 break;
1939
1940 case TGSI_OPCODE_PK4B:
1941 assert (0);
1942 break;
1943
1944 case TGSI_OPCODE_PK4UB:
1945 assert (0);
1946 break;
1947
1948 case TGSI_OPCODE_RFL:
1949 assert (0);
1950 break;
1951
1952 case TGSI_OPCODE_SEQ:
1953 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1954 FETCH( &r[0], 0, chan_index );
1955 FETCH( &r[1], 1, chan_index );
1956 micro_eq( &r[0], &r[0], &r[1],
1957 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
1958 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1959 STORE( &r[0], 0, chan_index );
1960 }
1961 break;
1962
1963 case TGSI_OPCODE_SFL:
1964 assert (0);
1965 break;
1966
1967 case TGSI_OPCODE_SGT:
1968 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1969 FETCH( &r[0], 0, chan_index );
1970 FETCH( &r[1], 1, chan_index );
1971 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1972 STORE( &r[0], 0, chan_index );
1973 }
1974 break;
1975
1976 case TGSI_OPCODE_SIN:
1977 FETCH( &r[0], 0, CHAN_X );
1978 micro_sin( &r[0], &r[0] );
1979 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1980 STORE( &r[0], 0, chan_index );
1981 }
1982 break;
1983
1984 case TGSI_OPCODE_SLE:
1985 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1986 FETCH( &r[0], 0, chan_index );
1987 FETCH( &r[1], 1, chan_index );
1988 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1989 STORE( &r[0], 0, chan_index );
1990 }
1991 break;
1992
1993 case TGSI_OPCODE_SNE:
1994 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1995 FETCH( &r[0], 0, chan_index );
1996 FETCH( &r[1], 1, chan_index );
1997 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1998 STORE( &r[0], 0, chan_index );
1999 }
2000 break;
2001
2002 case TGSI_OPCODE_STR:
2003 assert (0);
2004 break;
2005
2006 case TGSI_OPCODE_TEX:
2007 /* simple texture lookup */
2008 /* src[0] = texcoord */
2009 /* src[1] = sampler unit */
2010 exec_tex(mach, inst, FALSE);
2011 break;
2012
2013 case TGSI_OPCODE_TXB:
2014 /* Texture lookup with lod bias */
2015 /* src[0] = texcoord (src[0].w = LOD bias) */
2016 /* src[1] = sampler unit */
2017 exec_tex(mach, inst, TRUE);
2018 break;
2019
2020 case TGSI_OPCODE_TXD:
2021 /* Texture lookup with explict partial derivatives */
2022 /* src[0] = texcoord */
2023 /* src[1] = d[strq]/dx */
2024 /* src[2] = d[strq]/dy */
2025 /* src[3] = sampler unit */
2026 assert (0);
2027 break;
2028
2029 case TGSI_OPCODE_TXL:
2030 /* Texture lookup with explit LOD */
2031 /* src[0] = texcoord (src[0].w = LOD) */
2032 /* src[1] = sampler unit */
2033 exec_tex(mach, inst, TRUE);
2034 break;
2035
2036 case TGSI_OPCODE_UP2H:
2037 assert (0);
2038 break;
2039
2040 case TGSI_OPCODE_UP2US:
2041 assert (0);
2042 break;
2043
2044 case TGSI_OPCODE_UP4B:
2045 assert (0);
2046 break;
2047
2048 case TGSI_OPCODE_UP4UB:
2049 assert (0);
2050 break;
2051
2052 case TGSI_OPCODE_X2D:
2053 assert (0);
2054 break;
2055
2056 case TGSI_OPCODE_ARA:
2057 assert (0);
2058 break;
2059
2060 case TGSI_OPCODE_ARR:
2061 assert (0);
2062 break;
2063
2064 case TGSI_OPCODE_BRA:
2065 assert (0);
2066 break;
2067
2068 case TGSI_OPCODE_CAL:
2069 /* skip the call if no execution channels are enabled */
2070 if (mach->ExecMask) {
2071 /* do the call */
2072
2073 /* push the Cond, Loop, Cont stacks */
2074 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2075 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2076 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2077 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2078 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2079 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2080
2081 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2082 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2083
2084 /* note that PC was already incremented above */
2085 mach->CallStack[mach->CallStackTop++] = *pc;
2086 *pc = inst->InstructionExtLabel.Label;
2087 }
2088 break;
2089
2090 case TGSI_OPCODE_RET:
2091 mach->FuncMask &= ~mach->ExecMask;
2092 UPDATE_EXEC_MASK(mach);
2093
2094 if (mach->ExecMask == 0x0) {
2095 /* really return now (otherwise, keep executing */
2096
2097 if (mach->CallStackTop == 0) {
2098 /* returning from main() */
2099 *pc = -1;
2100 return;
2101 }
2102 *pc = mach->CallStack[--mach->CallStackTop];
2103
2104 /* pop the Cond, Loop, Cont stacks */
2105 assert(mach->CondStackTop > 0);
2106 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2107 assert(mach->LoopStackTop > 0);
2108 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2109 assert(mach->ContStackTop > 0);
2110 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2111 assert(mach->FuncStackTop > 0);
2112 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2113
2114 UPDATE_EXEC_MASK(mach);
2115 }
2116 break;
2117
2118 case TGSI_OPCODE_SSG:
2119 assert (0);
2120 break;
2121
2122 case TGSI_OPCODE_CMP:
2123 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2124 FETCH(&r[0], 0, chan_index);
2125 FETCH(&r[1], 1, chan_index);
2126 FETCH(&r[2], 2, chan_index);
2127
2128 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2129
2130 STORE(&r[0], 0, chan_index);
2131 }
2132 break;
2133
2134 case TGSI_OPCODE_SCS:
2135 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2136 FETCH( &r[0], 0, CHAN_X );
2137 }
2138 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2139 micro_cos( &r[1], &r[0] );
2140 STORE( &r[1], 0, CHAN_X );
2141 }
2142 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2143 micro_sin( &r[1], &r[0] );
2144 STORE( &r[1], 0, CHAN_Y );
2145 }
2146 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2147 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2148 }
2149 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2150 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2151 }
2152 break;
2153
2154 case TGSI_OPCODE_NRM:
2155 assert (0);
2156 break;
2157
2158 case TGSI_OPCODE_DIV:
2159 assert( 0 );
2160 break;
2161
2162 case TGSI_OPCODE_DP2:
2163 FETCH( &r[0], 0, CHAN_X );
2164 FETCH( &r[1], 1, CHAN_X );
2165 micro_mul( &r[0], &r[0], &r[1] );
2166
2167 FETCH( &r[1], 0, CHAN_Y );
2168 FETCH( &r[2], 1, CHAN_Y );
2169 micro_mul( &r[1], &r[1], &r[2] );
2170 micro_add( &r[0], &r[0], &r[1] );
2171
2172 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2173 STORE( &r[0], 0, chan_index );
2174 }
2175 break;
2176
2177 case TGSI_OPCODE_IF:
2178 /* push CondMask */
2179 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2180 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2181 FETCH( &r[0], 0, CHAN_X );
2182 /* update CondMask */
2183 if( ! r[0].u[0] ) {
2184 mach->CondMask &= ~0x1;
2185 }
2186 if( ! r[0].u[1] ) {
2187 mach->CondMask &= ~0x2;
2188 }
2189 if( ! r[0].u[2] ) {
2190 mach->CondMask &= ~0x4;
2191 }
2192 if( ! r[0].u[3] ) {
2193 mach->CondMask &= ~0x8;
2194 }
2195 UPDATE_EXEC_MASK(mach);
2196 /* Todo: If CondMask==0, jump to ELSE */
2197 break;
2198
2199 case TGSI_OPCODE_ELSE:
2200 /* invert CondMask wrt previous mask */
2201 {
2202 uint prevMask;
2203 assert(mach->CondStackTop > 0);
2204 prevMask = mach->CondStack[mach->CondStackTop - 1];
2205 mach->CondMask = ~mach->CondMask & prevMask;
2206 UPDATE_EXEC_MASK(mach);
2207 /* Todo: If CondMask==0, jump to ENDIF */
2208 }
2209 break;
2210
2211 case TGSI_OPCODE_ENDIF:
2212 /* pop CondMask */
2213 assert(mach->CondStackTop > 0);
2214 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2215 UPDATE_EXEC_MASK(mach);
2216 break;
2217
2218 case TGSI_OPCODE_END:
2219 /* halt execution */
2220 *pc = -1;
2221 break;
2222
2223 case TGSI_OPCODE_REP:
2224 assert (0);
2225 break;
2226
2227 case TGSI_OPCODE_ENDREP:
2228 assert (0);
2229 break;
2230
2231 case TGSI_OPCODE_PUSHA:
2232 assert (0);
2233 break;
2234
2235 case TGSI_OPCODE_POPA:
2236 assert (0);
2237 break;
2238
2239 case TGSI_OPCODE_CEIL:
2240 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2241 FETCH( &r[0], 0, chan_index );
2242 micro_ceil( &r[0], &r[0] );
2243 STORE( &r[0], 0, chan_index );
2244 }
2245 break;
2246
2247 case TGSI_OPCODE_I2F:
2248 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2249 FETCH( &r[0], 0, chan_index );
2250 micro_i2f( &r[0], &r[0] );
2251 STORE( &r[0], 0, chan_index );
2252 }
2253 break;
2254
2255 case TGSI_OPCODE_NOT:
2256 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2257 FETCH( &r[0], 0, chan_index );
2258 micro_not( &r[0], &r[0] );
2259 STORE( &r[0], 0, chan_index );
2260 }
2261 break;
2262
2263 case TGSI_OPCODE_TRUNC:
2264 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2265 FETCH( &r[0], 0, chan_index );
2266 micro_trunc( &r[0], &r[0] );
2267 STORE( &r[0], 0, chan_index );
2268 }
2269 break;
2270
2271 case TGSI_OPCODE_SHL:
2272 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2273 FETCH( &r[0], 0, chan_index );
2274 FETCH( &r[1], 1, chan_index );
2275 micro_shl( &r[0], &r[0], &r[1] );
2276 STORE( &r[0], 0, chan_index );
2277 }
2278 break;
2279
2280 case TGSI_OPCODE_SHR:
2281 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2282 FETCH( &r[0], 0, chan_index );
2283 FETCH( &r[1], 1, chan_index );
2284 micro_ishr( &r[0], &r[0], &r[1] );
2285 STORE( &r[0], 0, chan_index );
2286 }
2287 break;
2288
2289 case TGSI_OPCODE_AND:
2290 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2291 FETCH( &r[0], 0, chan_index );
2292 FETCH( &r[1], 1, chan_index );
2293 micro_and( &r[0], &r[0], &r[1] );
2294 STORE( &r[0], 0, chan_index );
2295 }
2296 break;
2297
2298 case TGSI_OPCODE_OR:
2299 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2300 FETCH( &r[0], 0, chan_index );
2301 FETCH( &r[1], 1, chan_index );
2302 micro_or( &r[0], &r[0], &r[1] );
2303 STORE( &r[0], 0, chan_index );
2304 }
2305 break;
2306
2307 case TGSI_OPCODE_MOD:
2308 assert (0);
2309 break;
2310
2311 case TGSI_OPCODE_XOR:
2312 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2313 FETCH( &r[0], 0, chan_index );
2314 FETCH( &r[1], 1, chan_index );
2315 micro_xor( &r[0], &r[0], &r[1] );
2316 STORE( &r[0], 0, chan_index );
2317 }
2318 break;
2319
2320 case TGSI_OPCODE_SAD:
2321 assert (0);
2322 break;
2323
2324 case TGSI_OPCODE_TXF:
2325 assert (0);
2326 break;
2327
2328 case TGSI_OPCODE_TXQ:
2329 assert (0);
2330 break;
2331
2332 case TGSI_OPCODE_EMIT:
2333 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2334 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2335 break;
2336
2337 case TGSI_OPCODE_ENDPRIM:
2338 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2339 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2340 break;
2341
2342 case TGSI_OPCODE_LOOP:
2343 /* fall-through (for now) */
2344 case TGSI_OPCODE_BGNLOOP2:
2345 /* push LoopMask and ContMasks */
2346 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2347 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2348 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2349 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2350 break;
2351
2352 case TGSI_OPCODE_ENDLOOP:
2353 /* fall-through (for now at least) */
2354 case TGSI_OPCODE_ENDLOOP2:
2355 /* Restore ContMask, but don't pop */
2356 assert(mach->ContStackTop > 0);
2357 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2358 if (mach->LoopMask) {
2359 /* repeat loop: jump to instruction just past BGNLOOP */
2360 *pc = inst->InstructionExtLabel.Label + 1;
2361 }
2362 else {
2363 /* exit loop: pop LoopMask */
2364 assert(mach->LoopStackTop > 0);
2365 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2366 /* pop ContMask */
2367 assert(mach->ContStackTop > 0);
2368 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2369 }
2370 UPDATE_EXEC_MASK(mach);
2371 break;
2372
2373 case TGSI_OPCODE_BRK:
2374 /* turn off loop channels for each enabled exec channel */
2375 mach->LoopMask &= ~mach->ExecMask;
2376 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2377 UPDATE_EXEC_MASK(mach);
2378 break;
2379
2380 case TGSI_OPCODE_CONT:
2381 /* turn off cont channels for each enabled exec channel */
2382 mach->ContMask &= ~mach->ExecMask;
2383 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2384 UPDATE_EXEC_MASK(mach);
2385 break;
2386
2387 case TGSI_OPCODE_BGNSUB:
2388 /* no-op */
2389 break;
2390
2391 case TGSI_OPCODE_ENDSUB:
2392 /* no-op */
2393 break;
2394
2395 case TGSI_OPCODE_NOISE1:
2396 assert( 0 );
2397 break;
2398
2399 case TGSI_OPCODE_NOISE2:
2400 assert( 0 );
2401 break;
2402
2403 case TGSI_OPCODE_NOISE3:
2404 assert( 0 );
2405 break;
2406
2407 case TGSI_OPCODE_NOISE4:
2408 assert( 0 );
2409 break;
2410
2411 case TGSI_OPCODE_NOP:
2412 break;
2413
2414 default:
2415 assert( 0 );
2416 }
2417 }
2418
2419
2420 /**
2421 * Run TGSI interpreter.
2422 * \return bitmask of "alive" quad components
2423 */
2424 uint
2425 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2426 {
2427 uint i;
2428 int pc = 0;
2429
2430 mach->CondMask = 0xf;
2431 mach->LoopMask = 0xf;
2432 mach->ContMask = 0xf;
2433 mach->FuncMask = 0xf;
2434 mach->ExecMask = 0xf;
2435
2436 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2437 assert(mach->CondStackTop == 0);
2438 assert(mach->LoopStackTop == 0);
2439 assert(mach->ContStackTop == 0);
2440 assert(mach->CallStackTop == 0);
2441
2442 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2443 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2444
2445 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2446 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2447 mach->Primitives[0] = 0;
2448 }
2449
2450
2451 /* execute declarations (interpolants) */
2452 for (i = 0; i < mach->NumDeclarations; i++) {
2453 exec_declaration( mach, mach->Declarations+i );
2454 }
2455
2456 /* execute instructions, until pc is set to -1 */
2457 while (pc != -1) {
2458 assert(pc < mach->NumInstructions);
2459 exec_instruction( mach, mach->Instructions + pc, &pc );
2460 }
2461
2462 #if 0
2463 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2464 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2465 /*
2466 * Scale back depth component.
2467 */
2468 for (i = 0; i < 4; i++)
2469 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2470 }
2471 #endif
2472
2473 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2474 }
2475
2476