gallium: implement TGSI_OPCODE_EXP
[mesa.git] / src / gallium / auxiliary / tgsi / exec / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask; see UPDATE_EXEC_MASK). The first three are controlled
45 * by the flow control instructions (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include "pipe/p_compiler.h"
54 #include "pipe/p_state.h"
55 #include "pipe/p_util.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/util/tgsi_parse.h"
58 #include "tgsi/util/tgsi_util.h"
59 #include "tgsi_exec.h"
60
/*
 * Element indices within a 4-wide channel, named after positions in a
 * 2x2 tile (used by the derivative helpers micro_ddx/micro_ddy).
 */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3

/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0

/** Iterate CHAN (an integer lvalue) over the four channel indices 0..3. */
#define FOR_EACH_CHANNEL(CHAN)\
   for ((CHAN) = 0; (CHAN) < 4; (CHAN)++)

/** Non-zero if channel CHAN is set in the write mask of INST's first destination. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test as IS_CHANNEL_ENABLED, but against INST's second destination. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/*
 * Loop headers that run the following statement only for channels enabled
 * in the first (resp. second) destination's write mask.  They expand to
 * "for (...) if (...)", so do not attach an "else" to the statement.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))


/**
 * The execution mask is the bitwise AND of the conditional, loop,
 * continue and function masks.  MACH is a pointer expression; it is
 * evaluated several times, so it must be free of side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)


/* Channel (vector component) indices. */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3
120
121
122
123 /**
124 * Initialize machine state by expanding tokens to full instructions,
125 * allocating temporary storage, setting up constants, etc.
126 * After this, we can call tgsi_exec_machine_run() many times.
127 */
128 void
129 tgsi_exec_machine_bind_shader(
130 struct tgsi_exec_machine *mach,
131 const struct tgsi_token *tokens,
132 uint numSamplers,
133 struct tgsi_sampler *samplers)
134 {
135 uint k;
136 struct tgsi_parse_context parse;
137 struct tgsi_exec_labels *labels = &mach->Labels;
138 struct tgsi_full_instruction *instructions;
139 struct tgsi_full_declaration *declarations;
140 uint maxInstructions = 10, numInstructions = 0;
141 uint maxDeclarations = 10, numDeclarations = 0;
142 uint instno = 0;
143
144 #if 0
145 tgsi_dump(tokens, 0);
146 #endif
147
148 mach->Tokens = tokens;
149 mach->Samplers = samplers;
150
151 k = tgsi_parse_init (&parse, mach->Tokens);
152 if (k != TGSI_PARSE_OK) {
153 debug_printf( "Problem parsing!\n" );
154 return;
155 }
156
157 mach->Processor = parse.FullHeader.Processor.Processor;
158 mach->ImmLimit = 0;
159 labels->count = 0;
160
161 declarations = (struct tgsi_full_declaration *)
162 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
163
164 instructions = (struct tgsi_full_instruction *)
165 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
166
167
168 while( !tgsi_parse_end_of_tokens( &parse ) ) {
169 uint pointer = parse.Position;
170 uint i;
171
172 tgsi_parse_token( &parse );
173 switch( parse.FullToken.Token.Type ) {
174 case TGSI_TOKEN_TYPE_DECLARATION:
175 /* save expanded declaration */
176 if (numDeclarations == maxDeclarations) {
177 declarations = REALLOC(declarations,
178 maxDeclarations
179 * sizeof(struct tgsi_full_declaration),
180 (maxDeclarations + 10)
181 * sizeof(struct tgsi_full_declaration));
182 maxDeclarations += 10;
183 }
184 memcpy(declarations + numDeclarations,
185 &parse.FullToken.FullDeclaration,
186 sizeof(declarations[0]));
187 numDeclarations++;
188 break;
189
190 case TGSI_TOKEN_TYPE_IMMEDIATE:
191 {
192 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
193 assert( size % 4 == 0 );
194 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
195
196 for( i = 0; i < size; i++ ) {
197 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
198 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
199 }
200 mach->ImmLimit += size / 4;
201 }
202 break;
203
204 case TGSI_TOKEN_TYPE_INSTRUCTION:
205 assert( labels->count < 128 );
206
207 labels->labels[labels->count][0] = instno;
208 labels->labels[labels->count][1] = pointer;
209 labels->count++;
210
211 /* save expanded instruction */
212 if (numInstructions == maxInstructions) {
213 instructions = REALLOC(instructions,
214 maxInstructions
215 * sizeof(struct tgsi_full_instruction),
216 (maxInstructions + 10)
217 * sizeof(struct tgsi_full_instruction));
218 maxInstructions += 10;
219 }
220 memcpy(instructions + numInstructions,
221 &parse.FullToken.FullInstruction,
222 sizeof(instructions[0]));
223 numInstructions++;
224 break;
225
226 default:
227 assert( 0 );
228 }
229 }
230 tgsi_parse_free (&parse);
231
232 if (mach->Declarations) {
233 FREE( mach->Declarations );
234 }
235 mach->Declarations = declarations;
236 mach->NumDeclarations = numDeclarations;
237
238 if (mach->Instructions) {
239 FREE( mach->Instructions );
240 }
241 mach->Instructions = instructions;
242 mach->NumInstructions = numInstructions;
243 }
244
245
246 void
247 tgsi_exec_machine_init(
248 struct tgsi_exec_machine *mach )
249 {
250 uint i;
251
252 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
253 mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
254
255 /* Setup constants. */
256 for( i = 0; i < 4; i++ ) {
257 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
258 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
259 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
260 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
261 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
262 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
263 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
264 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
265 }
266 }
267
268
269 void
270 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
271 {
272 if (mach->Instructions) {
273 FREE(mach->Instructions);
274 mach->Instructions = NULL;
275 mach->NumInstructions = 0;
276 }
277 if (mach->Declarations) {
278 FREE(mach->Declarations);
279 mach->Declarations = NULL;
280 mach->NumDeclarations = 0;
281 }
282 }
283
284
285 static void
286 micro_abs(
287 union tgsi_exec_channel *dst,
288 const union tgsi_exec_channel *src )
289 {
290 dst->f[0] = fabsf( src->f[0] );
291 dst->f[1] = fabsf( src->f[1] );
292 dst->f[2] = fabsf( src->f[2] );
293 dst->f[3] = fabsf( src->f[3] );
294 }
295
296 static void
297 micro_add(
298 union tgsi_exec_channel *dst,
299 const union tgsi_exec_channel *src0,
300 const union tgsi_exec_channel *src1 )
301 {
302 dst->f[0] = src0->f[0] + src1->f[0];
303 dst->f[1] = src0->f[1] + src1->f[1];
304 dst->f[2] = src0->f[2] + src1->f[2];
305 dst->f[3] = src0->f[3] + src1->f[3];
306 }
307
308 static void
309 micro_iadd(
310 union tgsi_exec_channel *dst,
311 const union tgsi_exec_channel *src0,
312 const union tgsi_exec_channel *src1 )
313 {
314 dst->i[0] = src0->i[0] + src1->i[0];
315 dst->i[1] = src0->i[1] + src1->i[1];
316 dst->i[2] = src0->i[2] + src1->i[2];
317 dst->i[3] = src0->i[3] + src1->i[3];
318 }
319
320 static void
321 micro_and(
322 union tgsi_exec_channel *dst,
323 const union tgsi_exec_channel *src0,
324 const union tgsi_exec_channel *src1 )
325 {
326 dst->u[0] = src0->u[0] & src1->u[0];
327 dst->u[1] = src0->u[1] & src1->u[1];
328 dst->u[2] = src0->u[2] & src1->u[2];
329 dst->u[3] = src0->u[3] & src1->u[3];
330 }
331
332 static void
333 micro_ceil(
334 union tgsi_exec_channel *dst,
335 const union tgsi_exec_channel *src )
336 {
337 dst->f[0] = ceilf( src->f[0] );
338 dst->f[1] = ceilf( src->f[1] );
339 dst->f[2] = ceilf( src->f[2] );
340 dst->f[3] = ceilf( src->f[3] );
341 }
342
343 static void
344 micro_cos(
345 union tgsi_exec_channel *dst,
346 const union tgsi_exec_channel *src )
347 {
348 dst->f[0] = cosf( src->f[0] );
349 dst->f[1] = cosf( src->f[1] );
350 dst->f[2] = cosf( src->f[2] );
351 dst->f[3] = cosf( src->f[3] );
352 }
353
354 static void
355 micro_ddx(
356 union tgsi_exec_channel *dst,
357 const union tgsi_exec_channel *src )
358 {
359 dst->f[0] =
360 dst->f[1] =
361 dst->f[2] =
362 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
363 }
364
365 static void
366 micro_ddy(
367 union tgsi_exec_channel *dst,
368 const union tgsi_exec_channel *src )
369 {
370 dst->f[0] =
371 dst->f[1] =
372 dst->f[2] =
373 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
374 }
375
376 static void
377 micro_div(
378 union tgsi_exec_channel *dst,
379 const union tgsi_exec_channel *src0,
380 const union tgsi_exec_channel *src1 )
381 {
382 dst->f[0] = src0->f[0] / src1->f[0];
383 dst->f[1] = src0->f[1] / src1->f[1];
384 dst->f[2] = src0->f[2] / src1->f[2];
385 dst->f[3] = src0->f[3] / src1->f[3];
386 }
387
388 static void
389 micro_udiv(
390 union tgsi_exec_channel *dst,
391 const union tgsi_exec_channel *src0,
392 const union tgsi_exec_channel *src1 )
393 {
394 dst->u[0] = src0->u[0] / src1->u[0];
395 dst->u[1] = src0->u[1] / src1->u[1];
396 dst->u[2] = src0->u[2] / src1->u[2];
397 dst->u[3] = src0->u[3] / src1->u[3];
398 }
399
400 static void
401 micro_eq(
402 union tgsi_exec_channel *dst,
403 const union tgsi_exec_channel *src0,
404 const union tgsi_exec_channel *src1,
405 const union tgsi_exec_channel *src2,
406 const union tgsi_exec_channel *src3 )
407 {
408 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
409 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
410 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
411 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
412 }
413
414 static void
415 micro_ieq(
416 union tgsi_exec_channel *dst,
417 const union tgsi_exec_channel *src0,
418 const union tgsi_exec_channel *src1,
419 const union tgsi_exec_channel *src2,
420 const union tgsi_exec_channel *src3 )
421 {
422 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
423 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
424 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
425 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
426 }
427
428 static void
429 micro_exp2(
430 union tgsi_exec_channel *dst,
431 const union tgsi_exec_channel *src)
432 {
433 dst->f[0] = powf( 2.0f, src->f[0] );
434 dst->f[1] = powf( 2.0f, src->f[1] );
435 dst->f[2] = powf( 2.0f, src->f[2] );
436 dst->f[3] = powf( 2.0f, src->f[3] );
437 }
438
439 static void
440 micro_f2it(
441 union tgsi_exec_channel *dst,
442 const union tgsi_exec_channel *src )
443 {
444 dst->i[0] = (int) src->f[0];
445 dst->i[1] = (int) src->f[1];
446 dst->i[2] = (int) src->f[2];
447 dst->i[3] = (int) src->f[3];
448 }
449
450 static void
451 micro_f2ut(
452 union tgsi_exec_channel *dst,
453 const union tgsi_exec_channel *src )
454 {
455 dst->u[0] = (uint) src->f[0];
456 dst->u[1] = (uint) src->f[1];
457 dst->u[2] = (uint) src->f[2];
458 dst->u[3] = (uint) src->f[3];
459 }
460
461 static void
462 micro_flr(
463 union tgsi_exec_channel *dst,
464 const union tgsi_exec_channel *src )
465 {
466 dst->f[0] = floorf( src->f[0] );
467 dst->f[1] = floorf( src->f[1] );
468 dst->f[2] = floorf( src->f[2] );
469 dst->f[3] = floorf( src->f[3] );
470 }
471
472 static void
473 micro_frc(
474 union tgsi_exec_channel *dst,
475 const union tgsi_exec_channel *src )
476 {
477 dst->f[0] = src->f[0] - floorf( src->f[0] );
478 dst->f[1] = src->f[1] - floorf( src->f[1] );
479 dst->f[2] = src->f[2] - floorf( src->f[2] );
480 dst->f[3] = src->f[3] - floorf( src->f[3] );
481 }
482
483 static void
484 micro_ge(
485 union tgsi_exec_channel *dst,
486 const union tgsi_exec_channel *src0,
487 const union tgsi_exec_channel *src1,
488 const union tgsi_exec_channel *src2,
489 const union tgsi_exec_channel *src3 )
490 {
491 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
492 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
493 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
494 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
495 }
496
497 static void
498 micro_i2f(
499 union tgsi_exec_channel *dst,
500 const union tgsi_exec_channel *src )
501 {
502 dst->f[0] = (float) src->i[0];
503 dst->f[1] = (float) src->i[1];
504 dst->f[2] = (float) src->i[2];
505 dst->f[3] = (float) src->i[3];
506 }
507
508 static void
509 micro_lg2(
510 union tgsi_exec_channel *dst,
511 const union tgsi_exec_channel *src )
512 {
513 dst->f[0] = logf( src->f[0] ) * 1.442695f;
514 dst->f[1] = logf( src->f[1] ) * 1.442695f;
515 dst->f[2] = logf( src->f[2] ) * 1.442695f;
516 dst->f[3] = logf( src->f[3] ) * 1.442695f;
517 }
518
519 static void
520 micro_le(
521 union tgsi_exec_channel *dst,
522 const union tgsi_exec_channel *src0,
523 const union tgsi_exec_channel *src1,
524 const union tgsi_exec_channel *src2,
525 const union tgsi_exec_channel *src3 )
526 {
527 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
528 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
529 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
530 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
531 }
532
533 static void
534 micro_lt(
535 union tgsi_exec_channel *dst,
536 const union tgsi_exec_channel *src0,
537 const union tgsi_exec_channel *src1,
538 const union tgsi_exec_channel *src2,
539 const union tgsi_exec_channel *src3 )
540 {
541 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
542 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
543 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
544 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
545 }
546
547 static void
548 micro_ilt(
549 union tgsi_exec_channel *dst,
550 const union tgsi_exec_channel *src0,
551 const union tgsi_exec_channel *src1,
552 const union tgsi_exec_channel *src2,
553 const union tgsi_exec_channel *src3 )
554 {
555 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
556 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
557 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
558 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
559 }
560
561 static void
562 micro_ult(
563 union tgsi_exec_channel *dst,
564 const union tgsi_exec_channel *src0,
565 const union tgsi_exec_channel *src1,
566 const union tgsi_exec_channel *src2,
567 const union tgsi_exec_channel *src3 )
568 {
569 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
570 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
571 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
572 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
573 }
574
575 static void
576 micro_max(
577 union tgsi_exec_channel *dst,
578 const union tgsi_exec_channel *src0,
579 const union tgsi_exec_channel *src1 )
580 {
581 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
582 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
583 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
584 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
585 }
586
587 static void
588 micro_imax(
589 union tgsi_exec_channel *dst,
590 const union tgsi_exec_channel *src0,
591 const union tgsi_exec_channel *src1 )
592 {
593 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
594 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
595 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
596 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
597 }
598
599 static void
600 micro_umax(
601 union tgsi_exec_channel *dst,
602 const union tgsi_exec_channel *src0,
603 const union tgsi_exec_channel *src1 )
604 {
605 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
606 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
607 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
608 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
609 }
610
611 static void
612 micro_min(
613 union tgsi_exec_channel *dst,
614 const union tgsi_exec_channel *src0,
615 const union tgsi_exec_channel *src1 )
616 {
617 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
618 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
619 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
620 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
621 }
622
623 static void
624 micro_imin(
625 union tgsi_exec_channel *dst,
626 const union tgsi_exec_channel *src0,
627 const union tgsi_exec_channel *src1 )
628 {
629 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
630 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
631 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
632 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
633 }
634
635 static void
636 micro_umin(
637 union tgsi_exec_channel *dst,
638 const union tgsi_exec_channel *src0,
639 const union tgsi_exec_channel *src1 )
640 {
641 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
642 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
643 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
644 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
645 }
646
647 static void
648 micro_umod(
649 union tgsi_exec_channel *dst,
650 const union tgsi_exec_channel *src0,
651 const union tgsi_exec_channel *src1 )
652 {
653 dst->u[0] = src0->u[0] % src1->u[0];
654 dst->u[1] = src0->u[1] % src1->u[1];
655 dst->u[2] = src0->u[2] % src1->u[2];
656 dst->u[3] = src0->u[3] % src1->u[3];
657 }
658
659 static void
660 micro_mul(
661 union tgsi_exec_channel *dst,
662 const union tgsi_exec_channel *src0,
663 const union tgsi_exec_channel *src1 )
664 {
665 dst->f[0] = src0->f[0] * src1->f[0];
666 dst->f[1] = src0->f[1] * src1->f[1];
667 dst->f[2] = src0->f[2] * src1->f[2];
668 dst->f[3] = src0->f[3] * src1->f[3];
669 }
670
671 static void
672 micro_imul(
673 union tgsi_exec_channel *dst,
674 const union tgsi_exec_channel *src0,
675 const union tgsi_exec_channel *src1 )
676 {
677 dst->i[0] = src0->i[0] * src1->i[0];
678 dst->i[1] = src0->i[1] * src1->i[1];
679 dst->i[2] = src0->i[2] * src1->i[2];
680 dst->i[3] = src0->i[3] * src1->i[3];
681 }
682
683 static void
684 micro_imul64(
685 union tgsi_exec_channel *dst0,
686 union tgsi_exec_channel *dst1,
687 const union tgsi_exec_channel *src0,
688 const union tgsi_exec_channel *src1 )
689 {
690 dst1->i[0] = src0->i[0] * src1->i[0];
691 dst1->i[1] = src0->i[1] * src1->i[1];
692 dst1->i[2] = src0->i[2] * src1->i[2];
693 dst1->i[3] = src0->i[3] * src1->i[3];
694 dst0->i[0] = 0;
695 dst0->i[1] = 0;
696 dst0->i[2] = 0;
697 dst0->i[3] = 0;
698 }
699
700 static void
701 micro_umul64(
702 union tgsi_exec_channel *dst0,
703 union tgsi_exec_channel *dst1,
704 const union tgsi_exec_channel *src0,
705 const union tgsi_exec_channel *src1 )
706 {
707 dst1->u[0] = src0->u[0] * src1->u[0];
708 dst1->u[1] = src0->u[1] * src1->u[1];
709 dst1->u[2] = src0->u[2] * src1->u[2];
710 dst1->u[3] = src0->u[3] * src1->u[3];
711 dst0->u[0] = 0;
712 dst0->u[1] = 0;
713 dst0->u[2] = 0;
714 dst0->u[3] = 0;
715 }
716
717 static void
718 micro_movc(
719 union tgsi_exec_channel *dst,
720 const union tgsi_exec_channel *src0,
721 const union tgsi_exec_channel *src1,
722 const union tgsi_exec_channel *src2 )
723 {
724 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
725 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
726 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
727 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
728 }
729
730 static void
731 micro_neg(
732 union tgsi_exec_channel *dst,
733 const union tgsi_exec_channel *src )
734 {
735 dst->f[0] = -src->f[0];
736 dst->f[1] = -src->f[1];
737 dst->f[2] = -src->f[2];
738 dst->f[3] = -src->f[3];
739 }
740
741 static void
742 micro_ineg(
743 union tgsi_exec_channel *dst,
744 const union tgsi_exec_channel *src )
745 {
746 dst->i[0] = -src->i[0];
747 dst->i[1] = -src->i[1];
748 dst->i[2] = -src->i[2];
749 dst->i[3] = -src->i[3];
750 }
751
752 static void
753 micro_not(
754 union tgsi_exec_channel *dst,
755 const union tgsi_exec_channel *src )
756 {
757 dst->u[0] = ~src->u[0];
758 dst->u[1] = ~src->u[1];
759 dst->u[2] = ~src->u[2];
760 dst->u[3] = ~src->u[3];
761 }
762
763 static void
764 micro_or(
765 union tgsi_exec_channel *dst,
766 const union tgsi_exec_channel *src0,
767 const union tgsi_exec_channel *src1 )
768 {
769 dst->u[0] = src0->u[0] | src1->u[0];
770 dst->u[1] = src0->u[1] | src1->u[1];
771 dst->u[2] = src0->u[2] | src1->u[2];
772 dst->u[3] = src0->u[3] | src1->u[3];
773 }
774
775 static void
776 micro_pow(
777 union tgsi_exec_channel *dst,
778 const union tgsi_exec_channel *src0,
779 const union tgsi_exec_channel *src1 )
780 {
781 dst->f[0] = powf( src0->f[0], src1->f[0] );
782 dst->f[1] = powf( src0->f[1], src1->f[1] );
783 dst->f[2] = powf( src0->f[2], src1->f[2] );
784 dst->f[3] = powf( src0->f[3], src1->f[3] );
785 }
786
787 static void
788 micro_rnd(
789 union tgsi_exec_channel *dst,
790 const union tgsi_exec_channel *src )
791 {
792 dst->f[0] = floorf( src->f[0] + 0.5f );
793 dst->f[1] = floorf( src->f[1] + 0.5f );
794 dst->f[2] = floorf( src->f[2] + 0.5f );
795 dst->f[3] = floorf( src->f[3] + 0.5f );
796 }
797
798 static void
799 micro_shl(
800 union tgsi_exec_channel *dst,
801 const union tgsi_exec_channel *src0,
802 const union tgsi_exec_channel *src1 )
803 {
804 dst->i[0] = src0->i[0] << src1->i[0];
805 dst->i[1] = src0->i[1] << src1->i[1];
806 dst->i[2] = src0->i[2] << src1->i[2];
807 dst->i[3] = src0->i[3] << src1->i[3];
808 }
809
810 static void
811 micro_ishr(
812 union tgsi_exec_channel *dst,
813 const union tgsi_exec_channel *src0,
814 const union tgsi_exec_channel *src1 )
815 {
816 dst->i[0] = src0->i[0] >> src1->i[0];
817 dst->i[1] = src0->i[1] >> src1->i[1];
818 dst->i[2] = src0->i[2] >> src1->i[2];
819 dst->i[3] = src0->i[3] >> src1->i[3];
820 }
821
822 static void
823 micro_trunc(
824 union tgsi_exec_channel *dst,
825 const union tgsi_exec_channel *src0 )
826 {
827 dst->f[0] = (float) (int) src0->f[0];
828 dst->f[1] = (float) (int) src0->f[1];
829 dst->f[2] = (float) (int) src0->f[2];
830 dst->f[3] = (float) (int) src0->f[3];
831 }
832
833 static void
834 micro_ushr(
835 union tgsi_exec_channel *dst,
836 const union tgsi_exec_channel *src0,
837 const union tgsi_exec_channel *src1 )
838 {
839 dst->u[0] = src0->u[0] >> src1->u[0];
840 dst->u[1] = src0->u[1] >> src1->u[1];
841 dst->u[2] = src0->u[2] >> src1->u[2];
842 dst->u[3] = src0->u[3] >> src1->u[3];
843 }
844
845 static void
846 micro_sin(
847 union tgsi_exec_channel *dst,
848 const union tgsi_exec_channel *src )
849 {
850 dst->f[0] = sinf( src->f[0] );
851 dst->f[1] = sinf( src->f[1] );
852 dst->f[2] = sinf( src->f[2] );
853 dst->f[3] = sinf( src->f[3] );
854 }
855
856 static void
857 micro_sqrt( union tgsi_exec_channel *dst,
858 const union tgsi_exec_channel *src )
859 {
860 dst->f[0] = sqrtf( src->f[0] );
861 dst->f[1] = sqrtf( src->f[1] );
862 dst->f[2] = sqrtf( src->f[2] );
863 dst->f[3] = sqrtf( src->f[3] );
864 }
865
866 static void
867 micro_sub(
868 union tgsi_exec_channel *dst,
869 const union tgsi_exec_channel *src0,
870 const union tgsi_exec_channel *src1 )
871 {
872 dst->f[0] = src0->f[0] - src1->f[0];
873 dst->f[1] = src0->f[1] - src1->f[1];
874 dst->f[2] = src0->f[2] - src1->f[2];
875 dst->f[3] = src0->f[3] - src1->f[3];
876 }
877
878 static void
879 micro_u2f(
880 union tgsi_exec_channel *dst,
881 const union tgsi_exec_channel *src )
882 {
883 dst->f[0] = (float) src->u[0];
884 dst->f[1] = (float) src->u[1];
885 dst->f[2] = (float) src->u[2];
886 dst->f[3] = (float) src->u[3];
887 }
888
889 static void
890 micro_xor(
891 union tgsi_exec_channel *dst,
892 const union tgsi_exec_channel *src0,
893 const union tgsi_exec_channel *src1 )
894 {
895 dst->u[0] = src0->u[0] ^ src1->u[0];
896 dst->u[1] = src0->u[1] ^ src1->u[1];
897 dst->u[2] = src0->u[2] ^ src1->u[2];
898 dst->u[3] = src0->u[3] ^ src1->u[3];
899 }
900
901 static void
902 fetch_src_file_channel(
903 const struct tgsi_exec_machine *mach,
904 const uint file,
905 const uint swizzle,
906 const union tgsi_exec_channel *index,
907 union tgsi_exec_channel *chan )
908 {
909 switch( swizzle ) {
910 case TGSI_EXTSWIZZLE_X:
911 case TGSI_EXTSWIZZLE_Y:
912 case TGSI_EXTSWIZZLE_Z:
913 case TGSI_EXTSWIZZLE_W:
914 switch( file ) {
915 case TGSI_FILE_CONSTANT:
916 chan->f[0] = mach->Consts[index->i[0]][swizzle];
917 chan->f[1] = mach->Consts[index->i[1]][swizzle];
918 chan->f[2] = mach->Consts[index->i[2]][swizzle];
919 chan->f[3] = mach->Consts[index->i[3]][swizzle];
920 break;
921
922 case TGSI_FILE_INPUT:
923 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
924 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
925 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
926 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
927 break;
928
929 case TGSI_FILE_TEMPORARY:
930 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
931 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
932 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
933 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
934 break;
935
936 case TGSI_FILE_IMMEDIATE:
937 assert( index->i[0] < (int) mach->ImmLimit );
938 chan->f[0] = mach->Imms[index->i[0]][swizzle];
939 assert( index->i[1] < (int) mach->ImmLimit );
940 chan->f[1] = mach->Imms[index->i[1]][swizzle];
941 assert( index->i[2] < (int) mach->ImmLimit );
942 chan->f[2] = mach->Imms[index->i[2]][swizzle];
943 assert( index->i[3] < (int) mach->ImmLimit );
944 chan->f[3] = mach->Imms[index->i[3]][swizzle];
945 break;
946
947 case TGSI_FILE_ADDRESS:
948 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
949 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
950 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
951 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
952 break;
953
954 case TGSI_FILE_OUTPUT:
955 /* vertex/fragment output vars can be read too */
956 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
957 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
958 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
959 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
960 break;
961
962 default:
963 assert( 0 );
964 }
965 break;
966
967 case TGSI_EXTSWIZZLE_ZERO:
968 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
969 break;
970
971 case TGSI_EXTSWIZZLE_ONE:
972 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
973 break;
974
975 default:
976 assert( 0 );
977 }
978 }
979
980 static void
981 fetch_source(
982 const struct tgsi_exec_machine *mach,
983 union tgsi_exec_channel *chan,
984 const struct tgsi_full_src_register *reg,
985 const uint chan_index )
986 {
987 union tgsi_exec_channel index;
988 uint swizzle;
989
990 index.i[0] =
991 index.i[1] =
992 index.i[2] =
993 index.i[3] = reg->SrcRegister.Index;
994
995 if (reg->SrcRegister.Indirect) {
996 union tgsi_exec_channel index2;
997 union tgsi_exec_channel indir_index;
998
999 index2.i[0] =
1000 index2.i[1] =
1001 index2.i[2] =
1002 index2.i[3] = reg->SrcRegisterInd.Index;
1003
1004 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1005 fetch_src_file_channel(
1006 mach,
1007 reg->SrcRegisterInd.File,
1008 swizzle,
1009 &index2,
1010 &indir_index );
1011
1012 index.i[0] += indir_index.i[0];
1013 index.i[1] += indir_index.i[1];
1014 index.i[2] += indir_index.i[2];
1015 index.i[3] += indir_index.i[3];
1016 }
1017
1018 if( reg->SrcRegister.Dimension ) {
1019 switch( reg->SrcRegister.File ) {
1020 case TGSI_FILE_INPUT:
1021 index.i[0] *= 17;
1022 index.i[1] *= 17;
1023 index.i[2] *= 17;
1024 index.i[3] *= 17;
1025 break;
1026 case TGSI_FILE_CONSTANT:
1027 index.i[0] *= 4096;
1028 index.i[1] *= 4096;
1029 index.i[2] *= 4096;
1030 index.i[3] *= 4096;
1031 break;
1032 default:
1033 assert( 0 );
1034 }
1035
1036 index.i[0] += reg->SrcRegisterDim.Index;
1037 index.i[1] += reg->SrcRegisterDim.Index;
1038 index.i[2] += reg->SrcRegisterDim.Index;
1039 index.i[3] += reg->SrcRegisterDim.Index;
1040
1041 if (reg->SrcRegisterDim.Indirect) {
1042 union tgsi_exec_channel index2;
1043 union tgsi_exec_channel indir_index;
1044
1045 index2.i[0] =
1046 index2.i[1] =
1047 index2.i[2] =
1048 index2.i[3] = reg->SrcRegisterDimInd.Index;
1049
1050 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1051 fetch_src_file_channel(
1052 mach,
1053 reg->SrcRegisterDimInd.File,
1054 swizzle,
1055 &index2,
1056 &indir_index );
1057
1058 index.i[0] += indir_index.i[0];
1059 index.i[1] += indir_index.i[1];
1060 index.i[2] += indir_index.i[2];
1061 index.i[3] += indir_index.i[3];
1062 }
1063 }
1064
1065 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1066 fetch_src_file_channel(
1067 mach,
1068 reg->SrcRegister.File,
1069 swizzle,
1070 &index,
1071 chan );
1072
1073 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1074 case TGSI_UTIL_SIGN_CLEAR:
1075 micro_abs( chan, chan );
1076 break;
1077
1078 case TGSI_UTIL_SIGN_SET:
1079 micro_abs( chan, chan );
1080 micro_neg( chan, chan );
1081 break;
1082
1083 case TGSI_UTIL_SIGN_TOGGLE:
1084 micro_neg( chan, chan );
1085 break;
1086
1087 case TGSI_UTIL_SIGN_KEEP:
1088 break;
1089 }
1090
1091 if (reg->SrcRegisterExtMod.Complement) {
1092 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1093 }
1094 }
1095
1096 static void
1097 store_dest(
1098 struct tgsi_exec_machine *mach,
1099 const union tgsi_exec_channel *chan,
1100 const struct tgsi_full_dst_register *reg,
1101 const struct tgsi_full_instruction *inst,
1102 uint chan_index )
1103 {
1104 union tgsi_exec_channel *dst;
1105
1106 switch( reg->DstRegister.File ) {
1107 case TGSI_FILE_NULL:
1108 return;
1109
1110 case TGSI_FILE_OUTPUT:
1111 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1112 + reg->DstRegister.Index].xyzw[chan_index];
1113 break;
1114
1115 case TGSI_FILE_TEMPORARY:
1116 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1117 break;
1118
1119 case TGSI_FILE_ADDRESS:
1120 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1121 break;
1122
1123 default:
1124 assert( 0 );
1125 return;
1126 }
1127
1128 switch (inst->Instruction.Saturate)
1129 {
1130 case TGSI_SAT_NONE:
1131 if (mach->ExecMask & 0x1)
1132 dst->i[0] = chan->i[0];
1133 if (mach->ExecMask & 0x2)
1134 dst->i[1] = chan->i[1];
1135 if (mach->ExecMask & 0x4)
1136 dst->i[2] = chan->i[2];
1137 if (mach->ExecMask & 0x8)
1138 dst->i[3] = chan->i[3];
1139 break;
1140
1141 case TGSI_SAT_ZERO_ONE:
1142 /* XXX need to obey ExecMask here */
1143 micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
1144 micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
1145 break;
1146
1147 case TGSI_SAT_MINUS_PLUS_ONE:
1148 assert( 0 );
1149 break;
1150
1151 default:
1152 assert( 0 );
1153 }
1154 }
1155
/*
 * Shorthands for reading source operand INDEX / writing destination
 * operand INDEX of the current instruction.  Both expand to code that
 * refers to variables named `mach` and `inst` at the point of use.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN) )

#define STORE(VAL, INDEX, CHAN) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1161
1162
1163 /**
1164 * Execute ARB-style KIL which is predicated by a src register.
1165 * Kill fragment if any of the four values is less than zero.
1166 */
1167 static void
1168 exec_kilp(struct tgsi_exec_machine *mach,
1169 const struct tgsi_full_instruction *inst)
1170 {
1171 uint uniquemask;
1172 uint chan_index;
1173 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1174 union tgsi_exec_channel r[1];
1175
1176 /* This mask stores component bits that were already tested. Note that
1177 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1178 * tested. */
1179 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1180
1181 for (chan_index = 0; chan_index < 4; chan_index++)
1182 {
1183 uint swizzle;
1184 uint i;
1185
1186 /* unswizzle channel */
1187 swizzle = tgsi_util_get_full_src_register_extswizzle (
1188 &inst->FullSrcRegisters[0],
1189 chan_index);
1190
1191 /* check if the component has not been already tested */
1192 if (uniquemask & (1 << swizzle))
1193 continue;
1194 uniquemask |= 1 << swizzle;
1195
1196 FETCH(&r[0], 0, chan_index);
1197 for (i = 0; i < 4; i++)
1198 if (r[0].f[i] < 0.0f)
1199 kilmask |= 1 << i;
1200 }
1201
1202 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1203 }
1204
1205
1206 /*
1207 * Fetch a texel using STR texture coordinates.
1208 */
1209 static void
1210 fetch_texel( struct tgsi_sampler *sampler,
1211 const union tgsi_exec_channel *s,
1212 const union tgsi_exec_channel *t,
1213 const union tgsi_exec_channel *p,
1214 float lodbias, /* XXX should be float[4] */
1215 union tgsi_exec_channel *r,
1216 union tgsi_exec_channel *g,
1217 union tgsi_exec_channel *b,
1218 union tgsi_exec_channel *a )
1219 {
1220 uint j;
1221 float rgba[NUM_CHANNELS][QUAD_SIZE];
1222
1223 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1224
1225 for (j = 0; j < 4; j++) {
1226 r->f[j] = rgba[0][j];
1227 g->f[j] = rgba[1][j];
1228 b->f[j] = rgba[2][j];
1229 a->f[j] = rgba[3][j];
1230 }
1231 }
1232
1233
1234 static void
1235 exec_tex(struct tgsi_exec_machine *mach,
1236 const struct tgsi_full_instruction *inst,
1237 boolean biasLod,
1238 boolean projected)
1239 {
1240 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1241 union tgsi_exec_channel r[8];
1242 uint chan_index;
1243 float lodBias;
1244
1245 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1246
1247 switch (inst->InstructionExtTexture.Texture) {
1248 case TGSI_TEXTURE_1D:
1249
1250 FETCH(&r[0], 0, CHAN_X);
1251
1252 if (projected) {
1253 FETCH(&r[1], 0, CHAN_W);
1254 micro_div( &r[0], &r[0], &r[1] );
1255 }
1256
1257 if (biasLod) {
1258 FETCH(&r[1], 0, CHAN_W);
1259 lodBias = r[2].f[0];
1260 }
1261 else
1262 lodBias = 0.0;
1263
1264 fetch_texel(&mach->Samplers[unit],
1265 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
1266 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1267 break;
1268
1269 case TGSI_TEXTURE_2D:
1270 case TGSI_TEXTURE_RECT:
1271
1272 FETCH(&r[0], 0, CHAN_X);
1273 FETCH(&r[1], 0, CHAN_Y);
1274 FETCH(&r[2], 0, CHAN_Z);
1275
1276 if (projected) {
1277 FETCH(&r[3], 0, CHAN_W);
1278 micro_div( &r[0], &r[0], &r[3] );
1279 micro_div( &r[1], &r[1], &r[3] );
1280 micro_div( &r[2], &r[2], &r[3] );
1281 }
1282
1283 if (biasLod) {
1284 FETCH(&r[3], 0, CHAN_W);
1285 lodBias = r[3].f[0];
1286 }
1287 else
1288 lodBias = 0.0;
1289
1290 fetch_texel(&mach->Samplers[unit],
1291 &r[0], &r[1], &r[2], lodBias, /* inputs */
1292 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1293 break;
1294
1295 case TGSI_TEXTURE_3D:
1296 case TGSI_TEXTURE_CUBE:
1297
1298 FETCH(&r[0], 0, CHAN_X);
1299 FETCH(&r[1], 0, CHAN_Y);
1300 FETCH(&r[2], 0, CHAN_Z);
1301
1302 if (projected) {
1303 FETCH(&r[3], 0, CHAN_W);
1304 micro_div( &r[0], &r[0], &r[3] );
1305 micro_div( &r[1], &r[1], &r[3] );
1306 micro_div( &r[2], &r[2], &r[3] );
1307 }
1308
1309 if (biasLod) {
1310 FETCH(&r[3], 0, CHAN_W);
1311 lodBias = r[3].f[0];
1312 }
1313 else
1314 lodBias = 0.0;
1315
1316 fetch_texel(&mach->Samplers[unit],
1317 &r[0], &r[1], &r[2], lodBias,
1318 &r[0], &r[1], &r[2], &r[3]);
1319 break;
1320
1321 default:
1322 assert (0);
1323 }
1324
1325 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1326 STORE( &r[chan_index], 0, chan_index );
1327 }
1328 }
1329
1330
1331 /**
1332 * Evaluate a constant-valued coefficient at the position of the
1333 * current quad.
1334 */
1335 static void
1336 eval_constant_coef(
1337 struct tgsi_exec_machine *mach,
1338 unsigned attrib,
1339 unsigned chan )
1340 {
1341 unsigned i;
1342
1343 for( i = 0; i < QUAD_SIZE; i++ ) {
1344 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1345 }
1346 }
1347
1348 /**
1349 * Evaluate a linear-valued coefficient at the position of the
1350 * current quad.
1351 */
1352 static void
1353 eval_linear_coef(
1354 struct tgsi_exec_machine *mach,
1355 unsigned attrib,
1356 unsigned chan )
1357 {
1358 const float x = mach->QuadPos.xyzw[0].f[0];
1359 const float y = mach->QuadPos.xyzw[1].f[0];
1360 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1361 const float dady = mach->InterpCoefs[attrib].dady[chan];
1362 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1363 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1364 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1365 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1366 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1367 }
1368
1369 /**
1370 * Evaluate a perspective-valued coefficient at the position of the
1371 * current quad.
1372 */
1373 static void
1374 eval_perspective_coef(
1375 struct tgsi_exec_machine *mach,
1376 unsigned attrib,
1377 unsigned chan )
1378 {
1379 const float x = mach->QuadPos.xyzw[0].f[0];
1380 const float y = mach->QuadPos.xyzw[1].f[0];
1381 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1382 const float dady = mach->InterpCoefs[attrib].dady[chan];
1383 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1384 const float *w = mach->QuadPos.xyzw[3].f;
1385 /* divide by W here */
1386 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1387 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1388 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1389 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1390 }
1391
1392
/**
 * Pointer to a function that evaluates one channel of one input attribute
 * for the current quad (same signature as the eval_*_coef() functions).
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1397
1398 static void
1399 exec_declaration(
1400 struct tgsi_exec_machine *mach,
1401 const struct tgsi_full_declaration *decl )
1402 {
1403 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1404 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1405 unsigned first, last, mask;
1406 eval_coef_func eval;
1407
1408 assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
1409
1410 first = decl->u.DeclarationRange.First;
1411 last = decl->u.DeclarationRange.Last;
1412 mask = decl->Declaration.UsageMask;
1413
1414 switch( decl->Interpolation.Interpolate ) {
1415 case TGSI_INTERPOLATE_CONSTANT:
1416 eval = eval_constant_coef;
1417 break;
1418
1419 case TGSI_INTERPOLATE_LINEAR:
1420 eval = eval_linear_coef;
1421 break;
1422
1423 case TGSI_INTERPOLATE_PERSPECTIVE:
1424 eval = eval_perspective_coef;
1425 break;
1426
1427 default:
1428 assert( 0 );
1429 }
1430
1431 if( mask == TGSI_WRITEMASK_XYZW ) {
1432 unsigned i, j;
1433
1434 for( i = first; i <= last; i++ ) {
1435 for( j = 0; j < NUM_CHANNELS; j++ ) {
1436 eval( mach, i, j );
1437 }
1438 }
1439 }
1440 else {
1441 unsigned i, j;
1442
1443 for( j = 0; j < NUM_CHANNELS; j++ ) {
1444 if( mask & (1 << j) ) {
1445 for( i = first; i <= last; i++ ) {
1446 eval( mach, i, j );
1447 }
1448 }
1449 }
1450 }
1451 }
1452 }
1453 }
1454
1455 static void
1456 exec_instruction(
1457 struct tgsi_exec_machine *mach,
1458 const struct tgsi_full_instruction *inst,
1459 int *pc )
1460 {
1461 uint chan_index;
1462 union tgsi_exec_channel r[8];
1463
1464 (*pc)++;
1465
1466 switch (inst->Instruction.Opcode) {
1467 case TGSI_OPCODE_ARL:
1468 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1469 FETCH( &r[0], 0, chan_index );
1470 micro_f2it( &r[0], &r[0] );
1471 STORE( &r[0], 0, chan_index );
1472 }
1473 break;
1474
1475 case TGSI_OPCODE_MOV:
1476 /* TGSI_OPCODE_SWZ */
1477 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1478 FETCH( &r[0], 0, chan_index );
1479 STORE( &r[0], 0, chan_index );
1480 }
1481 break;
1482
1483 case TGSI_OPCODE_LIT:
1484 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1485 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1486 }
1487
1488 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1489 FETCH( &r[0], 0, CHAN_X );
1490 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1491 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1492 STORE( &r[0], 0, CHAN_Y );
1493 }
1494
1495 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1496 FETCH( &r[1], 0, CHAN_Y );
1497 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1498
1499 FETCH( &r[2], 0, CHAN_W );
1500 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1501 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1502 micro_pow( &r[1], &r[1], &r[2] );
1503 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1504 STORE( &r[0], 0, CHAN_Z );
1505 }
1506 }
1507
1508 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1509 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1510 }
1511 break;
1512
1513 case TGSI_OPCODE_RCP:
1514 /* TGSI_OPCODE_RECIP */
1515 FETCH( &r[0], 0, CHAN_X );
1516 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1517 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1518 STORE( &r[0], 0, chan_index );
1519 }
1520 break;
1521
1522 case TGSI_OPCODE_RSQ:
1523 /* TGSI_OPCODE_RECIPSQRT */
1524 FETCH( &r[0], 0, CHAN_X );
1525 micro_sqrt( &r[0], &r[0] );
1526 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1527 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1528 STORE( &r[0], 0, chan_index );
1529 }
1530 break;
1531
1532 case TGSI_OPCODE_EXP:
1533 FETCH( &r[0], 0, CHAN_X );
1534 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
1535 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1536 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
1537 STORE( &r[2], 0, CHAN_X ); /* store r2 */
1538 }
1539 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1540 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1541 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
1542 }
1543 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1544 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
1545 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
1546 }
1547 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1548 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1549 }
1550 break;
1551
1552 case TGSI_OPCODE_LOG:
1553 FETCH( &r[0], 0, CHAN_X );
1554 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
1555 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
1556 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
1557 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1558 STORE( &r[0], 0, CHAN_X );
1559 }
1560 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1561 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
1562 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1563 STORE( &r[0], 0, CHAN_Y );
1564 }
1565 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1566 STORE( &r[1], 0, CHAN_Z );
1567 }
1568 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1569 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1570 }
1571 break;
1572
1573 case TGSI_OPCODE_MUL:
1574 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1575 {
1576 FETCH(&r[0], 0, chan_index);
1577 FETCH(&r[1], 1, chan_index);
1578
1579 micro_mul( &r[0], &r[0], &r[1] );
1580
1581 STORE(&r[0], 0, chan_index);
1582 }
1583 break;
1584
1585 case TGSI_OPCODE_ADD:
1586 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1587 FETCH( &r[0], 0, chan_index );
1588 FETCH( &r[1], 1, chan_index );
1589 micro_add( &r[0], &r[0], &r[1] );
1590 STORE( &r[0], 0, chan_index );
1591 }
1592 break;
1593
1594 case TGSI_OPCODE_DP3:
1595 /* TGSI_OPCODE_DOT3 */
1596 FETCH( &r[0], 0, CHAN_X );
1597 FETCH( &r[1], 1, CHAN_X );
1598 micro_mul( &r[0], &r[0], &r[1] );
1599
1600 FETCH( &r[1], 0, CHAN_Y );
1601 FETCH( &r[2], 1, CHAN_Y );
1602 micro_mul( &r[1], &r[1], &r[2] );
1603 micro_add( &r[0], &r[0], &r[1] );
1604
1605 FETCH( &r[1], 0, CHAN_Z );
1606 FETCH( &r[2], 1, CHAN_Z );
1607 micro_mul( &r[1], &r[1], &r[2] );
1608 micro_add( &r[0], &r[0], &r[1] );
1609
1610 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1611 STORE( &r[0], 0, chan_index );
1612 }
1613 break;
1614
1615 case TGSI_OPCODE_DP4:
1616 /* TGSI_OPCODE_DOT4 */
1617 FETCH(&r[0], 0, CHAN_X);
1618 FETCH(&r[1], 1, CHAN_X);
1619
1620 micro_mul( &r[0], &r[0], &r[1] );
1621
1622 FETCH(&r[1], 0, CHAN_Y);
1623 FETCH(&r[2], 1, CHAN_Y);
1624
1625 micro_mul( &r[1], &r[1], &r[2] );
1626 micro_add( &r[0], &r[0], &r[1] );
1627
1628 FETCH(&r[1], 0, CHAN_Z);
1629 FETCH(&r[2], 1, CHAN_Z);
1630
1631 micro_mul( &r[1], &r[1], &r[2] );
1632 micro_add( &r[0], &r[0], &r[1] );
1633
1634 FETCH(&r[1], 0, CHAN_W);
1635 FETCH(&r[2], 1, CHAN_W);
1636
1637 micro_mul( &r[1], &r[1], &r[2] );
1638 micro_add( &r[0], &r[0], &r[1] );
1639
1640 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1641 STORE( &r[0], 0, chan_index );
1642 }
1643 break;
1644
1645 case TGSI_OPCODE_DST:
1646 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1647 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1648 }
1649
1650 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1651 FETCH( &r[0], 0, CHAN_Y );
1652 FETCH( &r[1], 1, CHAN_Y);
1653 micro_mul( &r[0], &r[0], &r[1] );
1654 STORE( &r[0], 0, CHAN_Y );
1655 }
1656
1657 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1658 FETCH( &r[0], 0, CHAN_Z );
1659 STORE( &r[0], 0, CHAN_Z );
1660 }
1661
1662 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1663 FETCH( &r[0], 1, CHAN_W );
1664 STORE( &r[0], 0, CHAN_W );
1665 }
1666 break;
1667
1668 case TGSI_OPCODE_MIN:
1669 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1670 FETCH(&r[0], 0, chan_index);
1671 FETCH(&r[1], 1, chan_index);
1672
1673 /* XXX use micro_min()?? */
1674 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1675
1676 STORE(&r[0], 0, chan_index);
1677 }
1678 break;
1679
1680 case TGSI_OPCODE_MAX:
1681 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1682 FETCH(&r[0], 0, chan_index);
1683 FETCH(&r[1], 1, chan_index);
1684
1685 /* XXX use micro_max()?? */
1686 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1687
1688 STORE(&r[0], 0, chan_index );
1689 }
1690 break;
1691
1692 case TGSI_OPCODE_SLT:
1693 /* TGSI_OPCODE_SETLT */
1694 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1695 FETCH( &r[0], 0, chan_index );
1696 FETCH( &r[1], 1, chan_index );
1697 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1698 STORE( &r[0], 0, chan_index );
1699 }
1700 break;
1701
1702 case TGSI_OPCODE_SGE:
1703 /* TGSI_OPCODE_SETGE */
1704 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1705 FETCH( &r[0], 0, chan_index );
1706 FETCH( &r[1], 1, chan_index );
1707 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1708 STORE( &r[0], 0, chan_index );
1709 }
1710 break;
1711
1712 case TGSI_OPCODE_MAD:
1713 /* TGSI_OPCODE_MADD */
1714 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1715 FETCH( &r[0], 0, chan_index );
1716 FETCH( &r[1], 1, chan_index );
1717 micro_mul( &r[0], &r[0], &r[1] );
1718 FETCH( &r[1], 2, chan_index );
1719 micro_add( &r[0], &r[0], &r[1] );
1720 STORE( &r[0], 0, chan_index );
1721 }
1722 break;
1723
1724 case TGSI_OPCODE_SUB:
1725 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1726 FETCH(&r[0], 0, chan_index);
1727 FETCH(&r[1], 1, chan_index);
1728
1729 micro_sub( &r[0], &r[0], &r[1] );
1730
1731 STORE(&r[0], 0, chan_index);
1732 }
1733 break;
1734
1735 case TGSI_OPCODE_LERP:
1736 /* TGSI_OPCODE_LRP */
1737 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1738 FETCH(&r[0], 0, chan_index);
1739 FETCH(&r[1], 1, chan_index);
1740 FETCH(&r[2], 2, chan_index);
1741
1742 micro_sub( &r[1], &r[1], &r[2] );
1743 micro_mul( &r[0], &r[0], &r[1] );
1744 micro_add( &r[0], &r[0], &r[2] );
1745
1746 STORE(&r[0], 0, chan_index);
1747 }
1748 break;
1749
1750 case TGSI_OPCODE_CND:
1751 assert (0);
1752 break;
1753
1754 case TGSI_OPCODE_CND0:
1755 assert (0);
1756 break;
1757
1758 case TGSI_OPCODE_DOT2ADD:
1759 /* TGSI_OPCODE_DP2A */
1760 assert (0);
1761 break;
1762
1763 case TGSI_OPCODE_INDEX:
1764 assert (0);
1765 break;
1766
1767 case TGSI_OPCODE_NEGATE:
1768 assert (0);
1769 break;
1770
1771 case TGSI_OPCODE_FRAC:
1772 /* TGSI_OPCODE_FRC */
1773 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1774 FETCH( &r[0], 0, chan_index );
1775 micro_frc( &r[0], &r[0] );
1776 STORE( &r[0], 0, chan_index );
1777 }
1778 break;
1779
1780 case TGSI_OPCODE_CLAMP:
1781 assert (0);
1782 break;
1783
1784 case TGSI_OPCODE_FLOOR:
1785 /* TGSI_OPCODE_FLR */
1786 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1787 FETCH( &r[0], 0, chan_index );
1788 micro_flr( &r[0], &r[0] );
1789 STORE( &r[0], 0, chan_index );
1790 }
1791 break;
1792
1793 case TGSI_OPCODE_ROUND:
1794 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1795 FETCH( &r[0], 0, chan_index );
1796 micro_rnd( &r[0], &r[0] );
1797 STORE( &r[0], 0, chan_index );
1798 }
1799 break;
1800
1801 case TGSI_OPCODE_EXPBASE2:
1802 /* TGSI_OPCODE_EX2 */
1803 FETCH(&r[0], 0, CHAN_X);
1804
1805 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
1806
1807 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1808 STORE( &r[0], 0, chan_index );
1809 }
1810 break;
1811
1812 case TGSI_OPCODE_LOGBASE2:
1813 /* TGSI_OPCODE_LG2 */
1814 FETCH( &r[0], 0, CHAN_X );
1815 micro_lg2( &r[0], &r[0] );
1816 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1817 STORE( &r[0], 0, chan_index );
1818 }
1819 break;
1820
1821 case TGSI_OPCODE_POWER:
1822 /* TGSI_OPCODE_POW */
1823 FETCH(&r[0], 0, CHAN_X);
1824 FETCH(&r[1], 1, CHAN_X);
1825
1826 micro_pow( &r[0], &r[0], &r[1] );
1827
1828 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1829 STORE( &r[0], 0, chan_index );
1830 }
1831 break;
1832
1833 case TGSI_OPCODE_CROSSPRODUCT:
1834 /* TGSI_OPCODE_XPD */
1835 FETCH(&r[0], 0, CHAN_Y);
1836 FETCH(&r[1], 1, CHAN_Z);
1837
1838 micro_mul( &r[2], &r[0], &r[1] );
1839
1840 FETCH(&r[3], 0, CHAN_Z);
1841 FETCH(&r[4], 1, CHAN_Y);
1842
1843 micro_mul( &r[5], &r[3], &r[4] );
1844 micro_sub( &r[2], &r[2], &r[5] );
1845
1846 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1847 STORE( &r[2], 0, CHAN_X );
1848 }
1849
1850 FETCH(&r[2], 1, CHAN_X);
1851
1852 micro_mul( &r[3], &r[3], &r[2] );
1853
1854 FETCH(&r[5], 0, CHAN_X);
1855
1856 micro_mul( &r[1], &r[1], &r[5] );
1857 micro_sub( &r[3], &r[3], &r[1] );
1858
1859 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1860 STORE( &r[3], 0, CHAN_Y );
1861 }
1862
1863 micro_mul( &r[5], &r[5], &r[4] );
1864 micro_mul( &r[0], &r[0], &r[2] );
1865 micro_sub( &r[5], &r[5], &r[0] );
1866
1867 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1868 STORE( &r[5], 0, CHAN_Z );
1869 }
1870
1871 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1872 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1873 }
1874 break;
1875
1876 case TGSI_OPCODE_MULTIPLYMATRIX:
1877 assert (0);
1878 break;
1879
1880 case TGSI_OPCODE_ABS:
1881 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1882 FETCH(&r[0], 0, chan_index);
1883
1884 micro_abs( &r[0], &r[0] );
1885
1886 STORE(&r[0], 0, chan_index);
1887 }
1888 break;
1889
1890 case TGSI_OPCODE_RCC:
1891 assert (0);
1892 break;
1893
1894 case TGSI_OPCODE_DPH:
1895 FETCH(&r[0], 0, CHAN_X);
1896 FETCH(&r[1], 1, CHAN_X);
1897
1898 micro_mul( &r[0], &r[0], &r[1] );
1899
1900 FETCH(&r[1], 0, CHAN_Y);
1901 FETCH(&r[2], 1, CHAN_Y);
1902
1903 micro_mul( &r[1], &r[1], &r[2] );
1904 micro_add( &r[0], &r[0], &r[1] );
1905
1906 FETCH(&r[1], 0, CHAN_Z);
1907 FETCH(&r[2], 1, CHAN_Z);
1908
1909 micro_mul( &r[1], &r[1], &r[2] );
1910 micro_add( &r[0], &r[0], &r[1] );
1911
1912 FETCH(&r[1], 1, CHAN_W);
1913
1914 micro_add( &r[0], &r[0], &r[1] );
1915
1916 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1917 STORE( &r[0], 0, chan_index );
1918 }
1919 break;
1920
1921 case TGSI_OPCODE_COS:
1922 FETCH(&r[0], 0, CHAN_X);
1923
1924 micro_cos( &r[0], &r[0] );
1925
1926 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1927 STORE( &r[0], 0, chan_index );
1928 }
1929 break;
1930
1931 case TGSI_OPCODE_DDX:
1932 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1933 FETCH( &r[0], 0, chan_index );
1934 micro_ddx( &r[0], &r[0] );
1935 STORE( &r[0], 0, chan_index );
1936 }
1937 break;
1938
1939 case TGSI_OPCODE_DDY:
1940 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1941 FETCH( &r[0], 0, chan_index );
1942 micro_ddy( &r[0], &r[0] );
1943 STORE( &r[0], 0, chan_index );
1944 }
1945 break;
1946
1947 case TGSI_OPCODE_KILP:
1948 exec_kilp (mach, inst);
1949 break;
1950
1951 case TGSI_OPCODE_KIL:
1952 /* for enabled ExecMask bits, set the killed bit */
1953 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
1954 break;
1955
1956 case TGSI_OPCODE_PK2H:
1957 assert (0);
1958 break;
1959
1960 case TGSI_OPCODE_PK2US:
1961 assert (0);
1962 break;
1963
1964 case TGSI_OPCODE_PK4B:
1965 assert (0);
1966 break;
1967
1968 case TGSI_OPCODE_PK4UB:
1969 assert (0);
1970 break;
1971
1972 case TGSI_OPCODE_RFL:
1973 assert (0);
1974 break;
1975
1976 case TGSI_OPCODE_SEQ:
1977 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1978 FETCH( &r[0], 0, chan_index );
1979 FETCH( &r[1], 1, chan_index );
1980 micro_eq( &r[0], &r[0], &r[1],
1981 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
1982 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1983 STORE( &r[0], 0, chan_index );
1984 }
1985 break;
1986
1987 case TGSI_OPCODE_SFL:
1988 assert (0);
1989 break;
1990
1991 case TGSI_OPCODE_SGT:
1992 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1993 FETCH( &r[0], 0, chan_index );
1994 FETCH( &r[1], 1, chan_index );
1995 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1996 STORE( &r[0], 0, chan_index );
1997 }
1998 break;
1999
2000 case TGSI_OPCODE_SIN:
2001 FETCH( &r[0], 0, CHAN_X );
2002 micro_sin( &r[0], &r[0] );
2003 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2004 STORE( &r[0], 0, chan_index );
2005 }
2006 break;
2007
2008 case TGSI_OPCODE_SLE:
2009 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2010 FETCH( &r[0], 0, chan_index );
2011 FETCH( &r[1], 1, chan_index );
2012 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2013 STORE( &r[0], 0, chan_index );
2014 }
2015 break;
2016
2017 case TGSI_OPCODE_SNE:
2018 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2019 FETCH( &r[0], 0, chan_index );
2020 FETCH( &r[1], 1, chan_index );
2021 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2022 STORE( &r[0], 0, chan_index );
2023 }
2024 break;
2025
2026 case TGSI_OPCODE_STR:
2027 assert (0);
2028 break;
2029
2030 case TGSI_OPCODE_TEX:
2031 /* simple texture lookup */
2032 /* src[0] = texcoord */
2033 /* src[1] = sampler unit */
2034 exec_tex(mach, inst, FALSE, FALSE);
2035 break;
2036
2037 case TGSI_OPCODE_TXB:
2038 /* Texture lookup with lod bias */
2039 /* src[0] = texcoord (src[0].w = LOD bias) */
2040 /* src[1] = sampler unit */
2041 exec_tex(mach, inst, TRUE, FALSE);
2042 break;
2043
2044 case TGSI_OPCODE_TXD:
2045 /* Texture lookup with explict partial derivatives */
2046 /* src[0] = texcoord */
2047 /* src[1] = d[strq]/dx */
2048 /* src[2] = d[strq]/dy */
2049 /* src[3] = sampler unit */
2050 assert (0);
2051 break;
2052
2053 case TGSI_OPCODE_TXL:
2054 /* Texture lookup with explit LOD */
2055 /* src[0] = texcoord (src[0].w = LOD) */
2056 /* src[1] = sampler unit */
2057 exec_tex(mach, inst, TRUE, FALSE);
2058 break;
2059
2060 case TGSI_OPCODE_TXP:
2061 /* Texture lookup with projection */
2062 /* src[0] = texcoord (src[0].w = projection) */
2063 /* src[1] = sampler unit */
2064 exec_tex(mach, inst, FALSE, TRUE);
2065 break;
2066
2067 case TGSI_OPCODE_UP2H:
2068 assert (0);
2069 break;
2070
2071 case TGSI_OPCODE_UP2US:
2072 assert (0);
2073 break;
2074
2075 case TGSI_OPCODE_UP4B:
2076 assert (0);
2077 break;
2078
2079 case TGSI_OPCODE_UP4UB:
2080 assert (0);
2081 break;
2082
2083 case TGSI_OPCODE_X2D:
2084 assert (0);
2085 break;
2086
2087 case TGSI_OPCODE_ARA:
2088 assert (0);
2089 break;
2090
2091 case TGSI_OPCODE_ARR:
2092 assert (0);
2093 break;
2094
2095 case TGSI_OPCODE_BRA:
2096 assert (0);
2097 break;
2098
2099 case TGSI_OPCODE_CAL:
2100 /* skip the call if no execution channels are enabled */
2101 if (mach->ExecMask) {
2102 /* do the call */
2103
2104 /* push the Cond, Loop, Cont stacks */
2105 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2106 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2107 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2108 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2109 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2110 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2111
2112 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2113 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2114
2115 /* note that PC was already incremented above */
2116 mach->CallStack[mach->CallStackTop++] = *pc;
2117 *pc = inst->InstructionExtLabel.Label;
2118 }
2119 break;
2120
2121 case TGSI_OPCODE_RET:
2122 mach->FuncMask &= ~mach->ExecMask;
2123 UPDATE_EXEC_MASK(mach);
2124
2125 if (mach->ExecMask == 0x0) {
2126 /* really return now (otherwise, keep executing */
2127
2128 if (mach->CallStackTop == 0) {
2129 /* returning from main() */
2130 *pc = -1;
2131 return;
2132 }
2133 *pc = mach->CallStack[--mach->CallStackTop];
2134
2135 /* pop the Cond, Loop, Cont stacks */
2136 assert(mach->CondStackTop > 0);
2137 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2138 assert(mach->LoopStackTop > 0);
2139 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2140 assert(mach->ContStackTop > 0);
2141 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2142 assert(mach->FuncStackTop > 0);
2143 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2144
2145 UPDATE_EXEC_MASK(mach);
2146 }
2147 break;
2148
2149 case TGSI_OPCODE_SSG:
2150 assert (0);
2151 break;
2152
2153 case TGSI_OPCODE_CMP:
2154 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2155 FETCH(&r[0], 0, chan_index);
2156 FETCH(&r[1], 1, chan_index);
2157 FETCH(&r[2], 2, chan_index);
2158
2159 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2160
2161 STORE(&r[0], 0, chan_index);
2162 }
2163 break;
2164
2165 case TGSI_OPCODE_SCS:
2166 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2167 FETCH( &r[0], 0, CHAN_X );
2168 }
2169 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2170 micro_cos( &r[1], &r[0] );
2171 STORE( &r[1], 0, CHAN_X );
2172 }
2173 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2174 micro_sin( &r[1], &r[0] );
2175 STORE( &r[1], 0, CHAN_Y );
2176 }
2177 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2178 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2179 }
2180 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2181 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2182 }
2183 break;
2184
2185 case TGSI_OPCODE_NRM:
2186 assert (0);
2187 break;
2188
2189 case TGSI_OPCODE_DIV:
2190 assert( 0 );
2191 break;
2192
2193 case TGSI_OPCODE_DP2:
2194 FETCH( &r[0], 0, CHAN_X );
2195 FETCH( &r[1], 1, CHAN_X );
2196 micro_mul( &r[0], &r[0], &r[1] );
2197
2198 FETCH( &r[1], 0, CHAN_Y );
2199 FETCH( &r[2], 1, CHAN_Y );
2200 micro_mul( &r[1], &r[1], &r[2] );
2201 micro_add( &r[0], &r[0], &r[1] );
2202
2203 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2204 STORE( &r[0], 0, chan_index );
2205 }
2206 break;
2207
2208 case TGSI_OPCODE_IF:
2209 /* push CondMask */
2210 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2211 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2212 FETCH( &r[0], 0, CHAN_X );
2213 /* update CondMask */
2214 if( ! r[0].u[0] ) {
2215 mach->CondMask &= ~0x1;
2216 }
2217 if( ! r[0].u[1] ) {
2218 mach->CondMask &= ~0x2;
2219 }
2220 if( ! r[0].u[2] ) {
2221 mach->CondMask &= ~0x4;
2222 }
2223 if( ! r[0].u[3] ) {
2224 mach->CondMask &= ~0x8;
2225 }
2226 UPDATE_EXEC_MASK(mach);
2227 /* Todo: If CondMask==0, jump to ELSE */
2228 break;
2229
2230 case TGSI_OPCODE_ELSE:
2231 /* invert CondMask wrt previous mask */
2232 {
2233 uint prevMask;
2234 assert(mach->CondStackTop > 0);
2235 prevMask = mach->CondStack[mach->CondStackTop - 1];
2236 mach->CondMask = ~mach->CondMask & prevMask;
2237 UPDATE_EXEC_MASK(mach);
2238 /* Todo: If CondMask==0, jump to ENDIF */
2239 }
2240 break;
2241
2242 case TGSI_OPCODE_ENDIF:
2243 /* pop CondMask */
2244 assert(mach->CondStackTop > 0);
2245 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2246 UPDATE_EXEC_MASK(mach);
2247 break;
2248
2249 case TGSI_OPCODE_END:
2250 /* halt execution */
2251 *pc = -1;
2252 break;
2253
2254 case TGSI_OPCODE_REP:
2255 assert (0);
2256 break;
2257
2258 case TGSI_OPCODE_ENDREP:
2259 assert (0);
2260 break;
2261
2262 case TGSI_OPCODE_PUSHA:
2263 assert (0);
2264 break;
2265
2266 case TGSI_OPCODE_POPA:
2267 assert (0);
2268 break;
2269
2270 case TGSI_OPCODE_CEIL:
2271 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2272 FETCH( &r[0], 0, chan_index );
2273 micro_ceil( &r[0], &r[0] );
2274 STORE( &r[0], 0, chan_index );
2275 }
2276 break;
2277
2278 case TGSI_OPCODE_I2F:
2279 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2280 FETCH( &r[0], 0, chan_index );
2281 micro_i2f( &r[0], &r[0] );
2282 STORE( &r[0], 0, chan_index );
2283 }
2284 break;
2285
2286 case TGSI_OPCODE_NOT:
2287 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2288 FETCH( &r[0], 0, chan_index );
2289 micro_not( &r[0], &r[0] );
2290 STORE( &r[0], 0, chan_index );
2291 }
2292 break;
2293
2294 case TGSI_OPCODE_TRUNC:
2295 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2296 FETCH( &r[0], 0, chan_index );
2297 micro_trunc( &r[0], &r[0] );
2298 STORE( &r[0], 0, chan_index );
2299 }
2300 break;
2301
2302 case TGSI_OPCODE_SHL:
2303 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2304 FETCH( &r[0], 0, chan_index );
2305 FETCH( &r[1], 1, chan_index );
2306 micro_shl( &r[0], &r[0], &r[1] );
2307 STORE( &r[0], 0, chan_index );
2308 }
2309 break;
2310
2311 case TGSI_OPCODE_SHR:
2312 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2313 FETCH( &r[0], 0, chan_index );
2314 FETCH( &r[1], 1, chan_index );
2315 micro_ishr( &r[0], &r[0], &r[1] );
2316 STORE( &r[0], 0, chan_index );
2317 }
2318 break;
2319
2320 case TGSI_OPCODE_AND:
2321 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2322 FETCH( &r[0], 0, chan_index );
2323 FETCH( &r[1], 1, chan_index );
2324 micro_and( &r[0], &r[0], &r[1] );
2325 STORE( &r[0], 0, chan_index );
2326 }
2327 break;
2328
2329 case TGSI_OPCODE_OR:
2330 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2331 FETCH( &r[0], 0, chan_index );
2332 FETCH( &r[1], 1, chan_index );
2333 micro_or( &r[0], &r[0], &r[1] );
2334 STORE( &r[0], 0, chan_index );
2335 }
2336 break;
2337
2338 case TGSI_OPCODE_MOD:
2339 assert (0);
2340 break;
2341
2342 case TGSI_OPCODE_XOR:
2343 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2344 FETCH( &r[0], 0, chan_index );
2345 FETCH( &r[1], 1, chan_index );
2346 micro_xor( &r[0], &r[0], &r[1] );
2347 STORE( &r[0], 0, chan_index );
2348 }
2349 break;
2350
2351 case TGSI_OPCODE_SAD:
2352 assert (0);
2353 break;
2354
2355 case TGSI_OPCODE_TXF:
2356 assert (0);
2357 break;
2358
2359 case TGSI_OPCODE_TXQ:
2360 assert (0);
2361 break;
2362
2363 case TGSI_OPCODE_EMIT:
2364 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2365 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2366 break;
2367
2368 case TGSI_OPCODE_ENDPRIM:
2369 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2370 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2371 break;
2372
2373 case TGSI_OPCODE_LOOP:
2374 /* fall-through (for now) */
2375 case TGSI_OPCODE_BGNLOOP2:
2376 /* push LoopMask and ContMasks */
2377 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2378 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2379 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2380 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2381 break;
2382
2383 case TGSI_OPCODE_ENDLOOP:
2384 /* fall-through (for now at least) */
2385 case TGSI_OPCODE_ENDLOOP2:
2386 /* Restore ContMask, but don't pop */
2387 assert(mach->ContStackTop > 0);
2388 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2389 if (mach->LoopMask) {
2390 /* repeat loop: jump to instruction just past BGNLOOP */
2391 *pc = inst->InstructionExtLabel.Label + 1;
2392 }
2393 else {
2394 /* exit loop: pop LoopMask */
2395 assert(mach->LoopStackTop > 0);
2396 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2397 /* pop ContMask */
2398 assert(mach->ContStackTop > 0);
2399 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2400 }
2401 UPDATE_EXEC_MASK(mach);
2402 break;
2403
2404 case TGSI_OPCODE_BRK:
2405 /* turn off loop channels for each enabled exec channel */
2406 mach->LoopMask &= ~mach->ExecMask;
2407 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2408 UPDATE_EXEC_MASK(mach);
2409 break;
2410
2411 case TGSI_OPCODE_CONT:
2412 /* turn off cont channels for each enabled exec channel */
2413 mach->ContMask &= ~mach->ExecMask;
2414 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2415 UPDATE_EXEC_MASK(mach);
2416 break;
2417
2418 case TGSI_OPCODE_BGNSUB:
2419 /* no-op */
2420 break;
2421
2422 case TGSI_OPCODE_ENDSUB:
2423 /* no-op */
2424 break;
2425
2426 case TGSI_OPCODE_NOISE1:
2427 assert( 0 );
2428 break;
2429
2430 case TGSI_OPCODE_NOISE2:
2431 assert( 0 );
2432 break;
2433
2434 case TGSI_OPCODE_NOISE3:
2435 assert( 0 );
2436 break;
2437
2438 case TGSI_OPCODE_NOISE4:
2439 assert( 0 );
2440 break;
2441
2442 case TGSI_OPCODE_NOP:
2443 break;
2444
2445 default:
2446 assert( 0 );
2447 }
2448 }
2449
2450
2451 /**
2452 * Run TGSI interpreter.
2453 * \return bitmask of "alive" quad components
2454 */
2455 uint
2456 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2457 {
2458 uint i;
2459 int pc = 0;
2460
2461 mach->CondMask = 0xf;
2462 mach->LoopMask = 0xf;
2463 mach->ContMask = 0xf;
2464 mach->FuncMask = 0xf;
2465 mach->ExecMask = 0xf;
2466
2467 mach->CondStackTop = 0; /* temporarily subvert this assertion */
2468 assert(mach->CondStackTop == 0);
2469 assert(mach->LoopStackTop == 0);
2470 assert(mach->ContStackTop == 0);
2471 assert(mach->CallStackTop == 0);
2472
2473 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2474 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2475
2476 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2477 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2478 mach->Primitives[0] = 0;
2479 }
2480
2481
2482 /* execute declarations (interpolants) */
2483 for (i = 0; i < mach->NumDeclarations; i++) {
2484 exec_declaration( mach, mach->Declarations+i );
2485 }
2486
2487 /* execute instructions, until pc is set to -1 */
2488 while (pc != -1) {
2489 assert(pc < (int) mach->NumInstructions);
2490 exec_instruction( mach, mach->Instructions + pc, &pc );
2491 }
2492
2493 #if 0
2494 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2495 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2496 /*
2497 * Scale back depth component.
2498 */
2499 for (i = 0; i < 4; i++)
2500 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2501 }
2502 #endif
2503
2504 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2505 }
2506
2507