Implement TGSI_OPCODE_SNE with micro_ne()
[mesa.git] / src / mesa / pipe / tgsi / exec / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from three other masks (CondMask, LoopMask and
44 * ContMask) which are controlled by the flow control instructions (namely:
45 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michael Krol
50 * Brian Paul
51 */
52
53 #include "tgsi_platform.h"
54 #include "tgsi_core.h"
55 #include "pipe/p_state.h"
56
57 #define MESA 1
58 #if MESA
59 #include "main/context.h"
60 #include "main/macros.h"
61 #endif
62
/*
 * Lane indices within an execution channel, named by tile position.
 * micro_ddx()/micro_ddy() use them to pick the lanes they difference.
 */
#define TILE_BOTTOM_LEFT  0
#define TILE_BOTTOM_RIGHT 1
#define TILE_TOP_LEFT     2
#define TILE_TOP_RIGHT    3

/*
 * Short aliases for the TGSI_EXEC_TEMP_* constants.  The _I names are used
 * as indices into mach->Temps[] and the _C names as indices into .xyzw[].
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0

/** Loop header: iterate CHAN over the four register channels (0..3). */
#define FOR_EACH_CHANNEL(CHAN)\
   for (CHAN = 0; CHAN < 4; CHAN++)

/** Non-zero if channel CHAN is set in the write mask of destination 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Non-zero if channel CHAN is set in the write mask of destination 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/*
 * Loop headers that visit only the channels enabled in the write mask.
 * They expand to "for (...) if (...)", so the statement that follows is the
 * body of the if; do not attach an else to it.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))


/**
 * The execution mask is the intersection of the conditional mask, the loop
 * mask and the continue mask.  MACH is parenthesized so that any pointer
 * expression (not just a plain identifier) can be passed.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & (MACH)->ContMask)


/** Channel indices of a four-component register. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
119
120
121
122 static void
123 tgsi_exec_prepare( struct tgsi_exec_machine *mach )
124 {
125 struct tgsi_exec_labels *labels = &mach->Labels;
126 struct tgsi_parse_context parse;
127 struct tgsi_full_instruction *instructions;
128 struct tgsi_full_declaration *declarations;
129 uint maxInstructions = 10, numInstructions = 0;
130 uint maxDeclarations = 10, numDeclarations = 0;
131 GLuint k;
132 GLuint instno = 0;
133
134 mach->ImmLimit = 0;
135 labels->count = 0;
136
137 declarations = (struct tgsi_full_declaration *)
138 malloc(maxDeclarations * sizeof(struct tgsi_full_declaration));
139
140 instructions = (struct tgsi_full_instruction *)
141 malloc(maxInstructions * sizeof(struct tgsi_full_instruction));
142
143 k = tgsi_parse_init( &parse, mach->Tokens );
144 if (k != TGSI_PARSE_OK) {
145 fprintf(stderr, "Problem parsing!\n");
146 return;
147 }
148
149 while( !tgsi_parse_end_of_tokens( &parse ) ) {
150 GLuint pointer = parse.Position;
151 GLuint i;
152
153 tgsi_parse_token( &parse );
154 switch( parse.FullToken.Token.Type ) {
155 case TGSI_TOKEN_TYPE_DECLARATION:
156 /* save expanded declaration */
157 if (numDeclarations == maxDeclarations) {
158 maxDeclarations += 10;
159 declarations = realloc(declarations,
160 maxDeclarations
161 * sizeof(struct tgsi_full_instruction));
162 }
163 memcpy(declarations + numDeclarations,
164 &parse.FullToken.FullInstruction,
165 sizeof(declarations[0]));
166 numDeclarations++;
167 break;
168
169 case TGSI_TOKEN_TYPE_IMMEDIATE:
170 assert( (parse.FullToken.FullImmediate.Immediate.Size - 1) % 4 == 0 );
171 assert( mach->ImmLimit + (parse.FullToken.FullImmediate.Immediate.Size - 1) / 4 <= 256 );
172
173 for( i = 0; i < parse.FullToken.FullImmediate.Immediate.Size - 1; i++ ) {
174 mach->Imms[mach->ImmLimit + i / 4][i % 4] = parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
175 }
176 mach->ImmLimit += (parse.FullToken.FullImmediate.Immediate.Size - 1) / 4;
177 break;
178
179 case TGSI_TOKEN_TYPE_INSTRUCTION:
180 assert( labels->count < 128 );
181
182 labels->labels[labels->count][0] = instno;
183 labels->labels[labels->count][1] = pointer;
184 labels->count++;
185
186 /* save expanded instruction */
187 if (numInstructions == maxInstructions) {
188 maxInstructions += 10;
189 instructions = realloc(instructions,
190 maxInstructions
191 * sizeof(struct tgsi_full_instruction));
192 }
193 memcpy(instructions + numInstructions,
194 &parse.FullToken.FullInstruction,
195 sizeof(instructions[0]));
196 numInstructions++;
197 break;
198
199 default:
200 assert( 0 );
201 }
202 }
203 tgsi_parse_free (&parse);
204
205 if (mach->Declarations) {
206 free(mach->Declarations);
207 }
208 mach->Declarations = declarations;
209 mach->NumDeclarations = numDeclarations;
210
211 if (mach->Instructions) {
212 free(mach->Instructions);
213 }
214 mach->Instructions = instructions;
215 mach->NumInstructions = numInstructions;
216 }
217
218
219 /**
220 * Initialize machine state by expanding tokens to full instructions,
221 * allocating temporary storage, setting up constants, etc.
222 * After this, we can call tgsi_exec_machine_run() many times.
223 */
224 void
225 tgsi_exec_machine_init(
226 struct tgsi_exec_machine *mach,
227 const struct tgsi_token *tokens,
228 GLuint numSamplers,
229 struct tgsi_sampler *samplers)
230 {
231 GLuint i, k;
232 struct tgsi_parse_context parse;
233
234 #if 0
235 tgsi_dump(tokens, 0);
236 #endif
237
238 mach->Tokens = tokens;
239
240 mach->Samplers = samplers;
241
242 k = tgsi_parse_init (&parse, mach->Tokens);
243 if (k != TGSI_PARSE_OK) {
244 printf("Problem parsing!\n");
245 return;
246 }
247
248 mach->Processor = parse.FullHeader.Processor.Processor;
249 tgsi_parse_free (&parse);
250
251 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
252 mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
253
254 /* Setup constants. */
255 for( i = 0; i < 4; i++ ) {
256 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
257 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
258 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
259 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
260 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
261 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
262 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
263 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
264 }
265
266 tgsi_exec_prepare( mach );
267 }
268
269
270 static void
271 micro_abs(
272 union tgsi_exec_channel *dst,
273 const union tgsi_exec_channel *src )
274 {
275 dst->f[0] = (GLfloat) fabs( (GLdouble) src->f[0] );
276 dst->f[1] = (GLfloat) fabs( (GLdouble) src->f[1] );
277 dst->f[2] = (GLfloat) fabs( (GLdouble) src->f[2] );
278 dst->f[3] = (GLfloat) fabs( (GLdouble) src->f[3] );
279 }
280
281 static void
282 micro_add(
283 union tgsi_exec_channel *dst,
284 const union tgsi_exec_channel *src0,
285 const union tgsi_exec_channel *src1 )
286 {
287 dst->f[0] = src0->f[0] + src1->f[0];
288 dst->f[1] = src0->f[1] + src1->f[1];
289 dst->f[2] = src0->f[2] + src1->f[2];
290 dst->f[3] = src0->f[3] + src1->f[3];
291 }
292
293 static void
294 micro_iadd(
295 union tgsi_exec_channel *dst,
296 const union tgsi_exec_channel *src0,
297 const union tgsi_exec_channel *src1 )
298 {
299 dst->i[0] = src0->i[0] + src1->i[0];
300 dst->i[1] = src0->i[1] + src1->i[1];
301 dst->i[2] = src0->i[2] + src1->i[2];
302 dst->i[3] = src0->i[3] + src1->i[3];
303 }
304
305 static void
306 micro_and(
307 union tgsi_exec_channel *dst,
308 const union tgsi_exec_channel *src0,
309 const union tgsi_exec_channel *src1 )
310 {
311 dst->u[0] = src0->u[0] & src1->u[0];
312 dst->u[1] = src0->u[1] & src1->u[1];
313 dst->u[2] = src0->u[2] & src1->u[2];
314 dst->u[3] = src0->u[3] & src1->u[3];
315 }
316
317 static void
318 micro_ceil(
319 union tgsi_exec_channel *dst,
320 const union tgsi_exec_channel *src )
321 {
322 dst->f[0] = (GLfloat) ceil( (GLdouble) src->f[0] );
323 dst->f[1] = (GLfloat) ceil( (GLdouble) src->f[1] );
324 dst->f[2] = (GLfloat) ceil( (GLdouble) src->f[2] );
325 dst->f[3] = (GLfloat) ceil( (GLdouble) src->f[3] );
326 }
327
328 static void
329 micro_cos(
330 union tgsi_exec_channel *dst,
331 const union tgsi_exec_channel *src )
332 {
333 dst->f[0] = (GLfloat) cos( (GLdouble) src->f[0] );
334 dst->f[1] = (GLfloat) cos( (GLdouble) src->f[1] );
335 dst->f[2] = (GLfloat) cos( (GLdouble) src->f[2] );
336 dst->f[3] = (GLfloat) cos( (GLdouble) src->f[3] );
337 }
338
339 static void
340 micro_ddx(
341 union tgsi_exec_channel *dst,
342 const union tgsi_exec_channel *src )
343 {
344 dst->f[0] =
345 dst->f[1] =
346 dst->f[2] =
347 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
348 }
349
350 static void
351 micro_ddy(
352 union tgsi_exec_channel *dst,
353 const union tgsi_exec_channel *src )
354 {
355 dst->f[0] =
356 dst->f[1] =
357 dst->f[2] =
358 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
359 }
360
361 static void
362 micro_div(
363 union tgsi_exec_channel *dst,
364 const union tgsi_exec_channel *src0,
365 const union tgsi_exec_channel *src1 )
366 {
367 dst->f[0] = src0->f[0] / src1->f[0];
368 dst->f[1] = src0->f[1] / src1->f[1];
369 dst->f[2] = src0->f[2] / src1->f[2];
370 dst->f[3] = src0->f[3] / src1->f[3];
371 }
372
373 static void
374 micro_udiv(
375 union tgsi_exec_channel *dst,
376 const union tgsi_exec_channel *src0,
377 const union tgsi_exec_channel *src1 )
378 {
379 dst->u[0] = src0->u[0] / src1->u[0];
380 dst->u[1] = src0->u[1] / src1->u[1];
381 dst->u[2] = src0->u[2] / src1->u[2];
382 dst->u[3] = src0->u[3] / src1->u[3];
383 }
384
385 static void
386 micro_eq(
387 union tgsi_exec_channel *dst,
388 const union tgsi_exec_channel *src0,
389 const union tgsi_exec_channel *src1,
390 const union tgsi_exec_channel *src2,
391 const union tgsi_exec_channel *src3 )
392 {
393 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
394 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
395 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
396 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
397 }
398
399 static void
400 micro_ieq(
401 union tgsi_exec_channel *dst,
402 const union tgsi_exec_channel *src0,
403 const union tgsi_exec_channel *src1,
404 const union tgsi_exec_channel *src2,
405 const union tgsi_exec_channel *src3 )
406 {
407 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
408 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
409 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
410 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
411 }
412
413 static void
414 micro_exp2(
415 union tgsi_exec_channel *dst,
416 const union tgsi_exec_channel *src)
417 {
418 dst->f[0] = (GLfloat) pow( 2.0, (GLdouble) src->f[0] );
419 dst->f[1] = (GLfloat) pow( 2.0, (GLdouble) src->f[1] );
420 dst->f[2] = (GLfloat) pow( 2.0, (GLdouble) src->f[2] );
421 dst->f[3] = (GLfloat) pow( 2.0, (GLdouble) src->f[3] );
422 }
423
424 static void
425 micro_f2it(
426 union tgsi_exec_channel *dst,
427 const union tgsi_exec_channel *src )
428 {
429 dst->i[0] = (GLint) src->f[0];
430 dst->i[1] = (GLint) src->f[1];
431 dst->i[2] = (GLint) src->f[2];
432 dst->i[3] = (GLint) src->f[3];
433 }
434
435 static void
436 micro_f2ut(
437 union tgsi_exec_channel *dst,
438 const union tgsi_exec_channel *src )
439 {
440 dst->u[0] = (GLuint) src->f[0];
441 dst->u[1] = (GLuint) src->f[1];
442 dst->u[2] = (GLuint) src->f[2];
443 dst->u[3] = (GLuint) src->f[3];
444 }
445
446 static void
447 micro_flr(
448 union tgsi_exec_channel *dst,
449 const union tgsi_exec_channel *src )
450 {
451 dst->f[0] = (GLfloat) floor( (GLdouble) src->f[0] );
452 dst->f[1] = (GLfloat) floor( (GLdouble) src->f[1] );
453 dst->f[2] = (GLfloat) floor( (GLdouble) src->f[2] );
454 dst->f[3] = (GLfloat) floor( (GLdouble) src->f[3] );
455 }
456
457 static void
458 micro_frc(
459 union tgsi_exec_channel *dst,
460 const union tgsi_exec_channel *src )
461 {
462 dst->f[0] = src->f[0] - (GLfloat) floor( (GLdouble) src->f[0] );
463 dst->f[1] = src->f[1] - (GLfloat) floor( (GLdouble) src->f[1] );
464 dst->f[2] = src->f[2] - (GLfloat) floor( (GLdouble) src->f[2] );
465 dst->f[3] = src->f[3] - (GLfloat) floor( (GLdouble) src->f[3] );
466 }
467
468 static void
469 micro_ge(
470 union tgsi_exec_channel *dst,
471 const union tgsi_exec_channel *src0,
472 const union tgsi_exec_channel *src1,
473 const union tgsi_exec_channel *src2,
474 const union tgsi_exec_channel *src3 )
475 {
476 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
477 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
478 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
479 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
480 }
481
482 static void
483 micro_i2f(
484 union tgsi_exec_channel *dst,
485 const union tgsi_exec_channel *src )
486 {
487 dst->f[0] = (GLfloat) src->i[0];
488 dst->f[1] = (GLfloat) src->i[1];
489 dst->f[2] = (GLfloat) src->i[2];
490 dst->f[3] = (GLfloat) src->i[3];
491 }
492
493 static void
494 micro_lg2(
495 union tgsi_exec_channel *dst,
496 const union tgsi_exec_channel *src )
497 {
498 dst->f[0] = (GLfloat) log( (GLdouble) src->f[0] ) * 1.442695f;
499 dst->f[1] = (GLfloat) log( (GLdouble) src->f[1] ) * 1.442695f;
500 dst->f[2] = (GLfloat) log( (GLdouble) src->f[2] ) * 1.442695f;
501 dst->f[3] = (GLfloat) log( (GLdouble) src->f[3] ) * 1.442695f;
502 }
503
504 static void
505 micro_lt(
506 union tgsi_exec_channel *dst,
507 const union tgsi_exec_channel *src0,
508 const union tgsi_exec_channel *src1,
509 const union tgsi_exec_channel *src2,
510 const union tgsi_exec_channel *src3 )
511 {
512 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
513 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
514 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
515 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
516 }
517
518 static void
519 micro_ilt(
520 union tgsi_exec_channel *dst,
521 const union tgsi_exec_channel *src0,
522 const union tgsi_exec_channel *src1,
523 const union tgsi_exec_channel *src2,
524 const union tgsi_exec_channel *src3 )
525 {
526 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
527 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
528 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
529 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
530 }
531
532 static void
533 micro_ult(
534 union tgsi_exec_channel *dst,
535 const union tgsi_exec_channel *src0,
536 const union tgsi_exec_channel *src1,
537 const union tgsi_exec_channel *src2,
538 const union tgsi_exec_channel *src3 )
539 {
540 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
541 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
542 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
543 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
544 }
545
546 static void
547 micro_max(
548 union tgsi_exec_channel *dst,
549 const union tgsi_exec_channel *src0,
550 const union tgsi_exec_channel *src1 )
551 {
552 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
553 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
554 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
555 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
556 }
557
558 static void
559 micro_imax(
560 union tgsi_exec_channel *dst,
561 const union tgsi_exec_channel *src0,
562 const union tgsi_exec_channel *src1 )
563 {
564 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
565 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
566 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
567 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
568 }
569
570 static void
571 micro_umax(
572 union tgsi_exec_channel *dst,
573 const union tgsi_exec_channel *src0,
574 const union tgsi_exec_channel *src1 )
575 {
576 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
577 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
578 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
579 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
580 }
581
582 static void
583 micro_min(
584 union tgsi_exec_channel *dst,
585 const union tgsi_exec_channel *src0,
586 const union tgsi_exec_channel *src1 )
587 {
588 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
589 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
590 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
591 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
592 }
593
594 static void
595 micro_imin(
596 union tgsi_exec_channel *dst,
597 const union tgsi_exec_channel *src0,
598 const union tgsi_exec_channel *src1 )
599 {
600 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
601 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
602 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
603 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
604 }
605
606 static void
607 micro_umin(
608 union tgsi_exec_channel *dst,
609 const union tgsi_exec_channel *src0,
610 const union tgsi_exec_channel *src1 )
611 {
612 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
613 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
614 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
615 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
616 }
617
618 static void
619 micro_umod(
620 union tgsi_exec_channel *dst,
621 const union tgsi_exec_channel *src0,
622 const union tgsi_exec_channel *src1 )
623 {
624 dst->u[0] = src0->u[0] % src1->u[0];
625 dst->u[1] = src0->u[1] % src1->u[1];
626 dst->u[2] = src0->u[2] % src1->u[2];
627 dst->u[3] = src0->u[3] % src1->u[3];
628 }
629
630 static void
631 micro_mul(
632 union tgsi_exec_channel *dst,
633 const union tgsi_exec_channel *src0,
634 const union tgsi_exec_channel *src1 )
635 {
636 dst->f[0] = src0->f[0] * src1->f[0];
637 dst->f[1] = src0->f[1] * src1->f[1];
638 dst->f[2] = src0->f[2] * src1->f[2];
639 dst->f[3] = src0->f[3] * src1->f[3];
640 }
641
642 static void
643 micro_imul(
644 union tgsi_exec_channel *dst,
645 const union tgsi_exec_channel *src0,
646 const union tgsi_exec_channel *src1 )
647 {
648 dst->i[0] = src0->i[0] * src1->i[0];
649 dst->i[1] = src0->i[1] * src1->i[1];
650 dst->i[2] = src0->i[2] * src1->i[2];
651 dst->i[3] = src0->i[3] * src1->i[3];
652 }
653
654 static void
655 micro_imul64(
656 union tgsi_exec_channel *dst0,
657 union tgsi_exec_channel *dst1,
658 const union tgsi_exec_channel *src0,
659 const union tgsi_exec_channel *src1 )
660 {
661 dst1->i[0] = src0->i[0] * src1->i[0];
662 dst1->i[1] = src0->i[1] * src1->i[1];
663 dst1->i[2] = src0->i[2] * src1->i[2];
664 dst1->i[3] = src0->i[3] * src1->i[3];
665 dst0->i[0] = 0;
666 dst0->i[1] = 0;
667 dst0->i[2] = 0;
668 dst0->i[3] = 0;
669 }
670
671 static void
672 micro_umul64(
673 union tgsi_exec_channel *dst0,
674 union tgsi_exec_channel *dst1,
675 const union tgsi_exec_channel *src0,
676 const union tgsi_exec_channel *src1 )
677 {
678 dst1->u[0] = src0->u[0] * src1->u[0];
679 dst1->u[1] = src0->u[1] * src1->u[1];
680 dst1->u[2] = src0->u[2] * src1->u[2];
681 dst1->u[3] = src0->u[3] * src1->u[3];
682 dst0->u[0] = 0;
683 dst0->u[1] = 0;
684 dst0->u[2] = 0;
685 dst0->u[3] = 0;
686 }
687
688 static void
689 micro_movc(
690 union tgsi_exec_channel *dst,
691 const union tgsi_exec_channel *src0,
692 const union tgsi_exec_channel *src1,
693 const union tgsi_exec_channel *src2 )
694 {
695 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
696 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
697 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
698 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
699 }
700
701 static void
702 micro_ne(
703 union tgsi_exec_channel *dst,
704 const union tgsi_exec_channel *src0,
705 const union tgsi_exec_channel *src1,
706 const union tgsi_exec_channel *src2,
707 const union tgsi_exec_channel *src3 )
708 {
709 dst->f[0] = src0->f[0] != src1->f[0] ? src2->f[0] : src3->f[0];
710 dst->f[1] = src0->f[1] != src1->f[1] ? src2->f[1] : src3->f[1];
711 dst->f[2] = src0->f[2] != src1->f[2] ? src2->f[2] : src3->f[2];
712 dst->f[3] = src0->f[3] != src1->f[3] ? src2->f[3] : src3->f[3];
713 }
714
715 static void
716 micro_neg(
717 union tgsi_exec_channel *dst,
718 const union tgsi_exec_channel *src )
719 {
720 dst->f[0] = -src->f[0];
721 dst->f[1] = -src->f[1];
722 dst->f[2] = -src->f[2];
723 dst->f[3] = -src->f[3];
724 }
725
726 static void
727 micro_ineg(
728 union tgsi_exec_channel *dst,
729 const union tgsi_exec_channel *src )
730 {
731 dst->i[0] = -src->i[0];
732 dst->i[1] = -src->i[1];
733 dst->i[2] = -src->i[2];
734 dst->i[3] = -src->i[3];
735 }
736
737 static void
738 micro_not(
739 union tgsi_exec_channel *dst,
740 const union tgsi_exec_channel *src )
741 {
742 dst->u[0] = ~src->u[0];
743 dst->u[1] = ~src->u[1];
744 dst->u[2] = ~src->u[2];
745 dst->u[3] = ~src->u[3];
746 }
747
748 static void
749 micro_or(
750 union tgsi_exec_channel *dst,
751 const union tgsi_exec_channel *src0,
752 const union tgsi_exec_channel *src1 )
753 {
754 dst->u[0] = src0->u[0] | src1->u[0];
755 dst->u[1] = src0->u[1] | src1->u[1];
756 dst->u[2] = src0->u[2] | src1->u[2];
757 dst->u[3] = src0->u[3] | src1->u[3];
758 }
759
760 static void
761 micro_pow(
762 union tgsi_exec_channel *dst,
763 const union tgsi_exec_channel *src0,
764 const union tgsi_exec_channel *src1 )
765 {
766 dst->f[0] = (GLfloat) pow( (GLdouble) src0->f[0], (GLdouble) src1->f[0] );
767 dst->f[1] = (GLfloat) pow( (GLdouble) src0->f[1], (GLdouble) src1->f[1] );
768 dst->f[2] = (GLfloat) pow( (GLdouble) src0->f[2], (GLdouble) src1->f[2] );
769 dst->f[3] = (GLfloat) pow( (GLdouble) src0->f[3], (GLdouble) src1->f[3] );
770 }
771
772 static void
773 micro_rnd(
774 union tgsi_exec_channel *dst,
775 const union tgsi_exec_channel *src )
776 {
777 dst->f[0] = (GLfloat) floor( (GLdouble) (src->f[0] + 0.5f) );
778 dst->f[1] = (GLfloat) floor( (GLdouble) (src->f[1] + 0.5f) );
779 dst->f[2] = (GLfloat) floor( (GLdouble) (src->f[2] + 0.5f) );
780 dst->f[3] = (GLfloat) floor( (GLdouble) (src->f[3] + 0.5f) );
781 }
782
783 static void
784 micro_shl(
785 union tgsi_exec_channel *dst,
786 const union tgsi_exec_channel *src0,
787 const union tgsi_exec_channel *src1 )
788 {
789 dst->i[0] = src0->i[0] << src1->i[0];
790 dst->i[1] = src0->i[1] << src1->i[1];
791 dst->i[2] = src0->i[2] << src1->i[2];
792 dst->i[3] = src0->i[3] << src1->i[3];
793 }
794
795 static void
796 micro_ishr(
797 union tgsi_exec_channel *dst,
798 const union tgsi_exec_channel *src0,
799 const union tgsi_exec_channel *src1 )
800 {
801 dst->i[0] = src0->i[0] >> src1->i[0];
802 dst->i[1] = src0->i[1] >> src1->i[1];
803 dst->i[2] = src0->i[2] >> src1->i[2];
804 dst->i[3] = src0->i[3] >> src1->i[3];
805 }
806
807 static void
808 micro_trunc(
809 union tgsi_exec_channel *dst,
810 const union tgsi_exec_channel *src0 )
811 {
812 dst->f[0] = (float) (int) src0->u[0];
813 dst->f[1] = (float) (int) src0->u[1];
814 dst->f[2] = (float) (int) src0->u[2];
815 dst->f[3] = (float) (int) src0->u[3];
816 }
817
818 static void
819 micro_ushr(
820 union tgsi_exec_channel *dst,
821 const union tgsi_exec_channel *src0,
822 const union tgsi_exec_channel *src1 )
823 {
824 dst->u[0] = src0->u[0] >> src1->u[0];
825 dst->u[1] = src0->u[1] >> src1->u[1];
826 dst->u[2] = src0->u[2] >> src1->u[2];
827 dst->u[3] = src0->u[3] >> src1->u[3];
828 }
829
830 static void
831 micro_sin(
832 union tgsi_exec_channel *dst,
833 const union tgsi_exec_channel *src )
834 {
835 dst->f[0] = (GLfloat) sin( (GLdouble) src->f[0] );
836 dst->f[1] = (GLfloat) sin( (GLdouble) src->f[1] );
837 dst->f[2] = (GLfloat) sin( (GLdouble) src->f[2] );
838 dst->f[3] = (GLfloat) sin( (GLdouble) src->f[3] );
839 }
840
841 static void
842 micro_sqrt( union tgsi_exec_channel *dst,
843 const union tgsi_exec_channel *src )
844 {
845 dst->f[0] = (GLfloat) sqrt( (GLdouble) src->f[0] );
846 dst->f[1] = (GLfloat) sqrt( (GLdouble) src->f[1] );
847 dst->f[2] = (GLfloat) sqrt( (GLdouble) src->f[2] );
848 dst->f[3] = (GLfloat) sqrt( (GLdouble) src->f[3] );
849 }
850
851 static void
852 micro_sub(
853 union tgsi_exec_channel *dst,
854 const union tgsi_exec_channel *src0,
855 const union tgsi_exec_channel *src1 )
856 {
857 dst->f[0] = src0->f[0] - src1->f[0];
858 dst->f[1] = src0->f[1] - src1->f[1];
859 dst->f[2] = src0->f[2] - src1->f[2];
860 dst->f[3] = src0->f[3] - src1->f[3];
861 }
862
863 static void
864 micro_u2f(
865 union tgsi_exec_channel *dst,
866 const union tgsi_exec_channel *src )
867 {
868 dst->f[0] = (GLfloat) src->u[0];
869 dst->f[1] = (GLfloat) src->u[1];
870 dst->f[2] = (GLfloat) src->u[2];
871 dst->f[3] = (GLfloat) src->u[3];
872 }
873
874 static void
875 micro_xor(
876 union tgsi_exec_channel *dst,
877 const union tgsi_exec_channel *src0,
878 const union tgsi_exec_channel *src1 )
879 {
880 dst->u[0] = src0->u[0] ^ src1->u[0];
881 dst->u[1] = src0->u[1] ^ src1->u[1];
882 dst->u[2] = src0->u[2] ^ src1->u[2];
883 dst->u[3] = src0->u[3] ^ src1->u[3];
884 }
885
886 static void
887 fetch_src_file_channel(
888 const struct tgsi_exec_machine *mach,
889 const GLuint file,
890 const GLuint swizzle,
891 const union tgsi_exec_channel *index,
892 union tgsi_exec_channel *chan )
893 {
894 switch( swizzle ) {
895 case TGSI_EXTSWIZZLE_X:
896 case TGSI_EXTSWIZZLE_Y:
897 case TGSI_EXTSWIZZLE_Z:
898 case TGSI_EXTSWIZZLE_W:
899 switch( file ) {
900 case TGSI_FILE_CONSTANT:
901 chan->f[0] = mach->Consts[index->i[0]][swizzle];
902 chan->f[1] = mach->Consts[index->i[1]][swizzle];
903 chan->f[2] = mach->Consts[index->i[2]][swizzle];
904 chan->f[3] = mach->Consts[index->i[3]][swizzle];
905 break;
906
907 case TGSI_FILE_INPUT:
908 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
909 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
910 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
911 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
912 break;
913
914 case TGSI_FILE_TEMPORARY:
915 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
916 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
917 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
918 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
919 break;
920
921 case TGSI_FILE_IMMEDIATE:
922 assert( index->i[0] < (GLint) mach->ImmLimit );
923 chan->f[0] = mach->Imms[index->i[0]][swizzle];
924 assert( index->i[1] < (GLint) mach->ImmLimit );
925 chan->f[1] = mach->Imms[index->i[1]][swizzle];
926 assert( index->i[2] < (GLint) mach->ImmLimit );
927 chan->f[2] = mach->Imms[index->i[2]][swizzle];
928 assert( index->i[3] < (GLint) mach->ImmLimit );
929 chan->f[3] = mach->Imms[index->i[3]][swizzle];
930 break;
931
932 case TGSI_FILE_ADDRESS:
933 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
934 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
935 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
936 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
937 break;
938
939 case TGSI_FILE_OUTPUT:
940 /* vertex varying/output vars can be read too */
941 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
942 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
943 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
944 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
945 break;
946
947 default:
948 assert( 0 );
949 }
950 break;
951
952 case TGSI_EXTSWIZZLE_ZERO:
953 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
954 break;
955
956 case TGSI_EXTSWIZZLE_ONE:
957 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
958 break;
959
960 default:
961 assert( 0 );
962 }
963 }
964
965 static void
966 fetch_source(
967 const struct tgsi_exec_machine *mach,
968 union tgsi_exec_channel *chan,
969 const struct tgsi_full_src_register *reg,
970 const GLuint chan_index )
971 {
972 union tgsi_exec_channel index;
973 GLuint swizzle;
974
975 index.i[0] =
976 index.i[1] =
977 index.i[2] =
978 index.i[3] = reg->SrcRegister.Index;
979
980 if (reg->SrcRegister.Indirect) {
981 union tgsi_exec_channel index2;
982 union tgsi_exec_channel indir_index;
983
984 index2.i[0] =
985 index2.i[1] =
986 index2.i[2] =
987 index2.i[3] = reg->SrcRegisterInd.Index;
988
989 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
990 fetch_src_file_channel(
991 mach,
992 reg->SrcRegisterInd.File,
993 swizzle,
994 &index2,
995 &indir_index );
996
997 index.i[0] += indir_index.i[0];
998 index.i[1] += indir_index.i[1];
999 index.i[2] += indir_index.i[2];
1000 index.i[3] += indir_index.i[3];
1001 }
1002
1003 if( reg->SrcRegister.Dimension ) {
1004 switch( reg->SrcRegister.File ) {
1005 case TGSI_FILE_INPUT:
1006 index.i[0] *= 17;
1007 index.i[1] *= 17;
1008 index.i[2] *= 17;
1009 index.i[3] *= 17;
1010 break;
1011 case TGSI_FILE_CONSTANT:
1012 index.i[0] *= 4096;
1013 index.i[1] *= 4096;
1014 index.i[2] *= 4096;
1015 index.i[3] *= 4096;
1016 break;
1017 default:
1018 assert( 0 );
1019 }
1020
1021 index.i[0] += reg->SrcRegisterDim.Index;
1022 index.i[1] += reg->SrcRegisterDim.Index;
1023 index.i[2] += reg->SrcRegisterDim.Index;
1024 index.i[3] += reg->SrcRegisterDim.Index;
1025
1026 if (reg->SrcRegisterDim.Indirect) {
1027 union tgsi_exec_channel index2;
1028 union tgsi_exec_channel indir_index;
1029
1030 index2.i[0] =
1031 index2.i[1] =
1032 index2.i[2] =
1033 index2.i[3] = reg->SrcRegisterDimInd.Index;
1034
1035 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1036 fetch_src_file_channel(
1037 mach,
1038 reg->SrcRegisterDimInd.File,
1039 swizzle,
1040 &index2,
1041 &indir_index );
1042
1043 index.i[0] += indir_index.i[0];
1044 index.i[1] += indir_index.i[1];
1045 index.i[2] += indir_index.i[2];
1046 index.i[3] += indir_index.i[3];
1047 }
1048 }
1049
1050 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1051 fetch_src_file_channel(
1052 mach,
1053 reg->SrcRegister.File,
1054 swizzle,
1055 &index,
1056 chan );
1057
1058 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1059 case TGSI_UTIL_SIGN_CLEAR:
1060 micro_abs( chan, chan );
1061 break;
1062
1063 case TGSI_UTIL_SIGN_SET:
1064 micro_abs( chan, chan );
1065 micro_neg( chan, chan );
1066 break;
1067
1068 case TGSI_UTIL_SIGN_TOGGLE:
1069 micro_neg( chan, chan );
1070 break;
1071
1072 case TGSI_UTIL_SIGN_KEEP:
1073 break;
1074 }
1075 }
1076
1077 static void
1078 store_dest(
1079 struct tgsi_exec_machine *mach,
1080 const union tgsi_exec_channel *chan,
1081 const struct tgsi_full_dst_register *reg,
1082 const struct tgsi_full_instruction *inst,
1083 GLuint chan_index )
1084 {
1085 union tgsi_exec_channel *dst;
1086
1087 switch( reg->DstRegister.File ) {
1088 case TGSI_FILE_NULL:
1089 return;
1090
1091 case TGSI_FILE_OUTPUT:
1092 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + reg->DstRegister.Index].xyzw[chan_index];
1093 break;
1094
1095 case TGSI_FILE_TEMPORARY:
1096 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1097 break;
1098
1099 case TGSI_FILE_ADDRESS:
1100 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1101 break;
1102
1103 default:
1104 assert( 0 );
1105 return;
1106 }
1107
1108 switch (inst->Instruction.Saturate)
1109 {
1110 case TGSI_SAT_NONE:
1111 #if 0
1112 *dst = *chan;
1113 #else
1114 if (mach->ExecMask & 0x1)
1115 dst->i[0] = chan->i[0];
1116 if (mach->ExecMask & 0x2)
1117 dst->i[1] = chan->i[1];
1118 if (mach->ExecMask & 0x4)
1119 dst->i[2] = chan->i[2];
1120 if (mach->ExecMask & 0x8)
1121 dst->i[3] = chan->i[3];
1122 #endif
1123 break;
1124
1125 case TGSI_SAT_ZERO_ONE:
1126 micro_lt( dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], chan );
1127 micro_lt( dst, chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1128 break;
1129
1130 case TGSI_SAT_MINUS_PLUS_ONE:
1131 assert( 0 );
1132 break;
1133
1134 default:
1135 assert( 0 );
1136 }
1137 }
1138
/*
 * Shorthands for use inside the opcode handlers.  Both expect variables
 * named `mach' and `inst' to be in scope at the point of use.
 *
 * FETCH reads channel CHAN of source operand INDEX into VAL;
 * STORE writes VAL to channel CHAN of destination operand INDEX.
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source( mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN) )

#define STORE(VAL,INDEX,CHAN)\
    store_dest( mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1144
1145
1146 /**
1147 * Execute ARB-style KIL which is predicated by a src register.
1148 * Kill fragment if any of the four values is less than zero.
1149 */
1150 static void
1151 exec_kilp(struct tgsi_exec_machine *mach,
1152 const struct tgsi_full_instruction *inst)
1153 {
1154 GLuint uniquemask;
1155 GLuint chan_index;
1156 GLuint kilmask = 0;
1157 union tgsi_exec_channel r[1];
1158
1159 /* This mask stores component bits that were already tested. Note that
1160 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1161 * tested. */
1162 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1163
1164 for (chan_index = 0; chan_index < 4; chan_index++)
1165 {
1166 GLuint swizzle;
1167 GLuint i;
1168
1169 /* unswizzle channel */
1170 swizzle = tgsi_util_get_full_src_register_extswizzle (
1171 &inst->FullSrcRegisters[0],
1172 chan_index);
1173
1174 /* check if the component has not been already tested */
1175 if (uniquemask & (1 << swizzle))
1176 continue;
1177 uniquemask |= 1 << swizzle;
1178
1179 FETCH(&r[0], 0, chan_index);
1180 for (i = 0; i < 4; i++)
1181 if (r[0].f[i] < 0.0f)
1182 kilmask |= 1 << (i * 4);
1183 }
1184
1185 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1186 }
1187
1188
1189 static void
1190 exec_kil(struct tgsi_exec_machine *mach,
1191 const struct tgsi_full_instruction *inst)
1192 {
1193 /* for enabled ExecMask bits, set the killed bit */
1194 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
1195 }
1196
1197
1198
1199 /*
1200 * Fetch a texel using STR texture coordinates.
1201 */
1202 static void
1203 fetch_texel( struct tgsi_sampler *sampler,
1204 const union tgsi_exec_channel *s,
1205 const union tgsi_exec_channel *t,
1206 const union tgsi_exec_channel *p,
1207 float lodbias,
1208 union tgsi_exec_channel *r,
1209 union tgsi_exec_channel *g,
1210 union tgsi_exec_channel *b,
1211 union tgsi_exec_channel *a )
1212 {
1213 GLuint j;
1214 GLfloat rgba[NUM_CHANNELS][QUAD_SIZE];
1215
1216 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1217
1218 for (j = 0; j < 4; j++) {
1219 r->f[j] = rgba[0][j];
1220 g->f[j] = rgba[1][j];
1221 b->f[j] = rgba[2][j];
1222 a->f[j] = rgba[3][j];
1223 }
1224 }
1225
1226 static void
1227 constant_interpolation(
1228 struct tgsi_exec_machine *mach,
1229 unsigned attrib,
1230 unsigned chan )
1231 {
1232 unsigned i;
1233
1234 for( i = 0; i < QUAD_SIZE; i++ ) {
1235 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1236 }
1237 }
1238
1239 static void
1240 linear_interpolation(
1241 struct tgsi_exec_machine *mach,
1242 unsigned attrib,
1243 unsigned chan )
1244 {
1245 unsigned i;
1246
1247 for( i = 0; i < QUAD_SIZE; i++ ) {
1248 const float x = mach->Inputs[0].xyzw[0].f[i];
1249 const float y = mach->Inputs[0].xyzw[1].f[i];
1250
1251 mach->Inputs[attrib].xyzw[chan].f[i] =
1252 mach->InterpCoefs[attrib].a0[chan] +
1253 mach->InterpCoefs[attrib].dadx[chan] * x +
1254 mach->InterpCoefs[attrib].dady[chan] * y;
1255 }
1256 }
1257
1258 static void
1259 perspective_interpolation(
1260 struct tgsi_exec_machine *mach,
1261 unsigned attrib,
1262 unsigned chan )
1263 {
1264 unsigned i;
1265
1266 for( i = 0; i < QUAD_SIZE; i++ ) {
1267 const float x = mach->Inputs[0].xyzw[0].f[i];
1268 const float y = mach->Inputs[0].xyzw[1].f[i];
1269 // WPOS.w here is really 1/w
1270 const float w = 1.0f / mach->Inputs[0].xyzw[3].f[i];
1271 assert(mach->Inputs[0].xyzw[3].f[i] != 0.0);
1272
1273 mach->Inputs[attrib].xyzw[chan].f[i] =
1274 (mach->InterpCoefs[attrib].a0[chan] +
1275 mach->InterpCoefs[attrib].dadx[chan] * x +
1276 mach->InterpCoefs[attrib].dady[chan] * y) * w;
1277 }
1278 }
1279
1280
1281 typedef void (* interpolation_func)(
1282 struct tgsi_exec_machine *mach,
1283 unsigned attrib,
1284 unsigned chan );
1285
1286 static void
1287 exec_declaration(
1288 struct tgsi_exec_machine *mach,
1289 const struct tgsi_full_declaration *decl )
1290 {
1291 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1292 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1293 unsigned first, last, mask;
1294 interpolation_func interp;
1295
1296 assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
1297
1298 first = decl->u.DeclarationRange.First;
1299 last = decl->u.DeclarationRange.Last;
1300 mask = decl->Declaration.UsageMask;
1301
1302 /* Do not touch WPOS.xy */
1303 if( first == 0 ) {
1304 mask &= ~TGSI_WRITEMASK_XY;
1305 if( mask == TGSI_WRITEMASK_NONE ) {
1306 first++;
1307 if( first > last ) {
1308 return;
1309 }
1310 }
1311 }
1312
1313 switch( decl->Interpolation.Interpolate ) {
1314 case TGSI_INTERPOLATE_CONSTANT:
1315 interp = constant_interpolation;
1316 break;
1317
1318 case TGSI_INTERPOLATE_LINEAR:
1319 interp = linear_interpolation;
1320 break;
1321
1322 case TGSI_INTERPOLATE_PERSPECTIVE:
1323 interp = perspective_interpolation;
1324 break;
1325
1326 default:
1327 assert( 0 );
1328 }
1329
1330 if( mask == TGSI_WRITEMASK_XYZW ) {
1331 unsigned i, j;
1332
1333 for( i = first; i <= last; i++ ) {
1334 for( j = 0; j < NUM_CHANNELS; j++ ) {
1335 interp( mach, i, j );
1336 }
1337 }
1338 }
1339 else {
1340 unsigned i, j;
1341
1342 for( j = 0; j < NUM_CHANNELS; j++ ) {
1343 if( mask & (1 << j) ) {
1344 for( i = first; i <= last; i++ ) {
1345 interp( mach, i, j );
1346 }
1347 }
1348 }
1349 }
1350 }
1351 }
1352 }
1353
1354 static void
1355 exec_instruction(
1356 struct tgsi_exec_machine *mach,
1357 const struct tgsi_full_instruction *inst,
1358 int *pc )
1359 {
1360 GLuint chan_index;
1361 union tgsi_exec_channel r[8];
1362
1363 (*pc)++;
1364
1365 switch (inst->Instruction.Opcode) {
1366 case TGSI_OPCODE_ARL:
1367 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1368 FETCH( &r[0], 0, chan_index );
1369 micro_f2it( &r[0], &r[0] );
1370 STORE( &r[0], 0, chan_index );
1371 }
1372 break;
1373
1374 case TGSI_OPCODE_MOV:
1375 /* TGSI_OPCODE_SWZ */
1376 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1377 FETCH( &r[0], 0, chan_index );
1378 STORE( &r[0], 0, chan_index );
1379 }
1380 break;
1381
1382 case TGSI_OPCODE_LIT:
1383 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1384 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1385 }
1386
1387 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1388 FETCH( &r[0], 0, CHAN_X );
1389 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1390 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1391 STORE( &r[0], 0, CHAN_Y );
1392 }
1393
1394 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1395 FETCH( &r[1], 0, CHAN_Y );
1396 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1397
1398 FETCH( &r[2], 0, CHAN_W );
1399 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1400 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1401 micro_pow( &r[1], &r[1], &r[2] );
1402 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1403 STORE( &r[0], 0, CHAN_Z );
1404 }
1405 }
1406
1407 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1408 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1409 }
1410 break;
1411
1412 case TGSI_OPCODE_RCP:
1413 /* TGSI_OPCODE_RECIP */
1414 FETCH( &r[0], 0, CHAN_X );
1415 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1416 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1417 STORE( &r[0], 0, chan_index );
1418 }
1419 break;
1420
1421 case TGSI_OPCODE_RSQ:
1422 /* TGSI_OPCODE_RECIPSQRT */
1423 FETCH( &r[0], 0, CHAN_X );
1424 micro_sqrt( &r[0], &r[0] );
1425 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1426 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1427 STORE( &r[0], 0, chan_index );
1428 }
1429 break;
1430
1431 case TGSI_OPCODE_EXP:
1432 assert (0);
1433 break;
1434
1435 case TGSI_OPCODE_LOG:
1436 assert (0);
1437 break;
1438
1439 case TGSI_OPCODE_MUL:
1440 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1441 {
1442 FETCH(&r[0], 0, chan_index);
1443 FETCH(&r[1], 1, chan_index);
1444
1445 micro_mul( &r[0], &r[0], &r[1] );
1446
1447 STORE(&r[0], 0, chan_index);
1448 }
1449 break;
1450
1451 case TGSI_OPCODE_ADD:
1452 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1453 FETCH( &r[0], 0, chan_index );
1454 FETCH( &r[1], 1, chan_index );
1455 micro_add( &r[0], &r[0], &r[1] );
1456 STORE( &r[0], 0, chan_index );
1457 }
1458 break;
1459
1460 case TGSI_OPCODE_DP3:
1461 /* TGSI_OPCODE_DOT3 */
1462 FETCH( &r[0], 0, CHAN_X );
1463 FETCH( &r[1], 1, CHAN_X );
1464 micro_mul( &r[0], &r[0], &r[1] );
1465
1466 FETCH( &r[1], 0, CHAN_Y );
1467 FETCH( &r[2], 1, CHAN_Y );
1468 micro_mul( &r[1], &r[1], &r[2] );
1469 micro_add( &r[0], &r[0], &r[1] );
1470
1471 FETCH( &r[1], 0, CHAN_Z );
1472 FETCH( &r[2], 1, CHAN_Z );
1473 micro_mul( &r[1], &r[1], &r[2] );
1474 micro_add( &r[0], &r[0], &r[1] );
1475
1476 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1477 STORE( &r[0], 0, chan_index );
1478 }
1479 break;
1480
1481 case TGSI_OPCODE_DP4:
1482 /* TGSI_OPCODE_DOT4 */
1483 FETCH(&r[0], 0, CHAN_X);
1484 FETCH(&r[1], 1, CHAN_X);
1485
1486 micro_mul( &r[0], &r[0], &r[1] );
1487
1488 FETCH(&r[1], 0, CHAN_Y);
1489 FETCH(&r[2], 1, CHAN_Y);
1490
1491 micro_mul( &r[1], &r[1], &r[2] );
1492 micro_add( &r[0], &r[0], &r[1] );
1493
1494 FETCH(&r[1], 0, CHAN_Z);
1495 FETCH(&r[2], 1, CHAN_Z);
1496
1497 micro_mul( &r[1], &r[1], &r[2] );
1498 micro_add( &r[0], &r[0], &r[1] );
1499
1500 FETCH(&r[1], 0, CHAN_W);
1501 FETCH(&r[2], 1, CHAN_W);
1502
1503 micro_mul( &r[1], &r[1], &r[2] );
1504 micro_add( &r[0], &r[0], &r[1] );
1505
1506 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1507 STORE( &r[0], 0, chan_index );
1508 }
1509 break;
1510
1511 case TGSI_OPCODE_DST:
1512 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1513 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1514 }
1515
1516 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1517 FETCH( &r[0], 0, CHAN_Y );
1518 FETCH( &r[1], 1, CHAN_Y);
1519 micro_mul( &r[0], &r[0], &r[1] );
1520 STORE( &r[0], 0, CHAN_Y );
1521 }
1522
1523 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1524 FETCH( &r[0], 0, CHAN_Z );
1525 STORE( &r[0], 0, CHAN_Z );
1526 }
1527
1528 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1529 FETCH( &r[0], 1, CHAN_W );
1530 STORE( &r[0], 0, CHAN_W );
1531 }
1532 break;
1533
1534 case TGSI_OPCODE_MIN:
1535 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1536 FETCH(&r[0], 0, chan_index);
1537 FETCH(&r[1], 1, chan_index);
1538
1539 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1540
1541 STORE(&r[0], 0, chan_index);
1542 }
1543 break;
1544
1545 case TGSI_OPCODE_MAX:
1546 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1547 FETCH(&r[0], 0, chan_index);
1548 FETCH(&r[1], 1, chan_index);
1549
1550 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1551
1552 STORE(&r[0], 0, chan_index );
1553 }
1554 break;
1555
1556 case TGSI_OPCODE_SLT:
1557 /* TGSI_OPCODE_SETLT */
1558 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1559 FETCH( &r[0], 0, chan_index );
1560 FETCH( &r[1], 1, chan_index );
1561 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1562 STORE( &r[0], 0, chan_index );
1563 }
1564 break;
1565
1566 case TGSI_OPCODE_SGE:
1567 /* TGSI_OPCODE_SETGE */
1568 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1569 FETCH( &r[0], 0, chan_index );
1570 FETCH( &r[1], 1, chan_index );
1571 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1572 STORE( &r[0], 0, chan_index );
1573 }
1574 break;
1575
1576 case TGSI_OPCODE_MAD:
1577 /* TGSI_OPCODE_MADD */
1578 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1579 FETCH( &r[0], 0, chan_index );
1580 FETCH( &r[1], 1, chan_index );
1581 micro_mul( &r[0], &r[0], &r[1] );
1582 FETCH( &r[1], 2, chan_index );
1583 micro_add( &r[0], &r[0], &r[1] );
1584 STORE( &r[0], 0, chan_index );
1585 }
1586 break;
1587
1588 case TGSI_OPCODE_SUB:
1589 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1590 FETCH(&r[0], 0, chan_index);
1591 FETCH(&r[1], 1, chan_index);
1592
1593 micro_sub( &r[0], &r[0], &r[1] );
1594
1595 STORE(&r[0], 0, chan_index);
1596 }
1597 break;
1598
1599 case TGSI_OPCODE_LERP:
1600 /* TGSI_OPCODE_LRP */
1601 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1602 FETCH(&r[0], 0, chan_index);
1603 FETCH(&r[1], 1, chan_index);
1604 FETCH(&r[2], 2, chan_index);
1605
1606 micro_sub( &r[1], &r[1], &r[2] );
1607 micro_mul( &r[0], &r[0], &r[1] );
1608 micro_add( &r[0], &r[0], &r[2] );
1609
1610 STORE(&r[0], 0, chan_index);
1611 }
1612 break;
1613
1614 case TGSI_OPCODE_CND:
1615 assert (0);
1616 break;
1617
1618 case TGSI_OPCODE_CND0:
1619 assert (0);
1620 break;
1621
1622 case TGSI_OPCODE_DOT2ADD:
1623 /* TGSI_OPCODE_DP2A */
1624 assert (0);
1625 break;
1626
1627 case TGSI_OPCODE_INDEX:
1628 assert (0);
1629 break;
1630
1631 case TGSI_OPCODE_NEGATE:
1632 assert (0);
1633 break;
1634
1635 case TGSI_OPCODE_FRAC:
1636 /* TGSI_OPCODE_FRC */
1637 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1638 FETCH( &r[0], 0, chan_index );
1639 micro_frc( &r[0], &r[0] );
1640 STORE( &r[0], 0, chan_index );
1641 }
1642 break;
1643
1644 case TGSI_OPCODE_CLAMP:
1645 assert (0);
1646 break;
1647
1648 case TGSI_OPCODE_FLOOR:
1649 /* TGSI_OPCODE_FLR */
1650 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1651 FETCH( &r[0], 0, chan_index );
1652 micro_flr( &r[0], &r[0] );
1653 STORE( &r[0], 0, chan_index );
1654 }
1655 break;
1656
1657 case TGSI_OPCODE_ROUND:
1658 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1659 FETCH( &r[0], 0, chan_index );
1660 micro_rnd( &r[0], &r[0] );
1661 STORE( &r[0], 0, chan_index );
1662 }
1663 break;
1664
1665 case TGSI_OPCODE_EXPBASE2:
1666 /* TGSI_OPCODE_EX2 */
1667 FETCH(&r[0], 0, CHAN_X);
1668
1669 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
1670
1671 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1672 STORE( &r[0], 0, chan_index );
1673 }
1674 break;
1675
1676 case TGSI_OPCODE_LOGBASE2:
1677 /* TGSI_OPCODE_LG2 */
1678 FETCH( &r[0], 0, CHAN_X );
1679 micro_lg2( &r[0], &r[0] );
1680 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1681 STORE( &r[0], 0, chan_index );
1682 }
1683 break;
1684
1685 case TGSI_OPCODE_POWER:
1686 /* TGSI_OPCODE_POW */
1687 FETCH(&r[0], 0, CHAN_X);
1688 FETCH(&r[1], 1, CHAN_X);
1689
1690 micro_pow( &r[0], &r[0], &r[1] );
1691
1692 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1693 STORE( &r[0], 0, chan_index );
1694 }
1695 break;
1696
1697 case TGSI_OPCODE_CROSSPRODUCT:
1698 /* TGSI_OPCODE_XPD */
1699 FETCH(&r[0], 0, CHAN_Y);
1700 FETCH(&r[1], 1, CHAN_Z);
1701
1702 micro_mul( &r[2], &r[0], &r[1] );
1703
1704 FETCH(&r[3], 0, CHAN_Z);
1705 FETCH(&r[4], 1, CHAN_Y);
1706
1707 micro_mul( &r[5], &r[3], &r[4] );
1708 micro_sub( &r[2], &r[2], &r[5] );
1709
1710 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1711 STORE( &r[2], 0, CHAN_X );
1712 }
1713
1714 FETCH(&r[2], 1, CHAN_X);
1715
1716 micro_mul( &r[3], &r[3], &r[2] );
1717
1718 FETCH(&r[5], 0, CHAN_X);
1719
1720 micro_mul( &r[1], &r[1], &r[5] );
1721 micro_sub( &r[3], &r[3], &r[1] );
1722
1723 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1724 STORE( &r[3], 0, CHAN_Y );
1725 }
1726
1727 micro_mul( &r[5], &r[5], &r[4] );
1728 micro_mul( &r[0], &r[0], &r[2] );
1729 micro_sub( &r[5], &r[5], &r[0] );
1730
1731 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1732 STORE( &r[5], 0, CHAN_Z );
1733 }
1734
1735 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1736 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1737 }
1738 break;
1739
1740 case TGSI_OPCODE_MULTIPLYMATRIX:
1741 assert (0);
1742 break;
1743
1744 case TGSI_OPCODE_ABS:
1745 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1746 FETCH(&r[0], 0, chan_index);
1747
1748 micro_abs( &r[0], &r[0] );
1749
1750 STORE(&r[0], 0, chan_index);
1751 }
1752 break;
1753
1754 case TGSI_OPCODE_RCC:
1755 assert (0);
1756 break;
1757
1758 case TGSI_OPCODE_DPH:
1759 FETCH(&r[0], 0, CHAN_X);
1760 FETCH(&r[1], 1, CHAN_X);
1761
1762 micro_mul( &r[0], &r[0], &r[1] );
1763
1764 FETCH(&r[1], 0, CHAN_Y);
1765 FETCH(&r[2], 1, CHAN_Y);
1766
1767 micro_mul( &r[1], &r[1], &r[2] );
1768 micro_add( &r[0], &r[0], &r[1] );
1769
1770 FETCH(&r[1], 0, CHAN_Z);
1771 FETCH(&r[2], 1, CHAN_Z);
1772
1773 micro_mul( &r[1], &r[1], &r[2] );
1774 micro_add( &r[0], &r[0], &r[1] );
1775
1776 FETCH(&r[1], 1, CHAN_W);
1777
1778 micro_add( &r[0], &r[0], &r[1] );
1779
1780 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1781 STORE( &r[0], 0, chan_index );
1782 }
1783 break;
1784
1785 case TGSI_OPCODE_COS:
1786 FETCH(&r[0], 0, CHAN_X);
1787
1788 micro_cos( &r[0], &r[0] );
1789
1790 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1791 STORE( &r[0], 0, chan_index );
1792 }
1793 break;
1794
1795 case TGSI_OPCODE_DDX:
1796 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1797 FETCH( &r[0], 0, chan_index );
1798 micro_ddx( &r[0], &r[0] );
1799 STORE( &r[0], 0, chan_index );
1800 }
1801 break;
1802
1803 case TGSI_OPCODE_DDY:
1804 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1805 FETCH( &r[0], 0, chan_index );
1806 micro_ddy( &r[0], &r[0] );
1807 STORE( &r[0], 0, chan_index );
1808 }
1809 break;
1810
1811 case TGSI_OPCODE_KILP:
1812 exec_kilp (mach, inst);
1813 break;
1814
1815 case TGSI_OPCODE_KIL:
1816 exec_kil (mach, inst);
1817 break;
1818
1819 case TGSI_OPCODE_PK2H:
1820 assert (0);
1821 break;
1822
1823 case TGSI_OPCODE_PK2US:
1824 assert (0);
1825 break;
1826
1827 case TGSI_OPCODE_PK4B:
1828 assert (0);
1829 break;
1830
1831 case TGSI_OPCODE_PK4UB:
1832 assert (0);
1833 break;
1834
1835 case TGSI_OPCODE_RFL:
1836 assert (0);
1837 break;
1838
1839 case TGSI_OPCODE_SEQ:
1840 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1841 FETCH( &r[0], 0, chan_index );
1842 FETCH( &r[1], 1, chan_index );
1843 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1844 STORE( &r[0], 0, chan_index );
1845 }
1846 break;
1847
1848 case TGSI_OPCODE_SFL:
1849 assert (0);
1850 break;
1851
1852 case TGSI_OPCODE_SGT:
1853 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1854 FETCH( &r[0], 0, chan_index );
1855 FETCH( &r[1], 1, chan_index );
1856 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1857 STORE( &r[0], 0, chan_index );
1858 }
1859 break;
1860
1861 case TGSI_OPCODE_SIN:
1862 FETCH( &r[0], 0, CHAN_X );
1863 micro_sin( &r[0], &r[0] );
1864 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1865 STORE( &r[0], 0, chan_index );
1866 }
1867 break;
1868
1869 case TGSI_OPCODE_SLE:
1870 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1871 FETCH( &r[0], 0, chan_index );
1872 FETCH( &r[1], 1, chan_index );
1873 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1874 STORE( &r[0], 0, chan_index );
1875 }
1876 break;
1877
1878 case TGSI_OPCODE_SNE:
1879 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1880 FETCH( &r[0], 0, chan_index );
1881 FETCH( &r[1], 1, chan_index );
1882 micro_ne( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1883 STORE( &r[0], 0, chan_index );
1884 }
1885 break;
1886
1887 case TGSI_OPCODE_STR:
1888 assert (0);
1889 break;
1890
1891 case TGSI_OPCODE_TEX:
1892 {
1893 const GLuint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1894 switch (inst->InstructionExtTexture.Texture) {
1895 case TGSI_TEXTURE_1D:
1896
1897 FETCH(&r[0], 0, CHAN_X);
1898
1899 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
1900 case TGSI_EXTSWIZZLE_W:
1901 FETCH(&r[1], 0, CHAN_W);
1902 micro_div( &r[0], &r[0], &r[1] );
1903 break;
1904
1905 case TGSI_EXTSWIZZLE_ONE:
1906 break;
1907
1908 default:
1909 assert (0);
1910 }
1911
1912 fetch_texel(&mach->Samplers[unit],
1913 &r[0], NULL, NULL, 0.0,
1914 &r[0], &r[1], &r[2], &r[3]);
1915 break;
1916
1917 case TGSI_TEXTURE_2D:
1918 case TGSI_TEXTURE_RECT:
1919
1920 FETCH(&r[0], 0, CHAN_X);
1921 FETCH(&r[1], 0, CHAN_Y);
1922
1923 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
1924 case TGSI_EXTSWIZZLE_W:
1925 FETCH(&r[2], 0, CHAN_W);
1926 micro_div( &r[0], &r[0], &r[2] );
1927 micro_div( &r[1], &r[1], &r[2] );
1928 break;
1929
1930 case TGSI_EXTSWIZZLE_ONE:
1931 break;
1932
1933 default:
1934 assert (0);
1935 }
1936
1937 fetch_texel(&mach->Samplers[unit],
1938 &r[0], &r[1], NULL, 0.0,
1939 &r[0], &r[1], &r[2], &r[3]);
1940 break;
1941
1942 case TGSI_TEXTURE_3D:
1943 case TGSI_TEXTURE_CUBE:
1944
1945 FETCH(&r[0], 0, CHAN_X);
1946 FETCH(&r[1], 0, CHAN_Y);
1947 FETCH(&r[2], 0, CHAN_Z);
1948
1949 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
1950 case TGSI_EXTSWIZZLE_W:
1951 FETCH(&r[3], 0, CHAN_W);
1952 micro_div( &r[0], &r[0], &r[3] );
1953 micro_div( &r[1], &r[1], &r[3] );
1954 micro_div( &r[2], &r[2], &r[3] );
1955 break;
1956
1957 case TGSI_EXTSWIZZLE_ONE:
1958 break;
1959
1960 default:
1961 assert (0);
1962 }
1963
1964 fetch_texel(&mach->Samplers[unit],
1965 &r[0], &r[1], &r[2], 0.0,
1966 &r[0], &r[1], &r[2], &r[3]);
1967 break;
1968
1969 default:
1970 assert (0);
1971 }
1972
1973 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1974 STORE( &r[chan_index], 0, chan_index );
1975 }
1976 }
1977 break;
1978
1979 case TGSI_OPCODE_TXD:
1980 assert (0);
1981 break;
1982
1983 case TGSI_OPCODE_UP2H:
1984 assert (0);
1985 break;
1986
1987 case TGSI_OPCODE_UP2US:
1988 assert (0);
1989 break;
1990
1991 case TGSI_OPCODE_UP4B:
1992 assert (0);
1993 break;
1994
1995 case TGSI_OPCODE_UP4UB:
1996 assert (0);
1997 break;
1998
1999 case TGSI_OPCODE_X2D:
2000 assert (0);
2001 break;
2002
2003 case TGSI_OPCODE_ARA:
2004 assert (0);
2005 break;
2006
2007 case TGSI_OPCODE_ARR:
2008 assert (0);
2009 break;
2010
2011 case TGSI_OPCODE_BRA:
2012 assert (0);
2013 break;
2014
2015 case TGSI_OPCODE_CAL:
2016 /* skip the call if no execution channels are enabled */
2017 if (mach->ExecMask) {
2018 /* do the call */
2019
2020 /* push Cond, Loop, Cont stacks */
2021 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2022 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2023 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2024 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2025 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2026 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2027
2028 /* note that PC was already incremented above */
2029 mach->CallStack[mach->CallStackTop++] = *pc;
2030 *pc = inst->InstructionExtLabel.Label;
2031 }
2032 break;
2033
2034 case TGSI_OPCODE_RET:
2035 /* XXX examine ExecMask to determine if we should _really_ return */
2036 /* pop Cond, Loop, Cont stacks */
2037 assert(mach->CondStackTop > 0);
2038 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2039 assert(mach->LoopStackTop > 0);
2040 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2041 assert(mach->ContStackTop > 0);
2042 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2043
2044 assert(mach->CallStackTop >= 0);
2045 if (mach->CallStackTop == 0) {
2046 /* XXX error? */
2047 return;
2048 }
2049 *pc = mach->CallStack[--mach->CallStackTop];
2050 break;
2051
2052 case TGSI_OPCODE_SSG:
2053 assert (0);
2054 break;
2055
2056 case TGSI_OPCODE_CMP:
2057 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2058 FETCH(&r[0], 0, chan_index);
2059 FETCH(&r[1], 1, chan_index);
2060 FETCH(&r[2], 2, chan_index);
2061
2062 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2063
2064 STORE(&r[0], 0, chan_index);
2065 }
2066 break;
2067
2068 case TGSI_OPCODE_SCS:
2069 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2070 FETCH( &r[0], 0, CHAN_X );
2071 }
2072 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2073 micro_cos( &r[1], &r[0] );
2074 STORE( &r[1], 0, CHAN_X );
2075 }
2076 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2077 micro_sin( &r[1], &r[0] );
2078 STORE( &r[1], 0, CHAN_Y );
2079 }
2080 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2081 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2082 }
2083 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2084 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2085 }
2086 break;
2087
2088 case TGSI_OPCODE_TXB:
2089 assert (0);
2090 break;
2091
2092 case TGSI_OPCODE_NRM:
2093 assert (0);
2094 break;
2095
2096 case TGSI_OPCODE_DIV:
2097 assert( 0 );
2098 break;
2099
2100 case TGSI_OPCODE_DP2:
2101 FETCH( &r[0], 0, CHAN_X );
2102 FETCH( &r[1], 1, CHAN_X );
2103 micro_mul( &r[0], &r[0], &r[1] );
2104
2105 FETCH( &r[1], 0, CHAN_Y );
2106 FETCH( &r[2], 1, CHAN_Y );
2107 micro_mul( &r[1], &r[1], &r[2] );
2108 micro_add( &r[0], &r[0], &r[1] );
2109
2110 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2111 STORE( &r[0], 0, chan_index );
2112 }
2113 break;
2114
2115 case TGSI_OPCODE_TXL:
2116 assert (0);
2117 break;
2118
2119 case TGSI_OPCODE_IF:
2120 /* push CondMask */
2121 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2122 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2123 FETCH( &r[0], 0, CHAN_X );
2124 /* update CondMask */
2125 if( ! r[0].u[0] ) {
2126 mach->CondMask &= ~0x1;
2127 }
2128 if( ! r[0].u[1] ) {
2129 mach->CondMask &= ~0x2;
2130 }
2131 if( ! r[0].u[2] ) {
2132 mach->CondMask &= ~0x4;
2133 }
2134 if( ! r[0].u[3] ) {
2135 mach->CondMask &= ~0x8;
2136 }
2137 UPDATE_EXEC_MASK(mach);
2138 /* Todo: If CondMask==0, jump to ELSE */
2139 break;
2140
2141 case TGSI_OPCODE_ELSE:
2142 /* invert CondMask wrt previous mask */
2143 {
2144 uint prevMask;
2145 assert(mach->CondStackTop > 0);
2146 prevMask = mach->CondStack[mach->CondStackTop - 1];
2147 mach->CondMask = ~mach->CondMask & prevMask;
2148 UPDATE_EXEC_MASK(mach);
2149 /* Todo: If CondMask==0, jump to ENDIF */
2150 }
2151 break;
2152
2153 case TGSI_OPCODE_ENDIF:
2154 /* pop CondMask */
2155 assert(mach->CondStackTop > 0);
2156 mach->CondMask = mach->CondStack[--mach->CondStackTop];
2157 UPDATE_EXEC_MASK(mach);
2158 break;
2159
2160 case TGSI_OPCODE_END:
2161 /* halt execution */
2162 *pc = -1;
2163 break;
2164
2165 case TGSI_OPCODE_REP:
2166 assert (0);
2167 break;
2168
2169 case TGSI_OPCODE_ENDREP:
2170 assert (0);
2171 break;
2172
2173 case TGSI_OPCODE_PUSHA:
2174 assert (0);
2175 break;
2176
2177 case TGSI_OPCODE_POPA:
2178 assert (0);
2179 break;
2180
2181 case TGSI_OPCODE_CEIL:
2182 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2183 FETCH( &r[0], 0, chan_index );
2184 micro_ceil( &r[0], &r[0] );
2185 STORE( &r[0], 0, chan_index );
2186 }
2187 break;
2188
2189 case TGSI_OPCODE_I2F:
2190 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2191 FETCH( &r[0], 0, chan_index );
2192 micro_i2f( &r[0], &r[0] );
2193 STORE( &r[0], 0, chan_index );
2194 }
2195 break;
2196
2197 case TGSI_OPCODE_NOT:
2198 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2199 FETCH( &r[0], 0, chan_index );
2200 micro_not( &r[0], &r[0] );
2201 STORE( &r[0], 0, chan_index );
2202 }
2203 break;
2204
2205 case TGSI_OPCODE_TRUNC:
2206 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2207 FETCH( &r[0], 0, chan_index );
2208 micro_trunc( &r[0], &r[0] );
2209 STORE( &r[0], 0, chan_index );
2210 }
2211 break;
2212
2213 case TGSI_OPCODE_SHL:
2214 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2215 FETCH( &r[0], 0, chan_index );
2216 FETCH( &r[1], 1, chan_index );
2217 micro_shl( &r[0], &r[0], &r[1] );
2218 STORE( &r[0], 0, chan_index );
2219 }
2220 break;
2221
2222 case TGSI_OPCODE_SHR:
2223 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2224 FETCH( &r[0], 0, chan_index );
2225 FETCH( &r[1], 1, chan_index );
2226 micro_ishr( &r[0], &r[0], &r[1] );
2227 STORE( &r[0], 0, chan_index );
2228 }
2229 break;
2230
2231 case TGSI_OPCODE_AND:
2232 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2233 FETCH( &r[0], 0, chan_index );
2234 FETCH( &r[1], 1, chan_index );
2235 micro_and( &r[0], &r[0], &r[1] );
2236 STORE( &r[0], 0, chan_index );
2237 }
2238 break;
2239
2240 case TGSI_OPCODE_OR:
2241 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2242 FETCH( &r[0], 0, chan_index );
2243 FETCH( &r[1], 1, chan_index );
2244 micro_or( &r[0], &r[0], &r[1] );
2245 STORE( &r[0], 0, chan_index );
2246 }
2247 break;
2248
2249 case TGSI_OPCODE_MOD:
2250 assert (0);
2251 break;
2252
2253 case TGSI_OPCODE_XOR:
2254 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2255 FETCH( &r[0], 0, chan_index );
2256 FETCH( &r[1], 1, chan_index );
2257 micro_xor( &r[0], &r[0], &r[1] );
2258 STORE( &r[0], 0, chan_index );
2259 }
2260 break;
2261
2262 case TGSI_OPCODE_SAD:
2263 assert (0);
2264 break;
2265
2266 case TGSI_OPCODE_TXF:
2267 assert (0);
2268 break;
2269
2270 case TGSI_OPCODE_TXQ:
2271 assert (0);
2272 break;
2273
2274 case TGSI_OPCODE_EMIT:
2275 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2276 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2277 break;
2278
2279 case TGSI_OPCODE_ENDPRIM:
2280 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2281 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2282 break;
2283
2284 case TGSI_OPCODE_LOOP:
2285 /* fall-through (for now) */
2286 case TGSI_OPCODE_BGNLOOP2:
2287 /* push LoopMask and ContMasks */
2288 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2289 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2290 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2291 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2292 break;
2293
2294 case TGSI_OPCODE_ENDLOOP:
2295 /* fall-through (for now at least) */
2296 case TGSI_OPCODE_ENDLOOP2:
2297 /* Restore ContMask, but don't pop */
2298 assert(mach->ContStackTop > 0);
2299 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2300 if (mach->LoopMask) {
2301 /* repeat loop: jump to instruction just past BGNLOOP */
2302 *pc = inst->InstructionExtLabel.Label + 1;
2303 }
2304 else {
2305 /* exit loop: pop LoopMask */
2306 assert(mach->LoopStackTop > 0);
2307 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2308 /* pop ContMask */
2309 assert(mach->ContStackTop > 0);
2310 mach->ContMask = mach->ContStack[--mach->ContStackTop];
2311 }
2312 UPDATE_EXEC_MASK(mach);
2313 break;
2314
2315 case TGSI_OPCODE_BRK:
2316 /* turn off loop channels for each enabled exec channel */
2317 mach->LoopMask &= ~mach->ExecMask;
2318 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2319 UPDATE_EXEC_MASK(mach);
2320 break;
2321
2322 case TGSI_OPCODE_CONT:
2323 /* turn off cont channels for each enabled exec channel */
2324 mach->ContMask &= ~mach->ExecMask;
2325 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2326 UPDATE_EXEC_MASK(mach);
2327 break;
2328
2329 case TGSI_OPCODE_BGNSUB:
2330 /* no-op */
2331 break;
2332
2333 case TGSI_OPCODE_ENDSUB:
2334 /* no-op */
2335 break;
2336
2337 case TGSI_OPCODE_NOISE1:
2338 assert( 0 );
2339 break;
2340
2341 case TGSI_OPCODE_NOISE2:
2342 assert( 0 );
2343 break;
2344
2345 case TGSI_OPCODE_NOISE3:
2346 assert( 0 );
2347 break;
2348
2349 case TGSI_OPCODE_NOISE4:
2350 assert( 0 );
2351 break;
2352
2353 case TGSI_OPCODE_NOP:
2354 break;
2355
2356 default:
2357 assert( 0 );
2358 }
2359 }
2360
2361
2362 void
2363 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2364 {
2365 uint i;
2366 int pc = 0;
2367
2368 #if XXX_SSE
2369 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2370 #else
2371
2372 mach->CondMask = 0xf;
2373 mach->LoopMask = 0xf;
2374 mach->ContMask = 0xf;
2375 mach->ExecMask = 0xf;
2376
2377 assert(mach->CondStackTop == 0);
2378 assert(mach->LoopStackTop == 0);
2379 assert(mach->ContStackTop == 0);
2380 assert(mach->CallStackTop == 0);
2381
2382 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2383 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2384
2385 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2386 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2387 mach->Primitives[0] = 0;
2388 }
2389
2390
2391 /* execute declarations (interpolants) */
2392 for (i = 0; i < mach->NumDeclarations; i++) {
2393 exec_declaration( mach, mach->Declarations+i );
2394 }
2395
2396 /* execute instructions, until pc is set to -1 */
2397 while (pc != -1) {
2398 assert(pc < mach->NumInstructions);
2399 exec_instruction( mach, mach->Instructions + pc, &pc );
2400 }
2401
2402 #endif
2403
2404 #if 0
2405 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2406 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2407 /*
2408 * Scale back depth component.
2409 */
2410 for (i = 0; i < 4; i++)
2411 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2412 }
2413 #endif
2414 }
2415
2416