6aaaef98a832277cfccc610ba371248fac13fe38
[mesa.git] / src / mesa / pipe / tgsi / core / tgsi_exec.c
1 #include "tgsi_platform.h"
2 #include "tgsi_core.h"
3
4 #define MESA 1
5 #if MESA
6 #include "main/context.h"
7 #include "main/macros.h"
8 #include "main/colormac.h"
9 #include "swrast/swrast.h"
10 #include "swrast/s_context.h"
11 #endif
12
/*
 * Lane positions inside a 4-lane channel.  micro_ddx()/micro_ddy() and the
 * texel fetch code index channel values with these to form differences
 * between neighbouring lanes.
 */
#define TILE_BOTTOM_LEFT   0
#define TILE_BOTTOM_RIGHT  1
#define TILE_TOP_LEFT      2
#define TILE_TOP_RIGHT     3
17
/*
 * Short aliases for the TGSI_EXEC_TEMP_* constants.  In this file an "_I"
 * alias is used as an index into mach->Temps[] and the matching "_C" alias
 * as the index into that register's xyzw[] channels.
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
41
/* Loop header that steps the lvalue CHAN through the channel indices 0..3. */
#define FOR_EACH_CHANNEL(CHAN)\
   for ((CHAN) = 0; (CHAN) < 4; (CHAN)++)

/* Non-zero when bit CHAN is set in the write mask of INST's destination 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/* Non-zero when bit CHAN is set in the write mask of INST's destination 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/*
 * Run the statement that follows once for every channel enabled in the
 * write mask of destination 0.
 *
 * The test is written as "if (!enabled) {} else" so the macro's hidden
 * `if` is already closed by its own `else`: an `else` written after the
 * controlled statement binds to the caller's `if`, not to this macro.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (!IS_CHANNEL_ENABLED( INST, CHAN )) {} else

/* Same as FOR_EACH_ENABLED_CHANNEL, but for the write mask of destination 1. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (!IS_CHANNEL_ENABLED2( INST, CHAN )) {} else
58
/* Channel indices; fetch_source() uses CHAN_X to select the X component. */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3
63
64 void
65 tgsi_exec_machine_init(
66 struct tgsi_exec_machine *mach,
67 struct tgsi_token *tokens )
68 {
69 GLuint i, k;
70 struct tgsi_parse_context parse;
71
72 mach->Tokens = tokens;
73
74 k = tgsi_parse_init (&parse, mach->Tokens);
75 if (k != TGSI_PARSE_OK) {
76 printf("Problem parsing!\n");
77 return;
78 }
79
80 mach->Processor = parse.FullHeader.Processor.Processor;
81 tgsi_parse_free (&parse);
82
83 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
84 mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
85
86 #if XXX_SSE
87 tgsi_emit_sse (tokens,
88 &mach->Function);
89 #endif
90
91 /* Setup constants. */
92 for( i = 0; i < 4; i++ ) {
93 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
94 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
95 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
96 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
97 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
98 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
99 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
100 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
101 }
102 }
103
104 void
105 tgsi_exec_prepare(
106 struct tgsi_exec_machine *mach,
107 struct tgsi_exec_labels *labels )
108 {
109 struct tgsi_parse_context parse;
110 GLuint k;
111
112 mach->ImmLimit = 0;
113 labels->count = 0;
114
115 k = tgsi_parse_init( &parse, mach->Tokens );
116 if (k != TGSI_PARSE_OK) {
117 printf("Problem parsing!\n");
118 return;
119 }
120
121 while( !tgsi_parse_end_of_tokens( &parse ) ) {
122 GLuint pointer = parse.Position;
123 GLuint i;
124 tgsi_parse_token( &parse );
125 switch( parse.FullToken.Token.Type ) {
126 case TGSI_TOKEN_TYPE_DECLARATION:
127 break;
128 case TGSI_TOKEN_TYPE_IMMEDIATE:
129 assert( (parse.FullToken.FullImmediate.Immediate.Size - 1) % 4 == 0 );
130 assert( mach->ImmLimit + (parse.FullToken.FullImmediate.Immediate.Size - 1) / 4 <= 256 );
131 for( i = 0; i < parse.FullToken.FullImmediate.Immediate.Size - 1; i++ ) {
132 mach->Imms[mach->ImmLimit + i / 4][i % 4] = parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
133 }
134 mach->ImmLimit += (parse.FullToken.FullImmediate.Immediate.Size - 1) / 4;
135 break;
136 case TGSI_TOKEN_TYPE_INSTRUCTION:
137 if( parse.FullToken.FullInstruction.InstructionExtLabel.Label &&
138 parse.FullToken.FullInstruction.InstructionExtLabel.Target ) {
139 assert( labels->count < 128 );
140 labels->labels[labels->count][0] = parse.FullToken.FullInstruction.InstructionExtLabel.Label;
141 labels->labels[labels->count][1] = pointer;
142 labels->count++;
143 }
144 break;
145 default:
146 assert( 0 );
147 }
148 }
149 tgsi_parse_free (&parse);
150 }
151
152 void
153 tgsi_exec_machine_run(
154 struct tgsi_exec_machine *mach )
155 {
156 struct tgsi_exec_labels labels;
157
158 tgsi_exec_prepare( mach, &labels );
159 tgsi_exec_machine_run2( mach, &labels );
160 }
161
162 static void
163 micro_abs(
164 union tgsi_exec_channel *dst,
165 const union tgsi_exec_channel *src )
166 {
167 dst->f[0] = (GLfloat) fabs( (GLdouble) src->f[0] );
168 dst->f[1] = (GLfloat) fabs( (GLdouble) src->f[1] );
169 dst->f[2] = (GLfloat) fabs( (GLdouble) src->f[2] );
170 dst->f[3] = (GLfloat) fabs( (GLdouble) src->f[3] );
171 }
172
173 static void
174 micro_add(
175 union tgsi_exec_channel *dst,
176 const union tgsi_exec_channel *src0,
177 const union tgsi_exec_channel *src1 )
178 {
179 dst->f[0] = src0->f[0] + src1->f[0];
180 dst->f[1] = src0->f[1] + src1->f[1];
181 dst->f[2] = src0->f[2] + src1->f[2];
182 dst->f[3] = src0->f[3] + src1->f[3];
183 }
184
185 static void
186 micro_iadd(
187 union tgsi_exec_channel *dst,
188 const union tgsi_exec_channel *src0,
189 const union tgsi_exec_channel *src1 )
190 {
191 dst->i[0] = src0->i[0] + src1->i[0];
192 dst->i[1] = src0->i[1] + src1->i[1];
193 dst->i[2] = src0->i[2] + src1->i[2];
194 dst->i[3] = src0->i[3] + src1->i[3];
195 }
196
197 static void
198 micro_and(
199 union tgsi_exec_channel *dst,
200 const union tgsi_exec_channel *src0,
201 const union tgsi_exec_channel *src1 )
202 {
203 dst->u[0] = src0->u[0] & src1->u[0];
204 dst->u[1] = src0->u[1] & src1->u[1];
205 dst->u[2] = src0->u[2] & src1->u[2];
206 dst->u[3] = src0->u[3] & src1->u[3];
207 }
208
209 static void
210 micro_ceil(
211 union tgsi_exec_channel *dst,
212 const union tgsi_exec_channel *src )
213 {
214 dst->f[0] = (GLfloat) ceil( (GLdouble) src->f[0] );
215 dst->f[1] = (GLfloat) ceil( (GLdouble) src->f[1] );
216 dst->f[2] = (GLfloat) ceil( (GLdouble) src->f[2] );
217 dst->f[3] = (GLfloat) ceil( (GLdouble) src->f[3] );
218 }
219
220 static void
221 micro_cos(
222 union tgsi_exec_channel *dst,
223 const union tgsi_exec_channel *src )
224 {
225 dst->f[0] = (GLfloat) cos( (GLdouble) src->f[0] );
226 dst->f[1] = (GLfloat) cos( (GLdouble) src->f[1] );
227 dst->f[2] = (GLfloat) cos( (GLdouble) src->f[2] );
228 dst->f[3] = (GLfloat) cos( (GLdouble) src->f[3] );
229 }
230
231 static void
232 micro_ddx(
233 union tgsi_exec_channel *dst,
234 const union tgsi_exec_channel *src )
235 {
236 dst->f[0] =
237 dst->f[1] =
238 dst->f[2] =
239 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
240 }
241
242 static void
243 micro_ddy(
244 union tgsi_exec_channel *dst,
245 const union tgsi_exec_channel *src )
246 {
247 dst->f[0] =
248 dst->f[1] =
249 dst->f[2] =
250 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
251 }
252
253 static void
254 micro_div(
255 union tgsi_exec_channel *dst,
256 const union tgsi_exec_channel *src0,
257 const union tgsi_exec_channel *src1 )
258 {
259 dst->f[0] = src0->f[0] / src1->f[0];
260 dst->f[1] = src0->f[1] / src1->f[1];
261 dst->f[2] = src0->f[2] / src1->f[2];
262 dst->f[3] = src0->f[3] / src1->f[3];
263 }
264
265 static void
266 micro_udiv(
267 union tgsi_exec_channel *dst,
268 const union tgsi_exec_channel *src0,
269 const union tgsi_exec_channel *src1 )
270 {
271 dst->u[0] = src0->u[0] / src1->u[0];
272 dst->u[1] = src0->u[1] / src1->u[1];
273 dst->u[2] = src0->u[2] / src1->u[2];
274 dst->u[3] = src0->u[3] / src1->u[3];
275 }
276
277 static void
278 micro_eq(
279 union tgsi_exec_channel *dst,
280 const union tgsi_exec_channel *src0,
281 const union tgsi_exec_channel *src1,
282 const union tgsi_exec_channel *src2,
283 const union tgsi_exec_channel *src3 )
284 {
285 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
286 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
287 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
288 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
289 }
290
291 static void
292 micro_ieq(
293 union tgsi_exec_channel *dst,
294 const union tgsi_exec_channel *src0,
295 const union tgsi_exec_channel *src1,
296 const union tgsi_exec_channel *src2,
297 const union tgsi_exec_channel *src3 )
298 {
299 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
300 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
301 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
302 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
303 }
304
305 static void
306 micro_exp2(
307 union tgsi_exec_channel *dst,
308 const union tgsi_exec_channel *src )
309 {
310 dst->f[0] = (GLfloat) pow( 2.0, (GLdouble) src->f[0] );
311 dst->f[1] = (GLfloat) pow( 2.0, (GLdouble) src->f[1] );
312 dst->f[2] = (GLfloat) pow( 2.0, (GLdouble) src->f[2] );
313 dst->f[3] = (GLfloat) pow( 2.0, (GLdouble) src->f[3] );
314 }
315
316 static void
317 micro_f2it(
318 union tgsi_exec_channel *dst,
319 const union tgsi_exec_channel *src )
320 {
321 dst->i[0] = (GLint) src->f[0];
322 dst->i[1] = (GLint) src->f[1];
323 dst->i[2] = (GLint) src->f[2];
324 dst->i[3] = (GLint) src->f[3];
325 }
326
327 static void
328 micro_f2ut(
329 union tgsi_exec_channel *dst,
330 const union tgsi_exec_channel *src )
331 {
332 dst->u[0] = (GLuint) src->f[0];
333 dst->u[1] = (GLuint) src->f[1];
334 dst->u[2] = (GLuint) src->f[2];
335 dst->u[3] = (GLuint) src->f[3];
336 }
337
338 static void
339 micro_flr(
340 union tgsi_exec_channel *dst,
341 const union tgsi_exec_channel *src )
342 {
343 dst->f[0] = (GLfloat) floor( (GLdouble) src->f[0] );
344 dst->f[1] = (GLfloat) floor( (GLdouble) src->f[1] );
345 dst->f[2] = (GLfloat) floor( (GLdouble) src->f[2] );
346 dst->f[3] = (GLfloat) floor( (GLdouble) src->f[3] );
347 }
348
349 static void
350 micro_frc(
351 union tgsi_exec_channel *dst,
352 const union tgsi_exec_channel *src )
353 {
354 dst->f[0] = src->f[0] - (GLfloat) floor( (GLdouble) src->f[0] );
355 dst->f[1] = src->f[1] - (GLfloat) floor( (GLdouble) src->f[1] );
356 dst->f[2] = src->f[2] - (GLfloat) floor( (GLdouble) src->f[2] );
357 dst->f[3] = src->f[3] - (GLfloat) floor( (GLdouble) src->f[3] );
358 }
359
360 static void
361 micro_i2f(
362 union tgsi_exec_channel *dst,
363 const union tgsi_exec_channel *src )
364 {
365 dst->f[0] = (GLfloat) src->i[0];
366 dst->f[1] = (GLfloat) src->i[1];
367 dst->f[2] = (GLfloat) src->i[2];
368 dst->f[3] = (GLfloat) src->i[3];
369 }
370
371 static void
372 micro_lg2(
373 union tgsi_exec_channel *dst,
374 const union tgsi_exec_channel *src )
375 {
376 dst->f[0] = (GLfloat) log( (GLdouble) src->f[0] ) * 1.442695f;
377 dst->f[1] = (GLfloat) log( (GLdouble) src->f[1] ) * 1.442695f;
378 dst->f[2] = (GLfloat) log( (GLdouble) src->f[2] ) * 1.442695f;
379 dst->f[3] = (GLfloat) log( (GLdouble) src->f[3] ) * 1.442695f;
380 }
381
382 static void
383 micro_lt(
384 union tgsi_exec_channel *dst,
385 const union tgsi_exec_channel *src0,
386 const union tgsi_exec_channel *src1,
387 const union tgsi_exec_channel *src2,
388 const union tgsi_exec_channel *src3 )
389 {
390 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
391 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
392 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
393 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
394 }
395
396 static void
397 micro_ilt(
398 union tgsi_exec_channel *dst,
399 const union tgsi_exec_channel *src0,
400 const union tgsi_exec_channel *src1,
401 const union tgsi_exec_channel *src2,
402 const union tgsi_exec_channel *src3 )
403 {
404 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
405 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
406 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
407 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
408 }
409
410 static void
411 micro_ult(
412 union tgsi_exec_channel *dst,
413 const union tgsi_exec_channel *src0,
414 const union tgsi_exec_channel *src1,
415 const union tgsi_exec_channel *src2,
416 const union tgsi_exec_channel *src3 )
417 {
418 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
419 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
420 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
421 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
422 }
423
424 static void
425 micro_max(
426 union tgsi_exec_channel *dst,
427 const union tgsi_exec_channel *src0,
428 const union tgsi_exec_channel *src1 )
429 {
430 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
431 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
432 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
433 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
434 }
435
436 static void
437 micro_imax(
438 union tgsi_exec_channel *dst,
439 const union tgsi_exec_channel *src0,
440 const union tgsi_exec_channel *src1 )
441 {
442 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
443 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
444 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
445 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
446 }
447
448 static void
449 micro_umax(
450 union tgsi_exec_channel *dst,
451 const union tgsi_exec_channel *src0,
452 const union tgsi_exec_channel *src1 )
453 {
454 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
455 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
456 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
457 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
458 }
459
460 static void
461 micro_min(
462 union tgsi_exec_channel *dst,
463 const union tgsi_exec_channel *src0,
464 const union tgsi_exec_channel *src1 )
465 {
466 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
467 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
468 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
469 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
470 }
471
472 static void
473 micro_imin(
474 union tgsi_exec_channel *dst,
475 const union tgsi_exec_channel *src0,
476 const union tgsi_exec_channel *src1 )
477 {
478 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
479 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
480 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
481 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
482 }
483
484 static void
485 micro_umin(
486 union tgsi_exec_channel *dst,
487 const union tgsi_exec_channel *src0,
488 const union tgsi_exec_channel *src1 )
489 {
490 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
491 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
492 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
493 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
494 }
495
496 static void
497 micro_umod(
498 union tgsi_exec_channel *dst,
499 const union tgsi_exec_channel *src0,
500 const union tgsi_exec_channel *src1 )
501 {
502 dst->u[0] = src0->u[0] % src1->u[0];
503 dst->u[1] = src0->u[1] % src1->u[1];
504 dst->u[2] = src0->u[2] % src1->u[2];
505 dst->u[3] = src0->u[3] % src1->u[3];
506 }
507
508 static void
509 micro_mul(
510 union tgsi_exec_channel *dst,
511 const union tgsi_exec_channel *src0,
512 const union tgsi_exec_channel *src1 )
513 {
514 dst->f[0] = src0->f[0] * src1->f[0];
515 dst->f[1] = src0->f[1] * src1->f[1];
516 dst->f[2] = src0->f[2] * src1->f[2];
517 dst->f[3] = src0->f[3] * src1->f[3];
518 }
519
520 static void
521 micro_imul(
522 union tgsi_exec_channel *dst,
523 const union tgsi_exec_channel *src0,
524 const union tgsi_exec_channel *src1 )
525 {
526 dst->i[0] = src0->i[0] * src1->i[0];
527 dst->i[1] = src0->i[1] * src1->i[1];
528 dst->i[2] = src0->i[2] * src1->i[2];
529 dst->i[3] = src0->i[3] * src1->i[3];
530 }
531
532 static void
533 micro_imul64(
534 union tgsi_exec_channel *dst0,
535 union tgsi_exec_channel *dst1,
536 const union tgsi_exec_channel *src0,
537 const union tgsi_exec_channel *src1 )
538 {
539 dst1->i[0] = src0->i[0] * src1->i[0];
540 dst1->i[1] = src0->i[1] * src1->i[1];
541 dst1->i[2] = src0->i[2] * src1->i[2];
542 dst1->i[3] = src0->i[3] * src1->i[3];
543 dst0->i[0] = 0;
544 dst0->i[1] = 0;
545 dst0->i[2] = 0;
546 dst0->i[3] = 0;
547 }
548
549 static void
550 micro_umul64(
551 union tgsi_exec_channel *dst0,
552 union tgsi_exec_channel *dst1,
553 const union tgsi_exec_channel *src0,
554 const union tgsi_exec_channel *src1 )
555 {
556 dst1->u[0] = src0->u[0] * src1->u[0];
557 dst1->u[1] = src0->u[1] * src1->u[1];
558 dst1->u[2] = src0->u[2] * src1->u[2];
559 dst1->u[3] = src0->u[3] * src1->u[3];
560 dst0->u[0] = 0;
561 dst0->u[1] = 0;
562 dst0->u[2] = 0;
563 dst0->u[3] = 0;
564 }
565
566 static void
567 micro_movc(
568 union tgsi_exec_channel *dst,
569 const union tgsi_exec_channel *src0,
570 const union tgsi_exec_channel *src1,
571 const union tgsi_exec_channel *src2 )
572 {
573 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
574 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
575 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
576 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
577 }
578
579 static void
580 micro_neg(
581 union tgsi_exec_channel *dst,
582 const union tgsi_exec_channel *src )
583 {
584 dst->f[0] = -src->f[0];
585 dst->f[1] = -src->f[1];
586 dst->f[2] = -src->f[2];
587 dst->f[3] = -src->f[3];
588 }
589
590 static void
591 micro_ineg(
592 union tgsi_exec_channel *dst,
593 const union tgsi_exec_channel *src )
594 {
595 dst->i[0] = -src->i[0];
596 dst->i[1] = -src->i[1];
597 dst->i[2] = -src->i[2];
598 dst->i[3] = -src->i[3];
599 }
600
601 static void
602 micro_not(
603 union tgsi_exec_channel *dst,
604 const union tgsi_exec_channel *src )
605 {
606 dst->u[0] = ~src->u[0];
607 dst->u[1] = ~src->u[1];
608 dst->u[2] = ~src->u[2];
609 dst->u[3] = ~src->u[3];
610 }
611
612 static void
613 micro_or(
614 union tgsi_exec_channel *dst,
615 const union tgsi_exec_channel *src0,
616 const union tgsi_exec_channel *src1 )
617 {
618 dst->u[0] = src0->u[0] | src1->u[0];
619 dst->u[1] = src0->u[1] | src1->u[1];
620 dst->u[2] = src0->u[2] | src1->u[2];
621 dst->u[3] = src0->u[3] | src1->u[3];
622 }
623
624 static void
625 micro_pow(
626 union tgsi_exec_channel *dst,
627 const union tgsi_exec_channel *src0,
628 const union tgsi_exec_channel *src1 )
629 {
630 dst->f[0] = (GLfloat) pow( (GLdouble) src0->f[0], (GLdouble) src1->f[0] );
631 dst->f[1] = (GLfloat) pow( (GLdouble) src0->f[1], (GLdouble) src1->f[1] );
632 dst->f[2] = (GLfloat) pow( (GLdouble) src0->f[2], (GLdouble) src1->f[2] );
633 dst->f[3] = (GLfloat) pow( (GLdouble) src0->f[3], (GLdouble) src1->f[3] );
634 }
635
636 static void
637 micro_rnd(
638 union tgsi_exec_channel *dst,
639 const union tgsi_exec_channel *src )
640 {
641 dst->f[0] = (GLfloat) floor( (GLdouble) (src->f[0] + 0.5f) );
642 dst->f[1] = (GLfloat) floor( (GLdouble) (src->f[1] + 0.5f) );
643 dst->f[2] = (GLfloat) floor( (GLdouble) (src->f[2] + 0.5f) );
644 dst->f[3] = (GLfloat) floor( (GLdouble) (src->f[3] + 0.5f) );
645 }
646
647 static void
648 micro_shl(
649 union tgsi_exec_channel *dst,
650 const union tgsi_exec_channel *src0,
651 const union tgsi_exec_channel *src1 )
652 {
653 dst->i[0] = src0->i[0] << src1->i[0];
654 dst->i[1] = src0->i[1] << src1->i[1];
655 dst->i[2] = src0->i[2] << src1->i[2];
656 dst->i[3] = src0->i[3] << src1->i[3];
657 }
658
659 static void
660 micro_ishr(
661 union tgsi_exec_channel *dst,
662 const union tgsi_exec_channel *src0,
663 const union tgsi_exec_channel *src1 )
664 {
665 dst->i[0] = src0->i[0] >> src1->i[0];
666 dst->i[1] = src0->i[1] >> src1->i[1];
667 dst->i[2] = src0->i[2] >> src1->i[2];
668 dst->i[3] = src0->i[3] >> src1->i[3];
669 }
670
671 static void
672 micro_ushr(
673 union tgsi_exec_channel *dst,
674 const union tgsi_exec_channel *src0,
675 const union tgsi_exec_channel *src1 )
676 {
677 dst->u[0] = src0->u[0] >> src1->u[0];
678 dst->u[1] = src0->u[1] >> src1->u[1];
679 dst->u[2] = src0->u[2] >> src1->u[2];
680 dst->u[3] = src0->u[3] >> src1->u[3];
681 }
682
683 static void
684 micro_sin(
685 union tgsi_exec_channel *dst,
686 const union tgsi_exec_channel *src )
687 {
688 dst->f[0] = (GLfloat) sin( (GLdouble) src->f[0] );
689 dst->f[1] = (GLfloat) sin( (GLdouble) src->f[1] );
690 dst->f[2] = (GLfloat) sin( (GLdouble) src->f[2] );
691 dst->f[3] = (GLfloat) sin( (GLdouble) src->f[3] );
692 }
693
694 static void
695 micro_sqrt( union tgsi_exec_channel *dst,
696 const union tgsi_exec_channel *src )
697 {
698 dst->f[0] = (GLfloat) sqrt( (GLdouble) src->f[0] );
699 dst->f[1] = (GLfloat) sqrt( (GLdouble) src->f[1] );
700 dst->f[2] = (GLfloat) sqrt( (GLdouble) src->f[2] );
701 dst->f[3] = (GLfloat) sqrt( (GLdouble) src->f[3] );
702 }
703
704 static void
705 micro_sub(
706 union tgsi_exec_channel *dst,
707 const union tgsi_exec_channel *src0,
708 const union tgsi_exec_channel *src1 )
709 {
710 dst->f[0] = src0->f[0] - src1->f[0];
711 dst->f[1] = src0->f[1] - src1->f[1];
712 dst->f[2] = src0->f[2] - src1->f[2];
713 dst->f[3] = src0->f[3] - src1->f[3];
714 }
715
716 static void
717 micro_u2f(
718 union tgsi_exec_channel *dst,
719 const union tgsi_exec_channel *src )
720 {
721 dst->f[0] = (GLfloat) src->u[0];
722 dst->f[1] = (GLfloat) src->u[1];
723 dst->f[2] = (GLfloat) src->u[2];
724 dst->f[3] = (GLfloat) src->u[3];
725 }
726
727 static void
728 micro_xor(
729 union tgsi_exec_channel *dst,
730 const union tgsi_exec_channel *src0,
731 const union tgsi_exec_channel *src1 )
732 {
733 dst->u[0] = src0->u[0] ^ src1->u[0];
734 dst->u[1] = src0->u[1] ^ src1->u[1];
735 dst->u[2] = src0->u[2] ^ src1->u[2];
736 dst->u[3] = src0->u[3] ^ src1->u[3];
737 }
738
739 static void
740 fetch_src_file_channel(
741 const struct tgsi_exec_machine *mach,
742 const GLuint file,
743 const GLuint swizzle,
744 const union tgsi_exec_channel *index,
745 union tgsi_exec_channel *chan )
746 {
747 switch( swizzle ) {
748 case TGSI_EXTSWIZZLE_X:
749 case TGSI_EXTSWIZZLE_Y:
750 case TGSI_EXTSWIZZLE_Z:
751 case TGSI_EXTSWIZZLE_W:
752 switch( file ) {
753 case TGSI_FILE_CONSTANT:
754 chan->f[0] = mach->Consts[index->i[0]][swizzle];
755 chan->f[1] = mach->Consts[index->i[1]][swizzle];
756 chan->f[2] = mach->Consts[index->i[2]][swizzle];
757 chan->f[3] = mach->Consts[index->i[3]][swizzle];
758 break;
759
760 case TGSI_FILE_INPUT:
761 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
762 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
763 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
764 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
765 break;
766
767 case TGSI_FILE_TEMPORARY:
768 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
769 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
770 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
771 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
772 break;
773
774 case TGSI_FILE_IMMEDIATE:
775 assert( index->i[0] < (GLint) mach->ImmLimit );
776 chan->f[0] = mach->Imms[index->i[0]][swizzle];
777 assert( index->i[1] < (GLint) mach->ImmLimit );
778 chan->f[1] = mach->Imms[index->i[1]][swizzle];
779 assert( index->i[2] < (GLint) mach->ImmLimit );
780 chan->f[2] = mach->Imms[index->i[2]][swizzle];
781 assert( index->i[3] < (GLint) mach->ImmLimit );
782 chan->f[3] = mach->Imms[index->i[3]][swizzle];
783 break;
784
785 case TGSI_FILE_ADDRESS:
786 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
787 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
788 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
789 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
790 break;
791
792 default:
793 assert( 0 );
794 }
795 break;
796
797 case TGSI_EXTSWIZZLE_ZERO:
798 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
799 break;
800
801 case TGSI_EXTSWIZZLE_ONE:
802 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
803 break;
804
805 default:
806 assert( 0 );
807 }
808 }
809
810 static void
811 fetch_source(
812 const struct tgsi_exec_machine *mach,
813 union tgsi_exec_channel *chan,
814 const struct tgsi_full_src_register *reg,
815 const GLuint chan_index )
816 {
817 union tgsi_exec_channel index;
818 GLuint swizzle;
819
820 index.i[0] =
821 index.i[1] =
822 index.i[2] =
823 index.i[3] = reg->SrcRegister.Index;
824
825 if (reg->SrcRegister.Indirect) {
826 union tgsi_exec_channel index2;
827 union tgsi_exec_channel indir_index;
828
829 index2.i[0] =
830 index2.i[1] =
831 index2.i[2] =
832 index2.i[3] = reg->SrcRegisterInd.Index;
833
834 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
835 fetch_src_file_channel(
836 mach,
837 reg->SrcRegisterInd.File,
838 swizzle,
839 &index2,
840 &indir_index );
841
842 index.i[0] += indir_index.i[0];
843 index.i[1] += indir_index.i[1];
844 index.i[2] += indir_index.i[2];
845 index.i[3] += indir_index.i[3];
846 }
847
848 if( reg->SrcRegister.Dimension ) {
849 switch( reg->SrcRegister.File ) {
850 case TGSI_FILE_INPUT:
851 index.i[0] *= 17;
852 index.i[1] *= 17;
853 index.i[2] *= 17;
854 index.i[3] *= 17;
855 break;
856 case TGSI_FILE_CONSTANT:
857 index.i[0] *= 4096;
858 index.i[1] *= 4096;
859 index.i[2] *= 4096;
860 index.i[3] *= 4096;
861 break;
862 default:
863 assert( 0 );
864 }
865
866 index.i[0] += reg->SrcRegisterDim.Index;
867 index.i[1] += reg->SrcRegisterDim.Index;
868 index.i[2] += reg->SrcRegisterDim.Index;
869 index.i[3] += reg->SrcRegisterDim.Index;
870
871 if (reg->SrcRegisterDim.Indirect) {
872 union tgsi_exec_channel index2;
873 union tgsi_exec_channel indir_index;
874
875 index2.i[0] =
876 index2.i[1] =
877 index2.i[2] =
878 index2.i[3] = reg->SrcRegisterDimInd.Index;
879
880 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
881 fetch_src_file_channel(
882 mach,
883 reg->SrcRegisterDimInd.File,
884 swizzle,
885 &index2,
886 &indir_index );
887
888 index.i[0] += indir_index.i[0];
889 index.i[1] += indir_index.i[1];
890 index.i[2] += indir_index.i[2];
891 index.i[3] += indir_index.i[3];
892 }
893 }
894
895 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
896 fetch_src_file_channel(
897 mach,
898 reg->SrcRegister.File,
899 swizzle,
900 &index,
901 chan );
902
903 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
904 case TGSI_UTIL_SIGN_CLEAR:
905 micro_abs( chan, chan );
906 break;
907
908 case TGSI_UTIL_SIGN_SET:
909 micro_abs( chan, chan );
910 micro_neg( chan, chan );
911 break;
912
913 case TGSI_UTIL_SIGN_TOGGLE:
914 micro_neg( chan, chan );
915 break;
916
917 case TGSI_UTIL_SIGN_KEEP:
918 break;
919 }
920 }
921
922 static void
923 store_dest(
924 struct tgsi_exec_machine *mach,
925 const union tgsi_exec_channel *chan,
926 const struct tgsi_full_dst_register *reg,
927 const struct tgsi_full_instruction *inst,
928 GLuint chan_index )
929 {
930 union tgsi_exec_channel *dst;
931
932 switch( reg->DstRegister.File ) {
933 case TGSI_FILE_NULL:
934 return;
935
936 case TGSI_FILE_OUTPUT:
937 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + reg->DstRegister.Index].xyzw[chan_index];
938 break;
939
940 case TGSI_FILE_TEMPORARY:
941 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
942 break;
943
944 case TGSI_FILE_ADDRESS:
945 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
946 break;
947
948 default:
949 assert( 0 );
950 }
951
952 switch (inst->Instruction.Saturate)
953 {
954 case TGSI_SAT_NONE:
955 *dst = *chan;
956 break;
957
958 case TGSI_SAT_ZERO_ONE:
959 micro_lt( dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], chan );
960 micro_lt( dst, chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
961 break;
962
963 case TGSI_SAT_MINUS_PLUS_ONE:
964 assert( 0 );
965 break;
966
967 default:
968 assert( 0 );
969 }
970 }
971
/*
 * Shorthands for instruction handlers.  Both expect variables named `mach`
 * and `inst` to be in scope at the point of use.
 *
 * FETCH reads channel CHAN of source operand INDEX into *VAL;
 * STORE writes *VAL to channel CHAN of destination operand INDEX.
 */
#define FETCH(VAL,INDEX,CHAN)\
   fetch_source( mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN )

#define STORE(VAL,INDEX,CHAN)\
   store_dest( mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
977
978 static void
979 exec_kil (struct tgsi_exec_machine *mach,
980 const struct tgsi_full_instruction *inst)
981 {
982 GLuint uniquemask;
983 GLuint chan_index;
984 GLuint kilmask = 0;
985 union tgsi_exec_channel r[1];
986
987 /* This mask stores component bits that were already tested. Note that
988 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
989 * tested. */
990 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
991
992 for (chan_index = 0; chan_index < 4; chan_index++)
993 {
994 GLuint swizzle;
995 GLuint i;
996
997 /* unswizzle channel */
998 swizzle = tgsi_util_get_full_src_register_extswizzle (
999 &inst->FullSrcRegisters[0],
1000 chan_index);
1001
1002 /* check if the component has not been already tested */
1003 if (uniquemask & (1 << swizzle))
1004 continue;
1005 uniquemask |= 1 << swizzle;
1006
1007 FETCH(&r[0], 0, chan_index);
1008 for (i = 0; i < 4; i++)
1009 if (r[0].f[i] < 0.0f)
1010 kilmask |= 1 << (i * 4);
1011 }
1012
1013 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1014 }
1015
1016 #if MESA
1017 /*
1018 * Fetch a texel using S texture coordinate.
1019 */
1020 static void
1021 fetch_texel_1d( GLcontext *ctx,
1022 struct tgsi_sampler_state *sampler,
1023 const union tgsi_exec_channel *s,
1024 GLuint unit,
1025 union tgsi_exec_channel *r,
1026 union tgsi_exec_channel *g,
1027 union tgsi_exec_channel *b,
1028 union tgsi_exec_channel *a )
1029 {
1030 SWcontext *swrast = SWRAST_CONTEXT(ctx);
1031 GLuint fragment_index;
1032 GLfloat stpq[4][4];
1033 GLfloat lambdas[4];
1034 GLchan rgba[4][4];
1035
1036 for (fragment_index = 0; fragment_index < 4; fragment_index++)
1037 {
1038 stpq[fragment_index][0] = s->f[fragment_index];
1039 }
1040
1041 if (sampler->NeedLambda)
1042 {
1043 GLfloat dsdx = s->f[TILE_BOTTOM_RIGHT] - s->f[TILE_BOTTOM_LEFT];
1044 GLfloat dsdy = s->f[TILE_TOP_LEFT] - s->f[TILE_BOTTOM_LEFT];
1045
1046 GLfloat rho, lambda;
1047
1048 dsdx = FABSF(dsdx);
1049 dsdy = FABSF(dsdy);
1050
1051 rho = MAX2(dsdx, dsdy) * sampler->ImageWidth;
1052
1053 lambda = LOG2(rho);
1054
1055 if (sampler->NeedLodBias)
1056 lambda += sampler->LodBias;
1057
1058 if (sampler->NeedLambdaClamp)
1059 lambda = CLAMP(lambda, sampler->MinLod, sampler->MaxLod);
1060
1061 /* XXX: Use the same lambda value throughout the tile. Could
1062 * end up with four unique values by recalculating partial
1063 * derivs in the other row and column, and calculating lambda
1064 * using the dx and dy values appropriate for each fragment in
1065 * the tile.
1066 */
1067 lambdas[0] =
1068 lambdas[1] =
1069 lambdas[2] =
1070 lambdas[3] = lambda;
1071 }
1072
1073 if (!swrast->TextureSample[unit]) {
1074 _swrast_update_texture_samplers(ctx);
1075 }
1076
1077 /* XXX use a float-valued TextureSample routine here!!! */
1078 swrast->TextureSample[unit] (ctx,
1079 ctx->Texture.Unit[unit]._Current,
1080 4,
1081 (const GLfloat (*)[4])stpq,
1082 lambdas,
1083 rgba);
1084
1085 for (fragment_index = 0; fragment_index < 4; fragment_index++)
1086 {
1087 r->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][0]);
1088 g->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][1]);
1089 b->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][2]);
1090 a->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][3]);
1091 }
1092 }
1093
1094 /*
1095 * Fetch a texel using ST texture coordinates.
1096 */
1097 static void
1098 fetch_texel_2d( GLcontext *ctx,
1099 struct tgsi_sampler_state *sampler,
1100 const union tgsi_exec_channel *s,
1101 const union tgsi_exec_channel *t,
1102 GLuint unit,
1103 union tgsi_exec_channel *r,
1104 union tgsi_exec_channel *g,
1105 union tgsi_exec_channel *b,
1106 union tgsi_exec_channel *a )
1107 {
1108 SWcontext *swrast = SWRAST_CONTEXT( ctx );
1109 GLuint fragment_index;
1110 GLfloat stpq[4][4];
1111 GLfloat lambdas[4];
1112 GLchan rgba[4][4];
1113
1114 for (fragment_index = 0; fragment_index < 4; fragment_index++) {
1115 stpq[fragment_index][0] = s->f[fragment_index];
1116 stpq[fragment_index][1] = t->f[fragment_index];
1117 }
1118
1119 if (sampler->NeedLambda) {
1120 GLfloat dsdx = s->f[TILE_BOTTOM_RIGHT] - s->f[TILE_BOTTOM_LEFT];
1121 GLfloat dsdy = s->f[TILE_TOP_LEFT] - s->f[TILE_BOTTOM_LEFT];
1122
1123 GLfloat dtdx = t->f[TILE_BOTTOM_RIGHT] - t->f[TILE_BOTTOM_LEFT];
1124 GLfloat dtdy = t->f[TILE_TOP_LEFT] - t->f[TILE_BOTTOM_LEFT];
1125
1126 GLfloat maxU, maxV, rho, lambda;
1127
1128 dsdx = FABSF( dsdx );
1129 dsdy = FABSF( dsdy );
1130 dtdx = FABSF( dtdx );
1131 dtdy = FABSF( dtdy );
1132
1133 maxU = MAX2( dsdx, dsdy ) * sampler->ImageWidth;
1134 maxV = MAX2( dtdx, dtdy ) * sampler->ImageHeight;
1135
1136 rho = MAX2( maxU, maxV );
1137
1138 lambda = LOG2( rho );
1139
1140 if (sampler->NeedLodBias)
1141 lambda += sampler->LodBias;
1142
1143 if (sampler->NeedLambdaClamp)
1144 lambda = CLAMP(
1145 lambda,
1146 sampler->MinLod,
1147 sampler->MaxLod );
1148
1149 /* XXX: Use the same lambda value throughout the tile. Could
1150 * end up with four unique values by recalculating partial
1151 * derivs in the other row and column, and calculating lambda
1152 * using the dx and dy values appropriate for each fragment in
1153 * the tile.
1154 */
1155 lambdas[0] =
1156 lambdas[1] =
1157 lambdas[2] =
1158 lambdas[3] = lambda;
1159 }
1160
1161 if (!swrast->TextureSample[unit]) {
1162 _swrast_update_texture_samplers(ctx);
1163 }
1164
1165 /* XXX use a float-valued TextureSample routine here!!! */
1166 swrast->TextureSample[unit](
1167 ctx,
1168 ctx->Texture.Unit[unit]._Current,
1169 4,
1170 (const GLfloat (*)[4]) stpq,
1171 lambdas,
1172 rgba );
1173
1174 for (fragment_index = 0; fragment_index < 4; fragment_index++) {
1175 r->f[fragment_index] = CHAN_TO_FLOAT( rgba[fragment_index][0] );
1176 g->f[fragment_index] = CHAN_TO_FLOAT( rgba[fragment_index][1] );
1177 b->f[fragment_index] = CHAN_TO_FLOAT( rgba[fragment_index][2] );
1178 a->f[fragment_index] = CHAN_TO_FLOAT( rgba[fragment_index][3] );
1179 }
1180 }
1181
1182 /*
1183 * Fetch a texel using STR texture coordinates.
1184 */
1185 static void
1186 fetch_texel_3d( GLcontext *ctx,
1187 struct tgsi_sampler_state *sampler,
1188 const union tgsi_exec_channel *s,
1189 const union tgsi_exec_channel *t,
1190 const union tgsi_exec_channel *p,
1191 GLuint unit,
1192 union tgsi_exec_channel *r,
1193 union tgsi_exec_channel *g,
1194 union tgsi_exec_channel *b,
1195 union tgsi_exec_channel *a )
1196 {
1197 SWcontext *swrast = SWRAST_CONTEXT(ctx);
1198 GLuint fragment_index;
1199 GLfloat stpq[4][4];
1200 GLfloat lambdas[4];
1201 GLchan rgba[4][4];
1202
1203 for (fragment_index = 0; fragment_index < 4; fragment_index++)
1204 {
1205 stpq[fragment_index][0] = s->f[fragment_index];
1206 stpq[fragment_index][1] = t->f[fragment_index];
1207 stpq[fragment_index][2] = p->f[fragment_index];
1208 }
1209
1210 if (sampler->NeedLambda)
1211 {
1212 GLfloat dsdx = s->f[TILE_BOTTOM_RIGHT] - s->f[TILE_BOTTOM_LEFT];
1213 GLfloat dsdy = s->f[TILE_TOP_LEFT] - s->f[TILE_BOTTOM_LEFT];
1214
1215 GLfloat dtdx = t->f[TILE_BOTTOM_RIGHT] - t->f[TILE_BOTTOM_LEFT];
1216 GLfloat dtdy = t->f[TILE_TOP_LEFT] - t->f[TILE_BOTTOM_LEFT];
1217
1218 GLfloat dpdx = p->f[TILE_BOTTOM_RIGHT] - p->f[TILE_BOTTOM_LEFT];
1219 GLfloat dpdy = p->f[TILE_TOP_LEFT] - p->f[TILE_BOTTOM_LEFT];
1220
1221 GLfloat maxU, maxV, maxW, rho, lambda;
1222
1223 dsdx = FABSF(dsdx);
1224 dsdy = FABSF(dsdy);
1225 dtdx = FABSF(dtdx);
1226 dtdy = FABSF(dtdy);
1227 dpdx = FABSF(dpdx);
1228 dpdy = FABSF(dpdy);
1229
1230 maxU = MAX2(dsdx, dsdy) * sampler->ImageWidth;
1231 maxV = MAX2(dtdx, dtdy) * sampler->ImageHeight;
1232 maxW = MAX2(dpdx, dpdy) * sampler->ImageDepth;
1233
1234 rho = MAX2(maxU, MAX2(maxV, maxW));
1235
1236 lambda = LOG2(rho);
1237
1238 if (sampler->NeedLodBias)
1239 lambda += sampler->LodBias;
1240
1241 if (sampler->NeedLambdaClamp)
1242 lambda = CLAMP(lambda, sampler->MinLod, sampler->MaxLod);
1243
1244 /* XXX: Use the same lambda value throughout the tile. Could
1245 * end up with four unique values by recalculating partial
1246 * derivs in the other row and column, and calculating lambda
1247 * using the dx and dy values appropriate for each fragment in
1248 * the tile.
1249 */
1250 lambdas[0] =
1251 lambdas[1] =
1252 lambdas[2] =
1253 lambdas[3] = lambda;
1254 }
1255
1256 if (!swrast->TextureSample[unit]) {
1257 _swrast_update_texture_samplers(ctx);
1258 }
1259
1260 /* XXX use a float-valued TextureSample routine here!!! */
1261 swrast->TextureSample[unit] (ctx,
1262 ctx->Texture.Unit[unit]._Current,
1263 4,
1264 (const GLfloat (*)[4])stpq,
1265 lambdas,
1266 rgba);
1267
1268 for (fragment_index = 0; fragment_index < 4; fragment_index++)
1269 {
1270 r->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][0]);
1271 g->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][1]);
1272 b->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][2]);
1273 a->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][3]);
1274 }
1275 }
1276 #endif
1277
1278 static GLuint
1279 map_label(
1280 GLuint label,
1281 struct tgsi_exec_labels *labels )
1282 {
1283 GLuint i;
1284
1285 for( i = 0; i < labels->count; i++ ) {
1286 if( labels->labels[i][0] == label ) {
1287 return labels->labels[i][1];
1288 }
1289 }
1290 assert( 0 );
1291 return 0;
1292 }
1293
1294 static void
1295 exec_instruction(
1296 struct tgsi_exec_machine *mach,
1297 const struct tgsi_full_instruction *inst,
1298 struct tgsi_exec_labels *labels,
1299 GLuint *programCounter )
1300 {
1301 #if MESA
1302 GET_CURRENT_CONTEXT(ctx);
1303 #endif
1304 GLuint chan_index;
1305 union tgsi_exec_channel r[8];
1306
1307 switch (inst->Instruction.Opcode) {
1308 case TGSI_OPCODE_ARL:
1309 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1310 FETCH( &r[0], 0, chan_index );
1311 micro_f2it( &r[0], &r[0] );
1312 STORE( &r[0], 0, chan_index );
1313 }
1314 break;
1315
1316 case TGSI_OPCODE_MOV:
1317 /* TGSI_OPCODE_SWZ */
1318 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1319 FETCH( &r[0], 0, chan_index );
1320 STORE( &r[0], 0, chan_index );
1321 }
1322 break;
1323
1324 case TGSI_OPCODE_LIT:
1325 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1326 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1327 }
1328
1329 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1330 FETCH( &r[0], 0, CHAN_X );
1331 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1332 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1333 STORE( &r[0], 0, CHAN_Y );
1334 }
1335
1336 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1337 FETCH( &r[1], 0, CHAN_Y );
1338 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1339
1340 FETCH( &r[2], 0, CHAN_W );
1341 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1342 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1343 micro_pow( &r[1], &r[1], &r[2] );
1344 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1345 STORE( &r[0], 0, CHAN_Z );
1346 }
1347 }
1348
1349 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1350 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1351 }
1352 break;
1353
1354 case TGSI_OPCODE_RCP:
1355 /* TGSI_OPCODE_RECIP */
1356 FETCH( &r[0], 0, CHAN_X );
1357 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1358 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1359 STORE( &r[0], 0, chan_index );
1360 }
1361 break;
1362
1363 case TGSI_OPCODE_RSQ:
1364 /* TGSI_OPCODE_RECIPSQRT */
1365 FETCH( &r[0], 0, CHAN_X );
1366 micro_sqrt( &r[0], &r[0] );
1367 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1368 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1369 STORE( &r[0], 0, chan_index );
1370 }
1371 break;
1372
1373 case TGSI_OPCODE_EXP:
1374 assert (0);
1375 break;
1376
1377 case TGSI_OPCODE_LOG:
1378 assert (0);
1379 break;
1380
1381 case TGSI_OPCODE_MUL:
1382 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1383 {
1384 FETCH(&r[0], 0, chan_index);
1385 FETCH(&r[1], 1, chan_index);
1386
1387 micro_mul( &r[0], &r[0], &r[1] );
1388
1389 STORE(&r[0], 0, chan_index);
1390 }
1391 break;
1392
1393 case TGSI_OPCODE_ADD:
1394 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1395 FETCH( &r[0], 0, chan_index );
1396 FETCH( &r[1], 1, chan_index );
1397 micro_add( &r[0], &r[0], &r[1] );
1398 STORE( &r[0], 0, chan_index );
1399 }
1400 break;
1401
1402 case TGSI_OPCODE_DP3:
1403 /* TGSI_OPCODE_DOT3 */
1404 FETCH( &r[0], 0, CHAN_X );
1405 FETCH( &r[1], 1, CHAN_X );
1406 micro_mul( &r[0], &r[0], &r[1] );
1407
1408 FETCH( &r[1], 0, CHAN_Y );
1409 FETCH( &r[2], 1, CHAN_Y );
1410 micro_mul( &r[1], &r[1], &r[2] );
1411 micro_add( &r[0], &r[0], &r[1] );
1412
1413 FETCH( &r[1], 0, CHAN_Z );
1414 FETCH( &r[2], 1, CHAN_Z );
1415 micro_mul( &r[1], &r[1], &r[2] );
1416 micro_add( &r[0], &r[0], &r[1] );
1417
1418 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1419 STORE( &r[0], 0, chan_index );
1420 }
1421 break;
1422
1423 case TGSI_OPCODE_DP4:
1424 /* TGSI_OPCODE_DOT4 */
1425 FETCH(&r[0], 0, CHAN_X);
1426 FETCH(&r[1], 1, CHAN_X);
1427
1428 micro_mul( &r[0], &r[0], &r[1] );
1429
1430 FETCH(&r[1], 0, CHAN_Y);
1431 FETCH(&r[2], 1, CHAN_Y);
1432
1433 micro_mul( &r[1], &r[1], &r[2] );
1434 micro_add( &r[0], &r[0], &r[1] );
1435
1436 FETCH(&r[1], 0, CHAN_Z);
1437 FETCH(&r[2], 1, CHAN_Z);
1438
1439 micro_mul( &r[1], &r[1], &r[2] );
1440 micro_add( &r[0], &r[0], &r[1] );
1441
1442 FETCH(&r[1], 0, CHAN_W);
1443 FETCH(&r[2], 1, CHAN_W);
1444
1445 micro_mul( &r[1], &r[1], &r[2] );
1446 micro_add( &r[0], &r[0], &r[1] );
1447
1448 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1449 STORE( &r[0], 0, chan_index );
1450 }
1451 break;
1452
1453 case TGSI_OPCODE_DST:
1454 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1455 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1456 }
1457
1458 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1459 FETCH( &r[0], 0, CHAN_Y );
1460 FETCH( &r[1], 1, CHAN_Y);
1461 micro_mul( &r[0], &r[0], &r[1] );
1462 STORE( &r[0], 0, CHAN_Y );
1463 }
1464
1465 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1466 FETCH( &r[0], 0, CHAN_Z );
1467 STORE( &r[0], 0, CHAN_Z );
1468 }
1469
1470 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1471 FETCH( &r[0], 1, CHAN_W );
1472 STORE( &r[0], 0, CHAN_W );
1473 }
1474 break;
1475
1476 case TGSI_OPCODE_MIN:
1477 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1478 FETCH(&r[0], 0, chan_index);
1479 FETCH(&r[1], 1, chan_index);
1480
1481 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1482
1483 STORE(&r[0], 0, chan_index);
1484 }
1485 break;
1486
1487 case TGSI_OPCODE_MAX:
1488 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1489 FETCH(&r[0], 0, chan_index);
1490 FETCH(&r[1], 1, chan_index);
1491
1492 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1493
1494 STORE(&r[0], 0, chan_index);
1495 }
1496 break;
1497
1498 case TGSI_OPCODE_SLT:
1499 /* TGSI_OPCODE_SETLT */
1500 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1501 FETCH( &r[0], 0, chan_index );
1502 FETCH( &r[1], 1, chan_index );
1503 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1504 STORE( &r[0], 0, chan_index );
1505 }
1506 break;
1507
1508 case TGSI_OPCODE_SGE:
1509 /* TGSI_OPCODE_SETGE */
1510 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1511 FETCH(&r[0], 0, chan_index);
1512 FETCH(&r[1], 1, chan_index);
1513
1514 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
1515
1516 STORE(&r[0], 0, chan_index);
1517 }
1518 break;
1519
1520 case TGSI_OPCODE_MAD:
1521 /* TGSI_OPCODE_MADD */
1522 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1523 FETCH( &r[0], 0, chan_index );
1524 FETCH( &r[1], 1, chan_index );
1525 micro_mul( &r[0], &r[0], &r[1] );
1526 FETCH( &r[1], 2, chan_index );
1527 micro_add( &r[0], &r[0], &r[1] );
1528 STORE( &r[0], 0, chan_index );
1529 }
1530 break;
1531
1532 case TGSI_OPCODE_SUB:
1533 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1534 FETCH(&r[0], 0, chan_index);
1535 FETCH(&r[1], 1, chan_index);
1536
1537 micro_sub( &r[0], &r[0], &r[1] );
1538
1539 STORE(&r[0], 0, chan_index);
1540 }
1541 break;
1542
1543 case TGSI_OPCODE_LERP:
1544 /* TGSI_OPCODE_LRP */
1545 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1546 FETCH(&r[0], 0, chan_index);
1547 FETCH(&r[1], 1, chan_index);
1548 FETCH(&r[2], 2, chan_index);
1549
1550 micro_sub( &r[1], &r[1], &r[2] );
1551 micro_mul( &r[0], &r[0], &r[1] );
1552 micro_add( &r[0], &r[0], &r[2] );
1553
1554 STORE(&r[0], 0, chan_index);
1555 }
1556 break;
1557
1558 case TGSI_OPCODE_CND:
1559 assert (0);
1560 break;
1561
1562 case TGSI_OPCODE_CND0:
1563 assert (0);
1564 break;
1565
1566 case TGSI_OPCODE_DOT2ADD:
1567 /* TGSI_OPCODE_DP2A */
1568 assert (0);
1569 break;
1570
1571 case TGSI_OPCODE_INDEX:
1572 assert (0);
1573 break;
1574
1575 case TGSI_OPCODE_NEGATE:
1576 assert (0);
1577 break;
1578
1579 case TGSI_OPCODE_FRAC:
1580 /* TGSI_OPCODE_FRC */
1581 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1582 FETCH( &r[0], 0, chan_index );
1583 micro_frc( &r[0], &r[0] );
1584 STORE( &r[0], 0, chan_index );
1585 }
1586 break;
1587
1588 case TGSI_OPCODE_CLAMP:
1589 assert (0);
1590 break;
1591
1592 case TGSI_OPCODE_FLOOR:
1593 /* TGSI_OPCODE_FLR */
1594 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1595 FETCH( &r[0], 0, chan_index );
1596 micro_flr( &r[0], &r[0] );
1597 STORE( &r[0], 0, chan_index );
1598 }
1599 break;
1600
1601 case TGSI_OPCODE_ROUND:
1602 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1603 FETCH( &r[0], 0, chan_index );
1604 micro_rnd( &r[0], &r[0] );
1605 STORE( &r[0], 0, chan_index );
1606 }
1607 break;
1608
1609 case TGSI_OPCODE_EXPBASE2:
1610 /* TGSI_OPCODE_EX2 */
1611 FETCH(&r[0], 0, CHAN_X);
1612
1613 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
1614
1615 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1616 STORE( &r[0], 0, chan_index );
1617 }
1618 break;
1619
1620 case TGSI_OPCODE_LOGBASE2:
1621 /* TGSI_OPCODE_LG2 */
1622 FETCH( &r[0], 0, CHAN_X );
1623 micro_lg2( &r[0], &r[0] );
1624 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1625 STORE( &r[0], 0, chan_index );
1626 }
1627 break;
1628
1629 case TGSI_OPCODE_POWER:
1630 /* TGSI_OPCODE_POW */
1631 FETCH(&r[0], 0, CHAN_X);
1632 FETCH(&r[1], 1, CHAN_X);
1633
1634 micro_pow( &r[0], &r[0], &r[1] );
1635
1636 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1637 STORE( &r[0], 0, chan_index );
1638 }
1639 break;
1640
1641 case TGSI_OPCODE_CROSSPRODUCT:
1642 /* TGSI_OPCODE_XPD */
1643 FETCH(&r[0], 0, CHAN_Y);
1644 FETCH(&r[1], 1, CHAN_Z);
1645
1646 micro_mul( &r[2], &r[0], &r[1] );
1647
1648 FETCH(&r[3], 0, CHAN_Z);
1649 FETCH(&r[4], 1, CHAN_Y);
1650
1651 micro_mul( &r[5], &r[3], &r[4] );
1652 micro_sub( &r[2], &r[2], &r[5] );
1653
1654 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1655 STORE( &r[2], 0, CHAN_X );
1656 }
1657
1658 FETCH(&r[2], 1, CHAN_X);
1659
1660 micro_mul( &r[3], &r[3], &r[2] );
1661
1662 FETCH(&r[5], 0, CHAN_X);
1663
1664 micro_mul( &r[1], &r[1], &r[5] );
1665 micro_sub( &r[3], &r[3], &r[1] );
1666
1667 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1668 STORE( &r[3], 0, CHAN_Y );
1669 }
1670
1671 micro_mul( &r[5], &r[5], &r[4] );
1672 micro_mul( &r[0], &r[0], &r[2] );
1673 micro_sub( &r[5], &r[5], &r[0] );
1674
1675 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1676 STORE( &r[5], 0, CHAN_Z );
1677 }
1678
1679 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1680 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1681 }
1682 break;
1683
1684 case TGSI_OPCODE_MULTIPLYMATRIX:
1685 assert (0);
1686 break;
1687
1688 case TGSI_OPCODE_ABS:
1689 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1690 FETCH(&r[0], 0, chan_index);
1691
1692 micro_abs( &r[0], &r[0] );
1693
1694 STORE(&r[0], 0, chan_index);
1695 }
1696 break;
1697
1698 case TGSI_OPCODE_RCC:
1699 assert (0);
1700 break;
1701
1702 case TGSI_OPCODE_DPH:
1703 FETCH(&r[0], 0, CHAN_X);
1704 FETCH(&r[1], 1, CHAN_X);
1705
1706 micro_mul( &r[0], &r[0], &r[1] );
1707
1708 FETCH(&r[1], 0, CHAN_Y);
1709 FETCH(&r[2], 1, CHAN_Y);
1710
1711 micro_mul( &r[1], &r[1], &r[2] );
1712 micro_add( &r[0], &r[0], &r[1] );
1713
1714 FETCH(&r[1], 0, CHAN_Z);
1715 FETCH(&r[2], 1, CHAN_Z);
1716
1717 micro_mul( &r[1], &r[1], &r[2] );
1718 micro_add( &r[0], &r[0], &r[1] );
1719
1720 FETCH(&r[1], 1, CHAN_W);
1721
1722 micro_add( &r[0], &r[0], &r[1] );
1723
1724 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1725 STORE( &r[0], 0, chan_index );
1726 }
1727 break;
1728
1729 case TGSI_OPCODE_COS:
1730 FETCH(&r[0], 0, CHAN_X);
1731
1732 micro_cos( &r[0], &r[0] );
1733
1734 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1735 STORE( &r[0], 0, chan_index );
1736 }
1737 break;
1738
1739 case TGSI_OPCODE_DDX:
1740 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1741 FETCH( &r[0], 0, chan_index );
1742 micro_ddx( &r[0], &r[0] );
1743 STORE( &r[0], 0, chan_index );
1744 }
1745 break;
1746
1747 case TGSI_OPCODE_DDY:
1748 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1749 FETCH( &r[0], 0, chan_index );
1750 micro_ddy( &r[0], &r[0] );
1751 STORE( &r[0], 0, chan_index );
1752 }
1753 break;
1754
1755 case TGSI_OPCODE_KIL:
1756 exec_kil (mach, inst);
1757 break;
1758
1759 case TGSI_OPCODE_PK2H:
1760 assert (0);
1761 break;
1762
1763 case TGSI_OPCODE_PK2US:
1764 assert (0);
1765 break;
1766
1767 case TGSI_OPCODE_PK4B:
1768 assert (0);
1769 break;
1770
1771 case TGSI_OPCODE_PK4UB:
1772 assert (0);
1773 break;
1774
1775 case TGSI_OPCODE_RFL:
1776 assert (0);
1777 break;
1778
1779 case TGSI_OPCODE_SEQ:
1780 assert (0);
1781 break;
1782
1783 case TGSI_OPCODE_SFL:
1784 assert (0);
1785 break;
1786
1787 case TGSI_OPCODE_SGT:
1788 assert (0);
1789 break;
1790
1791 case TGSI_OPCODE_SIN:
1792 FETCH(&r[0], 0, CHAN_X);
1793
1794 micro_sin( &r[0], &r[0] );
1795
1796 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1797 STORE( &r[0], 0, chan_index );
1798 }
1799 break;
1800
1801 case TGSI_OPCODE_SLE:
1802 assert (0);
1803 break;
1804
1805 case TGSI_OPCODE_SNE:
1806 assert (0);
1807 break;
1808
1809 case TGSI_OPCODE_STR:
1810 assert (0);
1811 break;
1812
1813 case TGSI_OPCODE_TEX:
1814 switch (inst->InstructionExtTexture.Texture) {
1815 case TGSI_TEXTURE_1D:
1816
1817 FETCH(&r[0], 0, CHAN_X);
1818
1819 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
1820 case TGSI_EXTSWIZZLE_W:
1821 FETCH(&r[1], 0, CHAN_W);
1822 micro_div( &r[0], &r[0], &r[1] );
1823 break;
1824
1825 case TGSI_EXTSWIZZLE_ONE:
1826 break;
1827
1828 default:
1829 assert (0);
1830 }
1831 #if MESA
1832 fetch_texel_1d (ctx,
1833 &mach->Samplers[inst->FullSrcRegisters[1].SrcRegister.Index],
1834 &r[0],
1835 inst->FullSrcRegisters[1].SrcRegister.Index,
1836 &r[0], &r[1], &r[2], &r[3]);
1837 #endif
1838 break;
1839
1840 case TGSI_TEXTURE_2D:
1841 case TGSI_TEXTURE_RECT:
1842
1843 FETCH(&r[0], 0, CHAN_X);
1844 FETCH(&r[1], 0, CHAN_Y);
1845
1846 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
1847 case TGSI_EXTSWIZZLE_W:
1848 FETCH(&r[2], 0, CHAN_W);
1849 micro_div( &r[0], &r[0], &r[2] );
1850 micro_div( &r[1], &r[1], &r[2] );
1851 break;
1852
1853 case TGSI_EXTSWIZZLE_ONE:
1854 break;
1855
1856 default:
1857 assert (0);
1858 }
1859
1860 #if MESA
1861 fetch_texel_2d (ctx,
1862 &mach->Samplers[inst->FullSrcRegisters[1].SrcRegister.Index],
1863 &r[0], &r[1],
1864 inst->FullSrcRegisters[1].SrcRegister.Index,
1865 &r[0], &r[1], &r[2], &r[3]);
1866 #endif
1867 break;
1868
1869 case TGSI_TEXTURE_3D:
1870 case TGSI_TEXTURE_CUBE:
1871
1872 FETCH(&r[0], 0, CHAN_X);
1873 FETCH(&r[1], 0, CHAN_Y);
1874 FETCH(&r[2], 0, CHAN_Z);
1875
1876 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
1877 case TGSI_EXTSWIZZLE_W:
1878 FETCH(&r[3], 0, CHAN_W);
1879 micro_div( &r[0], &r[0], &r[3] );
1880 micro_div( &r[1], &r[1], &r[3] );
1881 micro_div( &r[2], &r[2], &r[3] );
1882 break;
1883
1884 case TGSI_EXTSWIZZLE_ONE:
1885 break;
1886
1887 default:
1888 assert (0);
1889 }
1890
1891 #if MESA
1892 fetch_texel_3d (ctx,
1893 &mach->Samplers[inst->FullSrcRegisters[1].SrcRegister.Index],
1894 &r[0], &r[1], &r[2],
1895 inst->FullSrcRegisters[1].SrcRegister.Index,
1896 &r[0], &r[1], &r[2], &r[3]);
1897 #endif
1898 break;
1899
1900 default:
1901 assert (0);
1902 }
1903
1904 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1905 STORE( &r[chan_index], 0, chan_index );
1906 }
1907 break;
1908
1909 case TGSI_OPCODE_TXD:
1910 assert (0);
1911 break;
1912
1913 case TGSI_OPCODE_UP2H:
1914 assert (0);
1915 break;
1916
1917 case TGSI_OPCODE_UP2US:
1918 assert (0);
1919 break;
1920
1921 case TGSI_OPCODE_UP4B:
1922 assert (0);
1923 break;
1924
1925 case TGSI_OPCODE_UP4UB:
1926 assert (0);
1927 break;
1928
1929 case TGSI_OPCODE_X2D:
1930 assert (0);
1931 break;
1932
1933 case TGSI_OPCODE_ARA:
1934 assert (0);
1935 break;
1936
1937 case TGSI_OPCODE_ARR:
1938 assert (0);
1939 break;
1940
1941 case TGSI_OPCODE_BRA:
1942 assert (0);
1943 break;
1944
1945 case TGSI_OPCODE_CAL:
1946 assert (0);
1947 break;
1948
1949 case TGSI_OPCODE_RET:
1950 /* XXX: end of shader! */
1951 /*assert (0);*/
1952 break;
1953
1954 case TGSI_OPCODE_SSG:
1955 assert (0);
1956 break;
1957
1958 case TGSI_OPCODE_CMP:
1959 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1960 FETCH(&r[0], 0, chan_index);
1961 FETCH(&r[1], 1, chan_index);
1962 FETCH(&r[2], 2, chan_index);
1963
1964 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
1965
1966 STORE(&r[0], 0, chan_index);
1967 }
1968 break;
1969
1970 case TGSI_OPCODE_SCS:
1971 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1972 FETCH( &r[0], 0, CHAN_X );
1973 }
1974 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
1975 micro_cos( &r[1], &r[0] );
1976 STORE( &r[1], 0, CHAN_X );
1977 }
1978 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1979 micro_sin( &r[1], &r[0] );
1980 STORE( &r[1], 0, CHAN_Y );
1981 }
1982 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
1983 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
1984 }
1985 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
1986 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1987 }
1988 break;
1989
1990 case TGSI_OPCODE_TXB:
1991 assert (0);
1992 break;
1993
1994 case TGSI_OPCODE_NRM:
1995 assert (0);
1996 break;
1997
1998 case TGSI_OPCODE_DIV:
1999 assert( 0 );
2000 break;
2001
2002 case TGSI_OPCODE_DP2:
2003 FETCH( &r[0], 0, CHAN_X );
2004 FETCH( &r[1], 1, CHAN_X );
2005 micro_mul( &r[0], &r[0], &r[1] );
2006
2007 FETCH( &r[1], 0, CHAN_Y );
2008 FETCH( &r[2], 1, CHAN_Y );
2009 micro_mul( &r[1], &r[1], &r[2] );
2010 micro_add( &r[0], &r[0], &r[1] );
2011
2012 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2013 STORE( &r[0], 0, chan_index );
2014 }
2015 break;
2016
2017 case TGSI_OPCODE_TXL:
2018 assert (0);
2019 break;
2020
2021 case TGSI_OPCODE_BRK:
2022 assert (0);
2023 break;
2024
2025 case TGSI_OPCODE_IF:
2026 assert (0);
2027 break;
2028
2029 case TGSI_OPCODE_LOOP:
2030 assert (0);
2031 break;
2032
2033 case TGSI_OPCODE_REP:
2034 assert (0);
2035 break;
2036
2037 case TGSI_OPCODE_ELSE:
2038 assert (0);
2039 break;
2040
2041 case TGSI_OPCODE_ENDIF:
2042 assert (0);
2043 break;
2044
2045 case TGSI_OPCODE_ENDLOOP:
2046 assert (0);
2047 break;
2048
2049 case TGSI_OPCODE_ENDREP:
2050 assert (0);
2051 break;
2052
2053 case TGSI_OPCODE_PUSHA:
2054 assert (0);
2055 break;
2056
2057 case TGSI_OPCODE_POPA:
2058 assert (0);
2059 break;
2060
2061 case TGSI_OPCODE_CEIL:
2062 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2063 FETCH( &r[0], 0, chan_index );
2064 micro_ceil( &r[0], &r[0] );
2065 STORE( &r[0], 0, chan_index );
2066 }
2067 break;
2068
2069 case TGSI_OPCODE_I2F:
2070 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2071 FETCH( &r[0], 0, chan_index );
2072 micro_i2f( &r[0], &r[0] );
2073 STORE( &r[0], 0, chan_index );
2074 }
2075 break;
2076
2077 case TGSI_OPCODE_NOT:
2078 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2079 FETCH( &r[0], 0, chan_index );
2080 micro_not( &r[0], &r[0] );
2081 STORE( &r[0], 0, chan_index );
2082 }
2083 break;
2084
2085 case TGSI_OPCODE_TRUNC:
2086 assert (0);
2087 break;
2088
2089 case TGSI_OPCODE_SHL:
2090 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2091 FETCH( &r[0], 0, chan_index );
2092 FETCH( &r[1], 1, chan_index );
2093 micro_shl( &r[0], &r[0], &r[1] );
2094 STORE( &r[0], 0, chan_index );
2095 }
2096 break;
2097
2098 case TGSI_OPCODE_SHR:
2099 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2100 FETCH( &r[0], 0, chan_index );
2101 FETCH( &r[1], 1, chan_index );
2102 micro_ishr( &r[0], &r[0], &r[1] );
2103 STORE( &r[0], 0, chan_index );
2104 }
2105 break;
2106
2107 case TGSI_OPCODE_AND:
2108 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2109 FETCH( &r[0], 0, chan_index );
2110 FETCH( &r[1], 1, chan_index );
2111 micro_and( &r[0], &r[0], &r[1] );
2112 STORE( &r[0], 0, chan_index );
2113 }
2114 break;
2115
2116 case TGSI_OPCODE_OR:
2117 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2118 FETCH( &r[0], 0, chan_index );
2119 FETCH( &r[1], 1, chan_index );
2120 micro_or( &r[0], &r[0], &r[1] );
2121 STORE( &r[0], 0, chan_index );
2122 }
2123 break;
2124
2125 case TGSI_OPCODE_MOD:
2126 assert (0);
2127 break;
2128
2129 case TGSI_OPCODE_XOR:
2130 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2131 FETCH( &r[0], 0, chan_index );
2132 FETCH( &r[1], 1, chan_index );
2133 micro_xor( &r[0], &r[0], &r[1] );
2134 STORE( &r[0], 0, chan_index );
2135 }
2136 break;
2137
2138 case TGSI_OPCODE_SAD:
2139 assert (0);
2140 break;
2141
2142 case TGSI_OPCODE_TXF:
2143 assert (0);
2144 break;
2145
2146 case TGSI_OPCODE_TXQ:
2147 assert (0);
2148 break;
2149
2150 case TGSI_OPCODE_CONT:
2151 assert (0);
2152 break;
2153
2154 case TGSI_OPCODE_EMIT:
2155 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2156 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2157 break;
2158
2159 case TGSI_OPCODE_ENDPRIM:
2160 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2161 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2162 break;
2163
2164 default:
2165 assert( 0 );
2166 }
2167 }
2168
2169
2170 #if !defined(XSTDCALL)
2171 #if defined(WIN32)
2172 #define XSTDCALL __stdcall
2173 #else
2174 #define XSTDCALL
2175 #endif
2176 #endif
2177
2178 typedef void (XSTDCALL *fp_function) (const struct tgsi_exec_vector *input,
2179 struct tgsi_exec_vector *output,
2180 GLfloat (*constant)[4],
2181 struct tgsi_exec_vector *temporary);
2182
2183 void
2184 tgsi_exec_machine_run2(
2185 struct tgsi_exec_machine *mach,
2186 struct tgsi_exec_labels *labels )
2187 {
2188 #if MESA
2189 GET_CURRENT_CONTEXT(ctx);
2190 GLuint i;
2191 #endif
2192
2193 #if XXX_SSE
2194 fp_function function;
2195
2196 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2197
2198 function = (fp_function) x86_get_func (&mach->Function);
2199
2200 function (mach->Inputs,
2201 mach->Outputs,
2202 mach->Consts,
2203 mach->Temps);
2204 #else
2205 struct tgsi_parse_context parse;
2206 GLuint k;
2207
2208 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2209 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2210
2211 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2212 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2213 mach->Primitives[0] = 0;
2214 }
2215
2216 k = tgsi_parse_init( &parse, mach->Tokens );
2217 if (k != TGSI_PARSE_OK) {
2218 printf("Problem parsing!\n");
2219 return;
2220 }
2221
2222 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2223 tgsi_parse_token( &parse );
2224 switch( parse.FullToken.Token.Type ) {
2225 case TGSI_TOKEN_TYPE_DECLARATION:
2226 break;
2227 case TGSI_TOKEN_TYPE_IMMEDIATE:
2228 break;
2229 case TGSI_TOKEN_TYPE_INSTRUCTION:
2230 exec_instruction( mach, &parse.FullToken.FullInstruction, labels, &parse.Position );
2231 break;
2232 default:
2233 assert( 0 );
2234 }
2235 }
2236 tgsi_parse_free (&parse);
2237 #endif
2238
2239 #if MESA
2240 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2241 /*
2242 * Scale back depth component.
2243 */
2244 for (i = 0; i < 4; i++)
2245 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2246 }
2247 #endif
2248 }
2249