1 #include "tgsi_platform.h"
6 #include "main/context.h"
7 #include "main/macros.h"
8 #include "main/colormac.h"
9 #include "swrast/swrast.h"
10 #include "swrast/s_context.h"
/*
 * Lane numbers (0..3) used to subscript the four values held in a channel
 * (->f[]) by position; the derivative and texture-lambda code below
 * subtracts the TILE_BOTTOM_LEFT lane from the TILE_BOTTOM_RIGHT and
 * TILE_TOP_LEFT lanes.
 * NOTE(review): presumably the four lanes are one 2x2 block of fragments -
 * confirm against the rasterizer that fills them.
 */
13 #define TILE_BOTTOM_LEFT 0
14 #define TILE_BOTTOM_RIGHT 1
15 #define TILE_TOP_LEFT 2
16 #define TILE_TOP_RIGHT 3
/*
 * Short aliases for the TGSI_EXEC_TEMP_* names.  In the code below a *_I
 * alias is used as the subscript into mach->Temps[] and the matching *_C
 * alias as the subscript into that entry's .xyzw[] array.  TEMP_R0 has no
 * _I/_C pair.
 */
18 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
19 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
20 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
21 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
22 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
23 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
24 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
25 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
26 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
27 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
28 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
29 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
30 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
31 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
32 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
33 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
34 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
35 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
36 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
37 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
38 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
39 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
40 #define TEMP_R0 TGSI_EXEC_TEMP_R0
/*
 * Channel-iteration helpers.
 *   FOR_EACH_CHANNEL(CHAN)           - loop header running CHAN over 0..3.
 *   IS_CHANNEL_ENABLED(INST, CHAN)   - non-zero when bit CHAN is set in the
 *                                      write mask of destination register 0.
 *   IS_CHANNEL_ENABLED2(INST, CHAN)  - same test on destination register 1.
 *   FOR_EACH_ENABLED_CHANNEL[2]      - the loop followed by an `if` on the
 *                                      matching test, so the statement after
 *                                      the macro runs only for enabled
 *                                      channels.
 * The FOR_EACH_ENABLED_* forms expand to `for (...) if (...)`, so an `else`
 * written after the macro's statement would bind to the macro's `if`.
 */
42 #define FOR_EACH_CHANNEL(CHAN)\
43 for (CHAN = 0; CHAN < 4; CHAN++)
45 #define IS_CHANNEL_ENABLED(INST, CHAN)\
46 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
48 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
49 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
51 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
52 FOR_EACH_CHANNEL( CHAN )\
53 if (IS_CHANNEL_ENABLED( INST, CHAN ))
55 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
56 FOR_EACH_CHANNEL( CHAN )\
57 if (IS_CHANNEL_ENABLED2( INST, CHAN ))
/*
 * tgsi_exec_machine_init: bind a token stream to an interpreter instance.
 *
 * mach   - machine to initialise.
 * tokens - token stream; the pointer is stored in mach->Tokens.
 *
 * Visible steps: parse the token stream header (printing "Problem parsing!"
 * when tgsi_parse_init does not return TGSI_PARSE_OK), copy the processor
 * type from the parsed header into mach->Processor, release the parser,
 * point mach->Temps at the tgsi_align_128bit() result for mach->_Temps,
 * point mach->Addrs at &mach->Temps[TGSI_EXEC_NUM_TEMPS], call
 * tgsi_emit_sse, then fill all four lanes of the constant temporaries
 * (0x00000000, 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF as .u; 1.0, 2.0, 128.0,
 * -128.0 as .f).
 *
 * NOTE(review): the embedded numbering jumps (67->70, 76->80, 87->91,
 * 100->106), so the return type, the declarations of i and k, what happens
 * after the parse-failure message, the remaining tgsi_emit_sse arguments
 * and the closing braces are not visible in this listing.
 */
65 tgsi_exec_machine_init(
66 struct tgsi_exec_machine
*mach
,
67 struct tgsi_token
*tokens
)
70 struct tgsi_parse_context parse
;
72 mach
->Tokens
= tokens
;
74 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
75 if (k
!= TGSI_PARSE_OK
) {
76 printf("Problem parsing!\n");
80 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
81 tgsi_parse_free (&parse
);
83 mach
->Temps
= (struct tgsi_exec_vector
*) tgsi_align_128bit( mach
->_Temps
);
84 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_NUM_TEMPS
];
87 tgsi_emit_sse (tokens
,
91 /* Setup constants. */
92 for( i
= 0; i
< 4; i
++ ) {
93 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
94 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
95 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
96 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
97 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
98 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
99 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
100 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
/*
 * Pre-pass over mach->Tokens that collects immediates and labels.
 *
 * mach   - machine whose Tokens are walked; Imms[] and ImmLimit are updated.
 * labels - table that receives one {label, token position} pair per
 *          labelled instruction; labels->count indexes the next free slot.
 *
 * For each token: `pointer` records parse.Position before the token is
 * parsed.  IMMEDIATE tokens must carry a multiple of four values (Size - 1)
 * and fit below 256 vectors; their Float values are copied into
 * mach->Imms[ImmLimit + i/4][i%4] and ImmLimit advances by (Size - 1) / 4.
 * INSTRUCTION tokens whose InstructionExtLabel has both Label and Target
 * set store {Label, pointer} at labels->labels[labels->count]
 * (at most 128 entries, enforced by assert only).
 *
 * NOTE(review): the function-name line is not visible; the call
 * tgsi_exec_prepare( mach, &labels ) later in the file presumably refers to
 * this routine - confirm.  The numbering also jumps (109->115, 141->149),
 * so any initialisation of ImmLimit / labels->count, the increment of
 * labels->count, the break statements and the closing braces are not
 * visible here.
 */
106 struct tgsi_exec_machine
*mach
,
107 struct tgsi_exec_labels
*labels
)
109 struct tgsi_parse_context parse
;
115 k
= tgsi_parse_init( &parse
, mach
->Tokens
);
116 if (k
!= TGSI_PARSE_OK
) {
117 printf("Problem parsing!\n");
121 while( !tgsi_parse_end_of_tokens( &parse
) ) {
122 GLuint pointer
= parse
.Position
;
124 tgsi_parse_token( &parse
);
125 switch( parse
.FullToken
.Token
.Type
) {
126 case TGSI_TOKEN_TYPE_DECLARATION
:
128 case TGSI_TOKEN_TYPE_IMMEDIATE
:
129 assert( (parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1) % 4 == 0 );
130 assert( mach
->ImmLimit
+ (parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1) / 4 <= 256 );
131 for( i
= 0; i
< parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1; i
++ ) {
132 mach
->Imms
[mach
->ImmLimit
+ i
/ 4][i
% 4] = parse
.FullToken
.FullImmediate
.u
.ImmediateFloat32
[i
].Float
;
134 mach
->ImmLimit
+= (parse
.FullToken
.FullImmediate
.Immediate
.Size
- 1) / 4;
136 case TGSI_TOKEN_TYPE_INSTRUCTION
:
137 if( parse
.FullToken
.FullInstruction
.InstructionExtLabel
.Label
&&
138 parse
.FullToken
.FullInstruction
.InstructionExtLabel
.Target
) {
139 assert( labels
->count
< 128 );
140 labels
->labels
[labels
->count
][0] = parse
.FullToken
.FullInstruction
.InstructionExtLabel
.Label
;
141 labels
->labels
[labels
->count
][1] = pointer
;
149 tgsi_parse_free (&parse
);
/*
 * tgsi_exec_machine_run: convenience entry point.  Builds a local label
 * table with tgsi_exec_prepare() and then hands machine and table to
 * tgsi_exec_machine_run2().
 *
 * mach - machine to run; its token stream must already be bound.
 *
 * NOTE(review): the return type and braces are not visible in this listing.
 */
153 tgsi_exec_machine_run(
154 struct tgsi_exec_machine
*mach
)
156 struct tgsi_exec_labels labels
;
158 tgsi_exec_prepare( mach
, &labels
);
159 tgsi_exec_machine_run2( mach
, &labels
);
/*
 * Lane-wise float absolute value: dst->f[i] = fabs(src->f[i]) for the four
 * lanes i = 0..3 (computed in double, stored as GLfloat).
 * NOTE(review): the function-name line is not visible in this listing;
 * the calls micro_abs( chan, chan ) further down presumably refer to this
 * routine - confirm.
 */
164 union tgsi_exec_channel
*dst
,
165 const union tgsi_exec_channel
*src
)
167 dst
->f
[0] = (GLfloat
) fabs( (GLdouble
) src
->f
[0] );
168 dst
->f
[1] = (GLfloat
) fabs( (GLdouble
) src
->f
[1] );
169 dst
->f
[2] = (GLfloat
) fabs( (GLdouble
) src
->f
[2] );
170 dst
->f
[3] = (GLfloat
) fabs( (GLdouble
) src
->f
[3] );
/*
 * Lane-wise float addition: dst->f[i] = src0->f[i] + src1->f[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_add, by analogy with micro_sqrt) - confirm.
 */
175 union tgsi_exec_channel
*dst
,
176 const union tgsi_exec_channel
*src0
,
177 const union tgsi_exec_channel
*src1
)
179 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
180 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
181 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
182 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
/*
 * Lane-wise integer addition on the .i view:
 * dst->i[i] = src0->i[i] + src1->i[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_iadd) - confirm.  No overflow handling is visible.
 */
187 union tgsi_exec_channel
*dst
,
188 const union tgsi_exec_channel
*src0
,
189 const union tgsi_exec_channel
*src1
)
191 dst
->i
[0] = src0
->i
[0] + src1
->i
[0];
192 dst
->i
[1] = src0
->i
[1] + src1
->i
[1];
193 dst
->i
[2] = src0
->i
[2] + src1
->i
[2];
194 dst
->i
[3] = src0
->i
[3] + src1
->i
[3];
/*
 * Lane-wise bitwise AND on the .u view:
 * dst->u[i] = src0->u[i] & src1->u[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_and) - confirm.
 */
199 union tgsi_exec_channel
*dst
,
200 const union tgsi_exec_channel
*src0
,
201 const union tgsi_exec_channel
*src1
)
203 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
204 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
205 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
206 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
/*
 * Lane-wise ceiling: dst->f[i] = ceil(src->f[i]), i = 0..3 (computed in
 * double, stored as GLfloat).
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_ceil) - confirm.
 */
211 union tgsi_exec_channel
*dst
,
212 const union tgsi_exec_channel
*src
)
214 dst
->f
[0] = (GLfloat
) ceil( (GLdouble
) src
->f
[0] );
215 dst
->f
[1] = (GLfloat
) ceil( (GLdouble
) src
->f
[1] );
216 dst
->f
[2] = (GLfloat
) ceil( (GLdouble
) src
->f
[2] );
217 dst
->f
[3] = (GLfloat
) ceil( (GLdouble
) src
->f
[3] );
/*
 * Lane-wise cosine: dst->f[i] = cos(src->f[i]), i = 0..3 (computed in
 * double, stored as GLfloat).
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_cos) - confirm.
 */
222 union tgsi_exec_channel
*dst
,
223 const union tgsi_exec_channel
*src
)
225 dst
->f
[0] = (GLfloat
) cos( (GLdouble
) src
->f
[0] );
226 dst
->f
[1] = (GLfloat
) cos( (GLdouble
) src
->f
[1] );
227 dst
->f
[2] = (GLfloat
) cos( (GLdouble
) src
->f
[2] );
228 dst
->f
[3] = (GLfloat
) cos( (GLdouble
) src
->f
[3] );
/*
 * Horizontal difference across the tile: the visible statement stores
 * src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT] into dst->f[3].
 * NOTE(review): the embedded numbering jumps from 234 to 239, so the
 * function-name line and the statements for lanes 0..2 are missing from
 * this listing; presumably all four lanes receive the same difference -
 * confirm against the real file.
 */
233 union tgsi_exec_channel
*dst
,
234 const union tgsi_exec_channel
*src
)
239 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
/*
 * Vertical difference across the tile: the visible statement stores
 * src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT] into dst->f[3].
 * NOTE(review): the embedded numbering jumps from 245 to 250, so the
 * function-name line and the statements for lanes 0..2 are missing from
 * this listing; presumably all four lanes receive the same difference -
 * confirm against the real file.
 */
244 union tgsi_exec_channel
*dst
,
245 const union tgsi_exec_channel
*src
)
250 dst
->f
[3] = src
->f
[TILE_TOP_LEFT
] - src
->f
[TILE_BOTTOM_LEFT
];
/*
 * Lane-wise float division: dst->f[i] = src0->f[i] / src1->f[i], i = 0..3.
 * No check for a zero divisor is made; the result is whatever the float
 * division yields.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_div) - confirm.
 */
255 union tgsi_exec_channel
*dst
,
256 const union tgsi_exec_channel
*src0
,
257 const union tgsi_exec_channel
*src1
)
259 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
260 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
261 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
262 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
/*
 * Lane-wise unsigned division on the .u view:
 * dst->u[i] = src0->u[i] / src1->u[i], i = 0..3.
 * NOTE(review): there is no guard for src1->u[i] == 0; integer division by
 * zero is undefined behaviour in C (typically a trap).  Callers must
 * guarantee non-zero divisors, or a guard should be added here.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_udiv) - confirm.
 */
267 union tgsi_exec_channel
*dst
,
268 const union tgsi_exec_channel
*src0
,
269 const union tgsi_exec_channel
*src1
)
271 dst
->u
[0] = src0
->u
[0] / src1
->u
[0];
272 dst
->u
[1] = src0
->u
[1] / src1
->u
[1];
273 dst
->u
[2] = src0
->u
[2] / src1
->u
[2];
274 dst
->u
[3] = src0
->u
[3] / src1
->u
[3];
/*
 * Lane-wise float select on equality:
 * dst->f[i] = (src0->f[i] == src1->f[i]) ? src2->f[i] : src3->f[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_eq) - confirm.
 */
279 union tgsi_exec_channel
*dst
,
280 const union tgsi_exec_channel
*src0
,
281 const union tgsi_exec_channel
*src1
,
282 const union tgsi_exec_channel
*src2
,
283 const union tgsi_exec_channel
*src3
)
285 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
286 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
287 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
288 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
/*
 * Lane-wise integer select on equality (.i view):
 * dst->i[i] = (src0->i[i] == src1->i[i]) ? src2->i[i] : src3->i[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_ieq) - confirm.
 */
293 union tgsi_exec_channel
*dst
,
294 const union tgsi_exec_channel
*src0
,
295 const union tgsi_exec_channel
*src1
,
296 const union tgsi_exec_channel
*src2
,
297 const union tgsi_exec_channel
*src3
)
299 dst
->i
[0] = src0
->i
[0] == src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
300 dst
->i
[1] = src0
->i
[1] == src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
301 dst
->i
[2] = src0
->i
[2] == src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
302 dst
->i
[3] = src0
->i
[3] == src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
/*
 * Lane-wise base-2 exponential: dst->f[i] = pow(2.0, src->f[i]), i = 0..3
 * (computed in double, stored as GLfloat).
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_exp2) - confirm.
 */
307 union tgsi_exec_channel
*dst
,
308 const union tgsi_exec_channel
*src
)
310 dst
->f
[0] = (GLfloat
) pow( 2.0, (GLdouble
) src
->f
[0] );
311 dst
->f
[1] = (GLfloat
) pow( 2.0, (GLdouble
) src
->f
[1] );
312 dst
->f
[2] = (GLfloat
) pow( 2.0, (GLdouble
) src
->f
[2] );
313 dst
->f
[3] = (GLfloat
) pow( 2.0, (GLdouble
) src
->f
[3] );
/*
 * Lane-wise float to signed integer conversion by C cast (truncation toward
 * zero): dst->i[i] = (GLint) src->f[i], i = 0..3.
 * NOTE(review): values outside the GLint range (and NaN) make the cast
 * undefined in C; no range check is visible.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_f2it) - confirm.
 */
318 union tgsi_exec_channel
*dst
,
319 const union tgsi_exec_channel
*src
)
321 dst
->i
[0] = (GLint
) src
->f
[0];
322 dst
->i
[1] = (GLint
) src
->f
[1];
323 dst
->i
[2] = (GLint
) src
->f
[2];
324 dst
->i
[3] = (GLint
) src
->f
[3];
/*
 * Lane-wise float to unsigned integer conversion by C cast (truncation):
 * dst->u[i] = (GLuint) src->f[i], i = 0..3.
 * NOTE(review): negative values below -1, values above the GLuint range
 * and NaN make the cast undefined in C; no range check is visible.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_f2ut) - confirm.
 */
329 union tgsi_exec_channel
*dst
,
330 const union tgsi_exec_channel
*src
)
332 dst
->u
[0] = (GLuint
) src
->f
[0];
333 dst
->u
[1] = (GLuint
) src
->f
[1];
334 dst
->u
[2] = (GLuint
) src
->f
[2];
335 dst
->u
[3] = (GLuint
) src
->f
[3];
/*
 * Lane-wise floor: dst->f[i] = floor(src->f[i]), i = 0..3 (computed in
 * double, stored as GLfloat).
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_flr) - confirm.
 */
340 union tgsi_exec_channel
*dst
,
341 const union tgsi_exec_channel
*src
)
343 dst
->f
[0] = (GLfloat
) floor( (GLdouble
) src
->f
[0] );
344 dst
->f
[1] = (GLfloat
) floor( (GLdouble
) src
->f
[1] );
345 dst
->f
[2] = (GLfloat
) floor( (GLdouble
) src
->f
[2] );
346 dst
->f
[3] = (GLfloat
) floor( (GLdouble
) src
->f
[3] );
/*
 * Lane-wise fractional part: dst->f[i] = src->f[i] - floor(src->f[i]),
 * i = 0..3.  Because floor rounds toward negative infinity the result is
 * non-negative for finite inputs.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_frc) - confirm.
 */
351 union tgsi_exec_channel
*dst
,
352 const union tgsi_exec_channel
*src
)
354 dst
->f
[0] = src
->f
[0] - (GLfloat
) floor( (GLdouble
) src
->f
[0] );
355 dst
->f
[1] = src
->f
[1] - (GLfloat
) floor( (GLdouble
) src
->f
[1] );
356 dst
->f
[2] = src
->f
[2] - (GLfloat
) floor( (GLdouble
) src
->f
[2] );
357 dst
->f
[3] = src
->f
[3] - (GLfloat
) floor( (GLdouble
) src
->f
[3] );
/*
 * Lane-wise signed integer to float conversion:
 * dst->f[i] = (GLfloat) src->i[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_i2f) - confirm.
 */
362 union tgsi_exec_channel
*dst
,
363 const union tgsi_exec_channel
*src
)
365 dst
->f
[0] = (GLfloat
) src
->i
[0];
366 dst
->f
[1] = (GLfloat
) src
->i
[1];
367 dst
->f
[2] = (GLfloat
) src
->i
[2];
368 dst
->f
[3] = (GLfloat
) src
->i
[3];
/*
 * Lane-wise base-2 logarithm: dst->f[i] = (GLfloat) log(src->f[i]) *
 * 1.442695f, i = 0..3.  The factor 1.442695 is approximately 1 / ln(2), so
 * the natural log is rescaled to log2; the cast to GLfloat applies to the
 * log() result before the multiplication.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_lg2) - confirm.
 */
373 union tgsi_exec_channel
*dst
,
374 const union tgsi_exec_channel
*src
)
376 dst
->f
[0] = (GLfloat
) log( (GLdouble
) src
->f
[0] ) * 1.442695f
;
377 dst
->f
[1] = (GLfloat
) log( (GLdouble
) src
->f
[1] ) * 1.442695f
;
378 dst
->f
[2] = (GLfloat
) log( (GLdouble
) src
->f
[2] ) * 1.442695f
;
379 dst
->f
[3] = (GLfloat
) log( (GLdouble
) src
->f
[3] ) * 1.442695f
;
/*
 * Lane-wise float select on less-than:
 * dst->f[i] = (src0->f[i] < src1->f[i]) ? src2->f[i] : src3->f[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing; the
 * five-argument micro_lt( ... ) calls in the saturate code further down
 * presumably refer to this routine - confirm.
 */
384 union tgsi_exec_channel
*dst
,
385 const union tgsi_exec_channel
*src0
,
386 const union tgsi_exec_channel
*src1
,
387 const union tgsi_exec_channel
*src2
,
388 const union tgsi_exec_channel
*src3
)
390 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
391 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
392 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
393 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
/*
 * Lane-wise signed integer select on less-than (.i view):
 * dst->i[i] = (src0->i[i] < src1->i[i]) ? src2->i[i] : src3->i[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_ilt) - confirm.
 */
398 union tgsi_exec_channel
*dst
,
399 const union tgsi_exec_channel
*src0
,
400 const union tgsi_exec_channel
*src1
,
401 const union tgsi_exec_channel
*src2
,
402 const union tgsi_exec_channel
*src3
)
404 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src2
->i
[0] : src3
->i
[0];
405 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src2
->i
[1] : src3
->i
[1];
406 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src2
->i
[2] : src3
->i
[2];
407 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src2
->i
[3] : src3
->i
[3];
/*
 * Lane-wise unsigned select on less-than (.u view):
 * dst->u[i] = (src0->u[i] < src1->u[i]) ? src2->u[i] : src3->u[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_ult) - confirm.
 */
412 union tgsi_exec_channel
*dst
,
413 const union tgsi_exec_channel
*src0
,
414 const union tgsi_exec_channel
*src1
,
415 const union tgsi_exec_channel
*src2
,
416 const union tgsi_exec_channel
*src3
)
418 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src2
->u
[0] : src3
->u
[0];
419 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src2
->u
[1] : src3
->u
[1];
420 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src2
->u
[2] : src3
->u
[2];
421 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src2
->u
[3] : src3
->u
[3];
/*
 * Lane-wise float maximum: dst->f[i] = src0->f[i] > src1->f[i] ?
 * src0->f[i] : src1->f[i], i = 0..3.  When the comparison is false
 * (including when either operand is NaN) the src1 lane is returned.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_max) - confirm.
 */
426 union tgsi_exec_channel
*dst
,
427 const union tgsi_exec_channel
*src0
,
428 const union tgsi_exec_channel
*src1
)
430 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
431 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
432 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
433 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
/*
 * Lane-wise signed integer maximum (.i view):
 * dst->i[i] = src0->i[i] > src1->i[i] ? src0->i[i] : src1->i[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_imax) - confirm.
 */
438 union tgsi_exec_channel
*dst
,
439 const union tgsi_exec_channel
*src0
,
440 const union tgsi_exec_channel
*src1
)
442 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
443 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
444 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
445 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
/*
 * Lane-wise unsigned maximum (.u view):
 * dst->u[i] = src0->u[i] > src1->u[i] ? src0->u[i] : src1->u[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_umax) - confirm.
 */
450 union tgsi_exec_channel
*dst
,
451 const union tgsi_exec_channel
*src0
,
452 const union tgsi_exec_channel
*src1
)
454 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
455 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
456 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
457 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
/*
 * Lane-wise float minimum: dst->f[i] = src0->f[i] < src1->f[i] ?
 * src0->f[i] : src1->f[i], i = 0..3.  When the comparison is false
 * (including when either operand is NaN) the src1 lane is returned.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_min) - confirm.
 */
462 union tgsi_exec_channel
*dst
,
463 const union tgsi_exec_channel
*src0
,
464 const union tgsi_exec_channel
*src1
)
466 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
467 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
468 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
469 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
/*
 * Lane-wise signed integer minimum (.i view):
 * dst->i[i] = src0->i[i] < src1->i[i] ? src0->i[i] : src1->i[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_imin) - confirm.
 */
474 union tgsi_exec_channel
*dst
,
475 const union tgsi_exec_channel
*src0
,
476 const union tgsi_exec_channel
*src1
)
478 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
479 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
480 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
481 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
/*
 * Lane-wise unsigned minimum (.u view):
 * dst->u[i] = src0->u[i] < src1->u[i] ? src0->u[i] : src1->u[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_umin) - confirm.
 */
486 union tgsi_exec_channel
*dst
,
487 const union tgsi_exec_channel
*src0
,
488 const union tgsi_exec_channel
*src1
)
490 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
491 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
492 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
493 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
/*
 * Lane-wise unsigned remainder (.u view):
 * dst->u[i] = src0->u[i] % src1->u[i], i = 0..3.
 * NOTE(review): there is no guard for src1->u[i] == 0; the % operator with
 * a zero right operand is undefined behaviour in C (typically a trap).
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_umod) - confirm.
 */
498 union tgsi_exec_channel
*dst
,
499 const union tgsi_exec_channel
*src0
,
500 const union tgsi_exec_channel
*src1
)
502 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
503 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
504 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
505 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
/*
 * Lane-wise float multiplication:
 * dst->f[i] = src0->f[i] * src1->f[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_mul) - confirm.
 */
510 union tgsi_exec_channel
*dst
,
511 const union tgsi_exec_channel
*src0
,
512 const union tgsi_exec_channel
*src1
)
514 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
515 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
516 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
517 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
/*
 * Lane-wise signed integer multiplication (.i view):
 * dst->i[i] = src0->i[i] * src1->i[i], i = 0..3.  No overflow handling is
 * visible.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_imul) - confirm.
 */
522 union tgsi_exec_channel
*dst
,
523 const union tgsi_exec_channel
*src0
,
524 const union tgsi_exec_channel
*src1
)
526 dst
->i
[0] = src0
->i
[0] * src1
->i
[0];
527 dst
->i
[1] = src0
->i
[1] * src1
->i
[1];
528 dst
->i
[2] = src0
->i
[2] * src1
->i
[2];
529 dst
->i
[3] = src0
->i
[3] * src1
->i
[3];
/*
 * Two-destination signed multiply.  The visible statements store the
 * lane-wise product src0->i[i] * src1->i[i] (computed at the width of the
 * .i lanes) into dst1->i[i], i = 0..3.
 * NOTE(review): dst0 is not written by any visible statement, but the
 * embedded numbering jumps from 542 to 551, so statements (and the
 * function-name line) are missing from this listing; what dst0 receives
 * cannot be told from here - confirm against the real file.
 */
534 union tgsi_exec_channel
*dst0
,
535 union tgsi_exec_channel
*dst1
,
536 const union tgsi_exec_channel
*src0
,
537 const union tgsi_exec_channel
*src1
)
539 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
540 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
541 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
542 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
/*
 * Two-destination unsigned multiply.  The visible statements store the
 * lane-wise product src0->u[i] * src1->u[i] (computed at the width of the
 * .u lanes) into dst1->u[i], i = 0..3.
 * NOTE(review): dst0 is not written by any visible statement, but the
 * embedded numbering jumps from 559 to 568, so statements (and the
 * function-name line) are missing from this listing; what dst0 receives
 * cannot be told from here - confirm against the real file.
 */
551 union tgsi_exec_channel
*dst0
,
552 union tgsi_exec_channel
*dst1
,
553 const union tgsi_exec_channel
*src0
,
554 const union tgsi_exec_channel
*src1
)
556 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
557 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
558 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
559 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
/*
 * Lane-wise conditional move on the .u view:
 * dst->u[i] = src0->u[i] ? src1->u[i] : src2->u[i], i = 0..3.  The
 * condition is "any bit set" in the src0 lane.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_movc) - confirm.
 */
568 union tgsi_exec_channel
*dst
,
569 const union tgsi_exec_channel
*src0
,
570 const union tgsi_exec_channel
*src1
,
571 const union tgsi_exec_channel
*src2
)
573 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
574 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
575 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
576 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
/*
 * Lane-wise float negation: dst->f[i] = -src->f[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing; the
 * calls micro_neg( chan, chan ) further down presumably refer to this
 * routine - confirm.
 */
581 union tgsi_exec_channel
*dst
,
582 const union tgsi_exec_channel
*src
)
584 dst
->f
[0] = -src
->f
[0];
585 dst
->f
[1] = -src
->f
[1];
586 dst
->f
[2] = -src
->f
[2];
587 dst
->f
[3] = -src
->f
[3];
/*
 * Lane-wise signed integer negation (.i view):
 * dst->i[i] = -src->i[i], i = 0..3.  No handling of the most negative
 * value is visible.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_ineg) - confirm.
 */
592 union tgsi_exec_channel
*dst
,
593 const union tgsi_exec_channel
*src
)
595 dst
->i
[0] = -src
->i
[0];
596 dst
->i
[1] = -src
->i
[1];
597 dst
->i
[2] = -src
->i
[2];
598 dst
->i
[3] = -src
->i
[3];
/*
 * Lane-wise bitwise complement (.u view):
 * dst->u[i] = ~src->u[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_not) - confirm.
 */
603 union tgsi_exec_channel
*dst
,
604 const union tgsi_exec_channel
*src
)
606 dst
->u
[0] = ~src
->u
[0];
607 dst
->u
[1] = ~src
->u
[1];
608 dst
->u
[2] = ~src
->u
[2];
609 dst
->u
[3] = ~src
->u
[3];
/*
 * Lane-wise bitwise OR (.u view):
 * dst->u[i] = src0->u[i] | src1->u[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_or) - confirm.
 */
614 union tgsi_exec_channel
*dst
,
615 const union tgsi_exec_channel
*src0
,
616 const union tgsi_exec_channel
*src1
)
618 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
619 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
620 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
621 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
/*
 * Lane-wise power: dst->f[i] = pow(src0->f[i], src1->f[i]), i = 0..3
 * (computed in double, stored as GLfloat).
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_pow) - confirm.
 */
626 union tgsi_exec_channel
*dst
,
627 const union tgsi_exec_channel
*src0
,
628 const union tgsi_exec_channel
*src1
)
630 dst
->f
[0] = (GLfloat
) pow( (GLdouble
) src0
->f
[0], (GLdouble
) src1
->f
[0] );
631 dst
->f
[1] = (GLfloat
) pow( (GLdouble
) src0
->f
[1], (GLdouble
) src1
->f
[1] );
632 dst
->f
[2] = (GLfloat
) pow( (GLdouble
) src0
->f
[2], (GLdouble
) src1
->f
[2] );
633 dst
->f
[3] = (GLfloat
) pow( (GLdouble
) src0
->f
[3], (GLdouble
) src1
->f
[3] );
/*
 * Lane-wise rounding to nearest via floor(x + 0.5):
 * dst->f[i] = floor(src->f[i] + 0.5f), i = 0..3.  The addition is done in
 * float before the widening to double; exact halves round upward (toward
 * positive infinity).
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_rnd) - confirm.
 */
638 union tgsi_exec_channel
*dst
,
639 const union tgsi_exec_channel
*src
)
641 dst
->f
[0] = (GLfloat
) floor( (GLdouble
) (src
->f
[0] + 0.5f
) );
642 dst
->f
[1] = (GLfloat
) floor( (GLdouble
) (src
->f
[1] + 0.5f
) );
643 dst
->f
[2] = (GLfloat
) floor( (GLdouble
) (src
->f
[2] + 0.5f
) );
644 dst
->f
[3] = (GLfloat
) floor( (GLdouble
) (src
->f
[3] + 0.5f
) );
/*
 * Lane-wise left shift on the signed .i view:
 * dst->i[i] = src0->i[i] << src1->i[i], i = 0..3.
 * NOTE(review): the shift count is used unmasked.  In C a negative count,
 * a count >= the width of the operand, or a left shift of a negative value
 * is undefined behaviour; callers must guarantee valid operands or the
 * count should be masked here.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_shl) - confirm.
 */
649 union tgsi_exec_channel
*dst
,
650 const union tgsi_exec_channel
*src0
,
651 const union tgsi_exec_channel
*src1
)
653 dst
->i
[0] = src0
->i
[0] << src1
->i
[0];
654 dst
->i
[1] = src0
->i
[1] << src1
->i
[1];
655 dst
->i
[2] = src0
->i
[2] << src1
->i
[2];
656 dst
->i
[3] = src0
->i
[3] << src1
->i
[3];
/*
 * Lane-wise right shift on the signed .i view:
 * dst->i[i] = src0->i[i] >> src1->i[i], i = 0..3.
 * NOTE(review): the shift count is used unmasked; a negative count or a
 * count >= the operand width is undefined behaviour in C, and the result
 * of right-shifting a negative value is implementation-defined.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_ishr) - confirm.
 */
661 union tgsi_exec_channel
*dst
,
662 const union tgsi_exec_channel
*src0
,
663 const union tgsi_exec_channel
*src1
)
665 dst
->i
[0] = src0
->i
[0] >> src1
->i
[0];
666 dst
->i
[1] = src0
->i
[1] >> src1
->i
[1];
667 dst
->i
[2] = src0
->i
[2] >> src1
->i
[2];
668 dst
->i
[3] = src0
->i
[3] >> src1
->i
[3];
/*
 * Lane-wise logical right shift on the unsigned .u view:
 * dst->u[i] = src0->u[i] >> src1->u[i], i = 0..3.
 * NOTE(review): the shift count is used unmasked; a count >= the operand
 * width is undefined behaviour in C.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_shr) - confirm.
 */
673 union tgsi_exec_channel
*dst
,
674 const union tgsi_exec_channel
*src0
,
675 const union tgsi_exec_channel
*src1
)
677 dst
->u
[0] = src0
->u
[0] >> src1
->u
[0];
678 dst
->u
[1] = src0
->u
[1] >> src1
->u
[1];
679 dst
->u
[2] = src0
->u
[2] >> src1
->u
[2];
680 dst
->u
[3] = src0
->u
[3] >> src1
->u
[3];
/*
 * Lane-wise sine: dst->f[i] = sin(src->f[i]), i = 0..3 (computed in
 * double, stored as GLfloat).
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_sin) - confirm.
 */
685 union tgsi_exec_channel
*dst
,
686 const union tgsi_exec_channel
*src
)
688 dst
->f
[0] = (GLfloat
) sin( (GLdouble
) src
->f
[0] );
689 dst
->f
[1] = (GLfloat
) sin( (GLdouble
) src
->f
[1] );
690 dst
->f
[2] = (GLfloat
) sin( (GLdouble
) src
->f
[2] );
691 dst
->f
[3] = (GLfloat
) sin( (GLdouble
) src
->f
[3] );
/*
 * micro_sqrt: lane-wise square root, dst->f[i] = sqrt(src->f[i]) for
 * i = 0..3 (computed in double, stored as GLfloat).  Negative inputs are
 * passed straight to sqrt(); no check is made here.
 *
 * dst - channel that receives the four results.
 * src - channel supplying the four operands.
 *
 * NOTE(review): the return type and braces are not visible in this listing.
 */
695 micro_sqrt( union tgsi_exec_channel
*dst
,
696 const union tgsi_exec_channel
*src
)
698 dst
->f
[0] = (GLfloat
) sqrt( (GLdouble
) src
->f
[0] );
699 dst
->f
[1] = (GLfloat
) sqrt( (GLdouble
) src
->f
[1] );
700 dst
->f
[2] = (GLfloat
) sqrt( (GLdouble
) src
->f
[2] );
701 dst
->f
[3] = (GLfloat
) sqrt( (GLdouble
) src
->f
[3] );
/*
 * Lane-wise float subtraction:
 * dst->f[i] = src0->f[i] - src1->f[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_sub) - confirm.
 */
706 union tgsi_exec_channel
*dst
,
707 const union tgsi_exec_channel
*src0
,
708 const union tgsi_exec_channel
*src1
)
710 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
711 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
712 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
713 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
/*
 * Lane-wise unsigned integer to float conversion:
 * dst->f[i] = (GLfloat) src->u[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_u2f) - confirm.
 */
718 union tgsi_exec_channel
*dst
,
719 const union tgsi_exec_channel
*src
)
721 dst
->f
[0] = (GLfloat
) src
->u
[0];
722 dst
->f
[1] = (GLfloat
) src
->u
[1];
723 dst
->f
[2] = (GLfloat
) src
->u
[2];
724 dst
->f
[3] = (GLfloat
) src
->u
[3];
/*
 * Lane-wise bitwise exclusive OR (.u view):
 * dst->u[i] = src0->u[i] ^ src1->u[i], i = 0..3.
 * NOTE(review): the function-name line is not visible in this listing
 * (presumably micro_xor) - confirm.
 */
729 union tgsi_exec_channel
*dst
,
730 const union tgsi_exec_channel
*src0
,
731 const union tgsi_exec_channel
*src1
)
733 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
734 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
735 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
736 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
/*
 * fetch_src_file_channel: read one swizzled component of a register for all
 * four lanes.
 *
 * mach    - machine whose register files are read (not modified).
 * swizzle - extended swizzle selecting the component; X/Y/Z/W read from a
 *           register file, ZERO and ONE copy the constant temporaries
 *           Temps[TEMP_0_I].xyzw[TEMP_0_C] / Temps[TEMP_1_I].xyzw[TEMP_1_C].
 * index   - per-lane register index (index->i[0..3]); each lane may address
 *           a different register.
 * chan    - receives the four fetched values.
 *
 * Per register file, for lane k:
 *   CONSTANT  - chan->f[k] = mach->Consts[index->i[k]][swizzle]
 *   INPUT     - chan->u[k] = mach->Inputs[index->i[k]].xyzw[swizzle].u[k]
 *   TEMPORARY - chan->u[k] = mach->Temps[index->i[k]].xyzw[swizzle].u[k]
 *   IMMEDIATE - asserts index->i[k] < ImmLimit, then
 *               chan->f[k] = mach->Imms[index->i[k]][swizzle]
 *   ADDRESS   - chan->u[k] = mach->Addrs[index->i[k]].xyzw[swizzle].u[k]
 * Only the IMMEDIATE path checks the index, and only by assert (upper bound).
 *
 * NOTE(review): the embedded numbering jumps (741->743, 751->753, 789->797),
 * so the register-file parameter, both switch headers, the break statements
 * and any default cases are not visible in this listing.
 */
740 fetch_src_file_channel(
741 const struct tgsi_exec_machine
*mach
,
743 const GLuint swizzle
,
744 const union tgsi_exec_channel
*index
,
745 union tgsi_exec_channel
*chan
)
748 case TGSI_EXTSWIZZLE_X
:
749 case TGSI_EXTSWIZZLE_Y
:
750 case TGSI_EXTSWIZZLE_Z
:
751 case TGSI_EXTSWIZZLE_W
:
753 case TGSI_FILE_CONSTANT
:
754 chan
->f
[0] = mach
->Consts
[index
->i
[0]][swizzle
];
755 chan
->f
[1] = mach
->Consts
[index
->i
[1]][swizzle
];
756 chan
->f
[2] = mach
->Consts
[index
->i
[2]][swizzle
];
757 chan
->f
[3] = mach
->Consts
[index
->i
[3]][swizzle
];
760 case TGSI_FILE_INPUT
:
761 chan
->u
[0] = mach
->Inputs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
762 chan
->u
[1] = mach
->Inputs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
763 chan
->u
[2] = mach
->Inputs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
764 chan
->u
[3] = mach
->Inputs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
767 case TGSI_FILE_TEMPORARY
:
768 chan
->u
[0] = mach
->Temps
[index
->i
[0]].xyzw
[swizzle
].u
[0];
769 chan
->u
[1] = mach
->Temps
[index
->i
[1]].xyzw
[swizzle
].u
[1];
770 chan
->u
[2] = mach
->Temps
[index
->i
[2]].xyzw
[swizzle
].u
[2];
771 chan
->u
[3] = mach
->Temps
[index
->i
[3]].xyzw
[swizzle
].u
[3];
774 case TGSI_FILE_IMMEDIATE
:
775 assert( index
->i
[0] < (GLint
) mach
->ImmLimit
);
776 chan
->f
[0] = mach
->Imms
[index
->i
[0]][swizzle
];
777 assert( index
->i
[1] < (GLint
) mach
->ImmLimit
);
778 chan
->f
[1] = mach
->Imms
[index
->i
[1]][swizzle
];
779 assert( index
->i
[2] < (GLint
) mach
->ImmLimit
);
780 chan
->f
[2] = mach
->Imms
[index
->i
[2]][swizzle
];
781 assert( index
->i
[3] < (GLint
) mach
->ImmLimit
);
782 chan
->f
[3] = mach
->Imms
[index
->i
[3]][swizzle
];
785 case TGSI_FILE_ADDRESS
:
786 chan
->u
[0] = mach
->Addrs
[index
->i
[0]].xyzw
[swizzle
].u
[0];
787 chan
->u
[1] = mach
->Addrs
[index
->i
[1]].xyzw
[swizzle
].u
[1];
788 chan
->u
[2] = mach
->Addrs
[index
->i
[2]].xyzw
[swizzle
].u
[2];
789 chan
->u
[3] = mach
->Addrs
[index
->i
[3]].xyzw
[swizzle
].u
[3];
797 case TGSI_EXTSWIZZLE_ZERO
:
798 *chan
= mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
];
801 case TGSI_EXTSWIZZLE_ONE
:
802 *chan
= mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
];
/*
 * Fetch one channel of a full source register into *chan, applying
 * addressing and sign modifiers.
 *
 * mach       - machine to read from.
 * chan       - receives the four-lane result.
 * reg        - full source register description.
 * chan_index - which destination-order channel is being fetched; it is
 *              mapped to an extended swizzle with
 *              tgsi_util_get_full_src_register_extswizzle().
 *
 * Visible steps:
 *   1. The per-lane index starts from reg->SrcRegister.Index.
 *   2. If SrcRegister.Indirect is set, a second register (SrcRegisterInd,
 *      X swizzle) is fetched with fetch_src_file_channel() and its lanes
 *      are added to the index lane by lane.
 *   3. If SrcRegister.Dimension is set, SrcRegisterDim.Index is added, and
 *      when SrcRegisterDim.Indirect is set a further fetched offset
 *      (SrcRegisterDimInd) is added as well.
 *   4. The addressed value is fetched, then the sign mode is applied:
 *      SIGN_CLEAR -> micro_abs, SIGN_SET -> micro_abs then micro_neg,
 *      SIGN_TOGGLE -> micro_neg, SIGN_KEEP -> no call visible.
 *
 * NOTE(review): the function-name line is not visible; the FETCH macro
 * calls fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN),
 * which presumably is this routine - confirm.  The numbering jumps in many
 * places (e.g. 817->823, 827->832, 850->856->866), so the assignments to
 * index lanes 0..2, the declaration of `swizzle`, the remaining arguments
 * of the fetch_src_file_channel calls and the per-file work under the
 * Dimension switch are not visible in this listing.
 * NOTE(review): "( ®" below looks like a mis-decoded "&reg" (address-of
 * reg) from the extraction; it is left untouched here.
 */
812 const struct tgsi_exec_machine
*mach
,
813 union tgsi_exec_channel
*chan
,
814 const struct tgsi_full_src_register
*reg
,
815 const GLuint chan_index
)
817 union tgsi_exec_channel index
;
823 index
.i
[3] = reg
->SrcRegister
.Index
;
825 if (reg
->SrcRegister
.Indirect
) {
826 union tgsi_exec_channel index2
;
827 union tgsi_exec_channel indir_index
;
832 index2
.i
[3] = reg
->SrcRegisterInd
.Index
;
834 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterInd
, CHAN_X
);
835 fetch_src_file_channel(
837 reg
->SrcRegisterInd
.File
,
842 index
.i
[0] += indir_index
.i
[0];
843 index
.i
[1] += indir_index
.i
[1];
844 index
.i
[2] += indir_index
.i
[2];
845 index
.i
[3] += indir_index
.i
[3];
848 if( reg
->SrcRegister
.Dimension
) {
849 switch( reg
->SrcRegister
.File
) {
850 case TGSI_FILE_INPUT
:
856 case TGSI_FILE_CONSTANT
:
866 index
.i
[0] += reg
->SrcRegisterDim
.Index
;
867 index
.i
[1] += reg
->SrcRegisterDim
.Index
;
868 index
.i
[2] += reg
->SrcRegisterDim
.Index
;
869 index
.i
[3] += reg
->SrcRegisterDim
.Index
;
871 if (reg
->SrcRegisterDim
.Indirect
) {
872 union tgsi_exec_channel index2
;
873 union tgsi_exec_channel indir_index
;
878 index2
.i
[3] = reg
->SrcRegisterDimInd
.Index
;
880 swizzle
= tgsi_util_get_src_register_swizzle( ®
->SrcRegisterDimInd
, CHAN_X
);
881 fetch_src_file_channel(
883 reg
->SrcRegisterDimInd
.File
,
888 index
.i
[0] += indir_index
.i
[0];
889 index
.i
[1] += indir_index
.i
[1];
890 index
.i
[2] += indir_index
.i
[2];
891 index
.i
[3] += indir_index
.i
[3];
895 swizzle
= tgsi_util_get_full_src_register_extswizzle( reg
, chan_index
);
896 fetch_src_file_channel(
898 reg
->SrcRegister
.File
,
903 switch (tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
)) {
904 case TGSI_UTIL_SIGN_CLEAR
:
905 micro_abs( chan
, chan
);
908 case TGSI_UTIL_SIGN_SET
:
909 micro_abs( chan
, chan
);
910 micro_neg( chan
, chan
);
913 case TGSI_UTIL_SIGN_TOGGLE
:
914 micro_neg( chan
, chan
);
917 case TGSI_UTIL_SIGN_KEEP
:
/*
 * Write one channel of a result to a destination register, applying the
 * instruction's saturate mode.
 *
 * mach - machine whose register files are written.
 * chan - four-lane value to store.
 * reg  - full destination register description.
 * inst - instruction being executed; inst->Instruction.Saturate selects
 *        the clamp.
 * (A channel index, used below as chan_index, selects the .xyzw[] slot;
 * its parameter line is not visible in this listing.)
 *
 * Destination pointer per register file:
 *   OUTPUT    - &mach->Outputs[base + Index].xyzw[chan_index], where base
 *               is Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
 *   TEMPORARY - &mach->Temps[Index].xyzw[chan_index]
 *   ADDRESS   - &mach->Addrs[Index].xyzw[chan_index]
 *
 * NOTE(review): the function-name line is not visible; the STORE macro
 * calls store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN),
 * which presumably is this routine - confirm.  The numbering jumps
 * (927->930, 932->936, 952->958, 963->972), so the unsaturated store, the
 * default cases and the TGSI_SAT_MINUS_PLUS_ONE body are not visible.
 */
924 struct tgsi_exec_machine
*mach
,
925 const union tgsi_exec_channel
*chan
,
926 const struct tgsi_full_dst_register
*reg
,
927 const struct tgsi_full_instruction
*inst
,
930 union tgsi_exec_channel
*dst
;
932 switch( reg
->DstRegister
.File
) {
936 case TGSI_FILE_OUTPUT
:
937 dst
= &mach
->Outputs
[mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] + reg
->DstRegister
.Index
].xyzw
[chan_index
];
940 case TGSI_FILE_TEMPORARY
:
941 dst
= &mach
->Temps
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
944 case TGSI_FILE_ADDRESS
:
945 dst
= &mach
->Addrs
[reg
->DstRegister
.Index
].xyzw
[chan_index
];
952 switch (inst
->Instruction
.Saturate
)
/*
 * Clamp to [0, 1] in two select steps (select-if-less-than on floats):
 *   first call : dst = (chan < 0) ? 0 : chan
 *   second call: dst = (chan < 1) ? chan : 1
 * NOTE(review): the second call reads chan, not dst, so it overwrites the
 * result of the first call; as written the final value is min(chan, 1) and
 * values below zero appear not to be clamped.  Passing dst as the first and
 * fourth source of the second call would chain the two clamps - confirm
 * intended behaviour before changing.
 */
958 case TGSI_SAT_ZERO_ONE
:
959 micro_lt( dst
, chan
, &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], chan
);
960 micro_lt( dst
, chan
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], chan
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
963 case TGSI_SAT_MINUS_PLUS_ONE
:
/*
 * Shorthand used by the instruction handlers.  Both macros expect local
 * variables named `mach` and `inst` to be in scope at the point of use.
 *   FETCH(VAL, INDEX, CHAN) - fetch channel CHAN of source register INDEX
 *                             of `inst` into the channel pointed to by VAL.
 *   STORE(VAL, INDEX, CHAN) - store the channel pointed to by VAL into
 *                             channel CHAN of destination register INDEX.
 */
972 #define FETCH(VAL,INDEX,CHAN)\
973 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
975 #define STORE(VAL,INDEX,CHAN)\
976 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN)
/*
 * exec_kil: accumulate a kill mask from source register 0 of `inst`.
 *
 * mach - machine; the result is OR-ed into
 *        Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0].
 * inst - instruction whose first source register is tested.
 *
 * For each of the four channels the extended swizzle of source 0 is looked
 * up and compared against `uniquemask`, which starts with the ZERO and ONE
 * swizzle bits set and gains the bit of every swizzle handled.  The channel
 * is fetched into r[0] and, for every lane i whose value is below 0.0f,
 * bit (i * 4) is set in kilmask.
 *
 * NOTE(review): presumably a swizzle already present in uniquemask is
 * skipped (the statement under that `if` is not visible) - confirm.
 * The embedded numbering jumps (980->985, 988->990, 999->1002), so the
 * declarations of uniquemask, kilmask, swizzle, chan_index and i, the
 * initial value of kilmask, the end of the first block comment and the
 * braces are not visible in this listing.
 */
979 exec_kil (struct tgsi_exec_machine
*mach
,
980 const struct tgsi_full_instruction
*inst
)
985 union tgsi_exec_channel r
[1];
987 /* This mask stores component bits that were already tested. Note that
988 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
990 uniquemask
= (1 << TGSI_EXTSWIZZLE_ZERO
) | (1 << TGSI_EXTSWIZZLE_ONE
);
992 for (chan_index
= 0; chan_index
< 4; chan_index
++)
997 /* unswizzle channel */
998 swizzle
= tgsi_util_get_full_src_register_extswizzle (
999 &inst
->FullSrcRegisters
[0],
1002 /* check if the component has not been already tested */
1003 if (uniquemask
& (1 << swizzle
))
1005 uniquemask
|= 1 << swizzle
;
1007 FETCH(&r
[0], 0, chan_index
);
1008 for (i
= 0; i
< 4; i
++)
1009 if (r
[0].f
[i
] < 0.0f
)
1010 kilmask
|= 1 << (i
* 4);
1013 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
/*
 * fetch_texel_1d: sample a texture for four fragments using only the S
 * coordinate.
 *
 * ctx     - GL context; the swrast context is obtained from it.
 * sampler - sampler state (NeedLambda, ImageWidth, NeedLodBias, LodBias,
 *           NeedLambdaClamp, MinLod, MaxLod are read).
 * s       - S coordinate for the four lanes.
 * r,g,b,a - receive the sampled colour, one lane per fragment, converted
 *           with CHAN_TO_FLOAT.
 *
 * Visible steps: copy s into stpq[k][0]; when sampler->NeedLambda is set,
 * form dsdx / dsdy as lane differences (BOTTOM_RIGHT - BOTTOM_LEFT and
 * TOP_LEFT - BOTTOM_LEFT), scale MAX2(dsdx, dsdy) by ImageWidth into rho,
 * add LodBias and clamp to [MinLod, MaxLod] when requested, and store the
 * single lambda into lambdas[]; make sure swrast->TextureSample[unit] is
 * set (calling _swrast_update_texture_samplers otherwise); call it; unpack
 * rgba[k][0..3] into r, g, b, a.
 *
 * NOTE(review): the embedded numbering jumps (1023->1025, 1031->1036,
 * 1046->1051->1055, 1064->1070), so the `unit` parameter, the declarations
 * of stpq / lambdas / rgba, any absolute-value step on the derivatives,
 * the conversion of rho to lambda, and the remaining sampler-call
 * arguments are not visible in this listing.
 */
1018 * Fetch a texel using S texture coordinate.
1021 fetch_texel_1d( GLcontext
*ctx
,
1022 struct tgsi_sampler_state
*sampler
,
1023 const union tgsi_exec_channel
*s
,
1025 union tgsi_exec_channel
*r
,
1026 union tgsi_exec_channel
*g
,
1027 union tgsi_exec_channel
*b
,
1028 union tgsi_exec_channel
*a
)
1030 SWcontext
*swrast
= SWRAST_CONTEXT(ctx
);
1031 GLuint fragment_index
;
1036 for (fragment_index
= 0; fragment_index
< 4; fragment_index
++)
1038 stpq
[fragment_index
][0] = s
->f
[fragment_index
];
1041 if (sampler
->NeedLambda
)
1043 GLfloat dsdx
= s
->f
[TILE_BOTTOM_RIGHT
] - s
->f
[TILE_BOTTOM_LEFT
];
1044 GLfloat dsdy
= s
->f
[TILE_TOP_LEFT
] - s
->f
[TILE_BOTTOM_LEFT
];
1046 GLfloat rho
, lambda
;
1051 rho
= MAX2(dsdx
, dsdy
) * sampler
->ImageWidth
;
1055 if (sampler
->NeedLodBias
)
1056 lambda
+= sampler
->LodBias
;
1058 if (sampler
->NeedLambdaClamp
)
1059 lambda
= CLAMP(lambda
, sampler
->MinLod
, sampler
->MaxLod
);
1061 /* XXX: Use the same lambda value throughout the tile. Could
1062 * end up with four unique values by recalculating partial
1063 * derivs in the other row and column, and calculating lambda
1064 * using the dx and dy values appropriate for each fragment in
1070 lambdas
[3] = lambda
;
1073 if (!swrast
->TextureSample
[unit
]) {
1074 _swrast_update_texture_samplers(ctx
);
1077 /* XXX use a float-valued TextureSample routine here!!! */
1078 swrast
->TextureSample
[unit
] (ctx
,
1079 ctx
->Texture
.Unit
[unit
]._Current
,
1081 (const GLfloat (*)[4])stpq
,
1085 for (fragment_index
= 0; fragment_index
< 4; fragment_index
++)
1087 r
->f
[fragment_index
] = CHAN_TO_FLOAT(rgba
[fragment_index
][0]);
1088 g
->f
[fragment_index
] = CHAN_TO_FLOAT(rgba
[fragment_index
][1]);
1089 b
->f
[fragment_index
] = CHAN_TO_FLOAT(rgba
[fragment_index
][2]);
1090 a
->f
[fragment_index
] = CHAN_TO_FLOAT(rgba
[fragment_index
][3]);
/*
 * fetch_texel_2d: sample a texture for four fragments using the S and T
 * coordinates.
 *
 * ctx     - GL context; the swrast context is obtained from it.
 * sampler - sampler state (NeedLambda, ImageWidth, ImageHeight,
 *           NeedLodBias, LodBias, NeedLambdaClamp are read in the visible
 *           lines).
 * s, t    - texture coordinates for the four lanes.
 * r,g,b,a - receive the sampled colour, one lane per fragment, converted
 *           with CHAN_TO_FLOAT.
 *
 * Visible steps: copy s and t into stpq[k][0] / stpq[k][1]; when
 * sampler->NeedLambda is set, form the four lane differences dsdx, dsdy,
 * dtdx, dtdy, take their absolute values with FABSF, scale the larger S
 * difference by ImageWidth (maxU) and the larger T difference by
 * ImageHeight (maxV), take rho = MAX2(maxU, maxV) and lambda = LOG2(rho),
 * add LodBias when requested, and store the single lambda into lambdas[];
 * make sure swrast->TextureSample[unit] is set; call it; unpack
 * rgba[k][0..3] into r, g, b, a.
 *
 * NOTE(review): the embedded numbering jumps (1101->1103, 1109->1114,
 * 1143->1149, 1152->1158, 1166->1168), so the `unit` parameter, the
 * declarations of stpq / lambdas / rgba, the statement under the
 * NeedLambdaClamp test and the remaining sampler-call arguments are not
 * visible in this listing.
 */
1095 * Fetch a texel using ST texture coordinates.
1098 fetch_texel_2d( GLcontext
*ctx
,
1099 struct tgsi_sampler_state
*sampler
,
1100 const union tgsi_exec_channel
*s
,
1101 const union tgsi_exec_channel
*t
,
1103 union tgsi_exec_channel
*r
,
1104 union tgsi_exec_channel
*g
,
1105 union tgsi_exec_channel
*b
,
1106 union tgsi_exec_channel
*a
)
1108 SWcontext
*swrast
= SWRAST_CONTEXT( ctx
);
1109 GLuint fragment_index
;
1114 for (fragment_index
= 0; fragment_index
< 4; fragment_index
++) {
1115 stpq
[fragment_index
][0] = s
->f
[fragment_index
];
1116 stpq
[fragment_index
][1] = t
->f
[fragment_index
];
1119 if (sampler
->NeedLambda
) {
1120 GLfloat dsdx
= s
->f
[TILE_BOTTOM_RIGHT
] - s
->f
[TILE_BOTTOM_LEFT
];
1121 GLfloat dsdy
= s
->f
[TILE_TOP_LEFT
] - s
->f
[TILE_BOTTOM_LEFT
];
1123 GLfloat dtdx
= t
->f
[TILE_BOTTOM_RIGHT
] - t
->f
[TILE_BOTTOM_LEFT
];
1124 GLfloat dtdy
= t
->f
[TILE_TOP_LEFT
] - t
->f
[TILE_BOTTOM_LEFT
];
1126 GLfloat maxU
, maxV
, rho
, lambda
;
1128 dsdx
= FABSF( dsdx
);
1129 dsdy
= FABSF( dsdy
);
1130 dtdx
= FABSF( dtdx
);
1131 dtdy
= FABSF( dtdy
);
1133 maxU
= MAX2( dsdx
, dsdy
) * sampler
->ImageWidth
;
1134 maxV
= MAX2( dtdx
, dtdy
) * sampler
->ImageHeight
;
1136 rho
= MAX2( maxU
, maxV
);
1138 lambda
= LOG2( rho
);
1140 if (sampler
->NeedLodBias
)
1141 lambda
+= sampler
->LodBias
;
1143 if (sampler
->NeedLambdaClamp
)
1149 /* XXX: Use the same lambda value throughout the tile. Could
1150 * end up with four unique values by recalculating partial
1151 * derivs in the other row and column, and calculating lambda
1152 * using the dx and dy values appropriate for each fragment in
1158 lambdas
[3] = lambda
;
1161 if (!swrast
->TextureSample
[unit
]) {
1162 _swrast_update_texture_samplers(ctx
);
1165 /* XXX use a float-valued TextureSample routine here!!! */
1166 swrast
->TextureSample
[unit
](
1168 ctx
->Texture
.Unit
[unit
]._Current
,
1170 (const GLfloat (*)[4]) stpq
,
1174 for (fragment_index
= 0; fragment_index
< 4; fragment_index
++) {
1175 r
->f
[fragment_index
] = CHAN_TO_FLOAT( rgba
[fragment_index
][0] );
1176 g
->f
[fragment_index
] = CHAN_TO_FLOAT( rgba
[fragment_index
][1] );
1177 b
->f
[fragment_index
] = CHAN_TO_FLOAT( rgba
[fragment_index
][2] );
1178 a
->f
[fragment_index
] = CHAN_TO_FLOAT( rgba
[fragment_index
][3] );
/*
 * fetch_texel_3d: sample a texture for four fragments using the S, T and P
 * coordinates.
 *
 * ctx     - GL context; the swrast context is obtained from it.
 * sampler - sampler state (NeedLambda, ImageWidth, ImageHeight, ImageDepth,
 *           NeedLodBias, LodBias, NeedLambdaClamp, MinLod, MaxLod are read).
 * s, t, p - texture coordinates for the four lanes.
 * r,g,b,a - receive the sampled colour, one lane per fragment, converted
 *           with CHAN_TO_FLOAT.
 *
 * Visible steps: copy s, t, p into stpq[k][0..2]; when sampler->NeedLambda
 * is set, form the six lane differences (d{s,t,p}d{x,y}), scale the larger
 * difference of each coordinate by ImageWidth / ImageHeight / ImageDepth
 * (maxU, maxV, maxW), take rho as the maximum of the three, add LodBias
 * and clamp to [MinLod, MaxLod] when requested, and store the single
 * lambda into lambdas[]; make sure swrast->TextureSample[unit] is set;
 * call it; unpack rgba[k][0..3] into r, g, b, a.
 *
 * NOTE(review): the embedded numbering jumps (1190->1192, 1198->1203,
 * 1221->1230, 1234->1238, 1247->1253), so the `unit` parameter, the
 * declarations of stpq / lambdas / rgba, any absolute-value step on the
 * derivatives, the conversion of rho to lambda and the remaining
 * sampler-call arguments are not visible in this listing.
 */
1183 * Fetch a texel using STR texture coordinates.
1186 fetch_texel_3d( GLcontext
*ctx
,
1187 struct tgsi_sampler_state
*sampler
,
1188 const union tgsi_exec_channel
*s
,
1189 const union tgsi_exec_channel
*t
,
1190 const union tgsi_exec_channel
*p
,
1192 union tgsi_exec_channel
*r
,
1193 union tgsi_exec_channel
*g
,
1194 union tgsi_exec_channel
*b
,
1195 union tgsi_exec_channel
*a
)
1197 SWcontext
*swrast
= SWRAST_CONTEXT(ctx
);
1198 GLuint fragment_index
;
1203 for (fragment_index
= 0; fragment_index
< 4; fragment_index
++)
1205 stpq
[fragment_index
][0] = s
->f
[fragment_index
];
1206 stpq
[fragment_index
][1] = t
->f
[fragment_index
];
1207 stpq
[fragment_index
][2] = p
->f
[fragment_index
];
1210 if (sampler
->NeedLambda
)
1212 GLfloat dsdx
= s
->f
[TILE_BOTTOM_RIGHT
] - s
->f
[TILE_BOTTOM_LEFT
];
1213 GLfloat dsdy
= s
->f
[TILE_TOP_LEFT
] - s
->f
[TILE_BOTTOM_LEFT
];
1215 GLfloat dtdx
= t
->f
[TILE_BOTTOM_RIGHT
] - t
->f
[TILE_BOTTOM_LEFT
];
1216 GLfloat dtdy
= t
->f
[TILE_TOP_LEFT
] - t
->f
[TILE_BOTTOM_LEFT
];
1218 GLfloat dpdx
= p
->f
[TILE_BOTTOM_RIGHT
] - p
->f
[TILE_BOTTOM_LEFT
];
1219 GLfloat dpdy
= p
->f
[TILE_TOP_LEFT
] - p
->f
[TILE_BOTTOM_LEFT
];
1221 GLfloat maxU
, maxV
, maxW
, rho
, lambda
;
1230 maxU
= MAX2(dsdx
, dsdy
) * sampler
->ImageWidth
;
1231 maxV
= MAX2(dtdx
, dtdy
) * sampler
->ImageHeight
;
1232 maxW
= MAX2(dpdx
, dpdy
) * sampler
->ImageDepth
;
1234 rho
= MAX2(maxU
, MAX2(maxV
, maxW
));
1238 if (sampler
->NeedLodBias
)
1239 lambda
+= sampler
->LodBias
;
1241 if (sampler
->NeedLambdaClamp
)
1242 lambda
= CLAMP(lambda
, sampler
->MinLod
, sampler
->MaxLod
);
1244 /* XXX: Use the same lambda value throughout the tile. Could
1245 * end up with four unique values by recalculating partial
1246 * derivs in the other row and column, and calculating lambda
1247 * using the dx and dy values appropriate for each fragment in
1253 lambdas
[3] = lambda
;
1256 if (!swrast
->TextureSample
[unit
]) {
1257 _swrast_update_texture_samplers(ctx
);
1260 /* XXX use a float-valued TextureSample routine here!!! */
1261 swrast
->TextureSample
[unit
] (ctx
,
1262 ctx
->Texture
.Unit
[unit
]._Current
,
1264 (const GLfloat (*)[4])stpq
,
1268 for (fragment_index
= 0; fragment_index
< 4; fragment_index
++)
1270 r
->f
[fragment_index
] = CHAN_TO_FLOAT(rgba
[fragment_index
][0]);
1271 g
->f
[fragment_index
] = CHAN_TO_FLOAT(rgba
[fragment_index
][1]);
1272 b
->f
[fragment_index
] = CHAN_TO_FLOAT(rgba
[fragment_index
][2]);
1273 a
->f
[fragment_index
] = CHAN_TO_FLOAT(rgba
[fragment_index
][3]);
1281 struct tgsi_exec_labels
*labels
)
1285 for( i
= 0; i
< labels
->count
; i
++ ) {
1286 if( labels
->labels
[i
][0] == label
) {
1287 return labels
->labels
[i
][1];
1296 struct tgsi_exec_machine
*mach
,
1297 const struct tgsi_full_instruction
*inst
,
1298 struct tgsi_exec_labels
*labels
,
1299 GLuint
*programCounter
)
1302 GET_CURRENT_CONTEXT(ctx
);
1305 union tgsi_exec_channel r
[8];
1307 switch (inst
->Instruction
.Opcode
) {
1308 case TGSI_OPCODE_ARL
:
1309 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1310 FETCH( &r
[0], 0, chan_index
);
1311 micro_f2it( &r
[0], &r
[0] );
1312 STORE( &r
[0], 0, chan_index
);
1316 case TGSI_OPCODE_MOV
:
1317 /* TGSI_OPCODE_SWZ */
1318 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1319 FETCH( &r
[0], 0, chan_index
);
1320 STORE( &r
[0], 0, chan_index
);
1324 case TGSI_OPCODE_LIT
:
1325 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1326 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1329 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1330 FETCH( &r
[0], 0, CHAN_X
);
1331 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1332 micro_max( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1333 STORE( &r
[0], 0, CHAN_Y
);
1336 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1337 FETCH( &r
[1], 0, CHAN_Y
);
1338 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1340 FETCH( &r
[2], 0, CHAN_W
);
1341 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
1342 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
1343 micro_pow( &r
[1], &r
[1], &r
[2] );
1344 micro_lt( &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1345 STORE( &r
[0], 0, CHAN_Z
);
1349 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1350 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1354 case TGSI_OPCODE_RCP
:
1355 /* TGSI_OPCODE_RECIP */
1356 FETCH( &r
[0], 0, CHAN_X
);
1357 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1358 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1359 STORE( &r
[0], 0, chan_index
);
1363 case TGSI_OPCODE_RSQ
:
1364 /* TGSI_OPCODE_RECIPSQRT */
1365 FETCH( &r
[0], 0, CHAN_X
);
1366 micro_sqrt( &r
[0], &r
[0] );
1367 micro_div( &r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0] );
1368 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1369 STORE( &r
[0], 0, chan_index
);
1373 case TGSI_OPCODE_EXP
:
1377 case TGSI_OPCODE_LOG
:
1381 case TGSI_OPCODE_MUL
:
1382 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
)
1384 FETCH(&r
[0], 0, chan_index
);
1385 FETCH(&r
[1], 1, chan_index
);
1387 micro_mul( &r
[0], &r
[0], &r
[1] );
1389 STORE(&r
[0], 0, chan_index
);
1393 case TGSI_OPCODE_ADD
:
1394 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1395 FETCH( &r
[0], 0, chan_index
);
1396 FETCH( &r
[1], 1, chan_index
);
1397 micro_add( &r
[0], &r
[0], &r
[1] );
1398 STORE( &r
[0], 0, chan_index
);
1402 case TGSI_OPCODE_DP3
:
1403 /* TGSI_OPCODE_DOT3 */
1404 FETCH( &r
[0], 0, CHAN_X
);
1405 FETCH( &r
[1], 1, CHAN_X
);
1406 micro_mul( &r
[0], &r
[0], &r
[1] );
1408 FETCH( &r
[1], 0, CHAN_Y
);
1409 FETCH( &r
[2], 1, CHAN_Y
);
1410 micro_mul( &r
[1], &r
[1], &r
[2] );
1411 micro_add( &r
[0], &r
[0], &r
[1] );
1413 FETCH( &r
[1], 0, CHAN_Z
);
1414 FETCH( &r
[2], 1, CHAN_Z
);
1415 micro_mul( &r
[1], &r
[1], &r
[2] );
1416 micro_add( &r
[0], &r
[0], &r
[1] );
1418 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1419 STORE( &r
[0], 0, chan_index
);
1423 case TGSI_OPCODE_DP4
:
1424 /* TGSI_OPCODE_DOT4 */
1425 FETCH(&r
[0], 0, CHAN_X
);
1426 FETCH(&r
[1], 1, CHAN_X
);
1428 micro_mul( &r
[0], &r
[0], &r
[1] );
1430 FETCH(&r
[1], 0, CHAN_Y
);
1431 FETCH(&r
[2], 1, CHAN_Y
);
1433 micro_mul( &r
[1], &r
[1], &r
[2] );
1434 micro_add( &r
[0], &r
[0], &r
[1] );
1436 FETCH(&r
[1], 0, CHAN_Z
);
1437 FETCH(&r
[2], 1, CHAN_Z
);
1439 micro_mul( &r
[1], &r
[1], &r
[2] );
1440 micro_add( &r
[0], &r
[0], &r
[1] );
1442 FETCH(&r
[1], 0, CHAN_W
);
1443 FETCH(&r
[2], 1, CHAN_W
);
1445 micro_mul( &r
[1], &r
[1], &r
[2] );
1446 micro_add( &r
[0], &r
[0], &r
[1] );
1448 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1449 STORE( &r
[0], 0, chan_index
);
1453 case TGSI_OPCODE_DST
:
1454 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1455 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
1458 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1459 FETCH( &r
[0], 0, CHAN_Y
);
1460 FETCH( &r
[1], 1, CHAN_Y
);
1461 micro_mul( &r
[0], &r
[0], &r
[1] );
1462 STORE( &r
[0], 0, CHAN_Y
);
1465 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1466 FETCH( &r
[0], 0, CHAN_Z
);
1467 STORE( &r
[0], 0, CHAN_Z
);
1470 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1471 FETCH( &r
[0], 1, CHAN_W
);
1472 STORE( &r
[0], 0, CHAN_W
);
1476 case TGSI_OPCODE_MIN
:
1477 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1478 FETCH(&r
[0], 0, chan_index
);
1479 FETCH(&r
[1], 1, chan_index
);
1481 micro_lt( &r
[0], &r
[0], &r
[1], &r
[0], &r
[1] );
1483 STORE(&r
[0], 0, chan_index
);
1487 case TGSI_OPCODE_MAX
:
1488 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1489 FETCH(&r
[0], 0, chan_index
);
1490 FETCH(&r
[1], 1, chan_index
);
1492 micro_lt( &r
[0], &r
[0], &r
[1], &r
[1], &r
[0] );
1494 STORE(&r
[0], 0, chan_index
);
1498 case TGSI_OPCODE_SLT
:
1499 /* TGSI_OPCODE_SETLT */
1500 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1501 FETCH( &r
[0], 0, chan_index
);
1502 FETCH( &r
[1], 1, chan_index
);
1503 micro_lt( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
1504 STORE( &r
[0], 0, chan_index
);
1508 case TGSI_OPCODE_SGE
:
1509 /* TGSI_OPCODE_SETGE */
1510 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1511 FETCH(&r
[0], 0, chan_index
);
1512 FETCH(&r
[1], 1, chan_index
);
1514 micro_lt( &r
[0], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
] );
1516 STORE(&r
[0], 0, chan_index
);
1520 case TGSI_OPCODE_MAD
:
1521 /* TGSI_OPCODE_MADD */
1522 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1523 FETCH( &r
[0], 0, chan_index
);
1524 FETCH( &r
[1], 1, chan_index
);
1525 micro_mul( &r
[0], &r
[0], &r
[1] );
1526 FETCH( &r
[1], 2, chan_index
);
1527 micro_add( &r
[0], &r
[0], &r
[1] );
1528 STORE( &r
[0], 0, chan_index
);
1532 case TGSI_OPCODE_SUB
:
1533 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1534 FETCH(&r
[0], 0, chan_index
);
1535 FETCH(&r
[1], 1, chan_index
);
1537 micro_sub( &r
[0], &r
[0], &r
[1] );
1539 STORE(&r
[0], 0, chan_index
);
1543 case TGSI_OPCODE_LERP
:
1544 /* TGSI_OPCODE_LRP */
1545 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1546 FETCH(&r
[0], 0, chan_index
);
1547 FETCH(&r
[1], 1, chan_index
);
1548 FETCH(&r
[2], 2, chan_index
);
1550 micro_sub( &r
[1], &r
[1], &r
[2] );
1551 micro_mul( &r
[0], &r
[0], &r
[1] );
1552 micro_add( &r
[0], &r
[0], &r
[2] );
1554 STORE(&r
[0], 0, chan_index
);
1558 case TGSI_OPCODE_CND
:
1562 case TGSI_OPCODE_CND0
:
1566 case TGSI_OPCODE_DOT2ADD
:
1567 /* TGSI_OPCODE_DP2A */
1571 case TGSI_OPCODE_INDEX
:
1575 case TGSI_OPCODE_NEGATE
:
1579 case TGSI_OPCODE_FRAC
:
1580 /* TGSI_OPCODE_FRC */
1581 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1582 FETCH( &r
[0], 0, chan_index
);
1583 micro_frc( &r
[0], &r
[0] );
1584 STORE( &r
[0], 0, chan_index
);
1588 case TGSI_OPCODE_CLAMP
:
1592 case TGSI_OPCODE_FLOOR
:
1593 /* TGSI_OPCODE_FLR */
1594 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1595 FETCH( &r
[0], 0, chan_index
);
1596 micro_flr( &r
[0], &r
[0] );
1597 STORE( &r
[0], 0, chan_index
);
1601 case TGSI_OPCODE_ROUND
:
1602 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1603 FETCH( &r
[0], 0, chan_index
);
1604 micro_rnd( &r
[0], &r
[0] );
1605 STORE( &r
[0], 0, chan_index
);
1609 case TGSI_OPCODE_EXPBASE2
:
1610 /* TGSI_OPCODE_EX2 */
1611 FETCH(&r
[0], 0, CHAN_X
);
1613 micro_pow( &r
[0], &mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
], &r
[0] );
1615 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1616 STORE( &r
[0], 0, chan_index
);
1620 case TGSI_OPCODE_LOGBASE2
:
1621 /* TGSI_OPCODE_LG2 */
1622 FETCH( &r
[0], 0, CHAN_X
);
1623 micro_lg2( &r
[0], &r
[0] );
1624 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1625 STORE( &r
[0], 0, chan_index
);
1629 case TGSI_OPCODE_POWER
:
1630 /* TGSI_OPCODE_POW */
1631 FETCH(&r
[0], 0, CHAN_X
);
1632 FETCH(&r
[1], 1, CHAN_X
);
1634 micro_pow( &r
[0], &r
[0], &r
[1] );
1636 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1637 STORE( &r
[0], 0, chan_index
);
1641 case TGSI_OPCODE_CROSSPRODUCT
:
1642 /* TGSI_OPCODE_XPD */
1643 FETCH(&r
[0], 0, CHAN_Y
);
1644 FETCH(&r
[1], 1, CHAN_Z
);
1646 micro_mul( &r
[2], &r
[0], &r
[1] );
1648 FETCH(&r
[3], 0, CHAN_Z
);
1649 FETCH(&r
[4], 1, CHAN_Y
);
1651 micro_mul( &r
[5], &r
[3], &r
[4] );
1652 micro_sub( &r
[2], &r
[2], &r
[5] );
1654 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
1655 STORE( &r
[2], 0, CHAN_X
);
1658 FETCH(&r
[2], 1, CHAN_X
);
1660 micro_mul( &r
[3], &r
[3], &r
[2] );
1662 FETCH(&r
[5], 0, CHAN_X
);
1664 micro_mul( &r
[1], &r
[1], &r
[5] );
1665 micro_sub( &r
[3], &r
[3], &r
[1] );
1667 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
1668 STORE( &r
[3], 0, CHAN_Y
);
1671 micro_mul( &r
[5], &r
[5], &r
[4] );
1672 micro_mul( &r
[0], &r
[0], &r
[2] );
1673 micro_sub( &r
[5], &r
[5], &r
[0] );
1675 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
1676 STORE( &r
[5], 0, CHAN_Z
);
1679 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
1680 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1684 case TGSI_OPCODE_MULTIPLYMATRIX
:
1688 case TGSI_OPCODE_ABS
:
1689 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1690 FETCH(&r
[0], 0, chan_index
);
1692 micro_abs( &r
[0], &r
[0] );
1694 STORE(&r
[0], 0, chan_index
);
1698 case TGSI_OPCODE_RCC
:
1702 case TGSI_OPCODE_DPH
:
1703 FETCH(&r
[0], 0, CHAN_X
);
1704 FETCH(&r
[1], 1, CHAN_X
);
1706 micro_mul( &r
[0], &r
[0], &r
[1] );
1708 FETCH(&r
[1], 0, CHAN_Y
);
1709 FETCH(&r
[2], 1, CHAN_Y
);
1711 micro_mul( &r
[1], &r
[1], &r
[2] );
1712 micro_add( &r
[0], &r
[0], &r
[1] );
1714 FETCH(&r
[1], 0, CHAN_Z
);
1715 FETCH(&r
[2], 1, CHAN_Z
);
1717 micro_mul( &r
[1], &r
[1], &r
[2] );
1718 micro_add( &r
[0], &r
[0], &r
[1] );
1720 FETCH(&r
[1], 1, CHAN_W
);
1722 micro_add( &r
[0], &r
[0], &r
[1] );
1724 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1725 STORE( &r
[0], 0, chan_index
);
1729 case TGSI_OPCODE_COS
:
1730 FETCH(&r
[0], 0, CHAN_X
);
1732 micro_cos( &r
[0], &r
[0] );
1734 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1735 STORE( &r
[0], 0, chan_index
);
1739 case TGSI_OPCODE_DDX
:
1740 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1741 FETCH( &r
[0], 0, chan_index
);
1742 micro_ddx( &r
[0], &r
[0] );
1743 STORE( &r
[0], 0, chan_index
);
1747 case TGSI_OPCODE_DDY
:
1748 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1749 FETCH( &r
[0], 0, chan_index
);
1750 micro_ddy( &r
[0], &r
[0] );
1751 STORE( &r
[0], 0, chan_index
);
1755 case TGSI_OPCODE_KIL
:
1756 exec_kil (mach
, inst
);
1759 case TGSI_OPCODE_PK2H
:
1763 case TGSI_OPCODE_PK2US
:
1767 case TGSI_OPCODE_PK4B
:
1771 case TGSI_OPCODE_PK4UB
:
1775 case TGSI_OPCODE_RFL
:
1779 case TGSI_OPCODE_SEQ
:
1783 case TGSI_OPCODE_SFL
:
1787 case TGSI_OPCODE_SGT
:
1791 case TGSI_OPCODE_SIN
:
1792 FETCH(&r
[0], 0, CHAN_X
);
1794 micro_sin( &r
[0], &r
[0] );
1796 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1797 STORE( &r
[0], 0, chan_index
);
1801 case TGSI_OPCODE_SLE
:
1805 case TGSI_OPCODE_SNE
:
1809 case TGSI_OPCODE_STR
:
1813 case TGSI_OPCODE_TEX
:
1814 switch (inst
->InstructionExtTexture
.Texture
) {
1815 case TGSI_TEXTURE_1D
:
1817 FETCH(&r
[0], 0, CHAN_X
);
1819 switch (inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtDivide
) {
1820 case TGSI_EXTSWIZZLE_W
:
1821 FETCH(&r
[1], 0, CHAN_W
);
1822 micro_div( &r
[0], &r
[0], &r
[1] );
1825 case TGSI_EXTSWIZZLE_ONE
:
1832 fetch_texel_1d (ctx
,
1833 &mach
->Samplers
[inst
->FullSrcRegisters
[1].SrcRegister
.Index
],
1835 inst
->FullSrcRegisters
[1].SrcRegister
.Index
,
1836 &r
[0], &r
[1], &r
[2], &r
[3]);
1840 case TGSI_TEXTURE_2D
:
1841 case TGSI_TEXTURE_RECT
:
1843 FETCH(&r
[0], 0, CHAN_X
);
1844 FETCH(&r
[1], 0, CHAN_Y
);
1846 switch (inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtDivide
) {
1847 case TGSI_EXTSWIZZLE_W
:
1848 FETCH(&r
[2], 0, CHAN_W
);
1849 micro_div( &r
[0], &r
[0], &r
[2] );
1850 micro_div( &r
[1], &r
[1], &r
[2] );
1853 case TGSI_EXTSWIZZLE_ONE
:
1861 fetch_texel_2d (ctx
,
1862 &mach
->Samplers
[inst
->FullSrcRegisters
[1].SrcRegister
.Index
],
1864 inst
->FullSrcRegisters
[1].SrcRegister
.Index
,
1865 &r
[0], &r
[1], &r
[2], &r
[3]);
1869 case TGSI_TEXTURE_3D
:
1870 case TGSI_TEXTURE_CUBE
:
1872 FETCH(&r
[0], 0, CHAN_X
);
1873 FETCH(&r
[1], 0, CHAN_Y
);
1874 FETCH(&r
[2], 0, CHAN_Z
);
1876 switch (inst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtDivide
) {
1877 case TGSI_EXTSWIZZLE_W
:
1878 FETCH(&r
[3], 0, CHAN_W
);
1879 micro_div( &r
[0], &r
[0], &r
[3] );
1880 micro_div( &r
[1], &r
[1], &r
[3] );
1881 micro_div( &r
[2], &r
[2], &r
[3] );
1884 case TGSI_EXTSWIZZLE_ONE
:
1892 fetch_texel_3d (ctx
,
1893 &mach
->Samplers
[inst
->FullSrcRegisters
[1].SrcRegister
.Index
],
1894 &r
[0], &r
[1], &r
[2],
1895 inst
->FullSrcRegisters
[1].SrcRegister
.Index
,
1896 &r
[0], &r
[1], &r
[2], &r
[3]);
1904 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1905 STORE( &r
[chan_index
], 0, chan_index
);
1909 case TGSI_OPCODE_TXD
:
1913 case TGSI_OPCODE_UP2H
:
1917 case TGSI_OPCODE_UP2US
:
1921 case TGSI_OPCODE_UP4B
:
1925 case TGSI_OPCODE_UP4UB
:
1929 case TGSI_OPCODE_X2D
:
1933 case TGSI_OPCODE_ARA
:
1937 case TGSI_OPCODE_ARR
:
1941 case TGSI_OPCODE_BRA
:
1945 case TGSI_OPCODE_CAL
:
1949 case TGSI_OPCODE_RET
:
1950 /* XXX: end of shader! */
1954 case TGSI_OPCODE_SSG
:
1958 case TGSI_OPCODE_CMP
:
1959 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
1960 FETCH(&r
[0], 0, chan_index
);
1961 FETCH(&r
[1], 1, chan_index
);
1962 FETCH(&r
[2], 2, chan_index
);
1964 micro_lt( &r
[0], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2] );
1966 STORE(&r
[0], 0, chan_index
);
1970 case TGSI_OPCODE_SCS
:
1971 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
1972 FETCH( &r
[0], 0, CHAN_X
);
1974 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) ) {
1975 micro_cos( &r
[1], &r
[0] );
1976 STORE( &r
[1], 0, CHAN_X
);
1978 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
1979 micro_sin( &r
[1], &r
[0] );
1980 STORE( &r
[1], 0, CHAN_Y
);
1982 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
1983 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
1985 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
1986 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
1990 case TGSI_OPCODE_TXB
:
1994 case TGSI_OPCODE_NRM
:
1998 case TGSI_OPCODE_DIV
:
2002 case TGSI_OPCODE_DP2
:
2003 FETCH( &r
[0], 0, CHAN_X
);
2004 FETCH( &r
[1], 1, CHAN_X
);
2005 micro_mul( &r
[0], &r
[0], &r
[1] );
2007 FETCH( &r
[1], 0, CHAN_Y
);
2008 FETCH( &r
[2], 1, CHAN_Y
);
2009 micro_mul( &r
[1], &r
[1], &r
[2] );
2010 micro_add( &r
[0], &r
[0], &r
[1] );
2012 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2013 STORE( &r
[0], 0, chan_index
);
2017 case TGSI_OPCODE_TXL
:
2021 case TGSI_OPCODE_BRK
:
2025 case TGSI_OPCODE_IF
:
2029 case TGSI_OPCODE_LOOP
:
2033 case TGSI_OPCODE_REP
:
2037 case TGSI_OPCODE_ELSE
:
2041 case TGSI_OPCODE_ENDIF
:
2045 case TGSI_OPCODE_ENDLOOP
:
2049 case TGSI_OPCODE_ENDREP
:
2053 case TGSI_OPCODE_PUSHA
:
2057 case TGSI_OPCODE_POPA
:
2061 case TGSI_OPCODE_CEIL
:
2062 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2063 FETCH( &r
[0], 0, chan_index
);
2064 micro_ceil( &r
[0], &r
[0] );
2065 STORE( &r
[0], 0, chan_index
);
2069 case TGSI_OPCODE_I2F
:
2070 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2071 FETCH( &r
[0], 0, chan_index
);
2072 micro_i2f( &r
[0], &r
[0] );
2073 STORE( &r
[0], 0, chan_index
);
2077 case TGSI_OPCODE_NOT
:
2078 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2079 FETCH( &r
[0], 0, chan_index
);
2080 micro_not( &r
[0], &r
[0] );
2081 STORE( &r
[0], 0, chan_index
);
2085 case TGSI_OPCODE_TRUNC
:
2089 case TGSI_OPCODE_SHL
:
2090 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2091 FETCH( &r
[0], 0, chan_index
);
2092 FETCH( &r
[1], 1, chan_index
);
2093 micro_shl( &r
[0], &r
[0], &r
[1] );
2094 STORE( &r
[0], 0, chan_index
);
2098 case TGSI_OPCODE_SHR
:
2099 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2100 FETCH( &r
[0], 0, chan_index
);
2101 FETCH( &r
[1], 1, chan_index
);
2102 micro_ishr( &r
[0], &r
[0], &r
[1] );
2103 STORE( &r
[0], 0, chan_index
);
2107 case TGSI_OPCODE_AND
:
2108 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2109 FETCH( &r
[0], 0, chan_index
);
2110 FETCH( &r
[1], 1, chan_index
);
2111 micro_and( &r
[0], &r
[0], &r
[1] );
2112 STORE( &r
[0], 0, chan_index
);
2116 case TGSI_OPCODE_OR
:
2117 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2118 FETCH( &r
[0], 0, chan_index
);
2119 FETCH( &r
[1], 1, chan_index
);
2120 micro_or( &r
[0], &r
[0], &r
[1] );
2121 STORE( &r
[0], 0, chan_index
);
2125 case TGSI_OPCODE_MOD
:
2129 case TGSI_OPCODE_XOR
:
2130 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2131 FETCH( &r
[0], 0, chan_index
);
2132 FETCH( &r
[1], 1, chan_index
);
2133 micro_xor( &r
[0], &r
[0], &r
[1] );
2134 STORE( &r
[0], 0, chan_index
);
2138 case TGSI_OPCODE_SAD
:
2142 case TGSI_OPCODE_TXF
:
2146 case TGSI_OPCODE_TXQ
:
2150 case TGSI_OPCODE_CONT
:
2154 case TGSI_OPCODE_EMIT
:
2155 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += 16;
2156 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
2159 case TGSI_OPCODE_ENDPRIM
:
2160 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]++;
2161 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]] = 0;
2170 #if !defined(XSTDCALL)
2172 #define XSTDCALL __stdcall
2178 typedef void (XSTDCALL
*fp_function
) (const struct tgsi_exec_vector
*input
,
2179 struct tgsi_exec_vector
*output
,
2180 GLfloat (*constant
)[4],
2181 struct tgsi_exec_vector
*temporary
);
2184 tgsi_exec_machine_run2(
2185 struct tgsi_exec_machine
*mach
,
2186 struct tgsi_exec_labels
*labels
)
2189 GET_CURRENT_CONTEXT(ctx
);
2194 fp_function function
;
2196 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
2198 function
= (fp_function
) x86_get_func (&mach
->Function
);
2200 function (mach
->Inputs
,
2205 struct tgsi_parse_context parse
;
2208 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
2209 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
2211 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
2212 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
2213 mach
->Primitives
[0] = 0;
2216 k
= tgsi_parse_init( &parse
, mach
->Tokens
);
2217 if (k
!= TGSI_PARSE_OK
) {
2218 printf("Problem parsing!\n");
2222 while( !tgsi_parse_end_of_tokens( &parse
) ) {
2223 tgsi_parse_token( &parse
);
2224 switch( parse
.FullToken
.Token
.Type
) {
2225 case TGSI_TOKEN_TYPE_DECLARATION
:
2227 case TGSI_TOKEN_TYPE_IMMEDIATE
:
2229 case TGSI_TOKEN_TYPE_INSTRUCTION
:
2230 exec_instruction( mach
, &parse
.FullToken
.FullInstruction
, labels
, &parse
.Position
);
2236 tgsi_parse_free (&parse
);
2240 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
2242 * Scale back depth component.
2244 for (i
= 0; i
< 4; i
++)
2245 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;