1 /**************************************************************************
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
30 * TGSI interpreter/executor.
32 * Flow control information:
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
 * The ExecMask is computed from several other masks: CondMask, LoopMask and
 * ContMask, which are controlled by the flow control instructions (namely
 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT), ANDed together with Switch.mask and
 * FuncMask (see UPDATE_EXEC_MASK).
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
67 #define TILE_TOP_LEFT 0
68 #define TILE_TOP_RIGHT 1
69 #define TILE_BOTTOM_LEFT 2
70 #define TILE_BOTTOM_RIGHT 3
73 micro_abs(union tgsi_exec_channel
*dst
,
74 const union tgsi_exec_channel
*src
)
76 dst
->f
[0] = fabsf(src
->f
[0]);
77 dst
->f
[1] = fabsf(src
->f
[1]);
78 dst
->f
[2] = fabsf(src
->f
[2]);
79 dst
->f
[3] = fabsf(src
->f
[3]);
83 micro_arl(union tgsi_exec_channel
*dst
,
84 const union tgsi_exec_channel
*src
)
86 dst
->i
[0] = (int)floorf(src
->f
[0]);
87 dst
->i
[1] = (int)floorf(src
->f
[1]);
88 dst
->i
[2] = (int)floorf(src
->f
[2]);
89 dst
->i
[3] = (int)floorf(src
->f
[3]);
93 micro_arr(union tgsi_exec_channel
*dst
,
94 const union tgsi_exec_channel
*src
)
96 dst
->i
[0] = (int)floorf(src
->f
[0] + 0.5f
);
97 dst
->i
[1] = (int)floorf(src
->f
[1] + 0.5f
);
98 dst
->i
[2] = (int)floorf(src
->f
[2] + 0.5f
);
99 dst
->i
[3] = (int)floorf(src
->f
[3] + 0.5f
);
103 micro_ceil(union tgsi_exec_channel
*dst
,
104 const union tgsi_exec_channel
*src
)
106 dst
->f
[0] = ceilf(src
->f
[0]);
107 dst
->f
[1] = ceilf(src
->f
[1]);
108 dst
->f
[2] = ceilf(src
->f
[2]);
109 dst
->f
[3] = ceilf(src
->f
[3]);
113 micro_cos(union tgsi_exec_channel
*dst
,
114 const union tgsi_exec_channel
*src
)
116 dst
->f
[0] = cosf(src
->f
[0]);
117 dst
->f
[1] = cosf(src
->f
[1]);
118 dst
->f
[2] = cosf(src
->f
[2]);
119 dst
->f
[3] = cosf(src
->f
[3]);
123 micro_ddx(union tgsi_exec_channel
*dst
,
124 const union tgsi_exec_channel
*src
)
129 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
133 micro_ddy(union tgsi_exec_channel
*dst
,
134 const union tgsi_exec_channel
*src
)
139 dst
->f
[3] = src
->f
[TILE_BOTTOM_LEFT
] - src
->f
[TILE_TOP_LEFT
];
143 micro_exp2(union tgsi_exec_channel
*dst
,
144 const union tgsi_exec_channel
*src
)
147 dst
->f
[0] = util_fast_exp2(src
->f
[0]);
148 dst
->f
[1] = util_fast_exp2(src
->f
[1]);
149 dst
->f
[2] = util_fast_exp2(src
->f
[2]);
150 dst
->f
[3] = util_fast_exp2(src
->f
[3]);
153 /* Inf is okay for this instruction, so clamp it to silence assertions. */
155 union tgsi_exec_channel clamped
;
157 for (i
= 0; i
< 4; i
++) {
158 if (src
->f
[i
] > 127.99999f
) {
159 clamped
.f
[i
] = 127.99999f
;
160 } else if (src
->f
[i
] < -126.99999f
) {
161 clamped
.f
[i
] = -126.99999f
;
163 clamped
.f
[i
] = src
->f
[i
];
169 dst
->f
[0] = powf(2.0f
, src
->f
[0]);
170 dst
->f
[1] = powf(2.0f
, src
->f
[1]);
171 dst
->f
[2] = powf(2.0f
, src
->f
[2]);
172 dst
->f
[3] = powf(2.0f
, src
->f
[3]);
173 #endif /* FAST_MATH */
177 micro_flr(union tgsi_exec_channel
*dst
,
178 const union tgsi_exec_channel
*src
)
180 dst
->f
[0] = floorf(src
->f
[0]);
181 dst
->f
[1] = floorf(src
->f
[1]);
182 dst
->f
[2] = floorf(src
->f
[2]);
183 dst
->f
[3] = floorf(src
->f
[3]);
187 micro_frc(union tgsi_exec_channel
*dst
,
188 const union tgsi_exec_channel
*src
)
190 dst
->f
[0] = src
->f
[0] - floorf(src
->f
[0]);
191 dst
->f
[1] = src
->f
[1] - floorf(src
->f
[1]);
192 dst
->f
[2] = src
->f
[2] - floorf(src
->f
[2]);
193 dst
->f
[3] = src
->f
[3] - floorf(src
->f
[3]);
197 micro_iabs(union tgsi_exec_channel
*dst
,
198 const union tgsi_exec_channel
*src
)
200 dst
->i
[0] = src
->i
[0] >= 0 ? src
->i
[0] : -src
->i
[0];
201 dst
->i
[1] = src
->i
[1] >= 0 ? src
->i
[1] : -src
->i
[1];
202 dst
->i
[2] = src
->i
[2] >= 0 ? src
->i
[2] : -src
->i
[2];
203 dst
->i
[3] = src
->i
[3] >= 0 ? src
->i
[3] : -src
->i
[3];
207 micro_ineg(union tgsi_exec_channel
*dst
,
208 const union tgsi_exec_channel
*src
)
210 dst
->i
[0] = -src
->i
[0];
211 dst
->i
[1] = -src
->i
[1];
212 dst
->i
[2] = -src
->i
[2];
213 dst
->i
[3] = -src
->i
[3];
217 micro_lg2(union tgsi_exec_channel
*dst
,
218 const union tgsi_exec_channel
*src
)
221 dst
->f
[0] = util_fast_log2(src
->f
[0]);
222 dst
->f
[1] = util_fast_log2(src
->f
[1]);
223 dst
->f
[2] = util_fast_log2(src
->f
[2]);
224 dst
->f
[3] = util_fast_log2(src
->f
[3]);
226 dst
->f
[0] = logf(src
->f
[0]) * 1.442695f
;
227 dst
->f
[1] = logf(src
->f
[1]) * 1.442695f
;
228 dst
->f
[2] = logf(src
->f
[2]) * 1.442695f
;
229 dst
->f
[3] = logf(src
->f
[3]) * 1.442695f
;
234 micro_lrp(union tgsi_exec_channel
*dst
,
235 const union tgsi_exec_channel
*src
)
237 dst
->f
[0] = src
[0].f
[0] * (src
[1].f
[0] - src
[2].f
[0]) + src
[2].f
[0];
238 dst
->f
[1] = src
[0].f
[1] * (src
[1].f
[1] - src
[2].f
[1]) + src
[2].f
[1];
239 dst
->f
[2] = src
[0].f
[2] * (src
[1].f
[2] - src
[2].f
[2]) + src
[2].f
[2];
240 dst
->f
[3] = src
[0].f
[3] * (src
[1].f
[3] - src
[2].f
[3]) + src
[2].f
[3];
244 micro_mad(union tgsi_exec_channel
*dst
,
245 const union tgsi_exec_channel
*src
)
247 dst
->f
[0] = src
[0].f
[0] * src
[1].f
[0] + src
[2].f
[0];
248 dst
->f
[1] = src
[0].f
[1] * src
[1].f
[1] + src
[2].f
[1];
249 dst
->f
[2] = src
[0].f
[2] * src
[1].f
[2] + src
[2].f
[2];
250 dst
->f
[3] = src
[0].f
[3] * src
[1].f
[3] + src
[2].f
[3];
254 micro_mov(union tgsi_exec_channel
*dst
,
255 const union tgsi_exec_channel
*src
)
257 dst
->u
[0] = src
->u
[0];
258 dst
->u
[1] = src
->u
[1];
259 dst
->u
[2] = src
->u
[2];
260 dst
->u
[3] = src
->u
[3];
264 micro_rcp(union tgsi_exec_channel
*dst
,
265 const union tgsi_exec_channel
*src
)
267 dst
->f
[0] = 1.0f
/ src
->f
[0];
268 dst
->f
[1] = 1.0f
/ src
->f
[1];
269 dst
->f
[2] = 1.0f
/ src
->f
[2];
270 dst
->f
[3] = 1.0f
/ src
->f
[3];
274 micro_rnd(union tgsi_exec_channel
*dst
,
275 const union tgsi_exec_channel
*src
)
277 dst
->f
[0] = floorf(src
->f
[0] + 0.5f
);
278 dst
->f
[1] = floorf(src
->f
[1] + 0.5f
);
279 dst
->f
[2] = floorf(src
->f
[2] + 0.5f
);
280 dst
->f
[3] = floorf(src
->f
[3] + 0.5f
);
284 micro_rsq(union tgsi_exec_channel
*dst
,
285 const union tgsi_exec_channel
*src
)
287 dst
->f
[0] = 1.0f
/ sqrtf(fabsf(src
->f
[0]));
288 dst
->f
[1] = 1.0f
/ sqrtf(fabsf(src
->f
[1]));
289 dst
->f
[2] = 1.0f
/ sqrtf(fabsf(src
->f
[2]));
290 dst
->f
[3] = 1.0f
/ sqrtf(fabsf(src
->f
[3]));
294 micro_seq(union tgsi_exec_channel
*dst
,
295 const union tgsi_exec_channel
*src
)
297 dst
->f
[0] = src
[0].f
[0] == src
[1].f
[0] ? 1.0f
: 0.0f
;
298 dst
->f
[1] = src
[0].f
[1] == src
[1].f
[1] ? 1.0f
: 0.0f
;
299 dst
->f
[2] = src
[0].f
[2] == src
[1].f
[2] ? 1.0f
: 0.0f
;
300 dst
->f
[3] = src
[0].f
[3] == src
[1].f
[3] ? 1.0f
: 0.0f
;
304 micro_sge(union tgsi_exec_channel
*dst
,
305 const union tgsi_exec_channel
*src
)
307 dst
->f
[0] = src
[0].f
[0] >= src
[1].f
[0] ? 1.0f
: 0.0f
;
308 dst
->f
[1] = src
[0].f
[1] >= src
[1].f
[1] ? 1.0f
: 0.0f
;
309 dst
->f
[2] = src
[0].f
[2] >= src
[1].f
[2] ? 1.0f
: 0.0f
;
310 dst
->f
[3] = src
[0].f
[3] >= src
[1].f
[3] ? 1.0f
: 0.0f
;
314 micro_sgn(union tgsi_exec_channel
*dst
,
315 const union tgsi_exec_channel
*src
)
317 dst
->f
[0] = src
->f
[0] < 0.0f
? -1.0f
: src
->f
[0] > 0.0f
? 1.0f
: 0.0f
;
318 dst
->f
[1] = src
->f
[1] < 0.0f
? -1.0f
: src
->f
[1] > 0.0f
? 1.0f
: 0.0f
;
319 dst
->f
[2] = src
->f
[2] < 0.0f
? -1.0f
: src
->f
[2] > 0.0f
? 1.0f
: 0.0f
;
320 dst
->f
[3] = src
->f
[3] < 0.0f
? -1.0f
: src
->f
[3] > 0.0f
? 1.0f
: 0.0f
;
324 micro_sgt(union tgsi_exec_channel
*dst
,
325 const union tgsi_exec_channel
*src
)
327 dst
->f
[0] = src
[0].f
[0] > src
[1].f
[0] ? 1.0f
: 0.0f
;
328 dst
->f
[1] = src
[0].f
[1] > src
[1].f
[1] ? 1.0f
: 0.0f
;
329 dst
->f
[2] = src
[0].f
[2] > src
[1].f
[2] ? 1.0f
: 0.0f
;
330 dst
->f
[3] = src
[0].f
[3] > src
[1].f
[3] ? 1.0f
: 0.0f
;
334 micro_sin(union tgsi_exec_channel
*dst
,
335 const union tgsi_exec_channel
*src
)
337 dst
->f
[0] = sinf(src
->f
[0]);
338 dst
->f
[1] = sinf(src
->f
[1]);
339 dst
->f
[2] = sinf(src
->f
[2]);
340 dst
->f
[3] = sinf(src
->f
[3]);
344 micro_sle(union tgsi_exec_channel
*dst
,
345 const union tgsi_exec_channel
*src
)
347 dst
->f
[0] = src
[0].f
[0] <= src
[1].f
[0] ? 1.0f
: 0.0f
;
348 dst
->f
[1] = src
[0].f
[1] <= src
[1].f
[1] ? 1.0f
: 0.0f
;
349 dst
->f
[2] = src
[0].f
[2] <= src
[1].f
[2] ? 1.0f
: 0.0f
;
350 dst
->f
[3] = src
[0].f
[3] <= src
[1].f
[3] ? 1.0f
: 0.0f
;
354 micro_slt(union tgsi_exec_channel
*dst
,
355 const union tgsi_exec_channel
*src
)
357 dst
->f
[0] = src
[0].f
[0] < src
[1].f
[0] ? 1.0f
: 0.0f
;
358 dst
->f
[1] = src
[0].f
[1] < src
[1].f
[1] ? 1.0f
: 0.0f
;
359 dst
->f
[2] = src
[0].f
[2] < src
[1].f
[2] ? 1.0f
: 0.0f
;
360 dst
->f
[3] = src
[0].f
[3] < src
[1].f
[3] ? 1.0f
: 0.0f
;
364 micro_sne(union tgsi_exec_channel
*dst
,
365 const union tgsi_exec_channel
*src
)
367 dst
->f
[0] = src
[0].f
[0] != src
[1].f
[0] ? 1.0f
: 0.0f
;
368 dst
->f
[1] = src
[0].f
[1] != src
[1].f
[1] ? 1.0f
: 0.0f
;
369 dst
->f
[2] = src
[0].f
[2] != src
[1].f
[2] ? 1.0f
: 0.0f
;
370 dst
->f
[3] = src
[0].f
[3] != src
[1].f
[3] ? 1.0f
: 0.0f
;
374 micro_trunc(union tgsi_exec_channel
*dst
,
375 const union tgsi_exec_channel
*src
)
377 dst
->f
[0] = (float)(int)src
->f
[0];
378 dst
->f
[1] = (float)(int)src
->f
[1];
379 dst
->f
[2] = (float)(int)src
->f
[2];
380 dst
->f
[3] = (float)(int)src
->f
[3];
389 enum tgsi_exec_datatype
{
390 TGSI_EXEC_DATA_FLOAT
,
396 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
398 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
399 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
400 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
401 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
402 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
403 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
404 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
405 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
406 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
407 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
408 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
409 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
410 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
411 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
412 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
413 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
414 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
415 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
416 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
417 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
418 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
419 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
420 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
421 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
422 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
423 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
424 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
425 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
426 #define TEMP_R0 TGSI_EXEC_TEMP_R0
427 #define TEMP_P0 TGSI_EXEC_TEMP_P0
/** Non-zero when channel CHAN is set in the write mask of INST's first destination. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))

/** Non-zero when channel CHAN is set in the write mask of INST's second destination. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel that
 * is enabled in the first destination's write mask.  CHAN must be an
 * assignable integer variable.  The expansion ends in a bare 'if', so do
 * not follow the loop body with an 'else'.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same as FOR_EACH_ENABLED_CHANNEL, but tests the second destination's write mask. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))


/**
 * Recompute the execution mask as the bitwise AND of the conditional,
 * loop, continue, switch and function masks.  MACH is a pointer
 * expression; it is parenthesized at every use so that arguments such as
 * '&machine' expand correctly.  Note that MACH is evaluated several times.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & (MACH)->ContMask & (MACH)->Switch.mask & (MACH)->FuncMask)
449 static const union tgsi_exec_channel ZeroVec
=
450 { { 0.0, 0.0, 0.0, 0.0 } };
454 * Assert that none of the float values in 'chan' are infinite or NaN.
455 * NaN and Inf may occur normally during program execution and should
456 * not lead to crashes, etc. But when debugging, it's helpful to catch
460 check_inf_or_nan(const union tgsi_exec_channel
*chan
)
462 assert(!util_is_inf_or_nan((chan
)->f
[0]));
463 assert(!util_is_inf_or_nan((chan
)->f
[1]));
464 assert(!util_is_inf_or_nan((chan
)->f
[2]));
465 assert(!util_is_inf_or_nan((chan
)->f
[3]));
471 print_chan(const char *msg
, const union tgsi_exec_channel
*chan
)
473 debug_printf("%s = {%f, %f, %f, %f}\n",
474 msg
, chan
->f
[0], chan
->f
[1], chan
->f
[2], chan
->f
[3]);
481 print_temp(const struct tgsi_exec_machine
*mach
, uint index
)
483 const struct tgsi_exec_vector
*tmp
= &mach
->Temps
[index
];
485 debug_printf("Temp[%u] =\n", index
);
486 for (i
= 0; i
< 4; i
++) {
487 debug_printf(" %c: { %f, %f, %f, %f }\n",
499 * Check if there's a potential src/dst register data dependency when
500 * using SOA execution.
503 * This would expand into:
508 * The second instruction will have the wrong value for t0 if executed as-is.
511 tgsi_check_soa_dependencies(const struct tgsi_full_instruction
*inst
)
515 uint writemask
= inst
->Dst
[0].Register
.WriteMask
;
516 if (writemask
== TGSI_WRITEMASK_X
||
517 writemask
== TGSI_WRITEMASK_Y
||
518 writemask
== TGSI_WRITEMASK_Z
||
519 writemask
== TGSI_WRITEMASK_W
||
520 writemask
== TGSI_WRITEMASK_NONE
) {
521 /* no chance of data dependency */
525 /* loop over src regs */
526 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
527 if ((inst
->Src
[i
].Register
.File
==
528 inst
->Dst
[0].Register
.File
) &&
529 (inst
->Src
[i
].Register
.Index
==
530 inst
->Dst
[0].Register
.Index
)) {
531 /* loop over dest channels */
532 uint channelsWritten
= 0x0;
533 FOR_EACH_ENABLED_CHANNEL(*inst
, chan
) {
534 /* check if we're reading a channel that's been written */
535 uint swizzle
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[i
], chan
);
536 if (channelsWritten
& (1 << swizzle
)) {
540 channelsWritten
|= (1 << chan
);
549 * Initialize machine state by expanding tokens to full instructions,
550 * allocating temporary storage, setting up constants, etc.
551 * After this, we can call tgsi_exec_machine_run() many times.
554 tgsi_exec_machine_bind_shader(
555 struct tgsi_exec_machine
*mach
,
556 const struct tgsi_token
*tokens
,
558 struct tgsi_sampler
**samplers
)
561 struct tgsi_parse_context parse
;
562 struct tgsi_exec_labels
*labels
= &mach
->Labels
;
563 struct tgsi_full_instruction
*instructions
;
564 struct tgsi_full_declaration
*declarations
;
565 uint maxInstructions
= 10, numInstructions
= 0;
566 uint maxDeclarations
= 10, numDeclarations
= 0;
570 tgsi_dump(tokens
, 0);
575 mach
->Tokens
= tokens
;
576 mach
->Samplers
= samplers
;
578 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
579 if (k
!= TGSI_PARSE_OK
) {
580 debug_printf( "Problem parsing!\n" );
584 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
588 declarations
= (struct tgsi_full_declaration
*)
589 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
595 instructions
= (struct tgsi_full_instruction
*)
596 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
599 FREE( declarations
);
603 while( !tgsi_parse_end_of_tokens( &parse
) ) {
604 uint pointer
= parse
.Position
;
607 tgsi_parse_token( &parse
);
608 switch( parse
.FullToken
.Token
.Type
) {
609 case TGSI_TOKEN_TYPE_DECLARATION
:
610 /* save expanded declaration */
611 if (numDeclarations
== maxDeclarations
) {
612 declarations
= REALLOC(declarations
,
614 * sizeof(struct tgsi_full_declaration
),
615 (maxDeclarations
+ 10)
616 * sizeof(struct tgsi_full_declaration
));
617 maxDeclarations
+= 10;
619 if (parse
.FullToken
.FullDeclaration
.Declaration
.File
== TGSI_FILE_OUTPUT
) {
621 for (reg
= parse
.FullToken
.FullDeclaration
.Range
.First
;
622 reg
<= parse
.FullToken
.FullDeclaration
.Range
.Last
;
627 memcpy(declarations
+ numDeclarations
,
628 &parse
.FullToken
.FullDeclaration
,
629 sizeof(declarations
[0]));
633 case TGSI_TOKEN_TYPE_IMMEDIATE
:
635 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
637 assert( mach
->ImmLimit
+ 1 <= TGSI_EXEC_NUM_IMMEDIATES
);
639 for( i
= 0; i
< size
; i
++ ) {
640 mach
->Imms
[mach
->ImmLimit
][i
] =
641 parse
.FullToken
.FullImmediate
.u
[i
].Float
;
647 case TGSI_TOKEN_TYPE_INSTRUCTION
:
648 assert( labels
->count
< MAX_LABELS
);
650 labels
->labels
[labels
->count
][0] = instno
;
651 labels
->labels
[labels
->count
][1] = pointer
;
654 /* save expanded instruction */
655 if (numInstructions
== maxInstructions
) {
656 instructions
= REALLOC(instructions
,
658 * sizeof(struct tgsi_full_instruction
),
659 (maxInstructions
+ 10)
660 * sizeof(struct tgsi_full_instruction
));
661 maxInstructions
+= 10;
664 memcpy(instructions
+ numInstructions
,
665 &parse
.FullToken
.FullInstruction
,
666 sizeof(instructions
[0]));
671 case TGSI_TOKEN_TYPE_PROPERTY
:
678 tgsi_parse_free (&parse
);
680 if (mach
->Declarations
) {
681 FREE( mach
->Declarations
);
683 mach
->Declarations
= declarations
;
684 mach
->NumDeclarations
= numDeclarations
;
686 if (mach
->Instructions
) {
687 FREE( mach
->Instructions
);
689 mach
->Instructions
= instructions
;
690 mach
->NumInstructions
= numInstructions
;
694 struct tgsi_exec_machine
*
695 tgsi_exec_machine_create( void )
697 struct tgsi_exec_machine
*mach
;
700 mach
= align_malloc( sizeof *mach
, 16 );
704 memset(mach
, 0, sizeof(*mach
));
706 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_TEMP_ADDR
];
707 mach
->MaxGeometryShaderOutputs
= TGSI_MAX_TOTAL_VERTICES
;
708 mach
->Predicates
= &mach
->Temps
[TGSI_EXEC_TEMP_P0
];
710 /* Setup constants. */
711 for( i
= 0; i
< 4; i
++ ) {
712 mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
].u
[i
] = 0x00000000;
713 mach
->Temps
[TEMP_7F_I
].xyzw
[TEMP_7F_C
].u
[i
] = 0x7FFFFFFF;
714 mach
->Temps
[TEMP_80_I
].xyzw
[TEMP_80_C
].u
[i
] = 0x80000000;
715 mach
->Temps
[TEMP_FF_I
].xyzw
[TEMP_FF_C
].u
[i
] = 0xFFFFFFFF;
716 mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
].f
[i
] = 1.0f
;
717 mach
->Temps
[TEMP_2_I
].xyzw
[TEMP_2_C
].f
[i
] = 2.0f
;
718 mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
].f
[i
] = 128.0f
;
719 mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
].f
[i
] = -128.0f
;
720 mach
->Temps
[TEMP_3_I
].xyzw
[TEMP_3_C
].f
[i
] = 3.0f
;
721 mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
].f
[i
] = 0.5f
;
725 /* silence warnings */
739 tgsi_exec_machine_destroy(struct tgsi_exec_machine
*mach
)
742 FREE(mach
->Instructions
);
743 FREE(mach
->Declarations
);
751 union tgsi_exec_channel
*dst
,
752 const union tgsi_exec_channel
*src0
,
753 const union tgsi_exec_channel
*src1
)
755 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
756 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
757 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
758 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
763 union tgsi_exec_channel
*dst
,
764 const union tgsi_exec_channel
*src0
,
765 const union tgsi_exec_channel
*src1
)
767 if (src1
->f
[0] != 0) {
768 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
770 if (src1
->f
[1] != 0) {
771 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
773 if (src1
->f
[2] != 0) {
774 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
776 if (src1
->f
[3] != 0) {
777 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
782 micro_float_clamp(union tgsi_exec_channel
*dst
,
783 const union tgsi_exec_channel
*src
)
787 for (i
= 0; i
< 4; i
++) {
788 if (src
->f
[i
] > 0.0f
) {
789 if (src
->f
[i
] > 1.884467e+019f
)
790 dst
->f
[i
] = 1.884467e+019f
;
791 else if (src
->f
[i
] < 5.42101e-020f
)
792 dst
->f
[i
] = 5.42101e-020f
;
794 dst
->f
[i
] = src
->f
[i
];
797 if (src
->f
[i
] < -1.884467e+019f
)
798 dst
->f
[i
] = -1.884467e+019f
;
799 else if (src
->f
[i
] > -5.42101e-020f
)
800 dst
->f
[i
] = -5.42101e-020f
;
802 dst
->f
[i
] = src
->f
[i
];
809 union tgsi_exec_channel
*dst
,
810 const union tgsi_exec_channel
*src0
,
811 const union tgsi_exec_channel
*src1
,
812 const union tgsi_exec_channel
*src2
,
813 const union tgsi_exec_channel
*src3
)
815 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
816 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
817 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
818 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
823 union tgsi_exec_channel
*dst
,
824 const union tgsi_exec_channel
*src0
,
825 const union tgsi_exec_channel
*src1
)
827 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
828 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
829 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
830 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
835 union tgsi_exec_channel
*dst
,
836 const union tgsi_exec_channel
*src0
,
837 const union tgsi_exec_channel
*src1
)
839 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
840 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
841 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
842 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
847 union tgsi_exec_channel
*dst
,
848 const union tgsi_exec_channel
*src0
,
849 const union tgsi_exec_channel
*src1
)
851 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
852 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
853 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
854 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
860 union tgsi_exec_channel
*dst0
,
861 union tgsi_exec_channel
*dst1
,
862 const union tgsi_exec_channel
*src0
,
863 const union tgsi_exec_channel
*src1
)
865 dst1
->i
[0] = src0
->i
[0] * src1
->i
[0];
866 dst1
->i
[1] = src0
->i
[1] * src1
->i
[1];
867 dst1
->i
[2] = src0
->i
[2] * src1
->i
[2];
868 dst1
->i
[3] = src0
->i
[3] * src1
->i
[3];
879 union tgsi_exec_channel
*dst0
,
880 union tgsi_exec_channel
*dst1
,
881 const union tgsi_exec_channel
*src0
,
882 const union tgsi_exec_channel
*src1
)
884 dst1
->u
[0] = src0
->u
[0] * src1
->u
[0];
885 dst1
->u
[1] = src0
->u
[1] * src1
->u
[1];
886 dst1
->u
[2] = src0
->u
[2] * src1
->u
[2];
887 dst1
->u
[3] = src0
->u
[3] * src1
->u
[3];
899 union tgsi_exec_channel
*dst
,
900 const union tgsi_exec_channel
*src0
,
901 const union tgsi_exec_channel
*src1
,
902 const union tgsi_exec_channel
*src2
)
904 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
905 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
906 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
907 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
913 union tgsi_exec_channel
*dst
,
914 const union tgsi_exec_channel
*src
)
916 dst
->f
[0] = -src
->f
[0];
917 dst
->f
[1] = -src
->f
[1];
918 dst
->f
[2] = -src
->f
[2];
919 dst
->f
[3] = -src
->f
[3];
924 union tgsi_exec_channel
*dst
,
925 const union tgsi_exec_channel
*src0
,
926 const union tgsi_exec_channel
*src1
)
929 dst
->f
[0] = util_fast_pow( src0
->f
[0], src1
->f
[0] );
930 dst
->f
[1] = util_fast_pow( src0
->f
[1], src1
->f
[1] );
931 dst
->f
[2] = util_fast_pow( src0
->f
[2], src1
->f
[2] );
932 dst
->f
[3] = util_fast_pow( src0
->f
[3], src1
->f
[3] );
934 dst
->f
[0] = powf( src0
->f
[0], src1
->f
[0] );
935 dst
->f
[1] = powf( src0
->f
[1], src1
->f
[1] );
936 dst
->f
[2] = powf( src0
->f
[2], src1
->f
[2] );
937 dst
->f
[3] = powf( src0
->f
[3], src1
->f
[3] );
942 micro_sqrt( union tgsi_exec_channel
*dst
,
943 const union tgsi_exec_channel
*src
)
945 dst
->f
[0] = sqrtf( src
->f
[0] );
946 dst
->f
[1] = sqrtf( src
->f
[1] );
947 dst
->f
[2] = sqrtf( src
->f
[2] );
948 dst
->f
[3] = sqrtf( src
->f
[3] );
953 union tgsi_exec_channel
*dst
,
954 const union tgsi_exec_channel
*src0
,
955 const union tgsi_exec_channel
*src1
)
957 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
958 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
959 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
960 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
964 fetch_src_file_channel(const struct tgsi_exec_machine
*mach
,
967 const union tgsi_exec_channel
*index
,
968 const union tgsi_exec_channel
*index2D
,
969 union tgsi_exec_channel
*chan
)
974 case TGSI_FILE_CONSTANT
:
975 for (i
= 0; i
< QUAD_SIZE
; i
++) {
976 assert(index2D
->i
[i
] >= 0 && index2D
->i
[i
] < PIPE_MAX_CONSTANT_BUFFERS
);
977 assert(mach
->Consts
[index2D
->i
[i
]]);
979 if (index
->i
[i
] < 0) {
982 const uint
*p
= (const uint
*)mach
->Consts
[index2D
->i
[i
]];
984 chan
->u
[i
] = p
[index
->i
[i
] * 4 + swizzle
];
989 case TGSI_FILE_INPUT
:
990 case TGSI_FILE_SYSTEM_VALUE
:
991 for (i
= 0; i
< QUAD_SIZE
; i
++) {
992 /* XXX: 2D indexing */
993 chan
->u
[i
] = mach
->Inputs
[index2D
->i
[i
] * TGSI_EXEC_MAX_INPUT_ATTRIBS
+ index
->i
[i
]].xyzw
[swizzle
].u
[i
];
997 case TGSI_FILE_TEMPORARY
:
998 for (i
= 0; i
< QUAD_SIZE
; i
++) {
999 assert(index
->i
[i
] < TGSI_EXEC_NUM_TEMPS
);
1000 assert(index2D
->i
[i
] == 0);
1002 chan
->u
[i
] = mach
->Temps
[index
->i
[i
]].xyzw
[swizzle
].u
[i
];
1006 case TGSI_FILE_IMMEDIATE
:
1007 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1008 assert(index
->i
[i
] >= 0 && index
->i
[i
] < (int)mach
->ImmLimit
);
1009 assert(index2D
->i
[i
] == 0);
1011 chan
->f
[i
] = mach
->Imms
[index
->i
[i
]][swizzle
];
1015 case TGSI_FILE_ADDRESS
:
1016 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1017 assert(index
->i
[i
] >= 0);
1018 assert(index2D
->i
[i
] == 0);
1020 chan
->u
[i
] = mach
->Addrs
[index
->i
[i
]].xyzw
[swizzle
].u
[i
];
1024 case TGSI_FILE_PREDICATE
:
1025 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1026 assert(index
->i
[i
] >= 0 && index
->i
[i
] < TGSI_EXEC_NUM_PREDS
);
1027 assert(index2D
->i
[i
] == 0);
1029 chan
->u
[i
] = mach
->Predicates
[0].xyzw
[swizzle
].u
[i
];
1033 case TGSI_FILE_OUTPUT
:
1034 /* vertex/fragment output vars can be read too */
1035 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1036 assert(index
->i
[i
] >= 0);
1037 assert(index2D
->i
[i
] == 0);
1039 chan
->u
[i
] = mach
->Outputs
[index
->i
[i
]].xyzw
[swizzle
].u
[i
];
1045 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1052 fetch_source(const struct tgsi_exec_machine
*mach
,
1053 union tgsi_exec_channel
*chan
,
1054 const struct tgsi_full_src_register
*reg
,
1055 const uint chan_index
,
1056 enum tgsi_exec_datatype src_datatype
)
1058 union tgsi_exec_channel index
;
1059 union tgsi_exec_channel index2D
;
1062 /* We start with a direct index into a register file.
1066 * file = Register.File
1067 * [1] = Register.Index
1072 index
.i
[3] = reg
->Register
.Index
;
1074 /* There is an extra source register that indirectly subscripts
1075 * a register file. The direct index now becomes an offset
1076 * that is being added to the indirect register.
1080 * ind = Indirect.File
1081 * [2] = Indirect.Index
1082 * .x = Indirect.SwizzleX
1084 if (reg
->Register
.Indirect
) {
1085 union tgsi_exec_channel index2
;
1086 union tgsi_exec_channel indir_index
;
1087 const uint execmask
= mach
->ExecMask
;
1090 /* which address register (always zero now) */
1094 index2
.i
[3] = reg
->Indirect
.Index
;
1096 /* get current value of address register[swizzle] */
1097 swizzle
= tgsi_util_get_src_register_swizzle( ®
->Indirect
, CHAN_X
);
1098 fetch_src_file_channel(mach
,
1105 /* add value of address register to the offset */
1106 index
.i
[0] += indir_index
.i
[0];
1107 index
.i
[1] += indir_index
.i
[1];
1108 index
.i
[2] += indir_index
.i
[2];
1109 index
.i
[3] += indir_index
.i
[3];
1111 /* for disabled execution channels, zero-out the index to
1112 * avoid using a potential garbage value.
1114 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1115 if ((execmask
& (1 << i
)) == 0)
1120 /* There is an extra source register that is a second
1121 * subscript to a register file. Effectively it means that
1122 * the register file is actually a 2D array of registers.
1126 * [3] = Dimension.Index
1128 if (reg
->Register
.Dimension
) {
1132 index2D
.i
[3] = reg
->Dimension
.Index
;
1134 /* Again, the second subscript index can be addressed indirectly
1135 * identically to the first one.
1136 * Nothing stops us from indirectly addressing the indirect register,
1137 * but there is no need for that, so we won't exercise it.
1139 * file[ind[4].y+3][1],
1141 * ind = DimIndirect.File
1142 * [4] = DimIndirect.Index
1143 * .y = DimIndirect.SwizzleX
1145 if (reg
->Dimension
.Indirect
) {
1146 union tgsi_exec_channel index2
;
1147 union tgsi_exec_channel indir_index
;
1148 const uint execmask
= mach
->ExecMask
;
1154 index2
.i
[3] = reg
->DimIndirect
.Index
;
1156 swizzle
= tgsi_util_get_src_register_swizzle( ®
->DimIndirect
, CHAN_X
);
1157 fetch_src_file_channel(mach
,
1158 reg
->DimIndirect
.File
,
1164 index2D
.i
[0] += indir_index
.i
[0];
1165 index2D
.i
[1] += indir_index
.i
[1];
1166 index2D
.i
[2] += indir_index
.i
[2];
1167 index2D
.i
[3] += indir_index
.i
[3];
1169 /* for disabled execution channels, zero-out the index to
1170 * avoid using a potential garbage value.
1172 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1173 if ((execmask
& (1 << i
)) == 0) {
1179 /* If by any chance there was a need for a 3D array of register
1180 * files, we would have to check whether Dimension is followed
1181 * by a dimension register and continue the saga.
1190 swizzle
= tgsi_util_get_full_src_register_swizzle( reg
, chan_index
);
1191 fetch_src_file_channel(mach
,
1198 if (reg
->Register
.Absolute
) {
1199 if (src_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1200 micro_abs(chan
, chan
);
1202 micro_iabs(chan
, chan
);
1206 if (reg
->Register
.Negate
) {
1207 if (src_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1208 micro_neg(chan
, chan
);
1210 micro_ineg(chan
, chan
);
1216 store_dest(struct tgsi_exec_machine
*mach
,
1217 const union tgsi_exec_channel
*chan
,
1218 const struct tgsi_full_dst_register
*reg
,
1219 const struct tgsi_full_instruction
*inst
,
1221 enum tgsi_exec_datatype dst_datatype
)
1224 union tgsi_exec_channel null
;
1225 union tgsi_exec_channel
*dst
;
1226 uint execmask
= mach
->ExecMask
;
1227 int offset
= 0; /* indirection offset */
1231 if (0 && dst_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1232 check_inf_or_nan(chan
);
1235 /* There is an extra source register that indirectly subscripts
1236 * a register file. The direct index now becomes an offset
1237 * that is being added to the indirect register.
1241 * ind = Indirect.File
1242 * [2] = Indirect.Index
1243 * .x = Indirect.SwizzleX
1245 if (reg
->Register
.Indirect
) {
1246 union tgsi_exec_channel index
;
1247 union tgsi_exec_channel indir_index
;
1250 /* which address register (always zero for now) */
1254 index
.i
[3] = reg
->Indirect
.Index
;
1256 /* get current value of address register[swizzle] */
1257 swizzle
= tgsi_util_get_src_register_swizzle( ®
->Indirect
, CHAN_X
);
1259 /* fetch values from the address/indirection register */
1260 fetch_src_file_channel(mach
,
1267 /* save indirection offset */
1268 offset
= indir_index
.i
[0];
1271 switch (reg
->Register
.File
) {
1272 case TGSI_FILE_NULL
:
1276 case TGSI_FILE_OUTPUT
:
1277 index
= mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1278 + reg
->Register
.Index
;
1279 dst
= &mach
->Outputs
[offset
+ index
].xyzw
[chan_index
];
1281 if (TGSI_PROCESSOR_GEOMETRY
== mach
->Processor
) {
1282 fprintf(stderr
, "STORING OUT[%d] mask(%d), = (", offset
+ index
, execmask
);
1283 for (i
= 0; i
< QUAD_SIZE
; i
++)
1284 if (execmask
& (1 << i
))
1285 fprintf(stderr
, "%f, ", chan
->f
[i
]);
1286 fprintf(stderr
, ")\n");
1291 case TGSI_FILE_TEMPORARY
:
1292 index
= reg
->Register
.Index
;
1293 assert( index
< TGSI_EXEC_NUM_TEMPS
);
1294 dst
= &mach
->Temps
[offset
+ index
].xyzw
[chan_index
];
1297 case TGSI_FILE_ADDRESS
:
1298 index
= reg
->Register
.Index
;
1299 dst
= &mach
->Addrs
[index
].xyzw
[chan_index
];
1302 case TGSI_FILE_LOOP
:
1303 assert(reg
->Register
.Index
== 0);
1304 assert(mach
->LoopCounterStackTop
> 0);
1305 assert(chan_index
== CHAN_X
);
1306 dst
= &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[chan_index
];
1309 case TGSI_FILE_PREDICATE
:
1310 index
= reg
->Register
.Index
;
1311 assert(index
< TGSI_EXEC_NUM_PREDS
);
1312 dst
= &mach
->Predicates
[index
].xyzw
[chan_index
];
1320 if (inst
->Instruction
.Predicate
) {
1322 union tgsi_exec_channel
*pred
;
1324 switch (chan_index
) {
1326 swizzle
= inst
->Predicate
.SwizzleX
;
1329 swizzle
= inst
->Predicate
.SwizzleY
;
1332 swizzle
= inst
->Predicate
.SwizzleZ
;
1335 swizzle
= inst
->Predicate
.SwizzleW
;
1342 assert(inst
->Predicate
.Index
== 0);
1344 pred
= &mach
->Predicates
[inst
->Predicate
.Index
].xyzw
[swizzle
];
1346 if (inst
->Predicate
.Negate
) {
1347 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1349 execmask
&= ~(1 << i
);
1353 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1355 execmask
&= ~(1 << i
);
1361 switch (inst
->Instruction
.Saturate
) {
1363 for (i
= 0; i
< QUAD_SIZE
; i
++)
1364 if (execmask
& (1 << i
))
1365 dst
->i
[i
] = chan
->i
[i
];
1368 case TGSI_SAT_ZERO_ONE
:
1369 for (i
= 0; i
< QUAD_SIZE
; i
++)
1370 if (execmask
& (1 << i
)) {
1371 if (chan
->f
[i
] < 0.0f
)
1373 else if (chan
->f
[i
] > 1.0f
)
1376 dst
->i
[i
] = chan
->i
[i
];
1380 case TGSI_SAT_MINUS_PLUS_ONE
:
1381 for (i
= 0; i
< QUAD_SIZE
; i
++)
1382 if (execmask
& (1 << i
)) {
1383 if (chan
->f
[i
] < -1.0f
)
1385 else if (chan
->f
[i
] > 1.0f
)
1388 dst
->i
[i
] = chan
->i
[i
];
/*
 * Fetch channel CHAN_IDX of source operand SRC_IDX of the current
 * instruction into *DST_CHAN, treating the data as float.
 *
 * Expands to a fetch_source() call that picks up the variables named
 * "mach" and "inst" from the scope in which the macro is used.
 */
#define FETCH(DST_CHAN, SRC_IDX, CHAN_IDX) \
   fetch_source(mach, (DST_CHAN), &inst->Src[(SRC_IDX)], (CHAN_IDX), \
                TGSI_EXEC_DATA_FLOAT)
/*
 * Store the channel *SRC_CHAN into channel CHAN_IDX of destination operand
 * DST_IDX of the current instruction, treating the data as float.
 *
 * Expands to a store_dest() call that picks up the variables named "mach"
 * and "inst" from the scope in which the macro is used.
 */
#define STORE(SRC_CHAN, DST_IDX, CHAN_IDX) \
   store_dest(mach, (SRC_CHAN), &inst->Dst[(DST_IDX)], inst, (CHAN_IDX), \
              TGSI_EXEC_DATA_FLOAT)
1405 * Execute ARB-style KIL which is predicated by a src register.
1406 * Kill fragment if any of the four values is less than zero.
1409 exec_kil(struct tgsi_exec_machine
*mach
,
1410 const struct tgsi_full_instruction
*inst
)
1414 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1415 union tgsi_exec_channel r
[1];
1417 /* This mask stores component bits that were already tested. */
1420 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1425 /* unswizzle channel */
1426 swizzle
= tgsi_util_get_full_src_register_swizzle (
1430 /* check if the component has not been already tested */
1431 if (uniquemask
& (1 << swizzle
))
1433 uniquemask
|= 1 << swizzle
;
1435 FETCH(&r
[0], 0, chan_index
);
1436 for (i
= 0; i
< 4; i
++)
1437 if (r
[0].f
[i
] < 0.0f
)
1441 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1445 * Execute NVIDIA-style KIL which is predicated by a condition code.
1446 * Kill fragment if the condition code is TRUE.
1449 exec_kilp(struct tgsi_exec_machine
*mach
,
1450 const struct tgsi_full_instruction
*inst
)
1452 uint kilmask
; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1454 /* "unconditional" kil */
1455 kilmask
= mach
->ExecMask
;
1456 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1460 emit_vertex(struct tgsi_exec_machine
*mach
)
1462 /* FIXME: check for exec mask correctly
1464 for (i = 0; i < QUAD_SIZE; ++i) {
1465 if ((mach->ExecMask & (1 << i)))
1467 if (mach
->ExecMask
) {
1468 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += mach
->NumOutputs
;
1469 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
1474 emit_primitive(struct tgsi_exec_machine
*mach
)
1476 unsigned *prim_count
= &mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0];
1477 /* FIXME: check for exec mask correctly
1479 for (i = 0; i < QUAD_SIZE; ++i) {
1480 if ((mach->ExecMask & (1 << i)))
1482 if (mach
->ExecMask
) {
1484 debug_assert((*prim_count
* mach
->NumOutputs
) < mach
->MaxGeometryShaderOutputs
);
1485 mach
->Primitives
[*prim_count
] = 0;
1490 * Fetch four texture samples using STR texture coordinates.
1493 fetch_texel( struct tgsi_sampler
*sampler
,
1494 const union tgsi_exec_channel
*s
,
1495 const union tgsi_exec_channel
*t
,
1496 const union tgsi_exec_channel
*p
,
1497 const union tgsi_exec_channel
*c0
,
1498 enum tgsi_sampler_control control
,
1499 union tgsi_exec_channel
*r
,
1500 union tgsi_exec_channel
*g
,
1501 union tgsi_exec_channel
*b
,
1502 union tgsi_exec_channel
*a
)
1505 float rgba
[NUM_CHANNELS
][QUAD_SIZE
];
1507 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, c0
->f
, control
, rgba
);
1509 for (j
= 0; j
< 4; j
++) {
1510 r
->f
[j
] = rgba
[0][j
];
1511 g
->f
[j
] = rgba
[1][j
];
1512 b
->f
[j
] = rgba
[2][j
];
1513 a
->f
[j
] = rgba
[3][j
];
/*
 * Texture-fetch variants; exec_tex() compares its "modifier" value against
 * these constants.
 */
#define TEX_MODIFIER_NONE           0  /* plain lookup */
#define TEX_MODIFIER_PROJECTED      1  /* coordinates are divided by W */
#define TEX_MODIFIER_LOD_BIAS       2  /* sampled with tgsi_sampler_lod_bias */
#define TEX_MODIFIER_EXPLICIT_LOD   3  /* sampled with tgsi_sampler_lod_explicit */
1525 exec_tex(struct tgsi_exec_machine
*mach
,
1526 const struct tgsi_full_instruction
*inst
,
1529 const uint unit
= inst
->Src
[1].Register
.Index
;
1530 union tgsi_exec_channel r
[4];
1531 const union tgsi_exec_channel
*lod
= &ZeroVec
;
1532 enum tgsi_sampler_control control
;
1535 if (modifier
!= TEX_MODIFIER_NONE
) {
1536 FETCH(&r
[3], 0, CHAN_W
);
1537 if (modifier
!= TEX_MODIFIER_PROJECTED
) {
1542 if (modifier
== TEX_MODIFIER_EXPLICIT_LOD
) {
1543 control
= tgsi_sampler_lod_explicit
;
1545 control
= tgsi_sampler_lod_bias
;
1548 switch (inst
->Texture
.Texture
) {
1549 case TGSI_TEXTURE_1D
:
1550 case TGSI_TEXTURE_SHADOW1D
:
1551 FETCH(&r
[0], 0, CHAN_X
);
1553 if (modifier
== TEX_MODIFIER_PROJECTED
) {
1554 micro_div(&r
[0], &r
[0], &r
[3]);
1557 fetch_texel(mach
->Samplers
[unit
],
1558 &r
[0], &ZeroVec
, &ZeroVec
, lod
, /* S, T, P, LOD */
1560 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1563 case TGSI_TEXTURE_2D
:
1564 case TGSI_TEXTURE_RECT
:
1565 case TGSI_TEXTURE_SHADOW2D
:
1566 case TGSI_TEXTURE_SHADOWRECT
:
1567 FETCH(&r
[0], 0, CHAN_X
);
1568 FETCH(&r
[1], 0, CHAN_Y
);
1569 FETCH(&r
[2], 0, CHAN_Z
);
1571 if (modifier
== TEX_MODIFIER_PROJECTED
) {
1572 micro_div(&r
[0], &r
[0], &r
[3]);
1573 micro_div(&r
[1], &r
[1], &r
[3]);
1574 micro_div(&r
[2], &r
[2], &r
[3]);
1577 fetch_texel(mach
->Samplers
[unit
],
1578 &r
[0], &r
[1], &r
[2], lod
, /* S, T, P, LOD */
1580 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1583 case TGSI_TEXTURE_3D
:
1584 case TGSI_TEXTURE_CUBE
:
1585 FETCH(&r
[0], 0, CHAN_X
);
1586 FETCH(&r
[1], 0, CHAN_Y
);
1587 FETCH(&r
[2], 0, CHAN_Z
);
1589 if (modifier
== TEX_MODIFIER_PROJECTED
) {
1590 micro_div(&r
[0], &r
[0], &r
[3]);
1591 micro_div(&r
[1], &r
[1], &r
[3]);
1592 micro_div(&r
[2], &r
[2], &r
[3]);
1595 fetch_texel(mach
->Samplers
[unit
],
1596 &r
[0], &r
[1], &r
[2], lod
,
1598 &r
[0], &r
[1], &r
[2], &r
[3]);
1605 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
1606 STORE(&r
[chan_index
], 0, chan_index
);
1611 exec_txd(struct tgsi_exec_machine
*mach
,
1612 const struct tgsi_full_instruction
*inst
)
1614 const uint unit
= inst
->Src
[3].Register
.Index
;
1615 union tgsi_exec_channel r
[4];
1619 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1622 switch (inst
->Texture
.Texture
) {
1623 case TGSI_TEXTURE_1D
:
1624 case TGSI_TEXTURE_SHADOW1D
:
1626 FETCH(&r
[0], 0, CHAN_X
);
1628 fetch_texel(mach
->Samplers
[unit
],
1629 &r
[0], &ZeroVec
, &ZeroVec
, &ZeroVec
, /* S, T, P, BIAS */
1630 tgsi_sampler_lod_bias
,
1631 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1634 case TGSI_TEXTURE_2D
:
1635 case TGSI_TEXTURE_RECT
:
1636 case TGSI_TEXTURE_SHADOW2D
:
1637 case TGSI_TEXTURE_SHADOWRECT
:
1639 FETCH(&r
[0], 0, CHAN_X
);
1640 FETCH(&r
[1], 0, CHAN_Y
);
1641 FETCH(&r
[2], 0, CHAN_Z
);
1643 fetch_texel(mach
->Samplers
[unit
],
1644 &r
[0], &r
[1], &r
[2], &ZeroVec
, /* inputs */
1645 tgsi_sampler_lod_bias
,
1646 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1649 case TGSI_TEXTURE_3D
:
1650 case TGSI_TEXTURE_CUBE
:
1652 FETCH(&r
[0], 0, CHAN_X
);
1653 FETCH(&r
[1], 0, CHAN_Y
);
1654 FETCH(&r
[2], 0, CHAN_Z
);
1656 fetch_texel(mach
->Samplers
[unit
],
1657 &r
[0], &r
[1], &r
[2], &ZeroVec
,
1658 tgsi_sampler_lod_bias
,
1659 &r
[0], &r
[1], &r
[2], &r
[3]);
1666 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
1667 STORE(&r
[chan_index
], 0, chan_index
);
1673 * Evaluate a constant-valued coefficient at the position of the
1678 struct tgsi_exec_machine
*mach
,
1684 for( i
= 0; i
< QUAD_SIZE
; i
++ ) {
1685 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
1690 * Evaluate a linear-valued coefficient at the position of the
1695 struct tgsi_exec_machine
*mach
,
1699 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1700 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1701 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1702 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1703 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1704 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
1705 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
1706 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
1707 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
1711 * Evaluate a perspective-valued coefficient at the position of the
1715 eval_perspective_coef(
1716 struct tgsi_exec_machine
*mach
,
1720 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
1721 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
1722 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
1723 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
1724 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
1725 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
1726 /* divide by W here */
1727 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
1728 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
1729 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
1730 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
1734 typedef void (* eval_coef_func
)(
1735 struct tgsi_exec_machine
*mach
,
1740 exec_declaration(struct tgsi_exec_machine
*mach
,
1741 const struct tgsi_full_declaration
*decl
)
1743 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
1744 if (decl
->Declaration
.File
== TGSI_FILE_INPUT
||
1745 decl
->Declaration
.File
== TGSI_FILE_SYSTEM_VALUE
) {
1746 uint first
, last
, mask
;
1748 first
= decl
->Range
.First
;
1749 last
= decl
->Range
.Last
;
1750 mask
= decl
->Declaration
.UsageMask
;
1752 if (decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
) {
1753 assert(decl
->Semantic
.Index
== 0);
1754 assert(first
== last
);
1755 assert(mask
== TGSI_WRITEMASK_XYZW
);
1757 mach
->Inputs
[first
] = mach
->QuadPos
;
1758 } else if (decl
->Semantic
.Name
== TGSI_SEMANTIC_FACE
) {
1761 assert(decl
->Semantic
.Index
== 0);
1762 assert(first
== last
);
1764 for (i
= 0; i
< QUAD_SIZE
; i
++) {
1765 mach
->Inputs
[first
].xyzw
[0].f
[i
] = mach
->Face
;
1768 eval_coef_func eval
;
1771 switch (decl
->Declaration
.Interpolate
) {
1772 case TGSI_INTERPOLATE_CONSTANT
:
1773 eval
= eval_constant_coef
;
1776 case TGSI_INTERPOLATE_LINEAR
:
1777 eval
= eval_linear_coef
;
1780 case TGSI_INTERPOLATE_PERSPECTIVE
:
1781 eval
= eval_perspective_coef
;
1789 for (j
= 0; j
< NUM_CHANNELS
; j
++) {
1790 if (mask
& (1 << j
)) {
1791 for (i
= first
; i
<= last
; i
++) {
/**
 * Signature shared by the per-channel micro operations that are handed to
 * the exec_vector_*() helpers.
 *
 * \param dst  channel that receives the result
 * \param src  first operand channel; operations taking several operands
 *             read the following channels as src[1], src[2], ...
 */
typedef void (*micro_op)(union tgsi_exec_channel *dst,
                         const union tgsi_exec_channel *src);
1805 exec_scalar_unary(struct tgsi_exec_machine
*mach
,
1806 const struct tgsi_full_instruction
*inst
,
1808 enum tgsi_exec_datatype dst_datatype
,
1809 enum tgsi_exec_datatype src_datatype
)
1812 union tgsi_exec_channel src
;
1813 union tgsi_exec_channel dst
;
1815 fetch_source(mach
, &src
, &inst
->Src
[0], CHAN_X
, src_datatype
);
1817 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1818 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1819 store_dest(mach
, &dst
, &inst
->Dst
[0], inst
, chan
, dst_datatype
);
1825 exec_vector_unary(struct tgsi_exec_machine
*mach
,
1826 const struct tgsi_full_instruction
*inst
,
1828 enum tgsi_exec_datatype dst_datatype
,
1829 enum tgsi_exec_datatype src_datatype
)
1832 struct tgsi_exec_vector dst
;
1834 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1835 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1836 union tgsi_exec_channel src
;
1838 fetch_source(mach
, &src
, &inst
->Src
[0], chan
, src_datatype
);
1839 op(&dst
.xyzw
[chan
], &src
);
1842 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1843 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1844 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
1850 exec_vector_binary(struct tgsi_exec_machine
*mach
,
1851 const struct tgsi_full_instruction
*inst
,
1853 enum tgsi_exec_datatype dst_datatype
,
1854 enum tgsi_exec_datatype src_datatype
)
1857 struct tgsi_exec_vector dst
;
1859 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1860 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1861 union tgsi_exec_channel src
[2];
1863 fetch_source(mach
, &src
[0], &inst
->Src
[0], chan
, src_datatype
);
1864 fetch_source(mach
, &src
[1], &inst
->Src
[1], chan
, src_datatype
);
1865 op(&dst
.xyzw
[chan
], src
);
1868 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1869 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1870 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
1876 exec_vector_trinary(struct tgsi_exec_machine
*mach
,
1877 const struct tgsi_full_instruction
*inst
,
1879 enum tgsi_exec_datatype dst_datatype
,
1880 enum tgsi_exec_datatype src_datatype
)
1883 struct tgsi_exec_vector dst
;
1885 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1886 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1887 union tgsi_exec_channel src
[3];
1889 fetch_source(mach
, &src
[0], &inst
->Src
[0], chan
, src_datatype
);
1890 fetch_source(mach
, &src
[1], &inst
->Src
[1], chan
, src_datatype
);
1891 fetch_source(mach
, &src
[2], &inst
->Src
[2], chan
, src_datatype
);
1892 op(&dst
.xyzw
[chan
], src
);
1895 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1896 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1897 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
1903 exec_dp3(struct tgsi_exec_machine
*mach
,
1904 const struct tgsi_full_instruction
*inst
)
1907 union tgsi_exec_channel arg
[3];
1909 fetch_source(mach
, &arg
[0], &inst
->Src
[0], CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
1910 fetch_source(mach
, &arg
[1], &inst
->Src
[1], CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
1911 micro_mul(&arg
[2], &arg
[0], &arg
[1]);
1913 for (chan
= CHAN_Y
; chan
<= CHAN_Z
; chan
++) {
1914 fetch_source(mach
, &arg
[0], &inst
->Src
[0], chan
, TGSI_EXEC_DATA_FLOAT
);
1915 fetch_source(mach
, &arg
[1], &inst
->Src
[1], chan
, TGSI_EXEC_DATA_FLOAT
);
1916 micro_mad(&arg
[2], arg
);
1919 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1920 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1921 store_dest(mach
, &arg
[2], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
1927 exec_dp4(struct tgsi_exec_machine
*mach
,
1928 const struct tgsi_full_instruction
*inst
)
1931 union tgsi_exec_channel arg
[3];
1933 fetch_source(mach
, &arg
[0], &inst
->Src
[0], CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
1934 fetch_source(mach
, &arg
[1], &inst
->Src
[1], CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
1935 micro_mul(&arg
[2], &arg
[0], &arg
[1]);
1937 for (chan
= CHAN_Y
; chan
<= CHAN_W
; chan
++) {
1938 fetch_source(mach
, &arg
[0], &inst
->Src
[0], chan
, TGSI_EXEC_DATA_FLOAT
);
1939 fetch_source(mach
, &arg
[1], &inst
->Src
[1], chan
, TGSI_EXEC_DATA_FLOAT
);
1940 micro_mad(&arg
[2], arg
);
1943 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1944 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1945 store_dest(mach
, &arg
[2], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
1951 exec_dp2a(struct tgsi_exec_machine
*mach
,
1952 const struct tgsi_full_instruction
*inst
)
1955 union tgsi_exec_channel arg
[3];
1957 fetch_source(mach
, &arg
[0], &inst
->Src
[0], CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
1958 fetch_source(mach
, &arg
[1], &inst
->Src
[1], CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
1959 micro_mul(&arg
[2], &arg
[0], &arg
[1]);
1961 fetch_source(mach
, &arg
[0], &inst
->Src
[0], CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
1962 fetch_source(mach
, &arg
[1], &inst
->Src
[1], CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
1963 micro_mad(&arg
[0], arg
);
1965 fetch_source(mach
, &arg
[1], &inst
->Src
[2], CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
1966 micro_add(&arg
[0], &arg
[0], &arg
[1]);
1968 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1969 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1970 store_dest(mach
, &arg
[0], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
1976 exec_dph(struct tgsi_exec_machine
*mach
,
1977 const struct tgsi_full_instruction
*inst
)
1980 union tgsi_exec_channel arg
[3];
1982 fetch_source(mach
, &arg
[0], &inst
->Src
[0], CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
1983 fetch_source(mach
, &arg
[1], &inst
->Src
[1], CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
1984 micro_mul(&arg
[2], &arg
[0], &arg
[1]);
1986 fetch_source(mach
, &arg
[0], &inst
->Src
[0], CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
1987 fetch_source(mach
, &arg
[1], &inst
->Src
[1], CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
1988 micro_mad(&arg
[2], arg
);
1990 fetch_source(mach
, &arg
[0], &inst
->Src
[0], CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
1991 fetch_source(mach
, &arg
[1], &inst
->Src
[1], CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
1992 micro_mad(&arg
[0], arg
);
1994 fetch_source(mach
, &arg
[1], &inst
->Src
[1], CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
1995 micro_add(&arg
[0], &arg
[0], &arg
[1]);
1997 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
1998 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1999 store_dest(mach
, &arg
[0], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2005 exec_dp2(struct tgsi_exec_machine
*mach
,
2006 const struct tgsi_full_instruction
*inst
)
2009 union tgsi_exec_channel arg
[3];
2011 fetch_source(mach
, &arg
[0], &inst
->Src
[0], CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2012 fetch_source(mach
, &arg
[1], &inst
->Src
[1], CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2013 micro_mul(&arg
[2], &arg
[0], &arg
[1]);
2015 fetch_source(mach
, &arg
[0], &inst
->Src
[0], CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2016 fetch_source(mach
, &arg
[1], &inst
->Src
[1], CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2017 micro_mad(&arg
[2], arg
);
2019 for (chan
= 0; chan
< NUM_CHANNELS
; chan
++) {
2020 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2021 store_dest(mach
, &arg
[2], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2027 exec_break(struct tgsi_exec_machine
*mach
)
2029 if (mach
->BreakType
== TGSI_EXEC_BREAK_INSIDE_LOOP
) {
2030 /* turn off loop channels for each enabled exec channel */
2031 mach
->LoopMask
&= ~mach
->ExecMask
;
2032 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2033 UPDATE_EXEC_MASK(mach
);
2035 assert(mach
->BreakType
== TGSI_EXEC_BREAK_INSIDE_SWITCH
);
2037 mach
->Switch
.mask
= 0x0;
2039 UPDATE_EXEC_MASK(mach
);
2044 exec_switch(struct tgsi_exec_machine
*mach
,
2045 const struct tgsi_full_instruction
*inst
)
2047 assert(mach
->SwitchStackTop
< TGSI_EXEC_MAX_SWITCH_NESTING
);
2048 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
2050 mach
->SwitchStack
[mach
->SwitchStackTop
++] = mach
->Switch
;
2051 fetch_source(mach
, &mach
->Switch
.selector
, &inst
->Src
[0], CHAN_X
, TGSI_EXEC_DATA_UINT
);
2052 mach
->Switch
.mask
= 0x0;
2053 mach
->Switch
.defaultMask
= 0x0;
2055 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
2056 mach
->BreakType
= TGSI_EXEC_BREAK_INSIDE_SWITCH
;
2058 UPDATE_EXEC_MASK(mach
);
2062 exec_case(struct tgsi_exec_machine
*mach
,
2063 const struct tgsi_full_instruction
*inst
)
2065 uint prevMask
= mach
->SwitchStack
[mach
->SwitchStackTop
- 1].mask
;
2066 union tgsi_exec_channel src
;
2069 fetch_source(mach
, &src
, &inst
->Src
[0], CHAN_X
, TGSI_EXEC_DATA_UINT
);
2071 if (mach
->Switch
.selector
.u
[0] == src
.u
[0]) {
2074 if (mach
->Switch
.selector
.u
[1] == src
.u
[1]) {
2077 if (mach
->Switch
.selector
.u
[2] == src
.u
[2]) {
2080 if (mach
->Switch
.selector
.u
[3] == src
.u
[3]) {
2084 mach
->Switch
.defaultMask
|= mask
;
2086 mach
->Switch
.mask
|= mask
& prevMask
;
2088 UPDATE_EXEC_MASK(mach
);
2092 exec_default(struct tgsi_exec_machine
*mach
)
2094 uint prevMask
= mach
->SwitchStack
[mach
->SwitchStackTop
- 1].mask
;
2096 mach
->Switch
.mask
|= ~mach
->Switch
.defaultMask
& prevMask
;
2098 UPDATE_EXEC_MASK(mach
);
2102 exec_endswitch(struct tgsi_exec_machine
*mach
)
2104 mach
->Switch
= mach
->SwitchStack
[--mach
->SwitchStackTop
];
2105 mach
->BreakType
= mach
->BreakStack
[--mach
->BreakStackTop
];
2107 UPDATE_EXEC_MASK(mach
);
2111 micro_i2f(union tgsi_exec_channel
*dst
,
2112 const union tgsi_exec_channel
*src
)
2114 dst
->f
[0] = (float)src
->i
[0];
2115 dst
->f
[1] = (float)src
->i
[1];
2116 dst
->f
[2] = (float)src
->i
[2];
2117 dst
->f
[3] = (float)src
->i
[3];
2121 micro_not(union tgsi_exec_channel
*dst
,
2122 const union tgsi_exec_channel
*src
)
2124 dst
->u
[0] = ~src
->u
[0];
2125 dst
->u
[1] = ~src
->u
[1];
2126 dst
->u
[2] = ~src
->u
[2];
2127 dst
->u
[3] = ~src
->u
[3];
2131 micro_shl(union tgsi_exec_channel
*dst
,
2132 const union tgsi_exec_channel
*src
)
2134 dst
->u
[0] = src
[0].u
[0] << src
[1].u
[0];
2135 dst
->u
[1] = src
[0].u
[1] << src
[1].u
[1];
2136 dst
->u
[2] = src
[0].u
[2] << src
[1].u
[2];
2137 dst
->u
[3] = src
[0].u
[3] << src
[1].u
[3];
2141 micro_and(union tgsi_exec_channel
*dst
,
2142 const union tgsi_exec_channel
*src
)
2144 dst
->u
[0] = src
[0].u
[0] & src
[1].u
[0];
2145 dst
->u
[1] = src
[0].u
[1] & src
[1].u
[1];
2146 dst
->u
[2] = src
[0].u
[2] & src
[1].u
[2];
2147 dst
->u
[3] = src
[0].u
[3] & src
[1].u
[3];
2151 micro_or(union tgsi_exec_channel
*dst
,
2152 const union tgsi_exec_channel
*src
)
2154 dst
->u
[0] = src
[0].u
[0] | src
[1].u
[0];
2155 dst
->u
[1] = src
[0].u
[1] | src
[1].u
[1];
2156 dst
->u
[2] = src
[0].u
[2] | src
[1].u
[2];
2157 dst
->u
[3] = src
[0].u
[3] | src
[1].u
[3];
2161 micro_xor(union tgsi_exec_channel
*dst
,
2162 const union tgsi_exec_channel
*src
)
2164 dst
->u
[0] = src
[0].u
[0] ^ src
[1].u
[0];
2165 dst
->u
[1] = src
[0].u
[1] ^ src
[1].u
[1];
2166 dst
->u
[2] = src
[0].u
[2] ^ src
[1].u
[2];
2167 dst
->u
[3] = src
[0].u
[3] ^ src
[1].u
[3];
2171 micro_f2i(union tgsi_exec_channel
*dst
,
2172 const union tgsi_exec_channel
*src
)
2174 dst
->i
[0] = (int)src
->f
[0];
2175 dst
->i
[1] = (int)src
->f
[1];
2176 dst
->i
[2] = (int)src
->f
[2];
2177 dst
->i
[3] = (int)src
->f
[3];
2181 micro_idiv(union tgsi_exec_channel
*dst
,
2182 const union tgsi_exec_channel
*src
)
2184 dst
->i
[0] = src
[0].i
[0] / src
[1].i
[0];
2185 dst
->i
[1] = src
[0].i
[1] / src
[1].i
[1];
2186 dst
->i
[2] = src
[0].i
[2] / src
[1].i
[2];
2187 dst
->i
[3] = src
[0].i
[3] / src
[1].i
[3];
2191 micro_imax(union tgsi_exec_channel
*dst
,
2192 const union tgsi_exec_channel
*src
)
2194 dst
->i
[0] = src
[0].i
[0] > src
[1].i
[0] ? src
[0].i
[0] : src
[1].i
[0];
2195 dst
->i
[1] = src
[0].i
[1] > src
[1].i
[1] ? src
[0].i
[1] : src
[1].i
[1];
2196 dst
->i
[2] = src
[0].i
[2] > src
[1].i
[2] ? src
[0].i
[2] : src
[1].i
[2];
2197 dst
->i
[3] = src
[0].i
[3] > src
[1].i
[3] ? src
[0].i
[3] : src
[1].i
[3];
2201 micro_imin(union tgsi_exec_channel
*dst
,
2202 const union tgsi_exec_channel
*src
)
2204 dst
->i
[0] = src
[0].i
[0] < src
[1].i
[0] ? src
[0].i
[0] : src
[1].i
[0];
2205 dst
->i
[1] = src
[0].i
[1] < src
[1].i
[1] ? src
[0].i
[1] : src
[1].i
[1];
2206 dst
->i
[2] = src
[0].i
[2] < src
[1].i
[2] ? src
[0].i
[2] : src
[1].i
[2];
2207 dst
->i
[3] = src
[0].i
[3] < src
[1].i
[3] ? src
[0].i
[3] : src
[1].i
[3];
2211 micro_isge(union tgsi_exec_channel
*dst
,
2212 const union tgsi_exec_channel
*src
)
2214 dst
->i
[0] = src
[0].i
[0] >= src
[1].i
[0] ? -1 : 0;
2215 dst
->i
[1] = src
[0].i
[1] >= src
[1].i
[1] ? -1 : 0;
2216 dst
->i
[2] = src
[0].i
[2] >= src
[1].i
[2] ? -1 : 0;
2217 dst
->i
[3] = src
[0].i
[3] >= src
[1].i
[3] ? -1 : 0;
2221 micro_ishr(union tgsi_exec_channel
*dst
,
2222 const union tgsi_exec_channel
*src
)
2224 dst
->i
[0] = src
[0].i
[0] >> src
[1].i
[0];
2225 dst
->i
[1] = src
[0].i
[1] >> src
[1].i
[1];
2226 dst
->i
[2] = src
[0].i
[2] >> src
[1].i
[2];
2227 dst
->i
[3] = src
[0].i
[3] >> src
[1].i
[3];
2231 micro_islt(union tgsi_exec_channel
*dst
,
2232 const union tgsi_exec_channel
*src
)
2234 dst
->i
[0] = src
[0].i
[0] < src
[1].i
[0] ? -1 : 0;
2235 dst
->i
[1] = src
[0].i
[1] < src
[1].i
[1] ? -1 : 0;
2236 dst
->i
[2] = src
[0].i
[2] < src
[1].i
[2] ? -1 : 0;
2237 dst
->i
[3] = src
[0].i
[3] < src
[1].i
[3] ? -1 : 0;
2241 micro_f2u(union tgsi_exec_channel
*dst
,
2242 const union tgsi_exec_channel
*src
)
2244 dst
->u
[0] = (uint
)src
->f
[0];
2245 dst
->u
[1] = (uint
)src
->f
[1];
2246 dst
->u
[2] = (uint
)src
->f
[2];
2247 dst
->u
[3] = (uint
)src
->f
[3];
2251 micro_u2f(union tgsi_exec_channel
*dst
,
2252 const union tgsi_exec_channel
*src
)
2254 dst
->f
[0] = (float)src
->u
[0];
2255 dst
->f
[1] = (float)src
->u
[1];
2256 dst
->f
[2] = (float)src
->u
[2];
2257 dst
->f
[3] = (float)src
->u
[3];
2261 micro_uadd(union tgsi_exec_channel
*dst
,
2262 const union tgsi_exec_channel
*src
)
2264 dst
->u
[0] = src
[0].u
[0] + src
[1].u
[0];
2265 dst
->u
[1] = src
[0].u
[1] + src
[1].u
[1];
2266 dst
->u
[2] = src
[0].u
[2] + src
[1].u
[2];
2267 dst
->u
[3] = src
[0].u
[3] + src
[1].u
[3];
2271 micro_udiv(union tgsi_exec_channel
*dst
,
2272 const union tgsi_exec_channel
*src
)
2274 dst
->u
[0] = src
[0].u
[0] / src
[1].u
[0];
2275 dst
->u
[1] = src
[0].u
[1] / src
[1].u
[1];
2276 dst
->u
[2] = src
[0].u
[2] / src
[1].u
[2];
2277 dst
->u
[3] = src
[0].u
[3] / src
[1].u
[3];
2281 micro_umad(union tgsi_exec_channel
*dst
,
2282 const union tgsi_exec_channel
*src
)
2284 dst
->u
[0] = src
[0].u
[0] * src
[1].u
[0] + src
[2].u
[0];
2285 dst
->u
[1] = src
[0].u
[1] * src
[1].u
[1] + src
[2].u
[1];
2286 dst
->u
[2] = src
[0].u
[2] * src
[1].u
[2] + src
[2].u
[2];
2287 dst
->u
[3] = src
[0].u
[3] * src
[1].u
[3] + src
[2].u
[3];
2291 micro_umax(union tgsi_exec_channel
*dst
,
2292 const union tgsi_exec_channel
*src
)
2294 dst
->u
[0] = src
[0].u
[0] > src
[1].u
[0] ? src
[0].u
[0] : src
[1].u
[0];
2295 dst
->u
[1] = src
[0].u
[1] > src
[1].u
[1] ? src
[0].u
[1] : src
[1].u
[1];
2296 dst
->u
[2] = src
[0].u
[2] > src
[1].u
[2] ? src
[0].u
[2] : src
[1].u
[2];
2297 dst
->u
[3] = src
[0].u
[3] > src
[1].u
[3] ? src
[0].u
[3] : src
[1].u
[3];
2301 micro_umin(union tgsi_exec_channel
*dst
,
2302 const union tgsi_exec_channel
*src
)
2304 dst
->u
[0] = src
[0].u
[0] < src
[1].u
[0] ? src
[0].u
[0] : src
[1].u
[0];
2305 dst
->u
[1] = src
[0].u
[1] < src
[1].u
[1] ? src
[0].u
[1] : src
[1].u
[1];
2306 dst
->u
[2] = src
[0].u
[2] < src
[1].u
[2] ? src
[0].u
[2] : src
[1].u
[2];
2307 dst
->u
[3] = src
[0].u
[3] < src
[1].u
[3] ? src
[0].u
[3] : src
[1].u
[3];
2311 micro_umod(union tgsi_exec_channel
*dst
,
2312 const union tgsi_exec_channel
*src
)
2314 dst
->u
[0] = src
[0].u
[0] % src
[1].u
[0];
2315 dst
->u
[1] = src
[0].u
[1] % src
[1].u
[1];
2316 dst
->u
[2] = src
[0].u
[2] % src
[1].u
[2];
2317 dst
->u
[3] = src
[0].u
[3] % src
[1].u
[3];
2321 micro_umul(union tgsi_exec_channel
*dst
,
2322 const union tgsi_exec_channel
*src
)
2324 dst
->u
[0] = src
[0].u
[0] * src
[1].u
[0];
2325 dst
->u
[1] = src
[0].u
[1] * src
[1].u
[1];
2326 dst
->u
[2] = src
[0].u
[2] * src
[1].u
[2];
2327 dst
->u
[3] = src
[0].u
[3] * src
[1].u
[3];
2331 micro_useq(union tgsi_exec_channel
*dst
,
2332 const union tgsi_exec_channel
*src
)
2334 dst
->u
[0] = src
[0].u
[0] == src
[1].u
[0] ? ~0 : 0;
2335 dst
->u
[1] = src
[0].u
[1] == src
[1].u
[1] ? ~0 : 0;
2336 dst
->u
[2] = src
[0].u
[2] == src
[1].u
[2] ? ~0 : 0;
2337 dst
->u
[3] = src
[0].u
[3] == src
[1].u
[3] ? ~0 : 0;
2341 micro_usge(union tgsi_exec_channel
*dst
,
2342 const union tgsi_exec_channel
*src
)
2344 dst
->u
[0] = src
[0].u
[0] >= src
[1].u
[0] ? ~0 : 0;
2345 dst
->u
[1] = src
[0].u
[1] >= src
[1].u
[1] ? ~0 : 0;
2346 dst
->u
[2] = src
[0].u
[2] >= src
[1].u
[2] ? ~0 : 0;
2347 dst
->u
[3] = src
[0].u
[3] >= src
[1].u
[3] ? ~0 : 0;
2351 micro_ushr(union tgsi_exec_channel
*dst
,
2352 const union tgsi_exec_channel
*src
)
2354 dst
->u
[0] = src
[0].u
[0] >> src
[1].u
[0];
2355 dst
->u
[1] = src
[0].u
[1] >> src
[1].u
[1];
2356 dst
->u
[2] = src
[0].u
[2] >> src
[1].u
[2];
2357 dst
->u
[3] = src
[0].u
[3] >> src
[1].u
[3];
2361 micro_uslt(union tgsi_exec_channel
*dst
,
2362 const union tgsi_exec_channel
*src
)
2364 dst
->u
[0] = src
[0].u
[0] < src
[1].u
[0] ? ~0 : 0;
2365 dst
->u
[1] = src
[0].u
[1] < src
[1].u
[1] ? ~0 : 0;
2366 dst
->u
[2] = src
[0].u
[2] < src
[1].u
[2] ? ~0 : 0;
2367 dst
->u
[3] = src
[0].u
[3] < src
[1].u
[3] ? ~0 : 0;
2371 micro_usne(union tgsi_exec_channel
*dst
,
2372 const union tgsi_exec_channel
*src
)
2374 dst
->u
[0] = src
[0].u
[0] != src
[1].u
[0] ? ~0 : 0;
2375 dst
->u
[1] = src
[0].u
[1] != src
[1].u
[1] ? ~0 : 0;
2376 dst
->u
[2] = src
[0].u
[2] != src
[1].u
[2] ? ~0 : 0;
2377 dst
->u
[3] = src
[0].u
[3] != src
[1].u
[3] ? ~0 : 0;
2382 struct tgsi_exec_machine
*mach
,
2383 const struct tgsi_full_instruction
*inst
,
2387 union tgsi_exec_channel r
[10];
2388 union tgsi_exec_channel d
[8];
2392 switch (inst
->Instruction
.Opcode
) {
2393 case TGSI_OPCODE_ARL
:
2394 exec_vector_unary(mach
, inst
, micro_arl
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_FLOAT
);
2397 case TGSI_OPCODE_MOV
:
2398 exec_vector_unary(mach
, inst
, micro_mov
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_FLOAT
);
2401 case TGSI_OPCODE_LIT
:
2402 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2403 FETCH( &r
[0], 0, CHAN_X
);
2404 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2405 micro_max(&d
[CHAN_Y
], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2408 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2409 FETCH( &r
[1], 0, CHAN_Y
);
2410 micro_max( &r
[1], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
] );
2412 FETCH( &r
[2], 0, CHAN_W
);
2413 micro_min( &r
[2], &r
[2], &mach
->Temps
[TEMP_128_I
].xyzw
[TEMP_128_C
] );
2414 micro_max( &r
[2], &r
[2], &mach
->Temps
[TEMP_M128_I
].xyzw
[TEMP_M128_C
] );
2415 micro_pow( &r
[1], &r
[1], &r
[2] );
2416 micro_lt(&d
[CHAN_Z
], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[0], &r
[1], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
]);
2419 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2420 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2422 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2423 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2426 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2427 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
2429 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2430 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2434 case TGSI_OPCODE_RCP
:
2435 exec_scalar_unary(mach
, inst
, micro_rcp
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2438 case TGSI_OPCODE_RSQ
:
2439 exec_scalar_unary(mach
, inst
, micro_rsq
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2442 case TGSI_OPCODE_EXP
:
2443 FETCH( &r
[0], 0, CHAN_X
);
2444 micro_flr( &r
[1], &r
[0] ); /* r1 = floor(r0) */
2445 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2446 micro_exp2( &r
[2], &r
[1] ); /* r2 = 2 ^ r1 */
2447 STORE( &r
[2], 0, CHAN_X
); /* store r2 */
2449 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2450 micro_sub( &r
[2], &r
[0], &r
[1] ); /* r2 = r0 - r1 */
2451 STORE( &r
[2], 0, CHAN_Y
); /* store r2 */
2453 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2454 micro_exp2( &r
[2], &r
[0] ); /* r2 = 2 ^ r0 */
2455 STORE( &r
[2], 0, CHAN_Z
); /* store r2 */
2457 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2458 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2462 case TGSI_OPCODE_LOG
:
2463 FETCH( &r
[0], 0, CHAN_X
);
2464 micro_abs( &r
[2], &r
[0] ); /* r2 = abs(r0) */
2465 micro_lg2( &r
[1], &r
[2] ); /* r1 = lg2(r2) */
2466 micro_flr( &r
[0], &r
[1] ); /* r0 = floor(r1) */
2467 if (IS_CHANNEL_ENABLED( *inst
, CHAN_X
)) {
2468 STORE( &r
[0], 0, CHAN_X
);
2470 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2471 micro_exp2( &r
[0], &r
[0] ); /* r0 = 2 ^ r0 */
2472 micro_div( &r
[0], &r
[2], &r
[0] ); /* r0 = r2 / r0 */
2473 STORE( &r
[0], 0, CHAN_Y
);
2475 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2476 STORE( &r
[1], 0, CHAN_Z
);
2478 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2479 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2483 case TGSI_OPCODE_MUL
:
2484 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2485 FETCH(&r
[0], 0, chan_index
);
2486 FETCH(&r
[1], 1, chan_index
);
2487 micro_mul(&d
[chan_index
], &r
[0], &r
[1]);
2489 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2490 STORE(&d
[chan_index
], 0, chan_index
);
2494 case TGSI_OPCODE_ADD
:
2495 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2496 FETCH( &r
[0], 0, chan_index
);
2497 FETCH( &r
[1], 1, chan_index
);
2498 micro_add(&d
[chan_index
], &r
[0], &r
[1]);
2500 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2501 STORE(&d
[chan_index
], 0, chan_index
);
2505 case TGSI_OPCODE_DP3
:
2506 exec_dp3(mach
, inst
);
2509 case TGSI_OPCODE_DP4
:
2510 exec_dp4(mach
, inst
);
2513 case TGSI_OPCODE_DST
:
2514 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Y
)) {
2515 FETCH( &r
[0], 0, CHAN_Y
);
2516 FETCH( &r
[1], 1, CHAN_Y
);
2517 micro_mul(&d
[CHAN_Y
], &r
[0], &r
[1]);
2519 if (IS_CHANNEL_ENABLED( *inst
, CHAN_Z
)) {
2520 FETCH(&d
[CHAN_Z
], 0, CHAN_Z
);
2522 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2523 FETCH(&d
[CHAN_W
], 1, CHAN_W
);
2526 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2527 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_X
);
2529 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2530 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2532 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2533 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2535 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2536 STORE(&d
[CHAN_W
], 0, CHAN_W
);
2540 case TGSI_OPCODE_MIN
:
2541 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2542 FETCH(&r
[0], 0, chan_index
);
2543 FETCH(&r
[1], 1, chan_index
);
2545 /* XXX use micro_min()?? */
2546 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &r
[0], &r
[1]);
2548 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2549 STORE(&d
[chan_index
], 0, chan_index
);
2553 case TGSI_OPCODE_MAX
:
2554 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2555 FETCH(&r
[0], 0, chan_index
);
2556 FETCH(&r
[1], 1, chan_index
);
2558 /* XXX use micro_max()?? */
2559 micro_lt(&d
[chan_index
], &r
[0], &r
[1], &r
[1], &r
[0] );
2561 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2562 STORE(&d
[chan_index
], 0, chan_index
);
2566 case TGSI_OPCODE_SLT
:
2567 exec_vector_binary(mach
, inst
, micro_slt
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2570 case TGSI_OPCODE_SGE
:
2571 exec_vector_binary(mach
, inst
, micro_sge
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2574 case TGSI_OPCODE_MAD
:
2575 exec_vector_trinary(mach
, inst
, micro_mad
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2578 case TGSI_OPCODE_SUB
:
2579 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2580 FETCH(&r
[0], 0, chan_index
);
2581 FETCH(&r
[1], 1, chan_index
);
2582 micro_sub(&d
[chan_index
], &r
[0], &r
[1]);
2584 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2585 STORE(&d
[chan_index
], 0, chan_index
);
2589 case TGSI_OPCODE_LRP
:
2590 exec_vector_trinary(mach
, inst
, micro_lrp
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2593 case TGSI_OPCODE_CND
:
2594 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2595 FETCH(&r
[0], 0, chan_index
);
2596 FETCH(&r
[1], 1, chan_index
);
2597 FETCH(&r
[2], 2, chan_index
);
2598 micro_lt(&d
[chan_index
], &mach
->Temps
[TEMP_HALF_I
].xyzw
[TEMP_HALF_C
], &r
[2], &r
[0], &r
[1]);
2600 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2601 STORE(&d
[chan_index
], 0, chan_index
);
2605 case TGSI_OPCODE_DP2A
:
2606 exec_dp2a(mach
, inst
);
2609 case TGSI_OPCODE_FRC
:
2610 exec_vector_unary(mach
, inst
, micro_frc
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2613 case TGSI_OPCODE_CLAMP
:
2614 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2615 FETCH(&r
[0], 0, chan_index
);
2616 FETCH(&r
[1], 1, chan_index
);
2617 micro_max(&r
[0], &r
[0], &r
[1]);
2618 FETCH(&r
[1], 2, chan_index
);
2619 micro_min(&d
[chan_index
], &r
[0], &r
[1]);
2621 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2622 STORE(&d
[chan_index
], 0, chan_index
);
2626 case TGSI_OPCODE_FLR
:
2627 exec_vector_unary(mach
, inst
, micro_flr
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2630 case TGSI_OPCODE_ROUND
:
2631 exec_vector_unary(mach
, inst
, micro_rnd
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2634 case TGSI_OPCODE_EX2
:
2635 exec_scalar_unary(mach
, inst
, micro_exp2
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2638 case TGSI_OPCODE_LG2
:
2639 exec_scalar_unary(mach
, inst
, micro_lg2
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2642 case TGSI_OPCODE_POW
:
2643 FETCH(&r
[0], 0, CHAN_X
);
2644 FETCH(&r
[1], 1, CHAN_X
);
2646 micro_pow( &r
[0], &r
[0], &r
[1] );
2648 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
2649 STORE( &r
[0], 0, chan_index
);
2653 case TGSI_OPCODE_XPD
:
2654 FETCH(&r
[0], 0, CHAN_Y
);
2655 FETCH(&r
[1], 1, CHAN_Z
);
2657 micro_mul( &r
[2], &r
[0], &r
[1] );
2659 FETCH(&r
[3], 0, CHAN_Z
);
2660 FETCH(&r
[4], 1, CHAN_Y
);
2662 micro_mul( &r
[5], &r
[3], &r
[4] );
2663 micro_sub(&d
[CHAN_X
], &r
[2], &r
[5]);
2665 FETCH(&r
[2], 1, CHAN_X
);
2667 micro_mul( &r
[3], &r
[3], &r
[2] );
2669 FETCH(&r
[5], 0, CHAN_X
);
2671 micro_mul( &r
[1], &r
[1], &r
[5] );
2672 micro_sub(&d
[CHAN_Y
], &r
[3], &r
[1]);
2674 micro_mul( &r
[5], &r
[5], &r
[4] );
2675 micro_mul( &r
[0], &r
[0], &r
[2] );
2676 micro_sub(&d
[CHAN_Z
], &r
[5], &r
[0]);
2678 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2679 STORE(&d
[CHAN_X
], 0, CHAN_X
);
2681 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2682 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2684 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2685 STORE(&d
[CHAN_Z
], 0, CHAN_Z
);
2687 if (IS_CHANNEL_ENABLED( *inst
, CHAN_W
)) {
2688 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2692 case TGSI_OPCODE_ABS
:
2693 exec_vector_unary(mach
, inst
, micro_abs
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2696 case TGSI_OPCODE_RCC
:
2697 FETCH(&r
[0], 0, CHAN_X
);
2698 micro_div(&r
[0], &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &r
[0]);
2699 micro_float_clamp(&r
[0], &r
[0]);
2700 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2701 STORE(&r
[0], 0, chan_index
);
2705 case TGSI_OPCODE_DPH
:
2706 exec_dph(mach
, inst
);
2709 case TGSI_OPCODE_COS
:
2710 exec_scalar_unary(mach
, inst
, micro_cos
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2713 case TGSI_OPCODE_DDX
:
2714 exec_vector_unary(mach
, inst
, micro_ddx
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2717 case TGSI_OPCODE_DDY
:
2718 exec_vector_unary(mach
, inst
, micro_ddy
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2721 case TGSI_OPCODE_KILP
:
2722 exec_kilp (mach
, inst
);
2725 case TGSI_OPCODE_KIL
:
2726 exec_kil (mach
, inst
);
2729 case TGSI_OPCODE_PK2H
:
2733 case TGSI_OPCODE_PK2US
:
2737 case TGSI_OPCODE_PK4B
:
2741 case TGSI_OPCODE_PK4UB
:
2745 case TGSI_OPCODE_RFL
:
2746 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2747 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2748 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2749 /* r0 = dp3(src0, src0) */
2750 FETCH(&r
[2], 0, CHAN_X
);
2751 micro_mul(&r
[0], &r
[2], &r
[2]);
2752 FETCH(&r
[4], 0, CHAN_Y
);
2753 micro_mul(&r
[8], &r
[4], &r
[4]);
2754 micro_add(&r
[0], &r
[0], &r
[8]);
2755 FETCH(&r
[6], 0, CHAN_Z
);
2756 micro_mul(&r
[8], &r
[6], &r
[6]);
2757 micro_add(&r
[0], &r
[0], &r
[8]);
2759 /* r1 = dp3(src0, src1) */
2760 FETCH(&r
[3], 1, CHAN_X
);
2761 micro_mul(&r
[1], &r
[2], &r
[3]);
2762 FETCH(&r
[5], 1, CHAN_Y
);
2763 micro_mul(&r
[8], &r
[4], &r
[5]);
2764 micro_add(&r
[1], &r
[1], &r
[8]);
2765 FETCH(&r
[7], 1, CHAN_Z
);
2766 micro_mul(&r
[8], &r
[6], &r
[7]);
2767 micro_add(&r
[1], &r
[1], &r
[8]);
2769 /* r1 = 2 * r1 / r0 */
2770 micro_add(&r
[1], &r
[1], &r
[1]);
2771 micro_div(&r
[1], &r
[1], &r
[0]);
2773 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2774 micro_mul(&r
[2], &r
[2], &r
[1]);
2775 micro_sub(&r
[2], &r
[2], &r
[3]);
2776 STORE(&r
[2], 0, CHAN_X
);
2778 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2779 micro_mul(&r
[4], &r
[4], &r
[1]);
2780 micro_sub(&r
[4], &r
[4], &r
[5]);
2781 STORE(&r
[4], 0, CHAN_Y
);
2783 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2784 micro_mul(&r
[6], &r
[6], &r
[1]);
2785 micro_sub(&r
[6], &r
[6], &r
[7]);
2786 STORE(&r
[6], 0, CHAN_Z
);
2789 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2790 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
2794 case TGSI_OPCODE_SEQ
:
2795 exec_vector_binary(mach
, inst
, micro_seq
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2798 case TGSI_OPCODE_SFL
:
2799 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2800 STORE(&mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, chan_index
);
2804 case TGSI_OPCODE_SGT
:
2805 exec_vector_binary(mach
, inst
, micro_sgt
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2808 case TGSI_OPCODE_SIN
:
2809 exec_scalar_unary(mach
, inst
, micro_sin
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2812 case TGSI_OPCODE_SLE
:
2813 exec_vector_binary(mach
, inst
, micro_sle
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2816 case TGSI_OPCODE_SNE
:
2817 exec_vector_binary(mach
, inst
, micro_sne
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
2820 case TGSI_OPCODE_STR
:
2821 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
2822 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, chan_index
);
2826 case TGSI_OPCODE_TEX
:
2827 /* simple texture lookup */
2828 /* src[0] = texcoord */
2829 /* src[1] = sampler unit */
2830 exec_tex(mach
, inst
, TEX_MODIFIER_NONE
);
2833 case TGSI_OPCODE_TXB
:
2834 /* Texture lookup with lod bias */
2835 /* src[0] = texcoord (src[0].w = LOD bias) */
2836 /* src[1] = sampler unit */
2837 exec_tex(mach
, inst
, TEX_MODIFIER_LOD_BIAS
);
2840 case TGSI_OPCODE_TXD
:
2841 /* Texture lookup with explicit partial derivatives */
2842 /* src[0] = texcoord */
2843 /* src[1] = d[strq]/dx */
2844 /* src[2] = d[strq]/dy */
2845 /* src[3] = sampler unit */
2846 exec_txd(mach
, inst
);
2849 case TGSI_OPCODE_TXL
:
2850 /* Texture lookup with explicit LOD */
2851 /* src[0] = texcoord (src[0].w = LOD) */
2852 /* src[1] = sampler unit */
2853 exec_tex(mach
, inst
, TEX_MODIFIER_EXPLICIT_LOD
);
2856 case TGSI_OPCODE_TXP
:
2857 /* Texture lookup with projection */
2858 /* src[0] = texcoord (src[0].w = projection) */
2859 /* src[1] = sampler unit */
2860 exec_tex(mach
, inst
, TEX_MODIFIER_PROJECTED
);
2863 case TGSI_OPCODE_UP2H
:
2867 case TGSI_OPCODE_UP2US
:
2871 case TGSI_OPCODE_UP4B
:
2875 case TGSI_OPCODE_UP4UB
:
2879 case TGSI_OPCODE_X2D
:
2880 FETCH(&r
[0], 1, CHAN_X
);
2881 FETCH(&r
[1], 1, CHAN_Y
);
2882 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
2883 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2884 FETCH(&r
[2], 2, CHAN_X
);
2885 micro_mul(&r
[2], &r
[2], &r
[0]);
2886 FETCH(&r
[3], 2, CHAN_Y
);
2887 micro_mul(&r
[3], &r
[3], &r
[1]);
2888 micro_add(&r
[2], &r
[2], &r
[3]);
2889 FETCH(&r
[3], 0, CHAN_X
);
2890 micro_add(&d
[CHAN_X
], &r
[2], &r
[3]);
2893 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
2894 IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2895 FETCH(&r
[2], 2, CHAN_Z
);
2896 micro_mul(&r
[2], &r
[2], &r
[0]);
2897 FETCH(&r
[3], 2, CHAN_W
);
2898 micro_mul(&r
[3], &r
[3], &r
[1]);
2899 micro_add(&r
[2], &r
[2], &r
[3]);
2900 FETCH(&r
[3], 0, CHAN_Y
);
2901 micro_add(&d
[CHAN_Y
], &r
[2], &r
[3]);
2904 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
2905 STORE(&d
[CHAN_X
], 0, CHAN_X
);
2907 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
2908 STORE(&d
[CHAN_Y
], 0, CHAN_Y
);
2910 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
2911 STORE(&d
[CHAN_X
], 0, CHAN_Z
);
2913 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
2914 STORE(&d
[CHAN_Y
], 0, CHAN_W
);
2918 case TGSI_OPCODE_ARA
:
2922 case TGSI_OPCODE_ARR
:
2923 exec_vector_unary(mach
, inst
, micro_arr
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_FLOAT
);
2926 case TGSI_OPCODE_BRA
:
2930 case TGSI_OPCODE_CAL
:
2931 /* skip the call if no execution channels are enabled */
2932 if (mach
->ExecMask
) {
2935 /* First, record the depths of the execution stacks.
2936 * This is important for deeply nested/looped return statements.
2937 * We have to unwind the stacks by the correct amount. For a
2938 * real code generator, we could determine the number of entries
2939 * to pop off each stack with simple static analysis and avoid
2940 * implementing this data structure at run time.
2942 mach
->CallStack
[mach
->CallStackTop
].CondStackTop
= mach
->CondStackTop
;
2943 mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
= mach
->LoopStackTop
;
2944 mach
->CallStack
[mach
->CallStackTop
].ContStackTop
= mach
->ContStackTop
;
2945 mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
= mach
->SwitchStackTop
;
2946 mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
= mach
->BreakStackTop
;
2947 /* note that PC was already incremented above */
2948 mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
= *pc
;
2950 mach
->CallStackTop
++;
2952 /* Second, push the Cond, Loop, Cont, Switch, Break, Func stacks */
2953 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
2954 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2955 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
2956 assert(mach
->SwitchStackTop
< TGSI_EXEC_MAX_SWITCH_NESTING
);
2957 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
2958 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
2960 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
2961 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
2962 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
2963 mach
->SwitchStack
[mach
->SwitchStackTop
++] = mach
->Switch
;
2964 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
2965 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
2967 /* Finally, jump to the subroutine */
2968 *pc
= inst
->Label
.Label
;
2972 case TGSI_OPCODE_RET
:
2973 mach
->FuncMask
&= ~mach
->ExecMask
;
2974 UPDATE_EXEC_MASK(mach
);
2976 if (mach
->FuncMask
== 0x0) {
2977 /* really return now (otherwise, keep executing) */
2979 if (mach
->CallStackTop
== 0) {
2980 /* returning from main() */
2985 assert(mach
->CallStackTop
> 0);
2986 mach
->CallStackTop
--;
2988 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
2989 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
2991 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
2992 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
2994 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
2995 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
2997 mach
->SwitchStackTop
= mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
;
2998 mach
->Switch
= mach
->SwitchStack
[mach
->SwitchStackTop
];
3000 mach
->BreakStackTop
= mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
;
3001 mach
->BreakType
= mach
->BreakStack
[mach
->BreakStackTop
];
3003 assert(mach
->FuncStackTop
> 0);
3004 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
3006 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
3008 UPDATE_EXEC_MASK(mach
);
3012 case TGSI_OPCODE_SSG
:
3013 exec_vector_unary(mach
, inst
, micro_sgn
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3016 case TGSI_OPCODE_CMP
:
3017 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3018 FETCH(&r
[0], 0, chan_index
);
3019 FETCH(&r
[1], 1, chan_index
);
3020 FETCH(&r
[2], 2, chan_index
);
3021 micro_lt(&d
[chan_index
], &r
[0], &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], &r
[1], &r
[2]);
3023 FOR_EACH_ENABLED_CHANNEL(*inst
, chan_index
) {
3024 STORE(&d
[chan_index
], 0, chan_index
);
3028 case TGSI_OPCODE_SCS
:
3029 if( IS_CHANNEL_ENABLED( *inst
, CHAN_X
) || IS_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
3030 FETCH( &r
[0], 0, CHAN_X
);
3031 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
3032 micro_cos(&r
[1], &r
[0]);
3033 STORE(&r
[1], 0, CHAN_X
);
3035 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
3036 micro_sin(&r
[1], &r
[0]);
3037 STORE(&r
[1], 0, CHAN_Y
);
3040 if( IS_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
3041 STORE( &mach
->Temps
[TEMP_0_I
].xyzw
[TEMP_0_C
], 0, CHAN_Z
);
3043 if( IS_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
3044 STORE( &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
3048 case TGSI_OPCODE_NRM
:
3049 /* 3-component vector normalize */
3050 if(IS_CHANNEL_ENABLED(*inst
, CHAN_X
) ||
3051 IS_CHANNEL_ENABLED(*inst
, CHAN_Y
) ||
3052 IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
3053 /* r3 = sqrt(dp3(src0, src0)) */
3054 FETCH(&r
[0], 0, CHAN_X
);
3055 micro_mul(&r
[3], &r
[0], &r
[0]);
3056 FETCH(&r
[1], 0, CHAN_Y
);
3057 micro_mul(&r
[4], &r
[1], &r
[1]);
3058 micro_add(&r
[3], &r
[3], &r
[4]);
3059 FETCH(&r
[2], 0, CHAN_Z
);
3060 micro_mul(&r
[4], &r
[2], &r
[2]);
3061 micro_add(&r
[3], &r
[3], &r
[4]);
3062 micro_sqrt(&r
[3], &r
[3]);
3064 if (IS_CHANNEL_ENABLED(*inst
, CHAN_X
)) {
3065 micro_div(&r
[0], &r
[0], &r
[3]);
3066 STORE(&r
[0], 0, CHAN_X
);
3068 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Y
)) {
3069 micro_div(&r
[1], &r
[1], &r
[3]);
3070 STORE(&r
[1], 0, CHAN_Y
);
3072 if (IS_CHANNEL_ENABLED(*inst
, CHAN_Z
)) {
3073 micro_div(&r
[2], &r
[2], &r
[3]);
3074 STORE(&r
[2], 0, CHAN_Z
);
3077 if (IS_CHANNEL_ENABLED(*inst
, CHAN_W
)) {
3078 STORE(&mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], 0, CHAN_W
);
3082 case TGSI_OPCODE_NRM4
:
3083 /* 4-component vector normalize */
3085 union tgsi_exec_channel tmp
, dot
;
3087 /* tmp = dp4(src0, src0): */
3088 FETCH( &r
[0], 0, CHAN_X
);
3089 micro_mul( &tmp
, &r
[0], &r
[0] );
3091 FETCH( &r
[1], 0, CHAN_Y
);
3092 micro_mul( &dot
, &r
[1], &r
[1] );
3093 micro_add( &tmp
, &tmp
, &dot
);
3095 FETCH( &r
[2], 0, CHAN_Z
);
3096 micro_mul( &dot
, &r
[2], &r
[2] );
3097 micro_add( &tmp
, &tmp
, &dot
);
3099 FETCH( &r
[3], 0, CHAN_W
);
3100 micro_mul( &dot
, &r
[3], &r
[3] );
3101 micro_add( &tmp
, &tmp
, &dot
);
3103 /* tmp = 1 / sqrt(tmp) */
3104 micro_sqrt( &tmp
, &tmp
);
3105 micro_div( &tmp
, &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
], &tmp
);
3107 FOR_EACH_ENABLED_CHANNEL( *inst
, chan_index
) {
3108 /* chan = chan * tmp */
3109 micro_mul( &r
[chan_index
], &tmp
, &r
[chan_index
] );
3110 STORE( &r
[chan_index
], 0, chan_index
);
3115 case TGSI_OPCODE_DIV
:
3119 case TGSI_OPCODE_DP2
:
3120 exec_dp2(mach
, inst
);
3123 case TGSI_OPCODE_IF
:
3125 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
3126 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
3127 FETCH( &r
[0], 0, CHAN_X
);
3128 /* update CondMask */
3130 mach
->CondMask
&= ~0x1;
3133 mach
->CondMask
&= ~0x2;
3136 mach
->CondMask
&= ~0x4;
3139 mach
->CondMask
&= ~0x8;
3141 UPDATE_EXEC_MASK(mach
);
3142 /* Todo: If CondMask==0, jump to ELSE */
3145 case TGSI_OPCODE_ELSE
:
3146 /* invert CondMask wrt previous mask */
3149 assert(mach
->CondStackTop
> 0);
3150 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
3151 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
3152 UPDATE_EXEC_MASK(mach
);
3153 /* Todo: If CondMask==0, jump to ENDIF */
3157 case TGSI_OPCODE_ENDIF
:
3159 assert(mach
->CondStackTop
> 0);
3160 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
3161 UPDATE_EXEC_MASK(mach
);
3164 case TGSI_OPCODE_END
:
3165 /* halt execution */
3169 case TGSI_OPCODE_REP
:
3173 case TGSI_OPCODE_ENDREP
:
3177 case TGSI_OPCODE_PUSHA
:
3181 case TGSI_OPCODE_POPA
:
3185 case TGSI_OPCODE_CEIL
:
3186 exec_vector_unary(mach
, inst
, micro_ceil
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3189 case TGSI_OPCODE_I2F
:
3190 exec_vector_unary(mach
, inst
, micro_i2f
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_INT
);
3193 case TGSI_OPCODE_NOT
:
3194 exec_vector_unary(mach
, inst
, micro_not
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3197 case TGSI_OPCODE_TRUNC
:
3198 exec_vector_unary(mach
, inst
, micro_trunc
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3201 case TGSI_OPCODE_SHL
:
3202 exec_vector_binary(mach
, inst
, micro_shl
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3205 case TGSI_OPCODE_AND
:
3206 exec_vector_binary(mach
, inst
, micro_and
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3209 case TGSI_OPCODE_OR
:
3210 exec_vector_binary(mach
, inst
, micro_or
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3213 case TGSI_OPCODE_MOD
:
3217 case TGSI_OPCODE_XOR
:
3218 exec_vector_binary(mach
, inst
, micro_xor
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3221 case TGSI_OPCODE_SAD
:
3225 case TGSI_OPCODE_TXF
:
3229 case TGSI_OPCODE_TXQ
:
3233 case TGSI_OPCODE_EMIT
:
3237 case TGSI_OPCODE_ENDPRIM
:
3238 emit_primitive(mach
);
3241 case TGSI_OPCODE_BGNFOR
:
3242 assert(mach
->LoopCounterStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3243 for (chan_index
= 0; chan_index
< 3; chan_index
++) {
3244 FETCH( &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
].xyzw
[chan_index
], 0, chan_index
);
3246 ++mach
->LoopCounterStackTop
;
3247 STORE(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
], 0, CHAN_X
);
3248 /* update LoopMask */
3249 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[0] <= 0.0f
) {
3250 mach
->LoopMask
&= ~0x1;
3252 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[1] <= 0.0f
) {
3253 mach
->LoopMask
&= ~0x2;
3255 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[2] <= 0.0f
) {
3256 mach
->LoopMask
&= ~0x4;
3258 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[3] <= 0.0f
) {
3259 mach
->LoopMask
&= ~0x8;
3261 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3262 UPDATE_EXEC_MASK(mach
);
3263 /* fall-through (for now) */
3264 case TGSI_OPCODE_BGNLOOP
:
3265 /* push LoopMask and ContMasks */
3266 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3267 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3268 assert(mach
->LoopLabelStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3269 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
3271 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
3272 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
3273 mach
->LoopLabelStack
[mach
->LoopLabelStackTop
++] = *pc
- 1;
3274 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
3275 mach
->BreakType
= TGSI_EXEC_BREAK_INSIDE_LOOP
;
3278 case TGSI_OPCODE_ENDFOR
:
3279 assert(mach
->LoopCounterStackTop
> 0);
3280 micro_sub(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
],
3281 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
],
3282 &mach
->Temps
[TEMP_1_I
].xyzw
[TEMP_1_C
]);
3283 /* update LoopMask */
3284 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[0] <= 0.0f
) {
3285 mach
->LoopMask
&= ~0x1;
3287 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[1] <= 0.0f
) {
3288 mach
->LoopMask
&= ~0x2;
3290 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[2] <= 0.0f
) {
3291 mach
->LoopMask
&= ~0x4;
3293 if (mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Y
].f
[3] <= 0.0f
) {
3294 mach
->LoopMask
&= ~0x8;
3296 micro_add(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
],
3297 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_X
],
3298 &mach
->LoopCounterStack
[mach
->LoopCounterStackTop
- 1].xyzw
[CHAN_Z
]);
3299 assert(mach
->LoopLabelStackTop
> 0);
3300 inst
= mach
->Instructions
+ mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1];
3301 STORE(&mach
->LoopCounterStack
[mach
->LoopCounterStackTop
].xyzw
[CHAN_X
], 0, CHAN_X
);
3302 /* Restore ContMask, but don't pop */
3303 assert(mach
->ContStackTop
> 0);
3304 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3305 UPDATE_EXEC_MASK(mach
);
3306 if (mach
->ExecMask
) {
3307 /* repeat loop: jump to instruction just past BGNLOOP */
3308 assert(mach
->LoopLabelStackTop
> 0);
3309 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
3312 /* exit loop: pop LoopMask */
3313 assert(mach
->LoopStackTop
> 0);
3314 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3316 assert(mach
->ContStackTop
> 0);
3317 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3318 assert(mach
->LoopLabelStackTop
> 0);
3319 --mach
->LoopLabelStackTop
;
3320 assert(mach
->LoopCounterStackTop
> 0);
3321 --mach
->LoopCounterStackTop
;
3323 mach
->BreakType
= mach
->BreakStack
[--mach
->BreakStackTop
];
3325 UPDATE_EXEC_MASK(mach
);
3328 case TGSI_OPCODE_ENDLOOP
:
3329 /* Restore ContMask, but don't pop */
3330 assert(mach
->ContStackTop
> 0);
3331 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3332 UPDATE_EXEC_MASK(mach
);
3333 if (mach
->ExecMask
) {
3334 /* repeat loop: jump to instruction just past BGNLOOP */
3335 assert(mach
->LoopLabelStackTop
> 0);
3336 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
3339 /* exit loop: pop LoopMask */
3340 assert(mach
->LoopStackTop
> 0);
3341 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3343 assert(mach
->ContStackTop
> 0);
3344 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3345 assert(mach
->LoopLabelStackTop
> 0);
3346 --mach
->LoopLabelStackTop
;
3348 mach
->BreakType
= mach
->BreakStack
[--mach
->BreakStackTop
];
3350 UPDATE_EXEC_MASK(mach
);
3353 case TGSI_OPCODE_BRK
:
3357 case TGSI_OPCODE_CONT
:
3358 /* turn off cont channels for each enabled exec channel */
3359 mach
->ContMask
&= ~mach
->ExecMask
;
3360 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3361 UPDATE_EXEC_MASK(mach
);
3364 case TGSI_OPCODE_BGNSUB
:
3368 case TGSI_OPCODE_ENDSUB
:
3370 * XXX: This really should be a no-op. We should never reach this opcode.
3373 assert(mach
->CallStackTop
> 0);
3374 mach
->CallStackTop
--;
3376 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
3377 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
3379 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
3380 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
3382 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
3383 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
3385 mach
->SwitchStackTop
= mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
;
3386 mach
->Switch
= mach
->SwitchStack
[mach
->SwitchStackTop
];
3388 mach
->BreakStackTop
= mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
;
3389 mach
->BreakType
= mach
->BreakStack
[mach
->BreakStackTop
];
3391 assert(mach
->FuncStackTop
> 0);
3392 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
3394 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
3396 UPDATE_EXEC_MASK(mach
);
3399 case TGSI_OPCODE_NOP
:
3402 case TGSI_OPCODE_BREAKC
:
3403 FETCH(&r
[0], 0, CHAN_X
);
3404 /* update LoopMask */
3405 if (r
[0].u
[0] && (mach
->ExecMask
& 0x1)) {
3406 mach
->LoopMask
&= ~0x1;
3408 if (r
[0].u
[1] && (mach
->ExecMask
& 0x2)) {
3409 mach
->LoopMask
&= ~0x2;
3411 if (r
[0].u
[2] && (mach
->ExecMask
& 0x4)) {
3412 mach
->LoopMask
&= ~0x4;
3414 if (r
[0].u
[3] && (mach
->ExecMask
& 0x8)) {
3415 mach
->LoopMask
&= ~0x8;
3417 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3418 UPDATE_EXEC_MASK(mach
);
3421 case TGSI_OPCODE_F2I
:
3422 exec_vector_unary(mach
, inst
, micro_f2i
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_FLOAT
);
3425 case TGSI_OPCODE_IDIV
:
3426 exec_vector_binary(mach
, inst
, micro_idiv
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3429 case TGSI_OPCODE_IMAX
:
3430 exec_vector_binary(mach
, inst
, micro_imax
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3433 case TGSI_OPCODE_IMIN
:
3434 exec_vector_binary(mach
, inst
, micro_imin
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3437 case TGSI_OPCODE_INEG
:
3438 exec_vector_unary(mach
, inst
, micro_ineg
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3441 case TGSI_OPCODE_ISGE
:
3442 exec_vector_binary(mach
, inst
, micro_isge
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3445 case TGSI_OPCODE_ISHR
:
3446 exec_vector_binary(mach
, inst
, micro_ishr
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3449 case TGSI_OPCODE_ISLT
:
3450 exec_vector_binary(mach
, inst
, micro_islt
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3453 case TGSI_OPCODE_F2U
:
3454 exec_vector_unary(mach
, inst
, micro_f2u
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_FLOAT
);
3457 case TGSI_OPCODE_U2F
:
3458 exec_vector_unary(mach
, inst
, micro_u2f
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_UINT
);
3461 case TGSI_OPCODE_UADD
:
3462 exec_vector_binary(mach
, inst
, micro_uadd
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3465 case TGSI_OPCODE_UDIV
:
3466 exec_vector_binary(mach
, inst
, micro_udiv
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3469 case TGSI_OPCODE_UMAD
:
3470 exec_vector_trinary(mach
, inst
, micro_umad
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3473 case TGSI_OPCODE_UMAX
:
3474 exec_vector_binary(mach
, inst
, micro_umax
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3477 case TGSI_OPCODE_UMIN
:
3478 exec_vector_binary(mach
, inst
, micro_umin
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3481 case TGSI_OPCODE_UMOD
:
3482 exec_vector_binary(mach
, inst
, micro_umod
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3485 case TGSI_OPCODE_UMUL
:
3486 exec_vector_binary(mach
, inst
, micro_umul
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3489 case TGSI_OPCODE_USEQ
:
3490 exec_vector_binary(mach
, inst
, micro_useq
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3493 case TGSI_OPCODE_USGE
:
3494 exec_vector_binary(mach
, inst
, micro_usge
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3497 case TGSI_OPCODE_USHR
:
3498 exec_vector_binary(mach
, inst
, micro_ushr
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3501 case TGSI_OPCODE_USLT
:
3502 exec_vector_binary(mach
, inst
, micro_uslt
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3505 case TGSI_OPCODE_USNE
:
3506 exec_vector_binary(mach
, inst
, micro_usne
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3509 case TGSI_OPCODE_SWITCH
:
3510 exec_switch(mach
, inst
);
3513 case TGSI_OPCODE_CASE
:
3514 exec_case(mach
, inst
);
3517 case TGSI_OPCODE_DEFAULT
:
3521 case TGSI_OPCODE_ENDSWITCH
:
3522 exec_endswitch(mach
);
3531 #define DEBUG_EXECUTION 0
3535 * Run TGSI interpreter.
3536 * \return bitmask of "alive" quad components
3539 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
3544 mach
->CondMask
= 0xf;
3545 mach
->LoopMask
= 0xf;
3546 mach
->ContMask
= 0xf;
3547 mach
->FuncMask
= 0xf;
3548 mach
->ExecMask
= 0xf;
3550 mach
->Switch
.mask
= 0xf;
3552 assert(mach
->CondStackTop
== 0);
3553 assert(mach
->LoopStackTop
== 0);
3554 assert(mach
->ContStackTop
== 0);
3555 assert(mach
->SwitchStackTop
== 0);
3556 assert(mach
->BreakStackTop
== 0);
3557 assert(mach
->CallStackTop
== 0);
3559 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
3560 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
3562 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
3563 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
3564 mach
->Primitives
[0] = 0;
3567 for (i
= 0; i
< QUAD_SIZE
; i
++) {
3568 mach
->Temps
[TEMP_CC_I
].xyzw
[TEMP_CC_C
].u
[i
] =
3569 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_X_SHIFT
) |
3570 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Y_SHIFT
) |
3571 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_Z_SHIFT
) |
3572 (TGSI_EXEC_CC_EQ
<< TGSI_EXEC_CC_W_SHIFT
);
3575 /* execute declarations (interpolants) */
3576 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
3577 exec_declaration( mach
, mach
->Declarations
+i
);
3582 struct tgsi_exec_vector temps
[TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
];
3583 struct tgsi_exec_vector outputs
[PIPE_MAX_ATTRIBS
];
3586 memcpy(temps
, mach
->Temps
, sizeof(temps
));
3587 memcpy(outputs
, mach
->Outputs
, sizeof(outputs
));
3590 /* execute instructions, until pc is set to -1 */
3596 tgsi_dump_instruction(&mach
->Instructions
[pc
], inst
++);
3599 assert(pc
< (int) mach
->NumInstructions
);
3600 exec_instruction(mach
, mach
->Instructions
+ pc
, &pc
);
3603 for (i
= 0; i
< TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
; i
++) {
3604 if (memcmp(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]))) {
3607 memcpy(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]));
3608 debug_printf("TEMP[%2u] = ", i
);
3609 for (j
= 0; j
< 4; j
++) {
3613 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3614 temps
[i
].xyzw
[0].f
[j
], temps
[i
].xyzw
[0].u
[j
],
3615 temps
[i
].xyzw
[1].f
[j
], temps
[i
].xyzw
[1].u
[j
],
3616 temps
[i
].xyzw
[2].f
[j
], temps
[i
].xyzw
[2].u
[j
],
3617 temps
[i
].xyzw
[3].f
[j
], temps
[i
].xyzw
[3].u
[j
]);
3621 for (i
= 0; i
< PIPE_MAX_ATTRIBS
; i
++) {
3622 if (memcmp(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]))) {
3625 memcpy(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]));
3626 debug_printf("OUT[%2u] = ", i
);
3627 for (j
= 0; j
< 4; j
++) {
3631 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3632 outputs
[i
].xyzw
[0].f
[j
], outputs
[i
].xyzw
[0].u
[j
],
3633 outputs
[i
].xyzw
[1].f
[j
], outputs
[i
].xyzw
[1].u
[j
],
3634 outputs
[i
].xyzw
[2].f
[j
], outputs
[i
].xyzw
[2].u
[j
],
3635 outputs
[i
].xyzw
[3].f
[j
], outputs
[i
].xyzw
[3].u
[j
]);
3644 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3645 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
3647 * Scale back depth component.
3649 for (i
= 0; i
< 4; i
++)
3650 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
3654 assert(mach
->CondStackTop
== 0);
3655 assert(mach
->LoopStackTop
== 0);
3656 assert(mach
->ContStackTop
== 0);
3657 assert(mach
->SwitchStackTop
== 0);
3658 assert(mach
->BreakStackTop
== 0);
3659 assert(mach
->CallStackTop
== 0);
3661 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];