tgsi: convert CHECK_INF_OR_NAN to inline function
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel),
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care: a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
44 * The ExecMask is computed from three other masks (CondMask, LoopMask and
45 * ContMask) which are controlled by the flow control instructions (namely:
46 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
47 *
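 * For example (an illustration of the mechanism above): if an IF condition
 * holds only for the first and third components of a quad, CondMask ends up
 * as binary 0101; while inside the IF body, ExecMask (see UPDATE_EXEC_MASK)
 * is 0101 as well, assuming the other masks are all-enabled, so store_dest()
 * updates only those two components of any destination register.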
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
63
64
65 #define FAST_MATH 1
66
67 #define TILE_TOP_LEFT 0
68 #define TILE_TOP_RIGHT 1
69 #define TILE_BOTTOM_LEFT 2
70 #define TILE_BOTTOM_RIGHT 3
71
72 static void
73 micro_abs(union tgsi_exec_channel *dst,
74 const union tgsi_exec_channel *src)
75 {
76 dst->f[0] = fabsf(src->f[0]);
77 dst->f[1] = fabsf(src->f[1]);
78 dst->f[2] = fabsf(src->f[2]);
79 dst->f[3] = fabsf(src->f[3]);
80 }
81
82 static void
83 micro_arl(union tgsi_exec_channel *dst,
84 const union tgsi_exec_channel *src)
85 {
86 dst->i[0] = (int)floorf(src->f[0]);
87 dst->i[1] = (int)floorf(src->f[1]);
88 dst->i[2] = (int)floorf(src->f[2]);
89 dst->i[3] = (int)floorf(src->f[3]);
90 }
91
92 static void
93 micro_arr(union tgsi_exec_channel *dst,
94 const union tgsi_exec_channel *src)
95 {
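   /* like micro_arl() above, but round to nearest by adding 0.5 before
    * taking floorf()
    */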
96 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
97 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
98 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
99 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
100 }
101
102 static void
103 micro_ceil(union tgsi_exec_channel *dst,
104 const union tgsi_exec_channel *src)
105 {
106 dst->f[0] = ceilf(src->f[0]);
107 dst->f[1] = ceilf(src->f[1]);
108 dst->f[2] = ceilf(src->f[2]);
109 dst->f[3] = ceilf(src->f[3]);
110 }
111
112 static void
113 micro_cos(union tgsi_exec_channel *dst,
114 const union tgsi_exec_channel *src)
115 {
116 dst->f[0] = cosf(src->f[0]);
117 dst->f[1] = cosf(src->f[1]);
118 dst->f[2] = cosf(src->f[2]);
119 dst->f[3] = cosf(src->f[3]);
120 }
121
122 static void
123 micro_ddx(union tgsi_exec_channel *dst,
124 const union tgsi_exec_channel *src)
125 {
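   /* forward difference in x across the quad (bottom-right minus bottom-left),
    * replicated to all four channels; micro_ddy() below takes the analogous
    * difference in y
    */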
126 dst->f[0] =
127 dst->f[1] =
128 dst->f[2] =
129 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
130 }
131
132 static void
133 micro_ddy(union tgsi_exec_channel *dst,
134 const union tgsi_exec_channel *src)
135 {
136 dst->f[0] =
137 dst->f[1] =
138 dst->f[2] =
139 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
140 }
141
142 static void
143 micro_exp2(union tgsi_exec_channel *dst,
144 const union tgsi_exec_channel *src)
145 {
146 #if FAST_MATH
147 dst->f[0] = util_fast_exp2(src->f[0]);
148 dst->f[1] = util_fast_exp2(src->f[1]);
149 dst->f[2] = util_fast_exp2(src->f[2]);
150 dst->f[3] = util_fast_exp2(src->f[3]);
151 #else
152 #if DEBUG
153 /* Inf is okay for this instruction, so clamp it to silence assertions. */
154 uint i;
155 union tgsi_exec_channel clamped;
156
157 for (i = 0; i < 4; i++) {
158 if (src->f[i] > 127.99999f) {
159 clamped.f[i] = 127.99999f;
160 } else if (src->f[i] < -126.99999f) {
161 clamped.f[i] = -126.99999f;
162 } else {
163 clamped.f[i] = src->f[i];
164 }
165 }
166 src = &clamped;
167 #endif /* DEBUG */
168
169 dst->f[0] = powf(2.0f, src->f[0]);
170 dst->f[1] = powf(2.0f, src->f[1]);
171 dst->f[2] = powf(2.0f, src->f[2]);
172 dst->f[3] = powf(2.0f, src->f[3]);
173 #endif /* FAST_MATH */
174 }
175
176 static void
177 micro_flr(union tgsi_exec_channel *dst,
178 const union tgsi_exec_channel *src)
179 {
180 dst->f[0] = floorf(src->f[0]);
181 dst->f[1] = floorf(src->f[1]);
182 dst->f[2] = floorf(src->f[2]);
183 dst->f[3] = floorf(src->f[3]);
184 }
185
186 static void
187 micro_frc(union tgsi_exec_channel *dst,
188 const union tgsi_exec_channel *src)
189 {
190 dst->f[0] = src->f[0] - floorf(src->f[0]);
191 dst->f[1] = src->f[1] - floorf(src->f[1]);
192 dst->f[2] = src->f[2] - floorf(src->f[2]);
193 dst->f[3] = src->f[3] - floorf(src->f[3]);
194 }
195
196 static void
197 micro_iabs(union tgsi_exec_channel *dst,
198 const union tgsi_exec_channel *src)
199 {
200 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
201 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
202 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
203 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
204 }
205
206 static void
207 micro_ineg(union tgsi_exec_channel *dst,
208 const union tgsi_exec_channel *src)
209 {
210 dst->i[0] = -src->i[0];
211 dst->i[1] = -src->i[1];
212 dst->i[2] = -src->i[2];
213 dst->i[3] = -src->i[3];
214 }
215
216 static void
217 micro_lg2(union tgsi_exec_channel *dst,
218 const union tgsi_exec_channel *src)
219 {
220 #if FAST_MATH
221 dst->f[0] = util_fast_log2(src->f[0]);
222 dst->f[1] = util_fast_log2(src->f[1]);
223 dst->f[2] = util_fast_log2(src->f[2]);
224 dst->f[3] = util_fast_log2(src->f[3]);
225 #else
226 dst->f[0] = logf(src->f[0]) * 1.442695f;
227 dst->f[1] = logf(src->f[1]) * 1.442695f;
228 dst->f[2] = logf(src->f[2]) * 1.442695f;
229 dst->f[3] = logf(src->f[3]) * 1.442695f;
230 #endif
231 }
232
233 static void
234 micro_lrp(union tgsi_exec_channel *dst,
235 const union tgsi_exec_channel *src)
236 {
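   /* linear interpolation: equivalent to src[0]*src[1] + (1 - src[0])*src[2] */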
237 dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0];
238 dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1];
239 dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2];
240 dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3];
241 }
242
243 static void
244 micro_mad(union tgsi_exec_channel *dst,
245 const union tgsi_exec_channel *src)
246 {
247 dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0];
248 dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1];
249 dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2];
250 dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3];
251 }
252
253 static void
254 micro_mov(union tgsi_exec_channel *dst,
255 const union tgsi_exec_channel *src)
256 {
257 dst->u[0] = src->u[0];
258 dst->u[1] = src->u[1];
259 dst->u[2] = src->u[2];
260 dst->u[3] = src->u[3];
261 }
262
263 static void
264 micro_rcp(union tgsi_exec_channel *dst,
265 const union tgsi_exec_channel *src)
266 {
267 dst->f[0] = 1.0f / src->f[0];
268 dst->f[1] = 1.0f / src->f[1];
269 dst->f[2] = 1.0f / src->f[2];
270 dst->f[3] = 1.0f / src->f[3];
271 }
272
273 static void
274 micro_rnd(union tgsi_exec_channel *dst,
275 const union tgsi_exec_channel *src)
276 {
277 dst->f[0] = floorf(src->f[0] + 0.5f);
278 dst->f[1] = floorf(src->f[1] + 0.5f);
279 dst->f[2] = floorf(src->f[2] + 0.5f);
280 dst->f[3] = floorf(src->f[3] + 0.5f);
281 }
282
283 static void
284 micro_rsq(union tgsi_exec_channel *dst,
285 const union tgsi_exec_channel *src)
286 {
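   /* reciprocal square root; fabsf() keeps negative inputs from producing NaN */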
287 dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
288 dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
289 dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
290 dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
291 }
292
293 static void
294 micro_seq(union tgsi_exec_channel *dst,
295 const union tgsi_exec_channel *src)
296 {
297 dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f;
298 dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f;
299 dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f;
300 dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f;
301 }
302
303 static void
304 micro_sge(union tgsi_exec_channel *dst,
305 const union tgsi_exec_channel *src)
306 {
307 dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f;
308 dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f;
309 dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f;
310 dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f;
311 }
312
313 static void
314 micro_sgn(union tgsi_exec_channel *dst,
315 const union tgsi_exec_channel *src)
316 {
317 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
318 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
319 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
320 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
321 }
322
323 static void
324 micro_sgt(union tgsi_exec_channel *dst,
325 const union tgsi_exec_channel *src)
326 {
327 dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f;
328 dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f;
329 dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f;
330 dst->f[3] = src[0].f[3] > src[1].f[3] ? 1.0f : 0.0f;
331 }
332
333 static void
334 micro_sin(union tgsi_exec_channel *dst,
335 const union tgsi_exec_channel *src)
336 {
337 dst->f[0] = sinf(src->f[0]);
338 dst->f[1] = sinf(src->f[1]);
339 dst->f[2] = sinf(src->f[2]);
340 dst->f[3] = sinf(src->f[3]);
341 }
342
343 static void
344 micro_sle(union tgsi_exec_channel *dst,
345 const union tgsi_exec_channel *src)
346 {
347 dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f;
348 dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f;
349 dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f;
350 dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f;
351 }
352
353 static void
354 micro_slt(union tgsi_exec_channel *dst,
355 const union tgsi_exec_channel *src)
356 {
357 dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f;
358 dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f;
359 dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f;
360 dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f;
361 }
362
363 static void
364 micro_sne(union tgsi_exec_channel *dst,
365 const union tgsi_exec_channel *src)
366 {
367 dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f;
368 dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f;
369 dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f;
370 dst->f[3] = src[0].f[3] != src[1].f[3] ? 1.0f : 0.0f;
371 }
372
373 static void
374 micro_trunc(union tgsi_exec_channel *dst,
375 const union tgsi_exec_channel *src)
376 {
377 dst->f[0] = (float)(int)src->f[0];
378 dst->f[1] = (float)(int)src->f[1];
379 dst->f[2] = (float)(int)src->f[2];
380 dst->f[3] = (float)(int)src->f[3];
381 }
382
383
384 #define CHAN_X 0
385 #define CHAN_Y 1
386 #define CHAN_Z 2
387 #define CHAN_W 3
388
389 enum tgsi_exec_datatype {
390 TGSI_EXEC_DATA_FLOAT,
391 TGSI_EXEC_DATA_INT,
392 TGSI_EXEC_DATA_UINT
393 };
394
395 /*
396 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
397 */
398 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
399 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
400 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
401 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
402 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
403 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
404 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
405 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
406 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
407 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
408 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
409 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
410 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
411 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
412 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
413 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
414 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
415 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
416 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
417 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
418 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
419 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
420 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
421 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
422 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
423 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
424 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
425 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
426 #define TEMP_R0 TGSI_EXEC_TEMP_R0
427 #define TEMP_P0 TGSI_EXEC_TEMP_P0
428
429 #define IS_CHANNEL_ENABLED(INST, CHAN)\
430 ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))
431
432 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
433 ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))
434
435 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
436 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
437 if (IS_CHANNEL_ENABLED( INST, CHAN ))
438
439 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
440 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
441 if (IS_CHANNEL_ENABLED2( INST, CHAN ))
442
443
444 /** The execution mask is the AND of the conditional, loop, continue, switch and function-call masks */
445 #define UPDATE_EXEC_MASK(MACH) \
446 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask
447
448
449 static const union tgsi_exec_channel ZeroVec =
450 { { 0.0, 0.0, 0.0, 0.0 } };
451
452
453 /**
454 * Assert that none of the float values in 'chan' are infinite or NaN.
455 * NaN and Inf may occur normally during program execution and should
456 * not lead to crashes, etc. But when debugging, it's helpful to catch
457 * them.
458 */
459 static INLINE void
460 check_inf_or_nan(const union tgsi_exec_channel *chan)
461 {
462 assert(!util_is_inf_or_nan((chan)->f[0]));
463 assert(!util_is_inf_or_nan((chan)->f[1]));
464 assert(!util_is_inf_or_nan((chan)->f[2]));
465 assert(!util_is_inf_or_nan((chan)->f[3]));
466 }
467
468
469 #ifdef DEBUG
470 static void
471 print_chan(const char *msg, const union tgsi_exec_channel *chan)
472 {
473 debug_printf("%s = {%f, %f, %f, %f}\n",
474 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]);
475 }
476 #endif
477
478
479 #ifdef DEBUG
480 static void
481 print_temp(const struct tgsi_exec_machine *mach, uint index)
482 {
483 const struct tgsi_exec_vector *tmp = &mach->Temps[index];
484 int i;
485 debug_printf("Temp[%u] =\n", index);
486 for (i = 0; i < 4; i++) {
487 debug_printf(" %c: { %f, %f, %f, %f }\n",
488 "XYZW"[i],
489 tmp->xyzw[i].f[0],
490 tmp->xyzw[i].f[1],
491 tmp->xyzw[i].f[2],
492 tmp->xyzw[i].f[3]);
493 }
494 }
495 #endif
496
497
498 /**
499 * Check if there's a potential src/dst register data dependency when
500 * using SOA execution.
501 * Example:
502 * MOV T, T.yxwz;
503 * This would expand into:
504 * MOV t0, t1;
505 * MOV t1, t0;
506 * MOV t2, t3;
507 * MOV t3, t2;
508 * The second instruction will have the wrong value for t0 if executed as-is.
509 */
510 boolean
511 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
512 {
513 uint i, chan;
514
515 uint writemask = inst->Dst[0].Register.WriteMask;
516 if (writemask == TGSI_WRITEMASK_X ||
517 writemask == TGSI_WRITEMASK_Y ||
518 writemask == TGSI_WRITEMASK_Z ||
519 writemask == TGSI_WRITEMASK_W ||
520 writemask == TGSI_WRITEMASK_NONE) {
521 /* no chance of data dependency */
522 return FALSE;
523 }
524
525 /* loop over src regs */
526 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
527 if ((inst->Src[i].Register.File ==
528 inst->Dst[0].Register.File) &&
529 (inst->Src[i].Register.Index ==
530 inst->Dst[0].Register.Index)) {
531 /* loop over dest channels */
532 uint channelsWritten = 0x0;
533 FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
534 /* check if we're reading a channel that's been written */
535 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
536 if (channelsWritten & (1 << swizzle)) {
537 return TRUE;
538 }
539
540 channelsWritten |= (1 << chan);
541 }
542 }
543 }
544 return FALSE;
545 }
546
547
548 /**
549 * Initialize machine state by expanding tokens to full instructions,
550 * allocating temporary storage, setting up constants, etc.
551 * After this, we can call tgsi_exec_machine_run() many times.
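 *
 * Typical usage, as an illustrative sketch (not verbatim code from this file):
 *
 *    mach = tgsi_exec_machine_create();
 *    tgsi_exec_machine_bind_shader(mach, tokens, numSamplers, samplers);
 *    ... set up mach->Consts, mach->Inputs, interpolation coefs ...
 *    tgsi_exec_machine_run(mach);
 *    tgsi_exec_machine_destroy(mach);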
552 */
553 void
554 tgsi_exec_machine_bind_shader(
555 struct tgsi_exec_machine *mach,
556 const struct tgsi_token *tokens,
557 uint numSamplers,
558 struct tgsi_sampler **samplers)
559 {
560 uint k;
561 struct tgsi_parse_context parse;
562 struct tgsi_exec_labels *labels = &mach->Labels;
563 struct tgsi_full_instruction *instructions;
564 struct tgsi_full_declaration *declarations;
565 uint maxInstructions = 10, numInstructions = 0;
566 uint maxDeclarations = 10, numDeclarations = 0;
567 uint instno = 0;
568
569 #if 0
570 tgsi_dump(tokens, 0);
571 #endif
572
573 util_init_math();
574
575 mach->Tokens = tokens;
576 mach->Samplers = samplers;
577
578 k = tgsi_parse_init (&parse, mach->Tokens);
579 if (k != TGSI_PARSE_OK) {
580 debug_printf( "Problem parsing!\n" );
581 return;
582 }
583
584 mach->Processor = parse.FullHeader.Processor.Processor;
585 mach->ImmLimit = 0;
586 labels->count = 0;
587
588 declarations = (struct tgsi_full_declaration *)
589 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
590
591 if (!declarations) {
592 return;
593 }
594
595 instructions = (struct tgsi_full_instruction *)
596 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
597
598 if (!instructions) {
599 FREE( declarations );
600 return;
601 }
602
603 while( !tgsi_parse_end_of_tokens( &parse ) ) {
604 uint pointer = parse.Position;
605 uint i;
606
607 tgsi_parse_token( &parse );
608 switch( parse.FullToken.Token.Type ) {
609 case TGSI_TOKEN_TYPE_DECLARATION:
610 /* save expanded declaration */
611 if (numDeclarations == maxDeclarations) {
612 declarations = REALLOC(declarations,
613 maxDeclarations
614 * sizeof(struct tgsi_full_declaration),
615 (maxDeclarations + 10)
616 * sizeof(struct tgsi_full_declaration));
617 maxDeclarations += 10;
618 }
619 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
620 unsigned reg;
621 for (reg = parse.FullToken.FullDeclaration.Range.First;
622 reg <= parse.FullToken.FullDeclaration.Range.Last;
623 ++reg) {
624 ++mach->NumOutputs;
625 }
626 }
627 memcpy(declarations + numDeclarations,
628 &parse.FullToken.FullDeclaration,
629 sizeof(declarations[0]));
630 numDeclarations++;
631 break;
632
633 case TGSI_TOKEN_TYPE_IMMEDIATE:
634 {
635 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
636 assert( size <= 4 );
637 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
638
639 for( i = 0; i < size; i++ ) {
640 mach->Imms[mach->ImmLimit][i] =
641 parse.FullToken.FullImmediate.u[i].Float;
642 }
643 mach->ImmLimit += 1;
644 }
645 break;
646
647 case TGSI_TOKEN_TYPE_INSTRUCTION:
648 assert( labels->count < MAX_LABELS );
649
650 labels->labels[labels->count][0] = instno;
651 labels->labels[labels->count][1] = pointer;
652 labels->count++;
653
654 /* save expanded instruction */
655 if (numInstructions == maxInstructions) {
656 instructions = REALLOC(instructions,
657 maxInstructions
658 * sizeof(struct tgsi_full_instruction),
659 (maxInstructions + 10)
660 * sizeof(struct tgsi_full_instruction));
661 maxInstructions += 10;
662 }
663
664 memcpy(instructions + numInstructions,
665 &parse.FullToken.FullInstruction,
666 sizeof(instructions[0]));
667
668 numInstructions++;
669 break;
670
671 case TGSI_TOKEN_TYPE_PROPERTY:
672 break;
673
674 default:
675 assert( 0 );
676 }
677 }
678 tgsi_parse_free (&parse);
679
680 if (mach->Declarations) {
681 FREE( mach->Declarations );
682 }
683 mach->Declarations = declarations;
684 mach->NumDeclarations = numDeclarations;
685
686 if (mach->Instructions) {
687 FREE( mach->Instructions );
688 }
689 mach->Instructions = instructions;
690 mach->NumInstructions = numInstructions;
691 }
692
693
694 struct tgsi_exec_machine *
695 tgsi_exec_machine_create( void )
696 {
697 struct tgsi_exec_machine *mach;
698 uint i;
699
700 mach = align_malloc( sizeof *mach, 16 );
701 if (!mach)
702 goto fail;
703
704 memset(mach, 0, sizeof(*mach));
705
706 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
707 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
708 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
709
710 /* Setup constants. */
711 for( i = 0; i < 4; i++ ) {
712 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
713 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
714 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
715 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
716 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
717 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
718 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
719 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
720 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
721 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
722 }
723
724 #ifdef DEBUG
725 /* silence warnings */
726 (void) print_chan;
727 (void) print_temp;
728 #endif
729
730 return mach;
731
732 fail:
733 align_free(mach);
734 return NULL;
735 }
736
737
738 void
739 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
740 {
741 if (mach) {
742 FREE(mach->Instructions);
743 FREE(mach->Declarations);
744 }
745
746 align_free(mach);
747 }
748
749 static void
750 micro_add(
751 union tgsi_exec_channel *dst,
752 const union tgsi_exec_channel *src0,
753 const union tgsi_exec_channel *src1 )
754 {
755 dst->f[0] = src0->f[0] + src1->f[0];
756 dst->f[1] = src0->f[1] + src1->f[1];
757 dst->f[2] = src0->f[2] + src1->f[2];
758 dst->f[3] = src0->f[3] + src1->f[3];
759 }
760
761 static void
762 micro_div(
763 union tgsi_exec_channel *dst,
764 const union tgsi_exec_channel *src0,
765 const union tgsi_exec_channel *src1 )
766 {
767 if (src1->f[0] != 0) {
768 dst->f[0] = src0->f[0] / src1->f[0];
769 }
770 if (src1->f[1] != 0) {
771 dst->f[1] = src0->f[1] / src1->f[1];
772 }
773 if (src1->f[2] != 0) {
774 dst->f[2] = src0->f[2] / src1->f[2];
775 }
776 if (src1->f[3] != 0) {
777 dst->f[3] = src0->f[3] / src1->f[3];
778 }
779 }
780
781 static void
782 micro_float_clamp(union tgsi_exec_channel *dst,
783 const union tgsi_exec_channel *src)
784 {
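   /* clamp each component's magnitude into [5.42101e-20, 1.884467e+19]
    * while preserving sign; note that a zero input comes out as -5.42101e-20
    */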
785 uint i;
786
787 for (i = 0; i < 4; i++) {
788 if (src->f[i] > 0.0f) {
789 if (src->f[i] > 1.884467e+019f)
790 dst->f[i] = 1.884467e+019f;
791 else if (src->f[i] < 5.42101e-020f)
792 dst->f[i] = 5.42101e-020f;
793 else
794 dst->f[i] = src->f[i];
795 }
796 else {
797 if (src->f[i] < -1.884467e+019f)
798 dst->f[i] = -1.884467e+019f;
799 else if (src->f[i] > -5.42101e-020f)
800 dst->f[i] = -5.42101e-020f;
801 else
802 dst->f[i] = src->f[i];
803 }
804 }
805 }
806
807 static void
808 micro_lt(
809 union tgsi_exec_channel *dst,
810 const union tgsi_exec_channel *src0,
811 const union tgsi_exec_channel *src1,
812 const union tgsi_exec_channel *src2,
813 const union tgsi_exec_channel *src3 )
814 {
815 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
816 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
817 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
818 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
819 }
820
821 static void
822 micro_max(
823 union tgsi_exec_channel *dst,
824 const union tgsi_exec_channel *src0,
825 const union tgsi_exec_channel *src1 )
826 {
827 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
828 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
829 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
830 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
831 }
832
833 static void
834 micro_min(
835 union tgsi_exec_channel *dst,
836 const union tgsi_exec_channel *src0,
837 const union tgsi_exec_channel *src1 )
838 {
839 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
840 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
841 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
842 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
843 }
844
845 static void
846 micro_mul(
847 union tgsi_exec_channel *dst,
848 const union tgsi_exec_channel *src0,
849 const union tgsi_exec_channel *src1 )
850 {
851 dst->f[0] = src0->f[0] * src1->f[0];
852 dst->f[1] = src0->f[1] * src1->f[1];
853 dst->f[2] = src0->f[2] * src1->f[2];
854 dst->f[3] = src0->f[3] * src1->f[3];
855 }
856
857 #if 0
858 static void
859 micro_imul64(
860 union tgsi_exec_channel *dst0,
861 union tgsi_exec_channel *dst1,
862 const union tgsi_exec_channel *src0,
863 const union tgsi_exec_channel *src1 )
864 {
865 dst1->i[0] = src0->i[0] * src1->i[0];
866 dst1->i[1] = src0->i[1] * src1->i[1];
867 dst1->i[2] = src0->i[2] * src1->i[2];
868 dst1->i[3] = src0->i[3] * src1->i[3];
869 dst0->i[0] = 0;
870 dst0->i[1] = 0;
871 dst0->i[2] = 0;
872 dst0->i[3] = 0;
873 }
874 #endif
875
876 #if 0
877 static void
878 micro_umul64(
879 union tgsi_exec_channel *dst0,
880 union tgsi_exec_channel *dst1,
881 const union tgsi_exec_channel *src0,
882 const union tgsi_exec_channel *src1 )
883 {
884 dst1->u[0] = src0->u[0] * src1->u[0];
885 dst1->u[1] = src0->u[1] * src1->u[1];
886 dst1->u[2] = src0->u[2] * src1->u[2];
887 dst1->u[3] = src0->u[3] * src1->u[3];
888 dst0->u[0] = 0;
889 dst0->u[1] = 0;
890 dst0->u[2] = 0;
891 dst0->u[3] = 0;
892 }
893 #endif
894
895
896 #if 0
897 static void
898 micro_movc(
899 union tgsi_exec_channel *dst,
900 const union tgsi_exec_channel *src0,
901 const union tgsi_exec_channel *src1,
902 const union tgsi_exec_channel *src2 )
903 {
904 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
905 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
906 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
907 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
908 }
909 #endif
910
911 static void
912 micro_neg(
913 union tgsi_exec_channel *dst,
914 const union tgsi_exec_channel *src )
915 {
916 dst->f[0] = -src->f[0];
917 dst->f[1] = -src->f[1];
918 dst->f[2] = -src->f[2];
919 dst->f[3] = -src->f[3];
920 }
921
922 static void
923 micro_pow(
924 union tgsi_exec_channel *dst,
925 const union tgsi_exec_channel *src0,
926 const union tgsi_exec_channel *src1 )
927 {
928 #if FAST_MATH
929 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
930 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
931 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
932 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
933 #else
934 dst->f[0] = powf( src0->f[0], src1->f[0] );
935 dst->f[1] = powf( src0->f[1], src1->f[1] );
936 dst->f[2] = powf( src0->f[2], src1->f[2] );
937 dst->f[3] = powf( src0->f[3], src1->f[3] );
938 #endif
939 }
940
941 static void
942 micro_sqrt( union tgsi_exec_channel *dst,
943 const union tgsi_exec_channel *src )
944 {
945 dst->f[0] = sqrtf( src->f[0] );
946 dst->f[1] = sqrtf( src->f[1] );
947 dst->f[2] = sqrtf( src->f[2] );
948 dst->f[3] = sqrtf( src->f[3] );
949 }
950
951 static void
952 micro_sub(
953 union tgsi_exec_channel *dst,
954 const union tgsi_exec_channel *src0,
955 const union tgsi_exec_channel *src1 )
956 {
957 dst->f[0] = src0->f[0] - src1->f[0];
958 dst->f[1] = src0->f[1] - src1->f[1];
959 dst->f[2] = src0->f[2] - src1->f[2];
960 dst->f[3] = src0->f[3] - src1->f[3];
961 }
962
963 static void
964 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
965 const uint file,
966 const uint swizzle,
967 const union tgsi_exec_channel *index,
968 const union tgsi_exec_channel *index2D,
969 union tgsi_exec_channel *chan)
970 {
971 uint i;
972
973 switch (file) {
974 case TGSI_FILE_CONSTANT:
975 for (i = 0; i < QUAD_SIZE; i++) {
976 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
977 assert(mach->Consts[index2D->i[i]]);
978
979 if (index->i[i] < 0) {
980 chan->u[i] = 0;
981 } else {
982 const uint *p = (const uint *)mach->Consts[index2D->i[i]];
983
984 chan->u[i] = p[index->i[i] * 4 + swizzle];
985 }
986 }
987 break;
988
989 case TGSI_FILE_INPUT:
990 case TGSI_FILE_SYSTEM_VALUE:
991 for (i = 0; i < QUAD_SIZE; i++) {
992 /* XXX: 2D indexing */
993 chan->u[i] = mach->Inputs[index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]].xyzw[swizzle].u[i];
994 }
995 break;
996
997 case TGSI_FILE_TEMPORARY:
998 for (i = 0; i < QUAD_SIZE; i++) {
999 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1000 assert(index2D->i[i] == 0);
1001
1002 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1003 }
1004 break;
1005
1006 case TGSI_FILE_IMMEDIATE:
1007 for (i = 0; i < QUAD_SIZE; i++) {
1008 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1009 assert(index2D->i[i] == 0);
1010
1011 chan->f[i] = mach->Imms[index->i[i]][swizzle];
1012 }
1013 break;
1014
1015 case TGSI_FILE_ADDRESS:
1016 for (i = 0; i < QUAD_SIZE; i++) {
1017 assert(index->i[i] >= 0);
1018 assert(index2D->i[i] == 0);
1019
1020 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1021 }
1022 break;
1023
1024 case TGSI_FILE_PREDICATE:
1025 for (i = 0; i < QUAD_SIZE; i++) {
1026 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS);
1027 assert(index2D->i[i] == 0);
1028
1029 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i];
1030 }
1031 break;
1032
1033 case TGSI_FILE_OUTPUT:
1034 /* vertex/fragment output vars can be read too */
1035 for (i = 0; i < QUAD_SIZE; i++) {
1036 assert(index->i[i] >= 0);
1037 assert(index2D->i[i] == 0);
1038
1039 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1040 }
1041 break;
1042
1043 default:
1044 assert(0);
1045 for (i = 0; i < QUAD_SIZE; i++) {
1046 chan->u[i] = 0;
1047 }
1048 }
1049 }
1050
1051 static void
1052 fetch_source(const struct tgsi_exec_machine *mach,
1053 union tgsi_exec_channel *chan,
1054 const struct tgsi_full_src_register *reg,
1055 const uint chan_index,
1056 enum tgsi_exec_datatype src_datatype)
1057 {
1058 union tgsi_exec_channel index;
1059 union tgsi_exec_channel index2D;
1060 uint swizzle;
1061
1062 /* We start with a direct index into a register file.
1063 *
1064 * file[1],
1065 * where:
1066 * file = Register.File
1067 * [1] = Register.Index
1068 */
1069 index.i[0] =
1070 index.i[1] =
1071 index.i[2] =
1072 index.i[3] = reg->Register.Index;
1073
1074 /* There is an extra source register that indirectly subscripts
1075 * a register file. The direct index now becomes an offset
1076 * that is being added to the indirect register.
1077 *
1078 * file[ind[2].x+1],
1079 * where:
1080 * ind = Indirect.File
1081 * [2] = Indirect.Index
1082 * .x = Indirect.SwizzleX
1083 */
1084 if (reg->Register.Indirect) {
1085 union tgsi_exec_channel index2;
1086 union tgsi_exec_channel indir_index;
1087 const uint execmask = mach->ExecMask;
1088 uint i;
1089
1090 /* which address register (always zero now) */
1091 index2.i[0] =
1092 index2.i[1] =
1093 index2.i[2] =
1094 index2.i[3] = reg->Indirect.Index;
1095
1096 /* get current value of address register[swizzle] */
1097 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1098 fetch_src_file_channel(mach,
1099 reg->Indirect.File,
1100 swizzle,
1101 &index2,
1102 &ZeroVec,
1103 &indir_index);
1104
1105 /* add value of address register to the offset */
1106 index.i[0] += indir_index.i[0];
1107 index.i[1] += indir_index.i[1];
1108 index.i[2] += indir_index.i[2];
1109 index.i[3] += indir_index.i[3];
1110
1111 /* for disabled execution channels, zero-out the index to
1112 * avoid using a potential garbage value.
1113 */
1114 for (i = 0; i < QUAD_SIZE; i++) {
1115 if ((execmask & (1 << i)) == 0)
1116 index.i[i] = 0;
1117 }
1118 }
1119
1120 /* There is an extra source register that is a second
1121 * subscript to a register file. Effectively it means that
1122 * the register file is actually a 2D array of registers.
1123 *
1124 * file[3][1],
1125 * where:
1126 * [3] = Dimension.Index
1127 */
1128 if (reg->Register.Dimension) {
1129 index2D.i[0] =
1130 index2D.i[1] =
1131 index2D.i[2] =
1132 index2D.i[3] = reg->Dimension.Index;
1133
1134 /* Again, the second subscript index can be addressed indirectly
1135 * identically to the first one.
1136 * Nothing stops us from indirectly addressing the indirect register,
1137 * but there is no need for that, so we won't exercise it.
1138 *
1139 * file[ind[4].y+3][1],
1140 * where:
1141 * ind = DimIndirect.File
1142 * [4] = DimIndirect.Index
1143 * .y = DimIndirect.SwizzleX
1144 */
1145 if (reg->Dimension.Indirect) {
1146 union tgsi_exec_channel index2;
1147 union tgsi_exec_channel indir_index;
1148 const uint execmask = mach->ExecMask;
1149 uint i;
1150
1151 index2.i[0] =
1152 index2.i[1] =
1153 index2.i[2] =
1154 index2.i[3] = reg->DimIndirect.Index;
1155
1156 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
1157 fetch_src_file_channel(mach,
1158 reg->DimIndirect.File,
1159 swizzle,
1160 &index2,
1161 &ZeroVec,
1162 &indir_index);
1163
1164 index2D.i[0] += indir_index.i[0];
1165 index2D.i[1] += indir_index.i[1];
1166 index2D.i[2] += indir_index.i[2];
1167 index2D.i[3] += indir_index.i[3];
1168
1169 /* for disabled execution channels, zero-out the index to
1170 * avoid using a potential garbage value.
1171 */
1172 for (i = 0; i < QUAD_SIZE; i++) {
1173 if ((execmask & (1 << i)) == 0) {
1174 index2D.i[i] = 0;
1175 }
1176 }
1177 }
1178
1179 /* If by any chance there was a need for a 3D array of register
1180 * files, we would have to check whether Dimension is followed
1181 * by a dimension register and continue the saga.
1182 */
1183 } else {
1184 index2D.i[0] =
1185 index2D.i[1] =
1186 index2D.i[2] =
1187 index2D.i[3] = 0;
1188 }
1189
1190 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1191 fetch_src_file_channel(mach,
1192 reg->Register.File,
1193 swizzle,
1194 &index,
1195 &index2D,
1196 chan);
1197
1198 if (reg->Register.Absolute) {
1199 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1200 micro_abs(chan, chan);
1201 } else {
1202 micro_iabs(chan, chan);
1203 }
1204 }
1205
1206 if (reg->Register.Negate) {
1207 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1208 micro_neg(chan, chan);
1209 } else {
1210 micro_ineg(chan, chan);
1211 }
1212 }
1213 }
1214
1215 static void
1216 store_dest(struct tgsi_exec_machine *mach,
1217 const union tgsi_exec_channel *chan,
1218 const struct tgsi_full_dst_register *reg,
1219 const struct tgsi_full_instruction *inst,
1220 uint chan_index,
1221 enum tgsi_exec_datatype dst_datatype)
1222 {
1223 uint i;
1224 union tgsi_exec_channel null;
1225 union tgsi_exec_channel *dst;
1226 uint execmask = mach->ExecMask;
1227 int offset = 0; /* indirection offset */
1228 int index;
1229
1230 /* for debugging */
1231 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1232 check_inf_or_nan(chan);
1233 }
1234
1235 /* There is an extra source register that indirectly subscripts
1236 * a register file. The direct index now becomes an offset
1237 * that is being added to the indirect register.
1238 *
1239 * file[ind[2].x+1],
1240 * where:
1241 * ind = Indirect.File
1242 * [2] = Indirect.Index
1243 * .x = Indirect.SwizzleX
1244 */
1245 if (reg->Register.Indirect) {
1246 union tgsi_exec_channel index;
1247 union tgsi_exec_channel indir_index;
1248 uint swizzle;
1249
1250 /* which address register (always zero for now) */
1251 index.i[0] =
1252 index.i[1] =
1253 index.i[2] =
1254 index.i[3] = reg->Indirect.Index;
1255
1256 /* get current value of address register[swizzle] */
1257 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1258
1259 /* fetch values from the address/indirection register */
1260 fetch_src_file_channel(mach,
1261 reg->Indirect.File,
1262 swizzle,
1263 &index,
1264 &ZeroVec,
1265 &indir_index);
1266
1267 /* save indirection offset */
1268 offset = indir_index.i[0];
1269 }
1270
1271 switch (reg->Register.File) {
1272 case TGSI_FILE_NULL:
1273 dst = &null;
1274 break;
1275
1276 case TGSI_FILE_OUTPUT:
1277 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1278 + reg->Register.Index;
1279 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1280 #if 0
1281 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1282 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1283 for (i = 0; i < QUAD_SIZE; i++)
1284 if (execmask & (1 << i))
1285 fprintf(stderr, "%f, ", chan->f[i]);
1286 fprintf(stderr, ")\n");
1287 }
1288 #endif
1289 break;
1290
1291 case TGSI_FILE_TEMPORARY:
1292 index = reg->Register.Index;
1293 assert( index < TGSI_EXEC_NUM_TEMPS );
1294 dst = &mach->Temps[offset + index].xyzw[chan_index];
1295 break;
1296
1297 case TGSI_FILE_ADDRESS:
1298 index = reg->Register.Index;
1299 dst = &mach->Addrs[index].xyzw[chan_index];
1300 break;
1301
1302 case TGSI_FILE_LOOP:
1303 assert(reg->Register.Index == 0);
1304 assert(mach->LoopCounterStackTop > 0);
1305 assert(chan_index == CHAN_X);
1306 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
1307 break;
1308
1309 case TGSI_FILE_PREDICATE:
1310 index = reg->Register.Index;
1311 assert(index < TGSI_EXEC_NUM_PREDS);
1312 dst = &mach->Predicates[index].xyzw[chan_index];
1313 break;
1314
1315 default:
1316 assert( 0 );
1317 return;
1318 }
1319
1320 if (inst->Instruction.Predicate) {
1321 uint swizzle;
1322 union tgsi_exec_channel *pred;
1323
1324 switch (chan_index) {
1325 case CHAN_X:
1326 swizzle = inst->Predicate.SwizzleX;
1327 break;
1328 case CHAN_Y:
1329 swizzle = inst->Predicate.SwizzleY;
1330 break;
1331 case CHAN_Z:
1332 swizzle = inst->Predicate.SwizzleZ;
1333 break;
1334 case CHAN_W:
1335 swizzle = inst->Predicate.SwizzleW;
1336 break;
1337 default:
1338 assert(0);
1339 return;
1340 }
1341
1342 assert(inst->Predicate.Index == 0);
1343
1344 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1345
1346 if (inst->Predicate.Negate) {
1347 for (i = 0; i < QUAD_SIZE; i++) {
1348 if (pred->u[i]) {
1349 execmask &= ~(1 << i);
1350 }
1351 }
1352 } else {
1353 for (i = 0; i < QUAD_SIZE; i++) {
1354 if (!pred->u[i]) {
1355 execmask &= ~(1 << i);
1356 }
1357 }
1358 }
1359 }
1360
1361 switch (inst->Instruction.Saturate) {
1362 case TGSI_SAT_NONE:
1363 for (i = 0; i < QUAD_SIZE; i++)
1364 if (execmask & (1 << i))
1365 dst->i[i] = chan->i[i];
1366 break;
1367
1368 case TGSI_SAT_ZERO_ONE:
1369 for (i = 0; i < QUAD_SIZE; i++)
1370 if (execmask & (1 << i)) {
1371 if (chan->f[i] < 0.0f)
1372 dst->f[i] = 0.0f;
1373 else if (chan->f[i] > 1.0f)
1374 dst->f[i] = 1.0f;
1375 else
1376 dst->i[i] = chan->i[i];
1377 }
1378 break;
1379
1380 case TGSI_SAT_MINUS_PLUS_ONE:
1381 for (i = 0; i < QUAD_SIZE; i++)
1382 if (execmask & (1 << i)) {
1383 if (chan->f[i] < -1.0f)
1384 dst->f[i] = -1.0f;
1385 else if (chan->f[i] > 1.0f)
1386 dst->f[i] = 1.0f;
1387 else
1388 dst->i[i] = chan->i[i];
1389 }
1390 break;
1391
1392 default:
1393 assert( 0 );
1394 }
1395 }
1396
1397 #define FETCH(VAL,INDEX,CHAN)\
1398 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)
1399
1400 #define STORE(VAL,INDEX,CHAN)\
1401 store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT)
1402
1403
1404 /**
1405 * Execute ARB-style KIL which is predicated by a src register.
1406 * Kill fragment if any of the four values is less than zero.
1407 */
1408 static void
1409 exec_kil(struct tgsi_exec_machine *mach,
1410 const struct tgsi_full_instruction *inst)
1411 {
1412 uint uniquemask;
1413 uint chan_index;
1414 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1415 union tgsi_exec_channel r[1];
1416
1417 /* This mask stores component bits that were already tested. */
1418 uniquemask = 0;
1419
1420 for (chan_index = 0; chan_index < 4; chan_index++)
1421 {
1422 uint swizzle;
1423 uint i;
1424
1425 /* unswizzle channel */
1426 swizzle = tgsi_util_get_full_src_register_swizzle (
1427 &inst->Src[0],
1428 chan_index);
1429
1430 /* skip this component if it has already been tested */
1431 if (uniquemask & (1 << swizzle))
1432 continue;
1433 uniquemask |= 1 << swizzle;
1434
1435 FETCH(&r[0], 0, chan_index);
1436 for (i = 0; i < 4; i++)
1437 if (r[0].f[i] < 0.0f)
1438 kilmask |= 1 << i;
1439 }
1440
1441 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1442 }
1443
1444 /**
1445 * Execute NVIDIA-style KIL which is predicated by a condition code.
1446 * Kill fragment if the condition code is TRUE.
1447 */
1448 static void
1449 exec_kilp(struct tgsi_exec_machine *mach,
1450 const struct tgsi_full_instruction *inst)
1451 {
1452 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1453
1454 /* "unconditional" kil */
1455 kilmask = mach->ExecMask;
1456 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1457 }
1458
1459 static void
1460 emit_vertex(struct tgsi_exec_machine *mach)
1461 {
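   /* if any channel is active, advance the output-register base (TEMP_OUTPUT)
    * by one vertex's worth of outputs and count the vertex against the
    * current primitive
    */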
1462 /* FIXME: check for exec mask correctly
1463 unsigned i;
1464 for (i = 0; i < QUAD_SIZE; ++i) {
1465 if ((mach->ExecMask & (1 << i)))
1466 */
1467 if (mach->ExecMask) {
1468 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
1469 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1470 }
1471 }
1472
1473 static void
1474 emit_primitive(struct tgsi_exec_machine *mach)
1475 {
1476 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
1477 /* FIXME: check for exec mask correctly
1478 unsigned i;
1479 for (i = 0; i < QUAD_SIZE; ++i) {
1480 if ((mach->ExecMask & (1 << i)))
1481 */
1482 if (mach->ExecMask) {
1483 ++(*prim_count);
1484 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
1485 mach->Primitives[*prim_count] = 0;
1486 }
1487 }
1488
1489 /*
1490 * Fetch four texture samples using STR texture coordinates.
1491 */
1492 static void
1493 fetch_texel( struct tgsi_sampler *sampler,
1494 const union tgsi_exec_channel *s,
1495 const union tgsi_exec_channel *t,
1496 const union tgsi_exec_channel *p,
1497 const union tgsi_exec_channel *c0,
1498 enum tgsi_sampler_control control,
1499 union tgsi_exec_channel *r,
1500 union tgsi_exec_channel *g,
1501 union tgsi_exec_channel *b,
1502 union tgsi_exec_channel *a )
1503 {
1504 uint j;
1505 float rgba[NUM_CHANNELS][QUAD_SIZE];
1506
1507 sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba);
1508
1509 for (j = 0; j < 4; j++) {
1510 r->f[j] = rgba[0][j];
1511 g->f[j] = rgba[1][j];
1512 b->f[j] = rgba[2][j];
1513 a->f[j] = rgba[3][j];
1514 }
1515 }
1516
1517
1518 #define TEX_MODIFIER_NONE 0
1519 #define TEX_MODIFIER_PROJECTED 1
1520 #define TEX_MODIFIER_LOD_BIAS 2
1521 #define TEX_MODIFIER_EXPLICIT_LOD 3
1522
1523
1524 static void
1525 exec_tex(struct tgsi_exec_machine *mach,
1526 const struct tgsi_full_instruction *inst,
1527 uint modifier)
1528 {
1529 const uint unit = inst->Src[1].Register.Index;
1530 union tgsi_exec_channel r[4];
1531 const union tgsi_exec_channel *lod = &ZeroVec;
1532 enum tgsi_sampler_control control;
1533 uint chan_index;
1534
1535 if (modifier != TEX_MODIFIER_NONE) {
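      /* the W coordinate carries either the projective divisor or the
       * LOD bias / explicit LOD
       */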
1536 FETCH(&r[3], 0, CHAN_W);
1537 if (modifier != TEX_MODIFIER_PROJECTED) {
1538 lod = &r[3];
1539 }
1540 }
1541
1542 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
1543 control = tgsi_sampler_lod_explicit;
1544 } else {
1545 control = tgsi_sampler_lod_bias;
1546 }
1547
1548 switch (inst->Texture.Texture) {
1549 case TGSI_TEXTURE_1D:
1550 case TGSI_TEXTURE_SHADOW1D:
1551 FETCH(&r[0], 0, CHAN_X);
1552
1553 if (modifier == TEX_MODIFIER_PROJECTED) {
1554 micro_div(&r[0], &r[0], &r[3]);
1555 }
1556
1557 fetch_texel(mach->Samplers[unit],
1558 &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */
1559 control,
1560 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1561 break;
1562
1563 case TGSI_TEXTURE_2D:
1564 case TGSI_TEXTURE_RECT:
1565 case TGSI_TEXTURE_SHADOW2D:
1566 case TGSI_TEXTURE_SHADOWRECT:
1567 FETCH(&r[0], 0, CHAN_X);
1568 FETCH(&r[1], 0, CHAN_Y);
1569 FETCH(&r[2], 0, CHAN_Z);
1570
1571 if (modifier == TEX_MODIFIER_PROJECTED) {
1572 micro_div(&r[0], &r[0], &r[3]);
1573 micro_div(&r[1], &r[1], &r[3]);
1574 micro_div(&r[2], &r[2], &r[3]);
1575 }
1576
1577 fetch_texel(mach->Samplers[unit],
1578 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
1579 control,
1580 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1581 break;
1582
1583 case TGSI_TEXTURE_3D:
1584 case TGSI_TEXTURE_CUBE:
1585 FETCH(&r[0], 0, CHAN_X);
1586 FETCH(&r[1], 0, CHAN_Y);
1587 FETCH(&r[2], 0, CHAN_Z);
1588
1589 if (modifier == TEX_MODIFIER_PROJECTED) {
1590 micro_div(&r[0], &r[0], &r[3]);
1591 micro_div(&r[1], &r[1], &r[3]);
1592 micro_div(&r[2], &r[2], &r[3]);
1593 }
1594
1595 fetch_texel(mach->Samplers[unit],
1596 &r[0], &r[1], &r[2], lod,
1597 control,
1598 &r[0], &r[1], &r[2], &r[3]);
1599 break;
1600
1601 default:
1602 assert(0);
1603 }
1604
1605 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1606 STORE(&r[chan_index], 0, chan_index);
1607 }
1608 }
1609
1610 static void
1611 exec_txd(struct tgsi_exec_machine *mach,
1612 const struct tgsi_full_instruction *inst)
1613 {
1614 const uint unit = inst->Src[3].Register.Index;
1615 union tgsi_exec_channel r[4];
1616 uint chan_index;
1617
1618 /*
1619 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1620 */
1621
1622 switch (inst->Texture.Texture) {
1623 case TGSI_TEXTURE_1D:
1624 case TGSI_TEXTURE_SHADOW1D:
1625
1626 FETCH(&r[0], 0, CHAN_X);
1627
1628 fetch_texel(mach->Samplers[unit],
1629 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */
1630 tgsi_sampler_lod_bias,
1631 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1632 break;
1633
1634 case TGSI_TEXTURE_2D:
1635 case TGSI_TEXTURE_RECT:
1636 case TGSI_TEXTURE_SHADOW2D:
1637 case TGSI_TEXTURE_SHADOWRECT:
1638
1639 FETCH(&r[0], 0, CHAN_X);
1640 FETCH(&r[1], 0, CHAN_Y);
1641 FETCH(&r[2], 0, CHAN_Z);
1642
1643 fetch_texel(mach->Samplers[unit],
1644 &r[0], &r[1], &r[2], &ZeroVec, /* inputs */
1645 tgsi_sampler_lod_bias,
1646 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1647 break;
1648
1649 case TGSI_TEXTURE_3D:
1650 case TGSI_TEXTURE_CUBE:
1651
1652 FETCH(&r[0], 0, CHAN_X);
1653 FETCH(&r[1], 0, CHAN_Y);
1654 FETCH(&r[2], 0, CHAN_Z);
1655
1656 fetch_texel(mach->Samplers[unit],
1657 &r[0], &r[1], &r[2], &ZeroVec,
1658 tgsi_sampler_lod_bias,
1659 &r[0], &r[1], &r[2], &r[3]);
1660 break;
1661
1662 default:
1663 assert(0);
1664 }
1665
1666 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1667 STORE(&r[chan_index], 0, chan_index);
1668 }
1669 }
1670
1671
1672 /**
1673 * Evaluate a constant-valued coefficient at the position of the
1674 * current quad.
1675 */
1676 static void
1677 eval_constant_coef(
1678 struct tgsi_exec_machine *mach,
1679 unsigned attrib,
1680 unsigned chan )
1681 {
1682 unsigned i;
1683
1684 for( i = 0; i < QUAD_SIZE; i++ ) {
1685 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1686 }
1687 }
1688
1689 /**
1690 * Evaluate a linear-valued coefficient at the position of the
1691 * current quad.
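 * The four channel slots correspond to the quad corners: [0] = (x, y),
 * [1] = (x+1, y), [2] = (x, y+1), [3] = (x+1, y+1), matching the TILE_*
 * ordering defined near the top of the file.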
1692 */
1693 static void
1694 eval_linear_coef(
1695 struct tgsi_exec_machine *mach,
1696 unsigned attrib,
1697 unsigned chan )
1698 {
1699 const float x = mach->QuadPos.xyzw[0].f[0];
1700 const float y = mach->QuadPos.xyzw[1].f[0];
1701 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1702 const float dady = mach->InterpCoefs[attrib].dady[chan];
1703 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1704 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1705 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1706 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1707 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1708 }
1709
1710 /**
1711 * Evaluate a perspective-valued coefficient at the position of the
1712 * current quad.
1713 */
1714 static void
1715 eval_perspective_coef(
1716 struct tgsi_exec_machine *mach,
1717 unsigned attrib,
1718 unsigned chan )
1719 {
1720 const float x = mach->QuadPos.xyzw[0].f[0];
1721 const float y = mach->QuadPos.xyzw[1].f[0];
1722 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1723 const float dady = mach->InterpCoefs[attrib].dady[chan];
1724 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1725 const float *w = mach->QuadPos.xyzw[3].f;
1726 /* divide by W here */
1727 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1728 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1729 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1730 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1731 }
1732
1733
1734 typedef void (* eval_coef_func)(
1735 struct tgsi_exec_machine *mach,
1736 unsigned attrib,
1737 unsigned chan );
1738
1739 static void
1740 exec_declaration(struct tgsi_exec_machine *mach,
1741 const struct tgsi_full_declaration *decl)
1742 {
1743 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
1744 if (decl->Declaration.File == TGSI_FILE_INPUT ||
1745 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1746 uint first, last, mask;
1747
1748 first = decl->Range.First;
1749 last = decl->Range.Last;
1750 mask = decl->Declaration.UsageMask;
1751
1752 if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
1753 assert(decl->Semantic.Index == 0);
1754 assert(first == last);
1755 assert(mask == TGSI_WRITEMASK_XYZW);
1756
1757 mach->Inputs[first] = mach->QuadPos;
1758 } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
1759 uint i;
1760
1761 assert(decl->Semantic.Index == 0);
1762 assert(first == last);
1763
1764 for (i = 0; i < QUAD_SIZE; i++) {
1765 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
1766 }
1767 } else {
1768 eval_coef_func eval;
1769 uint i, j;
1770
1771 switch (decl->Declaration.Interpolate) {
1772 case TGSI_INTERPOLATE_CONSTANT:
1773 eval = eval_constant_coef;
1774 break;
1775
1776 case TGSI_INTERPOLATE_LINEAR:
1777 eval = eval_linear_coef;
1778 break;
1779
1780 case TGSI_INTERPOLATE_PERSPECTIVE:
1781 eval = eval_perspective_coef;
1782 break;
1783
1784 default:
1785 assert(0);
1786 return;
1787 }
1788
1789 for (j = 0; j < NUM_CHANNELS; j++) {
1790 if (mask & (1 << j)) {
1791 for (i = first; i <= last; i++) {
1792 eval(mach, i, j);
1793 }
1794 }
1795 }
1796 }
1797 }
1798 }
1799 }
1800
1801 typedef void (* micro_op)(union tgsi_exec_channel *dst,
1802 const union tgsi_exec_channel *src);
1803
1804 static void
1805 exec_scalar_unary(struct tgsi_exec_machine *mach,
1806 const struct tgsi_full_instruction *inst,
1807 micro_op op,
1808 enum tgsi_exec_datatype dst_datatype,
1809 enum tgsi_exec_datatype src_datatype)
1810 {
1811 unsigned int chan;
1812 union tgsi_exec_channel src;
1813 union tgsi_exec_channel dst;
1814
1815 fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype);
1816 op(&dst, &src);
1817 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1818 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1819 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
1820 }
1821 }
1822 }
1823
1824 static void
1825 exec_vector_unary(struct tgsi_exec_machine *mach,
1826 const struct tgsi_full_instruction *inst,
1827 micro_op op,
1828 enum tgsi_exec_datatype dst_datatype,
1829 enum tgsi_exec_datatype src_datatype)
1830 {
1831 unsigned int chan;
1832 struct tgsi_exec_vector dst;
1833
1834 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1835 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1836 union tgsi_exec_channel src;
1837
1838 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
1839 op(&dst.xyzw[chan], &src);
1840 }
1841 }
1842 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1843 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1844 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1845 }
1846 }
1847 }
1848
1849 static void
1850 exec_vector_binary(struct tgsi_exec_machine *mach,
1851 const struct tgsi_full_instruction *inst,
1852 micro_op op,
1853 enum tgsi_exec_datatype dst_datatype,
1854 enum tgsi_exec_datatype src_datatype)
1855 {
1856 unsigned int chan;
1857 struct tgsi_exec_vector dst;
1858
1859 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1860 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1861 union tgsi_exec_channel src[2];
1862
1863 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
1864 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
1865 op(&dst.xyzw[chan], src);
1866 }
1867 }
1868 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1869 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1870 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1871 }
1872 }
1873 }
1874
1875 static void
1876 exec_vector_trinary(struct tgsi_exec_machine *mach,
1877 const struct tgsi_full_instruction *inst,
1878 micro_op op,
1879 enum tgsi_exec_datatype dst_datatype,
1880 enum tgsi_exec_datatype src_datatype)
1881 {
1882 unsigned int chan;
1883 struct tgsi_exec_vector dst;
1884
1885 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1886 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1887 union tgsi_exec_channel src[3];
1888
1889 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
1890 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
1891 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
1892 op(&dst.xyzw[chan], src);
1893 }
1894 }
1895 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1896 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1897 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1898 }
1899 }
1900 }
1901
1902 static void
1903 exec_dp3(struct tgsi_exec_machine *mach,
1904 const struct tgsi_full_instruction *inst)
1905 {
1906 unsigned int chan;
1907 union tgsi_exec_channel arg[3];
1908
1909 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1910 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1911 micro_mul(&arg[2], &arg[0], &arg[1]);
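   /* arg[2] accumulates the dot product: each micro_mad() below adds
    * arg[0]*arg[1] to it
    */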
1912
1913 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) {
1914 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
1915 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
1916 micro_mad(&arg[2], arg);
1917 }
1918
1919 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1920 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1921 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1922 }
1923 }
1924 }
1925
1926 static void
1927 exec_dp4(struct tgsi_exec_machine *mach,
1928 const struct tgsi_full_instruction *inst)
1929 {
1930 unsigned int chan;
1931 union tgsi_exec_channel arg[3];
1932
1933 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1934 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1935 micro_mul(&arg[2], &arg[0], &arg[1]);
1936
1937 for (chan = CHAN_Y; chan <= CHAN_W; chan++) {
1938 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
1939 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
1940 micro_mad(&arg[2], arg);
1941 }
1942
1943 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1944 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1945 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1946 }
1947 }
1948 }
1949
1950 static void
1951 exec_dp2a(struct tgsi_exec_machine *mach,
1952 const struct tgsi_full_instruction *inst)
1953 {
1954 unsigned int chan;
1955 union tgsi_exec_channel arg[3];
1956
1957 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1958 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1959 micro_mul(&arg[2], &arg[0], &arg[1]);
1960
1961 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
1962 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
1963 micro_mad(&arg[0], arg);
1964
1965 fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1966 micro_add(&arg[0], &arg[0], &arg[1]);
1967
1968 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1969 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1970 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1971 }
1972 }
1973 }
1974
1975 static void
1976 exec_dph(struct tgsi_exec_machine *mach,
1977 const struct tgsi_full_instruction *inst)
1978 {
1979 unsigned int chan;
1980 union tgsi_exec_channel arg[3];
1981
1982 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1983 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1984 micro_mul(&arg[2], &arg[0], &arg[1]);
1985
1986 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
1987 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
1988 micro_mad(&arg[2], arg);
1989
1990 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
1991 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
1992 micro_mad(&arg[0], arg);
1993
1994 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT);
1995 micro_add(&arg[0], &arg[0], &arg[1]);
1996
1997 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1998 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1999 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2000 }
2001 }
2002 }
2003
2004 static void
2005 exec_dp2(struct tgsi_exec_machine *mach,
2006 const struct tgsi_full_instruction *inst)
2007 {
2008 unsigned int chan;
2009 union tgsi_exec_channel arg[3];
2010
2011 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2012 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2013 micro_mul(&arg[2], &arg[0], &arg[1]);
2014
2015 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2016 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2017 micro_mad(&arg[2], arg);
2018
2019 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2020 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2021 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2022 }
2023 }
2024 }
2025
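/* BRK: depending on BreakType, either clear the LoopMask bits of all
 * currently active channels (break out of the enclosing loop) or zero
 * the switch mask so no channel executes until the matching ENDSWITCH.
 */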
2026 static void
2027 exec_break(struct tgsi_exec_machine *mach)
2028 {
2029 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
2030 /* turn off loop channels for each enabled exec channel */
2031 mach->LoopMask &= ~mach->ExecMask;
2032 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2033 UPDATE_EXEC_MASK(mach);
2034 } else {
2035 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
2036
2037 mach->Switch.mask = 0x0;
2038
2039 UPDATE_EXEC_MASK(mach);
2040 }
2041 }
2042
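/* SWITCH: save the current switch state, fetch the per-channel selector
 * and start with an empty execution mask; channels only become active
 * again when a CASE value matches (or at DEFAULT).
 */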
2043 static void
2044 exec_switch(struct tgsi_exec_machine *mach,
2045 const struct tgsi_full_instruction *inst)
2046 {
2047 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
2048 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
2049
2050 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
2051 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
2052 mach->Switch.mask = 0x0;
2053 mach->Switch.defaultMask = 0x0;
2054
2055 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
2056 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
2057
2058 UPDATE_EXEC_MASK(mach);
2059 }
2060
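/* CASE: enable each channel whose selector matches this case value and
 * which was active when the switch was entered.  defaultMask accumulates
 * every channel matched by any CASE so that DEFAULT can enable the rest.
 */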
2061 static void
2062 exec_case(struct tgsi_exec_machine *mach,
2063 const struct tgsi_full_instruction *inst)
2064 {
2065 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
2066 union tgsi_exec_channel src;
2067 uint mask = 0;
2068
2069 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
2070
2071 if (mach->Switch.selector.u[0] == src.u[0]) {
2072 mask |= 0x1;
2073 }
2074 if (mach->Switch.selector.u[1] == src.u[1]) {
2075 mask |= 0x2;
2076 }
2077 if (mach->Switch.selector.u[2] == src.u[2]) {
2078 mask |= 0x4;
2079 }
2080 if (mach->Switch.selector.u[3] == src.u[3]) {
2081 mask |= 0x8;
2082 }
2083
2084 mach->Switch.defaultMask |= mask;
2085
2086 mach->Switch.mask |= mask & prevMask;
2087
2088 UPDATE_EXEC_MASK(mach);
2089 }
2090
2091 static void
2092 exec_default(struct tgsi_exec_machine *mach)
2093 {
2094 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
2095
2096 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
2097
2098 UPDATE_EXEC_MASK(mach);
2099 }
2100
2101 static void
2102 exec_endswitch(struct tgsi_exec_machine *mach)
2103 {
2104 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
2105 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
2106
2107 UPDATE_EXEC_MASK(mach);
2108 }
2109
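/* The micro ops below implement TGSI's integer, unsigned, bitwise and
 * conversion opcodes; each operates component-wise on all four values of
 * a quad.
 */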
2110 static void
2111 micro_i2f(union tgsi_exec_channel *dst,
2112 const union tgsi_exec_channel *src)
2113 {
2114 dst->f[0] = (float)src->i[0];
2115 dst->f[1] = (float)src->i[1];
2116 dst->f[2] = (float)src->i[2];
2117 dst->f[3] = (float)src->i[3];
2118 }
2119
2120 static void
2121 micro_not(union tgsi_exec_channel *dst,
2122 const union tgsi_exec_channel *src)
2123 {
2124 dst->u[0] = ~src->u[0];
2125 dst->u[1] = ~src->u[1];
2126 dst->u[2] = ~src->u[2];
2127 dst->u[3] = ~src->u[3];
2128 }
2129
2130 static void
2131 micro_shl(union tgsi_exec_channel *dst,
2132 const union tgsi_exec_channel *src)
2133 {
2134 dst->u[0] = src[0].u[0] << src[1].u[0];
2135 dst->u[1] = src[0].u[1] << src[1].u[1];
2136 dst->u[2] = src[0].u[2] << src[1].u[2];
2137 dst->u[3] = src[0].u[3] << src[1].u[3];
2138 }
2139
2140 static void
2141 micro_and(union tgsi_exec_channel *dst,
2142 const union tgsi_exec_channel *src)
2143 {
2144 dst->u[0] = src[0].u[0] & src[1].u[0];
2145 dst->u[1] = src[0].u[1] & src[1].u[1];
2146 dst->u[2] = src[0].u[2] & src[1].u[2];
2147 dst->u[3] = src[0].u[3] & src[1].u[3];
2148 }
2149
2150 static void
2151 micro_or(union tgsi_exec_channel *dst,
2152 const union tgsi_exec_channel *src)
2153 {
2154 dst->u[0] = src[0].u[0] | src[1].u[0];
2155 dst->u[1] = src[0].u[1] | src[1].u[1];
2156 dst->u[2] = src[0].u[2] | src[1].u[2];
2157 dst->u[3] = src[0].u[3] | src[1].u[3];
2158 }
2159
2160 static void
2161 micro_xor(union tgsi_exec_channel *dst,
2162 const union tgsi_exec_channel *src)
2163 {
2164 dst->u[0] = src[0].u[0] ^ src[1].u[0];
2165 dst->u[1] = src[0].u[1] ^ src[1].u[1];
2166 dst->u[2] = src[0].u[2] ^ src[1].u[2];
2167 dst->u[3] = src[0].u[3] ^ src[1].u[3];
2168 }
2169
2170 static void
2171 micro_f2i(union tgsi_exec_channel *dst,
2172 const union tgsi_exec_channel *src)
2173 {
2174 dst->i[0] = (int)src->f[0];
2175 dst->i[1] = (int)src->f[1];
2176 dst->i[2] = (int)src->f[2];
2177 dst->i[3] = (int)src->f[3];
2178 }
2179
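/* Note: the integer divide/modulo micro ops below do not guard against a
 * zero divisor, so a shader dividing by zero hits undefined behavior here.
 */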
2180 static void
2181 micro_idiv(union tgsi_exec_channel *dst,
2182 const union tgsi_exec_channel *src)
2183 {
2184 dst->i[0] = src[0].i[0] / src[1].i[0];
2185 dst->i[1] = src[0].i[1] / src[1].i[1];
2186 dst->i[2] = src[0].i[2] / src[1].i[2];
2187 dst->i[3] = src[0].i[3] / src[1].i[3];
2188 }
2189
2190 static void
2191 micro_imax(union tgsi_exec_channel *dst,
2192 const union tgsi_exec_channel *src)
2193 {
2194 dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0];
2195 dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1];
2196 dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2];
2197 dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3];
2198 }
2199
2200 static void
2201 micro_imin(union tgsi_exec_channel *dst,
2202 const union tgsi_exec_channel *src)
2203 {
2204 dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0];
2205 dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1];
2206 dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2];
2207 dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3];
2208 }
2209
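/* Signed and unsigned comparisons follow TGSI set-on semantics: all bits
 * set (-1 / ~0) when the comparison is true, 0 when it is false.  The
 * unsigned micro_useq/usge/uslt/usne further below use the same convention.
 */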
2210 static void
2211 micro_isge(union tgsi_exec_channel *dst,
2212 const union tgsi_exec_channel *src)
2213 {
2214 dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0;
2215 dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0;
2216 dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0;
2217 dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0;
2218 }
2219
2220 static void
2221 micro_ishr(union tgsi_exec_channel *dst,
2222 const union tgsi_exec_channel *src)
2223 {
2224 dst->i[0] = src[0].i[0] >> src[1].i[0];
2225 dst->i[1] = src[0].i[1] >> src[1].i[1];
2226 dst->i[2] = src[0].i[2] >> src[1].i[2];
2227 dst->i[3] = src[0].i[3] >> src[1].i[3];
2228 }
2229
2230 static void
2231 micro_islt(union tgsi_exec_channel *dst,
2232 const union tgsi_exec_channel *src)
2233 {
2234 dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0;
2235 dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0;
2236 dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0;
2237 dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0;
2238 }
2239
2240 static void
2241 micro_f2u(union tgsi_exec_channel *dst,
2242 const union tgsi_exec_channel *src)
2243 {
2244 dst->u[0] = (uint)src->f[0];
2245 dst->u[1] = (uint)src->f[1];
2246 dst->u[2] = (uint)src->f[2];
2247 dst->u[3] = (uint)src->f[3];
2248 }
2249
2250 static void
2251 micro_u2f(union tgsi_exec_channel *dst,
2252 const union tgsi_exec_channel *src)
2253 {
2254 dst->f[0] = (float)src->u[0];
2255 dst->f[1] = (float)src->u[1];
2256 dst->f[2] = (float)src->u[2];
2257 dst->f[3] = (float)src->u[3];
2258 }
2259
2260 static void
2261 micro_uadd(union tgsi_exec_channel *dst,
2262 const union tgsi_exec_channel *src)
2263 {
2264 dst->u[0] = src[0].u[0] + src[1].u[0];
2265 dst->u[1] = src[0].u[1] + src[1].u[1];
2266 dst->u[2] = src[0].u[2] + src[1].u[2];
2267 dst->u[3] = src[0].u[3] + src[1].u[3];
2268 }
2269
2270 static void
2271 micro_udiv(union tgsi_exec_channel *dst,
2272 const union tgsi_exec_channel *src)
2273 {
2274 dst->u[0] = src[0].u[0] / src[1].u[0];
2275 dst->u[1] = src[0].u[1] / src[1].u[1];
2276 dst->u[2] = src[0].u[2] / src[1].u[2];
2277 dst->u[3] = src[0].u[3] / src[1].u[3];
2278 }
2279
2280 static void
2281 micro_umad(union tgsi_exec_channel *dst,
2282 const union tgsi_exec_channel *src)
2283 {
2284 dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0];
2285 dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1];
2286 dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2];
2287 dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3];
2288 }
2289
2290 static void
2291 micro_umax(union tgsi_exec_channel *dst,
2292 const union tgsi_exec_channel *src)
2293 {
2294 dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0];
2295 dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1];
2296 dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2];
2297 dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3];
2298 }
2299
2300 static void
2301 micro_umin(union tgsi_exec_channel *dst,
2302 const union tgsi_exec_channel *src)
2303 {
2304 dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0];
2305 dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1];
2306 dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2];
2307 dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3];
2308 }
2309
2310 static void
2311 micro_umod(union tgsi_exec_channel *dst,
2312 const union tgsi_exec_channel *src)
2313 {
2314 dst->u[0] = src[0].u[0] % src[1].u[0];
2315 dst->u[1] = src[0].u[1] % src[1].u[1];
2316 dst->u[2] = src[0].u[2] % src[1].u[2];
2317 dst->u[3] = src[0].u[3] % src[1].u[3];
2318 }
2319
2320 static void
2321 micro_umul(union tgsi_exec_channel *dst,
2322 const union tgsi_exec_channel *src)
2323 {
2324 dst->u[0] = src[0].u[0] * src[1].u[0];
2325 dst->u[1] = src[0].u[1] * src[1].u[1];
2326 dst->u[2] = src[0].u[2] * src[1].u[2];
2327 dst->u[3] = src[0].u[3] * src[1].u[3];
2328 }
2329
2330 static void
2331 micro_useq(union tgsi_exec_channel *dst,
2332 const union tgsi_exec_channel *src)
2333 {
2334 dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0;
2335 dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0;
2336 dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0;
2337 dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0;
2338 }
2339
2340 static void
2341 micro_usge(union tgsi_exec_channel *dst,
2342 const union tgsi_exec_channel *src)
2343 {
2344 dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0;
2345 dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0;
2346 dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0;
2347 dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0;
2348 }
2349
2350 static void
2351 micro_ushr(union tgsi_exec_channel *dst,
2352 const union tgsi_exec_channel *src)
2353 {
2354 dst->u[0] = src[0].u[0] >> src[1].u[0];
2355 dst->u[1] = src[0].u[1] >> src[1].u[1];
2356 dst->u[2] = src[0].u[2] >> src[1].u[2];
2357 dst->u[3] = src[0].u[3] >> src[1].u[3];
2358 }
2359
2360 static void
2361 micro_uslt(union tgsi_exec_channel *dst,
2362 const union tgsi_exec_channel *src)
2363 {
2364 dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0;
2365 dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0;
2366 dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0;
2367 dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0;
2368 }
2369
2370 static void
2371 micro_usne(union tgsi_exec_channel *dst,
2372 const union tgsi_exec_channel *src)
2373 {
2374 dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0;
2375 dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0;
2376 dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0;
2377 dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0;
2378 }
2379
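/* Execute a single TGSI instruction.
 * The program counter is incremented up front so that flow-control
 * opcodes (CAL, RET, loops, END) can simply overwrite *pc with a jump
 * target, or with -1 to halt execution.
 */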
2380 static void
2381 exec_instruction(
2382 struct tgsi_exec_machine *mach,
2383 const struct tgsi_full_instruction *inst,
2384 int *pc )
2385 {
2386 uint chan_index;
2387 union tgsi_exec_channel r[10];
2388 union tgsi_exec_channel d[8];
2389
2390 (*pc)++;
2391
2392 switch (inst->Instruction.Opcode) {
2393 case TGSI_OPCODE_ARL:
2394 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
2395 break;
2396
2397 case TGSI_OPCODE_MOV:
2398 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
2399 break;
2400
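   /* LIT: dst = (1, max(src.x, 0),
    *             src.x > 0 ? max(src.y, 0)^clamp(src.w, -128, 128) : 0,
    *             1) */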
2401 case TGSI_OPCODE_LIT:
2402 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2403 FETCH( &r[0], 0, CHAN_X );
2404 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2405 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2406 }
2407
2408 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2409 FETCH( &r[1], 0, CHAN_Y );
2410 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2411
2412 FETCH( &r[2], 0, CHAN_W );
2413 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
2414 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
2415 micro_pow( &r[1], &r[1], &r[2] );
2416 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2417 }
2418
2419 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2420 STORE(&d[CHAN_Y], 0, CHAN_Y);
2421 }
2422 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2423 STORE(&d[CHAN_Z], 0, CHAN_Z);
2424 }
2425 }
2426 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2427 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2428 }
2429 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2430 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2431 }
2432 break;
2433
2434 case TGSI_OPCODE_RCP:
2435 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2436 break;
2437
2438 case TGSI_OPCODE_RSQ:
2439 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2440 break;
2441
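   /* EXP: dst = (2^floor(src.x), src.x - floor(src.x), 2^src.x, 1) */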
2442 case TGSI_OPCODE_EXP:
2443 FETCH( &r[0], 0, CHAN_X );
2444 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
2445 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2446 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
2447 STORE( &r[2], 0, CHAN_X ); /* store r2 */
2448 }
2449 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2450 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
2451 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
2452 }
2453 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2454 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
2455 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
2456 }
2457 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2458 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2459 }
2460 break;
2461
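   /* LOG: with a = |src.x|:
    * dst = (floor(log2(a)), a / 2^floor(log2(a)), log2(a), 1) */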
2462 case TGSI_OPCODE_LOG:
2463 FETCH( &r[0], 0, CHAN_X );
2464 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
2465 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
2466 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
2467 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2468 STORE( &r[0], 0, CHAN_X );
2469 }
2470 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2471 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
2472 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
2473 STORE( &r[0], 0, CHAN_Y );
2474 }
2475 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2476 STORE( &r[1], 0, CHAN_Z );
2477 }
2478 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2479 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2480 }
2481 break;
2482
2483 case TGSI_OPCODE_MUL:
2484 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2485 FETCH(&r[0], 0, chan_index);
2486 FETCH(&r[1], 1, chan_index);
2487 micro_mul(&d[chan_index], &r[0], &r[1]);
2488 }
2489 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2490 STORE(&d[chan_index], 0, chan_index);
2491 }
2492 break;
2493
2494 case TGSI_OPCODE_ADD:
2495 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2496 FETCH( &r[0], 0, chan_index );
2497 FETCH( &r[1], 1, chan_index );
2498 micro_add(&d[chan_index], &r[0], &r[1]);
2499 }
2500 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2501 STORE(&d[chan_index], 0, chan_index);
2502 }
2503 break;
2504
2505 case TGSI_OPCODE_DP3:
2506 exec_dp3(mach, inst);
2507 break;
2508
2509 case TGSI_OPCODE_DP4:
2510 exec_dp4(mach, inst);
2511 break;
2512
2513 case TGSI_OPCODE_DST:
2514 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2515 FETCH( &r[0], 0, CHAN_Y );
2516 FETCH( &r[1], 1, CHAN_Y);
2517 micro_mul(&d[CHAN_Y], &r[0], &r[1]);
2518 }
2519 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2520 FETCH(&d[CHAN_Z], 0, CHAN_Z);
2521 }
2522 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2523 FETCH(&d[CHAN_W], 1, CHAN_W);
2524 }
2525
2526 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2527 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
2528 }
2529 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2530 STORE(&d[CHAN_Y], 0, CHAN_Y);
2531 }
2532 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2533 STORE(&d[CHAN_Z], 0, CHAN_Z);
2534 }
2535 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2536 STORE(&d[CHAN_W], 0, CHAN_W);
2537 }
2538 break;
2539
2540 case TGSI_OPCODE_MIN:
2541 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2542 FETCH(&r[0], 0, chan_index);
2543 FETCH(&r[1], 1, chan_index);
2544
2545 /* XXX use micro_min()?? */
2546 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
2547 }
2548 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2549 STORE(&d[chan_index], 0, chan_index);
2550 }
2551 break;
2552
2553 case TGSI_OPCODE_MAX:
2554 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2555 FETCH(&r[0], 0, chan_index);
2556 FETCH(&r[1], 1, chan_index);
2557
2558 /* XXX use micro_max()?? */
2559 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
2560 }
2561 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2562 STORE(&d[chan_index], 0, chan_index);
2563 }
2564 break;
2565
2566 case TGSI_OPCODE_SLT:
2567 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2568 break;
2569
2570 case TGSI_OPCODE_SGE:
2571 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2572 break;
2573
2574 case TGSI_OPCODE_MAD:
2575 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2576 break;
2577
2578 case TGSI_OPCODE_SUB:
2579 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2580 FETCH(&r[0], 0, chan_index);
2581 FETCH(&r[1], 1, chan_index);
2582 micro_sub(&d[chan_index], &r[0], &r[1]);
2583 }
2584 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2585 STORE(&d[chan_index], 0, chan_index);
2586 }
2587 break;
2588
2589 case TGSI_OPCODE_LRP:
2590 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2591 break;
2592
2593 case TGSI_OPCODE_CND:
2594 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2595 FETCH(&r[0], 0, chan_index);
2596 FETCH(&r[1], 1, chan_index);
2597 FETCH(&r[2], 2, chan_index);
2598 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2599 }
2600 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2601 STORE(&d[chan_index], 0, chan_index);
2602 }
2603 break;
2604
2605 case TGSI_OPCODE_DP2A:
2606 exec_dp2a(mach, inst);
2607 break;
2608
2609 case TGSI_OPCODE_FRC:
2610 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2611 break;
2612
2613 case TGSI_OPCODE_CLAMP:
2614 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2615 FETCH(&r[0], 0, chan_index);
2616 FETCH(&r[1], 1, chan_index);
2617 micro_max(&r[0], &r[0], &r[1]);
2618 FETCH(&r[1], 2, chan_index);
2619 micro_min(&d[chan_index], &r[0], &r[1]);
2620 }
2621 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2622 STORE(&d[chan_index], 0, chan_index);
2623 }
2624 break;
2625
2626 case TGSI_OPCODE_FLR:
2627 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2628 break;
2629
2630 case TGSI_OPCODE_ROUND:
2631 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2632 break;
2633
2634 case TGSI_OPCODE_EX2:
2635 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2636 break;
2637
2638 case TGSI_OPCODE_LG2:
2639 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2640 break;
2641
2642 case TGSI_OPCODE_POW:
2643 FETCH(&r[0], 0, CHAN_X);
2644 FETCH(&r[1], 1, CHAN_X);
2645
2646 micro_pow( &r[0], &r[0], &r[1] );
2647
2648 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2649 STORE( &r[0], 0, chan_index );
2650 }
2651 break;
2652
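   /* XPD: cross product of src0.xyz and src1.xyz:
    * dst.x = src0.y*src1.z - src0.z*src1.y
    * dst.y = src0.z*src1.x - src0.x*src1.z
    * dst.z = src0.x*src1.y - src0.y*src1.x
    * dst.w = 1 */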
2653 case TGSI_OPCODE_XPD:
2654 FETCH(&r[0], 0, CHAN_Y);
2655 FETCH(&r[1], 1, CHAN_Z);
2656
2657 micro_mul( &r[2], &r[0], &r[1] );
2658
2659 FETCH(&r[3], 0, CHAN_Z);
2660 FETCH(&r[4], 1, CHAN_Y);
2661
2662 micro_mul( &r[5], &r[3], &r[4] );
2663 micro_sub(&d[CHAN_X], &r[2], &r[5]);
2664
2665 FETCH(&r[2], 1, CHAN_X);
2666
2667 micro_mul( &r[3], &r[3], &r[2] );
2668
2669 FETCH(&r[5], 0, CHAN_X);
2670
2671 micro_mul( &r[1], &r[1], &r[5] );
2672 micro_sub(&d[CHAN_Y], &r[3], &r[1]);
2673
2674 micro_mul( &r[5], &r[5], &r[4] );
2675 micro_mul( &r[0], &r[0], &r[2] );
2676 micro_sub(&d[CHAN_Z], &r[5], &r[0]);
2677
2678 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2679 STORE(&d[CHAN_X], 0, CHAN_X);
2680 }
2681 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2682 STORE(&d[CHAN_Y], 0, CHAN_Y);
2683 }
2684 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2685 STORE(&d[CHAN_Z], 0, CHAN_Z);
2686 }
2687 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2688 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2689 }
2690 break;
2691
2692 case TGSI_OPCODE_ABS:
2693 exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2694 break;
2695
2696 case TGSI_OPCODE_RCC:
2697 FETCH(&r[0], 0, CHAN_X);
2698 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2699 micro_float_clamp(&r[0], &r[0]);
2700 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2701 STORE(&r[0], 0, chan_index);
2702 }
2703 break;
2704
2705 case TGSI_OPCODE_DPH:
2706 exec_dph(mach, inst);
2707 break;
2708
2709 case TGSI_OPCODE_COS:
2710 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2711 break;
2712
2713 case TGSI_OPCODE_DDX:
2714 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2715 break;
2716
2717 case TGSI_OPCODE_DDY:
2718 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2719 break;
2720
2721 case TGSI_OPCODE_KILP:
2722 exec_kilp (mach, inst);
2723 break;
2724
2725 case TGSI_OPCODE_KIL:
2726 exec_kil (mach, inst);
2727 break;
2728
2729 case TGSI_OPCODE_PK2H:
2730 assert (0);
2731 break;
2732
2733 case TGSI_OPCODE_PK2US:
2734 assert (0);
2735 break;
2736
2737 case TGSI_OPCODE_PK4B:
2738 assert (0);
2739 break;
2740
2741 case TGSI_OPCODE_PK4UB:
2742 assert (0);
2743 break;
2744
2745 case TGSI_OPCODE_RFL:
2746 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2747 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2748 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2749 /* r0 = dp3(src0, src0) */
2750 FETCH(&r[2], 0, CHAN_X);
2751 micro_mul(&r[0], &r[2], &r[2]);
2752 FETCH(&r[4], 0, CHAN_Y);
2753 micro_mul(&r[8], &r[4], &r[4]);
2754 micro_add(&r[0], &r[0], &r[8]);
2755 FETCH(&r[6], 0, CHAN_Z);
2756 micro_mul(&r[8], &r[6], &r[6]);
2757 micro_add(&r[0], &r[0], &r[8]);
2758
2759 /* r1 = dp3(src0, src1) */
2760 FETCH(&r[3], 1, CHAN_X);
2761 micro_mul(&r[1], &r[2], &r[3]);
2762 FETCH(&r[5], 1, CHAN_Y);
2763 micro_mul(&r[8], &r[4], &r[5]);
2764 micro_add(&r[1], &r[1], &r[8]);
2765 FETCH(&r[7], 1, CHAN_Z);
2766 micro_mul(&r[8], &r[6], &r[7]);
2767 micro_add(&r[1], &r[1], &r[8]);
2768
2769 /* r1 = 2 * r1 / r0 */
2770 micro_add(&r[1], &r[1], &r[1]);
2771 micro_div(&r[1], &r[1], &r[0]);
2772
2773 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2774 micro_mul(&r[2], &r[2], &r[1]);
2775 micro_sub(&r[2], &r[2], &r[3]);
2776 STORE(&r[2], 0, CHAN_X);
2777 }
2778 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2779 micro_mul(&r[4], &r[4], &r[1]);
2780 micro_sub(&r[4], &r[4], &r[5]);
2781 STORE(&r[4], 0, CHAN_Y);
2782 }
2783 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2784 micro_mul(&r[6], &r[6], &r[1]);
2785 micro_sub(&r[6], &r[6], &r[7]);
2786 STORE(&r[6], 0, CHAN_Z);
2787 }
2788 }
2789 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2790 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2791 }
2792 break;
2793
2794 case TGSI_OPCODE_SEQ:
2795 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2796 break;
2797
2798 case TGSI_OPCODE_SFL:
2799 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2800 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2801 }
2802 break;
2803
2804 case TGSI_OPCODE_SGT:
2805 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2806 break;
2807
2808 case TGSI_OPCODE_SIN:
2809 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2810 break;
2811
2812 case TGSI_OPCODE_SLE:
2813 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2814 break;
2815
2816 case TGSI_OPCODE_SNE:
2817 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2818 break;
2819
2820 case TGSI_OPCODE_STR:
2821 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2822 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2823 }
2824 break;
2825
2826 case TGSI_OPCODE_TEX:
2827 /* simple texture lookup */
2828 /* src[0] = texcoord */
2829 /* src[1] = sampler unit */
2830 exec_tex(mach, inst, TEX_MODIFIER_NONE);
2831 break;
2832
2833 case TGSI_OPCODE_TXB:
2834 /* Texture lookup with lod bias */
2835 /* src[0] = texcoord (src[0].w = LOD bias) */
2836 /* src[1] = sampler unit */
2837 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS);
2838 break;
2839
2840 case TGSI_OPCODE_TXD:
2841 /* Texture lookup with explicit partial derivatives */
2842 /* src[0] = texcoord */
2843 /* src[1] = d[strq]/dx */
2844 /* src[2] = d[strq]/dy */
2845 /* src[3] = sampler unit */
2846 exec_txd(mach, inst);
2847 break;
2848
2849 case TGSI_OPCODE_TXL:
2850 /* Texture lookup with explicit LOD */
2851 /* src[0] = texcoord (src[0].w = LOD) */
2852 /* src[1] = sampler unit */
2853 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD);
2854 break;
2855
2856 case TGSI_OPCODE_TXP:
2857 /* Texture lookup with projection */
2858 /* src[0] = texcoord (src[0].w = projection) */
2859 /* src[1] = sampler unit */
2860 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED);
2861 break;
2862
2863 case TGSI_OPCODE_UP2H:
2864 assert (0);
2865 break;
2866
2867 case TGSI_OPCODE_UP2US:
2868 assert (0);
2869 break;
2870
2871 case TGSI_OPCODE_UP4B:
2872 assert (0);
2873 break;
2874
2875 case TGSI_OPCODE_UP4UB:
2876 assert (0);
2877 break;
2878
2879 case TGSI_OPCODE_X2D:
2880 FETCH(&r[0], 1, CHAN_X);
2881 FETCH(&r[1], 1, CHAN_Y);
2882 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2883 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2884 FETCH(&r[2], 2, CHAN_X);
2885 micro_mul(&r[2], &r[2], &r[0]);
2886 FETCH(&r[3], 2, CHAN_Y);
2887 micro_mul(&r[3], &r[3], &r[1]);
2888 micro_add(&r[2], &r[2], &r[3]);
2889 FETCH(&r[3], 0, CHAN_X);
2890 micro_add(&d[CHAN_X], &r[2], &r[3]);
2891
2892 }
2893 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2894 IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2895 FETCH(&r[2], 2, CHAN_Z);
2896 micro_mul(&r[2], &r[2], &r[0]);
2897 FETCH(&r[3], 2, CHAN_W);
2898 micro_mul(&r[3], &r[3], &r[1]);
2899 micro_add(&r[2], &r[2], &r[3]);
2900 FETCH(&r[3], 0, CHAN_Y);
2901 micro_add(&d[CHAN_Y], &r[2], &r[3]);
2902
2903 }
2904 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2905 STORE(&d[CHAN_X], 0, CHAN_X);
2906 }
2907 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2908 STORE(&d[CHAN_Y], 0, CHAN_Y);
2909 }
2910 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2911 STORE(&d[CHAN_X], 0, CHAN_Z);
2912 }
2913 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2914 STORE(&d[CHAN_Y], 0, CHAN_W);
2915 }
2916 break;
2917
2918 case TGSI_OPCODE_ARA:
2919 assert (0);
2920 break;
2921
2922 case TGSI_OPCODE_ARR:
2923 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
2924 break;
2925
2926 case TGSI_OPCODE_BRA:
2927 assert (0);
2928 break;
2929
2930 case TGSI_OPCODE_CAL:
2931 /* skip the call if no execution channels are enabled */
2932 if (mach->ExecMask) {
2933 /* do the call */
2934
2935 /* First, record the depths of the execution stacks.
2936 * This is important for deeply nested/looped return statements.
2937 * We have to unwind the stacks by the correct amount. For a
2938 * real code generator, we could determine the number of entries
2939 * to pop off each stack with simple static analysis and avoid
2940 * implementing this data structure at run time.
2941 */
2942 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
2943 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
2944 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
2945 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
2946 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
2947 /* note that PC was already incremented above */
2948 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
2949
2950 mach->CallStackTop++;
2951
2952 /* Second, push the Cond, Loop, Cont, Func stacks */
2953 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2954 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2955 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2956 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
2957 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
2958 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2959
2960 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2961 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2962 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2963 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
2964 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
2965 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2966
2967 /* Finally, jump to the subroutine */
2968 *pc = inst->Label.Label;
2969 }
2970 break;
2971
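   /* RET: disable the returning channels in FuncMask.  Only once every
    * channel has returned do we pop the call frame and restore the
    * Cond/Loop/Cont/Switch/Break stacks recorded by CAL. */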
2972 case TGSI_OPCODE_RET:
2973 mach->FuncMask &= ~mach->ExecMask;
2974 UPDATE_EXEC_MASK(mach);
2975
2976 if (mach->FuncMask == 0x0) {
2977 /* really return now (otherwise, keep executing) */
2978
2979 if (mach->CallStackTop == 0) {
2980 /* returning from main() */
2981 *pc = -1;
2982 return;
2983 }
2984
2985 assert(mach->CallStackTop > 0);
2986 mach->CallStackTop--;
2987
2988 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
2989 mach->CondMask = mach->CondStack[mach->CondStackTop];
2990
2991 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
2992 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
2993
2994 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
2995 mach->ContMask = mach->ContStack[mach->ContStackTop];
2996
2997 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
2998 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
2999
3000 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3001 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3002
3003 assert(mach->FuncStackTop > 0);
3004 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3005
3006 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3007
3008 UPDATE_EXEC_MASK(mach);
3009 }
3010 break;
3011
3012 case TGSI_OPCODE_SSG:
3013 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3014 break;
3015
3016 case TGSI_OPCODE_CMP:
3017 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3018 FETCH(&r[0], 0, chan_index);
3019 FETCH(&r[1], 1, chan_index);
3020 FETCH(&r[2], 2, chan_index);
3021 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
3022 }
3023 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3024 STORE(&d[chan_index], 0, chan_index);
3025 }
3026 break;
3027
3028 case TGSI_OPCODE_SCS:
3029 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
3030 FETCH( &r[0], 0, CHAN_X );
3031 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
3032 micro_cos(&r[1], &r[0]);
3033 STORE(&r[1], 0, CHAN_X);
3034 }
3035 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
3036 micro_sin(&r[1], &r[0]);
3037 STORE(&r[1], 0, CHAN_Y);
3038 }
3039 }
3040 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
3041 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
3042 }
3043 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
3044 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
3045 }
3046 break;
3047
3048 case TGSI_OPCODE_NRM:
3049 /* 3-component vector normalize */
3050 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
3051 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
3052 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
3053 /* r3 = sqrt(dp3(src0, src0)) */
3054 FETCH(&r[0], 0, CHAN_X);
3055 micro_mul(&r[3], &r[0], &r[0]);
3056 FETCH(&r[1], 0, CHAN_Y);
3057 micro_mul(&r[4], &r[1], &r[1]);
3058 micro_add(&r[3], &r[3], &r[4]);
3059 FETCH(&r[2], 0, CHAN_Z);
3060 micro_mul(&r[4], &r[2], &r[2]);
3061 micro_add(&r[3], &r[3], &r[4]);
3062 micro_sqrt(&r[3], &r[3]);
3063
3064 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
3065 micro_div(&r[0], &r[0], &r[3]);
3066 STORE(&r[0], 0, CHAN_X);
3067 }
3068 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
3069 micro_div(&r[1], &r[1], &r[3]);
3070 STORE(&r[1], 0, CHAN_Y);
3071 }
3072 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
3073 micro_div(&r[2], &r[2], &r[3]);
3074 STORE(&r[2], 0, CHAN_Z);
3075 }
3076 }
3077 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
3078 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
3079 }
3080 break;
3081
3082 case TGSI_OPCODE_NRM4:
3083 /* 4-component vector normalize */
3084 {
3085 union tgsi_exec_channel tmp, dot;
3086
3087 /* tmp = dp4(src0, src0): */
3088 FETCH( &r[0], 0, CHAN_X );
3089 micro_mul( &tmp, &r[0], &r[0] );
3090
3091 FETCH( &r[1], 0, CHAN_Y );
3092 micro_mul( &dot, &r[1], &r[1] );
3093 micro_add( &tmp, &tmp, &dot );
3094
3095 FETCH( &r[2], 0, CHAN_Z );
3096 micro_mul( &dot, &r[2], &r[2] );
3097 micro_add( &tmp, &tmp, &dot );
3098
3099 FETCH( &r[3], 0, CHAN_W );
3100 micro_mul( &dot, &r[3], &r[3] );
3101 micro_add( &tmp, &tmp, &dot );
3102
3103 /* tmp = 1 / sqrt(tmp) */
3104 micro_sqrt( &tmp, &tmp );
3105 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
3106
3107 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3108 /* chan = chan * tmp */
3109 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
3110 STORE( &r[chan_index], 0, chan_index );
3111 }
3112 }
3113 break;
3114
3115 case TGSI_OPCODE_DIV:
3116 assert( 0 );
3117 break;
3118
3119 case TGSI_OPCODE_DP2:
3120 exec_dp2(mach, inst);
3121 break;
3122
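   /* IF/ELSE/ENDIF example: with CondMask = 0xf and the condition true
    * only in channels 0 and 2, IF leaves CondMask = 0x5, ELSE flips it to
    * 0xa within the saved mask, and ENDIF restores 0xf. */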
3123 case TGSI_OPCODE_IF:
3124 /* push CondMask */
3125 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3126 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3127 FETCH( &r[0], 0, CHAN_X );
3128 /* update CondMask */
3129 if( ! r[0].u[0] ) {
3130 mach->CondMask &= ~0x1;
3131 }
3132 if( ! r[0].u[1] ) {
3133 mach->CondMask &= ~0x2;
3134 }
3135 if( ! r[0].u[2] ) {
3136 mach->CondMask &= ~0x4;
3137 }
3138 if( ! r[0].u[3] ) {
3139 mach->CondMask &= ~0x8;
3140 }
3141 UPDATE_EXEC_MASK(mach);
3142 /* Todo: If CondMask==0, jump to ELSE */
3143 break;
3144
3145 case TGSI_OPCODE_ELSE:
3146 /* invert CondMask wrt previous mask */
3147 {
3148 uint prevMask;
3149 assert(mach->CondStackTop > 0);
3150 prevMask = mach->CondStack[mach->CondStackTop - 1];
3151 mach->CondMask = ~mach->CondMask & prevMask;
3152 UPDATE_EXEC_MASK(mach);
3153 /* Todo: If CondMask==0, jump to ENDIF */
3154 }
3155 break;
3156
3157 case TGSI_OPCODE_ENDIF:
3158 /* pop CondMask */
3159 assert(mach->CondStackTop > 0);
3160 mach->CondMask = mach->CondStack[--mach->CondStackTop];
3161 UPDATE_EXEC_MASK(mach);
3162 break;
3163
3164 case TGSI_OPCODE_END:
3165 /* halt execution */
3166 *pc = -1;
3167 break;
3168
3169 case TGSI_OPCODE_REP:
3170 assert (0);
3171 break;
3172
3173 case TGSI_OPCODE_ENDREP:
3174 assert (0);
3175 break;
3176
3177 case TGSI_OPCODE_PUSHA:
3178 assert (0);
3179 break;
3180
3181 case TGSI_OPCODE_POPA:
3182 assert (0);
3183 break;
3184
3185 case TGSI_OPCODE_CEIL:
3186 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3187 break;
3188
3189 case TGSI_OPCODE_I2F:
3190 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
3191 break;
3192
3193 case TGSI_OPCODE_NOT:
3194 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3195 break;
3196
3197 case TGSI_OPCODE_TRUNC:
3198 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3199 break;
3200
3201 case TGSI_OPCODE_SHL:
3202 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3203 break;
3204
3205 case TGSI_OPCODE_AND:
3206 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3207 break;
3208
3209 case TGSI_OPCODE_OR:
3210 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3211 break;
3212
3213 case TGSI_OPCODE_MOD:
3214 assert (0);
3215 break;
3216
3217 case TGSI_OPCODE_XOR:
3218 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3219 break;
3220
3221 case TGSI_OPCODE_SAD:
3222 assert (0);
3223 break;
3224
3225 case TGSI_OPCODE_TXF:
3226 assert (0);
3227 break;
3228
3229 case TGSI_OPCODE_TXQ:
3230 assert (0);
3231 break;
3232
3233 case TGSI_OPCODE_EMIT:
3234 emit_vertex(mach);
3235 break;
3236
3237 case TGSI_OPCODE_ENDPRIM:
3238 emit_primitive(mach);
3239 break;
3240
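   /* BGNFOR, as implemented here: src0.x/y/z are loaded into a loop
    * counter where x is the loop variable written to dst.x, y is the
    * remaining iteration count (the loop exits when y <= 0) and z is the
    * per-iteration increment applied at ENDFOR; execution then falls
    * through to BGNLOOP to push the loop state. */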
3241 case TGSI_OPCODE_BGNFOR:
3242 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3243 for (chan_index = 0; chan_index < 3; chan_index++) {
3244 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
3245 }
3246 ++mach->LoopCounterStackTop;
3247 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
3248 /* update LoopMask */
3249 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3250 mach->LoopMask &= ~0x1;
3251 }
3252 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3253 mach->LoopMask &= ~0x2;
3254 }
3255 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3256 mach->LoopMask &= ~0x4;
3257 }
3258 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3259 mach->LoopMask &= ~0x8;
3260 }
3261 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3262 UPDATE_EXEC_MASK(mach);
3263 /* fall-through (for now) */
3264 case TGSI_OPCODE_BGNLOOP:
3265 /* push LoopMask and ContMask */
3266 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3267 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3268 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3269 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3270
3271 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3272 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3273 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
3274 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3275 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
3276 break;
3277
3278 case TGSI_OPCODE_ENDFOR:
3279 assert(mach->LoopCounterStackTop > 0);
3280 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3281 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3282 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
3283 /* update LoopMask */
3284 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3285 mach->LoopMask &= ~0x1;
3286 }
3287 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3288 mach->LoopMask &= ~0x2;
3289 }
3290 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3291 mach->LoopMask &= ~0x4;
3292 }
3293 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3294 mach->LoopMask &= ~0x8;
3295 }
3296 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3297 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3298 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
3299 assert(mach->LoopLabelStackTop > 0);
3300 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
3301 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); /* write the updated loop variable to the BGNFOR instruction's dst */
3302 /* Restore ContMask, but don't pop */
3303 assert(mach->ContStackTop > 0);
3304 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3305 UPDATE_EXEC_MASK(mach);
3306 if (mach->ExecMask) {
3307 /* repeat loop: jump to instruction just past BGNLOOP */
3308 assert(mach->LoopLabelStackTop > 0);
3309 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3310 }
3311 else {
3312 /* exit loop: pop LoopMask */
3313 assert(mach->LoopStackTop > 0);
3314 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3315 /* pop ContMask */
3316 assert(mach->ContStackTop > 0);
3317 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3318 assert(mach->LoopLabelStackTop > 0);
3319 --mach->LoopLabelStackTop;
3320 assert(mach->LoopCounterStackTop > 0);
3321 --mach->LoopCounterStackTop;
3322
3323 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3324 }
3325 UPDATE_EXEC_MASK(mach);
3326 break;
3327
3328 case TGSI_OPCODE_ENDLOOP:
3329 /* Restore ContMask, but don't pop */
3330 assert(mach->ContStackTop > 0);
3331 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3332 UPDATE_EXEC_MASK(mach);
3333 if (mach->ExecMask) {
3334 /* repeat loop: jump to instruction just past BGNLOOP */
3335 assert(mach->LoopLabelStackTop > 0);
3336 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3337 }
3338 else {
3339 /* exit loop: pop LoopMask */
3340 assert(mach->LoopStackTop > 0);
3341 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3342 /* pop ContMask */
3343 assert(mach->ContStackTop > 0);
3344 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3345 assert(mach->LoopLabelStackTop > 0);
3346 --mach->LoopLabelStackTop;
3347
3348 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3349 }
3350 UPDATE_EXEC_MASK(mach);
3351 break;
3352
3353 case TGSI_OPCODE_BRK:
3354 exec_break(mach);
3355 break;
3356
3357 case TGSI_OPCODE_CONT:
3358 /* turn off cont channels for each enabled exec channel */
3359 mach->ContMask &= ~mach->ExecMask;
3360 /* Todo: if mach->ExecMask == 0, jump to end of loop */
3361 UPDATE_EXEC_MASK(mach);
3362 break;
3363
3364 case TGSI_OPCODE_BGNSUB:
3365 /* no-op */
3366 break;
3367
3368 case TGSI_OPCODE_ENDSUB:
3369 /*
3370 * XXX: This really should be a no-op. We should never reach this opcode.
3371 */
3372
3373 assert(mach->CallStackTop > 0);
3374 mach->CallStackTop--;
3375
3376 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3377 mach->CondMask = mach->CondStack[mach->CondStackTop];
3378
3379 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3380 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3381
3382 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3383 mach->ContMask = mach->ContStack[mach->ContStackTop];
3384
3385 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3386 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3387
3388 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3389 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3390
3391 assert(mach->FuncStackTop > 0);
3392 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3393
3394 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3395
3396 UPDATE_EXEC_MASK(mach);
3397 break;
3398
3399 case TGSI_OPCODE_NOP:
3400 break;
3401
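   /* BREAKC: conditional break; any active channel with a non-zero
    * src0.x has its LoopMask bit cleared. */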
3402 case TGSI_OPCODE_BREAKC:
3403 FETCH(&r[0], 0, CHAN_X);
3404 /* update LoopMask */
3405 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
3406 mach->LoopMask &= ~0x1;
3407 }
3408 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
3409 mach->LoopMask &= ~0x2;
3410 }
3411 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
3412 mach->LoopMask &= ~0x4;
3413 }
3414 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
3415 mach->LoopMask &= ~0x8;
3416 }
3417 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3418 UPDATE_EXEC_MASK(mach);
3419 break;
3420
3421 case TGSI_OPCODE_F2I:
3422 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3423 break;
3424
3425 case TGSI_OPCODE_IDIV:
3426 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3427 break;
3428
3429 case TGSI_OPCODE_IMAX:
3430 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3431 break;
3432
3433 case TGSI_OPCODE_IMIN:
3434 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3435 break;
3436
3437 case TGSI_OPCODE_INEG:
3438 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3439 break;
3440
3441 case TGSI_OPCODE_ISGE:
3442 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3443 break;
3444
3445 case TGSI_OPCODE_ISHR:
3446 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3447 break;
3448
3449 case TGSI_OPCODE_ISLT:
3450 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3451 break;
3452
3453 case TGSI_OPCODE_F2U:
3454 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
3455 break;
3456
3457 case TGSI_OPCODE_U2F:
3458 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
3459 break;
3460
3461 case TGSI_OPCODE_UADD:
3462 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3463 break;
3464
3465 case TGSI_OPCODE_UDIV:
3466 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3467 break;
3468
3469 case TGSI_OPCODE_UMAD:
3470 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3471 break;
3472
3473 case TGSI_OPCODE_UMAX:
3474 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3475 break;
3476
3477 case TGSI_OPCODE_UMIN:
3478 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3479 break;
3480
3481 case TGSI_OPCODE_UMOD:
3482 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3483 break;
3484
3485 case TGSI_OPCODE_UMUL:
3486 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3487 break;
3488
3489 case TGSI_OPCODE_USEQ:
3490 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3491 break;
3492
3493 case TGSI_OPCODE_USGE:
3494 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3495 break;
3496
3497 case TGSI_OPCODE_USHR:
3498 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3499 break;
3500
3501 case TGSI_OPCODE_USLT:
3502 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3503 break;
3504
3505 case TGSI_OPCODE_USNE:
3506 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3507 break;
3508
3509 case TGSI_OPCODE_SWITCH:
3510 exec_switch(mach, inst);
3511 break;
3512
3513 case TGSI_OPCODE_CASE:
3514 exec_case(mach, inst);
3515 break;
3516
3517 case TGSI_OPCODE_DEFAULT:
3518 exec_default(mach);
3519 break;
3520
3521 case TGSI_OPCODE_ENDSWITCH:
3522 exec_endswitch(mach);
3523 break;
3524
3525 default:
3526 assert( 0 );
3527 }
3528 }
3529
3530
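/* Set DEBUG_EXECUTION to 1 to dump each instruction as it executes and
 * print any temporary or output registers that changed afterwards.
 */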
3531 #define DEBUG_EXECUTION 0
3532
3533
3534 /**
3535 * Run TGSI interpreter.
3536 * \return bitmask of "alive" quad components
3537 */
3538 uint
3539 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3540 {
3541 uint i;
3542 int pc = 0;
3543
3544 mach->CondMask = 0xf;
3545 mach->LoopMask = 0xf;
3546 mach->ContMask = 0xf;
3547 mach->FuncMask = 0xf;
3548 mach->ExecMask = 0xf;
3549
3550 mach->Switch.mask = 0xf;
3551
3552 assert(mach->CondStackTop == 0);
3553 assert(mach->LoopStackTop == 0);
3554 assert(mach->ContStackTop == 0);
3555 assert(mach->SwitchStackTop == 0);
3556 assert(mach->BreakStackTop == 0);
3557 assert(mach->CallStackTop == 0);
3558
3559 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3560 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3561
3562 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3563 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3564 mach->Primitives[0] = 0;
3565 }
3566
3567 for (i = 0; i < QUAD_SIZE; i++) {
3568 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3569 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3570 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3571 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3572 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3573 }
3574
3575 /* execute declarations (interpolants) */
3576 for (i = 0; i < mach->NumDeclarations; i++) {
3577 exec_declaration( mach, mach->Declarations+i );
3578 }
3579
3580 {
3581 #if DEBUG_EXECUTION
3582 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
3583 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
3584 uint inst = 1;
3585
3586 memcpy(temps, mach->Temps, sizeof(temps));
3587 memcpy(outputs, mach->Outputs, sizeof(outputs));
3588 #endif
3589
3590 /* execute instructions, until pc is set to -1 */
3591 while (pc != -1) {
3592
3593 #if DEBUG_EXECUTION
3594 uint i;
3595
3596 tgsi_dump_instruction(&mach->Instructions[pc], inst++);
3597 #endif
3598
3599 assert(pc < (int) mach->NumInstructions);
3600 exec_instruction(mach, mach->Instructions + pc, &pc);
3601
3602 #if DEBUG_EXECUTION
3603 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
3604 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
3605 uint j;
3606
3607 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
3608 debug_printf("TEMP[%2u] = ", i);
3609 for (j = 0; j < 4; j++) {
3610 if (j > 0) {
3611 debug_printf(" ");
3612 }
3613 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3614 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
3615 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
3616 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
3617 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
3618 }
3619 }
3620 }
3621 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
3622 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
3623 uint j;
3624
3625 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
3626 debug_printf("OUT[%2u] = ", i);
3627 for (j = 0; j < 4; j++) {
3628 if (j > 0) {
3629 debug_printf(" ");
3630 }
3631 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3632 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
3633 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
3634 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
3635 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
3636 }
3637 }
3638 }
3639 #endif
3640 }
3641 }
3642
3643 #if 0
3644 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3645 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3646 /*
3647 * Scale back depth component.
3648 */
3649 for (i = 0; i < 4; i++)
3650 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3651 }
3652 #endif
3653
3654 assert(mach->CondStackTop == 0);
3655 assert(mach->LoopStackTop == 0);
3656 assert(mach->ContStackTop == 0);
3657 assert(mach->SwitchStackTop == 0);
3658 assert(mach->BreakStackTop == 0);
3659 assert(mach->CallStackTop == 0);
3660
3661 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3662 }