tgsi: Simplify implementation of few interpreter's instructions.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
 * The ExecMask is computed by UPDATE_EXEC_MASK() from five other masks
 * (CondMask, LoopMask, ContMask, Switch.mask and FuncMask), which are
 * controlled by the flow control instructions (e.g. IF/ELSE/ENDIF,
 * LOOP/ENDLOOP and CONT).
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
63
64
/*
 * When non-zero, micro_exp2(), micro_lg2() and micro_pow() use the
 * util_fast_* routines instead of the libm powf()/logf() calls.
 */
#define FAST_MATH 1
66
/*
 * Lane indices used by micro_ddx()/micro_ddy() to pick values out of a
 * channel.  NOTE(review): presumably the position of each pixel inside
 * the 2x2 quad -- confirm against the rasterizer's quad layout.
 */
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3
71
72 static void
73 micro_abs(union tgsi_exec_channel *dst,
74 const union tgsi_exec_channel *src)
75 {
76 dst->f[0] = fabsf(src->f[0]);
77 dst->f[1] = fabsf(src->f[1]);
78 dst->f[2] = fabsf(src->f[2]);
79 dst->f[3] = fabsf(src->f[3]);
80 }
81
82 static void
83 micro_arl(union tgsi_exec_channel *dst,
84 const union tgsi_exec_channel *src)
85 {
86 dst->i[0] = (int)floorf(src->f[0]);
87 dst->i[1] = (int)floorf(src->f[1]);
88 dst->i[2] = (int)floorf(src->f[2]);
89 dst->i[3] = (int)floorf(src->f[3]);
90 }
91
92 static void
93 micro_arr(union tgsi_exec_channel *dst,
94 const union tgsi_exec_channel *src)
95 {
96 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
97 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
98 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
99 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
100 }
101
102 static void
103 micro_ceil(union tgsi_exec_channel *dst,
104 const union tgsi_exec_channel *src)
105 {
106 dst->f[0] = ceilf(src->f[0]);
107 dst->f[1] = ceilf(src->f[1]);
108 dst->f[2] = ceilf(src->f[2]);
109 dst->f[3] = ceilf(src->f[3]);
110 }
111
112 static void
113 micro_cos(union tgsi_exec_channel *dst,
114 const union tgsi_exec_channel *src)
115 {
116 dst->f[0] = cosf(src->f[0]);
117 dst->f[1] = cosf(src->f[1]);
118 dst->f[2] = cosf(src->f[2]);
119 dst->f[3] = cosf(src->f[3]);
120 }
121
122 static void
123 micro_ddx(union tgsi_exec_channel *dst,
124 const union tgsi_exec_channel *src)
125 {
126 dst->f[0] =
127 dst->f[1] =
128 dst->f[2] =
129 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
130 }
131
132 static void
133 micro_ddy(union tgsi_exec_channel *dst,
134 const union tgsi_exec_channel *src)
135 {
136 dst->f[0] =
137 dst->f[1] =
138 dst->f[2] =
139 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
140 }
141
142 static void
143 micro_exp2(union tgsi_exec_channel *dst,
144 const union tgsi_exec_channel *src)
145 {
146 #if FAST_MATH
147 dst->f[0] = util_fast_exp2(src->f[0]);
148 dst->f[1] = util_fast_exp2(src->f[1]);
149 dst->f[2] = util_fast_exp2(src->f[2]);
150 dst->f[3] = util_fast_exp2(src->f[3]);
151 #else
152 #if DEBUG
153 /* Inf is okay for this instruction, so clamp it to silence assertions. */
154 uint i;
155 union tgsi_exec_channel clamped;
156
157 for (i = 0; i < 4; i++) {
158 if (src->f[i] > 127.99999f) {
159 clamped.f[i] = 127.99999f;
160 } else if (src->f[i] < -126.99999f) {
161 clamped.f[i] = -126.99999f;
162 } else {
163 clamped.f[i] = src->f[i];
164 }
165 }
166 src = &clamped;
167 #endif /* DEBUG */
168
169 dst->f[0] = powf(2.0f, src->f[0]);
170 dst->f[1] = powf(2.0f, src->f[1]);
171 dst->f[2] = powf(2.0f, src->f[2]);
172 dst->f[3] = powf(2.0f, src->f[3]);
173 #endif /* FAST_MATH */
174 }
175
176 static void
177 micro_flr(union tgsi_exec_channel *dst,
178 const union tgsi_exec_channel *src)
179 {
180 dst->f[0] = floorf(src->f[0]);
181 dst->f[1] = floorf(src->f[1]);
182 dst->f[2] = floorf(src->f[2]);
183 dst->f[3] = floorf(src->f[3]);
184 }
185
186 static void
187 micro_frc(union tgsi_exec_channel *dst,
188 const union tgsi_exec_channel *src)
189 {
190 dst->f[0] = src->f[0] - floorf(src->f[0]);
191 dst->f[1] = src->f[1] - floorf(src->f[1]);
192 dst->f[2] = src->f[2] - floorf(src->f[2]);
193 dst->f[3] = src->f[3] - floorf(src->f[3]);
194 }
195
196 static void
197 micro_iabs(union tgsi_exec_channel *dst,
198 const union tgsi_exec_channel *src)
199 {
200 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
201 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
202 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
203 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
204 }
205
206 static void
207 micro_ineg(union tgsi_exec_channel *dst,
208 const union tgsi_exec_channel *src)
209 {
210 dst->i[0] = -src->i[0];
211 dst->i[1] = -src->i[1];
212 dst->i[2] = -src->i[2];
213 dst->i[3] = -src->i[3];
214 }
215
216 static void
217 micro_lg2(union tgsi_exec_channel *dst,
218 const union tgsi_exec_channel *src)
219 {
220 #if FAST_MATH
221 dst->f[0] = util_fast_log2(src->f[0]);
222 dst->f[1] = util_fast_log2(src->f[1]);
223 dst->f[2] = util_fast_log2(src->f[2]);
224 dst->f[3] = util_fast_log2(src->f[3]);
225 #else
226 dst->f[0] = logf(src->f[0]) * 1.442695f;
227 dst->f[1] = logf(src->f[1]) * 1.442695f;
228 dst->f[2] = logf(src->f[2]) * 1.442695f;
229 dst->f[3] = logf(src->f[3]) * 1.442695f;
230 #endif
231 }
232
233 static void
234 micro_lrp(union tgsi_exec_channel *dst,
235 const union tgsi_exec_channel *src)
236 {
237 dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0];
238 dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1];
239 dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2];
240 dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3];
241 }
242
243 static void
244 micro_mad(union tgsi_exec_channel *dst,
245 const union tgsi_exec_channel *src)
246 {
247 dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0];
248 dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1];
249 dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2];
250 dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3];
251 }
252
253 static void
254 micro_mov(union tgsi_exec_channel *dst,
255 const union tgsi_exec_channel *src)
256 {
257 dst->u[0] = src->u[0];
258 dst->u[1] = src->u[1];
259 dst->u[2] = src->u[2];
260 dst->u[3] = src->u[3];
261 }
262
263 static void
264 micro_rcp(union tgsi_exec_channel *dst,
265 const union tgsi_exec_channel *src)
266 {
267 dst->f[0] = 1.0f / src->f[0];
268 dst->f[1] = 1.0f / src->f[1];
269 dst->f[2] = 1.0f / src->f[2];
270 dst->f[3] = 1.0f / src->f[3];
271 }
272
273 static void
274 micro_rnd(union tgsi_exec_channel *dst,
275 const union tgsi_exec_channel *src)
276 {
277 dst->f[0] = floorf(src->f[0] + 0.5f);
278 dst->f[1] = floorf(src->f[1] + 0.5f);
279 dst->f[2] = floorf(src->f[2] + 0.5f);
280 dst->f[3] = floorf(src->f[3] + 0.5f);
281 }
282
283 static void
284 micro_rsq(union tgsi_exec_channel *dst,
285 const union tgsi_exec_channel *src)
286 {
287 dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
288 dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
289 dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
290 dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
291 }
292
293 static void
294 micro_seq(union tgsi_exec_channel *dst,
295 const union tgsi_exec_channel *src)
296 {
297 dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f;
298 dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f;
299 dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f;
300 dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f;
301 }
302
303 static void
304 micro_sge(union tgsi_exec_channel *dst,
305 const union tgsi_exec_channel *src)
306 {
307 dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f;
308 dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f;
309 dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f;
310 dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f;
311 }
312
313 static void
314 micro_sgn(union tgsi_exec_channel *dst,
315 const union tgsi_exec_channel *src)
316 {
317 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
318 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
319 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
320 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
321 }
322
323 static void
324 micro_sgt(union tgsi_exec_channel *dst,
325 const union tgsi_exec_channel *src)
326 {
327 dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f;
328 dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f;
329 dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f;
330 dst->f[3] = src[0].f[3] > src[1].f[3] ? 1.0f : 0.0f;
331 }
332
333 static void
334 micro_sin(union tgsi_exec_channel *dst,
335 const union tgsi_exec_channel *src)
336 {
337 dst->f[0] = sinf(src->f[0]);
338 dst->f[1] = sinf(src->f[1]);
339 dst->f[2] = sinf(src->f[2]);
340 dst->f[3] = sinf(src->f[3]);
341 }
342
343 static void
344 micro_sle(union tgsi_exec_channel *dst,
345 const union tgsi_exec_channel *src)
346 {
347 dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f;
348 dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f;
349 dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f;
350 dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f;
351 }
352
353 static void
354 micro_slt(union tgsi_exec_channel *dst,
355 const union tgsi_exec_channel *src)
356 {
357 dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f;
358 dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f;
359 dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f;
360 dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f;
361 }
362
363 static void
364 micro_sne(union tgsi_exec_channel *dst,
365 const union tgsi_exec_channel *src)
366 {
367 dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f;
368 dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f;
369 dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f;
370 dst->f[3] = src[0].f[3] != src[1].f[3] ? 1.0f : 0.0f;
371 }
372
373 static void
374 micro_trunc(union tgsi_exec_channel *dst,
375 const union tgsi_exec_channel *src)
376 {
377 dst->f[0] = (float)(int)src->f[0];
378 dst->f[1] = (float)(int)src->f[1];
379 dst->f[2] = (float)(int)src->f[2];
380 dst->f[3] = (float)(int)src->f[3];
381 }
382
383
/* Numeric indices of the X, Y, Z and W channels of a register. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
388
/**
 * Tag naming how the lanes of a channel are to be interpreted
 * (float, signed integer or unsigned integer).
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT,
   TGSI_EXEC_DATA_INT,
   TGSI_EXEC_DATA_UINT
};
394
/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
 *
 * A utility value is addressed as mach->Temps[NAME_I].xyzw[NAME_C].
 * TEMP_R0 and TEMP_P0 have no _I/_C pair.
 */
#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I TGSI_EXEC_TEMP_128_I
#define TEMP_128_C TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0 TGSI_EXEC_TEMP_R0
#define TEMP_P0 TGSI_EXEC_TEMP_P0
428
/** Non-zero when channel CHAN is set in the write mask of Dst[0] of INST. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))

/** Same as IS_CHANNEL_ENABLED, but for the second destination, Dst[1]. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the statement that follows once for every channel
 * enabled in the write mask of Dst[0], with CHAN set to its index.
 *
 * The "if (!...) {} else" form keeps the macro a single statement prefix
 * whose inner `if` already owns an `else`, so an `else` written after the
 * loop body binds to the caller's own `if` and not to the macro's.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (!IS_CHANNEL_ENABLED( INST, CHAN )) {} else

/** Same as FOR_EACH_ENABLED_CHANNEL, but for the write mask of Dst[1]. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (!IS_CHANNEL_ENABLED2( INST, CHAN )) {} else
442
443
/**
 * Recompute the execution mask of MACH as the bitwise AND of its
 * CondMask, LoopMask, ContMask, Switch.mask and FuncMask.
 *
 * MACH is a pointer expression; it is evaluated several times, so it
 * must be free of side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
447
448
/* Channel constant whose initializer sets four lanes to zero. */
static const union tgsi_exec_channel ZeroVec =
   { { 0.0, 0.0, 0.0, 0.0 } };
451
452
/*
 * Assert that util_is_inf_or_nan() is false for each of the four float
 * lanes of the channel pointed to by chan.  The argument is evaluated
 * once per lane, so it must be free of side effects.
 */
#define CHECK_INF_OR_NAN(chan) do {\
      assert(!util_is_inf_or_nan((chan)->f[0]));\
      assert(!util_is_inf_or_nan((chan)->f[1]));\
      assert(!util_is_inf_or_nan((chan)->f[2]));\
      assert(!util_is_inf_or_nan((chan)->f[3]));\
   } while (0)
459
460
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of a channel on one line,
 * prefixed by msg.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg,
                chan->f[0],
                chan->f[1],
                chan->f[2],
                chan->f[3]);
}
#endif
469
470
#ifdef DEBUG
/**
 * Debug helper: print temporary register mach->Temps[index], one line
 * per X/Y/Z/W channel with that channel's four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char channel_names[] = "XYZW";
   const struct tgsi_exec_vector *vec = &mach->Temps[index];
   int c;

   debug_printf("Temp[%u] =\n", index);
   for (c = 0; c < 4; c++) {
      const union tgsi_exec_channel *lanes = &vec->xyzw[c];

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   channel_names[c],
                   lanes->f[0],
                   lanes->f[1],
                   lanes->f[2],
                   lanes->f[3]);
   }
}
#endif
488
489
490 /**
491 * Check if there's a potential src/dst register data dependency when
492 * using SOA execution.
493 * Example:
494 * MOV T, T.yxwz;
495 * This would expand into:
496 * MOV t0, t1;
497 * MOV t1, t0;
498 * MOV t2, t3;
499 * MOV t3, t2;
500 * The second instruction will have the wrong value for t0 if executed as-is.
501 */
502 boolean
503 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
504 {
505 uint i, chan;
506
507 uint writemask = inst->Dst[0].Register.WriteMask;
508 if (writemask == TGSI_WRITEMASK_X ||
509 writemask == TGSI_WRITEMASK_Y ||
510 writemask == TGSI_WRITEMASK_Z ||
511 writemask == TGSI_WRITEMASK_W ||
512 writemask == TGSI_WRITEMASK_NONE) {
513 /* no chance of data dependency */
514 return FALSE;
515 }
516
517 /* loop over src regs */
518 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
519 if ((inst->Src[i].Register.File ==
520 inst->Dst[0].Register.File) &&
521 (inst->Src[i].Register.Index ==
522 inst->Dst[0].Register.Index)) {
523 /* loop over dest channels */
524 uint channelsWritten = 0x0;
525 FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
526 /* check if we're reading a channel that's been written */
527 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
528 if (channelsWritten & (1 << swizzle)) {
529 return TRUE;
530 }
531
532 channelsWritten |= (1 << chan);
533 }
534 }
535 }
536 return FALSE;
537 }
538
539
540 /**
541 * Initialize machine state by expanding tokens to full instructions,
542 * allocating temporary storage, setting up constants, etc.
543 * After this, we can call tgsi_exec_machine_run() many times.
544 */
545 void
546 tgsi_exec_machine_bind_shader(
547 struct tgsi_exec_machine *mach,
548 const struct tgsi_token *tokens,
549 uint numSamplers,
550 struct tgsi_sampler **samplers)
551 {
552 uint k;
553 struct tgsi_parse_context parse;
554 struct tgsi_exec_labels *labels = &mach->Labels;
555 struct tgsi_full_instruction *instructions;
556 struct tgsi_full_declaration *declarations;
557 uint maxInstructions = 10, numInstructions = 0;
558 uint maxDeclarations = 10, numDeclarations = 0;
559 uint instno = 0;
560
561 #if 0
562 tgsi_dump(tokens, 0);
563 #endif
564
565 util_init_math();
566
567 mach->Tokens = tokens;
568 mach->Samplers = samplers;
569
570 k = tgsi_parse_init (&parse, mach->Tokens);
571 if (k != TGSI_PARSE_OK) {
572 debug_printf( "Problem parsing!\n" );
573 return;
574 }
575
576 mach->Processor = parse.FullHeader.Processor.Processor;
577 mach->ImmLimit = 0;
578 labels->count = 0;
579
580 declarations = (struct tgsi_full_declaration *)
581 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
582
583 if (!declarations) {
584 return;
585 }
586
587 instructions = (struct tgsi_full_instruction *)
588 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
589
590 if (!instructions) {
591 FREE( declarations );
592 return;
593 }
594
595 while( !tgsi_parse_end_of_tokens( &parse ) ) {
596 uint pointer = parse.Position;
597 uint i;
598
599 tgsi_parse_token( &parse );
600 switch( parse.FullToken.Token.Type ) {
601 case TGSI_TOKEN_TYPE_DECLARATION:
602 /* save expanded declaration */
603 if (numDeclarations == maxDeclarations) {
604 declarations = REALLOC(declarations,
605 maxDeclarations
606 * sizeof(struct tgsi_full_declaration),
607 (maxDeclarations + 10)
608 * sizeof(struct tgsi_full_declaration));
609 maxDeclarations += 10;
610 }
611 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
612 unsigned reg;
613 for (reg = parse.FullToken.FullDeclaration.Range.First;
614 reg <= parse.FullToken.FullDeclaration.Range.Last;
615 ++reg) {
616 ++mach->NumOutputs;
617 }
618 }
619 memcpy(declarations + numDeclarations,
620 &parse.FullToken.FullDeclaration,
621 sizeof(declarations[0]));
622 numDeclarations++;
623 break;
624
625 case TGSI_TOKEN_TYPE_IMMEDIATE:
626 {
627 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
628 assert( size <= 4 );
629 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
630
631 for( i = 0; i < size; i++ ) {
632 mach->Imms[mach->ImmLimit][i] =
633 parse.FullToken.FullImmediate.u[i].Float;
634 }
635 mach->ImmLimit += 1;
636 }
637 break;
638
639 case TGSI_TOKEN_TYPE_INSTRUCTION:
640 assert( labels->count < MAX_LABELS );
641
642 labels->labels[labels->count][0] = instno;
643 labels->labels[labels->count][1] = pointer;
644 labels->count++;
645
646 /* save expanded instruction */
647 if (numInstructions == maxInstructions) {
648 instructions = REALLOC(instructions,
649 maxInstructions
650 * sizeof(struct tgsi_full_instruction),
651 (maxInstructions + 10)
652 * sizeof(struct tgsi_full_instruction));
653 maxInstructions += 10;
654 }
655
656 memcpy(instructions + numInstructions,
657 &parse.FullToken.FullInstruction,
658 sizeof(instructions[0]));
659
660 numInstructions++;
661 break;
662
663 case TGSI_TOKEN_TYPE_PROPERTY:
664 break;
665
666 default:
667 assert( 0 );
668 }
669 }
670 tgsi_parse_free (&parse);
671
672 if (mach->Declarations) {
673 FREE( mach->Declarations );
674 }
675 mach->Declarations = declarations;
676 mach->NumDeclarations = numDeclarations;
677
678 if (mach->Instructions) {
679 FREE( mach->Instructions );
680 }
681 mach->Instructions = instructions;
682 mach->NumInstructions = numInstructions;
683 }
684
685
686 struct tgsi_exec_machine *
687 tgsi_exec_machine_create( void )
688 {
689 struct tgsi_exec_machine *mach;
690 uint i;
691
692 mach = align_malloc( sizeof *mach, 16 );
693 if (!mach)
694 goto fail;
695
696 memset(mach, 0, sizeof(*mach));
697
698 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
699 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
700 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
701
702 /* Setup constants. */
703 for( i = 0; i < 4; i++ ) {
704 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
705 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
706 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
707 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
708 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
709 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
710 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
711 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
712 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
713 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
714 }
715
716 #ifdef DEBUG
717 /* silence warnings */
718 (void) print_chan;
719 (void) print_temp;
720 #endif
721
722 return mach;
723
724 fail:
725 align_free(mach);
726 return NULL;
727 }
728
729
730 void
731 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
732 {
733 if (mach) {
734 FREE(mach->Instructions);
735 FREE(mach->Declarations);
736 }
737
738 align_free(mach);
739 }
740
741 static void
742 micro_add(
743 union tgsi_exec_channel *dst,
744 const union tgsi_exec_channel *src0,
745 const union tgsi_exec_channel *src1 )
746 {
747 dst->f[0] = src0->f[0] + src1->f[0];
748 dst->f[1] = src0->f[1] + src1->f[1];
749 dst->f[2] = src0->f[2] + src1->f[2];
750 dst->f[3] = src0->f[3] + src1->f[3];
751 }
752
753 static void
754 micro_div(
755 union tgsi_exec_channel *dst,
756 const union tgsi_exec_channel *src0,
757 const union tgsi_exec_channel *src1 )
758 {
759 if (src1->f[0] != 0) {
760 dst->f[0] = src0->f[0] / src1->f[0];
761 }
762 if (src1->f[1] != 0) {
763 dst->f[1] = src0->f[1] / src1->f[1];
764 }
765 if (src1->f[2] != 0) {
766 dst->f[2] = src0->f[2] / src1->f[2];
767 }
768 if (src1->f[3] != 0) {
769 dst->f[3] = src0->f[3] / src1->f[3];
770 }
771 }
772
773 static void
774 micro_float_clamp(union tgsi_exec_channel *dst,
775 const union tgsi_exec_channel *src)
776 {
777 uint i;
778
779 for (i = 0; i < 4; i++) {
780 if (src->f[i] > 0.0f) {
781 if (src->f[i] > 1.884467e+019f)
782 dst->f[i] = 1.884467e+019f;
783 else if (src->f[i] < 5.42101e-020f)
784 dst->f[i] = 5.42101e-020f;
785 else
786 dst->f[i] = src->f[i];
787 }
788 else {
789 if (src->f[i] < -1.884467e+019f)
790 dst->f[i] = -1.884467e+019f;
791 else if (src->f[i] > -5.42101e-020f)
792 dst->f[i] = -5.42101e-020f;
793 else
794 dst->f[i] = src->f[i];
795 }
796 }
797 }
798
799 static void
800 micro_lt(
801 union tgsi_exec_channel *dst,
802 const union tgsi_exec_channel *src0,
803 const union tgsi_exec_channel *src1,
804 const union tgsi_exec_channel *src2,
805 const union tgsi_exec_channel *src3 )
806 {
807 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
808 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
809 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
810 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
811 }
812
813 static void
814 micro_max(
815 union tgsi_exec_channel *dst,
816 const union tgsi_exec_channel *src0,
817 const union tgsi_exec_channel *src1 )
818 {
819 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
820 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
821 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
822 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
823 }
824
825 static void
826 micro_min(
827 union tgsi_exec_channel *dst,
828 const union tgsi_exec_channel *src0,
829 const union tgsi_exec_channel *src1 )
830 {
831 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
832 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
833 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
834 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
835 }
836
837 static void
838 micro_mul(
839 union tgsi_exec_channel *dst,
840 const union tgsi_exec_channel *src0,
841 const union tgsi_exec_channel *src1 )
842 {
843 dst->f[0] = src0->f[0] * src1->f[0];
844 dst->f[1] = src0->f[1] * src1->f[1];
845 dst->f[2] = src0->f[2] * src1->f[2];
846 dst->f[3] = src0->f[3] * src1->f[3];
847 }
848
#if 0
/**
 * Disabled code.  Signed multiply writing two result channels: dst1
 * receives the int product of each lane and dst0 is zeroed.
 */
static void
micro_imul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
867
#if 0
/**
 * Disabled code.  Unsigned multiply writing two result channels: dst1
 * receives the unsigned product of each lane and dst0 is zeroed.
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->u[lane] = src0->u[lane] * src1->u[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->u[lane] = 0;
   }
}
#endif
886
887
#if 0
/**
 * Disabled code.  Per-lane conditional move on the unsigned view:
 * dst = src0 ? src1 : src2.
 */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane]) {
         dst->u[lane] = src1->u[lane];
      } else {
         dst->u[lane] = src2->u[lane];
      }
   }
}
#endif
902
903 static void
904 micro_neg(
905 union tgsi_exec_channel *dst,
906 const union tgsi_exec_channel *src )
907 {
908 dst->f[0] = -src->f[0];
909 dst->f[1] = -src->f[1];
910 dst->f[2] = -src->f[2];
911 dst->f[3] = -src->f[3];
912 }
913
914 static void
915 micro_pow(
916 union tgsi_exec_channel *dst,
917 const union tgsi_exec_channel *src0,
918 const union tgsi_exec_channel *src1 )
919 {
920 #if FAST_MATH
921 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
922 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
923 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
924 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
925 #else
926 dst->f[0] = powf( src0->f[0], src1->f[0] );
927 dst->f[1] = powf( src0->f[1], src1->f[1] );
928 dst->f[2] = powf( src0->f[2], src1->f[2] );
929 dst->f[3] = powf( src0->f[3], src1->f[3] );
930 #endif
931 }
932
933 static void
934 micro_sqrt( union tgsi_exec_channel *dst,
935 const union tgsi_exec_channel *src )
936 {
937 dst->f[0] = sqrtf( src->f[0] );
938 dst->f[1] = sqrtf( src->f[1] );
939 dst->f[2] = sqrtf( src->f[2] );
940 dst->f[3] = sqrtf( src->f[3] );
941 }
942
943 static void
944 micro_sub(
945 union tgsi_exec_channel *dst,
946 const union tgsi_exec_channel *src0,
947 const union tgsi_exec_channel *src1 )
948 {
949 dst->f[0] = src0->f[0] - src1->f[0];
950 dst->f[1] = src0->f[1] - src1->f[1];
951 dst->f[2] = src0->f[2] - src1->f[2];
952 dst->f[3] = src0->f[3] - src1->f[3];
953 }
954
955 static void
956 fetch_src_file_channel(
957 const struct tgsi_exec_machine *mach,
958 const uint file,
959 const uint swizzle,
960 const union tgsi_exec_channel *index,
961 union tgsi_exec_channel *chan )
962 {
963 switch( swizzle ) {
964 case TGSI_SWIZZLE_X:
965 case TGSI_SWIZZLE_Y:
966 case TGSI_SWIZZLE_Z:
967 case TGSI_SWIZZLE_W:
968 switch( file ) {
969 case TGSI_FILE_CONSTANT:
970 assert(mach->Consts);
971 if (index->i[0] < 0)
972 chan->f[0] = 0.0f;
973 else
974 chan->f[0] = mach->Consts[index->i[0]][swizzle];
975 if (index->i[1] < 0)
976 chan->f[1] = 0.0f;
977 else
978 chan->f[1] = mach->Consts[index->i[1]][swizzle];
979 if (index->i[2] < 0)
980 chan->f[2] = 0.0f;
981 else
982 chan->f[2] = mach->Consts[index->i[2]][swizzle];
983 if (index->i[3] < 0)
984 chan->f[3] = 0.0f;
985 else
986 chan->f[3] = mach->Consts[index->i[3]][swizzle];
987 break;
988
989 case TGSI_FILE_INPUT:
990 case TGSI_FILE_SYSTEM_VALUE:
991 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
992 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
993 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
994 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
995 break;
996
997 case TGSI_FILE_TEMPORARY:
998 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
999 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1000 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1001 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1002 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1003 break;
1004
1005 case TGSI_FILE_IMMEDIATE:
1006 assert( index->i[0] < (int) mach->ImmLimit );
1007 chan->f[0] = mach->Imms[index->i[0]][swizzle];
1008 assert( index->i[1] < (int) mach->ImmLimit );
1009 chan->f[1] = mach->Imms[index->i[1]][swizzle];
1010 assert( index->i[2] < (int) mach->ImmLimit );
1011 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1012 assert( index->i[3] < (int) mach->ImmLimit );
1013 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1014 break;
1015
1016 case TGSI_FILE_ADDRESS:
1017 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1018 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1019 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1020 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1021 break;
1022
1023 case TGSI_FILE_PREDICATE:
1024 assert(index->i[0] < TGSI_EXEC_NUM_PREDS);
1025 assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
1026 assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
1027 assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
1028 chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0];
1029 chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1];
1030 chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2];
1031 chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3];
1032 break;
1033
1034 case TGSI_FILE_OUTPUT:
1035 /* vertex/fragment output vars can be read too */
1036 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1037 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1038 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1039 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1040 break;
1041
1042 default:
1043 assert( 0 );
1044 }
1045 break;
1046
1047 default:
1048 assert( 0 );
1049 }
1050 }
1051
1052 static void
1053 fetch_source(const struct tgsi_exec_machine *mach,
1054 union tgsi_exec_channel *chan,
1055 const struct tgsi_full_src_register *reg,
1056 const uint chan_index,
1057 enum tgsi_exec_datatype src_datatype)
1058 {
1059 union tgsi_exec_channel index;
1060 uint swizzle;
1061
1062 /* We start with a direct index into a register file.
1063 *
1064 * file[1],
1065 * where:
1066 * file = Register.File
1067 * [1] = Register.Index
1068 */
1069 index.i[0] =
1070 index.i[1] =
1071 index.i[2] =
1072 index.i[3] = reg->Register.Index;
1073
1074 /* There is an extra source register that indirectly subscripts
1075 * a register file. The direct index now becomes an offset
1076 * that is being added to the indirect register.
1077 *
1078 * file[ind[2].x+1],
1079 * where:
1080 * ind = Indirect.File
1081 * [2] = Indirect.Index
1082 * .x = Indirect.SwizzleX
1083 */
1084 if (reg->Register.Indirect) {
1085 union tgsi_exec_channel index2;
1086 union tgsi_exec_channel indir_index;
1087 const uint execmask = mach->ExecMask;
1088 uint i;
1089
1090 /* which address register (always zero now) */
1091 index2.i[0] =
1092 index2.i[1] =
1093 index2.i[2] =
1094 index2.i[3] = reg->Indirect.Index;
1095
1096 /* get current value of address register[swizzle] */
1097 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1098 fetch_src_file_channel(
1099 mach,
1100 reg->Indirect.File,
1101 swizzle,
1102 &index2,
1103 &indir_index );
1104
1105 /* add value of address register to the offset */
1106 index.i[0] += indir_index.i[0];
1107 index.i[1] += indir_index.i[1];
1108 index.i[2] += indir_index.i[2];
1109 index.i[3] += indir_index.i[3];
1110
1111 /* for disabled execution channels, zero-out the index to
1112 * avoid using a potential garbage value.
1113 */
1114 for (i = 0; i < QUAD_SIZE; i++) {
1115 if ((execmask & (1 << i)) == 0)
1116 index.i[i] = 0;
1117 }
1118 }
1119
1120 /* There is an extra source register that is a second
1121 * subscript to a register file. Effectively it means that
1122 * the register file is actually a 2D array of registers.
1123 *
1124 * file[1][3] == file[1*sizeof(file[1])+3],
1125 * where:
1126 * [3] = Dimension.Index
1127 */
1128 if (reg->Register.Dimension) {
1129 /* The size of the first-order array depends on the register file type.
1130 * We need to multiply the index to the first array to get an effective,
1131 * "flat" index that points to the beginning of the second-order array.
1132 */
1133 switch (reg->Register.File) {
1134 case TGSI_FILE_INPUT:
1135 case TGSI_FILE_SYSTEM_VALUE:
1136 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1137 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1138 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1139 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1140 break;
1141 case TGSI_FILE_CONSTANT:
1142 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1143 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1144 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1145 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1146 break;
1147 default:
1148 assert( 0 );
1149 }
1150
1151 index.i[0] += reg->Dimension.Index;
1152 index.i[1] += reg->Dimension.Index;
1153 index.i[2] += reg->Dimension.Index;
1154 index.i[3] += reg->Dimension.Index;
1155
1156 /* Again, the second subscript index can be addressed indirectly
1157 * identically to the first one.
1158 * Nothing stops us from indirectly addressing the indirect register,
1159 * but there is no need for that, so we won't exercise it.
1160 *
1161 * file[1][ind[4].y+3],
1162 * where:
1163 * ind = DimIndirect.File
1164 * [4] = DimIndirect.Index
1165 * .y = DimIndirect.SwizzleX
1166 */
1167 if (reg->Dimension.Indirect) {
1168 union tgsi_exec_channel index2;
1169 union tgsi_exec_channel indir_index;
1170 const uint execmask = mach->ExecMask;
1171 uint i;
1172
1173 index2.i[0] =
1174 index2.i[1] =
1175 index2.i[2] =
1176 index2.i[3] = reg->DimIndirect.Index;
1177
1178 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
1179 fetch_src_file_channel(
1180 mach,
1181 reg->DimIndirect.File,
1182 swizzle,
1183 &index2,
1184 &indir_index );
1185
1186 index.i[0] += indir_index.i[0];
1187 index.i[1] += indir_index.i[1];
1188 index.i[2] += indir_index.i[2];
1189 index.i[3] += indir_index.i[3];
1190
1191 /* for disabled execution channels, zero-out the index to
1192 * avoid using a potential garbage value.
1193 */
1194 for (i = 0; i < QUAD_SIZE; i++) {
1195 if ((execmask & (1 << i)) == 0)
1196 index.i[i] = 0;
1197 }
1198 }
1199
1200 /* If by any chance there was a need for a 3D array of register
1201 * files, we would have to check whether Dimension is followed
1202 * by a dimension register and continue the saga.
1203 */
1204 }
1205
1206 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1207 fetch_src_file_channel(
1208 mach,
1209 reg->Register.File,
1210 swizzle,
1211 &index,
1212 chan );
1213
1214 if (reg->Register.Absolute) {
1215 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1216 micro_abs(chan, chan);
1217 } else {
1218 micro_iabs(chan, chan);
1219 }
1220 }
1221
1222 if (reg->Register.Negate) {
1223 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1224 micro_neg(chan, chan);
1225 } else {
1226 micro_ineg(chan, chan);
1227 }
1228 }
1229 }
1230
1231 static void
1232 store_dest(struct tgsi_exec_machine *mach,
1233 const union tgsi_exec_channel *chan,
1234 const struct tgsi_full_dst_register *reg,
1235 const struct tgsi_full_instruction *inst,
1236 uint chan_index,
1237 enum tgsi_exec_datatype dst_datatype)
1238 {
1239 uint i;
1240 union tgsi_exec_channel null;
1241 union tgsi_exec_channel *dst;
1242 uint execmask = mach->ExecMask;
1243 int offset = 0; /* indirection offset */
1244 int index;
1245
1246 if (dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1247 CHECK_INF_OR_NAN(chan);
1248 }
1249
1250 /* There is an extra source register that indirectly subscripts
1251 * a register file. The direct index now becomes an offset
1252 * that is being added to the indirect register.
1253 *
1254 * file[ind[2].x+1],
1255 * where:
1256 * ind = Indirect.File
1257 * [2] = Indirect.Index
1258 * .x = Indirect.SwizzleX
1259 */
1260 if (reg->Register.Indirect) {
1261 union tgsi_exec_channel index;
1262 union tgsi_exec_channel indir_index;
1263 uint swizzle;
1264
1265 /* which address register (always zero for now) */
1266 index.i[0] =
1267 index.i[1] =
1268 index.i[2] =
1269 index.i[3] = reg->Indirect.Index;
1270
1271 /* get current value of address register[swizzle] */
1272 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1273
1274 /* fetch values from the address/indirection register */
1275 fetch_src_file_channel(
1276 mach,
1277 reg->Indirect.File,
1278 swizzle,
1279 &index,
1280 &indir_index );
1281
1282 /* save indirection offset */
1283 offset = indir_index.i[0];
1284 }
1285
1286 switch (reg->Register.File) {
1287 case TGSI_FILE_NULL:
1288 dst = &null;
1289 break;
1290
1291 case TGSI_FILE_OUTPUT:
1292 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1293 + reg->Register.Index;
1294 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1295 #if 0
1296 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1297 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1298 for (i = 0; i < QUAD_SIZE; i++)
1299 if (execmask & (1 << i))
1300 fprintf(stderr, "%f, ", chan->f[i]);
1301 fprintf(stderr, ")\n");
1302 }
1303 #endif
1304 break;
1305
1306 case TGSI_FILE_TEMPORARY:
1307 index = reg->Register.Index;
1308 assert( index < TGSI_EXEC_NUM_TEMPS );
1309 dst = &mach->Temps[offset + index].xyzw[chan_index];
1310 break;
1311
1312 case TGSI_FILE_ADDRESS:
1313 index = reg->Register.Index;
1314 dst = &mach->Addrs[index].xyzw[chan_index];
1315 break;
1316
1317 case TGSI_FILE_LOOP:
1318 assert(reg->Register.Index == 0);
1319 assert(mach->LoopCounterStackTop > 0);
1320 assert(chan_index == CHAN_X);
1321 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
1322 break;
1323
1324 case TGSI_FILE_PREDICATE:
1325 index = reg->Register.Index;
1326 assert(index < TGSI_EXEC_NUM_PREDS);
1327 dst = &mach->Predicates[index].xyzw[chan_index];
1328 break;
1329
1330 default:
1331 assert( 0 );
1332 return;
1333 }
1334
1335 if (inst->Instruction.Predicate) {
1336 uint swizzle;
1337 union tgsi_exec_channel *pred;
1338
1339 switch (chan_index) {
1340 case CHAN_X:
1341 swizzle = inst->Predicate.SwizzleX;
1342 break;
1343 case CHAN_Y:
1344 swizzle = inst->Predicate.SwizzleY;
1345 break;
1346 case CHAN_Z:
1347 swizzle = inst->Predicate.SwizzleZ;
1348 break;
1349 case CHAN_W:
1350 swizzle = inst->Predicate.SwizzleW;
1351 break;
1352 default:
1353 assert(0);
1354 return;
1355 }
1356
1357 assert(inst->Predicate.Index == 0);
1358
1359 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1360
1361 if (inst->Predicate.Negate) {
1362 for (i = 0; i < QUAD_SIZE; i++) {
1363 if (pred->u[i]) {
1364 execmask &= ~(1 << i);
1365 }
1366 }
1367 } else {
1368 for (i = 0; i < QUAD_SIZE; i++) {
1369 if (!pred->u[i]) {
1370 execmask &= ~(1 << i);
1371 }
1372 }
1373 }
1374 }
1375
1376 switch (inst->Instruction.Saturate) {
1377 case TGSI_SAT_NONE:
1378 for (i = 0; i < QUAD_SIZE; i++)
1379 if (execmask & (1 << i))
1380 dst->i[i] = chan->i[i];
1381 break;
1382
1383 case TGSI_SAT_ZERO_ONE:
1384 for (i = 0; i < QUAD_SIZE; i++)
1385 if (execmask & (1 << i)) {
1386 if (chan->f[i] < 0.0f)
1387 dst->f[i] = 0.0f;
1388 else if (chan->f[i] > 1.0f)
1389 dst->f[i] = 1.0f;
1390 else
1391 dst->i[i] = chan->i[i];
1392 }
1393 break;
1394
1395 case TGSI_SAT_MINUS_PLUS_ONE:
1396 for (i = 0; i < QUAD_SIZE; i++)
1397 if (execmask & (1 << i)) {
1398 if (chan->f[i] < -1.0f)
1399 dst->f[i] = -1.0f;
1400 else if (chan->f[i] > 1.0f)
1401 dst->f[i] = 1.0f;
1402 else
1403 dst->i[i] = chan->i[i];
1404 }
1405 break;
1406
1407 default:
1408 assert( 0 );
1409 }
1410 }
1411
/* Shorthands for float-typed operand access.  Both expect variables named
 * `mach` and `inst` to be in scope at the point of expansion.
 */

/* Fetch channel CHANNEL of source operand SRC_INDEX into *RESULT. */
#define FETCH(RESULT, SRC_INDEX, CHANNEL) \
   fetch_source(mach, RESULT, &inst->Src[SRC_INDEX], CHANNEL, TGSI_EXEC_DATA_FLOAT)

/* Store *VALUE into channel CHANNEL of destination operand DST_INDEX. */
#define STORE(VALUE, DST_INDEX, CHANNEL) \
   store_dest(mach, VALUE, &inst->Dst[DST_INDEX], inst, CHANNEL, TGSI_EXEC_DATA_FLOAT)
1417
1418
1419 /**
1420 * Execute ARB-style KIL which is predicated by a src register.
1421 * Kill fragment if any of the four values is less than zero.
1422 */
1423 static void
1424 exec_kil(struct tgsi_exec_machine *mach,
1425 const struct tgsi_full_instruction *inst)
1426 {
1427 uint uniquemask;
1428 uint chan_index;
1429 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1430 union tgsi_exec_channel r[1];
1431
1432 /* This mask stores component bits that were already tested. */
1433 uniquemask = 0;
1434
1435 for (chan_index = 0; chan_index < 4; chan_index++)
1436 {
1437 uint swizzle;
1438 uint i;
1439
1440 /* unswizzle channel */
1441 swizzle = tgsi_util_get_full_src_register_swizzle (
1442 &inst->Src[0],
1443 chan_index);
1444
1445 /* check if the component has not been already tested */
1446 if (uniquemask & (1 << swizzle))
1447 continue;
1448 uniquemask |= 1 << swizzle;
1449
1450 FETCH(&r[0], 0, chan_index);
1451 for (i = 0; i < 4; i++)
1452 if (r[0].f[i] < 0.0f)
1453 kilmask |= 1 << i;
1454 }
1455
1456 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1457 }
1458
1459 /**
1460 * Execute NVIDIA-style KIL which is predicated by a condition code.
1461 * Kill fragment if the condition code is TRUE.
1462 */
1463 static void
1464 exec_kilp(struct tgsi_exec_machine *mach,
1465 const struct tgsi_full_instruction *inst)
1466 {
1467 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1468
1469 /* "unconditional" kil */
1470 kilmask = mach->ExecMask;
1471 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1472 }
1473
1474 static void
1475 emit_vertex(struct tgsi_exec_machine *mach)
1476 {
1477 /* FIXME: check for exec mask correctly
1478 unsigned i;
1479 for (i = 0; i < QUAD_SIZE; ++i) {
1480 if ((mach->ExecMask & (1 << i)))
1481 */
1482 if (mach->ExecMask) {
1483 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
1484 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1485 }
1486 }
1487
1488 static void
1489 emit_primitive(struct tgsi_exec_machine *mach)
1490 {
1491 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
1492 /* FIXME: check for exec mask correctly
1493 unsigned i;
1494 for (i = 0; i < QUAD_SIZE; ++i) {
1495 if ((mach->ExecMask & (1 << i)))
1496 */
1497 if (mach->ExecMask) {
1498 ++(*prim_count);
1499 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
1500 mach->Primitives[*prim_count] = 0;
1501 }
1502 }
1503
1504 /*
1505 * Fetch a four texture samples using STR texture coordinates.
1506 */
1507 static void
1508 fetch_texel( struct tgsi_sampler *sampler,
1509 const union tgsi_exec_channel *s,
1510 const union tgsi_exec_channel *t,
1511 const union tgsi_exec_channel *p,
1512 float lodbias, /* XXX should be float[4] */
1513 union tgsi_exec_channel *r,
1514 union tgsi_exec_channel *g,
1515 union tgsi_exec_channel *b,
1516 union tgsi_exec_channel *a )
1517 {
1518 uint j;
1519 float rgba[NUM_CHANNELS][QUAD_SIZE];
1520
1521 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1522
1523 for (j = 0; j < 4; j++) {
1524 r->f[j] = rgba[0][j];
1525 g->f[j] = rgba[1][j];
1526 b->f[j] = rgba[2][j];
1527 a->f[j] = rgba[3][j];
1528 }
1529 }
1530
1531
1532 static void
1533 exec_tex(struct tgsi_exec_machine *mach,
1534 const struct tgsi_full_instruction *inst,
1535 boolean biasLod,
1536 boolean projected)
1537 {
1538 const uint unit = inst->Src[1].Register.Index;
1539 union tgsi_exec_channel r[4];
1540 uint chan_index;
1541 float lodBias;
1542
1543 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
1544
1545 switch (inst->Texture.Texture) {
1546 case TGSI_TEXTURE_1D:
1547 case TGSI_TEXTURE_SHADOW1D:
1548
1549 FETCH(&r[0], 0, CHAN_X);
1550
1551 if (projected) {
1552 FETCH(&r[1], 0, CHAN_W);
1553 micro_div( &r[0], &r[0], &r[1] );
1554 }
1555
1556 if (biasLod) {
1557 FETCH(&r[1], 0, CHAN_W);
1558 lodBias = r[2].f[0];
1559 }
1560 else
1561 lodBias = 0.0;
1562
1563 fetch_texel(mach->Samplers[unit],
1564 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */
1565 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1566 break;
1567
1568 case TGSI_TEXTURE_2D:
1569 case TGSI_TEXTURE_RECT:
1570 case TGSI_TEXTURE_SHADOW2D:
1571 case TGSI_TEXTURE_SHADOWRECT:
1572
1573 FETCH(&r[0], 0, CHAN_X);
1574 FETCH(&r[1], 0, CHAN_Y);
1575 FETCH(&r[2], 0, CHAN_Z);
1576
1577 if (projected) {
1578 FETCH(&r[3], 0, CHAN_W);
1579 micro_div( &r[0], &r[0], &r[3] );
1580 micro_div( &r[1], &r[1], &r[3] );
1581 micro_div( &r[2], &r[2], &r[3] );
1582 }
1583
1584 if (biasLod) {
1585 FETCH(&r[3], 0, CHAN_W);
1586 lodBias = r[3].f[0];
1587 }
1588 else
1589 lodBias = 0.0;
1590
1591 fetch_texel(mach->Samplers[unit],
1592 &r[0], &r[1], &r[2], lodBias, /* inputs */
1593 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1594 break;
1595
1596 case TGSI_TEXTURE_3D:
1597 case TGSI_TEXTURE_CUBE:
1598
1599 FETCH(&r[0], 0, CHAN_X);
1600 FETCH(&r[1], 0, CHAN_Y);
1601 FETCH(&r[2], 0, CHAN_Z);
1602
1603 if (projected) {
1604 FETCH(&r[3], 0, CHAN_W);
1605 micro_div( &r[0], &r[0], &r[3] );
1606 micro_div( &r[1], &r[1], &r[3] );
1607 micro_div( &r[2], &r[2], &r[3] );
1608 }
1609
1610 if (biasLod) {
1611 FETCH(&r[3], 0, CHAN_W);
1612 lodBias = r[3].f[0];
1613 }
1614 else
1615 lodBias = 0.0;
1616
1617 fetch_texel(mach->Samplers[unit],
1618 &r[0], &r[1], &r[2], lodBias,
1619 &r[0], &r[1], &r[2], &r[3]);
1620 break;
1621
1622 default:
1623 assert (0);
1624 }
1625
1626 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1627 STORE( &r[chan_index], 0, chan_index );
1628 }
1629 }
1630
1631 static void
1632 exec_txd(struct tgsi_exec_machine *mach,
1633 const struct tgsi_full_instruction *inst)
1634 {
1635 const uint unit = inst->Src[3].Register.Index;
1636 union tgsi_exec_channel r[4];
1637 uint chan_index;
1638
1639 /*
1640 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1641 */
1642
1643 switch (inst->Texture.Texture) {
1644 case TGSI_TEXTURE_1D:
1645 case TGSI_TEXTURE_SHADOW1D:
1646
1647 FETCH(&r[0], 0, CHAN_X);
1648
1649 fetch_texel(mach->Samplers[unit],
1650 &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */
1651 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1652 break;
1653
1654 case TGSI_TEXTURE_2D:
1655 case TGSI_TEXTURE_RECT:
1656 case TGSI_TEXTURE_SHADOW2D:
1657 case TGSI_TEXTURE_SHADOWRECT:
1658
1659 FETCH(&r[0], 0, CHAN_X);
1660 FETCH(&r[1], 0, CHAN_Y);
1661 FETCH(&r[2], 0, CHAN_Z);
1662
1663 fetch_texel(mach->Samplers[unit],
1664 &r[0], &r[1], &r[2], 0.0f, /* inputs */
1665 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1666 break;
1667
1668 case TGSI_TEXTURE_3D:
1669 case TGSI_TEXTURE_CUBE:
1670
1671 FETCH(&r[0], 0, CHAN_X);
1672 FETCH(&r[1], 0, CHAN_Y);
1673 FETCH(&r[2], 0, CHAN_Z);
1674
1675 fetch_texel(mach->Samplers[unit],
1676 &r[0], &r[1], &r[2], 0.0f,
1677 &r[0], &r[1], &r[2], &r[3]);
1678 break;
1679
1680 default:
1681 assert(0);
1682 }
1683
1684 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1685 STORE(&r[chan_index], 0, chan_index);
1686 }
1687 }
1688
1689
1690 /**
1691 * Evaluate a constant-valued coefficient at the position of the
1692 * current quad.
1693 */
1694 static void
1695 eval_constant_coef(
1696 struct tgsi_exec_machine *mach,
1697 unsigned attrib,
1698 unsigned chan )
1699 {
1700 unsigned i;
1701
1702 for( i = 0; i < QUAD_SIZE; i++ ) {
1703 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1704 }
1705 }
1706
1707 /**
1708 * Evaluate a linear-valued coefficient at the position of the
1709 * current quad.
1710 */
1711 static void
1712 eval_linear_coef(
1713 struct tgsi_exec_machine *mach,
1714 unsigned attrib,
1715 unsigned chan )
1716 {
1717 const float x = mach->QuadPos.xyzw[0].f[0];
1718 const float y = mach->QuadPos.xyzw[1].f[0];
1719 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1720 const float dady = mach->InterpCoefs[attrib].dady[chan];
1721 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1722 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1723 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1724 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1725 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1726 }
1727
1728 /**
1729 * Evaluate a perspective-valued coefficient at the position of the
1730 * current quad.
1731 */
1732 static void
1733 eval_perspective_coef(
1734 struct tgsi_exec_machine *mach,
1735 unsigned attrib,
1736 unsigned chan )
1737 {
1738 const float x = mach->QuadPos.xyzw[0].f[0];
1739 const float y = mach->QuadPos.xyzw[1].f[0];
1740 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1741 const float dady = mach->InterpCoefs[attrib].dady[chan];
1742 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1743 const float *w = mach->QuadPos.xyzw[3].f;
1744 /* divide by W here */
1745 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1746 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1747 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1748 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1749 }
1750
1751
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel `chan`
 * of input attribute `attrib` for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
1756
1757 static void
1758 exec_declaration(struct tgsi_exec_machine *mach,
1759 const struct tgsi_full_declaration *decl)
1760 {
1761 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
1762 if (decl->Declaration.File == TGSI_FILE_INPUT ||
1763 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1764 uint first, last, mask;
1765
1766 first = decl->Range.First;
1767 last = decl->Range.Last;
1768 mask = decl->Declaration.UsageMask;
1769
1770 if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
1771 assert(decl->Semantic.Index == 0);
1772 assert(first == last);
1773 assert(mask == TGSI_WRITEMASK_XYZW);
1774
1775 mach->Inputs[first] = mach->QuadPos;
1776 } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
1777 uint i;
1778
1779 assert(decl->Semantic.Index == 0);
1780 assert(first == last);
1781
1782 for (i = 0; i < QUAD_SIZE; i++) {
1783 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
1784 }
1785 } else {
1786 eval_coef_func eval;
1787 uint i, j;
1788
1789 switch (decl->Declaration.Interpolate) {
1790 case TGSI_INTERPOLATE_CONSTANT:
1791 eval = eval_constant_coef;
1792 break;
1793
1794 case TGSI_INTERPOLATE_LINEAR:
1795 eval = eval_linear_coef;
1796 break;
1797
1798 case TGSI_INTERPOLATE_PERSPECTIVE:
1799 eval = eval_perspective_coef;
1800 break;
1801
1802 default:
1803 assert(0);
1804 return;
1805 }
1806
1807 for (j = 0; j < NUM_CHANNELS; j++) {
1808 if (mask & (1 << j)) {
1809 for (i = first; i <= last; i++) {
1810 eval(mach, i, j);
1811 }
1812 }
1813 }
1814 }
1815 }
1816 }
1817 }
1818
/**
 * Per-channel operation used by the exec_*_unary/binary/trinary() helpers.
 *
 * `src` points at a single channel for unary operations and at an array
 * of two or three channels for binary and trinary operations.
 */
typedef void (*micro_op)(union tgsi_exec_channel *dst,
                         const union tgsi_exec_channel *src);
1821
1822 static void
1823 exec_scalar_unary(struct tgsi_exec_machine *mach,
1824 const struct tgsi_full_instruction *inst,
1825 micro_op op,
1826 enum tgsi_exec_datatype dst_datatype,
1827 enum tgsi_exec_datatype src_datatype)
1828 {
1829 unsigned int chan;
1830 union tgsi_exec_channel src;
1831 union tgsi_exec_channel dst;
1832
1833 fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype);
1834 op(&dst, &src);
1835 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1836 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1837 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
1838 }
1839 }
1840 }
1841
1842 static void
1843 exec_vector_unary(struct tgsi_exec_machine *mach,
1844 const struct tgsi_full_instruction *inst,
1845 micro_op op,
1846 enum tgsi_exec_datatype dst_datatype,
1847 enum tgsi_exec_datatype src_datatype)
1848 {
1849 unsigned int chan;
1850 struct tgsi_exec_vector dst;
1851
1852 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1853 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1854 union tgsi_exec_channel src;
1855
1856 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
1857 op(&dst.xyzw[chan], &src);
1858 }
1859 }
1860 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1861 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1862 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1863 }
1864 }
1865 }
1866
1867 static void
1868 exec_vector_binary(struct tgsi_exec_machine *mach,
1869 const struct tgsi_full_instruction *inst,
1870 micro_op op,
1871 enum tgsi_exec_datatype dst_datatype,
1872 enum tgsi_exec_datatype src_datatype)
1873 {
1874 unsigned int chan;
1875 struct tgsi_exec_vector dst;
1876
1877 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1878 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1879 union tgsi_exec_channel src[2];
1880
1881 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
1882 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
1883 op(&dst.xyzw[chan], src);
1884 }
1885 }
1886 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1887 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1888 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1889 }
1890 }
1891 }
1892
1893 static void
1894 exec_vector_trinary(struct tgsi_exec_machine *mach,
1895 const struct tgsi_full_instruction *inst,
1896 micro_op op,
1897 enum tgsi_exec_datatype dst_datatype,
1898 enum tgsi_exec_datatype src_datatype)
1899 {
1900 unsigned int chan;
1901 struct tgsi_exec_vector dst;
1902
1903 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1904 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1905 union tgsi_exec_channel src[3];
1906
1907 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
1908 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
1909 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
1910 op(&dst.xyzw[chan], src);
1911 }
1912 }
1913 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1914 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1915 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1916 }
1917 }
1918 }
1919
1920 static void
1921 exec_break(struct tgsi_exec_machine *mach)
1922 {
1923 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
1924 /* turn off loop channels for each enabled exec channel */
1925 mach->LoopMask &= ~mach->ExecMask;
1926 /* Todo: if mach->LoopMask == 0, jump to end of loop */
1927 UPDATE_EXEC_MASK(mach);
1928 } else {
1929 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
1930
1931 mach->Switch.mask = 0x0;
1932
1933 UPDATE_EXEC_MASK(mach);
1934 }
1935 }
1936
1937 static void
1938 exec_switch(struct tgsi_exec_machine *mach,
1939 const struct tgsi_full_instruction *inst)
1940 {
1941 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
1942 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
1943
1944 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
1945 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
1946 mach->Switch.mask = 0x0;
1947 mach->Switch.defaultMask = 0x0;
1948
1949 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
1950 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
1951
1952 UPDATE_EXEC_MASK(mach);
1953 }
1954
1955 static void
1956 exec_case(struct tgsi_exec_machine *mach,
1957 const struct tgsi_full_instruction *inst)
1958 {
1959 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
1960 union tgsi_exec_channel src;
1961 uint mask = 0;
1962
1963 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
1964
1965 if (mach->Switch.selector.u[0] == src.u[0]) {
1966 mask |= 0x1;
1967 }
1968 if (mach->Switch.selector.u[1] == src.u[1]) {
1969 mask |= 0x2;
1970 }
1971 if (mach->Switch.selector.u[2] == src.u[2]) {
1972 mask |= 0x4;
1973 }
1974 if (mach->Switch.selector.u[3] == src.u[3]) {
1975 mask |= 0x8;
1976 }
1977
1978 mach->Switch.defaultMask |= mask;
1979
1980 mach->Switch.mask |= mask & prevMask;
1981
1982 UPDATE_EXEC_MASK(mach);
1983 }
1984
1985 static void
1986 exec_default(struct tgsi_exec_machine *mach)
1987 {
1988 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
1989
1990 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
1991
1992 UPDATE_EXEC_MASK(mach);
1993 }
1994
1995 static void
1996 exec_endswitch(struct tgsi_exec_machine *mach)
1997 {
1998 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
1999 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
2000
2001 UPDATE_EXEC_MASK(mach);
2002 }
2003
2004 static void
2005 micro_i2f(union tgsi_exec_channel *dst,
2006 const union tgsi_exec_channel *src)
2007 {
2008 dst->f[0] = (float)src->i[0];
2009 dst->f[1] = (float)src->i[1];
2010 dst->f[2] = (float)src->i[2];
2011 dst->f[3] = (float)src->i[3];
2012 }
2013
2014 static void
2015 micro_not(union tgsi_exec_channel *dst,
2016 const union tgsi_exec_channel *src)
2017 {
2018 dst->u[0] = ~src->u[0];
2019 dst->u[1] = ~src->u[1];
2020 dst->u[2] = ~src->u[2];
2021 dst->u[3] = ~src->u[3];
2022 }
2023
2024 static void
2025 micro_shl(union tgsi_exec_channel *dst,
2026 const union tgsi_exec_channel *src)
2027 {
2028 dst->u[0] = src[0].u[0] << src[1].u[0];
2029 dst->u[1] = src[0].u[1] << src[1].u[1];
2030 dst->u[2] = src[0].u[2] << src[1].u[2];
2031 dst->u[3] = src[0].u[3] << src[1].u[3];
2032 }
2033
2034 static void
2035 micro_and(union tgsi_exec_channel *dst,
2036 const union tgsi_exec_channel *src)
2037 {
2038 dst->u[0] = src[0].u[0] & src[1].u[0];
2039 dst->u[1] = src[0].u[1] & src[1].u[1];
2040 dst->u[2] = src[0].u[2] & src[1].u[2];
2041 dst->u[3] = src[0].u[3] & src[1].u[3];
2042 }
2043
2044 static void
2045 micro_or(union tgsi_exec_channel *dst,
2046 const union tgsi_exec_channel *src)
2047 {
2048 dst->u[0] = src[0].u[0] | src[1].u[0];
2049 dst->u[1] = src[0].u[1] | src[1].u[1];
2050 dst->u[2] = src[0].u[2] | src[1].u[2];
2051 dst->u[3] = src[0].u[3] | src[1].u[3];
2052 }
2053
2054 static void
2055 micro_xor(union tgsi_exec_channel *dst,
2056 const union tgsi_exec_channel *src)
2057 {
2058 dst->u[0] = src[0].u[0] ^ src[1].u[0];
2059 dst->u[1] = src[0].u[1] ^ src[1].u[1];
2060 dst->u[2] = src[0].u[2] ^ src[1].u[2];
2061 dst->u[3] = src[0].u[3] ^ src[1].u[3];
2062 }
2063
2064 static void
2065 micro_f2i(union tgsi_exec_channel *dst,
2066 const union tgsi_exec_channel *src)
2067 {
2068 dst->i[0] = (int)src->f[0];
2069 dst->i[1] = (int)src->f[1];
2070 dst->i[2] = (int)src->f[2];
2071 dst->i[3] = (int)src->f[3];
2072 }
2073
2074 static void
2075 micro_idiv(union tgsi_exec_channel *dst,
2076 const union tgsi_exec_channel *src)
2077 {
2078 dst->i[0] = src[0].i[0] / src[1].i[0];
2079 dst->i[1] = src[0].i[1] / src[1].i[1];
2080 dst->i[2] = src[0].i[2] / src[1].i[2];
2081 dst->i[3] = src[0].i[3] / src[1].i[3];
2082 }
2083
2084 static void
2085 micro_imax(union tgsi_exec_channel *dst,
2086 const union tgsi_exec_channel *src)
2087 {
2088 dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0];
2089 dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1];
2090 dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2];
2091 dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3];
2092 }
2093
2094 static void
2095 micro_imin(union tgsi_exec_channel *dst,
2096 const union tgsi_exec_channel *src)
2097 {
2098 dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0];
2099 dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1];
2100 dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2];
2101 dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3];
2102 }
2103
2104 static void
2105 micro_isge(union tgsi_exec_channel *dst,
2106 const union tgsi_exec_channel *src)
2107 {
2108 dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0;
2109 dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0;
2110 dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0;
2111 dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0;
2112 }
2113
2114 static void
2115 micro_ishr(union tgsi_exec_channel *dst,
2116 const union tgsi_exec_channel *src)
2117 {
2118 dst->i[0] = src[0].i[0] >> src[1].i[0];
2119 dst->i[1] = src[0].i[1] >> src[1].i[1];
2120 dst->i[2] = src[0].i[2] >> src[1].i[2];
2121 dst->i[3] = src[0].i[3] >> src[1].i[3];
2122 }
2123
2124 static void
2125 micro_islt(union tgsi_exec_channel *dst,
2126 const union tgsi_exec_channel *src)
2127 {
2128 dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0;
2129 dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0;
2130 dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0;
2131 dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0;
2132 }
2133
2134 static void
2135 micro_f2u(union tgsi_exec_channel *dst,
2136 const union tgsi_exec_channel *src)
2137 {
2138 dst->u[0] = (uint)src->f[0];
2139 dst->u[1] = (uint)src->f[1];
2140 dst->u[2] = (uint)src->f[2];
2141 dst->u[3] = (uint)src->f[3];
2142 }
2143
2144 static void
2145 micro_u2f(union tgsi_exec_channel *dst,
2146 const union tgsi_exec_channel *src)
2147 {
2148 dst->f[0] = (float)src->u[0];
2149 dst->f[1] = (float)src->u[1];
2150 dst->f[2] = (float)src->u[2];
2151 dst->f[3] = (float)src->u[3];
2152 }
2153
2154 static void
2155 micro_uadd(union tgsi_exec_channel *dst,
2156 const union tgsi_exec_channel *src)
2157 {
2158 dst->u[0] = src[0].u[0] + src[1].u[0];
2159 dst->u[1] = src[0].u[1] + src[1].u[1];
2160 dst->u[2] = src[0].u[2] + src[1].u[2];
2161 dst->u[3] = src[0].u[3] + src[1].u[3];
2162 }
2163
2164 static void
2165 micro_udiv(union tgsi_exec_channel *dst,
2166 const union tgsi_exec_channel *src)
2167 {
2168 dst->u[0] = src[0].u[0] / src[1].u[0];
2169 dst->u[1] = src[0].u[1] / src[1].u[1];
2170 dst->u[2] = src[0].u[2] / src[1].u[2];
2171 dst->u[3] = src[0].u[3] / src[1].u[3];
2172 }
2173
2174 static void
2175 micro_umad(union tgsi_exec_channel *dst,
2176 const union tgsi_exec_channel *src)
2177 {
2178 dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0];
2179 dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1];
2180 dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2];
2181 dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3];
2182 }
2183
2184 static void
2185 micro_umax(union tgsi_exec_channel *dst,
2186 const union tgsi_exec_channel *src)
2187 {
2188 dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0];
2189 dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1];
2190 dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2];
2191 dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3];
2192 }
2193
2194 static void
2195 micro_umin(union tgsi_exec_channel *dst,
2196 const union tgsi_exec_channel *src)
2197 {
2198 dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0];
2199 dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1];
2200 dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2];
2201 dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3];
2202 }
2203
2204 static void
2205 micro_umod(union tgsi_exec_channel *dst,
2206 const union tgsi_exec_channel *src)
2207 {
2208 dst->u[0] = src[0].u[0] % src[1].u[0];
2209 dst->u[1] = src[0].u[1] % src[1].u[1];
2210 dst->u[2] = src[0].u[2] % src[1].u[2];
2211 dst->u[3] = src[0].u[3] % src[1].u[3];
2212 }
2213
2214 static void
2215 micro_umul(union tgsi_exec_channel *dst,
2216 const union tgsi_exec_channel *src)
2217 {
2218 dst->u[0] = src[0].u[0] * src[1].u[0];
2219 dst->u[1] = src[0].u[1] * src[1].u[1];
2220 dst->u[2] = src[0].u[2] * src[1].u[2];
2221 dst->u[3] = src[0].u[3] * src[1].u[3];
2222 }
2223
2224 static void
2225 micro_useq(union tgsi_exec_channel *dst,
2226 const union tgsi_exec_channel *src)
2227 {
2228 dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0;
2229 dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0;
2230 dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0;
2231 dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0;
2232 }
2233
2234 static void
2235 micro_usge(union tgsi_exec_channel *dst,
2236 const union tgsi_exec_channel *src)
2237 {
2238 dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0;
2239 dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0;
2240 dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0;
2241 dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0;
2242 }
2243
2244 static void
2245 micro_ushr(union tgsi_exec_channel *dst,
2246 const union tgsi_exec_channel *src)
2247 {
2248 dst->u[0] = src[0].u[0] >> src[1].u[0];
2249 dst->u[1] = src[0].u[1] >> src[1].u[1];
2250 dst->u[2] = src[0].u[2] >> src[1].u[2];
2251 dst->u[3] = src[0].u[3] >> src[1].u[3];
2252 }
2253
2254 static void
2255 micro_uslt(union tgsi_exec_channel *dst,
2256 const union tgsi_exec_channel *src)
2257 {
2258 dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0;
2259 dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0;
2260 dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0;
2261 dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0;
2262 }
2263
2264 static void
2265 micro_usne(union tgsi_exec_channel *dst,
2266 const union tgsi_exec_channel *src)
2267 {
2268 dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0;
2269 dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0;
2270 dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0;
2271 dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0;
2272 }
2273
2274 static void
2275 exec_instruction(
2276 struct tgsi_exec_machine *mach,
2277 const struct tgsi_full_instruction *inst,
2278 int *pc )
2279 {
2280 uint chan_index;
2281 union tgsi_exec_channel r[10];
2282 union tgsi_exec_channel d[8];
2283
2284 (*pc)++;
2285
2286 switch (inst->Instruction.Opcode) {
2287 case TGSI_OPCODE_ARL:
2288 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
2289 break;
2290
2291 case TGSI_OPCODE_MOV:
2292 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
2293 break;
2294
2295 case TGSI_OPCODE_LIT:
2296 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2297 FETCH( &r[0], 0, CHAN_X );
2298 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2299 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2300 }
2301
2302 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2303 FETCH( &r[1], 0, CHAN_Y );
2304 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2305
2306 FETCH( &r[2], 0, CHAN_W );
2307 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
2308 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
2309 micro_pow( &r[1], &r[1], &r[2] );
2310 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2311 }
2312
2313 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2314 STORE(&d[CHAN_Y], 0, CHAN_Y);
2315 }
2316 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2317 STORE(&d[CHAN_Z], 0, CHAN_Z);
2318 }
2319 }
2320 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2321 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2322 }
2323 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2324 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2325 }
2326 break;
2327
2328 case TGSI_OPCODE_RCP:
2329 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2330 break;
2331
2332 case TGSI_OPCODE_RSQ:
2333 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2334 break;
2335
2336 case TGSI_OPCODE_EXP:
2337 FETCH( &r[0], 0, CHAN_X );
2338 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
2339 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2340 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
2341 STORE( &r[2], 0, CHAN_X ); /* store r2 */
2342 }
2343 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2344 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
2345 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
2346 }
2347 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2348 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
2349 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
2350 }
2351 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2352 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2353 }
2354 break;
2355
2356 case TGSI_OPCODE_LOG:
2357 FETCH( &r[0], 0, CHAN_X );
2358 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
2359 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
2360 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
2361 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2362 STORE( &r[0], 0, CHAN_X );
2363 }
2364 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2365 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
2366 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
2367 STORE( &r[0], 0, CHAN_Y );
2368 }
2369 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2370 STORE( &r[1], 0, CHAN_Z );
2371 }
2372 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2373 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2374 }
2375 break;
2376
2377 case TGSI_OPCODE_MUL:
2378 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2379 FETCH(&r[0], 0, chan_index);
2380 FETCH(&r[1], 1, chan_index);
2381 micro_mul(&d[chan_index], &r[0], &r[1]);
2382 }
2383 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2384 STORE(&d[chan_index], 0, chan_index);
2385 }
2386 break;
2387
2388 case TGSI_OPCODE_ADD:
2389 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2390 FETCH( &r[0], 0, chan_index );
2391 FETCH( &r[1], 1, chan_index );
2392 micro_add(&d[chan_index], &r[0], &r[1]);
2393 }
2394 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2395 STORE(&d[chan_index], 0, chan_index);
2396 }
2397 break;
2398
2399 case TGSI_OPCODE_DP3:
2400 /* TGSI_OPCODE_DOT3 */
2401 FETCH( &r[0], 0, CHAN_X );
2402 FETCH( &r[1], 1, CHAN_X );
2403 micro_mul( &r[0], &r[0], &r[1] );
2404
2405 FETCH( &r[1], 0, CHAN_Y );
2406 FETCH( &r[2], 1, CHAN_Y );
2407 micro_mul( &r[1], &r[1], &r[2] );
2408 micro_add( &r[0], &r[0], &r[1] );
2409
2410 FETCH( &r[1], 0, CHAN_Z );
2411 FETCH( &r[2], 1, CHAN_Z );
2412 micro_mul( &r[1], &r[1], &r[2] );
2413 micro_add( &r[0], &r[0], &r[1] );
2414
2415 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2416 STORE( &r[0], 0, chan_index );
2417 }
2418 break;
2419
2420 case TGSI_OPCODE_DP4:
2421 /* TGSI_OPCODE_DOT4 */
2422 FETCH(&r[0], 0, CHAN_X);
2423 FETCH(&r[1], 1, CHAN_X);
2424
2425 micro_mul( &r[0], &r[0], &r[1] );
2426
2427 FETCH(&r[1], 0, CHAN_Y);
2428 FETCH(&r[2], 1, CHAN_Y);
2429
2430 micro_mul( &r[1], &r[1], &r[2] );
2431 micro_add( &r[0], &r[0], &r[1] );
2432
2433 FETCH(&r[1], 0, CHAN_Z);
2434 FETCH(&r[2], 1, CHAN_Z);
2435
2436 micro_mul( &r[1], &r[1], &r[2] );
2437 micro_add( &r[0], &r[0], &r[1] );
2438
2439 FETCH(&r[1], 0, CHAN_W);
2440 FETCH(&r[2], 1, CHAN_W);
2441
2442 micro_mul( &r[1], &r[1], &r[2] );
2443 micro_add( &r[0], &r[0], &r[1] );
2444
2445 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2446 STORE( &r[0], 0, chan_index );
2447 }
2448 break;
2449
2450 case TGSI_OPCODE_DST:
2451 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2452 FETCH( &r[0], 0, CHAN_Y );
2453 FETCH( &r[1], 1, CHAN_Y);
2454 micro_mul(&d[CHAN_Y], &r[0], &r[1]);
2455 }
2456 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2457 FETCH(&d[CHAN_Z], 0, CHAN_Z);
2458 }
2459 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2460 FETCH(&d[CHAN_W], 1, CHAN_W);
2461 }
2462
2463 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2464 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
2465 }
2466 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2467 STORE(&d[CHAN_Y], 0, CHAN_Y);
2468 }
2469 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2470 STORE(&d[CHAN_Z], 0, CHAN_Z);
2471 }
2472 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2473 STORE(&d[CHAN_W], 0, CHAN_W);
2474 }
2475 break;
2476
2477 case TGSI_OPCODE_MIN:
2478 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2479 FETCH(&r[0], 0, chan_index);
2480 FETCH(&r[1], 1, chan_index);
2481
2482 /* XXX use micro_min()?? */
2483 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
2484 }
2485 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2486 STORE(&d[chan_index], 0, chan_index);
2487 }
2488 break;
2489
2490 case TGSI_OPCODE_MAX:
2491 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2492 FETCH(&r[0], 0, chan_index);
2493 FETCH(&r[1], 1, chan_index);
2494
2495 /* XXX use micro_max()?? */
2496 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
2497 }
2498 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2499 STORE(&d[chan_index], 0, chan_index);
2500 }
2501 break;
2502
2503 case TGSI_OPCODE_SLT:
2504 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2505 break;
2506
2507 case TGSI_OPCODE_SGE:
2508 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2509 break;
2510
2511 case TGSI_OPCODE_MAD:
2512 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2513 break;
2514
2515 case TGSI_OPCODE_SUB:
2516 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2517 FETCH(&r[0], 0, chan_index);
2518 FETCH(&r[1], 1, chan_index);
2519 micro_sub(&d[chan_index], &r[0], &r[1]);
2520 }
2521 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2522 STORE(&d[chan_index], 0, chan_index);
2523 }
2524 break;
2525
2526 case TGSI_OPCODE_LRP:
2527 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2528 break;
2529
2530 case TGSI_OPCODE_CND:
2531 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2532 FETCH(&r[0], 0, chan_index);
2533 FETCH(&r[1], 1, chan_index);
2534 FETCH(&r[2], 2, chan_index);
2535 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2536 }
2537 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2538 STORE(&d[chan_index], 0, chan_index);
2539 }
2540 break;
2541
2542 case TGSI_OPCODE_DP2A:
2543 FETCH( &r[0], 0, CHAN_X );
2544 FETCH( &r[1], 1, CHAN_X );
2545 micro_mul( &r[0], &r[0], &r[1] );
2546
2547 FETCH( &r[1], 0, CHAN_Y );
2548 FETCH( &r[2], 1, CHAN_Y );
2549 micro_mul( &r[1], &r[1], &r[2] );
2550 micro_add( &r[0], &r[0], &r[1] );
2551
2552 FETCH( &r[2], 2, CHAN_X );
2553 micro_add( &r[0], &r[0], &r[2] );
2554
2555 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2556 STORE( &r[0], 0, chan_index );
2557 }
2558 break;
2559
2560 case TGSI_OPCODE_FRC:
2561 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2562 break;
2563
2564 case TGSI_OPCODE_CLAMP:
2565 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2566 FETCH(&r[0], 0, chan_index);
2567 FETCH(&r[1], 1, chan_index);
2568 micro_max(&r[0], &r[0], &r[1]);
2569 FETCH(&r[1], 2, chan_index);
2570 micro_min(&d[chan_index], &r[0], &r[1]);
2571 }
2572 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2573 STORE(&d[chan_index], 0, chan_index);
2574 }
2575 break;
2576
2577 case TGSI_OPCODE_FLR:
2578 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2579 break;
2580
2581 case TGSI_OPCODE_ROUND:
2582 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2583 break;
2584
2585 case TGSI_OPCODE_EX2:
2586 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2587 break;
2588
2589 case TGSI_OPCODE_LG2:
2590 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2591 break;
2592
2593 case TGSI_OPCODE_POW:
2594 FETCH(&r[0], 0, CHAN_X);
2595 FETCH(&r[1], 1, CHAN_X);
2596
2597 micro_pow( &r[0], &r[0], &r[1] );
2598
2599 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2600 STORE( &r[0], 0, chan_index );
2601 }
2602 break;
2603
2604 case TGSI_OPCODE_XPD:
2605 FETCH(&r[0], 0, CHAN_Y);
2606 FETCH(&r[1], 1, CHAN_Z);
2607
2608 micro_mul( &r[2], &r[0], &r[1] );
2609
2610 FETCH(&r[3], 0, CHAN_Z);
2611 FETCH(&r[4], 1, CHAN_Y);
2612
2613 micro_mul( &r[5], &r[3], &r[4] );
2614 micro_sub(&d[CHAN_X], &r[2], &r[5]);
2615
2616 FETCH(&r[2], 1, CHAN_X);
2617
2618 micro_mul( &r[3], &r[3], &r[2] );
2619
2620 FETCH(&r[5], 0, CHAN_X);
2621
2622 micro_mul( &r[1], &r[1], &r[5] );
2623 micro_sub(&d[CHAN_Y], &r[3], &r[1]);
2624
2625 micro_mul( &r[5], &r[5], &r[4] );
2626 micro_mul( &r[0], &r[0], &r[2] );
2627 micro_sub(&d[CHAN_Z], &r[5], &r[0]);
2628
2629 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2630 STORE(&d[CHAN_X], 0, CHAN_X);
2631 }
2632 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2633 STORE(&d[CHAN_Y], 0, CHAN_Y);
2634 }
2635 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2636 STORE(&d[CHAN_Z], 0, CHAN_Z);
2637 }
2638 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2639 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2640 }
2641 break;
2642
2643 case TGSI_OPCODE_ABS:
2644 exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2645 break;
2646
2647 case TGSI_OPCODE_RCC:
2648 FETCH(&r[0], 0, CHAN_X);
2649 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2650 micro_float_clamp(&r[0], &r[0]);
2651 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2652 STORE(&r[0], 0, chan_index);
2653 }
2654 break;
2655
2656 case TGSI_OPCODE_DPH:
2657 FETCH(&r[0], 0, CHAN_X);
2658 FETCH(&r[1], 1, CHAN_X);
2659
2660 micro_mul( &r[0], &r[0], &r[1] );
2661
2662 FETCH(&r[1], 0, CHAN_Y);
2663 FETCH(&r[2], 1, CHAN_Y);
2664
2665 micro_mul( &r[1], &r[1], &r[2] );
2666 micro_add( &r[0], &r[0], &r[1] );
2667
2668 FETCH(&r[1], 0, CHAN_Z);
2669 FETCH(&r[2], 1, CHAN_Z);
2670
2671 micro_mul( &r[1], &r[1], &r[2] );
2672 micro_add( &r[0], &r[0], &r[1] );
2673
2674 FETCH(&r[1], 1, CHAN_W);
2675
2676 micro_add( &r[0], &r[0], &r[1] );
2677
2678 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2679 STORE( &r[0], 0, chan_index );
2680 }
2681 break;
2682
2683 case TGSI_OPCODE_COS:
2684 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2685 break;
2686
2687 case TGSI_OPCODE_DDX:
2688 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2689 break;
2690
2691 case TGSI_OPCODE_DDY:
2692 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2693 break;
2694
2695 case TGSI_OPCODE_KILP:
2696 exec_kilp (mach, inst);
2697 break;
2698
2699 case TGSI_OPCODE_KIL:
2700 exec_kil (mach, inst);
2701 break;
2702
2703 case TGSI_OPCODE_PK2H:
2704 assert (0);
2705 break;
2706
2707 case TGSI_OPCODE_PK2US:
2708 assert (0);
2709 break;
2710
2711 case TGSI_OPCODE_PK4B:
2712 assert (0);
2713 break;
2714
2715 case TGSI_OPCODE_PK4UB:
2716 assert (0);
2717 break;
2718
2719 case TGSI_OPCODE_RFL:
2720 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2721 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2722 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2723 /* r0 = dp3(src0, src0) */
2724 FETCH(&r[2], 0, CHAN_X);
2725 micro_mul(&r[0], &r[2], &r[2]);
2726 FETCH(&r[4], 0, CHAN_Y);
2727 micro_mul(&r[8], &r[4], &r[4]);
2728 micro_add(&r[0], &r[0], &r[8]);
2729 FETCH(&r[6], 0, CHAN_Z);
2730 micro_mul(&r[8], &r[6], &r[6]);
2731 micro_add(&r[0], &r[0], &r[8]);
2732
2733 /* r1 = dp3(src0, src1) */
2734 FETCH(&r[3], 1, CHAN_X);
2735 micro_mul(&r[1], &r[2], &r[3]);
2736 FETCH(&r[5], 1, CHAN_Y);
2737 micro_mul(&r[8], &r[4], &r[5]);
2738 micro_add(&r[1], &r[1], &r[8]);
2739 FETCH(&r[7], 1, CHAN_Z);
2740 micro_mul(&r[8], &r[6], &r[7]);
2741 micro_add(&r[1], &r[1], &r[8]);
2742
2743 /* r1 = 2 * r1 / r0 */
2744 micro_add(&r[1], &r[1], &r[1]);
2745 micro_div(&r[1], &r[1], &r[0]);
2746
2747 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2748 micro_mul(&r[2], &r[2], &r[1]);
2749 micro_sub(&r[2], &r[2], &r[3]);
2750 STORE(&r[2], 0, CHAN_X);
2751 }
2752 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2753 micro_mul(&r[4], &r[4], &r[1]);
2754 micro_sub(&r[4], &r[4], &r[5]);
2755 STORE(&r[4], 0, CHAN_Y);
2756 }
2757 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2758 micro_mul(&r[6], &r[6], &r[1]);
2759 micro_sub(&r[6], &r[6], &r[7]);
2760 STORE(&r[6], 0, CHAN_Z);
2761 }
2762 }
2763 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2764 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2765 }
2766 break;
2767
2768 case TGSI_OPCODE_SEQ:
2769 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2770 break;
2771
2772 case TGSI_OPCODE_SFL:
2773 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2774 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2775 }
2776 break;
2777
2778 case TGSI_OPCODE_SGT:
2779 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2780 break;
2781
2782 case TGSI_OPCODE_SIN:
2783 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2784 break;
2785
2786 case TGSI_OPCODE_SLE:
2787 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2788 break;
2789
2790 case TGSI_OPCODE_SNE:
2791 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2792 break;
2793
2794 case TGSI_OPCODE_STR:
2795 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2796 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2797 }
2798 break;
2799
2800 case TGSI_OPCODE_TEX:
2801 /* simple texture lookup */
2802 /* src[0] = texcoord */
2803 /* src[1] = sampler unit */
2804 exec_tex(mach, inst, FALSE, FALSE);
2805 break;
2806
2807 case TGSI_OPCODE_TXB:
2808 /* Texture lookup with lod bias */
2809 /* src[0] = texcoord (src[0].w = LOD bias) */
2810 /* src[1] = sampler unit */
2811 exec_tex(mach, inst, TRUE, FALSE);
2812 break;
2813
2814 case TGSI_OPCODE_TXD:
2815 /* Texture lookup with explict partial derivatives */
2816 /* src[0] = texcoord */
2817 /* src[1] = d[strq]/dx */
2818 /* src[2] = d[strq]/dy */
2819 /* src[3] = sampler unit */
2820 exec_txd(mach, inst);
2821 break;
2822
2823 case TGSI_OPCODE_TXL:
2824 /* Texture lookup with explit LOD */
2825 /* src[0] = texcoord (src[0].w = LOD) */
2826 /* src[1] = sampler unit */
2827 exec_tex(mach, inst, TRUE, FALSE);
2828 break;
2829
2830 case TGSI_OPCODE_TXP:
2831 /* Texture lookup with projection */
2832 /* src[0] = texcoord (src[0].w = projection) */
2833 /* src[1] = sampler unit */
2834 exec_tex(mach, inst, FALSE, TRUE);
2835 break;
2836
2837 case TGSI_OPCODE_UP2H:
2838 assert (0);
2839 break;
2840
2841 case TGSI_OPCODE_UP2US:
2842 assert (0);
2843 break;
2844
2845 case TGSI_OPCODE_UP4B:
2846 assert (0);
2847 break;
2848
2849 case TGSI_OPCODE_UP4UB:
2850 assert (0);
2851 break;
2852
2853 case TGSI_OPCODE_X2D:
2854 FETCH(&r[0], 1, CHAN_X);
2855 FETCH(&r[1], 1, CHAN_Y);
2856 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2857 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2858 FETCH(&r[2], 2, CHAN_X);
2859 micro_mul(&r[2], &r[2], &r[0]);
2860 FETCH(&r[3], 2, CHAN_Y);
2861 micro_mul(&r[3], &r[3], &r[1]);
2862 micro_add(&r[2], &r[2], &r[3]);
2863 FETCH(&r[3], 0, CHAN_X);
2864 micro_add(&d[CHAN_X], &r[2], &r[3]);
2865
2866 }
2867 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2868 IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2869 FETCH(&r[2], 2, CHAN_Z);
2870 micro_mul(&r[2], &r[2], &r[0]);
2871 FETCH(&r[3], 2, CHAN_W);
2872 micro_mul(&r[3], &r[3], &r[1]);
2873 micro_add(&r[2], &r[2], &r[3]);
2874 FETCH(&r[3], 0, CHAN_Y);
2875 micro_add(&d[CHAN_Y], &r[2], &r[3]);
2876
2877 }
2878 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2879 STORE(&d[CHAN_X], 0, CHAN_X);
2880 }
2881 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2882 STORE(&d[CHAN_Y], 0, CHAN_Y);
2883 }
2884 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2885 STORE(&d[CHAN_X], 0, CHAN_Z);
2886 }
2887 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2888 STORE(&d[CHAN_Y], 0, CHAN_W);
2889 }
2890 break;
2891
2892 case TGSI_OPCODE_ARA:
2893 assert (0);
2894 break;
2895
2896 case TGSI_OPCODE_ARR:
2897 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
2898 break;
2899
2900 case TGSI_OPCODE_BRA:
2901 assert (0);
2902 break;
2903
2904 case TGSI_OPCODE_CAL:
2905 /* skip the call if no execution channels are enabled */
2906 if (mach->ExecMask) {
2907 /* do the call */
2908
2909 /* First, record the depths of the execution stacks.
2910 * This is important for deeply nested/looped return statements.
2911 * We have to unwind the stacks by the correct amount. For a
2912 * real code generator, we could determine the number of entries
2913 * to pop off each stack with simple static analysis and avoid
2914 * implementing this data structure at run time.
2915 */
2916 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
2917 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
2918 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
2919 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
2920 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
2921 /* note that PC was already incremented above */
2922 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
2923
2924 mach->CallStackTop++;
2925
2926 /* Second, push the Cond, Loop, Cont, Func stacks */
2927 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2928 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2929 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2930 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
2931 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
2932 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2933
2934 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2935 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2936 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2937 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
2938 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
2939 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2940
2941 /* Finally, jump to the subroutine */
2942 *pc = inst->Label.Label;
2943 }
2944 break;
2945
2946 case TGSI_OPCODE_RET:
2947 mach->FuncMask &= ~mach->ExecMask;
2948 UPDATE_EXEC_MASK(mach);
2949
2950 if (mach->FuncMask == 0x0) {
2951 /* really return now (otherwise, keep executing */
2952
2953 if (mach->CallStackTop == 0) {
2954 /* returning from main() */
2955 *pc = -1;
2956 return;
2957 }
2958
2959 assert(mach->CallStackTop > 0);
2960 mach->CallStackTop--;
2961
2962 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
2963 mach->CondMask = mach->CondStack[mach->CondStackTop];
2964
2965 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
2966 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
2967
2968 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
2969 mach->ContMask = mach->ContStack[mach->ContStackTop];
2970
2971 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
2972 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
2973
2974 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
2975 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
2976
2977 assert(mach->FuncStackTop > 0);
2978 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2979
2980 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
2981
2982 UPDATE_EXEC_MASK(mach);
2983 }
2984 break;
2985
2986 case TGSI_OPCODE_SSG:
2987 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2988 break;
2989
2990 case TGSI_OPCODE_CMP:
2991 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2992 FETCH(&r[0], 0, chan_index);
2993 FETCH(&r[1], 1, chan_index);
2994 FETCH(&r[2], 2, chan_index);
2995 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
2996 }
2997 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2998 STORE(&d[chan_index], 0, chan_index);
2999 }
3000 break;
3001
3002 case TGSI_OPCODE_SCS:
3003 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
3004 FETCH( &r[0], 0, CHAN_X );
3005 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
3006 micro_cos(&r[1], &r[0]);
3007 STORE(&r[1], 0, CHAN_X);
3008 }
3009 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
3010 micro_sin(&r[1], &r[0]);
3011 STORE(&r[1], 0, CHAN_Y);
3012 }
3013 }
3014 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
3015 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
3016 }
3017 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
3018 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
3019 }
3020 break;
3021
3022 case TGSI_OPCODE_NRM:
3023 /* 3-component vector normalize */
3024 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
3025 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
3026 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
3027 /* r3 = sqrt(dp3(src0, src0)) */
3028 FETCH(&r[0], 0, CHAN_X);
3029 micro_mul(&r[3], &r[0], &r[0]);
3030 FETCH(&r[1], 0, CHAN_Y);
3031 micro_mul(&r[4], &r[1], &r[1]);
3032 micro_add(&r[3], &r[3], &r[4]);
3033 FETCH(&r[2], 0, CHAN_Z);
3034 micro_mul(&r[4], &r[2], &r[2]);
3035 micro_add(&r[3], &r[3], &r[4]);
3036 micro_sqrt(&r[3], &r[3]);
3037
3038 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
3039 micro_div(&r[0], &r[0], &r[3]);
3040 STORE(&r[0], 0, CHAN_X);
3041 }
3042 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
3043 micro_div(&r[1], &r[1], &r[3]);
3044 STORE(&r[1], 0, CHAN_Y);
3045 }
3046 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
3047 micro_div(&r[2], &r[2], &r[3]);
3048 STORE(&r[2], 0, CHAN_Z);
3049 }
3050 }
3051 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
3052 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
3053 }
3054 break;
3055
3056 case TGSI_OPCODE_NRM4:
3057 /* 4-component vector normalize */
3058 {
3059 union tgsi_exec_channel tmp, dot;
3060
3061 /* tmp = dp4(src0, src0): */
3062 FETCH( &r[0], 0, CHAN_X );
3063 micro_mul( &tmp, &r[0], &r[0] );
3064
3065 FETCH( &r[1], 0, CHAN_Y );
3066 micro_mul( &dot, &r[1], &r[1] );
3067 micro_add( &tmp, &tmp, &dot );
3068
3069 FETCH( &r[2], 0, CHAN_Z );
3070 micro_mul( &dot, &r[2], &r[2] );
3071 micro_add( &tmp, &tmp, &dot );
3072
3073 FETCH( &r[3], 0, CHAN_W );
3074 micro_mul( &dot, &r[3], &r[3] );
3075 micro_add( &tmp, &tmp, &dot );
3076
3077 /* tmp = 1 / sqrt(tmp) */
3078 micro_sqrt( &tmp, &tmp );
3079 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
3080
3081 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3082 /* chan = chan * tmp */
3083 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
3084 STORE( &r[chan_index], 0, chan_index );
3085 }
3086 }
3087 break;
3088
3089 case TGSI_OPCODE_DIV:
3090 assert( 0 );
3091 break;
3092
3093 case TGSI_OPCODE_DP2:
3094 FETCH( &r[0], 0, CHAN_X );
3095 FETCH( &r[1], 1, CHAN_X );
3096 micro_mul( &r[0], &r[0], &r[1] );
3097
3098 FETCH( &r[1], 0, CHAN_Y );
3099 FETCH( &r[2], 1, CHAN_Y );
3100 micro_mul( &r[1], &r[1], &r[2] );
3101 micro_add( &r[0], &r[0], &r[1] );
3102
3103 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3104 STORE( &r[0], 0, chan_index );
3105 }
3106 break;
3107
3108 case TGSI_OPCODE_IF:
3109 /* push CondMask */
3110 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3111 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3112 FETCH( &r[0], 0, CHAN_X );
3113 /* update CondMask */
3114 if( ! r[0].u[0] ) {
3115 mach->CondMask &= ~0x1;
3116 }
3117 if( ! r[0].u[1] ) {
3118 mach->CondMask &= ~0x2;
3119 }
3120 if( ! r[0].u[2] ) {
3121 mach->CondMask &= ~0x4;
3122 }
3123 if( ! r[0].u[3] ) {
3124 mach->CondMask &= ~0x8;
3125 }
3126 UPDATE_EXEC_MASK(mach);
3127 /* Todo: If CondMask==0, jump to ELSE */
3128 break;
3129
3130 case TGSI_OPCODE_ELSE:
3131 /* invert CondMask wrt previous mask */
3132 {
3133 uint prevMask;
3134 assert(mach->CondStackTop > 0);
3135 prevMask = mach->CondStack[mach->CondStackTop - 1];
3136 mach->CondMask = ~mach->CondMask & prevMask;
3137 UPDATE_EXEC_MASK(mach);
3138 /* Todo: If CondMask==0, jump to ENDIF */
3139 }
3140 break;
3141
3142 case TGSI_OPCODE_ENDIF:
3143 /* pop CondMask */
3144 assert(mach->CondStackTop > 0);
3145 mach->CondMask = mach->CondStack[--mach->CondStackTop];
3146 UPDATE_EXEC_MASK(mach);
3147 break;
3148
3149 case TGSI_OPCODE_END:
3150 /* halt execution */
3151 *pc = -1;
3152 break;
3153
3154 case TGSI_OPCODE_REP:
3155 assert (0);
3156 break;
3157
3158 case TGSI_OPCODE_ENDREP:
3159 assert (0);
3160 break;
3161
3162 case TGSI_OPCODE_PUSHA:
3163 assert (0);
3164 break;
3165
3166 case TGSI_OPCODE_POPA:
3167 assert (0);
3168 break;
3169
3170 case TGSI_OPCODE_CEIL:
3171 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3172 break;
3173
3174 case TGSI_OPCODE_I2F:
3175 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
3176 break;
3177
3178 case TGSI_OPCODE_NOT:
3179 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3180 break;
3181
3182 case TGSI_OPCODE_TRUNC:
3183 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3184 break;
3185
3186 case TGSI_OPCODE_SHL:
3187 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3188 break;
3189
3190 case TGSI_OPCODE_AND:
3191 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3192 break;
3193
3194 case TGSI_OPCODE_OR:
3195 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3196 break;
3197
3198 case TGSI_OPCODE_MOD:
3199 assert (0);
3200 break;
3201
3202 case TGSI_OPCODE_XOR:
3203 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3204 break;
3205
3206 case TGSI_OPCODE_SAD:
3207 assert (0);
3208 break;
3209
3210 case TGSI_OPCODE_TXF:
3211 assert (0);
3212 break;
3213
3214 case TGSI_OPCODE_TXQ:
3215 assert (0);
3216 break;
3217
3218 case TGSI_OPCODE_EMIT:
3219 emit_vertex(mach);
3220 break;
3221
3222 case TGSI_OPCODE_ENDPRIM:
3223 emit_primitive(mach);
3224 break;
3225
3226 case TGSI_OPCODE_BGNFOR:
3227 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3228 for (chan_index = 0; chan_index < 3; chan_index++) {
3229 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
3230 }
3231 ++mach->LoopCounterStackTop;
3232 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
3233 /* update LoopMask */
3234 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3235 mach->LoopMask &= ~0x1;
3236 }
3237 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3238 mach->LoopMask &= ~0x2;
3239 }
3240 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3241 mach->LoopMask &= ~0x4;
3242 }
3243 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3244 mach->LoopMask &= ~0x8;
3245 }
3246 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3247 UPDATE_EXEC_MASK(mach);
3248 /* fall-through (for now) */
3249 case TGSI_OPCODE_BGNLOOP:
3250 /* push LoopMask and ContMasks */
3251 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3252 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3253 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3254 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3255
3256 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3257 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3258 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
3259 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3260 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
3261 break;
3262
3263 case TGSI_OPCODE_ENDFOR:
3264 assert(mach->LoopCounterStackTop > 0);
3265 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3266 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3267 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
3268 /* update LoopMask */
3269 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3270 mach->LoopMask &= ~0x1;
3271 }
3272 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3273 mach->LoopMask &= ~0x2;
3274 }
3275 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3276 mach->LoopMask &= ~0x4;
3277 }
3278 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3279 mach->LoopMask &= ~0x8;
3280 }
3281 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3282 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3283 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
3284 assert(mach->LoopLabelStackTop > 0);
3285 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
3286 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
3287 /* Restore ContMask, but don't pop */
3288 assert(mach->ContStackTop > 0);
3289 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3290 UPDATE_EXEC_MASK(mach);
3291 if (mach->ExecMask) {
3292 /* repeat loop: jump to instruction just past BGNLOOP */
3293 assert(mach->LoopLabelStackTop > 0);
3294 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3295 }
3296 else {
3297 /* exit loop: pop LoopMask */
3298 assert(mach->LoopStackTop > 0);
3299 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3300 /* pop ContMask */
3301 assert(mach->ContStackTop > 0);
3302 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3303 assert(mach->LoopLabelStackTop > 0);
3304 --mach->LoopLabelStackTop;
3305 assert(mach->LoopCounterStackTop > 0);
3306 --mach->LoopCounterStackTop;
3307
3308 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3309 }
3310 UPDATE_EXEC_MASK(mach);
3311 break;
3312
3313 case TGSI_OPCODE_ENDLOOP:
3314 /* Restore ContMask, but don't pop */
3315 assert(mach->ContStackTop > 0);
3316 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3317 UPDATE_EXEC_MASK(mach);
3318 if (mach->ExecMask) {
3319 /* repeat loop: jump to instruction just past BGNLOOP */
3320 assert(mach->LoopLabelStackTop > 0);
3321 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3322 }
3323 else {
3324 /* exit loop: pop LoopMask */
3325 assert(mach->LoopStackTop > 0);
3326 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3327 /* pop ContMask */
3328 assert(mach->ContStackTop > 0);
3329 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3330 assert(mach->LoopLabelStackTop > 0);
3331 --mach->LoopLabelStackTop;
3332
3333 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3334 }
3335 UPDATE_EXEC_MASK(mach);
3336 break;
3337
3338 case TGSI_OPCODE_BRK:
3339 exec_break(mach);
3340 break;
3341
3342 case TGSI_OPCODE_CONT:
3343 /* turn off cont channels for each enabled exec channel */
3344 mach->ContMask &= ~mach->ExecMask;
3345 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3346 UPDATE_EXEC_MASK(mach);
3347 break;
3348
3349 case TGSI_OPCODE_BGNSUB:
3350 /* no-op */
3351 break;
3352
3353 case TGSI_OPCODE_ENDSUB:
3354 /*
3355 * XXX: This really should be a no-op. We should never reach this opcode.
3356 */
3357
3358 assert(mach->CallStackTop > 0);
3359 mach->CallStackTop--;
3360
3361 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3362 mach->CondMask = mach->CondStack[mach->CondStackTop];
3363
3364 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3365 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3366
3367 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3368 mach->ContMask = mach->ContStack[mach->ContStackTop];
3369
3370 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3371 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3372
3373 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3374 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3375
3376 assert(mach->FuncStackTop > 0);
3377 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3378
3379 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3380
3381 UPDATE_EXEC_MASK(mach);
3382 break;
3383
3384 case TGSI_OPCODE_NOP:
3385 break;
3386
3387 case TGSI_OPCODE_BREAKC:
3388 FETCH(&r[0], 0, CHAN_X);
3389 /* update CondMask */
3390 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
3391 mach->LoopMask &= ~0x1;
3392 }
3393 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
3394 mach->LoopMask &= ~0x2;
3395 }
3396 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
3397 mach->LoopMask &= ~0x4;
3398 }
3399 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
3400 mach->LoopMask &= ~0x8;
3401 }
3402 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3403 UPDATE_EXEC_MASK(mach);
3404 break;
3405
3406 case TGSI_OPCODE_F2I:
3407 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3408 break;
3409
3410 case TGSI_OPCODE_IDIV:
3411 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3412 break;
3413
3414 case TGSI_OPCODE_IMAX:
3415 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3416 break;
3417
3418 case TGSI_OPCODE_IMIN:
3419 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3420 break;
3421
3422 case TGSI_OPCODE_INEG:
3423 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3424 break;
3425
3426 case TGSI_OPCODE_ISGE:
3427 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3428 break;
3429
3430 case TGSI_OPCODE_ISHR:
3431 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3432 break;
3433
3434 case TGSI_OPCODE_ISLT:
3435 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3436 break;
3437
3438 case TGSI_OPCODE_F2U:
3439 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
3440 break;
3441
3442 case TGSI_OPCODE_U2F:
3443 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
3444 break;
3445
3446 case TGSI_OPCODE_UADD:
3447 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3448 break;
3449
3450 case TGSI_OPCODE_UDIV:
3451 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3452 break;
3453
3454 case TGSI_OPCODE_UMAD:
3455 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3456 break;
3457
3458 case TGSI_OPCODE_UMAX:
3459 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3460 break;
3461
3462 case TGSI_OPCODE_UMIN:
3463 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3464 break;
3465
3466 case TGSI_OPCODE_UMOD:
3467 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3468 break;
3469
3470 case TGSI_OPCODE_UMUL:
3471 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3472 break;
3473
3474 case TGSI_OPCODE_USEQ:
3475 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3476 break;
3477
3478 case TGSI_OPCODE_USGE:
3479 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3480 break;
3481
3482 case TGSI_OPCODE_USHR:
3483 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3484 break;
3485
3486 case TGSI_OPCODE_USLT:
3487 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3488 break;
3489
3490 case TGSI_OPCODE_USNE:
3491 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3492 break;
3493
3494 case TGSI_OPCODE_SWITCH:
3495 exec_switch(mach, inst);
3496 break;
3497
3498 case TGSI_OPCODE_CASE:
3499 exec_case(mach, inst);
3500 break;
3501
3502 case TGSI_OPCODE_DEFAULT:
3503 exec_default(mach);
3504 break;
3505
3506 case TGSI_OPCODE_ENDSWITCH:
3507 exec_endswitch(mach);
3508 break;
3509
3510 default:
3511 assert( 0 );
3512 }
3513 }
3514
3515
3516 #define DEBUG_EXECUTION 0
3517
3518
3519 /**
3520 * Run TGSI interpreter.
3521 * \return bitmask of "alive" quad components
3522 */
3523 uint
3524 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3525 {
3526 uint i;
3527 int pc = 0;
3528
3529 mach->CondMask = 0xf;
3530 mach->LoopMask = 0xf;
3531 mach->ContMask = 0xf;
3532 mach->FuncMask = 0xf;
3533 mach->ExecMask = 0xf;
3534
3535 mach->Switch.mask = 0xf;
3536
3537 assert(mach->CondStackTop == 0);
3538 assert(mach->LoopStackTop == 0);
3539 assert(mach->ContStackTop == 0);
3540 assert(mach->SwitchStackTop == 0);
3541 assert(mach->BreakStackTop == 0);
3542 assert(mach->CallStackTop == 0);
3543
3544 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3545 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3546
3547 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3548 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3549 mach->Primitives[0] = 0;
3550 }
3551
3552 for (i = 0; i < QUAD_SIZE; i++) {
3553 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3554 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3555 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3556 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3557 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3558 }
3559
3560 /* execute declarations (interpolants) */
3561 for (i = 0; i < mach->NumDeclarations; i++) {
3562 exec_declaration( mach, mach->Declarations+i );
3563 }
3564
3565 {
3566 #if DEBUG_EXECUTION
3567 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
3568 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
3569 uint inst = 1;
3570
3571 memcpy(temps, mach->Temps, sizeof(temps));
3572 memcpy(outputs, mach->Outputs, sizeof(outputs));
3573 #endif
3574
3575 /* execute instructions, until pc is set to -1 */
3576 while (pc != -1) {
3577
3578 #if DEBUG_EXECUTION
3579 uint i;
3580
3581 tgsi_dump_instruction(&mach->Instructions[pc], inst++);
3582 #endif
3583
3584 assert(pc < (int) mach->NumInstructions);
3585 exec_instruction(mach, mach->Instructions + pc, &pc);
3586
3587 #if DEBUG_EXECUTION
3588 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
3589 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
3590 uint j;
3591
3592 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
3593 debug_printf("TEMP[%2u] = ", i);
3594 for (j = 0; j < 4; j++) {
3595 if (j > 0) {
3596 debug_printf(" ");
3597 }
3598 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3599 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
3600 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
3601 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
3602 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
3603 }
3604 }
3605 }
3606 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
3607 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
3608 uint j;
3609
3610 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
3611 debug_printf("OUT[%2u] = ", i);
3612 for (j = 0; j < 4; j++) {
3613 if (j > 0) {
3614 debug_printf(" ");
3615 }
3616 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3617 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
3618 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
3619 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
3620 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
3621 }
3622 }
3623 }
3624 #endif
3625 }
3626 }
3627
3628 #if 0
3629 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3630 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3631 /*
3632 * Scale back depth component.
3633 */
3634 for (i = 0; i < 4; i++)
3635 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3636 }
3637 #endif
3638
3639 assert(mach->CondStackTop == 0);
3640 assert(mach->LoopStackTop == 0);
3641 assert(mach->ContStackTop == 0);
3642 assert(mach->SwitchStackTop == 0);
3643 assert(mach->BreakStackTop == 0);
3644 assert(mach->CallStackTop == 0);
3645
3646 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3647 }