tgsi: Rewrite exec implementations of NRM and NRM4.
[mesa.git] src/gallium/auxiliary/tgsi/tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
 34  * Since we operate on 'quads' (4 pixels or 4 vertices in parallel),
 35  * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
 36  * care: a condition may be true for some quad components but false
 37  * for others.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
 44  * The ExecMask is computed from several other masks (CondMask, LoopMask,
 45  * ContMask, Switch.mask and FuncMask) which are controlled by the flow
 46  * control instructions (IF/ELSE/ENDIF, LOOP/ENDLOOP, CONT, switches and subroutines).
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
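/*
 * A small illustrative example (not taken from any particular shader):
 * suppose an IF condition is true for quad channels 0 and 3 and false for
 * channels 1 and 2.  Both branches are executed, but:
 *
 *    IF TEMP[0]            ->  CondMask = 0x9, so ExecMask = 0x9
 *      MOV TEMP[1], ...    ->  store_dest() commits channels 0 and 3 only
 *    ELSE                  ->  CondMask flips to 0x6, so ExecMask = 0x6
 *      MOV TEMP[1], ...    ->  store_dest() commits channels 1 and 2 only
 *    ENDIF                 ->  the previous CondMask/ExecMask is restored
 */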
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
63
64
65 #define FAST_MATH 1
66
67 #define TILE_TOP_LEFT 0
68 #define TILE_TOP_RIGHT 1
69 #define TILE_BOTTOM_LEFT 2
70 #define TILE_BOTTOM_RIGHT 3
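/*
 * Each tgsi_exec_channel holds one value per channel of the current quad
 * (a 2x2 block of pixels, or 4 vertices).  For fragment shaders the channel
 * order assumed by the TILE_* indices above is:
 *
 *    channel 0: top-left      channel 1: top-right
 *    channel 2: bottom-left   channel 3: bottom-right
 *
 * micro_ddx()/micro_ddy() below difference neighbouring channels to
 * approximate screen-space derivatives and broadcast the result to all
 * four channels.
 */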
71
72 static void
73 micro_abs(union tgsi_exec_channel *dst,
74 const union tgsi_exec_channel *src)
75 {
76 dst->f[0] = fabsf(src->f[0]);
77 dst->f[1] = fabsf(src->f[1]);
78 dst->f[2] = fabsf(src->f[2]);
79 dst->f[3] = fabsf(src->f[3]);
80 }
81
82 static void
83 micro_arl(union tgsi_exec_channel *dst,
84 const union tgsi_exec_channel *src)
85 {
86 dst->i[0] = (int)floorf(src->f[0]);
87 dst->i[1] = (int)floorf(src->f[1]);
88 dst->i[2] = (int)floorf(src->f[2]);
89 dst->i[3] = (int)floorf(src->f[3]);
90 }
91
92 static void
93 micro_arr(union tgsi_exec_channel *dst,
94 const union tgsi_exec_channel *src)
95 {
96 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
97 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
98 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
99 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
100 }
101
102 static void
103 micro_ceil(union tgsi_exec_channel *dst,
104 const union tgsi_exec_channel *src)
105 {
106 dst->f[0] = ceilf(src->f[0]);
107 dst->f[1] = ceilf(src->f[1]);
108 dst->f[2] = ceilf(src->f[2]);
109 dst->f[3] = ceilf(src->f[3]);
110 }
111
112 static void
113 micro_cos(union tgsi_exec_channel *dst,
114 const union tgsi_exec_channel *src)
115 {
116 dst->f[0] = cosf(src->f[0]);
117 dst->f[1] = cosf(src->f[1]);
118 dst->f[2] = cosf(src->f[2]);
119 dst->f[3] = cosf(src->f[3]);
120 }
121
122 static void
123 micro_ddx(union tgsi_exec_channel *dst,
124 const union tgsi_exec_channel *src)
125 {
126 dst->f[0] =
127 dst->f[1] =
128 dst->f[2] =
129 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
130 }
131
132 static void
133 micro_ddy(union tgsi_exec_channel *dst,
134 const union tgsi_exec_channel *src)
135 {
136 dst->f[0] =
137 dst->f[1] =
138 dst->f[2] =
139 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
140 }
141
142 static void
143 micro_exp2(union tgsi_exec_channel *dst,
144 const union tgsi_exec_channel *src)
145 {
146 #if FAST_MATH
147 dst->f[0] = util_fast_exp2(src->f[0]);
148 dst->f[1] = util_fast_exp2(src->f[1]);
149 dst->f[2] = util_fast_exp2(src->f[2]);
150 dst->f[3] = util_fast_exp2(src->f[3]);
151 #else
152 #if DEBUG
153 /* Inf is okay for this instruction, so clamp it to silence assertions. */
154 uint i;
155 union tgsi_exec_channel clamped;
156
157 for (i = 0; i < 4; i++) {
158 if (src->f[i] > 127.99999f) {
159 clamped.f[i] = 127.99999f;
160 } else if (src->f[i] < -126.99999f) {
161 clamped.f[i] = -126.99999f;
162 } else {
163 clamped.f[i] = src->f[i];
164 }
165 }
166 src = &clamped;
167 #endif /* DEBUG */
168
169 dst->f[0] = powf(2.0f, src->f[0]);
170 dst->f[1] = powf(2.0f, src->f[1]);
171 dst->f[2] = powf(2.0f, src->f[2]);
172 dst->f[3] = powf(2.0f, src->f[3]);
173 #endif /* FAST_MATH */
174 }
175
176 static void
177 micro_flr(union tgsi_exec_channel *dst,
178 const union tgsi_exec_channel *src)
179 {
180 dst->f[0] = floorf(src->f[0]);
181 dst->f[1] = floorf(src->f[1]);
182 dst->f[2] = floorf(src->f[2]);
183 dst->f[3] = floorf(src->f[3]);
184 }
185
186 static void
187 micro_frc(union tgsi_exec_channel *dst,
188 const union tgsi_exec_channel *src)
189 {
190 dst->f[0] = src->f[0] - floorf(src->f[0]);
191 dst->f[1] = src->f[1] - floorf(src->f[1]);
192 dst->f[2] = src->f[2] - floorf(src->f[2]);
193 dst->f[3] = src->f[3] - floorf(src->f[3]);
194 }
195
196 static void
197 micro_iabs(union tgsi_exec_channel *dst,
198 const union tgsi_exec_channel *src)
199 {
200 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
201 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
202 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
203 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
204 }
205
206 static void
207 micro_ineg(union tgsi_exec_channel *dst,
208 const union tgsi_exec_channel *src)
209 {
210 dst->i[0] = -src->i[0];
211 dst->i[1] = -src->i[1];
212 dst->i[2] = -src->i[2];
213 dst->i[3] = -src->i[3];
214 }
215
216 static void
217 micro_lg2(union tgsi_exec_channel *dst,
218 const union tgsi_exec_channel *src)
219 {
220 #if FAST_MATH
221 dst->f[0] = util_fast_log2(src->f[0]);
222 dst->f[1] = util_fast_log2(src->f[1]);
223 dst->f[2] = util_fast_log2(src->f[2]);
224 dst->f[3] = util_fast_log2(src->f[3]);
225 #else
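   /* log2(x) = ln(x) / ln(2); 1.442695 ~= 1 / ln(2) */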
226 dst->f[0] = logf(src->f[0]) * 1.442695f;
227 dst->f[1] = logf(src->f[1]) * 1.442695f;
228 dst->f[2] = logf(src->f[2]) * 1.442695f;
229 dst->f[3] = logf(src->f[3]) * 1.442695f;
230 #endif
231 }
232
233 static void
234 micro_lrp(union tgsi_exec_channel *dst,
235 const union tgsi_exec_channel *src)
236 {
237 dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0];
238 dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1];
239 dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2];
240 dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3];
241 }
242
243 static void
244 micro_mad(union tgsi_exec_channel *dst,
245 const union tgsi_exec_channel *src)
246 {
247 dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0];
248 dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1];
249 dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2];
250 dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3];
251 }
252
253 static void
254 micro_mov(union tgsi_exec_channel *dst,
255 const union tgsi_exec_channel *src)
256 {
257 dst->u[0] = src->u[0];
258 dst->u[1] = src->u[1];
259 dst->u[2] = src->u[2];
260 dst->u[3] = src->u[3];
261 }
262
263 static void
264 micro_rcp(union tgsi_exec_channel *dst,
265 const union tgsi_exec_channel *src)
266 {
267 #if 0 /* for debugging */
268 assert(src->f[0] != 0.0f);
269 assert(src->f[1] != 0.0f);
270 assert(src->f[2] != 0.0f);
271 assert(src->f[3] != 0.0f);
272 #endif
273 dst->f[0] = 1.0f / src->f[0];
274 dst->f[1] = 1.0f / src->f[1];
275 dst->f[2] = 1.0f / src->f[2];
276 dst->f[3] = 1.0f / src->f[3];
277 }
278
279 static void
280 micro_rnd(union tgsi_exec_channel *dst,
281 const union tgsi_exec_channel *src)
282 {
283 dst->f[0] = floorf(src->f[0] + 0.5f);
284 dst->f[1] = floorf(src->f[1] + 0.5f);
285 dst->f[2] = floorf(src->f[2] + 0.5f);
286 dst->f[3] = floorf(src->f[3] + 0.5f);
287 }
288
289 static void
290 micro_rsq(union tgsi_exec_channel *dst,
291 const union tgsi_exec_channel *src)
292 {
293 #if 0 /* for debugging */
294 assert(src->f[0] != 0.0f);
295 assert(src->f[1] != 0.0f);
296 assert(src->f[2] != 0.0f);
297 assert(src->f[3] != 0.0f);
298 #endif
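   /* Taking the absolute value first means negative inputs yield a finite
    * positive result instead of NaN; RSQ is commonly specified on |src|.
    */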
299 dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
300 dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
301 dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
302 dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
303 }
304
305 static void
306 micro_seq(union tgsi_exec_channel *dst,
307 const union tgsi_exec_channel *src)
308 {
309 dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f;
310 dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f;
311 dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f;
312 dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f;
313 }
314
315 static void
316 micro_sge(union tgsi_exec_channel *dst,
317 const union tgsi_exec_channel *src)
318 {
319 dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f;
320 dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f;
321 dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f;
322 dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f;
323 }
324
325 static void
326 micro_sgn(union tgsi_exec_channel *dst,
327 const union tgsi_exec_channel *src)
328 {
329 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
330 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
331 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
332 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
333 }
334
335 static void
336 micro_sgt(union tgsi_exec_channel *dst,
337 const union tgsi_exec_channel *src)
338 {
339 dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f;
340 dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f;
341 dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f;
342 dst->f[3] = src[0].f[3] > src[1].f[3] ? 1.0f : 0.0f;
343 }
344
345 static void
346 micro_sin(union tgsi_exec_channel *dst,
347 const union tgsi_exec_channel *src)
348 {
349 dst->f[0] = sinf(src->f[0]);
350 dst->f[1] = sinf(src->f[1]);
351 dst->f[2] = sinf(src->f[2]);
352 dst->f[3] = sinf(src->f[3]);
353 }
354
355 static void
356 micro_sle(union tgsi_exec_channel *dst,
357 const union tgsi_exec_channel *src)
358 {
359 dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f;
360 dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f;
361 dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f;
362 dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f;
363 }
364
365 static void
366 micro_slt(union tgsi_exec_channel *dst,
367 const union tgsi_exec_channel *src)
368 {
369 dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f;
370 dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f;
371 dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f;
372 dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f;
373 }
374
375 static void
376 micro_sne(union tgsi_exec_channel *dst,
377 const union tgsi_exec_channel *src)
378 {
379 dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f;
380 dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f;
381 dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f;
382 dst->f[3] = src[0].f[3] != src[1].f[3] ? 1.0f : 0.0f;
383 }
384
385 static void
386 micro_trunc(union tgsi_exec_channel *dst,
387 const union tgsi_exec_channel *src)
388 {
389 dst->f[0] = (float)(int)src->f[0];
390 dst->f[1] = (float)(int)src->f[1];
391 dst->f[2] = (float)(int)src->f[2];
392 dst->f[3] = (float)(int)src->f[3];
393 }
394
395
396 #define CHAN_X 0
397 #define CHAN_Y 1
398 #define CHAN_Z 2
399 #define CHAN_W 3
400
401 enum tgsi_exec_datatype {
402 TGSI_EXEC_DATA_FLOAT,
403 TGSI_EXEC_DATA_INT,
404 TGSI_EXEC_DATA_UINT
405 };
406
407 /*
408 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
409 */
410 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
411 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
412 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
413 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
414 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
415 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
416 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
417 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
418 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
419 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
420 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
421 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
422 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
423 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
424 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
425 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
426 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
427 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
428 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
429 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
430 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
431 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
432 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
433 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
434 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
435 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
436 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
437 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
438 #define TEMP_R0 TGSI_EXEC_TEMP_R0
439 #define TEMP_P0 TGSI_EXEC_TEMP_P0
440
441 #define IS_CHANNEL_ENABLED(INST, CHAN)\
442 ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))
443
444 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
445 ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))
446
447 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
448 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
449 if (IS_CHANNEL_ENABLED( INST, CHAN ))
450
451 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
452 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
453 if (IS_CHANNEL_ENABLED2( INST, CHAN ))
454
455
456 /** The execution mask depends on the conditional mask and the loop mask */
457 #define UPDATE_EXEC_MASK(MACH) \
458 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask
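/*
 * Worked example: with CondMask = 0x9 and the other masks fully set (0xf),
 * UPDATE_EXEC_MASK() gives ExecMask = 0x9, so store_dest() only commits
 * results for quad channels 0 and 3.
 */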
459
460
461 static const union tgsi_exec_channel ZeroVec =
462 { { 0.0, 0.0, 0.0, 0.0 } };
463
464 static const union tgsi_exec_channel OneVec = {
465 {1.0f, 1.0f, 1.0f, 1.0f}
466 };
467
468
469 /**
470 * Assert that none of the float values in 'chan' are infinite or NaN.
471 * NaN and Inf may occur normally during program execution and should
472 * not lead to crashes, etc. But when debugging, it's helpful to catch
473 * them.
474 */
475 static INLINE void
476 check_inf_or_nan(const union tgsi_exec_channel *chan)
477 {
478 assert(!util_is_inf_or_nan((chan)->f[0]));
479 assert(!util_is_inf_or_nan((chan)->f[1]));
480 assert(!util_is_inf_or_nan((chan)->f[2]));
481 assert(!util_is_inf_or_nan((chan)->f[3]));
482 }
483
484
485 #ifdef DEBUG
486 static void
487 print_chan(const char *msg, const union tgsi_exec_channel *chan)
488 {
489 debug_printf("%s = {%f, %f, %f, %f}\n",
490 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]);
491 }
492 #endif
493
494
495 #ifdef DEBUG
496 static void
497 print_temp(const struct tgsi_exec_machine *mach, uint index)
498 {
499 const struct tgsi_exec_vector *tmp = &mach->Temps[index];
500 int i;
501 debug_printf("Temp[%u] =\n", index);
502 for (i = 0; i < 4; i++) {
503 debug_printf(" %c: { %f, %f, %f, %f }\n",
504 "XYZW"[i],
505 tmp->xyzw[i].f[0],
506 tmp->xyzw[i].f[1],
507 tmp->xyzw[i].f[2],
508 tmp->xyzw[i].f[3]);
509 }
510 }
511 #endif
512
513
514 /**
515 * Check if there's a potential src/dst register data dependency when
516 * using SOA execution.
517 * Example:
518 * MOV T, T.yxwz;
519 * This would expand into:
520 * MOV t0, t1;
521 * MOV t1, t0;
522 * MOV t2, t3;
523 * MOV t3, t2;
524 * The second instruction will have the wrong value for t0 if executed as-is.
525 */
526 boolean
527 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
528 {
529 uint i, chan;
530
531 uint writemask = inst->Dst[0].Register.WriteMask;
532 if (writemask == TGSI_WRITEMASK_X ||
533 writemask == TGSI_WRITEMASK_Y ||
534 writemask == TGSI_WRITEMASK_Z ||
535 writemask == TGSI_WRITEMASK_W ||
536 writemask == TGSI_WRITEMASK_NONE) {
537 /* no chance of data dependency */
538 return FALSE;
539 }
540
541 /* loop over src regs */
542 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
543 if ((inst->Src[i].Register.File ==
544 inst->Dst[0].Register.File) &&
545 (inst->Src[i].Register.Index ==
546 inst->Dst[0].Register.Index)) {
547 /* loop over dest channels */
548 uint channelsWritten = 0x0;
549 FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
550 /* check if we're reading a channel that's been written */
551 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
552 if (channelsWritten & (1 << swizzle)) {
553 return TRUE;
554 }
555
556 channelsWritten |= (1 << chan);
557 }
558 }
559 }
560 return FALSE;
561 }
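/*
 * Illustrative cases:
 *    MOV TEMP[0].x,  TEMP[0].yxwz   -> FALSE (only one channel is written)
 *    MOV TEMP[0].xy, TEMP[0].yxzw   -> TRUE  (channel Y reads .x, which was
 *                                             already written for channel X)
 */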
562
563
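/*
 * Sketch of typical usage by a caller (illustrative only; exactly which
 * machine fields must be filled in depends on the processor type):
 *
 *    struct tgsi_exec_machine *m = tgsi_exec_machine_create();
 *    tgsi_exec_machine_bind_shader(m, tokens, num_samplers, samplers);
 *    // per block of quads/vertices:
 *    //    fill in m->Consts, m->Inputs (and m->InterpCoefs for fragments)
 *    tgsi_exec_machine_run(m);
 *    //    read results back from m->Outputs
 *    tgsi_exec_machine_destroy(m);
 */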
564 /**
565 * Initialize machine state by expanding tokens to full instructions,
566 * allocating temporary storage, setting up constants, etc.
567 * After this, we can call tgsi_exec_machine_run() many times.
568 */
569 void
570 tgsi_exec_machine_bind_shader(
571 struct tgsi_exec_machine *mach,
572 const struct tgsi_token *tokens,
573 uint numSamplers,
574 struct tgsi_sampler **samplers)
575 {
576 uint k;
577 struct tgsi_parse_context parse;
578 struct tgsi_exec_labels *labels = &mach->Labels;
579 struct tgsi_full_instruction *instructions;
580 struct tgsi_full_declaration *declarations;
581 uint maxInstructions = 10, numInstructions = 0;
582 uint maxDeclarations = 10, numDeclarations = 0;
583 uint instno = 0;
584
585 #if 0
586 tgsi_dump(tokens, 0);
587 #endif
588
589 util_init_math();
590
591 mach->Tokens = tokens;
592 mach->Samplers = samplers;
593
594 k = tgsi_parse_init (&parse, mach->Tokens);
595 if (k != TGSI_PARSE_OK) {
596 debug_printf( "Problem parsing!\n" );
597 return;
598 }
599
600 mach->Processor = parse.FullHeader.Processor.Processor;
601 mach->ImmLimit = 0;
602 labels->count = 0;
603
604 declarations = (struct tgsi_full_declaration *)
605 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
606
607 if (!declarations) {
608 return;
609 }
610
611 instructions = (struct tgsi_full_instruction *)
612 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
613
614 if (!instructions) {
615 FREE( declarations );
616 return;
617 }
618
619 while( !tgsi_parse_end_of_tokens( &parse ) ) {
620 uint pointer = parse.Position;
621 uint i;
622
623 tgsi_parse_token( &parse );
624 switch( parse.FullToken.Token.Type ) {
625 case TGSI_TOKEN_TYPE_DECLARATION:
626 /* save expanded declaration */
627 if (numDeclarations == maxDeclarations) {
628 declarations = REALLOC(declarations,
629 maxDeclarations
630 * sizeof(struct tgsi_full_declaration),
631 (maxDeclarations + 10)
632 * sizeof(struct tgsi_full_declaration));
633 maxDeclarations += 10;
634 }
635 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
636 unsigned reg;
637 for (reg = parse.FullToken.FullDeclaration.Range.First;
638 reg <= parse.FullToken.FullDeclaration.Range.Last;
639 ++reg) {
640 ++mach->NumOutputs;
641 }
642 }
643 memcpy(declarations + numDeclarations,
644 &parse.FullToken.FullDeclaration,
645 sizeof(declarations[0]));
646 numDeclarations++;
647 break;
648
649 case TGSI_TOKEN_TYPE_IMMEDIATE:
650 {
651 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
652 assert( size <= 4 );
653 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
654
655 for( i = 0; i < size; i++ ) {
656 mach->Imms[mach->ImmLimit][i] =
657 parse.FullToken.FullImmediate.u[i].Float;
658 }
659 mach->ImmLimit += 1;
660 }
661 break;
662
663 case TGSI_TOKEN_TYPE_INSTRUCTION:
664 assert( labels->count < MAX_LABELS );
665
666 labels->labels[labels->count][0] = instno;
667 labels->labels[labels->count][1] = pointer;
668 labels->count++;
669
670 /* save expanded instruction */
671 if (numInstructions == maxInstructions) {
672 instructions = REALLOC(instructions,
673 maxInstructions
674 * sizeof(struct tgsi_full_instruction),
675 (maxInstructions + 10)
676 * sizeof(struct tgsi_full_instruction));
677 maxInstructions += 10;
678 }
679
680 memcpy(instructions + numInstructions,
681 &parse.FullToken.FullInstruction,
682 sizeof(instructions[0]));
683
684 numInstructions++;
685 break;
686
687 case TGSI_TOKEN_TYPE_PROPERTY:
688 break;
689
690 default:
691 assert( 0 );
692 }
693 }
694 tgsi_parse_free (&parse);
695
696 if (mach->Declarations) {
697 FREE( mach->Declarations );
698 }
699 mach->Declarations = declarations;
700 mach->NumDeclarations = numDeclarations;
701
702 if (mach->Instructions) {
703 FREE( mach->Instructions );
704 }
705 mach->Instructions = instructions;
706 mach->NumInstructions = numInstructions;
707 }
708
709
710 struct tgsi_exec_machine *
711 tgsi_exec_machine_create( void )
712 {
713 struct tgsi_exec_machine *mach;
714 uint i;
715
716 mach = align_malloc( sizeof *mach, 16 );
717 if (!mach)
718 goto fail;
719
720 memset(mach, 0, sizeof(*mach));
721
722 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
723 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
724 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
725
726 /* Setup constants. */
727 for( i = 0; i < 4; i++ ) {
728 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
729 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
730 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
731 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
732 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
733 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
734 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
735 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
736 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
737 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
738 }
739
740 #ifdef DEBUG
741 /* silence warnings */
742 (void) print_chan;
743 (void) print_temp;
744 #endif
745
746 return mach;
747
748 fail:
749 align_free(mach);
750 return NULL;
751 }
752
753
754 void
755 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
756 {
757 if (mach) {
758 FREE(mach->Instructions);
759 FREE(mach->Declarations);
760 }
761
762 align_free(mach);
763 }
764
765 static void
766 micro_add(
767 union tgsi_exec_channel *dst,
768 const union tgsi_exec_channel *src0,
769 const union tgsi_exec_channel *src1 )
770 {
771 dst->f[0] = src0->f[0] + src1->f[0];
772 dst->f[1] = src0->f[1] + src1->f[1];
773 dst->f[2] = src0->f[2] + src1->f[2];
774 dst->f[3] = src0->f[3] + src1->f[3];
775 }
776
777 static void
778 micro_div(
779 union tgsi_exec_channel *dst,
780 const union tgsi_exec_channel *src0,
781 const union tgsi_exec_channel *src1 )
782 {
783 if (src1->f[0] != 0) {
784 dst->f[0] = src0->f[0] / src1->f[0];
785 }
786 if (src1->f[1] != 0) {
787 dst->f[1] = src0->f[1] / src1->f[1];
788 }
789 if (src1->f[2] != 0) {
790 dst->f[2] = src0->f[2] / src1->f[2];
791 }
792 if (src1->f[3] != 0) {
793 dst->f[3] = src0->f[3] / src1->f[3];
794 }
795 }
796
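/* Clamp each channel's magnitude to roughly [2^-64, 2^64], preserving sign.
 * Presumably this keeps subsequent multiplies away from Inf/denormal range.
 */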
797 static void
798 micro_float_clamp(union tgsi_exec_channel *dst,
799 const union tgsi_exec_channel *src)
800 {
801 uint i;
802
803 for (i = 0; i < 4; i++) {
804 if (src->f[i] > 0.0f) {
805 if (src->f[i] > 1.884467e+019f)
806 dst->f[i] = 1.884467e+019f;
807 else if (src->f[i] < 5.42101e-020f)
808 dst->f[i] = 5.42101e-020f;
809 else
810 dst->f[i] = src->f[i];
811 }
812 else {
813 if (src->f[i] < -1.884467e+019f)
814 dst->f[i] = -1.884467e+019f;
815 else if (src->f[i] > -5.42101e-020f)
816 dst->f[i] = -5.42101e-020f;
817 else
818 dst->f[i] = src->f[i];
819 }
820 }
821 }
822
823 static void
824 micro_lt(
825 union tgsi_exec_channel *dst,
826 const union tgsi_exec_channel *src0,
827 const union tgsi_exec_channel *src1,
828 const union tgsi_exec_channel *src2,
829 const union tgsi_exec_channel *src3 )
830 {
831 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
832 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
833 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
834 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
835 }
836
837 static void
838 micro_max(
839 union tgsi_exec_channel *dst,
840 const union tgsi_exec_channel *src0,
841 const union tgsi_exec_channel *src1 )
842 {
843 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
844 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
845 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
846 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
847 }
848
849 static void
850 micro_min(
851 union tgsi_exec_channel *dst,
852 const union tgsi_exec_channel *src0,
853 const union tgsi_exec_channel *src1 )
854 {
855 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
856 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
857 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
858 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
859 }
860
861 static void
862 micro_mul(
863 union tgsi_exec_channel *dst,
864 const union tgsi_exec_channel *src0,
865 const union tgsi_exec_channel *src1 )
866 {
867 dst->f[0] = src0->f[0] * src1->f[0];
868 dst->f[1] = src0->f[1] * src1->f[1];
869 dst->f[2] = src0->f[2] * src1->f[2];
870 dst->f[3] = src0->f[3] * src1->f[3];
871 }
872
873 #if 0
874 static void
875 micro_imul64(
876 union tgsi_exec_channel *dst0,
877 union tgsi_exec_channel *dst1,
878 const union tgsi_exec_channel *src0,
879 const union tgsi_exec_channel *src1 )
880 {
881 dst1->i[0] = src0->i[0] * src1->i[0];
882 dst1->i[1] = src0->i[1] * src1->i[1];
883 dst1->i[2] = src0->i[2] * src1->i[2];
884 dst1->i[3] = src0->i[3] * src1->i[3];
885 dst0->i[0] = 0;
886 dst0->i[1] = 0;
887 dst0->i[2] = 0;
888 dst0->i[3] = 0;
889 }
890 #endif
891
892 #if 0
893 static void
894 micro_umul64(
895 union tgsi_exec_channel *dst0,
896 union tgsi_exec_channel *dst1,
897 const union tgsi_exec_channel *src0,
898 const union tgsi_exec_channel *src1 )
899 {
900 dst1->u[0] = src0->u[0] * src1->u[0];
901 dst1->u[1] = src0->u[1] * src1->u[1];
902 dst1->u[2] = src0->u[2] * src1->u[2];
903 dst1->u[3] = src0->u[3] * src1->u[3];
904 dst0->u[0] = 0;
905 dst0->u[1] = 0;
906 dst0->u[2] = 0;
907 dst0->u[3] = 0;
908 }
909 #endif
910
911
912 #if 0
913 static void
914 micro_movc(
915 union tgsi_exec_channel *dst,
916 const union tgsi_exec_channel *src0,
917 const union tgsi_exec_channel *src1,
918 const union tgsi_exec_channel *src2 )
919 {
920 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
921 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
922 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
923 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
924 }
925 #endif
926
927 static void
928 micro_neg(
929 union tgsi_exec_channel *dst,
930 const union tgsi_exec_channel *src )
931 {
932 dst->f[0] = -src->f[0];
933 dst->f[1] = -src->f[1];
934 dst->f[2] = -src->f[2];
935 dst->f[3] = -src->f[3];
936 }
937
938 static void
939 micro_pow(
940 union tgsi_exec_channel *dst,
941 const union tgsi_exec_channel *src0,
942 const union tgsi_exec_channel *src1 )
943 {
944 #if FAST_MATH
945 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
946 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
947 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
948 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
949 #else
950 dst->f[0] = powf( src0->f[0], src1->f[0] );
951 dst->f[1] = powf( src0->f[1], src1->f[1] );
952 dst->f[2] = powf( src0->f[2], src1->f[2] );
953 dst->f[3] = powf( src0->f[3], src1->f[3] );
954 #endif
955 }
956
957 static void
958 micro_sqrt( union tgsi_exec_channel *dst,
959 const union tgsi_exec_channel *src )
960 {
961 dst->f[0] = sqrtf( src->f[0] );
962 dst->f[1] = sqrtf( src->f[1] );
963 dst->f[2] = sqrtf( src->f[2] );
964 dst->f[3] = sqrtf( src->f[3] );
965 }
966
967 static void
968 micro_sub(
969 union tgsi_exec_channel *dst,
970 const union tgsi_exec_channel *src0,
971 const union tgsi_exec_channel *src1 )
972 {
973 dst->f[0] = src0->f[0] - src1->f[0];
974 dst->f[1] = src0->f[1] - src1->f[1];
975 dst->f[2] = src0->f[2] - src1->f[2];
976 dst->f[3] = src0->f[3] - src1->f[3];
977 }
978
979 static void
980 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
981 const uint file,
982 const uint swizzle,
983 const union tgsi_exec_channel *index,
984 const union tgsi_exec_channel *index2D,
985 union tgsi_exec_channel *chan)
986 {
987 uint i;
988
989 switch (file) {
990 case TGSI_FILE_CONSTANT:
991 for (i = 0; i < QUAD_SIZE; i++) {
992 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
993 assert(mach->Consts[index2D->i[i]]);
994
995 if (index->i[i] < 0) {
996 chan->u[i] = 0;
997 } else {
998 const uint *p = (const uint *)mach->Consts[index2D->i[i]];
999
1000 chan->u[i] = p[index->i[i] * 4 + swizzle];
1001 }
1002 }
1003 break;
1004
1005 case TGSI_FILE_INPUT:
1006 case TGSI_FILE_SYSTEM_VALUE:
1007 for (i = 0; i < QUAD_SIZE; i++) {
1008 /* XXX: 2D indexing */
1009 chan->u[i] = mach->Inputs[index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]].xyzw[swizzle].u[i];
1010 }
1011 break;
1012
1013 case TGSI_FILE_TEMPORARY:
1014 for (i = 0; i < QUAD_SIZE; i++) {
1015 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1016 assert(index2D->i[i] == 0);
1017
1018 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1019 }
1020 break;
1021
1022 case TGSI_FILE_IMMEDIATE:
1023 for (i = 0; i < QUAD_SIZE; i++) {
1024 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1025 assert(index2D->i[i] == 0);
1026
1027 chan->f[i] = mach->Imms[index->i[i]][swizzle];
1028 }
1029 break;
1030
1031 case TGSI_FILE_ADDRESS:
1032 for (i = 0; i < QUAD_SIZE; i++) {
1033 assert(index->i[i] >= 0);
1034 assert(index2D->i[i] == 0);
1035
1036 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1037 }
1038 break;
1039
1040 case TGSI_FILE_PREDICATE:
1041 for (i = 0; i < QUAD_SIZE; i++) {
1042 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS);
1043 assert(index2D->i[i] == 0);
1044
1045 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i];
1046 }
1047 break;
1048
1049 case TGSI_FILE_OUTPUT:
1050 /* vertex/fragment output vars can be read too */
1051 for (i = 0; i < QUAD_SIZE; i++) {
1052 assert(index->i[i] >= 0);
1053 assert(index2D->i[i] == 0);
1054
1055 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1056 }
1057 break;
1058
1059 default:
1060 assert(0);
1061 for (i = 0; i < QUAD_SIZE; i++) {
1062 chan->u[i] = 0;
1063 }
1064 }
1065 }
1066
1067 static void
1068 fetch_source(const struct tgsi_exec_machine *mach,
1069 union tgsi_exec_channel *chan,
1070 const struct tgsi_full_src_register *reg,
1071 const uint chan_index,
1072 enum tgsi_exec_datatype src_datatype)
1073 {
1074 union tgsi_exec_channel index;
1075 union tgsi_exec_channel index2D;
1076 uint swizzle;
1077
1078 /* We start with a direct index into a register file.
1079 *
1080 * file[1],
1081 * where:
1082 * file = Register.File
1083 * [1] = Register.Index
1084 */
1085 index.i[0] =
1086 index.i[1] =
1087 index.i[2] =
1088 index.i[3] = reg->Register.Index;
1089
1090 /* There is an extra source register that indirectly subscripts
1091 * a register file. The direct index now becomes an offset
1092 * that is being added to the indirect register.
1093 *
1094 * file[ind[2].x+1],
1095 * where:
1096 * ind = Indirect.File
1097 * [2] = Indirect.Index
1098 * .x = Indirect.SwizzleX
1099 */
1100 if (reg->Register.Indirect) {
1101 union tgsi_exec_channel index2;
1102 union tgsi_exec_channel indir_index;
1103 const uint execmask = mach->ExecMask;
1104 uint i;
1105
1106 /* which address register (always zero now) */
1107 index2.i[0] =
1108 index2.i[1] =
1109 index2.i[2] =
1110 index2.i[3] = reg->Indirect.Index;
1111
1112 /* get current value of address register[swizzle] */
1113 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1114 fetch_src_file_channel(mach,
1115 reg->Indirect.File,
1116 swizzle,
1117 &index2,
1118 &ZeroVec,
1119 &indir_index);
1120
1121 /* add value of address register to the offset */
1122 index.i[0] += indir_index.i[0];
1123 index.i[1] += indir_index.i[1];
1124 index.i[2] += indir_index.i[2];
1125 index.i[3] += indir_index.i[3];
1126
1127 /* for disabled execution channels, zero-out the index to
1128 * avoid using a potential garbage value.
1129 */
1130 for (i = 0; i < QUAD_SIZE; i++) {
1131 if ((execmask & (1 << i)) == 0)
1132 index.i[i] = 0;
1133 }
1134 }
1135
1136 /* There is an extra source register that is a second
1137 * subscript to a register file. Effectively it means that
1138 * the register file is actually a 2D array of registers.
1139 *
1140 * file[3][1],
1141 * where:
1142 * [3] = Dimension.Index
1143 */
1144 if (reg->Register.Dimension) {
1145 index2D.i[0] =
1146 index2D.i[1] =
1147 index2D.i[2] =
1148 index2D.i[3] = reg->Dimension.Index;
1149
1150 /* Again, the second subscript index can be addressed indirectly
1151 * identically to the first one.
1152 * Nothing stops us from indirectly addressing the indirect register,
1153 * but there is no need for that, so we won't exercise it.
1154 *
1155 * file[ind[4].y+3][1],
1156 * where:
1157 * ind = DimIndirect.File
1158 * [4] = DimIndirect.Index
1159 * .y = DimIndirect.SwizzleX
1160 */
1161 if (reg->Dimension.Indirect) {
1162 union tgsi_exec_channel index2;
1163 union tgsi_exec_channel indir_index;
1164 const uint execmask = mach->ExecMask;
1165 uint i;
1166
1167 index2.i[0] =
1168 index2.i[1] =
1169 index2.i[2] =
1170 index2.i[3] = reg->DimIndirect.Index;
1171
1172 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
1173 fetch_src_file_channel(mach,
1174 reg->DimIndirect.File,
1175 swizzle,
1176 &index2,
1177 &ZeroVec,
1178 &indir_index);
1179
1180 index2D.i[0] += indir_index.i[0];
1181 index2D.i[1] += indir_index.i[1];
1182 index2D.i[2] += indir_index.i[2];
1183 index2D.i[3] += indir_index.i[3];
1184
1185 /* for disabled execution channels, zero-out the index to
1186 * avoid using a potential garbage value.
1187 */
1188 for (i = 0; i < QUAD_SIZE; i++) {
1189 if ((execmask & (1 << i)) == 0) {
1190 index2D.i[i] = 0;
1191 }
1192 }
1193 }
1194
1195 /* If by any chance there was a need for a 3D array of register
1196 * files, we would have to check whether Dimension is followed
1197 * by a dimension register and continue the saga.
1198 */
1199 } else {
1200 index2D.i[0] =
1201 index2D.i[1] =
1202 index2D.i[2] =
1203 index2D.i[3] = 0;
1204 }
1205
1206 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1207 fetch_src_file_channel(mach,
1208 reg->Register.File,
1209 swizzle,
1210 &index,
1211 &index2D,
1212 chan);
1213
1214 if (reg->Register.Absolute) {
1215 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1216 micro_abs(chan, chan);
1217 } else {
1218 micro_iabs(chan, chan);
1219 }
1220 }
1221
1222 if (reg->Register.Negate) {
1223 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1224 micro_neg(chan, chan);
1225 } else {
1226 micro_ineg(chan, chan);
1227 }
1228 }
1229 }
1230
1231 static void
1232 store_dest(struct tgsi_exec_machine *mach,
1233 const union tgsi_exec_channel *chan,
1234 const struct tgsi_full_dst_register *reg,
1235 const struct tgsi_full_instruction *inst,
1236 uint chan_index,
1237 enum tgsi_exec_datatype dst_datatype)
1238 {
1239 uint i;
1240 union tgsi_exec_channel null;
1241 union tgsi_exec_channel *dst;
1242 uint execmask = mach->ExecMask;
1243 int offset = 0; /* indirection offset */
1244 int index;
1245
1246 /* for debugging */
1247 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1248 check_inf_or_nan(chan);
1249 }
1250
1251 /* There is an extra source register that indirectly subscripts
1252 * a register file. The direct index now becomes an offset
1253 * that is being added to the indirect register.
1254 *
1255 * file[ind[2].x+1],
1256 * where:
1257 * ind = Indirect.File
1258 * [2] = Indirect.Index
1259 * .x = Indirect.SwizzleX
1260 */
1261 if (reg->Register.Indirect) {
1262 union tgsi_exec_channel index;
1263 union tgsi_exec_channel indir_index;
1264 uint swizzle;
1265
1266 /* which address register (always zero for now) */
1267 index.i[0] =
1268 index.i[1] =
1269 index.i[2] =
1270 index.i[3] = reg->Indirect.Index;
1271
1272 /* get current value of address register[swizzle] */
1273 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1274
1275 /* fetch values from the address/indirection register */
1276 fetch_src_file_channel(mach,
1277 reg->Indirect.File,
1278 swizzle,
1279 &index,
1280 &ZeroVec,
1281 &indir_index);
1282
1283 /* save indirection offset */
1284 offset = indir_index.i[0];
1285 }
1286
1287 switch (reg->Register.File) {
1288 case TGSI_FILE_NULL:
1289 dst = &null;
1290 break;
1291
1292 case TGSI_FILE_OUTPUT:
1293 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1294 + reg->Register.Index;
1295 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1296 #if 0
1297 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1298 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1299 for (i = 0; i < QUAD_SIZE; i++)
1300 if (execmask & (1 << i))
1301 fprintf(stderr, "%f, ", chan->f[i]);
1302 fprintf(stderr, ")\n");
1303 }
1304 #endif
1305 break;
1306
1307 case TGSI_FILE_TEMPORARY:
1308 index = reg->Register.Index;
1309 assert( index < TGSI_EXEC_NUM_TEMPS );
1310 dst = &mach->Temps[offset + index].xyzw[chan_index];
1311 break;
1312
1313 case TGSI_FILE_ADDRESS:
1314 index = reg->Register.Index;
1315 dst = &mach->Addrs[index].xyzw[chan_index];
1316 break;
1317
1318 case TGSI_FILE_LOOP:
1319 assert(reg->Register.Index == 0);
1320 assert(mach->LoopCounterStackTop > 0);
1321 assert(chan_index == CHAN_X);
1322 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
1323 break;
1324
1325 case TGSI_FILE_PREDICATE:
1326 index = reg->Register.Index;
1327 assert(index < TGSI_EXEC_NUM_PREDS);
1328 dst = &mach->Predicates[index].xyzw[chan_index];
1329 break;
1330
1331 default:
1332 assert( 0 );
1333 return;
1334 }
1335
1336 if (inst->Instruction.Predicate) {
1337 uint swizzle;
1338 union tgsi_exec_channel *pred;
1339
1340 switch (chan_index) {
1341 case CHAN_X:
1342 swizzle = inst->Predicate.SwizzleX;
1343 break;
1344 case CHAN_Y:
1345 swizzle = inst->Predicate.SwizzleY;
1346 break;
1347 case CHAN_Z:
1348 swizzle = inst->Predicate.SwizzleZ;
1349 break;
1350 case CHAN_W:
1351 swizzle = inst->Predicate.SwizzleW;
1352 break;
1353 default:
1354 assert(0);
1355 return;
1356 }
1357
1358 assert(inst->Predicate.Index == 0);
1359
1360 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1361
1362 if (inst->Predicate.Negate) {
1363 for (i = 0; i < QUAD_SIZE; i++) {
1364 if (pred->u[i]) {
1365 execmask &= ~(1 << i);
1366 }
1367 }
1368 } else {
1369 for (i = 0; i < QUAD_SIZE; i++) {
1370 if (!pred->u[i]) {
1371 execmask &= ~(1 << i);
1372 }
1373 }
1374 }
1375 }
1376
1377 switch (inst->Instruction.Saturate) {
1378 case TGSI_SAT_NONE:
1379 for (i = 0; i < QUAD_SIZE; i++)
1380 if (execmask & (1 << i))
1381 dst->i[i] = chan->i[i];
1382 break;
1383
1384 case TGSI_SAT_ZERO_ONE:
1385 for (i = 0; i < QUAD_SIZE; i++)
1386 if (execmask & (1 << i)) {
1387 if (chan->f[i] < 0.0f)
1388 dst->f[i] = 0.0f;
1389 else if (chan->f[i] > 1.0f)
1390 dst->f[i] = 1.0f;
1391 else
1392 dst->i[i] = chan->i[i];
1393 }
1394 break;
1395
1396 case TGSI_SAT_MINUS_PLUS_ONE:
1397 for (i = 0; i < QUAD_SIZE; i++)
1398 if (execmask & (1 << i)) {
1399 if (chan->f[i] < -1.0f)
1400 dst->f[i] = -1.0f;
1401 else if (chan->f[i] > 1.0f)
1402 dst->f[i] = 1.0f;
1403 else
1404 dst->i[i] = chan->i[i];
1405 }
1406 break;
1407
1408 default:
1409 assert( 0 );
1410 }
1411 }
1412
1413 #define FETCH(VAL,INDEX,CHAN)\
1414 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)
1415
1416 #define STORE(VAL,INDEX,CHAN)\
1417 store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT)
1418
1419
1420 /**
1421 * Execute ARB-style KIL which is predicated by a src register.
1422 * Kill fragment if any of the four values is less than zero.
1423 */
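/*
 * Illustrative use: an alpha-test style discard is often expressed as
 *
 *    SUB TEMP[0], TEMP[1].wwww, CONST[0].xxxx
 *    KIL TEMP[0]
 *
 * which sets the kill bit for every pixel of the quad whose alpha falls
 * below the reference value; the bits accumulate in TEMP_KILMASK.
 */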
1424 static void
1425 exec_kil(struct tgsi_exec_machine *mach,
1426 const struct tgsi_full_instruction *inst)
1427 {
1428 uint uniquemask;
1429 uint chan_index;
1430 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1431 union tgsi_exec_channel r[1];
1432
1433 /* This mask stores component bits that were already tested. */
1434 uniquemask = 0;
1435
1436 for (chan_index = 0; chan_index < 4; chan_index++)
1437 {
1438 uint swizzle;
1439 uint i;
1440
1441 /* unswizzle channel */
1442 swizzle = tgsi_util_get_full_src_register_swizzle (
1443 &inst->Src[0],
1444 chan_index);
1445
1446 /* check if the component has not been already tested */
1447 if (uniquemask & (1 << swizzle))
1448 continue;
1449 uniquemask |= 1 << swizzle;
1450
1451 FETCH(&r[0], 0, chan_index);
1452 for (i = 0; i < 4; i++)
1453 if (r[0].f[i] < 0.0f)
1454 kilmask |= 1 << i;
1455 }
1456
1457 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1458 }
1459
1460 /**
1461 * Execute NVIDIA-style KIL which is predicated by a condition code.
1462 * Kill fragment if the condition code is TRUE.
1463 */
1464 static void
1465 exec_kilp(struct tgsi_exec_machine *mach,
1466 const struct tgsi_full_instruction *inst)
1467 {
1468 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1469
1470 /* "unconditional" kil */
1471 kilmask = mach->ExecMask;
1472 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1473 }
1474
1475 static void
1476 emit_vertex(struct tgsi_exec_machine *mach)
1477 {
1478 /* FIXME: check for exec mask correctly
1479 unsigned i;
1480 for (i = 0; i < QUAD_SIZE; ++i) {
1481 if ((mach->ExecMask & (1 << i)))
1482 */
1483 if (mach->ExecMask) {
1484 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
1485 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1486 }
1487 }
1488
1489 static void
1490 emit_primitive(struct tgsi_exec_machine *mach)
1491 {
1492 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
1493 /* FIXME: check for exec mask correctly
1494 unsigned i;
1495 for (i = 0; i < QUAD_SIZE; ++i) {
1496 if ((mach->ExecMask & (1 << i)))
1497 */
1498 if (mach->ExecMask) {
1499 ++(*prim_count);
1500 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
1501 mach->Primitives[*prim_count] = 0;
1502 }
1503 }
1504
1505 /*
1506 * Fetch four texture samples using STR texture coordinates.
1507 */
1508 static void
1509 fetch_texel( struct tgsi_sampler *sampler,
1510 const union tgsi_exec_channel *s,
1511 const union tgsi_exec_channel *t,
1512 const union tgsi_exec_channel *p,
1513 const union tgsi_exec_channel *c0,
1514 enum tgsi_sampler_control control,
1515 union tgsi_exec_channel *r,
1516 union tgsi_exec_channel *g,
1517 union tgsi_exec_channel *b,
1518 union tgsi_exec_channel *a )
1519 {
1520 uint j;
1521 float rgba[NUM_CHANNELS][QUAD_SIZE];
1522
1523 sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba);
1524
1525 for (j = 0; j < 4; j++) {
1526 r->f[j] = rgba[0][j];
1527 g->f[j] = rgba[1][j];
1528 b->f[j] = rgba[2][j];
1529 a->f[j] = rgba[3][j];
1530 }
1531 }
1532
1533
1534 #define TEX_MODIFIER_NONE 0
1535 #define TEX_MODIFIER_PROJECTED 1
1536 #define TEX_MODIFIER_LOD_BIAS 2
1537 #define TEX_MODIFIER_EXPLICIT_LOD 3
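/* These typically correspond to the TEX, TXP, TXB and TXL opcodes: TXP
 * divides the coordinates by the W component before sampling, while for
 * TXB/TXL the W component carries the LOD bias / explicit LOD.
 */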
1538
1539
1540 static void
1541 exec_tex(struct tgsi_exec_machine *mach,
1542 const struct tgsi_full_instruction *inst,
1543 uint modifier)
1544 {
1545 const uint unit = inst->Src[1].Register.Index;
1546 union tgsi_exec_channel r[4];
1547 const union tgsi_exec_channel *lod = &ZeroVec;
1548 enum tgsi_sampler_control control;
1549 uint chan_index;
1550
1551 if (modifier != TEX_MODIFIER_NONE) {
1552 FETCH(&r[3], 0, CHAN_W);
1553 if (modifier != TEX_MODIFIER_PROJECTED) {
1554 lod = &r[3];
1555 }
1556 }
1557
1558 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
1559 control = tgsi_sampler_lod_explicit;
1560 } else {
1561 control = tgsi_sampler_lod_bias;
1562 }
1563
1564 switch (inst->Texture.Texture) {
1565 case TGSI_TEXTURE_1D:
1566 case TGSI_TEXTURE_SHADOW1D:
1567 FETCH(&r[0], 0, CHAN_X);
1568
1569 if (modifier == TEX_MODIFIER_PROJECTED) {
1570 micro_div(&r[0], &r[0], &r[3]);
1571 }
1572
1573 fetch_texel(mach->Samplers[unit],
1574 &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */
1575 control,
1576 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1577 break;
1578
1579 case TGSI_TEXTURE_2D:
1580 case TGSI_TEXTURE_RECT:
1581 case TGSI_TEXTURE_SHADOW2D:
1582 case TGSI_TEXTURE_SHADOWRECT:
1583 FETCH(&r[0], 0, CHAN_X);
1584 FETCH(&r[1], 0, CHAN_Y);
1585 FETCH(&r[2], 0, CHAN_Z);
1586
1587 if (modifier == TEX_MODIFIER_PROJECTED) {
1588 micro_div(&r[0], &r[0], &r[3]);
1589 micro_div(&r[1], &r[1], &r[3]);
1590 micro_div(&r[2], &r[2], &r[3]);
1591 }
1592
1593 fetch_texel(mach->Samplers[unit],
1594 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
1595 control,
1596 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1597 break;
1598
1599 case TGSI_TEXTURE_3D:
1600 case TGSI_TEXTURE_CUBE:
1601 FETCH(&r[0], 0, CHAN_X);
1602 FETCH(&r[1], 0, CHAN_Y);
1603 FETCH(&r[2], 0, CHAN_Z);
1604
1605 if (modifier == TEX_MODIFIER_PROJECTED) {
1606 micro_div(&r[0], &r[0], &r[3]);
1607 micro_div(&r[1], &r[1], &r[3]);
1608 micro_div(&r[2], &r[2], &r[3]);
1609 }
1610
1611 fetch_texel(mach->Samplers[unit],
1612 &r[0], &r[1], &r[2], lod,
1613 control,
1614 &r[0], &r[1], &r[2], &r[3]);
1615 break;
1616
1617 default:
1618 assert(0);
1619 }
1620
1621 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1622 STORE(&r[chan_index], 0, chan_index);
1623 }
1624 }
1625
1626 static void
1627 exec_txd(struct tgsi_exec_machine *mach,
1628 const struct tgsi_full_instruction *inst)
1629 {
1630 const uint unit = inst->Src[3].Register.Index;
1631 union tgsi_exec_channel r[4];
1632 uint chan_index;
1633
1634 /*
1635 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1636 */
1637
1638 switch (inst->Texture.Texture) {
1639 case TGSI_TEXTURE_1D:
1640 case TGSI_TEXTURE_SHADOW1D:
1641
1642 FETCH(&r[0], 0, CHAN_X);
1643
1644 fetch_texel(mach->Samplers[unit],
1645 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */
1646 tgsi_sampler_lod_bias,
1647 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1648 break;
1649
1650 case TGSI_TEXTURE_2D:
1651 case TGSI_TEXTURE_RECT:
1652 case TGSI_TEXTURE_SHADOW2D:
1653 case TGSI_TEXTURE_SHADOWRECT:
1654
1655 FETCH(&r[0], 0, CHAN_X);
1656 FETCH(&r[1], 0, CHAN_Y);
1657 FETCH(&r[2], 0, CHAN_Z);
1658
1659 fetch_texel(mach->Samplers[unit],
1660 &r[0], &r[1], &r[2], &ZeroVec, /* inputs */
1661 tgsi_sampler_lod_bias,
1662 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1663 break;
1664
1665 case TGSI_TEXTURE_3D:
1666 case TGSI_TEXTURE_CUBE:
1667
1668 FETCH(&r[0], 0, CHAN_X);
1669 FETCH(&r[1], 0, CHAN_Y);
1670 FETCH(&r[2], 0, CHAN_Z);
1671
1672 fetch_texel(mach->Samplers[unit],
1673 &r[0], &r[1], &r[2], &ZeroVec,
1674 tgsi_sampler_lod_bias,
1675 &r[0], &r[1], &r[2], &r[3]);
1676 break;
1677
1678 default:
1679 assert(0);
1680 }
1681
1682 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1683 STORE(&r[chan_index], 0, chan_index);
1684 }
1685 }
1686
1687
1688 /**
1689 * Evaluate a constant-valued coefficient at the position of the
1690 * current quad.
1691 */
1692 static void
1693 eval_constant_coef(
1694 struct tgsi_exec_machine *mach,
1695 unsigned attrib,
1696 unsigned chan )
1697 {
1698 unsigned i;
1699
1700 for( i = 0; i < QUAD_SIZE; i++ ) {
1701 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1702 }
1703 }
1704
1705 /**
1706 * Evaluate a linear-valued coefficient at the position of the
1707 * current quad.
1708 */
1709 static void
1710 eval_linear_coef(
1711 struct tgsi_exec_machine *mach,
1712 unsigned attrib,
1713 unsigned chan )
1714 {
1715 const float x = mach->QuadPos.xyzw[0].f[0];
1716 const float y = mach->QuadPos.xyzw[1].f[0];
1717 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1718 const float dady = mach->InterpCoefs[attrib].dady[chan];
1719 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1720 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1721 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1722 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1723 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1724 }
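/*
 * The local 'a0' above is the attribute value at the quad's first pixel;
 * the remaining channels step one pixel in x and/or y by adding dadx
 * and/or dady, matching the channel layout described near the TILE_*
 * defines.
 */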
1725
1726 /**
1727 * Evaluate a perspective-valued coefficient at the position of the
1728 * current quad.
1729 */
1730 static void
1731 eval_perspective_coef(
1732 struct tgsi_exec_machine *mach,
1733 unsigned attrib,
1734 unsigned chan )
1735 {
1736 const float x = mach->QuadPos.xyzw[0].f[0];
1737 const float y = mach->QuadPos.xyzw[1].f[0];
1738 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1739 const float dady = mach->InterpCoefs[attrib].dady[chan];
1740 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1741 const float *w = mach->QuadPos.xyzw[3].f;
1742 /* divide by W here */
1743 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1744 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1745 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1746 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1747 }
1748
1749
1750 typedef void (* eval_coef_func)(
1751 struct tgsi_exec_machine *mach,
1752 unsigned attrib,
1753 unsigned chan );
1754
1755 static void
1756 exec_declaration(struct tgsi_exec_machine *mach,
1757 const struct tgsi_full_declaration *decl)
1758 {
1759 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
1760 if (decl->Declaration.File == TGSI_FILE_INPUT ||
1761 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1762 uint first, last, mask;
1763
1764 first = decl->Range.First;
1765 last = decl->Range.Last;
1766 mask = decl->Declaration.UsageMask;
1767
1768 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
1769 uint i;
1770
1771 assert(decl->Semantic.Index == 0);
1772 assert(first == last);
1773
1774 for (i = 0; i < QUAD_SIZE; i++) {
1775 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
1776 }
1777 } else {
1778 eval_coef_func eval;
1779 uint i, j;
1780
1781 switch (decl->Declaration.Interpolate) {
1782 case TGSI_INTERPOLATE_CONSTANT:
1783 eval = eval_constant_coef;
1784 break;
1785
1786 case TGSI_INTERPOLATE_LINEAR:
1787 eval = eval_linear_coef;
1788 break;
1789
1790 case TGSI_INTERPOLATE_PERSPECTIVE:
1791 eval = eval_perspective_coef;
1792 break;
1793
1794 default:
1795 assert(0);
1796 return;
1797 }
1798
1799 for (j = 0; j < NUM_CHANNELS; j++) {
1800 if (mask & (1 << j)) {
1801 for (i = first; i <= last; i++) {
1802 eval(mach, i, j);
1803 }
1804 }
1805 }
1806 }
1807 }
1808 }
1809 }
1810
1811 typedef void (* micro_op)(union tgsi_exec_channel *dst,
1812 const union tgsi_exec_channel *src);
1813
1814 static void
1815 exec_scalar_unary(struct tgsi_exec_machine *mach,
1816 const struct tgsi_full_instruction *inst,
1817 micro_op op,
1818 enum tgsi_exec_datatype dst_datatype,
1819 enum tgsi_exec_datatype src_datatype)
1820 {
1821 unsigned int chan;
1822 union tgsi_exec_channel src;
1823 union tgsi_exec_channel dst;
1824
1825 fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype);
1826 op(&dst, &src);
1827 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1828 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1829 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
1830 }
1831 }
1832 }
1833
1834 static void
1835 exec_vector_unary(struct tgsi_exec_machine *mach,
1836 const struct tgsi_full_instruction *inst,
1837 micro_op op,
1838 enum tgsi_exec_datatype dst_datatype,
1839 enum tgsi_exec_datatype src_datatype)
1840 {
1841 unsigned int chan;
1842 struct tgsi_exec_vector dst;
1843
1844 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1845 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1846 union tgsi_exec_channel src;
1847
1848 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
1849 op(&dst.xyzw[chan], &src);
1850 }
1851 }
1852 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1853 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1854 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1855 }
1856 }
1857 }
1858
1859 static void
1860 exec_vector_binary(struct tgsi_exec_machine *mach,
1861 const struct tgsi_full_instruction *inst,
1862 micro_op op,
1863 enum tgsi_exec_datatype dst_datatype,
1864 enum tgsi_exec_datatype src_datatype)
1865 {
1866 unsigned int chan;
1867 struct tgsi_exec_vector dst;
1868
1869 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1870 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1871 union tgsi_exec_channel src[2];
1872
1873 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
1874 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
1875 op(&dst.xyzw[chan], src);
1876 }
1877 }
1878 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1879 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1880 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1881 }
1882 }
1883 }
1884
1885 static void
1886 exec_vector_trinary(struct tgsi_exec_machine *mach,
1887 const struct tgsi_full_instruction *inst,
1888 micro_op op,
1889 enum tgsi_exec_datatype dst_datatype,
1890 enum tgsi_exec_datatype src_datatype)
1891 {
1892 unsigned int chan;
1893 struct tgsi_exec_vector dst;
1894
1895 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1896 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1897 union tgsi_exec_channel src[3];
1898
1899 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
1900 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
1901 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
1902 op(&dst.xyzw[chan], src);
1903 }
1904 }
1905 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1906 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1907 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1908 }
1909 }
1910 }
1911
1912 static void
1913 exec_dp3(struct tgsi_exec_machine *mach,
1914 const struct tgsi_full_instruction *inst)
1915 {
1916 unsigned int chan;
1917 union tgsi_exec_channel arg[3];
1918
1919 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1920 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1921 micro_mul(&arg[2], &arg[0], &arg[1]);
1922
1923 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) {
1924 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
1925 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
1926 micro_mad(&arg[2], arg);
1927 }
1928
1929 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1930 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1931 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1932 }
1933 }
1934 }
1935
1936 static void
1937 exec_dp4(struct tgsi_exec_machine *mach,
1938 const struct tgsi_full_instruction *inst)
1939 {
1940 unsigned int chan;
1941 union tgsi_exec_channel arg[3];
1942
1943 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1944 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1945 micro_mul(&arg[2], &arg[0], &arg[1]);
1946
1947 for (chan = CHAN_Y; chan <= CHAN_W; chan++) {
1948 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
1949 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
1950 micro_mad(&arg[2], arg);
1951 }
1952
1953 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1954 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1955 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1956 }
1957 }
1958 }
1959
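/**
 * DP2A - 2-component dot product with scalar add:
 *   dst = src0.x * src1.x + src0.y * src1.y + src2.x
 * (replicated to every enabled destination channel)
 */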
1960 static void
1961 exec_dp2a(struct tgsi_exec_machine *mach,
1962 const struct tgsi_full_instruction *inst)
1963 {
1964 unsigned int chan;
1965 union tgsi_exec_channel arg[3];
1966
1967 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1968 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1969 micro_mul(&arg[2], &arg[0], &arg[1]);
1970
1971 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
1972 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
1973 micro_mad(&arg[0], arg);
1974
1975 fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1976 micro_add(&arg[0], &arg[0], &arg[1]);
1977
1978 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1979 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1980 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1981 }
1982 }
1983 }
1984
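     /**
      * DPH: homogeneous dot product -- dp3(src0.xyz, src1.xyz) + src1.w,
      * replicated to every write-enabled destination channel.
      */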
1985 static void
1986 exec_dph(struct tgsi_exec_machine *mach,
1987 const struct tgsi_full_instruction *inst)
1988 {
1989 unsigned int chan;
1990 union tgsi_exec_channel arg[3];
1991
1992 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1993 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1994 micro_mul(&arg[2], &arg[0], &arg[1]);
1995
1996 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
1997 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
1998 micro_mad(&arg[2], arg);
1999
2000 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2001 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2002 micro_mad(&arg[0], arg);
2003
2004 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT);
2005 micro_add(&arg[0], &arg[0], &arg[1]);
2006
2007 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2008 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2009 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2010 }
2011 }
2012 }
2013
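     /**
      * DP2: two-component dot product of src0 and src1, replicated to
      * every write-enabled destination channel.
      */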
2014 static void
2015 exec_dp2(struct tgsi_exec_machine *mach,
2016 const struct tgsi_full_instruction *inst)
2017 {
2018 unsigned int chan;
2019 union tgsi_exec_channel arg[3];
2020
2021 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2022 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2023 micro_mul(&arg[2], &arg[0], &arg[1]);
2024
2025 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2026 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2027 micro_mad(&arg[2], arg);
2028
2029 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2030 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2031 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2032 }
2033 }
2034 }
2035
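     /**
      * NRM4: normalize the four-component vector in src0 by multiplying
      * each component with 1/sqrt(dp4(src0, src0)); only write-enabled
      * channels are stored.
      */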
2036 static void
2037 exec_nrm4(struct tgsi_exec_machine *mach,
2038 const struct tgsi_full_instruction *inst)
2039 {
2040 unsigned int chan;
2041 union tgsi_exec_channel arg[4];
2042 union tgsi_exec_channel scale;
2043
2044 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2045 micro_mul(&scale, &arg[0], &arg[0]);
2046
2047 for (chan = CHAN_Y; chan <= CHAN_W; chan++) {
2048 union tgsi_exec_channel product;
2049
2050 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2051 micro_mul(&product, &arg[chan], &arg[chan]);
2052 micro_add(&scale, &scale, &product);
2053 }
2054
2055 micro_rsq(&scale, &scale);
2056
2057 for (chan = CHAN_X; chan <= CHAN_W; chan++) {
2058 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2059 micro_mul(&arg[chan], &arg[chan], &scale);
2060 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2061 }
2062 }
2063 }
2064
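     /**
      * NRM: normalize the three-component vector in src0.xyz by multiplying
      * each component with 1/sqrt(dp3(src0.xyz, src0.xyz)); the W channel,
      * if write-enabled, is set to 1.
      */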
2065 static void
2066 exec_nrm3(struct tgsi_exec_machine *mach,
2067 const struct tgsi_full_instruction *inst)
2068 {
2069 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
2070 unsigned int chan;
2071 union tgsi_exec_channel arg[3];
2072 union tgsi_exec_channel scale;
2073
2074 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2075 micro_mul(&scale, &arg[0], &arg[0]);
2076
2077 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) {
2078 union tgsi_exec_channel product;
2079
2080 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2081 micro_mul(&product, &arg[chan], &arg[chan]);
2082 micro_add(&scale, &scale, &product);
2083 }
2084
2085 micro_rsq(&scale, &scale);
2086
2087 for (chan = CHAN_X; chan <= CHAN_Z; chan++) {
2088 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2089 micro_mul(&arg[chan], &arg[chan], &scale);
2090 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2091 }
2092 }
2093 }
2094
2095 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2096 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2097 }
2098 }
2099
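     /**
      * BRK: turn off the loop or switch channels (depending on what we are
      * breaking out of) for every currently enabled execution channel.
      */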
2100 static void
2101 exec_break(struct tgsi_exec_machine *mach)
2102 {
2103 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
2104 /* turn off loop channels for each enabled exec channel */
2105 mach->LoopMask &= ~mach->ExecMask;
2106 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2107 UPDATE_EXEC_MASK(mach);
2108 } else {
2109 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
2110
2111 mach->Switch.mask = 0x0;
2112
2113 UPDATE_EXEC_MASK(mach);
2114 }
2115 }
2116
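     /**
      * SWITCH: push the current switch state, latch the per-channel selector
      * from src0.x and start with empty case/default masks.
      */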
2117 static void
2118 exec_switch(struct tgsi_exec_machine *mach,
2119 const struct tgsi_full_instruction *inst)
2120 {
2121 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
2122 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
2123
2124 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
2125 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
2126 mach->Switch.mask = 0x0;
2127 mach->Switch.defaultMask = 0x0;
2128
2129 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
2130 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
2131
2132 UPDATE_EXEC_MASK(mach);
2133 }
2134
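     /**
      * CASE: enable the channels whose latched selector equals src0.x,
      * restricted to the channels that were live when the switch was entered.
      */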
2135 static void
2136 exec_case(struct tgsi_exec_machine *mach,
2137 const struct tgsi_full_instruction *inst)
2138 {
2139 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
2140 union tgsi_exec_channel src;
2141 uint mask = 0;
2142
2143 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
2144
2145 if (mach->Switch.selector.u[0] == src.u[0]) {
2146 mask |= 0x1;
2147 }
2148 if (mach->Switch.selector.u[1] == src.u[1]) {
2149 mask |= 0x2;
2150 }
2151 if (mach->Switch.selector.u[2] == src.u[2]) {
2152 mask |= 0x4;
2153 }
2154 if (mach->Switch.selector.u[3] == src.u[3]) {
2155 mask |= 0x8;
2156 }
2157
2158 mach->Switch.defaultMask |= mask;
2159
2160 mach->Switch.mask |= mask & prevMask;
2161
2162 UPDATE_EXEC_MASK(mach);
2163 }
2164
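     /**
      * DEFAULT: enable the channels that did not match any preceding CASE.
      */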
2165 static void
2166 exec_default(struct tgsi_exec_machine *mach)
2167 {
2168 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
2169
2170 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
2171
2172 UPDATE_EXEC_MASK(mach);
2173 }
2174
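     /**
      * ENDSWITCH: pop the switch state and break type saved by SWITCH.
      */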
2175 static void
2176 exec_endswitch(struct tgsi_exec_machine *mach)
2177 {
2178 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
2179 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
2180
2181 UPDATE_EXEC_MASK(mach);
2182 }
2183
2184 static void
2185 micro_i2f(union tgsi_exec_channel *dst,
2186 const union tgsi_exec_channel *src)
2187 {
2188 dst->f[0] = (float)src->i[0];
2189 dst->f[1] = (float)src->i[1];
2190 dst->f[2] = (float)src->i[2];
2191 dst->f[3] = (float)src->i[3];
2192 }
2193
2194 static void
2195 micro_not(union tgsi_exec_channel *dst,
2196 const union tgsi_exec_channel *src)
2197 {
2198 dst->u[0] = ~src->u[0];
2199 dst->u[1] = ~src->u[1];
2200 dst->u[2] = ~src->u[2];
2201 dst->u[3] = ~src->u[3];
2202 }
2203
2204 static void
2205 micro_shl(union tgsi_exec_channel *dst,
2206 const union tgsi_exec_channel *src)
2207 {
2208 dst->u[0] = src[0].u[0] << src[1].u[0];
2209 dst->u[1] = src[0].u[1] << src[1].u[1];
2210 dst->u[2] = src[0].u[2] << src[1].u[2];
2211 dst->u[3] = src[0].u[3] << src[1].u[3];
2212 }
2213
2214 static void
2215 micro_and(union tgsi_exec_channel *dst,
2216 const union tgsi_exec_channel *src)
2217 {
2218 dst->u[0] = src[0].u[0] & src[1].u[0];
2219 dst->u[1] = src[0].u[1] & src[1].u[1];
2220 dst->u[2] = src[0].u[2] & src[1].u[2];
2221 dst->u[3] = src[0].u[3] & src[1].u[3];
2222 }
2223
2224 static void
2225 micro_or(union tgsi_exec_channel *dst,
2226 const union tgsi_exec_channel *src)
2227 {
2228 dst->u[0] = src[0].u[0] | src[1].u[0];
2229 dst->u[1] = src[0].u[1] | src[1].u[1];
2230 dst->u[2] = src[0].u[2] | src[1].u[2];
2231 dst->u[3] = src[0].u[3] | src[1].u[3];
2232 }
2233
2234 static void
2235 micro_xor(union tgsi_exec_channel *dst,
2236 const union tgsi_exec_channel *src)
2237 {
2238 dst->u[0] = src[0].u[0] ^ src[1].u[0];
2239 dst->u[1] = src[0].u[1] ^ src[1].u[1];
2240 dst->u[2] = src[0].u[2] ^ src[1].u[2];
2241 dst->u[3] = src[0].u[3] ^ src[1].u[3];
2242 }
2243
2244 static void
2245 micro_f2i(union tgsi_exec_channel *dst,
2246 const union tgsi_exec_channel *src)
2247 {
2248 dst->i[0] = (int)src->f[0];
2249 dst->i[1] = (int)src->f[1];
2250 dst->i[2] = (int)src->f[2];
2251 dst->i[3] = (int)src->f[3];
2252 }
2253
2254 static void
2255 micro_idiv(union tgsi_exec_channel *dst,
2256 const union tgsi_exec_channel *src)
2257 {
2258 dst->i[0] = src[0].i[0] / src[1].i[0];
2259 dst->i[1] = src[0].i[1] / src[1].i[1];
2260 dst->i[2] = src[0].i[2] / src[1].i[2];
2261 dst->i[3] = src[0].i[3] / src[1].i[3];
2262 }
2263
2264 static void
2265 micro_imax(union tgsi_exec_channel *dst,
2266 const union tgsi_exec_channel *src)
2267 {
2268 dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0];
2269 dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1];
2270 dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2];
2271 dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3];
2272 }
2273
2274 static void
2275 micro_imin(union tgsi_exec_channel *dst,
2276 const union tgsi_exec_channel *src)
2277 {
2278 dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0];
2279 dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1];
2280 dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2];
2281 dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3];
2282 }
2283
2284 static void
2285 micro_isge(union tgsi_exec_channel *dst,
2286 const union tgsi_exec_channel *src)
2287 {
2288 dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0;
2289 dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0;
2290 dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0;
2291 dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0;
2292 }
2293
2294 static void
2295 micro_ishr(union tgsi_exec_channel *dst,
2296 const union tgsi_exec_channel *src)
2297 {
2298 dst->i[0] = src[0].i[0] >> src[1].i[0];
2299 dst->i[1] = src[0].i[1] >> src[1].i[1];
2300 dst->i[2] = src[0].i[2] >> src[1].i[2];
2301 dst->i[3] = src[0].i[3] >> src[1].i[3];
2302 }
2303
2304 static void
2305 micro_islt(union tgsi_exec_channel *dst,
2306 const union tgsi_exec_channel *src)
2307 {
2308 dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0;
2309 dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0;
2310 dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0;
2311 dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0;
2312 }
2313
2314 static void
2315 micro_f2u(union tgsi_exec_channel *dst,
2316 const union tgsi_exec_channel *src)
2317 {
2318 dst->u[0] = (uint)src->f[0];
2319 dst->u[1] = (uint)src->f[1];
2320 dst->u[2] = (uint)src->f[2];
2321 dst->u[3] = (uint)src->f[3];
2322 }
2323
2324 static void
2325 micro_u2f(union tgsi_exec_channel *dst,
2326 const union tgsi_exec_channel *src)
2327 {
2328 dst->f[0] = (float)src->u[0];
2329 dst->f[1] = (float)src->u[1];
2330 dst->f[2] = (float)src->u[2];
2331 dst->f[3] = (float)src->u[3];
2332 }
2333
2334 static void
2335 micro_uadd(union tgsi_exec_channel *dst,
2336 const union tgsi_exec_channel *src)
2337 {
2338 dst->u[0] = src[0].u[0] + src[1].u[0];
2339 dst->u[1] = src[0].u[1] + src[1].u[1];
2340 dst->u[2] = src[0].u[2] + src[1].u[2];
2341 dst->u[3] = src[0].u[3] + src[1].u[3];
2342 }
2343
2344 static void
2345 micro_udiv(union tgsi_exec_channel *dst,
2346 const union tgsi_exec_channel *src)
2347 {
2348 dst->u[0] = src[0].u[0] / src[1].u[0];
2349 dst->u[1] = src[0].u[1] / src[1].u[1];
2350 dst->u[2] = src[0].u[2] / src[1].u[2];
2351 dst->u[3] = src[0].u[3] / src[1].u[3];
2352 }
2353
2354 static void
2355 micro_umad(union tgsi_exec_channel *dst,
2356 const union tgsi_exec_channel *src)
2357 {
2358 dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0];
2359 dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1];
2360 dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2];
2361 dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3];
2362 }
2363
2364 static void
2365 micro_umax(union tgsi_exec_channel *dst,
2366 const union tgsi_exec_channel *src)
2367 {
2368 dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0];
2369 dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1];
2370 dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2];
2371 dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3];
2372 }
2373
2374 static void
2375 micro_umin(union tgsi_exec_channel *dst,
2376 const union tgsi_exec_channel *src)
2377 {
2378 dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0];
2379 dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1];
2380 dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2];
2381 dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3];
2382 }
2383
2384 static void
2385 micro_umod(union tgsi_exec_channel *dst,
2386 const union tgsi_exec_channel *src)
2387 {
2388 dst->u[0] = src[0].u[0] % src[1].u[0];
2389 dst->u[1] = src[0].u[1] % src[1].u[1];
2390 dst->u[2] = src[0].u[2] % src[1].u[2];
2391 dst->u[3] = src[0].u[3] % src[1].u[3];
2392 }
2393
2394 static void
2395 micro_umul(union tgsi_exec_channel *dst,
2396 const union tgsi_exec_channel *src)
2397 {
2398 dst->u[0] = src[0].u[0] * src[1].u[0];
2399 dst->u[1] = src[0].u[1] * src[1].u[1];
2400 dst->u[2] = src[0].u[2] * src[1].u[2];
2401 dst->u[3] = src[0].u[3] * src[1].u[3];
2402 }
2403
2404 static void
2405 micro_useq(union tgsi_exec_channel *dst,
2406 const union tgsi_exec_channel *src)
2407 {
2408 dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0;
2409 dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0;
2410 dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0;
2411 dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0;
2412 }
2413
2414 static void
2415 micro_usge(union tgsi_exec_channel *dst,
2416 const union tgsi_exec_channel *src)
2417 {
2418 dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0;
2419 dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0;
2420 dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0;
2421 dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0;
2422 }
2423
2424 static void
2425 micro_ushr(union tgsi_exec_channel *dst,
2426 const union tgsi_exec_channel *src)
2427 {
2428 dst->u[0] = src[0].u[0] >> src[1].u[0];
2429 dst->u[1] = src[0].u[1] >> src[1].u[1];
2430 dst->u[2] = src[0].u[2] >> src[1].u[2];
2431 dst->u[3] = src[0].u[3] >> src[1].u[3];
2432 }
2433
2434 static void
2435 micro_uslt(union tgsi_exec_channel *dst,
2436 const union tgsi_exec_channel *src)
2437 {
2438 dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0;
2439 dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0;
2440 dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0;
2441 dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0;
2442 }
2443
2444 static void
2445 micro_usne(union tgsi_exec_channel *dst,
2446 const union tgsi_exec_channel *src)
2447 {
2448 dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0;
2449 dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0;
2450 dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0;
2451 dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0;
2452 }
2453
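     /**
      * Execute a single TGSI instruction, updating the program counter
      * for flow-control opcodes.
      */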
2454 static void
2455 exec_instruction(
2456 struct tgsi_exec_machine *mach,
2457 const struct tgsi_full_instruction *inst,
2458 int *pc )
2459 {
2460 uint chan_index;
2461 union tgsi_exec_channel r[10];
2462 union tgsi_exec_channel d[8];
2463
2464 (*pc)++;
2465
2466 switch (inst->Instruction.Opcode) {
2467 case TGSI_OPCODE_ARL:
2468 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
2469 break;
2470
2471 case TGSI_OPCODE_MOV:
2472 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
2473 break;
2474
2475 case TGSI_OPCODE_LIT:
2476 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2477 FETCH( &r[0], 0, CHAN_X );
2478 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2479 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2480 }
2481
2482 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2483 FETCH( &r[1], 0, CHAN_Y );
2484 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2485
2486 FETCH( &r[2], 0, CHAN_W );
2487 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
2488 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
2489 micro_pow( &r[1], &r[1], &r[2] );
2490 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2491 }
2492
2493 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2494 STORE(&d[CHAN_Y], 0, CHAN_Y);
2495 }
2496 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2497 STORE(&d[CHAN_Z], 0, CHAN_Z);
2498 }
2499 }
2500 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2501 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2502 }
2503 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2504 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2505 }
2506 break;
2507
2508 case TGSI_OPCODE_RCP:
2509 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2510 break;
2511
2512 case TGSI_OPCODE_RSQ:
2513 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2514 break;
2515
2516 case TGSI_OPCODE_EXP:
2517 FETCH( &r[0], 0, CHAN_X );
2518 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
2519 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2520 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
2521 STORE( &r[2], 0, CHAN_X ); /* store r2 */
2522 }
2523 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2524 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
2525 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
2526 }
2527 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2528 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
2529 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
2530 }
2531 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2532 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2533 }
2534 break;
2535
2536 case TGSI_OPCODE_LOG:
2537 FETCH( &r[0], 0, CHAN_X );
2538 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
2539 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
2540 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
2541 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2542 STORE( &r[0], 0, CHAN_X );
2543 }
2544 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2545 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
2546 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
2547 STORE( &r[0], 0, CHAN_Y );
2548 }
2549 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2550 STORE( &r[1], 0, CHAN_Z );
2551 }
2552 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2553 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2554 }
2555 break;
2556
2557 case TGSI_OPCODE_MUL:
2558 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2559 FETCH(&r[0], 0, chan_index);
2560 FETCH(&r[1], 1, chan_index);
2561 micro_mul(&d[chan_index], &r[0], &r[1]);
2562 }
2563 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2564 STORE(&d[chan_index], 0, chan_index);
2565 }
2566 break;
2567
2568 case TGSI_OPCODE_ADD:
2569 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2570 FETCH( &r[0], 0, chan_index );
2571 FETCH( &r[1], 1, chan_index );
2572 micro_add(&d[chan_index], &r[0], &r[1]);
2573 }
2574 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2575 STORE(&d[chan_index], 0, chan_index);
2576 }
2577 break;
2578
2579 case TGSI_OPCODE_DP3:
2580 exec_dp3(mach, inst);
2581 break;
2582
2583 case TGSI_OPCODE_DP4:
2584 exec_dp4(mach, inst);
2585 break;
2586
2587 case TGSI_OPCODE_DST:
2588 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2589 FETCH( &r[0], 0, CHAN_Y );
2590 FETCH( &r[1], 1, CHAN_Y);
2591 micro_mul(&d[CHAN_Y], &r[0], &r[1]);
2592 }
2593 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2594 FETCH(&d[CHAN_Z], 0, CHAN_Z);
2595 }
2596 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2597 FETCH(&d[CHAN_W], 1, CHAN_W);
2598 }
2599
2600 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2601 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
2602 }
2603 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2604 STORE(&d[CHAN_Y], 0, CHAN_Y);
2605 }
2606 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2607 STORE(&d[CHAN_Z], 0, CHAN_Z);
2608 }
2609 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2610 STORE(&d[CHAN_W], 0, CHAN_W);
2611 }
2612 break;
2613
2614 case TGSI_OPCODE_MIN:
2615 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2616 FETCH(&r[0], 0, chan_index);
2617 FETCH(&r[1], 1, chan_index);
2618
2619 /* XXX use micro_min()?? */
2620 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
2621 }
2622 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2623 STORE(&d[chan_index], 0, chan_index);
2624 }
2625 break;
2626
2627 case TGSI_OPCODE_MAX:
2628 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2629 FETCH(&r[0], 0, chan_index);
2630 FETCH(&r[1], 1, chan_index);
2631
2632 /* XXX use micro_max()?? */
2633 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
2634 }
2635 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2636 STORE(&d[chan_index], 0, chan_index);
2637 }
2638 break;
2639
2640 case TGSI_OPCODE_SLT:
2641 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2642 break;
2643
2644 case TGSI_OPCODE_SGE:
2645 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2646 break;
2647
2648 case TGSI_OPCODE_MAD:
2649 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2650 break;
2651
2652 case TGSI_OPCODE_SUB:
2653 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2654 FETCH(&r[0], 0, chan_index);
2655 FETCH(&r[1], 1, chan_index);
2656 micro_sub(&d[chan_index], &r[0], &r[1]);
2657 }
2658 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2659 STORE(&d[chan_index], 0, chan_index);
2660 }
2661 break;
2662
2663 case TGSI_OPCODE_LRP:
2664 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2665 break;
2666
2667 case TGSI_OPCODE_CND:
2668 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2669 FETCH(&r[0], 0, chan_index);
2670 FETCH(&r[1], 1, chan_index);
2671 FETCH(&r[2], 2, chan_index);
2672 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2673 }
2674 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2675 STORE(&d[chan_index], 0, chan_index);
2676 }
2677 break;
2678
2679 case TGSI_OPCODE_DP2A:
2680 exec_dp2a(mach, inst);
2681 break;
2682
2683 case TGSI_OPCODE_FRC:
2684 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2685 break;
2686
2687 case TGSI_OPCODE_CLAMP:
2688 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2689 FETCH(&r[0], 0, chan_index);
2690 FETCH(&r[1], 1, chan_index);
2691 micro_max(&r[0], &r[0], &r[1]);
2692 FETCH(&r[1], 2, chan_index);
2693 micro_min(&d[chan_index], &r[0], &r[1]);
2694 }
2695 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2696 STORE(&d[chan_index], 0, chan_index);
2697 }
2698 break;
2699
2700 case TGSI_OPCODE_FLR:
2701 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2702 break;
2703
2704 case TGSI_OPCODE_ROUND:
2705 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2706 break;
2707
2708 case TGSI_OPCODE_EX2:
2709 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2710 break;
2711
2712 case TGSI_OPCODE_LG2:
2713 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2714 break;
2715
2716 case TGSI_OPCODE_POW:
2717 FETCH(&r[0], 0, CHAN_X);
2718 FETCH(&r[1], 1, CHAN_X);
2719
2720 micro_pow( &r[0], &r[0], &r[1] );
2721
2722 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2723 STORE( &r[0], 0, chan_index );
2724 }
2725 break;
2726
2727 case TGSI_OPCODE_XPD:
2728 FETCH(&r[0], 0, CHAN_Y);
2729 FETCH(&r[1], 1, CHAN_Z);
2730
2731 micro_mul( &r[2], &r[0], &r[1] );
2732
2733 FETCH(&r[3], 0, CHAN_Z);
2734 FETCH(&r[4], 1, CHAN_Y);
2735
2736 micro_mul( &r[5], &r[3], &r[4] );
2737 micro_sub(&d[CHAN_X], &r[2], &r[5]);
2738
2739 FETCH(&r[2], 1, CHAN_X);
2740
2741 micro_mul( &r[3], &r[3], &r[2] );
2742
2743 FETCH(&r[5], 0, CHAN_X);
2744
2745 micro_mul( &r[1], &r[1], &r[5] );
2746 micro_sub(&d[CHAN_Y], &r[3], &r[1]);
2747
2748 micro_mul( &r[5], &r[5], &r[4] );
2749 micro_mul( &r[0], &r[0], &r[2] );
2750 micro_sub(&d[CHAN_Z], &r[5], &r[0]);
2751
2752 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2753 STORE(&d[CHAN_X], 0, CHAN_X);
2754 }
2755 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2756 STORE(&d[CHAN_Y], 0, CHAN_Y);
2757 }
2758 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2759 STORE(&d[CHAN_Z], 0, CHAN_Z);
2760 }
2761 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2762 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2763 }
2764 break;
2765
2766 case TGSI_OPCODE_ABS:
2767 exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2768 break;
2769
2770 case TGSI_OPCODE_RCC:
2771 FETCH(&r[0], 0, CHAN_X);
2772 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2773 micro_float_clamp(&r[0], &r[0]);
2774 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2775 STORE(&r[0], 0, chan_index);
2776 }
2777 break;
2778
2779 case TGSI_OPCODE_DPH:
2780 exec_dph(mach, inst);
2781 break;
2782
2783 case TGSI_OPCODE_COS:
2784 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2785 break;
2786
2787 case TGSI_OPCODE_DDX:
2788 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2789 break;
2790
2791 case TGSI_OPCODE_DDY:
2792 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2793 break;
2794
2795 case TGSI_OPCODE_KILP:
2796 exec_kilp (mach, inst);
2797 break;
2798
2799 case TGSI_OPCODE_KIL:
2800 exec_kil (mach, inst);
2801 break;
2802
2803 case TGSI_OPCODE_PK2H:
2804 assert (0);
2805 break;
2806
2807 case TGSI_OPCODE_PK2US:
2808 assert (0);
2809 break;
2810
2811 case TGSI_OPCODE_PK4B:
2812 assert (0);
2813 break;
2814
2815 case TGSI_OPCODE_PK4UB:
2816 assert (0);
2817 break;
2818
2819 case TGSI_OPCODE_RFL:
2820 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2821 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2822 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2823 /* r0 = dp3(src0, src0) */
2824 FETCH(&r[2], 0, CHAN_X);
2825 micro_mul(&r[0], &r[2], &r[2]);
2826 FETCH(&r[4], 0, CHAN_Y);
2827 micro_mul(&r[8], &r[4], &r[4]);
2828 micro_add(&r[0], &r[0], &r[8]);
2829 FETCH(&r[6], 0, CHAN_Z);
2830 micro_mul(&r[8], &r[6], &r[6]);
2831 micro_add(&r[0], &r[0], &r[8]);
2832
2833 /* r1 = dp3(src0, src1) */
2834 FETCH(&r[3], 1, CHAN_X);
2835 micro_mul(&r[1], &r[2], &r[3]);
2836 FETCH(&r[5], 1, CHAN_Y);
2837 micro_mul(&r[8], &r[4], &r[5]);
2838 micro_add(&r[1], &r[1], &r[8]);
2839 FETCH(&r[7], 1, CHAN_Z);
2840 micro_mul(&r[8], &r[6], &r[7]);
2841 micro_add(&r[1], &r[1], &r[8]);
2842
2843 /* r1 = 2 * r1 / r0 */
2844 micro_add(&r[1], &r[1], &r[1]);
2845 micro_div(&r[1], &r[1], &r[0]);
2846
2847 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2848 micro_mul(&r[2], &r[2], &r[1]);
2849 micro_sub(&r[2], &r[2], &r[3]);
2850 STORE(&r[2], 0, CHAN_X);
2851 }
2852 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2853 micro_mul(&r[4], &r[4], &r[1]);
2854 micro_sub(&r[4], &r[4], &r[5]);
2855 STORE(&r[4], 0, CHAN_Y);
2856 }
2857 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2858 micro_mul(&r[6], &r[6], &r[1]);
2859 micro_sub(&r[6], &r[6], &r[7]);
2860 STORE(&r[6], 0, CHAN_Z);
2861 }
2862 }
2863 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2864 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2865 }
2866 break;
2867
2868 case TGSI_OPCODE_SEQ:
2869 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2870 break;
2871
2872 case TGSI_OPCODE_SFL:
2873 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2874 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2875 }
2876 break;
2877
2878 case TGSI_OPCODE_SGT:
2879 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2880 break;
2881
2882 case TGSI_OPCODE_SIN:
2883 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2884 break;
2885
2886 case TGSI_OPCODE_SLE:
2887 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2888 break;
2889
2890 case TGSI_OPCODE_SNE:
2891 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2892 break;
2893
2894 case TGSI_OPCODE_STR:
2895 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2896 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2897 }
2898 break;
2899
2900 case TGSI_OPCODE_TEX:
2901 /* simple texture lookup */
2902 /* src[0] = texcoord */
2903 /* src[1] = sampler unit */
2904 exec_tex(mach, inst, TEX_MODIFIER_NONE);
2905 break;
2906
2907 case TGSI_OPCODE_TXB:
2908      /* Texture lookup with LOD bias */
2909 /* src[0] = texcoord (src[0].w = LOD bias) */
2910 /* src[1] = sampler unit */
2911 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS);
2912 break;
2913
2914 case TGSI_OPCODE_TXD:
2915      /* Texture lookup with explicit partial derivatives */
2916 /* src[0] = texcoord */
2917 /* src[1] = d[strq]/dx */
2918 /* src[2] = d[strq]/dy */
2919 /* src[3] = sampler unit */
2920 exec_txd(mach, inst);
2921 break;
2922
2923 case TGSI_OPCODE_TXL:
2924      /* Texture lookup with explicit LOD */
2925 /* src[0] = texcoord (src[0].w = LOD) */
2926 /* src[1] = sampler unit */
2927 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD);
2928 break;
2929
2930 case TGSI_OPCODE_TXP:
2931 /* Texture lookup with projection */
2932 /* src[0] = texcoord (src[0].w = projection) */
2933 /* src[1] = sampler unit */
2934 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED);
2935 break;
2936
2937 case TGSI_OPCODE_UP2H:
2938 assert (0);
2939 break;
2940
2941 case TGSI_OPCODE_UP2US:
2942 assert (0);
2943 break;
2944
2945 case TGSI_OPCODE_UP4B:
2946 assert (0);
2947 break;
2948
2949 case TGSI_OPCODE_UP4UB:
2950 assert (0);
2951 break;
2952
2953 case TGSI_OPCODE_X2D:
2954 FETCH(&r[0], 1, CHAN_X);
2955 FETCH(&r[1], 1, CHAN_Y);
2956 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2957 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2958 FETCH(&r[2], 2, CHAN_X);
2959 micro_mul(&r[2], &r[2], &r[0]);
2960 FETCH(&r[3], 2, CHAN_Y);
2961 micro_mul(&r[3], &r[3], &r[1]);
2962 micro_add(&r[2], &r[2], &r[3]);
2963 FETCH(&r[3], 0, CHAN_X);
2964 micro_add(&d[CHAN_X], &r[2], &r[3]);
2965
2966 }
2967 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2968 IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2969 FETCH(&r[2], 2, CHAN_Z);
2970 micro_mul(&r[2], &r[2], &r[0]);
2971 FETCH(&r[3], 2, CHAN_W);
2972 micro_mul(&r[3], &r[3], &r[1]);
2973 micro_add(&r[2], &r[2], &r[3]);
2974 FETCH(&r[3], 0, CHAN_Y);
2975 micro_add(&d[CHAN_Y], &r[2], &r[3]);
2976
2977 }
2978 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2979 STORE(&d[CHAN_X], 0, CHAN_X);
2980 }
2981 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2982 STORE(&d[CHAN_Y], 0, CHAN_Y);
2983 }
2984 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2985 STORE(&d[CHAN_X], 0, CHAN_Z);
2986 }
2987 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2988 STORE(&d[CHAN_Y], 0, CHAN_W);
2989 }
2990 break;
2991
2992 case TGSI_OPCODE_ARA:
2993 assert (0);
2994 break;
2995
2996 case TGSI_OPCODE_ARR:
2997 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
2998 break;
2999
3000 case TGSI_OPCODE_BRA:
3001 assert (0);
3002 break;
3003
3004 case TGSI_OPCODE_CAL:
3005 /* skip the call if no execution channels are enabled */
3006 if (mach->ExecMask) {
3007 /* do the call */
3008
3009 /* First, record the depths of the execution stacks.
3010 * This is important for deeply nested/looped return statements.
3011 * We have to unwind the stacks by the correct amount. For a
3012 * real code generator, we could determine the number of entries
3013 * to pop off each stack with simple static analysis and avoid
3014 * implementing this data structure at run time.
3015 */
3016 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
3017 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
3018 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
3019 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
3020 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
3021 /* note that PC was already incremented above */
3022 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
3023
3024 mach->CallStackTop++;
3025
3026 /* Second, push the Cond, Loop, Cont, Func stacks */
3027 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3028 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3029 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3030 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3031 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3032 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
3033
3034 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3035 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3036 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3037 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3038 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3039 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
3040
3041 /* Finally, jump to the subroutine */
3042 *pc = inst->Label.Label;
3043 }
3044 break;
3045
3046 case TGSI_OPCODE_RET:
3047 mach->FuncMask &= ~mach->ExecMask;
3048 UPDATE_EXEC_MASK(mach);
3049
3050 if (mach->FuncMask == 0x0) {
3051         /* really return now (otherwise, keep executing) */
3052
3053 if (mach->CallStackTop == 0) {
3054 /* returning from main() */
3055 *pc = -1;
3056 return;
3057 }
3058
3059 assert(mach->CallStackTop > 0);
3060 mach->CallStackTop--;
3061
3062 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3063 mach->CondMask = mach->CondStack[mach->CondStackTop];
3064
3065 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3066 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3067
3068 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3069 mach->ContMask = mach->ContStack[mach->ContStackTop];
3070
3071 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3072 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3073
3074 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3075 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3076
3077 assert(mach->FuncStackTop > 0);
3078 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3079
3080 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3081
3082 UPDATE_EXEC_MASK(mach);
3083 }
3084 break;
3085
3086 case TGSI_OPCODE_SSG:
3087 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3088 break;
3089
3090 case TGSI_OPCODE_CMP:
3091 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3092 FETCH(&r[0], 0, chan_index);
3093 FETCH(&r[1], 1, chan_index);
3094 FETCH(&r[2], 2, chan_index);
3095 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
3096 }
3097 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3098 STORE(&d[chan_index], 0, chan_index);
3099 }
3100 break;
3101
3102 case TGSI_OPCODE_SCS:
3103 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
3104 FETCH( &r[0], 0, CHAN_X );
3105 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
3106 micro_cos(&r[1], &r[0]);
3107 STORE(&r[1], 0, CHAN_X);
3108 }
3109 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
3110 micro_sin(&r[1], &r[0]);
3111 STORE(&r[1], 0, CHAN_Y);
3112 }
3113 }
3114 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
3115 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
3116 }
3117 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
3118 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
3119 }
3120 break;
3121
3122 case TGSI_OPCODE_NRM:
3123 exec_nrm3(mach, inst);
3124 break;
3125
3126 case TGSI_OPCODE_NRM4:
3127 exec_nrm4(mach, inst);
3128 break;
3129
3130 case TGSI_OPCODE_DIV:
3131 assert( 0 );
3132 break;
3133
3134 case TGSI_OPCODE_DP2:
3135 exec_dp2(mach, inst);
3136 break;
3137
3138 case TGSI_OPCODE_IF:
3139 /* push CondMask */
3140 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3141 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3142 FETCH( &r[0], 0, CHAN_X );
3143 /* update CondMask */
3144 if( ! r[0].u[0] ) {
3145 mach->CondMask &= ~0x1;
3146 }
3147 if( ! r[0].u[1] ) {
3148 mach->CondMask &= ~0x2;
3149 }
3150 if( ! r[0].u[2] ) {
3151 mach->CondMask &= ~0x4;
3152 }
3153 if( ! r[0].u[3] ) {
3154 mach->CondMask &= ~0x8;
3155 }
3156 UPDATE_EXEC_MASK(mach);
3157 /* Todo: If CondMask==0, jump to ELSE */
3158 break;
3159
3160 case TGSI_OPCODE_ELSE:
3161 /* invert CondMask wrt previous mask */
3162 {
3163 uint prevMask;
3164 assert(mach->CondStackTop > 0);
3165 prevMask = mach->CondStack[mach->CondStackTop - 1];
3166 mach->CondMask = ~mach->CondMask & prevMask;
3167 UPDATE_EXEC_MASK(mach);
3168 /* Todo: If CondMask==0, jump to ENDIF */
3169 }
3170 break;
3171
3172 case TGSI_OPCODE_ENDIF:
3173 /* pop CondMask */
3174 assert(mach->CondStackTop > 0);
3175 mach->CondMask = mach->CondStack[--mach->CondStackTop];
3176 UPDATE_EXEC_MASK(mach);
3177 break;
3178
3179 case TGSI_OPCODE_END:
3180 /* halt execution */
3181 *pc = -1;
3182 break;
3183
3184 case TGSI_OPCODE_REP:
3185 assert (0);
3186 break;
3187
3188 case TGSI_OPCODE_ENDREP:
3189 assert (0);
3190 break;
3191
3192 case TGSI_OPCODE_PUSHA:
3193 assert (0);
3194 break;
3195
3196 case TGSI_OPCODE_POPA:
3197 assert (0);
3198 break;
3199
3200 case TGSI_OPCODE_CEIL:
3201 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3202 break;
3203
3204 case TGSI_OPCODE_I2F:
3205 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
3206 break;
3207
3208 case TGSI_OPCODE_NOT:
3209 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3210 break;
3211
3212 case TGSI_OPCODE_TRUNC:
3213 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3214 break;
3215
3216 case TGSI_OPCODE_SHL:
3217 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3218 break;
3219
3220 case TGSI_OPCODE_AND:
3221 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3222 break;
3223
3224 case TGSI_OPCODE_OR:
3225 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3226 break;
3227
3228 case TGSI_OPCODE_MOD:
3229 assert (0);
3230 break;
3231
3232 case TGSI_OPCODE_XOR:
3233 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3234 break;
3235
3236 case TGSI_OPCODE_SAD:
3237 assert (0);
3238 break;
3239
3240 case TGSI_OPCODE_TXF:
3241 assert (0);
3242 break;
3243
3244 case TGSI_OPCODE_TXQ:
3245 assert (0);
3246 break;
3247
3248 case TGSI_OPCODE_EMIT:
3249 emit_vertex(mach);
3250 break;
3251
3252 case TGSI_OPCODE_ENDPRIM:
3253 emit_primitive(mach);
3254 break;
3255
3256 case TGSI_OPCODE_BGNFOR:
3257 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3258 for (chan_index = 0; chan_index < 3; chan_index++) {
3259 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
3260 }
3261 ++mach->LoopCounterStackTop;
3262 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
3263 /* update LoopMask */
3264 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3265 mach->LoopMask &= ~0x1;
3266 }
3267 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3268 mach->LoopMask &= ~0x2;
3269 }
3270 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3271 mach->LoopMask &= ~0x4;
3272 }
3273 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3274 mach->LoopMask &= ~0x8;
3275 }
3276 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3277 UPDATE_EXEC_MASK(mach);
3278 /* fall-through (for now) */
3279 case TGSI_OPCODE_BGNLOOP:
3280 /* push LoopMask and ContMasks */
3281 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3282 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3283 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3284 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3285
3286 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3287 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3288 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
3289 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3290 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
3291 break;
3292
3293 case TGSI_OPCODE_ENDFOR:
3294 assert(mach->LoopCounterStackTop > 0);
3295 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3296 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3297 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
3298 /* update LoopMask */
3299 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3300 mach->LoopMask &= ~0x1;
3301 }
3302 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3303 mach->LoopMask &= ~0x2;
3304 }
3305 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3306 mach->LoopMask &= ~0x4;
3307 }
3308 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3309 mach->LoopMask &= ~0x8;
3310 }
3311 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3312 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3313 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
3314 assert(mach->LoopLabelStackTop > 0);
3315 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
3316      STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
3317 /* Restore ContMask, but don't pop */
3318 assert(mach->ContStackTop > 0);
3319 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3320 UPDATE_EXEC_MASK(mach);
3321 if (mach->ExecMask) {
3322 /* repeat loop: jump to instruction just past BGNLOOP */
3323 assert(mach->LoopLabelStackTop > 0);
3324 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3325 }
3326 else {
3327 /* exit loop: pop LoopMask */
3328 assert(mach->LoopStackTop > 0);
3329 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3330 /* pop ContMask */
3331 assert(mach->ContStackTop > 0);
3332 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3333 assert(mach->LoopLabelStackTop > 0);
3334 --mach->LoopLabelStackTop;
3335 assert(mach->LoopCounterStackTop > 0);
3336 --mach->LoopCounterStackTop;
3337
3338 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3339 }
3340 UPDATE_EXEC_MASK(mach);
3341 break;
3342
3343 case TGSI_OPCODE_ENDLOOP:
3344 /* Restore ContMask, but don't pop */
3345 assert(mach->ContStackTop > 0);
3346 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3347 UPDATE_EXEC_MASK(mach);
3348 if (mach->ExecMask) {
3349 /* repeat loop: jump to instruction just past BGNLOOP */
3350 assert(mach->LoopLabelStackTop > 0);
3351 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3352 }
3353 else {
3354 /* exit loop: pop LoopMask */
3355 assert(mach->LoopStackTop > 0);
3356 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3357 /* pop ContMask */
3358 assert(mach->ContStackTop > 0);
3359 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3360 assert(mach->LoopLabelStackTop > 0);
3361 --mach->LoopLabelStackTop;
3362
3363 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3364 }
3365 UPDATE_EXEC_MASK(mach);
3366 break;
3367
3368 case TGSI_OPCODE_BRK:
3369 exec_break(mach);
3370 break;
3371
3372 case TGSI_OPCODE_CONT:
3373 /* turn off cont channels for each enabled exec channel */
3374 mach->ContMask &= ~mach->ExecMask;
3375 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3376 UPDATE_EXEC_MASK(mach);
3377 break;
3378
3379 case TGSI_OPCODE_BGNSUB:
3380 /* no-op */
3381 break;
3382
3383 case TGSI_OPCODE_ENDSUB:
3384 /*
3385 * XXX: This really should be a no-op. We should never reach this opcode.
3386 */
3387
3388 assert(mach->CallStackTop > 0);
3389 mach->CallStackTop--;
3390
3391 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3392 mach->CondMask = mach->CondStack[mach->CondStackTop];
3393
3394 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3395 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3396
3397 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3398 mach->ContMask = mach->ContStack[mach->ContStackTop];
3399
3400 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3401 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3402
3403 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3404 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3405
3406 assert(mach->FuncStackTop > 0);
3407 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3408
3409 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3410
3411 UPDATE_EXEC_MASK(mach);
3412 break;
3413
3414 case TGSI_OPCODE_NOP:
3415 break;
3416
3417 case TGSI_OPCODE_BREAKC:
3418 FETCH(&r[0], 0, CHAN_X);
3419      /* update LoopMask */
3420 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
3421 mach->LoopMask &= ~0x1;
3422 }
3423 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
3424 mach->LoopMask &= ~0x2;
3425 }
3426 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
3427 mach->LoopMask &= ~0x4;
3428 }
3429 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
3430 mach->LoopMask &= ~0x8;
3431 }
3432 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3433 UPDATE_EXEC_MASK(mach);
3434 break;
3435
3436 case TGSI_OPCODE_F2I:
3437 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3438 break;
3439
3440 case TGSI_OPCODE_IDIV:
3441 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3442 break;
3443
3444 case TGSI_OPCODE_IMAX:
3445 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3446 break;
3447
3448 case TGSI_OPCODE_IMIN:
3449 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3450 break;
3451
3452 case TGSI_OPCODE_INEG:
3453 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3454 break;
3455
3456 case TGSI_OPCODE_ISGE:
3457 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3458 break;
3459
3460 case TGSI_OPCODE_ISHR:
3461 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3462 break;
3463
3464 case TGSI_OPCODE_ISLT:
3465 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3466 break;
3467
3468 case TGSI_OPCODE_F2U:
3469 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
3470 break;
3471
3472 case TGSI_OPCODE_U2F:
3473 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
3474 break;
3475
3476 case TGSI_OPCODE_UADD:
3477 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3478 break;
3479
3480 case TGSI_OPCODE_UDIV:
3481 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3482 break;
3483
3484 case TGSI_OPCODE_UMAD:
3485 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3486 break;
3487
3488 case TGSI_OPCODE_UMAX:
3489 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3490 break;
3491
3492 case TGSI_OPCODE_UMIN:
3493 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3494 break;
3495
3496 case TGSI_OPCODE_UMOD:
3497 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3498 break;
3499
3500 case TGSI_OPCODE_UMUL:
3501 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3502 break;
3503
3504 case TGSI_OPCODE_USEQ:
3505 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3506 break;
3507
3508 case TGSI_OPCODE_USGE:
3509 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3510 break;
3511
3512 case TGSI_OPCODE_USHR:
3513 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3514 break;
3515
3516 case TGSI_OPCODE_USLT:
3517 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3518 break;
3519
3520 case TGSI_OPCODE_USNE:
3521 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3522 break;
3523
3524 case TGSI_OPCODE_SWITCH:
3525 exec_switch(mach, inst);
3526 break;
3527
3528 case TGSI_OPCODE_CASE:
3529 exec_case(mach, inst);
3530 break;
3531
3532 case TGSI_OPCODE_DEFAULT:
3533 exec_default(mach);
3534 break;
3535
3536 case TGSI_OPCODE_ENDSWITCH:
3537 exec_endswitch(mach);
3538 break;
3539
3540 default:
3541 assert( 0 );
3542 }
3543 }
3544
3545
3546 #define DEBUG_EXECUTION 0
3547
3548
3549 /**
3550 * Run TGSI interpreter.
3551 * \return bitmask of "alive" quad components
3552 */
3553 uint
3554 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3555 {
3556 uint i;
3557 int pc = 0;
3558
3559 mach->CondMask = 0xf;
3560 mach->LoopMask = 0xf;
3561 mach->ContMask = 0xf;
3562 mach->FuncMask = 0xf;
3563 mach->ExecMask = 0xf;
3564
3565 mach->Switch.mask = 0xf;
3566
3567 assert(mach->CondStackTop == 0);
3568 assert(mach->LoopStackTop == 0);
3569 assert(mach->ContStackTop == 0);
3570 assert(mach->SwitchStackTop == 0);
3571 assert(mach->BreakStackTop == 0);
3572 assert(mach->CallStackTop == 0);
3573
3574 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3575 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3576
3577 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3578 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3579 mach->Primitives[0] = 0;
3580 }
3581
3582 for (i = 0; i < QUAD_SIZE; i++) {
3583 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3584 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3585 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3586 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3587 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3588 }
3589
3590 /* execute declarations (interpolants) */
3591 for (i = 0; i < mach->NumDeclarations; i++) {
3592 exec_declaration( mach, mach->Declarations+i );
3593 }
3594
3595 {
3596 #if DEBUG_EXECUTION
3597 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
3598 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
3599 uint inst = 1;
3600
3601 memcpy(temps, mach->Temps, sizeof(temps));
3602 memcpy(outputs, mach->Outputs, sizeof(outputs));
3603 #endif
3604
3605 /* execute instructions, until pc is set to -1 */
3606 while (pc != -1) {
3607
3608 #if DEBUG_EXECUTION
3609 uint i;
3610
3611 tgsi_dump_instruction(&mach->Instructions[pc], inst++);
3612 #endif
3613
3614 assert(pc < (int) mach->NumInstructions);
3615 exec_instruction(mach, mach->Instructions + pc, &pc);
3616
3617 #if DEBUG_EXECUTION
3618 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
3619 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
3620 uint j;
3621
3622 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
3623 debug_printf("TEMP[%2u] = ", i);
3624 for (j = 0; j < 4; j++) {
3625 if (j > 0) {
3626 debug_printf(" ");
3627 }
3628 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3629 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
3630 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
3631 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
3632 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
3633 }
3634 }
3635 }
3636 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
3637 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
3638 uint j;
3639
3640 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
3641 debug_printf("OUT[%2u] = ", i);
3642 for (j = 0; j < 4; j++) {
3643 if (j > 0) {
3644 debug_printf(" ");
3645 }
3646 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3647 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
3648 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
3649 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
3650 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
3651 }
3652 }
3653 }
3654 #endif
3655 }
3656 }
3657
3658 #if 0
3659 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3660 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3661 /*
3662 * Scale back depth component.
3663 */
3664 for (i = 0; i < 4; i++)
3665 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3666 }
3667 #endif
3668
3669 assert(mach->CondStackTop == 0);
3670 assert(mach->LoopStackTop == 0);
3671 assert(mach->ContStackTop == 0);
3672 assert(mach->SwitchStackTop == 0);
3673 assert(mach->BreakStackTop == 0);
3674 assert(mach->CallStackTop == 0);
3675
3676 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3677 }