83646b73c1ee3d12735611a1463e89f5ebfaa031
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
 * The ExecMask is computed from several other masks (CondMask, LoopMask,
 * ContMask, Switch.mask and FuncMask; see UPDATE_EXEC_MASK).  The first
 * three are controlled by the flow control instructions IF/ELSE/ENDIF,
 * LOOP/ENDLOOP and CONT.
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
63
64
/* When non-zero, micro_exp2/micro_lg2/micro_pow use the util_fast_*()
 * helpers instead of the C math library.
 */
#define FAST_MATH 1

/* Positions of the four elements of a channel within a 2x2 quad,
 * as used by micro_ddx()/micro_ddy().
 */
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3
71
72 static void
73 micro_abs(union tgsi_exec_channel *dst,
74 const union tgsi_exec_channel *src)
75 {
76 dst->f[0] = fabsf(src->f[0]);
77 dst->f[1] = fabsf(src->f[1]);
78 dst->f[2] = fabsf(src->f[2]);
79 dst->f[3] = fabsf(src->f[3]);
80 }
81
82 static void
83 micro_arl(union tgsi_exec_channel *dst,
84 const union tgsi_exec_channel *src)
85 {
86 dst->i[0] = (int)floorf(src->f[0]);
87 dst->i[1] = (int)floorf(src->f[1]);
88 dst->i[2] = (int)floorf(src->f[2]);
89 dst->i[3] = (int)floorf(src->f[3]);
90 }
91
92 static void
93 micro_arr(union tgsi_exec_channel *dst,
94 const union tgsi_exec_channel *src)
95 {
96 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
97 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
98 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
99 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
100 }
101
102 static void
103 micro_ceil(union tgsi_exec_channel *dst,
104 const union tgsi_exec_channel *src)
105 {
106 dst->f[0] = ceilf(src->f[0]);
107 dst->f[1] = ceilf(src->f[1]);
108 dst->f[2] = ceilf(src->f[2]);
109 dst->f[3] = ceilf(src->f[3]);
110 }
111
112 static void
113 micro_cos(union tgsi_exec_channel *dst,
114 const union tgsi_exec_channel *src)
115 {
116 dst->f[0] = cosf(src->f[0]);
117 dst->f[1] = cosf(src->f[1]);
118 dst->f[2] = cosf(src->f[2]);
119 dst->f[3] = cosf(src->f[3]);
120 }
121
122 static void
123 micro_ddx(union tgsi_exec_channel *dst,
124 const union tgsi_exec_channel *src)
125 {
126 dst->f[0] =
127 dst->f[1] =
128 dst->f[2] =
129 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
130 }
131
132 static void
133 micro_ddy(union tgsi_exec_channel *dst,
134 const union tgsi_exec_channel *src)
135 {
136 dst->f[0] =
137 dst->f[1] =
138 dst->f[2] =
139 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
140 }
141
142 static void
143 micro_exp2(union tgsi_exec_channel *dst,
144 const union tgsi_exec_channel *src)
145 {
146 #if FAST_MATH
147 dst->f[0] = util_fast_exp2(src->f[0]);
148 dst->f[1] = util_fast_exp2(src->f[1]);
149 dst->f[2] = util_fast_exp2(src->f[2]);
150 dst->f[3] = util_fast_exp2(src->f[3]);
151 #else
152 #if DEBUG
153 /* Inf is okay for this instruction, so clamp it to silence assertions. */
154 uint i;
155 union tgsi_exec_channel clamped;
156
157 for (i = 0; i < 4; i++) {
158 if (src->f[i] > 127.99999f) {
159 clamped.f[i] = 127.99999f;
160 } else if (src->f[i] < -126.99999f) {
161 clamped.f[i] = -126.99999f;
162 } else {
163 clamped.f[i] = src->f[i];
164 }
165 }
166 src = &clamped;
167 #endif /* DEBUG */
168
169 dst->f[0] = powf(2.0f, src->f[0]);
170 dst->f[1] = powf(2.0f, src->f[1]);
171 dst->f[2] = powf(2.0f, src->f[2]);
172 dst->f[3] = powf(2.0f, src->f[3]);
173 #endif /* FAST_MATH */
174 }
175
176 static void
177 micro_flr(union tgsi_exec_channel *dst,
178 const union tgsi_exec_channel *src)
179 {
180 dst->f[0] = floorf(src->f[0]);
181 dst->f[1] = floorf(src->f[1]);
182 dst->f[2] = floorf(src->f[2]);
183 dst->f[3] = floorf(src->f[3]);
184 }
185
186 static void
187 micro_frc(union tgsi_exec_channel *dst,
188 const union tgsi_exec_channel *src)
189 {
190 dst->f[0] = src->f[0] - floorf(src->f[0]);
191 dst->f[1] = src->f[1] - floorf(src->f[1]);
192 dst->f[2] = src->f[2] - floorf(src->f[2]);
193 dst->f[3] = src->f[3] - floorf(src->f[3]);
194 }
195
196 static void
197 micro_iabs(union tgsi_exec_channel *dst,
198 const union tgsi_exec_channel *src)
199 {
200 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
201 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
202 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
203 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
204 }
205
206 static void
207 micro_ineg(union tgsi_exec_channel *dst,
208 const union tgsi_exec_channel *src)
209 {
210 dst->i[0] = -src->i[0];
211 dst->i[1] = -src->i[1];
212 dst->i[2] = -src->i[2];
213 dst->i[3] = -src->i[3];
214 }
215
216 static void
217 micro_lg2(union tgsi_exec_channel *dst,
218 const union tgsi_exec_channel *src)
219 {
220 #if FAST_MATH
221 dst->f[0] = util_fast_log2(src->f[0]);
222 dst->f[1] = util_fast_log2(src->f[1]);
223 dst->f[2] = util_fast_log2(src->f[2]);
224 dst->f[3] = util_fast_log2(src->f[3]);
225 #else
226 dst->f[0] = logf(src->f[0]) * 1.442695f;
227 dst->f[1] = logf(src->f[1]) * 1.442695f;
228 dst->f[2] = logf(src->f[2]) * 1.442695f;
229 dst->f[3] = logf(src->f[3]) * 1.442695f;
230 #endif
231 }
232
233 static void
234 micro_lrp(union tgsi_exec_channel *dst,
235 const union tgsi_exec_channel *src)
236 {
237 dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0];
238 dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1];
239 dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2];
240 dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3];
241 }
242
243 static void
244 micro_mad(union tgsi_exec_channel *dst,
245 const union tgsi_exec_channel *src)
246 {
247 dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0];
248 dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1];
249 dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2];
250 dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3];
251 }
252
253 static void
254 micro_mov(union tgsi_exec_channel *dst,
255 const union tgsi_exec_channel *src)
256 {
257 dst->u[0] = src->u[0];
258 dst->u[1] = src->u[1];
259 dst->u[2] = src->u[2];
260 dst->u[3] = src->u[3];
261 }
262
263 static void
264 micro_rcp(union tgsi_exec_channel *dst,
265 const union tgsi_exec_channel *src)
266 {
267 dst->f[0] = 1.0f / src->f[0];
268 dst->f[1] = 1.0f / src->f[1];
269 dst->f[2] = 1.0f / src->f[2];
270 dst->f[3] = 1.0f / src->f[3];
271 }
272
273 static void
274 micro_rnd(union tgsi_exec_channel *dst,
275 const union tgsi_exec_channel *src)
276 {
277 dst->f[0] = floorf(src->f[0] + 0.5f);
278 dst->f[1] = floorf(src->f[1] + 0.5f);
279 dst->f[2] = floorf(src->f[2] + 0.5f);
280 dst->f[3] = floorf(src->f[3] + 0.5f);
281 }
282
283 static void
284 micro_rsq(union tgsi_exec_channel *dst,
285 const union tgsi_exec_channel *src)
286 {
287 dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
288 dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
289 dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
290 dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
291 }
292
293 static void
294 micro_seq(union tgsi_exec_channel *dst,
295 const union tgsi_exec_channel *src)
296 {
297 dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f;
298 dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f;
299 dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f;
300 dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f;
301 }
302
303 static void
304 micro_sge(union tgsi_exec_channel *dst,
305 const union tgsi_exec_channel *src)
306 {
307 dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f;
308 dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f;
309 dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f;
310 dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f;
311 }
312
313 static void
314 micro_sgn(union tgsi_exec_channel *dst,
315 const union tgsi_exec_channel *src)
316 {
317 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
318 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
319 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
320 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
321 }
322
323 static void
324 micro_sgt(union tgsi_exec_channel *dst,
325 const union tgsi_exec_channel *src)
326 {
327 dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f;
328 dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f;
329 dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f;
330 dst->f[3] = src[0].f[3] > src[1].f[3] ? 1.0f : 0.0f;
331 }
332
333 static void
334 micro_sin(union tgsi_exec_channel *dst,
335 const union tgsi_exec_channel *src)
336 {
337 dst->f[0] = sinf(src->f[0]);
338 dst->f[1] = sinf(src->f[1]);
339 dst->f[2] = sinf(src->f[2]);
340 dst->f[3] = sinf(src->f[3]);
341 }
342
343 static void
344 micro_sle(union tgsi_exec_channel *dst,
345 const union tgsi_exec_channel *src)
346 {
347 dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f;
348 dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f;
349 dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f;
350 dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f;
351 }
352
353 static void
354 micro_slt(union tgsi_exec_channel *dst,
355 const union tgsi_exec_channel *src)
356 {
357 dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f;
358 dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f;
359 dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f;
360 dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f;
361 }
362
363 static void
364 micro_sne(union tgsi_exec_channel *dst,
365 const union tgsi_exec_channel *src)
366 {
367 dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f;
368 dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f;
369 dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f;
370 dst->f[3] = src[0].f[3] != src[1].f[3] ? 1.0f : 0.0f;
371 }
372
373 static void
374 micro_trunc(union tgsi_exec_channel *dst,
375 const union tgsi_exec_channel *src)
376 {
377 dst->f[0] = (float)(int)src->f[0];
378 dst->f[1] = (float)(int)src->f[1];
379 dst->f[2] = (float)(int)src->f[2];
380 dst->f[3] = (float)(int)src->f[3];
381 }
382
383
/* Numeric indices of the X, Y, Z and W channels. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
388
/**
 * Data type tag passed to fetch_source() to describe how a source
 * operand is to be interpreted.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT = 0,
   TGSI_EXEC_DATA_INT   = 1,
   TGSI_EXEC_DATA_UINT  = 2
};
394
/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel).
 *
 * Each pair names a slot in mach->Temps[]: _I is the register index and
 * _C the channel within it.  The constant-valued slots (0, 7F, 80, FF, 1,
 * 2, 128, M128, 3, HALF) are filled in by tgsi_exec_machine_create().
 */
#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I TGSI_EXEC_TEMP_128_I
#define TEMP_128_C TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0 TGSI_EXEC_TEMP_R0
#define TEMP_P0 TGSI_EXEC_TEMP_P0
428
/* Non-zero if channel CHAN is set in the write mask of INST's first
 * destination register (Dst[0]).
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))

/* Same test against the second destination register (Dst[1]). */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))

/* Loop header: runs the following statement once for each channel enabled
 * in Dst[0]'s write mask.  CHAN must be an assignable integer variable.
 * Note that the expansion ends in an unbraced 'if', so an 'else' placed
 * right after the loop body would bind to that 'if'.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/* As FOR_EACH_ENABLED_CHANNEL, but driven by Dst[1]'s write mask. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
442
443
/**
 * Recompute the execution mask as the bitwise AND of the conditional,
 * loop, continue, switch and function masks.
 *
 * MACH is a pointer to the machine state.  The argument and the whole
 * expansion are parenthesized so that any pointer-valued expression may
 * be passed and the macro can be used inside a larger expression.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
447
448
449 static const union tgsi_exec_channel ZeroVec =
450 { { 0.0, 0.0, 0.0, 0.0 } };
451
452
/* Assert that none of the four float elements of the channel pointed to
 * by 'chan' is an infinity or a NaN (as judged by util_is_inf_or_nan()).
 * The do/while(0) wrapper makes the macro usable as a single statement.
 */
#define CHECK_INF_OR_NAN(chan) do {\
      assert(!util_is_inf_or_nan((chan)->f[0]));\
      assert(!util_is_inf_or_nan((chan)->f[1]));\
      assert(!util_is_inf_or_nan((chan)->f[2]));\
      assert(!util_is_inf_or_nan((chan)->f[3]));\
   } while (0)
459
460
#ifdef DEBUG
/**
 * Debug helper: print the four float elements of a channel, prefixed
 * with the caller-supplied label.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float e0 = chan->f[0];
   const float e1 = chan->f[1];
   const float e2 = chan->f[2];
   const float e3 = chan->f[3];

   debug_printf("%s = {%f, %f, %f, %f}\n", msg, e0, e1, e2, e3);
}
#endif
469
470
#ifdef DEBUG
/**
 * Debug helper: print all four channels (X, Y, Z, W) of the temporary
 * register mach->Temps[index], one channel per line.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   int c;

   debug_printf("Temp[%u] =\n", index);
   for (c = 0; c < 4; c++) {
      const union tgsi_exec_channel *ch = &mach->Temps[index].xyzw[c];

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   "XYZW"[c], ch->f[0], ch->f[1], ch->f[2], ch->f[3]);
   }
}
#endif
488
489
490 /**
491 * Check if there's a potential src/dst register data dependency when
492 * using SOA execution.
493 * Example:
494 * MOV T, T.yxwz;
495 * This would expand into:
496 * MOV t0, t1;
497 * MOV t1, t0;
498 * MOV t2, t3;
499 * MOV t3, t2;
500 * The second instruction will have the wrong value for t0 if executed as-is.
501 */
502 boolean
503 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
504 {
505 uint i, chan;
506
507 uint writemask = inst->Dst[0].Register.WriteMask;
508 if (writemask == TGSI_WRITEMASK_X ||
509 writemask == TGSI_WRITEMASK_Y ||
510 writemask == TGSI_WRITEMASK_Z ||
511 writemask == TGSI_WRITEMASK_W ||
512 writemask == TGSI_WRITEMASK_NONE) {
513 /* no chance of data dependency */
514 return FALSE;
515 }
516
517 /* loop over src regs */
518 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
519 if ((inst->Src[i].Register.File ==
520 inst->Dst[0].Register.File) &&
521 (inst->Src[i].Register.Index ==
522 inst->Dst[0].Register.Index)) {
523 /* loop over dest channels */
524 uint channelsWritten = 0x0;
525 FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
526 /* check if we're reading a channel that's been written */
527 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
528 if (channelsWritten & (1 << swizzle)) {
529 return TRUE;
530 }
531
532 channelsWritten |= (1 << chan);
533 }
534 }
535 }
536 return FALSE;
537 }
538
539
540 /**
541 * Initialize machine state by expanding tokens to full instructions,
542 * allocating temporary storage, setting up constants, etc.
543 * After this, we can call tgsi_exec_machine_run() many times.
544 */
545 void
546 tgsi_exec_machine_bind_shader(
547 struct tgsi_exec_machine *mach,
548 const struct tgsi_token *tokens,
549 uint numSamplers,
550 struct tgsi_sampler **samplers)
551 {
552 uint k;
553 struct tgsi_parse_context parse;
554 struct tgsi_exec_labels *labels = &mach->Labels;
555 struct tgsi_full_instruction *instructions;
556 struct tgsi_full_declaration *declarations;
557 uint maxInstructions = 10, numInstructions = 0;
558 uint maxDeclarations = 10, numDeclarations = 0;
559 uint instno = 0;
560
561 #if 0
562 tgsi_dump(tokens, 0);
563 #endif
564
565 util_init_math();
566
567 mach->Tokens = tokens;
568 mach->Samplers = samplers;
569
570 k = tgsi_parse_init (&parse, mach->Tokens);
571 if (k != TGSI_PARSE_OK) {
572 debug_printf( "Problem parsing!\n" );
573 return;
574 }
575
576 mach->Processor = parse.FullHeader.Processor.Processor;
577 mach->ImmLimit = 0;
578 labels->count = 0;
579
580 declarations = (struct tgsi_full_declaration *)
581 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
582
583 if (!declarations) {
584 return;
585 }
586
587 instructions = (struct tgsi_full_instruction *)
588 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
589
590 if (!instructions) {
591 FREE( declarations );
592 return;
593 }
594
595 while( !tgsi_parse_end_of_tokens( &parse ) ) {
596 uint pointer = parse.Position;
597 uint i;
598
599 tgsi_parse_token( &parse );
600 switch( parse.FullToken.Token.Type ) {
601 case TGSI_TOKEN_TYPE_DECLARATION:
602 /* save expanded declaration */
603 if (numDeclarations == maxDeclarations) {
604 declarations = REALLOC(declarations,
605 maxDeclarations
606 * sizeof(struct tgsi_full_declaration),
607 (maxDeclarations + 10)
608 * sizeof(struct tgsi_full_declaration));
609 maxDeclarations += 10;
610 }
611 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
612 unsigned reg;
613 for (reg = parse.FullToken.FullDeclaration.Range.First;
614 reg <= parse.FullToken.FullDeclaration.Range.Last;
615 ++reg) {
616 ++mach->NumOutputs;
617 }
618 }
619 memcpy(declarations + numDeclarations,
620 &parse.FullToken.FullDeclaration,
621 sizeof(declarations[0]));
622 numDeclarations++;
623 break;
624
625 case TGSI_TOKEN_TYPE_IMMEDIATE:
626 {
627 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
628 assert( size <= 4 );
629 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
630
631 for( i = 0; i < size; i++ ) {
632 mach->Imms[mach->ImmLimit][i] =
633 parse.FullToken.FullImmediate.u[i].Float;
634 }
635 mach->ImmLimit += 1;
636 }
637 break;
638
639 case TGSI_TOKEN_TYPE_INSTRUCTION:
640 assert( labels->count < MAX_LABELS );
641
642 labels->labels[labels->count][0] = instno;
643 labels->labels[labels->count][1] = pointer;
644 labels->count++;
645
646 /* save expanded instruction */
647 if (numInstructions == maxInstructions) {
648 instructions = REALLOC(instructions,
649 maxInstructions
650 * sizeof(struct tgsi_full_instruction),
651 (maxInstructions + 10)
652 * sizeof(struct tgsi_full_instruction));
653 maxInstructions += 10;
654 }
655
656 memcpy(instructions + numInstructions,
657 &parse.FullToken.FullInstruction,
658 sizeof(instructions[0]));
659
660 numInstructions++;
661 break;
662
663 case TGSI_TOKEN_TYPE_PROPERTY:
664 break;
665
666 default:
667 assert( 0 );
668 }
669 }
670 tgsi_parse_free (&parse);
671
672 if (mach->Declarations) {
673 FREE( mach->Declarations );
674 }
675 mach->Declarations = declarations;
676 mach->NumDeclarations = numDeclarations;
677
678 if (mach->Instructions) {
679 FREE( mach->Instructions );
680 }
681 mach->Instructions = instructions;
682 mach->NumInstructions = numInstructions;
683 }
684
685
686 struct tgsi_exec_machine *
687 tgsi_exec_machine_create( void )
688 {
689 struct tgsi_exec_machine *mach;
690 uint i;
691
692 mach = align_malloc( sizeof *mach, 16 );
693 if (!mach)
694 goto fail;
695
696 memset(mach, 0, sizeof(*mach));
697
698 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
699 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
700 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
701
702 /* Setup constants. */
703 for( i = 0; i < 4; i++ ) {
704 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
705 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
706 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
707 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
708 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
709 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
710 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
711 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
712 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
713 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
714 }
715
716 #ifdef DEBUG
717 /* silence warnings */
718 (void) print_chan;
719 (void) print_temp;
720 #endif
721
722 return mach;
723
724 fail:
725 align_free(mach);
726 return NULL;
727 }
728
729
730 void
731 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
732 {
733 if (mach) {
734 FREE(mach->Instructions);
735 FREE(mach->Declarations);
736 }
737
738 align_free(mach);
739 }
740
741 static void
742 micro_add(
743 union tgsi_exec_channel *dst,
744 const union tgsi_exec_channel *src0,
745 const union tgsi_exec_channel *src1 )
746 {
747 dst->f[0] = src0->f[0] + src1->f[0];
748 dst->f[1] = src0->f[1] + src1->f[1];
749 dst->f[2] = src0->f[2] + src1->f[2];
750 dst->f[3] = src0->f[3] + src1->f[3];
751 }
752
753 static void
754 micro_div(
755 union tgsi_exec_channel *dst,
756 const union tgsi_exec_channel *src0,
757 const union tgsi_exec_channel *src1 )
758 {
759 if (src1->f[0] != 0) {
760 dst->f[0] = src0->f[0] / src1->f[0];
761 }
762 if (src1->f[1] != 0) {
763 dst->f[1] = src0->f[1] / src1->f[1];
764 }
765 if (src1->f[2] != 0) {
766 dst->f[2] = src0->f[2] / src1->f[2];
767 }
768 if (src1->f[3] != 0) {
769 dst->f[3] = src0->f[3] / src1->f[3];
770 }
771 }
772
773 static void
774 micro_float_clamp(union tgsi_exec_channel *dst,
775 const union tgsi_exec_channel *src)
776 {
777 uint i;
778
779 for (i = 0; i < 4; i++) {
780 if (src->f[i] > 0.0f) {
781 if (src->f[i] > 1.884467e+019f)
782 dst->f[i] = 1.884467e+019f;
783 else if (src->f[i] < 5.42101e-020f)
784 dst->f[i] = 5.42101e-020f;
785 else
786 dst->f[i] = src->f[i];
787 }
788 else {
789 if (src->f[i] < -1.884467e+019f)
790 dst->f[i] = -1.884467e+019f;
791 else if (src->f[i] > -5.42101e-020f)
792 dst->f[i] = -5.42101e-020f;
793 else
794 dst->f[i] = src->f[i];
795 }
796 }
797 }
798
799 static void
800 micro_lt(
801 union tgsi_exec_channel *dst,
802 const union tgsi_exec_channel *src0,
803 const union tgsi_exec_channel *src1,
804 const union tgsi_exec_channel *src2,
805 const union tgsi_exec_channel *src3 )
806 {
807 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
808 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
809 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
810 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
811 }
812
813 static void
814 micro_max(
815 union tgsi_exec_channel *dst,
816 const union tgsi_exec_channel *src0,
817 const union tgsi_exec_channel *src1 )
818 {
819 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
820 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
821 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
822 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
823 }
824
825 static void
826 micro_min(
827 union tgsi_exec_channel *dst,
828 const union tgsi_exec_channel *src0,
829 const union tgsi_exec_channel *src1 )
830 {
831 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
832 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
833 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
834 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
835 }
836
837 static void
838 micro_mul(
839 union tgsi_exec_channel *dst,
840 const union tgsi_exec_channel *src0,
841 const union tgsi_exec_channel *src1 )
842 {
843 dst->f[0] = src0->f[0] * src1->f[0];
844 dst->f[1] = src0->f[1] * src1->f[1];
845 dst->f[2] = src0->f[2] * src1->f[2];
846 dst->f[3] = src0->f[3] * src1->f[3];
847 }
848
#if 0
/**
 * (Compiled out.)  Signed integer multiply with two destinations: dst1
 * receives the element-wise product src0.i * src1.i, dst0 is set to zero.
 */
static void
micro_imul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
867
#if 0
/**
 * (Compiled out.)  Unsigned integer multiply with two destinations: dst1
 * receives the element-wise product src0.u * src1.u, dst0 is set to zero.
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst1->u[lane] = src0->u[lane] * src1->u[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->u[lane] = 0;
   }
}
#endif
886
887
#if 0
/**
 * (Compiled out.)  Element-wise conditional move on the unsigned view:
 * dst.u[i] = src0.u[i] ? src1.u[i] : src2.u[i].
 */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane]) {
         dst->u[lane] = src1->u[lane];
      } else {
         dst->u[lane] = src2->u[lane];
      }
   }
}
#endif
902
903 static void
904 micro_neg(
905 union tgsi_exec_channel *dst,
906 const union tgsi_exec_channel *src )
907 {
908 dst->f[0] = -src->f[0];
909 dst->f[1] = -src->f[1];
910 dst->f[2] = -src->f[2];
911 dst->f[3] = -src->f[3];
912 }
913
914 static void
915 micro_pow(
916 union tgsi_exec_channel *dst,
917 const union tgsi_exec_channel *src0,
918 const union tgsi_exec_channel *src1 )
919 {
920 #if FAST_MATH
921 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
922 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
923 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
924 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
925 #else
926 dst->f[0] = powf( src0->f[0], src1->f[0] );
927 dst->f[1] = powf( src0->f[1], src1->f[1] );
928 dst->f[2] = powf( src0->f[2], src1->f[2] );
929 dst->f[3] = powf( src0->f[3], src1->f[3] );
930 #endif
931 }
932
933 static void
934 micro_sqrt( union tgsi_exec_channel *dst,
935 const union tgsi_exec_channel *src )
936 {
937 dst->f[0] = sqrtf( src->f[0] );
938 dst->f[1] = sqrtf( src->f[1] );
939 dst->f[2] = sqrtf( src->f[2] );
940 dst->f[3] = sqrtf( src->f[3] );
941 }
942
943 static void
944 micro_sub(
945 union tgsi_exec_channel *dst,
946 const union tgsi_exec_channel *src0,
947 const union tgsi_exec_channel *src1 )
948 {
949 dst->f[0] = src0->f[0] - src1->f[0];
950 dst->f[1] = src0->f[1] - src1->f[1];
951 dst->f[2] = src0->f[2] - src1->f[2];
952 dst->f[3] = src0->f[3] - src1->f[3];
953 }
954
955 static void
956 fetch_src_file_channel(
957 const struct tgsi_exec_machine *mach,
958 const uint file,
959 const uint swizzle,
960 const union tgsi_exec_channel *index,
961 union tgsi_exec_channel *chan )
962 {
963 switch( swizzle ) {
964 case TGSI_SWIZZLE_X:
965 case TGSI_SWIZZLE_Y:
966 case TGSI_SWIZZLE_Z:
967 case TGSI_SWIZZLE_W:
968 switch( file ) {
969 case TGSI_FILE_CONSTANT:
970 assert(mach->Consts);
971 if (index->i[0] < 0)
972 chan->f[0] = 0.0f;
973 else
974 chan->f[0] = mach->Consts[index->i[0]][swizzle];
975 if (index->i[1] < 0)
976 chan->f[1] = 0.0f;
977 else
978 chan->f[1] = mach->Consts[index->i[1]][swizzle];
979 if (index->i[2] < 0)
980 chan->f[2] = 0.0f;
981 else
982 chan->f[2] = mach->Consts[index->i[2]][swizzle];
983 if (index->i[3] < 0)
984 chan->f[3] = 0.0f;
985 else
986 chan->f[3] = mach->Consts[index->i[3]][swizzle];
987 break;
988
989 case TGSI_FILE_INPUT:
990 case TGSI_FILE_SYSTEM_VALUE:
991 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
992 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
993 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
994 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
995 break;
996
997 case TGSI_FILE_TEMPORARY:
998 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
999 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1000 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1001 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1002 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1003 break;
1004
1005 case TGSI_FILE_IMMEDIATE:
1006 assert( index->i[0] < (int) mach->ImmLimit );
1007 chan->f[0] = mach->Imms[index->i[0]][swizzle];
1008 assert( index->i[1] < (int) mach->ImmLimit );
1009 chan->f[1] = mach->Imms[index->i[1]][swizzle];
1010 assert( index->i[2] < (int) mach->ImmLimit );
1011 chan->f[2] = mach->Imms[index->i[2]][swizzle];
1012 assert( index->i[3] < (int) mach->ImmLimit );
1013 chan->f[3] = mach->Imms[index->i[3]][swizzle];
1014 break;
1015
1016 case TGSI_FILE_ADDRESS:
1017 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1018 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1019 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1020 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1021 break;
1022
1023 case TGSI_FILE_PREDICATE:
1024 assert(index->i[0] < TGSI_EXEC_NUM_PREDS);
1025 assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
1026 assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
1027 assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
1028 chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0];
1029 chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1];
1030 chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2];
1031 chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3];
1032 break;
1033
1034 case TGSI_FILE_OUTPUT:
1035 /* vertex/fragment output vars can be read too */
1036 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1037 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1038 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1039 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1040 break;
1041
1042 default:
1043 assert( 0 );
1044 chan->u[0] = 0;
1045 chan->u[1] = 0;
1046 chan->u[2] = 0;
1047 chan->u[3] = 0;
1048 }
1049 break;
1050
1051 default:
1052 assert( 0 );
1053 chan->u[0] = 0;
1054 chan->u[1] = 0;
1055 chan->u[2] = 0;
1056 chan->u[3] = 0;
1057 }
1058 }
1059
1060 static void
1061 fetch_source(const struct tgsi_exec_machine *mach,
1062 union tgsi_exec_channel *chan,
1063 const struct tgsi_full_src_register *reg,
1064 const uint chan_index,
1065 enum tgsi_exec_datatype src_datatype)
1066 {
1067 union tgsi_exec_channel index;
1068 uint swizzle;
1069
1070 /* We start with a direct index into a register file.
1071 *
1072 * file[1],
1073 * where:
1074 * file = Register.File
1075 * [1] = Register.Index
1076 */
1077 index.i[0] =
1078 index.i[1] =
1079 index.i[2] =
1080 index.i[3] = reg->Register.Index;
1081
1082 /* There is an extra source register that indirectly subscripts
1083 * a register file. The direct index now becomes an offset
1084 * that is being added to the indirect register.
1085 *
1086 * file[ind[2].x+1],
1087 * where:
1088 * ind = Indirect.File
1089 * [2] = Indirect.Index
1090 * .x = Indirect.SwizzleX
1091 */
1092 if (reg->Register.Indirect) {
1093 union tgsi_exec_channel index2;
1094 union tgsi_exec_channel indir_index;
1095 const uint execmask = mach->ExecMask;
1096 uint i;
1097
1098 /* which address register (always zero now) */
1099 index2.i[0] =
1100 index2.i[1] =
1101 index2.i[2] =
1102 index2.i[3] = reg->Indirect.Index;
1103
1104 /* get current value of address register[swizzle] */
1105 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1106 fetch_src_file_channel(
1107 mach,
1108 reg->Indirect.File,
1109 swizzle,
1110 &index2,
1111 &indir_index );
1112
1113 /* add value of address register to the offset */
1114 index.i[0] += indir_index.i[0];
1115 index.i[1] += indir_index.i[1];
1116 index.i[2] += indir_index.i[2];
1117 index.i[3] += indir_index.i[3];
1118
1119 /* for disabled execution channels, zero-out the index to
1120 * avoid using a potential garbage value.
1121 */
1122 for (i = 0; i < QUAD_SIZE; i++) {
1123 if ((execmask & (1 << i)) == 0)
1124 index.i[i] = 0;
1125 }
1126 }
1127
1128 /* There is an extra source register that is a second
1129 * subscript to a register file. Effectively it means that
1130 * the register file is actually a 2D array of registers.
1131 *
1132 * file[3][1] == file[3*sizeof(file[1])+1],
1133 * where:
1134 * [3] = Dimension.Index
1135 */
1136 if (reg->Register.Dimension) {
1137 int array_size;
1138 union tgsi_exec_channel dim_index;
1139
1140 /* The size of the first-order array depends on the register file type.
1141 * We need to multiply the index to the first array to get an effective,
1142 * "flat" index that points to the beginning of the second-order array.
1143 */
1144 switch (reg->Register.File) {
1145 case TGSI_FILE_INPUT:
1146 case TGSI_FILE_SYSTEM_VALUE:
1147 array_size = TGSI_EXEC_MAX_INPUT_ATTRIBS;
1148 break;
1149 case TGSI_FILE_CONSTANT:
1150 array_size = TGSI_EXEC_MAX_CONST_BUFFER;
1151 break;
1152 default:
1153 assert( 0 );
1154 array_size = 0;
1155 }
1156
1157 dim_index.i[0] =
1158 dim_index.i[1] =
1159 dim_index.i[2] =
1160 dim_index.i[3] = reg->Dimension.Index;
1161
1162 /* Again, the second subscript index can be addressed indirectly
1163 * identically to the first one.
1164 * Nothing stops us from indirectly addressing the indirect register,
1165 * but there is no need for that, so we won't exercise it.
1166 *
1167 * file[ind[4].y+3][1],
1168 * where:
1169 * ind = DimIndirect.File
1170 * [4] = DimIndirect.Index
1171 * .y = DimIndirect.SwizzleX
1172 */
1173 if (reg->Dimension.Indirect) {
1174 union tgsi_exec_channel index2;
1175 union tgsi_exec_channel indir_index;
1176 const uint execmask = mach->ExecMask;
1177 uint i;
1178
1179 index2.i[0] =
1180 index2.i[1] =
1181 index2.i[2] =
1182 index2.i[3] = reg->DimIndirect.Index;
1183
1184 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
1185 fetch_src_file_channel(
1186 mach,
1187 reg->DimIndirect.File,
1188 swizzle,
1189 &index2,
1190 &indir_index );
1191
1192 dim_index.i[0] += indir_index.i[0];
1193 dim_index.i[1] += indir_index.i[1];
1194 dim_index.i[2] += indir_index.i[2];
1195 dim_index.i[3] += indir_index.i[3];
1196
1197 /* for disabled execution channels, zero-out the index to
1198 * avoid using a potential garbage value.
1199 */
1200 for (i = 0; i < QUAD_SIZE; i++) {
1201 if ((execmask & (1 << i)) == 0)
1202 dim_index.i[i] = 0;
1203 }
1204 }
1205
1206 index.i[0] += dim_index.i[0] * array_size;
1207 index.i[1] += dim_index.i[1] * array_size;
1208 index.i[2] += dim_index.i[2] * array_size;
1209 index.i[3] += dim_index.i[3] * array_size;
1210
1211 /* If by any chance there was a need for a 3D array of register
1212 * files, we would have to check whether Dimension is followed
1213 * by a dimension register and continue the saga.
1214 */
1215 }
1216
1217 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1218 fetch_src_file_channel(
1219 mach,
1220 reg->Register.File,
1221 swizzle,
1222 &index,
1223 chan );
1224
1225 if (reg->Register.Absolute) {
1226 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1227 micro_abs(chan, chan);
1228 } else {
1229 micro_iabs(chan, chan);
1230 }
1231 }
1232
1233 if (reg->Register.Negate) {
1234 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1235 micro_neg(chan, chan);
1236 } else {
1237 micro_ineg(chan, chan);
1238 }
1239 }
1240 }
1241
1242 static void
1243 store_dest(struct tgsi_exec_machine *mach,
1244 const union tgsi_exec_channel *chan,
1245 const struct tgsi_full_dst_register *reg,
1246 const struct tgsi_full_instruction *inst,
1247 uint chan_index,
1248 enum tgsi_exec_datatype dst_datatype)
1249 {
1250 uint i;
1251 union tgsi_exec_channel null;
1252 union tgsi_exec_channel *dst;
1253 uint execmask = mach->ExecMask;
1254 int offset = 0; /* indirection offset */
1255 int index;
1256
1257 if (dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1258 CHECK_INF_OR_NAN(chan);
1259 }
1260
1261 /* There is an extra source register that indirectly subscripts
1262 * a register file. The direct index now becomes an offset
1263 * that is being added to the indirect register.
1264 *
1265 * file[ind[2].x+1],
1266 * where:
1267 * ind = Indirect.File
1268 * [2] = Indirect.Index
1269 * .x = Indirect.SwizzleX
1270 */
1271 if (reg->Register.Indirect) {
1272 union tgsi_exec_channel index;
1273 union tgsi_exec_channel indir_index;
1274 uint swizzle;
1275
1276 /* which address register (always zero for now) */
1277 index.i[0] =
1278 index.i[1] =
1279 index.i[2] =
1280 index.i[3] = reg->Indirect.Index;
1281
1282 /* get current value of address register[swizzle] */
1283 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1284
1285 /* fetch values from the address/indirection register */
1286 fetch_src_file_channel(
1287 mach,
1288 reg->Indirect.File,
1289 swizzle,
1290 &index,
1291 &indir_index );
1292
1293 /* save indirection offset */
1294 offset = indir_index.i[0];
1295 }
1296
1297 switch (reg->Register.File) {
1298 case TGSI_FILE_NULL:
1299 dst = &null;
1300 break;
1301
1302 case TGSI_FILE_OUTPUT:
1303 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1304 + reg->Register.Index;
1305 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1306 #if 0
1307 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1308 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1309 for (i = 0; i < QUAD_SIZE; i++)
1310 if (execmask & (1 << i))
1311 fprintf(stderr, "%f, ", chan->f[i]);
1312 fprintf(stderr, ")\n");
1313 }
1314 #endif
1315 break;
1316
1317 case TGSI_FILE_TEMPORARY:
1318 index = reg->Register.Index;
1319 assert( index < TGSI_EXEC_NUM_TEMPS );
1320 dst = &mach->Temps[offset + index].xyzw[chan_index];
1321 break;
1322
1323 case TGSI_FILE_ADDRESS:
1324 index = reg->Register.Index;
1325 dst = &mach->Addrs[index].xyzw[chan_index];
1326 break;
1327
1328 case TGSI_FILE_LOOP:
1329 assert(reg->Register.Index == 0);
1330 assert(mach->LoopCounterStackTop > 0);
1331 assert(chan_index == CHAN_X);
1332 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
1333 break;
1334
1335 case TGSI_FILE_PREDICATE:
1336 index = reg->Register.Index;
1337 assert(index < TGSI_EXEC_NUM_PREDS);
1338 dst = &mach->Predicates[index].xyzw[chan_index];
1339 break;
1340
1341 default:
1342 assert( 0 );
1343 return;
1344 }
1345
1346 if (inst->Instruction.Predicate) {
1347 uint swizzle;
1348 union tgsi_exec_channel *pred;
1349
1350 switch (chan_index) {
1351 case CHAN_X:
1352 swizzle = inst->Predicate.SwizzleX;
1353 break;
1354 case CHAN_Y:
1355 swizzle = inst->Predicate.SwizzleY;
1356 break;
1357 case CHAN_Z:
1358 swizzle = inst->Predicate.SwizzleZ;
1359 break;
1360 case CHAN_W:
1361 swizzle = inst->Predicate.SwizzleW;
1362 break;
1363 default:
1364 assert(0);
1365 return;
1366 }
1367
1368 assert(inst->Predicate.Index == 0);
1369
1370 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1371
1372 if (inst->Predicate.Negate) {
1373 for (i = 0; i < QUAD_SIZE; i++) {
1374 if (pred->u[i]) {
1375 execmask &= ~(1 << i);
1376 }
1377 }
1378 } else {
1379 for (i = 0; i < QUAD_SIZE; i++) {
1380 if (!pred->u[i]) {
1381 execmask &= ~(1 << i);
1382 }
1383 }
1384 }
1385 }
1386
1387 switch (inst->Instruction.Saturate) {
1388 case TGSI_SAT_NONE:
1389 for (i = 0; i < QUAD_SIZE; i++)
1390 if (execmask & (1 << i))
1391 dst->i[i] = chan->i[i];
1392 break;
1393
1394 case TGSI_SAT_ZERO_ONE:
1395 for (i = 0; i < QUAD_SIZE; i++)
1396 if (execmask & (1 << i)) {
1397 if (chan->f[i] < 0.0f)
1398 dst->f[i] = 0.0f;
1399 else if (chan->f[i] > 1.0f)
1400 dst->f[i] = 1.0f;
1401 else
1402 dst->i[i] = chan->i[i];
1403 }
1404 break;
1405
1406 case TGSI_SAT_MINUS_PLUS_ONE:
1407 for (i = 0; i < QUAD_SIZE; i++)
1408 if (execmask & (1 << i)) {
1409 if (chan->f[i] < -1.0f)
1410 dst->f[i] = -1.0f;
1411 else if (chan->f[i] > 1.0f)
1412 dst->f[i] = 1.0f;
1413 else
1414 dst->i[i] = chan->i[i];
1415 }
1416 break;
1417
1418 default:
1419 assert( 0 );
1420 }
1421 }
1422
/*
 * Shorthand for fetching one channel (CHAN) of source operand INDEX of the
 * current instruction into VAL as float data.  Expands to a fetch_source()
 * call and expects variables named `mach` and `inst` in the calling scope.
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

/*
 * Shorthand for storing VAL into channel CHAN of destination operand INDEX
 * of the current instruction as float data.  Expands to a store_dest() call
 * and expects variables named `mach` and `inst` in the calling scope.
 */
#define STORE(VAL,INDEX,CHAN)\
   store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT)
1428
1429
1430 /**
1431 * Execute ARB-style KIL which is predicated by a src register.
1432 * Kill fragment if any of the four values is less than zero.
1433 */
1434 static void
1435 exec_kil(struct tgsi_exec_machine *mach,
1436 const struct tgsi_full_instruction *inst)
1437 {
1438 uint uniquemask;
1439 uint chan_index;
1440 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1441 union tgsi_exec_channel r[1];
1442
1443 /* This mask stores component bits that were already tested. */
1444 uniquemask = 0;
1445
1446 for (chan_index = 0; chan_index < 4; chan_index++)
1447 {
1448 uint swizzle;
1449 uint i;
1450
1451 /* unswizzle channel */
1452 swizzle = tgsi_util_get_full_src_register_swizzle (
1453 &inst->Src[0],
1454 chan_index);
1455
1456 /* check if the component has not been already tested */
1457 if (uniquemask & (1 << swizzle))
1458 continue;
1459 uniquemask |= 1 << swizzle;
1460
1461 FETCH(&r[0], 0, chan_index);
1462 for (i = 0; i < 4; i++)
1463 if (r[0].f[i] < 0.0f)
1464 kilmask |= 1 << i;
1465 }
1466
1467 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1468 }
1469
1470 /**
1471 * Execute NVIDIA-style KIL which is predicated by a condition code.
1472 * Kill fragment if the condition code is TRUE.
1473 */
1474 static void
1475 exec_kilp(struct tgsi_exec_machine *mach,
1476 const struct tgsi_full_instruction *inst)
1477 {
1478 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1479
1480 /* "unconditional" kil */
1481 kilmask = mach->ExecMask;
1482 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1483 }
1484
1485 static void
1486 emit_vertex(struct tgsi_exec_machine *mach)
1487 {
1488 /* FIXME: check for exec mask correctly
1489 unsigned i;
1490 for (i = 0; i < QUAD_SIZE; ++i) {
1491 if ((mach->ExecMask & (1 << i)))
1492 */
1493 if (mach->ExecMask) {
1494 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
1495 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1496 }
1497 }
1498
1499 static void
1500 emit_primitive(struct tgsi_exec_machine *mach)
1501 {
1502 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
1503 /* FIXME: check for exec mask correctly
1504 unsigned i;
1505 for (i = 0; i < QUAD_SIZE; ++i) {
1506 if ((mach->ExecMask & (1 << i)))
1507 */
1508 if (mach->ExecMask) {
1509 ++(*prim_count);
1510 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
1511 mach->Primitives[*prim_count] = 0;
1512 }
1513 }
1514
1515 /*
1516 * Fetch a four texture samples using STR texture coordinates.
1517 */
1518 static void
1519 fetch_texel( struct tgsi_sampler *sampler,
1520 const union tgsi_exec_channel *s,
1521 const union tgsi_exec_channel *t,
1522 const union tgsi_exec_channel *p,
1523 const union tgsi_exec_channel *c0,
1524 enum tgsi_sampler_control control,
1525 union tgsi_exec_channel *r,
1526 union tgsi_exec_channel *g,
1527 union tgsi_exec_channel *b,
1528 union tgsi_exec_channel *a )
1529 {
1530 uint j;
1531 float rgba[NUM_CHANNELS][QUAD_SIZE];
1532
1533 sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba);
1534
1535 for (j = 0; j < 4; j++) {
1536 r->f[j] = rgba[0][j];
1537 g->f[j] = rgba[1][j];
1538 b->f[j] = rgba[2][j];
1539 a->f[j] = rgba[3][j];
1540 }
1541 }
1542
1543
/*
 * Values for the `modifier` argument of exec_tex(); they select how the
 * W component of the texture coordinate is used.
 */
#define TEX_MODIFIER_NONE           0  /* W is not fetched */
#define TEX_MODIFIER_PROJECTED      1  /* coordinates are divided by W */
#define TEX_MODIFIER_LOD_BIAS       2  /* W is passed to the sampler with lod_bias control */
#define TEX_MODIFIER_EXPLICIT_LOD   3  /* W is passed to the sampler with lod_explicit control */
1548
1549
1550 static void
1551 exec_tex(struct tgsi_exec_machine *mach,
1552 const struct tgsi_full_instruction *inst,
1553 uint modifier)
1554 {
1555 const uint unit = inst->Src[1].Register.Index;
1556 union tgsi_exec_channel r[4];
1557 const union tgsi_exec_channel *lod = &ZeroVec;
1558 enum tgsi_sampler_control control;
1559 uint chan_index;
1560
1561 if (modifier != TEX_MODIFIER_NONE) {
1562 FETCH(&r[3], 0, CHAN_W);
1563 if (modifier != TEX_MODIFIER_PROJECTED) {
1564 lod = &r[3];
1565 }
1566 }
1567
1568 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
1569 control = tgsi_sampler_lod_explicit;
1570 } else {
1571 control = tgsi_sampler_lod_bias;
1572 }
1573
1574 switch (inst->Texture.Texture) {
1575 case TGSI_TEXTURE_1D:
1576 case TGSI_TEXTURE_SHADOW1D:
1577 FETCH(&r[0], 0, CHAN_X);
1578
1579 if (modifier == TEX_MODIFIER_PROJECTED) {
1580 micro_div(&r[0], &r[0], &r[3]);
1581 }
1582
1583 fetch_texel(mach->Samplers[unit],
1584 &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */
1585 control,
1586 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1587 break;
1588
1589 case TGSI_TEXTURE_2D:
1590 case TGSI_TEXTURE_RECT:
1591 case TGSI_TEXTURE_SHADOW2D:
1592 case TGSI_TEXTURE_SHADOWRECT:
1593 FETCH(&r[0], 0, CHAN_X);
1594 FETCH(&r[1], 0, CHAN_Y);
1595 FETCH(&r[2], 0, CHAN_Z);
1596
1597 if (modifier == TEX_MODIFIER_PROJECTED) {
1598 micro_div(&r[0], &r[0], &r[3]);
1599 micro_div(&r[1], &r[1], &r[3]);
1600 micro_div(&r[2], &r[2], &r[3]);
1601 }
1602
1603 fetch_texel(mach->Samplers[unit],
1604 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
1605 control,
1606 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1607 break;
1608
1609 case TGSI_TEXTURE_3D:
1610 case TGSI_TEXTURE_CUBE:
1611 FETCH(&r[0], 0, CHAN_X);
1612 FETCH(&r[1], 0, CHAN_Y);
1613 FETCH(&r[2], 0, CHAN_Z);
1614
1615 if (modifier == TEX_MODIFIER_PROJECTED) {
1616 micro_div(&r[0], &r[0], &r[3]);
1617 micro_div(&r[1], &r[1], &r[3]);
1618 micro_div(&r[2], &r[2], &r[3]);
1619 }
1620
1621 fetch_texel(mach->Samplers[unit],
1622 &r[0], &r[1], &r[2], lod,
1623 control,
1624 &r[0], &r[1], &r[2], &r[3]);
1625 break;
1626
1627 default:
1628 assert(0);
1629 }
1630
1631 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1632 STORE(&r[chan_index], 0, chan_index);
1633 }
1634 }
1635
1636 static void
1637 exec_txd(struct tgsi_exec_machine *mach,
1638 const struct tgsi_full_instruction *inst)
1639 {
1640 const uint unit = inst->Src[3].Register.Index;
1641 union tgsi_exec_channel r[4];
1642 uint chan_index;
1643
1644 /*
1645 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1646 */
1647
1648 switch (inst->Texture.Texture) {
1649 case TGSI_TEXTURE_1D:
1650 case TGSI_TEXTURE_SHADOW1D:
1651
1652 FETCH(&r[0], 0, CHAN_X);
1653
1654 fetch_texel(mach->Samplers[unit],
1655 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */
1656 tgsi_sampler_lod_bias,
1657 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1658 break;
1659
1660 case TGSI_TEXTURE_2D:
1661 case TGSI_TEXTURE_RECT:
1662 case TGSI_TEXTURE_SHADOW2D:
1663 case TGSI_TEXTURE_SHADOWRECT:
1664
1665 FETCH(&r[0], 0, CHAN_X);
1666 FETCH(&r[1], 0, CHAN_Y);
1667 FETCH(&r[2], 0, CHAN_Z);
1668
1669 fetch_texel(mach->Samplers[unit],
1670 &r[0], &r[1], &r[2], &ZeroVec, /* inputs */
1671 tgsi_sampler_lod_bias,
1672 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1673 break;
1674
1675 case TGSI_TEXTURE_3D:
1676 case TGSI_TEXTURE_CUBE:
1677
1678 FETCH(&r[0], 0, CHAN_X);
1679 FETCH(&r[1], 0, CHAN_Y);
1680 FETCH(&r[2], 0, CHAN_Z);
1681
1682 fetch_texel(mach->Samplers[unit],
1683 &r[0], &r[1], &r[2], &ZeroVec,
1684 tgsi_sampler_lod_bias,
1685 &r[0], &r[1], &r[2], &r[3]);
1686 break;
1687
1688 default:
1689 assert(0);
1690 }
1691
1692 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
1693 STORE(&r[chan_index], 0, chan_index);
1694 }
1695 }
1696
1697
1698 /**
1699 * Evaluate a constant-valued coefficient at the position of the
1700 * current quad.
1701 */
1702 static void
1703 eval_constant_coef(
1704 struct tgsi_exec_machine *mach,
1705 unsigned attrib,
1706 unsigned chan )
1707 {
1708 unsigned i;
1709
1710 for( i = 0; i < QUAD_SIZE; i++ ) {
1711 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1712 }
1713 }
1714
1715 /**
1716 * Evaluate a linear-valued coefficient at the position of the
1717 * current quad.
1718 */
1719 static void
1720 eval_linear_coef(
1721 struct tgsi_exec_machine *mach,
1722 unsigned attrib,
1723 unsigned chan )
1724 {
1725 const float x = mach->QuadPos.xyzw[0].f[0];
1726 const float y = mach->QuadPos.xyzw[1].f[0];
1727 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1728 const float dady = mach->InterpCoefs[attrib].dady[chan];
1729 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1730 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1731 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1732 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1733 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1734 }
1735
1736 /**
1737 * Evaluate a perspective-valued coefficient at the position of the
1738 * current quad.
1739 */
1740 static void
1741 eval_perspective_coef(
1742 struct tgsi_exec_machine *mach,
1743 unsigned attrib,
1744 unsigned chan )
1745 {
1746 const float x = mach->QuadPos.xyzw[0].f[0];
1747 const float y = mach->QuadPos.xyzw[1].f[0];
1748 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1749 const float dady = mach->InterpCoefs[attrib].dady[chan];
1750 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1751 const float *w = mach->QuadPos.xyzw[3].f;
1752 /* divide by W here */
1753 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1754 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1755 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1756 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1757 }
1758
1759
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel `chan`
 * of input attribute `attrib` for the current quad.  exec_declaration()
 * picks one of them according to the declared interpolation mode.
 */
typedef void (* eval_coef_func)(
   struct tgsi_exec_machine *mach,
   unsigned attrib,
   unsigned chan );
1764
1765 static void
1766 exec_declaration(struct tgsi_exec_machine *mach,
1767 const struct tgsi_full_declaration *decl)
1768 {
1769 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
1770 if (decl->Declaration.File == TGSI_FILE_INPUT ||
1771 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1772 uint first, last, mask;
1773
1774 first = decl->Range.First;
1775 last = decl->Range.Last;
1776 mask = decl->Declaration.UsageMask;
1777
1778 if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
1779 assert(decl->Semantic.Index == 0);
1780 assert(first == last);
1781 assert(mask == TGSI_WRITEMASK_XYZW);
1782
1783 mach->Inputs[first] = mach->QuadPos;
1784 } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
1785 uint i;
1786
1787 assert(decl->Semantic.Index == 0);
1788 assert(first == last);
1789
1790 for (i = 0; i < QUAD_SIZE; i++) {
1791 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
1792 }
1793 } else {
1794 eval_coef_func eval;
1795 uint i, j;
1796
1797 switch (decl->Declaration.Interpolate) {
1798 case TGSI_INTERPOLATE_CONSTANT:
1799 eval = eval_constant_coef;
1800 break;
1801
1802 case TGSI_INTERPOLATE_LINEAR:
1803 eval = eval_linear_coef;
1804 break;
1805
1806 case TGSI_INTERPOLATE_PERSPECTIVE:
1807 eval = eval_perspective_coef;
1808 break;
1809
1810 default:
1811 assert(0);
1812 return;
1813 }
1814
1815 for (j = 0; j < NUM_CHANNELS; j++) {
1816 if (mask & (1 << j)) {
1817 for (i = first; i <= last; i++) {
1818 eval(mach, i, j);
1819 }
1820 }
1821 }
1822 }
1823 }
1824 }
1825 }
1826
/**
 * A per-channel operation applied to all elements of a quad at once.
 * Unary operations read a single channel through `src`; the binary and
 * trinary exec_vector_*() helpers pass an array of two or three source
 * channels through the same pointer.
 */
typedef void (* micro_op)(union tgsi_exec_channel *dst,
                          const union tgsi_exec_channel *src);
1829
1830 static void
1831 exec_scalar_unary(struct tgsi_exec_machine *mach,
1832 const struct tgsi_full_instruction *inst,
1833 micro_op op,
1834 enum tgsi_exec_datatype dst_datatype,
1835 enum tgsi_exec_datatype src_datatype)
1836 {
1837 unsigned int chan;
1838 union tgsi_exec_channel src;
1839 union tgsi_exec_channel dst;
1840
1841 fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype);
1842 op(&dst, &src);
1843 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1844 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1845 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
1846 }
1847 }
1848 }
1849
1850 static void
1851 exec_vector_unary(struct tgsi_exec_machine *mach,
1852 const struct tgsi_full_instruction *inst,
1853 micro_op op,
1854 enum tgsi_exec_datatype dst_datatype,
1855 enum tgsi_exec_datatype src_datatype)
1856 {
1857 unsigned int chan;
1858 struct tgsi_exec_vector dst;
1859
1860 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1861 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1862 union tgsi_exec_channel src;
1863
1864 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
1865 op(&dst.xyzw[chan], &src);
1866 }
1867 }
1868 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1869 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1870 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1871 }
1872 }
1873 }
1874
1875 static void
1876 exec_vector_binary(struct tgsi_exec_machine *mach,
1877 const struct tgsi_full_instruction *inst,
1878 micro_op op,
1879 enum tgsi_exec_datatype dst_datatype,
1880 enum tgsi_exec_datatype src_datatype)
1881 {
1882 unsigned int chan;
1883 struct tgsi_exec_vector dst;
1884
1885 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1886 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1887 union tgsi_exec_channel src[2];
1888
1889 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
1890 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
1891 op(&dst.xyzw[chan], src);
1892 }
1893 }
1894 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1895 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1896 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1897 }
1898 }
1899 }
1900
1901 static void
1902 exec_vector_trinary(struct tgsi_exec_machine *mach,
1903 const struct tgsi_full_instruction *inst,
1904 micro_op op,
1905 enum tgsi_exec_datatype dst_datatype,
1906 enum tgsi_exec_datatype src_datatype)
1907 {
1908 unsigned int chan;
1909 struct tgsi_exec_vector dst;
1910
1911 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1912 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1913 union tgsi_exec_channel src[3];
1914
1915 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
1916 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
1917 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
1918 op(&dst.xyzw[chan], src);
1919 }
1920 }
1921 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1922 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1923 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
1924 }
1925 }
1926 }
1927
1928 static void
1929 exec_dp3(struct tgsi_exec_machine *mach,
1930 const struct tgsi_full_instruction *inst)
1931 {
1932 unsigned int chan;
1933 union tgsi_exec_channel arg[3];
1934
1935 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1936 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1937 micro_mul(&arg[2], &arg[0], &arg[1]);
1938
1939 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) {
1940 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
1941 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
1942 micro_mad(&arg[2], arg);
1943 }
1944
1945 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1946 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1947 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1948 }
1949 }
1950 }
1951
1952 static void
1953 exec_dp4(struct tgsi_exec_machine *mach,
1954 const struct tgsi_full_instruction *inst)
1955 {
1956 unsigned int chan;
1957 union tgsi_exec_channel arg[3];
1958
1959 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1960 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1961 micro_mul(&arg[2], &arg[0], &arg[1]);
1962
1963 for (chan = CHAN_Y; chan <= CHAN_W; chan++) {
1964 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
1965 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
1966 micro_mad(&arg[2], arg);
1967 }
1968
1969 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1970 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1971 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1972 }
1973 }
1974 }
1975
1976 static void
1977 exec_dp2a(struct tgsi_exec_machine *mach,
1978 const struct tgsi_full_instruction *inst)
1979 {
1980 unsigned int chan;
1981 union tgsi_exec_channel arg[3];
1982
1983 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1984 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1985 micro_mul(&arg[2], &arg[0], &arg[1]);
1986
1987 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
1988 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
1989 micro_mad(&arg[0], arg);
1990
1991 fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT);
1992 micro_add(&arg[0], &arg[0], &arg[1]);
1993
1994 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1995 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1996 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1997 }
1998 }
1999 }
2000
2001 static void
2002 exec_dph(struct tgsi_exec_machine *mach,
2003 const struct tgsi_full_instruction *inst)
2004 {
2005 unsigned int chan;
2006 union tgsi_exec_channel arg[3];
2007
2008 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2009 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2010 micro_mul(&arg[2], &arg[0], &arg[1]);
2011
2012 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2013 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2014 micro_mad(&arg[2], arg);
2015
2016 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2017 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2018 micro_mad(&arg[0], arg);
2019
2020 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT);
2021 micro_add(&arg[0], &arg[0], &arg[1]);
2022
2023 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2024 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2025 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2026 }
2027 }
2028 }
2029
2030 static void
2031 exec_dp2(struct tgsi_exec_machine *mach,
2032 const struct tgsi_full_instruction *inst)
2033 {
2034 unsigned int chan;
2035 union tgsi_exec_channel arg[3];
2036
2037 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2038 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2039 micro_mul(&arg[2], &arg[0], &arg[1]);
2040
2041 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2042 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2043 micro_mad(&arg[2], arg);
2044
2045 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2046 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2047 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2048 }
2049 }
2050 }
2051
2052 static void
2053 exec_break(struct tgsi_exec_machine *mach)
2054 {
2055 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
2056 /* turn off loop channels for each enabled exec channel */
2057 mach->LoopMask &= ~mach->ExecMask;
2058 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2059 UPDATE_EXEC_MASK(mach);
2060 } else {
2061 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
2062
2063 mach->Switch.mask = 0x0;
2064
2065 UPDATE_EXEC_MASK(mach);
2066 }
2067 }
2068
2069 static void
2070 exec_switch(struct tgsi_exec_machine *mach,
2071 const struct tgsi_full_instruction *inst)
2072 {
2073 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
2074 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
2075
2076 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
2077 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
2078 mach->Switch.mask = 0x0;
2079 mach->Switch.defaultMask = 0x0;
2080
2081 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
2082 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
2083
2084 UPDATE_EXEC_MASK(mach);
2085 }
2086
2087 static void
2088 exec_case(struct tgsi_exec_machine *mach,
2089 const struct tgsi_full_instruction *inst)
2090 {
2091 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
2092 union tgsi_exec_channel src;
2093 uint mask = 0;
2094
2095 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
2096
2097 if (mach->Switch.selector.u[0] == src.u[0]) {
2098 mask |= 0x1;
2099 }
2100 if (mach->Switch.selector.u[1] == src.u[1]) {
2101 mask |= 0x2;
2102 }
2103 if (mach->Switch.selector.u[2] == src.u[2]) {
2104 mask |= 0x4;
2105 }
2106 if (mach->Switch.selector.u[3] == src.u[3]) {
2107 mask |= 0x8;
2108 }
2109
2110 mach->Switch.defaultMask |= mask;
2111
2112 mach->Switch.mask |= mask & prevMask;
2113
2114 UPDATE_EXEC_MASK(mach);
2115 }
2116
2117 static void
2118 exec_default(struct tgsi_exec_machine *mach)
2119 {
2120 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
2121
2122 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
2123
2124 UPDATE_EXEC_MASK(mach);
2125 }
2126
2127 static void
2128 exec_endswitch(struct tgsi_exec_machine *mach)
2129 {
2130 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
2131 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
2132
2133 UPDATE_EXEC_MASK(mach);
2134 }
2135
2136 static void
2137 micro_i2f(union tgsi_exec_channel *dst,
2138 const union tgsi_exec_channel *src)
2139 {
2140 dst->f[0] = (float)src->i[0];
2141 dst->f[1] = (float)src->i[1];
2142 dst->f[2] = (float)src->i[2];
2143 dst->f[3] = (float)src->i[3];
2144 }
2145
2146 static void
2147 micro_not(union tgsi_exec_channel *dst,
2148 const union tgsi_exec_channel *src)
2149 {
2150 dst->u[0] = ~src->u[0];
2151 dst->u[1] = ~src->u[1];
2152 dst->u[2] = ~src->u[2];
2153 dst->u[3] = ~src->u[3];
2154 }
2155
2156 static void
2157 micro_shl(union tgsi_exec_channel *dst,
2158 const union tgsi_exec_channel *src)
2159 {
2160 dst->u[0] = src[0].u[0] << src[1].u[0];
2161 dst->u[1] = src[0].u[1] << src[1].u[1];
2162 dst->u[2] = src[0].u[2] << src[1].u[2];
2163 dst->u[3] = src[0].u[3] << src[1].u[3];
2164 }
2165
2166 static void
2167 micro_and(union tgsi_exec_channel *dst,
2168 const union tgsi_exec_channel *src)
2169 {
2170 dst->u[0] = src[0].u[0] & src[1].u[0];
2171 dst->u[1] = src[0].u[1] & src[1].u[1];
2172 dst->u[2] = src[0].u[2] & src[1].u[2];
2173 dst->u[3] = src[0].u[3] & src[1].u[3];
2174 }
2175
2176 static void
2177 micro_or(union tgsi_exec_channel *dst,
2178 const union tgsi_exec_channel *src)
2179 {
2180 dst->u[0] = src[0].u[0] | src[1].u[0];
2181 dst->u[1] = src[0].u[1] | src[1].u[1];
2182 dst->u[2] = src[0].u[2] | src[1].u[2];
2183 dst->u[3] = src[0].u[3] | src[1].u[3];
2184 }
2185
2186 static void
2187 micro_xor(union tgsi_exec_channel *dst,
2188 const union tgsi_exec_channel *src)
2189 {
2190 dst->u[0] = src[0].u[0] ^ src[1].u[0];
2191 dst->u[1] = src[0].u[1] ^ src[1].u[1];
2192 dst->u[2] = src[0].u[2] ^ src[1].u[2];
2193 dst->u[3] = src[0].u[3] ^ src[1].u[3];
2194 }
2195
2196 static void
2197 micro_f2i(union tgsi_exec_channel *dst,
2198 const union tgsi_exec_channel *src)
2199 {
2200 dst->i[0] = (int)src->f[0];
2201 dst->i[1] = (int)src->f[1];
2202 dst->i[2] = (int)src->f[2];
2203 dst->i[3] = (int)src->f[3];
2204 }
2205
2206 static void
2207 micro_idiv(union tgsi_exec_channel *dst,
2208 const union tgsi_exec_channel *src)
2209 {
2210 dst->i[0] = src[0].i[0] / src[1].i[0];
2211 dst->i[1] = src[0].i[1] / src[1].i[1];
2212 dst->i[2] = src[0].i[2] / src[1].i[2];
2213 dst->i[3] = src[0].i[3] / src[1].i[3];
2214 }
2215
2216 static void
2217 micro_imax(union tgsi_exec_channel *dst,
2218 const union tgsi_exec_channel *src)
2219 {
2220 dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0];
2221 dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1];
2222 dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2];
2223 dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3];
2224 }
2225
2226 static void
2227 micro_imin(union tgsi_exec_channel *dst,
2228 const union tgsi_exec_channel *src)
2229 {
2230 dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0];
2231 dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1];
2232 dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2];
2233 dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3];
2234 }
2235
2236 static void
2237 micro_isge(union tgsi_exec_channel *dst,
2238 const union tgsi_exec_channel *src)
2239 {
2240 dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0;
2241 dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0;
2242 dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0;
2243 dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0;
2244 }
2245
2246 static void
2247 micro_ishr(union tgsi_exec_channel *dst,
2248 const union tgsi_exec_channel *src)
2249 {
2250 dst->i[0] = src[0].i[0] >> src[1].i[0];
2251 dst->i[1] = src[0].i[1] >> src[1].i[1];
2252 dst->i[2] = src[0].i[2] >> src[1].i[2];
2253 dst->i[3] = src[0].i[3] >> src[1].i[3];
2254 }
2255
2256 static void
2257 micro_islt(union tgsi_exec_channel *dst,
2258 const union tgsi_exec_channel *src)
2259 {
2260 dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0;
2261 dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0;
2262 dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0;
2263 dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0;
2264 }
2265
2266 static void
2267 micro_f2u(union tgsi_exec_channel *dst,
2268 const union tgsi_exec_channel *src)
2269 {
2270 dst->u[0] = (uint)src->f[0];
2271 dst->u[1] = (uint)src->f[1];
2272 dst->u[2] = (uint)src->f[2];
2273 dst->u[3] = (uint)src->f[3];
2274 }
2275
2276 static void
2277 micro_u2f(union tgsi_exec_channel *dst,
2278 const union tgsi_exec_channel *src)
2279 {
2280 dst->f[0] = (float)src->u[0];
2281 dst->f[1] = (float)src->u[1];
2282 dst->f[2] = (float)src->u[2];
2283 dst->f[3] = (float)src->u[3];
2284 }
2285
2286 static void
2287 micro_uadd(union tgsi_exec_channel *dst,
2288 const union tgsi_exec_channel *src)
2289 {
2290 dst->u[0] = src[0].u[0] + src[1].u[0];
2291 dst->u[1] = src[0].u[1] + src[1].u[1];
2292 dst->u[2] = src[0].u[2] + src[1].u[2];
2293 dst->u[3] = src[0].u[3] + src[1].u[3];
2294 }
2295
2296 static void
2297 micro_udiv(union tgsi_exec_channel *dst,
2298 const union tgsi_exec_channel *src)
2299 {
2300 dst->u[0] = src[0].u[0] / src[1].u[0];
2301 dst->u[1] = src[0].u[1] / src[1].u[1];
2302 dst->u[2] = src[0].u[2] / src[1].u[2];
2303 dst->u[3] = src[0].u[3] / src[1].u[3];
2304 }
2305
2306 static void
2307 micro_umad(union tgsi_exec_channel *dst,
2308 const union tgsi_exec_channel *src)
2309 {
2310 dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0];
2311 dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1];
2312 dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2];
2313 dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3];
2314 }
2315
2316 static void
2317 micro_umax(union tgsi_exec_channel *dst,
2318 const union tgsi_exec_channel *src)
2319 {
2320 dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0];
2321 dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1];
2322 dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2];
2323 dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3];
2324 }
2325
2326 static void
2327 micro_umin(union tgsi_exec_channel *dst,
2328 const union tgsi_exec_channel *src)
2329 {
2330 dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0];
2331 dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1];
2332 dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2];
2333 dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3];
2334 }
2335
2336 static void
2337 micro_umod(union tgsi_exec_channel *dst,
2338 const union tgsi_exec_channel *src)
2339 {
2340 dst->u[0] = src[0].u[0] % src[1].u[0];
2341 dst->u[1] = src[0].u[1] % src[1].u[1];
2342 dst->u[2] = src[0].u[2] % src[1].u[2];
2343 dst->u[3] = src[0].u[3] % src[1].u[3];
2344 }
2345
2346 static void
2347 micro_umul(union tgsi_exec_channel *dst,
2348 const union tgsi_exec_channel *src)
2349 {
2350 dst->u[0] = src[0].u[0] * src[1].u[0];
2351 dst->u[1] = src[0].u[1] * src[1].u[1];
2352 dst->u[2] = src[0].u[2] * src[1].u[2];
2353 dst->u[3] = src[0].u[3] * src[1].u[3];
2354 }
2355
2356 static void
2357 micro_useq(union tgsi_exec_channel *dst,
2358 const union tgsi_exec_channel *src)
2359 {
2360 dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0;
2361 dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0;
2362 dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0;
2363 dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0;
2364 }
2365
2366 static void
2367 micro_usge(union tgsi_exec_channel *dst,
2368 const union tgsi_exec_channel *src)
2369 {
2370 dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0;
2371 dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0;
2372 dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0;
2373 dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0;
2374 }
2375
2376 static void
2377 micro_ushr(union tgsi_exec_channel *dst,
2378 const union tgsi_exec_channel *src)
2379 {
2380 dst->u[0] = src[0].u[0] >> src[1].u[0];
2381 dst->u[1] = src[0].u[1] >> src[1].u[1];
2382 dst->u[2] = src[0].u[2] >> src[1].u[2];
2383 dst->u[3] = src[0].u[3] >> src[1].u[3];
2384 }
2385
2386 static void
2387 micro_uslt(union tgsi_exec_channel *dst,
2388 const union tgsi_exec_channel *src)
2389 {
2390 dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0;
2391 dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0;
2392 dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0;
2393 dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0;
2394 }
2395
2396 static void
2397 micro_usne(union tgsi_exec_channel *dst,
2398 const union tgsi_exec_channel *src)
2399 {
2400 dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0;
2401 dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0;
2402 dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0;
2403 dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0;
2404 }
2405
2406 static void
2407 exec_instruction(
2408 struct tgsi_exec_machine *mach,
2409 const struct tgsi_full_instruction *inst,
2410 int *pc )
2411 {
2412 uint chan_index;
2413 union tgsi_exec_channel r[10];
2414 union tgsi_exec_channel d[8];
2415
2416 (*pc)++;
2417
2418 switch (inst->Instruction.Opcode) {
2419 case TGSI_OPCODE_ARL:
2420 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
2421 break;
2422
2423 case TGSI_OPCODE_MOV:
2424 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
2425 break;
2426
2427 case TGSI_OPCODE_LIT:
2428 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2429 FETCH( &r[0], 0, CHAN_X );
2430 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2431 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2432 }
2433
2434 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2435 FETCH( &r[1], 0, CHAN_Y );
2436 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2437
2438 FETCH( &r[2], 0, CHAN_W );
2439 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
2440 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
2441 micro_pow( &r[1], &r[1], &r[2] );
2442 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
2443 }
2444
2445 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2446 STORE(&d[CHAN_Y], 0, CHAN_Y);
2447 }
2448 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2449 STORE(&d[CHAN_Z], 0, CHAN_Z);
2450 }
2451 }
2452 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2453 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2454 }
2455 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2456 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2457 }
2458 break;
2459
2460 case TGSI_OPCODE_RCP:
2461 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2462 break;
2463
2464 case TGSI_OPCODE_RSQ:
2465 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2466 break;
2467
2468 case TGSI_OPCODE_EXP:
2469 FETCH( &r[0], 0, CHAN_X );
2470 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
2471 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2472 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
2473 STORE( &r[2], 0, CHAN_X ); /* store r2 */
2474 }
2475 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2476 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
2477 STORE( &r[2], 0, CHAN_Y ); /* store r2 */
2478 }
2479 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2480 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
2481 STORE( &r[2], 0, CHAN_Z ); /* store r2 */
2482 }
2483 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2484 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2485 }
2486 break;
2487
2488 case TGSI_OPCODE_LOG:
2489 FETCH( &r[0], 0, CHAN_X );
2490 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */
2491 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
2492 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
2493 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2494 STORE( &r[0], 0, CHAN_X );
2495 }
2496 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2497 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
2498 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
2499 STORE( &r[0], 0, CHAN_Y );
2500 }
2501 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2502 STORE( &r[1], 0, CHAN_Z );
2503 }
2504 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2505 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2506 }
2507 break;
2508
2509 case TGSI_OPCODE_MUL:
2510 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2511 FETCH(&r[0], 0, chan_index);
2512 FETCH(&r[1], 1, chan_index);
2513 micro_mul(&d[chan_index], &r[0], &r[1]);
2514 }
2515 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2516 STORE(&d[chan_index], 0, chan_index);
2517 }
2518 break;
2519
2520 case TGSI_OPCODE_ADD:
2521 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2522 FETCH( &r[0], 0, chan_index );
2523 FETCH( &r[1], 1, chan_index );
2524 micro_add(&d[chan_index], &r[0], &r[1]);
2525 }
2526 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2527 STORE(&d[chan_index], 0, chan_index);
2528 }
2529 break;
2530
2531 case TGSI_OPCODE_DP3:
2532 exec_dp3(mach, inst);
2533 break;
2534
2535 case TGSI_OPCODE_DP4:
2536 exec_dp4(mach, inst);
2537 break;
2538
2539 case TGSI_OPCODE_DST:
2540 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2541 FETCH( &r[0], 0, CHAN_Y );
2542 FETCH( &r[1], 1, CHAN_Y);
2543 micro_mul(&d[CHAN_Y], &r[0], &r[1]);
2544 }
2545 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2546 FETCH(&d[CHAN_Z], 0, CHAN_Z);
2547 }
2548 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2549 FETCH(&d[CHAN_W], 1, CHAN_W);
2550 }
2551
2552 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2553 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
2554 }
2555 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2556 STORE(&d[CHAN_Y], 0, CHAN_Y);
2557 }
2558 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2559 STORE(&d[CHAN_Z], 0, CHAN_Z);
2560 }
2561 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2562 STORE(&d[CHAN_W], 0, CHAN_W);
2563 }
2564 break;
2565
2566 case TGSI_OPCODE_MIN:
2567 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2568 FETCH(&r[0], 0, chan_index);
2569 FETCH(&r[1], 1, chan_index);
2570
2571 /* XXX use micro_min()?? */
2572 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
2573 }
2574 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2575 STORE(&d[chan_index], 0, chan_index);
2576 }
2577 break;
2578
2579 case TGSI_OPCODE_MAX:
2580 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2581 FETCH(&r[0], 0, chan_index);
2582 FETCH(&r[1], 1, chan_index);
2583
2584 /* XXX use micro_max()?? */
2585 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
2586 }
2587 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2588 STORE(&d[chan_index], 0, chan_index);
2589 }
2590 break;
2591
2592 case TGSI_OPCODE_SLT:
2593 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2594 break;
2595
2596 case TGSI_OPCODE_SGE:
2597 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2598 break;
2599
2600 case TGSI_OPCODE_MAD:
2601 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2602 break;
2603
2604 case TGSI_OPCODE_SUB:
2605 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2606 FETCH(&r[0], 0, chan_index);
2607 FETCH(&r[1], 1, chan_index);
2608 micro_sub(&d[chan_index], &r[0], &r[1]);
2609 }
2610 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2611 STORE(&d[chan_index], 0, chan_index);
2612 }
2613 break;
2614
2615 case TGSI_OPCODE_LRP:
2616 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2617 break;
2618
2619 case TGSI_OPCODE_CND:
2620 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2621 FETCH(&r[0], 0, chan_index);
2622 FETCH(&r[1], 1, chan_index);
2623 FETCH(&r[2], 2, chan_index);
2624 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2625 }
2626 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2627 STORE(&d[chan_index], 0, chan_index);
2628 }
2629 break;
2630
2631 case TGSI_OPCODE_DP2A:
2632 exec_dp2a(mach, inst);
2633 break;
2634
2635 case TGSI_OPCODE_FRC:
2636 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2637 break;
2638
2639 case TGSI_OPCODE_CLAMP:
2640 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2641 FETCH(&r[0], 0, chan_index);
2642 FETCH(&r[1], 1, chan_index);
2643 micro_max(&r[0], &r[0], &r[1]);
2644 FETCH(&r[1], 2, chan_index);
2645 micro_min(&d[chan_index], &r[0], &r[1]);
2646 }
2647 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2648 STORE(&d[chan_index], 0, chan_index);
2649 }
2650 break;
2651
2652 case TGSI_OPCODE_FLR:
2653 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2654 break;
2655
2656 case TGSI_OPCODE_ROUND:
2657 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2658 break;
2659
2660 case TGSI_OPCODE_EX2:
2661 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2662 break;
2663
2664 case TGSI_OPCODE_LG2:
2665 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2666 break;
2667
2668 case TGSI_OPCODE_POW:
2669 FETCH(&r[0], 0, CHAN_X);
2670 FETCH(&r[1], 1, CHAN_X);
2671
2672 micro_pow( &r[0], &r[0], &r[1] );
2673
2674 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2675 STORE( &r[0], 0, chan_index );
2676 }
2677 break;
2678
2679 case TGSI_OPCODE_XPD:
2680 FETCH(&r[0], 0, CHAN_Y);
2681 FETCH(&r[1], 1, CHAN_Z);
2682
2683 micro_mul( &r[2], &r[0], &r[1] );
2684
2685 FETCH(&r[3], 0, CHAN_Z);
2686 FETCH(&r[4], 1, CHAN_Y);
2687
2688 micro_mul( &r[5], &r[3], &r[4] );
2689 micro_sub(&d[CHAN_X], &r[2], &r[5]);
2690
2691 FETCH(&r[2], 1, CHAN_X);
2692
2693 micro_mul( &r[3], &r[3], &r[2] );
2694
2695 FETCH(&r[5], 0, CHAN_X);
2696
2697 micro_mul( &r[1], &r[1], &r[5] );
2698 micro_sub(&d[CHAN_Y], &r[3], &r[1]);
2699
2700 micro_mul( &r[5], &r[5], &r[4] );
2701 micro_mul( &r[0], &r[0], &r[2] );
2702 micro_sub(&d[CHAN_Z], &r[5], &r[0]);
2703
2704 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2705 STORE(&d[CHAN_X], 0, CHAN_X);
2706 }
2707 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2708 STORE(&d[CHAN_Y], 0, CHAN_Y);
2709 }
2710 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2711 STORE(&d[CHAN_Z], 0, CHAN_Z);
2712 }
2713 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2714 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2715 }
2716 break;
2717
2718 case TGSI_OPCODE_ABS:
2719 exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2720 break;
2721
2722 case TGSI_OPCODE_RCC:
2723 FETCH(&r[0], 0, CHAN_X);
2724 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2725 micro_float_clamp(&r[0], &r[0]);
2726 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2727 STORE(&r[0], 0, chan_index);
2728 }
2729 break;
2730
2731 case TGSI_OPCODE_DPH:
2732 exec_dph(mach, inst);
2733 break;
2734
2735 case TGSI_OPCODE_COS:
2736 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2737 break;
2738
2739 case TGSI_OPCODE_DDX:
2740 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2741 break;
2742
2743 case TGSI_OPCODE_DDY:
2744 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2745 break;
2746
2747 case TGSI_OPCODE_KILP:
2748 exec_kilp (mach, inst);
2749 break;
2750
2751 case TGSI_OPCODE_KIL:
2752 exec_kil (mach, inst);
2753 break;
2754
2755 case TGSI_OPCODE_PK2H:
2756 assert (0);
2757 break;
2758
2759 case TGSI_OPCODE_PK2US:
2760 assert (0);
2761 break;
2762
2763 case TGSI_OPCODE_PK4B:
2764 assert (0);
2765 break;
2766
2767 case TGSI_OPCODE_PK4UB:
2768 assert (0);
2769 break;
2770
2771 case TGSI_OPCODE_RFL:
2772 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2773 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2774 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2775 /* r0 = dp3(src0, src0) */
2776 FETCH(&r[2], 0, CHAN_X);
2777 micro_mul(&r[0], &r[2], &r[2]);
2778 FETCH(&r[4], 0, CHAN_Y);
2779 micro_mul(&r[8], &r[4], &r[4]);
2780 micro_add(&r[0], &r[0], &r[8]);
2781 FETCH(&r[6], 0, CHAN_Z);
2782 micro_mul(&r[8], &r[6], &r[6]);
2783 micro_add(&r[0], &r[0], &r[8]);
2784
2785 /* r1 = dp3(src0, src1) */
2786 FETCH(&r[3], 1, CHAN_X);
2787 micro_mul(&r[1], &r[2], &r[3]);
2788 FETCH(&r[5], 1, CHAN_Y);
2789 micro_mul(&r[8], &r[4], &r[5]);
2790 micro_add(&r[1], &r[1], &r[8]);
2791 FETCH(&r[7], 1, CHAN_Z);
2792 micro_mul(&r[8], &r[6], &r[7]);
2793 micro_add(&r[1], &r[1], &r[8]);
2794
2795 /* r1 = 2 * r1 / r0 */
2796 micro_add(&r[1], &r[1], &r[1]);
2797 micro_div(&r[1], &r[1], &r[0]);
2798
2799 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2800 micro_mul(&r[2], &r[2], &r[1]);
2801 micro_sub(&r[2], &r[2], &r[3]);
2802 STORE(&r[2], 0, CHAN_X);
2803 }
2804 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2805 micro_mul(&r[4], &r[4], &r[1]);
2806 micro_sub(&r[4], &r[4], &r[5]);
2807 STORE(&r[4], 0, CHAN_Y);
2808 }
2809 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2810 micro_mul(&r[6], &r[6], &r[1]);
2811 micro_sub(&r[6], &r[6], &r[7]);
2812 STORE(&r[6], 0, CHAN_Z);
2813 }
2814 }
2815 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2816 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2817 }
2818 break;
2819
2820 case TGSI_OPCODE_SEQ:
2821 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2822 break;
2823
2824 case TGSI_OPCODE_SFL:
2825 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2826 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2827 }
2828 break;
2829
2830 case TGSI_OPCODE_SGT:
2831 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2832 break;
2833
2834 case TGSI_OPCODE_SIN:
2835 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2836 break;
2837
2838 case TGSI_OPCODE_SLE:
2839 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2840 break;
2841
2842 case TGSI_OPCODE_SNE:
2843 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2844 break;
2845
2846 case TGSI_OPCODE_STR:
2847 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2848 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2849 }
2850 break;
2851
2852 case TGSI_OPCODE_TEX:
2853 /* simple texture lookup */
2854 /* src[0] = texcoord */
2855 /* src[1] = sampler unit */
2856 exec_tex(mach, inst, TEX_MODIFIER_NONE);
2857 break;
2858
2859 case TGSI_OPCODE_TXB:
2860 /* Texture lookup with lod bias */
2861 /* src[0] = texcoord (src[0].w = LOD bias) */
2862 /* src[1] = sampler unit */
2863 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS);
2864 break;
2865
2866 case TGSI_OPCODE_TXD:
2867 /* Texture lookup with explict partial derivatives */
2868 /* src[0] = texcoord */
2869 /* src[1] = d[strq]/dx */
2870 /* src[2] = d[strq]/dy */
2871 /* src[3] = sampler unit */
2872 exec_txd(mach, inst);
2873 break;
2874
2875 case TGSI_OPCODE_TXL:
2876 /* Texture lookup with explit LOD */
2877 /* src[0] = texcoord (src[0].w = LOD) */
2878 /* src[1] = sampler unit */
2879 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD);
2880 break;
2881
2882 case TGSI_OPCODE_TXP:
2883 /* Texture lookup with projection */
2884 /* src[0] = texcoord (src[0].w = projection) */
2885 /* src[1] = sampler unit */
2886 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED);
2887 break;
2888
2889 case TGSI_OPCODE_UP2H:
2890 assert (0);
2891 break;
2892
2893 case TGSI_OPCODE_UP2US:
2894 assert (0);
2895 break;
2896
2897 case TGSI_OPCODE_UP4B:
2898 assert (0);
2899 break;
2900
2901 case TGSI_OPCODE_UP4UB:
2902 assert (0);
2903 break;
2904
2905 case TGSI_OPCODE_X2D:
2906 FETCH(&r[0], 1, CHAN_X);
2907 FETCH(&r[1], 1, CHAN_Y);
2908 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2909 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2910 FETCH(&r[2], 2, CHAN_X);
2911 micro_mul(&r[2], &r[2], &r[0]);
2912 FETCH(&r[3], 2, CHAN_Y);
2913 micro_mul(&r[3], &r[3], &r[1]);
2914 micro_add(&r[2], &r[2], &r[3]);
2915 FETCH(&r[3], 0, CHAN_X);
2916 micro_add(&d[CHAN_X], &r[2], &r[3]);
2917
2918 }
2919 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2920 IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2921 FETCH(&r[2], 2, CHAN_Z);
2922 micro_mul(&r[2], &r[2], &r[0]);
2923 FETCH(&r[3], 2, CHAN_W);
2924 micro_mul(&r[3], &r[3], &r[1]);
2925 micro_add(&r[2], &r[2], &r[3]);
2926 FETCH(&r[3], 0, CHAN_Y);
2927 micro_add(&d[CHAN_Y], &r[2], &r[3]);
2928
2929 }
2930 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2931 STORE(&d[CHAN_X], 0, CHAN_X);
2932 }
2933 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2934 STORE(&d[CHAN_Y], 0, CHAN_Y);
2935 }
2936 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2937 STORE(&d[CHAN_X], 0, CHAN_Z);
2938 }
2939 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2940 STORE(&d[CHAN_Y], 0, CHAN_W);
2941 }
2942 break;
2943
2944 case TGSI_OPCODE_ARA:
2945 assert (0);
2946 break;
2947
2948 case TGSI_OPCODE_ARR:
2949 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
2950 break;
2951
2952 case TGSI_OPCODE_BRA:
2953 assert (0);
2954 break;
2955
2956 case TGSI_OPCODE_CAL:
2957 /* skip the call if no execution channels are enabled */
2958 if (mach->ExecMask) {
2959 /* do the call */
2960
2961 /* First, record the depths of the execution stacks.
2962 * This is important for deeply nested/looped return statements.
2963 * We have to unwind the stacks by the correct amount. For a
2964 * real code generator, we could determine the number of entries
2965 * to pop off each stack with simple static analysis and avoid
2966 * implementing this data structure at run time.
2967 */
2968 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
2969 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
2970 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
2971 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
2972 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
2973 /* note that PC was already incremented above */
2974 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
2975
2976 mach->CallStackTop++;
2977
2978 /* Second, push the Cond, Loop, Cont, Func stacks */
2979 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2980 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2981 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2982 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
2983 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
2984 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2985
2986 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2987 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2988 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2989 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
2990 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
2991 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2992
2993 /* Finally, jump to the subroutine */
2994 *pc = inst->Label.Label;
2995 }
2996 break;
2997
2998 case TGSI_OPCODE_RET:
2999 mach->FuncMask &= ~mach->ExecMask;
3000 UPDATE_EXEC_MASK(mach);
3001
3002 if (mach->FuncMask == 0x0) {
3003 /* really return now (otherwise, keep executing */
3004
3005 if (mach->CallStackTop == 0) {
3006 /* returning from main() */
3007 *pc = -1;
3008 return;
3009 }
3010
3011 assert(mach->CallStackTop > 0);
3012 mach->CallStackTop--;
3013
3014 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3015 mach->CondMask = mach->CondStack[mach->CondStackTop];
3016
3017 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3018 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3019
3020 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3021 mach->ContMask = mach->ContStack[mach->ContStackTop];
3022
3023 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3024 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3025
3026 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3027 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3028
3029 assert(mach->FuncStackTop > 0);
3030 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3031
3032 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3033
3034 UPDATE_EXEC_MASK(mach);
3035 }
3036 break;
3037
3038 case TGSI_OPCODE_SSG:
3039 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3040 break;
3041
3042 case TGSI_OPCODE_CMP:
3043 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3044 FETCH(&r[0], 0, chan_index);
3045 FETCH(&r[1], 1, chan_index);
3046 FETCH(&r[2], 2, chan_index);
3047 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
3048 }
3049 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
3050 STORE(&d[chan_index], 0, chan_index);
3051 }
3052 break;
3053
3054 case TGSI_OPCODE_SCS:
3055 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
3056 FETCH( &r[0], 0, CHAN_X );
3057 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
3058 micro_cos(&r[1], &r[0]);
3059 STORE(&r[1], 0, CHAN_X);
3060 }
3061 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
3062 micro_sin(&r[1], &r[0]);
3063 STORE(&r[1], 0, CHAN_Y);
3064 }
3065 }
3066 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
3067 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
3068 }
3069 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
3070 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
3071 }
3072 break;
3073
3074 case TGSI_OPCODE_NRM:
3075 /* 3-component vector normalize */
3076 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
3077 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
3078 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
3079 /* r3 = sqrt(dp3(src0, src0)) */
3080 FETCH(&r[0], 0, CHAN_X);
3081 micro_mul(&r[3], &r[0], &r[0]);
3082 FETCH(&r[1], 0, CHAN_Y);
3083 micro_mul(&r[4], &r[1], &r[1]);
3084 micro_add(&r[3], &r[3], &r[4]);
3085 FETCH(&r[2], 0, CHAN_Z);
3086 micro_mul(&r[4], &r[2], &r[2]);
3087 micro_add(&r[3], &r[3], &r[4]);
3088 micro_sqrt(&r[3], &r[3]);
3089
3090 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
3091 micro_div(&r[0], &r[0], &r[3]);
3092 STORE(&r[0], 0, CHAN_X);
3093 }
3094 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
3095 micro_div(&r[1], &r[1], &r[3]);
3096 STORE(&r[1], 0, CHAN_Y);
3097 }
3098 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
3099 micro_div(&r[2], &r[2], &r[3]);
3100 STORE(&r[2], 0, CHAN_Z);
3101 }
3102 }
3103 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
3104 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
3105 }
3106 break;
3107
3108 case TGSI_OPCODE_NRM4:
3109 /* 4-component vector normalize */
3110 {
3111 union tgsi_exec_channel tmp, dot;
3112
3113 /* tmp = dp4(src0, src0): */
3114 FETCH( &r[0], 0, CHAN_X );
3115 micro_mul( &tmp, &r[0], &r[0] );
3116
3117 FETCH( &r[1], 0, CHAN_Y );
3118 micro_mul( &dot, &r[1], &r[1] );
3119 micro_add( &tmp, &tmp, &dot );
3120
3121 FETCH( &r[2], 0, CHAN_Z );
3122 micro_mul( &dot, &r[2], &r[2] );
3123 micro_add( &tmp, &tmp, &dot );
3124
3125 FETCH( &r[3], 0, CHAN_W );
3126 micro_mul( &dot, &r[3], &r[3] );
3127 micro_add( &tmp, &tmp, &dot );
3128
3129 /* tmp = 1 / sqrt(tmp) */
3130 micro_sqrt( &tmp, &tmp );
3131 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
3132
3133 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3134 /* chan = chan * tmp */
3135 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
3136 STORE( &r[chan_index], 0, chan_index );
3137 }
3138 }
3139 break;
3140
3141 case TGSI_OPCODE_DIV:
3142 assert( 0 );
3143 break;
3144
3145 case TGSI_OPCODE_DP2:
3146 exec_dp2(mach, inst);
3147 break;
3148
3149 case TGSI_OPCODE_IF:
3150 /* push CondMask */
3151 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3152 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3153 FETCH( &r[0], 0, CHAN_X );
3154 /* update CondMask */
3155 if( ! r[0].u[0] ) {
3156 mach->CondMask &= ~0x1;
3157 }
3158 if( ! r[0].u[1] ) {
3159 mach->CondMask &= ~0x2;
3160 }
3161 if( ! r[0].u[2] ) {
3162 mach->CondMask &= ~0x4;
3163 }
3164 if( ! r[0].u[3] ) {
3165 mach->CondMask &= ~0x8;
3166 }
3167 UPDATE_EXEC_MASK(mach);
3168 /* Todo: If CondMask==0, jump to ELSE */
3169 break;
3170
3171 case TGSI_OPCODE_ELSE:
3172 /* invert CondMask wrt previous mask */
3173 {
3174 uint prevMask;
3175 assert(mach->CondStackTop > 0);
3176 prevMask = mach->CondStack[mach->CondStackTop - 1];
3177 mach->CondMask = ~mach->CondMask & prevMask;
3178 UPDATE_EXEC_MASK(mach);
3179 /* Todo: If CondMask==0, jump to ENDIF */
3180 }
3181 break;
3182
3183 case TGSI_OPCODE_ENDIF:
3184 /* pop CondMask */
3185 assert(mach->CondStackTop > 0);
3186 mach->CondMask = mach->CondStack[--mach->CondStackTop];
3187 UPDATE_EXEC_MASK(mach);
3188 break;
3189
3190 case TGSI_OPCODE_END:
3191 /* halt execution */
3192 *pc = -1;
3193 break;
3194
3195 case TGSI_OPCODE_REP:
3196 assert (0);
3197 break;
3198
3199 case TGSI_OPCODE_ENDREP:
3200 assert (0);
3201 break;
3202
3203 case TGSI_OPCODE_PUSHA:
3204 assert (0);
3205 break;
3206
3207 case TGSI_OPCODE_POPA:
3208 assert (0);
3209 break;
3210
3211 case TGSI_OPCODE_CEIL:
3212 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3213 break;
3214
3215 case TGSI_OPCODE_I2F:
3216 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
3217 break;
3218
3219 case TGSI_OPCODE_NOT:
3220 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3221 break;
3222
3223 case TGSI_OPCODE_TRUNC:
3224 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3225 break;
3226
3227 case TGSI_OPCODE_SHL:
3228 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3229 break;
3230
3231 case TGSI_OPCODE_AND:
3232 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3233 break;
3234
3235 case TGSI_OPCODE_OR:
3236 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3237 break;
3238
3239 case TGSI_OPCODE_MOD:
3240 assert (0);
3241 break;
3242
3243 case TGSI_OPCODE_XOR:
3244 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3245 break;
3246
3247 case TGSI_OPCODE_SAD:
3248 assert (0);
3249 break;
3250
3251 case TGSI_OPCODE_TXF:
3252 assert (0);
3253 break;
3254
3255 case TGSI_OPCODE_TXQ:
3256 assert (0);
3257 break;
3258
3259 case TGSI_OPCODE_EMIT:
3260 emit_vertex(mach);
3261 break;
3262
3263 case TGSI_OPCODE_ENDPRIM:
3264 emit_primitive(mach);
3265 break;
3266
3267 case TGSI_OPCODE_BGNFOR:
3268 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3269 for (chan_index = 0; chan_index < 3; chan_index++) {
3270 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
3271 }
3272 ++mach->LoopCounterStackTop;
3273 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
3274 /* update LoopMask */
3275 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3276 mach->LoopMask &= ~0x1;
3277 }
3278 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3279 mach->LoopMask &= ~0x2;
3280 }
3281 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3282 mach->LoopMask &= ~0x4;
3283 }
3284 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3285 mach->LoopMask &= ~0x8;
3286 }
3287 /* TODO: if mach->LoopMask == 0, jump to end of loop */
3288 UPDATE_EXEC_MASK(mach);
3289 /* fall-through (for now) */
3290 case TGSI_OPCODE_BGNLOOP:
3291 /* push LoopMask and ContMasks */
3292 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3293 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3294 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3295 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3296
3297 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3298 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3299 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
3300 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3301 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
3302 break;
3303
3304 case TGSI_OPCODE_ENDFOR:
3305 assert(mach->LoopCounterStackTop > 0);
3306 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3307 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
3308 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
3309 /* update LoopMask */
3310 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
3311 mach->LoopMask &= ~0x1;
3312 }
3313 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
3314 mach->LoopMask &= ~0x2;
3315 }
3316 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
3317 mach->LoopMask &= ~0x4;
3318 }
3319 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
3320 mach->LoopMask &= ~0x8;
3321 }
3322 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3323 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
3324 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
3325 assert(mach->LoopLabelStackTop > 0);
3326 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
3327 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
3328 /* Restore ContMask, but don't pop */
3329 assert(mach->ContStackTop > 0);
3330 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3331 UPDATE_EXEC_MASK(mach);
3332 if (mach->ExecMask) {
3333 /* repeat loop: jump to instruction just past BGNLOOP */
3334 assert(mach->LoopLabelStackTop > 0);
3335 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3336 }
3337 else {
3338 /* exit loop: pop LoopMask */
3339 assert(mach->LoopStackTop > 0);
3340 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3341 /* pop ContMask */
3342 assert(mach->ContStackTop > 0);
3343 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3344 assert(mach->LoopLabelStackTop > 0);
3345 --mach->LoopLabelStackTop;
3346 assert(mach->LoopCounterStackTop > 0);
3347 --mach->LoopCounterStackTop;
3348
3349 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3350 }
3351 UPDATE_EXEC_MASK(mach);
3352 break;
3353
3354 case TGSI_OPCODE_ENDLOOP:
3355 /* Restore ContMask, but don't pop */
3356 assert(mach->ContStackTop > 0);
3357 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3358 UPDATE_EXEC_MASK(mach);
3359 if (mach->ExecMask) {
3360 /* repeat loop: jump to instruction just past BGNLOOP */
3361 assert(mach->LoopLabelStackTop > 0);
3362 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3363 }
3364 else {
3365 /* exit loop: pop LoopMask */
3366 assert(mach->LoopStackTop > 0);
3367 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3368 /* pop ContMask */
3369 assert(mach->ContStackTop > 0);
3370 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3371 assert(mach->LoopLabelStackTop > 0);
3372 --mach->LoopLabelStackTop;
3373
3374 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3375 }
3376 UPDATE_EXEC_MASK(mach);
3377 break;
3378
3379 case TGSI_OPCODE_BRK:
3380 exec_break(mach);
3381 break;
3382
3383 case TGSI_OPCODE_CONT:
3384 /* turn off cont channels for each enabled exec channel */
3385 mach->ContMask &= ~mach->ExecMask;
3386 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3387 UPDATE_EXEC_MASK(mach);
3388 break;
3389
3390 case TGSI_OPCODE_BGNSUB:
3391 /* no-op */
3392 break;
3393
3394 case TGSI_OPCODE_ENDSUB:
3395 /*
3396 * XXX: This really should be a no-op. We should never reach this opcode.
3397 */
3398
3399 assert(mach->CallStackTop > 0);
3400 mach->CallStackTop--;
3401
3402 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3403 mach->CondMask = mach->CondStack[mach->CondStackTop];
3404
3405 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3406 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3407
3408 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3409 mach->ContMask = mach->ContStack[mach->ContStackTop];
3410
3411 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3412 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3413
3414 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3415 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3416
3417 assert(mach->FuncStackTop > 0);
3418 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3419
3420 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3421
3422 UPDATE_EXEC_MASK(mach);
3423 break;
3424
3425 case TGSI_OPCODE_NOP:
3426 break;
3427
3428 case TGSI_OPCODE_BREAKC:
3429 FETCH(&r[0], 0, CHAN_X);
3430 /* update LoopMask */
3431 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
3432 mach->LoopMask &= ~0x1;
3433 }
3434 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
3435 mach->LoopMask &= ~0x2;
3436 }
3437 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
3438 mach->LoopMask &= ~0x4;
3439 }
3440 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
3441 mach->LoopMask &= ~0x8;
3442 }
3443 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3444 UPDATE_EXEC_MASK(mach);
3445 break;
3446
3447 case TGSI_OPCODE_F2I:
3448 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3449 break;
3450
3451 case TGSI_OPCODE_IDIV:
3452 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3453 break;
3454
3455 case TGSI_OPCODE_IMAX:
3456 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3457 break;
3458
3459 case TGSI_OPCODE_IMIN:
3460 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3461 break;
3462
3463 case TGSI_OPCODE_INEG:
3464 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3465 break;
3466
3467 case TGSI_OPCODE_ISGE:
3468 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3469 break;
3470
3471 case TGSI_OPCODE_ISHR:
3472 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3473 break;
3474
3475 case TGSI_OPCODE_ISLT:
3476 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3477 break;
3478
3479 case TGSI_OPCODE_F2U:
3480 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
3481 break;
3482
3483 case TGSI_OPCODE_U2F:
3484 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
3485 break;
3486
3487 case TGSI_OPCODE_UADD:
3488 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3489 break;
3490
3491 case TGSI_OPCODE_UDIV:
3492 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3493 break;
3494
3495 case TGSI_OPCODE_UMAD:
3496 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3497 break;
3498
3499 case TGSI_OPCODE_UMAX:
3500 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3501 break;
3502
3503 case TGSI_OPCODE_UMIN:
3504 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3505 break;
3506
3507 case TGSI_OPCODE_UMOD:
3508 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3509 break;
3510
3511 case TGSI_OPCODE_UMUL:
3512 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3513 break;
3514
3515 case TGSI_OPCODE_USEQ:
3516 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3517 break;
3518
3519 case TGSI_OPCODE_USGE:
3520 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3521 break;
3522
3523 case TGSI_OPCODE_USHR:
3524 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3525 break;
3526
3527 case TGSI_OPCODE_USLT:
3528 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3529 break;
3530
3531 case TGSI_OPCODE_USNE:
3532 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3533 break;
3534
3535 case TGSI_OPCODE_SWITCH:
3536 exec_switch(mach, inst);
3537 break;
3538
3539 case TGSI_OPCODE_CASE:
3540 exec_case(mach, inst);
3541 break;
3542
3543 case TGSI_OPCODE_DEFAULT:
3544 exec_default(mach);
3545 break;
3546
3547 case TGSI_OPCODE_ENDSWITCH:
3548 exec_endswitch(mach);
3549 break;
3550
3551 default:
3552 assert( 0 );
3553 }
3554 }
3555
3556
3557 #define DEBUG_EXECUTION 0
3558
3559
3560 /**
3561 * Run TGSI interpreter.
3562 * \return bitmask of "alive" quad components
3563 */
3564 uint
3565 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3566 {
3567 uint i;
3568 int pc = 0;
3569
3570 mach->CondMask = 0xf;
3571 mach->LoopMask = 0xf;
3572 mach->ContMask = 0xf;
3573 mach->FuncMask = 0xf;
3574 mach->ExecMask = 0xf;
3575
3576 mach->Switch.mask = 0xf;
3577
3578 assert(mach->CondStackTop == 0);
3579 assert(mach->LoopStackTop == 0);
3580 assert(mach->ContStackTop == 0);
3581 assert(mach->SwitchStackTop == 0);
3582 assert(mach->BreakStackTop == 0);
3583 assert(mach->CallStackTop == 0);
3584
3585 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3586 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3587
3588 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3589 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3590 mach->Primitives[0] = 0;
3591 }
3592
3593 for (i = 0; i < QUAD_SIZE; i++) {
3594 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3595 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3596 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3597 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3598 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3599 }
3600
3601 /* execute declarations (interpolants) */
3602 for (i = 0; i < mach->NumDeclarations; i++) {
3603 exec_declaration( mach, mach->Declarations+i );
3604 }
3605
3606 {
3607 #if DEBUG_EXECUTION
3608 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
3609 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
3610 uint inst = 1;
3611
3612 memcpy(temps, mach->Temps, sizeof(temps));
3613 memcpy(outputs, mach->Outputs, sizeof(outputs));
3614 #endif
3615
3616 /* execute instructions, until pc is set to -1 */
3617 while (pc != -1) {
3618
3619 #if DEBUG_EXECUTION
3620 uint i;
3621
3622 tgsi_dump_instruction(&mach->Instructions[pc], inst++);
3623 #endif
3624
3625 assert(pc < (int) mach->NumInstructions);
3626 exec_instruction(mach, mach->Instructions + pc, &pc);
3627
3628 #if DEBUG_EXECUTION
3629 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
3630 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
3631 uint j;
3632
3633 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
3634 debug_printf("TEMP[%2u] = ", i);
3635 for (j = 0; j < 4; j++) {
3636 if (j > 0) {
3637 debug_printf(" ");
3638 }
3639 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3640 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
3641 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
3642 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
3643 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
3644 }
3645 }
3646 }
3647 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
3648 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
3649 uint j;
3650
3651 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
3652 debug_printf("OUT[%2u] = ", i);
3653 for (j = 0; j < 4; j++) {
3654 if (j > 0) {
3655 debug_printf(" ");
3656 }
3657 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3658 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
3659 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
3660 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
3661 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
3662 }
3663 }
3664 }
3665 #endif
3666 }
3667 }
3668
3669 #if 0
3670 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3671 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3672 /*
3673 * Scale back depth component.
3674 */
3675 for (i = 0; i < 4; i++)
3676 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3677 }
3678 #endif
3679
3680 assert(mach->CondStackTop == 0);
3681 assert(mach->LoopStackTop == 0);
3682 assert(mach->ContStackTop == 0);
3683 assert(mach->SwitchStackTop == 0);
3684 assert(mach->BreakStackTop == 0);
3685 assert(mach->CallStackTop == 0);
3686
3687 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3688 }