tgsi/exec: Cleanup the remaining arithmetic instructions.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
44 * The ExecMask is computed from other masks (CondMask, LoopMask and
45 * ContMask, among others; see UPDATE_EXEC_MASK) which are controlled by the
46 * flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
63
64
65 #define FAST_MATH 1
66
67 #define TILE_TOP_LEFT 0
68 #define TILE_TOP_RIGHT 1
69 #define TILE_BOTTOM_LEFT 2
70 #define TILE_BOTTOM_RIGHT 3
71
72 static void
73 micro_abs(union tgsi_exec_channel *dst,
74 const union tgsi_exec_channel *src)
75 {
76 dst->f[0] = fabsf(src->f[0]);
77 dst->f[1] = fabsf(src->f[1]);
78 dst->f[2] = fabsf(src->f[2]);
79 dst->f[3] = fabsf(src->f[3]);
80 }
81
82 static void
83 micro_arl(union tgsi_exec_channel *dst,
84 const union tgsi_exec_channel *src)
85 {
86 dst->i[0] = (int)floorf(src->f[0]);
87 dst->i[1] = (int)floorf(src->f[1]);
88 dst->i[2] = (int)floorf(src->f[2]);
89 dst->i[3] = (int)floorf(src->f[3]);
90 }
91
92 static void
93 micro_arr(union tgsi_exec_channel *dst,
94 const union tgsi_exec_channel *src)
95 {
96 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
97 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
98 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
99 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
100 }
101
102 static void
103 micro_ceil(union tgsi_exec_channel *dst,
104 const union tgsi_exec_channel *src)
105 {
106 dst->f[0] = ceilf(src->f[0]);
107 dst->f[1] = ceilf(src->f[1]);
108 dst->f[2] = ceilf(src->f[2]);
109 dst->f[3] = ceilf(src->f[3]);
110 }
111
112 static void
113 micro_clamp(union tgsi_exec_channel *dst,
114 const union tgsi_exec_channel *src0,
115 const union tgsi_exec_channel *src1,
116 const union tgsi_exec_channel *src2)
117 {
118 dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0];
119 dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1];
120 dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2];
121 dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3];
122 }
123
124 static void
125 micro_cmp(union tgsi_exec_channel *dst,
126 const union tgsi_exec_channel *src0,
127 const union tgsi_exec_channel *src1,
128 const union tgsi_exec_channel *src2)
129 {
130 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
131 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
132 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
133 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
134 }
135
136 static void
137 micro_cnd(union tgsi_exec_channel *dst,
138 const union tgsi_exec_channel *src0,
139 const union tgsi_exec_channel *src1,
140 const union tgsi_exec_channel *src2)
141 {
142 dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0];
143 dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1];
144 dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2];
145 dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3];
146 }
147
148 static void
149 micro_cos(union tgsi_exec_channel *dst,
150 const union tgsi_exec_channel *src)
151 {
152 dst->f[0] = cosf(src->f[0]);
153 dst->f[1] = cosf(src->f[1]);
154 dst->f[2] = cosf(src->f[2]);
155 dst->f[3] = cosf(src->f[3]);
156 }
157
158 static void
159 micro_ddx(union tgsi_exec_channel *dst,
160 const union tgsi_exec_channel *src)
161 {
162 dst->f[0] =
163 dst->f[1] =
164 dst->f[2] =
165 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
166 }
167
168 static void
169 micro_ddy(union tgsi_exec_channel *dst,
170 const union tgsi_exec_channel *src)
171 {
172 dst->f[0] =
173 dst->f[1] =
174 dst->f[2] =
175 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
176 }
177
178 static void
179 micro_exp2(union tgsi_exec_channel *dst,
180 const union tgsi_exec_channel *src)
181 {
182 #if FAST_MATH
183 dst->f[0] = util_fast_exp2(src->f[0]);
184 dst->f[1] = util_fast_exp2(src->f[1]);
185 dst->f[2] = util_fast_exp2(src->f[2]);
186 dst->f[3] = util_fast_exp2(src->f[3]);
187 #else
188 #if DEBUG
189 /* Inf is okay for this instruction, so clamp it to silence assertions. */
190 uint i;
191 union tgsi_exec_channel clamped;
192
193 for (i = 0; i < 4; i++) {
194 if (src->f[i] > 127.99999f) {
195 clamped.f[i] = 127.99999f;
196 } else if (src->f[i] < -126.99999f) {
197 clamped.f[i] = -126.99999f;
198 } else {
199 clamped.f[i] = src->f[i];
200 }
201 }
202 src = &clamped;
203 #endif /* DEBUG */
204
205 dst->f[0] = powf(2.0f, src->f[0]);
206 dst->f[1] = powf(2.0f, src->f[1]);
207 dst->f[2] = powf(2.0f, src->f[2]);
208 dst->f[3] = powf(2.0f, src->f[3]);
209 #endif /* FAST_MATH */
210 }
211
212 static void
213 micro_flr(union tgsi_exec_channel *dst,
214 const union tgsi_exec_channel *src)
215 {
216 dst->f[0] = floorf(src->f[0]);
217 dst->f[1] = floorf(src->f[1]);
218 dst->f[2] = floorf(src->f[2]);
219 dst->f[3] = floorf(src->f[3]);
220 }
221
222 static void
223 micro_frc(union tgsi_exec_channel *dst,
224 const union tgsi_exec_channel *src)
225 {
226 dst->f[0] = src->f[0] - floorf(src->f[0]);
227 dst->f[1] = src->f[1] - floorf(src->f[1]);
228 dst->f[2] = src->f[2] - floorf(src->f[2]);
229 dst->f[3] = src->f[3] - floorf(src->f[3]);
230 }
231
232 static void
233 micro_iabs(union tgsi_exec_channel *dst,
234 const union tgsi_exec_channel *src)
235 {
236 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
237 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
238 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
239 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
240 }
241
242 static void
243 micro_ineg(union tgsi_exec_channel *dst,
244 const union tgsi_exec_channel *src)
245 {
246 dst->i[0] = -src->i[0];
247 dst->i[1] = -src->i[1];
248 dst->i[2] = -src->i[2];
249 dst->i[3] = -src->i[3];
250 }
251
252 static void
253 micro_lg2(union tgsi_exec_channel *dst,
254 const union tgsi_exec_channel *src)
255 {
256 #if FAST_MATH
257 dst->f[0] = util_fast_log2(src->f[0]);
258 dst->f[1] = util_fast_log2(src->f[1]);
259 dst->f[2] = util_fast_log2(src->f[2]);
260 dst->f[3] = util_fast_log2(src->f[3]);
261 #else
262 dst->f[0] = logf(src->f[0]) * 1.442695f;
263 dst->f[1] = logf(src->f[1]) * 1.442695f;
264 dst->f[2] = logf(src->f[2]) * 1.442695f;
265 dst->f[3] = logf(src->f[3]) * 1.442695f;
266 #endif
267 }
268
269 static void
270 micro_lrp(union tgsi_exec_channel *dst,
271 const union tgsi_exec_channel *src0,
272 const union tgsi_exec_channel *src1,
273 const union tgsi_exec_channel *src2)
274 {
275 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
276 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
277 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
278 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
279 }
280
281 static void
282 micro_mad(union tgsi_exec_channel *dst,
283 const union tgsi_exec_channel *src0,
284 const union tgsi_exec_channel *src1,
285 const union tgsi_exec_channel *src2)
286 {
287 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
288 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
289 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
290 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
291 }
292
293 static void
294 micro_mov(union tgsi_exec_channel *dst,
295 const union tgsi_exec_channel *src)
296 {
297 dst->u[0] = src->u[0];
298 dst->u[1] = src->u[1];
299 dst->u[2] = src->u[2];
300 dst->u[3] = src->u[3];
301 }
302
303 static void
304 micro_rcp(union tgsi_exec_channel *dst,
305 const union tgsi_exec_channel *src)
306 {
307 #if 0 /* for debugging */
308 assert(src->f[0] != 0.0f);
309 assert(src->f[1] != 0.0f);
310 assert(src->f[2] != 0.0f);
311 assert(src->f[3] != 0.0f);
312 #endif
313 dst->f[0] = 1.0f / src->f[0];
314 dst->f[1] = 1.0f / src->f[1];
315 dst->f[2] = 1.0f / src->f[2];
316 dst->f[3] = 1.0f / src->f[3];
317 }
318
319 static void
320 micro_rnd(union tgsi_exec_channel *dst,
321 const union tgsi_exec_channel *src)
322 {
323 dst->f[0] = floorf(src->f[0] + 0.5f);
324 dst->f[1] = floorf(src->f[1] + 0.5f);
325 dst->f[2] = floorf(src->f[2] + 0.5f);
326 dst->f[3] = floorf(src->f[3] + 0.5f);
327 }
328
329 static void
330 micro_rsq(union tgsi_exec_channel *dst,
331 const union tgsi_exec_channel *src)
332 {
333 #if 0 /* for debugging */
334 assert(src->f[0] != 0.0f);
335 assert(src->f[1] != 0.0f);
336 assert(src->f[2] != 0.0f);
337 assert(src->f[3] != 0.0f);
338 #endif
339 dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
340 dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
341 dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
342 dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
343 }
344
345 static void
346 micro_seq(union tgsi_exec_channel *dst,
347 const union tgsi_exec_channel *src0,
348 const union tgsi_exec_channel *src1)
349 {
350 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
351 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
352 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
353 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
354 }
355
356 static void
357 micro_sge(union tgsi_exec_channel *dst,
358 const union tgsi_exec_channel *src0,
359 const union tgsi_exec_channel *src1)
360 {
361 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
362 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
363 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
364 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
365 }
366
367 static void
368 micro_sgn(union tgsi_exec_channel *dst,
369 const union tgsi_exec_channel *src)
370 {
371 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
372 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
373 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
374 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
375 }
376
377 static void
378 micro_sgt(union tgsi_exec_channel *dst,
379 const union tgsi_exec_channel *src0,
380 const union tgsi_exec_channel *src1)
381 {
382 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
383 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
384 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
385 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
386 }
387
388 static void
389 micro_sin(union tgsi_exec_channel *dst,
390 const union tgsi_exec_channel *src)
391 {
392 dst->f[0] = sinf(src->f[0]);
393 dst->f[1] = sinf(src->f[1]);
394 dst->f[2] = sinf(src->f[2]);
395 dst->f[3] = sinf(src->f[3]);
396 }
397
398 static void
399 micro_sle(union tgsi_exec_channel *dst,
400 const union tgsi_exec_channel *src0,
401 const union tgsi_exec_channel *src1)
402 {
403 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
404 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
405 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
406 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
407 }
408
409 static void
410 micro_slt(union tgsi_exec_channel *dst,
411 const union tgsi_exec_channel *src0,
412 const union tgsi_exec_channel *src1)
413 {
414 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
415 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
416 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
417 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
418 }
419
420 static void
421 micro_sne(union tgsi_exec_channel *dst,
422 const union tgsi_exec_channel *src0,
423 const union tgsi_exec_channel *src1)
424 {
425 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
426 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
427 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
428 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
429 }
430
431 static void
432 micro_sfl(union tgsi_exec_channel *dst)
433 {
434 dst->f[0] = 0.0f;
435 dst->f[1] = 0.0f;
436 dst->f[2] = 0.0f;
437 dst->f[3] = 0.0f;
438 }
439
440 static void
441 micro_str(union tgsi_exec_channel *dst)
442 {
443 dst->f[0] = 1.0f;
444 dst->f[1] = 1.0f;
445 dst->f[2] = 1.0f;
446 dst->f[3] = 1.0f;
447 }
448
449 static void
450 micro_trunc(union tgsi_exec_channel *dst,
451 const union tgsi_exec_channel *src)
452 {
453 dst->f[0] = (float)(int)src->f[0];
454 dst->f[1] = (float)(int)src->f[1];
455 dst->f[2] = (float)(int)src->f[2];
456 dst->f[3] = (float)(int)src->f[3];
457 }
458
459
/* Indices of the four components of a register (x, y, z, w). */
#define CHAN_X   0
#define CHAN_Y   1
#define CHAN_Z   2
#define CHAN_W   3
464
/**
 * Tags for the three channel data types (float, signed int, unsigned
 * int).  fetch_source() takes one as its src_datatype argument.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT = 0,
   TGSI_EXEC_DATA_INT   = 1,
   TGSI_EXEC_DATA_UINT  = 2
};
470
/*
 * Short aliases for the TGSI_EXEC_TEMP_* locations of the utility
 * registers.  Each location is a pair: _I is the register index and
 * _C is the channel within that register.
 */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C

#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C

#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C

#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
482
483
/**
 * Recompute the execution mask of MACH as the bitwise AND of every mask
 * that can disable a channel: CondMask, LoopMask, ContMask, Switch.mask
 * and FuncMask.
 *
 * MACH is a pointer to the machine state.  It is parenthesized at every
 * use so that any pointer expression (not just a plain identifier) may
 * be passed; note that it is evaluated more than once.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
487
488
489 static const union tgsi_exec_channel ZeroVec =
490 { { 0.0, 0.0, 0.0, 0.0 } };
491
492 static const union tgsi_exec_channel OneVec = {
493 {1.0f, 1.0f, 1.0f, 1.0f}
494 };
495
496 static const union tgsi_exec_channel P128Vec = {
497 {128.0f, 128.0f, 128.0f, 128.0f}
498 };
499
500 static const union tgsi_exec_channel M128Vec = {
501 {-128.0f, -128.0f, -128.0f, -128.0f}
502 };
503
504
505 /**
506 * Assert that none of the float values in 'chan' are infinite or NaN.
507 * NaN and Inf may occur normally during program execution and should
508 * not lead to crashes, etc. But when debugging, it's helpful to catch
509 * them.
510 */
511 static INLINE void
512 check_inf_or_nan(const union tgsi_exec_channel *chan)
513 {
514 assert(!util_is_inf_or_nan((chan)->f[0]));
515 assert(!util_is_inf_or_nan((chan)->f[1]));
516 assert(!util_is_inf_or_nan((chan)->f[2]));
517 assert(!util_is_inf_or_nan((chan)->f[3]));
518 }
519
520
#ifdef DEBUG
/** Debug helper: print the four float lanes of 'chan', prefixed by 'msg'. */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
529
530
#ifdef DEBUG
/**
 * Debug helper: print temporary register 'index' of the machine, one
 * line per X/Y/Z/W component with its four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   const struct tgsi_exec_vector *reg = &mach->Temps[index];
   int comp;

   debug_printf("Temp[%u] =\n", index);
   for (comp = 0; comp < 4; comp++) {
      const float *lanes = reg->xyzw[comp].f;

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   "XYZW"[comp],
                   lanes[0], lanes[1], lanes[2], lanes[3]);
   }
}
#endif
548
549
550 void
551 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
552 unsigned num_bufs,
553 const void **bufs,
554 const unsigned *buf_sizes)
555 {
556 unsigned i;
557
558 for (i = 0; i < num_bufs; i++) {
559 mach->Consts[i] = bufs[i];
560 mach->ConstsSize[i] = buf_sizes[i];
561 }
562 }
563
564
565 /**
566 * Check if there's a potential src/dst register data dependency when
567 * using SOA execution.
568 * Example:
569 * MOV T, T.yxwz;
570 * This would expand into:
571 * MOV t0, t1;
572 * MOV t1, t0;
573 * MOV t2, t3;
574 * MOV t3, t2;
575 * The second instruction will have the wrong value for t0 if executed as-is.
576 */
577 boolean
578 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
579 {
580 uint i, chan;
581
582 uint writemask = inst->Dst[0].Register.WriteMask;
583 if (writemask == TGSI_WRITEMASK_X ||
584 writemask == TGSI_WRITEMASK_Y ||
585 writemask == TGSI_WRITEMASK_Z ||
586 writemask == TGSI_WRITEMASK_W ||
587 writemask == TGSI_WRITEMASK_NONE) {
588 /* no chance of data dependency */
589 return FALSE;
590 }
591
592 /* loop over src regs */
593 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
594 if ((inst->Src[i].Register.File ==
595 inst->Dst[0].Register.File) &&
596 ((inst->Src[i].Register.Index ==
597 inst->Dst[0].Register.Index) ||
598 inst->Src[i].Register.Indirect ||
599 inst->Dst[0].Register.Indirect)) {
600 /* loop over dest channels */
601 uint channelsWritten = 0x0;
602 for (chan = 0; chan < NUM_CHANNELS; chan++) {
603 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
604 /* check if we're reading a channel that's been written */
605 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
606 if (channelsWritten & (1 << swizzle)) {
607 return TRUE;
608 }
609
610 channelsWritten |= (1 << chan);
611 }
612 }
613 }
614 }
615 return FALSE;
616 }
617
618
619 /**
620 * Initialize machine state by expanding tokens to full instructions,
621 * allocating temporary storage, setting up constants, etc.
622 * After this, we can call tgsi_exec_machine_run() many times.
623 */
624 void
625 tgsi_exec_machine_bind_shader(
626 struct tgsi_exec_machine *mach,
627 const struct tgsi_token *tokens,
628 uint numSamplers,
629 struct tgsi_sampler **samplers)
630 {
631 uint k;
632 struct tgsi_parse_context parse;
633 struct tgsi_full_instruction *instructions;
634 struct tgsi_full_declaration *declarations;
635 uint maxInstructions = 10, numInstructions = 0;
636 uint maxDeclarations = 10, numDeclarations = 0;
637
638 #if 0
639 tgsi_dump(tokens, 0);
640 #endif
641
642 util_init_math();
643
644 if (numSamplers) {
645 assert(samplers);
646 }
647
648 mach->Tokens = tokens;
649 mach->Samplers = samplers;
650
651 if (!tokens) {
652 /* unbind and free all */
653 if (mach->Declarations) {
654 FREE( mach->Declarations );
655 }
656 mach->Declarations = NULL;
657 mach->NumDeclarations = 0;
658
659 if (mach->Instructions) {
660 FREE( mach->Instructions );
661 }
662 mach->Instructions = NULL;
663 mach->NumInstructions = 0;
664
665 return;
666 }
667
668 k = tgsi_parse_init (&parse, mach->Tokens);
669 if (k != TGSI_PARSE_OK) {
670 debug_printf( "Problem parsing!\n" );
671 return;
672 }
673
674 mach->Processor = parse.FullHeader.Processor.Processor;
675 mach->ImmLimit = 0;
676
677 declarations = (struct tgsi_full_declaration *)
678 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
679
680 if (!declarations) {
681 return;
682 }
683
684 instructions = (struct tgsi_full_instruction *)
685 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
686
687 if (!instructions) {
688 FREE( declarations );
689 return;
690 }
691
692 while( !tgsi_parse_end_of_tokens( &parse ) ) {
693 uint i;
694
695 tgsi_parse_token( &parse );
696 switch( parse.FullToken.Token.Type ) {
697 case TGSI_TOKEN_TYPE_DECLARATION:
698 /* save expanded declaration */
699 if (numDeclarations == maxDeclarations) {
700 declarations = REALLOC(declarations,
701 maxDeclarations
702 * sizeof(struct tgsi_full_declaration),
703 (maxDeclarations + 10)
704 * sizeof(struct tgsi_full_declaration));
705 maxDeclarations += 10;
706 }
707 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
708 unsigned reg;
709 for (reg = parse.FullToken.FullDeclaration.Range.First;
710 reg <= parse.FullToken.FullDeclaration.Range.Last;
711 ++reg) {
712 ++mach->NumOutputs;
713 }
714 }
715 if (parse.FullToken.FullDeclaration.Declaration.File ==
716 TGSI_FILE_IMMEDIATE_ARRAY) {
717 unsigned reg;
718 struct tgsi_full_declaration *decl =
719 &parse.FullToken.FullDeclaration;
720 debug_assert(decl->Range.Last < TGSI_EXEC_NUM_IMMEDIATES);
721 for (reg = decl->Range.First; reg <= decl->Range.Last; ++reg) {
722 for( i = 0; i < 4; i++ ) {
723 int idx = reg * 4 + i;
724 mach->ImmArray[reg][i] = decl->ImmediateData.u[idx].Float;
725 }
726 }
727 }
728 memcpy(declarations + numDeclarations,
729 &parse.FullToken.FullDeclaration,
730 sizeof(declarations[0]));
731 numDeclarations++;
732 break;
733
734 case TGSI_TOKEN_TYPE_IMMEDIATE:
735 {
736 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
737 assert( size <= 4 );
738 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
739
740 for( i = 0; i < size; i++ ) {
741 mach->Imms[mach->ImmLimit][i] =
742 parse.FullToken.FullImmediate.u[i].Float;
743 }
744 mach->ImmLimit += 1;
745 }
746 break;
747
748 case TGSI_TOKEN_TYPE_INSTRUCTION:
749
750 /* save expanded instruction */
751 if (numInstructions == maxInstructions) {
752 instructions = REALLOC(instructions,
753 maxInstructions
754 * sizeof(struct tgsi_full_instruction),
755 (maxInstructions + 10)
756 * sizeof(struct tgsi_full_instruction));
757 maxInstructions += 10;
758 }
759
760 memcpy(instructions + numInstructions,
761 &parse.FullToken.FullInstruction,
762 sizeof(instructions[0]));
763
764 numInstructions++;
765 break;
766
767 case TGSI_TOKEN_TYPE_PROPERTY:
768 break;
769
770 default:
771 assert( 0 );
772 }
773 }
774 tgsi_parse_free (&parse);
775
776 if (mach->Declarations) {
777 FREE( mach->Declarations );
778 }
779 mach->Declarations = declarations;
780 mach->NumDeclarations = numDeclarations;
781
782 if (mach->Instructions) {
783 FREE( mach->Instructions );
784 }
785 mach->Instructions = instructions;
786 mach->NumInstructions = numInstructions;
787 }
788
789
790 struct tgsi_exec_machine *
791 tgsi_exec_machine_create( void )
792 {
793 struct tgsi_exec_machine *mach;
794 uint i;
795
796 mach = align_malloc( sizeof *mach, 16 );
797 if (!mach)
798 goto fail;
799
800 memset(mach, 0, sizeof(*mach));
801
802 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
803 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
804 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
805
806 /* Setup constants needed by the SSE2 executor. */
807 for( i = 0; i < 4; i++ ) {
808 mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000;
809 mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF;
810 mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000;
811 mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */
812 mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f;
813 mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */
814 mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f;
815 mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f;
816 mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f;
817 mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f;
818 }
819
820 #ifdef DEBUG
821 /* silence warnings */
822 (void) print_chan;
823 (void) print_temp;
824 #endif
825
826 return mach;
827
828 fail:
829 align_free(mach);
830 return NULL;
831 }
832
833
834 void
835 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
836 {
837 if (mach) {
838 if (mach->Instructions)
839 FREE(mach->Instructions);
840 if (mach->Declarations)
841 FREE(mach->Declarations);
842 }
843
844 align_free(mach);
845 }
846
847 static void
848 micro_add(union tgsi_exec_channel *dst,
849 const union tgsi_exec_channel *src0,
850 const union tgsi_exec_channel *src1)
851 {
852 dst->f[0] = src0->f[0] + src1->f[0];
853 dst->f[1] = src0->f[1] + src1->f[1];
854 dst->f[2] = src0->f[2] + src1->f[2];
855 dst->f[3] = src0->f[3] + src1->f[3];
856 }
857
858 static void
859 micro_div(
860 union tgsi_exec_channel *dst,
861 const union tgsi_exec_channel *src0,
862 const union tgsi_exec_channel *src1 )
863 {
864 if (src1->f[0] != 0) {
865 dst->f[0] = src0->f[0] / src1->f[0];
866 }
867 if (src1->f[1] != 0) {
868 dst->f[1] = src0->f[1] / src1->f[1];
869 }
870 if (src1->f[2] != 0) {
871 dst->f[2] = src0->f[2] / src1->f[2];
872 }
873 if (src1->f[3] != 0) {
874 dst->f[3] = src0->f[3] / src1->f[3];
875 }
876 }
877
878 static void
879 micro_rcc(union tgsi_exec_channel *dst,
880 const union tgsi_exec_channel *src)
881 {
882 uint i;
883
884 for (i = 0; i < 4; i++) {
885 float recip = 1.0f / src->f[i];
886
887 if (recip > 0.0f) {
888 if (recip > 1.884467e+019f) {
889 dst->f[i] = 1.884467e+019f;
890 }
891 else if (recip < 5.42101e-020f) {
892 dst->f[i] = 5.42101e-020f;
893 }
894 else {
895 dst->f[i] = recip;
896 }
897 }
898 else {
899 if (recip < -1.884467e+019f) {
900 dst->f[i] = -1.884467e+019f;
901 }
902 else if (recip > -5.42101e-020f) {
903 dst->f[i] = -5.42101e-020f;
904 }
905 else {
906 dst->f[i] = recip;
907 }
908 }
909 }
910 }
911
912 static void
913 micro_lt(
914 union tgsi_exec_channel *dst,
915 const union tgsi_exec_channel *src0,
916 const union tgsi_exec_channel *src1,
917 const union tgsi_exec_channel *src2,
918 const union tgsi_exec_channel *src3 )
919 {
920 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
921 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
922 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
923 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
924 }
925
926 static void
927 micro_max(union tgsi_exec_channel *dst,
928 const union tgsi_exec_channel *src0,
929 const union tgsi_exec_channel *src1)
930 {
931 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
932 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
933 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
934 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
935 }
936
937 static void
938 micro_min(union tgsi_exec_channel *dst,
939 const union tgsi_exec_channel *src0,
940 const union tgsi_exec_channel *src1)
941 {
942 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
943 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
944 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
945 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
946 }
947
948 static void
949 micro_mul(union tgsi_exec_channel *dst,
950 const union tgsi_exec_channel *src0,
951 const union tgsi_exec_channel *src1)
952 {
953 dst->f[0] = src0->f[0] * src1->f[0];
954 dst->f[1] = src0->f[1] * src1->f[1];
955 dst->f[2] = src0->f[2] * src1->f[2];
956 dst->f[3] = src0->f[3] * src1->f[3];
957 }
958
959 static void
960 micro_neg(
961 union tgsi_exec_channel *dst,
962 const union tgsi_exec_channel *src )
963 {
964 dst->f[0] = -src->f[0];
965 dst->f[1] = -src->f[1];
966 dst->f[2] = -src->f[2];
967 dst->f[3] = -src->f[3];
968 }
969
970 static void
971 micro_pow(
972 union tgsi_exec_channel *dst,
973 const union tgsi_exec_channel *src0,
974 const union tgsi_exec_channel *src1 )
975 {
976 #if FAST_MATH
977 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
978 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
979 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
980 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
981 #else
982 dst->f[0] = powf( src0->f[0], src1->f[0] );
983 dst->f[1] = powf( src0->f[1], src1->f[1] );
984 dst->f[2] = powf( src0->f[2], src1->f[2] );
985 dst->f[3] = powf( src0->f[3], src1->f[3] );
986 #endif
987 }
988
989 static void
990 micro_sub(union tgsi_exec_channel *dst,
991 const union tgsi_exec_channel *src0,
992 const union tgsi_exec_channel *src1)
993 {
994 dst->f[0] = src0->f[0] - src1->f[0];
995 dst->f[1] = src0->f[1] - src1->f[1];
996 dst->f[2] = src0->f[2] - src1->f[2];
997 dst->f[3] = src0->f[3] - src1->f[3];
998 }
999
1000 static void
1001 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1002 const uint file,
1003 const uint swizzle,
1004 const union tgsi_exec_channel *index,
1005 const union tgsi_exec_channel *index2D,
1006 union tgsi_exec_channel *chan)
1007 {
1008 uint i;
1009
1010 assert(swizzle < 4);
1011
1012 switch (file) {
1013 case TGSI_FILE_CONSTANT:
1014 for (i = 0; i < QUAD_SIZE; i++) {
1015 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
1016 assert(mach->Consts[index2D->i[i]]);
1017
1018 if (index->i[i] < 0) {
1019 chan->u[i] = 0;
1020 } else {
1021 /* NOTE: copying the const value as a uint instead of float */
1022 const uint constbuf = index2D->i[i];
1023 const uint *buf = (const uint *)mach->Consts[constbuf];
1024 const int pos = index->i[i] * 4 + swizzle;
1025 /* const buffer bounds check */
1026 if (pos < 0 || pos >= mach->ConstsSize[constbuf]) {
1027 if (0) {
1028 /* Debug: print warning */
1029 static int count = 0;
1030 if (count++ < 100)
1031 debug_printf("TGSI Exec: const buffer index %d"
1032 " out of bounds\n", pos);
1033 }
1034 chan->u[i] = 0;
1035 }
1036 else
1037 chan->u[i] = buf[pos];
1038 }
1039 }
1040 break;
1041
1042 case TGSI_FILE_INPUT:
1043 case TGSI_FILE_SYSTEM_VALUE:
1044 for (i = 0; i < QUAD_SIZE; i++) {
1045 /*
1046 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1047 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1048 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1049 index2D->i[i], index->i[i]);
1050 }*/
1051 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1052 assert(pos >= 0);
1053 assert(pos < Elements(mach->Inputs));
1054 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1055 }
1056 break;
1057
1058 case TGSI_FILE_TEMPORARY:
1059 for (i = 0; i < QUAD_SIZE; i++) {
1060 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1061 assert(index2D->i[i] == 0);
1062
1063 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1064 }
1065 break;
1066
1067 case TGSI_FILE_TEMPORARY_ARRAY:
1068 for (i = 0; i < QUAD_SIZE; i++) {
1069 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1070 assert(index2D->i[i] < TGSI_EXEC_NUM_TEMP_ARRAYS);
1071
1072 chan->u[i] =
1073 mach->TempArray[index2D->i[i]][index->i[i]].xyzw[swizzle].u[i];
1074 }
1075 break;
1076
1077 case TGSI_FILE_IMMEDIATE:
1078 for (i = 0; i < QUAD_SIZE; i++) {
1079 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1080 assert(index2D->i[i] == 0);
1081
1082 chan->f[i] = mach->Imms[index->i[i]][swizzle];
1083 }
1084 break;
1085
1086 case TGSI_FILE_IMMEDIATE_ARRAY:
1087 for (i = 0; i < QUAD_SIZE; i++) {
1088 assert(index2D->i[i] == 0);
1089
1090 chan->f[i] = mach->ImmArray[index->i[i]][swizzle];
1091 }
1092 break;
1093
1094 case TGSI_FILE_ADDRESS:
1095 for (i = 0; i < QUAD_SIZE; i++) {
1096 assert(index->i[i] >= 0);
1097 assert(index2D->i[i] == 0);
1098
1099 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1100 }
1101 break;
1102
1103 case TGSI_FILE_PREDICATE:
1104 for (i = 0; i < QUAD_SIZE; i++) {
1105 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS);
1106 assert(index2D->i[i] == 0);
1107
1108 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i];
1109 }
1110 break;
1111
1112 case TGSI_FILE_OUTPUT:
1113 /* vertex/fragment output vars can be read too */
1114 for (i = 0; i < QUAD_SIZE; i++) {
1115 assert(index->i[i] >= 0);
1116 assert(index2D->i[i] == 0);
1117
1118 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1119 }
1120 break;
1121
1122 default:
1123 assert(0);
1124 for (i = 0; i < QUAD_SIZE; i++) {
1125 chan->u[i] = 0;
1126 }
1127 }
1128 }
1129
1130 static void
1131 fetch_source(const struct tgsi_exec_machine *mach,
1132 union tgsi_exec_channel *chan,
1133 const struct tgsi_full_src_register *reg,
1134 const uint chan_index,
1135 enum tgsi_exec_datatype src_datatype)
1136 {
1137 union tgsi_exec_channel index;
1138 union tgsi_exec_channel index2D;
1139 uint swizzle;
1140
1141 /* We start with a direct index into a register file.
1142 *
1143 * file[1],
1144 * where:
1145 * file = Register.File
1146 * [1] = Register.Index
1147 */
1148 index.i[0] =
1149 index.i[1] =
1150 index.i[2] =
1151 index.i[3] = reg->Register.Index;
1152
1153 /* There is an extra source register that indirectly subscripts
1154 * a register file. The direct index now becomes an offset
1155 * that is being added to the indirect register.
1156 *
1157 * file[ind[2].x+1],
1158 * where:
1159 * ind = Indirect.File
1160 * [2] = Indirect.Index
1161 * .x = Indirect.SwizzleX
1162 */
1163 if (reg->Register.Indirect) {
1164 union tgsi_exec_channel index2;
1165 union tgsi_exec_channel indir_index;
1166 const uint execmask = mach->ExecMask;
1167 uint i;
1168
1169 /* which address register (always zero now) */
1170 index2.i[0] =
1171 index2.i[1] =
1172 index2.i[2] =
1173 index2.i[3] = reg->Indirect.Index;
1174 assert(reg->Indirect.File == TGSI_FILE_ADDRESS);
1175 /* get current value of address register[swizzle] */
1176 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1177 fetch_src_file_channel(mach,
1178 reg->Indirect.File,
1179 swizzle,
1180 &index2,
1181 &ZeroVec,
1182 &indir_index);
1183
1184 /* add value of address register to the offset */
1185 index.i[0] += indir_index.i[0];
1186 index.i[1] += indir_index.i[1];
1187 index.i[2] += indir_index.i[2];
1188 index.i[3] += indir_index.i[3];
1189
1190 /* for disabled execution channels, zero-out the index to
1191 * avoid using a potential garbage value.
1192 */
1193 for (i = 0; i < QUAD_SIZE; i++) {
1194 if ((execmask & (1 << i)) == 0)
1195 index.i[i] = 0;
1196 }
1197 }
1198
1199 /* There is an extra source register that is a second
1200 * subscript to a register file. Effectively it means that
1201 * the register file is actually a 2D array of registers.
1202 *
1203 * file[3][1],
1204 * where:
1205 * [3] = Dimension.Index
1206 */
1207 if (reg->Register.Dimension) {
1208 index2D.i[0] =
1209 index2D.i[1] =
1210 index2D.i[2] =
1211 index2D.i[3] = reg->Dimension.Index;
1212
1213 /* Again, the second subscript index can be addressed indirectly
1214 * identically to the first one.
1215 * Nothing stops us from indirectly addressing the indirect register,
1216 * but there is no need for that, so we won't exercise it.
1217 *
1218 * file[ind[4].y+3][1],
1219 * where:
1220 * ind = DimIndirect.File
1221 * [4] = DimIndirect.Index
1222 * .y = DimIndirect.SwizzleX
1223 */
1224 if (reg->Dimension.Indirect) {
1225 union tgsi_exec_channel index2;
1226 union tgsi_exec_channel indir_index;
1227 const uint execmask = mach->ExecMask;
1228 uint i;
1229
1230 index2.i[0] =
1231 index2.i[1] =
1232 index2.i[2] =
1233 index2.i[3] = reg->DimIndirect.Index;
1234
1235 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
1236 fetch_src_file_channel(mach,
1237 reg->DimIndirect.File,
1238 swizzle,
1239 &index2,
1240 &ZeroVec,
1241 &indir_index);
1242
1243 index2D.i[0] += indir_index.i[0];
1244 index2D.i[1] += indir_index.i[1];
1245 index2D.i[2] += indir_index.i[2];
1246 index2D.i[3] += indir_index.i[3];
1247
1248 /* for disabled execution channels, zero-out the index to
1249 * avoid using a potential garbage value.
1250 */
1251 for (i = 0; i < QUAD_SIZE; i++) {
1252 if ((execmask & (1 << i)) == 0) {
1253 index2D.i[i] = 0;
1254 }
1255 }
1256 }
1257
1258 /* If by any chance there was a need for a 3D array of register
1259 * files, we would have to check whether Dimension is followed
1260 * by a dimension register and continue the saga.
1261 */
1262 } else {
1263 index2D.i[0] =
1264 index2D.i[1] =
1265 index2D.i[2] =
1266 index2D.i[3] = 0;
1267 }
1268
1269 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1270 fetch_src_file_channel(mach,
1271 reg->Register.File,
1272 swizzle,
1273 &index,
1274 &index2D,
1275 chan);
1276
1277 if (reg->Register.Absolute) {
1278 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1279 micro_abs(chan, chan);
1280 } else {
1281 micro_iabs(chan, chan);
1282 }
1283 }
1284
1285 if (reg->Register.Negate) {
1286 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1287 micro_neg(chan, chan);
1288 } else {
1289 micro_ineg(chan, chan);
1290 }
1291 }
1292 }
1293
1294 static void
1295 store_dest(struct tgsi_exec_machine *mach,
1296 const union tgsi_exec_channel *chan,
1297 const struct tgsi_full_dst_register *reg,
1298 const struct tgsi_full_instruction *inst,
1299 uint chan_index,
1300 enum tgsi_exec_datatype dst_datatype)
1301 {
1302 uint i;
1303 union tgsi_exec_channel null;
1304 union tgsi_exec_channel *dst;
1305 union tgsi_exec_channel index2D;
1306 uint execmask = mach->ExecMask;
1307 int offset = 0; /* indirection offset */
1308 int index;
1309
1310 /* for debugging */
1311 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1312 check_inf_or_nan(chan);
1313 }
1314
1315 /* There is an extra source register that indirectly subscripts
1316 * a register file. The direct index now becomes an offset
1317 * that is being added to the indirect register.
1318 *
1319 * file[ind[2].x+1],
1320 * where:
1321 * ind = Indirect.File
1322 * [2] = Indirect.Index
1323 * .x = Indirect.SwizzleX
1324 */
1325 if (reg->Register.Indirect) {
1326 union tgsi_exec_channel index;
1327 union tgsi_exec_channel indir_index;
1328 uint swizzle;
1329
1330 /* which address register (always zero for now) */
1331 index.i[0] =
1332 index.i[1] =
1333 index.i[2] =
1334 index.i[3] = reg->Indirect.Index;
1335
1336 /* get current value of address register[swizzle] */
1337 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
1338
1339 /* fetch values from the address/indirection register */
1340 fetch_src_file_channel(mach,
1341 reg->Indirect.File,
1342 swizzle,
1343 &index,
1344 &ZeroVec,
1345 &indir_index);
1346
1347 /* save indirection offset */
1348 offset = indir_index.i[0];
1349 }
1350
1351 /* There is an extra source register that is a second
1352 * subscript to a register file. Effectively it means that
1353 * the register file is actually a 2D array of registers.
1354 *
1355 * file[3][1],
1356 * where:
1357 * [3] = Dimension.Index
1358 */
1359 if (reg->Register.Dimension) {
1360 index2D.i[0] =
1361 index2D.i[1] =
1362 index2D.i[2] =
1363 index2D.i[3] = reg->Dimension.Index;
1364
1365 /* Again, the second subscript index can be addressed indirectly
1366 * identically to the first one.
1367 * Nothing stops us from indirectly addressing the indirect register,
1368 * but there is no need for that, so we won't exercise it.
1369 *
1370 * file[ind[4].y+3][1],
1371 * where:
1372 * ind = DimIndirect.File
1373 * [4] = DimIndirect.Index
1374 * .y = DimIndirect.SwizzleX
1375 */
1376 if (reg->Dimension.Indirect) {
1377 union tgsi_exec_channel index2;
1378 union tgsi_exec_channel indir_index;
1379 const uint execmask = mach->ExecMask;
1380 unsigned swizzle;
1381 uint i;
1382
1383 index2.i[0] =
1384 index2.i[1] =
1385 index2.i[2] =
1386 index2.i[3] = reg->DimIndirect.Index;
1387
1388 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
1389 fetch_src_file_channel(mach,
1390 reg->DimIndirect.File,
1391 swizzle,
1392 &index2,
1393 &ZeroVec,
1394 &indir_index);
1395
1396 index2D.i[0] += indir_index.i[0];
1397 index2D.i[1] += indir_index.i[1];
1398 index2D.i[2] += indir_index.i[2];
1399 index2D.i[3] += indir_index.i[3];
1400
1401 /* for disabled execution channels, zero-out the index to
1402 * avoid using a potential garbage value.
1403 */
1404 for (i = 0; i < QUAD_SIZE; i++) {
1405 if ((execmask & (1 << i)) == 0) {
1406 index2D.i[i] = 0;
1407 }
1408 }
1409 }
1410
1411 /* If by any chance there was a need for a 3D array of register
1412 * files, we would have to check whether Dimension is followed
1413 * by a dimension register and continue the saga.
1414 */
1415 } else {
1416 index2D.i[0] =
1417 index2D.i[1] =
1418 index2D.i[2] =
1419 index2D.i[3] = 0;
1420 }
1421
1422 switch (reg->Register.File) {
1423 case TGSI_FILE_NULL:
1424 dst = &null;
1425 break;
1426
1427 case TGSI_FILE_OUTPUT:
1428 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1429 + reg->Register.Index;
1430 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1431 #if 0
1432 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1433 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1434 for (i = 0; i < QUAD_SIZE; i++)
1435 if (execmask & (1 << i))
1436 fprintf(stderr, "%f, ", chan->f[i]);
1437 fprintf(stderr, ")\n");
1438 }
1439 #endif
1440 break;
1441
1442 case TGSI_FILE_TEMPORARY:
1443 index = reg->Register.Index;
1444 assert( index < TGSI_EXEC_NUM_TEMPS );
1445 dst = &mach->Temps[offset + index].xyzw[chan_index];
1446 break;
1447
1448 case TGSI_FILE_TEMPORARY_ARRAY:
1449 index = reg->Register.Index;
1450 assert( index < TGSI_EXEC_NUM_TEMPS );
1451 assert( index2D.i[0] < TGSI_EXEC_NUM_TEMP_ARRAYS );
1452 /* XXX we use index2D.i[0] here but somehow we might
1453 * end up with someone trying to store indirectly in
1454 * different buffers */
1455 dst = &mach->TempArray[index2D.i[0]][offset + index].xyzw[chan_index];
1456 break;
1457
1458 case TGSI_FILE_ADDRESS:
1459 index = reg->Register.Index;
1460 dst = &mach->Addrs[index].xyzw[chan_index];
1461 break;
1462
1463 case TGSI_FILE_PREDICATE:
1464 index = reg->Register.Index;
1465 assert(index < TGSI_EXEC_NUM_PREDS);
1466 dst = &mach->Predicates[index].xyzw[chan_index];
1467 break;
1468
1469 default:
1470 assert( 0 );
1471 return;
1472 }
1473
1474 if (inst->Instruction.Predicate) {
1475 uint swizzle;
1476 union tgsi_exec_channel *pred;
1477
1478 switch (chan_index) {
1479 case CHAN_X:
1480 swizzle = inst->Predicate.SwizzleX;
1481 break;
1482 case CHAN_Y:
1483 swizzle = inst->Predicate.SwizzleY;
1484 break;
1485 case CHAN_Z:
1486 swizzle = inst->Predicate.SwizzleZ;
1487 break;
1488 case CHAN_W:
1489 swizzle = inst->Predicate.SwizzleW;
1490 break;
1491 default:
1492 assert(0);
1493 return;
1494 }
1495
1496 assert(inst->Predicate.Index == 0);
1497
1498 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1499
1500 if (inst->Predicate.Negate) {
1501 for (i = 0; i < QUAD_SIZE; i++) {
1502 if (pred->u[i]) {
1503 execmask &= ~(1 << i);
1504 }
1505 }
1506 } else {
1507 for (i = 0; i < QUAD_SIZE; i++) {
1508 if (!pred->u[i]) {
1509 execmask &= ~(1 << i);
1510 }
1511 }
1512 }
1513 }
1514
1515 switch (inst->Instruction.Saturate) {
1516 case TGSI_SAT_NONE:
1517 for (i = 0; i < QUAD_SIZE; i++)
1518 if (execmask & (1 << i))
1519 dst->i[i] = chan->i[i];
1520 break;
1521
1522 case TGSI_SAT_ZERO_ONE:
1523 for (i = 0; i < QUAD_SIZE; i++)
1524 if (execmask & (1 << i)) {
1525 if (chan->f[i] < 0.0f)
1526 dst->f[i] = 0.0f;
1527 else if (chan->f[i] > 1.0f)
1528 dst->f[i] = 1.0f;
1529 else
1530 dst->i[i] = chan->i[i];
1531 }
1532 break;
1533
1534 case TGSI_SAT_MINUS_PLUS_ONE:
1535 for (i = 0; i < QUAD_SIZE; i++)
1536 if (execmask & (1 << i)) {
1537 if (chan->f[i] < -1.0f)
1538 dst->f[i] = -1.0f;
1539 else if (chan->f[i] > 1.0f)
1540 dst->f[i] = 1.0f;
1541 else
1542 dst->i[i] = chan->i[i];
1543 }
1544 break;
1545
1546 default:
1547 assert( 0 );
1548 }
1549 }
1550
1551 #define FETCH(VAL,INDEX,CHAN)\
1552 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)
1553
1554
1555 /**
1556 * Execute ARB-style KIL which is predicated by a src register.
1557 * Kill fragment if any of the four values is less than zero.
1558 */
1559 static void
1560 exec_kil(struct tgsi_exec_machine *mach,
1561 const struct tgsi_full_instruction *inst)
1562 {
1563 uint uniquemask;
1564 uint chan_index;
1565 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1566 union tgsi_exec_channel r[1];
1567
1568 /* This mask stores component bits that were already tested. */
1569 uniquemask = 0;
1570
1571 for (chan_index = 0; chan_index < 4; chan_index++)
1572 {
1573 uint swizzle;
1574 uint i;
1575
1576 /* unswizzle channel */
1577 swizzle = tgsi_util_get_full_src_register_swizzle (
1578 &inst->Src[0],
1579 chan_index);
1580
1581 /* check if the component has not been already tested */
1582 if (uniquemask & (1 << swizzle))
1583 continue;
1584 uniquemask |= 1 << swizzle;
1585
1586 FETCH(&r[0], 0, chan_index);
1587 for (i = 0; i < 4; i++)
1588 if (r[0].f[i] < 0.0f)
1589 kilmask |= 1 << i;
1590 }
1591
1592 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1593 }
1594
1595 /**
1596 * Execute NVIDIA-style KIL which is predicated by a condition code.
1597 * Kill fragment if the condition code is TRUE.
1598 */
1599 static void
1600 exec_kilp(struct tgsi_exec_machine *mach,
1601 const struct tgsi_full_instruction *inst)
1602 {
1603 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1604
1605 /* "unconditional" kil */
1606 kilmask = mach->ExecMask;
1607 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1608 }
1609
1610 static void
1611 emit_vertex(struct tgsi_exec_machine *mach)
1612 {
1613 /* FIXME: check for exec mask correctly
1614 unsigned i;
1615 for (i = 0; i < QUAD_SIZE; ++i) {
1616 if ((mach->ExecMask & (1 << i)))
1617 */
1618 if (mach->ExecMask) {
1619 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
1620 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1621 }
1622 }
1623
1624 static void
1625 emit_primitive(struct tgsi_exec_machine *mach)
1626 {
1627 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
1628 /* FIXME: check for exec mask correctly
1629 unsigned i;
1630 for (i = 0; i < QUAD_SIZE; ++i) {
1631 if ((mach->ExecMask & (1 << i)))
1632 */
1633 if (mach->ExecMask) {
1634 ++(*prim_count);
1635 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
1636 mach->Primitives[*prim_count] = 0;
1637 }
1638 }
1639
1640 static void
1641 conditional_emit_primitive(struct tgsi_exec_machine *mach)
1642 {
1643 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1644 int emitted_verts =
1645 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
1646 if (emitted_verts) {
1647 emit_primitive(mach);
1648 }
1649 }
1650 }
1651
1652
1653 /*
1654 * Fetch four texture samples using STR texture coordinates.
1655 */
1656 static void
1657 fetch_texel( struct tgsi_sampler *sampler,
1658 const union tgsi_exec_channel *s,
1659 const union tgsi_exec_channel *t,
1660 const union tgsi_exec_channel *p,
1661 const union tgsi_exec_channel *c0,
1662 enum tgsi_sampler_control control,
1663 union tgsi_exec_channel *r,
1664 union tgsi_exec_channel *g,
1665 union tgsi_exec_channel *b,
1666 union tgsi_exec_channel *a )
1667 {
1668 uint j;
1669 float rgba[NUM_CHANNELS][QUAD_SIZE];
1670
1671 sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba);
1672
1673 for (j = 0; j < 4; j++) {
1674 r->f[j] = rgba[0][j];
1675 g->f[j] = rgba[1][j];
1676 b->f[j] = rgba[2][j];
1677 a->f[j] = rgba[3][j];
1678 }
1679 }
1680
1681
1682 #define TEX_MODIFIER_NONE 0
1683 #define TEX_MODIFIER_PROJECTED 1
1684 #define TEX_MODIFIER_LOD_BIAS 2
1685 #define TEX_MODIFIER_EXPLICIT_LOD 3
1686
1687
1688 static void
1689 exec_tex(struct tgsi_exec_machine *mach,
1690 const struct tgsi_full_instruction *inst,
1691 uint modifier)
1692 {
1693 const uint unit = inst->Src[1].Register.Index;
1694 union tgsi_exec_channel r[4];
1695 const union tgsi_exec_channel *lod = &ZeroVec;
1696 enum tgsi_sampler_control control;
1697 uint chan;
1698
1699 if (modifier != TEX_MODIFIER_NONE) {
1700 FETCH(&r[3], 0, CHAN_W);
1701 if (modifier != TEX_MODIFIER_PROJECTED) {
1702 lod = &r[3];
1703 }
1704 }
1705
1706 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
1707 control = tgsi_sampler_lod_explicit;
1708 } else {
1709 control = tgsi_sampler_lod_bias;
1710 }
1711
1712 switch (inst->Texture.Texture) {
1713 case TGSI_TEXTURE_1D:
1714 case TGSI_TEXTURE_SHADOW1D:
1715 FETCH(&r[0], 0, CHAN_X);
1716
1717 if (modifier == TEX_MODIFIER_PROJECTED) {
1718 micro_div(&r[0], &r[0], &r[3]);
1719 }
1720
1721 fetch_texel(mach->Samplers[unit],
1722 &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */
1723 control,
1724 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1725 break;
1726
1727 case TGSI_TEXTURE_2D:
1728 case TGSI_TEXTURE_RECT:
1729 case TGSI_TEXTURE_SHADOW2D:
1730 case TGSI_TEXTURE_SHADOWRECT:
1731 FETCH(&r[0], 0, CHAN_X);
1732 FETCH(&r[1], 0, CHAN_Y);
1733 FETCH(&r[2], 0, CHAN_Z);
1734
1735 if (modifier == TEX_MODIFIER_PROJECTED) {
1736 micro_div(&r[0], &r[0], &r[3]);
1737 micro_div(&r[1], &r[1], &r[3]);
1738 micro_div(&r[2], &r[2], &r[3]);
1739 }
1740
1741 fetch_texel(mach->Samplers[unit],
1742 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
1743 control,
1744 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1745 break;
1746
1747 case TGSI_TEXTURE_3D:
1748 case TGSI_TEXTURE_CUBE:
1749 FETCH(&r[0], 0, CHAN_X);
1750 FETCH(&r[1], 0, CHAN_Y);
1751 FETCH(&r[2], 0, CHAN_Z);
1752
1753 if (modifier == TEX_MODIFIER_PROJECTED) {
1754 micro_div(&r[0], &r[0], &r[3]);
1755 micro_div(&r[1], &r[1], &r[3]);
1756 micro_div(&r[2], &r[2], &r[3]);
1757 }
1758
1759 fetch_texel(mach->Samplers[unit],
1760 &r[0], &r[1], &r[2], lod,
1761 control,
1762 &r[0], &r[1], &r[2], &r[3]);
1763 break;
1764
1765 default:
1766 assert(0);
1767 }
1768
1769 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1770 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1771 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1772 }
1773 }
1774 }
1775
1776 static void
1777 exec_txd(struct tgsi_exec_machine *mach,
1778 const struct tgsi_full_instruction *inst)
1779 {
1780 const uint unit = inst->Src[3].Register.Index;
1781 union tgsi_exec_channel r[4];
1782 uint chan;
1783
1784 /*
1785 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1786 */
1787
1788 switch (inst->Texture.Texture) {
1789 case TGSI_TEXTURE_1D:
1790 case TGSI_TEXTURE_SHADOW1D:
1791
1792 FETCH(&r[0], 0, CHAN_X);
1793
1794 fetch_texel(mach->Samplers[unit],
1795 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */
1796 tgsi_sampler_lod_bias,
1797 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1798 break;
1799
1800 case TGSI_TEXTURE_2D:
1801 case TGSI_TEXTURE_RECT:
1802 case TGSI_TEXTURE_SHADOW2D:
1803 case TGSI_TEXTURE_SHADOWRECT:
1804
1805 FETCH(&r[0], 0, CHAN_X);
1806 FETCH(&r[1], 0, CHAN_Y);
1807 FETCH(&r[2], 0, CHAN_Z);
1808
1809 fetch_texel(mach->Samplers[unit],
1810 &r[0], &r[1], &r[2], &ZeroVec, /* inputs */
1811 tgsi_sampler_lod_bias,
1812 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1813 break;
1814
1815 case TGSI_TEXTURE_3D:
1816 case TGSI_TEXTURE_CUBE:
1817
1818 FETCH(&r[0], 0, CHAN_X);
1819 FETCH(&r[1], 0, CHAN_Y);
1820 FETCH(&r[2], 0, CHAN_Z);
1821
1822 fetch_texel(mach->Samplers[unit],
1823 &r[0], &r[1], &r[2], &ZeroVec,
1824 tgsi_sampler_lod_bias,
1825 &r[0], &r[1], &r[2], &r[3]);
1826 break;
1827
1828 default:
1829 assert(0);
1830 }
1831
1832 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1833 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1834 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1835 }
1836 }
1837 }
1838
1839
1840 /**
1841 * Evaluate a constant-valued coefficient at the position of the
1842 * current quad.
1843 */
1844 static void
1845 eval_constant_coef(
1846 struct tgsi_exec_machine *mach,
1847 unsigned attrib,
1848 unsigned chan )
1849 {
1850 unsigned i;
1851
1852 for( i = 0; i < QUAD_SIZE; i++ ) {
1853 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1854 }
1855 }
1856
1857 /**
1858 * Evaluate a linear-valued coefficient at the position of the
1859 * current quad.
1860 */
1861 static void
1862 eval_linear_coef(
1863 struct tgsi_exec_machine *mach,
1864 unsigned attrib,
1865 unsigned chan )
1866 {
1867 const float x = mach->QuadPos.xyzw[0].f[0];
1868 const float y = mach->QuadPos.xyzw[1].f[0];
1869 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1870 const float dady = mach->InterpCoefs[attrib].dady[chan];
1871 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1872 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1873 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1874 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1875 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1876 }
1877
1878 /**
1879 * Evaluate a perspective-valued coefficient at the position of the
1880 * current quad.
1881 */
1882 static void
1883 eval_perspective_coef(
1884 struct tgsi_exec_machine *mach,
1885 unsigned attrib,
1886 unsigned chan )
1887 {
1888 const float x = mach->QuadPos.xyzw[0].f[0];
1889 const float y = mach->QuadPos.xyzw[1].f[0];
1890 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1891 const float dady = mach->InterpCoefs[attrib].dady[chan];
1892 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1893 const float *w = mach->QuadPos.xyzw[3].f;
1894 /* divide by W here */
1895 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1896 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1897 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1898 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1899 }
1900
1901
1902 typedef void (* eval_coef_func)(
1903 struct tgsi_exec_machine *mach,
1904 unsigned attrib,
1905 unsigned chan );
1906
1907 static void
1908 exec_declaration(struct tgsi_exec_machine *mach,
1909 const struct tgsi_full_declaration *decl)
1910 {
1911 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
1912 if (decl->Declaration.File == TGSI_FILE_INPUT ||
1913 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1914 uint first, last, mask;
1915
1916 first = decl->Range.First;
1917 last = decl->Range.Last;
1918 mask = decl->Declaration.UsageMask;
1919
1920 /* XXX we could remove this special-case code since
1921 * mach->InterpCoefs[first].a0 should already have the
1922 * front/back-face value. But we should first update the
1923 * ureg code to emit the right UsageMask value (WRITEMASK_X).
1924 * Then, we could remove the tgsi_exec_machine::Face field.
1925 */
1926 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
1927 uint i;
1928
1929 assert(decl->Semantic.Index == 0);
1930 assert(first == last);
1931
1932 for (i = 0; i < QUAD_SIZE; i++) {
1933 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
1934 }
1935 } else {
1936 eval_coef_func eval;
1937 uint i, j;
1938
1939 switch (decl->Declaration.Interpolate) {
1940 case TGSI_INTERPOLATE_CONSTANT:
1941 eval = eval_constant_coef;
1942 break;
1943
1944 case TGSI_INTERPOLATE_LINEAR:
1945 eval = eval_linear_coef;
1946 break;
1947
1948 case TGSI_INTERPOLATE_PERSPECTIVE:
1949 eval = eval_perspective_coef;
1950 break;
1951
1952 default:
1953 assert(0);
1954 return;
1955 }
1956
1957 for (j = 0; j < NUM_CHANNELS; j++) {
1958 if (mask & (1 << j)) {
1959 for (i = first; i <= last; i++) {
1960 eval(mach, i, j);
1961 }
1962 }
1963 }
1964 }
1965 }
1966 }
1967 }
1968
1969 typedef void (* micro_op)(union tgsi_exec_channel *dst);
1970
1971 static void
1972 exec_vector(struct tgsi_exec_machine *mach,
1973 const struct tgsi_full_instruction *inst,
1974 micro_op op,
1975 enum tgsi_exec_datatype dst_datatype)
1976 {
1977 unsigned int chan;
1978
1979 for (chan = 0; chan < NUM_CHANNELS; chan++) {
1980 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1981 union tgsi_exec_channel dst;
1982
1983 op(&dst);
1984 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
1985 }
1986 }
1987 }
1988
1989 typedef void (* micro_unary_op)(union tgsi_exec_channel *dst,
1990 const union tgsi_exec_channel *src);
1991
1992 static void
1993 exec_scalar_unary(struct tgsi_exec_machine *mach,
1994 const struct tgsi_full_instruction *inst,
1995 micro_unary_op op,
1996 enum tgsi_exec_datatype dst_datatype,
1997 enum tgsi_exec_datatype src_datatype)
1998 {
1999 unsigned int chan;
2000 union tgsi_exec_channel src;
2001 union tgsi_exec_channel dst;
2002
2003 fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype);
2004 op(&dst, &src);
2005 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2006 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2007 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2008 }
2009 }
2010 }
2011
2012 static void
2013 exec_vector_unary(struct tgsi_exec_machine *mach,
2014 const struct tgsi_full_instruction *inst,
2015 micro_unary_op op,
2016 enum tgsi_exec_datatype dst_datatype,
2017 enum tgsi_exec_datatype src_datatype)
2018 {
2019 unsigned int chan;
2020 struct tgsi_exec_vector dst;
2021
2022 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2023 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2024 union tgsi_exec_channel src;
2025
2026 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
2027 op(&dst.xyzw[chan], &src);
2028 }
2029 }
2030 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2031 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2032 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2033 }
2034 }
2035 }
2036
2037 typedef void (* micro_binary_op)(union tgsi_exec_channel *dst,
2038 const union tgsi_exec_channel *src0,
2039 const union tgsi_exec_channel *src1);
2040
2041 static void
2042 exec_scalar_binary(struct tgsi_exec_machine *mach,
2043 const struct tgsi_full_instruction *inst,
2044 micro_binary_op op,
2045 enum tgsi_exec_datatype dst_datatype,
2046 enum tgsi_exec_datatype src_datatype)
2047 {
2048 unsigned int chan;
2049 union tgsi_exec_channel src[2];
2050 union tgsi_exec_channel dst;
2051
2052 fetch_source(mach, &src[0], &inst->Src[0], CHAN_X, src_datatype);
2053 fetch_source(mach, &src[1], &inst->Src[1], CHAN_Y, src_datatype);
2054 op(&dst, &src[0], &src[1]);
2055 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2056 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2057 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2058 }
2059 }
2060 }
2061
2062 static void
2063 exec_vector_binary(struct tgsi_exec_machine *mach,
2064 const struct tgsi_full_instruction *inst,
2065 micro_binary_op op,
2066 enum tgsi_exec_datatype dst_datatype,
2067 enum tgsi_exec_datatype src_datatype)
2068 {
2069 unsigned int chan;
2070 struct tgsi_exec_vector dst;
2071
2072 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2073 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2074 union tgsi_exec_channel src[2];
2075
2076 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2077 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2078 op(&dst.xyzw[chan], &src[0], &src[1]);
2079 }
2080 }
2081 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2082 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2083 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2084 }
2085 }
2086 }
2087
2088 typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst,
2089 const union tgsi_exec_channel *src0,
2090 const union tgsi_exec_channel *src1,
2091 const union tgsi_exec_channel *src2);
2092
2093 static void
2094 exec_vector_trinary(struct tgsi_exec_machine *mach,
2095 const struct tgsi_full_instruction *inst,
2096 micro_trinary_op op,
2097 enum tgsi_exec_datatype dst_datatype,
2098 enum tgsi_exec_datatype src_datatype)
2099 {
2100 unsigned int chan;
2101 struct tgsi_exec_vector dst;
2102
2103 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2104 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2105 union tgsi_exec_channel src[3];
2106
2107 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2108 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2109 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
2110 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
2111 }
2112 }
2113 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2114 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2115 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2116 }
2117 }
2118 }
2119
2120 static void
2121 exec_dp3(struct tgsi_exec_machine *mach,
2122 const struct tgsi_full_instruction *inst)
2123 {
2124 unsigned int chan;
2125 union tgsi_exec_channel arg[3];
2126
2127 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2128 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2129 micro_mul(&arg[2], &arg[0], &arg[1]);
2130
2131 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) {
2132 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2133 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
2134 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2135 }
2136
2137 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2138 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2139 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2140 }
2141 }
2142 }
2143
2144 static void
2145 exec_dp4(struct tgsi_exec_machine *mach,
2146 const struct tgsi_full_instruction *inst)
2147 {
2148 unsigned int chan;
2149 union tgsi_exec_channel arg[3];
2150
2151 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2152 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2153 micro_mul(&arg[2], &arg[0], &arg[1]);
2154
2155 for (chan = CHAN_Y; chan <= CHAN_W; chan++) {
2156 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2157 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
2158 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2159 }
2160
2161 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2162 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2163 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2164 }
2165 }
2166 }
2167
2168 static void
2169 exec_dp2a(struct tgsi_exec_machine *mach,
2170 const struct tgsi_full_instruction *inst)
2171 {
2172 unsigned int chan;
2173 union tgsi_exec_channel arg[3];
2174
2175 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2176 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2177 micro_mul(&arg[2], &arg[0], &arg[1]);
2178
2179 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2180 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2181 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
2182
2183 fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2184 micro_add(&arg[0], &arg[0], &arg[1]);
2185
2186 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2187 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2188 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2189 }
2190 }
2191 }
2192
2193 static void
2194 exec_dph(struct tgsi_exec_machine *mach,
2195 const struct tgsi_full_instruction *inst)
2196 {
2197 unsigned int chan;
2198 union tgsi_exec_channel arg[3];
2199
2200 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2201 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2202 micro_mul(&arg[2], &arg[0], &arg[1]);
2203
2204 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2205 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2206 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2207
2208 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2209 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2210 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
2211
2212 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT);
2213 micro_add(&arg[0], &arg[0], &arg[1]);
2214
2215 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2216 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2217 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2218 }
2219 }
2220 }
2221
2222 static void
2223 exec_dp2(struct tgsi_exec_machine *mach,
2224 const struct tgsi_full_instruction *inst)
2225 {
2226 unsigned int chan;
2227 union tgsi_exec_channel arg[3];
2228
2229 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2230 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2231 micro_mul(&arg[2], &arg[0], &arg[1]);
2232
2233 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2234 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2235 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2236
2237 for (chan = 0; chan < NUM_CHANNELS; chan++) {
2238 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2239 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2240 }
2241 }
2242 }
2243
2244 static void
2245 exec_nrm4(struct tgsi_exec_machine *mach,
2246 const struct tgsi_full_instruction *inst)
2247 {
2248 unsigned int chan;
2249 union tgsi_exec_channel arg[4];
2250 union tgsi_exec_channel scale;
2251
2252 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2253 micro_mul(&scale, &arg[0], &arg[0]);
2254
2255 for (chan = CHAN_Y; chan <= CHAN_W; chan++) {
2256 union tgsi_exec_channel product;
2257
2258 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2259 micro_mul(&product, &arg[chan], &arg[chan]);
2260 micro_add(&scale, &scale, &product);
2261 }
2262
2263 micro_rsq(&scale, &scale);
2264
2265 for (chan = CHAN_X; chan <= CHAN_W; chan++) {
2266 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2267 micro_mul(&arg[chan], &arg[chan], &scale);
2268 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2269 }
2270 }
2271 }
2272
2273 static void
2274 exec_nrm3(struct tgsi_exec_machine *mach,
2275 const struct tgsi_full_instruction *inst)
2276 {
2277 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
2278 unsigned int chan;
2279 union tgsi_exec_channel arg[3];
2280 union tgsi_exec_channel scale;
2281
2282 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2283 micro_mul(&scale, &arg[0], &arg[0]);
2284
2285 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) {
2286 union tgsi_exec_channel product;
2287
2288 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2289 micro_mul(&product, &arg[chan], &arg[chan]);
2290 micro_add(&scale, &scale, &product);
2291 }
2292
2293 micro_rsq(&scale, &scale);
2294
2295 for (chan = CHAN_X; chan <= CHAN_Z; chan++) {
2296 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2297 micro_mul(&arg[chan], &arg[chan], &scale);
2298 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2299 }
2300 }
2301 }
2302
2303 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2304 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2305 }
2306 }
2307
2308 static void
2309 exec_scs(struct tgsi_exec_machine *mach,
2310 const struct tgsi_full_instruction *inst)
2311 {
2312 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
2313 union tgsi_exec_channel arg;
2314 union tgsi_exec_channel result;
2315
2316 fetch_source(mach, &arg, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2317
2318 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2319 micro_cos(&result, &arg);
2320 store_dest(mach, &result, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2321 }
2322 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2323 micro_sin(&result, &arg);
2324 store_dest(mach, &result, &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2325 }
2326 }
2327 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2328 store_dest(mach, &ZeroVec, &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2329 }
2330 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2331 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2332 }
2333 }
2334
2335 static void
2336 exec_x2d(struct tgsi_exec_machine *mach,
2337 const struct tgsi_full_instruction *inst)
2338 {
2339 union tgsi_exec_channel r[4];
2340 union tgsi_exec_channel d[2];
2341
2342 fetch_source(mach, &r[0], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2343 fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2344 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) {
2345 fetch_source(mach, &r[2], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2346 micro_mul(&r[2], &r[2], &r[0]);
2347 fetch_source(mach, &r[3], &inst->Src[2], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2348 micro_mul(&r[3], &r[3], &r[1]);
2349 micro_add(&r[2], &r[2], &r[3]);
2350 fetch_source(mach, &r[3], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2351 micro_add(&d[0], &r[2], &r[3]);
2352 }
2353 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) {
2354 fetch_source(mach, &r[2], &inst->Src[2], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2355 micro_mul(&r[2], &r[2], &r[0]);
2356 fetch_source(mach, &r[3], &inst->Src[2], CHAN_W, TGSI_EXEC_DATA_FLOAT);
2357 micro_mul(&r[3], &r[3], &r[1]);
2358 micro_add(&r[2], &r[2], &r[3]);
2359 fetch_source(mach, &r[3], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2360 micro_add(&d[1], &r[2], &r[3]);
2361 }
2362 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2363 store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2364 }
2365 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2366 store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2367 }
2368 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2369 store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2370 }
2371 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2372 store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2373 }
2374 }
2375
2376 static void
2377 exec_rfl(struct tgsi_exec_machine *mach,
2378 const struct tgsi_full_instruction *inst)
2379 {
2380 union tgsi_exec_channel r[9];
2381
2382 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
2383 /* r0 = dp3(src0, src0) */
2384 fetch_source(mach, &r[2], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2385 micro_mul(&r[0], &r[2], &r[2]);
2386 fetch_source(mach, &r[4], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2387 micro_mul(&r[8], &r[4], &r[4]);
2388 micro_add(&r[0], &r[0], &r[8]);
2389 fetch_source(mach, &r[6], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2390 micro_mul(&r[8], &r[6], &r[6]);
2391 micro_add(&r[0], &r[0], &r[8]);
2392
2393 /* r1 = dp3(src0, src1) */
2394 fetch_source(mach, &r[3], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2395 micro_mul(&r[1], &r[2], &r[3]);
2396 fetch_source(mach, &r[5], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2397 micro_mul(&r[8], &r[4], &r[5]);
2398 micro_add(&r[1], &r[1], &r[8]);
2399 fetch_source(mach, &r[7], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2400 micro_mul(&r[8], &r[6], &r[7]);
2401 micro_add(&r[1], &r[1], &r[8]);
2402
2403 /* r1 = 2 * r1 / r0 */
2404 micro_add(&r[1], &r[1], &r[1]);
2405 micro_div(&r[1], &r[1], &r[0]);
2406
2407 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2408 micro_mul(&r[2], &r[2], &r[1]);
2409 micro_sub(&r[2], &r[2], &r[3]);
2410 store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2411 }
2412 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2413 micro_mul(&r[4], &r[4], &r[1]);
2414 micro_sub(&r[4], &r[4], &r[5]);
2415 store_dest(mach, &r[4], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2416 }
2417 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2418 micro_mul(&r[6], &r[6], &r[1]);
2419 micro_sub(&r[6], &r[6], &r[7]);
2420 store_dest(mach, &r[6], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2421 }
2422 }
2423 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2424 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2425 }
2426 }
2427
2428 static void
2429 exec_xpd(struct tgsi_exec_machine *mach,
2430 const struct tgsi_full_instruction *inst)
2431 {
2432 union tgsi_exec_channel r[6];
2433 union tgsi_exec_channel d[3];
2434
2435 fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2436 fetch_source(mach, &r[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2437
2438 micro_mul(&r[2], &r[0], &r[1]);
2439
2440 fetch_source(mach, &r[3], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2441 fetch_source(mach, &r[4], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2442
2443 micro_mul(&r[5], &r[3], &r[4] );
2444 micro_sub(&d[CHAN_X], &r[2], &r[5]);
2445
2446 fetch_source(mach, &r[2], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2447
2448 micro_mul(&r[3], &r[3], &r[2]);
2449
2450 fetch_source(mach, &r[5], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2451
2452 micro_mul(&r[1], &r[1], &r[5]);
2453 micro_sub(&d[CHAN_Y], &r[3], &r[1]);
2454
2455 micro_mul(&r[5], &r[5], &r[4]);
2456 micro_mul(&r[0], &r[0], &r[2]);
2457 micro_sub(&d[CHAN_Z], &r[5], &r[0]);
2458
2459 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2460 store_dest(mach, &d[CHAN_X], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2461 }
2462 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2463 store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2464 }
2465 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2466 store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2467 }
2468 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2469 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2470 }
2471 }
2472
2473 static void
2474 exec_dst(struct tgsi_exec_machine *mach,
2475 const struct tgsi_full_instruction *inst)
2476 {
2477 union tgsi_exec_channel r[2];
2478 union tgsi_exec_channel d[4];
2479
2480 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2481 fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2482 fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2483 micro_mul(&d[CHAN_Y], &r[0], &r[1]);
2484 }
2485 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2486 fetch_source(mach, &d[CHAN_Z], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2487 }
2488 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2489 fetch_source(mach, &d[CHAN_W], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT);
2490 }
2491
2492 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2493 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2494 }
2495 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2496 store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2497 }
2498 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2499 store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2500 }
2501 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2502 store_dest(mach, &d[CHAN_W], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2503 }
2504 }
2505
2506 static void
2507 exec_log(struct tgsi_exec_machine *mach,
2508 const struct tgsi_full_instruction *inst)
2509 {
2510 union tgsi_exec_channel r[3];
2511
2512 fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2513 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */
2514 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */
2515 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */
2516 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2517 store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2518 }
2519 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2520 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */
2521 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
2522 store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2523 }
2524 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2525 store_dest(mach, &r[1], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2526 }
2527 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2528 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2529 }
2530 }
2531
2532 static void
2533 exec_exp(struct tgsi_exec_machine *mach,
2534 const struct tgsi_full_instruction *inst)
2535 {
2536 union tgsi_exec_channel r[3];
2537
2538 fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2539 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */
2540 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2541 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */
2542 store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2543 }
2544 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2545 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
2546 store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2547 }
2548 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2549 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */
2550 store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2551 }
2552 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2553 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2554 }
2555 }
2556
2557 static void
2558 exec_lit(struct tgsi_exec_machine *mach,
2559 const struct tgsi_full_instruction *inst)
2560 {
2561 union tgsi_exec_channel r[3];
2562 union tgsi_exec_channel d[3];
2563
2564 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2565 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
2566 }
2567 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
2568 fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
2569 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2570 micro_max(&d[CHAN_Y], &r[0], &ZeroVec);
2571 store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2572 }
2573
2574 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2575 fetch_source(mach, &r[1], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2576 micro_max(&r[1], &r[1], &ZeroVec);
2577
2578 fetch_source(mach, &r[2], &inst->Src[0], CHAN_W, TGSI_EXEC_DATA_FLOAT);
2579 micro_min(&r[2], &r[2], &P128Vec);
2580 micro_max(&r[2], &r[2], &M128Vec);
2581 micro_pow(&r[1], &r[1], &r[2]);
2582 micro_lt(&d[CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
2583 store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2584 }
2585 }
2586 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2587 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
2588 }
2589 }
2590
2591 static void
2592 exec_break(struct tgsi_exec_machine *mach)
2593 {
2594 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
2595 /* turn off loop channels for each enabled exec channel */
2596 mach->LoopMask &= ~mach->ExecMask;
2597 /* Todo: if mach->LoopMask == 0, jump to end of loop */
2598 UPDATE_EXEC_MASK(mach);
2599 } else {
2600 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
2601
2602 mach->Switch.mask = 0x0;
2603
2604 UPDATE_EXEC_MASK(mach);
2605 }
2606 }
2607
2608 static void
2609 exec_switch(struct tgsi_exec_machine *mach,
2610 const struct tgsi_full_instruction *inst)
2611 {
2612 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
2613 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
2614
2615 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
2616 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
2617 mach->Switch.mask = 0x0;
2618 mach->Switch.defaultMask = 0x0;
2619
2620 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
2621 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
2622
2623 UPDATE_EXEC_MASK(mach);
2624 }
2625
2626 static void
2627 exec_case(struct tgsi_exec_machine *mach,
2628 const struct tgsi_full_instruction *inst)
2629 {
2630 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
2631 union tgsi_exec_channel src;
2632 uint mask = 0;
2633
2634 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT);
2635
2636 if (mach->Switch.selector.u[0] == src.u[0]) {
2637 mask |= 0x1;
2638 }
2639 if (mach->Switch.selector.u[1] == src.u[1]) {
2640 mask |= 0x2;
2641 }
2642 if (mach->Switch.selector.u[2] == src.u[2]) {
2643 mask |= 0x4;
2644 }
2645 if (mach->Switch.selector.u[3] == src.u[3]) {
2646 mask |= 0x8;
2647 }
2648
2649 mach->Switch.defaultMask |= mask;
2650
2651 mach->Switch.mask |= mask & prevMask;
2652
2653 UPDATE_EXEC_MASK(mach);
2654 }
2655
2656 static void
2657 exec_default(struct tgsi_exec_machine *mach)
2658 {
2659 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
2660
2661 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
2662
2663 UPDATE_EXEC_MASK(mach);
2664 }
2665
2666 static void
2667 exec_endswitch(struct tgsi_exec_machine *mach)
2668 {
2669 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
2670 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
2671
2672 UPDATE_EXEC_MASK(mach);
2673 }
2674
2675 static void
2676 micro_i2f(union tgsi_exec_channel *dst,
2677 const union tgsi_exec_channel *src)
2678 {
2679 dst->f[0] = (float)src->i[0];
2680 dst->f[1] = (float)src->i[1];
2681 dst->f[2] = (float)src->i[2];
2682 dst->f[3] = (float)src->i[3];
2683 }
2684
2685 static void
2686 micro_not(union tgsi_exec_channel *dst,
2687 const union tgsi_exec_channel *src)
2688 {
2689 dst->u[0] = ~src->u[0];
2690 dst->u[1] = ~src->u[1];
2691 dst->u[2] = ~src->u[2];
2692 dst->u[3] = ~src->u[3];
2693 }
2694
2695 static void
2696 micro_shl(union tgsi_exec_channel *dst,
2697 const union tgsi_exec_channel *src0,
2698 const union tgsi_exec_channel *src1)
2699 {
2700 dst->u[0] = src0->u[0] << src1->u[0];
2701 dst->u[1] = src0->u[1] << src1->u[1];
2702 dst->u[2] = src0->u[2] << src1->u[2];
2703 dst->u[3] = src0->u[3] << src1->u[3];
2704 }
2705
2706 static void
2707 micro_and(union tgsi_exec_channel *dst,
2708 const union tgsi_exec_channel *src0,
2709 const union tgsi_exec_channel *src1)
2710 {
2711 dst->u[0] = src0->u[0] & src1->u[0];
2712 dst->u[1] = src0->u[1] & src1->u[1];
2713 dst->u[2] = src0->u[2] & src1->u[2];
2714 dst->u[3] = src0->u[3] & src1->u[3];
2715 }
2716
2717 static void
2718 micro_or(union tgsi_exec_channel *dst,
2719 const union tgsi_exec_channel *src0,
2720 const union tgsi_exec_channel *src1)
2721 {
2722 dst->u[0] = src0->u[0] | src1->u[0];
2723 dst->u[1] = src0->u[1] | src1->u[1];
2724 dst->u[2] = src0->u[2] | src1->u[2];
2725 dst->u[3] = src0->u[3] | src1->u[3];
2726 }
2727
2728 static void
2729 micro_xor(union tgsi_exec_channel *dst,
2730 const union tgsi_exec_channel *src0,
2731 const union tgsi_exec_channel *src1)
2732 {
2733 dst->u[0] = src0->u[0] ^ src1->u[0];
2734 dst->u[1] = src0->u[1] ^ src1->u[1];
2735 dst->u[2] = src0->u[2] ^ src1->u[2];
2736 dst->u[3] = src0->u[3] ^ src1->u[3];
2737 }
2738
2739 static void
2740 micro_f2i(union tgsi_exec_channel *dst,
2741 const union tgsi_exec_channel *src)
2742 {
2743 dst->i[0] = (int)src->f[0];
2744 dst->i[1] = (int)src->f[1];
2745 dst->i[2] = (int)src->f[2];
2746 dst->i[3] = (int)src->f[3];
2747 }
2748
2749 static void
2750 micro_idiv(union tgsi_exec_channel *dst,
2751 const union tgsi_exec_channel *src0,
2752 const union tgsi_exec_channel *src1)
2753 {
2754 dst->i[0] = src0->i[0] / src1->i[0];
2755 dst->i[1] = src0->i[1] / src1->i[1];
2756 dst->i[2] = src0->i[2] / src1->i[2];
2757 dst->i[3] = src0->i[3] / src1->i[3];
2758 }
2759
2760 static void
2761 micro_imax(union tgsi_exec_channel *dst,
2762 const union tgsi_exec_channel *src0,
2763 const union tgsi_exec_channel *src1)
2764 {
2765 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
2766 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
2767 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
2768 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
2769 }
2770
2771 static void
2772 micro_imin(union tgsi_exec_channel *dst,
2773 const union tgsi_exec_channel *src0,
2774 const union tgsi_exec_channel *src1)
2775 {
2776 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
2777 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
2778 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
2779 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
2780 }
2781
2782 static void
2783 micro_isge(union tgsi_exec_channel *dst,
2784 const union tgsi_exec_channel *src0,
2785 const union tgsi_exec_channel *src1)
2786 {
2787 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
2788 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
2789 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
2790 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
2791 }
2792
2793 static void
2794 micro_ishr(union tgsi_exec_channel *dst,
2795 const union tgsi_exec_channel *src0,
2796 const union tgsi_exec_channel *src1)
2797 {
2798 dst->i[0] = src0->i[0] >> src1->i[0];
2799 dst->i[1] = src0->i[1] >> src1->i[1];
2800 dst->i[2] = src0->i[2] >> src1->i[2];
2801 dst->i[3] = src0->i[3] >> src1->i[3];
2802 }
2803
2804 static void
2805 micro_islt(union tgsi_exec_channel *dst,
2806 const union tgsi_exec_channel *src0,
2807 const union tgsi_exec_channel *src1)
2808 {
2809 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
2810 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
2811 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
2812 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
2813 }
2814
2815 static void
2816 micro_f2u(union tgsi_exec_channel *dst,
2817 const union tgsi_exec_channel *src)
2818 {
2819 dst->u[0] = (uint)src->f[0];
2820 dst->u[1] = (uint)src->f[1];
2821 dst->u[2] = (uint)src->f[2];
2822 dst->u[3] = (uint)src->f[3];
2823 }
2824
2825 static void
2826 micro_u2f(union tgsi_exec_channel *dst,
2827 const union tgsi_exec_channel *src)
2828 {
2829 dst->f[0] = (float)src->u[0];
2830 dst->f[1] = (float)src->u[1];
2831 dst->f[2] = (float)src->u[2];
2832 dst->f[3] = (float)src->u[3];
2833 }
2834
2835 static void
2836 micro_uadd(union tgsi_exec_channel *dst,
2837 const union tgsi_exec_channel *src0,
2838 const union tgsi_exec_channel *src1)
2839 {
2840 dst->u[0] = src0->u[0] + src1->u[0];
2841 dst->u[1] = src0->u[1] + src1->u[1];
2842 dst->u[2] = src0->u[2] + src1->u[2];
2843 dst->u[3] = src0->u[3] + src1->u[3];
2844 }
2845
2846 static void
2847 micro_udiv(union tgsi_exec_channel *dst,
2848 const union tgsi_exec_channel *src0,
2849 const union tgsi_exec_channel *src1)
2850 {
2851 dst->u[0] = src0->u[0] / src1->u[0];
2852 dst->u[1] = src0->u[1] / src1->u[1];
2853 dst->u[2] = src0->u[2] / src1->u[2];
2854 dst->u[3] = src0->u[3] / src1->u[3];
2855 }
2856
2857 static void
2858 micro_umad(union tgsi_exec_channel *dst,
2859 const union tgsi_exec_channel *src0,
2860 const union tgsi_exec_channel *src1,
2861 const union tgsi_exec_channel *src2)
2862 {
2863 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
2864 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
2865 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
2866 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
2867 }
2868
2869 static void
2870 micro_umax(union tgsi_exec_channel *dst,
2871 const union tgsi_exec_channel *src0,
2872 const union tgsi_exec_channel *src1)
2873 {
2874 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
2875 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
2876 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
2877 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
2878 }
2879
2880 static void
2881 micro_umin(union tgsi_exec_channel *dst,
2882 const union tgsi_exec_channel *src0,
2883 const union tgsi_exec_channel *src1)
2884 {
2885 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
2886 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
2887 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
2888 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
2889 }
2890
2891 static void
2892 micro_umod(union tgsi_exec_channel *dst,
2893 const union tgsi_exec_channel *src0,
2894 const union tgsi_exec_channel *src1)
2895 {
2896 dst->u[0] = src0->u[0] % src1->u[0];
2897 dst->u[1] = src0->u[1] % src1->u[1];
2898 dst->u[2] = src0->u[2] % src1->u[2];
2899 dst->u[3] = src0->u[3] % src1->u[3];
2900 }
2901
2902 static void
2903 micro_umul(union tgsi_exec_channel *dst,
2904 const union tgsi_exec_channel *src0,
2905 const union tgsi_exec_channel *src1)
2906 {
2907 dst->u[0] = src0->u[0] * src1->u[0];
2908 dst->u[1] = src0->u[1] * src1->u[1];
2909 dst->u[2] = src0->u[2] * src1->u[2];
2910 dst->u[3] = src0->u[3] * src1->u[3];
2911 }
2912
2913 static void
2914 micro_useq(union tgsi_exec_channel *dst,
2915 const union tgsi_exec_channel *src0,
2916 const union tgsi_exec_channel *src1)
2917 {
2918 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
2919 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
2920 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
2921 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
2922 }
2923
2924 static void
2925 micro_usge(union tgsi_exec_channel *dst,
2926 const union tgsi_exec_channel *src0,
2927 const union tgsi_exec_channel *src1)
2928 {
2929 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
2930 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
2931 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
2932 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
2933 }
2934
2935 static void
2936 micro_ushr(union tgsi_exec_channel *dst,
2937 const union tgsi_exec_channel *src0,
2938 const union tgsi_exec_channel *src1)
2939 {
2940 dst->u[0] = src0->u[0] >> src1->u[0];
2941 dst->u[1] = src0->u[1] >> src1->u[1];
2942 dst->u[2] = src0->u[2] >> src1->u[2];
2943 dst->u[3] = src0->u[3] >> src1->u[3];
2944 }
2945
2946 static void
2947 micro_uslt(union tgsi_exec_channel *dst,
2948 const union tgsi_exec_channel *src0,
2949 const union tgsi_exec_channel *src1)
2950 {
2951 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
2952 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
2953 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
2954 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
2955 }
2956
2957 static void
2958 micro_usne(union tgsi_exec_channel *dst,
2959 const union tgsi_exec_channel *src0,
2960 const union tgsi_exec_channel *src1)
2961 {
2962 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
2963 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
2964 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
2965 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
2966 }
2967
2968 static void
2969 exec_instruction(
2970 struct tgsi_exec_machine *mach,
2971 const struct tgsi_full_instruction *inst,
2972 int *pc )
2973 {
2974 union tgsi_exec_channel r[10];
2975
2976 (*pc)++;
2977
2978 switch (inst->Instruction.Opcode) {
2979 case TGSI_OPCODE_ARL:
2980 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
2981 break;
2982
2983 case TGSI_OPCODE_MOV:
2984 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
2985 break;
2986
2987 case TGSI_OPCODE_LIT:
2988 exec_lit(mach, inst);
2989 break;
2990
2991 case TGSI_OPCODE_RCP:
2992 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2993 break;
2994
2995 case TGSI_OPCODE_RSQ:
2996 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
2997 break;
2998
2999 case TGSI_OPCODE_EXP:
3000 exec_exp(mach, inst);
3001 break;
3002
3003 case TGSI_OPCODE_LOG:
3004 exec_log(mach, inst);
3005 break;
3006
3007 case TGSI_OPCODE_MUL:
3008 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3009 break;
3010
3011 case TGSI_OPCODE_ADD:
3012 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3013 break;
3014
3015 case TGSI_OPCODE_DP3:
3016 exec_dp3(mach, inst);
3017 break;
3018
3019 case TGSI_OPCODE_DP4:
3020 exec_dp4(mach, inst);
3021 break;
3022
3023 case TGSI_OPCODE_DST:
3024 exec_dst(mach, inst);
3025 break;
3026
3027 case TGSI_OPCODE_MIN:
3028 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3029 break;
3030
3031 case TGSI_OPCODE_MAX:
3032 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3033 break;
3034
3035 case TGSI_OPCODE_SLT:
3036 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3037 break;
3038
3039 case TGSI_OPCODE_SGE:
3040 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3041 break;
3042
3043 case TGSI_OPCODE_MAD:
3044 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3045 break;
3046
3047 case TGSI_OPCODE_SUB:
3048 exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3049 break;
3050
3051 case TGSI_OPCODE_LRP:
3052 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3053 break;
3054
3055 case TGSI_OPCODE_CND:
3056 exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3057 break;
3058
3059 case TGSI_OPCODE_DP2A:
3060 exec_dp2a(mach, inst);
3061 break;
3062
3063 case TGSI_OPCODE_FRC:
3064 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3065 break;
3066
3067 case TGSI_OPCODE_CLAMP:
3068 exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3069 break;
3070
3071 case TGSI_OPCODE_FLR:
3072 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3073 break;
3074
3075 case TGSI_OPCODE_ROUND:
3076 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3077 break;
3078
3079 case TGSI_OPCODE_EX2:
3080 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3081 break;
3082
3083 case TGSI_OPCODE_LG2:
3084 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3085 break;
3086
3087 case TGSI_OPCODE_POW:
3088 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3089 break;
3090
3091 case TGSI_OPCODE_XPD:
3092 exec_xpd(mach, inst);
3093 break;
3094
3095 case TGSI_OPCODE_ABS:
3096 exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3097 break;
3098
3099 case TGSI_OPCODE_RCC:
3100 exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3101 break;
3102
3103 case TGSI_OPCODE_DPH:
3104 exec_dph(mach, inst);
3105 break;
3106
3107 case TGSI_OPCODE_COS:
3108 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3109 break;
3110
3111 case TGSI_OPCODE_DDX:
3112 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3113 break;
3114
3115 case TGSI_OPCODE_DDY:
3116 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3117 break;
3118
3119 case TGSI_OPCODE_KILP:
3120 exec_kilp (mach, inst);
3121 break;
3122
3123 case TGSI_OPCODE_KIL:
3124 exec_kil (mach, inst);
3125 break;
3126
3127 case TGSI_OPCODE_PK2H:
3128 assert (0);
3129 break;
3130
3131 case TGSI_OPCODE_PK2US:
3132 assert (0);
3133 break;
3134
3135 case TGSI_OPCODE_PK4B:
3136 assert (0);
3137 break;
3138
3139 case TGSI_OPCODE_PK4UB:
3140 assert (0);
3141 break;
3142
3143 case TGSI_OPCODE_RFL:
3144 exec_rfl(mach, inst);
3145 break;
3146
3147 case TGSI_OPCODE_SEQ:
3148 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3149 break;
3150
3151 case TGSI_OPCODE_SFL:
3152 exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT);
3153 break;
3154
3155 case TGSI_OPCODE_SGT:
3156 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3157 break;
3158
3159 case TGSI_OPCODE_SIN:
3160 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3161 break;
3162
3163 case TGSI_OPCODE_SLE:
3164 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3165 break;
3166
3167 case TGSI_OPCODE_SNE:
3168 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3169 break;
3170
3171 case TGSI_OPCODE_STR:
3172 exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT);
3173 break;
3174
3175 case TGSI_OPCODE_TEX:
3176 /* simple texture lookup */
3177 /* src[0] = texcoord */
3178 /* src[1] = sampler unit */
3179 exec_tex(mach, inst, TEX_MODIFIER_NONE);
3180 break;
3181
3182 case TGSI_OPCODE_TXB:
3183 /* Texture lookup with lod bias */
3184 /* src[0] = texcoord (src[0].w = LOD bias) */
3185 /* src[1] = sampler unit */
3186 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS);
3187 break;
3188
3189 case TGSI_OPCODE_TXD:
3190 /* Texture lookup with explicit partial derivatives */
3191 /* src[0] = texcoord */
3192 /* src[1] = d[strq]/dx */
3193 /* src[2] = d[strq]/dy */
3194 /* src[3] = sampler unit */
3195 exec_txd(mach, inst);
3196 break;
3197
3198 case TGSI_OPCODE_TXL:
3199 /* Texture lookup with explicit LOD */
3200 /* src[0] = texcoord (src[0].w = LOD) */
3201 /* src[1] = sampler unit */
3202 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD);
3203 break;
3204
3205 case TGSI_OPCODE_TXP:
3206 /* Texture lookup with projection */
3207 /* src[0] = texcoord (src[0].w = projection) */
3208 /* src[1] = sampler unit */
3209 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED);
3210 break;
3211
3212 case TGSI_OPCODE_UP2H:
3213 assert (0);
3214 break;
3215
3216 case TGSI_OPCODE_UP2US:
3217 assert (0);
3218 break;
3219
3220 case TGSI_OPCODE_UP4B:
3221 assert (0);
3222 break;
3223
3224 case TGSI_OPCODE_UP4UB:
3225 assert (0);
3226 break;
3227
3228 case TGSI_OPCODE_X2D:
3229 exec_x2d(mach, inst);
3230 break;
3231
3232 case TGSI_OPCODE_ARA:
3233 assert (0);
3234 break;
3235
3236 case TGSI_OPCODE_ARR:
3237 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3238 break;
3239
3240 case TGSI_OPCODE_BRA:
3241 assert (0);
3242 break;
3243
3244 case TGSI_OPCODE_CAL:
3245 /* skip the call if no execution channels are enabled */
3246 if (mach->ExecMask) {
3247 /* do the call */
3248
3249 /* First, record the depths of the execution stacks.
3250 * This is important for deeply nested/looped return statements.
3251 * We have to unwind the stacks by the correct amount. For a
3252 * real code generator, we could determine the number of entries
3253 * to pop off each stack with simple static analysis and avoid
3254 * implementing this data structure at run time.
3255 */
3256 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
3257 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
3258 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
3259 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
3260 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
3261 /* note that PC was already incremented above */
3262 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
3263
3264 mach->CallStackTop++;
3265
3266 /* Second, push the Cond, Loop, Cont, Switch, Break, Func stacks */
3267 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3268 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3269 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3270 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3271 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3272 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
3273
3274 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3275 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3276 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3277 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3278 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3279 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
3280
3281 /* Finally, jump to the subroutine */
3282 *pc = inst->Label.Label;
3283 }
3284 break;
3285
3286 case TGSI_OPCODE_RET:
3287 mach->FuncMask &= ~mach->ExecMask;
3288 UPDATE_EXEC_MASK(mach);
3289
3290 if (mach->FuncMask == 0x0) {
3291 /* really return now (otherwise, keep executing) */
3292
3293 if (mach->CallStackTop == 0) {
3294 /* returning from main() */
3295 mach->CondStackTop = 0;
3296 mach->LoopStackTop = 0;
3297 *pc = -1;
3298 return;
3299 }
3300
3301 assert(mach->CallStackTop > 0);
3302 mach->CallStackTop--;
3303
3304 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3305 mach->CondMask = mach->CondStack[mach->CondStackTop];
3306
3307 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3308 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3309
3310 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3311 mach->ContMask = mach->ContStack[mach->ContStackTop];
3312
3313 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3314 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3315
3316 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3317 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3318
3319 assert(mach->FuncStackTop > 0);
3320 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3321
3322 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3323
3324 UPDATE_EXEC_MASK(mach);
3325 }
3326 break;
3327
3328 case TGSI_OPCODE_SSG:
3329 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3330 break;
3331
3332 case TGSI_OPCODE_CMP:
3333 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3334 break;
3335
3336 case TGSI_OPCODE_SCS:
3337 exec_scs(mach, inst);
3338 break;
3339
3340 case TGSI_OPCODE_NRM:
3341 exec_nrm3(mach, inst);
3342 break;
3343
3344 case TGSI_OPCODE_NRM4:
3345 exec_nrm4(mach, inst);
3346 break;
3347
3348 case TGSI_OPCODE_DIV:
3349 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3350 break;
3351
3352 case TGSI_OPCODE_DP2:
3353 exec_dp2(mach, inst);
3354 break;
3355
3356 case TGSI_OPCODE_IF:
3357 /* push CondMask */
3358 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3359 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3360 FETCH( &r[0], 0, CHAN_X );
3361 /* update CondMask */
3362 if( ! r[0].u[0] ) {
3363 mach->CondMask &= ~0x1;
3364 }
3365 if( ! r[0].u[1] ) {
3366 mach->CondMask &= ~0x2;
3367 }
3368 if( ! r[0].u[2] ) {
3369 mach->CondMask &= ~0x4;
3370 }
3371 if( ! r[0].u[3] ) {
3372 mach->CondMask &= ~0x8;
3373 }
3374 UPDATE_EXEC_MASK(mach);
3375 /* Todo: If CondMask==0, jump to ELSE */
3376 break;
3377
3378 case TGSI_OPCODE_ELSE:
3379 /* invert CondMask wrt previous mask */
3380 {
3381 uint prevMask;
3382 assert(mach->CondStackTop > 0);
3383 prevMask = mach->CondStack[mach->CondStackTop - 1];
3384 mach->CondMask = ~mach->CondMask & prevMask;
3385 UPDATE_EXEC_MASK(mach);
3386 /* Todo: If CondMask==0, jump to ENDIF */
3387 }
3388 break;
3389
3390 case TGSI_OPCODE_ENDIF:
3391 /* pop CondMask */
3392 assert(mach->CondStackTop > 0);
3393 mach->CondMask = mach->CondStack[--mach->CondStackTop];
3394 UPDATE_EXEC_MASK(mach);
3395 break;
3396
3397 case TGSI_OPCODE_END:
3398 /* make sure we end primitives which haven't
3399 * been explicitly emitted */
3400 conditional_emit_primitive(mach);
3401 /* halt execution */
3402 *pc = -1;
3403 break;
3404
3405 case TGSI_OPCODE_PUSHA:
3406 assert (0);
3407 break;
3408
3409 case TGSI_OPCODE_POPA:
3410 assert (0);
3411 break;
3412
3413 case TGSI_OPCODE_CEIL:
3414 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3415 break;
3416
3417 case TGSI_OPCODE_I2F:
3418 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
3419 break;
3420
3421 case TGSI_OPCODE_NOT:
3422 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3423 break;
3424
3425 case TGSI_OPCODE_TRUNC:
3426 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3427 break;
3428
3429 case TGSI_OPCODE_SHL:
3430 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3431 break;
3432
3433 case TGSI_OPCODE_AND:
3434 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3435 break;
3436
3437 case TGSI_OPCODE_OR:
3438 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3439 break;
3440
3441 case TGSI_OPCODE_MOD:
3442 assert (0);
3443 break;
3444
3445 case TGSI_OPCODE_XOR:
3446 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3447 break;
3448
3449 case TGSI_OPCODE_SAD:
3450 assert (0);
3451 break;
3452
3453 case TGSI_OPCODE_TXF:
3454 assert (0);
3455 break;
3456
3457 case TGSI_OPCODE_TXQ:
3458 assert (0);
3459 break;
3460
3461 case TGSI_OPCODE_EMIT:
3462 emit_vertex(mach);
3463 break;
3464
3465 case TGSI_OPCODE_ENDPRIM:
3466 emit_primitive(mach);
3467 break;
3468
3469 case TGSI_OPCODE_BGNLOOP:
3470 /* push LoopMask and ContMasks */
3471 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3472 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3473 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3474 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3475
3476 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3477 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3478 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
3479 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3480 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
3481 break;
3482
3483 case TGSI_OPCODE_ENDLOOP:
3484 /* Restore ContMask, but don't pop */
3485 assert(mach->ContStackTop > 0);
3486 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3487 UPDATE_EXEC_MASK(mach);
3488 if (mach->ExecMask) {
3489 /* repeat loop: jump to instruction just past BGNLOOP */
3490 assert(mach->LoopLabelStackTop > 0);
3491 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3492 }
3493 else {
3494 /* exit loop: pop LoopMask */
3495 assert(mach->LoopStackTop > 0);
3496 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3497 /* pop ContMask */
3498 assert(mach->ContStackTop > 0);
3499 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3500 assert(mach->LoopLabelStackTop > 0);
3501 --mach->LoopLabelStackTop;
3502
3503 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3504 }
3505 UPDATE_EXEC_MASK(mach);
3506 break;
3507
3508 case TGSI_OPCODE_BRK:
3509 exec_break(mach);
3510 break;
3511
3512 case TGSI_OPCODE_CONT:
3513 /* turn off cont channels for each enabled exec channel */
3514 mach->ContMask &= ~mach->ExecMask;
3515 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3516 UPDATE_EXEC_MASK(mach);
3517 break;
3518
3519 case TGSI_OPCODE_BGNSUB:
3520 /* no-op */
3521 break;
3522
3523 case TGSI_OPCODE_ENDSUB:
3524 /*
3525 * XXX: This really should be a no-op. We should never reach this opcode.
3526 */
3527
3528 assert(mach->CallStackTop > 0);
3529 mach->CallStackTop--;
3530
3531 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3532 mach->CondMask = mach->CondStack[mach->CondStackTop];
3533
3534 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3535 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3536
3537 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3538 mach->ContMask = mach->ContStack[mach->ContStackTop];
3539
3540 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3541 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3542
3543 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3544 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3545
3546 assert(mach->FuncStackTop > 0);
3547 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3548
3549 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3550
3551 UPDATE_EXEC_MASK(mach);
3552 break;
3553
3554 case TGSI_OPCODE_NOP:
3555 break;
3556
3557 case TGSI_OPCODE_BREAKC:
3558 FETCH(&r[0], 0, CHAN_X);
3559 /* update LoopMask */
3560 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
3561 mach->LoopMask &= ~0x1;
3562 }
3563 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
3564 mach->LoopMask &= ~0x2;
3565 }
3566 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
3567 mach->LoopMask &= ~0x4;
3568 }
3569 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
3570 mach->LoopMask &= ~0x8;
3571 }
3572 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3573 UPDATE_EXEC_MASK(mach);
3574 break;
3575
3576 case TGSI_OPCODE_F2I:
3577 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3578 break;
3579
3580 case TGSI_OPCODE_IDIV:
3581 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3582 break;
3583
3584 case TGSI_OPCODE_IMAX:
3585 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3586 break;
3587
3588 case TGSI_OPCODE_IMIN:
3589 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3590 break;
3591
3592 case TGSI_OPCODE_INEG:
3593 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3594 break;
3595
3596 case TGSI_OPCODE_ISGE:
3597 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3598 break;
3599
3600 case TGSI_OPCODE_ISHR:
3601 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3602 break;
3603
3604 case TGSI_OPCODE_ISLT:
3605 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3606 break;
3607
3608 case TGSI_OPCODE_F2U:
3609 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
3610 break;
3611
3612 case TGSI_OPCODE_U2F:
3613 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
3614 break;
3615
3616 case TGSI_OPCODE_UADD:
3617 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3618 break;
3619
3620 case TGSI_OPCODE_UDIV:
3621 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3622 break;
3623
3624 case TGSI_OPCODE_UMAD:
3625 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3626 break;
3627
3628 case TGSI_OPCODE_UMAX:
3629 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3630 break;
3631
3632 case TGSI_OPCODE_UMIN:
3633 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3634 break;
3635
3636 case TGSI_OPCODE_UMOD:
3637 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3638 break;
3639
3640 case TGSI_OPCODE_UMUL:
3641 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3642 break;
3643
3644 case TGSI_OPCODE_USEQ:
3645 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3646 break;
3647
3648 case TGSI_OPCODE_USGE:
3649 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3650 break;
3651
3652 case TGSI_OPCODE_USHR:
3653 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3654 break;
3655
3656 case TGSI_OPCODE_USLT:
3657 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3658 break;
3659
3660 case TGSI_OPCODE_USNE:
3661 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3662 break;
3663
3664 case TGSI_OPCODE_SWITCH:
3665 exec_switch(mach, inst);
3666 break;
3667
3668 case TGSI_OPCODE_CASE:
3669 exec_case(mach, inst);
3670 break;
3671
3672 case TGSI_OPCODE_DEFAULT:
3673 exec_default(mach);
3674 break;
3675
3676 case TGSI_OPCODE_ENDSWITCH:
3677 exec_endswitch(mach);
3678 break;
3679
3680 default:
3681 assert( 0 );
3682 }
3683 }
3684
3685
3686 #define DEBUG_EXECUTION 0
3687
3688
3689 /**
3690 * Run TGSI interpreter.
3691 * \return bitmask of "alive" quad components
3692 */
3693 uint
3694 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3695 {
3696 uint i;
3697 int pc = 0;
3698
3699 mach->CondMask = 0xf;
3700 mach->LoopMask = 0xf;
3701 mach->ContMask = 0xf;
3702 mach->FuncMask = 0xf;
3703 mach->ExecMask = 0xf;
3704
3705 mach->Switch.mask = 0xf;
3706
3707 assert(mach->CondStackTop == 0);
3708 assert(mach->LoopStackTop == 0);
3709 assert(mach->ContStackTop == 0);
3710 assert(mach->SwitchStackTop == 0);
3711 assert(mach->BreakStackTop == 0);
3712 assert(mach->CallStackTop == 0);
3713
3714 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3715 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3716
3717 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3718 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3719 mach->Primitives[0] = 0;
3720 }
3721
3722 for (i = 0; i < QUAD_SIZE; i++) {
3723 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3724 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3725 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3726 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3727 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3728 }
3729
3730 /* execute declarations (interpolants) */
3731 for (i = 0; i < mach->NumDeclarations; i++) {
3732 exec_declaration( mach, mach->Declarations+i );
3733 }
3734
3735 {
3736 #if DEBUG_EXECUTION
3737 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
3738 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
3739 uint inst = 1;
3740
3741 memcpy(temps, mach->Temps, sizeof(temps));
3742 memcpy(outputs, mach->Outputs, sizeof(outputs));
3743 #endif
3744
3745 /* execute instructions, until pc is set to -1 */
3746 while (pc != -1) {
3747
3748 #if DEBUG_EXECUTION
3749 uint i;
3750
3751 tgsi_dump_instruction(&mach->Instructions[pc], inst++);
3752 #endif
3753
3754 assert(pc < (int) mach->NumInstructions);
3755 exec_instruction(mach, mach->Instructions + pc, &pc);
3756
3757 #if DEBUG_EXECUTION
3758 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
3759 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
3760 uint j;
3761
3762 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
3763 debug_printf("TEMP[%2u] = ", i);
3764 for (j = 0; j < 4; j++) {
3765 if (j > 0) {
3766 debug_printf(" ");
3767 }
3768 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3769 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
3770 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
3771 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
3772 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
3773 }
3774 }
3775 }
3776 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
3777 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
3778 uint j;
3779
3780 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
3781 debug_printf("OUT[%2u] = ", i);
3782 for (j = 0; j < 4; j++) {
3783 if (j > 0) {
3784 debug_printf(" ");
3785 }
3786 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3787 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
3788 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
3789 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
3790 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
3791 }
3792 }
3793 }
3794 #endif
3795 }
3796 }
3797
3798 #if 0
3799 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3800 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3801 /*
3802 * Scale back depth component.
3803 */
3804 for (i = 0; i < 4; i++)
3805 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3806 }
3807 #endif
3808
3809 /* Strictly speaking, these assertions aren't really needed but they
3810 * can potentially catch some bugs in the control flow code.
3811 */
3812 assert(mach->CondStackTop == 0);
3813 assert(mach->LoopStackTop == 0);
3814 assert(mach->ContStackTop == 0);
3815 assert(mach->SwitchStackTop == 0);
3816 assert(mach->BreakStackTop == 0);
3817 assert(mach->CallStackTop == 0);
3818
3819 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3820 }