Merge branch 'gallium-userbuf'
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
 * The ExecMask is computed from several other masks (CondMask, LoopMask,
 * ContMask, Switch.mask and FuncMask; see UPDATE_EXEC_MASK) which are
 * controlled by the flow control instructions (e.g. IF/ELSE/ENDIF,
 * LOOP/ENDLOOP and CONT).
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
63
64
65 #define FAST_MATH 0
66
67 #define TILE_TOP_LEFT 0
68 #define TILE_TOP_RIGHT 1
69 #define TILE_BOTTOM_LEFT 2
70 #define TILE_BOTTOM_RIGHT 3
71
72 static void
73 micro_abs(union tgsi_exec_channel *dst,
74 const union tgsi_exec_channel *src)
75 {
76 dst->f[0] = fabsf(src->f[0]);
77 dst->f[1] = fabsf(src->f[1]);
78 dst->f[2] = fabsf(src->f[2]);
79 dst->f[3] = fabsf(src->f[3]);
80 }
81
82 static void
83 micro_arl(union tgsi_exec_channel *dst,
84 const union tgsi_exec_channel *src)
85 {
86 dst->i[0] = (int)floorf(src->f[0]);
87 dst->i[1] = (int)floorf(src->f[1]);
88 dst->i[2] = (int)floorf(src->f[2]);
89 dst->i[3] = (int)floorf(src->f[3]);
90 }
91
92 static void
93 micro_arr(union tgsi_exec_channel *dst,
94 const union tgsi_exec_channel *src)
95 {
96 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
97 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
98 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
99 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
100 }
101
102 static void
103 micro_ceil(union tgsi_exec_channel *dst,
104 const union tgsi_exec_channel *src)
105 {
106 dst->f[0] = ceilf(src->f[0]);
107 dst->f[1] = ceilf(src->f[1]);
108 dst->f[2] = ceilf(src->f[2]);
109 dst->f[3] = ceilf(src->f[3]);
110 }
111
112 static void
113 micro_clamp(union tgsi_exec_channel *dst,
114 const union tgsi_exec_channel *src0,
115 const union tgsi_exec_channel *src1,
116 const union tgsi_exec_channel *src2)
117 {
118 dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0];
119 dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1];
120 dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2];
121 dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3];
122 }
123
124 static void
125 micro_cmp(union tgsi_exec_channel *dst,
126 const union tgsi_exec_channel *src0,
127 const union tgsi_exec_channel *src1,
128 const union tgsi_exec_channel *src2)
129 {
130 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
131 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
132 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
133 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
134 }
135
136 static void
137 micro_cnd(union tgsi_exec_channel *dst,
138 const union tgsi_exec_channel *src0,
139 const union tgsi_exec_channel *src1,
140 const union tgsi_exec_channel *src2)
141 {
142 dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0];
143 dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1];
144 dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2];
145 dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3];
146 }
147
148 static void
149 micro_cos(union tgsi_exec_channel *dst,
150 const union tgsi_exec_channel *src)
151 {
152 dst->f[0] = cosf(src->f[0]);
153 dst->f[1] = cosf(src->f[1]);
154 dst->f[2] = cosf(src->f[2]);
155 dst->f[3] = cosf(src->f[3]);
156 }
157
158 static void
159 micro_ddx(union tgsi_exec_channel *dst,
160 const union tgsi_exec_channel *src)
161 {
162 dst->f[0] =
163 dst->f[1] =
164 dst->f[2] =
165 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
166 }
167
168 static void
169 micro_ddy(union tgsi_exec_channel *dst,
170 const union tgsi_exec_channel *src)
171 {
172 dst->f[0] =
173 dst->f[1] =
174 dst->f[2] =
175 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
176 }
177
178 static void
179 micro_exp2(union tgsi_exec_channel *dst,
180 const union tgsi_exec_channel *src)
181 {
182 #if FAST_MATH
183 dst->f[0] = util_fast_exp2(src->f[0]);
184 dst->f[1] = util_fast_exp2(src->f[1]);
185 dst->f[2] = util_fast_exp2(src->f[2]);
186 dst->f[3] = util_fast_exp2(src->f[3]);
187 #else
188 #if DEBUG
189 /* Inf is okay for this instruction, so clamp it to silence assertions. */
190 uint i;
191 union tgsi_exec_channel clamped;
192
193 for (i = 0; i < 4; i++) {
194 if (src->f[i] > 127.99999f) {
195 clamped.f[i] = 127.99999f;
196 } else if (src->f[i] < -126.99999f) {
197 clamped.f[i] = -126.99999f;
198 } else {
199 clamped.f[i] = src->f[i];
200 }
201 }
202 src = &clamped;
203 #endif /* DEBUG */
204
205 dst->f[0] = powf(2.0f, src->f[0]);
206 dst->f[1] = powf(2.0f, src->f[1]);
207 dst->f[2] = powf(2.0f, src->f[2]);
208 dst->f[3] = powf(2.0f, src->f[3]);
209 #endif /* FAST_MATH */
210 }
211
212 static void
213 micro_flr(union tgsi_exec_channel *dst,
214 const union tgsi_exec_channel *src)
215 {
216 dst->f[0] = floorf(src->f[0]);
217 dst->f[1] = floorf(src->f[1]);
218 dst->f[2] = floorf(src->f[2]);
219 dst->f[3] = floorf(src->f[3]);
220 }
221
222 static void
223 micro_frc(union tgsi_exec_channel *dst,
224 const union tgsi_exec_channel *src)
225 {
226 dst->f[0] = src->f[0] - floorf(src->f[0]);
227 dst->f[1] = src->f[1] - floorf(src->f[1]);
228 dst->f[2] = src->f[2] - floorf(src->f[2]);
229 dst->f[3] = src->f[3] - floorf(src->f[3]);
230 }
231
232 static void
233 micro_iabs(union tgsi_exec_channel *dst,
234 const union tgsi_exec_channel *src)
235 {
236 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
237 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
238 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
239 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
240 }
241
242 static void
243 micro_ineg(union tgsi_exec_channel *dst,
244 const union tgsi_exec_channel *src)
245 {
246 dst->i[0] = -src->i[0];
247 dst->i[1] = -src->i[1];
248 dst->i[2] = -src->i[2];
249 dst->i[3] = -src->i[3];
250 }
251
252 static void
253 micro_lg2(union tgsi_exec_channel *dst,
254 const union tgsi_exec_channel *src)
255 {
256 #if FAST_MATH
257 dst->f[0] = util_fast_log2(src->f[0]);
258 dst->f[1] = util_fast_log2(src->f[1]);
259 dst->f[2] = util_fast_log2(src->f[2]);
260 dst->f[3] = util_fast_log2(src->f[3]);
261 #else
262 dst->f[0] = logf(src->f[0]) * 1.442695f;
263 dst->f[1] = logf(src->f[1]) * 1.442695f;
264 dst->f[2] = logf(src->f[2]) * 1.442695f;
265 dst->f[3] = logf(src->f[3]) * 1.442695f;
266 #endif
267 }
268
269 static void
270 micro_lrp(union tgsi_exec_channel *dst,
271 const union tgsi_exec_channel *src0,
272 const union tgsi_exec_channel *src1,
273 const union tgsi_exec_channel *src2)
274 {
275 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
276 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
277 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
278 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
279 }
280
281 static void
282 micro_mad(union tgsi_exec_channel *dst,
283 const union tgsi_exec_channel *src0,
284 const union tgsi_exec_channel *src1,
285 const union tgsi_exec_channel *src2)
286 {
287 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
288 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
289 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
290 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
291 }
292
293 static void
294 micro_mov(union tgsi_exec_channel *dst,
295 const union tgsi_exec_channel *src)
296 {
297 dst->u[0] = src->u[0];
298 dst->u[1] = src->u[1];
299 dst->u[2] = src->u[2];
300 dst->u[3] = src->u[3];
301 }
302
303 static void
304 micro_rcp(union tgsi_exec_channel *dst,
305 const union tgsi_exec_channel *src)
306 {
307 #if 0 /* for debugging */
308 assert(src->f[0] != 0.0f);
309 assert(src->f[1] != 0.0f);
310 assert(src->f[2] != 0.0f);
311 assert(src->f[3] != 0.0f);
312 #endif
313 dst->f[0] = 1.0f / src->f[0];
314 dst->f[1] = 1.0f / src->f[1];
315 dst->f[2] = 1.0f / src->f[2];
316 dst->f[3] = 1.0f / src->f[3];
317 }
318
319 static void
320 micro_rnd(union tgsi_exec_channel *dst,
321 const union tgsi_exec_channel *src)
322 {
323 dst->f[0] = floorf(src->f[0] + 0.5f);
324 dst->f[1] = floorf(src->f[1] + 0.5f);
325 dst->f[2] = floorf(src->f[2] + 0.5f);
326 dst->f[3] = floorf(src->f[3] + 0.5f);
327 }
328
329 static void
330 micro_rsq(union tgsi_exec_channel *dst,
331 const union tgsi_exec_channel *src)
332 {
333 #if 0 /* for debugging */
334 assert(src->f[0] != 0.0f);
335 assert(src->f[1] != 0.0f);
336 assert(src->f[2] != 0.0f);
337 assert(src->f[3] != 0.0f);
338 #endif
339 dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
340 dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
341 dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
342 dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
343 }
344
345 static void
346 micro_seq(union tgsi_exec_channel *dst,
347 const union tgsi_exec_channel *src0,
348 const union tgsi_exec_channel *src1)
349 {
350 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
351 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
352 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
353 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
354 }
355
356 static void
357 micro_sge(union tgsi_exec_channel *dst,
358 const union tgsi_exec_channel *src0,
359 const union tgsi_exec_channel *src1)
360 {
361 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
362 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
363 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
364 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
365 }
366
367 static void
368 micro_sgn(union tgsi_exec_channel *dst,
369 const union tgsi_exec_channel *src)
370 {
371 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
372 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
373 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
374 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
375 }
376
377 static void
378 micro_isgn(union tgsi_exec_channel *dst,
379 const union tgsi_exec_channel *src)
380 {
381 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
382 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
383 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
384 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
385 }
386
387 static void
388 micro_sgt(union tgsi_exec_channel *dst,
389 const union tgsi_exec_channel *src0,
390 const union tgsi_exec_channel *src1)
391 {
392 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
393 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
394 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
395 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
396 }
397
398 static void
399 micro_sin(union tgsi_exec_channel *dst,
400 const union tgsi_exec_channel *src)
401 {
402 dst->f[0] = sinf(src->f[0]);
403 dst->f[1] = sinf(src->f[1]);
404 dst->f[2] = sinf(src->f[2]);
405 dst->f[3] = sinf(src->f[3]);
406 }
407
408 static void
409 micro_sle(union tgsi_exec_channel *dst,
410 const union tgsi_exec_channel *src0,
411 const union tgsi_exec_channel *src1)
412 {
413 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
414 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
415 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
416 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
417 }
418
419 static void
420 micro_slt(union tgsi_exec_channel *dst,
421 const union tgsi_exec_channel *src0,
422 const union tgsi_exec_channel *src1)
423 {
424 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
425 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
426 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
427 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
428 }
429
430 static void
431 micro_sne(union tgsi_exec_channel *dst,
432 const union tgsi_exec_channel *src0,
433 const union tgsi_exec_channel *src1)
434 {
435 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
436 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
437 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
438 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
439 }
440
441 static void
442 micro_sfl(union tgsi_exec_channel *dst)
443 {
444 dst->f[0] = 0.0f;
445 dst->f[1] = 0.0f;
446 dst->f[2] = 0.0f;
447 dst->f[3] = 0.0f;
448 }
449
450 static void
451 micro_str(union tgsi_exec_channel *dst)
452 {
453 dst->f[0] = 1.0f;
454 dst->f[1] = 1.0f;
455 dst->f[2] = 1.0f;
456 dst->f[3] = 1.0f;
457 }
458
459 static void
460 micro_trunc(union tgsi_exec_channel *dst,
461 const union tgsi_exec_channel *src)
462 {
463 dst->f[0] = (float)(int)src->f[0];
464 dst->f[1] = (float)(int)src->f[1];
465 dst->f[2] = (float)(int)src->f[2];
466 dst->f[3] = (float)(int)src->f[3];
467 }
468
469
/**
 * Data type tags used by the executor: float, signed integer or unsigned
 * integer.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT = 0,
   TGSI_EXEC_DATA_INT   = 1,
   TGSI_EXEC_DATA_UINT  = 2
};
475
/*
 * Short aliases for the locations of utility registers.
 * The _I suffix is the register index, the _C suffix is the channel.
 */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
485
486
/**
 * Recompute the execution mask of a machine.
 *
 * ExecMask is the bitwise AND of the conditional mask, the loop mask, the
 * continue mask, the switch mask and the function mask.
 *
 * \param MACH  pointer to the machine; it is parenthesized in the expansion
 *              so that any pointer-valued expression may be passed.  It is
 *              evaluated several times, so it must have no side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
490
491
492 static const union tgsi_exec_channel ZeroVec =
493 { { 0.0, 0.0, 0.0, 0.0 } };
494
495 static const union tgsi_exec_channel OneVec = {
496 {1.0f, 1.0f, 1.0f, 1.0f}
497 };
498
499 static const union tgsi_exec_channel P128Vec = {
500 {128.0f, 128.0f, 128.0f, 128.0f}
501 };
502
503 static const union tgsi_exec_channel M128Vec = {
504 {-128.0f, -128.0f, -128.0f, -128.0f}
505 };
506
507
508 /**
509 * Assert that none of the float values in 'chan' are infinite or NaN.
510 * NaN and Inf may occur normally during program execution and should
511 * not lead to crashes, etc. But when debugging, it's helpful to catch
512 * them.
513 */
514 static INLINE void
515 check_inf_or_nan(const union tgsi_exec_channel *chan)
516 {
517 assert(!util_is_inf_or_nan((chan)->f[0]));
518 assert(!util_is_inf_or_nan((chan)->f[1]));
519 assert(!util_is_inf_or_nan((chan)->f[2]));
520 assert(!util_is_inf_or_nan((chan)->f[3]));
521 }
522
523
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of a channel, prefixed by 'msg'.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
532
533
#ifdef DEBUG
/**
 * Debug helper: print temporary register 'index' of the machine, one line
 * per X/Y/Z/W channel with the four float lanes of that channel.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char channel_names[] = "XYZW";
   const struct tgsi_exec_vector *vec = &mach->Temps[index];
   int c;

   debug_printf("Temp[%u] =\n", index);
   for (c = 0; c < 4; c++) {
      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   channel_names[c],
                   vec->xyzw[c].f[0],
                   vec->xyzw[c].f[1],
                   vec->xyzw[c].f[2],
                   vec->xyzw[c].f[3]);
   }
}
#endif
551
552
553 void
554 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
555 unsigned num_bufs,
556 const void **bufs,
557 const unsigned *buf_sizes)
558 {
559 unsigned i;
560
561 for (i = 0; i < num_bufs; i++) {
562 mach->Consts[i] = bufs[i];
563 mach->ConstsSize[i] = buf_sizes[i];
564 }
565 }
566
567
568 /**
569 * Check if there's a potential src/dst register data dependency when
570 * using SOA execution.
571 * Example:
572 * MOV T, T.yxwz;
573 * This would expand into:
574 * MOV t0, t1;
575 * MOV t1, t0;
576 * MOV t2, t3;
577 * MOV t3, t2;
578 * The second instruction will have the wrong value for t0 if executed as-is.
579 */
580 boolean
581 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
582 {
583 uint i, chan;
584
585 uint writemask = inst->Dst[0].Register.WriteMask;
586 if (writemask == TGSI_WRITEMASK_X ||
587 writemask == TGSI_WRITEMASK_Y ||
588 writemask == TGSI_WRITEMASK_Z ||
589 writemask == TGSI_WRITEMASK_W ||
590 writemask == TGSI_WRITEMASK_NONE) {
591 /* no chance of data dependency */
592 return FALSE;
593 }
594
595 /* loop over src regs */
596 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
597 if ((inst->Src[i].Register.File ==
598 inst->Dst[0].Register.File) &&
599 ((inst->Src[i].Register.Index ==
600 inst->Dst[0].Register.Index) ||
601 inst->Src[i].Register.Indirect ||
602 inst->Dst[0].Register.Indirect)) {
603 /* loop over dest channels */
604 uint channelsWritten = 0x0;
605 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
606 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
607 /* check if we're reading a channel that's been written */
608 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
609 if (channelsWritten & (1 << swizzle)) {
610 return TRUE;
611 }
612
613 channelsWritten |= (1 << chan);
614 }
615 }
616 }
617 }
618 return FALSE;
619 }
620
621
622 /**
623 * Initialize machine state by expanding tokens to full instructions,
624 * allocating temporary storage, setting up constants, etc.
625 * After this, we can call tgsi_exec_machine_run() many times.
626 */
627 void
628 tgsi_exec_machine_bind_shader(
629 struct tgsi_exec_machine *mach,
630 const struct tgsi_token *tokens,
631 uint numSamplers,
632 struct tgsi_sampler **samplers)
633 {
634 uint k;
635 struct tgsi_parse_context parse;
636 struct tgsi_full_instruction *instructions;
637 struct tgsi_full_declaration *declarations;
638 uint maxInstructions = 10, numInstructions = 0;
639 uint maxDeclarations = 10, numDeclarations = 0;
640
641 #if 0
642 tgsi_dump(tokens, 0);
643 #endif
644
645 util_init_math();
646
647 if (numSamplers) {
648 assert(samplers);
649 }
650
651 mach->Tokens = tokens;
652 mach->Samplers = samplers;
653
654 if (!tokens) {
655 /* unbind and free all */
656 if (mach->Declarations) {
657 FREE( mach->Declarations );
658 }
659 mach->Declarations = NULL;
660 mach->NumDeclarations = 0;
661
662 if (mach->Instructions) {
663 FREE( mach->Instructions );
664 }
665 mach->Instructions = NULL;
666 mach->NumInstructions = 0;
667
668 return;
669 }
670
671 k = tgsi_parse_init (&parse, mach->Tokens);
672 if (k != TGSI_PARSE_OK) {
673 debug_printf( "Problem parsing!\n" );
674 return;
675 }
676
677 mach->Processor = parse.FullHeader.Processor.Processor;
678 mach->ImmLimit = 0;
679
680 if (mach->Processor == TGSI_PROCESSOR_GEOMETRY &&
681 !mach->UsedGeometryShader) {
682 struct tgsi_exec_vector *inputs;
683 struct tgsi_exec_vector *outputs;
684
685 inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
686 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS,
687 16);
688
689 if (!inputs)
690 return;
691
692 outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
693 TGSI_MAX_TOTAL_VERTICES, 16);
694
695 if (!outputs) {
696 align_free(inputs);
697 return;
698 }
699
700 align_free(mach->Inputs);
701 align_free(mach->Outputs);
702
703 mach->Inputs = inputs;
704 mach->Outputs = outputs;
705 mach->UsedGeometryShader = TRUE;
706 }
707
708 declarations = (struct tgsi_full_declaration *)
709 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
710
711 if (!declarations) {
712 return;
713 }
714
715 instructions = (struct tgsi_full_instruction *)
716 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
717
718 if (!instructions) {
719 FREE( declarations );
720 return;
721 }
722
723 while( !tgsi_parse_end_of_tokens( &parse ) ) {
724 uint i;
725
726 tgsi_parse_token( &parse );
727 switch( parse.FullToken.Token.Type ) {
728 case TGSI_TOKEN_TYPE_DECLARATION:
729 /* save expanded declaration */
730 if (numDeclarations == maxDeclarations) {
731 declarations = REALLOC(declarations,
732 maxDeclarations
733 * sizeof(struct tgsi_full_declaration),
734 (maxDeclarations + 10)
735 * sizeof(struct tgsi_full_declaration));
736 maxDeclarations += 10;
737 }
738 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
739 unsigned reg;
740 for (reg = parse.FullToken.FullDeclaration.Range.First;
741 reg <= parse.FullToken.FullDeclaration.Range.Last;
742 ++reg) {
743 ++mach->NumOutputs;
744 }
745 }
746 if (parse.FullToken.FullDeclaration.Declaration.File ==
747 TGSI_FILE_IMMEDIATE_ARRAY) {
748 unsigned reg;
749 struct tgsi_full_declaration *decl =
750 &parse.FullToken.FullDeclaration;
751 debug_assert(decl->Range.Last < TGSI_EXEC_NUM_IMMEDIATES);
752 for (reg = decl->Range.First; reg <= decl->Range.Last; ++reg) {
753 for( i = 0; i < 4; i++ ) {
754 int idx = reg * 4 + i;
755 mach->ImmArray[reg][i] = decl->ImmediateData.u[idx].Float;
756 }
757 }
758 }
759 memcpy(declarations + numDeclarations,
760 &parse.FullToken.FullDeclaration,
761 sizeof(declarations[0]));
762 numDeclarations++;
763 break;
764
765 case TGSI_TOKEN_TYPE_IMMEDIATE:
766 {
767 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
768 assert( size <= 4 );
769 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
770
771 for( i = 0; i < size; i++ ) {
772 mach->Imms[mach->ImmLimit][i] =
773 parse.FullToken.FullImmediate.u[i].Float;
774 }
775 mach->ImmLimit += 1;
776 }
777 break;
778
779 case TGSI_TOKEN_TYPE_INSTRUCTION:
780
781 /* save expanded instruction */
782 if (numInstructions == maxInstructions) {
783 instructions = REALLOC(instructions,
784 maxInstructions
785 * sizeof(struct tgsi_full_instruction),
786 (maxInstructions + 10)
787 * sizeof(struct tgsi_full_instruction));
788 maxInstructions += 10;
789 }
790
791 memcpy(instructions + numInstructions,
792 &parse.FullToken.FullInstruction,
793 sizeof(instructions[0]));
794
795 numInstructions++;
796 break;
797
798 case TGSI_TOKEN_TYPE_PROPERTY:
799 break;
800
801 default:
802 assert( 0 );
803 }
804 }
805 tgsi_parse_free (&parse);
806
807 if (mach->Declarations) {
808 FREE( mach->Declarations );
809 }
810 mach->Declarations = declarations;
811 mach->NumDeclarations = numDeclarations;
812
813 if (mach->Instructions) {
814 FREE( mach->Instructions );
815 }
816 mach->Instructions = instructions;
817 mach->NumInstructions = numInstructions;
818 }
819
820
821 struct tgsi_exec_machine *
822 tgsi_exec_machine_create( void )
823 {
824 struct tgsi_exec_machine *mach;
825 uint i;
826
827 mach = align_malloc( sizeof *mach, 16 );
828 if (!mach)
829 goto fail;
830
831 memset(mach, 0, sizeof(*mach));
832
833 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
834 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
835 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
836
837 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
838 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
839 if (!mach->Inputs || !mach->Outputs)
840 goto fail;
841
842 /* Setup constants needed by the SSE2 executor. */
843 for( i = 0; i < 4; i++ ) {
844 mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000;
845 mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF;
846 mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000;
847 mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */
848 mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f;
849 mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */
850 mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f;
851 mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f;
852 mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f;
853 mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f;
854 }
855
856 #ifdef DEBUG
857 /* silence warnings */
858 (void) print_chan;
859 (void) print_temp;
860 #endif
861
862 return mach;
863
864 fail:
865 if (mach) {
866 align_free(mach->Inputs);
867 align_free(mach->Outputs);
868 align_free(mach);
869 }
870 return NULL;
871 }
872
873
874 void
875 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
876 {
877 if (mach) {
878 if (mach->Instructions)
879 FREE(mach->Instructions);
880 if (mach->Declarations)
881 FREE(mach->Declarations);
882
883 align_free(mach->Inputs);
884 align_free(mach->Outputs);
885
886 align_free(mach);
887 }
888 }
889
890 static void
891 micro_add(union tgsi_exec_channel *dst,
892 const union tgsi_exec_channel *src0,
893 const union tgsi_exec_channel *src1)
894 {
895 dst->f[0] = src0->f[0] + src1->f[0];
896 dst->f[1] = src0->f[1] + src1->f[1];
897 dst->f[2] = src0->f[2] + src1->f[2];
898 dst->f[3] = src0->f[3] + src1->f[3];
899 }
900
901 static void
902 micro_div(
903 union tgsi_exec_channel *dst,
904 const union tgsi_exec_channel *src0,
905 const union tgsi_exec_channel *src1 )
906 {
907 if (src1->f[0] != 0) {
908 dst->f[0] = src0->f[0] / src1->f[0];
909 }
910 if (src1->f[1] != 0) {
911 dst->f[1] = src0->f[1] / src1->f[1];
912 }
913 if (src1->f[2] != 0) {
914 dst->f[2] = src0->f[2] / src1->f[2];
915 }
916 if (src1->f[3] != 0) {
917 dst->f[3] = src0->f[3] / src1->f[3];
918 }
919 }
920
921 static void
922 micro_rcc(union tgsi_exec_channel *dst,
923 const union tgsi_exec_channel *src)
924 {
925 uint i;
926
927 for (i = 0; i < 4; i++) {
928 float recip = 1.0f / src->f[i];
929
930 if (recip > 0.0f) {
931 if (recip > 1.884467e+019f) {
932 dst->f[i] = 1.884467e+019f;
933 }
934 else if (recip < 5.42101e-020f) {
935 dst->f[i] = 5.42101e-020f;
936 }
937 else {
938 dst->f[i] = recip;
939 }
940 }
941 else {
942 if (recip < -1.884467e+019f) {
943 dst->f[i] = -1.884467e+019f;
944 }
945 else if (recip > -5.42101e-020f) {
946 dst->f[i] = -5.42101e-020f;
947 }
948 else {
949 dst->f[i] = recip;
950 }
951 }
952 }
953 }
954
955 static void
956 micro_lt(
957 union tgsi_exec_channel *dst,
958 const union tgsi_exec_channel *src0,
959 const union tgsi_exec_channel *src1,
960 const union tgsi_exec_channel *src2,
961 const union tgsi_exec_channel *src3 )
962 {
963 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
964 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
965 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
966 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
967 }
968
969 static void
970 micro_max(union tgsi_exec_channel *dst,
971 const union tgsi_exec_channel *src0,
972 const union tgsi_exec_channel *src1)
973 {
974 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
975 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
976 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
977 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
978 }
979
980 static void
981 micro_min(union tgsi_exec_channel *dst,
982 const union tgsi_exec_channel *src0,
983 const union tgsi_exec_channel *src1)
984 {
985 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
986 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
987 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
988 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
989 }
990
991 static void
992 micro_mul(union tgsi_exec_channel *dst,
993 const union tgsi_exec_channel *src0,
994 const union tgsi_exec_channel *src1)
995 {
996 dst->f[0] = src0->f[0] * src1->f[0];
997 dst->f[1] = src0->f[1] * src1->f[1];
998 dst->f[2] = src0->f[2] * src1->f[2];
999 dst->f[3] = src0->f[3] * src1->f[3];
1000 }
1001
1002 static void
1003 micro_neg(
1004 union tgsi_exec_channel *dst,
1005 const union tgsi_exec_channel *src )
1006 {
1007 dst->f[0] = -src->f[0];
1008 dst->f[1] = -src->f[1];
1009 dst->f[2] = -src->f[2];
1010 dst->f[3] = -src->f[3];
1011 }
1012
1013 static void
1014 micro_pow(
1015 union tgsi_exec_channel *dst,
1016 const union tgsi_exec_channel *src0,
1017 const union tgsi_exec_channel *src1 )
1018 {
1019 #if FAST_MATH
1020 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
1021 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
1022 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
1023 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
1024 #else
1025 dst->f[0] = powf( src0->f[0], src1->f[0] );
1026 dst->f[1] = powf( src0->f[1], src1->f[1] );
1027 dst->f[2] = powf( src0->f[2], src1->f[2] );
1028 dst->f[3] = powf( src0->f[3], src1->f[3] );
1029 #endif
1030 }
1031
1032 static void
1033 micro_sub(union tgsi_exec_channel *dst,
1034 const union tgsi_exec_channel *src0,
1035 const union tgsi_exec_channel *src1)
1036 {
1037 dst->f[0] = src0->f[0] - src1->f[0];
1038 dst->f[1] = src0->f[1] - src1->f[1];
1039 dst->f[2] = src0->f[2] - src1->f[2];
1040 dst->f[3] = src0->f[3] - src1->f[3];
1041 }
1042
1043 static void
1044 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1045 const uint chan_index,
1046 const uint file,
1047 const uint swizzle,
1048 const union tgsi_exec_channel *index,
1049 const union tgsi_exec_channel *index2D,
1050 union tgsi_exec_channel *chan)
1051 {
1052 uint i;
1053
1054 assert(swizzle < 4);
1055
1056 switch (file) {
1057 case TGSI_FILE_CONSTANT:
1058 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1059 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
1060 assert(mach->Consts[index2D->i[i]]);
1061
1062 if (index->i[i] < 0) {
1063 chan->u[i] = 0;
1064 } else {
1065 /* NOTE: copying the const value as a uint instead of float */
1066 const uint constbuf = index2D->i[i];
1067 const uint *buf = (const uint *)mach->Consts[constbuf];
1068 const int pos = index->i[i] * 4 + swizzle;
1069 /* const buffer bounds check */
1070 if (pos < 0 || pos >= mach->ConstsSize[constbuf]) {
1071 if (0) {
1072 /* Debug: print warning */
1073 static int count = 0;
1074 if (count++ < 100)
1075 debug_printf("TGSI Exec: const buffer index %d"
1076 " out of bounds\n", pos);
1077 }
1078 chan->u[i] = 0;
1079 }
1080 else
1081 chan->u[i] = buf[pos];
1082 }
1083 }
1084 break;
1085
1086 case TGSI_FILE_INPUT:
1087 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1088 /*
1089 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1090 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1091 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1092 index2D->i[i], index->i[i]);
1093 }*/
1094 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1095 assert(pos >= 0);
1096 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1097 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1098 }
1099 break;
1100
1101 case TGSI_FILE_SYSTEM_VALUE:
1102 /* XXX no swizzling at this point. Will be needed if we put
1103 * gl_FragCoord, for example, in a sys value register.
1104 */
1105 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1106 chan->u[i] = mach->SystemValue[index->i[i]].u[i];
1107 }
1108 break;
1109
1110 case TGSI_FILE_TEMPORARY:
1111 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1112 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1113 assert(index2D->i[i] == 0);
1114
1115 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1116 }
1117 break;
1118
1119 case TGSI_FILE_TEMPORARY_ARRAY:
1120 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1121 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1122 assert(index2D->i[i] < TGSI_EXEC_NUM_TEMP_ARRAYS);
1123
1124 chan->u[i] =
1125 mach->TempArray[index2D->i[i]][index->i[i]].xyzw[swizzle].u[i];
1126 }
1127 break;
1128
1129 case TGSI_FILE_IMMEDIATE:
1130 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1131 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1132 assert(index2D->i[i] == 0);
1133
1134 chan->f[i] = mach->Imms[index->i[i]][swizzle];
1135 }
1136 break;
1137
1138 case TGSI_FILE_IMMEDIATE_ARRAY:
1139 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1140 assert(index2D->i[i] == 0);
1141
1142 chan->f[i] = mach->ImmArray[index->i[i]][swizzle];
1143 }
1144 break;
1145
1146 case TGSI_FILE_ADDRESS:
1147 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1148 assert(index->i[i] >= 0);
1149 assert(index2D->i[i] == 0);
1150
1151 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1152 }
1153 break;
1154
1155 case TGSI_FILE_PREDICATE:
1156 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1157 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS);
1158 assert(index2D->i[i] == 0);
1159
1160 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i];
1161 }
1162 break;
1163
1164 case TGSI_FILE_OUTPUT:
1165 /* vertex/fragment output vars can be read too */
1166 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1167 assert(index->i[i] >= 0);
1168 assert(index2D->i[i] == 0);
1169
1170 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1171 }
1172 break;
1173
1174 default:
1175 assert(0);
1176 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1177 chan->u[i] = 0;
1178 }
1179 }
1180 }
1181
1182 static void
1183 fetch_source(const struct tgsi_exec_machine *mach,
1184 union tgsi_exec_channel *chan,
1185 const struct tgsi_full_src_register *reg,
1186 const uint chan_index,
1187 enum tgsi_exec_datatype src_datatype)
1188 {
1189 union tgsi_exec_channel index;
1190 union tgsi_exec_channel index2D;
1191 uint swizzle;
1192
1193 /* We start with a direct index into a register file.
1194 *
1195 * file[1],
1196 * where:
1197 * file = Register.File
1198 * [1] = Register.Index
1199 */
1200 index.i[0] =
1201 index.i[1] =
1202 index.i[2] =
1203 index.i[3] = reg->Register.Index;
1204
1205 /* There is an extra source register that indirectly subscripts
1206 * a register file. The direct index now becomes an offset
1207 * that is being added to the indirect register.
1208 *
1209 * file[ind[2].x+1],
1210 * where:
1211 * ind = Indirect.File
1212 * [2] = Indirect.Index
1213 * .x = Indirect.SwizzleX
1214 */
1215 if (reg->Register.Indirect) {
1216 union tgsi_exec_channel index2;
1217 union tgsi_exec_channel indir_index;
1218 const uint execmask = mach->ExecMask;
1219 uint i;
1220
1221 /* which address register (always zero now) */
1222 index2.i[0] =
1223 index2.i[1] =
1224 index2.i[2] =
1225 index2.i[3] = reg->Indirect.Index;
1226 assert(reg->Indirect.File == TGSI_FILE_ADDRESS);
1227 /* get current value of address register[swizzle] */
1228 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, TGSI_CHAN_X );
1229 fetch_src_file_channel(mach,
1230 chan_index,
1231 reg->Indirect.File,
1232 swizzle,
1233 &index2,
1234 &ZeroVec,
1235 &indir_index);
1236
1237 /* add value of address register to the offset */
1238 index.i[0] += indir_index.i[0];
1239 index.i[1] += indir_index.i[1];
1240 index.i[2] += indir_index.i[2];
1241 index.i[3] += indir_index.i[3];
1242
1243 /* for disabled execution channels, zero-out the index to
1244 * avoid using a potential garbage value.
1245 */
1246 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1247 if ((execmask & (1 << i)) == 0)
1248 index.i[i] = 0;
1249 }
1250 }
1251
1252 /* There is an extra source register that is a second
1253 * subscript to a register file. Effectively it means that
1254 * the register file is actually a 2D array of registers.
1255 *
1256 * file[3][1],
1257 * where:
1258 * [3] = Dimension.Index
1259 */
1260 if (reg->Register.Dimension) {
1261 index2D.i[0] =
1262 index2D.i[1] =
1263 index2D.i[2] =
1264 index2D.i[3] = reg->Dimension.Index;
1265
1266 /* Again, the second subscript index can be addressed indirectly
1267 * identically to the first one.
1268 * Nothing stops us from indirectly addressing the indirect register,
1269 * but there is no need for that, so we won't exercise it.
1270 *
1271 * file[ind[4].y+3][1],
1272 * where:
1273 * ind = DimIndirect.File
1274 * [4] = DimIndirect.Index
1275 * .y = DimIndirect.SwizzleX
1276 */
1277 if (reg->Dimension.Indirect) {
1278 union tgsi_exec_channel index2;
1279 union tgsi_exec_channel indir_index;
1280 const uint execmask = mach->ExecMask;
1281 uint i;
1282
1283 index2.i[0] =
1284 index2.i[1] =
1285 index2.i[2] =
1286 index2.i[3] = reg->DimIndirect.Index;
1287
1288 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, TGSI_CHAN_X );
1289 fetch_src_file_channel(mach,
1290 chan_index,
1291 reg->DimIndirect.File,
1292 swizzle,
1293 &index2,
1294 &ZeroVec,
1295 &indir_index);
1296
1297 index2D.i[0] += indir_index.i[0];
1298 index2D.i[1] += indir_index.i[1];
1299 index2D.i[2] += indir_index.i[2];
1300 index2D.i[3] += indir_index.i[3];
1301
1302 /* for disabled execution channels, zero-out the index to
1303 * avoid using a potential garbage value.
1304 */
1305 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1306 if ((execmask & (1 << i)) == 0) {
1307 index2D.i[i] = 0;
1308 }
1309 }
1310 }
1311
1312 /* If by any chance there was a need for a 3D array of register
1313 * files, we would have to check whether Dimension is followed
1314 * by a dimension register and continue the saga.
1315 */
1316 } else {
1317 index2D.i[0] =
1318 index2D.i[1] =
1319 index2D.i[2] =
1320 index2D.i[3] = 0;
1321 }
1322
1323 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1324 fetch_src_file_channel(mach,
1325 chan_index,
1326 reg->Register.File,
1327 swizzle,
1328 &index,
1329 &index2D,
1330 chan);
1331
1332 if (reg->Register.Absolute) {
1333 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1334 micro_abs(chan, chan);
1335 } else {
1336 micro_iabs(chan, chan);
1337 }
1338 }
1339
1340 if (reg->Register.Negate) {
1341 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1342 micro_neg(chan, chan);
1343 } else {
1344 micro_ineg(chan, chan);
1345 }
1346 }
1347 }
1348
1349 static void
1350 store_dest(struct tgsi_exec_machine *mach,
1351 const union tgsi_exec_channel *chan,
1352 const struct tgsi_full_dst_register *reg,
1353 const struct tgsi_full_instruction *inst,
1354 uint chan_index,
1355 enum tgsi_exec_datatype dst_datatype)
1356 {
1357 uint i;
1358 union tgsi_exec_channel null;
1359 union tgsi_exec_channel *dst;
1360 union tgsi_exec_channel index2D;
1361 uint execmask = mach->ExecMask;
1362 int offset = 0; /* indirection offset */
1363 int index;
1364
1365 /* for debugging */
1366 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1367 check_inf_or_nan(chan);
1368 }
1369
1370 /* There is an extra source register that indirectly subscripts
1371 * a register file. The direct index now becomes an offset
1372 * that is being added to the indirect register.
1373 *
1374 * file[ind[2].x+1],
1375 * where:
1376 * ind = Indirect.File
1377 * [2] = Indirect.Index
1378 * .x = Indirect.SwizzleX
1379 */
1380 if (reg->Register.Indirect) {
1381 union tgsi_exec_channel index;
1382 union tgsi_exec_channel indir_index;
1383 uint swizzle;
1384
1385 /* which address register (always zero for now) */
1386 index.i[0] =
1387 index.i[1] =
1388 index.i[2] =
1389 index.i[3] = reg->Indirect.Index;
1390
1391 /* get current value of address register[swizzle] */
1392 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, TGSI_CHAN_X );
1393
1394 /* fetch values from the address/indirection register */
1395 fetch_src_file_channel(mach,
1396 chan_index,
1397 reg->Indirect.File,
1398 swizzle,
1399 &index,
1400 &ZeroVec,
1401 &indir_index);
1402
1403 /* save indirection offset */
1404 offset = indir_index.i[0];
1405 }
1406
1407 /* There is an extra source register that is a second
1408 * subscript to a register file. Effectively it means that
1409 * the register file is actually a 2D array of registers.
1410 *
1411 * file[3][1],
1412 * where:
1413 * [3] = Dimension.Index
1414 */
1415 if (reg->Register.Dimension) {
1416 index2D.i[0] =
1417 index2D.i[1] =
1418 index2D.i[2] =
1419 index2D.i[3] = reg->Dimension.Index;
1420
1421 /* Again, the second subscript index can be addressed indirectly
1422 * identically to the first one.
1423 * Nothing stops us from indirectly addressing the indirect register,
1424 * but there is no need for that, so we won't exercise it.
1425 *
1426 * file[ind[4].y+3][1],
1427 * where:
1428 * ind = DimIndirect.File
1429 * [4] = DimIndirect.Index
1430 * .y = DimIndirect.SwizzleX
1431 */
1432 if (reg->Dimension.Indirect) {
1433 union tgsi_exec_channel index2;
1434 union tgsi_exec_channel indir_index;
1435 const uint execmask = mach->ExecMask;
1436 unsigned swizzle;
1437 uint i;
1438
1439 index2.i[0] =
1440 index2.i[1] =
1441 index2.i[2] =
1442 index2.i[3] = reg->DimIndirect.Index;
1443
1444 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, TGSI_CHAN_X );
1445 fetch_src_file_channel(mach,
1446 chan_index,
1447 reg->DimIndirect.File,
1448 swizzle,
1449 &index2,
1450 &ZeroVec,
1451 &indir_index);
1452
1453 index2D.i[0] += indir_index.i[0];
1454 index2D.i[1] += indir_index.i[1];
1455 index2D.i[2] += indir_index.i[2];
1456 index2D.i[3] += indir_index.i[3];
1457
1458 /* for disabled execution channels, zero-out the index to
1459 * avoid using a potential garbage value.
1460 */
1461 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1462 if ((execmask & (1 << i)) == 0) {
1463 index2D.i[i] = 0;
1464 }
1465 }
1466 }
1467
1468 /* If by any chance there was a need for a 3D array of register
1469 * files, we would have to check whether Dimension is followed
1470 * by a dimension register and continue the saga.
1471 */
1472 } else {
1473 index2D.i[0] =
1474 index2D.i[1] =
1475 index2D.i[2] =
1476 index2D.i[3] = 0;
1477 }
1478
1479 switch (reg->Register.File) {
1480 case TGSI_FILE_NULL:
1481 dst = &null;
1482 break;
1483
1484 case TGSI_FILE_OUTPUT:
1485 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1486 + reg->Register.Index;
1487 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1488 #if 0
1489 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1490 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1491 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1492 if (execmask & (1 << i))
1493 fprintf(stderr, "%f, ", chan->f[i]);
1494 fprintf(stderr, ")\n");
1495 }
1496 #endif
1497 break;
1498
1499 case TGSI_FILE_TEMPORARY:
1500 index = reg->Register.Index;
1501 assert( index < TGSI_EXEC_NUM_TEMPS );
1502 dst = &mach->Temps[offset + index].xyzw[chan_index];
1503 break;
1504
1505 case TGSI_FILE_TEMPORARY_ARRAY:
1506 index = reg->Register.Index;
1507 assert( index < TGSI_EXEC_NUM_TEMPS );
1508 assert( index2D.i[0] < TGSI_EXEC_NUM_TEMP_ARRAYS );
1509 /* XXX we use index2D.i[0] here but somehow we might
1510 * end up with someone trying to store indirectly in
1511 * different buffers */
1512 dst = &mach->TempArray[index2D.i[0]][offset + index].xyzw[chan_index];
1513 break;
1514
1515 case TGSI_FILE_ADDRESS:
1516 index = reg->Register.Index;
1517 dst = &mach->Addrs[index].xyzw[chan_index];
1518 break;
1519
1520 case TGSI_FILE_PREDICATE:
1521 index = reg->Register.Index;
1522 assert(index < TGSI_EXEC_NUM_PREDS);
1523 dst = &mach->Predicates[index].xyzw[chan_index];
1524 break;
1525
1526 default:
1527 assert( 0 );
1528 return;
1529 }
1530
1531 if (inst->Instruction.Predicate) {
1532 uint swizzle;
1533 union tgsi_exec_channel *pred;
1534
1535 switch (chan_index) {
1536 case TGSI_CHAN_X:
1537 swizzle = inst->Predicate.SwizzleX;
1538 break;
1539 case TGSI_CHAN_Y:
1540 swizzle = inst->Predicate.SwizzleY;
1541 break;
1542 case TGSI_CHAN_Z:
1543 swizzle = inst->Predicate.SwizzleZ;
1544 break;
1545 case TGSI_CHAN_W:
1546 swizzle = inst->Predicate.SwizzleW;
1547 break;
1548 default:
1549 assert(0);
1550 return;
1551 }
1552
1553 assert(inst->Predicate.Index == 0);
1554
1555 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1556
1557 if (inst->Predicate.Negate) {
1558 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1559 if (pred->u[i]) {
1560 execmask &= ~(1 << i);
1561 }
1562 }
1563 } else {
1564 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1565 if (!pred->u[i]) {
1566 execmask &= ~(1 << i);
1567 }
1568 }
1569 }
1570 }
1571
1572 switch (inst->Instruction.Saturate) {
1573 case TGSI_SAT_NONE:
1574 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1575 if (execmask & (1 << i))
1576 dst->i[i] = chan->i[i];
1577 break;
1578
1579 case TGSI_SAT_ZERO_ONE:
1580 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1581 if (execmask & (1 << i)) {
1582 if (chan->f[i] < 0.0f)
1583 dst->f[i] = 0.0f;
1584 else if (chan->f[i] > 1.0f)
1585 dst->f[i] = 1.0f;
1586 else
1587 dst->i[i] = chan->i[i];
1588 }
1589 break;
1590
1591 case TGSI_SAT_MINUS_PLUS_ONE:
1592 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1593 if (execmask & (1 << i)) {
1594 if (chan->f[i] < -1.0f)
1595 dst->f[i] = -1.0f;
1596 else if (chan->f[i] > 1.0f)
1597 dst->f[i] = 1.0f;
1598 else
1599 dst->i[i] = chan->i[i];
1600 }
1601 break;
1602
1603 default:
1604 assert( 0 );
1605 }
1606 }
1607
/*
 * Convenience wrappers around fetch_source() for instruction handlers.
 * Both expect `mach` and `inst` to be in scope at the point of use and
 * read channel CHAN of source operand INDEX into VAL.
 */

/* fetch with float abs/negate modifier handling */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

/* fetch with integer abs/negate modifier handling */
#define IFETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)
1613
1614
1615 /**
1616 * Execute ARB-style KIL which is predicated by a src register.
1617 * Kill fragment if any of the four values is less than zero.
1618 */
1619 static void
1620 exec_kil(struct tgsi_exec_machine *mach,
1621 const struct tgsi_full_instruction *inst)
1622 {
1623 uint uniquemask;
1624 uint chan_index;
1625 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1626 union tgsi_exec_channel r[1];
1627
1628 /* This mask stores component bits that were already tested. */
1629 uniquemask = 0;
1630
1631 for (chan_index = 0; chan_index < 4; chan_index++)
1632 {
1633 uint swizzle;
1634 uint i;
1635
1636 /* unswizzle channel */
1637 swizzle = tgsi_util_get_full_src_register_swizzle (
1638 &inst->Src[0],
1639 chan_index);
1640
1641 /* check if the component has not been already tested */
1642 if (uniquemask & (1 << swizzle))
1643 continue;
1644 uniquemask |= 1 << swizzle;
1645
1646 FETCH(&r[0], 0, chan_index);
1647 for (i = 0; i < 4; i++)
1648 if (r[0].f[i] < 0.0f)
1649 kilmask |= 1 << i;
1650 }
1651
1652 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1653 }
1654
1655 /**
1656 * Execute NVIDIA-style KIL which is predicated by a condition code.
1657 * Kill fragment if the condition code is TRUE.
1658 */
1659 static void
1660 exec_kilp(struct tgsi_exec_machine *mach,
1661 const struct tgsi_full_instruction *inst)
1662 {
1663 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1664
1665 /* "unconditional" kil */
1666 kilmask = mach->ExecMask;
1667 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1668 }
1669
1670 static void
1671 emit_vertex(struct tgsi_exec_machine *mach)
1672 {
1673 /* FIXME: check for exec mask correctly
1674 unsigned i;
1675 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1676 if ((mach->ExecMask & (1 << i)))
1677 */
1678 if (mach->ExecMask) {
1679 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
1680 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1681 }
1682 }
1683
1684 static void
1685 emit_primitive(struct tgsi_exec_machine *mach)
1686 {
1687 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
1688 /* FIXME: check for exec mask correctly
1689 unsigned i;
1690 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1691 if ((mach->ExecMask & (1 << i)))
1692 */
1693 if (mach->ExecMask) {
1694 ++(*prim_count);
1695 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
1696 mach->Primitives[*prim_count] = 0;
1697 }
1698 }
1699
1700 static void
1701 conditional_emit_primitive(struct tgsi_exec_machine *mach)
1702 {
1703 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1704 int emitted_verts =
1705 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
1706 if (emitted_verts) {
1707 emit_primitive(mach);
1708 }
1709 }
1710 }
1711
1712
1713 /*
1714 * Fetch four texture samples using STR texture coordinates.
1715 */
1716 static void
1717 fetch_texel( struct tgsi_sampler *sampler,
1718 const union tgsi_exec_channel *s,
1719 const union tgsi_exec_channel *t,
1720 const union tgsi_exec_channel *p,
1721 const union tgsi_exec_channel *c0,
1722 enum tgsi_sampler_control control,
1723 union tgsi_exec_channel *r,
1724 union tgsi_exec_channel *g,
1725 union tgsi_exec_channel *b,
1726 union tgsi_exec_channel *a )
1727 {
1728 uint j;
1729 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1730
1731 sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba);
1732
1733 for (j = 0; j < 4; j++) {
1734 r->f[j] = rgba[0][j];
1735 g->f[j] = rgba[1][j];
1736 b->f[j] = rgba[2][j];
1737 a->f[j] = rgba[3][j];
1738 }
1739 }
1740
1741
/* Texture instruction variants, passed as the `modifier` argument of
 * exec_tex() and exec_sample().
 */
#define TEX_MODIFIER_NONE           0  /* plain sample */
#define TEX_MODIFIER_PROJECTED      1  /* coordinates are divided first */
#define TEX_MODIFIER_LOD_BIAS       2  /* extra value is used as LOD bias */
#define TEX_MODIFIER_EXPLICIT_LOD   3  /* extra value is the explicit LOD */
1746
1747
1748 static void
1749 exec_tex(struct tgsi_exec_machine *mach,
1750 const struct tgsi_full_instruction *inst,
1751 uint modifier)
1752 {
1753 const uint unit = inst->Src[1].Register.Index;
1754 union tgsi_exec_channel r[4];
1755 const union tgsi_exec_channel *lod = &ZeroVec;
1756 enum tgsi_sampler_control control;
1757 uint chan;
1758
1759 if (modifier != TEX_MODIFIER_NONE) {
1760 FETCH(&r[3], 0, TGSI_CHAN_W);
1761 if (modifier != TEX_MODIFIER_PROJECTED) {
1762 lod = &r[3];
1763 }
1764 }
1765
1766 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
1767 control = tgsi_sampler_lod_explicit;
1768 } else {
1769 control = tgsi_sampler_lod_bias;
1770 }
1771
1772 switch (inst->Texture.Texture) {
1773 case TGSI_TEXTURE_1D:
1774 FETCH(&r[0], 0, TGSI_CHAN_X);
1775
1776 if (modifier == TEX_MODIFIER_PROJECTED) {
1777 micro_div(&r[0], &r[0], &r[3]);
1778 }
1779
1780 fetch_texel(mach->Samplers[unit],
1781 &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */
1782 control,
1783 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1784 break;
1785 case TGSI_TEXTURE_SHADOW1D:
1786 FETCH(&r[0], 0, TGSI_CHAN_X);
1787 FETCH(&r[2], 0, TGSI_CHAN_Z);
1788
1789 if (modifier == TEX_MODIFIER_PROJECTED) {
1790 micro_div(&r[0], &r[0], &r[3]);
1791 }
1792
1793 fetch_texel(mach->Samplers[unit],
1794 &r[0], &ZeroVec, &r[2], lod, /* S, T, P, LOD */
1795 control,
1796 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1797 break;
1798
1799 case TGSI_TEXTURE_2D:
1800 case TGSI_TEXTURE_RECT:
1801 case TGSI_TEXTURE_SHADOW2D:
1802 case TGSI_TEXTURE_SHADOWRECT:
1803 FETCH(&r[0], 0, TGSI_CHAN_X);
1804 FETCH(&r[1], 0, TGSI_CHAN_Y);
1805 FETCH(&r[2], 0, TGSI_CHAN_Z);
1806
1807 if (modifier == TEX_MODIFIER_PROJECTED) {
1808 micro_div(&r[0], &r[0], &r[3]);
1809 micro_div(&r[1], &r[1], &r[3]);
1810 micro_div(&r[2], &r[2], &r[3]);
1811 }
1812
1813 fetch_texel(mach->Samplers[unit],
1814 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
1815 control,
1816 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1817 break;
1818
1819 case TGSI_TEXTURE_1D_ARRAY:
1820 FETCH(&r[0], 0, TGSI_CHAN_X);
1821 FETCH(&r[1], 0, TGSI_CHAN_Y);
1822
1823 if (modifier == TEX_MODIFIER_PROJECTED) {
1824 micro_div(&r[0], &r[0], &r[3]);
1825 }
1826
1827 fetch_texel(mach->Samplers[unit],
1828 &r[0], &r[1], &ZeroVec, lod, /* S, T, P, LOD */
1829 control,
1830 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1831 break;
1832 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1833 FETCH(&r[0], 0, TGSI_CHAN_X);
1834 FETCH(&r[1], 0, TGSI_CHAN_Y);
1835 FETCH(&r[2], 0, TGSI_CHAN_Z);
1836
1837 if (modifier == TEX_MODIFIER_PROJECTED) {
1838 micro_div(&r[0], &r[0], &r[3]);
1839 }
1840
1841 fetch_texel(mach->Samplers[unit],
1842 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
1843 control,
1844 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1845 break;
1846
1847 case TGSI_TEXTURE_2D_ARRAY:
1848 FETCH(&r[0], 0, TGSI_CHAN_X);
1849 FETCH(&r[1], 0, TGSI_CHAN_Y);
1850 FETCH(&r[2], 0, TGSI_CHAN_Z);
1851
1852 if (modifier == TEX_MODIFIER_PROJECTED) {
1853 micro_div(&r[0], &r[0], &r[3]);
1854 micro_div(&r[1], &r[1], &r[3]);
1855 }
1856
1857 fetch_texel(mach->Samplers[unit],
1858 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
1859 control,
1860 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1861 break;
1862 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1863 case TGSI_TEXTURE_SHADOWCUBE:
1864 FETCH(&r[0], 0, TGSI_CHAN_X);
1865 FETCH(&r[1], 0, TGSI_CHAN_Y);
1866 FETCH(&r[2], 0, TGSI_CHAN_Z);
1867 FETCH(&r[3], 0, TGSI_CHAN_W);
1868
1869 fetch_texel(mach->Samplers[unit],
1870 &r[0], &r[1], &r[2], &r[3], /* S, T, P, LOD */
1871 control,
1872 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1873 break;
1874 case TGSI_TEXTURE_3D:
1875 case TGSI_TEXTURE_CUBE:
1876 FETCH(&r[0], 0, TGSI_CHAN_X);
1877 FETCH(&r[1], 0, TGSI_CHAN_Y);
1878 FETCH(&r[2], 0, TGSI_CHAN_Z);
1879
1880 if (modifier == TEX_MODIFIER_PROJECTED) {
1881 micro_div(&r[0], &r[0], &r[3]);
1882 micro_div(&r[1], &r[1], &r[3]);
1883 micro_div(&r[2], &r[2], &r[3]);
1884 }
1885
1886 fetch_texel(mach->Samplers[unit],
1887 &r[0], &r[1], &r[2], lod,
1888 control,
1889 &r[0], &r[1], &r[2], &r[3]);
1890 break;
1891
1892 default:
1893 assert(0);
1894 }
1895
1896 #if 0
1897 debug_printf("fetch r: %g %g %g %g\n",
1898 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
1899 debug_printf("fetch g: %g %g %g %g\n",
1900 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
1901 debug_printf("fetch b: %g %g %g %g\n",
1902 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
1903 debug_printf("fetch a: %g %g %g %g\n",
1904 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
1905 #endif
1906
1907 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
1908 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1909 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1910 }
1911 }
1912 }
1913
1914 static void
1915 exec_txd(struct tgsi_exec_machine *mach,
1916 const struct tgsi_full_instruction *inst)
1917 {
1918 const uint unit = inst->Src[3].Register.Index;
1919 union tgsi_exec_channel r[4];
1920 uint chan;
1921
1922 /*
1923 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1924 */
1925
1926 switch (inst->Texture.Texture) {
1927 case TGSI_TEXTURE_1D:
1928 case TGSI_TEXTURE_SHADOW1D:
1929
1930 FETCH(&r[0], 0, TGSI_CHAN_X);
1931
1932 fetch_texel(mach->Samplers[unit],
1933 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */
1934 tgsi_sampler_lod_bias,
1935 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1936 break;
1937
1938 case TGSI_TEXTURE_1D_ARRAY:
1939 case TGSI_TEXTURE_2D:
1940 case TGSI_TEXTURE_RECT:
1941 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1942 case TGSI_TEXTURE_SHADOW2D:
1943 case TGSI_TEXTURE_SHADOWRECT:
1944
1945 FETCH(&r[0], 0, TGSI_CHAN_X);
1946 FETCH(&r[1], 0, TGSI_CHAN_Y);
1947 FETCH(&r[2], 0, TGSI_CHAN_Z);
1948
1949 fetch_texel(mach->Samplers[unit],
1950 &r[0], &r[1], &r[2], &ZeroVec, /* inputs */
1951 tgsi_sampler_lod_bias,
1952 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1953 break;
1954
1955 case TGSI_TEXTURE_2D_ARRAY:
1956 case TGSI_TEXTURE_3D:
1957 case TGSI_TEXTURE_CUBE:
1958
1959 FETCH(&r[0], 0, TGSI_CHAN_X);
1960 FETCH(&r[1], 0, TGSI_CHAN_Y);
1961 FETCH(&r[2], 0, TGSI_CHAN_Z);
1962
1963 fetch_texel(mach->Samplers[unit],
1964 &r[0], &r[1], &r[2], &ZeroVec,
1965 tgsi_sampler_lod_bias,
1966 &r[0], &r[1], &r[2], &r[3]);
1967 break;
1968
1969 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1970
1971 FETCH(&r[0], 0, TGSI_CHAN_X);
1972 FETCH(&r[1], 0, TGSI_CHAN_Y);
1973 FETCH(&r[2], 0, TGSI_CHAN_Z);
1974 FETCH(&r[3], 0, TGSI_CHAN_W);
1975
1976 fetch_texel(mach->Samplers[unit],
1977 &r[0], &r[1], &r[2], &r[3],
1978 tgsi_sampler_lod_bias,
1979 &r[0], &r[1], &r[2], &r[3]);
1980 break;
1981
1982 default:
1983 assert(0);
1984 }
1985
1986 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
1987 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1988 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1989 }
1990 }
1991 }
1992
1993
1994 static void
1995 exec_txf(struct tgsi_exec_machine *mach,
1996 const struct tgsi_full_instruction *inst)
1997 {
1998 struct tgsi_sampler *sampler;
1999 const uint unit = inst->Src[2].Register.Index;
2000 union tgsi_exec_channel r[4];
2001 union tgsi_exec_channel offset[3];
2002 uint chan;
2003 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2004 int j;
2005 int8_t offsets[3];
2006
2007 if (inst->Texture.NumOffsets == 1) {
2008 union tgsi_exec_channel index;
2009 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
2010 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2011 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
2012 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2013 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
2014 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2015 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
2016 offsets[0] = offset[0].i[0];
2017 offsets[1] = offset[1].i[0];
2018 offsets[2] = offset[2].i[0];
2019 } else
2020 offsets[0] = offsets[1] = offsets[2] = 0;
2021
2022 IFETCH(&r[3], 0, TGSI_CHAN_W);
2023
2024 switch(inst->Texture.Texture) {
2025 case TGSI_TEXTURE_3D:
2026 case TGSI_TEXTURE_2D_ARRAY:
2027 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2028 IFETCH(&r[2], 0, TGSI_CHAN_Z);
2029 /* fallthrough */
2030 case TGSI_TEXTURE_2D:
2031 case TGSI_TEXTURE_RECT:
2032 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2033 case TGSI_TEXTURE_SHADOW2D:
2034 case TGSI_TEXTURE_SHADOWRECT:
2035 case TGSI_TEXTURE_1D_ARRAY:
2036 IFETCH(&r[1], 0, TGSI_CHAN_Y);
2037 /* fallthrough */
2038 case TGSI_TEXTURE_1D:
2039 case TGSI_TEXTURE_SHADOW1D:
2040 IFETCH(&r[0], 0, TGSI_CHAN_X);
2041 break;
2042 default:
2043 assert(0);
2044 break;
2045 }
2046
2047 sampler = mach->Samplers[unit];
2048 sampler->get_texel(sampler, r[0].i, r[1].i, r[2].i, r[3].i,
2049 offsets, rgba);
2050
2051 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2052 r[0].f[j] = rgba[0][j];
2053 r[1].f[j] = rgba[1][j];
2054 r[2].f[j] = rgba[2][j];
2055 r[3].f[j] = rgba[3][j];
2056 }
2057
2058 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2059 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2060 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2061 }
2062 }
2063 }
2064
2065 static void
2066 exec_txq(struct tgsi_exec_machine *mach,
2067 const struct tgsi_full_instruction *inst)
2068 {
2069 struct tgsi_sampler *sampler;
2070 const uint unit = inst->Src[1].Register.Index;
2071 int result[4];
2072 union tgsi_exec_channel r[4], src;
2073 uint chan;
2074 int i,j;
2075
2076 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2077 sampler = mach->Samplers[unit];
2078
2079 sampler->get_dims(sampler, src.i[0], result);
2080
2081 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2082 for (j = 0; j < 4; j++) {
2083 r[j].i[i] = result[j];
2084 }
2085 }
2086
2087 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2088 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2089 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
2090 TGSI_EXEC_DATA_INT);
2091 }
2092 }
2093 }
2094
2095 static void
2096 exec_sample(struct tgsi_exec_machine *mach,
2097 const struct tgsi_full_instruction *inst,
2098 uint modifier)
2099 {
2100 const uint resource_unit = inst->Src[1].Register.Index;
2101 const uint sampler_unit = inst->Src[2].Register.Index;
2102 union tgsi_exec_channel r[4];
2103 const union tgsi_exec_channel *lod = &ZeroVec;
2104 enum tgsi_sampler_control control;
2105 uint chan;
2106
2107 if (modifier != TEX_MODIFIER_NONE) {
2108 if (modifier == TEX_MODIFIER_LOD_BIAS)
2109 FETCH(&r[3], 3, TGSI_CHAN_X);
2110 else /*TEX_MODIFIER_LOD*/
2111 FETCH(&r[3], 0, TGSI_CHAN_W);
2112
2113 if (modifier != TEX_MODIFIER_PROJECTED) {
2114 lod = &r[3];
2115 }
2116 }
2117
2118 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2119 control = tgsi_sampler_lod_explicit;
2120 } else {
2121 control = tgsi_sampler_lod_bias;
2122 }
2123
2124 switch (mach->SamplerViews[resource_unit].Resource) {
2125 case TGSI_TEXTURE_1D:
2126 case TGSI_TEXTURE_SHADOW1D:
2127 FETCH(&r[0], 0, TGSI_CHAN_X);
2128
2129 if (modifier == TEX_MODIFIER_PROJECTED) {
2130 micro_div(&r[0], &r[0], &r[3]);
2131 }
2132
2133 fetch_texel(mach->Samplers[sampler_unit],
2134 &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */
2135 control,
2136 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2137 break;
2138
2139 case TGSI_TEXTURE_1D_ARRAY:
2140 case TGSI_TEXTURE_2D:
2141 case TGSI_TEXTURE_RECT:
2142 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2143 case TGSI_TEXTURE_SHADOW2D:
2144 case TGSI_TEXTURE_SHADOWRECT:
2145 FETCH(&r[0], 0, TGSI_CHAN_X);
2146 FETCH(&r[1], 0, TGSI_CHAN_Y);
2147 FETCH(&r[2], 0, TGSI_CHAN_Z);
2148
2149 if (modifier == TEX_MODIFIER_PROJECTED) {
2150 micro_div(&r[0], &r[0], &r[3]);
2151 micro_div(&r[1], &r[1], &r[3]);
2152 micro_div(&r[2], &r[2], &r[3]);
2153 }
2154
2155 fetch_texel(mach->Samplers[sampler_unit],
2156 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */
2157 control,
2158 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2159 break;
2160
2161 case TGSI_TEXTURE_2D_ARRAY:
2162 case TGSI_TEXTURE_3D:
2163 case TGSI_TEXTURE_CUBE:
2164 FETCH(&r[0], 0, TGSI_CHAN_X);
2165 FETCH(&r[1], 0, TGSI_CHAN_Y);
2166 FETCH(&r[2], 0, TGSI_CHAN_Z);
2167
2168 if (modifier == TEX_MODIFIER_PROJECTED) {
2169 micro_div(&r[0], &r[0], &r[3]);
2170 micro_div(&r[1], &r[1], &r[3]);
2171 micro_div(&r[2], &r[2], &r[3]);
2172 }
2173
2174 fetch_texel(mach->Samplers[sampler_unit],
2175 &r[0], &r[1], &r[2], lod,
2176 control,
2177 &r[0], &r[1], &r[2], &r[3]);
2178 break;
2179
2180 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2181 case TGSI_TEXTURE_SHADOWCUBE:
2182 FETCH(&r[0], 0, TGSI_CHAN_X);
2183 FETCH(&r[1], 0, TGSI_CHAN_Y);
2184 FETCH(&r[2], 0, TGSI_CHAN_Z);
2185 FETCH(&r[3], 0, TGSI_CHAN_W);
2186
2187 assert(modifier != TEX_MODIFIER_PROJECTED);
2188
2189 fetch_texel(mach->Samplers[sampler_unit],
2190 &r[0], &r[1], &r[2], &r[3],
2191 control,
2192 &r[0], &r[1], &r[2], &r[3]);
2193 break;
2194
2195 default:
2196 assert(0);
2197 }
2198
2199 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2200 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2201 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2202 }
2203 }
2204 }
2205
2206 static void
2207 exec_sample_d(struct tgsi_exec_machine *mach,
2208 const struct tgsi_full_instruction *inst)
2209 {
2210 const uint resource_unit = inst->Src[1].Register.Index;
2211 const uint sampler_unit = inst->Src[2].Register.Index;
2212 union tgsi_exec_channel r[4];
2213 uint chan;
2214 /*
2215 * XXX: This is fake SAMPLE_D -- the derivatives are not taken into account, yet.
2216 */
2217
2218 switch (mach->SamplerViews[resource_unit].Resource) {
2219 case TGSI_TEXTURE_1D:
2220 case TGSI_TEXTURE_SHADOW1D:
2221
2222 FETCH(&r[0], 0, TGSI_CHAN_X);
2223
2224 fetch_texel(mach->Samplers[sampler_unit],
2225 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */
2226 tgsi_sampler_lod_bias,
2227 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2228 break;
2229
2230 case TGSI_TEXTURE_2D:
2231 case TGSI_TEXTURE_RECT:
2232 case TGSI_TEXTURE_SHADOW2D:
2233 case TGSI_TEXTURE_SHADOWRECT:
2234
2235 FETCH(&r[0], 0, TGSI_CHAN_X);
2236 FETCH(&r[1], 0, TGSI_CHAN_Y);
2237 FETCH(&r[2], 0, TGSI_CHAN_Z);
2238
2239 fetch_texel(mach->Samplers[sampler_unit],
2240 &r[0], &r[1], &r[2], &ZeroVec, /* inputs */
2241 tgsi_sampler_lod_bias,
2242 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2243 break;
2244
2245 case TGSI_TEXTURE_3D:
2246 case TGSI_TEXTURE_CUBE:
2247
2248 FETCH(&r[0], 0, TGSI_CHAN_X);
2249 FETCH(&r[1], 0, TGSI_CHAN_Y);
2250 FETCH(&r[2], 0, TGSI_CHAN_Z);
2251
2252 fetch_texel(mach->Samplers[sampler_unit],
2253 &r[0], &r[1], &r[2], &ZeroVec,
2254 tgsi_sampler_lod_bias,
2255 &r[0], &r[1], &r[2], &r[3]);
2256 break;
2257
2258 default:
2259 assert(0);
2260 }
2261
2262 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2263 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2264 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2265 }
2266 }
2267 }
2268
2269
2270 /**
2271 * Evaluate a constant-valued coefficient at the position of the
2272 * current quad.
2273 */
2274 static void
2275 eval_constant_coef(
2276 struct tgsi_exec_machine *mach,
2277 unsigned attrib,
2278 unsigned chan )
2279 {
2280 unsigned i;
2281
2282 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2283 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2284 }
2285 }
2286
2287 /**
2288 * Evaluate a linear-valued coefficient at the position of the
2289 * current quad.
2290 */
2291 static void
2292 eval_linear_coef(
2293 struct tgsi_exec_machine *mach,
2294 unsigned attrib,
2295 unsigned chan )
2296 {
2297 const float x = mach->QuadPos.xyzw[0].f[0];
2298 const float y = mach->QuadPos.xyzw[1].f[0];
2299 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2300 const float dady = mach->InterpCoefs[attrib].dady[chan];
2301 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2302 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2303 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2304 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2305 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2306 }
2307
2308 /**
2309 * Evaluate a perspective-valued coefficient at the position of the
2310 * current quad.
2311 */
2312 static void
2313 eval_perspective_coef(
2314 struct tgsi_exec_machine *mach,
2315 unsigned attrib,
2316 unsigned chan )
2317 {
2318 const float x = mach->QuadPos.xyzw[0].f[0];
2319 const float y = mach->QuadPos.xyzw[1].f[0];
2320 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2321 const float dady = mach->InterpCoefs[attrib].dady[chan];
2322 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2323 const float *w = mach->QuadPos.xyzw[3].f;
2324 /* divide by W here */
2325 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2326 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2327 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2328 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2329 }
2330
2331
/**
 * Common signature of the eval_*_coef() helpers, so that a declaration
 * can select one of them by interpolation mode and call it per
 * attribute and channel.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
2336
2337 static void
2338 exec_declaration(struct tgsi_exec_machine *mach,
2339 const struct tgsi_full_declaration *decl)
2340 {
2341 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2342 mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2343 return;
2344 }
2345
2346 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2347 if (decl->Declaration.File == TGSI_FILE_INPUT) {
2348 uint first, last, mask;
2349
2350 first = decl->Range.First;
2351 last = decl->Range.Last;
2352 mask = decl->Declaration.UsageMask;
2353
2354 /* XXX we could remove this special-case code since
2355 * mach->InterpCoefs[first].a0 should already have the
2356 * front/back-face value. But we should first update the
2357 * ureg code to emit the right UsageMask value (WRITEMASK_X).
2358 * Then, we could remove the tgsi_exec_machine::Face field.
2359 */
2360 /* XXX make FACE a system value */
2361 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
2362 uint i;
2363
2364 assert(decl->Semantic.Index == 0);
2365 assert(first == last);
2366
2367 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2368 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
2369 }
2370 } else {
2371 eval_coef_func eval;
2372 uint i, j;
2373
2374 switch (decl->Interp.Interpolate) {
2375 case TGSI_INTERPOLATE_CONSTANT:
2376 eval = eval_constant_coef;
2377 break;
2378
2379 case TGSI_INTERPOLATE_LINEAR:
2380 eval = eval_linear_coef;
2381 break;
2382
2383 case TGSI_INTERPOLATE_PERSPECTIVE:
2384 eval = eval_perspective_coef;
2385 break;
2386
2387 case TGSI_INTERPOLATE_COLOR:
2388 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
2389 break;
2390
2391 default:
2392 assert(0);
2393 return;
2394 }
2395
2396 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2397 if (mask & (1 << j)) {
2398 for (i = first; i <= last; i++) {
2399 eval(mach, i, j);
2400 }
2401 }
2402 }
2403 }
2404 }
2405 }
2406
2407 if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
2408 mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First;
2409 }
2410 }
2411
2412
/** Micro operation that takes no source operands and only fills \p dst. */
typedef void (*micro_op)(union tgsi_exec_channel *dst);
2414
2415 static void
2416 exec_vector(struct tgsi_exec_machine *mach,
2417 const struct tgsi_full_instruction *inst,
2418 micro_op op,
2419 enum tgsi_exec_datatype dst_datatype)
2420 {
2421 unsigned int chan;
2422
2423 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2424 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2425 union tgsi_exec_channel dst;
2426
2427 op(&dst);
2428 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2429 }
2430 }
2431 }
2432
/** Micro operation that computes \p dst from a single source channel. */
typedef void (*micro_unary_op)(union tgsi_exec_channel *dst,
                               const union tgsi_exec_channel *src);
2435
2436 static void
2437 exec_scalar_unary(struct tgsi_exec_machine *mach,
2438 const struct tgsi_full_instruction *inst,
2439 micro_unary_op op,
2440 enum tgsi_exec_datatype dst_datatype,
2441 enum tgsi_exec_datatype src_datatype)
2442 {
2443 unsigned int chan;
2444 union tgsi_exec_channel src;
2445 union tgsi_exec_channel dst;
2446
2447 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
2448 op(&dst, &src);
2449 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2450 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2451 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2452 }
2453 }
2454 }
2455
2456 static void
2457 exec_vector_unary(struct tgsi_exec_machine *mach,
2458 const struct tgsi_full_instruction *inst,
2459 micro_unary_op op,
2460 enum tgsi_exec_datatype dst_datatype,
2461 enum tgsi_exec_datatype src_datatype)
2462 {
2463 unsigned int chan;
2464 struct tgsi_exec_vector dst;
2465
2466 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2467 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2468 union tgsi_exec_channel src;
2469
2470 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
2471 op(&dst.xyzw[chan], &src);
2472 }
2473 }
2474 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2475 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2476 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2477 }
2478 }
2479 }
2480
/** Micro operation that computes \p dst from two source channels. */
typedef void (*micro_binary_op)(union tgsi_exec_channel *dst,
                                const union tgsi_exec_channel *src0,
                                const union tgsi_exec_channel *src1);
2484
2485 static void
2486 exec_scalar_binary(struct tgsi_exec_machine *mach,
2487 const struct tgsi_full_instruction *inst,
2488 micro_binary_op op,
2489 enum tgsi_exec_datatype dst_datatype,
2490 enum tgsi_exec_datatype src_datatype)
2491 {
2492 unsigned int chan;
2493 union tgsi_exec_channel src[2];
2494 union tgsi_exec_channel dst;
2495
2496 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
2497 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_Y, src_datatype);
2498 op(&dst, &src[0], &src[1]);
2499 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2500 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2501 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2502 }
2503 }
2504 }
2505
2506 static void
2507 exec_vector_binary(struct tgsi_exec_machine *mach,
2508 const struct tgsi_full_instruction *inst,
2509 micro_binary_op op,
2510 enum tgsi_exec_datatype dst_datatype,
2511 enum tgsi_exec_datatype src_datatype)
2512 {
2513 unsigned int chan;
2514 struct tgsi_exec_vector dst;
2515
2516 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2517 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2518 union tgsi_exec_channel src[2];
2519
2520 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2521 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2522 op(&dst.xyzw[chan], &src[0], &src[1]);
2523 }
2524 }
2525 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2526 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2527 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2528 }
2529 }
2530 }
2531
/** Micro operation that computes \p dst from three source channels. */
typedef void (*micro_trinary_op)(union tgsi_exec_channel *dst,
                                 const union tgsi_exec_channel *src0,
                                 const union tgsi_exec_channel *src1,
                                 const union tgsi_exec_channel *src2);
2536
2537 static void
2538 exec_vector_trinary(struct tgsi_exec_machine *mach,
2539 const struct tgsi_full_instruction *inst,
2540 micro_trinary_op op,
2541 enum tgsi_exec_datatype dst_datatype,
2542 enum tgsi_exec_datatype src_datatype)
2543 {
2544 unsigned int chan;
2545 struct tgsi_exec_vector dst;
2546
2547 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2548 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2549 union tgsi_exec_channel src[3];
2550
2551 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2552 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2553 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
2554 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
2555 }
2556 }
2557 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2558 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2559 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2560 }
2561 }
2562 }
2563
2564 static void
2565 exec_dp3(struct tgsi_exec_machine *mach,
2566 const struct tgsi_full_instruction *inst)
2567 {
2568 unsigned int chan;
2569 union tgsi_exec_channel arg[3];
2570
2571 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2572 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2573 micro_mul(&arg[2], &arg[0], &arg[1]);
2574
2575 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
2576 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2577 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
2578 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2579 }
2580
2581 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2582 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2583 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2584 }
2585 }
2586 }
2587
2588 static void
2589 exec_dp4(struct tgsi_exec_machine *mach,
2590 const struct tgsi_full_instruction *inst)
2591 {
2592 unsigned int chan;
2593 union tgsi_exec_channel arg[3];
2594
2595 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2596 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2597 micro_mul(&arg[2], &arg[0], &arg[1]);
2598
2599 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
2600 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2601 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
2602 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2603 }
2604
2605 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2606 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2607 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2608 }
2609 }
2610 }
2611
2612 static void
2613 exec_dp2a(struct tgsi_exec_machine *mach,
2614 const struct tgsi_full_instruction *inst)
2615 {
2616 unsigned int chan;
2617 union tgsi_exec_channel arg[3];
2618
2619 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2620 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2621 micro_mul(&arg[2], &arg[0], &arg[1]);
2622
2623 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2624 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2625 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
2626
2627 fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2628 micro_add(&arg[0], &arg[0], &arg[1]);
2629
2630 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2631 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2632 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2633 }
2634 }
2635 }
2636
2637 static void
2638 exec_dph(struct tgsi_exec_machine *mach,
2639 const struct tgsi_full_instruction *inst)
2640 {
2641 unsigned int chan;
2642 union tgsi_exec_channel arg[3];
2643
2644 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2645 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2646 micro_mul(&arg[2], &arg[0], &arg[1]);
2647
2648 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2649 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2650 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2651
2652 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2653 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2654 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
2655
2656 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2657 micro_add(&arg[0], &arg[0], &arg[1]);
2658
2659 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2660 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2661 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2662 }
2663 }
2664 }
2665
2666 static void
2667 exec_dp2(struct tgsi_exec_machine *mach,
2668 const struct tgsi_full_instruction *inst)
2669 {
2670 unsigned int chan;
2671 union tgsi_exec_channel arg[3];
2672
2673 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2674 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2675 micro_mul(&arg[2], &arg[0], &arg[1]);
2676
2677 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2678 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2679 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2680
2681 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2682 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2683 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2684 }
2685 }
2686 }
2687
2688 static void
2689 exec_nrm4(struct tgsi_exec_machine *mach,
2690 const struct tgsi_full_instruction *inst)
2691 {
2692 unsigned int chan;
2693 union tgsi_exec_channel arg[4];
2694 union tgsi_exec_channel scale;
2695
2696 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2697 micro_mul(&scale, &arg[0], &arg[0]);
2698
2699 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
2700 union tgsi_exec_channel product;
2701
2702 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2703 micro_mul(&product, &arg[chan], &arg[chan]);
2704 micro_add(&scale, &scale, &product);
2705 }
2706
2707 micro_rsq(&scale, &scale);
2708
2709 for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_W; chan++) {
2710 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2711 micro_mul(&arg[chan], &arg[chan], &scale);
2712 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2713 }
2714 }
2715 }
2716
2717 static void
2718 exec_nrm3(struct tgsi_exec_machine *mach,
2719 const struct tgsi_full_instruction *inst)
2720 {
2721 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
2722 unsigned int chan;
2723 union tgsi_exec_channel arg[3];
2724 union tgsi_exec_channel scale;
2725
2726 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2727 micro_mul(&scale, &arg[0], &arg[0]);
2728
2729 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
2730 union tgsi_exec_channel product;
2731
2732 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2733 micro_mul(&product, &arg[chan], &arg[chan]);
2734 micro_add(&scale, &scale, &product);
2735 }
2736
2737 micro_rsq(&scale, &scale);
2738
2739 for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_Z; chan++) {
2740 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2741 micro_mul(&arg[chan], &arg[chan], &scale);
2742 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2743 }
2744 }
2745 }
2746
2747 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2748 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2749 }
2750 }
2751
2752 static void
2753 exec_scs(struct tgsi_exec_machine *mach,
2754 const struct tgsi_full_instruction *inst)
2755 {
2756 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
2757 union tgsi_exec_channel arg;
2758 union tgsi_exec_channel result;
2759
2760 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2761
2762 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2763 micro_cos(&result, &arg);
2764 store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2765 }
2766 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2767 micro_sin(&result, &arg);
2768 store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2769 }
2770 }
2771 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2772 store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2773 }
2774 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2775 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2776 }
2777 }
2778
2779 static void
2780 exec_x2d(struct tgsi_exec_machine *mach,
2781 const struct tgsi_full_instruction *inst)
2782 {
2783 union tgsi_exec_channel r[4];
2784 union tgsi_exec_channel d[2];
2785
2786 fetch_source(mach, &r[0], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2787 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2788 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) {
2789 fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2790 micro_mul(&r[2], &r[2], &r[0]);
2791 fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2792 micro_mul(&r[3], &r[3], &r[1]);
2793 micro_add(&r[2], &r[2], &r[3]);
2794 fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2795 micro_add(&d[0], &r[2], &r[3]);
2796 }
2797 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) {
2798 fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2799 micro_mul(&r[2], &r[2], &r[0]);
2800 fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2801 micro_mul(&r[3], &r[3], &r[1]);
2802 micro_add(&r[2], &r[2], &r[3]);
2803 fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2804 micro_add(&d[1], &r[2], &r[3]);
2805 }
2806 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2807 store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2808 }
2809 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2810 store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2811 }
2812 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2813 store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2814 }
2815 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2816 store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2817 }
2818 }
2819
2820 static void
2821 exec_rfl(struct tgsi_exec_machine *mach,
2822 const struct tgsi_full_instruction *inst)
2823 {
2824 union tgsi_exec_channel r[9];
2825
2826 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
2827 /* r0 = dp3(src0, src0) */
2828 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2829 micro_mul(&r[0], &r[2], &r[2]);
2830 fetch_source(mach, &r[4], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2831 micro_mul(&r[8], &r[4], &r[4]);
2832 micro_add(&r[0], &r[0], &r[8]);
2833 fetch_source(mach, &r[6], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2834 micro_mul(&r[8], &r[6], &r[6]);
2835 micro_add(&r[0], &r[0], &r[8]);
2836
2837 /* r1 = dp3(src0, src1) */
2838 fetch_source(mach, &r[3], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2839 micro_mul(&r[1], &r[2], &r[3]);
2840 fetch_source(mach, &r[5], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2841 micro_mul(&r[8], &r[4], &r[5]);
2842 micro_add(&r[1], &r[1], &r[8]);
2843 fetch_source(mach, &r[7], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2844 micro_mul(&r[8], &r[6], &r[7]);
2845 micro_add(&r[1], &r[1], &r[8]);
2846
2847 /* r1 = 2 * r1 / r0 */
2848 micro_add(&r[1], &r[1], &r[1]);
2849 micro_div(&r[1], &r[1], &r[0]);
2850
2851 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2852 micro_mul(&r[2], &r[2], &r[1]);
2853 micro_sub(&r[2], &r[2], &r[3]);
2854 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2855 }
2856 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2857 micro_mul(&r[4], &r[4], &r[1]);
2858 micro_sub(&r[4], &r[4], &r[5]);
2859 store_dest(mach, &r[4], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2860 }
2861 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2862 micro_mul(&r[6], &r[6], &r[1]);
2863 micro_sub(&r[6], &r[6], &r[7]);
2864 store_dest(mach, &r[6], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2865 }
2866 }
2867 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2868 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2869 }
2870 }
2871
2872 static void
2873 exec_xpd(struct tgsi_exec_machine *mach,
2874 const struct tgsi_full_instruction *inst)
2875 {
2876 union tgsi_exec_channel r[6];
2877 union tgsi_exec_channel d[3];
2878
2879 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2880 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2881
2882 micro_mul(&r[2], &r[0], &r[1]);
2883
2884 fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2885 fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2886
2887 micro_mul(&r[5], &r[3], &r[4] );
2888 micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]);
2889
2890 fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2891
2892 micro_mul(&r[3], &r[3], &r[2]);
2893
2894 fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2895
2896 micro_mul(&r[1], &r[1], &r[5]);
2897 micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]);
2898
2899 micro_mul(&r[5], &r[5], &r[4]);
2900 micro_mul(&r[0], &r[0], &r[2]);
2901 micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]);
2902
2903 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2904 store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2905 }
2906 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2907 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2908 }
2909 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2910 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2911 }
2912 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2913 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2914 }
2915 }
2916
2917 static void
2918 exec_dst(struct tgsi_exec_machine *mach,
2919 const struct tgsi_full_instruction *inst)
2920 {
2921 union tgsi_exec_channel r[2];
2922 union tgsi_exec_channel d[4];
2923
2924 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2925 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2926 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2927 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
2928 }
2929 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2930 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2931 }
2932 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2933 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2934 }
2935
2936 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2937 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2938 }
2939 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2940 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2941 }
2942 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2943 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2944 }
2945 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2946 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2947 }
2948 }
2949
2950 static void
2951 exec_log(struct tgsi_exec_machine *mach,
2952 const struct tgsi_full_instruction *inst)
2953 {
2954 union tgsi_exec_channel r[3];
2955
2956 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2957 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */
2958 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */
2959 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */
2960 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2961 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2962 }
2963 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2964 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */
2965 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
2966 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2967 }
2968 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2969 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2970 }
2971 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2972 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2973 }
2974 }
2975
2976 static void
2977 exec_exp(struct tgsi_exec_machine *mach,
2978 const struct tgsi_full_instruction *inst)
2979 {
2980 union tgsi_exec_channel r[3];
2981
2982 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2983 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */
2984 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2985 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */
2986 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2987 }
2988 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2989 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
2990 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2991 }
2992 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2993 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */
2994 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2995 }
2996 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2997 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2998 }
2999 }
3000
3001 static void
3002 exec_lit(struct tgsi_exec_machine *mach,
3003 const struct tgsi_full_instruction *inst)
3004 {
3005 union tgsi_exec_channel r[3];
3006 union tgsi_exec_channel d[3];
3007
3008 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3009 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3010 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3011 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3012 micro_max(&r[1], &r[1], &ZeroVec);
3013
3014 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3015 micro_min(&r[2], &r[2], &P128Vec);
3016 micro_max(&r[2], &r[2], &M128Vec);
3017 micro_pow(&r[1], &r[1], &r[2]);
3018 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3019 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3020 }
3021 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3022 micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
3023 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3024 }
3025 }
3026 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3027 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3028 }
3029
3030 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3031 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3032 }
3033 }
3034
3035 static void
3036 exec_break(struct tgsi_exec_machine *mach)
3037 {
3038 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3039 /* turn off loop channels for each enabled exec channel */
3040 mach->LoopMask &= ~mach->ExecMask;
3041 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3042 UPDATE_EXEC_MASK(mach);
3043 } else {
3044 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3045
3046 mach->Switch.mask = 0x0;
3047
3048 UPDATE_EXEC_MASK(mach);
3049 }
3050 }
3051
3052 static void
3053 exec_switch(struct tgsi_exec_machine *mach,
3054 const struct tgsi_full_instruction *inst)
3055 {
3056 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3057 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3058
3059 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3060 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3061 mach->Switch.mask = 0x0;
3062 mach->Switch.defaultMask = 0x0;
3063
3064 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3065 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3066
3067 UPDATE_EXEC_MASK(mach);
3068 }
3069
3070 static void
3071 exec_case(struct tgsi_exec_machine *mach,
3072 const struct tgsi_full_instruction *inst)
3073 {
3074 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3075 union tgsi_exec_channel src;
3076 uint mask = 0;
3077
3078 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3079
3080 if (mach->Switch.selector.u[0] == src.u[0]) {
3081 mask |= 0x1;
3082 }
3083 if (mach->Switch.selector.u[1] == src.u[1]) {
3084 mask |= 0x2;
3085 }
3086 if (mach->Switch.selector.u[2] == src.u[2]) {
3087 mask |= 0x4;
3088 }
3089 if (mach->Switch.selector.u[3] == src.u[3]) {
3090 mask |= 0x8;
3091 }
3092
3093 mach->Switch.defaultMask |= mask;
3094
3095 mach->Switch.mask |= mask & prevMask;
3096
3097 UPDATE_EXEC_MASK(mach);
3098 }
3099
3100 static void
3101 exec_default(struct tgsi_exec_machine *mach)
3102 {
3103 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3104
3105 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3106
3107 UPDATE_EXEC_MASK(mach);
3108 }
3109
3110 static void
3111 exec_endswitch(struct tgsi_exec_machine *mach)
3112 {
3113 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3114 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3115
3116 UPDATE_EXEC_MASK(mach);
3117 }
3118
3119 static void
3120 micro_i2f(union tgsi_exec_channel *dst,
3121 const union tgsi_exec_channel *src)
3122 {
3123 dst->f[0] = (float)src->i[0];
3124 dst->f[1] = (float)src->i[1];
3125 dst->f[2] = (float)src->i[2];
3126 dst->f[3] = (float)src->i[3];
3127 }
3128
3129 static void
3130 micro_not(union tgsi_exec_channel *dst,
3131 const union tgsi_exec_channel *src)
3132 {
3133 dst->u[0] = ~src->u[0];
3134 dst->u[1] = ~src->u[1];
3135 dst->u[2] = ~src->u[2];
3136 dst->u[3] = ~src->u[3];
3137 }
3138
3139 static void
3140 micro_shl(union tgsi_exec_channel *dst,
3141 const union tgsi_exec_channel *src0,
3142 const union tgsi_exec_channel *src1)
3143 {
3144 dst->u[0] = src0->u[0] << src1->u[0];
3145 dst->u[1] = src0->u[1] << src1->u[1];
3146 dst->u[2] = src0->u[2] << src1->u[2];
3147 dst->u[3] = src0->u[3] << src1->u[3];
3148 }
3149
3150 static void
3151 micro_and(union tgsi_exec_channel *dst,
3152 const union tgsi_exec_channel *src0,
3153 const union tgsi_exec_channel *src1)
3154 {
3155 dst->u[0] = src0->u[0] & src1->u[0];
3156 dst->u[1] = src0->u[1] & src1->u[1];
3157 dst->u[2] = src0->u[2] & src1->u[2];
3158 dst->u[3] = src0->u[3] & src1->u[3];
3159 }
3160
3161 static void
3162 micro_or(union tgsi_exec_channel *dst,
3163 const union tgsi_exec_channel *src0,
3164 const union tgsi_exec_channel *src1)
3165 {
3166 dst->u[0] = src0->u[0] | src1->u[0];
3167 dst->u[1] = src0->u[1] | src1->u[1];
3168 dst->u[2] = src0->u[2] | src1->u[2];
3169 dst->u[3] = src0->u[3] | src1->u[3];
3170 }
3171
3172 static void
3173 micro_xor(union tgsi_exec_channel *dst,
3174 const union tgsi_exec_channel *src0,
3175 const union tgsi_exec_channel *src1)
3176 {
3177 dst->u[0] = src0->u[0] ^ src1->u[0];
3178 dst->u[1] = src0->u[1] ^ src1->u[1];
3179 dst->u[2] = src0->u[2] ^ src1->u[2];
3180 dst->u[3] = src0->u[3] ^ src1->u[3];
3181 }
3182
3183 static void
3184 micro_mod(union tgsi_exec_channel *dst,
3185 const union tgsi_exec_channel *src0,
3186 const union tgsi_exec_channel *src1)
3187 {
3188 dst->i[0] = src0->i[0] % src1->i[0];
3189 dst->i[1] = src0->i[1] % src1->i[1];
3190 dst->i[2] = src0->i[2] % src1->i[2];
3191 dst->i[3] = src0->i[3] % src1->i[3];
3192 }
3193
3194 static void
3195 micro_f2i(union tgsi_exec_channel *dst,
3196 const union tgsi_exec_channel *src)
3197 {
3198 dst->i[0] = (int)src->f[0];
3199 dst->i[1] = (int)src->f[1];
3200 dst->i[2] = (int)src->f[2];
3201 dst->i[3] = (int)src->f[3];
3202 }
3203
3204 static void
3205 micro_idiv(union tgsi_exec_channel *dst,
3206 const union tgsi_exec_channel *src0,
3207 const union tgsi_exec_channel *src1)
3208 {
3209 dst->i[0] = src0->i[0] / src1->i[0];
3210 dst->i[1] = src0->i[1] / src1->i[1];
3211 dst->i[2] = src0->i[2] / src1->i[2];
3212 dst->i[3] = src0->i[3] / src1->i[3];
3213 }
3214
3215 static void
3216 micro_imax(union tgsi_exec_channel *dst,
3217 const union tgsi_exec_channel *src0,
3218 const union tgsi_exec_channel *src1)
3219 {
3220 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
3221 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
3222 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
3223 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
3224 }
3225
3226 static void
3227 micro_imin(union tgsi_exec_channel *dst,
3228 const union tgsi_exec_channel *src0,
3229 const union tgsi_exec_channel *src1)
3230 {
3231 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
3232 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
3233 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
3234 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
3235 }
3236
3237 static void
3238 micro_isge(union tgsi_exec_channel *dst,
3239 const union tgsi_exec_channel *src0,
3240 const union tgsi_exec_channel *src1)
3241 {
3242 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
3243 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
3244 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
3245 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
3246 }
3247
3248 static void
3249 micro_ishr(union tgsi_exec_channel *dst,
3250 const union tgsi_exec_channel *src0,
3251 const union tgsi_exec_channel *src1)
3252 {
3253 dst->i[0] = src0->i[0] >> src1->i[0];
3254 dst->i[1] = src0->i[1] >> src1->i[1];
3255 dst->i[2] = src0->i[2] >> src1->i[2];
3256 dst->i[3] = src0->i[3] >> src1->i[3];
3257 }
3258
3259 static void
3260 micro_islt(union tgsi_exec_channel *dst,
3261 const union tgsi_exec_channel *src0,
3262 const union tgsi_exec_channel *src1)
3263 {
3264 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
3265 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
3266 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
3267 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
3268 }
3269
3270 static void
3271 micro_f2u(union tgsi_exec_channel *dst,
3272 const union tgsi_exec_channel *src)
3273 {
3274 dst->u[0] = (uint)src->f[0];
3275 dst->u[1] = (uint)src->f[1];
3276 dst->u[2] = (uint)src->f[2];
3277 dst->u[3] = (uint)src->f[3];
3278 }
3279
3280 static void
3281 micro_u2f(union tgsi_exec_channel *dst,
3282 const union tgsi_exec_channel *src)
3283 {
3284 dst->f[0] = (float)src->u[0];
3285 dst->f[1] = (float)src->u[1];
3286 dst->f[2] = (float)src->u[2];
3287 dst->f[3] = (float)src->u[3];
3288 }
3289
3290 static void
3291 micro_uadd(union tgsi_exec_channel *dst,
3292 const union tgsi_exec_channel *src0,
3293 const union tgsi_exec_channel *src1)
3294 {
3295 dst->u[0] = src0->u[0] + src1->u[0];
3296 dst->u[1] = src0->u[1] + src1->u[1];
3297 dst->u[2] = src0->u[2] + src1->u[2];
3298 dst->u[3] = src0->u[3] + src1->u[3];
3299 }
3300
3301 static void
3302 micro_udiv(union tgsi_exec_channel *dst,
3303 const union tgsi_exec_channel *src0,
3304 const union tgsi_exec_channel *src1)
3305 {
3306 dst->u[0] = src0->u[0] / src1->u[0];
3307 dst->u[1] = src0->u[1] / src1->u[1];
3308 dst->u[2] = src0->u[2] / src1->u[2];
3309 dst->u[3] = src0->u[3] / src1->u[3];
3310 }
3311
3312 static void
3313 micro_umad(union tgsi_exec_channel *dst,
3314 const union tgsi_exec_channel *src0,
3315 const union tgsi_exec_channel *src1,
3316 const union tgsi_exec_channel *src2)
3317 {
3318 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
3319 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
3320 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
3321 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
3322 }
3323
3324 static void
3325 micro_umax(union tgsi_exec_channel *dst,
3326 const union tgsi_exec_channel *src0,
3327 const union tgsi_exec_channel *src1)
3328 {
3329 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
3330 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
3331 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
3332 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
3333 }
3334
3335 static void
3336 micro_umin(union tgsi_exec_channel *dst,
3337 const union tgsi_exec_channel *src0,
3338 const union tgsi_exec_channel *src1)
3339 {
3340 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
3341 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
3342 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
3343 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
3344 }
3345
3346 static void
3347 micro_umod(union tgsi_exec_channel *dst,
3348 const union tgsi_exec_channel *src0,
3349 const union tgsi_exec_channel *src1)
3350 {
3351 dst->u[0] = src0->u[0] % src1->u[0];
3352 dst->u[1] = src0->u[1] % src1->u[1];
3353 dst->u[2] = src0->u[2] % src1->u[2];
3354 dst->u[3] = src0->u[3] % src1->u[3];
3355 }
3356
3357 static void
3358 micro_umul(union tgsi_exec_channel *dst,
3359 const union tgsi_exec_channel *src0,
3360 const union tgsi_exec_channel *src1)
3361 {
3362 dst->u[0] = src0->u[0] * src1->u[0];
3363 dst->u[1] = src0->u[1] * src1->u[1];
3364 dst->u[2] = src0->u[2] * src1->u[2];
3365 dst->u[3] = src0->u[3] * src1->u[3];
3366 }
3367
3368 static void
3369 micro_useq(union tgsi_exec_channel *dst,
3370 const union tgsi_exec_channel *src0,
3371 const union tgsi_exec_channel *src1)
3372 {
3373 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
3374 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
3375 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
3376 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
3377 }
3378
3379 static void
3380 micro_usge(union tgsi_exec_channel *dst,
3381 const union tgsi_exec_channel *src0,
3382 const union tgsi_exec_channel *src1)
3383 {
3384 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
3385 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
3386 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
3387 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
3388 }
3389
3390 static void
3391 micro_ushr(union tgsi_exec_channel *dst,
3392 const union tgsi_exec_channel *src0,
3393 const union tgsi_exec_channel *src1)
3394 {
3395 dst->u[0] = src0->u[0] >> src1->u[0];
3396 dst->u[1] = src0->u[1] >> src1->u[1];
3397 dst->u[2] = src0->u[2] >> src1->u[2];
3398 dst->u[3] = src0->u[3] >> src1->u[3];
3399 }
3400
3401 static void
3402 micro_uslt(union tgsi_exec_channel *dst,
3403 const union tgsi_exec_channel *src0,
3404 const union tgsi_exec_channel *src1)
3405 {
3406 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
3407 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
3408 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
3409 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
3410 }
3411
3412 static void
3413 micro_usne(union tgsi_exec_channel *dst,
3414 const union tgsi_exec_channel *src0,
3415 const union tgsi_exec_channel *src1)
3416 {
3417 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
3418 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
3419 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
3420 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
3421 }
3422
3423 static void
3424 micro_uarl(union tgsi_exec_channel *dst,
3425 const union tgsi_exec_channel *src)
3426 {
3427 dst->i[0] = src->u[0];
3428 dst->i[1] = src->u[1];
3429 dst->i[2] = src->u[2];
3430 dst->i[3] = src->u[3];
3431 }
3432
3433 static void
3434 micro_ucmp(union tgsi_exec_channel *dst,
3435 const union tgsi_exec_channel *src0,
3436 const union tgsi_exec_channel *src1,
3437 const union tgsi_exec_channel *src2)
3438 {
3439 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
3440 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
3441 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
3442 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
3443 }
3444
3445 static void
3446 exec_instruction(
3447 struct tgsi_exec_machine *mach,
3448 const struct tgsi_full_instruction *inst,
3449 int *pc )
3450 {
3451 union tgsi_exec_channel r[10];
3452
3453 (*pc)++;
3454
3455 switch (inst->Instruction.Opcode) {
3456 case TGSI_OPCODE_ARL:
3457 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3458 break;
3459
3460 case TGSI_OPCODE_MOV:
3461 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
3462 break;
3463
3464 case TGSI_OPCODE_LIT:
3465 exec_lit(mach, inst);
3466 break;
3467
3468 case TGSI_OPCODE_RCP:
3469 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3470 break;
3471
3472 case TGSI_OPCODE_RSQ:
3473 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3474 break;
3475
3476 case TGSI_OPCODE_EXP:
3477 exec_exp(mach, inst);
3478 break;
3479
3480 case TGSI_OPCODE_LOG:
3481 exec_log(mach, inst);
3482 break;
3483
3484 case TGSI_OPCODE_MUL:
3485 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3486 break;
3487
3488 case TGSI_OPCODE_ADD:
3489 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3490 break;
3491
3492 case TGSI_OPCODE_DP3:
3493 exec_dp3(mach, inst);
3494 break;
3495
3496 case TGSI_OPCODE_DP4:
3497 exec_dp4(mach, inst);
3498 break;
3499
3500 case TGSI_OPCODE_DST:
3501 exec_dst(mach, inst);
3502 break;
3503
3504 case TGSI_OPCODE_MIN:
3505 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3506 break;
3507
3508 case TGSI_OPCODE_MAX:
3509 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3510 break;
3511
3512 case TGSI_OPCODE_SLT:
3513 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3514 break;
3515
3516 case TGSI_OPCODE_SGE:
3517 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3518 break;
3519
3520 case TGSI_OPCODE_MAD:
3521 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3522 break;
3523
3524 case TGSI_OPCODE_SUB:
3525 exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3526 break;
3527
3528 case TGSI_OPCODE_LRP:
3529 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3530 break;
3531
3532 case TGSI_OPCODE_CND:
3533 exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3534 break;
3535
3536 case TGSI_OPCODE_DP2A:
3537 exec_dp2a(mach, inst);
3538 break;
3539
3540 case TGSI_OPCODE_FRC:
3541 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3542 break;
3543
3544 case TGSI_OPCODE_CLAMP:
3545 exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3546 break;
3547
3548 case TGSI_OPCODE_FLR:
3549 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3550 break;
3551
3552 case TGSI_OPCODE_ROUND:
3553 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3554 break;
3555
3556 case TGSI_OPCODE_EX2:
3557 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3558 break;
3559
3560 case TGSI_OPCODE_LG2:
3561 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3562 break;
3563
3564 case TGSI_OPCODE_POW:
3565 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3566 break;
3567
3568 case TGSI_OPCODE_XPD:
3569 exec_xpd(mach, inst);
3570 break;
3571
3572 case TGSI_OPCODE_ABS:
3573 exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3574 break;
3575
3576 case TGSI_OPCODE_RCC:
3577 exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3578 break;
3579
3580 case TGSI_OPCODE_DPH:
3581 exec_dph(mach, inst);
3582 break;
3583
3584 case TGSI_OPCODE_COS:
3585 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3586 break;
3587
3588 case TGSI_OPCODE_DDX:
3589 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3590 break;
3591
3592 case TGSI_OPCODE_DDY:
3593 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3594 break;
3595
3596 case TGSI_OPCODE_KILP:
3597 exec_kilp (mach, inst);
3598 break;
3599
3600 case TGSI_OPCODE_KIL:
3601 exec_kil (mach, inst);
3602 break;
3603
3604 case TGSI_OPCODE_PK2H:
3605 assert (0);
3606 break;
3607
3608 case TGSI_OPCODE_PK2US:
3609 assert (0);
3610 break;
3611
3612 case TGSI_OPCODE_PK4B:
3613 assert (0);
3614 break;
3615
3616 case TGSI_OPCODE_PK4UB:
3617 assert (0);
3618 break;
3619
3620 case TGSI_OPCODE_RFL:
3621 exec_rfl(mach, inst);
3622 break;
3623
3624 case TGSI_OPCODE_SEQ:
3625 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3626 break;
3627
3628 case TGSI_OPCODE_SFL:
3629 exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT);
3630 break;
3631
3632 case TGSI_OPCODE_SGT:
3633 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3634 break;
3635
3636 case TGSI_OPCODE_SIN:
3637 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3638 break;
3639
3640 case TGSI_OPCODE_SLE:
3641 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3642 break;
3643
3644 case TGSI_OPCODE_SNE:
3645 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3646 break;
3647
3648 case TGSI_OPCODE_STR:
3649 exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT);
3650 break;
3651
3652 case TGSI_OPCODE_TEX:
3653 /* simple texture lookup */
3654 /* src[0] = texcoord */
3655 /* src[1] = sampler unit */
3656 exec_tex(mach, inst, TEX_MODIFIER_NONE);
3657 break;
3658
3659 case TGSI_OPCODE_TXB:
3660 /* Texture lookup with lod bias */
3661 /* src[0] = texcoord (src[0].w = LOD bias) */
3662 /* src[1] = sampler unit */
3663 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS);
3664 break;
3665
3666 case TGSI_OPCODE_TXD:
3667 /* Texture lookup with explict partial derivatives */
3668 /* src[0] = texcoord */
3669 /* src[1] = d[strq]/dx */
3670 /* src[2] = d[strq]/dy */
3671 /* src[3] = sampler unit */
3672 exec_txd(mach, inst);
3673 break;
3674
3675 case TGSI_OPCODE_TXL:
3676 /* Texture lookup with explit LOD */
3677 /* src[0] = texcoord (src[0].w = LOD) */
3678 /* src[1] = sampler unit */
3679 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD);
3680 break;
3681
3682 case TGSI_OPCODE_TXP:
3683 /* Texture lookup with projection */
3684 /* src[0] = texcoord (src[0].w = projection) */
3685 /* src[1] = sampler unit */
3686 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED);
3687 break;
3688
3689 case TGSI_OPCODE_UP2H:
3690 assert (0);
3691 break;
3692
3693 case TGSI_OPCODE_UP2US:
3694 assert (0);
3695 break;
3696
3697 case TGSI_OPCODE_UP4B:
3698 assert (0);
3699 break;
3700
3701 case TGSI_OPCODE_UP4UB:
3702 assert (0);
3703 break;
3704
3705 case TGSI_OPCODE_X2D:
3706 exec_x2d(mach, inst);
3707 break;
3708
3709 case TGSI_OPCODE_ARA:
3710 assert (0);
3711 break;
3712
3713 case TGSI_OPCODE_ARR:
3714 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3715 break;
3716
3717 case TGSI_OPCODE_BRA:
3718 assert (0);
3719 break;
3720
3721 case TGSI_OPCODE_CAL:
3722 /* skip the call if no execution channels are enabled */
3723 if (mach->ExecMask) {
3724 /* do the call */
3725
3726 /* First, record the depths of the execution stacks.
3727 * This is important for deeply nested/looped return statements.
3728 * We have to unwind the stacks by the correct amount. For a
3729 * real code generator, we could determine the number of entries
3730 * to pop off each stack with simple static analysis and avoid
3731 * implementing this data structure at run time.
3732 */
3733 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
3734 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
3735 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
3736 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
3737 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
3738 /* note that PC was already incremented above */
3739 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
3740
3741 mach->CallStackTop++;
3742
3743 /* Second, push the Cond, Loop, Cont, Func stacks */
3744 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3745 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3746 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3747 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3748 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3749 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
3750
3751 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3752 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3753 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3754 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3755 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3756 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
3757
3758 /* Finally, jump to the subroutine */
3759 *pc = inst->Label.Label;
3760 }
3761 break;
3762
3763 case TGSI_OPCODE_RET:
3764 mach->FuncMask &= ~mach->ExecMask;
3765 UPDATE_EXEC_MASK(mach);
3766
3767 if (mach->FuncMask == 0x0) {
3768 /* really return now (otherwise, keep executing */
3769
3770 if (mach->CallStackTop == 0) {
3771 /* returning from main() */
3772 mach->CondStackTop = 0;
3773 mach->LoopStackTop = 0;
3774 *pc = -1;
3775 return;
3776 }
3777
3778 assert(mach->CallStackTop > 0);
3779 mach->CallStackTop--;
3780
3781 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3782 mach->CondMask = mach->CondStack[mach->CondStackTop];
3783
3784 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3785 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3786
3787 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3788 mach->ContMask = mach->ContStack[mach->ContStackTop];
3789
3790 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3791 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3792
3793 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3794 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3795
3796 assert(mach->FuncStackTop > 0);
3797 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3798
3799 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3800
3801 UPDATE_EXEC_MASK(mach);
3802 }
3803 break;
3804
3805 case TGSI_OPCODE_SSG:
3806 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3807 break;
3808
3809 case TGSI_OPCODE_CMP:
3810 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3811 break;
3812
3813 case TGSI_OPCODE_SCS:
3814 exec_scs(mach, inst);
3815 break;
3816
3817 case TGSI_OPCODE_NRM:
3818 exec_nrm3(mach, inst);
3819 break;
3820
3821 case TGSI_OPCODE_NRM4:
3822 exec_nrm4(mach, inst);
3823 break;
3824
3825 case TGSI_OPCODE_DIV:
3826 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3827 break;
3828
3829 case TGSI_OPCODE_DP2:
3830 exec_dp2(mach, inst);
3831 break;
3832
3833 case TGSI_OPCODE_IF:
3834 /* push CondMask */
3835 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3836 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3837 FETCH( &r[0], 0, TGSI_CHAN_X );
3838 /* update CondMask */
3839 if( ! r[0].u[0] ) {
3840 mach->CondMask &= ~0x1;
3841 }
3842 if( ! r[0].u[1] ) {
3843 mach->CondMask &= ~0x2;
3844 }
3845 if( ! r[0].u[2] ) {
3846 mach->CondMask &= ~0x4;
3847 }
3848 if( ! r[0].u[3] ) {
3849 mach->CondMask &= ~0x8;
3850 }
3851 UPDATE_EXEC_MASK(mach);
3852 /* Todo: If CondMask==0, jump to ELSE */
3853 break;
3854
3855 case TGSI_OPCODE_ELSE:
3856 /* invert CondMask wrt previous mask */
3857 {
3858 uint prevMask;
3859 assert(mach->CondStackTop > 0);
3860 prevMask = mach->CondStack[mach->CondStackTop - 1];
3861 mach->CondMask = ~mach->CondMask & prevMask;
3862 UPDATE_EXEC_MASK(mach);
3863 /* Todo: If CondMask==0, jump to ENDIF */
3864 }
3865 break;
3866
3867 case TGSI_OPCODE_ENDIF:
3868 /* pop CondMask */
3869 assert(mach->CondStackTop > 0);
3870 mach->CondMask = mach->CondStack[--mach->CondStackTop];
3871 UPDATE_EXEC_MASK(mach);
3872 break;
3873
3874 case TGSI_OPCODE_END:
3875 /* make sure we end primitives which haven't
3876 * been explicitly emitted */
3877 conditional_emit_primitive(mach);
3878 /* halt execution */
3879 *pc = -1;
3880 break;
3881
3882 case TGSI_OPCODE_PUSHA:
3883 assert (0);
3884 break;
3885
3886 case TGSI_OPCODE_POPA:
3887 assert (0);
3888 break;
3889
3890 case TGSI_OPCODE_CEIL:
3891 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3892 break;
3893
3894 case TGSI_OPCODE_I2F:
3895 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
3896 break;
3897
3898 case TGSI_OPCODE_NOT:
3899 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3900 break;
3901
3902 case TGSI_OPCODE_TRUNC:
3903 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3904 break;
3905
3906 case TGSI_OPCODE_SHL:
3907 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3908 break;
3909
3910 case TGSI_OPCODE_AND:
3911 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3912 break;
3913
3914 case TGSI_OPCODE_OR:
3915 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3916 break;
3917
3918 case TGSI_OPCODE_MOD:
3919 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3920 break;
3921
3922 case TGSI_OPCODE_XOR:
3923 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3924 break;
3925
3926 case TGSI_OPCODE_SAD:
3927 assert (0);
3928 break;
3929
3930 case TGSI_OPCODE_TXF:
3931 exec_txf(mach, inst);
3932 break;
3933
3934 case TGSI_OPCODE_TXQ:
3935 exec_txq(mach, inst);
3936 break;
3937
3938 case TGSI_OPCODE_EMIT:
3939 emit_vertex(mach);
3940 break;
3941
3942 case TGSI_OPCODE_ENDPRIM:
3943 emit_primitive(mach);
3944 break;
3945
3946 case TGSI_OPCODE_BGNLOOP:
3947 /* push LoopMask and ContMasks */
3948 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3949 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3950 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3951 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3952
3953 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3954 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3955 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
3956 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3957 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
3958 break;
3959
3960 case TGSI_OPCODE_ENDLOOP:
3961 /* Restore ContMask, but don't pop */
3962 assert(mach->ContStackTop > 0);
3963 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3964 UPDATE_EXEC_MASK(mach);
3965 if (mach->ExecMask) {
3966 /* repeat loop: jump to instruction just past BGNLOOP */
3967 assert(mach->LoopLabelStackTop > 0);
3968 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
3969 }
3970 else {
3971 /* exit loop: pop LoopMask */
3972 assert(mach->LoopStackTop > 0);
3973 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3974 /* pop ContMask */
3975 assert(mach->ContStackTop > 0);
3976 mach->ContMask = mach->ContStack[--mach->ContStackTop];
3977 assert(mach->LoopLabelStackTop > 0);
3978 --mach->LoopLabelStackTop;
3979
3980 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3981 }
3982 UPDATE_EXEC_MASK(mach);
3983 break;
3984
3985 case TGSI_OPCODE_BRK:
3986 exec_break(mach);
3987 break;
3988
3989 case TGSI_OPCODE_CONT:
3990 /* turn off cont channels for each enabled exec channel */
3991 mach->ContMask &= ~mach->ExecMask;
3992 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3993 UPDATE_EXEC_MASK(mach);
3994 break;
3995
3996 case TGSI_OPCODE_BGNSUB:
3997 /* no-op */
3998 break;
3999
4000 case TGSI_OPCODE_ENDSUB:
4001 /*
4002 * XXX: This really should be a no-op. We should never reach this opcode.
4003 */
4004
4005 assert(mach->CallStackTop > 0);
4006 mach->CallStackTop--;
4007
4008 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
4009 mach->CondMask = mach->CondStack[mach->CondStackTop];
4010
4011 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
4012 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
4013
4014 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
4015 mach->ContMask = mach->ContStack[mach->ContStackTop];
4016
4017 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
4018 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
4019
4020 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
4021 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
4022
4023 assert(mach->FuncStackTop > 0);
4024 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
4025
4026 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
4027
4028 UPDATE_EXEC_MASK(mach);
4029 break;
4030
4031 case TGSI_OPCODE_NOP:
4032 break;
4033
4034 case TGSI_OPCODE_BREAKC:
4035 FETCH(&r[0], 0, TGSI_CHAN_X);
4036 /* update CondMask */
4037 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
4038 mach->LoopMask &= ~0x1;
4039 }
4040 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
4041 mach->LoopMask &= ~0x2;
4042 }
4043 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
4044 mach->LoopMask &= ~0x4;
4045 }
4046 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
4047 mach->LoopMask &= ~0x8;
4048 }
4049 /* Todo: if mach->LoopMask == 0, jump to end of loop */
4050 UPDATE_EXEC_MASK(mach);
4051 break;
4052
4053 case TGSI_OPCODE_F2I:
4054 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
4055 break;
4056
4057 case TGSI_OPCODE_IDIV:
4058 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4059 break;
4060
4061 case TGSI_OPCODE_IMAX:
4062 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4063 break;
4064
4065 case TGSI_OPCODE_IMIN:
4066 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4067 break;
4068
4069 case TGSI_OPCODE_INEG:
4070 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4071 break;
4072
4073 case TGSI_OPCODE_ISGE:
4074 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4075 break;
4076
4077 case TGSI_OPCODE_ISHR:
4078 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4079 break;
4080
4081 case TGSI_OPCODE_ISLT:
4082 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4083 break;
4084
4085 case TGSI_OPCODE_F2U:
4086 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
4087 break;
4088
4089 case TGSI_OPCODE_U2F:
4090 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
4091 break;
4092
4093 case TGSI_OPCODE_UADD:
4094 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4095 break;
4096
4097 case TGSI_OPCODE_UDIV:
4098 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4099 break;
4100
4101 case TGSI_OPCODE_UMAD:
4102 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4103 break;
4104
4105 case TGSI_OPCODE_UMAX:
4106 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4107 break;
4108
4109 case TGSI_OPCODE_UMIN:
4110 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4111 break;
4112
4113 case TGSI_OPCODE_UMOD:
4114 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4115 break;
4116
4117 case TGSI_OPCODE_UMUL:
4118 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4119 break;
4120
4121 case TGSI_OPCODE_USEQ:
4122 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4123 break;
4124
4125 case TGSI_OPCODE_USGE:
4126 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4127 break;
4128
4129 case TGSI_OPCODE_USHR:
4130 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4131 break;
4132
4133 case TGSI_OPCODE_USLT:
4134 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4135 break;
4136
4137 case TGSI_OPCODE_USNE:
4138 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4139 break;
4140
4141 case TGSI_OPCODE_SWITCH:
4142 exec_switch(mach, inst);
4143 break;
4144
4145 case TGSI_OPCODE_CASE:
4146 exec_case(mach, inst);
4147 break;
4148
4149 case TGSI_OPCODE_DEFAULT:
4150 exec_default(mach);
4151 break;
4152
4153 case TGSI_OPCODE_ENDSWITCH:
4154 exec_endswitch(mach);
4155 break;
4156
4157 case TGSI_OPCODE_SAMPLE_I:
4158 assert(0);
4159 break;
4160
4161 case TGSI_OPCODE_SAMPLE_I_MS:
4162 assert(0);
4163 break;
4164
4165 case TGSI_OPCODE_SAMPLE:
4166 exec_sample(mach, inst, TEX_MODIFIER_NONE);
4167 break;
4168
4169 case TGSI_OPCODE_SAMPLE_B:
4170 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS);
4171 break;
4172
4173 case TGSI_OPCODE_SAMPLE_C:
4174 exec_sample(mach, inst, TEX_MODIFIER_NONE);
4175 break;
4176
4177 case TGSI_OPCODE_SAMPLE_C_LZ:
4178 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS);
4179 break;
4180
4181 case TGSI_OPCODE_SAMPLE_D:
4182 exec_sample_d(mach, inst);
4183 break;
4184
4185 case TGSI_OPCODE_SAMPLE_L:
4186 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD);
4187 break;
4188
4189 case TGSI_OPCODE_GATHER4:
4190 assert(0);
4191 break;
4192
4193 case TGSI_OPCODE_SVIEWINFO:
4194 assert(0);
4195 break;
4196
4197 case TGSI_OPCODE_SAMPLE_POS:
4198 assert(0);
4199 break;
4200
4201 case TGSI_OPCODE_SAMPLE_INFO:
4202 assert(0);
4203 break;
4204
4205 case TGSI_OPCODE_UARL:
4206 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
4207 break;
4208
4209 case TGSI_OPCODE_UCMP:
4210 exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4211 break;
4212
4213 case TGSI_OPCODE_IABS:
4214 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4215 break;
4216
4217 case TGSI_OPCODE_ISSG:
4218 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4219 break;
4220
4221 default:
4222 assert( 0 );
4223 }
4224 }
4225
4226
4227 #define DEBUG_EXECUTION 0
4228
4229
4230 /**
4231 * Run TGSI interpreter.
4232 * \return bitmask of "alive" quad components
4233 */
4234 uint
4235 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
4236 {
4237 uint i;
4238 int pc = 0;
4239
4240 mach->CondMask = 0xf;
4241 mach->LoopMask = 0xf;
4242 mach->ContMask = 0xf;
4243 mach->FuncMask = 0xf;
4244 mach->ExecMask = 0xf;
4245
4246 mach->Switch.mask = 0xf;
4247
4248 assert(mach->CondStackTop == 0);
4249 assert(mach->LoopStackTop == 0);
4250 assert(mach->ContStackTop == 0);
4251 assert(mach->SwitchStackTop == 0);
4252 assert(mach->BreakStackTop == 0);
4253 assert(mach->CallStackTop == 0);
4254
4255 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
4256 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
4257
4258 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
4259 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
4260 mach->Primitives[0] = 0;
4261 }
4262
4263 /* execute declarations (interpolants) */
4264 for (i = 0; i < mach->NumDeclarations; i++) {
4265 exec_declaration( mach, mach->Declarations+i );
4266 }
4267
4268 {
4269 #if DEBUG_EXECUTION
4270 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
4271 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
4272 uint inst = 1;
4273
4274 memcpy(temps, mach->Temps, sizeof(temps));
4275 memcpy(outputs, mach->Outputs, sizeof(outputs));
4276 #endif
4277
4278 /* execute instructions, until pc is set to -1 */
4279 while (pc != -1) {
4280
4281 #if DEBUG_EXECUTION
4282 uint i;
4283
4284 tgsi_dump_instruction(&mach->Instructions[pc], inst++);
4285 #endif
4286
4287 assert(pc < (int) mach->NumInstructions);
4288 exec_instruction(mach, mach->Instructions + pc, &pc);
4289
4290 #if DEBUG_EXECUTION
4291 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
4292 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
4293 uint j;
4294
4295 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
4296 debug_printf("TEMP[%2u] = ", i);
4297 for (j = 0; j < 4; j++) {
4298 if (j > 0) {
4299 debug_printf(" ");
4300 }
4301 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
4302 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
4303 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
4304 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
4305 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
4306 }
4307 }
4308 }
4309 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
4310 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
4311 uint j;
4312
4313 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
4314 debug_printf("OUT[%2u] = ", i);
4315 for (j = 0; j < 4; j++) {
4316 if (j > 0) {
4317 debug_printf(" ");
4318 }
4319 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
4320 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
4321 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
4322 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
4323 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
4324 }
4325 }
4326 }
4327 #endif
4328 }
4329 }
4330
4331 #if 0
4332 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
4333 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
4334 /*
4335 * Scale back depth component.
4336 */
4337 for (i = 0; i < 4; i++)
4338 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
4339 }
4340 #endif
4341
4342 /* Strictly speaking, these assertions aren't really needed but they
4343 * can potentially catch some bugs in the control flow code.
4344 */
4345 assert(mach->CondStackTop == 0);
4346 assert(mach->LoopStackTop == 0);
4347 assert(mach->ContStackTop == 0);
4348 assert(mach->SwitchStackTop == 0);
4349 assert(mach->BreakStackTop == 0);
4350 assert(mach->CallStackTop == 0);
4351
4352 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4353 }