83f0b1b0134437f2ae9fc8c99b7aa40a9b402ea7
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 VMware, Inc.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
 * The ExecMask is computed from other masks (CondMask, LoopMask and
 * ContMask, plus Switch.mask and FuncMask; see UPDATE_EXEC_MASK) which are
 * controlled by the flow control instructions (such as IF/ELSE/ENDIF,
 * LOOP/ENDLOOP and CONT).
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_half.h"
62 #include "util/u_memory.h"
63 #include "util/u_math.h"
64 #include "util/rounding.h"
65
66
/* Debug switch; the code that tests it is not in this part of the file. */
#define DEBUG_EXECUTION 0

/* When non-zero, exp2/log2 use the util_fast_* helpers instead of libm. */
#define FAST_MATH 0

/* Channel indices used by the derivative helpers (micro_ddx, micro_ddy...). */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3
76
77 union tgsi_double_channel {
78 double d[TGSI_QUAD_SIZE];
79 unsigned u[TGSI_QUAD_SIZE][2];
80 uint64_t u64[TGSI_QUAD_SIZE];
81 int64_t i64[TGSI_QUAD_SIZE];
82 };
83
84 struct tgsi_double_vector {
85 union tgsi_double_channel xy;
86 union tgsi_double_channel zw;
87 };
88
89 static void
90 micro_abs(union tgsi_exec_channel *dst,
91 const union tgsi_exec_channel *src)
92 {
93 dst->f[0] = fabsf(src->f[0]);
94 dst->f[1] = fabsf(src->f[1]);
95 dst->f[2] = fabsf(src->f[2]);
96 dst->f[3] = fabsf(src->f[3]);
97 }
98
99 static void
100 micro_arl(union tgsi_exec_channel *dst,
101 const union tgsi_exec_channel *src)
102 {
103 dst->i[0] = (int)floorf(src->f[0]);
104 dst->i[1] = (int)floorf(src->f[1]);
105 dst->i[2] = (int)floorf(src->f[2]);
106 dst->i[3] = (int)floorf(src->f[3]);
107 }
108
109 static void
110 micro_arr(union tgsi_exec_channel *dst,
111 const union tgsi_exec_channel *src)
112 {
113 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
114 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
115 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
116 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
117 }
118
119 static void
120 micro_ceil(union tgsi_exec_channel *dst,
121 const union tgsi_exec_channel *src)
122 {
123 dst->f[0] = ceilf(src->f[0]);
124 dst->f[1] = ceilf(src->f[1]);
125 dst->f[2] = ceilf(src->f[2]);
126 dst->f[3] = ceilf(src->f[3]);
127 }
128
129 static void
130 micro_cmp(union tgsi_exec_channel *dst,
131 const union tgsi_exec_channel *src0,
132 const union tgsi_exec_channel *src1,
133 const union tgsi_exec_channel *src2)
134 {
135 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
136 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
137 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
138 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
139 }
140
141 static void
142 micro_cos(union tgsi_exec_channel *dst,
143 const union tgsi_exec_channel *src)
144 {
145 dst->f[0] = cosf(src->f[0]);
146 dst->f[1] = cosf(src->f[1]);
147 dst->f[2] = cosf(src->f[2]);
148 dst->f[3] = cosf(src->f[3]);
149 }
150
151 static void
152 micro_d2f(union tgsi_exec_channel *dst,
153 const union tgsi_double_channel *src)
154 {
155 dst->f[0] = (float)src->d[0];
156 dst->f[1] = (float)src->d[1];
157 dst->f[2] = (float)src->d[2];
158 dst->f[3] = (float)src->d[3];
159 }
160
161 static void
162 micro_d2i(union tgsi_exec_channel *dst,
163 const union tgsi_double_channel *src)
164 {
165 dst->i[0] = (int)src->d[0];
166 dst->i[1] = (int)src->d[1];
167 dst->i[2] = (int)src->d[2];
168 dst->i[3] = (int)src->d[3];
169 }
170
171 static void
172 micro_d2u(union tgsi_exec_channel *dst,
173 const union tgsi_double_channel *src)
174 {
175 dst->u[0] = (unsigned)src->d[0];
176 dst->u[1] = (unsigned)src->d[1];
177 dst->u[2] = (unsigned)src->d[2];
178 dst->u[3] = (unsigned)src->d[3];
179 }
180 static void
181 micro_dabs(union tgsi_double_channel *dst,
182 const union tgsi_double_channel *src)
183 {
184 dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];
185 dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];
186 dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];
187 dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];
188 }
189
190 static void
191 micro_dadd(union tgsi_double_channel *dst,
192 const union tgsi_double_channel *src)
193 {
194 dst->d[0] = src[0].d[0] + src[1].d[0];
195 dst->d[1] = src[0].d[1] + src[1].d[1];
196 dst->d[2] = src[0].d[2] + src[1].d[2];
197 dst->d[3] = src[0].d[3] + src[1].d[3];
198 }
199
200 static void
201 micro_ddiv(union tgsi_double_channel *dst,
202 const union tgsi_double_channel *src)
203 {
204 dst->d[0] = src[0].d[0] / src[1].d[0];
205 dst->d[1] = src[0].d[1] / src[1].d[1];
206 dst->d[2] = src[0].d[2] / src[1].d[2];
207 dst->d[3] = src[0].d[3] / src[1].d[3];
208 }
209
210 static void
211 micro_ddx(union tgsi_exec_channel *dst,
212 const union tgsi_exec_channel *src)
213 {
214 dst->f[0] =
215 dst->f[1] =
216 dst->f[2] =
217 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
218 }
219
220 static void
221 micro_ddx_fine(union tgsi_exec_channel *dst,
222 const union tgsi_exec_channel *src)
223 {
224 dst->f[0] =
225 dst->f[1] = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT];
226 dst->f[2] =
227 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
228 }
229
230
231 static void
232 micro_ddy(union tgsi_exec_channel *dst,
233 const union tgsi_exec_channel *src)
234 {
235 dst->f[0] =
236 dst->f[1] =
237 dst->f[2] =
238 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
239 }
240
241 static void
242 micro_ddy_fine(union tgsi_exec_channel *dst,
243 const union tgsi_exec_channel *src)
244 {
245 dst->f[0] =
246 dst->f[2] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
247 dst->f[1] =
248 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT];
249 }
250
251 static void
252 micro_dmul(union tgsi_double_channel *dst,
253 const union tgsi_double_channel *src)
254 {
255 dst->d[0] = src[0].d[0] * src[1].d[0];
256 dst->d[1] = src[0].d[1] * src[1].d[1];
257 dst->d[2] = src[0].d[2] * src[1].d[2];
258 dst->d[3] = src[0].d[3] * src[1].d[3];
259 }
260
261 static void
262 micro_dmax(union tgsi_double_channel *dst,
263 const union tgsi_double_channel *src)
264 {
265 dst->d[0] = src[0].d[0] > src[1].d[0] ? src[0].d[0] : src[1].d[0];
266 dst->d[1] = src[0].d[1] > src[1].d[1] ? src[0].d[1] : src[1].d[1];
267 dst->d[2] = src[0].d[2] > src[1].d[2] ? src[0].d[2] : src[1].d[2];
268 dst->d[3] = src[0].d[3] > src[1].d[3] ? src[0].d[3] : src[1].d[3];
269 }
270
271 static void
272 micro_dmin(union tgsi_double_channel *dst,
273 const union tgsi_double_channel *src)
274 {
275 dst->d[0] = src[0].d[0] < src[1].d[0] ? src[0].d[0] : src[1].d[0];
276 dst->d[1] = src[0].d[1] < src[1].d[1] ? src[0].d[1] : src[1].d[1];
277 dst->d[2] = src[0].d[2] < src[1].d[2] ? src[0].d[2] : src[1].d[2];
278 dst->d[3] = src[0].d[3] < src[1].d[3] ? src[0].d[3] : src[1].d[3];
279 }
280
281 static void
282 micro_dneg(union tgsi_double_channel *dst,
283 const union tgsi_double_channel *src)
284 {
285 dst->d[0] = -src->d[0];
286 dst->d[1] = -src->d[1];
287 dst->d[2] = -src->d[2];
288 dst->d[3] = -src->d[3];
289 }
290
291 static void
292 micro_dslt(union tgsi_double_channel *dst,
293 const union tgsi_double_channel *src)
294 {
295 dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;
296 dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;
297 dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;
298 dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;
299 }
300
301 static void
302 micro_dsne(union tgsi_double_channel *dst,
303 const union tgsi_double_channel *src)
304 {
305 dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U;
306 dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U;
307 dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U;
308 dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U;
309 }
310
311 static void
312 micro_dsge(union tgsi_double_channel *dst,
313 const union tgsi_double_channel *src)
314 {
315 dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U;
316 dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U;
317 dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U;
318 dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U;
319 }
320
321 static void
322 micro_dseq(union tgsi_double_channel *dst,
323 const union tgsi_double_channel *src)
324 {
325 dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U;
326 dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U;
327 dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U;
328 dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U;
329 }
330
331 static void
332 micro_drcp(union tgsi_double_channel *dst,
333 const union tgsi_double_channel *src)
334 {
335 dst->d[0] = 1.0 / src->d[0];
336 dst->d[1] = 1.0 / src->d[1];
337 dst->d[2] = 1.0 / src->d[2];
338 dst->d[3] = 1.0 / src->d[3];
339 }
340
341 static void
342 micro_dsqrt(union tgsi_double_channel *dst,
343 const union tgsi_double_channel *src)
344 {
345 dst->d[0] = sqrt(src->d[0]);
346 dst->d[1] = sqrt(src->d[1]);
347 dst->d[2] = sqrt(src->d[2]);
348 dst->d[3] = sqrt(src->d[3]);
349 }
350
351 static void
352 micro_drsq(union tgsi_double_channel *dst,
353 const union tgsi_double_channel *src)
354 {
355 dst->d[0] = 1.0 / sqrt(src->d[0]);
356 dst->d[1] = 1.0 / sqrt(src->d[1]);
357 dst->d[2] = 1.0 / sqrt(src->d[2]);
358 dst->d[3] = 1.0 / sqrt(src->d[3]);
359 }
360
361 static void
362 micro_dmad(union tgsi_double_channel *dst,
363 const union tgsi_double_channel *src)
364 {
365 dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0];
366 dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1];
367 dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2];
368 dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3];
369 }
370
371 static void
372 micro_dfrac(union tgsi_double_channel *dst,
373 const union tgsi_double_channel *src)
374 {
375 dst->d[0] = src->d[0] - floor(src->d[0]);
376 dst->d[1] = src->d[1] - floor(src->d[1]);
377 dst->d[2] = src->d[2] - floor(src->d[2]);
378 dst->d[3] = src->d[3] - floor(src->d[3]);
379 }
380
381 static void
382 micro_dflr(union tgsi_double_channel *dst,
383 const union tgsi_double_channel *src)
384 {
385 dst->d[0] = floor(src->d[0]);
386 dst->d[1] = floor(src->d[1]);
387 dst->d[2] = floor(src->d[2]);
388 dst->d[3] = floor(src->d[3]);
389 }
390
391 static void
392 micro_dldexp(union tgsi_double_channel *dst,
393 const union tgsi_double_channel *src0,
394 union tgsi_exec_channel *src1)
395 {
396 dst->d[0] = ldexp(src0->d[0], src1->i[0]);
397 dst->d[1] = ldexp(src0->d[1], src1->i[1]);
398 dst->d[2] = ldexp(src0->d[2], src1->i[2]);
399 dst->d[3] = ldexp(src0->d[3], src1->i[3]);
400 }
401
402 static void
403 micro_dfracexp(union tgsi_double_channel *dst,
404 union tgsi_exec_channel *dst_exp,
405 const union tgsi_double_channel *src)
406 {
407 dst->d[0] = frexp(src->d[0], &dst_exp->i[0]);
408 dst->d[1] = frexp(src->d[1], &dst_exp->i[1]);
409 dst->d[2] = frexp(src->d[2], &dst_exp->i[2]);
410 dst->d[3] = frexp(src->d[3], &dst_exp->i[3]);
411 }
412
413 static void
414 micro_exp2(union tgsi_exec_channel *dst,
415 const union tgsi_exec_channel *src)
416 {
417 #if FAST_MATH
418 dst->f[0] = util_fast_exp2(src->f[0]);
419 dst->f[1] = util_fast_exp2(src->f[1]);
420 dst->f[2] = util_fast_exp2(src->f[2]);
421 dst->f[3] = util_fast_exp2(src->f[3]);
422 #else
423 #if DEBUG
424 /* Inf is okay for this instruction, so clamp it to silence assertions. */
425 uint i;
426 union tgsi_exec_channel clamped;
427
428 for (i = 0; i < 4; i++) {
429 if (src->f[i] > 127.99999f) {
430 clamped.f[i] = 127.99999f;
431 } else if (src->f[i] < -126.99999f) {
432 clamped.f[i] = -126.99999f;
433 } else {
434 clamped.f[i] = src->f[i];
435 }
436 }
437 src = &clamped;
438 #endif /* DEBUG */
439
440 dst->f[0] = powf(2.0f, src->f[0]);
441 dst->f[1] = powf(2.0f, src->f[1]);
442 dst->f[2] = powf(2.0f, src->f[2]);
443 dst->f[3] = powf(2.0f, src->f[3]);
444 #endif /* FAST_MATH */
445 }
446
447 static void
448 micro_f2d(union tgsi_double_channel *dst,
449 const union tgsi_exec_channel *src)
450 {
451 dst->d[0] = (double)src->f[0];
452 dst->d[1] = (double)src->f[1];
453 dst->d[2] = (double)src->f[2];
454 dst->d[3] = (double)src->f[3];
455 }
456
457 static void
458 micro_flr(union tgsi_exec_channel *dst,
459 const union tgsi_exec_channel *src)
460 {
461 dst->f[0] = floorf(src->f[0]);
462 dst->f[1] = floorf(src->f[1]);
463 dst->f[2] = floorf(src->f[2]);
464 dst->f[3] = floorf(src->f[3]);
465 }
466
467 static void
468 micro_frc(union tgsi_exec_channel *dst,
469 const union tgsi_exec_channel *src)
470 {
471 dst->f[0] = src->f[0] - floorf(src->f[0]);
472 dst->f[1] = src->f[1] - floorf(src->f[1]);
473 dst->f[2] = src->f[2] - floorf(src->f[2]);
474 dst->f[3] = src->f[3] - floorf(src->f[3]);
475 }
476
477 static void
478 micro_i2d(union tgsi_double_channel *dst,
479 const union tgsi_exec_channel *src)
480 {
481 dst->d[0] = (double)src->i[0];
482 dst->d[1] = (double)src->i[1];
483 dst->d[2] = (double)src->i[2];
484 dst->d[3] = (double)src->i[3];
485 }
486
487 static void
488 micro_iabs(union tgsi_exec_channel *dst,
489 const union tgsi_exec_channel *src)
490 {
491 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
492 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
493 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
494 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
495 }
496
497 static void
498 micro_ineg(union tgsi_exec_channel *dst,
499 const union tgsi_exec_channel *src)
500 {
501 dst->i[0] = -src->i[0];
502 dst->i[1] = -src->i[1];
503 dst->i[2] = -src->i[2];
504 dst->i[3] = -src->i[3];
505 }
506
507 static void
508 micro_lg2(union tgsi_exec_channel *dst,
509 const union tgsi_exec_channel *src)
510 {
511 #if FAST_MATH
512 dst->f[0] = util_fast_log2(src->f[0]);
513 dst->f[1] = util_fast_log2(src->f[1]);
514 dst->f[2] = util_fast_log2(src->f[2]);
515 dst->f[3] = util_fast_log2(src->f[3]);
516 #else
517 dst->f[0] = logf(src->f[0]) * 1.442695f;
518 dst->f[1] = logf(src->f[1]) * 1.442695f;
519 dst->f[2] = logf(src->f[2]) * 1.442695f;
520 dst->f[3] = logf(src->f[3]) * 1.442695f;
521 #endif
522 }
523
524 static void
525 micro_lrp(union tgsi_exec_channel *dst,
526 const union tgsi_exec_channel *src0,
527 const union tgsi_exec_channel *src1,
528 const union tgsi_exec_channel *src2)
529 {
530 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
531 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
532 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
533 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
534 }
535
536 static void
537 micro_mad(union tgsi_exec_channel *dst,
538 const union tgsi_exec_channel *src0,
539 const union tgsi_exec_channel *src1,
540 const union tgsi_exec_channel *src2)
541 {
542 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
543 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
544 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
545 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
546 }
547
548 static void
549 micro_mov(union tgsi_exec_channel *dst,
550 const union tgsi_exec_channel *src)
551 {
552 dst->u[0] = src->u[0];
553 dst->u[1] = src->u[1];
554 dst->u[2] = src->u[2];
555 dst->u[3] = src->u[3];
556 }
557
558 static void
559 micro_rcp(union tgsi_exec_channel *dst,
560 const union tgsi_exec_channel *src)
561 {
562 #if 0 /* for debugging */
563 assert(src->f[0] != 0.0f);
564 assert(src->f[1] != 0.0f);
565 assert(src->f[2] != 0.0f);
566 assert(src->f[3] != 0.0f);
567 #endif
568 dst->f[0] = 1.0f / src->f[0];
569 dst->f[1] = 1.0f / src->f[1];
570 dst->f[2] = 1.0f / src->f[2];
571 dst->f[3] = 1.0f / src->f[3];
572 }
573
574 static void
575 micro_rnd(union tgsi_exec_channel *dst,
576 const union tgsi_exec_channel *src)
577 {
578 dst->f[0] = _mesa_roundevenf(src->f[0]);
579 dst->f[1] = _mesa_roundevenf(src->f[1]);
580 dst->f[2] = _mesa_roundevenf(src->f[2]);
581 dst->f[3] = _mesa_roundevenf(src->f[3]);
582 }
583
584 static void
585 micro_rsq(union tgsi_exec_channel *dst,
586 const union tgsi_exec_channel *src)
587 {
588 #if 0 /* for debugging */
589 assert(src->f[0] != 0.0f);
590 assert(src->f[1] != 0.0f);
591 assert(src->f[2] != 0.0f);
592 assert(src->f[3] != 0.0f);
593 #endif
594 dst->f[0] = 1.0f / sqrtf(src->f[0]);
595 dst->f[1] = 1.0f / sqrtf(src->f[1]);
596 dst->f[2] = 1.0f / sqrtf(src->f[2]);
597 dst->f[3] = 1.0f / sqrtf(src->f[3]);
598 }
599
600 static void
601 micro_sqrt(union tgsi_exec_channel *dst,
602 const union tgsi_exec_channel *src)
603 {
604 dst->f[0] = sqrtf(src->f[0]);
605 dst->f[1] = sqrtf(src->f[1]);
606 dst->f[2] = sqrtf(src->f[2]);
607 dst->f[3] = sqrtf(src->f[3]);
608 }
609
610 static void
611 micro_seq(union tgsi_exec_channel *dst,
612 const union tgsi_exec_channel *src0,
613 const union tgsi_exec_channel *src1)
614 {
615 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
616 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
617 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
618 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
619 }
620
621 static void
622 micro_sge(union tgsi_exec_channel *dst,
623 const union tgsi_exec_channel *src0,
624 const union tgsi_exec_channel *src1)
625 {
626 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
627 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
628 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
629 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
630 }
631
632 static void
633 micro_sgn(union tgsi_exec_channel *dst,
634 const union tgsi_exec_channel *src)
635 {
636 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
637 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
638 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
639 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
640 }
641
642 static void
643 micro_isgn(union tgsi_exec_channel *dst,
644 const union tgsi_exec_channel *src)
645 {
646 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
647 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
648 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
649 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
650 }
651
652 static void
653 micro_sgt(union tgsi_exec_channel *dst,
654 const union tgsi_exec_channel *src0,
655 const union tgsi_exec_channel *src1)
656 {
657 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
658 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
659 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
660 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
661 }
662
663 static void
664 micro_sin(union tgsi_exec_channel *dst,
665 const union tgsi_exec_channel *src)
666 {
667 dst->f[0] = sinf(src->f[0]);
668 dst->f[1] = sinf(src->f[1]);
669 dst->f[2] = sinf(src->f[2]);
670 dst->f[3] = sinf(src->f[3]);
671 }
672
673 static void
674 micro_sle(union tgsi_exec_channel *dst,
675 const union tgsi_exec_channel *src0,
676 const union tgsi_exec_channel *src1)
677 {
678 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
679 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
680 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
681 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
682 }
683
684 static void
685 micro_slt(union tgsi_exec_channel *dst,
686 const union tgsi_exec_channel *src0,
687 const union tgsi_exec_channel *src1)
688 {
689 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
690 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
691 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
692 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
693 }
694
695 static void
696 micro_sne(union tgsi_exec_channel *dst,
697 const union tgsi_exec_channel *src0,
698 const union tgsi_exec_channel *src1)
699 {
700 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
701 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
702 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
703 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
704 }
705
706 static void
707 micro_trunc(union tgsi_exec_channel *dst,
708 const union tgsi_exec_channel *src)
709 {
710 dst->f[0] = truncf(src->f[0]);
711 dst->f[1] = truncf(src->f[1]);
712 dst->f[2] = truncf(src->f[2]);
713 dst->f[3] = truncf(src->f[3]);
714 }
715
716 static void
717 micro_u2d(union tgsi_double_channel *dst,
718 const union tgsi_exec_channel *src)
719 {
720 dst->d[0] = (double)src->u[0];
721 dst->d[1] = (double)src->u[1];
722 dst->d[2] = (double)src->u[2];
723 dst->d[3] = (double)src->u[3];
724 }
725
726 static void
727 micro_i64abs(union tgsi_double_channel *dst,
728 const union tgsi_double_channel *src)
729 {
730 dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
731 dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
732 dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
733 dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
734 }
735
736 static void
737 micro_i64sgn(union tgsi_double_channel *dst,
738 const union tgsi_double_channel *src)
739 {
740 dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
741 dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
742 dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
743 dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
744 }
745
746 static void
747 micro_i64neg(union tgsi_double_channel *dst,
748 const union tgsi_double_channel *src)
749 {
750 dst->i64[0] = -src->i64[0];
751 dst->i64[1] = -src->i64[1];
752 dst->i64[2] = -src->i64[2];
753 dst->i64[3] = -src->i64[3];
754 }
755
756 static void
757 micro_u64seq(union tgsi_double_channel *dst,
758 const union tgsi_double_channel *src)
759 {
760 dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
761 dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
762 dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
763 dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
764 }
765
766 static void
767 micro_u64sne(union tgsi_double_channel *dst,
768 const union tgsi_double_channel *src)
769 {
770 dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
771 dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
772 dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
773 dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
774 }
775
776 static void
777 micro_i64slt(union tgsi_double_channel *dst,
778 const union tgsi_double_channel *src)
779 {
780 dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
781 dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
782 dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
783 dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
784 }
785
786 static void
787 micro_u64slt(union tgsi_double_channel *dst,
788 const union tgsi_double_channel *src)
789 {
790 dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
791 dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
792 dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
793 dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
794 }
795
796 static void
797 micro_i64sge(union tgsi_double_channel *dst,
798 const union tgsi_double_channel *src)
799 {
800 dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
801 dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
802 dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
803 dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
804 }
805
806 static void
807 micro_u64sge(union tgsi_double_channel *dst,
808 const union tgsi_double_channel *src)
809 {
810 dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
811 dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
812 dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
813 dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
814 }
815
816 static void
817 micro_u64max(union tgsi_double_channel *dst,
818 const union tgsi_double_channel *src)
819 {
820 dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
821 dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
822 dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
823 dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
824 }
825
826 static void
827 micro_i64max(union tgsi_double_channel *dst,
828 const union tgsi_double_channel *src)
829 {
830 dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
831 dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
832 dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
833 dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
834 }
835
836 static void
837 micro_u64min(union tgsi_double_channel *dst,
838 const union tgsi_double_channel *src)
839 {
840 dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
841 dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
842 dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
843 dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
844 }
845
846 static void
847 micro_i64min(union tgsi_double_channel *dst,
848 const union tgsi_double_channel *src)
849 {
850 dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
851 dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
852 dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
853 dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
854 }
855
856 static void
857 micro_u64add(union tgsi_double_channel *dst,
858 const union tgsi_double_channel *src)
859 {
860 dst->u64[0] = src[0].u64[0] + src[1].u64[0];
861 dst->u64[1] = src[0].u64[1] + src[1].u64[1];
862 dst->u64[2] = src[0].u64[2] + src[1].u64[2];
863 dst->u64[3] = src[0].u64[3] + src[1].u64[3];
864 }
865
866 static void
867 micro_u64mul(union tgsi_double_channel *dst,
868 const union tgsi_double_channel *src)
869 {
870 dst->u64[0] = src[0].u64[0] * src[1].u64[0];
871 dst->u64[1] = src[0].u64[1] * src[1].u64[1];
872 dst->u64[2] = src[0].u64[2] * src[1].u64[2];
873 dst->u64[3] = src[0].u64[3] * src[1].u64[3];
874 }
875
876 static void
877 micro_u64div(union tgsi_double_channel *dst,
878 const union tgsi_double_channel *src)
879 {
880 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull;
881 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull;
882 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull;
883 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull;
884 }
885
886 static void
887 micro_i64div(union tgsi_double_channel *dst,
888 const union tgsi_double_channel *src)
889 {
890 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0;
891 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0;
892 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0;
893 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0;
894 }
895
896 static void
897 micro_u64mod(union tgsi_double_channel *dst,
898 const union tgsi_double_channel *src)
899 {
900 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull;
901 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull;
902 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull;
903 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull;
904 }
905
906 static void
907 micro_i64mod(union tgsi_double_channel *dst,
908 const union tgsi_double_channel *src)
909 {
910 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll;
911 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll;
912 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll;
913 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll;
914 }
915
916 static void
917 micro_u64shl(union tgsi_double_channel *dst,
918 const union tgsi_double_channel *src0,
919 union tgsi_exec_channel *src1)
920 {
921 unsigned masked_count;
922 masked_count = src1->u[0] & 0x3f;
923 dst->u64[0] = src0->u64[0] << masked_count;
924 masked_count = src1->u[1] & 0x3f;
925 dst->u64[1] = src0->u64[1] << masked_count;
926 masked_count = src1->u[2] & 0x3f;
927 dst->u64[2] = src0->u64[2] << masked_count;
928 masked_count = src1->u[3] & 0x3f;
929 dst->u64[3] = src0->u64[3] << masked_count;
930 }
931
932 static void
933 micro_i64shr(union tgsi_double_channel *dst,
934 const union tgsi_double_channel *src0,
935 union tgsi_exec_channel *src1)
936 {
937 unsigned masked_count;
938 masked_count = src1->u[0] & 0x3f;
939 dst->i64[0] = src0->i64[0] >> masked_count;
940 masked_count = src1->u[1] & 0x3f;
941 dst->i64[1] = src0->i64[1] >> masked_count;
942 masked_count = src1->u[2] & 0x3f;
943 dst->i64[2] = src0->i64[2] >> masked_count;
944 masked_count = src1->u[3] & 0x3f;
945 dst->i64[3] = src0->i64[3] >> masked_count;
946 }
947
948 static void
949 micro_u64shr(union tgsi_double_channel *dst,
950 const union tgsi_double_channel *src0,
951 union tgsi_exec_channel *src1)
952 {
953 unsigned masked_count;
954 masked_count = src1->u[0] & 0x3f;
955 dst->u64[0] = src0->u64[0] >> masked_count;
956 masked_count = src1->u[1] & 0x3f;
957 dst->u64[1] = src0->u64[1] >> masked_count;
958 masked_count = src1->u[2] & 0x3f;
959 dst->u64[2] = src0->u64[2] >> masked_count;
960 masked_count = src1->u[3] & 0x3f;
961 dst->u64[3] = src0->u64[3] >> masked_count;
962 }
963
/** Data type tags; the values are consecutive starting at zero. */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT  = 0,
   TGSI_EXEC_DATA_INT    = 1,
   TGSI_EXEC_DATA_UINT   = 2,
   TGSI_EXEC_DATA_DOUBLE = 3,
   TGSI_EXEC_DATA_INT64  = 4,
   TGSI_EXEC_DATA_UINT64 = 5,
};
972
/*
 * Short aliases for the TGSI_EXEC_TEMP_* utility register locations.
 * The _I suffix is the register index, the _C suffix is the channel.
 */
#define TEMP_KILMASK_I       TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C       TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I        TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C        TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I     TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C     TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_PRIMITIVE_S1_I  TGSI_EXEC_TEMP_PRIMITIVE_S1_I
#define TEMP_PRIMITIVE_S1_C  TGSI_EXEC_TEMP_PRIMITIVE_S1_C
#define TEMP_PRIMITIVE_S2_I  TGSI_EXEC_TEMP_PRIMITIVE_S2_I
#define TEMP_PRIMITIVE_S2_C  TGSI_EXEC_TEMP_PRIMITIVE_S2_C
#define TEMP_PRIMITIVE_S3_I  TGSI_EXEC_TEMP_PRIMITIVE_S3_I
#define TEMP_PRIMITIVE_S3_C  TGSI_EXEC_TEMP_PRIMITIVE_S3_C
988
989 static const struct {
990 int idx;
991 int chan;
992 } temp_prim_idxs[] = {
993 { TEMP_PRIMITIVE_I, TEMP_PRIMITIVE_C },
994 { TEMP_PRIMITIVE_S1_I, TEMP_PRIMITIVE_S1_C },
995 { TEMP_PRIMITIVE_S2_I, TEMP_PRIMITIVE_S2_C },
996 { TEMP_PRIMITIVE_S3_I, TEMP_PRIMITIVE_S3_C },
997 };
998
/**
 * Recompute the execution mask of MACH as the intersection of its
 * conditional, loop, continue, switch and function-call masks.
 *
 * MACH is parenthesized at every use so that any pointer expression
 * (e.g. "p + 1") expands correctly; it is evaluated several times, so it
 * must be free of side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
1002
1003
1004 static const union tgsi_exec_channel ZeroVec =
1005 { { 0.0, 0.0, 0.0, 0.0 } };
1006
1007 static const union tgsi_exec_channel OneVec = {
1008 {1.0f, 1.0f, 1.0f, 1.0f}
1009 };
1010
1011 static const union tgsi_exec_channel P128Vec = {
1012 {128.0f, 128.0f, 128.0f, 128.0f}
1013 };
1014
1015 static const union tgsi_exec_channel M128Vec = {
1016 {-128.0f, -128.0f, -128.0f, -128.0f}
1017 };
1018
1019
1020 /**
1021 * Assert that none of the float values in 'chan' are infinite or NaN.
1022 * NaN and Inf may occur normally during program execution and should
1023 * not lead to crashes, etc. But when debugging, it's helpful to catch
1024 * them.
1025 */
1026 static inline void
1027 check_inf_or_nan(const union tgsi_exec_channel *chan)
1028 {
1029 assert(!util_is_inf_or_nan((chan)->f[0]));
1030 assert(!util_is_inf_or_nan((chan)->f[1]));
1031 assert(!util_is_inf_or_nan((chan)->f[2]));
1032 assert(!util_is_inf_or_nan((chan)->f[3]));
1033 }
1034
1035
#ifdef DEBUG
/**
 * Debug helper: print the four float values of 'chan' on one line,
 * prefixed by 'msg'.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg,
                chan->f[0],
                chan->f[1],
                chan->f[2],
                chan->f[3]);
}
#endif
1044
1045
#ifdef DEBUG
/**
 * Debug helper: dump the X, Y, Z and W channels (four floats each) of
 * temporary register 'index'.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char chan_names[] = "XYZW";
   const struct tgsi_exec_vector *vec = &mach->Temps[index];
   int c;

   debug_printf("Temp[%u] =\n", index);
   for (c = 0; c < 4; c++) {
      const union tgsi_exec_channel *ch = &vec->xyzw[c];

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   chan_names[c],
                   ch->f[0], ch->f[1], ch->f[2], ch->f[3]);
   }
}
#endif
1063
1064
1065 void
1066 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
1067 unsigned num_bufs,
1068 const void **bufs,
1069 const unsigned *buf_sizes)
1070 {
1071 unsigned i;
1072
1073 for (i = 0; i < num_bufs; i++) {
1074 mach->Consts[i] = bufs[i];
1075 mach->ConstsSize[i] = buf_sizes[i];
1076 }
1077 }
1078
1079 /**
1080 * Initialize machine state by expanding tokens to full instructions,
1081 * allocating temporary storage, setting up constants, etc.
1082 * After this, we can call tgsi_exec_machine_run() many times.
1083 */
1084 void
1085 tgsi_exec_machine_bind_shader(
1086 struct tgsi_exec_machine *mach,
1087 const struct tgsi_token *tokens,
1088 struct tgsi_sampler *sampler,
1089 struct tgsi_image *image,
1090 struct tgsi_buffer *buffer)
1091 {
1092 uint k;
1093 struct tgsi_parse_context parse;
1094 struct tgsi_full_instruction *instructions;
1095 struct tgsi_full_declaration *declarations;
1096 uint maxInstructions = 10, numInstructions = 0;
1097 uint maxDeclarations = 10, numDeclarations = 0;
1098
1099 #if 0
1100 tgsi_dump(tokens, 0);
1101 #endif
1102
1103 util_init_math();
1104
1105
1106 mach->Tokens = tokens;
1107 mach->Sampler = sampler;
1108 mach->Image = image;
1109 mach->Buffer = buffer;
1110
1111 if (!tokens) {
1112 /* unbind and free all */
1113 FREE(mach->Declarations);
1114 mach->Declarations = NULL;
1115 mach->NumDeclarations = 0;
1116
1117 FREE(mach->Instructions);
1118 mach->Instructions = NULL;
1119 mach->NumInstructions = 0;
1120
1121 return;
1122 }
1123
1124 k = tgsi_parse_init (&parse, mach->Tokens);
1125 if (k != TGSI_PARSE_OK) {
1126 debug_printf( "Problem parsing!\n" );
1127 return;
1128 }
1129
1130 mach->ImmLimit = 0;
1131 mach->NumOutputs = 0;
1132
1133 for (k = 0; k < TGSI_SEMANTIC_COUNT; k++)
1134 mach->SysSemanticToIndex[k] = -1;
1135
1136 if (mach->ShaderType == PIPE_SHADER_GEOMETRY &&
1137 !mach->UsedGeometryShader) {
1138 struct tgsi_exec_vector *inputs;
1139 struct tgsi_exec_vector *outputs;
1140
1141 inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1142 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS,
1143 16);
1144
1145 if (!inputs)
1146 return;
1147
1148 outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1149 TGSI_MAX_TOTAL_VERTICES, 16);
1150
1151 if (!outputs) {
1152 align_free(inputs);
1153 return;
1154 }
1155
1156 align_free(mach->Inputs);
1157 align_free(mach->Outputs);
1158
1159 mach->Inputs = inputs;
1160 mach->Outputs = outputs;
1161 mach->UsedGeometryShader = TRUE;
1162 }
1163
1164 declarations = (struct tgsi_full_declaration *)
1165 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
1166
1167 if (!declarations) {
1168 return;
1169 }
1170
1171 instructions = (struct tgsi_full_instruction *)
1172 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
1173
1174 if (!instructions) {
1175 FREE( declarations );
1176 return;
1177 }
1178
1179 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1180 uint i;
1181
1182 tgsi_parse_token( &parse );
1183 switch( parse.FullToken.Token.Type ) {
1184 case TGSI_TOKEN_TYPE_DECLARATION:
1185 /* save expanded declaration */
1186 if (numDeclarations == maxDeclarations) {
1187 declarations = REALLOC(declarations,
1188 maxDeclarations
1189 * sizeof(struct tgsi_full_declaration),
1190 (maxDeclarations + 10)
1191 * sizeof(struct tgsi_full_declaration));
1192 maxDeclarations += 10;
1193 }
1194 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
1195 unsigned reg;
1196 for (reg = parse.FullToken.FullDeclaration.Range.First;
1197 reg <= parse.FullToken.FullDeclaration.Range.Last;
1198 ++reg) {
1199 ++mach->NumOutputs;
1200 }
1201 }
1202 else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1203 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
1204 mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;
1205 }
1206
1207 memcpy(declarations + numDeclarations,
1208 &parse.FullToken.FullDeclaration,
1209 sizeof(declarations[0]));
1210 numDeclarations++;
1211 break;
1212
1213 case TGSI_TOKEN_TYPE_IMMEDIATE:
1214 {
1215 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1216 assert( size <= 4 );
1217 if (mach->ImmLimit >= mach->ImmsReserved) {
1218 unsigned newReserved = mach->ImmsReserved ? 2 * mach->ImmsReserved : 128;
1219 float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4));
1220 if (imms) {
1221 mach->ImmsReserved = newReserved;
1222 mach->Imms = imms;
1223 } else {
1224 debug_printf("Unable to (re)allocate space for immidiate constants\n");
1225 break;
1226 }
1227 }
1228
1229 for( i = 0; i < size; i++ ) {
1230 mach->Imms[mach->ImmLimit][i] =
1231 parse.FullToken.FullImmediate.u[i].Float;
1232 }
1233 mach->ImmLimit += 1;
1234 }
1235 break;
1236
1237 case TGSI_TOKEN_TYPE_INSTRUCTION:
1238
1239 /* save expanded instruction */
1240 if (numInstructions == maxInstructions) {
1241 instructions = REALLOC(instructions,
1242 maxInstructions
1243 * sizeof(struct tgsi_full_instruction),
1244 (maxInstructions + 10)
1245 * sizeof(struct tgsi_full_instruction));
1246 maxInstructions += 10;
1247 }
1248
1249 memcpy(instructions + numInstructions,
1250 &parse.FullToken.FullInstruction,
1251 sizeof(instructions[0]));
1252
1253 numInstructions++;
1254 break;
1255
1256 case TGSI_TOKEN_TYPE_PROPERTY:
1257 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
1258 if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
1259 mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data;
1260 }
1261 }
1262 break;
1263
1264 default:
1265 assert( 0 );
1266 }
1267 }
1268 tgsi_parse_free (&parse);
1269
1270 FREE(mach->Declarations);
1271 mach->Declarations = declarations;
1272 mach->NumDeclarations = numDeclarations;
1273
1274 FREE(mach->Instructions);
1275 mach->Instructions = instructions;
1276 mach->NumInstructions = numInstructions;
1277 }
1278
1279
1280 struct tgsi_exec_machine *
1281 tgsi_exec_machine_create(enum pipe_shader_type shader_type)
1282 {
1283 struct tgsi_exec_machine *mach;
1284
1285 mach = align_malloc( sizeof *mach, 16 );
1286 if (!mach)
1287 goto fail;
1288
1289 memset(mach, 0, sizeof(*mach));
1290
1291 mach->ShaderType = shader_type;
1292 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
1293 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
1294
1295 if (shader_type != PIPE_SHADER_COMPUTE) {
1296 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);
1297 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);
1298 if (!mach->Inputs || !mach->Outputs)
1299 goto fail;
1300 }
1301
1302 if (shader_type == PIPE_SHADER_FRAGMENT) {
1303 mach->InputSampleOffsetApply = align_malloc(sizeof(apply_sample_offset_func) * PIPE_MAX_SHADER_INPUTS, 16);
1304 if (!mach->InputSampleOffsetApply)
1305 goto fail;
1306 }
1307
1308 #ifdef DEBUG
1309 /* silence warnings */
1310 (void) print_chan;
1311 (void) print_temp;
1312 #endif
1313
1314 return mach;
1315
1316 fail:
1317 if (mach) {
1318 align_free(mach->InputSampleOffsetApply);
1319 align_free(mach->Inputs);
1320 align_free(mach->Outputs);
1321 align_free(mach);
1322 }
1323 return NULL;
1324 }
1325
1326
1327 void
1328 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
1329 {
1330 if (mach) {
1331 FREE(mach->Instructions);
1332 FREE(mach->Declarations);
1333 FREE(mach->Imms);
1334
1335 align_free(mach->InputSampleOffsetApply);
1336 align_free(mach->Inputs);
1337 align_free(mach->Outputs);
1338
1339 align_free(mach);
1340 }
1341 }
1342
1343 static void
1344 micro_add(union tgsi_exec_channel *dst,
1345 const union tgsi_exec_channel *src0,
1346 const union tgsi_exec_channel *src1)
1347 {
1348 dst->f[0] = src0->f[0] + src1->f[0];
1349 dst->f[1] = src0->f[1] + src1->f[1];
1350 dst->f[2] = src0->f[2] + src1->f[2];
1351 dst->f[3] = src0->f[3] + src1->f[3];
1352 }
1353
1354 static void
1355 micro_div(
1356 union tgsi_exec_channel *dst,
1357 const union tgsi_exec_channel *src0,
1358 const union tgsi_exec_channel *src1 )
1359 {
1360 if (src1->f[0] != 0) {
1361 dst->f[0] = src0->f[0] / src1->f[0];
1362 }
1363 if (src1->f[1] != 0) {
1364 dst->f[1] = src0->f[1] / src1->f[1];
1365 }
1366 if (src1->f[2] != 0) {
1367 dst->f[2] = src0->f[2] / src1->f[2];
1368 }
1369 if (src1->f[3] != 0) {
1370 dst->f[3] = src0->f[3] / src1->f[3];
1371 }
1372 }
1373
1374 static void
1375 micro_lt(
1376 union tgsi_exec_channel *dst,
1377 const union tgsi_exec_channel *src0,
1378 const union tgsi_exec_channel *src1,
1379 const union tgsi_exec_channel *src2,
1380 const union tgsi_exec_channel *src3 )
1381 {
1382 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
1383 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
1384 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
1385 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
1386 }
1387
1388 static void
1389 micro_max(union tgsi_exec_channel *dst,
1390 const union tgsi_exec_channel *src0,
1391 const union tgsi_exec_channel *src1)
1392 {
1393 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
1394 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
1395 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
1396 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
1397 }
1398
1399 static void
1400 micro_min(union tgsi_exec_channel *dst,
1401 const union tgsi_exec_channel *src0,
1402 const union tgsi_exec_channel *src1)
1403 {
1404 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
1405 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
1406 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
1407 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
1408 }
1409
1410 static void
1411 micro_mul(union tgsi_exec_channel *dst,
1412 const union tgsi_exec_channel *src0,
1413 const union tgsi_exec_channel *src1)
1414 {
1415 dst->f[0] = src0->f[0] * src1->f[0];
1416 dst->f[1] = src0->f[1] * src1->f[1];
1417 dst->f[2] = src0->f[2] * src1->f[2];
1418 dst->f[3] = src0->f[3] * src1->f[3];
1419 }
1420
1421 static void
1422 micro_neg(
1423 union tgsi_exec_channel *dst,
1424 const union tgsi_exec_channel *src )
1425 {
1426 dst->f[0] = -src->f[0];
1427 dst->f[1] = -src->f[1];
1428 dst->f[2] = -src->f[2];
1429 dst->f[3] = -src->f[3];
1430 }
1431
1432 static void
1433 micro_pow(
1434 union tgsi_exec_channel *dst,
1435 const union tgsi_exec_channel *src0,
1436 const union tgsi_exec_channel *src1 )
1437 {
1438 #if FAST_MATH
1439 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
1440 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
1441 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
1442 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
1443 #else
1444 dst->f[0] = powf( src0->f[0], src1->f[0] );
1445 dst->f[1] = powf( src0->f[1], src1->f[1] );
1446 dst->f[2] = powf( src0->f[2], src1->f[2] );
1447 dst->f[3] = powf( src0->f[3], src1->f[3] );
1448 #endif
1449 }
1450
1451 static void
1452 micro_ldexp(union tgsi_exec_channel *dst,
1453 const union tgsi_exec_channel *src0,
1454 const union tgsi_exec_channel *src1)
1455 {
1456 dst->f[0] = ldexpf(src0->f[0], src1->i[0]);
1457 dst->f[1] = ldexpf(src0->f[1], src1->i[1]);
1458 dst->f[2] = ldexpf(src0->f[2], src1->i[2]);
1459 dst->f[3] = ldexpf(src0->f[3], src1->i[3]);
1460 }
1461
1462 static void
1463 micro_sub(union tgsi_exec_channel *dst,
1464 const union tgsi_exec_channel *src0,
1465 const union tgsi_exec_channel *src1)
1466 {
1467 dst->f[0] = src0->f[0] - src1->f[0];
1468 dst->f[1] = src0->f[1] - src1->f[1];
1469 dst->f[2] = src0->f[2] - src1->f[2];
1470 dst->f[3] = src0->f[3] - src1->f[3];
1471 }
1472
1473 static void
1474 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1475 const uint file,
1476 const uint swizzle,
1477 const union tgsi_exec_channel *index,
1478 const union tgsi_exec_channel *index2D,
1479 union tgsi_exec_channel *chan)
1480 {
1481 uint i;
1482
1483 assert(swizzle < 4);
1484
1485 switch (file) {
1486 case TGSI_FILE_CONSTANT:
1487 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1488 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
1489 assert(mach->Consts[index2D->i[i]]);
1490
1491 if (index->i[i] < 0) {
1492 chan->u[i] = 0;
1493 } else {
1494 /* NOTE: copying the const value as a uint instead of float */
1495 const uint constbuf = index2D->i[i];
1496 const uint *buf = (const uint *)mach->Consts[constbuf];
1497 const int pos = index->i[i] * 4 + swizzle;
1498 /* const buffer bounds check */
1499 if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) {
1500 if (0) {
1501 /* Debug: print warning */
1502 static int count = 0;
1503 if (count++ < 100)
1504 debug_printf("TGSI Exec: const buffer index %d"
1505 " out of bounds\n", pos);
1506 }
1507 chan->u[i] = 0;
1508 }
1509 else
1510 chan->u[i] = buf[pos];
1511 }
1512 }
1513 break;
1514
1515 case TGSI_FILE_INPUT:
1516 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1517 /*
1518 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1519 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1520 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1521 index2D->i[i], index->i[i]);
1522 }*/
1523 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1524 assert(pos >= 0);
1525 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1526 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1527 }
1528 break;
1529
1530 case TGSI_FILE_SYSTEM_VALUE:
1531 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1532 chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
1533 }
1534 break;
1535
1536 case TGSI_FILE_TEMPORARY:
1537 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1538 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1539 assert(index2D->i[i] == 0);
1540
1541 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1542 }
1543 break;
1544
1545 case TGSI_FILE_IMMEDIATE:
1546 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1547 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1548 assert(index2D->i[i] == 0);
1549
1550 chan->f[i] = mach->Imms[index->i[i]][swizzle];
1551 }
1552 break;
1553
1554 case TGSI_FILE_ADDRESS:
1555 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1556 assert(index->i[i] >= 0);
1557 assert(index2D->i[i] == 0);
1558
1559 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1560 }
1561 break;
1562
1563 case TGSI_FILE_OUTPUT:
1564 /* vertex/fragment output vars can be read too */
1565 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1566 assert(index->i[i] >= 0);
1567 assert(index2D->i[i] == 0);
1568
1569 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1570 }
1571 break;
1572
1573 default:
1574 assert(0);
1575 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1576 chan->u[i] = 0;
1577 }
1578 }
1579 }
1580
1581 static void
1582 get_index_registers(const struct tgsi_exec_machine *mach,
1583 const struct tgsi_full_src_register *reg,
1584 union tgsi_exec_channel *index,
1585 union tgsi_exec_channel *index2D)
1586 {
1587 uint swizzle;
1588
1589 /* We start with a direct index into a register file.
1590 *
1591 * file[1],
1592 * where:
1593 * file = Register.File
1594 * [1] = Register.Index
1595 */
1596 index->i[0] =
1597 index->i[1] =
1598 index->i[2] =
1599 index->i[3] = reg->Register.Index;
1600
1601 /* There is an extra source register that indirectly subscripts
1602 * a register file. The direct index now becomes an offset
1603 * that is being added to the indirect register.
1604 *
1605 * file[ind[2].x+1],
1606 * where:
1607 * ind = Indirect.File
1608 * [2] = Indirect.Index
1609 * .x = Indirect.SwizzleX
1610 */
1611 if (reg->Register.Indirect) {
1612 union tgsi_exec_channel index2;
1613 union tgsi_exec_channel indir_index;
1614 const uint execmask = mach->ExecMask;
1615 uint i;
1616
1617 /* which address register (always zero now) */
1618 index2.i[0] =
1619 index2.i[1] =
1620 index2.i[2] =
1621 index2.i[3] = reg->Indirect.Index;
1622 /* get current value of address register[swizzle] */
1623 swizzle = reg->Indirect.Swizzle;
1624 fetch_src_file_channel(mach,
1625 reg->Indirect.File,
1626 swizzle,
1627 &index2,
1628 &ZeroVec,
1629 &indir_index);
1630
1631 /* add value of address register to the offset */
1632 index->i[0] += indir_index.i[0];
1633 index->i[1] += indir_index.i[1];
1634 index->i[2] += indir_index.i[2];
1635 index->i[3] += indir_index.i[3];
1636
1637 /* for disabled execution channels, zero-out the index to
1638 * avoid using a potential garbage value.
1639 */
1640 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1641 if ((execmask & (1 << i)) == 0)
1642 index->i[i] = 0;
1643 }
1644 }
1645
1646 /* There is an extra source register that is a second
1647 * subscript to a register file. Effectively it means that
1648 * the register file is actually a 2D array of registers.
1649 *
1650 * file[3][1],
1651 * where:
1652 * [3] = Dimension.Index
1653 */
1654 if (reg->Register.Dimension) {
1655 index2D->i[0] =
1656 index2D->i[1] =
1657 index2D->i[2] =
1658 index2D->i[3] = reg->Dimension.Index;
1659
1660 /* Again, the second subscript index can be addressed indirectly
1661 * identically to the first one.
1662 * Nothing stops us from indirectly addressing the indirect register,
1663 * but there is no need for that, so we won't exercise it.
1664 *
1665 * file[ind[4].y+3][1],
1666 * where:
1667 * ind = DimIndirect.File
1668 * [4] = DimIndirect.Index
1669 * .y = DimIndirect.SwizzleX
1670 */
1671 if (reg->Dimension.Indirect) {
1672 union tgsi_exec_channel index2;
1673 union tgsi_exec_channel indir_index;
1674 const uint execmask = mach->ExecMask;
1675 uint i;
1676
1677 index2.i[0] =
1678 index2.i[1] =
1679 index2.i[2] =
1680 index2.i[3] = reg->DimIndirect.Index;
1681
1682 swizzle = reg->DimIndirect.Swizzle;
1683 fetch_src_file_channel(mach,
1684 reg->DimIndirect.File,
1685 swizzle,
1686 &index2,
1687 &ZeroVec,
1688 &indir_index);
1689
1690 index2D->i[0] += indir_index.i[0];
1691 index2D->i[1] += indir_index.i[1];
1692 index2D->i[2] += indir_index.i[2];
1693 index2D->i[3] += indir_index.i[3];
1694
1695 /* for disabled execution channels, zero-out the index to
1696 * avoid using a potential garbage value.
1697 */
1698 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1699 if ((execmask & (1 << i)) == 0) {
1700 index2D->i[i] = 0;
1701 }
1702 }
1703 }
1704
1705 /* If by any chance there was a need for a 3D array of register
1706 * files, we would have to check whether Dimension is followed
1707 * by a dimension register and continue the saga.
1708 */
1709 } else {
1710 index2D->i[0] =
1711 index2D->i[1] =
1712 index2D->i[2] =
1713 index2D->i[3] = 0;
1714 }
1715 }
1716
1717
1718 static void
1719 fetch_source_d(const struct tgsi_exec_machine *mach,
1720 union tgsi_exec_channel *chan,
1721 const struct tgsi_full_src_register *reg,
1722 const uint chan_index)
1723 {
1724 union tgsi_exec_channel index;
1725 union tgsi_exec_channel index2D;
1726 uint swizzle;
1727
1728 get_index_registers(mach, reg, &index, &index2D);
1729
1730
1731 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1732 fetch_src_file_channel(mach,
1733 reg->Register.File,
1734 swizzle,
1735 &index,
1736 &index2D,
1737 chan);
1738 }
1739
1740 static void
1741 fetch_source(const struct tgsi_exec_machine *mach,
1742 union tgsi_exec_channel *chan,
1743 const struct tgsi_full_src_register *reg,
1744 const uint chan_index,
1745 enum tgsi_exec_datatype src_datatype)
1746 {
1747 fetch_source_d(mach, chan, reg, chan_index);
1748
1749 if (reg->Register.Absolute) {
1750 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1751 micro_abs(chan, chan);
1752 } else {
1753 micro_iabs(chan, chan);
1754 }
1755 }
1756
1757 if (reg->Register.Negate) {
1758 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1759 micro_neg(chan, chan);
1760 } else {
1761 micro_ineg(chan, chan);
1762 }
1763 }
1764 }
1765
1766 static union tgsi_exec_channel *
1767 store_dest_dstret(struct tgsi_exec_machine *mach,
1768 const union tgsi_exec_channel *chan,
1769 const struct tgsi_full_dst_register *reg,
1770 uint chan_index,
1771 enum tgsi_exec_datatype dst_datatype)
1772 {
1773 static union tgsi_exec_channel null;
1774 union tgsi_exec_channel *dst;
1775 union tgsi_exec_channel index2D;
1776 int offset = 0; /* indirection offset */
1777 int index;
1778
1779 /* for debugging */
1780 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1781 check_inf_or_nan(chan);
1782 }
1783
1784 /* There is an extra source register that indirectly subscripts
1785 * a register file. The direct index now becomes an offset
1786 * that is being added to the indirect register.
1787 *
1788 * file[ind[2].x+1],
1789 * where:
1790 * ind = Indirect.File
1791 * [2] = Indirect.Index
1792 * .x = Indirect.SwizzleX
1793 */
1794 if (reg->Register.Indirect) {
1795 union tgsi_exec_channel index;
1796 union tgsi_exec_channel indir_index;
1797 uint swizzle;
1798
1799 /* which address register (always zero for now) */
1800 index.i[0] =
1801 index.i[1] =
1802 index.i[2] =
1803 index.i[3] = reg->Indirect.Index;
1804
1805 /* get current value of address register[swizzle] */
1806 swizzle = reg->Indirect.Swizzle;
1807
1808 /* fetch values from the address/indirection register */
1809 fetch_src_file_channel(mach,
1810 reg->Indirect.File,
1811 swizzle,
1812 &index,
1813 &ZeroVec,
1814 &indir_index);
1815
1816 /* save indirection offset */
1817 offset = indir_index.i[0];
1818 }
1819
1820 /* There is an extra source register that is a second
1821 * subscript to a register file. Effectively it means that
1822 * the register file is actually a 2D array of registers.
1823 *
1824 * file[3][1],
1825 * where:
1826 * [3] = Dimension.Index
1827 */
1828 if (reg->Register.Dimension) {
1829 index2D.i[0] =
1830 index2D.i[1] =
1831 index2D.i[2] =
1832 index2D.i[3] = reg->Dimension.Index;
1833
1834 /* Again, the second subscript index can be addressed indirectly
1835 * identically to the first one.
1836 * Nothing stops us from indirectly addressing the indirect register,
1837 * but there is no need for that, so we won't exercise it.
1838 *
1839 * file[ind[4].y+3][1],
1840 * where:
1841 * ind = DimIndirect.File
1842 * [4] = DimIndirect.Index
1843 * .y = DimIndirect.SwizzleX
1844 */
1845 if (reg->Dimension.Indirect) {
1846 union tgsi_exec_channel index2;
1847 union tgsi_exec_channel indir_index;
1848 const uint execmask = mach->ExecMask;
1849 unsigned swizzle;
1850 uint i;
1851
1852 index2.i[0] =
1853 index2.i[1] =
1854 index2.i[2] =
1855 index2.i[3] = reg->DimIndirect.Index;
1856
1857 swizzle = reg->DimIndirect.Swizzle;
1858 fetch_src_file_channel(mach,
1859 reg->DimIndirect.File,
1860 swizzle,
1861 &index2,
1862 &ZeroVec,
1863 &indir_index);
1864
1865 index2D.i[0] += indir_index.i[0];
1866 index2D.i[1] += indir_index.i[1];
1867 index2D.i[2] += indir_index.i[2];
1868 index2D.i[3] += indir_index.i[3];
1869
1870 /* for disabled execution channels, zero-out the index to
1871 * avoid using a potential garbage value.
1872 */
1873 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1874 if ((execmask & (1 << i)) == 0) {
1875 index2D.i[i] = 0;
1876 }
1877 }
1878 }
1879
1880 /* If by any chance there was a need for a 3D array of register
1881 * files, we would have to check whether Dimension is followed
1882 * by a dimension register and continue the saga.
1883 */
1884 } else {
1885 index2D.i[0] =
1886 index2D.i[1] =
1887 index2D.i[2] =
1888 index2D.i[3] = 0;
1889 }
1890
1891 switch (reg->Register.File) {
1892 case TGSI_FILE_NULL:
1893 dst = &null;
1894 break;
1895
1896 case TGSI_FILE_OUTPUT:
1897 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1898 + reg->Register.Index;
1899 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1900 #if 0
1901 debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1902 mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
1903 reg->Register.Index);
1904 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1905 debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1906 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1907 if (execmask & (1 << i))
1908 debug_printf("%f, ", chan->f[i]);
1909 debug_printf(")\n");
1910 }
1911 #endif
1912 break;
1913
1914 case TGSI_FILE_TEMPORARY:
1915 index = reg->Register.Index;
1916 assert( index < TGSI_EXEC_NUM_TEMPS );
1917 dst = &mach->Temps[offset + index].xyzw[chan_index];
1918 break;
1919
1920 case TGSI_FILE_ADDRESS:
1921 index = reg->Register.Index;
1922 dst = &mach->Addrs[index].xyzw[chan_index];
1923 break;
1924
1925 default:
1926 assert( 0 );
1927 return NULL;
1928 }
1929
1930 return dst;
1931 }
1932
1933 static void
1934 store_dest_double(struct tgsi_exec_machine *mach,
1935 const union tgsi_exec_channel *chan,
1936 const struct tgsi_full_dst_register *reg,
1937 uint chan_index,
1938 enum tgsi_exec_datatype dst_datatype)
1939 {
1940 union tgsi_exec_channel *dst;
1941 const uint execmask = mach->ExecMask;
1942 int i;
1943
1944 dst = store_dest_dstret(mach, chan, reg, chan_index, dst_datatype);
1945 if (!dst)
1946 return;
1947
1948 /* doubles path */
1949 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1950 if (execmask & (1 << i))
1951 dst->i[i] = chan->i[i];
1952 }
1953
1954 static void
1955 store_dest(struct tgsi_exec_machine *mach,
1956 const union tgsi_exec_channel *chan,
1957 const struct tgsi_full_dst_register *reg,
1958 const struct tgsi_full_instruction *inst,
1959 uint chan_index,
1960 enum tgsi_exec_datatype dst_datatype)
1961 {
1962 union tgsi_exec_channel *dst;
1963 const uint execmask = mach->ExecMask;
1964 int i;
1965
1966 dst = store_dest_dstret(mach, chan, reg, chan_index, dst_datatype);
1967 if (!dst)
1968 return;
1969
1970 if (!inst->Instruction.Saturate) {
1971 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1972 if (execmask & (1 << i))
1973 dst->i[i] = chan->i[i];
1974 }
1975 else {
1976 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1977 if (execmask & (1 << i)) {
1978 if (chan->f[i] < 0.0f)
1979 dst->f[i] = 0.0f;
1980 else if (chan->f[i] > 1.0f)
1981 dst->f[i] = 1.0f;
1982 else
1983 dst->i[i] = chan->i[i];
1984 }
1985 }
1986 }
1987
/*
 * Shorthands for fetching channel CHAN of source operand INDEX of the
 * current instruction into VAL, as float (FETCH) or as signed integer
 * (IFETCH).  Both expect 'mach' and 'inst' to be in scope at the point of
 * use.
 */
#define FETCH(VAL,INDEX,CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

#define IFETCH(VAL,INDEX,CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)
1993
1994
1995 /**
1996 * Execute ARB-style KIL which is predicated by a src register.
1997 * Kill fragment if any of the four values is less than zero.
1998 */
1999 static void
2000 exec_kill_if(struct tgsi_exec_machine *mach,
2001 const struct tgsi_full_instruction *inst)
2002 {
2003 uint uniquemask;
2004 uint chan_index;
2005 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2006 union tgsi_exec_channel r[1];
2007
2008 /* This mask stores component bits that were already tested. */
2009 uniquemask = 0;
2010
2011 for (chan_index = 0; chan_index < 4; chan_index++)
2012 {
2013 uint swizzle;
2014 uint i;
2015
2016 /* unswizzle channel */
2017 swizzle = tgsi_util_get_full_src_register_swizzle (
2018 &inst->Src[0],
2019 chan_index);
2020
2021 /* check if the component has not been already tested */
2022 if (uniquemask & (1 << swizzle))
2023 continue;
2024 uniquemask |= 1 << swizzle;
2025
2026 FETCH(&r[0], 0, chan_index);
2027 for (i = 0; i < 4; i++)
2028 if (r[0].f[i] < 0.0f)
2029 kilmask |= 1 << i;
2030 }
2031
2032 /* restrict to fragments currently executing */
2033 kilmask &= mach->ExecMask;
2034
2035 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2036 }
2037
2038 /**
2039 * Unconditional fragment kill/discard.
2040 */
2041 static void
2042 exec_kill(struct tgsi_exec_machine *mach)
2043 {
2044 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2045
2046 /* kill fragment for all fragments currently executing */
2047 kilmask = mach->ExecMask;
2048 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2049 }
2050
2051 static void
2052 emit_vertex(struct tgsi_exec_machine *mach,
2053 const struct tgsi_full_instruction *inst)
2054 {
2055 union tgsi_exec_channel r[1];
2056 unsigned stream_id;
2057 unsigned *prim_count;
2058 /* FIXME: check for exec mask correctly
2059 unsigned i;
2060 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2061 if ((mach->ExecMask & (1 << i)))
2062 */
2063 IFETCH(&r[0], 0, TGSI_CHAN_X);
2064 stream_id = r[0].u[0];
2065 prim_count = &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
2066 if (mach->ExecMask) {
2067 if (mach->Primitives[stream_id][*prim_count] >= mach->MaxOutputVertices)
2068 return;
2069
2070 if (mach->Primitives[stream_id][*prim_count] == 0)
2071 mach->PrimitiveOffsets[stream_id][*prim_count] = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0];
2072 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
2073 mach->Primitives[stream_id][*prim_count]++;
2074 }
2075 }
2076
2077 static void
2078 emit_primitive(struct tgsi_exec_machine *mach,
2079 const struct tgsi_full_instruction *inst)
2080 {
2081 unsigned *prim_count;
2082 union tgsi_exec_channel r[1];
2083 unsigned stream_id = 0;
2084 /* FIXME: check for exec mask correctly
2085 unsigned i;
2086 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2087 if ((mach->ExecMask & (1 << i)))
2088 */
2089 if (inst) {
2090 IFETCH(&r[0], 0, TGSI_CHAN_X);
2091 stream_id = r[0].u[0];
2092 }
2093 prim_count = &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
2094 if (mach->ExecMask) {
2095 ++(*prim_count);
2096 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
2097 mach->Primitives[stream_id][*prim_count] = 0;
2098 }
2099 }
2100
2101 static void
2102 conditional_emit_primitive(struct tgsi_exec_machine *mach)
2103 {
2104 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
2105 int emitted_verts =
2106 mach->Primitives[0][mach->Temps[temp_prim_idxs[0].idx].xyzw[temp_prim_idxs[0].chan].u[0]];
2107 if (emitted_verts) {
2108 emit_primitive(mach, NULL);
2109 }
2110 }
2111 }
2112
2113
2114 /*
2115 * Fetch four texture samples using STR texture coordinates.
2116 */
2117 static void
2118 fetch_texel( struct tgsi_sampler *sampler,
2119 const unsigned sview_idx,
2120 const unsigned sampler_idx,
2121 const union tgsi_exec_channel *s,
2122 const union tgsi_exec_channel *t,
2123 const union tgsi_exec_channel *p,
2124 const union tgsi_exec_channel *c0,
2125 const union tgsi_exec_channel *c1,
2126 float derivs[3][2][TGSI_QUAD_SIZE],
2127 const int8_t offset[3],
2128 enum tgsi_sampler_control control,
2129 union tgsi_exec_channel *r,
2130 union tgsi_exec_channel *g,
2131 union tgsi_exec_channel *b,
2132 union tgsi_exec_channel *a )
2133 {
2134 uint j;
2135 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2136
2137 /* FIXME: handle explicit derivs, offsets */
2138 sampler->get_samples(sampler, sview_idx, sampler_idx,
2139 s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
2140
2141 for (j = 0; j < 4; j++) {
2142 r->f[j] = rgba[0][j];
2143 g->f[j] = rgba[1][j];
2144 b->f[j] = rgba[2][j];
2145 a->f[j] = rgba[3][j];
2146 }
2147 }
2148
2149
/*
 * Values for the 'modifier' argument of the texture execution helpers
 * below; they select how the extra texture argument is interpreted.
 */
#define TEX_MODIFIER_NONE           0
#define TEX_MODIFIER_PROJECTED      1
#define TEX_MODIFIER_LOD_BIAS       2
#define TEX_MODIFIER_EXPLICIT_LOD   3
#define TEX_MODIFIER_LEVEL_ZERO     4
#define TEX_MODIFIER_GATHER         5
2156
2157 /*
2158 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
2159 */
2160 static void
2161 fetch_texel_offsets(struct tgsi_exec_machine *mach,
2162 const struct tgsi_full_instruction *inst,
2163 int8_t offsets[3])
2164 {
2165 if (inst->Texture.NumOffsets == 1) {
2166 union tgsi_exec_channel index;
2167 union tgsi_exec_channel offset[3];
2168 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
2169 fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2170 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
2171 fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2172 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
2173 fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2174 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
2175 offsets[0] = offset[0].i[0];
2176 offsets[1] = offset[1].i[0];
2177 offsets[2] = offset[2].i[0];
2178 } else {
2179 assert(inst->Texture.NumOffsets == 0);
2180 offsets[0] = offsets[1] = offsets[2] = 0;
2181 }
2182 }
2183
2184
2185 /*
2186 * Fetch dx and dy values for one channel (s, t or r).
2187 * Put dx values into one float array, dy values into another.
2188 */
2189 static void
2190 fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
2191 const struct tgsi_full_instruction *inst,
2192 unsigned regdsrcx,
2193 unsigned chan,
2194 float derivs[2][TGSI_QUAD_SIZE])
2195 {
2196 union tgsi_exec_channel d;
2197 FETCH(&d, regdsrcx, chan);
2198 derivs[0][0] = d.f[0];
2199 derivs[0][1] = d.f[1];
2200 derivs[0][2] = d.f[2];
2201 derivs[0][3] = d.f[3];
2202 FETCH(&d, regdsrcx + 1, chan);
2203 derivs[1][0] = d.f[0];
2204 derivs[1][1] = d.f[1];
2205 derivs[1][2] = d.f[2];
2206 derivs[1][3] = d.f[3];
2207 }
2208
2209 static uint
2210 fetch_sampler_unit(struct tgsi_exec_machine *mach,
2211 const struct tgsi_full_instruction *inst,
2212 uint sampler)
2213 {
2214 uint unit = 0;
2215 int i;
2216 if (inst->Src[sampler].Register.Indirect) {
2217 const struct tgsi_full_src_register *reg = &inst->Src[sampler];
2218 union tgsi_exec_channel indir_index, index2;
2219 const uint execmask = mach->ExecMask;
2220 index2.i[0] =
2221 index2.i[1] =
2222 index2.i[2] =
2223 index2.i[3] = reg->Indirect.Index;
2224
2225 fetch_src_file_channel(mach,
2226 reg->Indirect.File,
2227 reg->Indirect.Swizzle,
2228 &index2,
2229 &ZeroVec,
2230 &indir_index);
2231 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2232 if (execmask & (1 << i)) {
2233 unit = inst->Src[sampler].Register.Index + indir_index.i[i];
2234 break;
2235 }
2236 }
2237
2238 } else {
2239 unit = inst->Src[sampler].Register.Index;
2240 }
2241 return unit;
2242 }
2243
2244 /*
2245 * execute a texture instruction.
2246 *
2247 * modifier is used to control the channel routing for the
2248 * instruction variants like proj, lod, and texture with lod bias.
2249 * sampler indicates which src register the sampler is contained in.
2250 */
2251 static void
2252 exec_tex(struct tgsi_exec_machine *mach,
2253 const struct tgsi_full_instruction *inst,
2254 uint modifier, uint sampler)
2255 {
2256 const union tgsi_exec_channel *args[5], *proj = NULL;
2257 union tgsi_exec_channel r[5];
2258 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2259 uint chan;
2260 uint unit;
2261 int8_t offsets[3];
2262 int dim, shadow_ref, i;
2263
2264 unit = fetch_sampler_unit(mach, inst, sampler);
2265 /* always fetch all 3 offsets, overkill but keeps code simple */
2266 fetch_texel_offsets(mach, inst, offsets);
2267
2268 assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
2269 assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
2270
2271 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2272 shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);
2273
2274 assert(dim <= 4);
2275 if (shadow_ref >= 0)
2276 assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args));
2277
2278 /* fetch modifier to the last argument */
2279 if (modifier != TEX_MODIFIER_NONE) {
2280 const int last = ARRAY_SIZE(args) - 1;
2281
2282 /* fetch modifier from src0.w or src1.x */
2283 if (sampler == 1) {
2284 assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);
2285 FETCH(&r[last], 0, TGSI_CHAN_W);
2286 }
2287 else {
2288 FETCH(&r[last], 1, TGSI_CHAN_X);
2289 }
2290
2291 if (modifier != TEX_MODIFIER_PROJECTED) {
2292 args[last] = &r[last];
2293 }
2294 else {
2295 proj = &r[last];
2296 args[last] = &ZeroVec;
2297 }
2298
2299 /* point unused arguments to zero vector */
2300 for (i = dim; i < last; i++)
2301 args[i] = &ZeroVec;
2302
2303 if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
2304 control = TGSI_SAMPLER_LOD_EXPLICIT;
2305 else if (modifier == TEX_MODIFIER_LOD_BIAS)
2306 control = TGSI_SAMPLER_LOD_BIAS;
2307 else if (modifier == TEX_MODIFIER_GATHER)
2308 control = TGSI_SAMPLER_GATHER;
2309 }
2310 else {
2311 for (i = dim; i < (int)ARRAY_SIZE(args); i++)
2312 args[i] = &ZeroVec;
2313 }
2314
2315 /* fetch coordinates */
2316 for (i = 0; i < dim; i++) {
2317 FETCH(&r[i], 0, TGSI_CHAN_X + i);
2318
2319 if (proj)
2320 micro_div(&r[i], &r[i], proj);
2321
2322 args[i] = &r[i];
2323 }
2324
2325 /* fetch reference value */
2326 if (shadow_ref >= 0) {
2327 FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));
2328
2329 if (proj)
2330 micro_div(&r[shadow_ref], &r[shadow_ref], proj);
2331
2332 args[shadow_ref] = &r[shadow_ref];
2333 }
2334
2335 fetch_texel(mach->Sampler, unit, unit,
2336 args[0], args[1], args[2], args[3], args[4],
2337 NULL, offsets, control,
2338 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2339
2340 #if 0
2341 debug_printf("fetch r: %g %g %g %g\n",
2342 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
2343 debug_printf("fetch g: %g %g %g %g\n",
2344 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
2345 debug_printf("fetch b: %g %g %g %g\n",
2346 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
2347 debug_printf("fetch a: %g %g %g %g\n",
2348 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
2349 #endif
2350
2351 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2352 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2353 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2354 }
2355 }
2356 }
2357
2358 static void
2359 exec_lodq(struct tgsi_exec_machine *mach,
2360 const struct tgsi_full_instruction *inst)
2361 {
2362 uint resource_unit, sampler_unit;
2363 unsigned dim;
2364 unsigned i;
2365 union tgsi_exec_channel coords[4];
2366 const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
2367 union tgsi_exec_channel r[2];
2368
2369 resource_unit = fetch_sampler_unit(mach, inst, 1);
2370 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2371 uint target = mach->SamplerViews[resource_unit].Resource;
2372 dim = tgsi_util_get_texture_coord_dim(target);
2373 sampler_unit = fetch_sampler_unit(mach, inst, 2);
2374 } else {
2375 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2376 sampler_unit = resource_unit;
2377 }
2378 assert(dim <= ARRAY_SIZE(coords));
2379 /* fetch coordinates */
2380 for (i = 0; i < dim; i++) {
2381 FETCH(&coords[i], 0, TGSI_CHAN_X + i);
2382 args[i] = &coords[i];
2383 }
2384 for (i = dim; i < ARRAY_SIZE(coords); i++) {
2385 args[i] = &ZeroVec;
2386 }
2387 mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit,
2388 args[0]->f,
2389 args[1]->f,
2390 args[2]->f,
2391 args[3]->f,
2392 TGSI_SAMPLER_LOD_NONE,
2393 r[0].f,
2394 r[1].f);
2395
2396 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2397 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
2398 TGSI_EXEC_DATA_FLOAT);
2399 }
2400 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2401 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
2402 TGSI_EXEC_DATA_FLOAT);
2403 }
2404 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2405 unsigned char swizzles[4];
2406 unsigned chan;
2407 swizzles[0] = inst->Src[1].Register.SwizzleX;
2408 swizzles[1] = inst->Src[1].Register.SwizzleY;
2409 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2410 swizzles[3] = inst->Src[1].Register.SwizzleW;
2411
2412 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2413 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2414 if (swizzles[chan] >= 2) {
2415 store_dest(mach, &ZeroVec,
2416 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2417 } else {
2418 store_dest(mach, &r[swizzles[chan]],
2419 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2420 }
2421 }
2422 }
2423 } else {
2424 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2425 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
2426 TGSI_EXEC_DATA_FLOAT);
2427 }
2428 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2429 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
2430 TGSI_EXEC_DATA_FLOAT);
2431 }
2432 }
2433 }
2434
2435 static void
2436 exec_txd(struct tgsi_exec_machine *mach,
2437 const struct tgsi_full_instruction *inst)
2438 {
2439 union tgsi_exec_channel r[4];
2440 float derivs[3][2][TGSI_QUAD_SIZE];
2441 uint chan;
2442 uint unit;
2443 int8_t offsets[3];
2444
2445 unit = fetch_sampler_unit(mach, inst, 3);
2446 /* always fetch all 3 offsets, overkill but keeps code simple */
2447 fetch_texel_offsets(mach, inst, offsets);
2448
2449 switch (inst->Texture.Texture) {
2450 case TGSI_TEXTURE_1D:
2451 FETCH(&r[0], 0, TGSI_CHAN_X);
2452
2453 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2454
2455 fetch_texel(mach->Sampler, unit, unit,
2456 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2457 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2458 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2459 break;
2460
2461 case TGSI_TEXTURE_SHADOW1D:
2462 case TGSI_TEXTURE_1D_ARRAY:
2463 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2464 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
2465 FETCH(&r[0], 0, TGSI_CHAN_X);
2466 FETCH(&r[1], 0, TGSI_CHAN_Y);
2467 FETCH(&r[2], 0, TGSI_CHAN_Z);
2468
2469 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2470
2471 fetch_texel(mach->Sampler, unit, unit,
2472 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2473 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2474 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2475 break;
2476
2477 case TGSI_TEXTURE_2D:
2478 case TGSI_TEXTURE_RECT:
2479 FETCH(&r[0], 0, TGSI_CHAN_X);
2480 FETCH(&r[1], 0, TGSI_CHAN_Y);
2481
2482 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2483 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2484
2485 fetch_texel(mach->Sampler, unit, unit,
2486 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2487 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2488 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2489 break;
2490
2491
2492 case TGSI_TEXTURE_SHADOW2D:
2493 case TGSI_TEXTURE_SHADOWRECT:
2494 case TGSI_TEXTURE_2D_ARRAY:
2495 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2496 /* only SHADOW2D_ARRAY actually needs W */
2497 FETCH(&r[0], 0, TGSI_CHAN_X);
2498 FETCH(&r[1], 0, TGSI_CHAN_Y);
2499 FETCH(&r[2], 0, TGSI_CHAN_Z);
2500 FETCH(&r[3], 0, TGSI_CHAN_W);
2501
2502 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2503 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2504
2505 fetch_texel(mach->Sampler, unit, unit,
2506 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2507 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2508 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2509 break;
2510
2511 case TGSI_TEXTURE_3D:
2512 case TGSI_TEXTURE_CUBE:
2513 case TGSI_TEXTURE_CUBE_ARRAY:
2514 case TGSI_TEXTURE_SHADOWCUBE:
2515 /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */
2516 FETCH(&r[0], 0, TGSI_CHAN_X);
2517 FETCH(&r[1], 0, TGSI_CHAN_Y);
2518 FETCH(&r[2], 0, TGSI_CHAN_Z);
2519 FETCH(&r[3], 0, TGSI_CHAN_W);
2520
2521 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2522 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2523 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
2524
2525 fetch_texel(mach->Sampler, unit, unit,
2526 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2527 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2528 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2529 break;
2530
2531 default:
2532 assert(0);
2533 }
2534
2535 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2536 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2537 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2538 }
2539 }
2540 }
2541
2542
2543 static void
2544 exec_txf(struct tgsi_exec_machine *mach,
2545 const struct tgsi_full_instruction *inst)
2546 {
2547 union tgsi_exec_channel r[4];
2548 uint chan;
2549 uint unit;
2550 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2551 int j;
2552 int8_t offsets[3];
2553 unsigned target;
2554
2555 unit = fetch_sampler_unit(mach, inst, 1);
2556 /* always fetch all 3 offsets, overkill but keeps code simple */
2557 fetch_texel_offsets(mach, inst, offsets);
2558
2559 IFETCH(&r[3], 0, TGSI_CHAN_W);
2560
2561 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2562 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2563 target = mach->SamplerViews[unit].Resource;
2564 }
2565 else {
2566 target = inst->Texture.Texture;
2567 }
2568 switch(target) {
2569 case TGSI_TEXTURE_3D:
2570 case TGSI_TEXTURE_2D_ARRAY:
2571 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2572 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2573 IFETCH(&r[2], 0, TGSI_CHAN_Z);
2574 /* fallthrough */
2575 case TGSI_TEXTURE_2D:
2576 case TGSI_TEXTURE_RECT:
2577 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2578 case TGSI_TEXTURE_SHADOW2D:
2579 case TGSI_TEXTURE_SHADOWRECT:
2580 case TGSI_TEXTURE_1D_ARRAY:
2581 case TGSI_TEXTURE_2D_MSAA:
2582 IFETCH(&r[1], 0, TGSI_CHAN_Y);
2583 /* fallthrough */
2584 case TGSI_TEXTURE_BUFFER:
2585 case TGSI_TEXTURE_1D:
2586 case TGSI_TEXTURE_SHADOW1D:
2587 IFETCH(&r[0], 0, TGSI_CHAN_X);
2588 break;
2589 default:
2590 assert(0);
2591 break;
2592 }
2593
2594 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2595 offsets, rgba);
2596
2597 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2598 r[0].f[j] = rgba[0][j];
2599 r[1].f[j] = rgba[1][j];
2600 r[2].f[j] = rgba[2][j];
2601 r[3].f[j] = rgba[3][j];
2602 }
2603
2604 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2605 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2606 unsigned char swizzles[4];
2607 swizzles[0] = inst->Src[1].Register.SwizzleX;
2608 swizzles[1] = inst->Src[1].Register.SwizzleY;
2609 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2610 swizzles[3] = inst->Src[1].Register.SwizzleW;
2611
2612 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2613 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2614 store_dest(mach, &r[swizzles[chan]],
2615 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2616 }
2617 }
2618 }
2619 else {
2620 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2621 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2622 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2623 }
2624 }
2625 }
2626 }
2627
2628 static void
2629 exec_txq(struct tgsi_exec_machine *mach,
2630 const struct tgsi_full_instruction *inst)
2631 {
2632 int result[4];
2633 union tgsi_exec_channel r[4], src;
2634 uint chan;
2635 uint unit;
2636 int i,j;
2637
2638 unit = fetch_sampler_unit(mach, inst, 1);
2639
2640 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2641
2642 /* XXX: This interface can't return per-pixel values */
2643 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2644
2645 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2646 for (j = 0; j < 4; j++) {
2647 r[j].i[i] = result[j];
2648 }
2649 }
2650
2651 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2652 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2653 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
2654 TGSI_EXEC_DATA_INT);
2655 }
2656 }
2657 }
2658
2659 static void
2660 exec_sample(struct tgsi_exec_machine *mach,
2661 const struct tgsi_full_instruction *inst,
2662 uint modifier, boolean compare)
2663 {
2664 const uint resource_unit = inst->Src[1].Register.Index;
2665 const uint sampler_unit = inst->Src[2].Register.Index;
2666 union tgsi_exec_channel r[5], c1;
2667 const union tgsi_exec_channel *lod = &ZeroVec;
2668 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2669 uint chan;
2670 unsigned char swizzles[4];
2671 int8_t offsets[3];
2672
2673 /* always fetch all 3 offsets, overkill but keeps code simple */
2674 fetch_texel_offsets(mach, inst, offsets);
2675
2676 assert(modifier != TEX_MODIFIER_PROJECTED);
2677
2678 if (modifier != TEX_MODIFIER_NONE) {
2679 if (modifier == TEX_MODIFIER_LOD_BIAS) {
2680 FETCH(&c1, 3, TGSI_CHAN_X);
2681 lod = &c1;
2682 control = TGSI_SAMPLER_LOD_BIAS;
2683 }
2684 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2685 FETCH(&c1, 3, TGSI_CHAN_X);
2686 lod = &c1;
2687 control = TGSI_SAMPLER_LOD_EXPLICIT;
2688 }
2689 else if (modifier == TEX_MODIFIER_GATHER) {
2690 control = TGSI_SAMPLER_GATHER;
2691 }
2692 else {
2693 assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
2694 control = TGSI_SAMPLER_LOD_ZERO;
2695 }
2696 }
2697
2698 FETCH(&r[0], 0, TGSI_CHAN_X);
2699
2700 switch (mach->SamplerViews[resource_unit].Resource) {
2701 case TGSI_TEXTURE_1D:
2702 if (compare) {
2703 FETCH(&r[2], 3, TGSI_CHAN_X);
2704 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2705 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2706 NULL, offsets, control,
2707 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2708 }
2709 else {
2710 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2711 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2712 NULL, offsets, control,
2713 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2714 }
2715 break;
2716
2717 case TGSI_TEXTURE_1D_ARRAY:
2718 case TGSI_TEXTURE_2D:
2719 case TGSI_TEXTURE_RECT:
2720 FETCH(&r[1], 0, TGSI_CHAN_Y);
2721 if (compare) {
2722 FETCH(&r[2], 3, TGSI_CHAN_X);
2723 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2724 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2725 NULL, offsets, control,
2726 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2727 }
2728 else {
2729 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2730 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2731 NULL, offsets, control,
2732 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2733 }
2734 break;
2735
2736 case TGSI_TEXTURE_2D_ARRAY:
2737 case TGSI_TEXTURE_3D:
2738 case TGSI_TEXTURE_CUBE:
2739 FETCH(&r[1], 0, TGSI_CHAN_Y);
2740 FETCH(&r[2], 0, TGSI_CHAN_Z);
2741 if(compare) {
2742 FETCH(&r[3], 3, TGSI_CHAN_X);
2743 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2744 &r[0], &r[1], &r[2], &r[3], lod,
2745 NULL, offsets, control,
2746 &r[0], &r[1], &r[2], &r[3]);
2747 }
2748 else {
2749 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2750 &r[0], &r[1], &r[2], &ZeroVec, lod,
2751 NULL, offsets, control,
2752 &r[0], &r[1], &r[2], &r[3]);
2753 }
2754 break;
2755
2756 case TGSI_TEXTURE_CUBE_ARRAY:
2757 FETCH(&r[1], 0, TGSI_CHAN_Y);
2758 FETCH(&r[2], 0, TGSI_CHAN_Z);
2759 FETCH(&r[3], 0, TGSI_CHAN_W);
2760 if(compare) {
2761 FETCH(&r[4], 3, TGSI_CHAN_X);
2762 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2763 &r[0], &r[1], &r[2], &r[3], &r[4],
2764 NULL, offsets, control,
2765 &r[0], &r[1], &r[2], &r[3]);
2766 }
2767 else {
2768 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2769 &r[0], &r[1], &r[2], &r[3], lod,
2770 NULL, offsets, control,
2771 &r[0], &r[1], &r[2], &r[3]);
2772 }
2773 break;
2774
2775
2776 default:
2777 assert(0);
2778 }
2779
2780 swizzles[0] = inst->Src[1].Register.SwizzleX;
2781 swizzles[1] = inst->Src[1].Register.SwizzleY;
2782 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2783 swizzles[3] = inst->Src[1].Register.SwizzleW;
2784
2785 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2786 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2787 store_dest(mach, &r[swizzles[chan]],
2788 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2789 }
2790 }
2791 }
2792
2793 static void
2794 exec_sample_d(struct tgsi_exec_machine *mach,
2795 const struct tgsi_full_instruction *inst)
2796 {
2797 const uint resource_unit = inst->Src[1].Register.Index;
2798 const uint sampler_unit = inst->Src[2].Register.Index;
2799 union tgsi_exec_channel r[4];
2800 float derivs[3][2][TGSI_QUAD_SIZE];
2801 uint chan;
2802 unsigned char swizzles[4];
2803 int8_t offsets[3];
2804
2805 /* always fetch all 3 offsets, overkill but keeps code simple */
2806 fetch_texel_offsets(mach, inst, offsets);
2807
2808 FETCH(&r[0], 0, TGSI_CHAN_X);
2809
2810 switch (mach->SamplerViews[resource_unit].Resource) {
2811 case TGSI_TEXTURE_1D:
2812 case TGSI_TEXTURE_1D_ARRAY:
2813 /* only 1D array actually needs Y */
2814 FETCH(&r[1], 0, TGSI_CHAN_Y);
2815
2816 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2817
2818 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2819 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2820 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2821 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2822 break;
2823
2824 case TGSI_TEXTURE_2D:
2825 case TGSI_TEXTURE_RECT:
2826 case TGSI_TEXTURE_2D_ARRAY:
2827 /* only 2D array actually needs Z */
2828 FETCH(&r[1], 0, TGSI_CHAN_Y);
2829 FETCH(&r[2], 0, TGSI_CHAN_Z);
2830
2831 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2832 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2833
2834 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2835 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */
2836 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2837 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2838 break;
2839
2840 case TGSI_TEXTURE_3D:
2841 case TGSI_TEXTURE_CUBE:
2842 case TGSI_TEXTURE_CUBE_ARRAY:
2843 /* only cube array actually needs W */
2844 FETCH(&r[1], 0, TGSI_CHAN_Y);
2845 FETCH(&r[2], 0, TGSI_CHAN_Z);
2846 FETCH(&r[3], 0, TGSI_CHAN_W);
2847
2848 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2849 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2850 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2851
2852 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2853 &r[0], &r[1], &r[2], &r[3], &ZeroVec,
2854 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2855 &r[0], &r[1], &r[2], &r[3]);
2856 break;
2857
2858 default:
2859 assert(0);
2860 }
2861
2862 swizzles[0] = inst->Src[1].Register.SwizzleX;
2863 swizzles[1] = inst->Src[1].Register.SwizzleY;
2864 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2865 swizzles[3] = inst->Src[1].Register.SwizzleW;
2866
2867 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2868 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2869 store_dest(mach, &r[swizzles[chan]],
2870 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2871 }
2872 }
2873 }
2874
2875
2876 /**
2877 * Evaluate a constant-valued coefficient at the position of the
2878 * current quad.
2879 */
2880 static void
2881 eval_constant_coef(
2882 struct tgsi_exec_machine *mach,
2883 unsigned attrib,
2884 unsigned chan )
2885 {
2886 unsigned i;
2887
2888 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2889 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2890 }
2891 }
2892
2893 static void
2894 interp_constant_offset(
2895 UNUSED const struct tgsi_exec_machine *mach,
2896 UNUSED unsigned attrib,
2897 UNUSED unsigned chan,
2898 UNUSED float ofs_x,
2899 UNUSED float ofs_y,
2900 UNUSED union tgsi_exec_channel *out_chan)
2901 {
2902 }
2903
2904 /**
2905 * Evaluate a linear-valued coefficient at the position of the
2906 * current quad.
2907 */
2908 static void
2909 interp_linear_offset(
2910 const struct tgsi_exec_machine *mach,
2911 unsigned attrib,
2912 unsigned chan,
2913 float ofs_x,
2914 float ofs_y,
2915 union tgsi_exec_channel *out_chan)
2916 {
2917 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2918 const float dady = mach->InterpCoefs[attrib].dady[chan];
2919 const float delta = ofs_x * dadx + ofs_y * dady;
2920 out_chan->f[0] += delta;
2921 out_chan->f[1] += delta;
2922 out_chan->f[2] += delta;
2923 out_chan->f[3] += delta;
2924 }
2925
2926 static void
2927 eval_linear_coef(struct tgsi_exec_machine *mach,
2928 unsigned attrib,
2929 unsigned chan)
2930 {
2931 const float x = mach->QuadPos.xyzw[0].f[0];
2932 const float y = mach->QuadPos.xyzw[1].f[0];
2933 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2934 const float dady = mach->InterpCoefs[attrib].dady[chan];
2935 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2936
2937 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2938 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2939 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2940 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2941 }
2942
2943 /**
2944 * Evaluate a perspective-valued coefficient at the position of the
2945 * current quad.
2946 */
2947
2948 static void
2949 interp_perspective_offset(
2950 const struct tgsi_exec_machine *mach,
2951 unsigned attrib,
2952 unsigned chan,
2953 float ofs_x,
2954 float ofs_y,
2955 union tgsi_exec_channel *out_chan)
2956 {
2957 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2958 const float dady = mach->InterpCoefs[attrib].dady[chan];
2959 const float *w = mach->QuadPos.xyzw[3].f;
2960 const float delta = ofs_x * dadx + ofs_y * dady;
2961 out_chan->f[0] += delta / w[0];
2962 out_chan->f[1] += delta / w[1];
2963 out_chan->f[2] += delta / w[2];
2964 out_chan->f[3] += delta / w[3];
2965 }
2966
2967 static void
2968 eval_perspective_coef(
2969 struct tgsi_exec_machine *mach,
2970 unsigned attrib,
2971 unsigned chan )
2972 {
2973 const float x = mach->QuadPos.xyzw[0].f[0];
2974 const float y = mach->QuadPos.xyzw[1].f[0];
2975 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2976 const float dady = mach->InterpCoefs[attrib].dady[chan];
2977 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2978 const float *w = mach->QuadPos.xyzw[3].f;
2979 /* divide by W here */
2980 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2981 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2982 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2983 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2984 }
2985
2986
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel
 * 'chan' of input attribute 'attrib' for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
2991
2992 static void
2993 exec_declaration(struct tgsi_exec_machine *mach,
2994 const struct tgsi_full_declaration *decl)
2995 {
2996 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2997 mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2998 return;
2999 }
3000
3001 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
3002 if (decl->Declaration.File == TGSI_FILE_INPUT) {
3003 uint first, last, mask;
3004
3005 first = decl->Range.First;
3006 last = decl->Range.Last;
3007 mask = decl->Declaration.UsageMask;
3008
3009 /* XXX we could remove this special-case code since
3010 * mach->InterpCoefs[first].a0 should already have the
3011 * front/back-face value. But we should first update the
3012 * ureg code to emit the right UsageMask value (WRITEMASK_X).
3013 * Then, we could remove the tgsi_exec_machine::Face field.
3014 */
3015 /* XXX make FACE a system value */
3016 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
3017 uint i;
3018
3019 assert(decl->Semantic.Index == 0);
3020 assert(first == last);
3021
3022 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3023 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
3024 }
3025 } else {
3026 eval_coef_func eval;
3027 apply_sample_offset_func interp;
3028 uint i, j;
3029
3030 switch (decl->Interp.Interpolate) {
3031 case TGSI_INTERPOLATE_CONSTANT:
3032 eval = eval_constant_coef;
3033 interp = interp_constant_offset;
3034 break;
3035
3036 case TGSI_INTERPOLATE_LINEAR:
3037 eval = eval_linear_coef;
3038 interp = interp_linear_offset;
3039 break;
3040
3041 case TGSI_INTERPOLATE_PERSPECTIVE:
3042 eval = eval_perspective_coef;
3043 interp = interp_perspective_offset;
3044 break;
3045
3046 case TGSI_INTERPOLATE_COLOR:
3047 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
3048 interp = mach->flatshade_color ? interp_constant_offset : interp_perspective_offset;
3049 break;
3050
3051 default:
3052 assert(0);
3053 return;
3054 }
3055
3056 for (i = first; i <= last; i++)
3057 mach->InputSampleOffsetApply[i] = interp;
3058
3059 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3060 if (mask & (1 << j)) {
3061 for (i = first; i <= last; i++) {
3062 eval(mach, i, j);
3063 }
3064 }
3065 }
3066 }
3067
3068 if (DEBUG_EXECUTION) {
3069 uint i, j;
3070 for (i = first; i <= last; ++i) {
3071 debug_printf("IN[%2u] = ", i);
3072 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3073 if (j > 0) {
3074 debug_printf(" ");
3075 }
3076 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3077 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
3078 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
3079 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
3080 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
3081 }
3082 }
3083 }
3084 }
3085 }
3086
3087 }
3088
/**
 * Signature of a per-channel unary operation: read 'src', write 'dst'.
 */
typedef void (*micro_unary_op)(union tgsi_exec_channel *dst,
                               const union tgsi_exec_channel *src);
3091
3092 static void
3093 exec_scalar_unary(struct tgsi_exec_machine *mach,
3094 const struct tgsi_full_instruction *inst,
3095 micro_unary_op op,
3096 enum tgsi_exec_datatype dst_datatype,
3097 enum tgsi_exec_datatype src_datatype)
3098 {
3099 unsigned int chan;
3100 union tgsi_exec_channel src;
3101 union tgsi_exec_channel dst;
3102
3103 fetch_source(mach, &sr