gallium: remove TGSI_OPCODE_ABS
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 VMware, Inc.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
 * The ExecMask is computed from several other masks (CondMask, LoopMask,
 * ContMask, Switch.mask and FuncMask), which are controlled by the flow
 * control instructions (for example IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
 * See UPDATE_EXEC_MASK().
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_half.h"
62 #include "util/u_memory.h"
63 #include "util/u_math.h"
64 #include "util/rounding.h"
65
66
/* Compile-time debugging switch; it is not referenced in this part of the file. */
#define DEBUG_EXECUTION   0

/* When non-zero, micro_exp2() and micro_lg2() use the util_fast_* routines. */
#define FAST_MATH         0

/* Lane indices inside a four-element quad; micro_ddx()/micro_ddy() use them. */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3
76
77 union tgsi_double_channel {
78 double d[TGSI_QUAD_SIZE];
79 unsigned u[TGSI_QUAD_SIZE][2];
80 uint64_t u64[TGSI_QUAD_SIZE];
81 int64_t i64[TGSI_QUAD_SIZE];
82 };
83
84 struct tgsi_double_vector {
85 union tgsi_double_channel xy;
86 union tgsi_double_channel zw;
87 };
88
89 static void
90 micro_abs(union tgsi_exec_channel *dst,
91 const union tgsi_exec_channel *src)
92 {
93 dst->f[0] = fabsf(src->f[0]);
94 dst->f[1] = fabsf(src->f[1]);
95 dst->f[2] = fabsf(src->f[2]);
96 dst->f[3] = fabsf(src->f[3]);
97 }
98
99 static void
100 micro_arl(union tgsi_exec_channel *dst,
101 const union tgsi_exec_channel *src)
102 {
103 dst->i[0] = (int)floorf(src->f[0]);
104 dst->i[1] = (int)floorf(src->f[1]);
105 dst->i[2] = (int)floorf(src->f[2]);
106 dst->i[3] = (int)floorf(src->f[3]);
107 }
108
109 static void
110 micro_arr(union tgsi_exec_channel *dst,
111 const union tgsi_exec_channel *src)
112 {
113 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
114 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
115 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
116 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
117 }
118
119 static void
120 micro_ceil(union tgsi_exec_channel *dst,
121 const union tgsi_exec_channel *src)
122 {
123 dst->f[0] = ceilf(src->f[0]);
124 dst->f[1] = ceilf(src->f[1]);
125 dst->f[2] = ceilf(src->f[2]);
126 dst->f[3] = ceilf(src->f[3]);
127 }
128
129 static void
130 micro_clamp(union tgsi_exec_channel *dst,
131 const union tgsi_exec_channel *src0,
132 const union tgsi_exec_channel *src1,
133 const union tgsi_exec_channel *src2)
134 {
135 dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0];
136 dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1];
137 dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2];
138 dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3];
139 }
140
141 static void
142 micro_cmp(union tgsi_exec_channel *dst,
143 const union tgsi_exec_channel *src0,
144 const union tgsi_exec_channel *src1,
145 const union tgsi_exec_channel *src2)
146 {
147 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
148 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
149 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
150 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
151 }
152
153 static void
154 micro_cos(union tgsi_exec_channel *dst,
155 const union tgsi_exec_channel *src)
156 {
157 dst->f[0] = cosf(src->f[0]);
158 dst->f[1] = cosf(src->f[1]);
159 dst->f[2] = cosf(src->f[2]);
160 dst->f[3] = cosf(src->f[3]);
161 }
162
163 static void
164 micro_d2f(union tgsi_exec_channel *dst,
165 const union tgsi_double_channel *src)
166 {
167 dst->f[0] = (float)src->d[0];
168 dst->f[1] = (float)src->d[1];
169 dst->f[2] = (float)src->d[2];
170 dst->f[3] = (float)src->d[3];
171 }
172
173 static void
174 micro_d2i(union tgsi_exec_channel *dst,
175 const union tgsi_double_channel *src)
176 {
177 dst->i[0] = (int)src->d[0];
178 dst->i[1] = (int)src->d[1];
179 dst->i[2] = (int)src->d[2];
180 dst->i[3] = (int)src->d[3];
181 }
182
183 static void
184 micro_d2u(union tgsi_exec_channel *dst,
185 const union tgsi_double_channel *src)
186 {
187 dst->u[0] = (unsigned)src->d[0];
188 dst->u[1] = (unsigned)src->d[1];
189 dst->u[2] = (unsigned)src->d[2];
190 dst->u[3] = (unsigned)src->d[3];
191 }
192 static void
193 micro_dabs(union tgsi_double_channel *dst,
194 const union tgsi_double_channel *src)
195 {
196 dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];
197 dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];
198 dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];
199 dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];
200 }
201
202 static void
203 micro_dadd(union tgsi_double_channel *dst,
204 const union tgsi_double_channel *src)
205 {
206 dst->d[0] = src[0].d[0] + src[1].d[0];
207 dst->d[1] = src[0].d[1] + src[1].d[1];
208 dst->d[2] = src[0].d[2] + src[1].d[2];
209 dst->d[3] = src[0].d[3] + src[1].d[3];
210 }
211
212 static void
213 micro_ddx(union tgsi_exec_channel *dst,
214 const union tgsi_exec_channel *src)
215 {
216 dst->f[0] =
217 dst->f[1] =
218 dst->f[2] =
219 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
220 }
221
222 static void
223 micro_ddy(union tgsi_exec_channel *dst,
224 const union tgsi_exec_channel *src)
225 {
226 dst->f[0] =
227 dst->f[1] =
228 dst->f[2] =
229 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
230 }
231
232 static void
233 micro_dmul(union tgsi_double_channel *dst,
234 const union tgsi_double_channel *src)
235 {
236 dst->d[0] = src[0].d[0] * src[1].d[0];
237 dst->d[1] = src[0].d[1] * src[1].d[1];
238 dst->d[2] = src[0].d[2] * src[1].d[2];
239 dst->d[3] = src[0].d[3] * src[1].d[3];
240 }
241
242 static void
243 micro_dmax(union tgsi_double_channel *dst,
244 const union tgsi_double_channel *src)
245 {
246 dst->d[0] = src[0].d[0] > src[1].d[0] ? src[0].d[0] : src[1].d[0];
247 dst->d[1] = src[0].d[1] > src[1].d[1] ? src[0].d[1] : src[1].d[1];
248 dst->d[2] = src[0].d[2] > src[1].d[2] ? src[0].d[2] : src[1].d[2];
249 dst->d[3] = src[0].d[3] > src[1].d[3] ? src[0].d[3] : src[1].d[3];
250 }
251
252 static void
253 micro_dmin(union tgsi_double_channel *dst,
254 const union tgsi_double_channel *src)
255 {
256 dst->d[0] = src[0].d[0] < src[1].d[0] ? src[0].d[0] : src[1].d[0];
257 dst->d[1] = src[0].d[1] < src[1].d[1] ? src[0].d[1] : src[1].d[1];
258 dst->d[2] = src[0].d[2] < src[1].d[2] ? src[0].d[2] : src[1].d[2];
259 dst->d[3] = src[0].d[3] < src[1].d[3] ? src[0].d[3] : src[1].d[3];
260 }
261
262 static void
263 micro_dneg(union tgsi_double_channel *dst,
264 const union tgsi_double_channel *src)
265 {
266 dst->d[0] = -src->d[0];
267 dst->d[1] = -src->d[1];
268 dst->d[2] = -src->d[2];
269 dst->d[3] = -src->d[3];
270 }
271
272 static void
273 micro_dslt(union tgsi_double_channel *dst,
274 const union tgsi_double_channel *src)
275 {
276 dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;
277 dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;
278 dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;
279 dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;
280 }
281
282 static void
283 micro_dsne(union tgsi_double_channel *dst,
284 const union tgsi_double_channel *src)
285 {
286 dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U;
287 dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U;
288 dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U;
289 dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U;
290 }
291
292 static void
293 micro_dsge(union tgsi_double_channel *dst,
294 const union tgsi_double_channel *src)
295 {
296 dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U;
297 dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U;
298 dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U;
299 dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U;
300 }
301
302 static void
303 micro_dseq(union tgsi_double_channel *dst,
304 const union tgsi_double_channel *src)
305 {
306 dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U;
307 dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U;
308 dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U;
309 dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U;
310 }
311
312 static void
313 micro_drcp(union tgsi_double_channel *dst,
314 const union tgsi_double_channel *src)
315 {
316 dst->d[0] = 1.0 / src->d[0];
317 dst->d[1] = 1.0 / src->d[1];
318 dst->d[2] = 1.0 / src->d[2];
319 dst->d[3] = 1.0 / src->d[3];
320 }
321
322 static void
323 micro_dsqrt(union tgsi_double_channel *dst,
324 const union tgsi_double_channel *src)
325 {
326 dst->d[0] = sqrt(src->d[0]);
327 dst->d[1] = sqrt(src->d[1]);
328 dst->d[2] = sqrt(src->d[2]);
329 dst->d[3] = sqrt(src->d[3]);
330 }
331
332 static void
333 micro_drsq(union tgsi_double_channel *dst,
334 const union tgsi_double_channel *src)
335 {
336 dst->d[0] = 1.0 / sqrt(src->d[0]);
337 dst->d[1] = 1.0 / sqrt(src->d[1]);
338 dst->d[2] = 1.0 / sqrt(src->d[2]);
339 dst->d[3] = 1.0 / sqrt(src->d[3]);
340 }
341
342 static void
343 micro_dmad(union tgsi_double_channel *dst,
344 const union tgsi_double_channel *src)
345 {
346 dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0];
347 dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1];
348 dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2];
349 dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3];
350 }
351
352 static void
353 micro_dfrac(union tgsi_double_channel *dst,
354 const union tgsi_double_channel *src)
355 {
356 dst->d[0] = src->d[0] - floor(src->d[0]);
357 dst->d[1] = src->d[1] - floor(src->d[1]);
358 dst->d[2] = src->d[2] - floor(src->d[2]);
359 dst->d[3] = src->d[3] - floor(src->d[3]);
360 }
361
362 static void
363 micro_dldexp(union tgsi_double_channel *dst,
364 const union tgsi_double_channel *src0,
365 union tgsi_exec_channel *src1)
366 {
367 dst->d[0] = ldexp(src0->d[0], src1->i[0]);
368 dst->d[1] = ldexp(src0->d[1], src1->i[1]);
369 dst->d[2] = ldexp(src0->d[2], src1->i[2]);
370 dst->d[3] = ldexp(src0->d[3], src1->i[3]);
371 }
372
373 static void
374 micro_dfracexp(union tgsi_double_channel *dst,
375 union tgsi_exec_channel *dst_exp,
376 const union tgsi_double_channel *src)
377 {
378 dst->d[0] = frexp(src->d[0], &dst_exp->i[0]);
379 dst->d[1] = frexp(src->d[1], &dst_exp->i[1]);
380 dst->d[2] = frexp(src->d[2], &dst_exp->i[2]);
381 dst->d[3] = frexp(src->d[3], &dst_exp->i[3]);
382 }
383
384 static void
385 micro_exp2(union tgsi_exec_channel *dst,
386 const union tgsi_exec_channel *src)
387 {
388 #if FAST_MATH
389 dst->f[0] = util_fast_exp2(src->f[0]);
390 dst->f[1] = util_fast_exp2(src->f[1]);
391 dst->f[2] = util_fast_exp2(src->f[2]);
392 dst->f[3] = util_fast_exp2(src->f[3]);
393 #else
394 #if DEBUG
395 /* Inf is okay for this instruction, so clamp it to silence assertions. */
396 uint i;
397 union tgsi_exec_channel clamped;
398
399 for (i = 0; i < 4; i++) {
400 if (src->f[i] > 127.99999f) {
401 clamped.f[i] = 127.99999f;
402 } else if (src->f[i] < -126.99999f) {
403 clamped.f[i] = -126.99999f;
404 } else {
405 clamped.f[i] = src->f[i];
406 }
407 }
408 src = &clamped;
409 #endif /* DEBUG */
410
411 dst->f[0] = powf(2.0f, src->f[0]);
412 dst->f[1] = powf(2.0f, src->f[1]);
413 dst->f[2] = powf(2.0f, src->f[2]);
414 dst->f[3] = powf(2.0f, src->f[3]);
415 #endif /* FAST_MATH */
416 }
417
418 static void
419 micro_f2d(union tgsi_double_channel *dst,
420 const union tgsi_exec_channel *src)
421 {
422 dst->d[0] = (double)src->f[0];
423 dst->d[1] = (double)src->f[1];
424 dst->d[2] = (double)src->f[2];
425 dst->d[3] = (double)src->f[3];
426 }
427
428 static void
429 micro_flr(union tgsi_exec_channel *dst,
430 const union tgsi_exec_channel *src)
431 {
432 dst->f[0] = floorf(src->f[0]);
433 dst->f[1] = floorf(src->f[1]);
434 dst->f[2] = floorf(src->f[2]);
435 dst->f[3] = floorf(src->f[3]);
436 }
437
438 static void
439 micro_frc(union tgsi_exec_channel *dst,
440 const union tgsi_exec_channel *src)
441 {
442 dst->f[0] = src->f[0] - floorf(src->f[0]);
443 dst->f[1] = src->f[1] - floorf(src->f[1]);
444 dst->f[2] = src->f[2] - floorf(src->f[2]);
445 dst->f[3] = src->f[3] - floorf(src->f[3]);
446 }
447
448 static void
449 micro_i2d(union tgsi_double_channel *dst,
450 const union tgsi_exec_channel *src)
451 {
452 dst->d[0] = (double)src->i[0];
453 dst->d[1] = (double)src->i[1];
454 dst->d[2] = (double)src->i[2];
455 dst->d[3] = (double)src->i[3];
456 }
457
458 static void
459 micro_iabs(union tgsi_exec_channel *dst,
460 const union tgsi_exec_channel *src)
461 {
462 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
463 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
464 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
465 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
466 }
467
468 static void
469 micro_ineg(union tgsi_exec_channel *dst,
470 const union tgsi_exec_channel *src)
471 {
472 dst->i[0] = -src->i[0];
473 dst->i[1] = -src->i[1];
474 dst->i[2] = -src->i[2];
475 dst->i[3] = -src->i[3];
476 }
477
478 static void
479 micro_lg2(union tgsi_exec_channel *dst,
480 const union tgsi_exec_channel *src)
481 {
482 #if FAST_MATH
483 dst->f[0] = util_fast_log2(src->f[0]);
484 dst->f[1] = util_fast_log2(src->f[1]);
485 dst->f[2] = util_fast_log2(src->f[2]);
486 dst->f[3] = util_fast_log2(src->f[3]);
487 #else
488 dst->f[0] = logf(src->f[0]) * 1.442695f;
489 dst->f[1] = logf(src->f[1]) * 1.442695f;
490 dst->f[2] = logf(src->f[2]) * 1.442695f;
491 dst->f[3] = logf(src->f[3]) * 1.442695f;
492 #endif
493 }
494
495 static void
496 micro_lrp(union tgsi_exec_channel *dst,
497 const union tgsi_exec_channel *src0,
498 const union tgsi_exec_channel *src1,
499 const union tgsi_exec_channel *src2)
500 {
501 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
502 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
503 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
504 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
505 }
506
507 static void
508 micro_mad(union tgsi_exec_channel *dst,
509 const union tgsi_exec_channel *src0,
510 const union tgsi_exec_channel *src1,
511 const union tgsi_exec_channel *src2)
512 {
513 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
514 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
515 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
516 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
517 }
518
519 static void
520 micro_mov(union tgsi_exec_channel *dst,
521 const union tgsi_exec_channel *src)
522 {
523 dst->u[0] = src->u[0];
524 dst->u[1] = src->u[1];
525 dst->u[2] = src->u[2];
526 dst->u[3] = src->u[3];
527 }
528
529 static void
530 micro_rcp(union tgsi_exec_channel *dst,
531 const union tgsi_exec_channel *src)
532 {
533 #if 0 /* for debugging */
534 assert(src->f[0] != 0.0f);
535 assert(src->f[1] != 0.0f);
536 assert(src->f[2] != 0.0f);
537 assert(src->f[3] != 0.0f);
538 #endif
539 dst->f[0] = 1.0f / src->f[0];
540 dst->f[1] = 1.0f / src->f[1];
541 dst->f[2] = 1.0f / src->f[2];
542 dst->f[3] = 1.0f / src->f[3];
543 }
544
545 static void
546 micro_rnd(union tgsi_exec_channel *dst,
547 const union tgsi_exec_channel *src)
548 {
549 dst->f[0] = _mesa_roundevenf(src->f[0]);
550 dst->f[1] = _mesa_roundevenf(src->f[1]);
551 dst->f[2] = _mesa_roundevenf(src->f[2]);
552 dst->f[3] = _mesa_roundevenf(src->f[3]);
553 }
554
555 static void
556 micro_rsq(union tgsi_exec_channel *dst,
557 const union tgsi_exec_channel *src)
558 {
559 #if 0 /* for debugging */
560 assert(src->f[0] != 0.0f);
561 assert(src->f[1] != 0.0f);
562 assert(src->f[2] != 0.0f);
563 assert(src->f[3] != 0.0f);
564 #endif
565 dst->f[0] = 1.0f / sqrtf(src->f[0]);
566 dst->f[1] = 1.0f / sqrtf(src->f[1]);
567 dst->f[2] = 1.0f / sqrtf(src->f[2]);
568 dst->f[3] = 1.0f / sqrtf(src->f[3]);
569 }
570
571 static void
572 micro_sqrt(union tgsi_exec_channel *dst,
573 const union tgsi_exec_channel *src)
574 {
575 dst->f[0] = sqrtf(src->f[0]);
576 dst->f[1] = sqrtf(src->f[1]);
577 dst->f[2] = sqrtf(src->f[2]);
578 dst->f[3] = sqrtf(src->f[3]);
579 }
580
581 static void
582 micro_seq(union tgsi_exec_channel *dst,
583 const union tgsi_exec_channel *src0,
584 const union tgsi_exec_channel *src1)
585 {
586 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
587 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
588 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
589 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
590 }
591
592 static void
593 micro_sge(union tgsi_exec_channel *dst,
594 const union tgsi_exec_channel *src0,
595 const union tgsi_exec_channel *src1)
596 {
597 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
598 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
599 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
600 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
601 }
602
603 static void
604 micro_sgn(union tgsi_exec_channel *dst,
605 const union tgsi_exec_channel *src)
606 {
607 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
608 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
609 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
610 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
611 }
612
613 static void
614 micro_isgn(union tgsi_exec_channel *dst,
615 const union tgsi_exec_channel *src)
616 {
617 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
618 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
619 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
620 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
621 }
622
623 static void
624 micro_sgt(union tgsi_exec_channel *dst,
625 const union tgsi_exec_channel *src0,
626 const union tgsi_exec_channel *src1)
627 {
628 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
629 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
630 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
631 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
632 }
633
634 static void
635 micro_sin(union tgsi_exec_channel *dst,
636 const union tgsi_exec_channel *src)
637 {
638 dst->f[0] = sinf(src->f[0]);
639 dst->f[1] = sinf(src->f[1]);
640 dst->f[2] = sinf(src->f[2]);
641 dst->f[3] = sinf(src->f[3]);
642 }
643
644 static void
645 micro_sle(union tgsi_exec_channel *dst,
646 const union tgsi_exec_channel *src0,
647 const union tgsi_exec_channel *src1)
648 {
649 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
650 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
651 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
652 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
653 }
654
655 static void
656 micro_slt(union tgsi_exec_channel *dst,
657 const union tgsi_exec_channel *src0,
658 const union tgsi_exec_channel *src1)
659 {
660 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
661 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
662 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
663 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
664 }
665
666 static void
667 micro_sne(union tgsi_exec_channel *dst,
668 const union tgsi_exec_channel *src0,
669 const union tgsi_exec_channel *src1)
670 {
671 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
672 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
673 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
674 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
675 }
676
677 static void
678 micro_trunc(union tgsi_exec_channel *dst,
679 const union tgsi_exec_channel *src)
680 {
681 dst->f[0] = truncf(src->f[0]);
682 dst->f[1] = truncf(src->f[1]);
683 dst->f[2] = truncf(src->f[2]);
684 dst->f[3] = truncf(src->f[3]);
685 }
686
687 static void
688 micro_u2d(union tgsi_double_channel *dst,
689 const union tgsi_exec_channel *src)
690 {
691 dst->d[0] = (double)src->u[0];
692 dst->d[1] = (double)src->u[1];
693 dst->d[2] = (double)src->u[2];
694 dst->d[3] = (double)src->u[3];
695 }
696
697 static void
698 micro_i64abs(union tgsi_double_channel *dst,
699 const union tgsi_double_channel *src)
700 {
701 dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
702 dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
703 dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
704 dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
705 }
706
707 static void
708 micro_i64sgn(union tgsi_double_channel *dst,
709 const union tgsi_double_channel *src)
710 {
711 dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
712 dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
713 dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
714 dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
715 }
716
717 static void
718 micro_i64neg(union tgsi_double_channel *dst,
719 const union tgsi_double_channel *src)
720 {
721 dst->i64[0] = -src->i64[0];
722 dst->i64[1] = -src->i64[1];
723 dst->i64[2] = -src->i64[2];
724 dst->i64[3] = -src->i64[3];
725 }
726
727 static void
728 micro_u64seq(union tgsi_double_channel *dst,
729 const union tgsi_double_channel *src)
730 {
731 dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
732 dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
733 dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
734 dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
735 }
736
737 static void
738 micro_u64sne(union tgsi_double_channel *dst,
739 const union tgsi_double_channel *src)
740 {
741 dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
742 dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
743 dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
744 dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
745 }
746
747 static void
748 micro_i64slt(union tgsi_double_channel *dst,
749 const union tgsi_double_channel *src)
750 {
751 dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
752 dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
753 dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
754 dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
755 }
756
757 static void
758 micro_u64slt(union tgsi_double_channel *dst,
759 const union tgsi_double_channel *src)
760 {
761 dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
762 dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
763 dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
764 dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
765 }
766
767 static void
768 micro_i64sge(union tgsi_double_channel *dst,
769 const union tgsi_double_channel *src)
770 {
771 dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
772 dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
773 dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
774 dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
775 }
776
777 static void
778 micro_u64sge(union tgsi_double_channel *dst,
779 const union tgsi_double_channel *src)
780 {
781 dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
782 dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
783 dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
784 dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
785 }
786
787 static void
788 micro_u64max(union tgsi_double_channel *dst,
789 const union tgsi_double_channel *src)
790 {
791 dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
792 dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
793 dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
794 dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
795 }
796
797 static void
798 micro_i64max(union tgsi_double_channel *dst,
799 const union tgsi_double_channel *src)
800 {
801 dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
802 dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
803 dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
804 dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
805 }
806
807 static void
808 micro_u64min(union tgsi_double_channel *dst,
809 const union tgsi_double_channel *src)
810 {
811 dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
812 dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
813 dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
814 dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
815 }
816
817 static void
818 micro_i64min(union tgsi_double_channel *dst,
819 const union tgsi_double_channel *src)
820 {
821 dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
822 dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
823 dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
824 dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
825 }
826
827 static void
828 micro_u64add(union tgsi_double_channel *dst,
829 const union tgsi_double_channel *src)
830 {
831 dst->u64[0] = src[0].u64[0] + src[1].u64[0];
832 dst->u64[1] = src[0].u64[1] + src[1].u64[1];
833 dst->u64[2] = src[0].u64[2] + src[1].u64[2];
834 dst->u64[3] = src[0].u64[3] + src[1].u64[3];
835 }
836
837 static void
838 micro_u64mul(union tgsi_double_channel *dst,
839 const union tgsi_double_channel *src)
840 {
841 dst->u64[0] = src[0].u64[0] * src[1].u64[0];
842 dst->u64[1] = src[0].u64[1] * src[1].u64[1];
843 dst->u64[2] = src[0].u64[2] * src[1].u64[2];
844 dst->u64[3] = src[0].u64[3] * src[1].u64[3];
845 }
846
847 static void
848 micro_u64div(union tgsi_double_channel *dst,
849 const union tgsi_double_channel *src)
850 {
851 dst->u64[0] = src[0].u64[0] / src[1].u64[0];
852 dst->u64[1] = src[0].u64[1] / src[1].u64[1];
853 dst->u64[2] = src[0].u64[2] / src[1].u64[2];
854 dst->u64[3] = src[0].u64[3] / src[1].u64[3];
855 }
856
857 static void
858 micro_i64div(union tgsi_double_channel *dst,
859 const union tgsi_double_channel *src)
860 {
861 dst->i64[0] = src[0].i64[0] / src[1].i64[0];
862 dst->i64[1] = src[0].i64[1] / src[1].i64[1];
863 dst->i64[2] = src[0].i64[2] / src[1].i64[2];
864 dst->i64[3] = src[0].i64[3] / src[1].i64[3];
865 }
866
867 static void
868 micro_u64mod(union tgsi_double_channel *dst,
869 const union tgsi_double_channel *src)
870 {
871 dst->u64[0] = src[0].u64[0] % src[1].u64[0];
872 dst->u64[1] = src[0].u64[1] % src[1].u64[1];
873 dst->u64[2] = src[0].u64[2] % src[1].u64[2];
874 dst->u64[3] = src[0].u64[3] % src[1].u64[3];
875 }
876
877 static void
878 micro_i64mod(union tgsi_double_channel *dst,
879 const union tgsi_double_channel *src)
880 {
881 dst->i64[0] = src[0].i64[0] % src[1].i64[0];
882 dst->i64[1] = src[0].i64[1] % src[1].i64[1];
883 dst->i64[2] = src[0].i64[2] % src[1].i64[2];
884 dst->i64[3] = src[0].i64[3] % src[1].i64[3];
885 }
886
887 static void
888 micro_u64shl(union tgsi_double_channel *dst,
889 const union tgsi_double_channel *src0,
890 union tgsi_exec_channel *src1)
891 {
892 unsigned masked_count;
893 masked_count = src1->u[0] & 0x3f;
894 dst->u64[0] = src0->u64[0] << masked_count;
895 masked_count = src1->u[1] & 0x3f;
896 dst->u64[1] = src0->u64[1] << masked_count;
897 masked_count = src1->u[2] & 0x3f;
898 dst->u64[2] = src0->u64[2] << masked_count;
899 masked_count = src1->u[3] & 0x3f;
900 dst->u64[3] = src0->u64[3] << masked_count;
901 }
902
903 static void
904 micro_i64shr(union tgsi_double_channel *dst,
905 const union tgsi_double_channel *src0,
906 union tgsi_exec_channel *src1)
907 {
908 unsigned masked_count;
909 masked_count = src1->u[0] & 0x3f;
910 dst->i64[0] = src0->i64[0] >> masked_count;
911 masked_count = src1->u[1] & 0x3f;
912 dst->i64[1] = src0->i64[1] >> masked_count;
913 masked_count = src1->u[2] & 0x3f;
914 dst->i64[2] = src0->i64[2] >> masked_count;
915 masked_count = src1->u[3] & 0x3f;
916 dst->i64[3] = src0->i64[3] >> masked_count;
917 }
918
919 static void
920 micro_u64shr(union tgsi_double_channel *dst,
921 const union tgsi_double_channel *src0,
922 union tgsi_exec_channel *src1)
923 {
924 unsigned masked_count;
925 masked_count = src1->u[0] & 0x3f;
926 dst->u64[0] = src0->u64[0] >> masked_count;
927 masked_count = src1->u[1] & 0x3f;
928 dst->u64[1] = src0->u64[1] >> masked_count;
929 masked_count = src1->u[2] & 0x3f;
930 dst->u64[2] = src0->u64[2] >> masked_count;
931 masked_count = src1->u[3] & 0x3f;
932 dst->u64[3] = src0->u64[3] >> masked_count;
933 }
934
/**
 * Data type tags: 32-bit float/int/uint and 64-bit double/int64/uint64.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT  = 0,
   TGSI_EXEC_DATA_INT    = 1,
   TGSI_EXEC_DATA_UINT   = 2,
   TGSI_EXEC_DATA_DOUBLE = 3,
   TGSI_EXEC_DATA_INT64  = 4,
   TGSI_EXEC_DATA_UINT64 = 5
};
943
/*
 * Short aliases for the utility register locations defined by the
 * TGSI_EXEC_TEMP_* macros (_I = Index, _C = Channel).
 */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
953
954
/**
 * Recompute MACH->ExecMask as the bitwise AND of the condition, loop,
 * continue, switch and function masks.
 *
 * MACH is a pointer expression; it is parenthesized so arguments such as
 * `&machine` or `base + 1` expand correctly. It is evaluated more than
 * once, so it must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & \
                       (MACH)->LoopMask & \
                       (MACH)->ContMask & \
                       (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
958
959
960 static const union tgsi_exec_channel ZeroVec =
961 { { 0.0, 0.0, 0.0, 0.0 } };
962
963 static const union tgsi_exec_channel OneVec = {
964 {1.0f, 1.0f, 1.0f, 1.0f}
965 };
966
967 static const union tgsi_exec_channel P128Vec = {
968 {128.0f, 128.0f, 128.0f, 128.0f}
969 };
970
971 static const union tgsi_exec_channel M128Vec = {
972 {-128.0f, -128.0f, -128.0f, -128.0f}
973 };
974
975
976 /**
977 * Assert that none of the float values in 'chan' are infinite or NaN.
978 * NaN and Inf may occur normally during program execution and should
979 * not lead to crashes, etc. But when debugging, it's helpful to catch
980 * them.
981 */
982 static inline void
983 check_inf_or_nan(const union tgsi_exec_channel *chan)
984 {
985 assert(!util_is_inf_or_nan((chan)->f[0]));
986 assert(!util_is_inf_or_nan((chan)->f[1]));
987 assert(!util_is_inf_or_nan((chan)->f[2]));
988 assert(!util_is_inf_or_nan((chan)->f[3]));
989 }
990
991
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of 'chan', prefixed by 'msg'.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
1000
1001
#ifdef DEBUG
/**
 * Debug helper: dump temporary register 'index' of 'mach', one line per
 * X/Y/Z/W channel with the float value of each lane.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char chan_names[] = "XYZW";
   const struct tgsi_exec_vector *vec = &mach->Temps[index];
   int c;

   debug_printf("Temp[%u] =\n", index);
   for (c = 0; c < 4; c++) {
      const union tgsi_exec_channel *ch = &vec->xyzw[c];

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   chan_names[c],
                   ch->f[0], ch->f[1], ch->f[2], ch->f[3]);
   }
}
#endif
1019
1020
1021 void
1022 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
1023 unsigned num_bufs,
1024 const void **bufs,
1025 const unsigned *buf_sizes)
1026 {
1027 unsigned i;
1028
1029 for (i = 0; i < num_bufs; i++) {
1030 mach->Consts[i] = bufs[i];
1031 mach->ConstsSize[i] = buf_sizes[i];
1032 }
1033 }
1034
1035
1036 /**
1037 * Check if there's a potential src/dst register data dependency when
1038 * using SOA execution.
1039 * Example:
1040 * MOV T, T.yxwz;
1041 * This would expand into:
1042 * MOV t0, t1;
1043 * MOV t1, t0;
1044 * MOV t2, t3;
1045 * MOV t3, t2;
1046 * The second instruction will have the wrong value for t0 if executed as-is.
1047 */
1048 boolean
1049 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
1050 {
1051 uint i, chan;
1052
1053 uint writemask = inst->Dst[0].Register.WriteMask;
1054 if (writemask == TGSI_WRITEMASK_X ||
1055 writemask == TGSI_WRITEMASK_Y ||
1056 writemask == TGSI_WRITEMASK_Z ||
1057 writemask == TGSI_WRITEMASK_W ||
1058 writemask == TGSI_WRITEMASK_NONE) {
1059 /* no chance of data dependency */
1060 return FALSE;
1061 }
1062
1063 /* loop over src regs */
1064 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1065 if ((inst->Src[i].Register.File ==
1066 inst->Dst[0].Register.File) &&
1067 ((inst->Src[i].Register.Index ==
1068 inst->Dst[0].Register.Index) ||
1069 inst->Src[i].Register.Indirect ||
1070 inst->Dst[0].Register.Indirect)) {
1071 /* loop over dest channels */
1072 uint channelsWritten = 0x0;
1073 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
1074 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1075 /* check if we're reading a channel that's been written */
1076 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
1077 if (channelsWritten & (1 << swizzle)) {
1078 return TRUE;
1079 }
1080
1081 channelsWritten |= (1 << chan);
1082 }
1083 }
1084 }
1085 }
1086 return FALSE;
1087 }
1088
1089
1090 /**
1091 * Initialize machine state by expanding tokens to full instructions,
1092 * allocating temporary storage, setting up constants, etc.
1093 * After this, we can call tgsi_exec_machine_run() many times.
1094 */
1095 void
1096 tgsi_exec_machine_bind_shader(
1097 struct tgsi_exec_machine *mach,
1098 const struct tgsi_token *tokens,
1099 struct tgsi_sampler *sampler,
1100 struct tgsi_image *image,
1101 struct tgsi_buffer *buffer)
1102 {
1103 uint k;
1104 struct tgsi_parse_context parse;
1105 struct tgsi_full_instruction *instructions;
1106 struct tgsi_full_declaration *declarations;
1107 uint maxInstructions = 10, numInstructions = 0;
1108 uint maxDeclarations = 10, numDeclarations = 0;
1109
1110 #if 0
1111 tgsi_dump(tokens, 0);
1112 #endif
1113
1114 util_init_math();
1115
1116
1117 mach->Tokens = tokens;
1118 mach->Sampler = sampler;
1119 mach->Image = image;
1120 mach->Buffer = buffer;
1121
1122 if (!tokens) {
1123 /* unbind and free all */
1124 FREE(mach->Declarations);
1125 mach->Declarations = NULL;
1126 mach->NumDeclarations = 0;
1127
1128 FREE(mach->Instructions);
1129 mach->Instructions = NULL;
1130 mach->NumInstructions = 0;
1131
1132 return;
1133 }
1134
1135 k = tgsi_parse_init (&parse, mach->Tokens);
1136 if (k != TGSI_PARSE_OK) {
1137 debug_printf( "Problem parsing!\n" );
1138 return;
1139 }
1140
1141 mach->ImmLimit = 0;
1142 mach->NumOutputs = 0;
1143
1144 for (k = 0; k < TGSI_SEMANTIC_COUNT; k++)
1145 mach->SysSemanticToIndex[k] = -1;
1146
1147 if (mach->ShaderType == PIPE_SHADER_GEOMETRY &&
1148 !mach->UsedGeometryShader) {
1149 struct tgsi_exec_vector *inputs;
1150 struct tgsi_exec_vector *outputs;
1151
1152 inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1153 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS,
1154 16);
1155
1156 if (!inputs)
1157 return;
1158
1159 outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1160 TGSI_MAX_TOTAL_VERTICES, 16);
1161
1162 if (!outputs) {
1163 align_free(inputs);
1164 return;
1165 }
1166
1167 align_free(mach->Inputs);
1168 align_free(mach->Outputs);
1169
1170 mach->Inputs = inputs;
1171 mach->Outputs = outputs;
1172 mach->UsedGeometryShader = TRUE;
1173 }
1174
1175 declarations = (struct tgsi_full_declaration *)
1176 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
1177
1178 if (!declarations) {
1179 return;
1180 }
1181
1182 instructions = (struct tgsi_full_instruction *)
1183 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
1184
1185 if (!instructions) {
1186 FREE( declarations );
1187 return;
1188 }
1189
1190 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1191 uint i;
1192
1193 tgsi_parse_token( &parse );
1194 switch( parse.FullToken.Token.Type ) {
1195 case TGSI_TOKEN_TYPE_DECLARATION:
1196 /* save expanded declaration */
1197 if (numDeclarations == maxDeclarations) {
1198 declarations = REALLOC(declarations,
1199 maxDeclarations
1200 * sizeof(struct tgsi_full_declaration),
1201 (maxDeclarations + 10)
1202 * sizeof(struct tgsi_full_declaration));
1203 maxDeclarations += 10;
1204 }
1205 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
1206 unsigned reg;
1207 for (reg = parse.FullToken.FullDeclaration.Range.First;
1208 reg <= parse.FullToken.FullDeclaration.Range.Last;
1209 ++reg) {
1210 ++mach->NumOutputs;
1211 }
1212 }
1213 else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1214 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
1215 mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;
1216 }
1217
1218 memcpy(declarations + numDeclarations,
1219 &parse.FullToken.FullDeclaration,
1220 sizeof(declarations[0]));
1221 numDeclarations++;
1222 break;
1223
1224 case TGSI_TOKEN_TYPE_IMMEDIATE:
1225 {
1226 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1227 assert( size <= 4 );
1228 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
1229
1230 for( i = 0; i < size; i++ ) {
1231 mach->Imms[mach->ImmLimit][i] =
1232 parse.FullToken.FullImmediate.u[i].Float;
1233 }
1234 mach->ImmLimit += 1;
1235 }
1236 break;
1237
1238 case TGSI_TOKEN_TYPE_INSTRUCTION:
1239
1240 /* save expanded instruction */
1241 if (numInstructions == maxInstructions) {
1242 instructions = REALLOC(instructions,
1243 maxInstructions
1244 * sizeof(struct tgsi_full_instruction),
1245 (maxInstructions + 10)
1246 * sizeof(struct tgsi_full_instruction));
1247 maxInstructions += 10;
1248 }
1249
1250 memcpy(instructions + numInstructions,
1251 &parse.FullToken.FullInstruction,
1252 sizeof(instructions[0]));
1253
1254 numInstructions++;
1255 break;
1256
1257 case TGSI_TOKEN_TYPE_PROPERTY:
1258 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
1259 if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
1260 mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data;
1261 }
1262 }
1263 break;
1264
1265 default:
1266 assert( 0 );
1267 }
1268 }
1269 tgsi_parse_free (&parse);
1270
1271 FREE(mach->Declarations);
1272 mach->Declarations = declarations;
1273 mach->NumDeclarations = numDeclarations;
1274
1275 FREE(mach->Instructions);
1276 mach->Instructions = instructions;
1277 mach->NumInstructions = numInstructions;
1278 }
1279
1280
1281 struct tgsi_exec_machine *
1282 tgsi_exec_machine_create(enum pipe_shader_type shader_type)
1283 {
1284 struct tgsi_exec_machine *mach;
1285 uint i;
1286
1287 mach = align_malloc( sizeof *mach, 16 );
1288 if (!mach)
1289 goto fail;
1290
1291 memset(mach, 0, sizeof(*mach));
1292
1293 mach->ShaderType = shader_type;
1294 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
1295 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
1296 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
1297
1298 if (shader_type != PIPE_SHADER_COMPUTE) {
1299 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);
1300 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);
1301 if (!mach->Inputs || !mach->Outputs)
1302 goto fail;
1303 }
1304
1305 /* Setup constants needed by the SSE2 executor. */
1306 for( i = 0; i < 4; i++ ) {
1307 mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000;
1308 mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF;
1309 mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000;
1310 mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */
1311 mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f;
1312 mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */
1313 mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f;
1314 mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f;
1315 mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f;
1316 mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f;
1317 }
1318
1319 #ifdef DEBUG
1320 /* silence warnings */
1321 (void) print_chan;
1322 (void) print_temp;
1323 #endif
1324
1325 return mach;
1326
1327 fail:
1328 if (mach) {
1329 align_free(mach->Inputs);
1330 align_free(mach->Outputs);
1331 align_free(mach);
1332 }
1333 return NULL;
1334 }
1335
1336
1337 void
1338 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
1339 {
1340 if (mach) {
1341 FREE(mach->Instructions);
1342 FREE(mach->Declarations);
1343
1344 align_free(mach->Inputs);
1345 align_free(mach->Outputs);
1346
1347 align_free(mach);
1348 }
1349 }
1350
1351 static void
1352 micro_add(union tgsi_exec_channel *dst,
1353 const union tgsi_exec_channel *src0,
1354 const union tgsi_exec_channel *src1)
1355 {
1356 dst->f[0] = src0->f[0] + src1->f[0];
1357 dst->f[1] = src0->f[1] + src1->f[1];
1358 dst->f[2] = src0->f[2] + src1->f[2];
1359 dst->f[3] = src0->f[3] + src1->f[3];
1360 }
1361
1362 static void
1363 micro_div(
1364 union tgsi_exec_channel *dst,
1365 const union tgsi_exec_channel *src0,
1366 const union tgsi_exec_channel *src1 )
1367 {
1368 if (src1->f[0] != 0) {
1369 dst->f[0] = src0->f[0] / src1->f[0];
1370 }
1371 if (src1->f[1] != 0) {
1372 dst->f[1] = src0->f[1] / src1->f[1];
1373 }
1374 if (src1->f[2] != 0) {
1375 dst->f[2] = src0->f[2] / src1->f[2];
1376 }
1377 if (src1->f[3] != 0) {
1378 dst->f[3] = src0->f[3] / src1->f[3];
1379 }
1380 }
1381
1382 static void
1383 micro_lt(
1384 union tgsi_exec_channel *dst,
1385 const union tgsi_exec_channel *src0,
1386 const union tgsi_exec_channel *src1,
1387 const union tgsi_exec_channel *src2,
1388 const union tgsi_exec_channel *src3 )
1389 {
1390 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
1391 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
1392 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
1393 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
1394 }
1395
1396 static void
1397 micro_max(union tgsi_exec_channel *dst,
1398 const union tgsi_exec_channel *src0,
1399 const union tgsi_exec_channel *src1)
1400 {
1401 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
1402 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
1403 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
1404 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
1405 }
1406
1407 static void
1408 micro_min(union tgsi_exec_channel *dst,
1409 const union tgsi_exec_channel *src0,
1410 const union tgsi_exec_channel *src1)
1411 {
1412 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
1413 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
1414 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
1415 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
1416 }
1417
1418 static void
1419 micro_mul(union tgsi_exec_channel *dst,
1420 const union tgsi_exec_channel *src0,
1421 const union tgsi_exec_channel *src1)
1422 {
1423 dst->f[0] = src0->f[0] * src1->f[0];
1424 dst->f[1] = src0->f[1] * src1->f[1];
1425 dst->f[2] = src0->f[2] * src1->f[2];
1426 dst->f[3] = src0->f[3] * src1->f[3];
1427 }
1428
1429 static void
1430 micro_neg(
1431 union tgsi_exec_channel *dst,
1432 const union tgsi_exec_channel *src )
1433 {
1434 dst->f[0] = -src->f[0];
1435 dst->f[1] = -src->f[1];
1436 dst->f[2] = -src->f[2];
1437 dst->f[3] = -src->f[3];
1438 }
1439
1440 static void
1441 micro_pow(
1442 union tgsi_exec_channel *dst,
1443 const union tgsi_exec_channel *src0,
1444 const union tgsi_exec_channel *src1 )
1445 {
1446 #if FAST_MATH
1447 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
1448 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
1449 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
1450 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
1451 #else
1452 dst->f[0] = powf( src0->f[0], src1->f[0] );
1453 dst->f[1] = powf( src0->f[1], src1->f[1] );
1454 dst->f[2] = powf( src0->f[2], src1->f[2] );
1455 dst->f[3] = powf( src0->f[3], src1->f[3] );
1456 #endif
1457 }
1458
1459 static void
1460 micro_sub(union tgsi_exec_channel *dst,
1461 const union tgsi_exec_channel *src0,
1462 const union tgsi_exec_channel *src1)
1463 {
1464 dst->f[0] = src0->f[0] - src1->f[0];
1465 dst->f[1] = src0->f[1] - src1->f[1];
1466 dst->f[2] = src0->f[2] - src1->f[2];
1467 dst->f[3] = src0->f[3] - src1->f[3];
1468 }
1469
1470 static void
1471 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1472 const uint chan_index,
1473 const uint file,
1474 const uint swizzle,
1475 const union tgsi_exec_channel *index,
1476 const union tgsi_exec_channel *index2D,
1477 union tgsi_exec_channel *chan)
1478 {
1479 uint i;
1480
1481 assert(swizzle < 4);
1482
1483 switch (file) {
1484 case TGSI_FILE_CONSTANT:
1485 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1486 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
1487 assert(mach->Consts[index2D->i[i]]);
1488
1489 if (index->i[i] < 0) {
1490 chan->u[i] = 0;
1491 } else {
1492 /* NOTE: copying the const value as a uint instead of float */
1493 const uint constbuf = index2D->i[i];
1494 const uint *buf = (const uint *)mach->Consts[constbuf];
1495 const int pos = index->i[i] * 4 + swizzle;
1496 /* const buffer bounds check */
1497 if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) {
1498 if (0) {
1499 /* Debug: print warning */
1500 static int count = 0;
1501 if (count++ < 100)
1502 debug_printf("TGSI Exec: const buffer index %d"
1503 " out of bounds\n", pos);
1504 }
1505 chan->u[i] = 0;
1506 }
1507 else
1508 chan->u[i] = buf[pos];
1509 }
1510 }
1511 break;
1512
1513 case TGSI_FILE_INPUT:
1514 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1515 /*
1516 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1517 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1518 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1519 index2D->i[i], index->i[i]);
1520 }*/
1521 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1522 assert(pos >= 0);
1523 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1524 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1525 }
1526 break;
1527
1528 case TGSI_FILE_SYSTEM_VALUE:
1529 /* XXX no swizzling at this point. Will be needed if we put
1530 * gl_FragCoord, for example, in a sys value register.
1531 */
1532 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1533 chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
1534 }
1535 break;
1536
1537 case TGSI_FILE_TEMPORARY:
1538 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1539 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1540 assert(index2D->i[i] == 0);
1541
1542 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1543 }
1544 break;
1545
1546 case TGSI_FILE_IMMEDIATE:
1547 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1548 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1549 assert(index2D->i[i] == 0);
1550
1551 chan->f[i] = mach->Imms[index->i[i]][swizzle];
1552 }
1553 break;
1554
1555 case TGSI_FILE_ADDRESS:
1556 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1557 assert(index->i[i] >= 0);
1558 assert(index2D->i[i] == 0);
1559
1560 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1561 }
1562 break;
1563
1564 case TGSI_FILE_PREDICATE:
1565 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1566 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS);
1567 assert(index2D->i[i] == 0);
1568
1569 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i];
1570 }
1571 break;
1572
1573 case TGSI_FILE_OUTPUT:
1574 /* vertex/fragment output vars can be read too */
1575 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1576 assert(index->i[i] >= 0);
1577 assert(index2D->i[i] == 0);
1578
1579 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1580 }
1581 break;
1582
1583 default:
1584 assert(0);
1585 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1586 chan->u[i] = 0;
1587 }
1588 }
1589 }
1590
1591 static void
1592 fetch_source_d(const struct tgsi_exec_machine *mach,
1593 union tgsi_exec_channel *chan,
1594 const struct tgsi_full_src_register *reg,
1595 const uint chan_index,
1596 enum tgsi_exec_datatype src_datatype)
1597 {
1598 union tgsi_exec_channel index;
1599 union tgsi_exec_channel index2D;
1600 uint swizzle;
1601
1602 /* We start with a direct index into a register file.
1603 *
1604 * file[1],
1605 * where:
1606 * file = Register.File
1607 * [1] = Register.Index
1608 */
1609 index.i[0] =
1610 index.i[1] =
1611 index.i[2] =
1612 index.i[3] = reg->Register.Index;
1613
1614 /* There is an extra source register that indirectly subscripts
1615 * a register file. The direct index now becomes an offset
1616 * that is being added to the indirect register.
1617 *
1618 * file[ind[2].x+1],
1619 * where:
1620 * ind = Indirect.File
1621 * [2] = Indirect.Index
1622 * .x = Indirect.SwizzleX
1623 */
1624 if (reg->Register.Indirect) {
1625 union tgsi_exec_channel index2;
1626 union tgsi_exec_channel indir_index;
1627 const uint execmask = mach->ExecMask;
1628 uint i;
1629
1630 /* which address register (always zero now) */
1631 index2.i[0] =
1632 index2.i[1] =
1633 index2.i[2] =
1634 index2.i[3] = reg->Indirect.Index;
1635 /* get current value of address register[swizzle] */
1636 swizzle = reg->Indirect.Swizzle;
1637 fetch_src_file_channel(mach,
1638 chan_index,
1639 reg->Indirect.File,
1640 swizzle,
1641 &index2,
1642 &ZeroVec,
1643 &indir_index);
1644
1645 /* add value of address register to the offset */
1646 index.i[0] += indir_index.i[0];
1647 index.i[1] += indir_index.i[1];
1648 index.i[2] += indir_index.i[2];
1649 index.i[3] += indir_index.i[3];
1650
1651 /* for disabled execution channels, zero-out the index to
1652 * avoid using a potential garbage value.
1653 */
1654 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1655 if ((execmask & (1 << i)) == 0)
1656 index.i[i] = 0;
1657 }
1658 }
1659
1660 /* There is an extra source register that is a second
1661 * subscript to a register file. Effectively it means that
1662 * the register file is actually a 2D array of registers.
1663 *
1664 * file[3][1],
1665 * where:
1666 * [3] = Dimension.Index
1667 */
1668 if (reg->Register.Dimension) {
1669 index2D.i[0] =
1670 index2D.i[1] =
1671 index2D.i[2] =
1672 index2D.i[3] = reg->Dimension.Index;
1673
1674 /* Again, the second subscript index can be addressed indirectly
1675 * identically to the first one.
1676 * Nothing stops us from indirectly addressing the indirect register,
1677 * but there is no need for that, so we won't exercise it.
1678 *
1679 * file[ind[4].y+3][1],
1680 * where:
1681 * ind = DimIndirect.File
1682 * [4] = DimIndirect.Index
1683 * .y = DimIndirect.SwizzleX
1684 */
1685 if (reg->Dimension.Indirect) {
1686 union tgsi_exec_channel index2;
1687 union tgsi_exec_channel indir_index;
1688 const uint execmask = mach->ExecMask;
1689 uint i;
1690
1691 index2.i[0] =
1692 index2.i[1] =
1693 index2.i[2] =
1694 index2.i[3] = reg->DimIndirect.Index;
1695
1696 swizzle = reg->DimIndirect.Swizzle;
1697 fetch_src_file_channel(mach,
1698 chan_index,
1699 reg->DimIndirect.File,
1700 swizzle,
1701 &index2,
1702 &ZeroVec,
1703 &indir_index);
1704
1705 index2D.i[0] += indir_index.i[0];
1706 index2D.i[1] += indir_index.i[1];
1707 index2D.i[2] += indir_index.i[2];
1708 index2D.i[3] += indir_index.i[3];
1709
1710 /* for disabled execution channels, zero-out the index to
1711 * avoid using a potential garbage value.
1712 */
1713 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1714 if ((execmask & (1 << i)) == 0) {
1715 index2D.i[i] = 0;
1716 }
1717 }
1718 }
1719
1720 /* If by any chance there was a need for a 3D array of register
1721 * files, we would have to check whether Dimension is followed
1722 * by a dimension register and continue the saga.
1723 */
1724 } else {
1725 index2D.i[0] =
1726 index2D.i[1] =
1727 index2D.i[2] =
1728 index2D.i[3] = 0;
1729 }
1730
1731 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1732 fetch_src_file_channel(mach,
1733 chan_index,
1734 reg->Register.File,
1735 swizzle,
1736 &index,
1737 &index2D,
1738 chan);
1739 }
1740
1741 static void
1742 fetch_source(const struct tgsi_exec_machine *mach,
1743 union tgsi_exec_channel *chan,
1744 const struct tgsi_full_src_register *reg,
1745 const uint chan_index,
1746 enum tgsi_exec_datatype src_datatype)
1747 {
1748 fetch_source_d(mach, chan, reg, chan_index, src_datatype);
1749
1750 if (reg->Register.Absolute) {
1751 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1752 micro_abs(chan, chan);
1753 } else {
1754 micro_iabs(chan, chan);
1755 }
1756 }
1757
1758 if (reg->Register.Negate) {
1759 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1760 micro_neg(chan, chan);
1761 } else {
1762 micro_ineg(chan, chan);
1763 }
1764 }
1765 }
1766
1767 static union tgsi_exec_channel *
1768 store_dest_dstret(struct tgsi_exec_machine *mach,
1769 const union tgsi_exec_channel *chan,
1770 const struct tgsi_full_dst_register *reg,
1771 const struct tgsi_full_instruction *inst,
1772 uint chan_index,
1773 enum tgsi_exec_datatype dst_datatype)
1774 {
1775 uint i;
1776 static union tgsi_exec_channel null;
1777 union tgsi_exec_channel *dst;
1778 union tgsi_exec_channel index2D;
1779 uint execmask = mach->ExecMask;
1780 int offset = 0; /* indirection offset */
1781 int index;
1782
1783 /* for debugging */
1784 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1785 check_inf_or_nan(chan);
1786 }
1787
1788 /* There is an extra source register that indirectly subscripts
1789 * a register file. The direct index now becomes an offset
1790 * that is being added to the indirect register.
1791 *
1792 * file[ind[2].x+1],
1793 * where:
1794 * ind = Indirect.File
1795 * [2] = Indirect.Index
1796 * .x = Indirect.SwizzleX
1797 */
1798 if (reg->Register.Indirect) {
1799 union tgsi_exec_channel index;
1800 union tgsi_exec_channel indir_index;
1801 uint swizzle;
1802
1803 /* which address register (always zero for now) */
1804 index.i[0] =
1805 index.i[1] =
1806 index.i[2] =
1807 index.i[3] = reg->Indirect.Index;
1808
1809 /* get current value of address register[swizzle] */
1810 swizzle = reg->Indirect.Swizzle;
1811
1812 /* fetch values from the address/indirection register */
1813 fetch_src_file_channel(mach,
1814 chan_index,
1815 reg->Indirect.File,
1816 swizzle,
1817 &index,
1818 &ZeroVec,
1819 &indir_index);
1820
1821 /* save indirection offset */
1822 offset = indir_index.i[0];
1823 }
1824
1825 /* There is an extra source register that is a second
1826 * subscript to a register file. Effectively it means that
1827 * the register file is actually a 2D array of registers.
1828 *
1829 * file[3][1],
1830 * where:
1831 * [3] = Dimension.Index
1832 */
1833 if (reg->Register.Dimension) {
1834 index2D.i[0] =
1835 index2D.i[1] =
1836 index2D.i[2] =
1837 index2D.i[3] = reg->Dimension.Index;
1838
1839 /* Again, the second subscript index can be addressed indirectly
1840 * identically to the first one.
1841 * Nothing stops us from indirectly addressing the indirect register,
1842 * but there is no need for that, so we won't exercise it.
1843 *
1844 * file[ind[4].y+3][1],
1845 * where:
1846 * ind = DimIndirect.File
1847 * [4] = DimIndirect.Index
1848 * .y = DimIndirect.SwizzleX
1849 */
1850 if (reg->Dimension.Indirect) {
1851 union tgsi_exec_channel index2;
1852 union tgsi_exec_channel indir_index;
1853 const uint execmask = mach->ExecMask;
1854 unsigned swizzle;
1855 uint i;
1856
1857 index2.i[0] =
1858 index2.i[1] =
1859 index2.i[2] =
1860 index2.i[3] = reg->DimIndirect.Index;
1861
1862 swizzle = reg->DimIndirect.Swizzle;
1863 fetch_src_file_channel(mach,
1864 chan_index,
1865 reg->DimIndirect.File,
1866 swizzle,
1867 &index2,
1868 &ZeroVec,
1869 &indir_index);
1870
1871 index2D.i[0] += indir_index.i[0];
1872 index2D.i[1] += indir_index.i[1];
1873 index2D.i[2] += indir_index.i[2];
1874 index2D.i[3] += indir_index.i[3];
1875
1876 /* for disabled execution channels, zero-out the index to
1877 * avoid using a potential garbage value.
1878 */
1879 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1880 if ((execmask & (1 << i)) == 0) {
1881 index2D.i[i] = 0;
1882 }
1883 }
1884 }
1885
1886 /* If by any chance there was a need for a 3D array of register
1887 * files, we would have to check whether Dimension is followed
1888 * by a dimension register and continue the saga.
1889 */
1890 } else {
1891 index2D.i[0] =
1892 index2D.i[1] =
1893 index2D.i[2] =
1894 index2D.i[3] = 0;
1895 }
1896
1897 switch (reg->Register.File) {
1898 case TGSI_FILE_NULL:
1899 dst = &null;
1900 break;
1901
1902 case TGSI_FILE_OUTPUT:
1903 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1904 + reg->Register.Index;
1905 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1906 #if 0
1907 debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1908 mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
1909 reg->Register.Index);
1910 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1911 debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1912 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1913 if (execmask & (1 << i))
1914 debug_printf("%f, ", chan->f[i]);
1915 debug_printf(")\n");
1916 }
1917 #endif
1918 break;
1919
1920 case TGSI_FILE_TEMPORARY:
1921 index = reg->Register.Index;
1922 assert( index < TGSI_EXEC_NUM_TEMPS );
1923 dst = &mach->Temps[offset + index].xyzw[chan_index];
1924 break;
1925
1926 case TGSI_FILE_ADDRESS:
1927 index = reg->Register.Index;
1928 dst = &mach->Addrs[index].xyzw[chan_index];
1929 break;
1930
1931 case TGSI_FILE_PREDICATE:
1932 index = reg->Register.Index;
1933 assert(index < TGSI_EXEC_NUM_PREDS);
1934 dst = &mach->Predicates[index].xyzw[chan_index];
1935 break;
1936
1937 default:
1938 assert( 0 );
1939 return NULL;
1940 }
1941
1942 if (inst->Instruction.Predicate) {
1943 uint swizzle;
1944 union tgsi_exec_channel *pred;
1945
1946 switch (chan_index) {
1947 case TGSI_CHAN_X:
1948 swizzle = inst->Predicate.SwizzleX;
1949 break;
1950 case TGSI_CHAN_Y:
1951 swizzle = inst->Predicate.SwizzleY;
1952 break;
1953 case TGSI_CHAN_Z:
1954 swizzle = inst->Predicate.SwizzleZ;
1955 break;
1956 case TGSI_CHAN_W:
1957 swizzle = inst->Predicate.SwizzleW;
1958 break;
1959 default:
1960 assert(0);
1961 return NULL;
1962 }
1963
1964 assert(inst->Predicate.Index == 0);
1965
1966 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1967
1968 if (inst->Predicate.Negate) {
1969 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1970 if (pred->u[i]) {
1971 execmask &= ~(1 << i);
1972 }
1973 }
1974 } else {
1975 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1976 if (!pred->u[i]) {
1977 execmask &= ~(1 << i);
1978 }
1979 }
1980 }
1981 }
1982
1983 return dst;
1984 }
1985
1986 static void
1987 store_dest_double(struct tgsi_exec_machine *mach,
1988 const union tgsi_exec_channel *chan,
1989 const struct tgsi_full_dst_register *reg,
1990 const struct tgsi_full_instruction *inst,
1991 uint chan_index,
1992 enum tgsi_exec_datatype dst_datatype)
1993 {
1994 union tgsi_exec_channel *dst;
1995 const uint execmask = mach->ExecMask;
1996 int i;
1997
1998 dst = store_dest_dstret(mach, chan, reg, inst, chan_index,
1999 dst_datatype);
2000 if (!dst)
2001 return;
2002
2003 /* doubles path */
2004 for (i = 0; i < TGSI_QUAD_SIZE; i++)
2005 if (execmask & (1 << i))
2006 dst->i[i] = chan->i[i];
2007 }
2008
2009 static void
2010 store_dest(struct tgsi_exec_machine *mach,
2011 const union tgsi_exec_channel *chan,
2012 const struct tgsi_full_dst_register *reg,
2013 const struct tgsi_full_instruction *inst,
2014 uint chan_index,
2015 enum tgsi_exec_datatype dst_datatype)
2016 {
2017 union tgsi_exec_channel *dst;
2018 const uint execmask = mach->ExecMask;
2019 int i;
2020
2021 dst = store_dest_dstret(mach, chan, reg, inst, chan_index,
2022 dst_datatype);
2023 if (!dst)
2024 return;
2025
2026 if (!inst->Instruction.Saturate) {
2027 for (i = 0; i < TGSI_QUAD_SIZE; i++)
2028 if (execmask & (1 << i))
2029 dst->i[i] = chan->i[i];
2030 }
2031 else {
2032 for (i = 0; i < TGSI_QUAD_SIZE; i++)
2033 if (execmask & (1 << i)) {
2034 if (chan->f[i] < 0.0f)
2035 dst->f[i] = 0.0f;
2036 else if (chan->f[i] > 1.0f)
2037 dst->f[i] = 1.0f;
2038 else
2039 dst->i[i] = chan->i[i];
2040 }
2041 }
2042 }
2043
/*
 * Convenience wrappers around fetch_source() for float (FETCH) and signed
 * integer (IFETCH) operands.  Both expect variables named 'mach' and
 * 'inst' to be in scope at the point of use.
 */
#define FETCH(DST, SRC_INDEX, CHAN_INDEX) \
   fetch_source(mach, (DST), &inst->Src[(SRC_INDEX)], (CHAN_INDEX), TGSI_EXEC_DATA_FLOAT)

#define IFETCH(DST, SRC_INDEX, CHAN_INDEX) \
   fetch_source(mach, (DST), &inst->Src[(SRC_INDEX)], (CHAN_INDEX), TGSI_EXEC_DATA_INT)
2049
2050
2051 /**
2052 * Execute ARB-style KIL which is predicated by a src register.
2053 * Kill fragment if any of the four values is less than zero.
2054 */
2055 static void
2056 exec_kill_if(struct tgsi_exec_machine *mach,
2057 const struct tgsi_full_instruction *inst)
2058 {
2059 uint uniquemask;
2060 uint chan_index;
2061 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2062 union tgsi_exec_channel r[1];
2063
2064 /* This mask stores component bits that were already tested. */
2065 uniquemask = 0;
2066
2067 for (chan_index = 0; chan_index < 4; chan_index++)
2068 {
2069 uint swizzle;
2070 uint i;
2071
2072 /* unswizzle channel */
2073 swizzle = tgsi_util_get_full_src_register_swizzle (
2074 &inst->Src[0],
2075 chan_index);
2076
2077 /* check if the component has not been already tested */
2078 if (uniquemask & (1 << swizzle))
2079 continue;
2080 uniquemask |= 1 << swizzle;
2081
2082 FETCH(&r[0], 0, chan_index);
2083 for (i = 0; i < 4; i++)
2084 if (r[0].f[i] < 0.0f)
2085 kilmask |= 1 << i;
2086 }
2087
2088 /* restrict to fragments currently executing */
2089 kilmask &= mach->ExecMask;
2090
2091 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2092 }
2093
2094 /**
2095 * Unconditional fragment kill/discard.
2096 */
2097 static void
2098 exec_kill(struct tgsi_exec_machine *mach,
2099 const struct tgsi_full_instruction *inst)
2100 {
2101 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2102
2103 /* kill fragment for all fragments currently executing */
2104 kilmask = mach->ExecMask;
2105 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2106 }
2107
2108 static void
2109 emit_vertex(struct tgsi_exec_machine *mach)
2110 {
2111 /* FIXME: check for exec mask correctly
2112 unsigned i;
2113 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2114 if ((mach->ExecMask & (1 << i)))
2115 */
2116 if (mach->ExecMask) {
2117 if (mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] >= mach->MaxOutputVertices)
2118 return;
2119
2120 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
2121 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2122 }
2123 }
2124
2125 static void
2126 emit_primitive(struct tgsi_exec_machine *mach)
2127 {
2128 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
2129 /* FIXME: check for exec mask correctly
2130 unsigned i;
2131 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2132 if ((mach->ExecMask & (1 << i)))
2133 */
2134 if (mach->ExecMask) {
2135 ++(*prim_count);
2136 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
2137 mach->Primitives[*prim_count] = 0;
2138 }
2139 }
2140
2141 static void
2142 conditional_emit_primitive(struct tgsi_exec_machine *mach)
2143 {
2144 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
2145 int emitted_verts =
2146 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
2147 if (emitted_verts) {
2148 emit_primitive(mach);
2149 }
2150 }
2151 }
2152
2153
2154 /*
2155 * Fetch four texture samples using STR texture coordinates.
2156 */
2157 static void
2158 fetch_texel( struct tgsi_sampler *sampler,
2159 const unsigned sview_idx,
2160 const unsigned sampler_idx,
2161 const union tgsi_exec_channel *s,
2162 const union tgsi_exec_channel *t,
2163 const union tgsi_exec_channel *p,
2164 const union tgsi_exec_channel *c0,
2165 const union tgsi_exec_channel *c1,
2166 float derivs[3][2][TGSI_QUAD_SIZE],
2167 const int8_t offset[3],
2168 enum tgsi_sampler_control control,
2169 union tgsi_exec_channel *r,
2170 union tgsi_exec_channel *g,
2171 union tgsi_exec_channel *b,
2172 union tgsi_exec_channel *a )
2173 {
2174 uint j;
2175 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2176
2177 /* FIXME: handle explicit derivs, offsets */
2178 sampler->get_samples(sampler, sview_idx, sampler_idx,
2179 s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
2180
2181 for (j = 0; j < 4; j++) {
2182 r->f[j] = rgba[0][j];
2183 g->f[j] = rgba[1][j];
2184 b->f[j] = rgba[2][j];
2185 a->f[j] = rgba[3][j];
2186 }
2187 }
2188
2189
/*
 * Values for the 'modifier' argument of exec_tex() / exec_sample().
 */
#define TEX_MODIFIER_NONE           0  /* no extra operand is fetched */
#define TEX_MODIFIER_PROJECTED      1  /* coordinates are divided by the extra operand */
#define TEX_MODIFIER_LOD_BIAS       2  /* selects TGSI_SAMPLER_LOD_BIAS */
#define TEX_MODIFIER_EXPLICIT_LOD   3  /* selects TGSI_SAMPLER_LOD_EXPLICIT */
#define TEX_MODIFIER_LEVEL_ZERO     4  /* selects TGSI_SAMPLER_LOD_ZERO (exec_sample only) */
#define TEX_MODIFIER_GATHER         5  /* selects TGSI_SAMPLER_GATHER */
2196
2197 /*
2198 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
2199 */
2200 static void
2201 fetch_texel_offsets(struct tgsi_exec_machine *mach,
2202 const struct tgsi_full_instruction *inst,
2203 int8_t offsets[3])
2204 {
2205 if (inst->Texture.NumOffsets == 1) {
2206 union tgsi_exec_channel index;
2207 union tgsi_exec_channel offset[3];
2208 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
2209 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2210 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
2211 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2212 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
2213 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2214 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
2215 offsets[0] = offset[0].i[0];
2216 offsets[1] = offset[1].i[0];
2217 offsets[2] = offset[2].i[0];
2218 } else {
2219 assert(inst->Texture.NumOffsets == 0);
2220 offsets[0] = offsets[1] = offsets[2] = 0;
2221 }
2222 }
2223
2224
2225 /*
2226 * Fetch dx and dy values for one channel (s, t or r).
2227 * Put dx values into one float array, dy values into another.
2228 */
2229 static void
2230 fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
2231 const struct tgsi_full_instruction *inst,
2232 unsigned regdsrcx,
2233 unsigned chan,
2234 float derivs[2][TGSI_QUAD_SIZE])
2235 {
2236 union tgsi_exec_channel d;
2237 FETCH(&d, regdsrcx, chan);
2238 derivs[0][0] = d.f[0];
2239 derivs[0][1] = d.f[1];
2240 derivs[0][2] = d.f[2];
2241 derivs[0][3] = d.f[3];
2242 FETCH(&d, regdsrcx + 1, chan);
2243 derivs[1][0] = d.f[0];
2244 derivs[1][1] = d.f[1];
2245 derivs[1][2] = d.f[2];
2246 derivs[1][3] = d.f[3];
2247 }
2248
2249 static uint
2250 fetch_sampler_unit(struct tgsi_exec_machine *mach,
2251 const struct tgsi_full_instruction *inst,
2252 uint sampler)
2253 {
2254 uint unit = 0;
2255 int i;
2256 if (inst->Src[sampler].Register.Indirect) {
2257 const struct tgsi_full_src_register *reg = &inst->Src[sampler];
2258 union tgsi_exec_channel indir_index, index2;
2259 const uint execmask = mach->ExecMask;
2260 index2.i[0] =
2261 index2.i[1] =
2262 index2.i[2] =
2263 index2.i[3] = reg->Indirect.Index;
2264
2265 fetch_src_file_channel(mach,
2266 0,
2267 reg->Indirect.File,
2268 reg->Indirect.Swizzle,
2269 &index2,
2270 &ZeroVec,
2271 &indir_index);
2272 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2273 if (execmask & (1 << i)) {
2274 unit = inst->Src[sampler].Register.Index + indir_index.i[i];
2275 break;
2276 }
2277 }
2278
2279 } else {
2280 unit = inst->Src[sampler].Register.Index;
2281 }
2282 return unit;
2283 }
2284
2285 /*
2286 * execute a texture instruction.
2287 *
2288 * modifier is used to control the channel routing for the
2289 * instruction variants like proj, lod, and texture with lod bias.
2290 * sampler indicates which src register the sampler is contained in.
2291 */
2292 static void
2293 exec_tex(struct tgsi_exec_machine *mach,
2294 const struct tgsi_full_instruction *inst,
2295 uint modifier, uint sampler)
2296 {
2297 const union tgsi_exec_channel *args[5], *proj = NULL;
2298 union tgsi_exec_channel r[5];
2299 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2300 uint chan;
2301 uint unit;
2302 int8_t offsets[3];
2303 int dim, shadow_ref, i;
2304
2305 unit = fetch_sampler_unit(mach, inst, sampler);
2306 /* always fetch all 3 offsets, overkill but keeps code simple */
2307 fetch_texel_offsets(mach, inst, offsets);
2308
2309 assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
2310 assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
2311
2312 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2313 shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);
2314
2315 assert(dim <= 4);
2316 if (shadow_ref >= 0)
2317 assert(shadow_ref >= dim && shadow_ref < ARRAY_SIZE(args));
2318
2319 /* fetch modifier to the last argument */
2320 if (modifier != TEX_MODIFIER_NONE) {
2321 const int last = ARRAY_SIZE(args) - 1;
2322
2323 /* fetch modifier from src0.w or src1.x */
2324 if (sampler == 1) {
2325 assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);
2326 FETCH(&r[last], 0, TGSI_CHAN_W);
2327 }
2328 else {
2329 assert(shadow_ref != 4);
2330 FETCH(&r[last], 1, TGSI_CHAN_X);
2331 }
2332
2333 if (modifier != TEX_MODIFIER_PROJECTED) {
2334 args[last] = &r[last];
2335 }
2336 else {
2337 proj = &r[last];
2338 args[last] = &ZeroVec;
2339 }
2340
2341 /* point unused arguments to zero vector */
2342 for (i = dim; i < last; i++)
2343 args[i] = &ZeroVec;
2344
2345 if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
2346 control = TGSI_SAMPLER_LOD_EXPLICIT;
2347 else if (modifier == TEX_MODIFIER_LOD_BIAS)
2348 control = TGSI_SAMPLER_LOD_BIAS;
2349 else if (modifier == TEX_MODIFIER_GATHER)
2350 control = TGSI_SAMPLER_GATHER;
2351 }
2352 else {
2353 for (i = dim; i < ARRAY_SIZE(args); i++)
2354 args[i] = &ZeroVec;
2355 }
2356
2357 /* fetch coordinates */
2358 for (i = 0; i < dim; i++) {
2359 FETCH(&r[i], 0, TGSI_CHAN_X + i);
2360
2361 if (proj)
2362 micro_div(&r[i], &r[i], proj);
2363
2364 args[i] = &r[i];
2365 }
2366
2367 /* fetch reference value */
2368 if (shadow_ref >= 0) {
2369 FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));
2370
2371 if (proj)
2372 micro_div(&r[shadow_ref], &r[shadow_ref], proj);
2373
2374 args[shadow_ref] = &r[shadow_ref];
2375 }
2376
2377 fetch_texel(mach->Sampler, unit, unit,
2378 args[0], args[1], args[2], args[3], args[4],
2379 NULL, offsets, control,
2380 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2381
2382 #if 0
2383 debug_printf("fetch r: %g %g %g %g\n",
2384 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
2385 debug_printf("fetch g: %g %g %g %g\n",
2386 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
2387 debug_printf("fetch b: %g %g %g %g\n",
2388 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
2389 debug_printf("fetch a: %g %g %g %g\n",
2390 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
2391 #endif
2392
2393 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2394 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2395 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2396 }
2397 }
2398 }
2399
2400 static void
2401 exec_lodq(struct tgsi_exec_machine *mach,
2402 const struct tgsi_full_instruction *inst)
2403 {
2404 uint unit;
2405 int dim;
2406 int i;
2407 union tgsi_exec_channel coords[4];
2408 const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
2409 union tgsi_exec_channel r[2];
2410
2411 unit = fetch_sampler_unit(mach, inst, 1);
2412 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2413 assert(dim <= ARRAY_SIZE(coords));
2414 /* fetch coordinates */
2415 for (i = 0; i < dim; i++) {
2416 FETCH(&coords[i], 0, TGSI_CHAN_X + i);
2417 args[i] = &coords[i];
2418 }
2419 for (i = dim; i < ARRAY_SIZE(coords); i++) {
2420 args[i] = &ZeroVec;
2421 }
2422 mach->Sampler->query_lod(mach->Sampler, unit, unit,
2423 args[0]->f,
2424 args[1]->f,
2425 args[2]->f,
2426 args[3]->f,
2427 TGSI_SAMPLER_LOD_NONE,
2428 r[0].f,
2429 r[1].f);
2430
2431 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2432 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
2433 TGSI_EXEC_DATA_FLOAT);
2434 }
2435 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2436 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
2437 TGSI_EXEC_DATA_FLOAT);
2438 }
2439 }
2440
2441 static void
2442 exec_txd(struct tgsi_exec_machine *mach,
2443 const struct tgsi_full_instruction *inst)
2444 {
2445 union tgsi_exec_channel r[4];
2446 float derivs[3][2][TGSI_QUAD_SIZE];
2447 uint chan;
2448 uint unit;
2449 int8_t offsets[3];
2450
2451 unit = fetch_sampler_unit(mach, inst, 3);
2452 /* always fetch all 3 offsets, overkill but keeps code simple */
2453 fetch_texel_offsets(mach, inst, offsets);
2454
2455 switch (inst->Texture.Texture) {
2456 case TGSI_TEXTURE_1D:
2457 FETCH(&r[0], 0, TGSI_CHAN_X);
2458
2459 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2460
2461 fetch_texel(mach->Sampler, unit, unit,
2462 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2463 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2464 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2465 break;
2466
2467 case TGSI_TEXTURE_SHADOW1D:
2468 case TGSI_TEXTURE_1D_ARRAY:
2469 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2470 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
2471 FETCH(&r[0], 0, TGSI_CHAN_X);
2472 FETCH(&r[1], 0, TGSI_CHAN_Y);
2473 FETCH(&r[2], 0, TGSI_CHAN_Z);
2474
2475 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2476
2477 fetch_texel(mach->Sampler, unit, unit,
2478 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2479 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2480 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2481 break;
2482
2483 case TGSI_TEXTURE_2D:
2484 case TGSI_TEXTURE_RECT:
2485 FETCH(&r[0], 0, TGSI_CHAN_X);
2486 FETCH(&r[1], 0, TGSI_CHAN_Y);
2487
2488 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2489 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2490
2491 fetch_texel(mach->Sampler, unit, unit,
2492 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2493 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2494 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2495 break;
2496
2497
2498 case TGSI_TEXTURE_SHADOW2D:
2499 case TGSI_TEXTURE_SHADOWRECT:
2500 case TGSI_TEXTURE_2D_ARRAY:
2501 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2502 /* only SHADOW2D_ARRAY actually needs W */
2503 FETCH(&r[0], 0, TGSI_CHAN_X);
2504 FETCH(&r[1], 0, TGSI_CHAN_Y);
2505 FETCH(&r[2], 0, TGSI_CHAN_Z);
2506 FETCH(&r[3], 0, TGSI_CHAN_W);
2507
2508 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2509 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2510
2511 fetch_texel(mach->Sampler, unit, unit,
2512 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2513 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2514 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2515 break;
2516
2517 case TGSI_TEXTURE_3D:
2518 case TGSI_TEXTURE_CUBE:
2519 case TGSI_TEXTURE_CUBE_ARRAY:
2520 case TGSI_TEXTURE_SHADOWCUBE:
2521 /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */
2522 FETCH(&r[0], 0, TGSI_CHAN_X);
2523 FETCH(&r[1], 0, TGSI_CHAN_Y);
2524 FETCH(&r[2], 0, TGSI_CHAN_Z);
2525 FETCH(&r[3], 0, TGSI_CHAN_W);
2526
2527 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2528 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2529 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
2530
2531 fetch_texel(mach->Sampler, unit, unit,
2532 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2533 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2534 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2535 break;
2536
2537 default:
2538 assert(0);
2539 }
2540
2541 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2542 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2543 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2544 }
2545 }
2546 }
2547
2548
2549 static void
2550 exec_txf(struct tgsi_exec_machine *mach,
2551 const struct tgsi_full_instruction *inst)
2552 {
2553 union tgsi_exec_channel r[4];
2554 uint chan;
2555 uint unit;
2556 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2557 int j;
2558 int8_t offsets[3];
2559 unsigned target;
2560
2561 unit = fetch_sampler_unit(mach, inst, 1);
2562 /* always fetch all 3 offsets, overkill but keeps code simple */
2563 fetch_texel_offsets(mach, inst, offsets);
2564
2565 IFETCH(&r[3], 0, TGSI_CHAN_W);
2566
2567 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2568 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2569 target = mach->SamplerViews[unit].Resource;
2570 }
2571 else {
2572 target = inst->Texture.Texture;
2573 }
2574 switch(target) {
2575 case TGSI_TEXTURE_3D:
2576 case TGSI_TEXTURE_2D_ARRAY:
2577 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2578 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2579 IFETCH(&r[2], 0, TGSI_CHAN_Z);
2580 /* fallthrough */
2581 case TGSI_TEXTURE_2D:
2582 case TGSI_TEXTURE_RECT:
2583 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2584 case TGSI_TEXTURE_SHADOW2D:
2585 case TGSI_TEXTURE_SHADOWRECT:
2586 case TGSI_TEXTURE_1D_ARRAY:
2587 case TGSI_TEXTURE_2D_MSAA:
2588 IFETCH(&r[1], 0, TGSI_CHAN_Y);
2589 /* fallthrough */
2590 case TGSI_TEXTURE_BUFFER:
2591 case TGSI_TEXTURE_1D:
2592 case TGSI_TEXTURE_SHADOW1D:
2593 IFETCH(&r[0], 0, TGSI_CHAN_X);
2594 break;
2595 default:
2596 assert(0);
2597 break;
2598 }
2599
2600 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2601 offsets, rgba);
2602
2603 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2604 r[0].f[j] = rgba[0][j];
2605 r[1].f[j] = rgba[1][j];
2606 r[2].f[j] = rgba[2][j];
2607 r[3].f[j] = rgba[3][j];
2608 }
2609
2610 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2611 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2612 unsigned char swizzles[4];
2613 swizzles[0] = inst->Src[1].Register.SwizzleX;
2614 swizzles[1] = inst->Src[1].Register.SwizzleY;
2615 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2616 swizzles[3] = inst->Src[1].Register.SwizzleW;
2617
2618 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2619 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2620 store_dest(mach, &r[swizzles[chan]],
2621 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2622 }
2623 }
2624 }
2625 else {
2626 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2627 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2628 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2629 }
2630 }
2631 }
2632 }
2633
2634 static void
2635 exec_txq(struct tgsi_exec_machine *mach,
2636 const struct tgsi_full_instruction *inst)
2637 {
2638 int result[4];
2639 union tgsi_exec_channel r[4], src;
2640 uint chan;
2641 uint unit;
2642 int i,j;
2643
2644 unit = fetch_sampler_unit(mach, inst, 1);
2645
2646 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2647
2648 /* XXX: This interface can't return per-pixel values */
2649 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2650
2651 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2652 for (j = 0; j < 4; j++) {
2653 r[j].i[i] = result[j];
2654 }
2655 }
2656
2657 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2658 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2659 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
2660 TGSI_EXEC_DATA_INT);
2661 }
2662 }
2663 }
2664
2665 static void
2666 exec_sample(struct tgsi_exec_machine *mach,
2667 const struct tgsi_full_instruction *inst,
2668 uint modifier, boolean compare)
2669 {
2670 const uint resource_unit = inst->Src[1].Register.Index;
2671 const uint sampler_unit = inst->Src[2].Register.Index;
2672 union tgsi_exec_channel r[5], c1;
2673 const union tgsi_exec_channel *lod = &ZeroVec;
2674 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2675 uint chan;
2676 unsigned char swizzles[4];
2677 int8_t offsets[3];
2678
2679 /* always fetch all 3 offsets, overkill but keeps code simple */
2680 fetch_texel_offsets(mach, inst, offsets);
2681
2682 assert(modifier != TEX_MODIFIER_PROJECTED);
2683
2684 if (modifier != TEX_MODIFIER_NONE) {
2685 if (modifier == TEX_MODIFIER_LOD_BIAS) {
2686 FETCH(&c1, 3, TGSI_CHAN_X);
2687 lod = &c1;
2688 control = TGSI_SAMPLER_LOD_BIAS;
2689 }
2690 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2691 FETCH(&c1, 3, TGSI_CHAN_X);
2692 lod = &c1;
2693 control = TGSI_SAMPLER_LOD_EXPLICIT;
2694 }
2695 else {
2696 assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
2697 control = TGSI_SAMPLER_LOD_ZERO;
2698 }
2699 }
2700
2701 FETCH(&r[0], 0, TGSI_CHAN_X);
2702
2703 switch (mach->SamplerViews[resource_unit].Resource) {
2704 case TGSI_TEXTURE_1D:
2705 if (compare) {
2706 FETCH(&r[2], 3, TGSI_CHAN_X);
2707 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2708 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2709 NULL, offsets, control,
2710 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2711 }
2712 else {
2713 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2714 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2715 NULL, offsets, control,
2716 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2717 }
2718 break;
2719
2720 case TGSI_TEXTURE_1D_ARRAY:
2721 case TGSI_TEXTURE_2D:
2722 case TGSI_TEXTURE_RECT:
2723 FETCH(&r[1], 0, TGSI_CHAN_Y);
2724 if (compare) {
2725 FETCH(&r[2], 3, TGSI_CHAN_X);
2726 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2727 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2728 NULL, offsets, control,
2729 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2730 }
2731 else {
2732 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2733 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2734 NULL, offsets, control,
2735 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2736 }
2737 break;
2738
2739 case TGSI_TEXTURE_2D_ARRAY:
2740 case TGSI_TEXTURE_3D:
2741 case TGSI_TEXTURE_CUBE:
2742 FETCH(&r[1], 0, TGSI_CHAN_Y);
2743 FETCH(&r[2], 0, TGSI_CHAN_Z);
2744 if(compare) {
2745 FETCH(&r[3], 3, TGSI_CHAN_X);
2746 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2747 &r[0], &r[1], &r[2], &r[3], lod,
2748 NULL, offsets, control,
2749 &r[0], &r[1], &r[2], &r[3]);
2750 }
2751 else {
2752 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2753 &r[0], &r[1], &r[2], &ZeroVec, lod,
2754 NULL, offsets, control,
2755 &r[0], &r[1], &r[2], &r[3]);
2756 }
2757 break;
2758
2759 case TGSI_TEXTURE_CUBE_ARRAY:
2760 FETCH(&r[1], 0, TGSI_CHAN_Y);
2761 FETCH(&r[2], 0, TGSI_CHAN_Z);
2762 FETCH(&r[3], 0, TGSI_CHAN_W);
2763 if(compare) {
2764 FETCH(&r[4], 3, TGSI_CHAN_X);
2765 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2766 &r[0], &r[1], &r[2], &r[3], &r[4],
2767 NULL, offsets, control,
2768 &r[0], &r[1], &r[2], &r[3]);
2769 }
2770 else {
2771 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2772 &r[0], &r[1], &r[2], &r[3], lod,
2773 NULL, offsets, control,
2774 &r[0], &r[1], &r[2], &r[3]);
2775 }
2776 break;
2777
2778
2779 default:
2780 assert(0);
2781 }
2782
2783 swizzles[0] = inst->Src[1].Register.SwizzleX;
2784 swizzles[1] = inst->Src[1].Register.SwizzleY;
2785 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2786 swizzles[3] = inst->Src[1].Register.SwizzleW;
2787
2788 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2789 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2790 store_dest(mach, &r[swizzles[chan]],
2791 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2792 }
2793 }
2794 }
2795
2796 static void
2797 exec_sample_d(struct tgsi_exec_machine *mach,
2798 const struct tgsi_full_instruction *inst)
2799 {
2800 const uint resource_unit = inst->Src[1].Register.Index;
2801 const uint sampler_unit = inst->Src[2].Register.Index;
2802 union tgsi_exec_channel r[4];
2803 float derivs[3][2][TGSI_QUAD_SIZE];
2804 uint chan;
2805 unsigned char swizzles[4];
2806 int8_t offsets[3];
2807
2808 /* always fetch all 3 offsets, overkill but keeps code simple */
2809 fetch_texel_offsets(mach, inst, offsets);
2810
2811 FETCH(&r[0], 0, TGSI_CHAN_X);
2812
2813 switch (mach->SamplerViews[resource_unit].Resource) {
2814 case TGSI_TEXTURE_1D:
2815 case TGSI_TEXTURE_1D_ARRAY:
2816 /* only 1D array actually needs Y */
2817 FETCH(&r[1], 0, TGSI_CHAN_Y);
2818
2819 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2820
2821 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2822 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2823 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2824 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2825 break;
2826
2827 case TGSI_TEXTURE_2D:
2828 case TGSI_TEXTURE_RECT:
2829 case TGSI_TEXTURE_2D_ARRAY:
2830 /* only 2D array actually needs Z */
2831 FETCH(&r[1], 0, TGSI_CHAN_Y);
2832 FETCH(&r[2], 0, TGSI_CHAN_Z);
2833
2834 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2835 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2836
2837 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2838 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */
2839 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2840 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2841 break;
2842
2843 case TGSI_TEXTURE_3D:
2844 case TGSI_TEXTURE_CUBE:
2845 case TGSI_TEXTURE_CUBE_ARRAY:
2846 /* only cube array actually needs W */
2847 FETCH(&r[1], 0, TGSI_CHAN_Y);
2848 FETCH(&r[2], 0, TGSI_CHAN_Z);
2849 FETCH(&r[3], 0, TGSI_CHAN_W);
2850
2851 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2852 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2853 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2854
2855 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2856 &r[0], &r[1], &r[2], &r[3], &ZeroVec,
2857 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2858 &r[0], &r[1], &r[2], &r[3]);
2859 break;
2860
2861 default:
2862 assert(0);
2863 }
2864
2865 swizzles[0] = inst->Src[1].Register.SwizzleX;
2866 swizzles[1] = inst->Src[1].Register.SwizzleY;
2867 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2868 swizzles[3] = inst->Src[1].Register.SwizzleW;
2869
2870 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2871 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2872 store_dest(mach, &r[swizzles[chan]],
2873 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2874 }
2875 }
2876 }
2877
2878
2879 /**
2880 * Evaluate a constant-valued coefficient at the position of the
2881 * current quad.
2882 */
2883 static void
2884 eval_constant_coef(
2885 struct tgsi_exec_machine *mach,
2886 unsigned attrib,
2887 unsigned chan )
2888 {
2889 unsigned i;
2890
2891 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2892 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2893 }
2894 }
2895
2896 /**
2897 * Evaluate a linear-valued coefficient at the position of the
2898 * current quad.
2899 */
2900 static void
2901 eval_linear_coef(
2902 struct tgsi_exec_machine *mach,
2903 unsigned attrib,
2904 unsigned chan )
2905 {
2906 const float x = mach->QuadPos.xyzw[0].f[0];
2907 const float y = mach->QuadPos.xyzw[1].f[0];
2908 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2909 const float dady = mach->InterpCoefs[attrib].dady[chan];
2910 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2911 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2912 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2913 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2914 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2915 }
2916
2917 /**
2918 * Evaluate a perspective-valued coefficient at the position of the
2919 * current quad.
2920 */
2921 static void
2922 eval_perspective_coef(
2923 struct tgsi_exec_machine *mach,
2924 unsigned attrib,
2925 unsigned chan )
2926 {
2927 const float x = mach->QuadPos.xyzw[0].f[0];
2928 const float y = mach->QuadPos.xyzw[1].f[0];
2929 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2930 const float dady = mach->InterpCoefs[attrib].dady[chan];
2931 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2932 const float *w = mach->QuadPos.xyzw[3].f;
2933 /* divide by W here */
2934 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2935 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2936 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2937 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2938 }
2939
2940
/**
 * Signature of the coefficient evaluators (eval_constant_coef,
 * eval_linear_coef, eval_perspective_coef): compute channel 'chan' of
 * input attribute 'attrib' for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
2945
2946 static void
2947 exec_declaration(struct tgsi_exec_machine *mach,
2948 const struct tgsi_full_declaration *decl)
2949 {
2950 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2951 mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2952 return;
2953 }
2954
2955 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
2956 if (decl->Declaration.File == TGSI_FILE_INPUT) {
2957 uint first, last, mask;
2958
2959 first = decl->Range.First;
2960 last = decl->Range.Last;
2961 mask = decl->Declaration.UsageMask;
2962
2963 /* XXX we could remove this special-case code since
2964 * mach->InterpCoefs[first].a0 should already have the
2965 * front/back-face value. But we should first update the
2966 * ureg code to emit the right UsageMask value (WRITEMASK_X).
2967 * Then, we could remove the tgsi_exec_machine::Face field.
2968 */
2969 /* XXX make FACE a system value */
2970 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
2971 uint i;
2972
2973 assert(decl->Semantic.Index == 0);
2974 assert(first == last);
2975
2976 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2977 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
2978 }
2979 } else {
2980 eval_coef_func eval;
2981 uint i, j;
2982
2983 switch (decl->Interp.Interpolate) {
2984 case TGSI_INTERPOLATE_CONSTANT:
2985 eval = eval_constant_coef;
2986 break;
2987
2988 case TGSI_INTERPOLATE_LINEAR:
2989 eval = eval_linear_coef;
2990 break;
2991
2992 case TGSI_INTERPOLATE_PERSPECTIVE:
2993 eval = eval_perspective_coef;
2994 break;
2995
2996 case TGSI_INTERPOLATE_COLOR:
2997 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
2998 break;
2999
3000 default:
3001 assert(0);
3002 return;
3003 }
3004
3005 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3006 if (mask & (1 << j)) {
3007 for (i = first; i <= last; i++) {
3008 eval(mach, i, j);
3009 }
3010 }
3011 }
3012 }
3013
3014 if (DEBUG_EXECUTION) {
3015 uint i, j;
3016 for (i = first; i <= last; ++i) {
3017 debug_printf("IN[%2u] = ", i);
3018 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3019 if (j > 0) {
3020 debug_printf(" ");
3021 }
3022 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3023 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
3024 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
3025 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
3026 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
3027 }
3028 }
3029 }
3030 }
3031 }
3032
3033 }
3034
/** Per-channel operation taking one source channel and writing 'dst'. */
typedef void (*micro_unary_op)(union tgsi_exec_channel *dst,
                               const union tgsi_exec_channel *src);
3037
3038 static void
3039 exec_scalar_unary(struct tgsi_exec_machine *mach,
3040 const struct tgsi_full_instruction *inst,
3041 micro_unary_op op,
3042 enum tgsi_exec_datatype dst_datatype,
3043 enum tgsi_exec_datatype src_datatype)
3044 {
3045 unsigned int chan;
3046 union tgsi_exec_channel src;
3047 union tgsi_exec_channel dst;
3048
3049 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
3050 op(&dst, &src);
3051 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3052 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3053 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
3054 }
3055 }
3056 }
3057
3058 static void
3059 exec_vector_unary(struct tgsi_exec_machine *mach,
3060 const struct tgsi_full_instruction *inst,
3061 micro_unary_op op,
3062 enum tgsi_exec_datatype dst_datatype,
3063 enum tgsi_exec_datatype src_datatype)
3064 {
3065 unsigned int chan;
3066 struct tgsi_exec_vector dst;
3067
3068 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3069 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3070 union tgsi_exec_channel src;
3071
3072 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
3073 op(&dst.xyzw[chan], &src);
3074 }
3075 }
3076 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3077 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3078 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3079 }
3080 }
3081 }
3082
/** Per-channel operation taking two source channels and writing 'dst'. */
typedef void (*micro_binary_op)(union tgsi_exec_channel *dst,
                                const union tgsi_exec_channel *src0,
                                const union tgsi_exec_channel *src1);
3086
3087 static void
3088 exec_scalar_binary(struct tgsi_exec_machine *mach,
3089 const struct tgsi_full_instruction *inst,
3090 micro_binary_op op,
3091 enum tgsi_exec_datatype dst_datatype,
3092 enum tgsi_exec_datatype src_datatype)
3093 {
3094 unsigned int chan;
3095 union tgsi_exec_channel src[2];
3096 union tgsi_exec_channel dst;
3097
3098 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
3099 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype);
3100 op(&dst, &src[0], &src[1]);
3101 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3102 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3103 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
3104 }
3105 }
3106 }
3107
3108 static void
3109 exec_vector_binary(struct tgsi_exec_machine *mach,
3110 const struct tgsi_full_instruction *inst,
3111 micro_binary_op op,
3112 enum tgsi_exec_datatype dst_datatype,
3113 enum tgsi_exec_datatype src_datatype)
3114 {
3115 unsigned int chan;
3116 struct tgsi_exec_vector dst;
3117
3118 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3119 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3120 union tgsi_exec_channel src[2];
3121
3122 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3123 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3124 op(&dst.xyzw[chan], &src[0], &src[1]);
3125 }
3126 }
3127 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3128 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3129 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3130 }
3131 }
3132 }
3133
/** Per-channel operation taking three source channels and writing 'dst'. */
typedef void (*micro_trinary_op)(union tgsi_exec_channel *dst,
                                 const union tgsi_exec_channel *src0,
                                 const union tgsi_exec_channel *src1,
                                 const union tgsi_exec_channel *src2);
3138
3139 static void
3140 exec_vector_trinary(struct tgsi_exec_machine *mach,
3141 const struct tgsi_full_instruction *inst,
3142 micro_trinary_op op,
3143 enum tgsi_exec_datatype dst_datatype,
3144 enum tgsi_exec_datatype src_datatype)
3145 {
3146 unsigned int chan;
3147 struct tgsi_exec_vector dst;
3148
3149 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3150 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3151 union tgsi_exec_channel src[3];
3152
3153 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3154 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3155 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3156 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3157 }
3158 }
3159 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3160 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3161 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3162 }
3163 }
3164 }
3165
/** Signature of a micro-op that reads four source channels and writes one. */
typedef void (*micro_quaternary_op)(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3);
3171
3172 static void
3173 exec_vector_quaternary(struct tgsi_exec_machine *mach,
3174 const struct tgsi_full_instruction *inst,
3175 micro_quaternary_op op,
3176 enum tgsi_exec_datatype dst_datatype,
3177 enum tgsi_exec_datatype src_datatype)
3178 {
3179 unsigned int chan;
3180 struct tgsi_exec_vector dst;
3181
3182 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3183 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3184 union tgsi_exec_channel src[4];
3185
3186 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3187 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3188 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3189 fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype);
3190 op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]);
3191 }
3192 }
3193 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3194 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3195 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3196 }
3197 }
3198 }
3199
3200 static void
3201 exec_dp3(struct tgsi_exec_machine *mach,
3202 const struct tgsi_full_instruction *inst)
3203 {
3204 unsigned int chan;
3205 union tgsi_exec_channel arg[3];
3206
3207 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3208 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3209 micro_mul(&arg[2], &arg[0], &arg[1]);
3210
3211 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
3212 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3213 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3214 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3215 }
3216
3217 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3218 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3219 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3220 }
3221 }
3222 }
3223
3224 static void
3225 exec_dp4(struct tgsi_exec_machine *mach,
3226 const struct tgsi_full_instruction *inst)
3227 {
3228 unsigned int chan;
3229 union tgsi_exec_channel arg[3];
3230
3231 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3232 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3233 micro_mul(&arg[2], &arg[0], &arg[1]);
3234
3235 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
3236 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3237 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3238 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3239 }
3240
3241 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3242 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3243 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3244 }
3245 }
3246 }
3247
3248 static void
3249 exec_dp2a(struct tgsi_exec_machine *mach,
3250 const struct tgsi_full_instruction *inst)
3251 {
3252 unsigned int chan;
3253 union tgsi_exec_channel arg[3];
3254
3255 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3256 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3257 micro_mul(&arg[2], &arg[0], &arg[1]);
3258
3259 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3260 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3261 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
3262
3263 fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3264 micro_add(&arg[0], &arg[0], &arg[1]);
3265
3266 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3267 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3268 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3269 }
3270 }
3271 }
3272
3273 static void
3274 exec_dph(struct tgsi_exec_machine *mach,
3275 const struct tgsi_full_instruction *inst)
3276 {
3277 unsigned int chan;
3278 union tgsi_exec_channel arg[3];
3279
3280 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3281 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3282 micro_mul(&arg[2], &arg[0], &arg[1]);
3283
3284 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3285 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3286 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3287
3288 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3289 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3290 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
3291
3292 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3293 micro_add(&arg[0], &arg[0], &arg[1]);
3294
3295 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3296 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3297 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3298 }
3299 }
3300 }
3301
3302 static void
3303 exec_dp2(struct tgsi_exec_machine *mach,
3304 const struct tgsi_full_instruction *inst)
3305 {
3306 unsigned int chan;
3307 union tgsi_exec_channel arg[3];
3308
3309 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3310 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3311 micro_mul(&arg[2], &arg[0], &arg[1]);
3312
3313 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3314 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3315 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3316
3317 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3318 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3319 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3320 }
3321 }
3322 }
3323
3324 static void
3325 exec_pk2h(struct tgsi_exec_machine *mach,
3326 const struct tgsi_full_instruction *inst)
3327 {
3328 unsigned chan;
3329 union tgsi_exec_channel arg[2], dst;
3330
3331 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3332 fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3333 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3334 dst.u[chan] = util_float_to_half(arg[0].f[chan]) |
3335 (util_float_to_half(arg[1].f[chan]) << 16);
3336 }
3337 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3338 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3339 store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
3340 }
3341 }
3342 }
3343
3344 static void
3345 exec_up2h(struct tgsi_exec_machine *mach,
3346 const struct tgsi_full_instruction *inst)
3347 {
3348 unsigned chan;
3349 union tgsi_exec_channel arg, dst[2];
3350
3351 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3352 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3353 dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff);
3354 dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16);
3355 }
3356 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3357 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3358 store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3359 }
3360 }
3361 }
3362
3363 static void
3364 exec_scs(struct tgsi_exec_machine *mach,
3365 const struct tgsi_full_instruction *inst)
3366 {
3367 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
3368 union tgsi_exec_channel arg;
3369 union tgsi_exec_channel result;
3370
3371 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3372
3373 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3374 micro_cos(&result, &arg);
3375 store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3376 }
3377 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3378 micro_sin(&result, &arg);
3379 store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3380 }
3381 }
3382 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3383 store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3384 }
3385 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3386 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3387 }
3388 }
3389
3390 static void
3391 exec_xpd(struct tgsi_exec_machine *mach,
3392 const struct tgsi_full_instruction *inst)
3393 {
3394 union tgsi_exec_channel r[6];
3395 union tgsi_exec_channel d[3];
3396
3397 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3398 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3399
3400 micro_mul(&r[2], &r[0], &r[1]);
3401
3402 fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3403 fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3404
3405 micro_mul(&r[5], &r[3], &r[4] );
3406 micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]);
3407
3408 fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3409
3410 micro_mul(&r[3], &r[3], &r[2]);
3411
3412 fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3413
3414 micro_mul(&r[1], &r[1], &r[5]);
3415 micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]);
3416
3417 micro_mul(&r[5], &r[5], &r[4]);
3418 micro_mul(&r[0], &r[0], &r[2]);
3419 micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]);
3420
3421 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3422 store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3423 }
3424 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3425 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3426 }
3427 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3428 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3429 }
3430 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3431 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3432 }
3433 }
3434
3435 static void
3436 exec_dst(struct tgsi_exec_machine *mach,
3437 const struct tgsi_full_instruction *inst)
3438 {
3439 union tgsi_exec_channel r[2];
3440 union tgsi_exec_channel d[4];
3441
3442 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3443 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3444 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3445 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
3446 }
3447 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3448 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3449 }
3450 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3451 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3452 }
3453
3454 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3455 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3456 }
3457 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3458 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3459 }
3460 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3461 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3462 }
3463 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3464 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3465 }
3466 }
3467
3468 static void
3469 exec_log(struct tgsi_exec_machine *mach,
3470 const struct tgsi_full_instruction *inst)
3471 {
3472 union tgsi_exec_channel r[3];
3473
3474 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3475 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */
3476 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */
3477 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */
3478 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3479 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3480 }
3481 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3482 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */
3483 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
3484 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3485 }
3486 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3487 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3488 }
3489 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3490 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3491 }
3492 }
3493
3494 static void
3495 exec_exp(struct tgsi_exec_machine *mach,
3496 const struct tgsi_full_instruction *inst)
3497 {
3498 union tgsi_exec_channel r[3];
3499
3500 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3501 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */
3502 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3503 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */
3504 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3505 }
3506 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3507 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
3508 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3509 }
3510 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3511 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */
3512 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3513 }
3514 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3515 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3516 }
3517 }
3518
3519 static void
3520 exec_lit(struct tgsi_exec_machine *mach,
3521 const struct tgsi_full_instruction *inst)
3522 {
3523 union tgsi_exec_channel r[3];
3524 union tgsi_exec_channel d[3];
3525
3526 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3527 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3528 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3529 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3530 micro_max(&r[1], &r[1], &ZeroVec);
3531
3532 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3533 micro_min(&r[2], &r[2], &P128Vec);
3534 micro_max(&r[2], &r[2], &M128Vec);
3535 micro_pow(&r[1], &r[1], &r[2]);
3536 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3537 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3538 }
3539 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3540 micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
3541 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3542 }
3543 }
3544 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3545 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3546 }
3547
3548 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3549 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3550 }
3551 }
3552
3553 static void
3554 exec_break(struct tgsi_exec_machine *mach)
3555 {
3556 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3557 /* turn off loop channels for each enabled exec channel */
3558 mach->LoopMask &= ~mach->ExecMask;
3559 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3560 UPDATE_EXEC_MASK(mach);
3561 } else {
3562 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3563
3564 mach->Switch.mask = 0x0;
3565
3566 UPDATE_EXEC_MASK(mach);
3567 }
3568 }
3569
3570 static void
3571 exec_switch(struct tgsi_exec_machine *mach,
3572 const struct tgsi_full_instruction *inst)
3573 {
3574 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3575 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3576
3577 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3578 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3579 mach->Switch.mask = 0x0;
3580 mach->Switch.defaultMask = 0x0;
3581
3582 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3583 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3584
3585 UPDATE_EXEC_MASK(mach);
3586 }
3587
3588 static void
3589 exec_case(struct tgsi_exec_machine *mach,
3590 const struct tgsi_full_instruction *inst)
3591 {
3592 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3593 union tgsi_exec_channel src;
3594 uint mask = 0;
3595
3596 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3597
3598 if (mach->Switch.selector.u[0] == src.u[0]) {
3599 mask |= 0x1;
3600 }
3601 if (mach->Switch.selector.u[1] == src.u[1]) {
3602 mask |= 0x2;
3603 }
3604 if (mach->Switch.selector.u[2] == src.u[2]) {
3605 mask |= 0x4;
3606 }
3607 if (mach->Switch.selector.u[3] == src.u[3]) {
3608 mask |= 0x8;
3609 }
3610
3611 mach->Switch.defaultMask |= mask;
3612
3613 mach->Switch.mask |= mask & prevMask;
3614
3615 UPDATE_EXEC_MASK(mach);
3616 }
3617
3618 /* FIXME: this will only work if default is last */
3619 static void
3620 exec_default(struct tgsi_exec_machine *mach)
3621 {
3622 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3623
3624 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3625
3626 UPDATE_EXEC_MASK(mach);
3627 }
3628
3629 static void
3630 exec_endswitch(struct tgsi_exec_machine *mach)
3631 {
3632 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3633 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3634
3635 UPDATE_EXEC_MASK(mach);
3636 }
3637
/**
 * Double-precision micro-op: double result from double source(s).  Callers
 * with several operands pass a pointer to an array of sources.
 */
typedef void (*micro_dop)(
   union tgsi_double_channel *dst,
   const union tgsi_double_channel *src);

/** Micro-op with a double result, one double source and one 32-bit source. */
typedef void (*micro_dop_sop)(
   union tgsi_double_channel *dst,
   const union tgsi_double_channel *src0,
   union tgsi_exec_channel *src1);

/** Micro-op with a double result and a 32-bit source. */
typedef void (*micro_dop_s)(
   union tgsi_double_channel *dst,
   const union tgsi_exec_channel *src);

/** Micro-op with a 32-bit result and a double source. */
typedef void (*micro_sop_d)(
   union tgsi_exec_channel *dst,
   const union tgsi_double_channel *src);
3650
3651 static void
3652 fetch_double_channel(struct tgsi_exec_machine *mach,
3653 union tgsi_double_channel *chan,
3654 const struct tgsi_full_src_register *reg,
3655 uint chan_0,
3656 uint chan_1)
3657 {
3658 union tgsi_exec_channel src[2];
3659 uint i;
3660
3661 fetch_source_d(mach, &src[0], reg, chan_0, TGSI_EXEC_DATA_UINT);
3662 fetch_source_d(mach, &src[1], reg, chan_1, TGSI_EXEC_DATA_UINT);
3663
3664 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3665 chan->u[i][0] = src[0].u[i];
3666 chan->u[i][1] = src[1].u[i];
3667 }
3668 if (reg->Register.Absolute) {
3669 micro_dabs(chan, chan);
3670 }
3671 if (reg->Register.Negate) {
3672 micro_dneg(chan, chan);
3673 }
3674 }
3675
3676 static void
3677 store_double_channel(struct tgsi_exec_machine *mach,
3678 const union tgsi_double_channel *chan,
3679 const struct tgsi_full_dst_register *reg,
3680 const struct tgsi_full_instruction *inst,
3681 uint chan_0,
3682 uint chan_1)
3683 {
3684 union tgsi_exec_channel dst[2];
3685 uint i;
3686 union tgsi_double_channel temp;
3687 const uint execmask = mach->ExecMask;
3688
3689 if (!inst->Instruction.Saturate) {
3690 for (i = 0; i < TGSI_QUAD_SIZE; i++)
3691 if (execmask & (1 << i)) {
3692 dst[0].u[i] = chan->u[i][0];
3693 dst[1].u[i] = chan->u[i][1];
3694 }
3695 }
3696 else {
3697 for (i = 0; i < TGSI_QUAD_SIZE; i++)
3698 if (execmask & (1 << i)) {
3699 if (chan->d[i] < 0.0)
3700 temp.d[i] = 0.0;
3701 else if (chan->d[i] > 1.0)
3702 temp.d[i] = 1.0;
3703 else
3704 temp.d[i] = chan->d[i];
3705
3706 dst[0].u[i] = temp.u[i][0];
3707 dst[1].u[i] = temp.u[i][1];
3708 }
3709 }
3710
3711 store_dest_double(mach, &dst[0], reg, inst, chan_0, TGSI_EXEC_DATA_UINT);
3712 if (chan_1 != -1)
3713 store_dest_double(mach, &dst[1], reg, inst, chan_1, TGSI_EXEC_DATA_UINT);
3714 }
3715
3716 static void
3717 exec_double_unary(struct tgsi_exec_machine *mach,
3718 const struct tgsi_full_instruction *inst,
3719 micro_dop op)
3720 {
3721 union tgsi_double_channel src;
3722 union tgsi_double_channel dst;
3723
3724 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3725 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3726 op(&dst, &src);
3727 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3728 }
3729 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3730 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3731 op(&dst, &src);
3732 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3733 }
3734 }
3735
3736 static void
3737 exec_double_binary(struct tgsi_exec_machine *mach,
3738 const struct tgsi_full_instruction *inst,
3739 micro_dop op,
3740 enum tgsi_exec_datatype dst_datatype)
3741 {
3742 union tgsi_double_channel src[2];
3743 union tgsi_double_channel dst;
3744 int first_dest_chan, second_dest_chan;
3745 int wmask;
3746
3747 wmask = inst->Dst[0].Register.WriteMask;
3748 /* these are & because of the way DSLT etc store their destinations */
3749 if (wmask & TGSI_WRITEMASK_XY) {
3750 first_dest_chan = TGSI_CHAN_X;
3751 second_dest_chan = TGSI_CHAN_Y;
3752 if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3753 first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y;
3754 second_dest_chan = -1;
3755 }
3756
3757 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3758 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3759 op(&dst, src);
3760 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3761 }
3762
3763 if (wmask & TGSI_WRITEMASK_ZW) {
3764 first_dest_chan = TGSI_CHAN_Z;
3765 second_dest_chan = TGSI_CHAN_W;
3766 if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3767 first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W;
3768 second_dest_chan = -1;
3769 }
3770
3771 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3772 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3773 op(&dst, src);
3774 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3775 }
3776 }
3777
3778 static void
3779 exec_double_trinary(struct tgsi_exec_machine *mach,
3780 const struct tgsi_full_instruction *inst,
3781 micro_dop op)
3782 {
3783 union tgsi_double_channel src[3];
3784 union tgsi_double_channel dst;
3785
3786 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3787 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3788 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3789 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y);
3790 op(&dst, src);
3791 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3792 }
3793 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3794 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3795 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3796 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W);
3797 op(&dst, src);
3798 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3799 }
3800 }
3801
3802 static void
3803 exec_dldexp(struct tgsi_exec_machine *mach,
3804 const struct tgsi_full_instruction *inst)
3805 {
3806 union tgsi_double_channel src0;
3807 union tgsi_exec_channel src1;
3808 union tgsi_double_channel dst;
3809 int wmask;
3810
3811 wmask = inst->Dst[0].Register.WriteMask;
3812 if (wmask & TGSI_WRITEMASK_XY) {
3813 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3814 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3815 micro_dldexp(&dst, &src0, &src1);
3816 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3817 }
3818
3819 if (wmask & TGSI_WRITEMASK_ZW) {
3820 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3821 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3822 micro_dldexp(&dst, &src0, &src1);
3823 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3824 }
3825 }
3826
3827 static void
3828 exec_dfracexp(struct tgsi_exec_machine *mach,
3829 const struct tgsi_full_instruction *inst)
3830 {
3831 union tgsi_double_channel src;
3832 union tgsi_double_channel dst;
3833 union tgsi_exec_channel dst_exp;
3834
3835 if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) {
3836 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3837 micro_dfracexp(&dst, &dst_exp, &src);
3838 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3839 store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
3840 }
3841 if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) {
3842 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3843 micro_dfracexp(&dst, &dst_exp, &src);
3844 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3845 store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
3846 }
3847 }
3848
3849 static void
3850 exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
3851 const struct tgsi_full_instruction *inst,
3852 micro_dop_sop op)
3853 {
3854 union tgsi_double_channel src0;
3855 union tgsi_exec_channel src1;
3856 union tgsi_double_channel dst;
3857 int wmask;
3858
3859 wmask = inst->Dst[0].Register.WriteMask;
3860 if (wmask & TGSI_WRITEMASK_XY) {
3861 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3862 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3863 op(&dst, &src0, &src1);
3864 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3865 }
3866
3867 if (wmask & TGSI_WRITEMASK_ZW) {
3868 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3869 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3870 op(&dst, &src0, &src1);
3871 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3872 }
3873 }
3874
3875 static int
3876 get_image_coord_dim(unsigned tgsi_tex)
3877 {
3878 int dim;
3879 switch (tgsi_tex) {
3880 case TGSI_TEXTURE_BUFFER:
3881 case TGSI_TEXTURE_1D:
3882 dim = 1;
3883 break;
3884 case TGSI_TEXTURE_2D:
3885 case TGSI_TEXTURE_RECT:
3886 case TGSI_TEXTURE_1D_ARRAY:
3887 case TGSI_TEXTURE_2D_MSAA:
3888 dim = 2;
3889 break;
3890 case TGSI_TEXTURE_3D:
3891 case TGSI_TEXTURE_CUBE:
3892 case TGSI_TEXTURE_2D_ARRAY:
3893 case TGSI_TEXTURE_2D_ARRAY_MSAA:
3894 case TGSI_TEXTURE_CUBE_ARRAY:
3895 dim = 3;
3896 break;
3897 default:
3898 assert(!"unknown texture target");
3899 dim = 0;
3900 break;
3901 }
3902
3903 return dim;
3904 }
3905
3906 static int
3907 get_image_coord_sample(unsigned tgsi_tex)
3908 {
3909 int sample = 0;
3910 switch (tgsi_tex) {
3911 case TGSI_TEXTURE_2D_MSAA:
3912 sample = 3;
3913 break;
3914 case TGSI_TEXTURE_2D_ARRAY_MSAA:
3915 sample = 4;
3916 break;
3917 default:
3918 break;
3919 }
3920 return sample;
3921 }
3922
3923 static void
3924 exec_load_img(struct tgsi_exec_machine *mach,
3925 const struct tgsi_full_instruction *inst)
3926 {
3927 union tgsi_exec_channel r[4], sample_r;
3928 uint unit;
3929 int sample;
3930 int i, j;
3931 int dim;
3932 uint chan;
3933 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3934 struct tgsi_image_params params;
3935 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3936
3937 unit = fetch_sampler_unit(mach, inst, 0);
3938 dim = get_image_coord_dim(inst->Memory.Texture);
3939 sample = get_image_coord_sample(inst->Memory.Texture);
3940 assert(dim <= 3);
3941
3942 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
3943 params.unit = unit;
3944 params.tgsi_tex_instr = inst->Memory.Texture;
3945 params.format = inst->Memory.Format;
3946
3947 for (i = 0; i < dim; i++) {
3948 IFETCH(&r[i], 1, TGSI_CHAN_X + i);
3949 }
3950
3951 if (sample)
3952 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
3953
3954 mach->Image->load(mach->Image, &params,
3955 r[0].i, r[1].i, r[2].i, sample_r.i,
3956 rgba);
3957 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3958 r[0].f[j] = rgba[0][j];
3959 r[1].f[j] = rgba[1][j];
3960 r[2].f[j] = rgba[2][j];
3961 r[3].f[j] = rgba[3][j];
3962 }
3963 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3964 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3965 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3966 }
3967 }
3968 }
3969
3970 static void
3971 exec_load_buf(struct tgsi_exec_machine *mach,
3972 const struct tgsi_full_instruction *inst)
3973 {
3974 union tgsi_exec_channel r[4];
3975 uint unit;
3976 int j;
3977 uint chan;
3978 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3979 struct tgsi_buffer_params params;
3980 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3981
3982 unit = fetch_sampler_unit(mach, inst, 0);
3983
3984 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
3985 params.unit = unit;
3986 IFETCH(&r[0], 1, TGSI_CHAN_X);
3987
3988 mach->Buffer->load(mach->Buffer, &params,
3989 r[0].i, rgba);
3990 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3991 r[0].f[j] = rgba[0][j];
3992 r[1].f[j] = rgba[1][j];
3993 r[2].f[j] = rgba[2][j];
3994 r[3].f[j] = rgba[3][j];
3995 }
3996 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3997 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3998 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3999 }
4000 }
4001 }
4002
4003 static void
4004 exec_load_mem(struct tgsi_exec_machine *mach,
4005 const struct tgsi_full_instruction *inst)
4006 {
4007 union tgsi_exec_channel r[4];
4008 uint chan;
4009 char *ptr = mach->LocalMem;
4010 uint32_t offset;
4011 int j;
4012
4013 IFETCH(&r[0], 1, TGSI_CHAN_X);
4014 if (r[0].u[0] >= mach->LocalMemSize)
4015 return;
4016
4017 offset = r[0].u[0];
4018 ptr += offset;
4019
4020 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4021 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4022 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4023 memcpy(&r[chan].u[j], ptr + (4 * chan), 4);
4024 }
4025 }
4026 }
4027
4028 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4029 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4030 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4031 }
4032 }
4033 }
4034
4035 static void
4036 exec_load(struct tgsi_exec_machine *mach,
4037 const struct tgsi_full_instruction *inst)
4038 {
4039 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4040 exec_load_img(mach, inst);
4041 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
4042 exec_load_buf(mach, inst);
4043 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
4044 exec_load_mem(mach, inst);
4045 }
4046
4047 static void
4048 exec_store_img(struct tgsi_exec_machine *mach,
4049 const struct tgsi_full_instruction *inst)
4050 {
4051 union tgsi_exec_channel r[3], sample_r;
4052 union tgsi_exec_channel value[4];
4053 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4054 struct tgsi_image_params params;
4055 int dim;
4056 int sample;
4057 int i, j;
4058 uint unit;
4059 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4060 unit = inst->Dst[0].Register.Index;
4061 dim = get_image_coord_dim(inst->Memory.Texture);
4062 sample = get_image_coord_sample(inst->Memory.Texture);
4063 assert(dim <= 3);
4064
4065 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4066 params.unit = unit;
4067 params.tgsi_tex_instr = inst->Memory.Texture;
4068 params.format = inst->Memory.Format;
4069
4070 for (i = 0; i < dim; i++) {
4071 IFETCH(&r[i], 0, TGSI_CHAN_X + i);
4072 }
4073
4074 for (i = 0; i < 4; i++) {
4075 FETCH(&value[i], 1, TGSI_CHAN_X + i);
4076 }
4077 if (sample)
4078 IFETCH(&sample_r, 0, TGSI_CHAN_X + sample);
4079
4080 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4081 rgba[0][j] = value[0].f[j];
4082 rgba[1][j] = value[1].f[j];
4083 rgba[2][j] = value[2].f[j];
4084 rgba[3][j] = value[3].f[j];
4085 }
4086
4087 mach->Image->store(mach->Image, &params,
4088 r[0].i, r[1].i, r[2].i, sample_r.i,
4089 rgba);
4090 }
4091
4092 static void
4093 exec_store_buf(struct tgsi_exec_machine *mach,
4094 const struct tgsi_full_instruction *inst)
4095 {
4096 union tgsi_exec_channel r[3];
4097 union tgsi_exec_channel value[4];
4098 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4099 struct tgsi_buffer_params params;
4100 int i, j;
4101 uint unit;
4102 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4103
4104 unit = inst->Dst[0].Register.Index;
4105
4106 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4107 params.unit = unit;
4108 params.writemask = inst->Dst[0].Register.WriteMask;
4109
4110 IFETCH(&r[0], 0, TGSI_CHAN_X);
4111 for (i = 0; i < 4; i++) {
4112 FETCH(&value[i], 1, TGSI_CHAN_X + i);
4113 }
4114
4115 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4116 rgba[0][j] = value[0].f[j];
4117 rgba[1][j] = value[1].f[j];
4118 rgba[2][j] = value[2].f[j];
4119 rgba[3][j] = value[3].f[j];
4120 }
4121
4122 mach->Buffer->store(mach->Buffer, &params,
4123 r[0].i,
4124 rgba);
4125 }
4126
4127 static void
4128 exec_store_mem(struct tgsi_exec_machine *mach,
4129 const struct tgsi_full_instruction *inst)
4130 {
4131 union tgsi_exec_channel r[3];
4132 union tgsi_exec_channel value[4];
4133 uint i, chan;
4134 char *ptr = mach->LocalMem;
4135 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4136 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4137
4138 IFETCH(&r[0], 0, TGSI_CHAN_X);
4139
4140 for (i = 0; i < 4; i++) {
4141 FETCH(&value[i], 1, TGSI_CHAN_X + i);
4142 }
4143
4144 if (r[0].u[0] >= mach->LocalMemSize)
4145 return;
4146 ptr += r[0].u[0];
4147
4148 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4149 if (execmask & (1 << i)) {
4150 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4151 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4152 memcpy(ptr + (chan * 4), &value[chan].u[0], 4);
4153 }
4154 }
4155 }
4156 }
4157 }
4158
4159 static void
4160 exec_store(struct tgsi_exec_machine *mach,
4161 const struct tgsi_full_instruction *inst)
4162 {
4163 if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
4164 exec_store_img(mach, inst);
4165 else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
4166 exec_store_buf(mach, inst);
4167 else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
4168 exec_store_mem(mach, inst);
4169 }
4170
4171 static void
4172 exec_atomop_img(struct tgsi_exec_machine *mach,
4173 const struct tgsi_full_instruction *inst)
4174 {
4175 union tgsi_exec_channel r[4], sample_r;
4176 union tgsi_exec_channel value[4], value2[4];
4177 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4178 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4179 struct tgsi_image_params params;
4180 int dim;
4181 int sample;
4182 int i, j;
4183 uint unit, chan;
4184 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4185 unit = fetch_sampler_unit(mach, inst, 0);
4186 dim = get_image_coord_dim(inst->Memory.Texture);
4187 sample = get_image_coord_sample(inst->Memory.Texture);
4188 assert(dim <= 3);
4189
4190 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4191 params.unit = unit;
4192 params.tgsi_tex_instr = inst->Memory.Texture;
4193 params.format = inst->Memory.Format;
4194
4195 for (i = 0; i < dim; i++) {
4196 IFETCH(&r[i], 1, TGSI_CHAN_X + i);
4197 }
4198
4199 for (i = 0; i < 4; i++) {
4200 FETCH(&value[i], 2, TGSI_CHAN_X + i);
4201 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4202 FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4203 }
4204 if (sample)
4205 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
4206
4207 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4208 rgba[0][j] = value[0].f[j];
4209 rgba[1][j] = value[1].f[j];
4210 rgba[2][j] = value[2].f[j];
4211 rgba[3][j] = value[3].f[j];
4212 }
4213 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4214 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4215 rgba2[0][j] = value2[0].f[j];
4216 rgba2[1][j] = value2[1].f[j];
4217 rgba2[2][j] = value2[2].f[j];
4218 rgba2[3][j] = value2[3].f[j];
4219 }
4220 }
4221
4222 mach->Image->op(mach->Image, &params, inst->Instruction.Opcode,
4223 r[0].i, r[1].i, r[2].i, sample_r.i,
4224 rgba, rgba2);
4225
4226 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4227 r[0].f[j] = rgba[0][j];
4228 r[1].f[j] = rgba[1][j];
4229 r[2].f[j] = rgba[2][j];
4230 r[3].f[j] = rgba[3][j];
4231 }
4232 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4233 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4234 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4235 }
4236 }
4237 }
4238
4239 static void
4240 exec_atomop_buf(struct tgsi_exec_machine *mach,
4241 const struct tgsi_full_instruction *inst)
4242 {
4243 union tgsi_exec_channel r[4];
4244 union tgsi_exec_channel value[4], value2[4];
4245 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4246 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4247 struct tgsi_buffer_params params;
4248 int i, j;
4249 uint unit, chan;
4250 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4251
4252 unit = fetch_sampler_unit(mach, inst, 0);
4253
4254 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4255 params.unit = unit;
4256 params.writemask = inst->Dst[0].Register.WriteMask;
4257
4258 IFETCH(&r[0], 1, TGSI_CHAN_X);
4259
4260 for (i = 0; i < 4; i++) {
4261 FETCH(&value[i], 2, TGSI_CHAN_X + i);
4262 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4263 FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4264 }
4265
4266 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4267 rgba[0][j] = value[0].f[j];
4268 rgba[1][j] = value[1].f[j];
4269 rgba[2][j] = value[2].f[j];
4270 rgba[3][j] = value[3].f[j];
4271 }
4272 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4273 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4274 rgba2[0][j] = value2[0].f[j];
4275 rgba2[1][j] = value2[1].f[j];
4276 rgba2[2][j] = value2[2].f[j];
4277 rgba2[3][j] = value2[3].f[j];
4278 }
4279 }
4280
4281 mach->Buffer->op(mach->Buffer, &params, inst->Instruction.Opcode,
4282 r[0].i,
4283 rgba, rgba2);
4284
4285 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4286 r[0].f[j] = rgba[0][j];
4287 r[1].f[j] = rgba[1][j];
4288 r[2].f[j] = rgba[2][j];
4289 r[3].f[j] = rgba[3][j];
4290 }
4291 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4292 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4293 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4294 }
4295 }
4296 }
4297
4298 static void
4299 exec_atomop_mem(struct tgsi_exec_machine *mach,
4300 const struct tgsi_full_instruction *inst)
4301 {
4302 union tgsi_exec_channel r[4];
4303 union tgsi_exec_channel value[4], value2[4];
4304 char *ptr = mach->LocalMem;
4305 uint32_t val;
4306 uint chan, i;
4307 uint32_t offset;
4308 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4309 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4310 IFETCH(&r[0], 1, TGSI_CHAN_X);
4311
4312 if (r[0].u[0] >= mach->LocalMemSize)
4313 return;
4314
4315 offset = r[0].u[0];
4316 ptr += offset;
4317 for (i = 0; i < 4; i++) {
4318 FETCH(&value[i], 2, TGSI_CHAN_X + i);
4319 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4320 FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4321 }
4322
4323 memcpy(&r[0].u[0], ptr, 4);
4324 val = r[0].u[0];
4325 switch (inst->Instruction.Opcode) {
4326 case TGSI_OPCODE_ATOMUADD:
4327 val += value[0].u[0];
4328 break;
4329 case TGSI_OPCODE_ATOMXOR:
4330 val ^= value[0].u[0];
4331 break;
4332 case TGSI_OPCODE_ATOMOR:
4333 val |= value[0].u[0];
4334 break;
4335 case TGSI_OPCODE_ATOMAND:
4336 val &= value[0].u[0];
4337 break;
4338 case TGSI_OPCODE_ATOMUMIN:
4339 val = MIN2(val, value[0].u[0]);
4340 break;
4341 case TGSI_OPCODE_ATOMUMAX:
4342 val = MAX2(val, value[0].u[0]);
4343 break;
4344 case TGSI_OPCODE_ATOMIMIN:
4345 val = MIN2(r[0].i[0], value[0].i[0]);
4346 break;
4347 case TGSI_OPCODE_ATOMIMAX:
4348 val = MAX2(r[0].i[0], value[0].i[0]);
4349 break;
4350 case TGSI_OPCODE_ATOMXCHG:
4351 val = value[0].i[0];
4352 break;
4353 case TGSI_OPCODE_ATOMCAS:
4354 if (val == value[0].u[0])
4355 val = value2[0].u[0];
4356 break;
4357 default:
4358 break;
4359 }
4360 for (i = 0; i < TGSI_QUAD_SIZE; i++)
4361 if (execmask & (1 << i))
4362 memcpy(ptr, &val, 4);
4363
4364 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4365 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4366 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4367 }
4368 }
4369 }
4370
4371 static void
4372 exec_atomop(struct tgsi_exec_machine *mach,
4373 const struct tgsi_full_instruction *inst)
4374 {
4375 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4376 exec_atomop_img(mach, inst);
4377 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
4378 exec_atomop_buf(mach, inst);
4379 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
4380 exec_atomop_mem(mach, inst);
4381 }
4382
4383 static void
4384 exec_resq_img(struct tgsi_exec_machine *mach,
4385 const struct tgsi_full_instruction *inst)
4386 {
4387 int result[4];
4388 union tgsi_exec_channel r[4];
4389 uint unit;
4390 int i, chan, j;
4391 struct tgsi_image_params params;
4392 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4393
4394 unit = fetch_sampler_unit(mach, inst, 0);
4395
4396 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4397 params.unit = unit;
4398 params.tgsi_tex_instr = inst->Memory.Texture;
4399 params.format = inst->Memory.Format;
4400
4401 mach->Image->get_dims(mach->Image, &params, result);
4402
4403 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4404 for (j = 0; j < 4; j++) {
4405 r[j].i[i] = result[j];
4406 }
4407 }
4408
4409 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4410 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4411 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
4412 TGSI_EXEC_DATA_INT);
4413 }
4414 }
4415 }
4416
4417 static void
4418 exec_resq_buf(struct tgsi_exec_machine *mach,
4419 const struct tgsi_full_instruction *inst)
4420 {
4421 int result;
4422 union tgsi_exec_channel r[4];
4423 uint unit;
4424 int i, chan;
4425 struct tgsi_buffer_params params;
4426 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4427
4428 unit = fetch_sampler_unit(mach, inst, 0);
4429
4430 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4431 params.unit = unit;
4432
4433 mach->Buffer->get_dims(mach->Buffer, &params, &result);
4434
4435 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4436 r[0].i[i] = result;
4437 }
4438
4439 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4440 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4441 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
4442 TGSI_EXEC_DATA_INT);
4443 }
4444 }
4445 }
4446
4447 static void
4448 exec_resq(struct tgsi_exec_machine *mach,
4449 const struct tgsi_full_instruction *inst)
4450 {
4451 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4452 exec_resq_img(mach, inst);
4453 else
4454 exec_resq_buf(mach, inst);
4455 }
4456
4457 static void
4458 micro_f2u64(union tgsi_double_channel *dst,
4459 const union tgsi_exec_channel *src)
4460 {
4461 dst->u64[0] = (uint64_t)src->f[0];
4462 dst->u64[1] = (uint64_t)src->f[1];
4463 dst->u64[2] = (uint64_t)src->f[2];
4464 dst->u64[3] = (uint64_t)src->f[3];
4465 }
4466
4467 static void
4468 micro_f2i64(union tgsi_double_channel *dst,
4469 const union tgsi_exec_channel *src)
4470 {
4471 dst->i64[0] = (int64_t)src->f[0];
4472 dst->i64[1] = (int64_t)src->f[1];
4473 dst->i64[2] = (int64_t)src->f[2];
4474 dst->i64[3] = (int64_t)src->f[3];
4475 }
4476
4477 static void
4478 micro_u2i64(union tgsi_double_channel *dst,
4479 const union tgsi_exec_channel *src)
4480 {
4481 dst->u64[0] = (uint64_t)src->u[0];
4482 dst->u64[1] = (uint64_t)src->u[1];
4483 dst->u64[2] = (uint64_t)src->u[2];
4484 dst->u64[3] = (uint64_t)src->u[3];
4485 }
4486
4487 static void
4488 micro_i2i64(union tgsi_double_channel *dst,
4489 const union tgsi_exec_channel *src)
4490 {
4491 dst->i64[0] = (int64_t)src->i[0];
4492 dst->i64[1] = (int64_t)src->i[1];
4493 dst->i64[2] = (int64_t)src->i[2];
4494 dst->i64[3] = (int64_t)src->i[3];
4495 }
4496
4497 static void
4498 micro_d2u64(union tgsi_double_channel *dst,
4499 const union tgsi_double_channel *src)
4500 {
4501 dst->u64[0] = (uint64_t)src->d[0];
4502 dst->u64[1] = (uint64_t)src->d[1];
4503 dst->u64[2] = (uint64_t)src->d[2];
4504 dst->u64[3] = (uint64_t)src->d[3];
4505 }
4506
4507 static void
4508 micro_d2i64(union tgsi_double_channel *dst,
4509 const union tgsi_double_channel *src)
4510 {
4511 dst->i64[0] = (int64_t)src->d[0];
4512 dst->i64[1] = (int64_t)src->d[1];
4513 dst->i64[2] = (int64_t)src->d[2];
4514 dst->i64[3] = (int64_t)src->d[3];
4515 }
4516
4517 static void
4518 micro_u642d(union tgsi_double_channel *dst,
4519 const union tgsi_double_channel *src)
4520 {
4521 dst->d[0] = (double)src->u64[0];
4522 dst->d[1] = (double)src->u64[1];
4523 dst->d[2] = (double)src->u64[2];
4524 dst->d[3] = (double)src->u64[3];
4525 }
4526
4527 static void
4528 micro_i642d(union tgsi_double_channel *dst,
4529 const union tgsi_double_channel *src)
4530 {
4531 dst->d[0] = (double)src->i64[0];
4532 dst->d[1] = (double)src->i64[1];
4533 dst->d[2] = (double)src->i64[2];
4534 dst->d[3] = (double)src->i64[3];
4535 }
4536
4537 static void
4538 micro_u642f(union tgsi_exec_channel *dst,
4539 const union tgsi_double_channel *src)
4540 {
4541 dst->f[0] = (float)src->u64[0];
4542 dst->f[1] = (float)src->u64[1];
4543 dst->f[2] = (float)src->u64[2];
4544 dst->f[3] = (float)src->u64[3];
4545 }
4546
4547 static void
4548 micro_i642f(union tgsi_exec_channel *dst,
4549 const union tgsi_double_channel *src)
4550 {
4551 dst->f[0] = (float)src->i64[0];
4552 dst->f[1] = (float)src->i64[1];
4553 dst->f[2] = (float)src->i64[2];
4554 dst->f[3] = (float)src->i64[3];
4555 }
4556
4557 static void
4558 exec_t_2_64(struct tgsi_exec_machine *mach,
4559 const struct tgsi_full_instruction *inst,
4560 micro_dop_s op,
4561 enum tgsi_exec_datatype src_datatype)
4562 {
4563 union tgsi_exec_channel src;
4564 union tgsi_double_channel dst;
4565
4566 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
4567 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
4568 op(&dst, &src);
4569 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
4570 }
4571 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
4572 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
4573 op(&dst, &src);
4574 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
4575 }
4576 }
4577
4578 static void
4579 exec_64_2_t(struct tgsi_exec_machine *mach,
4580 const struct tgsi_full_instruction *inst,
4581 micro_sop_d op,
4582 enum tgsi_exec_datatype dst_datatype)
4583 {
4584 union tgsi_double_channel src;
4585 union tgsi_exec_channel dst;
4586 int wm = inst->Dst[0].Register.WriteMask;
4587 int i;
4588 int bit;
4589 for (i = 0; i < 2; i++) {
4590 bit = ffs(wm);
4591 if (bit) {
4592 wm &= ~(1 << (bit - 1));
4593 if (i == 0)
4594 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
4595 else
4596 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
4597 op(&dst, &src);
4598 store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype);
4599 }
4600 }
4601 }
4602
4603 static void
4604 micro_i2f(union tgsi_exec_channel *dst,
4605 const union tgsi_exec_channel *src)
4606 {
4607 dst->f[0] = (float)src->i[0];
4608 dst->f[1] = (float)src->i[1];
4609 dst->f[2] = (float)src->i[2];
4610 dst->f[3] = (float)src->i[3];
4611 }
4612
4613 static void
4614 micro_not(union tgsi_exec_channel *dst,
4615 const union tgsi_exec_channel *src)
4616 {
4617 dst->u[0] = ~src->u[0];
4618 dst->u[1] = ~src->u[1];
4619 dst->u[2] = ~src->u[2];
4620 dst->u[3] = ~src->u[3];
4621 }
4622
4623 static void
4624 micro_shl(union tgsi_exec_channel *dst,
4625 const union tgsi_exec_channel *src0,
4626 const union tgsi_exec_channel *src1)
4627 {
4628 unsigned masked_count;
4629 masked_count = src1->u[0] & 0x1f;
4630 dst->u[0] = src0->u[0] << masked_count;
4631 masked_count = src1->u[1] & 0x1f;
4632 dst->u[1] = src0->u[1] << masked_count;
4633 masked_count = src1->u[2] & 0x1f;
4634 dst->u[2] = src0->u[2] << masked_count;
4635 masked_count = src1->u[3] & 0x1f;
4636 dst->u[3] = src0->u[3] << masked_count;
4637 }
4638
4639 static void
4640 micro_and(union tgsi_exec_channel *dst,
4641 const union tgsi_exec_channel *src0,
4642 const union tgsi_exec_channel *src1)
4643 {
4644 dst->u[0] = src0->u[0] & src1->u[0];
4645 dst->u[1] = src0->u[1] & src1->u[1];
4646 dst->u[2] = src0->u[2] & src1->u[2];
4647 dst->u[3] = src0->u[3] & src1->u[3];
4648 }
4649
4650 static void
4651 micro_or(union tgsi_exec_channel *dst,
4652 const union tgsi_exec_channel *src0,
4653 const union tgsi_exec_channel *src1)
4654 {
4655 dst->u[0] = src0->u[0] | src1->u[0];
4656 dst->u[1] = src0->u[1] | src1->u[1];
4657 dst->u[2] = src0->u[2] | src1->u[2];
4658 dst->u[3] = src0->u[3] | src1->u[3];
4659 }
4660
4661 static void
4662 micro_xor(union tgsi_exec_channel *dst,
4663 const union tgsi_exec_channel *src0,
4664 const union tgsi_exec_channel *src1)
4665 {
4666 dst->u[0] = src0->u[0] ^ src1->u[0];
4667 dst->u[1] = src0->u[1] ^ src1->u[1];
4668 dst->u[2] = src0->u[2] ^ src1->u[2];
4669 dst->u[3] = src0->u[3] ^ src1->u[3];
4670 }
4671
4672 static void
4673 micro_mod(union tgsi_exec_channel *dst,
4674 const union tgsi_exec_channel *src0,
4675 const union tgsi_exec_channel *src1)
4676 {
4677 dst->i[0] = src0->i[0] % src1->i[0];
4678 dst->i[1] = src0->i[1] % src1->i[1];
4679 dst->i[2] = src0->i[2] % src1->i[2];
4680 dst->i[3] = src0->i[3] % src1->i[3];
4681 }
4682
4683 static void
4684 micro_f2i(union tgsi_exec_channel *dst,
4685 const union tgsi_exec_channel *src)
4686 {
4687 dst->i[0] = (int)src->f[0];
4688 dst->i[1] = (int)src->f[1];
4689 dst->i[2] = (int)src->f[2];
4690 dst->i[3] = (int)src->f[3];
4691 }
4692
4693 static void
4694 micro_fseq(union tgsi_exec_channel *dst,
4695 const union tgsi_exec_channel *src0,
4696 const union tgsi_exec_channel *src1)
4697 {
4698 dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0;
4699 dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0;
4700 dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0;
4701 dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0;
4702 }
4703
4704 static void
4705 micro_fsge(union tgsi_exec_channel *dst,
4706 const union tgsi_exec_channel *src0,
4707 const union tgsi_exec_channel *src1)
4708 {
4709 dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0;
4710 dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0;
4711 dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0;
4712 dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0;
4713 }
4714
4715 static void
4716 micro_fslt(union tgsi_exec_channel *dst,
4717 const union tgsi_exec_channel *src0,
4718 const union tgsi_exec_channel *src1)
4719 {
4720 dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0;
4721 dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0;
4722 dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0;
4723 dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0;
4724 }
4725
4726 static void
4727 micro_fsne(union tgsi_exec_channel *dst,
4728 const union tgsi_exec_channel *src0,
4729 const union tgsi_exec_channel *src1)
4730 {
4731 dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0;
4732 dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0;
4733 dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0;
4734 dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0;
4735 }
4736
4737 static void
4738 micro_idiv(union tgsi_exec_channel *dst,
4739 const union tgsi_exec_channel *src0,
4740 const union tgsi_exec_channel *src1)
4741 {
4742 dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0;
4743 dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0;
4744 dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0;
4745 dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0;
4746 }
4747
4748 static void
4749 micro_imax(union tgsi_exec_channel *dst,
4750 const union tgsi_exec_channel *src0,
4751 const union tgsi_exec_channel *src1)
4752 {
4753 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
4754 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
4755 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
4756 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
4757 }
4758
4759 static void
4760 micro_imin(union tgsi_exec_channel *dst,
4761 const union tgsi_exec_channel *src0,
4762 const union tgsi_exec_channel *src1)
4763 {
4764 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
4765 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
4766 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
4767 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
4768 }
4769
4770 static void
4771 micro_isge(union tgsi_exec_channel *dst,
4772 const union tgsi_exec_channel *src0,
4773 const union tgsi_exec_channel *src1)
4774 {
4775 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
4776 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
4777 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
4778 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
4779 }
4780
4781 static void
4782 micro_ishr(union tgsi_exec_channel *dst,
4783 const union tgsi_exec_channel *src0,
4784 const union tgsi_exec_channel *src1)
4785 {
4786 unsigned masked_count;
4787 masked_count = src1->i[0] & 0x1f;
4788 dst->i[0] = src0->i[0] >> masked_count;
4789 masked_count = src1->i[1] & 0x1f;
4790 dst->i[1] = src0->i[1] >> masked_count;
4791 masked_count = src1->i[2] & 0x1f;
4792 dst->i[2] = src0->i[2] >> masked_count;
4793 masked_count = src1->i[3] & 0x1f;
4794 dst->i[3] = src0->i[3] >> masked_count;
4795 }
4796
4797 static void
4798 micro_islt(union tgsi_exec_channel *dst,
4799 const union tgsi_exec_channel *src0,
4800 const union tgsi_exec_channel *src1)
4801 {
4802 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
4803 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
4804 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
4805 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
4806 }
4807
4808 static void
4809 micro_f2u(union tgsi_exec_channel *dst,
4810 const union tgsi_exec_channel *src)
4811 {
4812 dst->u[0] = (uint)src->f[0];
4813 dst->u[1] = (uint)src->f[1];
4814 dst->u[2] = (uint)src->f[2];
4815 dst->u[3] = (uint)src->f[3];
4816 }
4817
4818 static void
4819 micro_u2f(union tgsi_exec_channel *dst,
4820 const union tgsi_exec_channel *src)
4821 {
4822 dst->f[0] = (float)src->u[0];
4823 dst->f[1] = (float)src->u[1];
4824 dst->f[2] = (float)src->u[2];
4825 dst->f[3] = (float)src->u[3];
4826 }
4827
4828 static void
4829 micro_uadd(union tgsi_exec_channel *dst,
4830 const union tgsi_exec_channel *src0,
4831 const union tgsi_exec_channel *src1)
4832 {
4833 dst->u[0] = src0->u[0] + src1->u[0];
4834 dst->u[1] = src0->u[1] + src1->u[1];
4835 dst->u[2] = src0->u[2] + src1->u[2];
4836 dst->u[3] = src0->u[3] + src1->u[3];
4837 }
4838
4839 static void
4840 micro_udiv(union tgsi_exec_channel *dst,
4841 const union tgsi_exec_channel *src0,
4842 const union tgsi_exec_channel *src1)
4843 {
4844 dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;
4845 dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;
4846 dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;
4847 dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;
4848 }
4849
4850 static void
4851 micro_umad(union tgsi_exec_channel *dst,
4852 const union tgsi_exec_channel *src0,
4853 const union tgsi_exec_channel *src1,
4854 const union tgsi_exec_channel *src2)
4855 {
4856 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
4857 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
4858 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
4859 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
4860 }
4861
4862 static void
4863 micro_umax(union tgsi_exec_channel *dst,
4864 const union tgsi_exec_channel *src0,
4865 const union tgsi_exec_channel *src1)
4866 {
4867 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
4868 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
4869 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
4870 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
4871 }
4872
4873 static void
4874 micro_umin(union tgsi_exec_channel *dst,
4875 const union tgsi_exec_channel *src0,
4876 const union tgsi_exec_channel *src1)
4877 {
4878 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
4879 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
4880 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
4881 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
4882 }
4883
4884 static void
4885 micro_umod(union tgsi_exec_channel *dst,
4886 const union tgsi_exec_channel *src0,
4887 const union tgsi_exec_channel *src1)
4888 {
4889 dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;
4890 dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;
4891 dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;
4892 dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;
4893 }
4894
4895 static void
4896 micro_umul(union tgsi_exec_channel *dst,
4897 const union tgsi_exec_channel *src0,
4898 const union tgsi_exec_channel *src1)
4899 {
4900 dst->u[0] = src0->u[0] * src1->u[0];
4901 dst->u[1] = src0->u[1] * src1->u[1];
4902 dst->u[2] = src0->u[2] * src1->u[2];
4903 dst->u[3] = src0->u[3] * src1->u[3];
4904 }
4905
4906 static void
4907 micro_imul_hi(union tgsi_exec_channel *dst,
4908 const union tgsi_exec_channel *src0,
4909 const union tgsi_exec_channel *src1)
4910 {
4911 #define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)
4912 dst->i[0] = I64M(src0->i[0], src1->i[0]);
4913 dst->i[1] = I64M(src0->i[1], src1->i[1]);
4914 dst->i[2] = I64M(src0->i[2], src1->i[2]);
4915 dst->i[3] = I64M(src0->i[3], src1->i[3]);
4916 #undef I64M
4917 }
4918
4919 static void
4920 micro_umul_hi(union tgsi_exec_channel *dst,
4921 const union tgsi_exec_channel *src0,
4922 const union tgsi_exec_channel *src1)
4923 {
4924 #define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)
4925 dst->u[0] = U64M(src0->u[0], src1->u[0]);
4926 dst->u[1] = U64M(src0->u[1], src1->u[1]);
4927 dst->u[2] = U64M(src0->u[2], src1->u[2]);
4928 dst->u[3] = U64M(src0->u[3], src1->u[3]);
4929 #undef U64M
4930 }
4931
4932 static void
4933 micro_useq(union tgsi_exec_channel *dst,
4934 const union tgsi_exec_channel *src0,
4935 const union tgsi_exec_channel *src1)
4936 {
4937 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
4938 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
4939 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
4940 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
4941 }
4942
4943 static void
4944 micro_usge(union tgsi_exec_channel *dst,
4945 const union tgsi_exec_channel *src0,
4946 const union tgsi_exec_channel *src1)
4947 {
4948 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
4949 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
4950 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
4951 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
4952 }
4953
4954 static void
4955 micro_ushr(union tgsi_exec_channel *dst,
4956 const union tgsi_exec_channel *src0,
4957 const union tgsi_exec_channel *src1)
4958 {
4959 unsigned masked_count;
4960 masked_count = src1->u[0] & 0x1f;
4961 dst->u[0] = src0->u[0] >> masked_count;
4962 masked_count = src1->u[1] & 0x1f;
4963 dst->u[1] = src0->u[1] >> masked_count;
4964 masked_count = src1->u[2] & 0x1f;
4965 dst->u[2] = src0->u[2] >> masked_count;
4966 masked_count = src1->u[3] & 0x1f;
4967 dst->u[3] = src0->u[3] >> masked_count;
4968 }
4969
4970 static void
4971 micro_uslt(union tgsi_exec_channel *dst,
4972 const union tgsi_exec_channel *src0,
4973 const union tgsi_exec_channel *src1)
4974 {
4975 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
4976 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
4977 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
4978 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
4979 }
4980
4981 static void
4982 micro_usne(union tgsi_exec_channel *dst,
4983 const union tgsi_exec_channel *src0,
4984 const union tgsi_exec_channel *src1)
4985 {
4986 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
4987 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
4988 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
4989 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
4990 }
4991
4992 static void
4993 micro_uarl(union tgsi_exec_channel *dst,
4994 const union tgsi_exec_channel *src)
4995 {
4996 dst->i[0] = src->u[0];
4997 dst->i[1] = src->u[1];
4998 dst->i[2] = src->u[2];
4999 dst->i[3] = src->u[3];
5000 }
5001
5002 static void
5003 micro_ucmp(union tgsi_exec_channel *dst,
5004 const union tgsi_exec_channel *src0,
5005 const union tgsi_exec_channel *src1,
5006 const union tgsi_exec_channel *src2)
5007 {
5008 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
5009 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
5010 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
5011 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
5012 }
5013
5014 /**
5015 * Signed bitfield extract (i.e. sign-extend the extracted bits)
5016 */
5017 static void
5018 micro_ibfe(union tgsi_exec_channel *dst,
5019 const union tgsi_exec_channel *src0,
5020 const union tgsi_exec_channel *src1,
5021 const union tgsi_exec_channel *src2)
5022 {
5023 int i;
5024 for (i = 0; i < 4; i++) {
5025 int width = src2->i[i] & 0x1f;
5026 int offset = src1->i[i] & 0x1f;
5027 if (width == 0)
5028 dst->i[i] = 0;
5029 else if (width + offset < 32)
5030 dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width);
5031 else
5032 dst->i[i] = src0->i[i] >> offset;
5033 }
5034 }
5035
5036 /**
5037 * Unsigned bitfield extract
5038 */
5039 static void
5040 micro_ubfe(union tgsi_exec_channel *dst,
5041 const union tgsi_exec_channel *src0,
5042 const union tgsi_exec_channel *src1,
5043 const union tgsi_exec_channel *src2)
5044 {
5045 int i;
5046 for (i = 0; i < 4; i++) {
5047 int width = src2->u[i] & 0x1f;
5048 int offset = src1->u[i] & 0x1f;
5049 if (width == 0)
5050 dst->u[i] = 0;
5051 else if (width + offset < 32)
5052 dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width);
5053 else
5054 dst->u[i] = src0->u[i] >> offset;
5055 }
5056 }
5057
5058 /**
5059 * Bitfield insert: copy low bits from src1 into a region of src0.
5060 */
5061 static void
5062 micro_bfi(union tgsi_exec_channel *dst,
5063 const union tgsi_exec_channel *src0,
5064 const union tgsi_exec_channel *src1,
5065 const union tgsi_exec_channel *src2,
5066 const union tgsi_exec_channel *src3)
5067 {
5068 int i;
5069 for (i = 0; i < 4; i++) {
5070 int width = src3->u[i] & 0x1f;
5071 int offset = src2->u[i] & 0x1f;
5072 int bitmask = ((1 << width) - 1) << offset;
5073 dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask);
5074 }
5075 }
5076
5077 static void
5078 micro_brev(union tgsi_exec_channel *dst,
5079 const union tgsi_exec_channel *src)
5080 {
5081 dst->u[0] = util_bitreverse(src->u[0]);
5082 dst->u[1] = util_bitreverse(src->u[1]);
5083 dst->u[2] = util_bitreverse(src->u[2]);
5084 dst->u[3] = util_bitreverse(src->u[3]);
5085 }
5086
5087 static void
5088 micro_popc(union tgsi_exec_channel *dst,
5089 const union tgsi_exec_channel *src)
5090 {
5091 dst->u[0] = util_bitcount(src->u[0]);
5092 dst->u[1] = util_bitcount(src->u[1]);
5093 dst->u[2] = util_bitcount(src->u[2]);
5094 dst->u[3] = util_bitcount(src->u[3]);
5095 }
5096
5097 static void
5098 micro_lsb(union tgsi_exec_channel *dst,
5099 const union tgsi_exec_channel *src)
5100 {
5101 dst->i[0] = ffs(src->u[0]) - 1;
5102 dst->i[1] = ffs(src->u[1]) - 1;
5103 dst->i[2] = ffs(src->u[2]) - 1;
5104 dst->i[3] = ffs(src->u[3]) - 1;
5105 }
5106
5107 static void
5108 micro_imsb(union tgsi_exec_channel *dst,
5109 const union tgsi_exec_channel *src)
5110 {
5111 dst->i[0] = util_last_bit_signed(src->i[0]) - 1;
5112 dst->i[1] = util_last_bit_signed(src->i[1]) - 1;
5113 dst->i[2] = util_last_bit_signed(src->i[2]) - 1;
5114 dst->i[3] = util_last_bit_signed(src->i[3]) - 1;
5115 }
5116
5117 static void
5118 micro_umsb(union tgsi_exec_channel *dst,
5119 const union tgsi_exec_channel *src)
5120 {
5121 dst->i[0] = util_last_bit(src->u[0]) - 1;
5122 dst->i[1] = util_last_bit(src->u[1]) - 1;
5123 dst->i[2] = util_last_bit(src->u[2]) - 1;
5124 dst->i[3] = util_last_bit(src->u[3]) - 1;
5125 }
5126
5127 /**
5128 * Execute a TGSI instruction.
5129 * Returns TRUE if a barrier instruction is hit,
5130 * otherwise FALSE.
5131 */
5132 static boolean
5133 exec_instruction(
5134 struct tgsi_exec_machine *mach,
5135 const struct tgsi_full_instruction *inst,
5136 int *pc )
5137 {
5138 union tgsi_exec_channel r[10];
5139
5140 (*pc)++;
5141
5142 switch (inst->Instruction.Opcode) {
5143 case TGSI_OPCODE_ARL:
5144 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5145 break;
5146
5147 case TGSI_OPCODE_MOV:
5148 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5149 break;
5150
5151 case TGSI_OPCODE_LIT:
5152 exec_lit(mach, inst);
5153 break;
5154
5155 case TGSI_OPCODE_RCP:
5156 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5157 break;
5158
5159 case TGSI_OPCODE_RSQ:
5160 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5161 break;
5162
5163 case TGSI_OPCODE_EXP:
5164 exec_exp(mach, inst);
5165 break;
5166
5167 case TGSI_OPCODE_LOG:
5168 exec_log(mach, inst);
5169 break;
5170
5171 case TGSI_OPCODE_MUL:
5172 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5173 break;
5174
5175 case TGSI_OPCODE_ADD:
5176 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5177 break;
5178
5179 case TGSI_OPCODE_DP3:
5180 exec_dp3(mach, inst);
5181 break;
5182
5183 case TGSI_OPCODE_DP4:
5184 exec_dp4(mach, inst);
5185 break;
5186
5187 case TGSI_OPCODE_DST:
5188 exec_dst(mach, inst);
5189 break;
5190
5191 case TGSI_OPCODE_MIN:
5192 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5193 break;
5194
5195 case TGSI_OPCODE_MAX:
5196 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5197 break;
5198
5199 case TGSI_OPCODE_SLT:
5200 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5201 break;
5202
5203 case TGSI_OPCODE_SGE:
5204 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5205 break;
5206
5207 case TGSI_OPCODE_MAD:
5208 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5209 break;
5210
5211 case TGSI_OPCODE_SUB:
5212 exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5213 break;
5214
5215 case TGSI_OPCODE_LRP:
5216 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5217 break;
5218
5219 case TGSI_OPCODE_SQRT:
5220 exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5221 break;
5222
5223 case TGSI_OPCODE_DP2A:
5224 exec_dp2a(mach, inst);
5225 break;
5226
5227 case TGSI_OPCODE_FRC:
5228 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5229 break;
5230
5231 case TGSI_OPCODE_CLAMP:
5232 exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5233 break;
5234
5235 case TGSI_OPCODE_FLR:
5236 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5237 break;
5238
5239 case TGSI_OPCODE_ROUND:
5240 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5241 break;
5242
5243 case TGSI_OPCODE_EX2:
5244 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5245 break;
5246
5247 case TGSI_OPCODE_LG2:
5248 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5249 break;
5250
5251 case TGSI_OPCODE_POW:
5252 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5253 break;
5254
5255 case TGSI_OPCODE_XPD:
5256 exec_xpd(mach, inst);
5257 break;
5258
5259 case TGSI_OPCODE_DPH:
5260 exec_dph(mach, inst);
5261 break;
5262
5263 case TGSI_OPCODE_COS:
5264 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5265 break;
5266
5267 case TGSI_OPCODE_DDX:
5268 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5269 break;
5270
5271 case TGSI_OPCODE_DDY:
5272 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5273 break;
5274
5275 case TGSI_OPCODE_KILL:
5276 exec_kill (mach, inst);
5277 break;
5278
5279 case TGSI_OPCODE_KILL_IF:
5280 exec_kill_if (mach, inst);
5281 break;
5282
5283 case TGSI_OPCODE_PK2H:
5284 exec_pk2h(mach, inst);
5285 break;
5286
5287 case TGSI_OPCODE_PK2US:
5288 assert (0);
5289 break;
5290
5291 case TGSI_OPCODE_PK4B:
5292 assert (0);
5293 break;
5294
5295 case TGSI_OPCODE_PK4UB:
5296 assert (0);
5297 break;
5298
5299 case TGSI_OPCODE_SEQ:
5300 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5301 break;
5302
5303 case TGSI_OPCODE_SGT:
5304 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5305 break;
5306
5307 case TGSI_OPCODE_SIN:
5308 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5309 break;
5310
5311 case TGSI_OPCODE_SLE:
5312 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5313 break;
5314
5315 case TGSI_OPCODE_SNE:
5316 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5317 break;
5318
5319 case TGSI_OPCODE_TEX:
5320 /* simple texture lookup */
5321 /* src[0] = texcoord */
5322 /* src[1] = sampler unit */
5323 exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);
5324 break;
5325
5326 case TGSI_OPCODE_TXB:
5327 /* Texture lookup with lod bias */
5328 /* src[0] = texcoord (src[0].w = LOD bias) */
5329 /* src[1] = sampler unit */
5330 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);
5331 break;
5332
5333 case TGSI_OPCODE_TXD:
5334 /* Texture lookup with explicit partial derivatives */
5335 /* src[0] = texcoord */
5336 /* src[1] = d[strq]/dx */
5337 /* src[2] = d[strq]/dy */
5338 /* src[3] = sampler unit */
5339 exec_txd(mach, inst);
5340 break;
5341
5342 case TGSI_OPCODE_TXL:
5343 /* Texture lookup with explicit LOD */
5344 /* src[0] = texcoord (src[0].w = LOD) */
5345 /* src[1] = sampler unit */
5346 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);
5347 break;
5348
5349 case TGSI_OPCODE_TXP:
5350 /* Texture lookup with projection */
5351 /* src[0] = texcoord (src[0].w = projection) */
5352 /* src[1] = sampler unit */
5353 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
5354 break;
5355
5356 case TGSI_OPCODE_TG4:
5357 /* src[0] = texcoord */
5358 /* src[1] = component */
5359 /* src[2] = sampler unit */
5360 exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
5361 break;
5362
5363 case TGSI_OPCODE_LODQ:
5364 /* src[0] = texcoord */
5365 /* src[1] = sampler unit */
5366 exec_lodq(mach, inst);
5367 break;
5368
5369 case TGSI_OPCODE_UP2H:
5370 exec_up2h(mach, inst);
5371 break;
5372
5373 case TGSI_OPCODE_UP2US:
5374 assert (0);
5375 break;
5376
5377 case TGSI_OPCODE_UP4B:
5378 assert (0);
5379 break;
5380
5381 case TGSI_OPCODE_UP4UB:
5382 assert (0);
5383 break;
5384
5385 case TGSI_OPCODE_ARR:
5386 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5387 break;
5388
5389 case TGSI_OPCODE_CAL:
5390 /* skip the call if no execution channels are enabled */
5391 if (mach->ExecMask) {
5392 /* do the call */
5393
5394 /* First, record the depths of the execution stacks.
5395 * This is important for deeply nested/looped return statements.
5396 * We have to unwind the stacks by the correct amount. For a
5397 * real code generator, we could determine the number of entries
5398 * to pop off each stack with simple static analysis and avoid
5399 * implementing this data structure at run time.
5400 */
5401 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
5402 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
5403 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
5404 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
5405 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
5406 /* note that PC was already incremented above */
5407 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
5408
5409 mach->CallStackTop++;
5410
5411 /* Second, push the Cond, Loop, Cont, Switch, Break and Func stacks */
5412 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5413 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5414 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5415 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
5416 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5417 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
5418
5419 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5420 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5421 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5422 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
5423 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5424 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
5425
5426 /* Finally, jump to the subroutine. The label is a pointer
5427 * (an instruction number) to the BGNSUB instruction.
5428 */
5429 *pc = inst->Label.Label;
5430 assert(mach->Instructions[*pc].Instruction.Opcode
5431 == TGSI_OPCODE_BGNSUB);
5432 }
5433 break;
5434
5435 case TGSI_OPCODE_RET:
5436 mach->FuncMask &= ~mach->ExecMask;
5437 UPDATE_EXEC_MASK(mach);
5438
5439 if (mach->FuncMask == 0x0) {
5440 /* really return now (otherwise, keep executing) */
5441
5442 if (mach->CallStackTop == 0) {
5443 /* returning from main() */
5444 mach->CondStackTop = 0;
5445 mach->LoopStackTop = 0;
5446 mach->ContStackTop = 0;
5447 mach->LoopLabelStackTop = 0;
5448 mach->SwitchStackTop = 0;
5449 mach->BreakStackTop = 0;
5450 *pc = -1;
5451 return FALSE;
5452 }
5453
5454 assert(mach->CallStackTop > 0);
5455 mach->CallStackTop--;
5456
5457 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5458 mach->CondMask = mach->CondStack[mach->CondStackTop];
5459
5460 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5461 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5462
5463 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5464 mach->ContMask = mach->ContStack[mach->ContStackTop];
5465
5466 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5467 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5468
5469 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5470 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5471
5472 assert(mach->FuncStackTop > 0);
5473 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5474
5475 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5476
5477 UPDATE_EXEC_MASK(mach);
5478 }
5479 break;
5480
5481 case TGSI_OPCODE_SSG:
5482 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5483 break;
5484
5485 case TGSI_OPCODE_CMP:
5486 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5487 break;
5488
5489 case TGSI_OPCODE_SCS:
5490 exec_scs(mach, inst);
5491 break;
5492
5493 case TGSI_OPCODE_DIV:
5494 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5495 break;
5496
5497 case TGSI_OPCODE_DP2:
5498 exec_dp2(mach, inst);
5499 break;
5500
5501 case TGSI_OPCODE_IF:
5502 /* push CondMask */
5503 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5504 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5505 FETCH( &r[0], 0, TGSI_CHAN_X );
5506 /* update CondMask */
5507 if( ! r[0].f[0] ) {
5508 mach->CondMask &= ~0x1;
5509 }
5510 if( ! r[0].f[1] ) {
5511 mach->CondMask &= ~0x2;
5512 }
5513 if( ! r[0].f[2] ) {
5514 mach->CondMask &= ~0x4;
5515 }
5516 if( ! r[0].f[3] ) {
5517 mach->CondMask &= ~0x8;
5518 }
5519 UPDATE_EXEC_MASK(mach);
5520 /* Todo: If CondMask==0, jump to ELSE */
5521 break;
5522
5523 case TGSI_OPCODE_UIF:
5524 /* push CondMask */
5525 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5526 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5527 IFETCH( &r[0], 0, TGSI_CHAN_X );
5528 /* update CondMask */
5529 if( ! r[0].u[0] ) {
5530 mach->CondMask &= ~0x1;
5531 }
5532 if( ! r[0].u[1] ) {
5533 mach->CondMask &= ~0x2;
5534 }
5535 if( ! r[0].u[2] ) {
5536 mach->CondMask &= ~0x4;
5537 }
5538 if( ! r[0].u[3] ) {
5539 mach->CondMask &= ~0x8;
5540 }
5541 UPDATE_EXEC_MASK(mach);
5542 /* Todo: If CondMask==0, jump to ELSE */
5543 break;
5544
5545 case TGSI_OPCODE_ELSE:
5546 /* invert CondMask wrt previous mask */
5547 {
5548 uint prevMask;
5549 assert(mach->CondStackTop > 0);
5550 prevMask = mach->CondStack[mach->CondStackTop - 1];
5551 mach->CondMask = ~mach->CondMask & prevMask;
5552 UPDATE_EXEC_MASK(mach);
5553 /* Todo: If CondMask==0, jump to ENDIF */
5554 }
5555 break;
5556
5557 case TGSI_OPCODE_ENDIF:
5558 /* pop CondMask */
5559 assert(mach->CondStackTop > 0);
5560 mach->CondMask = mach->CondStack[--mach->CondStackTop];
5561 UPDATE_EXEC_MASK(mach);
5562 break;
5563
5564 case TGSI_OPCODE_END:
5565 /* make sure we end primitives which haven't
5566 * been explicitly emitted */
5567 conditional_emit_primitive(mach);
5568 /* halt execution */
5569 *pc = -1;
5570 break;
5571
5572 case TGSI_OPCODE_PUSHA:
5573 assert (0);
5574 break;
5575
5576 case TGSI_OPCODE_POPA:
5577 assert (0);
5578 break;
5579
5580 case TGSI_OPCODE_CEIL:
5581 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5582 break;
5583
5584 case TGSI_OPCODE_I2F:
5585 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
5586 break;
5587
5588 case TGSI_OPCODE_NOT:
5589 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5590 break;
5591
5592 case TGSI_OPCODE_TRUNC:
5593 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5594 break;
5595
5596 case TGSI_OPCODE_SHL:
5597 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5598 break;
5599
5600 case TGSI_OPCODE_AND:
5601 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5602 break;
5603
5604 case TGSI_OPCODE_OR:
5605 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5606 break;
5607
5608 case TGSI_OPCODE_MOD:
5609 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5610 break;
5611
5612 case TGSI_OPCODE_XOR:
5613 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5614 break;
5615
5616 case TGSI_OPCODE_SAD:
5617 assert (0);
5618 break;
5619
5620 case TGSI_OPCODE_TXF:
5621 exec_txf(mach, inst);
5622 break;
5623
5624 case TGSI_OPCODE_TXQ:
5625 exec_txq(mach, inst);
5626 break;
5627
5628 case TGSI_OPCODE_EMIT:
5629 emit_vertex(mach);
5630 break;
5631
5632 case TGSI_OPCODE_ENDPRIM:
5633 emit_primitive(mach);
5634 break;
5635
5636 case TGSI_OPCODE_BGNLOOP:
5637 /* push LoopMask and ContMasks */
5638 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5639 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5640 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5641 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5642
5643 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5644 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5645 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
5646 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5647 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
5648 break;
5649
5650 case TGSI_OPCODE_ENDLOOP:
5651 /* Restore ContMask, but don't pop */
5652 assert(mach->ContStackTop > 0);
5653 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
5654 UPDATE_EXEC_MASK(mach);
5655 if (mach->ExecMask) {
5656 /* repeat loop: jump to instruction just past BGNLOOP */
5657 assert(mach->LoopLabelStackTop > 0);
5658 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
5659 }
5660 else {
5661 /* exit loop: pop LoopMask */
5662 assert(mach->LoopStackTop > 0);
5663 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
5664 /* pop ContMask */
5665 assert(mach->ContStackTop > 0);
5666 mach->ContMask = mach->ContStack[--mach->ContStackTop];
5667 assert(mach->LoopLabelStackTop > 0);
5668 --mach->LoopLabelStackTop;
5669
5670 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
5671 }
5672 UPDATE_EXEC_MASK(mach);
5673 break;
5674
5675 case TGSI_OPCODE_BRK:
5676 exec_break(mach);
5677 break;
5678
5679 case TGSI_OPCODE_CONT:
5680 /* turn off cont channels for each enabled exec channel */
5681 mach->ContMask &= ~mach->ExecMask;
5682 /* Todo: if mach->LoopMask == 0, jump to end of loop */
5683 UPDATE_EXEC_MASK(mach);
5684 break;
5685
5686 case TGSI_OPCODE_BGNSUB:
5687 /* no-op */
5688 break;
5689
5690 case TGSI_OPCODE_ENDSUB:
5691 /*
5692 * XXX: This really should be a no-op. We should never reach this opcode.
5693 */
5694
5695 assert(mach->CallStackTop > 0);
5696 mach->CallStackTop--;
5697
5698 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5699 mach->CondMask = mach->CondStack[mach->CondStackTop];
5700
5701 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5702 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5703
5704 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5705 mach->ContMask = mach->ContStack[mach->ContStackTop];
5706
5707 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5708 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5709
5710 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5711 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5712
5713 assert(mach->FuncStackTop > 0);
5714 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5715
5716 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5717
5718 UPDATE_EXEC_MASK(mach);
5719 break;
5720
5721 case TGSI_OPCODE_NOP:
5722 break;
5723
5724 case TGSI_OPCODE_BREAKC:
5725 IFETCH(&r[0], 0, TGSI_CHAN_X);
5726 /* update LoopMask */
5727 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
5728 mach->LoopMask &= ~0x1;
5729 }
5730 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
5731 mach->LoopMask &= ~0x2;
5732 }
5733 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
5734 mach->LoopMask &= ~0x4;
5735 }
5736 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
5737 mach->LoopMask &= ~0x8;
5738 }
5739 /* Todo: if mach->LoopMask == 0, jump to end of loop */
5740 UPDATE_EXEC_MASK(mach);
5741 break;
5742
5743 case TGSI_OPCODE_F2I:
5744 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5745 break;
5746
5747 case TGSI_OPCODE_FSEQ:
5748 exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5749 break;
5750
5751 case TGSI_OPCODE_FSGE:
5752 exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5753 break;
5754
5755 case TGSI_OPCODE_FSLT:
5756 exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5757 break;
5758
5759 case TGSI_OPCODE_FSNE:
5760 exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5761 break;
5762
5763 case TGSI_OPCODE_IDIV:
5764 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5765 break;
5766
5767 case TGSI_OPCODE_IMAX:
5768 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5769 break;
5770
5771 case TGSI_OPCODE_IMIN:
5772 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5773 break;
5774
5775 case TGSI_OPCODE_INEG:
5776 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5777 break;
5778
5779 case TGSI_OPCODE_ISGE:
5780 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5781 break;
5782
5783 case TGSI_OPCODE_ISHR:
5784 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5785 break;
5786
5787 case TGSI_OPCODE_ISLT:
5788 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5789 break;
5790
5791 case TGSI_OPCODE_F2U:
5792 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5793 break;
5794
5795 case TGSI_OPCODE_U2F:
5796 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
5797 break;
5798
5799 case TGSI_OPCODE_UADD:
5800 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5801 break;
5802
5803 case TGSI_OPCODE_UDIV:
5804 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5805 break;
5806
5807 case TGSI_OPCODE_UMAD:
5808 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5809 break;
5810
5811 case TGSI_OPCODE_UMAX:
5812 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5813 break;
5814
5815 case TGSI_OPCODE_UMIN:
5816 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5817 break;
5818
5819 case TGSI_OPCODE_UMOD:
5820 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5821 break;
5822
5823 case TGSI_OPCODE_UMUL:
5824 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5825 break;
5826
5827 case TGSI_OPCODE_IMUL_HI:
5828 exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5829 break;
5830
5831 case TGSI_OPCODE_UMUL_HI:
5832 exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5833 break;
5834
5835 case TGSI_OPCODE_USEQ:
5836 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5837 break;
5838
5839 case TGSI_OPCODE_USGE:
5840 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5841 break;
5842
5843 case TGSI_OPCODE_USHR:
5844 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5845 break;
5846
5847 case TGSI_OPCODE_USLT:
5848 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5849 break;
5850
5851 case TGSI_OPCODE_USNE:
5852 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5853 break;
5854
5855 case TGSI_OPCODE_SWITCH:
5856 exec_switch(mach, inst);
5857 break;
5858
5859 case TGSI_OPCODE_CASE:
5860 exec_case(mach, inst);
5861 break;
5862
5863 case TGSI_OPCODE_DEFAULT:
5864 exec_default(mach);
5865 break;
5866
5867 case TGSI_OPCODE_ENDSWITCH:
5868 exec_endswitch(mach);
5869 break;
5870
5871 case TGSI_OPCODE_SAMPLE_I:
5872 exec_txf(mach, inst);
5873 break;
5874
5875 case TGSI_OPCODE_SAMPLE_I_MS:
5876 exec_txf(mach, inst);
5877 break;
5878
5879 case TGSI_OPCODE_SAMPLE:
5880 exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE);
5881 break;
5882
5883 case TGSI_OPCODE_SAMPLE_B:
5884 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE);
5885 break;
5886
5887 case TGSI_OPCODE_SAMPLE_C:
5888 exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE);
5889 break;
5890
5891 case TGSI_OPCODE_SAMPLE_C_LZ:
5892 exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE);
5893 break;
5894
5895 case TGSI_OPCODE_SAMPLE_D:
5896 exec_sample_d(mach, inst);
5897 break;
5898
5899 case TGSI_OPCODE_SAMPLE_L:
5900 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE);
5901 break;
5902
5903 case TGSI_OPCODE_GATHER4:
5904 assert(0);
5905 break;
5906
5907 case TGSI_OPCODE_SVIEWINFO:
5908 exec_txq(mach, inst);
5909 break;
5910
5911 case TGSI_OPCODE_SAMPLE_POS:
5912 assert(0);
5913 break;
5914
5915 case TGSI_OPCODE_SAMPLE_INFO:
5916 assert(0);
5917 break;
5918
5919 case TGSI_OPCODE_UARL:
5920 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
5921 break;
5922
5923 case TGSI_OPCODE_UCMP:
5924 exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5925 break;
5926
5927 case TGSI_OPCODE_IABS:
5928 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5929 break;
5930
5931 case TGSI_OPCODE_ISSG:
5932 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5933 break;
5934
5935 case TGSI_OPCODE_TEX2:
5936 /* simple texture lookup */
5937 /* src[0] = texcoord */
5938 /* src[1] = compare */
5939 /* src[2] = sampler unit */
5940 exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);
5941 break;
5942 case TGSI_OPCODE_TXB2:
5943 /* Texture lookup with lod bias */
5944 /* src[0] = texcoord */
5945 /* src[1] = bias */
5946 /* src[2] = sampler unit */
5947 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);
5948 break;
5949 case TGSI_OPCODE_TXL2:
5950 /* Texture lookup with explicit LOD */
5951 /* src[0] = texcoord */
5952 /* src[1] = lod */
5953 /* src[2] = sampler unit */
5954 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
5955 break;
5956
5957 case TGSI_OPCODE_IBFE:
5958 exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5959 break;
5960 case TGSI_OPCODE_UBFE:
5961 exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5962 break;
5963 case TGSI_OPCODE_BFI:
5964 exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5965 break;
5966 case TGSI_OPCODE_BREV:
5967 exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5968 break;
5969 case TGSI_OPCODE_POPC:
5970 exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5971 break;
5972 case TGSI_OPCODE_LSB:
5973 exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
5974 break;
5975 case TGSI_OPCODE_IMSB:
5976 exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5977 break;
5978 case TGSI_OPCODE_UMSB:
5979 exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
5980 break;
5981
5982 case TGSI_OPCODE_F2D:
5983 exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
5984 break;
5985
5986 case TGSI_OPCODE_D2F:
5987 exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT);
5988 break;
5989
5990 case TGSI_OPCODE_DABS:
5991 exec_double_unary(mach, inst, micro_dabs);
5992 break;
5993
5994 case TGSI_OPCODE_DNEG:
5995 exec_double_unary(mach, inst, micro_dneg);
5996 break;
5997
5998 case TGSI_OPCODE_DADD:
5999 exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE);
6000 break;
6001
6002 case TGSI_OPCODE_DMUL:
6003 exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE);
6004 break;
6005
6006 case TGSI_OPCODE_DMAX:
6007 exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE);
6008 break;
6009
6010 case TGSI_OPCODE_DMIN:
6011 exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE);
6012 break;
6013
6014 case TGSI_OPCODE_DSLT:
6015 exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT);
6016 break;
6017
6018 case TGSI_OPCODE_DSGE:
6019 exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT);
6020 break;
6021
6022 case TGSI_OPCODE_DSEQ:
6023 exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT);
6024 break;
6025
6026 case TGSI_OPCODE_DSNE:
6027 exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT);
6028 break;
6029
6030 case TGSI_OPCODE_DRCP:
6031 exec_double_unary(mach, inst, micro_drcp);
6032 break;
6033
6034 case TGSI_OPCODE_DSQRT:
6035 exec_double_unary(mach, inst, micro_dsqrt);
6036 break;
6037
6038 case TGSI_OPCODE_DRSQ:
6039 exec_double_unary(mach, inst, micro_drsq);
6040 break;
6041
6042 case TGSI_OPCODE_DMAD:
6043 exec_double_trinary(mach, inst, micro_dmad);
6044 break;
6045
6046 case TGSI_OPCODE_DFRAC:
6047 exec_double_unary(mach, inst, micro_dfrac);
6048 break;
6049
6050 case TGSI_OPCODE_DLDEXP:
6051 exec_dldexp(mach, inst);
6052 break;
6053
6054 case TGSI_OPCODE_DFRACEXP:
6055 exec_dfracexp(mach, inst);
6056 break;
6057
6058 case TGSI_OPCODE_I2D:
6059 exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT);
6060 break;
6061
6062 case TGSI_OPCODE_D2I:
6063 exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT);
6064 break;
6065
6066 case TGSI_OPCODE_U2D:
6067 exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT);
6068 break;
6069
6070 case TGSI_OPCODE_D2U:
6071 exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT);
6072 break;
6073
6074 case TGSI_OPCODE_LOAD:
6075 exec_load(mach, inst);
6076 break;
6077
6078 case TGSI_OPCODE_STORE:
6079 exec_store(mach, inst);
6080 break;
6081
6082 case TGSI_OPCODE_ATOMUADD:
6083 case TGSI_OPCODE_ATOMXCHG:
6084 case TGSI_OPCODE_ATOMCAS:
6085 case TGSI_OPCODE_ATOMAND:
6086 case TGSI_OPCODE_ATOMOR:
6087 case TGSI_OPCODE_ATOMXOR:
6088 case TGSI_OPCODE_ATOMUMIN:
6089 case TGSI_OPCODE_ATOMUMAX:
6090 case TGSI_OPCODE_ATOMIMIN:
6091 case TGSI_OPCODE_ATOMIMAX:
6092 exec_atomop(mach, inst);
6093 break;
6094
6095 case TGSI_OPCODE_RESQ:
6096 exec_resq(mach, inst);
6097 break;
6098 case TGSI_OPCODE_BARRIER:
6099 case TGSI_OPCODE_MEMBAR:
6100 return TRUE;
6101 break;
6102
6103 case TGSI_OPCODE_I64ABS:
6104 exec_double_unary(mach, inst, micro_i64abs);
6105 break;
6106
6107 case TGSI_OPCODE_I64SSG:
6108 exec_double_unary(mach, inst, micro_i64sgn);
6109 break;
6110
6111 case TGSI_OPCODE_I64NEG:
6112 exec_double_unary(mach, inst, micro_i64neg);
6113 break;
6114
6115 case TGSI_OPCODE_U64SEQ:
6116 exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
6117 break;
6118
6119 case TGSI_OPCODE_U64SNE:
6120 exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
6121 break;
6122
6123 case TGSI_OPCODE_I64SLT:
6124 exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
6125 break;
6126 case TGSI_OPCODE_U64SLT:
6127 exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
6128 break;
6129
6130 case TGSI_OPCODE_I64SGE:
6131 exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
6132 break;
6133 case TGSI_OPCODE_U64SGE:
6134 exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
6135 break;
6136
6137 case TGSI_OPCODE_I64MIN:
6138 exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
6139 break;
6140 case TGSI_OPCODE_U64MIN:
6141 exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
6142 break;
6143 case TGSI_OPCODE_I64MAX:
6144 exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
6145 break;
6146 case TGSI_OPCODE_U64MAX:
6147 exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
6148 break;
6149 case TGSI_OPCODE_U64ADD:
6150 exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
6151 break;
6152 case TGSI_OPCODE_U64MUL:
6153 exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
6154 break;
6155 case TGSI_OPCODE_U64SHL:
6156 exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
6157 break;
6158 case TGSI_OPCODE_I64SHR:
6159 exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
6160 break;
6161 case TGSI_OPCODE_U64SHR:
6162 exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
6163 break;
6164 case TGSI_OPCODE_U64DIV:
6165 exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
6166 break;
6167 case TGSI_OPCODE_I64DIV:
6168 exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
6169 break;
6170 case TGSI_OPCODE_U64MOD:
6171 exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
6172 break;
6173 case TGSI_OPCODE_I64MOD:
6174 exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
6175 break;
6176
6177 case TGSI_OPCODE_F2U64:
6178 exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
6179 break;
6180
6181 case TGSI_OPCODE_F2I64:
6182 exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
6183 break;
6184
6185 case TGSI_OPCODE_U2I64:
6186 exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT);
6187 break;
6188 case TGSI_OPCODE_I2I64:
6189 exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
6190 break;
6191
6192 case TGSI_OPCODE_D2U64:
6193 exec_double_unary(mach, inst, micro_d2u64);
6194 break;
6195
6196 case TGSI_OPCODE_D2I64:
6197 exec_double_unary(mach, inst, micro_d2i64);
6198 break;
6199
6200 case TGSI_OPCODE_U642F:
6201 exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT);
6202 break;
6203 case TGSI_OPCODE_I642F:
6204 exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT);
6205 break;
6206
6207 case TGSI_OPCODE_U642D:
6208 exec_double_unary(mach, inst, micro_u642d);
6209 break;
6210 case TGSI_OPCODE_I642D:
6211 exec_double_unary(mach, inst, micro_i642d);
6212 break;
6213
6214 default:
6215 assert( 0 );
6216 }
6217 return FALSE;
6218 }
6219
6220 static void
6221 tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)
6222 {
6223 uint default_mask = 0xf;
6224
6225 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
6226 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
6227
6228 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
6229 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
6230 mach->Primitives[0] = 0;
6231 /* GS runs on a single primitive for now */
6232 default_mask = 0x1;
6233 }
6234
6235 if (mach->NonHelperMask == 0)
6236 mach->NonHelperMask = default_mask;
6237 mach->CondMask = default_mask;
6238 mach->LoopMask = default_mask;
6239 mach->ContMask = default_mask;
6240 mach->FuncMask = default_mask;
6241 mach->ExecMask = default_mask;
6242
6243 mach->Switch.mask = default_mask;
6244
6245 assert(mach->CondStackTop == 0);
6246 assert(mach->LoopStackTop == 0);
6247 assert(mach->ContStackTop == 0);
6248 assert(mach->SwitchStackTop == 0);
6249 assert(mach->BreakStackTop == 0);
6250 assert(mach->CallStackTop == 0);
6251 }
6252
6253 /**
6254 * Run TGSI interpreter.
6255 * \return bitmask of "alive" quad components
6256 */
6257 uint
6258 tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc )
6259 {
6260 uint i;
6261
6262 mach->pc = start_pc;
6263
6264 if (!start_pc) {
6265 tgsi_exec_machine_setup_masks(mach);
6266
6267 /* execute declarations (interpolants) */
6268 for (i = 0; i < mach->NumDeclarations; i++) {
6269 exec_declaration( mach, mach->Declarations+i );
6270 }
6271 }
6272
6273 {
6274 #if DEBUG_EXECUTION
6275 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
6276 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
6277 uint inst = 1;
6278
6279 if (!start_pc) {
6280 memset(mach->Temps, 0, sizeof(temps));
6281 if (mach->Outputs)
6282 memset(mach->Outputs, 0, sizeof(outputs));
6283 memset(temps, 0, sizeof(temps));
6284 memset(outputs, 0, sizeof(outputs));
6285 }
6286 #endif
6287
6288 /* execute instructions, until pc is set to -1 */
6289 while (mach->pc != -1) {
6290 boolean barrier_hit;
6291 #if DEBUG_EXECUTION
6292 uint i;
6293
6294 tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++);
6295 #endif
6296
6297 assert(mach->pc < (int) mach->NumInstructions);
6298 barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc);
6299
6300 /* for compute shaders if we hit a barrier return now for later rescheduling */
6301 if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE)
6302 return 0;
6303
6304 #if DEBUG_EXECUTION
6305 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
6306 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
6307 uint j;
6308
6309 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
6310 debug_printf("TEMP[%2u] = ", i);
6311 for (j = 0; j < 4; j++) {
6312 if (j > 0) {
6313 debug_printf(" ");
6314 }
6315 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6316 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
6317 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
6318 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
6319 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
6320 }
6321 }
6322 }
6323 if (mach->Outputs) {
6324 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
6325 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
6326 uint j;
6327
6328 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
6329 debug_printf("OUT[%2u] = ", i);
6330 for (j = 0; j < 4; j++) {
6331 if (j > 0) {
6332 debug_printf(" ");
6333 }
6334 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6335 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
6336 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
6337 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
6338 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
6339 }
6340 }
6341 }
6342 }
6343 #endif
6344 }
6345 }
6346
6347 #if 0
6348 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
6349 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
6350 /*
6351 * Scale back depth component.
6352 */
6353 for (i = 0; i < 4; i++)
6354 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
6355 }
6356 #endif
6357
6358 /* Strictly speaking, these assertions aren't really needed but they
6359 * can potentially catch some bugs in the control flow code.
6360 */
6361 assert(mach->CondStackTop == 0);
6362 assert(mach->LoopStackTop == 0);
6363 assert(mach->ContStackTop == 0);
6364 assert(mach->SwitchStackTop == 0);
6365 assert(mach->BreakStackTop == 0);
6366 assert(mach->CallStackTop == 0);
6367
6368 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
6369 }