tgsi: Remove unused local
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 VMware, Inc.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
44 * The ExecMask is computed from several other masks (CondMask, LoopMask,
45 * ContMask, Switch.mask and FuncMask -- see UPDATE_EXEC_MASK), which are
46 * controlled by the flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_half.h"
62 #include "util/u_memory.h"
63 #include "util/u_math.h"
64 #include "util/rounding.h"
65
66
/* Debug switch; not referenced in the visible part of this file (presumably
 * enables execution tracing further down -- TODO confirm). */
67 #define DEBUG_EXECUTION 0
68
69
/* When non-zero, micro_exp2() and micro_lg2() use util_fast_exp2() and
 * util_fast_log2() instead of the libm based code paths. */
70 #define FAST_MATH 0
71
/* Lane indices inside a channel, as used by the micro_ddx and micro_ddy
 * derivative helpers to pick the pixels of a 2x2 quad. */
72 #define TILE_TOP_LEFT 0
73 #define TILE_TOP_RIGHT 1
74 #define TILE_BOTTOM_LEFT 2
75 #define TILE_BOTTOM_RIGHT 3
76
/**
 * One channel of 64-bit values with TGSI_QUAD_SIZE lanes.  The members are
 * alternative views of the same storage: doubles, pairs of unsigned words,
 * unsigned 64-bit integers and signed 64-bit integers.
 */
77 union tgsi_double_channel {
78 double d[TGSI_QUAD_SIZE];
/* Two unsigned words per lane; the 64-bit compare helpers write only [0]. */
79 unsigned u[TGSI_QUAD_SIZE][2];
80 uint64_t u64[TGSI_QUAD_SIZE];
81 int64_t i64[TGSI_QUAD_SIZE];
82 };
83
/**
 * A pair of 64-bit channels named xy and zw.
 * NOTE(review): presumably each 64-bit channel spans two 32-bit register
 * components (x+y and z+w) -- confirm against the users of this struct.
 */
84 struct tgsi_double_vector {
85 union tgsi_double_channel xy;
86 union tgsi_double_channel zw;
87 };
88
89 static void
90 micro_abs(union tgsi_exec_channel *dst,
91 const union tgsi_exec_channel *src)
92 {
93 dst->f[0] = fabsf(src->f[0]);
94 dst->f[1] = fabsf(src->f[1]);
95 dst->f[2] = fabsf(src->f[2]);
96 dst->f[3] = fabsf(src->f[3]);
97 }
98
99 static void
100 micro_arl(union tgsi_exec_channel *dst,
101 const union tgsi_exec_channel *src)
102 {
103 dst->i[0] = (int)floorf(src->f[0]);
104 dst->i[1] = (int)floorf(src->f[1]);
105 dst->i[2] = (int)floorf(src->f[2]);
106 dst->i[3] = (int)floorf(src->f[3]);
107 }
108
109 static void
110 micro_arr(union tgsi_exec_channel *dst,
111 const union tgsi_exec_channel *src)
112 {
113 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
114 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
115 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
116 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
117 }
118
119 static void
120 micro_ceil(union tgsi_exec_channel *dst,
121 const union tgsi_exec_channel *src)
122 {
123 dst->f[0] = ceilf(src->f[0]);
124 dst->f[1] = ceilf(src->f[1]);
125 dst->f[2] = ceilf(src->f[2]);
126 dst->f[3] = ceilf(src->f[3]);
127 }
128
129 static void
130 micro_cmp(union tgsi_exec_channel *dst,
131 const union tgsi_exec_channel *src0,
132 const union tgsi_exec_channel *src1,
133 const union tgsi_exec_channel *src2)
134 {
135 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
136 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
137 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
138 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
139 }
140
141 static void
142 micro_cos(union tgsi_exec_channel *dst,
143 const union tgsi_exec_channel *src)
144 {
145 dst->f[0] = cosf(src->f[0]);
146 dst->f[1] = cosf(src->f[1]);
147 dst->f[2] = cosf(src->f[2]);
148 dst->f[3] = cosf(src->f[3]);
149 }
150
151 static void
152 micro_d2f(union tgsi_exec_channel *dst,
153 const union tgsi_double_channel *src)
154 {
155 dst->f[0] = (float)src->d[0];
156 dst->f[1] = (float)src->d[1];
157 dst->f[2] = (float)src->d[2];
158 dst->f[3] = (float)src->d[3];
159 }
160
161 static void
162 micro_d2i(union tgsi_exec_channel *dst,
163 const union tgsi_double_channel *src)
164 {
165 dst->i[0] = (int)src->d[0];
166 dst->i[1] = (int)src->d[1];
167 dst->i[2] = (int)src->d[2];
168 dst->i[3] = (int)src->d[3];
169 }
170
171 static void
172 micro_d2u(union tgsi_exec_channel *dst,
173 const union tgsi_double_channel *src)
174 {
175 dst->u[0] = (unsigned)src->d[0];
176 dst->u[1] = (unsigned)src->d[1];
177 dst->u[2] = (unsigned)src->d[2];
178 dst->u[3] = (unsigned)src->d[3];
179 }
180 static void
181 micro_dabs(union tgsi_double_channel *dst,
182 const union tgsi_double_channel *src)
183 {
184 dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];
185 dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];
186 dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];
187 dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];
188 }
189
190 static void
191 micro_dadd(union tgsi_double_channel *dst,
192 const union tgsi_double_channel *src)
193 {
194 dst->d[0] = src[0].d[0] + src[1].d[0];
195 dst->d[1] = src[0].d[1] + src[1].d[1];
196 dst->d[2] = src[0].d[2] + src[1].d[2];
197 dst->d[3] = src[0].d[3] + src[1].d[3];
198 }
199
200 static void
201 micro_ddiv(union tgsi_double_channel *dst,
202 const union tgsi_double_channel *src)
203 {
204 dst->d[0] = src[0].d[0] / src[1].d[0];
205 dst->d[1] = src[0].d[1] / src[1].d[1];
206 dst->d[2] = src[0].d[2] / src[1].d[2];
207 dst->d[3] = src[0].d[3] / src[1].d[3];
208 }
209
210 static void
211 micro_ddx(union tgsi_exec_channel *dst,
212 const union tgsi_exec_channel *src)
213 {
214 dst->f[0] =
215 dst->f[1] =
216 dst->f[2] =
217 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
218 }
219
220 static void
221 micro_ddx_fine(union tgsi_exec_channel *dst,
222 const union tgsi_exec_channel *src)
223 {
224 dst->f[0] =
225 dst->f[1] = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT];
226 dst->f[2] =
227 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
228 }
229
230
231 static void
232 micro_ddy(union tgsi_exec_channel *dst,
233 const union tgsi_exec_channel *src)
234 {
235 dst->f[0] =
236 dst->f[1] =
237 dst->f[2] =
238 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
239 }
240
241 static void
242 micro_ddy_fine(union tgsi_exec_channel *dst,
243 const union tgsi_exec_channel *src)
244 {
245 dst->f[0] =
246 dst->f[2] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
247 dst->f[1] =
248 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT];
249 }
250
251 static void
252 micro_dmul(union tgsi_double_channel *dst,
253 const union tgsi_double_channel *src)
254 {
255 dst->d[0] = src[0].d[0] * src[1].d[0];
256 dst->d[1] = src[0].d[1] * src[1].d[1];
257 dst->d[2] = src[0].d[2] * src[1].d[2];
258 dst->d[3] = src[0].d[3] * src[1].d[3];
259 }
260
261 static void
262 micro_dmax(union tgsi_double_channel *dst,
263 const union tgsi_double_channel *src)
264 {
265 dst->d[0] = src[0].d[0] > src[1].d[0] ? src[0].d[0] : src[1].d[0];
266 dst->d[1] = src[0].d[1] > src[1].d[1] ? src[0].d[1] : src[1].d[1];
267 dst->d[2] = src[0].d[2] > src[1].d[2] ? src[0].d[2] : src[1].d[2];
268 dst->d[3] = src[0].d[3] > src[1].d[3] ? src[0].d[3] : src[1].d[3];
269 }
270
271 static void
272 micro_dmin(union tgsi_double_channel *dst,
273 const union tgsi_double_channel *src)
274 {
275 dst->d[0] = src[0].d[0] < src[1].d[0] ? src[0].d[0] : src[1].d[0];
276 dst->d[1] = src[0].d[1] < src[1].d[1] ? src[0].d[1] : src[1].d[1];
277 dst->d[2] = src[0].d[2] < src[1].d[2] ? src[0].d[2] : src[1].d[2];
278 dst->d[3] = src[0].d[3] < src[1].d[3] ? src[0].d[3] : src[1].d[3];
279 }
280
281 static void
282 micro_dneg(union tgsi_double_channel *dst,
283 const union tgsi_double_channel *src)
284 {
285 dst->d[0] = -src->d[0];
286 dst->d[1] = -src->d[1];
287 dst->d[2] = -src->d[2];
288 dst->d[3] = -src->d[3];
289 }
290
291 static void
292 micro_dslt(union tgsi_double_channel *dst,
293 const union tgsi_double_channel *src)
294 {
295 dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;
296 dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;
297 dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;
298 dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;
299 }
300
301 static void
302 micro_dsne(union tgsi_double_channel *dst,
303 const union tgsi_double_channel *src)
304 {
305 dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U;
306 dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U;
307 dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U;
308 dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U;
309 }
310
311 static void
312 micro_dsge(union tgsi_double_channel *dst,
313 const union tgsi_double_channel *src)
314 {
315 dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U;
316 dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U;
317 dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U;
318 dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U;
319 }
320
321 static void
322 micro_dseq(union tgsi_double_channel *dst,
323 const union tgsi_double_channel *src)
324 {
325 dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U;
326 dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U;
327 dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U;
328 dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U;
329 }
330
331 static void
332 micro_drcp(union tgsi_double_channel *dst,
333 const union tgsi_double_channel *src)
334 {
335 dst->d[0] = 1.0 / src->d[0];
336 dst->d[1] = 1.0 / src->d[1];
337 dst->d[2] = 1.0 / src->d[2];
338 dst->d[3] = 1.0 / src->d[3];
339 }
340
341 static void
342 micro_dsqrt(union tgsi_double_channel *dst,
343 const union tgsi_double_channel *src)
344 {
345 dst->d[0] = sqrt(src->d[0]);
346 dst->d[1] = sqrt(src->d[1]);
347 dst->d[2] = sqrt(src->d[2]);
348 dst->d[3] = sqrt(src->d[3]);
349 }
350
351 static void
352 micro_drsq(union tgsi_double_channel *dst,
353 const union tgsi_double_channel *src)
354 {
355 dst->d[0] = 1.0 / sqrt(src->d[0]);
356 dst->d[1] = 1.0 / sqrt(src->d[1]);
357 dst->d[2] = 1.0 / sqrt(src->d[2]);
358 dst->d[3] = 1.0 / sqrt(src->d[3]);
359 }
360
361 static void
362 micro_dmad(union tgsi_double_channel *dst,
363 const union tgsi_double_channel *src)
364 {
365 dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0];
366 dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1];
367 dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2];
368 dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3];
369 }
370
371 static void
372 micro_dfrac(union tgsi_double_channel *dst,
373 const union tgsi_double_channel *src)
374 {
375 dst->d[0] = src->d[0] - floor(src->d[0]);
376 dst->d[1] = src->d[1] - floor(src->d[1]);
377 dst->d[2] = src->d[2] - floor(src->d[2]);
378 dst->d[3] = src->d[3] - floor(src->d[3]);
379 }
380
381 static void
382 micro_dldexp(union tgsi_double_channel *dst,
383 const union tgsi_double_channel *src0,
384 union tgsi_exec_channel *src1)
385 {
386 dst->d[0] = ldexp(src0->d[0], src1->i[0]);
387 dst->d[1] = ldexp(src0->d[1], src1->i[1]);
388 dst->d[2] = ldexp(src0->d[2], src1->i[2]);
389 dst->d[3] = ldexp(src0->d[3], src1->i[3]);
390 }
391
392 static void
393 micro_dfracexp(union tgsi_double_channel *dst,
394 union tgsi_exec_channel *dst_exp,
395 const union tgsi_double_channel *src)
396 {
397 dst->d[0] = frexp(src->d[0], &dst_exp->i[0]);
398 dst->d[1] = frexp(src->d[1], &dst_exp->i[1]);
399 dst->d[2] = frexp(src->d[2], &dst_exp->i[2]);
400 dst->d[3] = frexp(src->d[3], &dst_exp->i[3]);
401 }
402
403 static void
404 micro_exp2(union tgsi_exec_channel *dst,
405 const union tgsi_exec_channel *src)
406 {
407 #if FAST_MATH
408 dst->f[0] = util_fast_exp2(src->f[0]);
409 dst->f[1] = util_fast_exp2(src->f[1]);
410 dst->f[2] = util_fast_exp2(src->f[2]);
411 dst->f[3] = util_fast_exp2(src->f[3]);
412 #else
413 #if DEBUG
414 /* Inf is okay for this instruction, so clamp it to silence assertions. */
415 uint i;
416 union tgsi_exec_channel clamped;
417
418 for (i = 0; i < 4; i++) {
419 if (src->f[i] > 127.99999f) {
420 clamped.f[i] = 127.99999f;
421 } else if (src->f[i] < -126.99999f) {
422 clamped.f[i] = -126.99999f;
423 } else {
424 clamped.f[i] = src->f[i];
425 }
426 }
427 src = &clamped;
428 #endif /* DEBUG */
429
430 dst->f[0] = powf(2.0f, src->f[0]);
431 dst->f[1] = powf(2.0f, src->f[1]);
432 dst->f[2] = powf(2.0f, src->f[2]);
433 dst->f[3] = powf(2.0f, src->f[3]);
434 #endif /* FAST_MATH */
435 }
436
437 static void
438 micro_f2d(union tgsi_double_channel *dst,
439 const union tgsi_exec_channel *src)
440 {
441 dst->d[0] = (double)src->f[0];
442 dst->d[1] = (double)src->f[1];
443 dst->d[2] = (double)src->f[2];
444 dst->d[3] = (double)src->f[3];
445 }
446
447 static void
448 micro_flr(union tgsi_exec_channel *dst,
449 const union tgsi_exec_channel *src)
450 {
451 dst->f[0] = floorf(src->f[0]);
452 dst->f[1] = floorf(src->f[1]);
453 dst->f[2] = floorf(src->f[2]);
454 dst->f[3] = floorf(src->f[3]);
455 }
456
457 static void
458 micro_frc(union tgsi_exec_channel *dst,
459 const union tgsi_exec_channel *src)
460 {
461 dst->f[0] = src->f[0] - floorf(src->f[0]);
462 dst->f[1] = src->f[1] - floorf(src->f[1]);
463 dst->f[2] = src->f[2] - floorf(src->f[2]);
464 dst->f[3] = src->f[3] - floorf(src->f[3]);
465 }
466
467 static void
468 micro_i2d(union tgsi_double_channel *dst,
469 const union tgsi_exec_channel *src)
470 {
471 dst->d[0] = (double)src->i[0];
472 dst->d[1] = (double)src->i[1];
473 dst->d[2] = (double)src->i[2];
474 dst->d[3] = (double)src->i[3];
475 }
476
477 static void
478 micro_iabs(union tgsi_exec_channel *dst,
479 const union tgsi_exec_channel *src)
480 {
481 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
482 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
483 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
484 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
485 }
486
487 static void
488 micro_ineg(union tgsi_exec_channel *dst,
489 const union tgsi_exec_channel *src)
490 {
491 dst->i[0] = -src->i[0];
492 dst->i[1] = -src->i[1];
493 dst->i[2] = -src->i[2];
494 dst->i[3] = -src->i[3];
495 }
496
497 static void
498 micro_lg2(union tgsi_exec_channel *dst,
499 const union tgsi_exec_channel *src)
500 {
501 #if FAST_MATH
502 dst->f[0] = util_fast_log2(src->f[0]);
503 dst->f[1] = util_fast_log2(src->f[1]);
504 dst->f[2] = util_fast_log2(src->f[2]);
505 dst->f[3] = util_fast_log2(src->f[3]);
506 #else
507 dst->f[0] = logf(src->f[0]) * 1.442695f;
508 dst->f[1] = logf(src->f[1]) * 1.442695f;
509 dst->f[2] = logf(src->f[2]) * 1.442695f;
510 dst->f[3] = logf(src->f[3]) * 1.442695f;
511 #endif
512 }
513
514 static void
515 micro_lrp(union tgsi_exec_channel *dst,
516 const union tgsi_exec_channel *src0,
517 const union tgsi_exec_channel *src1,
518 const union tgsi_exec_channel *src2)
519 {
520 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
521 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
522 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
523 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
524 }
525
526 static void
527 micro_mad(union tgsi_exec_channel *dst,
528 const union tgsi_exec_channel *src0,
529 const union tgsi_exec_channel *src1,
530 const union tgsi_exec_channel *src2)
531 {
532 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
533 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
534 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
535 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
536 }
537
538 static void
539 micro_mov(union tgsi_exec_channel *dst,
540 const union tgsi_exec_channel *src)
541 {
542 dst->u[0] = src->u[0];
543 dst->u[1] = src->u[1];
544 dst->u[2] = src->u[2];
545 dst->u[3] = src->u[3];
546 }
547
548 static void
549 micro_rcp(union tgsi_exec_channel *dst,
550 const union tgsi_exec_channel *src)
551 {
552 #if 0 /* for debugging */
553 assert(src->f[0] != 0.0f);
554 assert(src->f[1] != 0.0f);
555 assert(src->f[2] != 0.0f);
556 assert(src->f[3] != 0.0f);
557 #endif
558 dst->f[0] = 1.0f / src->f[0];
559 dst->f[1] = 1.0f / src->f[1];
560 dst->f[2] = 1.0f / src->f[2];
561 dst->f[3] = 1.0f / src->f[3];
562 }
563
564 static void
565 micro_rnd(union tgsi_exec_channel *dst,
566 const union tgsi_exec_channel *src)
567 {
568 dst->f[0] = _mesa_roundevenf(src->f[0]);
569 dst->f[1] = _mesa_roundevenf(src->f[1]);
570 dst->f[2] = _mesa_roundevenf(src->f[2]);
571 dst->f[3] = _mesa_roundevenf(src->f[3]);
572 }
573
574 static void
575 micro_rsq(union tgsi_exec_channel *dst,
576 const union tgsi_exec_channel *src)
577 {
578 #if 0 /* for debugging */
579 assert(src->f[0] != 0.0f);
580 assert(src->f[1] != 0.0f);
581 assert(src->f[2] != 0.0f);
582 assert(src->f[3] != 0.0f);
583 #endif
584 dst->f[0] = 1.0f / sqrtf(src->f[0]);
585 dst->f[1] = 1.0f / sqrtf(src->f[1]);
586 dst->f[2] = 1.0f / sqrtf(src->f[2]);
587 dst->f[3] = 1.0f / sqrtf(src->f[3]);
588 }
589
590 static void
591 micro_sqrt(union tgsi_exec_channel *dst,
592 const union tgsi_exec_channel *src)
593 {
594 dst->f[0] = sqrtf(src->f[0]);
595 dst->f[1] = sqrtf(src->f[1]);
596 dst->f[2] = sqrtf(src->f[2]);
597 dst->f[3] = sqrtf(src->f[3]);
598 }
599
600 static void
601 micro_seq(union tgsi_exec_channel *dst,
602 const union tgsi_exec_channel *src0,
603 const union tgsi_exec_channel *src1)
604 {
605 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
606 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
607 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
608 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
609 }
610
611 static void
612 micro_sge(union tgsi_exec_channel *dst,
613 const union tgsi_exec_channel *src0,
614 const union tgsi_exec_channel *src1)
615 {
616 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
617 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
618 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
619 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
620 }
621
622 static void
623 micro_sgn(union tgsi_exec_channel *dst,
624 const union tgsi_exec_channel *src)
625 {
626 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
627 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
628 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
629 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
630 }
631
632 static void
633 micro_isgn(union tgsi_exec_channel *dst,
634 const union tgsi_exec_channel *src)
635 {
636 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
637 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
638 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
639 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
640 }
641
642 static void
643 micro_sgt(union tgsi_exec_channel *dst,
644 const union tgsi_exec_channel *src0,
645 const union tgsi_exec_channel *src1)
646 {
647 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
648 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
649 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
650 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
651 }
652
653 static void
654 micro_sin(union tgsi_exec_channel *dst,
655 const union tgsi_exec_channel *src)
656 {
657 dst->f[0] = sinf(src->f[0]);
658 dst->f[1] = sinf(src->f[1]);
659 dst->f[2] = sinf(src->f[2]);
660 dst->f[3] = sinf(src->f[3]);
661 }
662
663 static void
664 micro_sle(union tgsi_exec_channel *dst,
665 const union tgsi_exec_channel *src0,
666 const union tgsi_exec_channel *src1)
667 {
668 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
669 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
670 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
671 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
672 }
673
674 static void
675 micro_slt(union tgsi_exec_channel *dst,
676 const union tgsi_exec_channel *src0,
677 const union tgsi_exec_channel *src1)
678 {
679 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
680 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
681 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
682 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
683 }
684
685 static void
686 micro_sne(union tgsi_exec_channel *dst,
687 const union tgsi_exec_channel *src0,
688 const union tgsi_exec_channel *src1)
689 {
690 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
691 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
692 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
693 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
694 }
695
696 static void
697 micro_trunc(union tgsi_exec_channel *dst,
698 const union tgsi_exec_channel *src)
699 {
700 dst->f[0] = truncf(src->f[0]);
701 dst->f[1] = truncf(src->f[1]);
702 dst->f[2] = truncf(src->f[2]);
703 dst->f[3] = truncf(src->f[3]);
704 }
705
706 static void
707 micro_u2d(union tgsi_double_channel *dst,
708 const union tgsi_exec_channel *src)
709 {
710 dst->d[0] = (double)src->u[0];
711 dst->d[1] = (double)src->u[1];
712 dst->d[2] = (double)src->u[2];
713 dst->d[3] = (double)src->u[3];
714 }
715
716 static void
717 micro_i64abs(union tgsi_double_channel *dst,
718 const union tgsi_double_channel *src)
719 {
720 dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
721 dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
722 dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
723 dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
724 }
725
726 static void
727 micro_i64sgn(union tgsi_double_channel *dst,
728 const union tgsi_double_channel *src)
729 {
730 dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
731 dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
732 dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
733 dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
734 }
735
736 static void
737 micro_i64neg(union tgsi_double_channel *dst,
738 const union tgsi_double_channel *src)
739 {
740 dst->i64[0] = -src->i64[0];
741 dst->i64[1] = -src->i64[1];
742 dst->i64[2] = -src->i64[2];
743 dst->i64[3] = -src->i64[3];
744 }
745
746 static void
747 micro_u64seq(union tgsi_double_channel *dst,
748 const union tgsi_double_channel *src)
749 {
750 dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
751 dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
752 dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
753 dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
754 }
755
756 static void
757 micro_u64sne(union tgsi_double_channel *dst,
758 const union tgsi_double_channel *src)
759 {
760 dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
761 dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
762 dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
763 dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
764 }
765
766 static void
767 micro_i64slt(union tgsi_double_channel *dst,
768 const union tgsi_double_channel *src)
769 {
770 dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
771 dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
772 dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
773 dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
774 }
775
776 static void
777 micro_u64slt(union tgsi_double_channel *dst,
778 const union tgsi_double_channel *src)
779 {
780 dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
781 dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
782 dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
783 dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
784 }
785
786 static void
787 micro_i64sge(union tgsi_double_channel *dst,
788 const union tgsi_double_channel *src)
789 {
790 dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
791 dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
792 dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
793 dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
794 }
795
796 static void
797 micro_u64sge(union tgsi_double_channel *dst,
798 const union tgsi_double_channel *src)
799 {
800 dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
801 dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
802 dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
803 dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
804 }
805
806 static void
807 micro_u64max(union tgsi_double_channel *dst,
808 const union tgsi_double_channel *src)
809 {
810 dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
811 dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
812 dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
813 dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
814 }
815
816 static void
817 micro_i64max(union tgsi_double_channel *dst,
818 const union tgsi_double_channel *src)
819 {
820 dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
821 dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
822 dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
823 dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
824 }
825
826 static void
827 micro_u64min(union tgsi_double_channel *dst,
828 const union tgsi_double_channel *src)
829 {
830 dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
831 dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
832 dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
833 dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
834 }
835
836 static void
837 micro_i64min(union tgsi_double_channel *dst,
838 const union tgsi_double_channel *src)
839 {
840 dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
841 dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
842 dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
843 dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
844 }
845
846 static void
847 micro_u64add(union tgsi_double_channel *dst,
848 const union tgsi_double_channel *src)
849 {
850 dst->u64[0] = src[0].u64[0] + src[1].u64[0];
851 dst->u64[1] = src[0].u64[1] + src[1].u64[1];
852 dst->u64[2] = src[0].u64[2] + src[1].u64[2];
853 dst->u64[3] = src[0].u64[3] + src[1].u64[3];
854 }
855
856 static void
857 micro_u64mul(union tgsi_double_channel *dst,
858 const union tgsi_double_channel *src)
859 {
860 dst->u64[0] = src[0].u64[0] * src[1].u64[0];
861 dst->u64[1] = src[0].u64[1] * src[1].u64[1];
862 dst->u64[2] = src[0].u64[2] * src[1].u64[2];
863 dst->u64[3] = src[0].u64[3] * src[1].u64[3];
864 }
865
866 static void
867 micro_u64div(union tgsi_double_channel *dst,
868 const union tgsi_double_channel *src)
869 {
870 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull;
871 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull;
872 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull;
873 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull;
874 }
875
876 static void
877 micro_i64div(union tgsi_double_channel *dst,
878 const union tgsi_double_channel *src)
879 {
880 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0;
881 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0;
882 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0;
883 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0;
884 }
885
886 static void
887 micro_u64mod(union tgsi_double_channel *dst,
888 const union tgsi_double_channel *src)
889 {
890 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull;
891 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull;
892 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull;
893 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull;
894 }
895
896 static void
897 micro_i64mod(union tgsi_double_channel *dst,
898 const union tgsi_double_channel *src)
899 {
900 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll;
901 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll;
902 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll;
903 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll;
904 }
905
906 static void
907 micro_u64shl(union tgsi_double_channel *dst,
908 const union tgsi_double_channel *src0,
909 union tgsi_exec_channel *src1)
910 {
911 unsigned masked_count;
912 masked_count = src1->u[0] & 0x3f;
913 dst->u64[0] = src0->u64[0] << masked_count;
914 masked_count = src1->u[1] & 0x3f;
915 dst->u64[1] = src0->u64[1] << masked_count;
916 masked_count = src1->u[2] & 0x3f;
917 dst->u64[2] = src0->u64[2] << masked_count;
918 masked_count = src1->u[3] & 0x3f;
919 dst->u64[3] = src0->u64[3] << masked_count;
920 }
921
922 static void
923 micro_i64shr(union tgsi_double_channel *dst,
924 const union tgsi_double_channel *src0,
925 union tgsi_exec_channel *src1)
926 {
927 unsigned masked_count;
928 masked_count = src1->u[0] & 0x3f;
929 dst->i64[0] = src0->i64[0] >> masked_count;
930 masked_count = src1->u[1] & 0x3f;
931 dst->i64[1] = src0->i64[1] >> masked_count;
932 masked_count = src1->u[2] & 0x3f;
933 dst->i64[2] = src0->i64[2] >> masked_count;
934 masked_count = src1->u[3] & 0x3f;
935 dst->i64[3] = src0->i64[3] >> masked_count;
936 }
937
938 static void
939 micro_u64shr(union tgsi_double_channel *dst,
940 const union tgsi_double_channel *src0,
941 union tgsi_exec_channel *src1)
942 {
943 unsigned masked_count;
944 masked_count = src1->u[0] & 0x3f;
945 dst->u64[0] = src0->u64[0] >> masked_count;
946 masked_count = src1->u[1] & 0x3f;
947 dst->u64[1] = src0->u64[1] >> masked_count;
948 masked_count = src1->u[2] & 0x3f;
949 dst->u64[2] = src0->u64[2] >> masked_count;
950 masked_count = src1->u[3] & 0x3f;
951 dst->u64[3] = src0->u64[3] >> masked_count;
952 }
953
/**
 * Data type tags (float, int, uint, double, int64, uint64).
 * NOTE(review): presumably selects how operand data is interpreted by the
 * fetch/store helpers later in the file -- confirm against the users.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT  = 0,
   TGSI_EXEC_DATA_INT    = 1,
   TGSI_EXEC_DATA_UINT   = 2,
   TGSI_EXEC_DATA_DOUBLE = 3,
   TGSI_EXEC_DATA_INT64  = 4,
   TGSI_EXEC_DATA_UINT64 = 5,
};
962
963 /*
964 * Shorthand locations of various utility registers (_I = Index, _C = Channel).
 * Each TEMP_* name is a plain alias for the TGSI_EXEC_TEMP_* constant with the
 * same suffix; the PRIMITIVE ones are collected in temp_prim_idxs[] below.
965 */
966 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
967 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
968 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
969 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
970 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
971 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
972 #define TEMP_PRIMITIVE_S1_I TGSI_EXEC_TEMP_PRIMITIVE_S1_I
973 #define TEMP_PRIMITIVE_S1_C TGSI_EXEC_TEMP_PRIMITIVE_S1_C
974 #define TEMP_PRIMITIVE_S2_I TGSI_EXEC_TEMP_PRIMITIVE_S2_I
975 #define TEMP_PRIMITIVE_S2_C TGSI_EXEC_TEMP_PRIMITIVE_S2_C
976 #define TEMP_PRIMITIVE_S3_I TGSI_EXEC_TEMP_PRIMITIVE_S3_I
977 #define TEMP_PRIMITIVE_S3_C TGSI_EXEC_TEMP_PRIMITIVE_S3_C
978
979 static const struct {
980 int idx;
981 int chan;
982 } temp_prim_idxs[] = {
983 { TEMP_PRIMITIVE_I, TEMP_PRIMITIVE_C },
984 { TEMP_PRIMITIVE_S1_I, TEMP_PRIMITIVE_S1_C },
985 { TEMP_PRIMITIVE_S2_I, TEMP_PRIMITIVE_S2_C },
986 { TEMP_PRIMITIVE_S3_I, TEMP_PRIMITIVE_S3_C },
987 };
988
/**
 * Recompute the execution mask of the machine pointed to by MACH.
 *
 * ExecMask is the bitwise AND of the conditional, loop, continue, switch
 * and function masks.
 *
 * The argument and the whole expansion are parenthesized so that any
 * pointer expression (e.g. &machine) can be passed and the macro can be
 * used inside a larger expression; its value is the new ExecMask.
 * MACH is evaluated several times, so it must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
992
993
994 static const union tgsi_exec_channel ZeroVec =
995 { { 0.0, 0.0, 0.0, 0.0 } };
996
997 static const union tgsi_exec_channel OneVec = {
998 {1.0f, 1.0f, 1.0f, 1.0f}
999 };
1000
1001 static const union tgsi_exec_channel P128Vec = {
1002 {128.0f, 128.0f, 128.0f, 128.0f}
1003 };
1004
1005 static const union tgsi_exec_channel M128Vec = {
1006 {-128.0f, -128.0f, -128.0f, -128.0f}
1007 };
1008
1009
1010 /**
1011 * Assert that none of the float values in 'chan' are infinite or NaN.
1012 * NaN and Inf may occur normally during program execution and should
1013 * not lead to crashes, etc. But when debugging, it's helpful to catch
1014 * them.
1015 */
1016 static inline void
1017 check_inf_or_nan(const union tgsi_exec_channel *chan)
1018 {
1019 assert(!util_is_inf_or_nan((chan)->f[0]));
1020 assert(!util_is_inf_or_nan((chan)->f[1]));
1021 assert(!util_is_inf_or_nan((chan)->f[2]));
1022 assert(!util_is_inf_or_nan((chan)->f[3]));
1023 }
1024
1025
#ifdef DEBUG
/**
 * Debug helper: print the four float values of 'chan', prefixed by 'msg'.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *f = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n", msg, f[0], f[1], f[2], f[3]);
}
#endif
1034
1035
#ifdef DEBUG
/**
 * Debug helper: print all four channels (X, Y, Z, W) of temporary
 * register 'index' as floats.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char chan_names[] = "XYZW";
   const struct tgsi_exec_vector *vec = &mach->Temps[index];
   int c;

   debug_printf("Temp[%u] =\n", index);
   for (c = 0; c < 4; c++) {
      const float *f = vec->xyzw[c].f;

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   chan_names[c], f[0], f[1], f[2], f[3]);
   }
}
#endif
1053
1054
1055 void
1056 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
1057 unsigned num_bufs,
1058 const void **bufs,
1059 const unsigned *buf_sizes)
1060 {
1061 unsigned i;
1062
1063 for (i = 0; i < num_bufs; i++) {
1064 mach->Consts[i] = bufs[i];
1065 mach->ConstsSize[i] = buf_sizes[i];
1066 }
1067 }
1068
1069 /**
1070 * Initialize machine state by expanding tokens to full instructions,
1071 * allocating temporary storage, setting up constants, etc.
1072 * After this, we can call tgsi_exec_machine_run() many times.
1073 */
1074 void
1075 tgsi_exec_machine_bind_shader(
1076 struct tgsi_exec_machine *mach,
1077 const struct tgsi_token *tokens,
1078 struct tgsi_sampler *sampler,
1079 struct tgsi_image *image,
1080 struct tgsi_buffer *buffer)
1081 {
1082 uint k;
1083 struct tgsi_parse_context parse;
1084 struct tgsi_full_instruction *instructions;
1085 struct tgsi_full_declaration *declarations;
1086 uint maxInstructions = 10, numInstructions = 0;
1087 uint maxDeclarations = 10, numDeclarations = 0;
1088
1089 #if 0
1090 tgsi_dump(tokens, 0);
1091 #endif
1092
1093 util_init_math();
1094
1095
1096 mach->Tokens = tokens;
1097 mach->Sampler = sampler;
1098 mach->Image = image;
1099 mach->Buffer = buffer;
1100
1101 if (!tokens) {
1102 /* unbind and free all */
1103 FREE(mach->Declarations);
1104 mach->Declarations = NULL;
1105 mach->NumDeclarations = 0;
1106
1107 FREE(mach->Instructions);
1108 mach->Instructions = NULL;
1109 mach->NumInstructions = 0;
1110
1111 return;
1112 }
1113
1114 k = tgsi_parse_init (&parse, mach->Tokens);
1115 if (k != TGSI_PARSE_OK) {
1116 debug_printf( "Problem parsing!\n" );
1117 return;
1118 }
1119
1120 mach->ImmLimit = 0;
1121 mach->NumOutputs = 0;
1122
1123 for (k = 0; k < TGSI_SEMANTIC_COUNT; k++)
1124 mach->SysSemanticToIndex[k] = -1;
1125
1126 if (mach->ShaderType == PIPE_SHADER_GEOMETRY &&
1127 !mach->UsedGeometryShader) {
1128 struct tgsi_exec_vector *inputs;
1129 struct tgsi_exec_vector *outputs;
1130
1131 inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1132 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS,
1133 16);
1134
1135 if (!inputs)
1136 return;
1137
1138 outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1139 TGSI_MAX_TOTAL_VERTICES, 16);
1140
1141 if (!outputs) {
1142 align_free(inputs);
1143 return;
1144 }
1145
1146 align_free(mach->Inputs);
1147 align_free(mach->Outputs);
1148
1149 mach->Inputs = inputs;
1150 mach->Outputs = outputs;
1151 mach->UsedGeometryShader = TRUE;
1152 }
1153
1154 declarations = (struct tgsi_full_declaration *)
1155 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
1156
1157 if (!declarations) {
1158 return;
1159 }
1160
1161 instructions = (struct tgsi_full_instruction *)
1162 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
1163
1164 if (!instructions) {
1165 FREE( declarations );
1166 return;
1167 }
1168
1169 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1170 uint i;
1171
1172 tgsi_parse_token( &parse );
1173 switch( parse.FullToken.Token.Type ) {
1174 case TGSI_TOKEN_TYPE_DECLARATION:
1175 /* save expanded declaration */
1176 if (numDeclarations == maxDeclarations) {
1177 declarations = REALLOC(declarations,
1178 maxDeclarations
1179 * sizeof(struct tgsi_full_declaration),
1180 (maxDeclarations + 10)
1181 * sizeof(struct tgsi_full_declaration));
1182 maxDeclarations += 10;
1183 }
1184 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
1185 unsigned reg;
1186 for (reg = parse.FullToken.FullDeclaration.Range.First;
1187 reg <= parse.FullToken.FullDeclaration.Range.Last;
1188 ++reg) {
1189 ++mach->NumOutputs;
1190 }
1191 }
1192 else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1193 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
1194 mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;
1195 }
1196
1197 memcpy(declarations + numDeclarations,
1198 &parse.FullToken.FullDeclaration,
1199 sizeof(declarations[0]));
1200 numDeclarations++;
1201 break;
1202
1203 case TGSI_TOKEN_TYPE_IMMEDIATE:
1204 {
1205 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1206 assert( size <= 4 );
1207 if (mach->ImmLimit >= mach->ImmsReserved) {
1208 unsigned newReserved = mach->ImmsReserved ? 2 * mach->ImmsReserved : 128;
1209 float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4));
1210 if (imms) {
1211 mach->ImmsReserved = newReserved;
1212 mach->Imms = imms;
1213 } else {
1214 debug_printf("Unable to (re)allocate space for immidiate constants\n");
1215 break;
1216 }
1217 }
1218
1219 for( i = 0; i < size; i++ ) {
1220 mach->Imms[mach->ImmLimit][i] =
1221 parse.FullToken.FullImmediate.u[i].Float;
1222 }
1223 mach->ImmLimit += 1;
1224 }
1225 break;
1226
1227 case TGSI_TOKEN_TYPE_INSTRUCTION:
1228
1229 /* save expanded instruction */
1230 if (numInstructions == maxInstructions) {
1231 instructions = REALLOC(instructions,
1232 maxInstructions
1233 * sizeof(struct tgsi_full_instruction),
1234 (maxInstructions + 10)
1235 * sizeof(struct tgsi_full_instruction));
1236 maxInstructions += 10;
1237 }
1238
1239 memcpy(instructions + numInstructions,
1240 &parse.FullToken.FullInstruction,
1241 sizeof(instructions[0]));
1242
1243 numInstructions++;
1244 break;
1245
1246 case TGSI_TOKEN_TYPE_PROPERTY:
1247 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
1248 if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
1249 mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data;
1250 }
1251 }
1252 break;
1253
1254 default:
1255 assert( 0 );
1256 }
1257 }
1258 tgsi_parse_free (&parse);
1259
1260 FREE(mach->Declarations);
1261 mach->Declarations = declarations;
1262 mach->NumDeclarations = numDeclarations;
1263
1264 FREE(mach->Instructions);
1265 mach->Instructions = instructions;
1266 mach->NumInstructions = numInstructions;
1267 }
1268
1269
1270 struct tgsi_exec_machine *
1271 tgsi_exec_machine_create(enum pipe_shader_type shader_type)
1272 {
1273 struct tgsi_exec_machine *mach;
1274
1275 mach = align_malloc( sizeof *mach, 16 );
1276 if (!mach)
1277 goto fail;
1278
1279 memset(mach, 0, sizeof(*mach));
1280
1281 mach->ShaderType = shader_type;
1282 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
1283 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
1284
1285 if (shader_type != PIPE_SHADER_COMPUTE) {
1286 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);
1287 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);
1288 if (!mach->Inputs || !mach->Outputs)
1289 goto fail;
1290 }
1291
1292 if (shader_type == PIPE_SHADER_FRAGMENT) {
1293 mach->InputSampleOffsetApply = align_malloc(sizeof(apply_sample_offset_func) * PIPE_MAX_SHADER_INPUTS, 16);
1294 if (!mach->InputSampleOffsetApply)
1295 goto fail;
1296 }
1297
1298 #ifdef DEBUG
1299 /* silence warnings */
1300 (void) print_chan;
1301 (void) print_temp;
1302 #endif
1303
1304 return mach;
1305
1306 fail:
1307 if (mach) {
1308 align_free(mach->InputSampleOffsetApply);
1309 align_free(mach->Inputs);
1310 align_free(mach->Outputs);
1311 align_free(mach);
1312 }
1313 return NULL;
1314 }
1315
1316
1317 void
1318 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
1319 {
1320 if (mach) {
1321 FREE(mach->Instructions);
1322 FREE(mach->Declarations);
1323 FREE(mach->Imms);
1324
1325 align_free(mach->InputSampleOffsetApply);
1326 align_free(mach->Inputs);
1327 align_free(mach->Outputs);
1328
1329 align_free(mach);
1330 }
1331 }
1332
1333 static void
1334 micro_add(union tgsi_exec_channel *dst,
1335 const union tgsi_exec_channel *src0,
1336 const union tgsi_exec_channel *src1)
1337 {
1338 dst->f[0] = src0->f[0] + src1->f[0];
1339 dst->f[1] = src0->f[1] + src1->f[1];
1340 dst->f[2] = src0->f[2] + src1->f[2];
1341 dst->f[3] = src0->f[3] + src1->f[3];
1342 }
1343
1344 static void
1345 micro_div(
1346 union tgsi_exec_channel *dst,
1347 const union tgsi_exec_channel *src0,
1348 const union tgsi_exec_channel *src1 )
1349 {
1350 if (src1->f[0] != 0) {
1351 dst->f[0] = src0->f[0] / src1->f[0];
1352 }
1353 if (src1->f[1] != 0) {
1354 dst->f[1] = src0->f[1] / src1->f[1];
1355 }
1356 if (src1->f[2] != 0) {
1357 dst->f[2] = src0->f[2] / src1->f[2];
1358 }
1359 if (src1->f[3] != 0) {
1360 dst->f[3] = src0->f[3] / src1->f[3];
1361 }
1362 }
1363
1364 static void
1365 micro_lt(
1366 union tgsi_exec_channel *dst,
1367 const union tgsi_exec_channel *src0,
1368 const union tgsi_exec_channel *src1,
1369 const union tgsi_exec_channel *src2,
1370 const union tgsi_exec_channel *src3 )
1371 {
1372 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
1373 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
1374 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
1375 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
1376 }
1377
1378 static void
1379 micro_max(union tgsi_exec_channel *dst,
1380 const union tgsi_exec_channel *src0,
1381 const union tgsi_exec_channel *src1)
1382 {
1383 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
1384 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
1385 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
1386 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
1387 }
1388
1389 static void
1390 micro_min(union tgsi_exec_channel *dst,
1391 const union tgsi_exec_channel *src0,
1392 const union tgsi_exec_channel *src1)
1393 {
1394 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
1395 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
1396 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
1397 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
1398 }
1399
1400 static void
1401 micro_mul(union tgsi_exec_channel *dst,
1402 const union tgsi_exec_channel *src0,
1403 const union tgsi_exec_channel *src1)
1404 {
1405 dst->f[0] = src0->f[0] * src1->f[0];
1406 dst->f[1] = src0->f[1] * src1->f[1];
1407 dst->f[2] = src0->f[2] * src1->f[2];
1408 dst->f[3] = src0->f[3] * src1->f[3];
1409 }
1410
1411 static void
1412 micro_neg(
1413 union tgsi_exec_channel *dst,
1414 const union tgsi_exec_channel *src )
1415 {
1416 dst->f[0] = -src->f[0];
1417 dst->f[1] = -src->f[1];
1418 dst->f[2] = -src->f[2];
1419 dst->f[3] = -src->f[3];
1420 }
1421
1422 static void
1423 micro_pow(
1424 union tgsi_exec_channel *dst,
1425 const union tgsi_exec_channel *src0,
1426 const union tgsi_exec_channel *src1 )
1427 {
1428 #if FAST_MATH
1429 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
1430 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
1431 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
1432 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
1433 #else
1434 dst->f[0] = powf( src0->f[0], src1->f[0] );
1435 dst->f[1] = powf( src0->f[1], src1->f[1] );
1436 dst->f[2] = powf( src0->f[2], src1->f[2] );
1437 dst->f[3] = powf( src0->f[3], src1->f[3] );
1438 #endif
1439 }
1440
1441 static void
1442 micro_ldexp(union tgsi_exec_channel *dst,
1443 const union tgsi_exec_channel *src0,
1444 const union tgsi_exec_channel *src1)
1445 {
1446 dst->f[0] = ldexpf(src0->f[0], src1->i[0]);
1447 dst->f[1] = ldexpf(src0->f[1], src1->i[1]);
1448 dst->f[2] = ldexpf(src0->f[2], src1->i[2]);
1449 dst->f[3] = ldexpf(src0->f[3], src1->i[3]);
1450 }
1451
1452 static void
1453 micro_sub(union tgsi_exec_channel *dst,
1454 const union tgsi_exec_channel *src0,
1455 const union tgsi_exec_channel *src1)
1456 {
1457 dst->f[0] = src0->f[0] - src1->f[0];
1458 dst->f[1] = src0->f[1] - src1->f[1];
1459 dst->f[2] = src0->f[2] - src1->f[2];
1460 dst->f[3] = src0->f[3] - src1->f[3];
1461 }
1462
1463 static void
1464 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1465 const uint file,
1466 const uint swizzle,
1467 const union tgsi_exec_channel *index,
1468 const union tgsi_exec_channel *index2D,
1469 union tgsi_exec_channel *chan)
1470 {
1471 uint i;
1472
1473 assert(swizzle < 4);
1474
1475 switch (file) {
1476 case TGSI_FILE_CONSTANT:
1477 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1478 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
1479 assert(mach->Consts[index2D->i[i]]);
1480
1481 if (index->i[i] < 0) {
1482 chan->u[i] = 0;
1483 } else {
1484 /* NOTE: copying the const value as a uint instead of float */
1485 const uint constbuf = index2D->i[i];
1486 const uint *buf = (const uint *)mach->Consts[constbuf];
1487 const int pos = index->i[i] * 4 + swizzle;
1488 /* const buffer bounds check */
1489 if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) {
1490 if (0) {
1491 /* Debug: print warning */
1492 static int count = 0;
1493 if (count++ < 100)
1494 debug_printf("TGSI Exec: const buffer index %d"
1495 " out of bounds\n", pos);
1496 }
1497 chan->u[i] = 0;
1498 }
1499 else
1500 chan->u[i] = buf[pos];
1501 }
1502 }
1503 break;
1504
1505 case TGSI_FILE_INPUT:
1506 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1507 /*
1508 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1509 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1510 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1511 index2D->i[i], index->i[i]);
1512 }*/
1513 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1514 assert(pos >= 0);
1515 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1516 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1517 }
1518 break;
1519
1520 case TGSI_FILE_SYSTEM_VALUE:
1521 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1522 chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
1523 }
1524 break;
1525
1526 case TGSI_FILE_TEMPORARY:
1527 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1528 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1529 assert(index2D->i[i] == 0);
1530
1531 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1532 }
1533 break;
1534
1535 case TGSI_FILE_IMMEDIATE:
1536 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1537 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1538 assert(index2D->i[i] == 0);
1539
1540 chan->f[i] = mach->Imms[index->i[i]][swizzle];
1541 }
1542 break;
1543
1544 case TGSI_FILE_ADDRESS:
1545 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1546 assert(index->i[i] >= 0);
1547 assert(index2D->i[i] == 0);
1548
1549 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1550 }
1551 break;
1552
1553 case TGSI_FILE_OUTPUT:
1554 /* vertex/fragment output vars can be read too */
1555 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1556 assert(index->i[i] >= 0);
1557 assert(index2D->i[i] == 0);
1558
1559 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1560 }
1561 break;
1562
1563 default:
1564 assert(0);
1565 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1566 chan->u[i] = 0;
1567 }
1568 }
1569 }
1570
1571 static void
1572 get_index_registers(const struct tgsi_exec_machine *mach,
1573 const struct tgsi_full_src_register *reg,
1574 union tgsi_exec_channel *index,
1575 union tgsi_exec_channel *index2D)
1576 {
1577 uint swizzle;
1578
1579 /* We start with a direct index into a register file.
1580 *
1581 * file[1],
1582 * where:
1583 * file = Register.File
1584 * [1] = Register.Index
1585 */
1586 index->i[0] =
1587 index->i[1] =
1588 index->i[2] =
1589 index->i[3] = reg->Register.Index;
1590
1591 /* There is an extra source register that indirectly subscripts
1592 * a register file. The direct index now becomes an offset
1593 * that is being added to the indirect register.
1594 *
1595 * file[ind[2].x+1],
1596 * where:
1597 * ind = Indirect.File
1598 * [2] = Indirect.Index
1599 * .x = Indirect.SwizzleX
1600 */
1601 if (reg->Register.Indirect) {
1602 union tgsi_exec_channel index2;
1603 union tgsi_exec_channel indir_index;
1604 const uint execmask = mach->ExecMask;
1605 uint i;
1606
1607 /* which address register (always zero now) */
1608 index2.i[0] =
1609 index2.i[1] =
1610 index2.i[2] =
1611 index2.i[3] = reg->Indirect.Index;
1612 /* get current value of address register[swizzle] */
1613 swizzle = reg->Indirect.Swizzle;
1614 fetch_src_file_channel(mach,
1615 reg->Indirect.File,
1616 swizzle,
1617 &index2,
1618 &ZeroVec,
1619 &indir_index);
1620
1621 /* add value of address register to the offset */
1622 index->i[0] += indir_index.i[0];
1623 index->i[1] += indir_index.i[1];
1624 index->i[2] += indir_index.i[2];
1625 index->i[3] += indir_index.i[3];
1626
1627 /* for disabled execution channels, zero-out the index to
1628 * avoid using a potential garbage value.
1629 */
1630 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1631 if ((execmask & (1 << i)) == 0)
1632 index->i[i] = 0;
1633 }
1634 }
1635
1636 /* There is an extra source register that is a second
1637 * subscript to a register file. Effectively it means that
1638 * the register file is actually a 2D array of registers.
1639 *
1640 * file[3][1],
1641 * where:
1642 * [3] = Dimension.Index
1643 */
1644 if (reg->Register.Dimension) {
1645 index2D->i[0] =
1646 index2D->i[1] =
1647 index2D->i[2] =
1648 index2D->i[3] = reg->Dimension.Index;
1649
1650 /* Again, the second subscript index can be addressed indirectly
1651 * identically to the first one.
1652 * Nothing stops us from indirectly addressing the indirect register,
1653 * but there is no need for that, so we won't exercise it.
1654 *
1655 * file[ind[4].y+3][1],
1656 * where:
1657 * ind = DimIndirect.File
1658 * [4] = DimIndirect.Index
1659 * .y = DimIndirect.SwizzleX
1660 */
1661 if (reg->Dimension.Indirect) {
1662 union tgsi_exec_channel index2;
1663 union tgsi_exec_channel indir_index;
1664 const uint execmask = mach->ExecMask;
1665 uint i;
1666
1667 index2.i[0] =
1668 index2.i[1] =
1669 index2.i[2] =
1670 index2.i[3] = reg->DimIndirect.Index;
1671
1672 swizzle = reg->DimIndirect.Swizzle;
1673 fetch_src_file_channel(mach,
1674 reg->DimIndirect.File,
1675 swizzle,
1676 &index2,
1677 &ZeroVec,
1678 &indir_index);
1679
1680 index2D->i[0] += indir_index.i[0];
1681 index2D->i[1] += indir_index.i[1];
1682 index2D->i[2] += indir_index.i[2];
1683 index2D->i[3] += indir_index.i[3];
1684
1685 /* for disabled execution channels, zero-out the index to
1686 * avoid using a potential garbage value.
1687 */
1688 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1689 if ((execmask & (1 << i)) == 0) {
1690 index2D->i[i] = 0;
1691 }
1692 }
1693 }
1694
1695 /* If by any chance there was a need for a 3D array of register
1696 * files, we would have to check whether Dimension is followed
1697 * by a dimension register and continue the saga.
1698 */
1699 } else {
1700 index2D->i[0] =
1701 index2D->i[1] =
1702 index2D->i[2] =
1703 index2D->i[3] = 0;
1704 }
1705 }
1706
1707
1708 static void
1709 fetch_source_d(const struct tgsi_exec_machine *mach,
1710 union tgsi_exec_channel *chan,
1711 const struct tgsi_full_src_register *reg,
1712 const uint chan_index)
1713 {
1714 union tgsi_exec_channel index;
1715 union tgsi_exec_channel index2D;
1716 uint swizzle;
1717
1718 get_index_registers(mach, reg, &index, &index2D);
1719
1720
1721 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1722 fetch_src_file_channel(mach,
1723 reg->Register.File,
1724 swizzle,
1725 &index,
1726 &index2D,
1727 chan);
1728 }
1729
1730 static void
1731 fetch_source(const struct tgsi_exec_machine *mach,
1732 union tgsi_exec_channel *chan,
1733 const struct tgsi_full_src_register *reg,
1734 const uint chan_index,
1735 enum tgsi_exec_datatype src_datatype)
1736 {
1737 fetch_source_d(mach, chan, reg, chan_index);
1738
1739 if (reg->Register.Absolute) {
1740 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1741 micro_abs(chan, chan);
1742 } else {
1743 micro_iabs(chan, chan);
1744 }
1745 }
1746
1747 if (reg->Register.Negate) {
1748 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1749 micro_neg(chan, chan);
1750 } else {
1751 micro_ineg(chan, chan);
1752 }
1753 }
1754 }
1755
1756 static union tgsi_exec_channel *
1757 store_dest_dstret(struct tgsi_exec_machine *mach,
1758 const union tgsi_exec_channel *chan,
1759 const struct tgsi_full_dst_register *reg,
1760 uint chan_index,
1761 enum tgsi_exec_datatype dst_datatype)
1762 {
1763 static union tgsi_exec_channel null;
1764 union tgsi_exec_channel *dst;
1765 union tgsi_exec_channel index2D;
1766 int offset = 0; /* indirection offset */
1767 int index;
1768
1769 /* for debugging */
1770 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1771 check_inf_or_nan(chan);
1772 }
1773
1774 /* There is an extra source register that indirectly subscripts
1775 * a register file. The direct index now becomes an offset
1776 * that is being added to the indirect register.
1777 *
1778 * file[ind[2].x+1],
1779 * where:
1780 * ind = Indirect.File
1781 * [2] = Indirect.Index
1782 * .x = Indirect.SwizzleX
1783 */
1784 if (reg->Register.Indirect) {
1785 union tgsi_exec_channel index;
1786 union tgsi_exec_channel indir_index;
1787 uint swizzle;
1788
1789 /* which address register (always zero for now) */
1790 index.i[0] =
1791 index.i[1] =
1792 index.i[2] =
1793 index.i[3] = reg->Indirect.Index;
1794
1795 /* get current value of address register[swizzle] */
1796 swizzle = reg->Indirect.Swizzle;
1797
1798 /* fetch values from the address/indirection register */
1799 fetch_src_file_channel(mach,
1800 reg->Indirect.File,
1801 swizzle,
1802 &index,
1803 &ZeroVec,
1804 &indir_index);
1805
1806 /* save indirection offset */
1807 offset = indir_index.i[0];
1808 }
1809
1810 /* There is an extra source register that is a second
1811 * subscript to a register file. Effectively it means that
1812 * the register file is actually a 2D array of registers.
1813 *
1814 * file[3][1],
1815 * where:
1816 * [3] = Dimension.Index
1817 */
1818 if (reg->Register.Dimension) {
1819 index2D.i[0] =
1820 index2D.i[1] =
1821 index2D.i[2] =
1822 index2D.i[3] = reg->Dimension.Index;
1823
1824 /* Again, the second subscript index can be addressed indirectly
1825 * identically to the first one.
1826 * Nothing stops us from indirectly addressing the indirect register,
1827 * but there is no need for that, so we won't exercise it.
1828 *
1829 * file[ind[4].y+3][1],
1830 * where:
1831 * ind = DimIndirect.File
1832 * [4] = DimIndirect.Index
1833 * .y = DimIndirect.SwizzleX
1834 */
1835 if (reg->Dimension.Indirect) {
1836 union tgsi_exec_channel index2;
1837 union tgsi_exec_channel indir_index;
1838 const uint execmask = mach->ExecMask;
1839 unsigned swizzle;
1840 uint i;
1841
1842 index2.i[0] =
1843 index2.i[1] =
1844 index2.i[2] =
1845 index2.i[3] = reg->DimIndirect.Index;
1846
1847 swizzle = reg->DimIndirect.Swizzle;
1848 fetch_src_file_channel(mach,
1849 reg->DimIndirect.File,
1850 swizzle,
1851 &index2,
1852 &ZeroVec,
1853 &indir_index);
1854
1855 index2D.i[0] += indir_index.i[0];
1856 index2D.i[1] += indir_index.i[1];
1857 index2D.i[2] += indir_index.i[2];
1858 index2D.i[3] += indir_index.i[3];
1859
1860 /* for disabled execution channels, zero-out the index to
1861 * avoid using a potential garbage value.
1862 */
1863 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1864 if ((execmask & (1 << i)) == 0) {
1865 index2D.i[i] = 0;
1866 }
1867 }
1868 }
1869
1870 /* If by any chance there was a need for a 3D array of register
1871 * files, we would have to check whether Dimension is followed
1872 * by a dimension register and continue the saga.
1873 */
1874 } else {
1875 index2D.i[0] =
1876 index2D.i[1] =
1877 index2D.i[2] =
1878 index2D.i[3] = 0;
1879 }
1880
1881 switch (reg->Register.File) {
1882 case TGSI_FILE_NULL:
1883 dst = &null;
1884 break;
1885
1886 case TGSI_FILE_OUTPUT:
1887 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1888 + reg->Register.Index;
1889 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1890 #if 0
1891 debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1892 mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
1893 reg->Register.Index);
1894 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1895 debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1896 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1897 if (execmask & (1 << i))
1898 debug_printf("%f, ", chan->f[i]);
1899 debug_printf(")\n");
1900 }
1901 #endif
1902 break;
1903
1904 case TGSI_FILE_TEMPORARY:
1905 index = reg->Register.Index;
1906 assert( index < TGSI_EXEC_NUM_TEMPS );
1907 dst = &mach->Temps[offset + index].xyzw[chan_index];
1908 break;
1909
1910 case TGSI_FILE_ADDRESS:
1911 index = reg->Register.Index;
1912 dst = &mach->Addrs[index].xyzw[chan_index];
1913 break;
1914
1915 default:
1916 assert( 0 );
1917 return NULL;
1918 }
1919
1920 return dst;
1921 }
1922
1923 static void
1924 store_dest_double(struct tgsi_exec_machine *mach,
1925 const union tgsi_exec_channel *chan,
1926 const struct tgsi_full_dst_register *reg,
1927 uint chan_index,
1928 enum tgsi_exec_datatype dst_datatype)
1929 {
1930 union tgsi_exec_channel *dst;
1931 const uint execmask = mach->ExecMask;
1932 int i;
1933
1934 dst = store_dest_dstret(mach, chan, reg, chan_index, dst_datatype);
1935 if (!dst)
1936 return;
1937
1938 /* doubles path */
1939 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1940 if (execmask & (1 << i))
1941 dst->i[i] = chan->i[i];
1942 }
1943
1944 static void
1945 store_dest(struct tgsi_exec_machine *mach,
1946 const union tgsi_exec_channel *chan,
1947 const struct tgsi_full_dst_register *reg,
1948 const struct tgsi_full_instruction *inst,
1949 uint chan_index,
1950 enum tgsi_exec_datatype dst_datatype)
1951 {
1952 union tgsi_exec_channel *dst;
1953 const uint execmask = mach->ExecMask;
1954 int i;
1955
1956 dst = store_dest_dstret(mach, chan, reg, chan_index, dst_datatype);
1957 if (!dst)
1958 return;
1959
1960 if (!inst->Instruction.Saturate) {
1961 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1962 if (execmask & (1 << i))
1963 dst->i[i] = chan->i[i];
1964 }
1965 else {
1966 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1967 if (execmask & (1 << i)) {
1968 if (chan->f[i] < 0.0f)
1969 dst->f[i] = 0.0f;
1970 else if (chan->f[i] > 1.0f)
1971 dst->f[i] = 1.0f;
1972 else
1973 dst->i[i] = chan->i[i];
1974 }
1975 }
1976 }
1977
/*
 * Fetch channel CHAN of source operand INDEX of the current instruction
 * into VAL.  Both expect 'mach' and 'inst' to be in scope at the point of
 * use.  FETCH treats the operand as float, IFETCH as signed integer.
 */
#define FETCH(VAL,INDEX,CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

#define IFETCH(VAL,INDEX,CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)
1983
1984
1985 /**
1986 * Execute ARB-style KIL which is predicated by a src register.
1987 * Kill fragment if any of the four values is less than zero.
1988 */
1989 static void
1990 exec_kill_if(struct tgsi_exec_machine *mach,
1991 const struct tgsi_full_instruction *inst)
1992 {
1993 uint uniquemask;
1994 uint chan_index;
1995 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1996 union tgsi_exec_channel r[1];
1997
1998 /* This mask stores component bits that were already tested. */
1999 uniquemask = 0;
2000
2001 for (chan_index = 0; chan_index < 4; chan_index++)
2002 {
2003 uint swizzle;
2004 uint i;
2005
2006 /* unswizzle channel */
2007 swizzle = tgsi_util_get_full_src_register_swizzle (
2008 &inst->Src[0],
2009 chan_index);
2010
2011 /* check if the component has not been already tested */
2012 if (uniquemask & (1 << swizzle))
2013 continue;
2014 uniquemask |= 1 << swizzle;
2015
2016 FETCH(&r[0], 0, chan_index);
2017 for (i = 0; i < 4; i++)
2018 if (r[0].f[i] < 0.0f)
2019 kilmask |= 1 << i;
2020 }
2021
2022 /* restrict to fragments currently executing */
2023 kilmask &= mach->ExecMask;
2024
2025 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2026 }
2027
2028 /**
2029 * Unconditional fragment kill/discard.
2030 */
2031 static void
2032 exec_kill(struct tgsi_exec_machine *mach)
2033 {
2034 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2035
2036 /* kill fragment for all fragments currently executing */
2037 kilmask = mach->ExecMask;
2038 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2039 }
2040
2041 static void
2042 emit_vertex(struct tgsi_exec_machine *mach,
2043 const struct tgsi_full_instruction *inst)
2044 {
2045 union tgsi_exec_channel r[1];
2046 unsigned stream_id;
2047 unsigned *prim_count;
2048 /* FIXME: check for exec mask correctly
2049 unsigned i;
2050 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2051 if ((mach->ExecMask & (1 << i)))
2052 */
2053 IFETCH(&r[0], 0, TGSI_CHAN_X);
2054 stream_id = r[0].u[0];
2055 prim_count = &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
2056 if (mach->ExecMask) {
2057 if (mach->Primitives[stream_id][*prim_count] >= mach->MaxOutputVertices)
2058 return;
2059
2060 if (mach->Primitives[stream_id][*prim_count] == 0)
2061 mach->PrimitiveOffsets[stream_id][*prim_count] = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0];
2062 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
2063 mach->Primitives[stream_id][*prim_count]++;
2064 }
2065 }
2066
2067 static void
2068 emit_primitive(struct tgsi_exec_machine *mach,
2069 const struct tgsi_full_instruction *inst)
2070 {
2071 unsigned *prim_count;
2072 union tgsi_exec_channel r[1];
2073 unsigned stream_id = 0;
2074 /* FIXME: check for exec mask correctly
2075 unsigned i;
2076 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2077 if ((mach->ExecMask & (1 << i)))
2078 */
2079 if (inst) {
2080 IFETCH(&r[0], 0, TGSI_CHAN_X);
2081 stream_id = r[0].u[0];
2082 }
2083 prim_count = &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
2084 if (mach->ExecMask) {
2085 ++(*prim_count);
2086 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
2087 mach->Primitives[stream_id][*prim_count] = 0;
2088 }
2089 }
2090
2091 static void
2092 conditional_emit_primitive(struct tgsi_exec_machine *mach)
2093 {
2094 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
2095 int emitted_verts =
2096 mach->Primitives[0][mach->Temps[temp_prim_idxs[0].idx].xyzw[temp_prim_idxs[0].chan].u[0]];
2097 if (emitted_verts) {
2098 emit_primitive(mach, NULL);
2099 }
2100 }
2101 }
2102
2103
2104 /*
2105 * Fetch four texture samples using STR texture coordinates.
2106 */
2107 static void
2108 fetch_texel( struct tgsi_sampler *sampler,
2109 const unsigned sview_idx,
2110 const unsigned sampler_idx,
2111 const union tgsi_exec_channel *s,
2112 const union tgsi_exec_channel *t,
2113 const union tgsi_exec_channel *p,
2114 const union tgsi_exec_channel *c0,
2115 const union tgsi_exec_channel *c1,
2116 float derivs[3][2][TGSI_QUAD_SIZE],
2117 const int8_t offset[3],
2118 enum tgsi_sampler_control control,
2119 union tgsi_exec_channel *r,
2120 union tgsi_exec_channel *g,
2121 union tgsi_exec_channel *b,
2122 union tgsi_exec_channel *a )
2123 {
2124 uint j;
2125 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2126
2127 /* FIXME: handle explicit derivs, offsets */
2128 sampler->get_samples(sampler, sview_idx, sampler_idx,
2129 s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
2130
2131 for (j = 0; j < 4; j++) {
2132 r->f[j] = rgba[0][j];
2133 g->f[j] = rgba[1][j];
2134 b->f[j] = rgba[2][j];
2135 a->f[j] = rgba[3][j];
2136 }
2137 }
2138
2139
/*
 * Texture instruction variants, passed as the 'modifier' argument of
 * exec_tex() and exec_sample().
 */
#define TEX_MODIFIER_NONE           0  /* plain lookup, no extra operand */
#define TEX_MODIFIER_PROJECTED      1  /* coordinates are divided by the extra operand */
#define TEX_MODIFIER_LOD_BIAS       2  /* selects TGSI_SAMPLER_LOD_BIAS */
#define TEX_MODIFIER_EXPLICIT_LOD   3  /* selects TGSI_SAMPLER_LOD_EXPLICIT */
#define TEX_MODIFIER_LEVEL_ZERO     4  /* selects TGSI_SAMPLER_LOD_ZERO (exec_sample only) */
#define TEX_MODIFIER_GATHER         5  /* selects TGSI_SAMPLER_GATHER */
2146
2147 /*
2148 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
2149 */
2150 static void
2151 fetch_texel_offsets(struct tgsi_exec_machine *mach,
2152 const struct tgsi_full_instruction *inst,
2153 int8_t offsets[3])
2154 {
2155 if (inst->Texture.NumOffsets == 1) {
2156 union tgsi_exec_channel index;
2157 union tgsi_exec_channel offset[3];
2158 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
2159 fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2160 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
2161 fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2162 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
2163 fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2164 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
2165 offsets[0] = offset[0].i[0];
2166 offsets[1] = offset[1].i[0];
2167 offsets[2] = offset[2].i[0];
2168 } else {
2169 assert(inst->Texture.NumOffsets == 0);
2170 offsets[0] = offsets[1] = offsets[2] = 0;
2171 }
2172 }
2173
2174
2175 /*
2176 * Fetch dx and dy values for one channel (s, t or r).
2177 * Put dx values into one float array, dy values into another.
2178 */
2179 static void
2180 fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
2181 const struct tgsi_full_instruction *inst,
2182 unsigned regdsrcx,
2183 unsigned chan,
2184 float derivs[2][TGSI_QUAD_SIZE])
2185 {
2186 union tgsi_exec_channel d;
2187 FETCH(&d, regdsrcx, chan);
2188 derivs[0][0] = d.f[0];
2189 derivs[0][1] = d.f[1];
2190 derivs[0][2] = d.f[2];
2191 derivs[0][3] = d.f[3];
2192 FETCH(&d, regdsrcx + 1, chan);
2193 derivs[1][0] = d.f[0];
2194 derivs[1][1] = d.f[1];
2195 derivs[1][2] = d.f[2];
2196 derivs[1][3] = d.f[3];
2197 }
2198
2199 static uint
2200 fetch_sampler_unit(struct tgsi_exec_machine *mach,
2201 const struct tgsi_full_instruction *inst,
2202 uint sampler)
2203 {
2204 uint unit = 0;
2205 int i;
2206 if (inst->Src[sampler].Register.Indirect) {
2207 const struct tgsi_full_src_register *reg = &inst->Src[sampler];
2208 union tgsi_exec_channel indir_index, index2;
2209 const uint execmask = mach->ExecMask;
2210 index2.i[0] =
2211 index2.i[1] =
2212 index2.i[2] =
2213 index2.i[3] = reg->Indirect.Index;
2214
2215 fetch_src_file_channel(mach,
2216 reg->Indirect.File,
2217 reg->Indirect.Swizzle,
2218 &index2,
2219 &ZeroVec,
2220 &indir_index);
2221 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2222 if (execmask & (1 << i)) {
2223 unit = inst->Src[sampler].Register.Index + indir_index.i[i];
2224 break;
2225 }
2226 }
2227
2228 } else {
2229 unit = inst->Src[sampler].Register.Index;
2230 }
2231 return unit;
2232 }
2233
2234 /*
2235 * execute a texture instruction.
2236 *
2237 * modifier is used to control the channel routing for the
2238 * instruction variants like proj, lod, and texture with lod bias.
2239 * sampler indicates which src register the sampler is contained in.
2240 */
2241 static void
2242 exec_tex(struct tgsi_exec_machine *mach,
2243 const struct tgsi_full_instruction *inst,
2244 uint modifier, uint sampler)
2245 {
2246 const union tgsi_exec_channel *args[5], *proj = NULL;
2247 union tgsi_exec_channel r[5];
2248 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2249 uint chan;
2250 uint unit;
2251 int8_t offsets[3];
2252 int dim, shadow_ref, i;
2253
2254 unit = fetch_sampler_unit(mach, inst, sampler);
2255 /* always fetch all 3 offsets, overkill but keeps code simple */
2256 fetch_texel_offsets(mach, inst, offsets);
2257
2258 assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
2259 assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
2260
2261 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2262 shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);
2263
2264 assert(dim <= 4);
2265 if (shadow_ref >= 0)
2266 assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args));
2267
2268 /* fetch modifier to the last argument */
2269 if (modifier != TEX_MODIFIER_NONE) {
2270 const int last = ARRAY_SIZE(args) - 1;
2271
2272 /* fetch modifier from src0.w or src1.x */
2273 if (sampler == 1) {
2274 assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);
2275 FETCH(&r[last], 0, TGSI_CHAN_W);
2276 }
2277 else {
2278 FETCH(&r[last], 1, TGSI_CHAN_X);
2279 }
2280
2281 if (modifier != TEX_MODIFIER_PROJECTED) {
2282 args[last] = &r[last];
2283 }
2284 else {
2285 proj = &r[last];
2286 args[last] = &ZeroVec;
2287 }
2288
2289 /* point unused arguments to zero vector */
2290 for (i = dim; i < last; i++)
2291 args[i] = &ZeroVec;
2292
2293 if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
2294 control = TGSI_SAMPLER_LOD_EXPLICIT;
2295 else if (modifier == TEX_MODIFIER_LOD_BIAS)
2296 control = TGSI_SAMPLER_LOD_BIAS;
2297 else if (modifier == TEX_MODIFIER_GATHER)
2298 control = TGSI_SAMPLER_GATHER;
2299 }
2300 else {
2301 for (i = dim; i < (int)ARRAY_SIZE(args); i++)
2302 args[i] = &ZeroVec;
2303 }
2304
2305 /* fetch coordinates */
2306 for (i = 0; i < dim; i++) {
2307 FETCH(&r[i], 0, TGSI_CHAN_X + i);
2308
2309 if (proj)
2310 micro_div(&r[i], &r[i], proj);
2311
2312 args[i] = &r[i];
2313 }
2314
2315 /* fetch reference value */
2316 if (shadow_ref >= 0) {
2317 FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));
2318
2319 if (proj)
2320 micro_div(&r[shadow_ref], &r[shadow_ref], proj);
2321
2322 args[shadow_ref] = &r[shadow_ref];
2323 }
2324
2325 fetch_texel(mach->Sampler, unit, unit,
2326 args[0], args[1], args[2], args[3], args[4],
2327 NULL, offsets, control,
2328 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2329
2330 #if 0
2331 debug_printf("fetch r: %g %g %g %g\n",
2332 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
2333 debug_printf("fetch g: %g %g %g %g\n",
2334 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
2335 debug_printf("fetch b: %g %g %g %g\n",
2336 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
2337 debug_printf("fetch a: %g %g %g %g\n",
2338 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
2339 #endif
2340
2341 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2342 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2343 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2344 }
2345 }
2346 }
2347
2348 static void
2349 exec_lodq(struct tgsi_exec_machine *mach,
2350 const struct tgsi_full_instruction *inst)
2351 {
2352 uint resource_unit, sampler_unit;
2353 unsigned dim;
2354 unsigned i;
2355 union tgsi_exec_channel coords[4];
2356 const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
2357 union tgsi_exec_channel r[2];
2358
2359 resource_unit = fetch_sampler_unit(mach, inst, 1);
2360 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2361 uint target = mach->SamplerViews[resource_unit].Resource;
2362 dim = tgsi_util_get_texture_coord_dim(target);
2363 sampler_unit = fetch_sampler_unit(mach, inst, 2);
2364 } else {
2365 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2366 sampler_unit = resource_unit;
2367 }
2368 assert(dim <= ARRAY_SIZE(coords));
2369 /* fetch coordinates */
2370 for (i = 0; i < dim; i++) {
2371 FETCH(&coords[i], 0, TGSI_CHAN_X + i);
2372 args[i] = &coords[i];
2373 }
2374 for (i = dim; i < ARRAY_SIZE(coords); i++) {
2375 args[i] = &ZeroVec;
2376 }
2377 mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit,
2378 args[0]->f,
2379 args[1]->f,
2380 args[2]->f,
2381 args[3]->f,
2382 TGSI_SAMPLER_LOD_NONE,
2383 r[0].f,
2384 r[1].f);
2385
2386 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2387 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
2388 TGSI_EXEC_DATA_FLOAT);
2389 }
2390 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2391 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
2392 TGSI_EXEC_DATA_FLOAT);
2393 }
2394 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2395 unsigned char swizzles[4];
2396 unsigned chan;
2397 swizzles[0] = inst->Src[1].Register.SwizzleX;
2398 swizzles[1] = inst->Src[1].Register.SwizzleY;
2399 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2400 swizzles[3] = inst->Src[1].Register.SwizzleW;
2401
2402 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2403 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2404 if (swizzles[chan] >= 2) {
2405 store_dest(mach, &ZeroVec,
2406 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2407 } else {
2408 store_dest(mach, &r[swizzles[chan]],
2409 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2410 }
2411 }
2412 }
2413 } else {
2414 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2415 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
2416 TGSI_EXEC_DATA_FLOAT);
2417 }
2418 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2419 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
2420 TGSI_EXEC_DATA_FLOAT);
2421 }
2422 }
2423 }
2424
2425 static void
2426 exec_txd(struct tgsi_exec_machine *mach,
2427 const struct tgsi_full_instruction *inst)
2428 {
2429 union tgsi_exec_channel r[4];
2430 float derivs[3][2][TGSI_QUAD_SIZE];
2431 uint chan;
2432 uint unit;
2433 int8_t offsets[3];
2434
2435 unit = fetch_sampler_unit(mach, inst, 3);
2436 /* always fetch all 3 offsets, overkill but keeps code simple */
2437 fetch_texel_offsets(mach, inst, offsets);
2438
2439 switch (inst->Texture.Texture) {
2440 case TGSI_TEXTURE_1D:
2441 FETCH(&r[0], 0, TGSI_CHAN_X);
2442
2443 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2444
2445 fetch_texel(mach->Sampler, unit, unit,
2446 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2447 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2448 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2449 break;
2450
2451 case TGSI_TEXTURE_SHADOW1D:
2452 case TGSI_TEXTURE_1D_ARRAY:
2453 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2454 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
2455 FETCH(&r[0], 0, TGSI_CHAN_X);
2456 FETCH(&r[1], 0, TGSI_CHAN_Y);
2457 FETCH(&r[2], 0, TGSI_CHAN_Z);
2458
2459 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2460
2461 fetch_texel(mach->Sampler, unit, unit,
2462 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2463 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2464 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2465 break;
2466
2467 case TGSI_TEXTURE_2D:
2468 case TGSI_TEXTURE_RECT:
2469 FETCH(&r[0], 0, TGSI_CHAN_X);
2470 FETCH(&r[1], 0, TGSI_CHAN_Y);
2471
2472 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2473 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2474
2475 fetch_texel(mach->Sampler, unit, unit,
2476 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2477 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2478 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2479 break;
2480
2481
2482 case TGSI_TEXTURE_SHADOW2D:
2483 case TGSI_TEXTURE_SHADOWRECT:
2484 case TGSI_TEXTURE_2D_ARRAY:
2485 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2486 /* only SHADOW2D_ARRAY actually needs W */
2487 FETCH(&r[0], 0, TGSI_CHAN_X);
2488 FETCH(&r[1], 0, TGSI_CHAN_Y);
2489 FETCH(&r[2], 0, TGSI_CHAN_Z);
2490 FETCH(&r[3], 0, TGSI_CHAN_W);
2491
2492 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2493 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2494
2495 fetch_texel(mach->Sampler, unit, unit,
2496 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2497 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2498 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2499 break;
2500
2501 case TGSI_TEXTURE_3D:
2502 case TGSI_TEXTURE_CUBE:
2503 case TGSI_TEXTURE_CUBE_ARRAY:
2504 case TGSI_TEXTURE_SHADOWCUBE:
2505 /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */
2506 FETCH(&r[0], 0, TGSI_CHAN_X);
2507 FETCH(&r[1], 0, TGSI_CHAN_Y);
2508 FETCH(&r[2], 0, TGSI_CHAN_Z);
2509 FETCH(&r[3], 0, TGSI_CHAN_W);
2510
2511 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2512 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2513 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
2514
2515 fetch_texel(mach->Sampler, unit, unit,
2516 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2517 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2518 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2519 break;
2520
2521 default:
2522 assert(0);
2523 }
2524
2525 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2526 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2527 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2528 }
2529 }
2530 }
2531
2532
2533 static void
2534 exec_txf(struct tgsi_exec_machine *mach,
2535 const struct tgsi_full_instruction *inst)
2536 {
2537 union tgsi_exec_channel r[4];
2538 uint chan;
2539 uint unit;
2540 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2541 int j;
2542 int8_t offsets[3];
2543 unsigned target;
2544
2545 unit = fetch_sampler_unit(mach, inst, 1);
2546 /* always fetch all 3 offsets, overkill but keeps code simple */
2547 fetch_texel_offsets(mach, inst, offsets);
2548
2549 IFETCH(&r[3], 0, TGSI_CHAN_W);
2550
2551 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2552 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2553 target = mach->SamplerViews[unit].Resource;
2554 }
2555 else {
2556 target = inst->Texture.Texture;
2557 }
2558 switch(target) {
2559 case TGSI_TEXTURE_3D:
2560 case TGSI_TEXTURE_2D_ARRAY:
2561 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2562 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2563 IFETCH(&r[2], 0, TGSI_CHAN_Z);
2564 /* fallthrough */
2565 case TGSI_TEXTURE_2D:
2566 case TGSI_TEXTURE_RECT:
2567 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2568 case TGSI_TEXTURE_SHADOW2D:
2569 case TGSI_TEXTURE_SHADOWRECT:
2570 case TGSI_TEXTURE_1D_ARRAY:
2571 case TGSI_TEXTURE_2D_MSAA:
2572 IFETCH(&r[1], 0, TGSI_CHAN_Y);
2573 /* fallthrough */
2574 case TGSI_TEXTURE_BUFFER:
2575 case TGSI_TEXTURE_1D:
2576 case TGSI_TEXTURE_SHADOW1D:
2577 IFETCH(&r[0], 0, TGSI_CHAN_X);
2578 break;
2579 default:
2580 assert(0);
2581 break;
2582 }
2583
2584 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2585 offsets, rgba);
2586
2587 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2588 r[0].f[j] = rgba[0][j];
2589 r[1].f[j] = rgba[1][j];
2590 r[2].f[j] = rgba[2][j];
2591 r[3].f[j] = rgba[3][j];
2592 }
2593
2594 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2595 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2596 unsigned char swizzles[4];
2597 swizzles[0] = inst->Src[1].Register.SwizzleX;
2598 swizzles[1] = inst->Src[1].Register.SwizzleY;
2599 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2600 swizzles[3] = inst->Src[1].Register.SwizzleW;
2601
2602 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2603 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2604 store_dest(mach, &r[swizzles[chan]],
2605 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2606 }
2607 }
2608 }
2609 else {
2610 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2611 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2612 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2613 }
2614 }
2615 }
2616 }
2617
2618 static void
2619 exec_txq(struct tgsi_exec_machine *mach,
2620 const struct tgsi_full_instruction *inst)
2621 {
2622 int result[4];
2623 union tgsi_exec_channel r[4], src;
2624 uint chan;
2625 uint unit;
2626 int i,j;
2627
2628 unit = fetch_sampler_unit(mach, inst, 1);
2629
2630 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2631
2632 /* XXX: This interface can't return per-pixel values */
2633 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2634
2635 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2636 for (j = 0; j < 4; j++) {
2637 r[j].i[i] = result[j];
2638 }
2639 }
2640
2641 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2642 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2643 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
2644 TGSI_EXEC_DATA_INT);
2645 }
2646 }
2647 }
2648
2649 static void
2650 exec_sample(struct tgsi_exec_machine *mach,
2651 const struct tgsi_full_instruction *inst,
2652 uint modifier, boolean compare)
2653 {
2654 const uint resource_unit = inst->Src[1].Register.Index;
2655 const uint sampler_unit = inst->Src[2].Register.Index;
2656 union tgsi_exec_channel r[5], c1;
2657 const union tgsi_exec_channel *lod = &ZeroVec;
2658 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2659 uint chan;
2660 unsigned char swizzles[4];
2661 int8_t offsets[3];
2662
2663 /* always fetch all 3 offsets, overkill but keeps code simple */
2664 fetch_texel_offsets(mach, inst, offsets);
2665
2666 assert(modifier != TEX_MODIFIER_PROJECTED);
2667
2668 if (modifier != TEX_MODIFIER_NONE) {
2669 if (modifier == TEX_MODIFIER_LOD_BIAS) {
2670 FETCH(&c1, 3, TGSI_CHAN_X);
2671 lod = &c1;
2672 control = TGSI_SAMPLER_LOD_BIAS;
2673 }
2674 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2675 FETCH(&c1, 3, TGSI_CHAN_X);
2676 lod = &c1;
2677 control = TGSI_SAMPLER_LOD_EXPLICIT;
2678 }
2679 else if (modifier == TEX_MODIFIER_GATHER) {
2680 control = TGSI_SAMPLER_GATHER;
2681 }
2682 else {
2683 assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
2684 control = TGSI_SAMPLER_LOD_ZERO;
2685 }
2686 }
2687
2688 FETCH(&r[0], 0, TGSI_CHAN_X);
2689
2690 switch (mach->SamplerViews[resource_unit].Resource) {
2691 case TGSI_TEXTURE_1D:
2692 if (compare) {
2693 FETCH(&r[2], 3, TGSI_CHAN_X);
2694 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2695 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2696 NULL, offsets, control,
2697 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2698 }
2699 else {
2700 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2701 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2702 NULL, offsets, control,
2703 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2704 }
2705 break;
2706
2707 case TGSI_TEXTURE_1D_ARRAY:
2708 case TGSI_TEXTURE_2D:
2709 case TGSI_TEXTURE_RECT:
2710 FETCH(&r[1], 0, TGSI_CHAN_Y);
2711 if (compare) {
2712 FETCH(&r[2], 3, TGSI_CHAN_X);
2713 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2714 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2715 NULL, offsets, control,
2716 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2717 }
2718 else {
2719 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2720 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2721 NULL, offsets, control,
2722 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2723 }
2724 break;
2725
2726 case TGSI_TEXTURE_2D_ARRAY:
2727 case TGSI_TEXTURE_3D:
2728 case TGSI_TEXTURE_CUBE:
2729 FETCH(&r[1], 0, TGSI_CHAN_Y);
2730 FETCH(&r[2], 0, TGSI_CHAN_Z);
2731 if(compare) {
2732 FETCH(&r[3], 3, TGSI_CHAN_X);
2733 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2734 &r[0], &r[1], &r[2], &r[3], lod,
2735 NULL, offsets, control,
2736 &r[0], &r[1], &r[2], &r[3]);
2737 }
2738 else {
2739 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2740 &r[0], &r[1], &r[2], &ZeroVec, lod,
2741 NULL, offsets, control,
2742 &r[0], &r[1], &r[2], &r[3]);
2743 }
2744 break;
2745
2746 case TGSI_TEXTURE_CUBE_ARRAY:
2747 FETCH(&r[1], 0, TGSI_CHAN_Y);
2748 FETCH(&r[2], 0, TGSI_CHAN_Z);
2749 FETCH(&r[3], 0, TGSI_CHAN_W);
2750 if(compare) {
2751 FETCH(&r[4], 3, TGSI_CHAN_X);
2752 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2753 &r[0], &r[1], &r[2], &r[3], &r[4],
2754 NULL, offsets, control,
2755 &r[0], &r[1], &r[2], &r[3]);
2756 }
2757 else {
2758 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2759 &r[0], &r[1], &r[2], &r[3], lod,
2760 NULL, offsets, control,
2761 &r[0], &r[1], &r[2], &r[3]);
2762 }
2763 break;
2764
2765
2766 default:
2767 assert(0);
2768 }
2769
2770 swizzles[0] = inst->Src[1].Register.SwizzleX;
2771 swizzles[1] = inst->Src[1].Register.SwizzleY;
2772 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2773 swizzles[3] = inst->Src[1].Register.SwizzleW;
2774
2775 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2776 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2777 store_dest(mach, &r[swizzles[chan]],
2778 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2779 }
2780 }
2781 }
2782
2783 static void
2784 exec_sample_d(struct tgsi_exec_machine *mach,
2785 const struct tgsi_full_instruction *inst)
2786 {
2787 const uint resource_unit = inst->Src[1].Register.Index;
2788 const uint sampler_unit = inst->Src[2].Register.Index;
2789 union tgsi_exec_channel r[4];
2790 float derivs[3][2][TGSI_QUAD_SIZE];
2791 uint chan;
2792 unsigned char swizzles[4];
2793 int8_t offsets[3];
2794
2795 /* always fetch all 3 offsets, overkill but keeps code simple */
2796 fetch_texel_offsets(mach, inst, offsets);
2797
2798 FETCH(&r[0], 0, TGSI_CHAN_X);
2799
2800 switch (mach->SamplerViews[resource_unit].Resource) {
2801 case TGSI_TEXTURE_1D:
2802 case TGSI_TEXTURE_1D_ARRAY:
2803 /* only 1D array actually needs Y */
2804 FETCH(&r[1], 0, TGSI_CHAN_Y);
2805
2806 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2807
2808 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2809 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2810 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2811 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2812 break;
2813
2814 case TGSI_TEXTURE_2D:
2815 case TGSI_TEXTURE_RECT:
2816 case TGSI_TEXTURE_2D_ARRAY:
2817 /* only 2D array actually needs Z */
2818 FETCH(&r[1], 0, TGSI_CHAN_Y);
2819 FETCH(&r[2], 0, TGSI_CHAN_Z);
2820
2821 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2822 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2823
2824 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2825 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */
2826 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2827 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2828 break;
2829
2830 case TGSI_TEXTURE_3D:
2831 case TGSI_TEXTURE_CUBE:
2832 case TGSI_TEXTURE_CUBE_ARRAY:
2833 /* only cube array actually needs W */
2834 FETCH(&r[1], 0, TGSI_CHAN_Y);
2835 FETCH(&r[2], 0, TGSI_CHAN_Z);
2836 FETCH(&r[3], 0, TGSI_CHAN_W);
2837
2838 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2839 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2840 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2841
2842 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2843 &r[0], &r[1], &r[2], &r[3], &ZeroVec,
2844 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2845 &r[0], &r[1], &r[2], &r[3]);
2846 break;
2847
2848 default:
2849 assert(0);
2850 }
2851
2852 swizzles[0] = inst->Src[1].Register.SwizzleX;
2853 swizzles[1] = inst->Src[1].Register.SwizzleY;
2854 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2855 swizzles[3] = inst->Src[1].Register.SwizzleW;
2856
2857 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2858 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2859 store_dest(mach, &r[swizzles[chan]],
2860 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2861 }
2862 }
2863 }
2864
2865
2866 /**
2867 * Evaluate a constant-valued coefficient at the position of the
2868 * current quad.
2869 */
2870 static void
2871 eval_constant_coef(
2872 struct tgsi_exec_machine *mach,
2873 unsigned attrib,
2874 unsigned chan )
2875 {
2876 unsigned i;
2877
2878 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2879 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2880 }
2881 }
2882
2883 static void
2884 interp_constant_offset(
2885 UNUSED const struct tgsi_exec_machine *mach,
2886 UNUSED unsigned attrib,
2887 UNUSED unsigned chan,
2888 UNUSED float ofs_x,
2889 UNUSED float ofs_y,
2890 UNUSED union tgsi_exec_channel *out_chan)
2891 {
2892 }
2893
2894 /**
2895 * Evaluate a linear-valued coefficient at the position of the
2896 * current quad.
2897 */
2898 static void
2899 interp_linear_offset(
2900 const struct tgsi_exec_machine *mach,
2901 unsigned attrib,
2902 unsigned chan,
2903 float ofs_x,
2904 float ofs_y,
2905 union tgsi_exec_channel *out_chan)
2906 {
2907 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2908 const float dady = mach->InterpCoefs[attrib].dady[chan];
2909 const float delta = ofs_x * dadx + ofs_y * dady;
2910 out_chan->f[0] += delta;
2911 out_chan->f[1] += delta;
2912 out_chan->f[2] += delta;
2913 out_chan->f[3] += delta;
2914 }
2915
2916 static void
2917 eval_linear_coef(struct tgsi_exec_machine *mach,
2918 unsigned attrib,
2919 unsigned chan)
2920 {
2921 const float x = mach->QuadPos.xyzw[0].f[0];
2922 const float y = mach->QuadPos.xyzw[1].f[0];
2923 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2924 const float dady = mach->InterpCoefs[attrib].dady[chan];
2925 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2926
2927 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2928 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2929 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2930 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2931 }
2932
2933 /**
2934 * Evaluate a perspective-valued coefficient at the position of the
2935 * current quad.
2936 */
2937
2938 static void
2939 interp_perspective_offset(
2940 const struct tgsi_exec_machine *mach,
2941 unsigned attrib,
2942 unsigned chan,
2943 float ofs_x,
2944 float ofs_y,
2945 union tgsi_exec_channel *out_chan)
2946 {
2947 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2948 const float dady = mach->InterpCoefs[attrib].dady[chan];
2949 const float *w = mach->QuadPos.xyzw[3].f;
2950 const float delta = ofs_x * dadx + ofs_y * dady;
2951 out_chan->f[0] += delta / w[0];
2952 out_chan->f[1] += delta / w[1];
2953 out_chan->f[2] += delta / w[2];
2954 out_chan->f[3] += delta / w[3];
2955 }
2956
2957 static void
2958 eval_perspective_coef(
2959 struct tgsi_exec_machine *mach,
2960 unsigned attrib,
2961 unsigned chan )
2962 {
2963 const float x = mach->QuadPos.xyzw[0].f[0];
2964 const float y = mach->QuadPos.xyzw[1].f[0];
2965 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2966 const float dady = mach->InterpCoefs[attrib].dady[chan];
2967 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2968 const float *w = mach->QuadPos.xyzw[3].f;
2969 /* divide by W here */
2970 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2971 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2972 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2973 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2974 }
2975
2976
/**
 * Signature shared by the eval_*_coef functions: evaluate channel
 * 'chan' of input attribute 'attrib' for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
2981
2982 static void
2983 exec_declaration(struct tgsi_exec_machine *mach,
2984 const struct tgsi_full_declaration *decl)
2985 {
2986 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2987 mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2988 return;
2989 }
2990
2991 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
2992 if (decl->Declaration.File == TGSI_FILE_INPUT) {
2993 uint first, last, mask;
2994
2995 first = decl->Range.First;
2996 last = decl->Range.Last;
2997 mask = decl->Declaration.UsageMask;
2998
2999 /* XXX we could remove this special-case code since
3000 * mach->InterpCoefs[first].a0 should already have the
3001 * front/back-face value. But we should first update the
3002 * ureg code to emit the right UsageMask value (WRITEMASK_X).
3003 * Then, we could remove the tgsi_exec_machine::Face field.
3004 */
3005 /* XXX make FACE a system value */
3006 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
3007 uint i;
3008
3009 assert(decl->Semantic.Index == 0);
3010 assert(first == last);
3011
3012 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3013 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
3014 }
3015 } else {
3016 eval_coef_func eval;
3017 apply_sample_offset_func interp;
3018 uint i, j;
3019
3020 switch (decl->Interp.Interpolate) {
3021 case TGSI_INTERPOLATE_CONSTANT:
3022 eval = eval_constant_coef;
3023 interp = interp_constant_offset;
3024 break;
3025
3026 case TGSI_INTERPOLATE_LINEAR:
3027 eval = eval_linear_coef;
3028 interp = interp_linear_offset;
3029 break;
3030
3031 case TGSI_INTERPOLATE_PERSPECTIVE:
3032 eval = eval_perspective_coef;
3033 interp = interp_perspective_offset;
3034 break;
3035
3036 case TGSI_INTERPOLATE_COLOR:
3037 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
3038 interp = mach->flatshade_color ? interp_constant_offset : interp_perspective_offset;
3039 break;
3040
3041 default:
3042 assert(0);
3043 return;
3044 }
3045
3046 for (i = first; i <= last; i++)
3047 mach->InputSampleOffsetApply[i] = interp;
3048
3049 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3050 if (mask & (1 << j)) {
3051 for (i = first; i <= last; i++) {
3052 eval(mach, i, j);
3053 }
3054 }
3055 }
3056 }
3057
3058 if (DEBUG_EXECUTION) {
3059 uint i, j;
3060 for (i = first; i <= last; ++i) {
3061 debug_printf("IN[%2u] = ", i);
3062 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3063 if (j > 0) {
3064 debug_printf(" ");
3065 }
3066 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3067 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
3068 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
3069 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
3070 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
3071 }
3072 }
3073 }
3074 }
3075 }
3076
3077 }
3078
/** Per-channel operation with one source operand. */
typedef void (*micro_unary_op)(union tgsi_exec_channel *dst,
                               const union tgsi_exec_channel *src);
3081
3082 static void
3083 exec_scalar_unary(struct tgsi_exec_machine *mach,
3084 const struct tgsi_full_instruction *inst,
3085 micro_unary_op op,
3086 enum tgsi_exec_datatype dst_datatype,
3087 enum tgsi_exec_datatype src_datatype)
3088 {
3089 unsigned int chan;
3090 union tgsi_exec_channel src;
3091 union tgsi_exec_channel dst;
3092
3093 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
3094 op(&dst, &src);
3095 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3096 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3097 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
3098 }
3099 }
3100 }
3101
3102 static void
3103 exec_vector_unary(struct tgsi_exec_machine *mach,
3104 const struct tgsi_full_instruction *inst,
3105 micro_unary_op op,
3106 enum tgsi_exec_datatype dst_datatype,
3107 enum tgsi_exec_datatype src_datatype)
3108 {
3109 unsigned int chan;
3110 struct tgsi_exec_vector dst;
3111
3112 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3113 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3114 union tgsi_exec_channel src;
3115
3116 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
3117 op(&dst.xyzw[chan], &src);
3118 }
3119 }
3120 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3121 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3122 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3123 }
3124 }
3125 }
3126
/** Per-channel operation with two source operands. */
typedef void (*micro_binary_op)(union tgsi_exec_channel *dst,
                                const union tgsi_exec_channel *src0,
                                const union tgsi_exec_channel *src1);
3130
3131 static void
3132 exec_scalar_binary(struct tgsi_exec_machine *mach,
3133 const struct tgsi_full_instruction *inst,
3134 micro_binary_op op,
3135 enum tgsi_exec_datatype dst_datatype,
3136 enum tgsi_exec_datatype src_datatype)
3137 {
3138 unsigned int chan;
3139 union tgsi_exec_channel src[2];
3140 union tgsi_exec_channel dst;
3141
3142 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
3143 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype);
3144 op(&dst, &src[0], &src[1]);
3145 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3146 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3147 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
3148 }
3149 }
3150 }
3151
3152 static void
3153 exec_vector_binary(struct tgsi_exec_machine *mach,
3154 const struct tgsi_full_instruction *inst,
3155 micro_binary_op op,
3156 enum tgsi_exec_datatype dst_datatype,
3157 enum tgsi_exec_datatype src_datatype)
3158 {
3159 unsigned int chan;
3160 struct tgsi_exec_vector dst;
3161
3162 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3163 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3164 union tgsi_exec_channel src[2];
3165
3166 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3167 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3168 op(&dst.xyzw[chan], &src[0], &src[1]);
3169 }
3170 }
3171 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3172 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3173 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3174 }
3175 }
3176 }
3177
/**
 * Signature of a per-channel operation taking three source operands.
 * The callee writes its result through \p result.
 */
typedef void (*micro_trinary_op)(union tgsi_exec_channel *result,
                                 const union tgsi_exec_channel *a,
                                 const union tgsi_exec_channel *b,
                                 const union tgsi_exec_channel *c);
3182
3183 static void
3184 exec_vector_trinary(struct tgsi_exec_machine *mach,
3185 const struct tgsi_full_instruction *inst,
3186 micro_trinary_op op,
3187 enum tgsi_exec_datatype dst_datatype,
3188 enum tgsi_exec_datatype src_datatype)
3189 {
3190 unsigned int chan;
3191 struct tgsi_exec_vector dst;
3192
3193 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3194 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3195 union tgsi_exec_channel src[3];
3196
3197 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3198 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3199 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3200 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3201 }
3202 }
3203 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3204 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3205 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3206 }
3207 }
3208 }
3209
/**
 * Signature of a per-channel operation taking four source operands.
 * The callee writes its result through \p result.
 */
typedef void (*micro_quaternary_op)(union tgsi_exec_channel *result,
                                    const union tgsi_exec_channel *a,
                                    const union tgsi_exec_channel *b,
                                    const union tgsi_exec_channel *c,
                                    const union tgsi_exec_channel *d);
3215
3216 static void
3217 exec_vector_quaternary(struct tgsi_exec_machine *mach,
3218 const struct tgsi_full_instruction *inst,
3219 micro_quaternary_op op,
3220 enum tgsi_exec_datatype dst_datatype,
3221 enum tgsi_exec_datatype src_datatype)
3222 {
3223 unsigned int chan;
3224 struct tgsi_exec_vector dst;
3225
3226 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3227 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3228 union tgsi_exec_channel src[4];
3229
3230 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3231 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3232 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3233 fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype);
3234 op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]);
3235 }
3236 }
3237 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3238 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3239 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3240 }
3241 }
3242 }
3243
3244 static void
3245 exec_dp3(struct tgsi_exec_machine *mach,
3246 const struct tgsi_full_instruction *inst)
3247 {
3248 unsigned int chan;
3249 union tgsi_exec_channel arg[3];
3250
3251 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3252 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3253 micro_mul(&arg[2], &arg[0], &arg[1]);
3254
3255 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
3256 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3257 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3258 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3259 }
3260
3261 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3262 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3263 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3264 }
3265 }
3266 }
3267
3268 static void
3269 exec_dp4(struct tgsi_exec_machine *mach,
3270 const struct tgsi_full_instruction *inst)
3271 {
3272 unsigned int chan;
3273 union tgsi_exec_channel arg[3];
3274
3275 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3276 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3277 micro_mul(&arg[2], &arg[0], &arg[1]);
3278
3279 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
3280 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3281 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3282 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3283 }
3284
3285 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3286 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3287 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3288 }
3289 }
3290 }
3291
3292 static void
3293 exec_dp2(struct tgsi_exec_machine *mach,
3294 const struct tgsi_full_instruction *inst)
3295 {
3296 unsigned int chan;
3297 union tgsi_exec_channel arg[3];
3298
3299 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3300 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3301 micro_mul(&arg[2], &arg[0], &arg[1]);
3302
3303 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3304 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3305 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3306
3307 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3308 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3309 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3310 }
3311 }
3312 }
3313
3314 static void
3315 exec_pk2h(struct tgsi_exec_machine *mach,
3316 const struct tgsi_full_instruction *inst)
3317 {
3318 unsigned chan;
3319 union tgsi_exec_channel arg[2], dst;
3320
3321 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3322 fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3323 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3324 dst.u[chan] = util_float_to_half(arg[0].f[chan]) |
3325 (util_float_to_half(arg[1].f[chan]) << 16);
3326 }
3327 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3328 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3329 store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
3330 }
3331 }
3332 }
3333
3334 static void
3335 exec_up2h(struct tgsi_exec_machine *mach,
3336 const struct tgsi_full_instruction *inst)
3337 {
3338 unsigned chan;
3339 union tgsi_exec_channel arg, dst[2];
3340
3341 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3342 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3343 dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff);
3344 dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16);
3345 }
3346 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3347 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3348 store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3349 }
3350 }
3351 }
3352
3353 static void
3354 micro_ucmp(union tgsi_exec_channel *dst,
3355 const union tgsi_exec_channel *src0,
3356 const union tgsi_exec_channel *src1,
3357 const union tgsi_exec_channel *src2)
3358 {
3359 dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0];
3360 dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1];
3361 dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2];
3362 dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3];
3363 }
3364
3365 static void
3366 exec_ucmp(struct tgsi_exec_machine *mach,
3367 const struct tgsi_full_instruction *inst)
3368 {
3369 unsigned int chan;
3370 struct tgsi_exec_vector dst;
3371
3372 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3373 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3374 union tgsi_exec_channel src[3];
3375
3376 fetch_source(mach, &src[0], &inst->Src[0], chan,
3377 TGSI_EXEC_DATA_UINT);
3378 fetch_source(mach, &src[1], &inst->Src[1], chan,
3379 TGSI_EXEC_DATA_FLOAT);
3380 fetch_source(mach, &src[2], &inst->Src[2], chan,
3381 TGSI_EXEC_DATA_FLOAT);
3382 micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3383 }
3384 }
3385 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3386 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3387 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan,
3388 TGSI_EXEC_DATA_FLOAT);
3389 }
3390 }
3391 }
3392
3393 static void
3394 exec_dst(struct tgsi_exec_machine *mach,
3395 const struct tgsi_full_instruction *inst)
3396 {
3397 union tgsi_exec_channel r[2];
3398 union tgsi_exec_channel d[4];
3399
3400 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3401 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3402 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3403 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
3404 }
3405 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3406 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3407 }
3408 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3409 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3410 }
3411
3412 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3413 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3414 }
3415 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3416 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3417 }
3418 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3419 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3420 }
3421 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3422 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3423 }
3424 }
3425
3426 static void
3427 exec_log(struct tgsi_exec_machine *mach,
3428 const struct tgsi_full_instruction *inst)
3429 {
3430 union tgsi_exec_channel r[3];
3431
3432 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3433 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */
3434 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */
3435 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */
3436 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3437 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3438 }
3439 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3440 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */
3441 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
3442 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3443 }
3444 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3445 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3446 }
3447 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3448 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3449 }
3450 }
3451
3452 static void
3453 exec_exp(struct tgsi_exec_machine *mach,
3454 const struct tgsi_full_instruction *inst)
3455 {
3456 union tgsi_exec_channel r[3];
3457
3458 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3459 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */
3460 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3461 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */
3462 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3463 }
3464 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3465 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
3466 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3467 }
3468 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3469 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */
3470 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3471 }
3472 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3473 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3474 }
3475 }
3476
3477 static void
3478 exec_lit(struct tgsi_exec_machine *mach,
3479 const struct tgsi_full_instruction *inst)
3480 {
3481 union tgsi_exec_channel r[3];
3482 union tgsi_exec_channel d[3];
3483
3484 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3485 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3486 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3487 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3488 micro_max(&r[1], &r[1], &ZeroVec);
3489
3490 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3491 micro_min(&r[2], &r[2], &P128Vec);
3492 micro_max(&r[2], &r[2], &M128Vec);
3493 micro_pow(&r[1], &r[1], &r[2]);
3494 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3495 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3496 }
3497 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3498 micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
3499 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3500 }
3501 }
3502 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3503 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3504 }
3505
3506 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3507 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3508 }
3509 }
3510
3511 static void
3512 exec_break(struct tgsi_exec_machine *mach)
3513 {
3514 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3515 /* turn off loop channels for each enabled exec channel */
3516 mach->LoopMask &= ~mach->ExecMask;
3517 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3518 UPDATE_EXEC_MASK(mach);
3519 } else {
3520 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3521
3522 mach->Switch.mask = 0x0;
3523
3524 UPDATE_EXEC_MASK(mach);
3525 }
3526 }
3527
3528 static void
3529 exec_switch(struct tgsi_exec_machine *mach,
3530 const struct tgsi_full_instruction *inst)
3531 {
3532 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3533 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3534
3535 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3536 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3537 mach->Switch.mask = 0x0;
3538 mach->Switch.defaultMask = 0x0;
3539
3540 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3541 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3542
3543 UPDATE_EXEC_MASK(mach);
3544 }
3545
3546 static void
3547 exec_case(struct tgsi_exec_machine *mach,
3548 const struct tgsi_full_instruction *inst)
3549 {
3550 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3551 union tgsi_exec_channel src;
3552 uint mask = 0;
3553
3554 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3555
3556 if (mach->Switch.selector.u[0] == src.u[0]) {
3557 mask |= 0x1;
3558 }
3559 if (mach->Switch.selector.u[1] == src.u[1]) {
3560 mask |= 0x2;
3561 }
3562 if (mach->Switch.selector.u[2] == src.u[2]) {
3563 mask |= 0x4;
3564 }
3565 if (mach->Switch.selector.u[3] == src.u[3]) {
3566 mask |= 0x8;
3567 }
3568
3569 mach->Switch.defaultMask |= mask;
3570
3571 mach->Switch.mask |= mask & prevMask;
3572
3573 UPDATE_EXEC_MASK(mach);
3574 }
3575
3576 /* FIXME: this will only work if default is last */
3577 static void
3578 exec_default(struct tgsi_exec_machine *mach)
3579 {
3580 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3581
3582 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3583
3584 UPDATE_EXEC_MASK(mach);
3585 }
3586
3587 static void
3588 exec_endswitch(struct tgsi_exec_machine *mach)
3589 {
3590 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3591 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3592
3593 UPDATE_EXEC_MASK(mach);
3594 }
3595
/** Operation on 64-bit channels: double-channel source(s) to double-channel result. */
typedef void (*micro_dop)(union tgsi_double_channel *result,
                          const union tgsi_double_channel *operand);

/** Operation taking a double-channel operand plus a 32-bit channel operand. */
typedef void (*micro_dop_sop)(union tgsi_double_channel *result,
                              const union tgsi_double_channel *operand64,
                              union tgsi_exec_channel *operand32);

/** Operation producing a double-channel result from a 32-bit channel operand. */
typedef void (*micro_dop_s)(union tgsi_double_channel *result,
                            const union tgsi_exec_channel *operand);

/** Operation producing a 32-bit channel result from a double-channel operand. */
typedef void (*micro_sop_d)(union tgsi_exec_channel *result,
                            const union tgsi_double_channel *operand);
3608
3609 static void
3610 fetch_double_channel(struct tgsi_exec_machine *mach,
3611 union tgsi_double_channel *chan,
3612 const struct tgsi_full_src_register *reg,
3613 uint chan_0,
3614 uint chan_1)
3615 {
3616 union tgsi_exec_channel src[2];
3617 uint i;
3618
3619 fetch_source_d(mach, &src[0], reg, chan_0);
3620 fetch_source_d(mach, &src[1], reg, chan_1);
3621
3622 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3623 chan->u[i][0] = src[0].u[i];
3624 chan->u[i][1] = src[1].u[i];
3625 }
3626 if (reg->Register.Absolute) {
3627 micro_dabs(chan, chan);
3628 }
3629 if (reg->Register.Negate) {
3630 micro_dneg(chan, chan);
3631 }
3632 }
3633
3634 static void
3635 store_double_channel(struct tgsi_exec_machine *mach,
3636 const union tgsi_double_channel *chan,
3637 const struct tgsi_full_dst_register *reg,
3638 const struct tgsi_full_instruction *inst,
3639 uint chan_0,
3640 uint chan_1)
3641 {
3642 union tgsi_exec_channel dst[2];
3643 uint i;
3644 union tgsi_double_channel temp;
3645 const uint execmask = mach->ExecMask;
3646
3647 if (!inst->Instruction.Saturate) {
3648 for (i = 0; i < TGSI_QUAD_SIZE; i++)
3649 if (execmask & (1 << i)) {
3650 dst[0].u[i] = chan->u[i][0];
3651 dst[1].u[i] = chan->u[i][1];
3652 }
3653 }
3654 else {
3655 for (i = 0; i < TGSI_QUAD_SIZE; i++)
3656 if (execmask & (1 << i)) {
3657 if (chan->d[i] < 0.0)
3658 temp.d[i] = 0.0;
3659 else if (chan->d[i] > 1.0)
3660 temp.d[i] = 1.0;
3661 else
3662 temp.d[i] = chan->d[i];
3663
3664 dst[0].u[i] = temp.u[i][0];
3665 dst[1].u[i] = temp.u[i][1];
3666 }
3667 }
3668
3669 store_dest_double(mach, &dst[0], reg, chan_0, TGSI_EXEC_DATA_UINT);
3670 if (chan_1 != (unsigned)-1)
3671 store_dest_double(mach, &dst[1], reg, chan_1, TGSI_EXEC_DATA_UINT);
3672 }
3673
3674 static void
3675 exec_double_unary(struct tgsi_exec_machine *mach,
3676 const struct tgsi_full_instruction *inst,
3677 micro_dop op)
3678 {
3679 union tgsi_double_channel src;
3680 union tgsi_double_channel dst;
3681
3682 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3683 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3684 op(&dst, &src);
3685 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3686 }
3687 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3688 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3689 op(&dst, &src);
3690 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3691 }
3692 }
3693
3694 static void
3695 exec_double_binary(struct tgsi_exec_machine *mach,
3696 const struct tgsi_full_instruction *inst,
3697 micro_dop op,
3698 enum tgsi_exec_datatype dst_datatype)
3699 {
3700 union tgsi_double_channel src[2];
3701 union tgsi_double_channel dst;
3702 int first_dest_chan, second_dest_chan;
3703 int wmask;
3704
3705 wmask = inst->Dst[0].Register.WriteMask;
3706 /* these are & because of the way DSLT etc store their destinations */
3707 if (wmask & TGSI_WRITEMASK_XY) {
3708 first_dest_chan = TGSI_CHAN_X;
3709 second_dest_chan = TGSI_CHAN_Y;
3710 if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3711 first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y;
3712 second_dest_chan = -1;
3713 }
3714
3715 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3716 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3717 op(&dst, src);
3718 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3719 }
3720
3721 if (wmask & TGSI_WRITEMASK_ZW) {
3722 first_dest_chan = TGSI_CHAN_Z;
3723 second_dest_chan = TGSI_CHAN_W;
3724 if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3725 first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W;
3726 second_dest_chan = -1;
3727 }
3728
3729 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3730 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3731 op(&dst, src);
3732 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3733 }
3734 }
3735
3736 static void
3737 exec_double_trinary(struct tgsi_exec_machine *mach,
3738 const struct tgsi_full_instruction *inst,
3739 micro_dop op)
3740 {
3741 union tgsi_double_channel src[3];
3742 union tgsi_double_channel dst;
3743
3744 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3745 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3746 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3747 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y);
3748 op(&dst, src);
3749 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3750 }
3751 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3752 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3753 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3754 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W);
3755 op(&dst, src);
3756 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3757 }
3758 }
3759
3760 static void
3761 exec_dldexp(struct tgsi_exec_machine *mach,
3762 const struct tgsi_full_instruction *inst)
3763 {
3764 union tgsi_double_channel src0;
3765 union tgsi_exec_channel src1;
3766 union tgsi_double_channel dst;
3767 int wmask;
3768
3769 wmask = inst->Dst[0].Register.WriteMask;
3770 if (wmask & TGSI_WRITEMASK_XY) {
3771 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3772 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3773 micro_dldexp(&dst, &src0, &src1);
3774 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3775 }
3776
3777 if (wmask & TGSI_WRITEMASK_ZW) {
3778 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3779 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3780 micro_dldexp(&dst, &src0, &src1);
3781 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3782 }
3783 }
3784
3785 static void
3786 exec_dfracexp(struct tgsi_exec_machine *mach,
3787 const struct tgsi_full_instruction *inst)
3788 {
3789 union tgsi_double_channel src;
3790 union tgsi_double_channel dst;
3791 union tgsi_exec_channel dst_exp;
3792
3793 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3794 micro_dfracexp(&dst, &dst_exp, &src);
3795 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)
3796 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3797 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)
3798 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3799 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3800 if (inst->Dst[1].Register.WriteMask & (1 << chan))
3801 store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan, TGSI_EXEC_DATA_INT);
3802 }
3803 }
3804
3805 static void
3806 exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
3807 const struct tgsi_full_instruction *inst,
3808 micro_dop_sop op)
3809 {
3810 union tgsi_double_channel src0;
3811 union tgsi_exec_channel src1;
3812 union tgsi_double_channel dst;
3813 int wmask;
3814
3815 wmask = inst->Dst[0].Register.WriteMask;
3816 if (wmask & TGSI_WRITEMASK_XY) {
3817 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3818 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3819 op(&dst, &src0, &src1);
3820 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3821 }
3822
3823 if (wmask & TGSI_WRITEMASK_ZW) {
3824 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3825 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3826 op(&dst, &src0, &src1);
3827 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3828 }
3829 }
3830
3831 static int
3832 get_image_coord_dim(unsigned tgsi_tex)
3833 {
3834 int dim;
3835 switch (tgsi_tex) {
3836 case TGSI_TEXTURE_BUFFER:
3837 case TGSI_TEXTURE_1D:
3838 dim = 1;
3839 break;
3840 case TGSI_TEXTURE_2D:
3841 case TGSI_TEXTURE_RECT:
3842 case TGSI_TEXTURE_1D_ARRAY:
3843 case TGSI_TEXTURE_2D_MSAA:
3844 dim = 2;
3845 break;
3846 case TGSI_TEXTURE_3D:
3847 case TGSI_TEXTURE_CUBE:
3848 case TGSI_TEXTURE_2D_ARRAY:
3849 case TGSI_TEXTURE_2D_ARRAY_MSAA:
3850 case TGSI_TEXTURE_CUBE_ARRAY:
3851 dim = 3;
3852 break;
3853 default:
3854 assert(!"unknown texture target");
3855 dim = 0;
3856 break;
3857 }
3858
3859 return dim;
3860 }
3861
3862 static int
3863 get_image_coord_sample(unsigned tgsi_tex)
3864 {
3865 int sample = 0;
3866 switch (tgsi_tex) {
3867 case TGSI_TEXTURE_2D_MSAA:
3868 sample = 3;
3869 break;
3870 case TGSI_TEXTURE_2D_ARRAY_MSAA:
3871 sample = 4;
3872 break;
3873 default:
3874 break;
3875 }
3876 return sample;
3877 }
3878
3879 static void
3880 exec_load_img(struct tgsi_exec_machine *mach,
3881 const struct tgsi_full_instruction *inst)
3882 {
3883 union tgsi_exec_channel r[4], sample_r;
3884 uint unit;
3885 int sample;
3886 int i, j;
3887 int dim;
3888 uint chan;
3889 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3890 struct tgsi_image_params params;
3891 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3892
3893 unit = fetch_sampler_unit(mach, inst, 0);
3894 dim = get_image_coord_dim(inst->Memory.Texture);
3895 sample = get_image_coord_sample(inst->Memory.Texture);
3896 assert(dim <= 3);
3897
3898 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
3899 params.unit = unit;
3900 params.tgsi_tex_instr = inst->Memory.Texture;
3901 params.format = inst->Memory.Format;
3902
3903 for (i = 0; i < dim; i++) {
3904 IFETCH(&r[i], 1, TGSI_CHAN_X + i);
3905 }
3906
3907 if (sample)
3908 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
3909
3910 mach->Image->load(mach->Image, &params,
3911 r[0].i, r[1].i, r[2].i, sample_r.i,
3912 rgba);
3913 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3914 r[0].f[j] = rgba[0][j];
3915 r[1].f[j] = rgba[1][j];
3916 r[2].f[j] = rgba[2][j];
3917 r[3].f[j] = rgba[3][j];
3918 }
3919 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3920 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3921 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3922 }
3923 }
3924 }
3925
3926 static void
3927 exec_load_buf(struct tgsi_exec_machine *mach,
3928 const struct tgsi_full_instruction *inst)
3929 {
3930 union tgsi_exec_channel r[4];
3931 uint unit;
3932 int j;
3933 uint chan;
3934 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3935 struct tgsi_buffer_params params;
3936 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3937
3938 unit = fetch_sampler_unit(mach, inst, 0);
3939
3940 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
3941 params.unit = unit;
3942 IFETCH(&r[0], 1, TGSI_CHAN_X);
3943
3944 mach->Buffer->load(mach->Buffer, &params,
3945 r[0].i, rgba);
3946 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3947 r[0].f[j] = rgba[0][j];
3948 r[1].f[j] = rgba[1][j];
3949 r[2].f[j] = rgba[2][j];
3950 r[3].f[j] = rgba[3][j];
3951 }
3952 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3953 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3954 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3955 }
3956 }
3957 }
3958
3959 static void
3960 exec_load_mem(struct tgsi_exec_machine *mach,
3961 const struct tgsi_full_instruction *inst)
3962 {
3963 union tgsi_exec_channel r[4];
3964 uint chan;
3965 char *ptr = mach->LocalMem;
3966 uint32_t offset;
3967 int j;
3968
3969 IFETCH(&r[0], 1, TGSI_CHAN_X);
3970 if (r[0].u[0] >= mach->LocalMemSize)
3971 return;
3972
3973 offset = r[0].u[0];
3974 ptr += offset;
3975
3976 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3977 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3978 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3979 memcpy(&r[chan].u[j], ptr + (4 * chan), 4);
3980 }
3981 }
3982 }
3983
3984 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3985 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3986 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3987 }
3988 }
3989 }
3990
3991 static void
3992 exec_load(struct tgsi_exec_machine *mach,
3993 const struct tgsi_full_instruction *inst)
3994 {
3995 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
3996 exec_load_img(mach, inst);
3997 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
3998 exec_load_buf(mach, inst);
3999 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
4000 exec_load_mem(mach, inst);
4001 }
4002
4003 static uint
4004 fetch_store_img_unit(struct tgsi_exec_machine *mach,
4005 const struct tgsi_full_dst_register *dst)
4006 {
4007 uint unit = 0;
4008 int i;
4009 if (dst->Register.Indirect) {
4010 union tgsi_exec_channel indir_index, index2;
4011 const uint execmask = mach->ExecMask;
4012 index2.i[0] =
4013 index2.i[1] =
4014 index2.i[2] =
4015 index2.i[3] = dst->Indirect.Index;
4016
4017 fetch_src_file_channel(mach,
4018 dst->Indirect.File,
4019 dst->Indirect.Swizzle,
4020 &index2,
4021 &ZeroVec,
4022 &indir_index);
4023 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4024 if (execmask & (1 << i)) {
4025 unit = dst->Register.Index + indir_index.i[i];
4026 break;
4027 }
4028 }
4029 } else {
4030 unit = dst->Register.Index;
4031 }
4032 return unit;
4033 }
4034
4035 static void
4036 exec_store_img(struct tgsi_exec_machine *mach,
4037 const struct tgsi_full_instruction *inst)
4038 {
4039 union tgsi_exec_channel r[3], sample_r;
4040 union tgsi_exec_channel value[4];
4041 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4042 struct tgsi_image_params params;
4043 int dim;
4044 int sample;
4045 int i, j;
4046 uint unit;
4047 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4048 unit = fetch_store_img_unit(mach, &inst->Dst[0]);
4049 dim = get_image_coord_dim(inst->Memory.Texture);
4050 sample = get_image_coord_sample(inst->Memory.Texture);
4051 assert(dim <= 3);
4052
4053 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4054 params.unit = unit;
4055 params.tgsi_tex_instr = inst->Memory.Texture;
4056 params.format = inst->Memory.Format;
4057
4058 for (i = 0; i < dim; i++) {
4059 IFETCH(&r[i], 0, TGSI_CHAN_X + i);
4060 }
4061
4062 for (i = 0; i < 4; i++) {
4063 FETCH(&value[i], 1, TGSI_CHAN_X + i);
4064 }
4065 if (sample)
4066 IFETCH(&sample_r, 0, TGSI_CHAN_X + sample);
4067
4068 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4069 rgba[0][j] = value[0].f[j];
4070 rgba[1][j] = value[1].f[j];
4071 rgba[2][j] = value[2].f[j];
4072 rgba[3][j] = value[3].f[j];
4073 }
4074
4075 mach->Image->store(mach->Image, &params,
4076 r[0].i, r[1].i, r[2].i, sample_r.i,
4077 rgba);
4078 }
4079
4080 static void
4081 exec_store_buf(struct tgsi_exec_machine *mach,
4082 const struct tgsi_full_instruction *inst)
4083 {
4084 union tgsi_exec_channel r[3];
4085 union tgsi_exec_channel value[4];
4086 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4087 struct tgsi_buffer_params params;
4088 int i, j;
4089 uint unit;
4090 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4091
4092 unit = fetch_store_img_unit(mach, &inst->Dst[0]);
4093
4094 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4095 params.unit = unit;
4096 params.writemask = inst->Dst[0].Register.WriteMask;
4097
4098 IFETCH(&r[0], 0, TGSI_CHAN_X);
4099 for (i = 0; i < 4; i++) {
4100 FETCH(&value[i], 1, TGSI_CHAN_X + i);
4101 }
4102
4103 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4104 rgba[0][j] = value[0].f[j];
4105 rgba[1][j] = value[1].f[j];
4106 rgba[2][j] = value[2].f[j];
4107 rgba[3][j] = value[3].f[j];
4108 }
4109
4110 mach->Buffer->store(mach->Buffer, &params,
4111 r[0].i,
4112 rgba);
4113 }
4114
4115 static void
4116 exec_store_mem(struct tgsi_exec_machine *mach,
4117 const struct tgsi_full_instruction *inst)
4118 {
4119 union tgsi_exec_channel r[3];
4120 union tgsi_exec_channel value[4];
4121 uint i, chan;
4122 char *ptr = mach->LocalMem;
4123 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4124 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4125
4126 IFETCH(&r[0], 0, TGSI_CHAN_X);
4127
4128 for (i = 0; i < 4; i++) {
4129 FETCH(&value[i], 1, TGSI_CHAN_X + i);
4130 }
4131
4132 if (r[0].u[0] >= mach->LocalMemSize)
4133 return;
4134 ptr += r[0].u[0];
4135
4136 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4137 if (execmask & (1 << i)) {
4138 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4139 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4140 memcpy(ptr + (chan * 4), &value[chan].u[0], 4);
4141 }
4142 }
4143 }
4144 }
4145 }
4146
4147 static void
4148 exec_store(struct tgsi_exec_machine *mach,
4149 const struct tgsi_full_instruction *inst)
4150 {
4151 if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
4152 exec_store_img(mach, inst);
4153 else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
4154 exec_store_buf(mach, inst);
4155 else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
4156 exec_store_mem(mach, inst);
4157 }
4158
4159 static void
4160 exec_atomop_img(struct tgsi_exec_machine *mach,
4161 const struct tgsi_full_instruction *inst)
4162 {
4163 union tgsi_exec_channel r[4], sample_r;
4164 union tgsi_exec_channel value[4], value2[4];
4165 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4166 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4167 struct tgsi_image_params params;
4168 int dim;
4169 int sample;
4170 int i, j;
4171 uint unit, chan;
4172 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4173 unit = fetch_sampler_unit(mach, inst, 0);
4174 dim = get_image_coord_dim(inst->Memory.Texture);
4175 sample = get_image_coord_sample(inst->Memory.Texture);
4176 assert(dim <= 3);
4177
4178 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4179 params.unit = unit;
4180 params.tgsi_tex_instr = inst->Memory.Texture;
4181 params.format = inst->Memory.Format;
4182
4183 for (i = 0; i < dim; i++) {
4184 IFETCH(&r[i], 1, TGSI_CHAN_X + i);
4185 }
4186
4187 for (i = 0; i < 4; i++) {
4188 FETCH(&value[i], 2, TGSI_CHAN_X + i);
4189 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4190 FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4191 }
4192 if (sample)
4193 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
4194
4195 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4196 rgba[0][j] = value[0].f[j];
4197 rgba[1][j] = value[1].f[j];
4198 rgba[2][j] = value[2].f[j];
4199 rgba[3][j] = value[3].f[j];
4200 }
4201 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4202 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4203 rgba2[0][j] = value2[0].f[j];
4204 rgba2[1][j] = value2[1].f[j];
4205 rgba2[2][j] = value2[2].f[j];
4206 rgba2[3][j] = value2[3].f[j];
4207 }
4208 }
4209
4210 mach->Image->op(mach->Image, &params, inst->Instruction.Opcode,
4211 r[0].i, r[1].i, r[2].i, sample_r.i,
4212 rgba, rgba2);
4213
4214 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4215 r[0].f[j] = rgba[0][j];
4216 r[1].f[j] = rgba[1][j];
4217 r[2].f[j] = rgba[2][j];
4218 r[3].f[j] = rgba[3][j];
4219 }
4220 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4221 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4222 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4223 }
4224 }
4225 }
4226
4227 static void
4228 exec_atomop_buf(struct tgsi_exec_machine *mach,
4229 const struct tgsi_full_instruction *inst)
4230 {
4231 union tgsi_exec_channel r[4];
4232 union tgsi_exec_channel value[4], value2[4];
4233 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4234 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4235 struct tgsi_buffer_params params;
4236 int i, j;
4237 uint unit, chan;
4238 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4239
4240 unit = fetch_sampler_unit(mach, inst, 0);
4241
4242 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4243 params.unit = unit;
4244 params.writemask = inst->Dst[0].Register.WriteMask;
4245
4246 IFETCH(&r[0], 1, TGSI_CHAN_X);
4247
4248 for (i = 0; i < 4; i++) {
4249 FETCH(&value[i], 2, TGSI_CHAN_X + i);
4250 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4251 FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4252 }
4253
4254 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4255 rgba[0][j] = value[0].f[j];
4256 rgba[1][j] = value[1].f[j];
4257 rgba[2][j] = value[2].f[j];
4258 rgba[3][j] = value[3].f[j];
4259 }
4260 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4261 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4262 rgba2[0][j] = value2[0].f[j];
4263 rgba2[1][j] = value2[1].f[j];
4264 rgba2[2][j] = value2[2].f[j];
4265 rgba2[3][j] = value2[3].f[j];
4266 }
4267 }
4268
4269 mach->Buffer->op(mach->Buffer, &params, inst->Instruction.Opcode,
4270 r[0].i,
4271 rgba, rgba2);
4272
4273 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4274 r[0].f[j] = rgba[0][j];
4275 r[1].f[j] = rgba[1][j];
4276 r[2].f[j] = rgba[2][j];
4277 r[3].f[j] = rgba[3][j];
4278 }
4279 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4280 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4281 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4282 }
4283 }
4284 }
4285
4286 static void
4287 exec_atomop_mem(struct tgsi_exec_machine *mach,
4288 const struct tgsi_full_instruction *inst)
4289 {
4290 union tgsi_exec_channel r[4];
4291 union tgsi_exec_channel value[4], value2[4];
4292 char *ptr = mach->LocalMem;
4293 uint32_t val;
4294 uint chan, i;
4295 uint32_t offset;
4296 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4297 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4298 IFETCH(&r[0], 1, TGSI_CHAN_X);
4299
4300 if (r[0].u[0] >= mach->LocalMemSize)
4301 return;
4302
4303 offset = r[0].u[0];
4304 ptr += offset;
4305 for (i = 0; i < 4; i++) {
4306 FETCH(&value[i], 2, TGSI_CHAN_X + i);
4307 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4308 FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4309 }
4310
4311 memcpy(&r[0].u[0], ptr, 4);
4312 val = r[0].u[0];
4313 switch (inst->Instruction.Opcode) {
4314 case TGSI_OPCODE_ATOMUADD:
4315 val += value[0].u[0];
4316 break;
4317 case TGSI_OPCODE_ATOMXOR:
4318 val ^= value[0].u[0];
4319 break;
4320 case TGSI_OPCODE_ATOMOR:
4321 val |= value[0].u[0];
4322 break;
4323 case TGSI_OPCODE_ATOMAND:
4324 val &= value[0].u[0];
4325 break;
4326 case TGSI_OPCODE_ATOMUMIN:
4327 val = MIN2(val, value[0].u[0]);
4328 break;
4329 case TGSI_OPCODE_ATOMUMAX:
4330 val = MAX2(val, value[0].u[0]);
4331 break;
4332 case TGSI_OPCODE_ATOMIMIN:
4333 val = MIN2(r[0].i[0], value[0].i[0]);
4334 break;
4335 case TGSI_OPCODE_ATOMIMAX:
4336 val = MAX2(r[0].i[0], value[0].i[0]);
4337 break;
4338 case TGSI_OPCODE_ATOMXCHG:
4339 val = value[0].i[0];
4340 break;
4341 case TGSI_OPCODE_ATOMCAS:
4342 if (val == value[0].u[0])
4343 val = value2[0].u[0];
4344 break;
4345 case TGSI_OPCODE_ATOMFADD:
4346 val = fui(r[0].f[0] + value[0].f[0]);
4347 break;
4348 default:
4349 break;
4350 }
4351 for (i = 0; i < TGSI_QUAD_SIZE; i++)
4352 if (execmask & (1 << i))
4353 memcpy(ptr, &val, 4);
4354
4355 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4356 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4357 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4358 }
4359 }
4360 }
4361
4362 static void
4363 exec_atomop(struct tgsi_exec_machine *mach,
4364 const struct tgsi_full_instruction *inst)
4365 {
4366 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4367 exec_atomop_img(mach, inst);
4368 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
4369 exec_atomop_buf(mach, inst);
4370 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
4371 exec_atomop_mem(mach, inst);
4372 }
4373
4374 static void
4375 exec_resq_img(struct tgsi_exec_machine *mach,
4376 const struct tgsi_full_instruction *inst)
4377 {
4378 int result[4];
4379 union tgsi_exec_channel r[4];
4380 uint unit;
4381 int i, chan, j;
4382 struct tgsi_image_params params;
4383 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4384
4385 unit = fetch_sampler_unit(mach, inst, 0);
4386
4387 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4388 params.unit = unit;
4389 params.tgsi_tex_instr = inst->Memory.Texture;
4390 params.format = inst->Memory.Format;
4391
4392 mach->Image->get_dims(mach->Image, &params, result);
4393
4394 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4395 for (j = 0; j < 4; j++) {
4396 r[j].i[i] = result[j];
4397 }
4398 }
4399
4400 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4401 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4402 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
4403 TGSI_EXEC_DATA_INT);
4404 }
4405 }
4406 }
4407
4408 static void
4409 exec_resq_buf(struct tgsi_exec_machine *mach,
4410 const struct tgsi_full_instruction *inst)
4411 {
4412 int result;
4413 union tgsi_exec_channel r[4];
4414 uint unit;
4415 int i, chan;
4416 struct tgsi_buffer_params params;
4417 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4418
4419 unit = fetch_sampler_unit(mach, inst, 0);
4420
4421 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4422 params.unit = unit;
4423
4424 mach->Buffer->get_dims(mach->Buffer, &params, &result);
4425
4426 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4427 r[0].i[i] = result;
4428 }
4429
4430 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4431 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4432 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
4433 TGSI_EXEC_DATA_INT);
4434 }
4435 }
4436 }
4437
4438 static void
4439 exec_resq(struct tgsi_exec_machine *mach,
4440 const struct tgsi_full_instruction *inst)
4441 {
4442 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4443 exec_resq_img(mach, inst);
4444 else
4445 exec_resq_buf(mach, inst);
4446 }
4447
4448 static void
4449 micro_f2u64(union tgsi_double_channel *dst,
4450 const union tgsi_exec_channel *src)
4451 {
4452 dst->u64[0] = (uint64_t)src->f[0];
4453 dst->u64[1] = (uint64_t)src->f[1];
4454 dst->u64[2] = (uint64_t)src->f[2];
4455 dst->u64[3] = (uint64_t)src->f[3];
4456 }
4457
4458 static void
4459 micro_f2i64(union tgsi_double_channel *dst,
4460 const union tgsi_exec_channel *src)
4461 {
4462 dst->i64[0] = (int64_t)src->f[0];
4463 dst->i64[1] = (int64_t)src->f[1];
4464 dst->i64[2] = (int64_t)src->f[2];
4465 dst->i64[3] = (int64_t)src->f[3];
4466 }
4467
4468 static void
4469 micro_u2i64(union tgsi_double_channel *dst,
4470 const union tgsi_exec_channel *src)
4471 {
4472 dst->u64[0] = (uint64_t)src->u[0];
4473 dst->u64[1] = (uint64_t)src->u[1];
4474 dst->u64[2] = (uint64_t)src->u[2];
4475 dst->u64[3] = (uint64_t)src->u[3];
4476 }
4477
4478 static void
4479 micro_i2i64(union tgsi_double_channel *dst,
4480 const union tgsi_exec_channel *src)
4481 {
4482 dst->i64[0] = (int64_t)src->i[0];
4483 dst->i64[1] = (int64_t)src->i[1];
4484 dst->i64[2] = (int64_t)src->i[2];
4485 dst->i64[3] = (int64_t)src->i[3];
4486 }
4487
4488 static void
4489 micro_d2u64(union tgsi_double_channel *dst,
4490 const union tgsi_double_channel *src)
4491 {
4492 dst->u64[0] = (uint64_t)src->d[0];
4493 dst->u64[1] = (uint64_t)src->d[1];
4494 dst->u64[2] = (uint64_t)src->d[2];
4495 dst->u64[3] = (uint64_t)src->d[3];
4496 }
4497
4498 static void
4499 micro_d2i64(union tgsi_double_channel *dst,
4500 const union tgsi_double_channel *src)
4501 {
4502 dst->i64[0] = (int64_t)src->d[0];
4503 dst->i64[1] = (int64_t)src->d[1];
4504 dst->i64[2] = (int64_t)src->d[2];
4505 dst->i64[3] = (int64_t)src->d[3];
4506 }
4507
4508 static void
4509 micro_u642d(union tgsi_double_channel *dst,
4510 const union tgsi_double_channel *src)
4511 {
4512 dst->d[0] = (double)src->u64[0];
4513 dst->d[1] = (double)src->u64[1];
4514 dst->d[2] = (double)src->u64[2];
4515 dst->d[3] = (double)src->u64[3];
4516 }
4517
4518 static void
4519 micro_i642d(union tgsi_double_channel *dst,
4520 const union tgsi_double_channel *src)
4521 {
4522 dst->d[0] = (double)src->i64[0];
4523 dst->d[1] = (double)src->i64[1];
4524 dst->d[2] = (double)src->i64[2];
4525 dst->d[3] = (double)src->i64[3];
4526 }
4527
4528 static void
4529 micro_u642f(union tgsi_exec_channel *dst,
4530 const union tgsi_double_channel *src)
4531 {
4532 dst->f[0] = (float)src->u64[0];
4533 dst->f[1] = (float)src->u64[1];
4534 dst->f[2] = (float)src->u64[2];
4535 dst->f[3] = (float)src->u64[3];
4536 }
4537
4538 static void
4539 micro_i642f(union tgsi_exec_channel *dst,
4540 const union tgsi_double_channel *src)
4541 {
4542 dst->f[0] = (float)src->i64[0];
4543 dst->f[1] = (float)src->i64[1];
4544 dst->f[2] = (float)src->i64[2];
4545 dst->f[3] = (float)src->i64[3];
4546 }
4547
4548 static void
4549 exec_t_2_64(struct tgsi_exec_machine *mach,
4550 const struct tgsi_full_instruction *inst,
4551 micro_dop_s op,
4552 enum tgsi_exec_datatype src_datatype)
4553 {
4554 union tgsi_exec_channel src;
4555 union tgsi_double_channel dst;
4556
4557 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
4558 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
4559 op(&dst, &src);
4560 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
4561 }
4562 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
4563 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
4564 op(&dst, &src);
4565 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
4566 }
4567 }
4568
4569 static void
4570 exec_64_2_t(struct tgsi_exec_machine *mach,
4571 const struct tgsi_full_instruction *inst,
4572 micro_sop_d op,
4573 enum tgsi_exec_datatype dst_datatype)
4574 {
4575 union tgsi_double_channel src;
4576 union tgsi_exec_channel dst;
4577 int wm = inst->Dst[0].Register.WriteMask;
4578 int i;
4579 int bit;
4580 for (i = 0; i < 2; i++) {
4581 bit = ffs(wm);
4582 if (bit) {
4583 wm &= ~(1 << (bit - 1));
4584 if (i == 0)
4585 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
4586 else
4587 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
4588 op(&dst, &src);
4589 store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype);
4590 }
4591 }
4592 }
4593
4594 static void
4595 micro_i2f(union tgsi_exec_channel *dst,
4596 const union tgsi_exec_channel *src)
4597 {
4598 dst->f[0] = (float)src->i[0];
4599 dst->f[1] = (float)src->i[1];
4600 dst->f[2] = (float)src->i[2];
4601 dst->f[3] = (float)src->i[3];
4602 }
4603
4604 static void
4605 micro_not(union tgsi_exec_channel *dst,
4606 const union tgsi_exec_channel *src)
4607 {
4608 dst->u[0] = ~src->u[0];
4609 dst->u[1] = ~src->u[1];
4610 dst->u[2] = ~src->u[2];
4611 dst->u[3] = ~src->u[3];
4612 }
4613
4614 static void
4615 micro_shl(union tgsi_exec_channel *dst,
4616 const union tgsi_exec_channel *src0,
4617 const union tgsi_exec_channel *src1)
4618 {
4619 unsigned masked_count;
4620 masked_count = src1->u[0] & 0x1f;
4621 dst->u[0] = src0->u[0] << masked_count;
4622 masked_count = src1->u[1] & 0x1f;
4623 dst->u[1] = src0->u[1] << masked_count;
4624 masked_count = src1->u[2] & 0x1f;
4625 dst->u[2] = src0->u[2] << masked_count;
4626 masked_count = src1->u[3] & 0x1f;
4627 dst->u[3] = src0->u[3] << masked_count;
4628 }
4629
4630 static void
4631 micro_and(union tgsi_exec_channel *dst,
4632 const union tgsi_exec_channel *src0,
4633 const union tgsi_exec_channel *src1)
4634 {
4635 dst->u[0] = src0->u[0] & src1->u[0];
4636 dst->u[1] = src0->u[1] & src1->u[1];
4637 dst->u[2] = src0->u[2] & src1->u[2];
4638 dst->u[3] = src0->u[3] & src1->u[3];
4639 }
4640
4641 static void
4642 micro_or(union tgsi_exec_channel *dst,
4643 const union tgsi_exec_channel *src0,
4644 const union tgsi_exec_channel *src1)
4645 {
4646 dst->u[0] = src0->u[0] | src1->u[0];
4647 dst->u[1] = src0->u[1] | src1->u[1];
4648 dst->u[2] = src0->u[2] | src1->u[2];
4649 dst->u[3] = src0->u[3] | src1->u[3];
4650 }
4651
4652 static void
4653 micro_xor(union tgsi_exec_channel *dst,
4654 const union tgsi_exec_channel *src0,
4655 const union tgsi_exec_channel *src1)
4656 {
4657 dst->u[0] = src0->u[0] ^ src1->u[0];
4658 dst->u[1] = src0->u[1] ^ src1->u[1];
4659 dst->u[2] = src0->u[2] ^ src1->u[2];
4660 dst->u[3] = src0->u[3] ^ src1->u[3];
4661 }
4662
4663 static void
4664 micro_mod(union tgsi_exec_channel *dst,
4665 const union tgsi_exec_channel *src0,
4666 const union tgsi_exec_channel *src1)
4667 {
4668 dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0;
4669 dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0;
4670 dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0;
4671 dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0;
4672 }
4673
4674 static void
4675 micro_f2i(union tgsi_exec_channel *dst,
4676 const union tgsi_exec_channel *src)
4677 {
4678 dst->i[0] = (int)src->f[0];
4679 dst->i[1] = (int)src->f[1];
4680 dst->i[2] = (int)src->f[2];
4681 dst->i[3] = (int)src->f[3];
4682 }
4683
4684 static void
4685 micro_fseq(union tgsi_exec_channel *dst,
4686 const union tgsi_exec_channel *src0,
4687 const union tgsi_exec_channel *src1)
4688 {
4689 dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0;
4690 dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0;
4691 dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0;
4692 dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0;
4693 }
4694
4695 static void
4696 micro_fsge(union tgsi_exec_channel *dst,
4697 const union tgsi_exec_channel *src0,
4698 const union tgsi_exec_channel *src1)
4699 {
4700 dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0;
4701 dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0;
4702 dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0;
4703 dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0;
4704 }
4705
4706 static void
4707 micro_fslt(union tgsi_exec_channel *dst,
4708 const union tgsi_exec_channel *src0,
4709 const union tgsi_exec_channel *src1)
4710 {
4711 dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0;
4712 dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0;
4713 dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0;
4714 dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0;
4715 }
4716
4717 static void
4718 micro_fsne(union tgsi_exec_channel *dst,
4719 const union tgsi_exec_channel *src0,
4720 const union tgsi_exec_channel *src1)
4721 {
4722 dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0;
4723 dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0;
4724 dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0;
4725 dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0;
4726 }
4727
4728 static void
4729 micro_idiv(union tgsi_exec_channel *dst,
4730 const union tgsi_exec_channel *src0,
4731 const union tgsi_exec_channel *src1)
4732 {
4733 dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0;
4734 dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0;
4735 dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0;
4736 dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0;
4737 }
4738
4739 static void
4740 micro_imax(union tgsi_exec_channel *dst,
4741 const union tgsi_exec_channel *src0,
4742 const union tgsi_exec_channel *src1)
4743 {
4744 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
4745 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
4746 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
4747 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
4748 }
4749
4750 static void
4751 micro_imin(union tgsi_exec_channel *dst,
4752 const union tgsi_exec_channel *src0,
4753 const union tgsi_exec_channel *src1)
4754 {
4755 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
4756 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
4757 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
4758 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
4759 }
4760
4761 static void
4762 micro_isge(union tgsi_exec_channel *dst,
4763 const union tgsi_exec_channel *src0,
4764 const union tgsi_exec_channel *src1)
4765 {
4766 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
4767 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
4768 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
4769 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
4770 }
4771
4772 static void
4773 micro_ishr(union tgsi_exec_channel *dst,
4774 const union tgsi_exec_channel *src0,
4775 const union tgsi_exec_channel *src1)
4776 {
4777 unsigned masked_count;
4778 masked_count = src1->i[0] & 0x1f;
4779 dst->i[0] = src0->i[0] >> masked_count;
4780 masked_count = src1->i[1] & 0x1f;
4781 dst->i[1] = src0->i[1] >> masked_count;
4782 masked_count = src1->i[2] & 0x1f;
4783 dst->i[2] = src0->i[2] >> masked_count;
4784 masked_count = src1->i[3] & 0x1f;
4785 dst->i[3] = src0->i[3] >> masked_count;
4786 }
4787
4788 static void
4789 micro_islt(union tgsi_exec_channel *dst,
4790 const union tgsi_exec_channel *src0,
4791 const union tgsi_exec_channel *src1)
4792 {
4793 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
4794 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
4795 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
4796 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
4797 }
4798
4799 static void
4800 micro_f2u(union tgsi_exec_channel *dst,
4801 const union tgsi_exec_channel *src)
4802 {
4803 dst->u[0] = (uint)src->f[0];
4804 dst->u[1] = (uint)src->f[1];
4805 dst->u[2] = (uint)src->f[2];
4806 dst->u[3] = (uint)src->f[3];
4807 }
4808
4809 static void
4810 micro_u2f(union tgsi_exec_channel *dst,
4811 const union tgsi_exec_channel *src)
4812 {
4813 dst->f[0] = (float)src->u[0];
4814 dst->f[1] = (float)src->u[1];
4815 dst->f[2] = (float)src->u[2];
4816 dst->f[3] = (float)src->u[3];
4817 }
4818
4819 static void
4820 micro_uadd(union tgsi_exec_channel *dst,
4821 const union tgsi_exec_channel *src0,
4822 const union tgsi_exec_channel *src1)
4823 {
4824 dst->u[0] = src0->u[0] + src1->u[0];
4825 dst->u[1] = src0->u[1] + src1->u[1];
4826 dst->u[2] = src0->u[2] + src1->u[2];
4827 dst->u[3] = src0->u[3] + src1->u[3];
4828 }
4829
4830 static void
4831 micro_udiv(union tgsi_exec_channel *dst,
4832 const union tgsi_exec_channel *src0,
4833 const union tgsi_exec_channel *src1)
4834 {
4835 dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;
4836 dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;
4837 dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;
4838 dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;
4839 }
4840
4841 static void
4842 micro_umad(union tgsi_exec_channel *dst,
4843 const union tgsi_exec_channel *src0,
4844 const union tgsi_exec_channel *src1,
4845 const union tgsi_exec_channel *src2)
4846 {
4847 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
4848 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
4849 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
4850 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
4851 }
4852
4853 static void
4854 micro_umax(union tgsi_exec_channel *dst,
4855 const union tgsi_exec_channel *src0,
4856 const union tgsi_exec_channel *src1)
4857 {
4858 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
4859 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
4860 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
4861 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
4862 }
4863
4864 static void
4865 micro_umin(union tgsi_exec_channel *dst,
4866 const union tgsi_exec_channel *src0,
4867 const union tgsi_exec_channel *src1)
4868 {
4869 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
4870 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
4871 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
4872 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
4873 }
4874
4875 static void
4876 micro_umod(union tgsi_exec_channel *dst,
4877 const union tgsi_exec_channel *src0,
4878 const union tgsi_exec_channel *src1)
4879 {
4880 dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;
4881 dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;
4882 dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;
4883 dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;
4884 }
4885
4886 static void
4887 micro_umul(union tgsi_exec_channel *dst,
4888 const union tgsi_exec_channel *src0,
4889 const union tgsi_exec_channel *src1)
4890 {
4891 dst->u[0] = src0->u[0] * src1->u[0];
4892 dst->u[1] = src0->u[1] * src1->u[1];
4893 dst->u[2] = src0->u[2] * src1->u[2];
4894 dst->u[3] = src0->u[3] * src1->u[3];
4895 }
4896
4897 static void
4898 micro_imul_hi(union tgsi_exec_channel *dst,
4899 const union tgsi_exec_channel *src0,
4900 const union tgsi_exec_channel *src1)
4901 {
4902 #define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)
4903 dst->i[0] = I64M(src0->i[0], src1->i[0]);
4904 dst->i[1] = I64M(src0->i[1], src1->i[1]);
4905 dst->i[2] = I64M(src0->i[2], src1->i[2]);
4906 dst->i[3] = I64M(src0->i[3], src1->i[3]);
4907 #undef I64M
4908 }
4909
4910 static void
4911 micro_umul_hi(union tgsi_exec_channel *dst,
4912 const union tgsi_exec_channel *src0,
4913 const union tgsi_exec_channel *src1)
4914 {
4915 #define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)
4916 dst->u[0] = U64M(src0->u[0], src1->u[0]);
4917 dst->u[1] = U64M(src0->u[1], src1->u[1]);
4918 dst->u[2] = U64M(src0->u[2], src1->u[2]);
4919 dst->u[3] = U64M(src0->u[3], src1->u[3]);
4920 #undef U64M
4921 }
4922
4923 static void
4924 micro_useq(union tgsi_exec_channel *dst,
4925 const union tgsi_exec_channel *src0,
4926 const union tgsi_exec_channel *src1)
4927 {
4928 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
4929 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
4930 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
4931 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
4932 }
4933
4934 static void
4935 micro_usge(union tgsi_exec_channel *dst,
4936 const union tgsi_exec_channel *src0,
4937 const union tgsi_exec_channel *src1)
4938 {
4939 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
4940 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
4941 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
4942 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
4943 }
4944
4945 static void
4946 micro_ushr(union tgsi_exec_channel *dst,
4947 const union tgsi_exec_channel *src0,
4948 const union tgsi_exec_channel *src1)
4949 {
4950 unsigned masked_count;
4951 masked_count = src1->u[0] & 0x1f;
4952 dst->u[0] = src0->u[0] >> masked_count;
4953 masked_count = src1->u[1] & 0x1f;
4954 dst->u[1] = src0->u[1] >> masked_count;
4955 masked_count = src1->u[2] & 0x1f;
4956 dst->u[2] = src0->u[2] >> masked_count;
4957 masked_count = src1->u[3] & 0x1f;
4958 dst->u[3] = src0->u[3] >> masked_count;
4959 }
4960
4961 static void
4962 micro_uslt(union tgsi_exec_channel *dst,
4963 const union tgsi_exec_channel *src0,
4964 const union tgsi_exec_channel *src1)
4965 {
4966 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
4967 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
4968 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
4969 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
4970 }
4971
4972 static void
4973 micro_usne(union tgsi_exec_channel *dst,
4974 const union tgsi_exec_channel *src0,
4975 const union tgsi_exec_channel *src1)
4976 {
4977 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
4978 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
4979 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
4980 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
4981 }
4982
4983 static void
4984 micro_uarl(union tgsi_exec_channel *dst,
4985 const union tgsi_exec_channel *src)
4986 {
4987 dst->i[0] = src->u[0];
4988 dst->i[1] = src->u[1];
4989 dst->i[2] = src->u[2];
4990 dst->i[3] = src->u[3];
4991 }
4992
4993 /**
4994 * Signed bitfield extract (i.e. sign-extend the extracted bits)
4995 */
4996 static void
4997 micro_ibfe(union tgsi_exec_channel *dst,
4998 const union tgsi_exec_channel *src0,
4999 const union tgsi_exec_channel *src1,
5000 const union tgsi_exec_channel *src2)
5001 {
5002 int i;
5003 for (i = 0; i < 4; i++) {
5004 int width = src2->i[i];
5005 int offset = src1->i[i] & 0x1f;
5006 if (width == 32 && offset == 0) {
5007 dst->i[i] = src0->i[i];
5008 continue;
5009 }
5010 width &= 0x1f;
5011 if (width == 0)
5012 dst->i[i] = 0;
5013 else if (width + offset < 32)
5014 dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width);
5015 else
5016 dst->i[i] = src0->i[i] >> offset;
5017 }
5018 }
5019
5020 /**
5021 * Unsigned bitfield extract
5022 */
5023 static void
5024 micro_ubfe(union tgsi_exec_channel *dst,
5025 const union tgsi_exec_channel *src0,
5026 const union tgsi_exec_channel *src1,
5027 const union tgsi_exec_channel *src2)
5028 {
5029 int i;
5030 for (i = 0; i < 4; i++) {
5031 int width = src2->u[i];
5032 int offset = src1->u[i] & 0x1f;
5033 if (width == 32 && offset == 0) {
5034 dst->u[i] = src0->u[i];
5035 continue;
5036 }
5037 width &= 0x1f;
5038 if (width == 0)
5039 dst->u[i] = 0;
5040 else if (width + offset < 32)
5041 dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width);
5042 else
5043 dst->u[i] = src0->u[i] >> offset;
5044 }
5045 }
5046
5047 /**
5048 * Bitfield insert: copy low bits from src1 into a region of src0.
5049 */
5050 static void
5051 micro_bfi(union tgsi_exec_channel *dst,
5052 const union tgsi_exec_channel *src0,
5053 const union tgsi_exec_channel *src1,
5054 const union tgsi_exec_channel *src2,
5055 const union tgsi_exec_channel *src3)
5056 {
5057 int i;
5058 for (i = 0; i < 4; i++) {
5059 int width = src3->u[i];
5060 int offset = src2->u[i] & 0x1f;
5061 if (width == 32) {
5062 dst->u[i] = src1->u[i];
5063 } else {
5064 int bitmask = ((1 << width) - 1) << offset;
5065 dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask);
5066 }
5067 }
5068 }
5069
5070 static void
5071 micro_brev(union tgsi_exec_channel *dst,
5072 const union tgsi_exec_channel *src)
5073 {
5074 dst->u[0] = util_bitreverse(src->u[0]);
5075 dst->u[1] = util_bitreverse(src->u[1]);
5076 dst->u[2] = util_bitreverse(src->u[2]);
5077 dst->u[3] = util_bitreverse(src->u[3]);
5078 }
5079
5080 static void
5081 micro_popc(union tgsi_exec_channel *dst,
5082 const union tgsi_exec_channel *src)
5083 {
5084 dst->u[0] = util_bitcount(src->u[0]);
5085 dst->u[1] = util_bitcount(src->u[1]);
5086 dst->u[2] = util_bitcount(src->u[2]);
5087 dst->u[3] = util_bitcount(src->u[3]);
5088 }
5089
5090 static void
5091 micro_lsb(union tgsi_exec_channel *dst,
5092 const union tgsi_exec_channel *src)
5093 {
5094 dst->i[0] = ffs(src->u[0]) - 1;
5095 dst->i[1] = ffs(src->u[1]) - 1;
5096 dst->i[2] = ffs(src->u[2]) - 1;
5097 dst->i[3] = ffs(src->u[3]) - 1;
5098 }
5099
5100 static void
5101 micro_imsb(union tgsi_exec_channel *dst,
5102 const union tgsi_exec_channel *src)
5103 {
5104 dst->i[0] = util_last_bit_signed(src->i[0]) - 1;
5105 dst->i[1] = util_last_bit_signed(src->i[1]) - 1;
5106 dst->i[2] = util_last_bit_signed(src->i[2]) - 1;
5107 dst->i[3] = util_last_bit_signed(src->i[3]) - 1;
5108 }
5109
5110 static void
5111 micro_umsb(union tgsi_exec_channel *dst,
5112 const union tgsi_exec_channel *src)
5113 {
5114 dst->i[0] = util_last_bit(src->u[0]) - 1;
5115 dst->i[1] = util_last_bit(src->u[1]) - 1;
5116 dst->i[2] = util_last_bit(src->u[2]) - 1;
5117 dst->i[3] = util_last_bit(src->u[3]) - 1;
5118 }
5119
5120
5121 static void
5122 exec_interp_at_sample(struct tgsi_exec_machine *mach,
5123 const struct tgsi_full_instruction *inst)
5124 {
5125 union tgsi_exec_channel index;
5126 union tgsi_exec_channel index2D;
5127 union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
5128 const struct tgsi_full_src_register *reg = &inst->Src[0];
5129
5130 assert(reg->Register.File == TGSI_FILE_INPUT);
5131 assert(inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE);
5132
5133 get_index_registers(mach, reg, &index, &index2D);
5134 float sample = mach->Imms[inst->Src[1].Register.Index][inst->Src[1].Register.SwizzleX];
5135
5136 /* Short cut: sample 0 is like a normal fetch */
5137 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
5138 if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
5139 continue;
5140
5141 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
5142 &result[chan]);
5143 if (sample != 0.0f) {
5144
5145 /* TODO: define the samples > 0, but so far we only do fake MSAA */
5146 float x = 0;
5147 float y = 0;
5148
5149 unsigned pos = index2D.i[chan] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[chan];
5150 assert(pos >= 0);
5151 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
5152 mach->InputSampleOffsetApply[pos](mach, pos, chan, x, y, &result[chan]);
5153 }
5154 store_dest(mach, &result[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
5155 }
5156 }
5157
5158
5159 static void
5160 exec_interp_at_offset(struct tgsi_exec_machine *mach,
5161 const struct tgsi_full_instruction *inst)
5162 {
5163 union tgsi_exec_channel index;
5164 union tgsi_exec_channel index2D;
5165 union tgsi_exec_channel ofsx;
5166 union tgsi_exec_channel ofsy;
5167 const struct tgsi_full_src_register *reg = &inst->Src[0];
5168
5169 assert(reg->Register.File == TGSI_FILE_INPUT);
5170
5171 get_index_registers(mach, reg, &index, &index2D);
5172 unsigned pos = index2D.i[0] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[0];
5173
5174 fetch_source(mach, &ofsx, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
5175 fetch_source(mach, &ofsy, &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
5176
5177 for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
5178 if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
5179 continue;
5180 union tgsi_exec_channel result;
5181 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, &result);
5182 mach->InputSampleOffsetApply[pos](mach, pos, chan, ofsx.f[chan], ofsy.f[chan], &result);
5183 store_dest(mach, &result, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
5184 }
5185 }
5186
5187
5188 static void
5189 exec_interp_at_centroid(struct tgsi_exec_machine *mach,
5190 const struct tgsi_full_instruction *inst)
5191 {
5192 union tgsi_exec_channel index;
5193 union tgsi_exec_channel index2D;
5194 union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
5195 const struct tgsi_full_src_register *reg = &inst->Src[0];
5196
5197 assert(reg->Register.File == TGSI_FILE_INPUT);
5198 get_index_registers(mach, reg, &index, &index2D);
5199
5200 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
5201 if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
5202 continue;
5203
5204 /* Here we should add the change to use a sample that lies within the
5205 * primitive (Section 15.2):
5206 *
5207 * "When interpolating variables declared using centroid in ,
5208 * the variable is sampled at a location within the pixel covered
5209 * by the primitive generating the fragment.
5210 * ...
5211 * The built-in functions interpolateAtCentroid ... will sample
5212 * variables as though they were declared with the centroid ...
5213 * qualifier[s]."
5214 *
5215 * Since we only support 1 sample currently, this is just a pass-through.
5216 */
5217 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
5218 &result[chan]);
5219 store_dest(mach, &result[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
5220 }
5221
5222 }
5223
5224
5225 /**
5226 * Execute a TGSI instruction.
5227 * Returns TRUE if a barrier instruction is hit,
5228 * otherwise FALSE.
5229 */
5230 static boolean
5231 exec_instruction(
5232 struct tgsi_exec_machine *mach,
5233 const struct tgsi_full_instruction *inst,
5234 int *pc )
5235 {
5236 union tgsi_exec_channel r[10];
5237
5238 (*pc)++;
5239
5240 switch (inst->Instruction.Opcode) {
5241 case TGSI_OPCODE_ARL:
5242 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5243 break;
5244
5245 case TGSI_OPCODE_MOV:
5246 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5247 break;
5248
5249 case TGSI_OPCODE_LIT:
5250 exec_lit(mach, inst);
5251 break;
5252
5253 case TGSI_OPCODE_RCP:
5254 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5255 break;
5256
5257 case TGSI_OPCODE_RSQ:
5258 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5259 break;
5260
5261 case TGSI_OPCODE_EXP:
5262 exec_exp(mach, inst);
5263 break;
5264
5265 case TGSI_OPCODE_LOG:
5266 exec_log(mach, inst);
5267 break;
5268
5269 case TGSI_OPCODE_MUL:
5270 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5271 break;
5272
5273 case TGSI_OPCODE_ADD:
5274 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5275 break;
5276
5277 case TGSI_OPCODE_DP3:
5278 exec_dp3(mach, inst);
5279 break;
5280
5281 case TGSI_OPCODE_DP4:
5282 exec_dp4(mach, inst);
5283 break;
5284
5285 case TGSI_OPCODE_DST:
5286 exec_dst(mach, inst);
5287 break;
5288
5289 case TGSI_OPCODE_MIN:
5290 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5291 break;
5292
5293 case TGSI_OPCODE_MAX:
5294 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5295 break;
5296
5297 case TGSI_OPCODE_SLT:
5298 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5299 break;
5300
5301 case TGSI_OPCODE_SGE:
5302 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5303 break;
5304
5305 case TGSI_OPCODE_MAD:
5306 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5307 break;
5308
5309 case TGSI_OPCODE_LRP:
5310 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5311 break;
5312
5313 case TGSI_OPCODE_SQRT:
5314 exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5315 break;
5316
5317 case TGSI_OPCODE_FRC:
5318 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5319 break;
5320
5321 case TGSI_OPCODE_FLR:
5322 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5323 break;
5324
5325 case TGSI_OPCODE_ROUND:
5326 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5327 break;
5328
5329 case TGSI_OPCODE_EX2:
5330 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5331 break;
5332
5333 case TGSI_OPCODE_LG2:
5334 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5335 break;
5336
5337 case TGSI_OPCODE_POW:
5338 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5339 break;
5340
5341 case TGSI_OPCODE_LDEXP:
5342 exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5343 break;
5344
5345 case TGSI_OPCODE_COS:
5346 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5347 break;
5348
5349 case TGSI_OPCODE_DDX_FINE:
5350 exec_vector_unary(mach, inst, micro_ddx_fine, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5351 break;
5352
5353 case TGSI_OPCODE_DDX:
5354 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5355 break;
5356
5357 case TGSI_OPCODE_DDY_FINE:
5358 exec_vector_unary(mach, inst, micro_ddy_fine, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5359 break;
5360
5361 case TGSI_OPCODE_DDY:
5362 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5363 break;
5364
5365 case TGSI_OPCODE_KILL:
5366 exec_kill (mach);
5367 break;
5368
5369 case TGSI_OPCODE_KILL_IF:
5370 exec_kill_if (mach, inst);
5371 break;
5372
5373 case TGSI_OPCODE_PK2H:
5374 exec_pk2h(mach, inst);
5375 break;
5376
5377 case TGSI_OPCODE_PK2US:
5378 assert (0);
5379 break;
5380
5381 case TGSI_OPCODE_PK4B:
5382 assert (0);
5383 break;
5384
5385 case TGSI_OPCODE_PK4UB:
5386 assert (0);
5387 break;
5388
5389 case TGSI_OPCODE_SEQ:
5390 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5391 break;
5392
5393 case TGSI_OPCODE_SGT:
5394 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5395 break;
5396
5397 case TGSI_OPCODE_SIN:
5398 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5399 break;
5400
5401 case TGSI_OPCODE_SLE:
5402 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5403 break;
5404
5405 case TGSI_OPCODE_SNE:
5406 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5407 break;
5408
5409 case TGSI_OPCODE_TEX:
5410 /* simple texture lookup */
5411 /* src[0] = texcoord */
5412 /* src[1] = sampler unit */
5413 exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);
5414 break;
5415
5416 case TGSI_OPCODE_TXB:
5417 /* Texture lookup with lod bias */
5418 /* src[0] = texcoord (src[0].w = LOD bias) */
5419 /* src[1] = sampler unit */
5420 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);
5421 break;
5422
5423 case TGSI_OPCODE_TXD:
5424 /* Texture lookup with explicit partial derivatives */
5425 /* src[0] = texcoord */
5426 /* src[1] = d[strq]/dx */
5427 /* src[2] = d[strq]/dy */
5428 /* src[3] = sampler unit */
5429 exec_txd(mach, inst);
5430 break;
5431
5432 case TGSI_OPCODE_TXL:
5433 /* Texture lookup with explicit LOD */
5434 /* src[0] = texcoord (src[0].w = LOD) */
5435 /* src[1] = sampler unit */
5436 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);
5437 break;
5438
5439 case TGSI_OPCODE_TXP:
5440 /* Texture lookup with projection */
5441 /* src[0] = texcoord (src[0].w = projection) */
5442 /* src[1] = sampler unit */
5443 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
5444 break;
5445
5446 case TGSI_OPCODE_TG4:
5447 /* src[0] = texcoord */
5448 /* src[1] = component */
5449 /* src[2] = sampler unit */
5450 exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
5451 break;
5452
5453 case TGSI_OPCODE_LODQ:
5454 /* src[0] = texcoord */
5455 /* src[1] = sampler unit */
5456 exec_lodq(mach, inst);
5457 break;
5458
5459 case TGSI_OPCODE_UP2H:
5460 exec_up2h(mach, inst);
5461 break;
5462
5463 case TGSI_OPCODE_UP2US:
5464 assert (0);
5465 break;
5466
5467 case TGSI_OPCODE_UP4B:
5468 assert (0);
5469 break;
5470
5471 case TGSI_OPCODE_UP4UB:
5472 assert (0);
5473 break;
5474
5475 case TGSI_OPCODE_ARR:
5476 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5477 break;
5478
5479 case TGSI_OPCODE_CAL:
5480 /* skip the call if no execution channels are enabled */
5481 if (mach->ExecMask) {
5482 /* do the call */
5483
5484 /* First, record the depths of the execution stacks.
5485 * This is important for deeply nested/looped return statements.
5486 * We have to unwind the stacks by the correct amount. For a
5487 * real code generator, we could determine the number of entries
5488 * to pop off each stack with simple static analysis and avoid
5489 * implementing this data structure at run time.
5490 */
5491 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
5492 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
5493 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
5494 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
5495 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
5496 /* note that PC was already incremented above */
5497 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
5498
5499 mach->CallStackTop++;
5500
5501 /* Second, push the Cond, Loop, Cont, Func stacks */
5502 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5503 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5504 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5505 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
5506 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5507 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
5508
5509 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5510 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5511 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5512 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
5513 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5514 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
5515
5516 /* Finally, jump to the subroutine. The label is a pointer
5517 * (an instruction number) to the BGNSUB instruction.
5518 */
5519 *pc = inst->Label.Label;
5520 assert(mach->Instructions[*pc].Instruction.Opcode
5521 == TGSI_OPCODE_BGNSUB);
5522 }
5523 break;
5524
5525 case TGSI_OPCODE_RET:
5526 mach->FuncMask &= ~mach->ExecMask;
5527 UPDATE_EXEC_MASK(mach);
5528
5529 if (mach->FuncMask == 0x0) {
5530 /* really return now (otherwise, keep executing) */
5531
5532 if (mach->CallStackTop == 0) {
5533 /* returning from main() */
5534 mach->CondStackTop = 0;
5535 mach->LoopStackTop = 0;
5536 mach->ContStackTop = 0;
5537 mach->LoopLabelStackTop = 0;
5538 mach->SwitchStackTop = 0;
5539 mach->BreakStackTop = 0;
5540 *pc = -1;
5541 return FALSE;
5542 }
5543
5544 assert(mach->CallStackTop > 0);
5545 mach->CallStackTop--;
5546
5547 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5548 mach->CondMask = mach->CondStack[mach->CondStackTop];
5549
5550 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5551 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5552
5553 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5554 mach->ContMask = mach->ContStack[mach->ContStackTop];
5555
5556 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5557 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5558
5559 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5560 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5561
5562 assert(mach->FuncStackTop > 0);
5563 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5564
5565 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5566
5567 UPDATE_EXEC_MASK(mach);
5568 }
5569 break;
5570
5571 case TGSI_OPCODE_SSG:
5572 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5573 break;
5574
5575 case TGSI_OPCODE_CMP:
5576 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5577 break;
5578
5579 case TGSI_OPCODE_DIV:
5580 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5581 break;
5582
5583 case TGSI_OPCODE_DP2:
5584 exec_dp2(mach, inst);
5585 break;
5586
5587 case TGSI_OPCODE_IF:
5588 /* push CondMask */
5589 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5590 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5591 FETCH( &r[0], 0, TGSI_CHAN_X );
5592 /* update CondMask */
5593 if( ! r[0].f[0] ) {
5594 mach->CondMask &= ~0x1;
5595 }
5596 if( ! r[0].f[1] ) {
5597 mach->CondMask &= ~0x2;
5598 }
5599 if( ! r[0].f[2] ) {
5600 mach->CondMask &= ~0x4;
5601 }
5602 if( ! r[0].f[3] ) {
5603 mach->CondMask &= ~0x8;
5604 }
5605 UPDATE_EXEC_MASK(mach);
5606 /* Todo: If CondMask==0, jump to ELSE */
5607 break;
5608
5609 case TGSI_OPCODE_UIF:
5610 /* push CondMask */
5611 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5612 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5613 IFETCH( &r[0], 0, TGSI_CHAN_X );
5614 /* update CondMask */
5615 if( ! r[0].u[0] ) {
5616 mach->CondMask &= ~0x1;
5617 }
5618 if( ! r[0].u[1] ) {
5619 mach->CondMask &= ~0x2;
5620 }
5621 if( ! r[0].u[2] ) {
5622 mach->CondMask &= ~0x4;
5623 }
5624 if( ! r[0].u[3] ) {
5625 mach->CondMask &= ~0x8;
5626 }
5627 UPDATE_EXEC_MASK(mach);
5628 /* Todo: If CondMask==0, jump to ELSE */
5629 break;
5630
5631 case TGSI_OPCODE_ELSE:
5632 /* invert CondMask wrt previous mask */
5633 {
5634 uint prevMask;
5635 assert(mach->CondStackTop > 0);
5636 prevMask = mach->CondStack[mach->CondStackTop - 1];
5637 mach->CondMask = ~mach->CondMask & prevMask;
5638 UPDATE_EXEC_MASK(mach);
5639 /* Todo: If CondMask==0, jump to ENDIF */
5640 }
5641 break;
5642
5643 case TGSI_OPCODE_ENDIF:
5644 /* pop CondMask */
5645 assert(mach->CondStackTop > 0);
5646 mach->CondMask = mach->CondStack[--mach->CondStackTop];
5647 UPDATE_EXEC_MASK(mach);
5648 break;
5649
5650 case TGSI_OPCODE_END:
5651 /* make sure we end primitives which haven't
5652 * been explicitly emitted */
5653 conditional_emit_primitive(mach);
5654 /* halt execution */
5655 *pc = -1;
5656 break;
5657
5658 case TGSI_OPCODE_CEIL:
5659 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5660 break;
5661
5662 case TGSI_OPCODE_I2F:
5663 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
5664 break;
5665
5666 case TGSI_OPCODE_NOT:
5667 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5668 break;
5669
5670 case TGSI_OPCODE_TRUNC:
5671 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5672 break;
5673
5674 case TGSI_OPCODE_SHL:
5675 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5676 break;
5677
5678 case TGSI_OPCODE_AND:
5679 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5680 break;
5681
5682 case TGSI_OPCODE_OR:
5683 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5684 break;
5685
5686 case TGSI_OPCODE_MOD:
5687 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5688 break;
5689
5690 case TGSI_OPCODE_XOR:
5691 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5692 break;
5693
5694 case TGSI_OPCODE_TXF:
5695 exec_txf(mach, inst);
5696 break;
5697
5698 case TGSI_OPCODE_TXQ:
5699 exec_txq(mach, inst);
5700 break;
5701
5702 case TGSI_OPCODE_EMIT:
5703 emit_vertex(mach, inst);
5704 break;
5705
5706 case TGSI_OPCODE_ENDPRIM:
5707 emit_primitive(mach, inst);
5708 break;
5709
5710 case TGSI_OPCODE_BGNLOOP:
5711 /* push LoopMask and ContMasks */
5712 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5713 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5714 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5715 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5716
5717 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5718 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5719 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
5720 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5721 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
5722 break;
5723
5724 case TGSI_OPCODE_ENDLOOP:
5725 /* Restore ContMask, but don't pop */
5726 assert(mach->ContStackTop > 0);
5727 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
5728 UPDATE_EXEC_MASK(mach);
5729 if (mach->ExecMask) {
5730 /* repeat loop: jump to instruction just past BGNLOOP */
5731 assert(mach->LoopLabelStackTop > 0);
5732 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
5733 }
5734 else {
5735 /* exit loop: pop LoopMask */
5736 assert(mach->LoopStackTop > 0);
5737 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
5738 /* pop ContMask */
5739 assert(mach->ContStackTop > 0);
5740 mach->ContMask = mach->ContStack[--mach->ContStackTop];
5741 assert(mach->LoopLabelStackTop > 0);
5742 --mach->LoopLabelStackTop;
5743
5744 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
5745 }
5746 UPDATE_EXEC_MASK(mach);
5747 break;
5748
5749 case TGSI_OPCODE_BRK:
5750 exec_break(mach);
5751 break;
5752
5753 case TGSI_OPCODE_CONT:
5754 /* turn off cont channels for each enabled exec channel */
5755 mach->ContMask &= ~mach->ExecMask;
5756 /* Todo: if mach->LoopMask == 0, jump to end of loop */
5757 UPDATE_EXEC_MASK(mach);
5758 break;
5759
5760 case TGSI_OPCODE_BGNSUB:
5761 /* no-op */
5762 break;
5763
5764 case TGSI_OPCODE_ENDSUB:
5765 /*
5766 * XXX: This really should be a no-op. We should never reach this opcode.
5767 */
5768
5769 assert(mach->CallStackTop > 0);
5770 mach->CallStackTop--;
5771
5772 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5773 mach->CondMask = mach->CondStack[mach->CondStackTop];
5774
5775 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5776 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5777
5778 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5779 mach->ContMask = mach->ContStack[mach->ContStackTop];
5780
5781 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5782 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5783
5784 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5785 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5786
5787 assert(mach->FuncStackTop > 0);
5788 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5789
5790 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5791
5792 UPDATE_EXEC_MASK(mach);
5793 break;
5794
5795 case TGSI_OPCODE_NOP:
5796 break;
5797
5798 case TGSI_OPCODE_F2I:
5799 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5800 break;
5801
5802 case TGSI_OPCODE_FSEQ:
5803 exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5804 break;
5805
5806 case TGSI_OPCODE_FSGE:
5807 exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5808 break;
5809
5810 case TGSI_OPCODE_FSLT:
5811 exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5812 break;
5813
5814 case TGSI_OPCODE_FSNE:
5815 exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5816 break;
5817
5818 case TGSI_OPCODE_IDIV:
5819 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5820 break;
5821
5822 case TGSI_OPCODE_IMAX:
5823 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5824 break;
5825
5826 case TGSI_OPCODE_IMIN:
5827 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5828 break;
5829
5830 case TGSI_OPCODE_INEG:
5831 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5832 break;
5833
5834 case TGSI_OPCODE_ISGE:
5835 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5836 break;
5837
5838 case TGSI_OPCODE_ISHR:
5839 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5840 break;
5841
5842 case TGSI_OPCODE_ISLT:
5843 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5844 break;
5845
5846 case TGSI_OPCODE_F2U:
5847 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5848 break;
5849
5850 case TGSI_OPCODE_U2F:
5851 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
5852 break;
5853
5854 case TGSI_OPCODE_UADD:
5855 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5856 break;
5857
5858 case TGSI_OPCODE_UDIV:
5859 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5860 break;
5861
5862 case TGSI_OPCODE_UMAD:
5863 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5864 break;
5865
5866 case TGSI_OPCODE_UMAX:
5867 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5868 break;
5869
5870 case TGSI_OPCODE_UMIN:
5871 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5872 break;
5873
5874 case TGSI_OPCODE_UMOD:
5875 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5876 break;
5877
5878 case TGSI_OPCODE_UMUL:
5879 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5880 break;
5881
5882 case TGSI_OPCODE_IMUL_HI:
5883 exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5884 break;
5885
5886 case TGSI_OPCODE_UMUL_HI:
5887 exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5888 break;
5889
5890 case TGSI_OPCODE_USEQ:
5891 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5892 break;
5893
5894 case TGSI_OPCODE_USGE:
5895 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5896 break;
5897
5898 case TGSI_OPCODE_USHR:
5899 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5900 break;
5901
5902 case TGSI_OPCODE_USLT:
5903 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5904 break;
5905
5906 case TGSI_OPCODE_USNE:
5907 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5908 break;
5909
5910 case TGSI_OPCODE_SWITCH:
5911 exec_switch(mach, inst);
5912 break;
5913
5914 case TGSI_OPCODE_CASE:
5915 exec_case(mach, inst);
5916 break;
5917
5918 case TGSI_OPCODE_DEFAULT:
5919 exec_default(mach);
5920 break;
5921
5922 case TGSI_OPCODE_ENDSWITCH:
5923 exec_endswitch(mach);
5924 break;
5925
5926 case TGSI_OPCODE_SAMPLE_I:
5927 exec_txf(mach, inst);
5928 break;
5929
5930 case TGSI_OPCODE_SAMPLE_I_MS:
5931 exec_txf(mach, inst);
5932 break;
5933
5934 case TGSI_OPCODE_SAMPLE:
5935 exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE);
5936 break;
5937
5938 case TGSI_OPCODE_SAMPLE_B:
5939 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE);
5940 break;
5941
5942 case TGSI_OPCODE_SAMPLE_C:
5943 exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE);
5944 break;
5945
5946 case TGSI_OPCODE_SAMPLE_C_LZ:
5947 exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE);
5948 break;
5949
5950 case TGSI_OPCODE_SAMPLE_D:
5951 exec_sample_d(mach, inst);
5952 break;
5953
5954 case TGSI_OPCODE_SAMPLE_L:
5955 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE);
5956 break;
5957
5958 case TGSI_OPCODE_GATHER4:
5959 exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE);
5960 break;
5961
5962 case TGSI_OPCODE_SVIEWINFO:
5963 exec_txq(mach, inst);
5964 break;
5965
5966 case TGSI_OPCODE_SAMPLE_POS:
5967 assert(0);
5968 break;
5969
5970 case TGSI_OPCODE_SAMPLE_INFO:
5971 assert(0);
5972 break;
5973
5974 case TGSI_OPCODE_LOD:
5975 exec_lodq(mach, inst);
5976 break;
5977
5978 case TGSI_OPCODE_UARL:
5979 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
5980 break;
5981
5982 case TGSI_OPCODE_UCMP:
5983 exec_ucmp(mach, inst);
5984 break;
5985
5986 case TGSI_OPCODE_IABS:
5987 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5988 break;
5989
5990 case TGSI_OPCODE_ISSG:
5991 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5992 break;
5993
5994 case TGSI_OPCODE_TEX2:
5995 /* simple texture lookup */
5996 /* src[0] = texcoord */
5997 /* src[1] = compare */
5998 /* src[2] = sampler unit */
5999 exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);
6000 break;
6001 case TGSI_OPCODE_TXB2:
6002 /* simple texture lookup */
6003 /* src[0] = texcoord */
6004 /* src[1] = bias */
6005 /* src[2] = sampler unit */
6006 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);
6007 break;
6008 case TGSI_OPCODE_TXL2:
6009 /* simple texture lookup */
6010 /* src[0] = texcoord */
6011 /* src[1] = lod */
6012 /* src[2] = sampler unit */
6013 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
6014 break;
6015
6016 case TGSI_OPCODE_IBFE:
6017 exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
6018 break;
6019 case TGSI_OPCODE_UBFE:
6020 exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
6021 break;
6022 case TGSI_OPCODE_BFI:
6023 exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
6024 break;
6025 case TGSI_OPCODE_BREV:
6026 exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
6027 break;
6028 case TGSI_OPCODE_POPC:
6029 exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
6030 break;
6031 case TGSI_OPCODE_LSB:
6032 exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
6033 break;
6034 case TGSI_OPCODE_IMSB:
6035 exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
6036 break;
6037 case TGSI_OPCODE_UMSB:
6038 exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
6039 break;
6040
6041 case TGSI_OPCODE_F2D:
6042 exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
6043 break;
6044
6045 case TGSI_OPCODE_D2F:
6046 exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT);
6047 break;
6048
6049 case TGSI_OPCODE_DABS:
6050 exec_double_unary(mach, inst, micro_dabs);
6051 break;
6052
6053 case TGSI_OPCODE_DNEG:
6054 exec_double_unary(mach, inst, micro_dneg);
6055 break;
6056
6057 case TGSI_OPCODE_DADD:
6058 exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE);
6059 break;
6060
6061 case TGSI_OPCODE_DDIV:
6062 exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE);
6063 break;
6064
6065 case TGSI_OPCODE_DMUL:
6066 exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE);
6067 break;
6068
6069 case TGSI_OPCODE_DMAX:
6070 exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE);
6071 break;
6072
6073 case TGSI_OPCODE_DMIN:
6074 exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE);
6075 break;
6076
6077 case TGSI_OPCODE_DSLT:
6078 exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT);
6079 break;
6080
6081 case TGSI_OPCODE_DSGE:
6082 exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT);
6083 break;
6084
6085 case TGSI_OPCODE_DSEQ:
6086 exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT);
6087 break;
6088
6089 case TGSI_OPCODE_DSNE:
6090 exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT);
6091 break;
6092
6093 case TGSI_OPCODE_DRCP:
6094 exec_double_unary(mach, inst, micro_drcp);
6095 break;
6096
6097 case TGSI_OPCODE_DSQRT:
6098 exec_double_unary(mach, inst, micro_dsqrt);
6099 break;
6100
6101 case TGSI_OPCODE_DRSQ:
6102 exec_double_unary(mach, inst, micro_drsq);
6103 break;
6104
6105 case TGSI_OPCODE_DMAD:
6106 exec_double_trinary(mach, inst, micro_dmad);
6107 break;
6108
6109 case TGSI_OPCODE_DFRAC:
6110 exec_double_unary(mach, inst, micro_dfrac);
6111 break;
6112
6113 case TGSI_OPCODE_DLDEXP:
6114 exec_dldexp(mach, inst);
6115 break;
6116
6117 case TGSI_OPCODE_DFRACEXP:
6118 exec_dfracexp(mach, inst);
6119 break;
6120
6121 case TGSI_OPCODE_I2D:
6122 exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT);
6123 break;
6124
6125 case TGSI_OPCODE_D2I:
6126 exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT);
6127 break;
6128
6129 case TGSI_OPCODE_U2D:
6130 exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT);
6131 break;
6132
6133 case TGSI_OPCODE_D2U:
6134 exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT);
6135 break;
6136
6137 case TGSI_OPCODE_LOAD:
6138 exec_load(mach, inst);
6139 break;
6140
6141 case TGSI_OPCODE_STORE:
6142 exec_store(mach, inst);
6143 break;
6144
6145 case TGSI_OPCODE_ATOMUADD:
6146 case TGSI_OPCODE_ATOMXCHG:
6147 case TGSI_OPCODE_ATOMCAS:
6148 case TGSI_OPCODE_ATOMAND:
6149 case TGSI_OPCODE_ATOMOR:
6150 case TGSI_OPCODE_ATOMXOR:
6151 case TGSI_OPCODE_ATOMUMIN:
6152 case TGSI_OPCODE_ATOMUMAX:
6153 case TGSI_OPCODE_ATOMIMIN:
6154 case TGSI_OPCODE_ATOMIMAX:
6155 case TGSI_OPCODE_ATOMFADD:
6156 exec_atomop(mach, inst);
6157 break;
6158
6159 case TGSI_OPCODE_RESQ:
6160 exec_resq(mach, inst);
6161 break;
6162 case TGSI_OPCODE_BARRIER:
6163 case TGSI_OPCODE_MEMBAR:
6164 return TRUE;
6165 break;
6166
6167 case TGSI_OPCODE_I64ABS:
6168 exec_double_unary(mach, inst, micro_i64abs);
6169 break;
6170
6171 case TGSI_OPCODE_I64SSG:
6172 exec_double_unary(mach, inst, micro_i64sgn);
6173 break;
6174
6175 case TGSI_OPCODE_I64NEG:
6176 exec_double_unary(mach, inst, micro_i64neg);
6177 break;
6178
6179 case TGSI_OPCODE_U64SEQ:
6180 exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
6181 break;
6182
6183 case TGSI_OPCODE_U64SNE:
6184 exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
6185 break;
6186
6187 case TGSI_OPCODE_I64SLT:
6188 exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
6189 break;
6190 case TGSI_OPCODE_U64SLT:
6191 exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
6192 break;
6193
6194 case TGSI_OPCODE_I64SGE:
6195 exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
6196 break;
6197 case TGSI_OPCODE_U64SGE:
6198 exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
6199 break;
6200
6201 case TGSI_OPCODE_I64MIN:
6202 exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
6203 break;
6204 case TGSI_OPCODE_U64MIN:
6205 exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
6206 break;
6207 case TGSI_OPCODE_I64MAX:
6208 exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
6209 break;
6210 case TGSI_OPCODE_U64MAX:
6211 exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
6212 break;
6213 case TGSI_OPCODE_U64ADD:
6214 exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
6215 break;
6216 case TGSI_OPCODE_U64MUL:
6217 exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
6218 break;
6219 case TGSI_OPCODE_U64SHL:
6220 exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
6221 break;
6222 case TGSI_OPCODE_I64SHR:
6223 exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
6224 break;
6225 case TGSI_OPCODE_U64SHR:
6226 exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
6227 break;
6228 case TGSI_OPCODE_U64DIV:
6229 exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
6230 break;
6231 case TGSI_OPCODE_I64DIV:
6232 exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
6233 break;
6234 case TGSI_OPCODE_U64MOD:
6235 exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
6236 break;
6237 case TGSI_OPCODE_I64MOD:
6238 exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
6239 break;
6240
6241 case TGSI_OPCODE_F2U64:
6242 exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
6243 break;
6244
6245 case TGSI_OPCODE_F2I64:
6246 exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
6247 break;
6248
6249 case TGSI_OPCODE_U2I64:
6250 exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT);
6251 break;
6252 case TGSI_OPCODE_I2I64:
6253 exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
6254 break;
6255
6256 case TGSI_OPCODE_D2U64:
6257 exec_double_unary(mach, inst, micro_d2u64);
6258 break;
6259
6260 case TGSI_OPCODE_D2I64:
6261 exec_double_unary(mach, inst, micro_d2i64);
6262 break;
6263
6264 case TGSI_OPCODE_U642F:
6265 exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT);
6266 break;
6267 case TGSI_OPCODE_I642F:
6268 exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT);
6269 break;
6270
6271 case TGSI_OPCODE_U642D:
6272 exec_double_unary(mach, inst, micro_u642d);
6273 break;
6274 case TGSI_OPCODE_I642D:
6275 exec_double_unary(mach, inst, micro_i642d);
6276 break;
6277 case TGSI_OPCODE_INTERP_SAMPLE:
6278 exec_interp_at_sample(mach, inst);
6279 break;
6280 case TGSI_OPCODE_INTERP_OFFSET:
6281 exec_interp_at_offset(mach, inst);
6282 break;
6283 case TGSI_OPCODE_INTERP_CENTROID:
6284 exec_interp_at_centroid(mach, inst);
6285 break;
6286 default:
6287 assert( 0 );
6288 }
6289 return FALSE;
6290 }
6291
6292 static void
6293 tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)
6294 {
6295 uint default_mask = 0xf;
6296
6297 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
6298 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
6299
6300 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
6301 for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) {
6302 mach->Temps[temp_prim_idxs[i].idx].xyzw[temp_prim_idxs[i].chan].u[0] = 0;
6303 mach->Primitives[i][0] = 0;
6304 }
6305 /* GS runs on a single primitive for now */
6306 default_mask = 0x1;
6307 }
6308
6309 if (mach->NonHelperMask == 0)
6310 mach->NonHelperMask = default_mask;
6311 mach->CondMask = default_mask;
6312 mach->LoopMask = default_mask;
6313 mach->ContMask = default_mask;
6314 mach->FuncMask = default_mask;
6315 mach->ExecMask = default_mask;
6316
6317 mach->Switch.mask = default_mask;
6318
6319 assert(mach->CondStackTop == 0);
6320 assert(mach->LoopStackTop == 0);
6321 assert(mach->ContStackTop == 0);
6322 assert(mach->SwitchStackTop == 0);
6323 assert(mach->BreakStackTop == 0);
6324 assert(mach->CallStackTop == 0);
6325 }
6326
6327 /**
6328 * Run TGSI interpreter.
6329 * \return bitmask of "alive" quad components
6330 */
6331 uint
6332 tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc )
6333 {
6334 uint i;
6335
6336 mach->pc = start_pc;
6337
6338 if (!start_pc) {
6339 tgsi_exec_machine_setup_masks(mach);
6340
6341 /* execute declarations (interpolants) */
6342 for (i = 0; i < mach->NumDeclarations; i++) {
6343 exec_declaration( mach, mach->Declarations+i );
6344 }
6345 }
6346
6347 {
6348 #if DEBUG_EXECUTION
6349 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
6350 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
6351 uint inst = 1;
6352
6353 if (!start_pc) {
6354 memset(mach->Temps, 0, sizeof(temps));
6355 if (mach->Outputs)
6356 memset(mach->Outputs, 0, sizeof(outputs));
6357 memset(temps, 0, sizeof(temps));
6358 memset(outputs, 0, sizeof(outputs));
6359 }
6360 #endif
6361
6362 /* execute instructions, until pc is set to -1 */
6363 while (mach->pc != -1) {
6364 boolean barrier_hit;
6365 #if DEBUG_EXECUTION
6366 uint i;
6367
6368 tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++);
6369 #endif
6370
6371 assert(mach->pc < (int) mach->NumInstructions);
6372 barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc);
6373
6374 /* for compute shaders if we hit a barrier return now for later rescheduling */
6375 if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE)
6376 return 0;
6377
6378 #if DEBUG_EXECUTION
6379 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
6380 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
6381 uint j;
6382
6383 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
6384 debug_printf("TEMP[%2u] = ", i);
6385 for (j = 0; j < 4; j++) {
6386 if (j > 0) {
6387 debug_printf(" ");
6388 }
6389 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6390 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
6391 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
6392 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
6393 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
6394 }
6395 }
6396 }
6397 if (mach->Outputs) {
6398 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
6399 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
6400 uint j;
6401
6402 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
6403 debug_printf("OUT[%2u] = ", i);
6404 for (j = 0; j < 4; j++) {
6405 if (j > 0) {
6406 debug_printf(" ");
6407 }
6408 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6409 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
6410 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
6411 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
6412 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
6413 }
6414 }
6415 }
6416 }
6417 #endif
6418 }
6419 }
6420
6421 #if 0
6422 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
6423 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
6424 /*
6425 * Scale back depth component.
6426 */
6427 for (i = 0; i < 4; i++)
6428 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
6429 }
6430 #endif
6431
6432 /* Strictly speaking, these assertions aren't really needed but they
6433 * can potentially catch some bugs in the control flow code.
6434 */
6435 assert(mach->CondStackTop == 0);
6436 assert(mach->LoopStackTop == 0);
6437 assert(mach->ContStackTop == 0);
6438 assert(mach->SwitchStackTop == 0);
6439 assert(mach->BreakStackTop == 0);
6440 assert(mach->CallStackTop == 0);
6441
6442 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
6443 }