1 /**************************************************************************
3 * Copyright 2007-2008 VMware, Inc.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
30 * TGSI interpreter/executor.
32 * Flow control information:
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
44 * The ExecMask is computed from three other masks (CondMask, LoopMask and
45 * ContMask) which are controlled by the flow control instructions (namely:
46 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_half.h"
62 #include "util/u_memory.h"
63 #include "util/u_math.h"
64 #include "util/rounding.h"
/* Compile-time switch; defined as 0 here.  NOTE(review): its users are not
 * visible in this part of the file. */
67 #define DEBUG_EXECUTION 0
/* Lane indices used to address the four values of a quad by position;
 * micro_ddx/micro_ddy below index the 'f' array with them.
 * NOTE(review): presumably a 2x2 pixel block - confirm against the
 * rasterizer that fills the quad. */
72 #define TILE_TOP_LEFT 0
73 #define TILE_TOP_RIGHT 1
74 #define TILE_BOTTOM_LEFT 2
75 #define TILE_BOTTOM_RIGHT 3
77 union tgsi_double_channel
{
78 double d
[TGSI_QUAD_SIZE
];
79 unsigned u
[TGSI_QUAD_SIZE
][2];
80 uint64_t u64
[TGSI_QUAD_SIZE
];
81 int64_t i64
[TGSI_QUAD_SIZE
];
84 struct tgsi_double_vector
{
85 union tgsi_double_channel xy
;
86 union tgsi_double_channel zw
;
90 micro_abs(union tgsi_exec_channel
*dst
,
91 const union tgsi_exec_channel
*src
)
93 dst
->f
[0] = fabsf(src
->f
[0]);
94 dst
->f
[1] = fabsf(src
->f
[1]);
95 dst
->f
[2] = fabsf(src
->f
[2]);
96 dst
->f
[3] = fabsf(src
->f
[3]);
100 micro_arl(union tgsi_exec_channel
*dst
,
101 const union tgsi_exec_channel
*src
)
103 dst
->i
[0] = (int)floorf(src
->f
[0]);
104 dst
->i
[1] = (int)floorf(src
->f
[1]);
105 dst
->i
[2] = (int)floorf(src
->f
[2]);
106 dst
->i
[3] = (int)floorf(src
->f
[3]);
110 micro_arr(union tgsi_exec_channel
*dst
,
111 const union tgsi_exec_channel
*src
)
113 dst
->i
[0] = (int)floorf(src
->f
[0] + 0.5f
);
114 dst
->i
[1] = (int)floorf(src
->f
[1] + 0.5f
);
115 dst
->i
[2] = (int)floorf(src
->f
[2] + 0.5f
);
116 dst
->i
[3] = (int)floorf(src
->f
[3] + 0.5f
);
120 micro_ceil(union tgsi_exec_channel
*dst
,
121 const union tgsi_exec_channel
*src
)
123 dst
->f
[0] = ceilf(src
->f
[0]);
124 dst
->f
[1] = ceilf(src
->f
[1]);
125 dst
->f
[2] = ceilf(src
->f
[2]);
126 dst
->f
[3] = ceilf(src
->f
[3]);
130 micro_cmp(union tgsi_exec_channel
*dst
,
131 const union tgsi_exec_channel
*src0
,
132 const union tgsi_exec_channel
*src1
,
133 const union tgsi_exec_channel
*src2
)
135 dst
->f
[0] = src0
->f
[0] < 0.0f
? src1
->f
[0] : src2
->f
[0];
136 dst
->f
[1] = src0
->f
[1] < 0.0f
? src1
->f
[1] : src2
->f
[1];
137 dst
->f
[2] = src0
->f
[2] < 0.0f
? src1
->f
[2] : src2
->f
[2];
138 dst
->f
[3] = src0
->f
[3] < 0.0f
? src1
->f
[3] : src2
->f
[3];
142 micro_cos(union tgsi_exec_channel
*dst
,
143 const union tgsi_exec_channel
*src
)
145 dst
->f
[0] = cosf(src
->f
[0]);
146 dst
->f
[1] = cosf(src
->f
[1]);
147 dst
->f
[2] = cosf(src
->f
[2]);
148 dst
->f
[3] = cosf(src
->f
[3]);
152 micro_d2f(union tgsi_exec_channel
*dst
,
153 const union tgsi_double_channel
*src
)
155 dst
->f
[0] = (float)src
->d
[0];
156 dst
->f
[1] = (float)src
->d
[1];
157 dst
->f
[2] = (float)src
->d
[2];
158 dst
->f
[3] = (float)src
->d
[3];
162 micro_d2i(union tgsi_exec_channel
*dst
,
163 const union tgsi_double_channel
*src
)
165 dst
->i
[0] = (int)src
->d
[0];
166 dst
->i
[1] = (int)src
->d
[1];
167 dst
->i
[2] = (int)src
->d
[2];
168 dst
->i
[3] = (int)src
->d
[3];
172 micro_d2u(union tgsi_exec_channel
*dst
,
173 const union tgsi_double_channel
*src
)
175 dst
->u
[0] = (unsigned)src
->d
[0];
176 dst
->u
[1] = (unsigned)src
->d
[1];
177 dst
->u
[2] = (unsigned)src
->d
[2];
178 dst
->u
[3] = (unsigned)src
->d
[3];
181 micro_dabs(union tgsi_double_channel
*dst
,
182 const union tgsi_double_channel
*src
)
184 dst
->d
[0] = src
->d
[0] >= 0.0 ? src
->d
[0] : -src
->d
[0];
185 dst
->d
[1] = src
->d
[1] >= 0.0 ? src
->d
[1] : -src
->d
[1];
186 dst
->d
[2] = src
->d
[2] >= 0.0 ? src
->d
[2] : -src
->d
[2];
187 dst
->d
[3] = src
->d
[3] >= 0.0 ? src
->d
[3] : -src
->d
[3];
191 micro_dadd(union tgsi_double_channel
*dst
,
192 const union tgsi_double_channel
*src
)
194 dst
->d
[0] = src
[0].d
[0] + src
[1].d
[0];
195 dst
->d
[1] = src
[0].d
[1] + src
[1].d
[1];
196 dst
->d
[2] = src
[0].d
[2] + src
[1].d
[2];
197 dst
->d
[3] = src
[0].d
[3] + src
[1].d
[3];
201 micro_ddiv(union tgsi_double_channel
*dst
,
202 const union tgsi_double_channel
*src
)
204 dst
->d
[0] = src
[0].d
[0] / src
[1].d
[0];
205 dst
->d
[1] = src
[0].d
[1] / src
[1].d
[1];
206 dst
->d
[2] = src
[0].d
[2] / src
[1].d
[2];
207 dst
->d
[3] = src
[0].d
[3] / src
[1].d
[3];
/* Horizontal difference of a quad: stores
 * src[TILE_BOTTOM_RIGHT] - src[TILE_BOTTOM_LEFT] into dst->f[3].
 * NOTE(review): the embedded line numbers jump from 212 to 217, so further
 * statements (presumably the stores to the other three lanes) are not
 * visible here - confirm against the full file. */
211 micro_ddx(union tgsi_exec_channel
*dst
,
212 const union tgsi_exec_channel
*src
)
217 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
/* Vertical difference of a quad: stores
 * src[TILE_BOTTOM_LEFT] - src[TILE_TOP_LEFT] into dst->f[3].
 * NOTE(review): the embedded line numbers jump from 222 to 227, so further
 * statements (presumably the stores to the other three lanes) are not
 * visible here - confirm against the full file. */
221 micro_ddy(union tgsi_exec_channel
*dst
,
222 const union tgsi_exec_channel
*src
)
227 dst
->f
[3] = src
->f
[TILE_BOTTOM_LEFT
] - src
->f
[TILE_TOP_LEFT
];
231 micro_dmul(union tgsi_double_channel
*dst
,
232 const union tgsi_double_channel
*src
)
234 dst
->d
[0] = src
[0].d
[0] * src
[1].d
[0];
235 dst
->d
[1] = src
[0].d
[1] * src
[1].d
[1];
236 dst
->d
[2] = src
[0].d
[2] * src
[1].d
[2];
237 dst
->d
[3] = src
[0].d
[3] * src
[1].d
[3];
241 micro_dmax(union tgsi_double_channel
*dst
,
242 const union tgsi_double_channel
*src
)
244 dst
->d
[0] = src
[0].d
[0] > src
[1].d
[0] ? src
[0].d
[0] : src
[1].d
[0];
245 dst
->d
[1] = src
[0].d
[1] > src
[1].d
[1] ? src
[0].d
[1] : src
[1].d
[1];
246 dst
->d
[2] = src
[0].d
[2] > src
[1].d
[2] ? src
[0].d
[2] : src
[1].d
[2];
247 dst
->d
[3] = src
[0].d
[3] > src
[1].d
[3] ? src
[0].d
[3] : src
[1].d
[3];
251 micro_dmin(union tgsi_double_channel
*dst
,
252 const union tgsi_double_channel
*src
)
254 dst
->d
[0] = src
[0].d
[0] < src
[1].d
[0] ? src
[0].d
[0] : src
[1].d
[0];
255 dst
->d
[1] = src
[0].d
[1] < src
[1].d
[1] ? src
[0].d
[1] : src
[1].d
[1];
256 dst
->d
[2] = src
[0].d
[2] < src
[1].d
[2] ? src
[0].d
[2] : src
[1].d
[2];
257 dst
->d
[3] = src
[0].d
[3] < src
[1].d
[3] ? src
[0].d
[3] : src
[1].d
[3];
261 micro_dneg(union tgsi_double_channel
*dst
,
262 const union tgsi_double_channel
*src
)
264 dst
->d
[0] = -src
->d
[0];
265 dst
->d
[1] = -src
->d
[1];
266 dst
->d
[2] = -src
->d
[2];
267 dst
->d
[3] = -src
->d
[3];
271 micro_dslt(union tgsi_double_channel
*dst
,
272 const union tgsi_double_channel
*src
)
274 dst
->u
[0][0] = src
[0].d
[0] < src
[1].d
[0] ? ~0U : 0U;
275 dst
->u
[1][0] = src
[0].d
[1] < src
[1].d
[1] ? ~0U : 0U;
276 dst
->u
[2][0] = src
[0].d
[2] < src
[1].d
[2] ? ~0U : 0U;
277 dst
->u
[3][0] = src
[0].d
[3] < src
[1].d
[3] ? ~0U : 0U;
281 micro_dsne(union tgsi_double_channel
*dst
,
282 const union tgsi_double_channel
*src
)
284 dst
->u
[0][0] = src
[0].d
[0] != src
[1].d
[0] ? ~0U : 0U;
285 dst
->u
[1][0] = src
[0].d
[1] != src
[1].d
[1] ? ~0U : 0U;
286 dst
->u
[2][0] = src
[0].d
[2] != src
[1].d
[2] ? ~0U : 0U;
287 dst
->u
[3][0] = src
[0].d
[3] != src
[1].d
[3] ? ~0U : 0U;
291 micro_dsge(union tgsi_double_channel
*dst
,
292 const union tgsi_double_channel
*src
)
294 dst
->u
[0][0] = src
[0].d
[0] >= src
[1].d
[0] ? ~0U : 0U;
295 dst
->u
[1][0] = src
[0].d
[1] >= src
[1].d
[1] ? ~0U : 0U;
296 dst
->u
[2][0] = src
[0].d
[2] >= src
[1].d
[2] ? ~0U : 0U;
297 dst
->u
[3][0] = src
[0].d
[3] >= src
[1].d
[3] ? ~0U : 0U;
301 micro_dseq(union tgsi_double_channel
*dst
,
302 const union tgsi_double_channel
*src
)
304 dst
->u
[0][0] = src
[0].d
[0] == src
[1].d
[0] ? ~0U : 0U;
305 dst
->u
[1][0] = src
[0].d
[1] == src
[1].d
[1] ? ~0U : 0U;
306 dst
->u
[2][0] = src
[0].d
[2] == src
[1].d
[2] ? ~0U : 0U;
307 dst
->u
[3][0] = src
[0].d
[3] == src
[1].d
[3] ? ~0U : 0U;
311 micro_drcp(union tgsi_double_channel
*dst
,
312 const union tgsi_double_channel
*src
)
314 dst
->d
[0] = 1.0 / src
->d
[0];
315 dst
->d
[1] = 1.0 / src
->d
[1];
316 dst
->d
[2] = 1.0 / src
->d
[2];
317 dst
->d
[3] = 1.0 / src
->d
[3];
321 micro_dsqrt(union tgsi_double_channel
*dst
,
322 const union tgsi_double_channel
*src
)
324 dst
->d
[0] = sqrt(src
->d
[0]);
325 dst
->d
[1] = sqrt(src
->d
[1]);
326 dst
->d
[2] = sqrt(src
->d
[2]);
327 dst
->d
[3] = sqrt(src
->d
[3]);
331 micro_drsq(union tgsi_double_channel
*dst
,
332 const union tgsi_double_channel
*src
)
334 dst
->d
[0] = 1.0 / sqrt(src
->d
[0]);
335 dst
->d
[1] = 1.0 / sqrt(src
->d
[1]);
336 dst
->d
[2] = 1.0 / sqrt(src
->d
[2]);
337 dst
->d
[3] = 1.0 / sqrt(src
->d
[3]);
341 micro_dmad(union tgsi_double_channel
*dst
,
342 const union tgsi_double_channel
*src
)
344 dst
->d
[0] = src
[0].d
[0] * src
[1].d
[0] + src
[2].d
[0];
345 dst
->d
[1] = src
[0].d
[1] * src
[1].d
[1] + src
[2].d
[1];
346 dst
->d
[2] = src
[0].d
[2] * src
[1].d
[2] + src
[2].d
[2];
347 dst
->d
[3] = src
[0].d
[3] * src
[1].d
[3] + src
[2].d
[3];
351 micro_dfrac(union tgsi_double_channel
*dst
,
352 const union tgsi_double_channel
*src
)
354 dst
->d
[0] = src
->d
[0] - floor(src
->d
[0]);
355 dst
->d
[1] = src
->d
[1] - floor(src
->d
[1]);
356 dst
->d
[2] = src
->d
[2] - floor(src
->d
[2]);
357 dst
->d
[3] = src
->d
[3] - floor(src
->d
[3]);
361 micro_dldexp(union tgsi_double_channel
*dst
,
362 const union tgsi_double_channel
*src0
,
363 union tgsi_exec_channel
*src1
)
365 dst
->d
[0] = ldexp(src0
->d
[0], src1
->i
[0]);
366 dst
->d
[1] = ldexp(src0
->d
[1], src1
->i
[1]);
367 dst
->d
[2] = ldexp(src0
->d
[2], src1
->i
[2]);
368 dst
->d
[3] = ldexp(src0
->d
[3], src1
->i
[3]);
372 micro_dfracexp(union tgsi_double_channel
*dst
,
373 union tgsi_exec_channel
*dst_exp
,
374 const union tgsi_double_channel
*src
)
376 dst
->d
[0] = frexp(src
->d
[0], &dst_exp
->i
[0]);
377 dst
->d
[1] = frexp(src
->d
[1], &dst_exp
->i
[1]);
378 dst
->d
[2] = frexp(src
->d
[2], &dst_exp
->i
[2]);
379 dst
->d
[3] = frexp(src
->d
[3], &dst_exp
->i
[3]);
/* dst = 2^src per lane.  Two variants are visible: one built on
 * util_fast_exp2() and one built on powf(2.0f, x), the latter preceded by
 * code that clamps the input into [-126.99999, 127.99999].
 * NOTE(review): the conditional-compilation lines that select between the
 * variants, the declaration of 'i', an 'else' branch and the statement
 * that would make the clamped copy take effect are not visible in this
 * view (only the closing "#endif" for FAST_MATH is) - confirm against the
 * full file before editing. */
383 micro_exp2(union tgsi_exec_channel
*dst
,
384 const union tgsi_exec_channel
*src
)
/* Variant 1: fast approximation helper. */
387 dst
->f
[0] = util_fast_exp2(src
->f
[0]);
388 dst
->f
[1] = util_fast_exp2(src
->f
[1]);
389 dst
->f
[2] = util_fast_exp2(src
->f
[2]);
390 dst
->f
[3] = util_fast_exp2(src
->f
[3]);
393 /* Inf is okay for this instruction, so clamp it to silence assertions. */
395 union tgsi_exec_channel clamped
;
/* Build a clamped copy of src, lane by lane. */
397 for (i
= 0; i
< 4; i
++) {
398 if (src
->f
[i
] > 127.99999f
) {
399 clamped
.f
[i
] = 127.99999f
;
400 } else if (src
->f
[i
] < -126.99999f
) {
401 clamped
.f
[i
] = -126.99999f
;
403 clamped
.f
[i
] = src
->f
[i
];
/* Variant 2: exact powf() evaluation. */
409 dst
->f
[0] = powf(2.0f
, src
->f
[0]);
410 dst
->f
[1] = powf(2.0f
, src
->f
[1]);
411 dst
->f
[2] = powf(2.0f
, src
->f
[2]);
412 dst
->f
[3] = powf(2.0f
, src
->f
[3]);
413 #endif /* FAST_MATH */
417 micro_f2d(union tgsi_double_channel
*dst
,
418 const union tgsi_exec_channel
*src
)
420 dst
->d
[0] = (double)src
->f
[0];
421 dst
->d
[1] = (double)src
->f
[1];
422 dst
->d
[2] = (double)src
->f
[2];
423 dst
->d
[3] = (double)src
->f
[3];
427 micro_flr(union tgsi_exec_channel
*dst
,
428 const union tgsi_exec_channel
*src
)
430 dst
->f
[0] = floorf(src
->f
[0]);
431 dst
->f
[1] = floorf(src
->f
[1]);
432 dst
->f
[2] = floorf(src
->f
[2]);
433 dst
->f
[3] = floorf(src
->f
[3]);
437 micro_frc(union tgsi_exec_channel
*dst
,
438 const union tgsi_exec_channel
*src
)
440 dst
->f
[0] = src
->f
[0] - floorf(src
->f
[0]);
441 dst
->f
[1] = src
->f
[1] - floorf(src
->f
[1]);
442 dst
->f
[2] = src
->f
[2] - floorf(src
->f
[2]);
443 dst
->f
[3] = src
->f
[3] - floorf(src
->f
[3]);
447 micro_i2d(union tgsi_double_channel
*dst
,
448 const union tgsi_exec_channel
*src
)
450 dst
->d
[0] = (double)src
->i
[0];
451 dst
->d
[1] = (double)src
->i
[1];
452 dst
->d
[2] = (double)src
->i
[2];
453 dst
->d
[3] = (double)src
->i
[3];
457 micro_iabs(union tgsi_exec_channel
*dst
,
458 const union tgsi_exec_channel
*src
)
460 dst
->i
[0] = src
->i
[0] >= 0 ? src
->i
[0] : -src
->i
[0];
461 dst
->i
[1] = src
->i
[1] >= 0 ? src
->i
[1] : -src
->i
[1];
462 dst
->i
[2] = src
->i
[2] >= 0 ? src
->i
[2] : -src
->i
[2];
463 dst
->i
[3] = src
->i
[3] >= 0 ? src
->i
[3] : -src
->i
[3];
467 micro_ineg(union tgsi_exec_channel
*dst
,
468 const union tgsi_exec_channel
*src
)
470 dst
->i
[0] = -src
->i
[0];
471 dst
->i
[1] = -src
->i
[1];
472 dst
->i
[2] = -src
->i
[2];
473 dst
->i
[3] = -src
->i
[3];
/* dst = log2(src) per lane.  Two variants are visible: one built on
 * util_fast_log2() and one computing logf(x) * 1.442695f (1.442695 is
 * approximately 1/ln 2, which converts a natural log to base 2).
 * NOTE(review): the preprocessor lines that select between the two
 * variants are not visible in this view (the embedded numbering skips
 * 479-480, 485 and 490+); presumably they are keyed on FAST_MATH like
 * micro_exp2 - confirm against the full file. */
477 micro_lg2(union tgsi_exec_channel
*dst
,
478 const union tgsi_exec_channel
*src
)
/* Variant 1: fast approximation helper. */
481 dst
->f
[0] = util_fast_log2(src
->f
[0]);
482 dst
->f
[1] = util_fast_log2(src
->f
[1]);
483 dst
->f
[2] = util_fast_log2(src
->f
[2]);
484 dst
->f
[3] = util_fast_log2(src
->f
[3]);
/* Variant 2: natural log scaled to base 2. */
486 dst
->f
[0] = logf(src
->f
[0]) * 1.442695f
;
487 dst
->f
[1] = logf(src
->f
[1]) * 1.442695f
;
488 dst
->f
[2] = logf(src
->f
[2]) * 1.442695f
;
489 dst
->f
[3] = logf(src
->f
[3]) * 1.442695f
;
494 micro_lrp(union tgsi_exec_channel
*dst
,
495 const union tgsi_exec_channel
*src0
,
496 const union tgsi_exec_channel
*src1
,
497 const union tgsi_exec_channel
*src2
)
499 dst
->f
[0] = src0
->f
[0] * (src1
->f
[0] - src2
->f
[0]) + src2
->f
[0];
500 dst
->f
[1] = src0
->f
[1] * (src1
->f
[1] - src2
->f
[1]) + src2
->f
[1];
501 dst
->f
[2] = src0
->f
[2] * (src1
->f
[2] - src2
->f
[2]) + src2
->f
[2];
502 dst
->f
[3] = src0
->f
[3] * (src1
->f
[3] - src2
->f
[3]) + src2
->f
[3];
506 micro_mad(union tgsi_exec_channel
*dst
,
507 const union tgsi_exec_channel
*src0
,
508 const union tgsi_exec_channel
*src1
,
509 const union tgsi_exec_channel
*src2
)
511 dst
->f
[0] = src0
->f
[0] * src1
->f
[0] + src2
->f
[0];
512 dst
->f
[1] = src0
->f
[1] * src1
->f
[1] + src2
->f
[1];
513 dst
->f
[2] = src0
->f
[2] * src1
->f
[2] + src2
->f
[2];
514 dst
->f
[3] = src0
->f
[3] * src1
->f
[3] + src2
->f
[3];
518 micro_mov(union tgsi_exec_channel
*dst
,
519 const union tgsi_exec_channel
*src
)
521 dst
->u
[0] = src
->u
[0];
522 dst
->u
[1] = src
->u
[1];
523 dst
->u
[2] = src
->u
[2];
524 dst
->u
[3] = src
->u
[3];
528 micro_rcp(union tgsi_exec_channel
*dst
,
529 const union tgsi_exec_channel
*src
)
531 #if 0 /* for debugging */
532 assert(src
->f
[0] != 0.0f
);
533 assert(src
->f
[1] != 0.0f
);
534 assert(src
->f
[2] != 0.0f
);
535 assert(src
->f
[3] != 0.0f
);
537 dst
->f
[0] = 1.0f
/ src
->f
[0];
538 dst
->f
[1] = 1.0f
/ src
->f
[1];
539 dst
->f
[2] = 1.0f
/ src
->f
[2];
540 dst
->f
[3] = 1.0f
/ src
->f
[3];
544 micro_rnd(union tgsi_exec_channel
*dst
,
545 const union tgsi_exec_channel
*src
)
547 dst
->f
[0] = _mesa_roundevenf(src
->f
[0]);
548 dst
->f
[1] = _mesa_roundevenf(src
->f
[1]);
549 dst
->f
[2] = _mesa_roundevenf(src
->f
[2]);
550 dst
->f
[3] = _mesa_roundevenf(src
->f
[3]);
554 micro_rsq(union tgsi_exec_channel
*dst
,
555 const union tgsi_exec_channel
*src
)
557 #if 0 /* for debugging */
558 assert(src
->f
[0] != 0.0f
);
559 assert(src
->f
[1] != 0.0f
);
560 assert(src
->f
[2] != 0.0f
);
561 assert(src
->f
[3] != 0.0f
);
563 dst
->f
[0] = 1.0f
/ sqrtf(src
->f
[0]);
564 dst
->f
[1] = 1.0f
/ sqrtf(src
->f
[1]);
565 dst
->f
[2] = 1.0f
/ sqrtf(src
->f
[2]);
566 dst
->f
[3] = 1.0f
/ sqrtf(src
->f
[3]);
570 micro_sqrt(union tgsi_exec_channel
*dst
,
571 const union tgsi_exec_channel
*src
)
573 dst
->f
[0] = sqrtf(src
->f
[0]);
574 dst
->f
[1] = sqrtf(src
->f
[1]);
575 dst
->f
[2] = sqrtf(src
->f
[2]);
576 dst
->f
[3] = sqrtf(src
->f
[3]);
580 micro_seq(union tgsi_exec_channel
*dst
,
581 const union tgsi_exec_channel
*src0
,
582 const union tgsi_exec_channel
*src1
)
584 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? 1.0f
: 0.0f
;
585 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? 1.0f
: 0.0f
;
586 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? 1.0f
: 0.0f
;
587 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? 1.0f
: 0.0f
;
591 micro_sge(union tgsi_exec_channel
*dst
,
592 const union tgsi_exec_channel
*src0
,
593 const union tgsi_exec_channel
*src1
)
595 dst
->f
[0] = src0
->f
[0] >= src1
->f
[0] ? 1.0f
: 0.0f
;
596 dst
->f
[1] = src0
->f
[1] >= src1
->f
[1] ? 1.0f
: 0.0f
;
597 dst
->f
[2] = src0
->f
[2] >= src1
->f
[2] ? 1.0f
: 0.0f
;
598 dst
->f
[3] = src0
->f
[3] >= src1
->f
[3] ? 1.0f
: 0.0f
;
602 micro_sgn(union tgsi_exec_channel
*dst
,
603 const union tgsi_exec_channel
*src
)
605 dst
->f
[0] = src
->f
[0] < 0.0f
? -1.0f
: src
->f
[0] > 0.0f
? 1.0f
: 0.0f
;
606 dst
->f
[1] = src
->f
[1] < 0.0f
? -1.0f
: src
->f
[1] > 0.0f
? 1.0f
: 0.0f
;
607 dst
->f
[2] = src
->f
[2] < 0.0f
? -1.0f
: src
->f
[2] > 0.0f
? 1.0f
: 0.0f
;
608 dst
->f
[3] = src
->f
[3] < 0.0f
? -1.0f
: src
->f
[3] > 0.0f
? 1.0f
: 0.0f
;
612 micro_isgn(union tgsi_exec_channel
*dst
,
613 const union tgsi_exec_channel
*src
)
615 dst
->i
[0] = src
->i
[0] < 0 ? -1 : src
->i
[0] > 0 ? 1 : 0;
616 dst
->i
[1] = src
->i
[1] < 0 ? -1 : src
->i
[1] > 0 ? 1 : 0;
617 dst
->i
[2] = src
->i
[2] < 0 ? -1 : src
->i
[2] > 0 ? 1 : 0;
618 dst
->i
[3] = src
->i
[3] < 0 ? -1 : src
->i
[3] > 0 ? 1 : 0;
622 micro_sgt(union tgsi_exec_channel
*dst
,
623 const union tgsi_exec_channel
*src0
,
624 const union tgsi_exec_channel
*src1
)
626 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? 1.0f
: 0.0f
;
627 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? 1.0f
: 0.0f
;
628 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? 1.0f
: 0.0f
;
629 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? 1.0f
: 0.0f
;
633 micro_sin(union tgsi_exec_channel
*dst
,
634 const union tgsi_exec_channel
*src
)
636 dst
->f
[0] = sinf(src
->f
[0]);
637 dst
->f
[1] = sinf(src
->f
[1]);
638 dst
->f
[2] = sinf(src
->f
[2]);
639 dst
->f
[3] = sinf(src
->f
[3]);
643 micro_sle(union tgsi_exec_channel
*dst
,
644 const union tgsi_exec_channel
*src0
,
645 const union tgsi_exec_channel
*src1
)
647 dst
->f
[0] = src0
->f
[0] <= src1
->f
[0] ? 1.0f
: 0.0f
;
648 dst
->f
[1] = src0
->f
[1] <= src1
->f
[1] ? 1.0f
: 0.0f
;
649 dst
->f
[2] = src0
->f
[2] <= src1
->f
[2] ? 1.0f
: 0.0f
;
650 dst
->f
[3] = src0
->f
[3] <= src1
->f
[3] ? 1.0f
: 0.0f
;
654 micro_slt(union tgsi_exec_channel
*dst
,
655 const union tgsi_exec_channel
*src0
,
656 const union tgsi_exec_channel
*src1
)
658 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? 1.0f
: 0.0f
;
659 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? 1.0f
: 0.0f
;
660 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? 1.0f
: 0.0f
;
661 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? 1.0f
: 0.0f
;
665 micro_sne(union tgsi_exec_channel
*dst
,
666 const union tgsi_exec_channel
*src0
,
667 const union tgsi_exec_channel
*src1
)
669 dst
->f
[0] = src0
->f
[0] != src1
->f
[0] ? 1.0f
: 0.0f
;
670 dst
->f
[1] = src0
->f
[1] != src1
->f
[1] ? 1.0f
: 0.0f
;
671 dst
->f
[2] = src0
->f
[2] != src1
->f
[2] ? 1.0f
: 0.0f
;
672 dst
->f
[3] = src0
->f
[3] != src1
->f
[3] ? 1.0f
: 0.0f
;
676 micro_trunc(union tgsi_exec_channel
*dst
,
677 const union tgsi_exec_channel
*src
)
679 dst
->f
[0] = truncf(src
->f
[0]);
680 dst
->f
[1] = truncf(src
->f
[1]);
681 dst
->f
[2] = truncf(src
->f
[2]);
682 dst
->f
[3] = truncf(src
->f
[3]);
686 micro_u2d(union tgsi_double_channel
*dst
,
687 const union tgsi_exec_channel
*src
)
689 dst
->d
[0] = (double)src
->u
[0];
690 dst
->d
[1] = (double)src
->u
[1];
691 dst
->d
[2] = (double)src
->u
[2];
692 dst
->d
[3] = (double)src
->u
[3];
696 micro_i64abs(union tgsi_double_channel
*dst
,
697 const union tgsi_double_channel
*src
)
699 dst
->i64
[0] = src
->i64
[0] >= 0.0 ? src
->i64
[0] : -src
->i64
[0];
700 dst
->i64
[1] = src
->i64
[1] >= 0.0 ? src
->i64
[1] : -src
->i64
[1];
701 dst
->i64
[2] = src
->i64
[2] >= 0.0 ? src
->i64
[2] : -src
->i64
[2];
702 dst
->i64
[3] = src
->i64
[3] >= 0.0 ? src
->i64
[3] : -src
->i64
[3];
706 micro_i64sgn(union tgsi_double_channel
*dst
,
707 const union tgsi_double_channel
*src
)
709 dst
->i64
[0] = src
->i64
[0] < 0 ? -1 : src
->i64
[0] > 0 ? 1 : 0;
710 dst
->i64
[1] = src
->i64
[1] < 0 ? -1 : src
->i64
[1] > 0 ? 1 : 0;
711 dst
->i64
[2] = src
->i64
[2] < 0 ? -1 : src
->i64
[2] > 0 ? 1 : 0;
712 dst
->i64
[3] = src
->i64
[3] < 0 ? -1 : src
->i64
[3] > 0 ? 1 : 0;
716 micro_i64neg(union tgsi_double_channel
*dst
,
717 const union tgsi_double_channel
*src
)
719 dst
->i64
[0] = -src
->i64
[0];
720 dst
->i64
[1] = -src
->i64
[1];
721 dst
->i64
[2] = -src
->i64
[2];
722 dst
->i64
[3] = -src
->i64
[3];
726 micro_u64seq(union tgsi_double_channel
*dst
,
727 const union tgsi_double_channel
*src
)
729 dst
->u
[0][0] = src
[0].u64
[0] == src
[1].u64
[0] ? ~0U : 0U;
730 dst
->u
[1][0] = src
[0].u64
[1] == src
[1].u64
[1] ? ~0U : 0U;
731 dst
->u
[2][0] = src
[0].u64
[2] == src
[1].u64
[2] ? ~0U : 0U;
732 dst
->u
[3][0] = src
[0].u64
[3] == src
[1].u64
[3] ? ~0U : 0U;
736 micro_u64sne(union tgsi_double_channel
*dst
,
737 const union tgsi_double_channel
*src
)
739 dst
->u
[0][0] = src
[0].u64
[0] != src
[1].u64
[0] ? ~0U : 0U;
740 dst
->u
[1][0] = src
[0].u64
[1] != src
[1].u64
[1] ? ~0U : 0U;
741 dst
->u
[2][0] = src
[0].u64
[2] != src
[1].u64
[2] ? ~0U : 0U;
742 dst
->u
[3][0] = src
[0].u64
[3] != src
[1].u64
[3] ? ~0U : 0U;
746 micro_i64slt(union tgsi_double_channel
*dst
,
747 const union tgsi_double_channel
*src
)
749 dst
->u
[0][0] = src
[0].i64
[0] < src
[1].i64
[0] ? ~0U : 0U;
750 dst
->u
[1][0] = src
[0].i64
[1] < src
[1].i64
[1] ? ~0U : 0U;
751 dst
->u
[2][0] = src
[0].i64
[2] < src
[1].i64
[2] ? ~0U : 0U;
752 dst
->u
[3][0] = src
[0].i64
[3] < src
[1].i64
[3] ? ~0U : 0U;
756 micro_u64slt(union tgsi_double_channel
*dst
,
757 const union tgsi_double_channel
*src
)
759 dst
->u
[0][0] = src
[0].u64
[0] < src
[1].u64
[0] ? ~0U : 0U;
760 dst
->u
[1][0] = src
[0].u64
[1] < src
[1].u64
[1] ? ~0U : 0U;
761 dst
->u
[2][0] = src
[0].u64
[2] < src
[1].u64
[2] ? ~0U : 0U;
762 dst
->u
[3][0] = src
[0].u64
[3] < src
[1].u64
[3] ? ~0U : 0U;
766 micro_i64sge(union tgsi_double_channel
*dst
,
767 const union tgsi_double_channel
*src
)
769 dst
->u
[0][0] = src
[0].i64
[0] >= src
[1].i64
[0] ? ~0U : 0U;
770 dst
->u
[1][0] = src
[0].i64
[1] >= src
[1].i64
[1] ? ~0U : 0U;
771 dst
->u
[2][0] = src
[0].i64
[2] >= src
[1].i64
[2] ? ~0U : 0U;
772 dst
->u
[3][0] = src
[0].i64
[3] >= src
[1].i64
[3] ? ~0U : 0U;
776 micro_u64sge(union tgsi_double_channel
*dst
,
777 const union tgsi_double_channel
*src
)
779 dst
->u
[0][0] = src
[0].u64
[0] >= src
[1].u64
[0] ? ~0U : 0U;
780 dst
->u
[1][0] = src
[0].u64
[1] >= src
[1].u64
[1] ? ~0U : 0U;
781 dst
->u
[2][0] = src
[0].u64
[2] >= src
[1].u64
[2] ? ~0U : 0U;
782 dst
->u
[3][0] = src
[0].u64
[3] >= src
[1].u64
[3] ? ~0U : 0U;
786 micro_u64max(union tgsi_double_channel
*dst
,
787 const union tgsi_double_channel
*src
)
789 dst
->u64
[0] = src
[0].u64
[0] > src
[1].u64
[0] ? src
[0].u64
[0] : src
[1].u64
[0];
790 dst
->u64
[1] = src
[0].u64
[1] > src
[1].u64
[1] ? src
[0].u64
[1] : src
[1].u64
[1];
791 dst
->u64
[2] = src
[0].u64
[2] > src
[1].u64
[2] ? src
[0].u64
[2] : src
[1].u64
[2];
792 dst
->u64
[3] = src
[0].u64
[3] > src
[1].u64
[3] ? src
[0].u64
[3] : src
[1].u64
[3];
796 micro_i64max(union tgsi_double_channel
*dst
,
797 const union tgsi_double_channel
*src
)
799 dst
->i64
[0] = src
[0].i64
[0] > src
[1].i64
[0] ? src
[0].i64
[0] : src
[1].i64
[0];
800 dst
->i64
[1] = src
[0].i64
[1] > src
[1].i64
[1] ? src
[0].i64
[1] : src
[1].i64
[1];
801 dst
->i64
[2] = src
[0].i64
[2] > src
[1].i64
[2] ? src
[0].i64
[2] : src
[1].i64
[2];
802 dst
->i64
[3] = src
[0].i64
[3] > src
[1].i64
[3] ? src
[0].i64
[3] : src
[1].i64
[3];
806 micro_u64min(union tgsi_double_channel
*dst
,
807 const union tgsi_double_channel
*src
)
809 dst
->u64
[0] = src
[0].u64
[0] < src
[1].u64
[0] ? src
[0].u64
[0] : src
[1].u64
[0];
810 dst
->u64
[1] = src
[0].u64
[1] < src
[1].u64
[1] ? src
[0].u64
[1] : src
[1].u64
[1];
811 dst
->u64
[2] = src
[0].u64
[2] < src
[1].u64
[2] ? src
[0].u64
[2] : src
[1].u64
[2];
812 dst
->u64
[3] = src
[0].u64
[3] < src
[1].u64
[3] ? src
[0].u64
[3] : src
[1].u64
[3];
816 micro_i64min(union tgsi_double_channel
*dst
,
817 const union tgsi_double_channel
*src
)
819 dst
->i64
[0] = src
[0].i64
[0] < src
[1].i64
[0] ? src
[0].i64
[0] : src
[1].i64
[0];
820 dst
->i64
[1] = src
[0].i64
[1] < src
[1].i64
[1] ? src
[0].i64
[1] : src
[1].i64
[1];
821 dst
->i64
[2] = src
[0].i64
[2] < src
[1].i64
[2] ? src
[0].i64
[2] : src
[1].i64
[2];
822 dst
->i64
[3] = src
[0].i64
[3] < src
[1].i64
[3] ? src
[0].i64
[3] : src
[1].i64
[3];
826 micro_u64add(union tgsi_double_channel
*dst
,
827 const union tgsi_double_channel
*src
)
829 dst
->u64
[0] = src
[0].u64
[0] + src
[1].u64
[0];
830 dst
->u64
[1] = src
[0].u64
[1] + src
[1].u64
[1];
831 dst
->u64
[2] = src
[0].u64
[2] + src
[1].u64
[2];
832 dst
->u64
[3] = src
[0].u64
[3] + src
[1].u64
[3];
836 micro_u64mul(union tgsi_double_channel
*dst
,
837 const union tgsi_double_channel
*src
)
839 dst
->u64
[0] = src
[0].u64
[0] * src
[1].u64
[0];
840 dst
->u64
[1] = src
[0].u64
[1] * src
[1].u64
[1];
841 dst
->u64
[2] = src
[0].u64
[2] * src
[1].u64
[2];
842 dst
->u64
[3] = src
[0].u64
[3] * src
[1].u64
[3];
846 micro_u64div(union tgsi_double_channel
*dst
,
847 const union tgsi_double_channel
*src
)
849 dst
->u64
[0] = src
[1].u64
[0] ? src
[0].u64
[0] / src
[1].u64
[0] : ~0ull;
850 dst
->u64
[1] = src
[1].u64
[1] ? src
[0].u64
[1] / src
[1].u64
[1] : ~0ull;
851 dst
->u64
[2] = src
[1].u64
[2] ? src
[0].u64
[2] / src
[1].u64
[2] : ~0ull;
852 dst
->u64
[3] = src
[1].u64
[3] ? src
[0].u64
[3] / src
[1].u64
[3] : ~0ull;
856 micro_i64div(union tgsi_double_channel
*dst
,
857 const union tgsi_double_channel
*src
)
859 dst
->i64
[0] = src
[1].i64
[0] ? src
[0].i64
[0] / src
[1].i64
[0] : 0;
860 dst
->i64
[1] = src
[1].i64
[1] ? src
[0].i64
[1] / src
[1].i64
[1] : 0;
861 dst
->i64
[2] = src
[1].i64
[2] ? src
[0].i64
[2] / src
[1].i64
[2] : 0;
862 dst
->i64
[3] = src
[1].i64
[3] ? src
[0].i64
[3] / src
[1].i64
[3] : 0;
866 micro_u64mod(union tgsi_double_channel
*dst
,
867 const union tgsi_double_channel
*src
)
869 dst
->u64
[0] = src
[1].u64
[0] ? src
[0].u64
[0] % src
[1].u64
[0] : ~0ull;
870 dst
->u64
[1] = src
[1].u64
[1] ? src
[0].u64
[1] % src
[1].u64
[1] : ~0ull;
871 dst
->u64
[2] = src
[1].u64
[2] ? src
[0].u64
[2] % src
[1].u64
[2] : ~0ull;
872 dst
->u64
[3] = src
[1].u64
[3] ? src
[0].u64
[3] % src
[1].u64
[3] : ~0ull;
876 micro_i64mod(union tgsi_double_channel
*dst
,
877 const union tgsi_double_channel
*src
)
879 dst
->i64
[0] = src
[1].i64
[0] ? src
[0].i64
[0] % src
[1].i64
[0] : ~0ll;
880 dst
->i64
[1] = src
[1].i64
[1] ? src
[0].i64
[1] % src
[1].i64
[1] : ~0ll;
881 dst
->i64
[2] = src
[1].i64
[2] ? src
[0].i64
[2] % src
[1].i64
[2] : ~0ll;
882 dst
->i64
[3] = src
[1].i64
[3] ? src
[0].i64
[3] % src
[1].i64
[3] : ~0ll;
886 micro_u64shl(union tgsi_double_channel
*dst
,
887 const union tgsi_double_channel
*src0
,
888 union tgsi_exec_channel
*src1
)
890 unsigned masked_count
;
891 masked_count
= src1
->u
[0] & 0x3f;
892 dst
->u64
[0] = src0
->u64
[0] << masked_count
;
893 masked_count
= src1
->u
[1] & 0x3f;
894 dst
->u64
[1] = src0
->u64
[1] << masked_count
;
895 masked_count
= src1
->u
[2] & 0x3f;
896 dst
->u64
[2] = src0
->u64
[2] << masked_count
;
897 masked_count
= src1
->u
[3] & 0x3f;
898 dst
->u64
[3] = src0
->u64
[3] << masked_count
;
902 micro_i64shr(union tgsi_double_channel
*dst
,
903 const union tgsi_double_channel
*src0
,
904 union tgsi_exec_channel
*src1
)
906 unsigned masked_count
;
907 masked_count
= src1
->u
[0] & 0x3f;
908 dst
->i64
[0] = src0
->i64
[0] >> masked_count
;
909 masked_count
= src1
->u
[1] & 0x3f;
910 dst
->i64
[1] = src0
->i64
[1] >> masked_count
;
911 masked_count
= src1
->u
[2] & 0x3f;
912 dst
->i64
[2] = src0
->i64
[2] >> masked_count
;
913 masked_count
= src1
->u
[3] & 0x3f;
914 dst
->i64
[3] = src0
->i64
[3] >> masked_count
;
918 micro_u64shr(union tgsi_double_channel
*dst
,
919 const union tgsi_double_channel
*src0
,
920 union tgsi_exec_channel
*src1
)
922 unsigned masked_count
;
923 masked_count
= src1
->u
[0] & 0x3f;
924 dst
->u64
[0] = src0
->u64
[0] >> masked_count
;
925 masked_count
= src1
->u
[1] & 0x3f;
926 dst
->u64
[1] = src0
->u64
[1] >> masked_count
;
927 masked_count
= src1
->u
[2] & 0x3f;
928 dst
->u64
[2] = src0
->u64
[2] >> masked_count
;
929 masked_count
= src1
->u
[3] & 0x3f;
930 dst
->u64
[3] = src0
->u64
[3] >> masked_count
;
/* Tags for the data types an operand can be interpreted as.
 * NOTE(review): the embedded numbering skips 935-936 and the closing
 * brace, so at least two enumerators (presumably the 32-bit integer
 * types) are not visible here - the numeric values of the later members
 * cannot be determined from this view. */
933 enum tgsi_exec_datatype
{
934 TGSI_EXEC_DATA_FLOAT
,
937 TGSI_EXEC_DATA_DOUBLE
,
938 TGSI_EXEC_DATA_INT64
,
939 TGSI_EXEC_DATA_UINT64
,
/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
 */
#define TEMP_KILMASK_I        TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C        TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I         TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C         TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I      TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C      TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_PRIMITIVE_S1_I   TGSI_EXEC_TEMP_PRIMITIVE_S1_I
#define TEMP_PRIMITIVE_S1_C   TGSI_EXEC_TEMP_PRIMITIVE_S1_C
#define TEMP_PRIMITIVE_S2_I   TGSI_EXEC_TEMP_PRIMITIVE_S2_I
#define TEMP_PRIMITIVE_S2_C   TGSI_EXEC_TEMP_PRIMITIVE_S2_C
#define TEMP_PRIMITIVE_S3_I   TGSI_EXEC_TEMP_PRIMITIVE_S3_I
#define TEMP_PRIMITIVE_S3_C   TGSI_EXEC_TEMP_PRIMITIVE_S3_C
/* Table with one { index, channel } pair per primitive-counter register:
 * entry 0 is the TEMP_PRIMITIVE pair and entries 1-3 the S1-S3 pairs.
 * NOTE(review): the struct's member declarations (original lines 959-960)
 * are not visible here, so the member names and types are unknown;
 * presumably the table is indexed by vertex stream - confirm with users. */
958 static const struct {
961 } temp_prim_idxs
[] = {
962 { TEMP_PRIMITIVE_I
, TEMP_PRIMITIVE_C
},
963 { TEMP_PRIMITIVE_S1_I
, TEMP_PRIMITIVE_S1_C
},
964 { TEMP_PRIMITIVE_S2_I
, TEMP_PRIMITIVE_S2_C
},
965 { TEMP_PRIMITIVE_S3_I
, TEMP_PRIMITIVE_S3_C
},
/**
 * Recompute the execution mask of the machine pointed to by MACH as the
 * bitwise AND of its condition, loop, continue, switch and function masks.
 *
 * MACH may be any pointer-valued expression; it is evaluated several
 * times, so it must be free of side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
973 static const union tgsi_exec_channel ZeroVec
=
974 { { 0.0, 0.0, 0.0, 0.0 } };
976 static const union tgsi_exec_channel OneVec
= {
977 {1.0f
, 1.0f
, 1.0f
, 1.0f
}
980 static const union tgsi_exec_channel P128Vec
= {
981 {128.0f
, 128.0f
, 128.0f
, 128.0f
}
984 static const union tgsi_exec_channel M128Vec
= {
985 {-128.0f
, -128.0f
, -128.0f
, -128.0f
}
990 * Assert that none of the float values in 'chan' are infinite or NaN.
991 * NaN and Inf may occur normally during program execution and should
992 * not lead to crashes, etc. But when debugging, it's helpful to catch them.
/* Debug check: asserts, one lane at a time, that chan->f[0..3] are
 * neither infinite nor NaN (as judged by util_is_inf_or_nan()).  The
 * checks are kept unrolled so a failing assert names the exact lane. */
996 check_inf_or_nan(const union tgsi_exec_channel
*chan
)
998 assert(!util_is_inf_or_nan((chan
)->f
[0]));
999 assert(!util_is_inf_or_nan((chan
)->f
[1]));
1000 assert(!util_is_inf_or_nan((chan
)->f
[2]));
1001 assert(!util_is_inf_or_nan((chan
)->f
[3]));
1007 print_chan(const char *msg
, const union tgsi_exec_channel
*chan
)
1009 debug_printf("%s = {%f, %f, %f, %f}\n",
1010 msg
, chan
->f
[0], chan
->f
[1], chan
->f
[2], chan
->f
[3]);
/* Debug helper: prints the header "Temp[index] =" and then one line per
 * channel of mach->Temps[index], each holding four floats.
 * NOTE(review): the declaration of 'i', the arguments of the inner
 * debug_printf() and the end of the function are not visible in this
 * view - confirm against the full file. */
1017 print_temp(const struct tgsi_exec_machine
*mach
, uint index
)
1019 const struct tgsi_exec_vector
*tmp
= &mach
->Temps
[index
];
1021 debug_printf("Temp[%u] =\n", index
);
1022 for (i
= 0; i
< 4; i
++) {
1023 debug_printf(" %c: { %f, %f, %f, %f }\n",
/* Store the first 'num_bufs' constant buffers in the machine: for each i,
 * mach->Consts[i] receives bufs[i] and mach->ConstsSize[i] receives
 * buf_sizes[i].  Only the values found in 'bufs' are stored; nothing is
 * copied from the buffers themselves in the visible code.
 * NOTE(review): the parameter declarations for 'num_bufs' and 'bufs'
 * (original lines 1036-1037) and the declaration of 'i' are not visible
 * here; no bound check against the capacity of Consts[] is visible
 * either - confirm in the full file. */
1035 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine
*mach
,
1038 const unsigned *buf_sizes
)
1042 for (i
= 0; i
< num_bufs
; i
++) {
1043 mach
->Consts
[i
] = bufs
[i
];
1044 mach
->ConstsSize
[i
] = buf_sizes
[i
];
1050 * Check if there's a potential src/dst register data dependency when
1051 * using SOA execution.
1054 * This would expand into:
1059 * The second instruction will have the wrong value for t0 if executed as-is.
1062 tgsi_check_soa_dependencies(const struct tgsi_full_instruction
*inst
)
1066 uint writemask
= inst
->Dst
[0].Register
.WriteMask
;
1067 if (writemask
== TGSI_WRITEMASK_X
||
1068 writemask
== TGSI_WRITEMASK_Y
||
1069 writemask
== TGSI_WRITEMASK_Z
||
1070 writemask
== TGSI_WRITEMASK_W
||
1071 writemask
== TGSI_WRITEMASK_NONE
) {
1072 /* no chance of data dependency */
1076 /* loop over src regs */
1077 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
1078 if ((inst
->Src
[i
].Register
.File
==
1079 inst
->Dst
[0].Register
.File
) &&
1080 ((inst
->Src
[i
].Register
.Index
==
1081 inst
->Dst
[0].Register
.Index
) ||
1082 inst
->Src
[i
].Register
.Indirect
||
1083 inst
->Dst
[0].Register
.Indirect
)) {
1084 /* loop over dest channels */
1085 uint channelsWritten
= 0x0;
1086 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
1087 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1088 /* check if we're reading a channel that's been written */
1089 uint swizzle
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[i
], chan
);
1090 if (channelsWritten
& (1 << swizzle
)) {
1094 channelsWritten
|= (1 << chan
);
1104 * Initialize machine state by expanding tokens to full instructions,
1105 * allocating temporary storage, setting up constants, etc.
1106 * After this, we can call tgsi_exec_machine_run() many times.
1109 tgsi_exec_machine_bind_shader(
1110 struct tgsi_exec_machine
*mach
,
1111 const struct tgsi_token
*tokens
,
1112 struct tgsi_sampler
*sampler
,
1113 struct tgsi_image
*image
,
1114 struct tgsi_buffer
*buffer
)
1117 struct tgsi_parse_context parse
;
1118 struct tgsi_full_instruction
*instructions
;
1119 struct tgsi_full_declaration
*declarations
;
1120 uint maxInstructions
= 10, numInstructions
= 0;
1121 uint maxDeclarations
= 10, numDeclarations
= 0;
1124 tgsi_dump(tokens
, 0);
1130 mach
->Tokens
= tokens
;
1131 mach
->Sampler
= sampler
;
1132 mach
->Image
= image
;
1133 mach
->Buffer
= buffer
;
1136 /* unbind and free all */
1137 FREE(mach
->Declarations
);
1138 mach
->Declarations
= NULL
;
1139 mach
->NumDeclarations
= 0;
1141 FREE(mach
->Instructions
);
1142 mach
->Instructions
= NULL
;
1143 mach
->NumInstructions
= 0;
1148 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
1149 if (k
!= TGSI_PARSE_OK
) {
1150 debug_printf( "Problem parsing!\n" );
1155 mach
->NumOutputs
= 0;
1157 for (k
= 0; k
< TGSI_SEMANTIC_COUNT
; k
++)
1158 mach
->SysSemanticToIndex
[k
] = -1;
1160 if (mach
->ShaderType
== PIPE_SHADER_GEOMETRY
&&
1161 !mach
->UsedGeometryShader
) {
1162 struct tgsi_exec_vector
*inputs
;
1163 struct tgsi_exec_vector
*outputs
;
1165 inputs
= align_malloc(sizeof(struct tgsi_exec_vector
) *
1166 TGSI_MAX_PRIM_VERTICES
* PIPE_MAX_SHADER_INPUTS
,
1172 outputs
= align_malloc(sizeof(struct tgsi_exec_vector
) *
1173 TGSI_MAX_TOTAL_VERTICES
, 16);
1180 align_free(mach
->Inputs
);
1181 align_free(mach
->Outputs
);
1183 mach
->Inputs
= inputs
;
1184 mach
->Outputs
= outputs
;
1185 mach
->UsedGeometryShader
= TRUE
;
1188 declarations
= (struct tgsi_full_declaration
*)
1189 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
1191 if (!declarations
) {
1195 instructions
= (struct tgsi_full_instruction
*)
1196 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
1198 if (!instructions
) {
1199 FREE( declarations
);
1203 while( !tgsi_parse_end_of_tokens( &parse
) ) {
1206 tgsi_parse_token( &parse
);
1207 switch( parse
.FullToken
.Token
.Type
) {
1208 case TGSI_TOKEN_TYPE_DECLARATION
:
1209 /* save expanded declaration */
1210 if (numDeclarations
== maxDeclarations
) {
1211 declarations
= REALLOC(declarations
,
1213 * sizeof(struct tgsi_full_declaration
),
1214 (maxDeclarations
+ 10)
1215 * sizeof(struct tgsi_full_declaration
));
1216 maxDeclarations
+= 10;
1218 if (parse
.FullToken
.FullDeclaration
.Declaration
.File
== TGSI_FILE_OUTPUT
) {
1220 for (reg
= parse
.FullToken
.FullDeclaration
.Range
.First
;
1221 reg
<= parse
.FullToken
.FullDeclaration
.Range
.Last
;
1226 else if (parse
.FullToken
.FullDeclaration
.Declaration
.File
== TGSI_FILE_SYSTEM_VALUE
) {
1227 const struct tgsi_full_declaration
*decl
= &parse
.FullToken
.FullDeclaration
;
1228 mach
->SysSemanticToIndex
[decl
->Semantic
.Name
] = decl
->Range
.First
;
1231 memcpy(declarations
+ numDeclarations
,
1232 &parse
.FullToken
.FullDeclaration
,
1233 sizeof(declarations
[0]));
1237 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1239 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
1240 assert( size
<= 4 );
1241 if (mach
->ImmLimit
>= mach
->ImmsReserved
) {
1242 unsigned newReserved
= mach
->ImmsReserved
? 2 * mach
->ImmsReserved
: 128;
1243 float4
*imms
= REALLOC(mach
->Imms
, mach
->ImmsReserved
, newReserved
* sizeof(float4
));
1245 mach
->ImmsReserved
= newReserved
;
1248 debug_printf("Unable to (re)allocate space for immidiate constants\n");
1253 for( i
= 0; i
< size
; i
++ ) {
1254 mach
->Imms
[mach
->ImmLimit
][i
] =
1255 parse
.FullToken
.FullImmediate
.u
[i
].Float
;
1257 mach
->ImmLimit
+= 1;
1261 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1263 /* save expanded instruction */
1264 if (numInstructions
== maxInstructions
) {
1265 instructions
= REALLOC(instructions
,
1267 * sizeof(struct tgsi_full_instruction
),
1268 (maxInstructions
+ 10)
1269 * sizeof(struct tgsi_full_instruction
));
1270 maxInstructions
+= 10;
1273 memcpy(instructions
+ numInstructions
,
1274 &parse
.FullToken
.FullInstruction
,
1275 sizeof(instructions
[0]));
1280 case TGSI_TOKEN_TYPE_PROPERTY
:
1281 if (mach
->ShaderType
== PIPE_SHADER_GEOMETRY
) {
1282 if (parse
.FullToken
.FullProperty
.Property
.PropertyName
== TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
) {
1283 mach
->MaxOutputVertices
= parse
.FullToken
.FullProperty
.u
[0].Data
;
1292 tgsi_parse_free (&parse
);
1294 FREE(mach
->Declarations
);
1295 mach
->Declarations
= declarations
;
1296 mach
->NumDeclarations
= numDeclarations
;
1298 FREE(mach
->Instructions
);
1299 mach
->Instructions
= instructions
;
1300 mach
->NumInstructions
= numInstructions
;
1304 struct tgsi_exec_machine
*
1305 tgsi_exec_machine_create(enum pipe_shader_type shader_type
)
1307 struct tgsi_exec_machine
*mach
;
1310 mach
= align_malloc( sizeof *mach
, 16 );
1314 memset(mach
, 0, sizeof(*mach
));
1316 mach
->ShaderType
= shader_type
;
1317 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_TEMP_ADDR
];
1318 mach
->MaxGeometryShaderOutputs
= TGSI_MAX_TOTAL_VERTICES
;
1320 if (shader_type
!= PIPE_SHADER_COMPUTE
) {
1321 mach
->Inputs
= align_malloc(sizeof(struct tgsi_exec_vector
) * PIPE_MAX_SHADER_INPUTS
, 16);
1322 mach
->Outputs
= align_malloc(sizeof(struct tgsi_exec_vector
) * PIPE_MAX_SHADER_OUTPUTS
, 16);
1323 if (!mach
->Inputs
|| !mach
->Outputs
)
1327 /* Setup constants needed by the SSE2 executor. */
1328 for( i
= 0; i
< 4; i
++ ) {
1329 mach
->Temps
[TGSI_EXEC_TEMP_00000000_I
].xyzw
[TGSI_EXEC_TEMP_00000000_C
].u
[i
] = 0x00000000;
1330 mach
->Temps
[TGSI_EXEC_TEMP_7FFFFFFF_I
].xyzw
[TGSI_EXEC_TEMP_7FFFFFFF_C
].u
[i
] = 0x7FFFFFFF;
1331 mach
->Temps
[TGSI_EXEC_TEMP_80000000_I
].xyzw
[TGSI_EXEC_TEMP_80000000_C
].u
[i
] = 0x80000000;
1332 mach
->Temps
[TGSI_EXEC_TEMP_FFFFFFFF_I
].xyzw
[TGSI_EXEC_TEMP_FFFFFFFF_C
].u
[i
] = 0xFFFFFFFF; /* not used */
1333 mach
->Temps
[TGSI_EXEC_TEMP_ONE_I
].xyzw
[TGSI_EXEC_TEMP_ONE_C
].f
[i
] = 1.0f
;
1334 mach
->Temps
[TGSI_EXEC_TEMP_TWO_I
].xyzw
[TGSI_EXEC_TEMP_TWO_C
].f
[i
] = 2.0f
; /* not used */
1335 mach
->Temps
[TGSI_EXEC_TEMP_128_I
].xyzw
[TGSI_EXEC_TEMP_128_C
].f
[i
] = 128.0f
;
1336 mach
->Temps
[TGSI_EXEC_TEMP_MINUS_128_I
].xyzw
[TGSI_EXEC_TEMP_MINUS_128_C
].f
[i
] = -128.0f
;
1337 mach
->Temps
[TGSI_EXEC_TEMP_THREE_I
].xyzw
[TGSI_EXEC_TEMP_THREE_C
].f
[i
] = 3.0f
;
1338 mach
->Temps
[TGSI_EXEC_TEMP_HALF_I
].xyzw
[TGSI_EXEC_TEMP_HALF_C
].f
[i
] = 0.5f
;
1342 /* silence warnings */
1351 align_free(mach
->Inputs
);
1352 align_free(mach
->Outputs
);
1360 tgsi_exec_machine_destroy(struct tgsi_exec_machine
*mach
)
1363 FREE(mach
->Instructions
);
1364 FREE(mach
->Declarations
);
1367 align_free(mach
->Inputs
);
1368 align_free(mach
->Outputs
);
1375 micro_add(union tgsi_exec_channel
*dst
,
1376 const union tgsi_exec_channel
*src0
,
1377 const union tgsi_exec_channel
*src1
)
1379 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
1380 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
1381 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
1382 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
1387 union tgsi_exec_channel
*dst
,
1388 const union tgsi_exec_channel
*src0
,
1389 const union tgsi_exec_channel
*src1
)
1391 if (src1
->f
[0] != 0) {
1392 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
1394 if (src1
->f
[1] != 0) {
1395 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
1397 if (src1
->f
[2] != 0) {
1398 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
1400 if (src1
->f
[3] != 0) {
1401 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
1407 union tgsi_exec_channel
*dst
,
1408 const union tgsi_exec_channel
*src0
,
1409 const union tgsi_exec_channel
*src1
,
1410 const union tgsi_exec_channel
*src2
,
1411 const union tgsi_exec_channel
*src3
)
1413 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
1414 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
1415 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
1416 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
1420 micro_max(union tgsi_exec_channel
*dst
,
1421 const union tgsi_exec_channel
*src0
,
1422 const union tgsi_exec_channel
*src1
)
1424 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
1425 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
1426 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
1427 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
1431 micro_min(union tgsi_exec_channel
*dst
,
1432 const union tgsi_exec_channel
*src0
,
1433 const union tgsi_exec_channel
*src1
)
1435 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
1436 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
1437 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
1438 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
1442 micro_mul(union tgsi_exec_channel
*dst
,
1443 const union tgsi_exec_channel
*src0
,
1444 const union tgsi_exec_channel
*src1
)
1446 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
1447 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
1448 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
1449 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
1454 union tgsi_exec_channel
*dst
,
1455 const union tgsi_exec_channel
*src
)
1457 dst
->f
[0] = -src
->f
[0];
1458 dst
->f
[1] = -src
->f
[1];
1459 dst
->f
[2] = -src
->f
[2];
1460 dst
->f
[3] = -src
->f
[3];
1465 union tgsi_exec_channel
*dst
,
1466 const union tgsi_exec_channel
*src0
,
1467 const union tgsi_exec_channel
*src1
)
1470 dst
->f
[0] = util_fast_pow( src0
->f
[0], src1
->f
[0] );
1471 dst
->f
[1] = util_fast_pow( src0
->f
[1], src1
->f
[1] );
1472 dst
->f
[2] = util_fast_pow( src0
->f
[2], src1
->f
[2] );
1473 dst
->f
[3] = util_fast_pow( src0
->f
[3], src1
->f
[3] );
1475 dst
->f
[0] = powf( src0
->f
[0], src1
->f
[0] );
1476 dst
->f
[1] = powf( src0
->f
[1], src1
->f
[1] );
1477 dst
->f
[2] = powf( src0
->f
[2], src1
->f
[2] );
1478 dst
->f
[3] = powf( src0
->f
[3], src1
->f
[3] );
1483 micro_ldexp(union tgsi_exec_channel
*dst
,
1484 const union tgsi_exec_channel
*src0
,
1485 const union tgsi_exec_channel
*src1
)
1487 dst
->f
[0] = ldexpf(src0
->f
[0], src1
->i
[0]);
1488 dst
->f
[1] = ldexpf(src0
->f
[1], src1
->i
[1]);
1489 dst
->f
[2] = ldexpf(src0
->f
[2], src1
->i
[2]);
1490 dst
->f
[3] = ldexpf(src0
->f
[3], src1
->i
[3]);
1494 micro_sub(union tgsi_exec_channel
*dst
,
1495 const union tgsi_exec_channel
*src0
,
1496 const union tgsi_exec_channel
*src1
)
1498 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
1499 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
1500 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
1501 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
1505 fetch_src_file_channel(const struct tgsi_exec_machine
*mach
,
1508 const union tgsi_exec_channel
*index
,
1509 const union tgsi_exec_channel
*index2D
,
1510 union tgsi_exec_channel
*chan
)
1514 assert(swizzle
< 4);
1517 case TGSI_FILE_CONSTANT
:
1518 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1519 assert(index2D
->i
[i
] >= 0 && index2D
->i
[i
] < PIPE_MAX_CONSTANT_BUFFERS
);
1520 assert(mach
->Consts
[index2D
->i
[i
]]);
1522 if (index
->i
[i
] < 0) {
1525 /* NOTE: copying the const value as a uint instead of float */
1526 const uint constbuf
= index2D
->i
[i
];
1527 const uint
*buf
= (const uint
*)mach
->Consts
[constbuf
];
1528 const int pos
= index
->i
[i
] * 4 + swizzle
;
1529 /* const buffer bounds check */
1530 if (pos
< 0 || pos
>= (int) mach
->ConstsSize
[constbuf
]) {
1532 /* Debug: print warning */
1533 static int count
= 0;
1535 debug_printf("TGSI Exec: const buffer index %d"
1536 " out of bounds\n", pos
);
1541 chan
->u
[i
] = buf
[pos
];
1546 case TGSI_FILE_INPUT
:
1547 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1549 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1550 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1551 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1552 index2D->i[i], index->i[i]);
1554 int pos
= index2D
->i
[i
] * TGSI_EXEC_MAX_INPUT_ATTRIBS
+ index
->i
[i
];
1556 assert(pos
< TGSI_MAX_PRIM_VERTICES
* PIPE_MAX_ATTRIBS
);
1557 chan
->u
[i
] = mach
->Inputs
[pos
].xyzw
[swizzle
].u
[i
];
1561 case TGSI_FILE_SYSTEM_VALUE
:
1562 /* XXX no swizzling at this point. Will be needed if we put
1563 * gl_FragCoord, for example, in a sys value register.
1565 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1566 chan
->u
[i
] = mach
->SystemValue
[index
->i
[i
]].xyzw
[swizzle
].u
[i
];
1570 case TGSI_FILE_TEMPORARY
:
1571 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1572 assert(index
->i
[i
] < TGSI_EXEC_NUM_TEMPS
);
1573 assert(index2D
->i
[i
] == 0);
1575 chan
->u
[i
] = mach
->Temps
[index
->i
[i
]].xyzw
[swizzle
].u
[i
];
1579 case TGSI_FILE_IMMEDIATE
:
1580 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1581 assert(index
->i
[i
] >= 0 && index
->i
[i
] < (int)mach
->ImmLimit
);
1582 assert(index2D
->i
[i
] == 0);
1584 chan
->f
[i
] = mach
->Imms
[index
->i
[i
]][swizzle
];
1588 case TGSI_FILE_ADDRESS
:
1589 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1590 assert(index
->i
[i
] >= 0);
1591 assert(index2D
->i
[i
] == 0);
1593 chan
->u
[i
] = mach
->Addrs
[index
->i
[i
]].xyzw
[swizzle
].u
[i
];
1597 case TGSI_FILE_OUTPUT
:
1598 /* vertex/fragment output vars can be read too */
1599 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1600 assert(index
->i
[i
] >= 0);
1601 assert(index2D
->i
[i
] == 0);
1603 chan
->u
[i
] = mach
->Outputs
[index
->i
[i
]].xyzw
[swizzle
].u
[i
];
1609 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1616 fetch_source_d(const struct tgsi_exec_machine
*mach
,
1617 union tgsi_exec_channel
*chan
,
1618 const struct tgsi_full_src_register
*reg
,
1619 const uint chan_index
)
1621 union tgsi_exec_channel index
;
1622 union tgsi_exec_channel index2D
;
1625 /* We start with a direct index into a register file.
1629 * file = Register.File
1630 * [1] = Register.Index
1635 index
.i
[3] = reg
->Register
.Index
;
1637 /* There is an extra source register that indirectly subscripts
1638 * a register file. The direct index now becomes an offset
1639 * that is being added to the indirect register.
1643 * ind = Indirect.File
1644 * [2] = Indirect.Index
1645 * .x = Indirect.Swizzle
1647 if (reg
->Register
.Indirect
) {
1648 union tgsi_exec_channel index2
;
1649 union tgsi_exec_channel indir_index
;
1650 const uint execmask
= mach
->ExecMask
;
1653 /* which address register (always zero now) */
1657 index2
.i
[3] = reg
->Indirect
.Index
;
1658 /* get current value of address register[swizzle] */
1659 swizzle
= reg
->Indirect
.Swizzle
;
1660 fetch_src_file_channel(mach
,
1667 /* add value of address register to the offset */
1668 index
.i
[0] += indir_index
.i
[0];
1669 index
.i
[1] += indir_index
.i
[1];
1670 index
.i
[2] += indir_index
.i
[2];
1671 index
.i
[3] += indir_index
.i
[3];
1673 /* for disabled execution channels, zero-out the index to
1674 * avoid using a potential garbage value.
1676 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1677 if ((execmask
& (1 << i
)) == 0)
1682 /* There is an extra source register that is a second
1683 * subscript to a register file. Effectively it means that
1684 * the register file is actually a 2D array of registers.
1688 * [3] = Dimension.Index
1690 if (reg
->Register
.Dimension
) {
1694 index2D
.i
[3] = reg
->Dimension
.Index
;
1696 /* Again, the second subscript index can be addressed indirectly
1697 * identically to the first one.
1698 * Nothing stops us from indirectly addressing the indirect register,
1699 * but there is no need for that, so we won't exercise it.
1701 * file[ind[4].y+3][1],
1703 * ind = DimIndirect.File
1704 * [4] = DimIndirect.Index
1705 * .y = DimIndirect.Swizzle
1707 if (reg
->Dimension
.Indirect
) {
1708 union tgsi_exec_channel index2
;
1709 union tgsi_exec_channel indir_index
;
1710 const uint execmask
= mach
->ExecMask
;
1716 index2
.i
[3] = reg
->DimIndirect
.Index
;
1718 swizzle
= reg
->DimIndirect
.Swizzle
;
1719 fetch_src_file_channel(mach
,
1720 reg
->DimIndirect
.File
,
1726 index2D
.i
[0] += indir_index
.i
[0];
1727 index2D
.i
[1] += indir_index
.i
[1];
1728 index2D
.i
[2] += indir_index
.i
[2];
1729 index2D
.i
[3] += indir_index
.i
[3];
1731 /* for disabled execution channels, zero-out the index to
1732 * avoid using a potential garbage value.
1734 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1735 if ((execmask
& (1 << i
)) == 0) {
1741 /* If by any chance there was a need for a 3D array of register
1742 * files, we would have to check whether Dimension is followed
1743 * by a dimension register and continue the saga.
1752 swizzle
= tgsi_util_get_full_src_register_swizzle( reg
, chan_index
);
1753 fetch_src_file_channel(mach
,
1762 fetch_source(const struct tgsi_exec_machine
*mach
,
1763 union tgsi_exec_channel
*chan
,
1764 const struct tgsi_full_src_register
*reg
,
1765 const uint chan_index
,
1766 enum tgsi_exec_datatype src_datatype
)
1768 fetch_source_d(mach
, chan
, reg
, chan_index
);
1770 if (reg
->Register
.Absolute
) {
1771 if (src_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1772 micro_abs(chan
, chan
);
1774 micro_iabs(chan
, chan
);
1778 if (reg
->Register
.Negate
) {
1779 if (src_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1780 micro_neg(chan
, chan
);
1782 micro_ineg(chan
, chan
);
1787 static union tgsi_exec_channel
*
1788 store_dest_dstret(struct tgsi_exec_machine
*mach
,
1789 const union tgsi_exec_channel
*chan
,
1790 const struct tgsi_full_dst_register
*reg
,
1792 enum tgsi_exec_datatype dst_datatype
)
1794 static union tgsi_exec_channel null
;
1795 union tgsi_exec_channel
*dst
;
1796 union tgsi_exec_channel index2D
;
1797 int offset
= 0; /* indirection offset */
1801 if (0 && dst_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1802 check_inf_or_nan(chan
);
1805 /* There is an extra source register that indirectly subscripts
1806 * a register file. The direct index now becomes an offset
1807 * that is being added to the indirect register.
1811 * ind = Indirect.File
1812 * [2] = Indirect.Index
1813 * .x = Indirect.Swizzle
1815 if (reg
->Register
.Indirect
) {
1816 union tgsi_exec_channel index
;
1817 union tgsi_exec_channel indir_index
;
1820 /* which address register (always zero for now) */
1824 index
.i
[3] = reg
->Indirect
.Index
;
1826 /* get current value of address register[swizzle] */
1827 swizzle
= reg
->Indirect
.Swizzle
;
1829 /* fetch values from the address/indirection register */
1830 fetch_src_file_channel(mach
,
1837 /* save indirection offset */
1838 offset
= indir_index
.i
[0];
1841 /* There is an extra source register that is a second
1842 * subscript to a register file. Effectively it means that
1843 * the register file is actually a 2D array of registers.
1847 * [3] = Dimension.Index
1849 if (reg
->Register
.Dimension
) {
1853 index2D
.i
[3] = reg
->Dimension
.Index
;
1855 /* Again, the second subscript index can be addressed indirectly
1856 * identically to the first one.
1857 * Nothing stops us from indirectly addressing the indirect register,
1858 * but there is no need for that, so we won't exercise it.
1860 * file[ind[4].y+3][1],
1862 * ind = DimIndirect.File
1863 * [4] = DimIndirect.Index
1864 * .y = DimIndirect.Swizzle
1866 if (reg
->Dimension
.Indirect
) {
1867 union tgsi_exec_channel index2
;
1868 union tgsi_exec_channel indir_index
;
1869 const uint execmask
= mach
->ExecMask
;
1876 index2
.i
[3] = reg
->DimIndirect
.Index
;
1878 swizzle
= reg
->DimIndirect
.Swizzle
;
1879 fetch_src_file_channel(mach
,
1880 reg
->DimIndirect
.File
,
1886 index2D
.i
[0] += indir_index
.i
[0];
1887 index2D
.i
[1] += indir_index
.i
[1];
1888 index2D
.i
[2] += indir_index
.i
[2];
1889 index2D
.i
[3] += indir_index
.i
[3];
1891 /* for disabled execution channels, zero-out the index to
1892 * avoid using a potential garbage value.
1894 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1895 if ((execmask
& (1 << i
)) == 0) {
1901 /* If by any chance there was a need for a 3D array of register
1902 * files, we would have to check whether Dimension is followed
1903 * by a dimension register and continue the saga.
1912 switch (reg
->Register
.File
) {
1913 case TGSI_FILE_NULL
:
1917 case TGSI_FILE_OUTPUT
:
1918 index
= mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1919 + reg
->Register
.Index
;
1920 dst
= &mach
->Outputs
[offset
+ index
].xyzw
[chan_index
];
1922 debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1923 mach
->NumOutputs
, mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0],
1924 reg
->Register
.Index
);
1925 if (PIPE_SHADER_GEOMETRY
== mach
->ShaderType
) {
1926 debug_printf("STORING OUT[%d] mask(%d), = (", offset
+ index
, execmask
);
1927 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++)
1928 if (execmask
& (1 << i
))
1929 debug_printf("%f, ", chan
->f
[i
]);
1930 debug_printf(")\n");
1935 case TGSI_FILE_TEMPORARY
:
1936 index
= reg
->Register
.Index
;
1937 assert( index
< TGSI_EXEC_NUM_TEMPS
);
1938 dst
= &mach
->Temps
[offset
+ index
].xyzw
[chan_index
];
1941 case TGSI_FILE_ADDRESS
:
1942 index
= reg
->Register
.Index
;
1943 dst
= &mach
->Addrs
[index
].xyzw
[chan_index
];
1955 store_dest_double(struct tgsi_exec_machine
*mach
,
1956 const union tgsi_exec_channel
*chan
,
1957 const struct tgsi_full_dst_register
*reg
,
1959 enum tgsi_exec_datatype dst_datatype
)
1961 union tgsi_exec_channel
*dst
;
1962 const uint execmask
= mach
->ExecMask
;
1965 dst
= store_dest_dstret(mach
, chan
, reg
, chan_index
, dst_datatype
);
1970 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++)
1971 if (execmask
& (1 << i
))
1972 dst
->i
[i
] = chan
->i
[i
];
1976 store_dest(struct tgsi_exec_machine
*mach
,
1977 const union tgsi_exec_channel
*chan
,
1978 const struct tgsi_full_dst_register
*reg
,
1979 const struct tgsi_full_instruction
*inst
,
1981 enum tgsi_exec_datatype dst_datatype
)
1983 union tgsi_exec_channel
*dst
;
1984 const uint execmask
= mach
->ExecMask
;
1987 dst
= store_dest_dstret(mach
, chan
, reg
, chan_index
, dst_datatype
);
1991 if (!inst
->Instruction
.Saturate
) {
1992 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++)
1993 if (execmask
& (1 << i
))
1994 dst
->i
[i
] = chan
->i
[i
];
1997 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++)
1998 if (execmask
& (1 << i
)) {
1999 if (chan
->f
[i
] < 0.0f
)
2001 else if (chan
->f
[i
] > 1.0f
)
2004 dst
->i
[i
] = chan
->i
[i
];
/**
 * Fetch channel CHAN of source operand INDEX of the current instruction
 * into VAL.  FETCH requests float handling of the source modifiers, IFETCH
 * requests integer handling.
 *
 * Both macros expand to a fetch_source() call and rely on variables named
 * 'mach' and 'inst' being in scope at the point of use.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

#define IFETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)
2017 * Execute ARB-style KIL which is predicated by a src register.
2018 * Kill fragment if any of the four values is less than zero.
2021 exec_kill_if(struct tgsi_exec_machine
*mach
,
2022 const struct tgsi_full_instruction
*inst
)
2026 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2027 union tgsi_exec_channel r
[1];
2029 /* This mask stores component bits that were already tested. */
2032 for (chan_index
= 0; chan_index
< 4; chan_index
++)
2037 /* unswizzle channel */
2038 swizzle
= tgsi_util_get_full_src_register_swizzle (
2042 /* check if the component has not been already tested */
2043 if (uniquemask
& (1 << swizzle
))
2045 uniquemask
|= 1 << swizzle
;
2047 FETCH(&r
[0], 0, chan_index
);
2048 for (i
= 0; i
< 4; i
++)
2049 if (r
[0].f
[i
] < 0.0f
)
2053 /* restrict to fragments currently executing */
2054 kilmask
&= mach
->ExecMask
;
2056 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
2060 * Unconditional fragment kill/discard.
2063 exec_kill(struct tgsi_exec_machine
*mach
)
2065 uint kilmask
; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2067 /* kill fragment for all fragments currently executing */
2068 kilmask
= mach
->ExecMask
;
2069 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
2073 emit_vertex(struct tgsi_exec_machine
*mach
,
2074 const struct tgsi_full_instruction
*inst
)
2076 union tgsi_exec_channel r
[1];
2078 unsigned *prim_count
;
2079 /* FIXME: check for exec mask correctly
2081 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2082 if ((mach->ExecMask & (1 << i)))
2084 IFETCH(&r
[0], 0, TGSI_CHAN_X
);
2085 stream_id
= r
[0].u
[0];
2086 prim_count
= &mach
->Temps
[temp_prim_idxs
[stream_id
].idx
].xyzw
[temp_prim_idxs
[stream_id
].chan
].u
[0];
2087 if (mach
->ExecMask
) {
2088 if (mach
->Primitives
[stream_id
][*prim_count
] >= mach
->MaxOutputVertices
)
2091 if (mach
->Primitives
[stream_id
][*prim_count
] == 0)
2092 mach
->PrimitiveOffsets
[stream_id
][*prim_count
] = mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0];
2093 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += mach
->NumOutputs
;
2094 mach
->Primitives
[stream_id
][*prim_count
]++;
2099 emit_primitive(struct tgsi_exec_machine
*mach
,
2100 const struct tgsi_full_instruction
*inst
)
2102 unsigned *prim_count
;
2103 union tgsi_exec_channel r
[1];
2104 unsigned stream_id
= 0;
2105 /* FIXME: check for exec mask correctly
2107 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2108 if ((mach->ExecMask & (1 << i)))
2111 IFETCH(&r
[0], 0, TGSI_CHAN_X
);
2112 stream_id
= r
[0].u
[0];
2114 prim_count
= &mach
->Temps
[temp_prim_idxs
[stream_id
].idx
].xyzw
[temp_prim_idxs
[stream_id
].chan
].u
[0];
2115 if (mach
->ExecMask
) {
2117 debug_assert((*prim_count
* mach
->NumOutputs
) < mach
->MaxGeometryShaderOutputs
);
2118 mach
->Primitives
[stream_id
][*prim_count
] = 0;
2123 conditional_emit_primitive(struct tgsi_exec_machine
*mach
)
2125 if (PIPE_SHADER_GEOMETRY
== mach
->ShaderType
) {
2127 mach
->Primitives
[0][mach
->Temps
[temp_prim_idxs
[0].idx
].xyzw
[temp_prim_idxs
[0].chan
].u
[0]];
2128 if (emitted_verts
) {
2129 emit_primitive(mach
, NULL
);
2136 * Fetch four texture samples using STR texture coordinates.
2139 fetch_texel( struct tgsi_sampler
*sampler
,
2140 const unsigned sview_idx
,
2141 const unsigned sampler_idx
,
2142 const union tgsi_exec_channel
*s
,
2143 const union tgsi_exec_channel
*t
,
2144 const union tgsi_exec_channel
*p
,
2145 const union tgsi_exec_channel
*c0
,
2146 const union tgsi_exec_channel
*c1
,
2147 float derivs
[3][2][TGSI_QUAD_SIZE
],
2148 const int8_t offset
[3],
2149 enum tgsi_sampler_control control
,
2150 union tgsi_exec_channel
*r
,
2151 union tgsi_exec_channel
*g
,
2152 union tgsi_exec_channel
*b
,
2153 union tgsi_exec_channel
*a
)
2156 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
];
2158 /* FIXME: handle explicit derivs, offsets */
2159 sampler
->get_samples(sampler
, sview_idx
, sampler_idx
,
2160 s
->f
, t
->f
, p
->f
, c0
->f
, c1
->f
, derivs
, offset
, control
, rgba
);
2162 for (j
= 0; j
< 4; j
++) {
2163 r
->f
[j
] = rgba
[0][j
];
2164 g
->f
[j
] = rgba
[1][j
];
2165 b
->f
[j
] = rgba
[2][j
];
2166 a
->f
[j
] = rgba
[3][j
];
/**
 * Texture instruction variants, passed to exec_tex() as its 'modifier'
 * argument.
 */
#define TEX_MODIFIER_NONE           0  /* no modifier operand is fetched */
#define TEX_MODIFIER_PROJECTED      1  /* modifier is not forwarded as a sampler argument;
                                        * presumably the projective divisor -- confirm in exec_tex() */
#define TEX_MODIFIER_LOD_BIAS       2  /* selects TGSI_SAMPLER_LOD_BIAS */
#define TEX_MODIFIER_EXPLICIT_LOD   3  /* selects TGSI_SAMPLER_LOD_EXPLICIT */
#define TEX_MODIFIER_LEVEL_ZERO     4  /* exec_tex() asserts it is never given this value */
#define TEX_MODIFIER_GATHER         5  /* selects TGSI_SAMPLER_GATHER */
2179 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
2182 fetch_texel_offsets(struct tgsi_exec_machine
*mach
,
2183 const struct tgsi_full_instruction
*inst
,
2186 if (inst
->Texture
.NumOffsets
== 1) {
2187 union tgsi_exec_channel index
;
2188 union tgsi_exec_channel offset
[3];
2189 index
.i
[0] = index
.i
[1] = index
.i
[2] = index
.i
[3] = inst
->TexOffsets
[0].Index
;
2190 fetch_src_file_channel(mach
, inst
->TexOffsets
[0].File
,
2191 inst
->TexOffsets
[0].SwizzleX
, &index
, &ZeroVec
, &offset
[0]);
2192 fetch_src_file_channel(mach
, inst
->TexOffsets
[0].File
,
2193 inst
->TexOffsets
[0].SwizzleY
, &index
, &ZeroVec
, &offset
[1]);
2194 fetch_src_file_channel(mach
, inst
->TexOffsets
[0].File
,
2195 inst
->TexOffsets
[0].SwizzleZ
, &index
, &ZeroVec
, &offset
[2]);
2196 offsets
[0] = offset
[0].i
[0];
2197 offsets
[1] = offset
[1].i
[0];
2198 offsets
[2] = offset
[2].i
[0];
2200 assert(inst
->Texture
.NumOffsets
== 0);
2201 offsets
[0] = offsets
[1] = offsets
[2] = 0;
2207 * Fetch dx and dy values for one channel (s, t or r).
2208 * Put dx values into one float array, dy values into another.
2211 fetch_assign_deriv_channel(struct tgsi_exec_machine
*mach
,
2212 const struct tgsi_full_instruction
*inst
,
2215 float derivs
[2][TGSI_QUAD_SIZE
])
2217 union tgsi_exec_channel d
;
2218 FETCH(&d
, regdsrcx
, chan
);
2219 derivs
[0][0] = d
.f
[0];
2220 derivs
[0][1] = d
.f
[1];
2221 derivs
[0][2] = d
.f
[2];
2222 derivs
[0][3] = d
.f
[3];
2223 FETCH(&d
, regdsrcx
+ 1, chan
);
2224 derivs
[1][0] = d
.f
[0];
2225 derivs
[1][1] = d
.f
[1];
2226 derivs
[1][2] = d
.f
[2];
2227 derivs
[1][3] = d
.f
[3];
2231 fetch_sampler_unit(struct tgsi_exec_machine
*mach
,
2232 const struct tgsi_full_instruction
*inst
,
2237 if (inst
->Src
[sampler
].Register
.Indirect
) {
2238 const struct tgsi_full_src_register
*reg
= &inst
->Src
[sampler
];
2239 union tgsi_exec_channel indir_index
, index2
;
2240 const uint execmask
= mach
->ExecMask
;
2244 index2
.i
[3] = reg
->Indirect
.Index
;
2246 fetch_src_file_channel(mach
,
2248 reg
->Indirect
.Swizzle
,
2252 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
2253 if (execmask
& (1 << i
)) {
2254 unit
= inst
->Src
[sampler
].Register
.Index
+ indir_index
.i
[i
];
2260 unit
= inst
->Src
[sampler
].Register
.Index
;
2266 * execute a texture instruction.
2268 * modifier is used to control the channel routing for the
2269 * instruction variants like proj, lod, and texture with lod bias.
2270 * sampler indicates which src register the sampler is contained in.
2273 exec_tex(struct tgsi_exec_machine
*mach
,
2274 const struct tgsi_full_instruction
*inst
,
2275 uint modifier
, uint sampler
)
2277 const union tgsi_exec_channel
*args
[5], *proj
= NULL
;
2278 union tgsi_exec_channel r
[5];
2279 enum tgsi_sampler_control control
= TGSI_SAMPLER_LOD_NONE
;
2283 int dim
, shadow_ref
, i
;
2285 unit
= fetch_sampler_unit(mach
, inst
, sampler
);
2286 /* always fetch all 3 offsets, overkill but keeps code simple */
2287 fetch_texel_offsets(mach
, inst
, offsets
);
2289 assert(modifier
!= TEX_MODIFIER_LEVEL_ZERO
);
2290 assert(inst
->Texture
.Texture
!= TGSI_TEXTURE_BUFFER
);
2292 dim
= tgsi_util_get_texture_coord_dim(inst
->Texture
.Texture
);
2293 shadow_ref
= tgsi_util_get_shadow_ref_src_index(inst
->Texture
.Texture
);
2296 if (shadow_ref
>= 0)
2297 assert(shadow_ref
>= dim
&& shadow_ref
< (int)ARRAY_SIZE(args
));
2299 /* fetch modifier to the last argument */
2300 if (modifier
!= TEX_MODIFIER_NONE
) {
2301 const int last
= ARRAY_SIZE(args
) - 1;
2303 /* fetch modifier from src0.w or src1.x */
2305 assert(dim
<= TGSI_CHAN_W
&& shadow_ref
!= TGSI_CHAN_W
);
2306 FETCH(&r
[last
], 0, TGSI_CHAN_W
);
2309 FETCH(&r
[last
], 1, TGSI_CHAN_X
);
2312 if (modifier
!= TEX_MODIFIER_PROJECTED
) {
2313 args
[last
] = &r
[last
];
2317 args
[last
] = &ZeroVec
;
2320 /* point unused arguments to zero vector */
2321 for (i
= dim
; i
< last
; i
++)
2324 if (modifier
== TEX_MODIFIER_EXPLICIT_LOD
)
2325 control
= TGSI_SAMPLER_LOD_EXPLICIT
;
2326 else if (modifier
== TEX_MODIFIER_LOD_BIAS
)
2327 control
= TGSI_SAMPLER_LOD_BIAS
;
2328 else if (modifier
== TEX_MODIFIER_GATHER
)
2329 control
= TGSI_SAMPLER_GATHER
;
2332 for (i
= dim
; i
< (int)ARRAY_SIZE(args
); i
++)
2336 /* fetch coordinates */
2337 for (i
= 0; i
< dim
; i
++) {
2338 FETCH(&r
[i
], 0, TGSI_CHAN_X
+ i
);
2341 micro_div(&r
[i
], &r
[i
], proj
);
2346 /* fetch reference value */
2347 if (shadow_ref
>= 0) {
2348 FETCH(&r
[shadow_ref
], shadow_ref
/ 4, TGSI_CHAN_X
+ (shadow_ref
% 4));
2351 micro_div(&r
[shadow_ref
], &r
[shadow_ref
], proj
);
2353 args
[shadow_ref
] = &r
[shadow_ref
];
2356 fetch_texel(mach
->Sampler
, unit
, unit
,
2357 args
[0], args
[1], args
[2], args
[3], args
[4],
2358 NULL
, offsets
, control
,
2359 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
2362 debug_printf("fetch r: %g %g %g %g\n",
2363 r
[0].f
[0], r
[0].f
[1], r
[0].f
[2], r
[0].f
[3]);
2364 debug_printf("fetch g: %g %g %g %g\n",
2365 r
[1].f
[0], r
[1].f
[1], r
[1].f
[2], r
[1].f
[3]);
2366 debug_printf("fetch b: %g %g %g %g\n",
2367 r
[2].f
[0], r
[2].f
[1], r
[2].f
[2], r
[2].f
[3]);
2368 debug_printf("fetch a: %g %g %g %g\n",
2369 r
[3].f
[0], r
[3].f
[1], r
[3].f
[2], r
[3].f
[3]);
2372 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2373 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2374 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2380 exec_lodq(struct tgsi_exec_machine
*mach
,
2381 const struct tgsi_full_instruction
*inst
)
2383 uint resource_unit
, sampler_unit
;
2386 union tgsi_exec_channel coords
[4];
2387 const union tgsi_exec_channel
*args
[ARRAY_SIZE(coords
)];
2388 union tgsi_exec_channel r
[2];
2390 resource_unit
= fetch_sampler_unit(mach
, inst
, 1);
2391 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_LOD
) {
2392 uint target
= mach
->SamplerViews
[resource_unit
].Resource
;
2393 dim
= tgsi_util_get_texture_coord_dim(target
);
2394 sampler_unit
= fetch_sampler_unit(mach
, inst
, 2);
2396 dim
= tgsi_util_get_texture_coord_dim(inst
->Texture
.Texture
);
2397 sampler_unit
= resource_unit
;
2399 assert(dim
<= ARRAY_SIZE(coords
));
2400 /* fetch coordinates */
2401 for (i
= 0; i
< dim
; i
++) {
2402 FETCH(&coords
[i
], 0, TGSI_CHAN_X
+ i
);
2403 args
[i
] = &coords
[i
];
2405 for (i
= dim
; i
< ARRAY_SIZE(coords
); i
++) {
2408 mach
->Sampler
->query_lod(mach
->Sampler
, resource_unit
, sampler_unit
,
2413 TGSI_SAMPLER_LOD_NONE
,
2417 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
2418 store_dest(mach
, &r
[0], &inst
->Dst
[0], inst
, TGSI_CHAN_X
,
2419 TGSI_EXEC_DATA_FLOAT
);
2421 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
2422 store_dest(mach
, &r
[1], &inst
->Dst
[0], inst
, TGSI_CHAN_Y
,
2423 TGSI_EXEC_DATA_FLOAT
);
2425 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_LOD
) {
2426 unsigned char swizzles
[4];
2428 swizzles
[0] = inst
->Src
[1].Register
.SwizzleX
;
2429 swizzles
[1] = inst
->Src
[1].Register
.SwizzleY
;
2430 swizzles
[2] = inst
->Src
[1].Register
.SwizzleZ
;
2431 swizzles
[3] = inst
->Src
[1].Register
.SwizzleW
;
2433 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2434 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2435 if (swizzles
[chan
] >= 2) {
2436 store_dest(mach
, &ZeroVec
,
2437 &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2439 store_dest(mach
, &r
[swizzles
[chan
]],
2440 &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2445 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
2446 store_dest(mach
, &r
[0], &inst
->Dst
[0], inst
, TGSI_CHAN_X
,
2447 TGSI_EXEC_DATA_FLOAT
);
2449 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
2450 store_dest(mach
, &r
[1], &inst
->Dst
[0], inst
, TGSI_CHAN_Y
,
2451 TGSI_EXEC_DATA_FLOAT
);
2457 exec_txd(struct tgsi_exec_machine
*mach
,
2458 const struct tgsi_full_instruction
*inst
)
2460 union tgsi_exec_channel r
[4];
2461 float derivs
[3][2][TGSI_QUAD_SIZE
];
2466 unit
= fetch_sampler_unit(mach
, inst
, 3);
2467 /* always fetch all 3 offsets, overkill but keeps code simple */
2468 fetch_texel_offsets(mach
, inst
, offsets
);
2470 switch (inst
->Texture
.Texture
) {
2471 case TGSI_TEXTURE_1D
:
2472 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2474 fetch_assign_deriv_channel(mach
, inst
, 1, TGSI_CHAN_X
, derivs
[0]);
2476 fetch_texel(mach
->Sampler
, unit
, unit
,
2477 &r
[0], &ZeroVec
, &ZeroVec
, &ZeroVec
, &ZeroVec
, /* S, T, P, C, LOD */
2478 derivs
, offsets
, TGSI_SAMPLER_DERIVS_EXPLICIT
,
2479 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
2482 case TGSI_TEXTURE_SHADOW1D
:
2483 case TGSI_TEXTURE_1D_ARRAY
:
2484 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
2485 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
2486 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2487 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2488 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
2490 fetch_assign_deriv_channel(mach
, inst
, 1, TGSI_CHAN_X
, derivs
[0]);
2492 fetch_texel(mach
->Sampler
, unit
, unit
,
2493 &r
[0], &r
[1], &r
[2], &ZeroVec
, &ZeroVec
, /* S, T, P, C, LOD */
2494 derivs
, offsets
, TGSI_SAMPLER_DERIVS_EXPLICIT
,
2495 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
2498 case TGSI_TEXTURE_2D
:
2499 case TGSI_TEXTURE_RECT
:
2500 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2501 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2503 fetch_assign_deriv_channel(mach
, inst
, 1, TGSI_CHAN_X
, derivs
[0]);
2504 fetch_assign_deriv_channel(mach
, inst
, 1, TGSI_CHAN_Y
, derivs
[1]);
2506 fetch_texel(mach
->Sampler
, unit
, unit
,
2507 &r
[0], &r
[1], &ZeroVec
, &ZeroVec
, &ZeroVec
, /* S, T, P, C, LOD */
2508 derivs
, offsets
, TGSI_SAMPLER_DERIVS_EXPLICIT
,
2509 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
2513 case TGSI_TEXTURE_SHADOW2D
:
2514 case TGSI_TEXTURE_SHADOWRECT
:
2515 case TGSI_TEXTURE_2D_ARRAY
:
2516 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
2517 /* only SHADOW2D_ARRAY actually needs W */
2518 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2519 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2520 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
2521 FETCH(&r
[3], 0, TGSI_CHAN_W
);
2523 fetch_assign_deriv_channel(mach
, inst
, 1, TGSI_CHAN_X
, derivs
[0]);
2524 fetch_assign_deriv_channel(mach
, inst
, 1, TGSI_CHAN_Y
, derivs
[1]);
2526 fetch_texel(mach
->Sampler
, unit
, unit
,
2527 &r
[0], &r
[1], &r
[2], &r
[3], &ZeroVec
, /* inputs */
2528 derivs
, offsets
, TGSI_SAMPLER_DERIVS_EXPLICIT
,
2529 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
2532 case TGSI_TEXTURE_3D
:
2533 case TGSI_TEXTURE_CUBE
:
2534 case TGSI_TEXTURE_CUBE_ARRAY
:
2535 case TGSI_TEXTURE_SHADOWCUBE
:
2536 /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */
2537 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2538 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2539 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
2540 FETCH(&r
[3], 0, TGSI_CHAN_W
);
2542 fetch_assign_deriv_channel(mach
, inst
, 1, TGSI_CHAN_X
, derivs
[0]);
2543 fetch_assign_deriv_channel(mach
, inst
, 1, TGSI_CHAN_Y
, derivs
[1]);
2544 fetch_assign_deriv_channel(mach
, inst
, 1, TGSI_CHAN_Z
, derivs
[2]);
2546 fetch_texel(mach
->Sampler
, unit
, unit
,
2547 &r
[0], &r
[1], &r
[2], &r
[3], &ZeroVec
, /* inputs */
2548 derivs
, offsets
, TGSI_SAMPLER_DERIVS_EXPLICIT
,
2549 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
2556 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2557 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2558 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2565 exec_txf(struct tgsi_exec_machine
*mach
,
2566 const struct tgsi_full_instruction
*inst
)
2568 union tgsi_exec_channel r
[4];
2571 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
];
2576 unit
= fetch_sampler_unit(mach
, inst
, 1);
2577 /* always fetch all 3 offsets, overkill but keeps code simple */
2578 fetch_texel_offsets(mach
, inst
, offsets
);
2580 IFETCH(&r
[3], 0, TGSI_CHAN_W
);
2582 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_SAMPLE_I
||
2583 inst
->Instruction
.Opcode
== TGSI_OPCODE_SAMPLE_I_MS
) {
2584 target
= mach
->SamplerViews
[unit
].Resource
;
2587 target
= inst
->Texture
.Texture
;
2590 case TGSI_TEXTURE_3D
:
2591 case TGSI_TEXTURE_2D_ARRAY
:
2592 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
2593 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
2594 IFETCH(&r
[2], 0, TGSI_CHAN_Z
);
2596 case TGSI_TEXTURE_2D
:
2597 case TGSI_TEXTURE_RECT
:
2598 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
2599 case TGSI_TEXTURE_SHADOW2D
:
2600 case TGSI_TEXTURE_SHADOWRECT
:
2601 case TGSI_TEXTURE_1D_ARRAY
:
2602 case TGSI_TEXTURE_2D_MSAA
:
2603 IFETCH(&r
[1], 0, TGSI_CHAN_Y
);
2605 case TGSI_TEXTURE_BUFFER
:
2606 case TGSI_TEXTURE_1D
:
2607 case TGSI_TEXTURE_SHADOW1D
:
2608 IFETCH(&r
[0], 0, TGSI_CHAN_X
);
2615 mach
->Sampler
->get_texel(mach
->Sampler
, unit
, r
[0].i
, r
[1].i
, r
[2].i
, r
[3].i
,
2618 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
2619 r
[0].f
[j
] = rgba
[0][j
];
2620 r
[1].f
[j
] = rgba
[1][j
];
2621 r
[2].f
[j
] = rgba
[2][j
];
2622 r
[3].f
[j
] = rgba
[3][j
];
2625 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_SAMPLE_I
||
2626 inst
->Instruction
.Opcode
== TGSI_OPCODE_SAMPLE_I_MS
) {
2627 unsigned char swizzles
[4];
2628 swizzles
[0] = inst
->Src
[1].Register
.SwizzleX
;
2629 swizzles
[1] = inst
->Src
[1].Register
.SwizzleY
;
2630 swizzles
[2] = inst
->Src
[1].Register
.SwizzleZ
;
2631 swizzles
[3] = inst
->Src
[1].Register
.SwizzleW
;
2633 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2634 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2635 store_dest(mach
, &r
[swizzles
[chan
]],
2636 &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2641 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2642 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2643 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2650 exec_txq(struct tgsi_exec_machine
*mach
,
2651 const struct tgsi_full_instruction
*inst
)
2654 union tgsi_exec_channel r
[4], src
;
2659 unit
= fetch_sampler_unit(mach
, inst
, 1);
2661 fetch_source(mach
, &src
, &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_INT
);
2663 /* XXX: This interface can't return per-pixel values */
2664 mach
->Sampler
->get_dims(mach
->Sampler
, unit
, src
.i
[0], result
);
2666 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
2667 for (j
= 0; j
< 4; j
++) {
2668 r
[j
].i
[i
] = result
[j
];
2672 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2673 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2674 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
,
2675 TGSI_EXEC_DATA_INT
);
2681 exec_sample(struct tgsi_exec_machine
*mach
,
2682 const struct tgsi_full_instruction
*inst
,
2683 uint modifier
, boolean compare
)
2685 const uint resource_unit
= inst
->Src
[1].Register
.Index
;
2686 const uint sampler_unit
= inst
->Src
[2].Register
.Index
;
2687 union tgsi_exec_channel r
[5], c1
;
2688 const union tgsi_exec_channel
*lod
= &ZeroVec
;
2689 enum tgsi_sampler_control control
= TGSI_SAMPLER_LOD_NONE
;
2691 unsigned char swizzles
[4];
2694 /* always fetch all 3 offsets, overkill but keeps code simple */
2695 fetch_texel_offsets(mach
, inst
, offsets
);
2697 assert(modifier
!= TEX_MODIFIER_PROJECTED
);
2699 if (modifier
!= TEX_MODIFIER_NONE
) {
2700 if (modifier
== TEX_MODIFIER_LOD_BIAS
) {
2701 FETCH(&c1
, 3, TGSI_CHAN_X
);
2703 control
= TGSI_SAMPLER_LOD_BIAS
;
2705 else if (modifier
== TEX_MODIFIER_EXPLICIT_LOD
) {
2706 FETCH(&c1
, 3, TGSI_CHAN_X
);
2708 control
= TGSI_SAMPLER_LOD_EXPLICIT
;
2710 else if (modifier
== TEX_MODIFIER_GATHER
) {
2711 control
= TGSI_SAMPLER_GATHER
;
2714 assert(modifier
== TEX_MODIFIER_LEVEL_ZERO
);
2715 control
= TGSI_SAMPLER_LOD_ZERO
;
2719 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2721 switch (mach
->SamplerViews
[resource_unit
].Resource
) {
2722 case TGSI_TEXTURE_1D
:
2724 FETCH(&r
[2], 3, TGSI_CHAN_X
);
2725 fetch_texel(mach
->Sampler
, resource_unit
, sampler_unit
,
2726 &r
[0], &ZeroVec
, &r
[2], &ZeroVec
, lod
, /* S, T, P, C, LOD */
2727 NULL
, offsets
, control
,
2728 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
2731 fetch_texel(mach
->Sampler
, resource_unit
, sampler_unit
,
2732 &r
[0], &ZeroVec
, &ZeroVec
, &ZeroVec
, lod
, /* S, T, P, C, LOD */
2733 NULL
, offsets
, control
,
2734 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
2738 case TGSI_TEXTURE_1D_ARRAY
:
2739 case TGSI_TEXTURE_2D
:
2740 case TGSI_TEXTURE_RECT
:
2741 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2743 FETCH(&r
[2], 3, TGSI_CHAN_X
);
2744 fetch_texel(mach
->Sampler
, resource_unit
, sampler_unit
,
2745 &r
[0], &r
[1], &r
[2], &ZeroVec
, lod
, /* S, T, P, C, LOD */
2746 NULL
, offsets
, control
,
2747 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
2750 fetch_texel(mach
->Sampler
, resource_unit
, sampler_unit
,
2751 &r
[0], &r
[1], &ZeroVec
, &ZeroVec
, lod
, /* S, T, P, C, LOD */
2752 NULL
, offsets
, control
,
2753 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
2757 case TGSI_TEXTURE_2D_ARRAY
:
2758 case TGSI_TEXTURE_3D
:
2759 case TGSI_TEXTURE_CUBE
:
2760 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2761 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
2763 FETCH(&r
[3], 3, TGSI_CHAN_X
);
2764 fetch_texel(mach
->Sampler
, resource_unit
, sampler_unit
,
2765 &r
[0], &r
[1], &r
[2], &r
[3], lod
,
2766 NULL
, offsets
, control
,
2767 &r
[0], &r
[1], &r
[2], &r
[3]);
2770 fetch_texel(mach
->Sampler
, resource_unit
, sampler_unit
,
2771 &r
[0], &r
[1], &r
[2], &ZeroVec
, lod
,
2772 NULL
, offsets
, control
,
2773 &r
[0], &r
[1], &r
[2], &r
[3]);
2777 case TGSI_TEXTURE_CUBE_ARRAY
:
2778 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2779 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
2780 FETCH(&r
[3], 0, TGSI_CHAN_W
);
2782 FETCH(&r
[4], 3, TGSI_CHAN_X
);
2783 fetch_texel(mach
->Sampler
, resource_unit
, sampler_unit
,
2784 &r
[0], &r
[1], &r
[2], &r
[3], &r
[4],
2785 NULL
, offsets
, control
,
2786 &r
[0], &r
[1], &r
[2], &r
[3]);
2789 fetch_texel(mach
->Sampler
, resource_unit
, sampler_unit
,
2790 &r
[0], &r
[1], &r
[2], &r
[3], lod
,
2791 NULL
, offsets
, control
,
2792 &r
[0], &r
[1], &r
[2], &r
[3]);
2801 swizzles
[0] = inst
->Src
[1].Register
.SwizzleX
;
2802 swizzles
[1] = inst
->Src
[1].Register
.SwizzleY
;
2803 swizzles
[2] = inst
->Src
[1].Register
.SwizzleZ
;
2804 swizzles
[3] = inst
->Src
[1].Register
.SwizzleW
;
2806 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2807 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2808 store_dest(mach
, &r
[swizzles
[chan
]],
2809 &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2815 exec_sample_d(struct tgsi_exec_machine
*mach
,
2816 const struct tgsi_full_instruction
*inst
)
2818 const uint resource_unit
= inst
->Src
[1].Register
.Index
;
2819 const uint sampler_unit
= inst
->Src
[2].Register
.Index
;
2820 union tgsi_exec_channel r
[4];
2821 float derivs
[3][2][TGSI_QUAD_SIZE
];
2823 unsigned char swizzles
[4];
2826 /* always fetch all 3 offsets, overkill but keeps code simple */
2827 fetch_texel_offsets(mach
, inst
, offsets
);
2829 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2831 switch (mach
->SamplerViews
[resource_unit
].Resource
) {
2832 case TGSI_TEXTURE_1D
:
2833 case TGSI_TEXTURE_1D_ARRAY
:
2834 /* only 1D array actually needs Y */
2835 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2837 fetch_assign_deriv_channel(mach
, inst
, 3, TGSI_CHAN_X
, derivs
[0]);
2839 fetch_texel(mach
->Sampler
, resource_unit
, sampler_unit
,
2840 &r
[0], &r
[1], &ZeroVec
, &ZeroVec
, &ZeroVec
, /* S, T, P, C, LOD */
2841 derivs
, offsets
, TGSI_SAMPLER_DERIVS_EXPLICIT
,
2842 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
2845 case TGSI_TEXTURE_2D
:
2846 case TGSI_TEXTURE_RECT
:
2847 case TGSI_TEXTURE_2D_ARRAY
:
2848 /* only 2D array actually needs Z */
2849 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2850 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
2852 fetch_assign_deriv_channel(mach
, inst
, 3, TGSI_CHAN_X
, derivs
[0]);
2853 fetch_assign_deriv_channel(mach
, inst
, 3, TGSI_CHAN_Y
, derivs
[1]);
2855 fetch_texel(mach
->Sampler
, resource_unit
, sampler_unit
,
2856 &r
[0], &r
[1], &r
[2], &ZeroVec
, &ZeroVec
, /* inputs */
2857 derivs
, offsets
, TGSI_SAMPLER_DERIVS_EXPLICIT
,
2858 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
2861 case TGSI_TEXTURE_3D
:
2862 case TGSI_TEXTURE_CUBE
:
2863 case TGSI_TEXTURE_CUBE_ARRAY
:
2864 /* only cube array actually needs W */
2865 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2866 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
2867 FETCH(&r
[3], 0, TGSI_CHAN_W
);
2869 fetch_assign_deriv_channel(mach
, inst
, 3, TGSI_CHAN_X
, derivs
[0]);
2870 fetch_assign_deriv_channel(mach
, inst
, 3, TGSI_CHAN_Y
, derivs
[1]);
2871 fetch_assign_deriv_channel(mach
, inst
, 3, TGSI_CHAN_Z
, derivs
[2]);
2873 fetch_texel(mach
->Sampler
, resource_unit
, sampler_unit
,
2874 &r
[0], &r
[1], &r
[2], &r
[3], &ZeroVec
,
2875 derivs
, offsets
, TGSI_SAMPLER_DERIVS_EXPLICIT
,
2876 &r
[0], &r
[1], &r
[2], &r
[3]);
2883 swizzles
[0] = inst
->Src
[1].Register
.SwizzleX
;
2884 swizzles
[1] = inst
->Src
[1].Register
.SwizzleY
;
2885 swizzles
[2] = inst
->Src
[1].Register
.SwizzleZ
;
2886 swizzles
[3] = inst
->Src
[1].Register
.SwizzleW
;
2888 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2889 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2890 store_dest(mach
, &r
[swizzles
[chan
]],
2891 &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2898 * Evaluate a constant-valued coefficient at the position of the
2903 struct tgsi_exec_machine
*mach
,
2909 for( i
= 0; i
< TGSI_QUAD_SIZE
; i
++ ) {
2910 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
2915 * Evaluate a linear-valued coefficient at the position of the
2920 struct tgsi_exec_machine
*mach
,
2924 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
2925 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
2926 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
2927 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
2928 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
2929 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
2930 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
2931 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
2932 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
2936 * Evaluate a perspective-valued coefficient at the position of the
2940 eval_perspective_coef(
2941 struct tgsi_exec_machine
*mach
,
2945 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
2946 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
2947 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
2948 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
2949 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
2950 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
2951 /* divide by W here */
2952 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
2953 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
2954 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
2955 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
/** Signature shared by the eval_*_coef() input interpolation functions. */
typedef void (* eval_coef_func)(
   struct tgsi_exec_machine *mach,
   unsigned attrib,
   unsigned chan );
2965 exec_declaration(struct tgsi_exec_machine
*mach
,
2966 const struct tgsi_full_declaration
*decl
)
2968 if (decl
->Declaration
.File
== TGSI_FILE_SAMPLER_VIEW
) {
2969 mach
->SamplerViews
[decl
->Range
.First
] = decl
->SamplerView
;
2973 if (mach
->ShaderType
== PIPE_SHADER_FRAGMENT
) {
2974 if (decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
2975 uint first
, last
, mask
;
2977 first
= decl
->Range
.First
;
2978 last
= decl
->Range
.Last
;
2979 mask
= decl
->Declaration
.UsageMask
;
2981 /* XXX we could remove this special-case code since
2982 * mach->InterpCoefs[first].a0 should already have the
2983 * front/back-face value. But we should first update the
2984 * ureg code to emit the right UsageMask value (WRITEMASK_X).
2985 * Then, we could remove the tgsi_exec_machine::Face field.
2987 /* XXX make FACE a system value */
2988 if (decl
->Semantic
.Name
== TGSI_SEMANTIC_FACE
) {
2991 assert(decl
->Semantic
.Index
== 0);
2992 assert(first
== last
);
2994 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
2995 mach
->Inputs
[first
].xyzw
[0].f
[i
] = mach
->Face
;
2998 eval_coef_func eval
;
3001 switch (decl
->Interp
.Interpolate
) {
3002 case TGSI_INTERPOLATE_CONSTANT
:
3003 eval
= eval_constant_coef
;
3006 case TGSI_INTERPOLATE_LINEAR
:
3007 eval
= eval_linear_coef
;
3010 case TGSI_INTERPOLATE_PERSPECTIVE
:
3011 eval
= eval_perspective_coef
;
3014 case TGSI_INTERPOLATE_COLOR
:
3015 eval
= mach
->flatshade_color
? eval_constant_coef
: eval_perspective_coef
;
3023 for (j
= 0; j
< TGSI_NUM_CHANNELS
; j
++) {
3024 if (mask
& (1 << j
)) {
3025 for (i
= first
; i
<= last
; i
++) {
3032 if (DEBUG_EXECUTION
) {
3034 for (i
= first
; i
<= last
; ++i
) {
3035 debug_printf("IN[%2u] = ", i
);
3036 for (j
= 0; j
< TGSI_NUM_CHANNELS
; j
++) {
3040 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3041 mach
->Inputs
[i
].xyzw
[0].f
[j
], mach
->Inputs
[i
].xyzw
[0].u
[j
],
3042 mach
->Inputs
[i
].xyzw
[1].f
[j
], mach
->Inputs
[i
].xyzw
[1].u
[j
],
3043 mach
->Inputs
[i
].xyzw
[2].f
[j
], mach
->Inputs
[i
].xyzw
[2].u
[j
],
3044 mach
->Inputs
[i
].xyzw
[3].f
[j
], mach
->Inputs
[i
].xyzw
[3].u
[j
]);
/** Per-channel operation taking one source operand. */
typedef void (* micro_unary_op)(union tgsi_exec_channel *dst,
                                const union tgsi_exec_channel *src);
3057 exec_scalar_unary(struct tgsi_exec_machine
*mach
,
3058 const struct tgsi_full_instruction
*inst
,
3060 enum tgsi_exec_datatype dst_datatype
,
3061 enum tgsi_exec_datatype src_datatype
)
3064 union tgsi_exec_channel src
;
3065 union tgsi_exec_channel dst
;
3067 fetch_source(mach
, &src
, &inst
->Src
[0], TGSI_CHAN_X
, src_datatype
);
3069 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3070 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3071 store_dest(mach
, &dst
, &inst
->Dst
[0], inst
, chan
, dst_datatype
);
3077 exec_vector_unary(struct tgsi_exec_machine
*mach
,
3078 const struct tgsi_full_instruction
*inst
,
3080 enum tgsi_exec_datatype dst_datatype
,
3081 enum tgsi_exec_datatype src_datatype
)
3084 struct tgsi_exec_vector dst
;
3086 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3087 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3088 union tgsi_exec_channel src
;
3090 fetch_source(mach
, &src
, &inst
->Src
[0], chan
, src_datatype
);
3091 op(&dst
.xyzw
[chan
], &src
);
3094 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3095 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3096 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
/** Per-channel operation taking two source operands. */
typedef void (* micro_binary_op)(union tgsi_exec_channel *dst,
                                 const union tgsi_exec_channel *src0,
                                 const union tgsi_exec_channel *src1);
3106 exec_scalar_binary(struct tgsi_exec_machine
*mach
,
3107 const struct tgsi_full_instruction
*inst
,
3109 enum tgsi_exec_datatype dst_datatype
,
3110 enum tgsi_exec_datatype src_datatype
)
3113 union tgsi_exec_channel src
[2];
3114 union tgsi_exec_channel dst
;
3116 fetch_source(mach
, &src
[0], &inst
->Src
[0], TGSI_CHAN_X
, src_datatype
);
3117 fetch_source(mach
, &src
[1], &inst
->Src
[1], TGSI_CHAN_X
, src_datatype
);
3118 op(&dst
, &src
[0], &src
[1]);
3119 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3120 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3121 store_dest(mach
, &dst
, &inst
->Dst
[0], inst
, chan
, dst_datatype
);
3127 exec_vector_binary(struct tgsi_exec_machine
*mach
,
3128 const struct tgsi_full_instruction
*inst
,
3130 enum tgsi_exec_datatype dst_datatype
,
3131 enum tgsi_exec_datatype src_datatype
)
3134 struct tgsi_exec_vector dst
;
3136 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3137 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3138 union tgsi_exec_channel src
[2];
3140 fetch_source(mach
, &src
[0], &inst
->Src
[0], chan
, src_datatype
);
3141 fetch_source(mach
, &src
[1], &inst
->Src
[1], chan
, src_datatype
);
3142 op(&dst
.xyzw
[chan
], &src
[0], &src
[1]);
3145 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3146 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3147 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
/** Per-channel operation taking three source operands. */
typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst,
                                  const union tgsi_exec_channel *src0,
                                  const union tgsi_exec_channel *src1,
                                  const union tgsi_exec_channel *src2);
3158 exec_vector_trinary(struct tgsi_exec_machine
*mach
,
3159 const struct tgsi_full_instruction
*inst
,
3160 micro_trinary_op op
,
3161 enum tgsi_exec_datatype dst_datatype
,
3162 enum tgsi_exec_datatype src_datatype
)
3165 struct tgsi_exec_vector dst
;
3167 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3168 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3169 union tgsi_exec_channel src
[3];
3171 fetch_source(mach
, &src
[0], &inst
->Src
[0], chan
, src_datatype
);
3172 fetch_source(mach
, &src
[1], &inst
->Src
[1], chan
, src_datatype
);
3173 fetch_source(mach
, &src
[2], &inst
->Src
[2], chan
, src_datatype
);
3174 op(&dst
.xyzw
[chan
], &src
[0], &src
[1], &src
[2]);
3177 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3178 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3179 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
/** Per-channel operation taking four source operands. */
typedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst,
                                     const union tgsi_exec_channel *src0,
                                     const union tgsi_exec_channel *src1,
                                     const union tgsi_exec_channel *src2,
                                     const union tgsi_exec_channel *src3);
3191 exec_vector_quaternary(struct tgsi_exec_machine
*mach
,
3192 const struct tgsi_full_instruction
*inst
,
3193 micro_quaternary_op op
,
3194 enum tgsi_exec_datatype dst_datatype
,
3195 enum tgsi_exec_datatype src_datatype
)
3198 struct tgsi_exec_vector dst
;
3200 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3201 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3202 union tgsi_exec_channel src
[4];
3204 fetch_source(mach
, &src
[0], &inst
->Src
[0], chan
, src_datatype
);
3205 fetch_source(mach
, &src
[1], &inst
->Src
[1], chan
, src_datatype
);
3206 fetch_source(mach
, &src
[2], &inst
->Src
[2], chan
, src_datatype
);
3207 fetch_source(mach
, &src
[3], &inst
->Src
[3], chan
, src_datatype
);
3208 op(&dst
.xyzw
[chan
], &src
[0], &src
[1], &src
[2], &src
[3]);
3211 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3212 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3213 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
3219 exec_dp3(struct tgsi_exec_machine
*mach
,
3220 const struct tgsi_full_instruction
*inst
)
3223 union tgsi_exec_channel arg
[3];
3225 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3226 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3227 micro_mul(&arg
[2], &arg
[0], &arg
[1]);
3229 for (chan
= TGSI_CHAN_Y
; chan
<= TGSI_CHAN_Z
; chan
++) {
3230 fetch_source(mach
, &arg
[0], &inst
->Src
[0], chan
, TGSI_EXEC_DATA_FLOAT
);
3231 fetch_source(mach
, &arg
[1], &inst
->Src
[1], chan
, TGSI_EXEC_DATA_FLOAT
);
3232 micro_mad(&arg
[2], &arg
[0], &arg
[1], &arg
[2]);
3235 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3236 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3237 store_dest(mach
, &arg
[2], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
3243 exec_dp4(struct tgsi_exec_machine
*mach
,
3244 const struct tgsi_full_instruction
*inst
)
3247 union tgsi_exec_channel arg
[3];
3249 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3250 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3251 micro_mul(&arg
[2], &arg
[0], &arg
[1]);
3253 for (chan
= TGSI_CHAN_Y
; chan
<= TGSI_CHAN_W
; chan
++) {
3254 fetch_source(mach
, &arg
[0], &inst
->Src
[0], chan
, TGSI_EXEC_DATA_FLOAT
);
3255 fetch_source(mach
, &arg
[1], &inst
->Src
[1], chan
, TGSI_EXEC_DATA_FLOAT
);
3256 micro_mad(&arg
[2], &arg
[0], &arg
[1], &arg
[2]);
3259 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3260 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3261 store_dest(mach
, &arg
[2], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
3267 exec_dp2(struct tgsi_exec_machine
*mach
,
3268 const struct tgsi_full_instruction
*inst
)
3271 union tgsi_exec_channel arg
[3];
3273 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3274 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3275 micro_mul(&arg
[2], &arg
[0], &arg
[1]);
3277 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
3278 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
3279 micro_mad(&arg
[2], &arg
[0], &arg
[1], &arg
[2]);
3281 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3282 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3283 store_dest(mach
, &arg
[2], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
3289 exec_pk2h(struct tgsi_exec_machine
*mach
,
3290 const struct tgsi_full_instruction
*inst
)
3293 union tgsi_exec_channel arg
[2], dst
;
3295 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3296 fetch_source(mach
, &arg
[1], &inst
->Src
[0], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
3297 for (chan
= 0; chan
< TGSI_QUAD_SIZE
; chan
++) {
3298 dst
.u
[chan
] = util_float_to_half(arg
[0].f
[chan
]) |
3299 (util_float_to_half(arg
[1].f
[chan
]) << 16);
3301 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3302 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3303 store_dest(mach
, &dst
, &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_UINT
);
3309 exec_up2h(struct tgsi_exec_machine
*mach
,
3310 const struct tgsi_full_instruction
*inst
)
3313 union tgsi_exec_channel arg
, dst
[2];
3315 fetch_source(mach
, &arg
, &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_UINT
);
3316 for (chan
= 0; chan
< TGSI_QUAD_SIZE
; chan
++) {
3317 dst
[0].f
[chan
] = util_half_to_float(arg
.u
[chan
] & 0xffff);
3318 dst
[1].f
[chan
] = util_half_to_float(arg
.u
[chan
] >> 16);
3320 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3321 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3322 store_dest(mach
, &dst
[chan
& 1], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
3328 micro_ucmp(union tgsi_exec_channel
*dst
,
3329 const union tgsi_exec_channel
*src0
,
3330 const union tgsi_exec_channel
*src1
,
3331 const union tgsi_exec_channel
*src2
)
3333 dst
->f
[0] = src0
->u
[0] ? src1
->f
[0] : src2
->f
[0];
3334 dst
->f
[1] = src0
->u
[1] ? src1
->f
[1] : src2
->f
[1];
3335 dst
->f
[2] = src0
->u
[2] ? src1
->f
[2] : src2
->f
[2];
3336 dst
->f
[3] = src0
->u
[3] ? src1
->f
[3] : src2
->f
[3];
3340 exec_ucmp(struct tgsi_exec_machine
*mach
,
3341 const struct tgsi_full_instruction
*inst
)
3344 struct tgsi_exec_vector dst
;
3346 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3347 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3348 union tgsi_exec_channel src
[3];
3350 fetch_source(mach
, &src
[0], &inst
->Src
[0], chan
,
3351 TGSI_EXEC_DATA_UINT
);
3352 fetch_source(mach
, &src
[1], &inst
->Src
[1], chan
,
3353 TGSI_EXEC_DATA_FLOAT
);
3354 fetch_source(mach
, &src
[2], &inst
->Src
[2], chan
,
3355 TGSI_EXEC_DATA_FLOAT
);
3356 micro_ucmp(&dst
.xyzw
[chan
], &src
[0], &src
[1], &src
[2]);
3359 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3360 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3361 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
,
3362 TGSI_EXEC_DATA_FLOAT
);
3368 exec_dst(struct tgsi_exec_machine
*mach
,
3369 const struct tgsi_full_instruction
*inst
)
3371 union tgsi_exec_channel r
[2];
3372 union tgsi_exec_channel d
[4];
3374 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
3375 fetch_source(mach
, &r
[0], &inst
->Src
[0], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
3376 fetch_source(mach
, &r
[1], &inst
->Src
[1], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
3377 micro_mul(&d
[TGSI_CHAN_Y
], &r
[0], &r
[1]);
3379 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
3380 fetch_source(mach
, &d
[TGSI_CHAN_Z
], &inst
->Src
[0], TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
3382 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
3383 fetch_source(mach
, &d
[TGSI_CHAN_W
], &inst
->Src
[1], TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
3386 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
3387 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3389 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
3390 store_dest(mach
, &d
[TGSI_CHAN_Y
], &inst
->Dst
[0], inst
, TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
3392 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
3393 store_dest(mach
, &d
[TGSI_CHAN_Z
], &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
3395 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
3396 store_dest(mach
, &d
[TGSI_CHAN_W
], &inst
->Dst
[0], inst
, TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
3401 exec_log(struct tgsi_exec_machine
*mach
,
3402 const struct tgsi_full_instruction
*inst
)
3404 union tgsi_exec_channel r
[3];
3406 fetch_source(mach
, &r
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3407 micro_abs(&r
[2], &r
[0]); /* r2 = abs(r0) */
3408 micro_lg2(&r
[1], &r
[2]); /* r1 = lg2(r2) */
3409 micro_flr(&r
[0], &r
[1]); /* r0 = floor(r1) */
3410 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
3411 store_dest(mach
, &r
[0], &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3413 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
3414 micro_exp2(&r
[0], &r
[0]); /* r0 = 2 ^ r0 */
3415 micro_div(&r
[0], &r
[2], &r
[0]); /* r0 = r2 / r0 */
3416 store_dest(mach
, &r
[0], &inst
->Dst
[0], inst
, TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
3418 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
3419 store_dest(mach
, &r
[1], &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
3421 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
3422 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
3427 exec_exp(struct tgsi_exec_machine
*mach
,
3428 const struct tgsi_full_instruction
*inst
)
3430 union tgsi_exec_channel r
[3];
3432 fetch_source(mach
, &r
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3433 micro_flr(&r
[1], &r
[0]); /* r1 = floor(r0) */
3434 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
3435 micro_exp2(&r
[2], &r
[1]); /* r2 = 2 ^ r1 */
3436 store_dest(mach
, &r
[2], &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3438 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
3439 micro_sub(&r
[2], &r
[0], &r
[1]); /* r2 = r0 - r1 */
3440 store_dest(mach
, &r
[2], &inst
->Dst
[0], inst
, TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
3442 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
3443 micro_exp2(&r
[2], &r
[0]); /* r2 = 2 ^ r0 */
3444 store_dest(mach
, &r
[2], &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
3446 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
3447 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
3452 exec_lit(struct tgsi_exec_machine
*mach
,
3453 const struct tgsi_full_instruction
*inst
)
3455 union tgsi_exec_channel r
[3];
3456 union tgsi_exec_channel d
[3];
3458 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_YZ
) {
3459 fetch_source(mach
, &r
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3460 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
3461 fetch_source(mach
, &r
[1], &inst
->Src
[0], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
3462 micro_max(&r
[1], &r
[1], &ZeroVec
);
3464 fetch_source(mach
, &r
[2], &inst
->Src
[0], TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
3465 micro_min(&r
[2], &r
[2], &P128Vec
);
3466 micro_max(&r
[2], &r
[2], &M128Vec
);
3467 micro_pow(&r
[1], &r
[1], &r
[2]);
3468 micro_lt(&d
[TGSI_CHAN_Z
], &ZeroVec
, &r
[0], &r
[1], &ZeroVec
);
3469 store_dest(mach
, &d
[TGSI_CHAN_Z
], &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
3471 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
3472 micro_max(&d
[TGSI_CHAN_Y
], &r
[0], &ZeroVec
);
3473 store_dest(mach
, &d
[TGSI_CHAN_Y
], &inst
->Dst
[0], inst
, TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
3476 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
3477 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3480 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
3481 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
3486 exec_break(struct tgsi_exec_machine
*mach
)
3488 if (mach
->BreakType
== TGSI_EXEC_BREAK_INSIDE_LOOP
) {
3489 /* turn off loop channels for each enabled exec channel */
3490 mach
->LoopMask
&= ~mach
->ExecMask
;
3491 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3492 UPDATE_EXEC_MASK(mach
);
3494 assert(mach
->BreakType
== TGSI_EXEC_BREAK_INSIDE_SWITCH
);
3496 mach
->Switch
.mask
= 0x0;
3498 UPDATE_EXEC_MASK(mach
);
3503 exec_switch(struct tgsi_exec_machine
*mach
,
3504 const struct tgsi_full_instruction
*inst
)
3506 assert(mach
->SwitchStackTop
< TGSI_EXEC_MAX_SWITCH_NESTING
);
3507 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
3509 mach
->SwitchStack
[mach
->SwitchStackTop
++] = mach
->Switch
;
3510 fetch_source(mach
, &mach
->Switch
.selector
, &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_UINT
);
3511 mach
->Switch
.mask
= 0x0;
3512 mach
->Switch
.defaultMask
= 0x0;
3514 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
3515 mach
->BreakType
= TGSI_EXEC_BREAK_INSIDE_SWITCH
;
3517 UPDATE_EXEC_MASK(mach
);
3521 exec_case(struct tgsi_exec_machine
*mach
,
3522 const struct tgsi_full_instruction
*inst
)
3524 uint prevMask
= mach
->SwitchStack
[mach
->SwitchStackTop
- 1].mask
;
3525 union tgsi_exec_channel src
;
3528 fetch_source(mach
, &src
, &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_UINT
);
3530 if (mach
->Switch
.selector
.u
[0] == src
.u
[0]) {
3533 if (mach
->Switch
.selector
.u
[1] == src
.u
[1]) {
3536 if (mach
->Switch
.selector
.u
[2] == src
.u
[2]) {
3539 if (mach
->Switch
.selector
.u
[3] == src
.u
[3]) {
3543 mach
->Switch
.defaultMask
|= mask
;
3545 mach
->Switch
.mask
|= mask
& prevMask
;
3547 UPDATE_EXEC_MASK(mach
);
3550 /* FIXME: this will only work if default is last */
3552 exec_default(struct tgsi_exec_machine
*mach
)
3554 uint prevMask
= mach
->SwitchStack
[mach
->SwitchStackTop
- 1].mask
;
3556 mach
->Switch
.mask
|= ~mach
->Switch
.defaultMask
& prevMask
;
3558 UPDATE_EXEC_MASK(mach
);
3562 exec_endswitch(struct tgsi_exec_machine
*mach
)
3564 mach
->Switch
= mach
->SwitchStack
[--mach
->SwitchStackTop
];
3565 mach
->BreakType
= mach
->BreakStack
[--mach
->BreakStackTop
];
3567 UPDATE_EXEC_MASK(mach
);
/*
 * Callback signatures for 64-bit ("double channel") micro-operations.
 */

/** 64-bit source(s) -> 64-bit result. */
typedef void (*micro_dop)(union tgsi_double_channel *dst,
                          const union tgsi_double_channel *src);

/** 64-bit first operand, 32-bit second operand -> 64-bit result. */
typedef void (*micro_dop_sop)(union tgsi_double_channel *dst,
                              const union tgsi_double_channel *src0,
                              union tgsi_exec_channel *src1);

/** 32-bit source -> 64-bit result. */
typedef void (*micro_dop_s)(union tgsi_double_channel *dst,
                            const union tgsi_exec_channel *src);

/** 64-bit source -> 32-bit result. */
typedef void (*micro_sop_d)(union tgsi_exec_channel *dst,
                            const union tgsi_double_channel *src);
3584 fetch_double_channel(struct tgsi_exec_machine
*mach
,
3585 union tgsi_double_channel
*chan
,
3586 const struct tgsi_full_src_register
*reg
,
3590 union tgsi_exec_channel src
[2];
3593 fetch_source_d(mach
, &src
[0], reg
, chan_0
);
3594 fetch_source_d(mach
, &src
[1], reg
, chan_1
);
3596 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
3597 chan
->u
[i
][0] = src
[0].u
[i
];
3598 chan
->u
[i
][1] = src
[1].u
[i
];
3600 if (reg
->Register
.Absolute
) {
3601 micro_dabs(chan
, chan
);
3603 if (reg
->Register
.Negate
) {
3604 micro_dneg(chan
, chan
);
3609 store_double_channel(struct tgsi_exec_machine
*mach
,
3610 const union tgsi_double_channel
*chan
,
3611 const struct tgsi_full_dst_register
*reg
,
3612 const struct tgsi_full_instruction
*inst
,
3616 union tgsi_exec_channel dst
[2];
3618 union tgsi_double_channel temp
;
3619 const uint execmask
= mach
->ExecMask
;
3621 if (!inst
->Instruction
.Saturate
) {
3622 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++)
3623 if (execmask
& (1 << i
)) {
3624 dst
[0].u
[i
] = chan
->u
[i
][0];
3625 dst
[1].u
[i
] = chan
->u
[i
][1];
3629 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++)
3630 if (execmask
& (1 << i
)) {
3631 if (chan
->d
[i
] < 0.0)
3633 else if (chan
->d
[i
] > 1.0)
3636 temp
.d
[i
] = chan
->d
[i
];
3638 dst
[0].u
[i
] = temp
.u
[i
][0];
3639 dst
[1].u
[i
] = temp
.u
[i
][1];
3643 store_dest_double(mach
, &dst
[0], reg
, chan_0
, TGSI_EXEC_DATA_UINT
);
3644 if (chan_1
!= (unsigned)-1)
3645 store_dest_double(mach
, &dst
[1], reg
, chan_1
, TGSI_EXEC_DATA_UINT
);
3649 exec_double_unary(struct tgsi_exec_machine
*mach
,
3650 const struct tgsi_full_instruction
*inst
,
3653 union tgsi_double_channel src
;
3654 union tgsi_double_channel dst
;
3656 if ((inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_XY
) == TGSI_WRITEMASK_XY
) {
3657 fetch_double_channel(mach
, &src
, &inst
->Src
[0], TGSI_CHAN_X
, TGSI_CHAN_Y
);
3659 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_CHAN_Y
);
3661 if ((inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_ZW
) == TGSI_WRITEMASK_ZW
) {
3662 fetch_double_channel(mach
, &src
, &inst
->Src
[0], TGSI_CHAN_Z
, TGSI_CHAN_W
);
3664 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_CHAN_W
);
3669 exec_double_binary(struct tgsi_exec_machine
*mach
,
3670 const struct tgsi_full_instruction
*inst
,
3672 enum tgsi_exec_datatype dst_datatype
)
3674 union tgsi_double_channel src
[2];
3675 union tgsi_double_channel dst
;
3676 int first_dest_chan
, second_dest_chan
;
3679 wmask
= inst
->Dst
[0].Register
.WriteMask
;
3680 /* these are & because of the way DSLT etc store their destinations */
3681 if (wmask
& TGSI_WRITEMASK_XY
) {
3682 first_dest_chan
= TGSI_CHAN_X
;
3683 second_dest_chan
= TGSI_CHAN_Y
;
3684 if (dst_datatype
== TGSI_EXEC_DATA_UINT
) {
3685 first_dest_chan
= (wmask
& TGSI_WRITEMASK_X
) ? TGSI_CHAN_X
: TGSI_CHAN_Y
;
3686 second_dest_chan
= -1;
3689 fetch_double_channel(mach
, &src
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_CHAN_Y
);
3690 fetch_double_channel(mach
, &src
[1], &inst
->Src
[1], TGSI_CHAN_X
, TGSI_CHAN_Y
);
3692 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, first_dest_chan
, second_dest_chan
);
3695 if (wmask
& TGSI_WRITEMASK_ZW
) {
3696 first_dest_chan
= TGSI_CHAN_Z
;
3697 second_dest_chan
= TGSI_CHAN_W
;
3698 if (dst_datatype
== TGSI_EXEC_DATA_UINT
) {
3699 first_dest_chan
= (wmask
& TGSI_WRITEMASK_Z
) ? TGSI_CHAN_Z
: TGSI_CHAN_W
;
3700 second_dest_chan
= -1;
3703 fetch_double_channel(mach
, &src
[0], &inst
->Src
[0], TGSI_CHAN_Z
, TGSI_CHAN_W
);
3704 fetch_double_channel(mach
, &src
[1], &inst
->Src
[1], TGSI_CHAN_Z
, TGSI_CHAN_W
);
3706 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, first_dest_chan
, second_dest_chan
);
3711 exec_double_trinary(struct tgsi_exec_machine
*mach
,
3712 const struct tgsi_full_instruction
*inst
,
3715 union tgsi_double_channel src
[3];
3716 union tgsi_double_channel dst
;
3718 if ((inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_XY
) == TGSI_WRITEMASK_XY
) {
3719 fetch_double_channel(mach
, &src
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_CHAN_Y
);
3720 fetch_double_channel(mach
, &src
[1], &inst
->Src
[1], TGSI_CHAN_X
, TGSI_CHAN_Y
);
3721 fetch_double_channel(mach
, &src
[2], &inst
->Src
[2], TGSI_CHAN_X
, TGSI_CHAN_Y
);
3723 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_CHAN_Y
);
3725 if ((inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_ZW
) == TGSI_WRITEMASK_ZW
) {
3726 fetch_double_channel(mach
, &src
[0], &inst
->Src
[0], TGSI_CHAN_Z
, TGSI_CHAN_W
);
3727 fetch_double_channel(mach
, &src
[1], &inst
->Src
[1], TGSI_CHAN_Z
, TGSI_CHAN_W
);
3728 fetch_double_channel(mach
, &src
[2], &inst
->Src
[2], TGSI_CHAN_Z
, TGSI_CHAN_W
);
3730 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_CHAN_W
);
3735 exec_dldexp(struct tgsi_exec_machine
*mach
,
3736 const struct tgsi_full_instruction
*inst
)
3738 union tgsi_double_channel src0
;
3739 union tgsi_exec_channel src1
;
3740 union tgsi_double_channel dst
;
3743 wmask
= inst
->Dst
[0].Register
.WriteMask
;
3744 if (wmask
& TGSI_WRITEMASK_XY
) {
3745 fetch_double_channel(mach
, &src0
, &inst
->Src
[0], TGSI_CHAN_X
, TGSI_CHAN_Y
);
3746 fetch_source(mach
, &src1
, &inst
->Src
[1], TGSI_CHAN_X
, TGSI_EXEC_DATA_INT
);
3747 micro_dldexp(&dst
, &src0
, &src1
);
3748 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_CHAN_Y
);
3751 if (wmask
& TGSI_WRITEMASK_ZW
) {
3752 fetch_double_channel(mach
, &src0
, &inst
->Src
[0], TGSI_CHAN_Z
, TGSI_CHAN_W
);
3753 fetch_source(mach
, &src1
, &inst
->Src
[1], TGSI_CHAN_Z
, TGSI_EXEC_DATA_INT
);
3754 micro_dldexp(&dst
, &src0
, &src1
);
3755 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_CHAN_W
);
3760 exec_dfracexp(struct tgsi_exec_machine
*mach
,
3761 const struct tgsi_full_instruction
*inst
)
3763 union tgsi_double_channel src
;
3764 union tgsi_double_channel dst
;
3765 union tgsi_exec_channel dst_exp
;
3767 fetch_double_channel(mach
, &src
, &inst
->Src
[0], TGSI_CHAN_X
, TGSI_CHAN_Y
);
3768 micro_dfracexp(&dst
, &dst_exp
, &src
);
3769 if ((inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_XY
) == TGSI_WRITEMASK_XY
)
3770 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_CHAN_Y
);
3771 if ((inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_ZW
) == TGSI_WRITEMASK_ZW
)
3772 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_CHAN_W
);
3773 for (unsigned chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3774 if (inst
->Dst
[1].Register
.WriteMask
& (1 << chan
))
3775 store_dest(mach
, &dst_exp
, &inst
->Dst
[1], inst
, chan
, TGSI_EXEC_DATA_INT
);
3780 exec_arg0_64_arg1_32(struct tgsi_exec_machine
*mach
,
3781 const struct tgsi_full_instruction
*inst
,
3784 union tgsi_double_channel src0
;
3785 union tgsi_exec_channel src1
;
3786 union tgsi_double_channel dst
;
3789 wmask
= inst
->Dst
[0].Register
.WriteMask
;
3790 if (wmask
& TGSI_WRITEMASK_XY
) {
3791 fetch_double_channel(mach
, &src0
, &inst
->Src
[0], TGSI_CHAN_X
, TGSI_CHAN_Y
);
3792 fetch_source(mach
, &src1
, &inst
->Src
[1], TGSI_CHAN_X
, TGSI_EXEC_DATA_INT
);
3793 op(&dst
, &src0
, &src1
);
3794 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_CHAN_Y
);
3797 if (wmask
& TGSI_WRITEMASK_ZW
) {
3798 fetch_double_channel(mach
, &src0
, &inst
->Src
[0], TGSI_CHAN_Z
, TGSI_CHAN_W
);
3799 fetch_source(mach
, &src1
, &inst
->Src
[1], TGSI_CHAN_Z
, TGSI_EXEC_DATA_INT
);
3800 op(&dst
, &src0
, &src1
);
3801 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_CHAN_W
);
3806 get_image_coord_dim(unsigned tgsi_tex
)
3810 case TGSI_TEXTURE_BUFFER
:
3811 case TGSI_TEXTURE_1D
:
3814 case TGSI_TEXTURE_2D
:
3815 case TGSI_TEXTURE_RECT
:
3816 case TGSI_TEXTURE_1D_ARRAY
:
3817 case TGSI_TEXTURE_2D_MSAA
:
3820 case TGSI_TEXTURE_3D
:
3821 case TGSI_TEXTURE_CUBE
:
3822 case TGSI_TEXTURE_2D_ARRAY
:
3823 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
3824 case TGSI_TEXTURE_CUBE_ARRAY
:
3828 assert(!"unknown texture target");
3837 get_image_coord_sample(unsigned tgsi_tex
)
3841 case TGSI_TEXTURE_2D_MSAA
:
3844 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
3854 exec_load_img(struct tgsi_exec_machine
*mach
,
3855 const struct tgsi_full_instruction
*inst
)
3857 union tgsi_exec_channel r
[4], sample_r
;
3863 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
];
3864 struct tgsi_image_params params
;
3865 int kilmask
= mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];
3867 unit
= fetch_sampler_unit(mach
, inst
, 0);
3868 dim
= get_image_coord_dim(inst
->Memory
.Texture
);
3869 sample
= get_image_coord_sample(inst
->Memory
.Texture
);
3872 params
.execmask
= mach
->ExecMask
& mach
->NonHelperMask
& ~kilmask
;
3874 params
.tgsi_tex_instr
= inst
->Memory
.Texture
;
3875 params
.format
= inst
->Memory
.Format
;
3877 for (i
= 0; i
< dim
; i
++) {
3878 IFETCH(&r
[i
], 1, TGSI_CHAN_X
+ i
);
3882 IFETCH(&sample_r
, 1, TGSI_CHAN_X
+ sample
);
3884 mach
->Image
->load(mach
->Image
, ¶ms
,
3885 r
[0].i
, r
[1].i
, r
[2].i
, sample_r
.i
,
3887 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
3888 r
[0].f
[j
] = rgba
[0][j
];
3889 r
[1].f
[j
] = rgba
[1][j
];
3890 r
[2].f
[j
] = rgba
[2][j
];
3891 r
[3].f
[j
] = rgba
[3][j
];
3893 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3894 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3895 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
3901 exec_load_buf(struct tgsi_exec_machine
*mach
,
3902 const struct tgsi_full_instruction
*inst
)
3904 union tgsi_exec_channel r
[4];
3908 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
];
3909 struct tgsi_buffer_params params
;
3910 int kilmask
= mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];
3912 unit
= fetch_sampler_unit(mach
, inst
, 0);
3914 params
.execmask
= mach
->ExecMask
& mach
->NonHelperMask
& ~kilmask
;
3916 IFETCH(&r
[0], 1, TGSI_CHAN_X
);
3918 mach
->Buffer
->load(mach
->Buffer
, ¶ms
,
3920 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
3921 r
[0].f
[j
] = rgba
[0][j
];
3922 r
[1].f
[j
] = rgba
[1][j
];
3923 r
[2].f
[j
] = rgba
[2][j
];
3924 r
[3].f
[j
] = rgba
[3][j
];
3926 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3927 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3928 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
3934 exec_load_mem(struct tgsi_exec_machine
*mach
,
3935 const struct tgsi_full_instruction
*inst
)
3937 union tgsi_exec_channel r
[4];
3939 char *ptr
= mach
->LocalMem
;
3943 IFETCH(&r
[0], 1, TGSI_CHAN_X
);
3944 if (r
[0].u
[0] >= mach
->LocalMemSize
)
3950 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
3951 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3952 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3953 memcpy(&r
[chan
].u
[j
], ptr
+ (4 * chan
), 4);
3958 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
3959 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
3960 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
3966 exec_load(struct tgsi_exec_machine
*mach
,
3967 const struct tgsi_full_instruction
*inst
)
3969 if (inst
->Src
[0].Register
.File
== TGSI_FILE_IMAGE
)
3970 exec_load_img(mach
, inst
);
3971 else if (inst
->Src
[0].Register
.File
== TGSI_FILE_BUFFER
)
3972 exec_load_buf(mach
, inst
);
3973 else if (inst
->Src
[0].Register
.File
== TGSI_FILE_MEMORY
)
3974 exec_load_mem(mach
, inst
);
3978 fetch_store_img_unit(struct tgsi_exec_machine
*mach
,
3979 const struct tgsi_full_dst_register
*dst
)
3983 if (dst
->Register
.Indirect
) {
3984 union tgsi_exec_channel indir_index
, index2
;
3985 const uint execmask
= mach
->ExecMask
;
3989 index2
.i
[3] = dst
->Indirect
.Index
;
3991 fetch_src_file_channel(mach
,
3993 dst
->Indirect
.Swizzle
,
3997 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
3998 if (execmask
& (1 << i
)) {
3999 unit
= dst
->Register
.Index
+ indir_index
.i
[i
];
4004 unit
= dst
->Register
.Index
;
4010 exec_store_img(struct tgsi_exec_machine
*mach
,
4011 const struct tgsi_full_instruction
*inst
)
4013 union tgsi_exec_channel r
[3], sample_r
;
4014 union tgsi_exec_channel value
[4];
4015 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
];
4016 struct tgsi_image_params params
;
4021 int kilmask
= mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];
4022 unit
= fetch_store_img_unit(mach
, &inst
->Dst
[0]);
4023 dim
= get_image_coord_dim(inst
->Memory
.Texture
);
4024 sample
= get_image_coord_sample(inst
->Memory
.Texture
);
4027 params
.execmask
= mach
->ExecMask
& mach
->NonHelperMask
& ~kilmask
;
4029 params
.tgsi_tex_instr
= inst
->Memory
.Texture
;
4030 params
.format
= inst
->Memory
.Format
;
4032 for (i
= 0; i
< dim
; i
++) {
4033 IFETCH(&r
[i
], 0, TGSI_CHAN_X
+ i
);
4036 for (i
= 0; i
< 4; i
++) {
4037 FETCH(&value
[i
], 1, TGSI_CHAN_X
+ i
);
4040 IFETCH(&sample_r
, 0, TGSI_CHAN_X
+ sample
);
4042 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
4043 rgba
[0][j
] = value
[0].f
[j
];
4044 rgba
[1][j
] = value
[1].f
[j
];
4045 rgba
[2][j
] = value
[2].f
[j
];
4046 rgba
[3][j
] = value
[3].f
[j
];
4049 mach
->Image
->store(mach
->Image
, ¶ms
,
4050 r
[0].i
, r
[1].i
, r
[2].i
, sample_r
.i
,
4055 exec_store_buf(struct tgsi_exec_machine
*mach
,
4056 const struct tgsi_full_instruction
*inst
)
4058 union tgsi_exec_channel r
[3];
4059 union tgsi_exec_channel value
[4];
4060 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
];
4061 struct tgsi_buffer_params params
;
4064 int kilmask
= mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];
4066 unit
= fetch_store_img_unit(mach
, &inst
->Dst
[0]);
4068 params
.execmask
= mach
->ExecMask
& mach
->NonHelperMask
& ~kilmask
;
4070 params
.writemask
= inst
->Dst
[0].Register
.WriteMask
;
4072 IFETCH(&r
[0], 0, TGSI_CHAN_X
);
4073 for (i
= 0; i
< 4; i
++) {
4074 FETCH(&value
[i
], 1, TGSI_CHAN_X
+ i
);
4077 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
4078 rgba
[0][j
] = value
[0].f
[j
];
4079 rgba
[1][j
] = value
[1].f
[j
];
4080 rgba
[2][j
] = value
[2].f
[j
];
4081 rgba
[3][j
] = value
[3].f
[j
];
4084 mach
->Buffer
->store(mach
->Buffer
, ¶ms
,
4090 exec_store_mem(struct tgsi_exec_machine
*mach
,
4091 const struct tgsi_full_instruction
*inst
)
4093 union tgsi_exec_channel r
[3];
4094 union tgsi_exec_channel value
[4];
4096 char *ptr
= mach
->LocalMem
;
4097 int kilmask
= mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];
4098 int execmask
= mach
->ExecMask
& mach
->NonHelperMask
& ~kilmask
;
4100 IFETCH(&r
[0], 0, TGSI_CHAN_X
);
4102 for (i
= 0; i
< 4; i
++) {
4103 FETCH(&value
[i
], 1, TGSI_CHAN_X
+ i
);
4106 if (r
[0].u
[0] >= mach
->LocalMemSize
)
4110 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
4111 if (execmask
& (1 << i
)) {
4112 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
4113 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
4114 memcpy(ptr
+ (chan
* 4), &value
[chan
].u
[0], 4);
4122 exec_store(struct tgsi_exec_machine
*mach
,
4123 const struct tgsi_full_instruction
*inst
)
4125 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_IMAGE
)
4126 exec_store_img(mach
, inst
);
4127 else if (inst
->Dst
[0].Register
.File
== TGSI_FILE_BUFFER
)
4128 exec_store_buf(mach
, inst
);
4129 else if (inst
->Dst
[0].Register
.File
== TGSI_FILE_MEMORY
)
4130 exec_store_mem(mach
, inst
);
4134 exec_atomop_img(struct tgsi_exec_machine
*mach
,
4135 const struct tgsi_full_instruction
*inst
)
4137 union tgsi_exec_channel r
[4], sample_r
;
4138 union tgsi_exec_channel value
[4], value2
[4];
4139 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
];
4140 float rgba2
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
];
4141 struct tgsi_image_params params
;
4146 int kilmask
= mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];
4147 unit
= fetch_sampler_unit(mach
, inst
, 0);
4148 dim
= get_image_coord_dim(inst
->Memory
.Texture
);
4149 sample
= get_image_coord_sample(inst
->Memory
.Texture
);
4152 params
.execmask
= mach
->ExecMask
& mach
->NonHelperMask
& ~kilmask
;
4154 params
.tgsi_tex_instr
= inst
->Memory
.Texture
;
4155 params
.format
= inst
->Memory
.Format
;
4157 for (i
= 0; i
< dim
; i
++) {
4158 IFETCH(&r
[i
], 1, TGSI_CHAN_X
+ i
);
4161 for (i
= 0; i
< 4; i
++) {
4162 FETCH(&value
[i
], 2, TGSI_CHAN_X
+ i
);
4163 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_ATOMCAS
)
4164 FETCH(&value2
[i
], 3, TGSI_CHAN_X
+ i
);
4167 IFETCH(&sample_r
, 1, TGSI_CHAN_X
+ sample
);
4169 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
4170 rgba
[0][j
] = value
[0].f
[j
];
4171 rgba
[1][j
] = value
[1].f
[j
];
4172 rgba
[2][j
] = value
[2].f
[j
];
4173 rgba
[3][j
] = value
[3].f
[j
];
4175 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_ATOMCAS
) {
4176 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
4177 rgba2
[0][j
] = value2
[0].f
[j
];
4178 rgba2
[1][j
] = value2
[1].f
[j
];
4179 rgba2
[2][j
] = value2
[2].f
[j
];
4180 rgba2
[3][j
] = value2
[3].f
[j
];
4184 mach
->Image
->op(mach
->Image
, ¶ms
, inst
->Instruction
.Opcode
,
4185 r
[0].i
, r
[1].i
, r
[2].i
, sample_r
.i
,
4188 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
4189 r
[0].f
[j
] = rgba
[0][j
];
4190 r
[1].f
[j
] = rgba
[1][j
];
4191 r
[2].f
[j
] = rgba
[2][j
];
4192 r
[3].f
[j
] = rgba
[3][j
];
4194 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
4195 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
4196 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
4202 exec_atomop_buf(struct tgsi_exec_machine
*mach
,
4203 const struct tgsi_full_instruction
*inst
)
4205 union tgsi_exec_channel r
[4];
4206 union tgsi_exec_channel value
[4], value2
[4];
4207 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
];
4208 float rgba2
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
];
4209 struct tgsi_buffer_params params
;
4212 int kilmask
= mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];
4214 unit
= fetch_sampler_unit(mach
, inst
, 0);
4216 params
.execmask
= mach
->ExecMask
& mach
->NonHelperMask
& ~kilmask
;
4218 params
.writemask
= inst
->Dst
[0].Register
.WriteMask
;
4220 IFETCH(&r
[0], 1, TGSI_CHAN_X
);
4222 for (i
= 0; i
< 4; i
++) {
4223 FETCH(&value
[i
], 2, TGSI_CHAN_X
+ i
);
4224 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_ATOMCAS
)
4225 FETCH(&value2
[i
], 3, TGSI_CHAN_X
+ i
);
4228 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
4229 rgba
[0][j
] = value
[0].f
[j
];
4230 rgba
[1][j
] = value
[1].f
[j
];
4231 rgba
[2][j
] = value
[2].f
[j
];
4232 rgba
[3][j
] = value
[3].f
[j
];
4234 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_ATOMCAS
) {
4235 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
4236 rgba2
[0][j
] = value2
[0].f
[j
];
4237 rgba2
[1][j
] = value2
[1].f
[j
];
4238 rgba2
[2][j
] = value2
[2].f
[j
];
4239 rgba2
[3][j
] = value2
[3].f
[j
];
4243 mach
->Buffer
->op(mach
->Buffer
, ¶ms
, inst
->Instruction
.Opcode
,
4247 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
4248 r
[0].f
[j
] = rgba
[0][j
];
4249 r
[1].f
[j
] = rgba
[1][j
];
4250 r
[2].f
[j
] = rgba
[2][j
];
4251 r
[3].f
[j
] = rgba
[3][j
];
4253 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
4254 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
4255 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
4261 exec_atomop_mem(struct tgsi_exec_machine
*mach
,
4262 const struct tgsi_full_instruction
*inst
)
4264 union tgsi_exec_channel r
[4];
4265 union tgsi_exec_channel value
[4], value2
[4];
4266 char *ptr
= mach
->LocalMem
;
4270 int kilmask
= mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];
4271 int execmask
= mach
->ExecMask
& mach
->NonHelperMask
& ~kilmask
;
4272 IFETCH(&r
[0], 1, TGSI_CHAN_X
);
4274 if (r
[0].u
[0] >= mach
->LocalMemSize
)
4279 for (i
= 0; i
< 4; i
++) {
4280 FETCH(&value
[i
], 2, TGSI_CHAN_X
+ i
);
4281 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_ATOMCAS
)
4282 FETCH(&value2
[i
], 3, TGSI_CHAN_X
+ i
);
4285 memcpy(&r
[0].u
[0], ptr
, 4);
4287 switch (inst
->Instruction
.Opcode
) {
4288 case TGSI_OPCODE_ATOMUADD
:
4289 val
+= value
[0].u
[0];
4291 case TGSI_OPCODE_ATOMXOR
:
4292 val
^= value
[0].u
[0];
4294 case TGSI_OPCODE_ATOMOR
:
4295 val
|= value
[0].u
[0];
4297 case TGSI_OPCODE_ATOMAND
:
4298 val
&= value
[0].u
[0];
4300 case TGSI_OPCODE_ATOMUMIN
:
4301 val
= MIN2(val
, value
[0].u
[0]);
4303 case TGSI_OPCODE_ATOMUMAX
:
4304 val
= MAX2(val
, value
[0].u
[0]);
4306 case TGSI_OPCODE_ATOMIMIN
:
4307 val
= MIN2(r
[0].i
[0], value
[0].i
[0]);
4309 case TGSI_OPCODE_ATOMIMAX
:
4310 val
= MAX2(r
[0].i
[0], value
[0].i
[0]);
4312 case TGSI_OPCODE_ATOMXCHG
:
4313 val
= value
[0].i
[0];
4315 case TGSI_OPCODE_ATOMCAS
:
4316 if (val
== value
[0].u
[0])
4317 val
= value2
[0].u
[0];
4319 case TGSI_OPCODE_ATOMFADD
:
4320 val
= fui(r
[0].f
[0] + value
[0].f
[0]);
4325 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++)
4326 if (execmask
& (1 << i
))
4327 memcpy(ptr
, &val
, 4);
4329 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
4330 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
4331 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
4337 exec_atomop(struct tgsi_exec_machine
*mach
,
4338 const struct tgsi_full_instruction
*inst
)
4340 if (inst
->Src
[0].Register
.File
== TGSI_FILE_IMAGE
)
4341 exec_atomop_img(mach
, inst
);
4342 else if (inst
->Src
[0].Register
.File
== TGSI_FILE_BUFFER
)
4343 exec_atomop_buf(mach
, inst
);
4344 else if (inst
->Src
[0].Register
.File
== TGSI_FILE_MEMORY
)
4345 exec_atomop_mem(mach
, inst
);
4349 exec_resq_img(struct tgsi_exec_machine
*mach
,
4350 const struct tgsi_full_instruction
*inst
)
4353 union tgsi_exec_channel r
[4];
4356 struct tgsi_image_params params
;
4357 int kilmask
= mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];
4359 unit
= fetch_sampler_unit(mach
, inst
, 0);
4361 params
.execmask
= mach
->ExecMask
& mach
->NonHelperMask
& ~kilmask
;
4363 params
.tgsi_tex_instr
= inst
->Memory
.Texture
;
4364 params
.format
= inst
->Memory
.Format
;
4366 mach
->Image
->get_dims(mach
->Image
, ¶ms
, result
);
4368 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
4369 for (j
= 0; j
< 4; j
++) {
4370 r
[j
].i
[i
] = result
[j
];
4374 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
4375 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
4376 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
,
4377 TGSI_EXEC_DATA_INT
);
4383 exec_resq_buf(struct tgsi_exec_machine
*mach
,
4384 const struct tgsi_full_instruction
*inst
)
4387 union tgsi_exec_channel r
[4];
4390 struct tgsi_buffer_params params
;
4391 int kilmask
= mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];
4393 unit
= fetch_sampler_unit(mach
, inst
, 0);
4395 params
.execmask
= mach
->ExecMask
& mach
->NonHelperMask
& ~kilmask
;
4398 mach
->Buffer
->get_dims(mach
->Buffer
, ¶ms
, &result
);
4400 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
4404 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
4405 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
4406 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
,
4407 TGSI_EXEC_DATA_INT
);
4413 exec_resq(struct tgsi_exec_machine
*mach
,
4414 const struct tgsi_full_instruction
*inst
)
4416 if (inst
->Src
[0].Register
.File
== TGSI_FILE_IMAGE
)
4417 exec_resq_img(mach
, inst
);
4419 exec_resq_buf(mach
, inst
);
4423 micro_f2u64(union tgsi_double_channel
*dst
,
4424 const union tgsi_exec_channel
*src
)
4426 dst
->u64
[0] = (uint64_t)src
->f
[0];
4427 dst
->u64
[1] = (uint64_t)src
->f
[1];
4428 dst
->u64
[2] = (uint64_t)src
->f
[2];
4429 dst
->u64
[3] = (uint64_t)src
->f
[3];
4433 micro_f2i64(union tgsi_double_channel
*dst
,
4434 const union tgsi_exec_channel
*src
)
4436 dst
->i64
[0] = (int64_t)src
->f
[0];
4437 dst
->i64
[1] = (int64_t)src
->f
[1];
4438 dst
->i64
[2] = (int64_t)src
->f
[2];
4439 dst
->i64
[3] = (int64_t)src
->f
[3];
4443 micro_u2i64(union tgsi_double_channel
*dst
,
4444 const union tgsi_exec_channel
*src
)
4446 dst
->u64
[0] = (uint64_t)src
->u
[0];
4447 dst
->u64
[1] = (uint64_t)src
->u
[1];
4448 dst
->u64
[2] = (uint64_t)src
->u
[2];
4449 dst
->u64
[3] = (uint64_t)src
->u
[3];
4453 micro_i2i64(union tgsi_double_channel
*dst
,
4454 const union tgsi_exec_channel
*src
)
4456 dst
->i64
[0] = (int64_t)src
->i
[0];
4457 dst
->i64
[1] = (int64_t)src
->i
[1];
4458 dst
->i64
[2] = (int64_t)src
->i
[2];
4459 dst
->i64
[3] = (int64_t)src
->i
[3];
4463 micro_d2u64(union tgsi_double_channel
*dst
,
4464 const union tgsi_double_channel
*src
)
4466 dst
->u64
[0] = (uint64_t)src
->d
[0];
4467 dst
->u64
[1] = (uint64_t)src
->d
[1];
4468 dst
->u64
[2] = (uint64_t)src
->d
[2];
4469 dst
->u64
[3] = (uint64_t)src
->d
[3];
4473 micro_d2i64(union tgsi_double_channel
*dst
,
4474 const union tgsi_double_channel
*src
)
4476 dst
->i64
[0] = (int64_t)src
->d
[0];
4477 dst
->i64
[1] = (int64_t)src
->d
[1];
4478 dst
->i64
[2] = (int64_t)src
->d
[2];
4479 dst
->i64
[3] = (int64_t)src
->d
[3];
4483 micro_u642d(union tgsi_double_channel
*dst
,
4484 const union tgsi_double_channel
*src
)
4486 dst
->d
[0] = (double)src
->u64
[0];
4487 dst
->d
[1] = (double)src
->u64
[1];
4488 dst
->d
[2] = (double)src
->u64
[2];
4489 dst
->d
[3] = (double)src
->u64
[3];
4493 micro_i642d(union tgsi_double_channel
*dst
,
4494 const union tgsi_double_channel
*src
)
4496 dst
->d
[0] = (double)src
->i64
[0];
4497 dst
->d
[1] = (double)src
->i64
[1];
4498 dst
->d
[2] = (double)src
->i64
[2];
4499 dst
->d
[3] = (double)src
->i64
[3];
4503 micro_u642f(union tgsi_exec_channel
*dst
,
4504 const union tgsi_double_channel
*src
)
4506 dst
->f
[0] = (float)src
->u64
[0];
4507 dst
->f
[1] = (float)src
->u64
[1];
4508 dst
->f
[2] = (float)src
->u64
[2];
4509 dst
->f
[3] = (float)src
->u64
[3];
4513 micro_i642f(union tgsi_exec_channel
*dst
,
4514 const union tgsi_double_channel
*src
)
4516 dst
->f
[0] = (float)src
->i64
[0];
4517 dst
->f
[1] = (float)src
->i64
[1];
4518 dst
->f
[2] = (float)src
->i64
[2];
4519 dst
->f
[3] = (float)src
->i64
[3];
4523 exec_t_2_64(struct tgsi_exec_machine
*mach
,
4524 const struct tgsi_full_instruction
*inst
,
4526 enum tgsi_exec_datatype src_datatype
)
4528 union tgsi_exec_channel src
;
4529 union tgsi_double_channel dst
;
4531 if ((inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_XY
) == TGSI_WRITEMASK_XY
) {
4532 fetch_source(mach
, &src
, &inst
->Src
[0], TGSI_CHAN_X
, src_datatype
);
4534 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_CHAN_Y
);
4536 if ((inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_ZW
) == TGSI_WRITEMASK_ZW
) {
4537 fetch_source(mach
, &src
, &inst
->Src
[0], TGSI_CHAN_Y
, src_datatype
);
4539 store_double_channel(mach
, &dst
, &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_CHAN_W
);
4544 exec_64_2_t(struct tgsi_exec_machine
*mach
,
4545 const struct tgsi_full_instruction
*inst
,
4547 enum tgsi_exec_datatype dst_datatype
)
4549 union tgsi_double_channel src
;
4550 union tgsi_exec_channel dst
;
4551 int wm
= inst
->Dst
[0].Register
.WriteMask
;
4554 for (i
= 0; i
< 2; i
++) {
4557 wm
&= ~(1 << (bit
- 1));
4559 fetch_double_channel(mach
, &src
, &inst
->Src
[0], TGSI_CHAN_X
, TGSI_CHAN_Y
);
4561 fetch_double_channel(mach
, &src
, &inst
->Src
[0], TGSI_CHAN_Z
, TGSI_CHAN_W
);
4563 store_dest(mach
, &dst
, &inst
->Dst
[0], inst
, bit
- 1, dst_datatype
);
4569 micro_i2f(union tgsi_exec_channel
*dst
,
4570 const union tgsi_exec_channel
*src
)
4572 dst
->f
[0] = (float)src
->i
[0];
4573 dst
->f
[1] = (float)src
->i
[1];
4574 dst
->f
[2] = (float)src
->i
[2];
4575 dst
->f
[3] = (float)src
->i
[3];
4579 micro_not(union tgsi_exec_channel
*dst
,
4580 const union tgsi_exec_channel
*src
)
4582 dst
->u
[0] = ~src
->u
[0];
4583 dst
->u
[1] = ~src
->u
[1];
4584 dst
->u
[2] = ~src
->u
[2];
4585 dst
->u
[3] = ~src
->u
[3];
4589 micro_shl(union tgsi_exec_channel
*dst
,
4590 const union tgsi_exec_channel
*src0
,
4591 const union tgsi_exec_channel
*src1
)
4593 unsigned masked_count
;
4594 masked_count
= src1
->u
[0] & 0x1f;
4595 dst
->u
[0] = src0
->u
[0] << masked_count
;
4596 masked_count
= src1
->u
[1] & 0x1f;
4597 dst
->u
[1] = src0
->u
[1] << masked_count
;
4598 masked_count
= src1
->u
[2] & 0x1f;
4599 dst
->u
[2] = src0
->u
[2] << masked_count
;
4600 masked_count
= src1
->u
[3] & 0x1f;
4601 dst
->u
[3] = src0
->u
[3] << masked_count
;
4605 micro_and(union tgsi_exec_channel
*dst
,
4606 const union tgsi_exec_channel
*src0
,
4607 const union tgsi_exec_channel
*src1
)
4609 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
4610 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
4611 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
4612 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
4616 micro_or(union tgsi_exec_channel
*dst
,
4617 const union tgsi_exec_channel
*src0
,
4618 const union tgsi_exec_channel
*src1
)
4620 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
4621 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
4622 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
4623 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
4627 micro_xor(union tgsi_exec_channel
*dst
,
4628 const union tgsi_exec_channel
*src0
,
4629 const union tgsi_exec_channel
*src1
)
4631 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
4632 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
4633 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
4634 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
4638 micro_mod(union tgsi_exec_channel
*dst
,
4639 const union tgsi_exec_channel
*src0
,
4640 const union tgsi_exec_channel
*src1
)
4642 dst
->i
[0] = src1
->i
[0] ? src0
->i
[0] % src1
->i
[0] : ~0;
4643 dst
->i
[1] = src1
->i
[1] ? src0
->i
[1] % src1
->i
[1] : ~0;
4644 dst
->i
[2] = src1
->i
[2] ? src0
->i
[2] % src1
->i
[2] : ~0;
4645 dst
->i
[3] = src1
->i
[3] ? src0
->i
[3] % src1
->i
[3] : ~0;
4649 micro_f2i(union tgsi_exec_channel
*dst
,
4650 const union tgsi_exec_channel
*src
)
4652 dst
->i
[0] = (int)src
->f
[0];
4653 dst
->i
[1] = (int)src
->f
[1];
4654 dst
->i
[2] = (int)src
->f
[2];
4655 dst
->i
[3] = (int)src
->f
[3];
4659 micro_fseq(union tgsi_exec_channel
*dst
,
4660 const union tgsi_exec_channel
*src0
,
4661 const union tgsi_exec_channel
*src1
)
4663 dst
->u
[0] = src0
->f
[0] == src1
->f
[0] ? ~0 : 0;
4664 dst
->u
[1] = src0
->f
[1] == src1
->f
[1] ? ~0 : 0;
4665 dst
->u
[2] = src0
->f
[2] == src1
->f
[2] ? ~0 : 0;
4666 dst
->u
[3] = src0
->f
[3] == src1
->f
[3] ? ~0 : 0;
4670 micro_fsge(union tgsi_exec_channel
*dst
,
4671 const union tgsi_exec_channel
*src0
,
4672 const union tgsi_exec_channel
*src1
)
4674 dst
->u
[0] = src0
->f
[0] >= src1
->f
[0] ? ~0 : 0;
4675 dst
->u
[1] = src0
->f
[1] >= src1
->f
[1] ? ~0 : 0;
4676 dst
->u
[2] = src0
->f
[2] >= src1
->f
[2] ? ~0 : 0;
4677 dst
->u
[3] = src0
->f
[3] >= src1
->f
[3] ? ~0 : 0;
4681 micro_fslt(union tgsi_exec_channel
*dst
,
4682 const union tgsi_exec_channel
*src0
,
4683 const union tgsi_exec_channel
*src1
)
4685 dst
->u
[0] = src0
->f
[0] < src1
->f
[0] ? ~0 : 0;
4686 dst
->u
[1] = src0
->f
[1] < src1
->f
[1] ? ~0 : 0;
4687 dst
->u
[2] = src0
->f
[2] < src1
->f
[2] ? ~0 : 0;
4688 dst
->u
[3] = src0
->f
[3] < src1
->f
[3] ? ~0 : 0;
4692 micro_fsne(union tgsi_exec_channel
*dst
,
4693 const union tgsi_exec_channel
*src0
,
4694 const union tgsi_exec_channel
*src1
)
4696 dst
->u
[0] = src0
->f
[0] != src1
->f
[0] ? ~0 : 0;
4697 dst
->u
[1] = src0
->f
[1] != src1
->f
[1] ? ~0 : 0;
4698 dst
->u
[2] = src0
->f
[2] != src1
->f
[2] ? ~0 : 0;
4699 dst
->u
[3] = src0
->f
[3] != src1
->f
[3] ? ~0 : 0;
4703 micro_idiv(union tgsi_exec_channel
*dst
,
4704 const union tgsi_exec_channel
*src0
,
4705 const union tgsi_exec_channel
*src1
)
4707 dst
->i
[0] = src1
->i
[0] ? src0
->i
[0] / src1
->i
[0] : 0;
4708 dst
->i
[1] = src1
->i
[1] ? src0
->i
[1] / src1
->i
[1] : 0;
4709 dst
->i
[2] = src1
->i
[2] ? src0
->i
[2] / src1
->i
[2] : 0;
4710 dst
->i
[3] = src1
->i
[3] ? src0
->i
[3] / src1
->i
[3] : 0;
4714 micro_imax(union tgsi_exec_channel
*dst
,
4715 const union tgsi_exec_channel
*src0
,
4716 const union tgsi_exec_channel
*src1
)
4718 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
4719 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
4720 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
4721 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
4725 micro_imin(union tgsi_exec_channel
*dst
,
4726 const union tgsi_exec_channel
*src0
,
4727 const union tgsi_exec_channel
*src1
)
4729 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
4730 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
4731 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
4732 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
4736 micro_isge(union tgsi_exec_channel
*dst
,
4737 const union tgsi_exec_channel
*src0
,
4738 const union tgsi_exec_channel
*src1
)
4740 dst
->i
[0] = src0
->i
[0] >= src1
->i
[0] ? -1 : 0;
4741 dst
->i
[1] = src0
->i
[1] >= src1
->i
[1] ? -1 : 0;
4742 dst
->i
[2] = src0
->i
[2] >= src1
->i
[2] ? -1 : 0;
4743 dst
->i
[3] = src0
->i
[3] >= src1
->i
[3] ? -1 : 0;
4747 micro_ishr(union tgsi_exec_channel
*dst
,
4748 const union tgsi_exec_channel
*src0
,
4749 const union tgsi_exec_channel
*src1
)
4751 unsigned masked_count
;
4752 masked_count
= src1
->i
[0] & 0x1f;
4753 dst
->i
[0] = src0
->i
[0] >> masked_count
;
4754 masked_count
= src1
->i
[1] & 0x1f;
4755 dst
->i
[1] = src0
->i
[1] >> masked_count
;
4756 masked_count
= src1
->i
[2] & 0x1f;
4757 dst
->i
[2] = src0
->i
[2] >> masked_count
;
4758 masked_count
= src1
->i
[3] & 0x1f;
4759 dst
->i
[3] = src0
->i
[3] >> masked_count
;
4763 micro_islt(union tgsi_exec_channel
*dst
,
4764 const union tgsi_exec_channel
*src0
,
4765 const union tgsi_exec_channel
*src1
)
4767 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? -1 : 0;
4768 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? -1 : 0;
4769 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? -1 : 0;
4770 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? -1 : 0;
4774 micro_f2u(union tgsi_exec_channel
*dst
,
4775 const union tgsi_exec_channel
*src
)
4777 dst
->u
[0] = (uint
)src
->f
[0];
4778 dst
->u
[1] = (uint
)src
->f
[1];
4779 dst
->u
[2] = (uint
)src
->f
[2];
4780 dst
->u
[3] = (uint
)src
->f
[3];
4784 micro_u2f(union tgsi_exec_channel
*dst
,
4785 const union tgsi_exec_channel
*src
)
4787 dst
->f
[0] = (float)src
->u
[0];
4788 dst
->f
[1] = (float)src
->u
[1];
4789 dst
->f
[2] = (float)src
->u
[2];
4790 dst
->f
[3] = (float)src
->u
[3];
4794 micro_uadd(union tgsi_exec_channel
*dst
,
4795 const union tgsi_exec_channel
*src0
,
4796 const union tgsi_exec_channel
*src1
)
4798 dst
->u
[0] = src0
->u
[0] + src1
->u
[0];
4799 dst
->u
[1] = src0
->u
[1] + src1
->u
[1];
4800 dst
->u
[2] = src0
->u
[2] + src1
->u
[2];
4801 dst
->u
[3] = src0
->u
[3] + src1
->u
[3];
4805 micro_udiv(union tgsi_exec_channel
*dst
,
4806 const union tgsi_exec_channel
*src0
,
4807 const union tgsi_exec_channel
*src1
)
4809 dst
->u
[0] = src1
->u
[0] ? src0
->u
[0] / src1
->u
[0] : ~0u;
4810 dst
->u
[1] = src1
->u
[1] ? src0
->u
[1] / src1
->u
[1] : ~0u;
4811 dst
->u
[2] = src1
->u
[2] ? src0
->u
[2] / src1
->u
[2] : ~0u;
4812 dst
->u
[3] = src1
->u
[3] ? src0
->u
[3] / src1
->u
[3] : ~0u;
4816 micro_umad(union tgsi_exec_channel
*dst
,
4817 const union tgsi_exec_channel
*src0
,
4818 const union tgsi_exec_channel
*src1
,
4819 const union tgsi_exec_channel
*src2
)
4821 dst
->u
[0] = src0
->u
[0] * src1
->u
[0] + src2
->u
[0];
4822 dst
->u
[1] = src0
->u
[1] * src1
->u
[1] + src2
->u
[1];
4823 dst
->u
[2] = src0
->u
[2] * src1
->u
[2] + src2
->u
[2];
4824 dst
->u
[3] = src0
->u
[3] * src1
->u
[3] + src2
->u
[3];
4828 micro_umax(union tgsi_exec_channel
*dst
,
4829 const union tgsi_exec_channel
*src0
,
4830 const union tgsi_exec_channel
*src1
)
4832 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
4833 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
4834 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
4835 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
4839 micro_umin(union tgsi_exec_channel
*dst
,
4840 const union tgsi_exec_channel
*src0
,
4841 const union tgsi_exec_channel
*src1
)
4843 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
4844 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
4845 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
4846 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
4850 micro_umod(union tgsi_exec_channel
*dst
,
4851 const union tgsi_exec_channel
*src0
,
4852 const union tgsi_exec_channel
*src1
)
4854 dst
->u
[0] = src1
->u
[0] ? src0
->u
[0] % src1
->u
[0] : ~0u;
4855 dst
->u
[1] = src1
->u
[1] ? src0
->u
[1] % src1
->u
[1] : ~0u;
4856 dst
->u
[2] = src1
->u
[2] ? src0
->u
[2] % src1
->u
[2] : ~0u;
4857 dst
->u
[3] = src1
->u
[3] ? src0
->u
[3] % src1
->u
[3] : ~0u;
4861 micro_umul(union tgsi_exec_channel
*dst
,
4862 const union tgsi_exec_channel
*src0
,
4863 const union tgsi_exec_channel
*src1
)
4865 dst
->u
[0] = src0
->u
[0] * src1
->u
[0];
4866 dst
->u
[1] = src0
->u
[1] * src1
->u
[1];
4867 dst
->u
[2] = src0
->u
[2] * src1
->u
[2];
4868 dst
->u
[3] = src0
->u
[3] * src1
->u
[3];
4872 micro_imul_hi(union tgsi_exec_channel
*dst
,
4873 const union tgsi_exec_channel
*src0
,
4874 const union tgsi_exec_channel
*src1
)
4876 #define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)
4877 dst
->i
[0] = I64M(src0
->i
[0], src1
->i
[0]);
4878 dst
->i
[1] = I64M(src0
->i
[1], src1
->i
[1]);
4879 dst
->i
[2] = I64M(src0
->i
[2], src1
->i
[2]);
4880 dst
->i
[3] = I64M(src0
->i
[3], src1
->i
[3]);
4885 micro_umul_hi(union tgsi_exec_channel
*dst
,
4886 const union tgsi_exec_channel
*src0
,
4887 const union tgsi_exec_channel
*src1
)
4889 #define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)
4890 dst
->u
[0] = U64M(src0
->u
[0], src1
->u
[0]);
4891 dst
->u
[1] = U64M(src0
->u
[1], src1
->u
[1]);
4892 dst
->u
[2] = U64M(src0
->u
[2], src1
->u
[2]);
4893 dst
->u
[3] = U64M(src0
->u
[3], src1
->u
[3]);
4898 micro_useq(union tgsi_exec_channel
*dst
,
4899 const union tgsi_exec_channel
*src0
,
4900 const union tgsi_exec_channel
*src1
)
4902 dst
->u
[0] = src0
->u
[0] == src1
->u
[0] ? ~0 : 0;
4903 dst
->u
[1] = src0
->u
[1] == src1
->u
[1] ? ~0 : 0;
4904 dst
->u
[2] = src0
->u
[2] == src1
->u
[2] ? ~0 : 0;
4905 dst
->u
[3] = src0
->u
[3] == src1
->u
[3] ? ~0 : 0;
4909 micro_usge(union tgsi_exec_channel
*dst
,
4910 const union tgsi_exec_channel
*src0
,
4911 const union tgsi_exec_channel
*src1
)
4913 dst
->u
[0] = src0
->u
[0] >= src1
->u
[0] ? ~0 : 0;
4914 dst
->u
[1] = src0
->u
[1] >= src1
->u
[1] ? ~0 : 0;
4915 dst
->u
[2] = src0
->u
[2] >= src1
->u
[2] ? ~0 : 0;
4916 dst
->u
[3] = src0
->u
[3] >= src1
->u
[3] ? ~0 : 0;
4920 micro_ushr(union tgsi_exec_channel
*dst
,
4921 const union tgsi_exec_channel
*src0
,
4922 const union tgsi_exec_channel
*src1
)
4924 unsigned masked_count
;
4925 masked_count
= src1
->u
[0] & 0x1f;
4926 dst
->u
[0] = src0
->u
[0] >> masked_count
;
4927 masked_count
= src1
->u
[1] & 0x1f;
4928 dst
->u
[1] = src0
->u
[1] >> masked_count
;
4929 masked_count
= src1
->u
[2] & 0x1f;
4930 dst
->u
[2] = src0
->u
[2] >> masked_count
;
4931 masked_count
= src1
->u
[3] & 0x1f;
4932 dst
->u
[3] = src0
->u
[3] >> masked_count
;
4936 micro_uslt(union tgsi_exec_channel
*dst
,
4937 const union tgsi_exec_channel
*src0
,
4938 const union tgsi_exec_channel
*src1
)
4940 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? ~0 : 0;
4941 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? ~0 : 0;
4942 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? ~0 : 0;
4943 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? ~0 : 0;
4947 micro_usne(union tgsi_exec_channel
*dst
,
4948 const union tgsi_exec_channel
*src0
,
4949 const union tgsi_exec_channel
*src1
)
4951 dst
->u
[0] = src0
->u
[0] != src1
->u
[0] ? ~0 : 0;
4952 dst
->u
[1] = src0
->u
[1] != src1
->u
[1] ? ~0 : 0;
4953 dst
->u
[2] = src0
->u
[2] != src1
->u
[2] ? ~0 : 0;
4954 dst
->u
[3] = src0
->u
[3] != src1
->u
[3] ? ~0 : 0;
4958 micro_uarl(union tgsi_exec_channel
*dst
,
4959 const union tgsi_exec_channel
*src
)
4961 dst
->i
[0] = src
->u
[0];
4962 dst
->i
[1] = src
->u
[1];
4963 dst
->i
[2] = src
->u
[2];
4964 dst
->i
[3] = src
->u
[3];
4968 * Signed bitfield extract (i.e. sign-extend the extracted bits)
4971 micro_ibfe(union tgsi_exec_channel
*dst
,
4972 const union tgsi_exec_channel
*src0
,
4973 const union tgsi_exec_channel
*src1
,
4974 const union tgsi_exec_channel
*src2
)
4977 for (i
= 0; i
< 4; i
++) {
4978 int width
= src2
->i
[i
];
4979 int offset
= src1
->i
[i
] & 0x1f;
4980 if (width
== 32 && offset
== 0) {
4981 dst
->i
[i
] = src0
->i
[i
];
4987 else if (width
+ offset
< 32)
4988 dst
->i
[i
] = (src0
->i
[i
] << (32 - width
- offset
)) >> (32 - width
);
4990 dst
->i
[i
] = src0
->i
[i
] >> offset
;
4995 * Unsigned bitfield extract
4998 micro_ubfe(union tgsi_exec_channel
*dst
,
4999 const union tgsi_exec_channel
*src0
,
5000 const union tgsi_exec_channel
*src1
,
5001 const union tgsi_exec_channel
*src2
)
5004 for (i
= 0; i
< 4; i
++) {
5005 int width
= src2
->u
[i
];
5006 int offset
= src1
->u
[i
] & 0x1f;
5007 if (width
== 32 && offset
== 0) {
5008 dst
->u
[i
] = src0
->u
[i
];
5014 else if (width
+ offset
< 32)
5015 dst
->u
[i
] = (src0
->u
[i
] << (32 - width
- offset
)) >> (32 - width
);
5017 dst
->u
[i
] = src0
->u
[i
] >> offset
;
5022 * Bitfield insert: copy low bits from src1 into a region of src0.
5025 micro_bfi(union tgsi_exec_channel
*dst
,
5026 const union tgsi_exec_channel
*src0
,
5027 const union tgsi_exec_channel
*src1
,
5028 const union tgsi_exec_channel
*src2
,
5029 const union tgsi_exec_channel
*src3
)
5032 for (i
= 0; i
< 4; i
++) {
5033 int width
= src3
->u
[i
];
5034 int offset
= src2
->u
[i
] & 0x1f;
5036 dst
->u
[i
] = src1
->u
[i
];
5038 int bitmask
= ((1 << width
) - 1) << offset
;
5039 dst
->u
[i
] = ((src1
->u
[i
] << offset
) & bitmask
) | (src0
->u
[i
] & ~bitmask
);
5045 micro_brev(union tgsi_exec_channel
*dst
,
5046 const union tgsi_exec_channel
*src
)
5048 dst
->u
[0] = util_bitreverse(src
->u
[0]);
5049 dst
->u
[1] = util_bitreverse(src
->u
[1]);
5050 dst
->u
[2] = util_bitreverse(src
->u
[2]);
5051 dst
->u
[3] = util_bitreverse(src
->u
[3]);
5055 micro_popc(union tgsi_exec_channel
*dst
,
5056 const union tgsi_exec_channel
*src
)
5058 dst
->u
[0] = util_bitcount(src
->u
[0]);
5059 dst
->u
[1] = util_bitcount(src
->u
[1]);
5060 dst
->u
[2] = util_bitcount(src
->u
[2]);
5061 dst
->u
[3] = util_bitcount(src
->u
[3]);
5065 micro_lsb(union tgsi_exec_channel
*dst
,
5066 const union tgsi_exec_channel
*src
)
5068 dst
->i
[0] = ffs(src
->u
[0]) - 1;
5069 dst
->i
[1] = ffs(src
->u
[1]) - 1;
5070 dst
->i
[2] = ffs(src
->u
[2]) - 1;
5071 dst
->i
[3] = ffs(src
->u
[3]) - 1;
5075 micro_imsb(union tgsi_exec_channel
*dst
,
5076 const union tgsi_exec_channel
*src
)
5078 dst
->i
[0] = util_last_bit_signed(src
->i
[0]) - 1;
5079 dst
->i
[1] = util_last_bit_signed(src
->i
[1]) - 1;
5080 dst
->i
[2] = util_last_bit_signed(src
->i
[2]) - 1;
5081 dst
->i
[3] = util_last_bit_signed(src
->i
[3]) - 1;
5085 micro_umsb(union tgsi_exec_channel
*dst
,
5086 const union tgsi_exec_channel
*src
)
5088 dst
->i
[0] = util_last_bit(src
->u
[0]) - 1;
5089 dst
->i
[1] = util_last_bit(src
->u
[1]) - 1;
5090 dst
->i
[2] = util_last_bit(src
->u
[2]) - 1;
5091 dst
->i
[3] = util_last_bit(src
->u
[3]) - 1;
5095 * Execute a TGSI instruction.
5096 * Returns TRUE if a barrier instruction is hit,
5101 struct tgsi_exec_machine
*mach
,
5102 const struct tgsi_full_instruction
*inst
,
5105 union tgsi_exec_channel r
[10];
5109 switch (inst
->Instruction
.Opcode
) {
5110 case TGSI_OPCODE_ARL
:
5111 exec_vector_unary(mach
, inst
, micro_arl
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_FLOAT
);
5114 case TGSI_OPCODE_MOV
:
5115 exec_vector_unary(mach
, inst
, micro_mov
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_FLOAT
);
5118 case TGSI_OPCODE_LIT
:
5119 exec_lit(mach
, inst
);
5122 case TGSI_OPCODE_RCP
:
5123 exec_scalar_unary(mach
, inst
, micro_rcp
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5126 case TGSI_OPCODE_RSQ
:
5127 exec_scalar_unary(mach
, inst
, micro_rsq
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5130 case TGSI_OPCODE_EXP
:
5131 exec_exp(mach
, inst
);
5134 case TGSI_OPCODE_LOG
:
5135 exec_log(mach
, inst
);
5138 case TGSI_OPCODE_MUL
:
5139 exec_vector_binary(mach
, inst
, micro_mul
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5142 case TGSI_OPCODE_ADD
:
5143 exec_vector_binary(mach
, inst
, micro_add
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5146 case TGSI_OPCODE_DP3
:
5147 exec_dp3(mach
, inst
);
5150 case TGSI_OPCODE_DP4
:
5151 exec_dp4(mach
, inst
);
5154 case TGSI_OPCODE_DST
:
5155 exec_dst(mach
, inst
);
5158 case TGSI_OPCODE_MIN
:
5159 exec_vector_binary(mach
, inst
, micro_min
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5162 case TGSI_OPCODE_MAX
:
5163 exec_vector_binary(mach
, inst
, micro_max
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5166 case TGSI_OPCODE_SLT
:
5167 exec_vector_binary(mach
, inst
, micro_slt
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5170 case TGSI_OPCODE_SGE
:
5171 exec_vector_binary(mach
, inst
, micro_sge
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5174 case TGSI_OPCODE_MAD
:
5175 exec_vector_trinary(mach
, inst
, micro_mad
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5178 case TGSI_OPCODE_LRP
:
5179 exec_vector_trinary(mach
, inst
, micro_lrp
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5182 case TGSI_OPCODE_SQRT
:
5183 exec_scalar_unary(mach
, inst
, micro_sqrt
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5186 case TGSI_OPCODE_FRC
:
5187 exec_vector_unary(mach
, inst
, micro_frc
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5190 case TGSI_OPCODE_FLR
:
5191 exec_vector_unary(mach
, inst
, micro_flr
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5194 case TGSI_OPCODE_ROUND
:
5195 exec_vector_unary(mach
, inst
, micro_rnd
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5198 case TGSI_OPCODE_EX2
:
5199 exec_scalar_unary(mach
, inst
, micro_exp2
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5202 case TGSI_OPCODE_LG2
:
5203 exec_scalar_unary(mach
, inst
, micro_lg2
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5206 case TGSI_OPCODE_POW
:
5207 exec_scalar_binary(mach
, inst
, micro_pow
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5210 case TGSI_OPCODE_LDEXP
:
5211 exec_vector_binary(mach
, inst
, micro_ldexp
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5214 case TGSI_OPCODE_COS
:
5215 exec_scalar_unary(mach
, inst
, micro_cos
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5218 case TGSI_OPCODE_DDX
:
5219 exec_vector_unary(mach
, inst
, micro_ddx
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5222 case TGSI_OPCODE_DDY
:
5223 exec_vector_unary(mach
, inst
, micro_ddy
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5226 case TGSI_OPCODE_KILL
:
5230 case TGSI_OPCODE_KILL_IF
:
5231 exec_kill_if (mach
, inst
);
5234 case TGSI_OPCODE_PK2H
:
5235 exec_pk2h(mach
, inst
);
5238 case TGSI_OPCODE_PK2US
:
5242 case TGSI_OPCODE_PK4B
:
5246 case TGSI_OPCODE_PK4UB
:
5250 case TGSI_OPCODE_SEQ
:
5251 exec_vector_binary(mach
, inst
, micro_seq
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5254 case TGSI_OPCODE_SGT
:
5255 exec_vector_binary(mach
, inst
, micro_sgt
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5258 case TGSI_OPCODE_SIN
:
5259 exec_scalar_unary(mach
, inst
, micro_sin
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5262 case TGSI_OPCODE_SLE
:
5263 exec_vector_binary(mach
, inst
, micro_sle
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5266 case TGSI_OPCODE_SNE
:
5267 exec_vector_binary(mach
, inst
, micro_sne
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5270 case TGSI_OPCODE_TEX
:
5271 /* simple texture lookup */
5272 /* src[0] = texcoord */
5273 /* src[1] = sampler unit */
5274 exec_tex(mach
, inst
, TEX_MODIFIER_NONE
, 1);
5277 case TGSI_OPCODE_TXB
:
5278 /* Texture lookup with lod bias */
5279 /* src[0] = texcoord (src[0].w = LOD bias) */
5280 /* src[1] = sampler unit */
5281 exec_tex(mach
, inst
, TEX_MODIFIER_LOD_BIAS
, 1);
5284 case TGSI_OPCODE_TXD
:
5285 /* Texture lookup with explicit partial derivatives */
5286 /* src[0] = texcoord */
5287 /* src[1] = d[strq]/dx */
5288 /* src[2] = d[strq]/dy */
5289 /* src[3] = sampler unit */
5290 exec_txd(mach
, inst
);
5293 case TGSI_OPCODE_TXL
:
5294 /* Texture lookup with explicit LOD */
5295 /* src[0] = texcoord (src[0].w = LOD) */
5296 /* src[1] = sampler unit */
5297 exec_tex(mach
, inst
, TEX_MODIFIER_EXPLICIT_LOD
, 1);
5300 case TGSI_OPCODE_TXP
:
5301 /* Texture lookup with projection */
5302 /* src[0] = texcoord (src[0].w = projection) */
5303 /* src[1] = sampler unit */
5304 exec_tex(mach
, inst
, TEX_MODIFIER_PROJECTED
, 1);
5307 case TGSI_OPCODE_TG4
:
5308 /* src[0] = texcoord */
5309 /* src[1] = component */
5310 /* src[2] = sampler unit */
5311 exec_tex(mach
, inst
, TEX_MODIFIER_GATHER
, 2);
5314 case TGSI_OPCODE_LODQ
:
5315 /* src[0] = texcoord */
5316 /* src[1] = sampler unit */
5317 exec_lodq(mach
, inst
);
5320 case TGSI_OPCODE_UP2H
:
5321 exec_up2h(mach
, inst
);
5324 case TGSI_OPCODE_UP2US
:
5328 case TGSI_OPCODE_UP4B
:
5332 case TGSI_OPCODE_UP4UB
:
5336 case TGSI_OPCODE_ARR
:
5337 exec_vector_unary(mach
, inst
, micro_arr
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_FLOAT
);
5340 case TGSI_OPCODE_CAL
:
5341 /* skip the call if no execution channels are enabled */
5342 if (mach
->ExecMask
) {
5345 /* First, record the depths of the execution stacks.
5346 * This is important for deeply nested/looped return statements.
5347 * We have to unwind the stacks by the correct amount. For a
5348 * real code generator, we could determine the number of entries
5349 * to pop off each stack with simple static analysis and avoid
5350 * implementing this data structure at run time.
5352 mach
->CallStack
[mach
->CallStackTop
].CondStackTop
= mach
->CondStackTop
;
5353 mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
= mach
->LoopStackTop
;
5354 mach
->CallStack
[mach
->CallStackTop
].ContStackTop
= mach
->ContStackTop
;
5355 mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
= mach
->SwitchStackTop
;
5356 mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
= mach
->BreakStackTop
;
5357 /* note that PC was already incremented above */
5358 mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
= *pc
;
5360 mach
->CallStackTop
++;
5362 /* Second, push the Cond, Loop, Cont, Func stacks */
5363 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
5364 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
5365 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
5366 assert(mach
->SwitchStackTop
< TGSI_EXEC_MAX_SWITCH_NESTING
);
5367 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
5368 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
5370 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
5371 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
5372 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
5373 mach
->SwitchStack
[mach
->SwitchStackTop
++] = mach
->Switch
;
5374 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
5375 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
5377 /* Finally, jump to the subroutine. The label is a pointer
5378 * (an instruction number) to the BGNSUB instruction.
5380 *pc
= inst
->Label
.Label
;
5381 assert(mach
->Instructions
[*pc
].Instruction
.Opcode
5382 == TGSI_OPCODE_BGNSUB
);
5386 case TGSI_OPCODE_RET
:
5387 mach
->FuncMask
&= ~mach
->ExecMask
;
5388 UPDATE_EXEC_MASK(mach
);
5390 if (mach
->FuncMask
== 0x0) {
5391 /* really return now (otherwise, keep executing) */
5393 if (mach
->CallStackTop
== 0) {
5394 /* returning from main() */
5395 mach
->CondStackTop
= 0;
5396 mach
->LoopStackTop
= 0;
5397 mach
->ContStackTop
= 0;
5398 mach
->LoopLabelStackTop
= 0;
5399 mach
->SwitchStackTop
= 0;
5400 mach
->BreakStackTop
= 0;
5405 assert(mach
->CallStackTop
> 0);
5406 mach
->CallStackTop
--;
5408 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
5409 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
5411 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
5412 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
5414 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
5415 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
5417 mach
->SwitchStackTop
= mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
;
5418 mach
->Switch
= mach
->SwitchStack
[mach
->SwitchStackTop
];
5420 mach
->BreakStackTop
= mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
;
5421 mach
->BreakType
= mach
->BreakStack
[mach
->BreakStackTop
];
5423 assert(mach
->FuncStackTop
> 0);
5424 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
5426 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
5428 UPDATE_EXEC_MASK(mach
);
5432 case TGSI_OPCODE_SSG
:
5433 exec_vector_unary(mach
, inst
, micro_sgn
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5436 case TGSI_OPCODE_CMP
:
5437 exec_vector_trinary(mach
, inst
, micro_cmp
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5440 case TGSI_OPCODE_DIV
:
5441 exec_vector_binary(mach
, inst
, micro_div
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5444 case TGSI_OPCODE_DP2
:
5445 exec_dp2(mach
, inst
);
5448 case TGSI_OPCODE_IF
:
5450 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
5451 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
5452 FETCH( &r
[0], 0, TGSI_CHAN_X
);
5453 /* update CondMask */
5455 mach
->CondMask
&= ~0x1;
5458 mach
->CondMask
&= ~0x2;
5461 mach
->CondMask
&= ~0x4;
5464 mach
->CondMask
&= ~0x8;
5466 UPDATE_EXEC_MASK(mach
);
5467 /* Todo: If CondMask==0, jump to ELSE */
5470 case TGSI_OPCODE_UIF
:
5472 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
5473 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
5474 IFETCH( &r
[0], 0, TGSI_CHAN_X
);
5475 /* update CondMask */
5477 mach
->CondMask
&= ~0x1;
5480 mach
->CondMask
&= ~0x2;
5483 mach
->CondMask
&= ~0x4;
5486 mach
->CondMask
&= ~0x8;
5488 UPDATE_EXEC_MASK(mach
);
5489 /* Todo: If CondMask==0, jump to ELSE */
5492 case TGSI_OPCODE_ELSE
:
5493 /* invert CondMask wrt previous mask */
5496 assert(mach
->CondStackTop
> 0);
5497 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
5498 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
5499 UPDATE_EXEC_MASK(mach
);
5500 /* Todo: If CondMask==0, jump to ENDIF */
5504 case TGSI_OPCODE_ENDIF
:
5506 assert(mach
->CondStackTop
> 0);
5507 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
5508 UPDATE_EXEC_MASK(mach
);
5511 case TGSI_OPCODE_END
:
5512 /* make sure we end primitives which haven't
5513 * been explicitly emitted */
5514 conditional_emit_primitive(mach
);
5515 /* halt execution */
5519 case TGSI_OPCODE_CEIL
:
5520 exec_vector_unary(mach
, inst
, micro_ceil
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5523 case TGSI_OPCODE_I2F
:
5524 exec_vector_unary(mach
, inst
, micro_i2f
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_INT
);
5527 case TGSI_OPCODE_NOT
:
5528 exec_vector_unary(mach
, inst
, micro_not
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5531 case TGSI_OPCODE_TRUNC
:
5532 exec_vector_unary(mach
, inst
, micro_trunc
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
5535 case TGSI_OPCODE_SHL
:
5536 exec_vector_binary(mach
, inst
, micro_shl
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5539 case TGSI_OPCODE_AND
:
5540 exec_vector_binary(mach
, inst
, micro_and
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5543 case TGSI_OPCODE_OR
:
5544 exec_vector_binary(mach
, inst
, micro_or
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5547 case TGSI_OPCODE_MOD
:
5548 exec_vector_binary(mach
, inst
, micro_mod
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5551 case TGSI_OPCODE_XOR
:
5552 exec_vector_binary(mach
, inst
, micro_xor
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5555 case TGSI_OPCODE_TXF
:
5556 exec_txf(mach
, inst
);
5559 case TGSI_OPCODE_TXQ
:
5560 exec_txq(mach
, inst
);
5563 case TGSI_OPCODE_EMIT
:
5564 emit_vertex(mach
, inst
);
5567 case TGSI_OPCODE_ENDPRIM
:
5568 emit_primitive(mach
, inst
);
5571 case TGSI_OPCODE_BGNLOOP
:
5572 /* push LoopMask and ContMasks */
5573 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
5574 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
5575 assert(mach
->LoopLabelStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
5576 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
5578 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
5579 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
5580 mach
->LoopLabelStack
[mach
->LoopLabelStackTop
++] = *pc
- 1;
5581 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
5582 mach
->BreakType
= TGSI_EXEC_BREAK_INSIDE_LOOP
;
5585 case TGSI_OPCODE_ENDLOOP
:
5586 /* Restore ContMask, but don't pop */
5587 assert(mach
->ContStackTop
> 0);
5588 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
5589 UPDATE_EXEC_MASK(mach
);
5590 if (mach
->ExecMask
) {
5591 /* repeat loop: jump to instruction just past BGNLOOP */
5592 assert(mach
->LoopLabelStackTop
> 0);
5593 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
5596 /* exit loop: pop LoopMask */
5597 assert(mach
->LoopStackTop
> 0);
5598 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
5600 assert(mach
->ContStackTop
> 0);
5601 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
5602 assert(mach
->LoopLabelStackTop
> 0);
5603 --mach
->LoopLabelStackTop
;
5605 mach
->BreakType
= mach
->BreakStack
[--mach
->BreakStackTop
];
5607 UPDATE_EXEC_MASK(mach
);
5610 case TGSI_OPCODE_BRK
:
5614 case TGSI_OPCODE_CONT
:
5615 /* turn off cont channels for each enabled exec channel */
5616 mach
->ContMask
&= ~mach
->ExecMask
;
5617 /* Todo: if mach->LoopMask == 0, jump to end of loop */
5618 UPDATE_EXEC_MASK(mach
);
5621 case TGSI_OPCODE_BGNSUB
:
5625 case TGSI_OPCODE_ENDSUB
:
5627 * XXX: This really should be a no-op. We should never reach this opcode.
5630 assert(mach
->CallStackTop
> 0);
5631 mach
->CallStackTop
--;
5633 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
5634 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
5636 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
5637 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
5639 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
5640 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
5642 mach
->SwitchStackTop
= mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
;
5643 mach
->Switch
= mach
->SwitchStack
[mach
->SwitchStackTop
];
5645 mach
->BreakStackTop
= mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
;
5646 mach
->BreakType
= mach
->BreakStack
[mach
->BreakStackTop
];
5648 assert(mach
->FuncStackTop
> 0);
5649 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
5651 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
5653 UPDATE_EXEC_MASK(mach
);
5656 case TGSI_OPCODE_NOP
:
5659 case TGSI_OPCODE_F2I
:
5660 exec_vector_unary(mach
, inst
, micro_f2i
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_FLOAT
);
5663 case TGSI_OPCODE_FSEQ
:
5664 exec_vector_binary(mach
, inst
, micro_fseq
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_FLOAT
);
5667 case TGSI_OPCODE_FSGE
:
5668 exec_vector_binary(mach
, inst
, micro_fsge
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_FLOAT
);
5671 case TGSI_OPCODE_FSLT
:
5672 exec_vector_binary(mach
, inst
, micro_fslt
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_FLOAT
);
5675 case TGSI_OPCODE_FSNE
:
5676 exec_vector_binary(mach
, inst
, micro_fsne
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_FLOAT
);
5679 case TGSI_OPCODE_IDIV
:
5680 exec_vector_binary(mach
, inst
, micro_idiv
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5683 case TGSI_OPCODE_IMAX
:
5684 exec_vector_binary(mach
, inst
, micro_imax
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5687 case TGSI_OPCODE_IMIN
:
5688 exec_vector_binary(mach
, inst
, micro_imin
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5691 case TGSI_OPCODE_INEG
:
5692 exec_vector_unary(mach
, inst
, micro_ineg
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5695 case TGSI_OPCODE_ISGE
:
5696 exec_vector_binary(mach
, inst
, micro_isge
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5699 case TGSI_OPCODE_ISHR
:
5700 exec_vector_binary(mach
, inst
, micro_ishr
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5703 case TGSI_OPCODE_ISLT
:
5704 exec_vector_binary(mach
, inst
, micro_islt
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5707 case TGSI_OPCODE_F2U
:
5708 exec_vector_unary(mach
, inst
, micro_f2u
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_FLOAT
);
5711 case TGSI_OPCODE_U2F
:
5712 exec_vector_unary(mach
, inst
, micro_u2f
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_UINT
);
5715 case TGSI_OPCODE_UADD
:
5716 exec_vector_binary(mach
, inst
, micro_uadd
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5719 case TGSI_OPCODE_UDIV
:
5720 exec_vector_binary(mach
, inst
, micro_udiv
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5723 case TGSI_OPCODE_UMAD
:
5724 exec_vector_trinary(mach
, inst
, micro_umad
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5727 case TGSI_OPCODE_UMAX
:
5728 exec_vector_binary(mach
, inst
, micro_umax
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5731 case TGSI_OPCODE_UMIN
:
5732 exec_vector_binary(mach
, inst
, micro_umin
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5735 case TGSI_OPCODE_UMOD
:
5736 exec_vector_binary(mach
, inst
, micro_umod
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5739 case TGSI_OPCODE_UMUL
:
5740 exec_vector_binary(mach
, inst
, micro_umul
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5743 case TGSI_OPCODE_IMUL_HI
:
5744 exec_vector_binary(mach
, inst
, micro_imul_hi
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5747 case TGSI_OPCODE_UMUL_HI
:
5748 exec_vector_binary(mach
, inst
, micro_umul_hi
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5751 case TGSI_OPCODE_USEQ
:
5752 exec_vector_binary(mach
, inst
, micro_useq
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5755 case TGSI_OPCODE_USGE
:
5756 exec_vector_binary(mach
, inst
, micro_usge
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5759 case TGSI_OPCODE_USHR
:
5760 exec_vector_binary(mach
, inst
, micro_ushr
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5763 case TGSI_OPCODE_USLT
:
5764 exec_vector_binary(mach
, inst
, micro_uslt
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5767 case TGSI_OPCODE_USNE
:
5768 exec_vector_binary(mach
, inst
, micro_usne
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5771 case TGSI_OPCODE_SWITCH
:
5772 exec_switch(mach
, inst
);
5775 case TGSI_OPCODE_CASE
:
5776 exec_case(mach
, inst
);
5779 case TGSI_OPCODE_DEFAULT
:
5783 case TGSI_OPCODE_ENDSWITCH
:
5784 exec_endswitch(mach
);
5787 case TGSI_OPCODE_SAMPLE_I
:
5788 exec_txf(mach
, inst
);
5791 case TGSI_OPCODE_SAMPLE_I_MS
:
5792 exec_txf(mach
, inst
);
5795 case TGSI_OPCODE_SAMPLE
:
5796 exec_sample(mach
, inst
, TEX_MODIFIER_NONE
, FALSE
);
5799 case TGSI_OPCODE_SAMPLE_B
:
5800 exec_sample(mach
, inst
, TEX_MODIFIER_LOD_BIAS
, FALSE
);
5803 case TGSI_OPCODE_SAMPLE_C
:
5804 exec_sample(mach
, inst
, TEX_MODIFIER_NONE
, TRUE
);
5807 case TGSI_OPCODE_SAMPLE_C_LZ
:
5808 exec_sample(mach
, inst
, TEX_MODIFIER_LEVEL_ZERO
, TRUE
);
5811 case TGSI_OPCODE_SAMPLE_D
:
5812 exec_sample_d(mach
, inst
);
5815 case TGSI_OPCODE_SAMPLE_L
:
5816 exec_sample(mach
, inst
, TEX_MODIFIER_EXPLICIT_LOD
, FALSE
);
5819 case TGSI_OPCODE_GATHER4
:
5820 exec_sample(mach
, inst
, TEX_MODIFIER_GATHER
, FALSE
);
5823 case TGSI_OPCODE_SVIEWINFO
:
5824 exec_txq(mach
, inst
);
5827 case TGSI_OPCODE_SAMPLE_POS
:
5831 case TGSI_OPCODE_SAMPLE_INFO
:
5835 case TGSI_OPCODE_LOD
:
5836 exec_lodq(mach
, inst
);
5839 case TGSI_OPCODE_UARL
:
5840 exec_vector_unary(mach
, inst
, micro_uarl
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_UINT
);
5843 case TGSI_OPCODE_UCMP
:
5844 exec_ucmp(mach
, inst
);
5847 case TGSI_OPCODE_IABS
:
5848 exec_vector_unary(mach
, inst
, micro_iabs
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5851 case TGSI_OPCODE_ISSG
:
5852 exec_vector_unary(mach
, inst
, micro_isgn
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5855 case TGSI_OPCODE_TEX2
:
5856 /* simple texture lookup */
5857 /* src[0] = texcoord */
5858 /* src[1] = compare */
5859 /* src[2] = sampler unit */
5860 exec_tex(mach
, inst
, TEX_MODIFIER_NONE
, 2);
5862 case TGSI_OPCODE_TXB2
:
5863 /* simple texture lookup */
5864 /* src[0] = texcoord */
5866 /* src[2] = sampler unit */
5867 exec_tex(mach
, inst
, TEX_MODIFIER_LOD_BIAS
, 2);
5869 case TGSI_OPCODE_TXL2
:
5870 /* simple texture lookup */
5871 /* src[0] = texcoord */
5873 /* src[2] = sampler unit */
5874 exec_tex(mach
, inst
, TEX_MODIFIER_EXPLICIT_LOD
, 2);
5877 case TGSI_OPCODE_IBFE
:
5878 exec_vector_trinary(mach
, inst
, micro_ibfe
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5880 case TGSI_OPCODE_UBFE
:
5881 exec_vector_trinary(mach
, inst
, micro_ubfe
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5883 case TGSI_OPCODE_BFI
:
5884 exec_vector_quaternary(mach
, inst
, micro_bfi
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5886 case TGSI_OPCODE_BREV
:
5887 exec_vector_unary(mach
, inst
, micro_brev
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5889 case TGSI_OPCODE_POPC
:
5890 exec_vector_unary(mach
, inst
, micro_popc
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
5892 case TGSI_OPCODE_LSB
:
5893 exec_vector_unary(mach
, inst
, micro_lsb
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_UINT
);
5895 case TGSI_OPCODE_IMSB
:
5896 exec_vector_unary(mach
, inst
, micro_imsb
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
5898 case TGSI_OPCODE_UMSB
:
5899 exec_vector_unary(mach
, inst
, micro_umsb
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_UINT
);
5902 case TGSI_OPCODE_F2D
:
5903 exec_t_2_64(mach
, inst
, micro_f2d
, TGSI_EXEC_DATA_FLOAT
);
5906 case TGSI_OPCODE_D2F
:
5907 exec_64_2_t(mach
, inst
, micro_d2f
, TGSI_EXEC_DATA_FLOAT
);
5910 case TGSI_OPCODE_DABS
:
5911 exec_double_unary(mach
, inst
, micro_dabs
);
5914 case TGSI_OPCODE_DNEG
:
5915 exec_double_unary(mach
, inst
, micro_dneg
);
5918 case TGSI_OPCODE_DADD
:
5919 exec_double_binary(mach
, inst
, micro_dadd
, TGSI_EXEC_DATA_DOUBLE
);
5922 case TGSI_OPCODE_DDIV
:
5923 exec_double_binary(mach
, inst
, micro_ddiv
, TGSI_EXEC_DATA_DOUBLE
);
5926 case TGSI_OPCODE_DMUL
:
5927 exec_double_binary(mach
, inst
, micro_dmul
, TGSI_EXEC_DATA_DOUBLE
);
5930 case TGSI_OPCODE_DMAX
:
5931 exec_double_binary(mach
, inst
, micro_dmax
, TGSI_EXEC_DATA_DOUBLE
);
5934 case TGSI_OPCODE_DMIN
:
5935 exec_double_binary(mach
, inst
, micro_dmin
, TGSI_EXEC_DATA_DOUBLE
);
5938 case TGSI_OPCODE_DSLT
:
5939 exec_double_binary(mach
, inst
, micro_dslt
, TGSI_EXEC_DATA_UINT
);
5942 case TGSI_OPCODE_DSGE
:
5943 exec_double_binary(mach
, inst
, micro_dsge
, TGSI_EXEC_DATA_UINT
);
5946 case TGSI_OPCODE_DSEQ
:
5947 exec_double_binary(mach
, inst
, micro_dseq
, TGSI_EXEC_DATA_UINT
);
5950 case TGSI_OPCODE_DSNE
:
5951 exec_double_binary(mach
, inst
, micro_dsne
, TGSI_EXEC_DATA_UINT
);
5954 case TGSI_OPCODE_DRCP
:
5955 exec_double_unary(mach
, inst
, micro_drcp
);
5958 case TGSI_OPCODE_DSQRT
:
5959 exec_double_unary(mach
, inst
, micro_dsqrt
);
5962 case TGSI_OPCODE_DRSQ
:
5963 exec_double_unary(mach
, inst
, micro_drsq
);
5966 case TGSI_OPCODE_DMAD
:
5967 exec_double_trinary(mach
, inst
, micro_dmad
);
5970 case TGSI_OPCODE_DFRAC
:
5971 exec_double_unary(mach
, inst
, micro_dfrac
);
5974 case TGSI_OPCODE_DLDEXP
:
5975 exec_dldexp(mach
, inst
);
5978 case TGSI_OPCODE_DFRACEXP
:
5979 exec_dfracexp(mach
, inst
);
5982 case TGSI_OPCODE_I2D
:
5983 exec_t_2_64(mach
, inst
, micro_i2d
, TGSI_EXEC_DATA_INT
);
5986 case TGSI_OPCODE_D2I
:
5987 exec_64_2_t(mach
, inst
, micro_d2i
, TGSI_EXEC_DATA_INT
);
5990 case TGSI_OPCODE_U2D
:
5991 exec_t_2_64(mach
, inst
, micro_u2d
, TGSI_EXEC_DATA_UINT
);
5994 case TGSI_OPCODE_D2U
:
5995 exec_64_2_t(mach
, inst
, micro_d2u
, TGSI_EXEC_DATA_INT
);
5998 case TGSI_OPCODE_LOAD
:
5999 exec_load(mach
, inst
);
6002 case TGSI_OPCODE_STORE
:
6003 exec_store(mach
, inst
);
6006 case TGSI_OPCODE_ATOMUADD
:
6007 case TGSI_OPCODE_ATOMXCHG
:
6008 case TGSI_OPCODE_ATOMCAS
:
6009 case TGSI_OPCODE_ATOMAND
:
6010 case TGSI_OPCODE_ATOMOR
:
6011 case TGSI_OPCODE_ATOMXOR
:
6012 case TGSI_OPCODE_ATOMUMIN
:
6013 case TGSI_OPCODE_ATOMUMAX
:
6014 case TGSI_OPCODE_ATOMIMIN
:
6015 case TGSI_OPCODE_ATOMIMAX
:
6016 case TGSI_OPCODE_ATOMFADD
:
6017 exec_atomop(mach
, inst
);
6020 case TGSI_OPCODE_RESQ
:
6021 exec_resq(mach
, inst
);
6023 case TGSI_OPCODE_BARRIER
:
6024 case TGSI_OPCODE_MEMBAR
:
6028 case TGSI_OPCODE_I64ABS
:
6029 exec_double_unary(mach
, inst
, micro_i64abs
);
6032 case TGSI_OPCODE_I64SSG
:
6033 exec_double_unary(mach
, inst
, micro_i64sgn
);
6036 case TGSI_OPCODE_I64NEG
:
6037 exec_double_unary(mach
, inst
, micro_i64neg
);
6040 case TGSI_OPCODE_U64SEQ
:
6041 exec_double_binary(mach
, inst
, micro_u64seq
, TGSI_EXEC_DATA_UINT
);
6044 case TGSI_OPCODE_U64SNE
:
6045 exec_double_binary(mach
, inst
, micro_u64sne
, TGSI_EXEC_DATA_UINT
);
6048 case TGSI_OPCODE_I64SLT
:
6049 exec_double_binary(mach
, inst
, micro_i64slt
, TGSI_EXEC_DATA_UINT
);
6051 case TGSI_OPCODE_U64SLT
:
6052 exec_double_binary(mach
, inst
, micro_u64slt
, TGSI_EXEC_DATA_UINT
);
6055 case TGSI_OPCODE_I64SGE
:
6056 exec_double_binary(mach
, inst
, micro_i64sge
, TGSI_EXEC_DATA_UINT
);
6058 case TGSI_OPCODE_U64SGE
:
6059 exec_double_binary(mach
, inst
, micro_u64sge
, TGSI_EXEC_DATA_UINT
);
6062 case TGSI_OPCODE_I64MIN
:
6063 exec_double_binary(mach
, inst
, micro_i64min
, TGSI_EXEC_DATA_INT64
);
6065 case TGSI_OPCODE_U64MIN
:
6066 exec_double_binary(mach
, inst
, micro_u64min
, TGSI_EXEC_DATA_UINT64
);
6068 case TGSI_OPCODE_I64MAX
:
6069 exec_double_binary(mach
, inst
, micro_i64max
, TGSI_EXEC_DATA_INT64
);
6071 case TGSI_OPCODE_U64MAX
:
6072 exec_double_binary(mach
, inst
, micro_u64max
, TGSI_EXEC_DATA_UINT64
);
6074 case TGSI_OPCODE_U64ADD
:
6075 exec_double_binary(mach
, inst
, micro_u64add
, TGSI_EXEC_DATA_UINT64
);
6077 case TGSI_OPCODE_U64MUL
:
6078 exec_double_binary(mach
, inst
, micro_u64mul
, TGSI_EXEC_DATA_UINT64
);
6080 case TGSI_OPCODE_U64SHL
:
6081 exec_arg0_64_arg1_32(mach
, inst
, micro_u64shl
);
6083 case TGSI_OPCODE_I64SHR
:
6084 exec_arg0_64_arg1_32(mach
, inst
, micro_i64shr
);
6086 case TGSI_OPCODE_U64SHR
:
6087 exec_arg0_64_arg1_32(mach
, inst
, micro_u64shr
);
6089 case TGSI_OPCODE_U64DIV
:
6090 exec_double_binary(mach
, inst
, micro_u64div
, TGSI_EXEC_DATA_UINT64
);
6092 case TGSI_OPCODE_I64DIV
:
6093 exec_double_binary(mach
, inst
, micro_i64div
, TGSI_EXEC_DATA_INT64
);
6095 case TGSI_OPCODE_U64MOD
:
6096 exec_double_binary(mach
, inst
, micro_u64mod
, TGSI_EXEC_DATA_UINT64
);
6098 case TGSI_OPCODE_I64MOD
:
6099 exec_double_binary(mach
, inst
, micro_i64mod
, TGSI_EXEC_DATA_INT64
);
6102 case TGSI_OPCODE_F2U64
:
6103 exec_t_2_64(mach
, inst
, micro_f2u64
, TGSI_EXEC_DATA_FLOAT
);
6106 case TGSI_OPCODE_F2I64
:
6107 exec_t_2_64(mach
, inst
, micro_f2i64
, TGSI_EXEC_DATA_FLOAT
);
6110 case TGSI_OPCODE_U2I64
:
6111 exec_t_2_64(mach
, inst
, micro_u2i64
, TGSI_EXEC_DATA_INT
);
6113 case TGSI_OPCODE_I2I64
:
6114 exec_t_2_64(mach
, inst
, micro_i2i64
, TGSI_EXEC_DATA_INT
);
6117 case TGSI_OPCODE_D2U64
:
6118 exec_double_unary(mach
, inst
, micro_d2u64
);
6121 case TGSI_OPCODE_D2I64
:
6122 exec_double_unary(mach
, inst
, micro_d2i64
);
6125 case TGSI_OPCODE_U642F
:
6126 exec_64_2_t(mach
, inst
, micro_u642f
, TGSI_EXEC_DATA_FLOAT
);
6128 case TGSI_OPCODE_I642F
:
6129 exec_64_2_t(mach
, inst
, micro_i642f
, TGSI_EXEC_DATA_FLOAT
);
6132 case TGSI_OPCODE_U642D
:
6133 exec_double_unary(mach
, inst
, micro_u642d
);
6135 case TGSI_OPCODE_I642D
:
6136 exec_double_unary(mach
, inst
, micro_i642d
);
/*
 * Put the machine into its initial per-run state: zero element 0 of the
 * kill-mask and output temporaries, then load default_mask into the
 * Cond/Loop/Cont/Func/Exec masks and into Switch.mask.
 *
 * mach: the interpreter state that is modified in place.
 *
 * NOTE(review): this listing looks like a lossy extract; the return type,
 * the block braces and at least one statement are not visible here.
 * Confirm details against the complete file before relying on them.
 */
6146 tgsi_exec_machine_setup_masks(struct tgsi_exec_machine
*mach
)
/* Starts out with the low four bits set. */
6148 uint default_mask
= 0xf;
/* Zero element 0 of the kill-mask slot and of the output slot in Temps. */
6150 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
6151 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
/*
 * Geometry shaders only: for every vertex stream, zero element 0 of the
 * temp slot named by temp_prim_idxs[i] and entry 0 of Primitives[i].
 */
6153 if (mach
->ShaderType
== PIPE_SHADER_GEOMETRY
) {
6154 for (unsigned i
= 0; i
< TGSI_MAX_VERTEX_STREAMS
; i
++) {
6155 mach
->Temps
[temp_prim_idxs
[i
].idx
].xyzw
[temp_prim_idxs
[i
].chan
].u
[0] = 0;
6156 mach
->Primitives
[i
][0] = 0;
/*
 * NOTE(review): the statement that belongs to the comment below is not
 * visible in this extract; presumably it narrows default_mask for
 * geometry shaders - TODO confirm.
 */
6158 /* GS runs on a single primitive for now */
/* NonHelperMask is only seeded when it is still zero. */
6162 if (mach
->NonHelperMask
== 0)
6163 mach
->NonHelperMask
= default_mask
;
/* Every control-flow mask starts from the same default value. */
6164 mach
->CondMask
= default_mask
;
6165 mach
->LoopMask
= default_mask
;
6166 mach
->ContMask
= default_mask
;
6167 mach
->FuncMask
= default_mask
;
6168 mach
->ExecMask
= default_mask
;
6170 mach
->Switch
.mask
= default_mask
;
/* Sanity checks: every control-flow stack must be empty at this point. */
6172 assert(mach
->CondStackTop
== 0);
6173 assert(mach
->LoopStackTop
== 0);
6174 assert(mach
->ContStackTop
== 0);
6175 assert(mach
->SwitchStackTop
== 0);
6176 assert(mach
->BreakStackTop
== 0);
6177 assert(mach
->CallStackTop
== 0);
6181 * Run TGSI interpreter.
6182 * \return bitmask of "alive" quad components
/*
 * Interpreter main loop.  Stores start_pc in mach->pc, then keeps calling
 * exec_instruction() on mach->Instructions[mach->pc] until mach->pc
 * becomes -1.  The value returned at the end is the bitwise complement of
 * element 0 of the kill-mask temporary.
 *
 * mach:     interpreter state, modified in place.
 * start_pc: instruction index at which execution begins.
 *
 * NOTE(review): this listing looks like a lossy extract.  The return type,
 * braces, several declarations (i, j, inst) and whatever conditionals or
 * preprocessor guards surround the setup, trace and depth-scale code are
 * not visible here.  Confirm against the complete file.
 */
6185 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
, int start_pc
)
6189 mach
->pc
= start_pc
;
/* Reset masks and scratch temporaries via the setup helper. */
6192 tgsi_exec_machine_setup_masks(mach
);
6194 /* execute declarations (interpolants) */
6195 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
6196 exec_declaration( mach
, mach
->Declarations
+i
);
/*
 * Local shadow copies of the temps and outputs.  The trace code further
 * down compares them with the live registers to find what changed.
 */
6202 struct tgsi_exec_vector temps
[TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
];
6203 struct tgsi_exec_vector outputs
[PIPE_MAX_ATTRIBS
];
/*
 * Zero the live registers and the shadow copies.  The byte counts passed
 * for mach->Temps and mach->Outputs are the sizes of the local arrays.
 */
6207 memset(mach
->Temps
, 0, sizeof(temps
));
6209 memset(mach
->Outputs
, 0, sizeof(outputs
));
6210 memset(temps
, 0, sizeof(temps
));
6211 memset(outputs
, 0, sizeof(outputs
));
6215 /* execute instructions, until pc is set to -1 */
6216 while (mach
->pc
!= -1) {
6217 boolean barrier_hit
;
/* Dump the instruction about to run; inst is post-incremented per call. */
6221 tgsi_dump_instruction(&mach
->Instructions
[mach
->pc
], inst
++);
/* pc is handed to exec_instruction by address so it can be updated. */
6224 assert(mach
->pc
< (int) mach
->NumInstructions
);
6225 barrier_hit
= exec_instruction(mach
, mach
->Instructions
+ mach
->pc
, &mach
->pc
);
6227 /* for compute shaders if we hit a barrier return now for later rescheduling */
6228 if (barrier_hit
&& mach
->ShaderType
== PIPE_SHADER_COMPUTE
)
/*
 * NOTE(review): the statement guarded by the test above is not visible in
 * this extract; per the comment it is presumably an early return - TODO
 * confirm.
 */
/*
 * Trace: for every temp that differs from its shadow copy, refresh the
 * shadow copy and print the four channels as float and unsigned pairs.
 */
6232 for (i
= 0; i
< TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
; i
++) {
6233 if (memcmp(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]))) {
6236 memcpy(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]));
6237 debug_printf("TEMP[%2u] = ", i
);
6238 for (j
= 0; j
< 4; j
++) {
6242 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6243 temps
[i
].xyzw
[0].f
[j
], temps
[i
].xyzw
[0].u
[j
],
6244 temps
[i
].xyzw
[1].f
[j
], temps
[i
].xyzw
[1].u
[j
],
6245 temps
[i
].xyzw
[2].f
[j
], temps
[i
].xyzw
[2].u
[j
],
6246 temps
[i
].xyzw
[3].f
[j
], temps
[i
].xyzw
[3].u
[j
]);
/* Same trace for the outputs, skipped when mach->Outputs is null. */
6250 if (mach
->Outputs
) {
6251 for (i
= 0; i
< PIPE_MAX_ATTRIBS
; i
++) {
6252 if (memcmp(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]))) {
6255 memcpy(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]));
6256 debug_printf("OUT[%2u] = ", i
);
6257 for (j
= 0; j
< 4; j
++) {
6261 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6262 outputs
[i
].xyzw
[0].f
[j
], outputs
[i
].xyzw
[0].u
[j
],
6263 outputs
[i
].xyzw
[1].f
[j
], outputs
[i
].xyzw
[1].u
[j
],
6264 outputs
[i
].xyzw
[2].f
[j
], outputs
[i
].xyzw
[2].u
[j
],
6265 outputs
[i
].xyzw
[3].f
[j
], outputs
[i
].xyzw
[3].u
[j
]);
/*
 * NOTE(review): ctx is not declared anywhere in the visible code, so the
 * depth-scale fragment below is presumably compiled out by a guard that
 * this extract does not show - TODO confirm.
 */
6275 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
6276 if (mach
->ShaderType
== PIPE_SHADER_FRAGMENT
) {
6278 * Scale back depth component.
6280 for (i
= 0; i
< 4; i
++)
6281 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
6285 /* Strictly speaking, these assertions aren't really needed but they
6286 * can potentially catch some bugs in the control flow code.
6288 assert(mach
->CondStackTop
== 0);
6289 assert(mach
->LoopStackTop
== 0);
6290 assert(mach
->ContStackTop
== 0);
6291 assert(mach
->SwitchStackTop
== 0);
6292 assert(mach
->BreakStackTop
== 0);
6293 assert(mach
->CallStackTop
== 0);
6295 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];