1 /**************************************************************************
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
30 * TGSI interpreter/executor.
32 * Flow control information:
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
 * The ExecMask is computed from several other masks (CondMask, LoopMask,
 * ContMask, Switch.mask and FuncMask; see UPDATE_EXEC_MASK), which are
 * controlled by the flow control instructions (for example IF/ELSE/ENDIF,
 * LOOP/ENDLOOP and CONT).
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
67 #define TILE_TOP_LEFT 0
68 #define TILE_TOP_RIGHT 1
69 #define TILE_BOTTOM_LEFT 2
70 #define TILE_BOTTOM_RIGHT 3
73 micro_abs(union tgsi_exec_channel
*dst
,
74 const union tgsi_exec_channel
*src
)
76 dst
->f
[0] = fabsf(src
->f
[0]);
77 dst
->f
[1] = fabsf(src
->f
[1]);
78 dst
->f
[2] = fabsf(src
->f
[2]);
79 dst
->f
[3] = fabsf(src
->f
[3]);
83 micro_arl(union tgsi_exec_channel
*dst
,
84 const union tgsi_exec_channel
*src
)
86 dst
->i
[0] = (int)floorf(src
->f
[0]);
87 dst
->i
[1] = (int)floorf(src
->f
[1]);
88 dst
->i
[2] = (int)floorf(src
->f
[2]);
89 dst
->i
[3] = (int)floorf(src
->f
[3]);
93 micro_arr(union tgsi_exec_channel
*dst
,
94 const union tgsi_exec_channel
*src
)
96 dst
->i
[0] = (int)floorf(src
->f
[0] + 0.5f
);
97 dst
->i
[1] = (int)floorf(src
->f
[1] + 0.5f
);
98 dst
->i
[2] = (int)floorf(src
->f
[2] + 0.5f
);
99 dst
->i
[3] = (int)floorf(src
->f
[3] + 0.5f
);
103 micro_ceil(union tgsi_exec_channel
*dst
,
104 const union tgsi_exec_channel
*src
)
106 dst
->f
[0] = ceilf(src
->f
[0]);
107 dst
->f
[1] = ceilf(src
->f
[1]);
108 dst
->f
[2] = ceilf(src
->f
[2]);
109 dst
->f
[3] = ceilf(src
->f
[3]);
113 micro_clamp(union tgsi_exec_channel
*dst
,
114 const union tgsi_exec_channel
*src0
,
115 const union tgsi_exec_channel
*src1
,
116 const union tgsi_exec_channel
*src2
)
118 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src1
->f
[0] : src0
->f
[0] > src2
->f
[0] ? src2
->f
[0] : src0
->f
[0];
119 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src1
->f
[1] : src0
->f
[1] > src2
->f
[1] ? src2
->f
[1] : src0
->f
[1];
120 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src1
->f
[2] : src0
->f
[2] > src2
->f
[2] ? src2
->f
[2] : src0
->f
[2];
121 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src1
->f
[3] : src0
->f
[3] > src2
->f
[3] ? src2
->f
[3] : src0
->f
[3];
125 micro_cmp(union tgsi_exec_channel
*dst
,
126 const union tgsi_exec_channel
*src0
,
127 const union tgsi_exec_channel
*src1
,
128 const union tgsi_exec_channel
*src2
)
130 dst
->f
[0] = src0
->f
[0] < 0.0f
? src1
->f
[0] : src2
->f
[0];
131 dst
->f
[1] = src0
->f
[1] < 0.0f
? src1
->f
[1] : src2
->f
[1];
132 dst
->f
[2] = src0
->f
[2] < 0.0f
? src1
->f
[2] : src2
->f
[2];
133 dst
->f
[3] = src0
->f
[3] < 0.0f
? src1
->f
[3] : src2
->f
[3];
137 micro_cnd(union tgsi_exec_channel
*dst
,
138 const union tgsi_exec_channel
*src0
,
139 const union tgsi_exec_channel
*src1
,
140 const union tgsi_exec_channel
*src2
)
142 dst
->f
[0] = src2
->f
[0] > 0.5f
? src0
->f
[0] : src1
->f
[0];
143 dst
->f
[1] = src2
->f
[1] > 0.5f
? src0
->f
[1] : src1
->f
[1];
144 dst
->f
[2] = src2
->f
[2] > 0.5f
? src0
->f
[2] : src1
->f
[2];
145 dst
->f
[3] = src2
->f
[3] > 0.5f
? src0
->f
[3] : src1
->f
[3];
149 micro_cos(union tgsi_exec_channel
*dst
,
150 const union tgsi_exec_channel
*src
)
152 dst
->f
[0] = cosf(src
->f
[0]);
153 dst
->f
[1] = cosf(src
->f
[1]);
154 dst
->f
[2] = cosf(src
->f
[2]);
155 dst
->f
[3] = cosf(src
->f
[3]);
159 micro_ddx(union tgsi_exec_channel
*dst
,
160 const union tgsi_exec_channel
*src
)
165 dst
->f
[3] = src
->f
[TILE_BOTTOM_RIGHT
] - src
->f
[TILE_BOTTOM_LEFT
];
169 micro_ddy(union tgsi_exec_channel
*dst
,
170 const union tgsi_exec_channel
*src
)
175 dst
->f
[3] = src
->f
[TILE_BOTTOM_LEFT
] - src
->f
[TILE_TOP_LEFT
];
179 micro_exp2(union tgsi_exec_channel
*dst
,
180 const union tgsi_exec_channel
*src
)
183 dst
->f
[0] = util_fast_exp2(src
->f
[0]);
184 dst
->f
[1] = util_fast_exp2(src
->f
[1]);
185 dst
->f
[2] = util_fast_exp2(src
->f
[2]);
186 dst
->f
[3] = util_fast_exp2(src
->f
[3]);
189 /* Inf is okay for this instruction, so clamp it to silence assertions. */
191 union tgsi_exec_channel clamped
;
193 for (i
= 0; i
< 4; i
++) {
194 if (src
->f
[i
] > 127.99999f
) {
195 clamped
.f
[i
] = 127.99999f
;
196 } else if (src
->f
[i
] < -126.99999f
) {
197 clamped
.f
[i
] = -126.99999f
;
199 clamped
.f
[i
] = src
->f
[i
];
205 dst
->f
[0] = powf(2.0f
, src
->f
[0]);
206 dst
->f
[1] = powf(2.0f
, src
->f
[1]);
207 dst
->f
[2] = powf(2.0f
, src
->f
[2]);
208 dst
->f
[3] = powf(2.0f
, src
->f
[3]);
209 #endif /* FAST_MATH */
213 micro_flr(union tgsi_exec_channel
*dst
,
214 const union tgsi_exec_channel
*src
)
216 dst
->f
[0] = floorf(src
->f
[0]);
217 dst
->f
[1] = floorf(src
->f
[1]);
218 dst
->f
[2] = floorf(src
->f
[2]);
219 dst
->f
[3] = floorf(src
->f
[3]);
223 micro_frc(union tgsi_exec_channel
*dst
,
224 const union tgsi_exec_channel
*src
)
226 dst
->f
[0] = src
->f
[0] - floorf(src
->f
[0]);
227 dst
->f
[1] = src
->f
[1] - floorf(src
->f
[1]);
228 dst
->f
[2] = src
->f
[2] - floorf(src
->f
[2]);
229 dst
->f
[3] = src
->f
[3] - floorf(src
->f
[3]);
233 micro_iabs(union tgsi_exec_channel
*dst
,
234 const union tgsi_exec_channel
*src
)
236 dst
->i
[0] = src
->i
[0] >= 0 ? src
->i
[0] : -src
->i
[0];
237 dst
->i
[1] = src
->i
[1] >= 0 ? src
->i
[1] : -src
->i
[1];
238 dst
->i
[2] = src
->i
[2] >= 0 ? src
->i
[2] : -src
->i
[2];
239 dst
->i
[3] = src
->i
[3] >= 0 ? src
->i
[3] : -src
->i
[3];
243 micro_ineg(union tgsi_exec_channel
*dst
,
244 const union tgsi_exec_channel
*src
)
246 dst
->i
[0] = -src
->i
[0];
247 dst
->i
[1] = -src
->i
[1];
248 dst
->i
[2] = -src
->i
[2];
249 dst
->i
[3] = -src
->i
[3];
253 micro_lg2(union tgsi_exec_channel
*dst
,
254 const union tgsi_exec_channel
*src
)
257 dst
->f
[0] = util_fast_log2(src
->f
[0]);
258 dst
->f
[1] = util_fast_log2(src
->f
[1]);
259 dst
->f
[2] = util_fast_log2(src
->f
[2]);
260 dst
->f
[3] = util_fast_log2(src
->f
[3]);
262 dst
->f
[0] = logf(src
->f
[0]) * 1.442695f
;
263 dst
->f
[1] = logf(src
->f
[1]) * 1.442695f
;
264 dst
->f
[2] = logf(src
->f
[2]) * 1.442695f
;
265 dst
->f
[3] = logf(src
->f
[3]) * 1.442695f
;
270 micro_lrp(union tgsi_exec_channel
*dst
,
271 const union tgsi_exec_channel
*src0
,
272 const union tgsi_exec_channel
*src1
,
273 const union tgsi_exec_channel
*src2
)
275 dst
->f
[0] = src0
->f
[0] * (src1
->f
[0] - src2
->f
[0]) + src2
->f
[0];
276 dst
->f
[1] = src0
->f
[1] * (src1
->f
[1] - src2
->f
[1]) + src2
->f
[1];
277 dst
->f
[2] = src0
->f
[2] * (src1
->f
[2] - src2
->f
[2]) + src2
->f
[2];
278 dst
->f
[3] = src0
->f
[3] * (src1
->f
[3] - src2
->f
[3]) + src2
->f
[3];
282 micro_mad(union tgsi_exec_channel
*dst
,
283 const union tgsi_exec_channel
*src0
,
284 const union tgsi_exec_channel
*src1
,
285 const union tgsi_exec_channel
*src2
)
287 dst
->f
[0] = src0
->f
[0] * src1
->f
[0] + src2
->f
[0];
288 dst
->f
[1] = src0
->f
[1] * src1
->f
[1] + src2
->f
[1];
289 dst
->f
[2] = src0
->f
[2] * src1
->f
[2] + src2
->f
[2];
290 dst
->f
[3] = src0
->f
[3] * src1
->f
[3] + src2
->f
[3];
294 micro_mov(union tgsi_exec_channel
*dst
,
295 const union tgsi_exec_channel
*src
)
297 dst
->u
[0] = src
->u
[0];
298 dst
->u
[1] = src
->u
[1];
299 dst
->u
[2] = src
->u
[2];
300 dst
->u
[3] = src
->u
[3];
304 micro_rcp(union tgsi_exec_channel
*dst
,
305 const union tgsi_exec_channel
*src
)
307 #if 0 /* for debugging */
308 assert(src
->f
[0] != 0.0f
);
309 assert(src
->f
[1] != 0.0f
);
310 assert(src
->f
[2] != 0.0f
);
311 assert(src
->f
[3] != 0.0f
);
313 dst
->f
[0] = 1.0f
/ src
->f
[0];
314 dst
->f
[1] = 1.0f
/ src
->f
[1];
315 dst
->f
[2] = 1.0f
/ src
->f
[2];
316 dst
->f
[3] = 1.0f
/ src
->f
[3];
320 micro_rnd(union tgsi_exec_channel
*dst
,
321 const union tgsi_exec_channel
*src
)
323 dst
->f
[0] = floorf(src
->f
[0] + 0.5f
);
324 dst
->f
[1] = floorf(src
->f
[1] + 0.5f
);
325 dst
->f
[2] = floorf(src
->f
[2] + 0.5f
);
326 dst
->f
[3] = floorf(src
->f
[3] + 0.5f
);
330 micro_rsq(union tgsi_exec_channel
*dst
,
331 const union tgsi_exec_channel
*src
)
333 #if 0 /* for debugging */
334 assert(src
->f
[0] != 0.0f
);
335 assert(src
->f
[1] != 0.0f
);
336 assert(src
->f
[2] != 0.0f
);
337 assert(src
->f
[3] != 0.0f
);
339 dst
->f
[0] = 1.0f
/ sqrtf(fabsf(src
->f
[0]));
340 dst
->f
[1] = 1.0f
/ sqrtf(fabsf(src
->f
[1]));
341 dst
->f
[2] = 1.0f
/ sqrtf(fabsf(src
->f
[2]));
342 dst
->f
[3] = 1.0f
/ sqrtf(fabsf(src
->f
[3]));
346 micro_seq(union tgsi_exec_channel
*dst
,
347 const union tgsi_exec_channel
*src0
,
348 const union tgsi_exec_channel
*src1
)
350 dst
->f
[0] = src0
->f
[0] == src1
->f
[0] ? 1.0f
: 0.0f
;
351 dst
->f
[1] = src0
->f
[1] == src1
->f
[1] ? 1.0f
: 0.0f
;
352 dst
->f
[2] = src0
->f
[2] == src1
->f
[2] ? 1.0f
: 0.0f
;
353 dst
->f
[3] = src0
->f
[3] == src1
->f
[3] ? 1.0f
: 0.0f
;
357 micro_sge(union tgsi_exec_channel
*dst
,
358 const union tgsi_exec_channel
*src0
,
359 const union tgsi_exec_channel
*src1
)
361 dst
->f
[0] = src0
->f
[0] >= src1
->f
[0] ? 1.0f
: 0.0f
;
362 dst
->f
[1] = src0
->f
[1] >= src1
->f
[1] ? 1.0f
: 0.0f
;
363 dst
->f
[2] = src0
->f
[2] >= src1
->f
[2] ? 1.0f
: 0.0f
;
364 dst
->f
[3] = src0
->f
[3] >= src1
->f
[3] ? 1.0f
: 0.0f
;
368 micro_sgn(union tgsi_exec_channel
*dst
,
369 const union tgsi_exec_channel
*src
)
371 dst
->f
[0] = src
->f
[0] < 0.0f
? -1.0f
: src
->f
[0] > 0.0f
? 1.0f
: 0.0f
;
372 dst
->f
[1] = src
->f
[1] < 0.0f
? -1.0f
: src
->f
[1] > 0.0f
? 1.0f
: 0.0f
;
373 dst
->f
[2] = src
->f
[2] < 0.0f
? -1.0f
: src
->f
[2] > 0.0f
? 1.0f
: 0.0f
;
374 dst
->f
[3] = src
->f
[3] < 0.0f
? -1.0f
: src
->f
[3] > 0.0f
? 1.0f
: 0.0f
;
378 micro_isgn(union tgsi_exec_channel
*dst
,
379 const union tgsi_exec_channel
*src
)
381 dst
->i
[0] = src
->i
[0] < 0 ? -1 : src
->i
[0] > 0 ? 1 : 0;
382 dst
->i
[1] = src
->i
[1] < 0 ? -1 : src
->i
[1] > 0 ? 1 : 0;
383 dst
->i
[2] = src
->i
[2] < 0 ? -1 : src
->i
[2] > 0 ? 1 : 0;
384 dst
->i
[3] = src
->i
[3] < 0 ? -1 : src
->i
[3] > 0 ? 1 : 0;
388 micro_sgt(union tgsi_exec_channel
*dst
,
389 const union tgsi_exec_channel
*src0
,
390 const union tgsi_exec_channel
*src1
)
392 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? 1.0f
: 0.0f
;
393 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? 1.0f
: 0.0f
;
394 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? 1.0f
: 0.0f
;
395 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? 1.0f
: 0.0f
;
399 micro_sin(union tgsi_exec_channel
*dst
,
400 const union tgsi_exec_channel
*src
)
402 dst
->f
[0] = sinf(src
->f
[0]);
403 dst
->f
[1] = sinf(src
->f
[1]);
404 dst
->f
[2] = sinf(src
->f
[2]);
405 dst
->f
[3] = sinf(src
->f
[3]);
409 micro_sle(union tgsi_exec_channel
*dst
,
410 const union tgsi_exec_channel
*src0
,
411 const union tgsi_exec_channel
*src1
)
413 dst
->f
[0] = src0
->f
[0] <= src1
->f
[0] ? 1.0f
: 0.0f
;
414 dst
->f
[1] = src0
->f
[1] <= src1
->f
[1] ? 1.0f
: 0.0f
;
415 dst
->f
[2] = src0
->f
[2] <= src1
->f
[2] ? 1.0f
: 0.0f
;
416 dst
->f
[3] = src0
->f
[3] <= src1
->f
[3] ? 1.0f
: 0.0f
;
420 micro_slt(union tgsi_exec_channel
*dst
,
421 const union tgsi_exec_channel
*src0
,
422 const union tgsi_exec_channel
*src1
)
424 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? 1.0f
: 0.0f
;
425 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? 1.0f
: 0.0f
;
426 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? 1.0f
: 0.0f
;
427 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? 1.0f
: 0.0f
;
431 micro_sne(union tgsi_exec_channel
*dst
,
432 const union tgsi_exec_channel
*src0
,
433 const union tgsi_exec_channel
*src1
)
435 dst
->f
[0] = src0
->f
[0] != src1
->f
[0] ? 1.0f
: 0.0f
;
436 dst
->f
[1] = src0
->f
[1] != src1
->f
[1] ? 1.0f
: 0.0f
;
437 dst
->f
[2] = src0
->f
[2] != src1
->f
[2] ? 1.0f
: 0.0f
;
438 dst
->f
[3] = src0
->f
[3] != src1
->f
[3] ? 1.0f
: 0.0f
;
442 micro_sfl(union tgsi_exec_channel
*dst
)
451 micro_str(union tgsi_exec_channel
*dst
)
460 micro_trunc(union tgsi_exec_channel
*dst
,
461 const union tgsi_exec_channel
*src
)
463 dst
->f
[0] = (float)(int)src
->f
[0];
464 dst
->f
[1] = (float)(int)src
->f
[1];
465 dst
->f
[2] = (float)(int)src
->f
[2];
466 dst
->f
[3] = (float)(int)src
->f
[3];
470 enum tgsi_exec_datatype
{
471 TGSI_EXEC_DATA_FLOAT
,
477 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
479 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
480 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
481 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
482 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
483 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
484 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
/**
 * Recompute MACH->ExecMask as the bitwise AND of the CondMask, LoopMask,
 * ContMask, Switch.mask and FuncMask fields of the same machine.
 *
 * MACH may be any pointer-valued expression; it is evaluated several
 * times, so it must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
492 static const union tgsi_exec_channel ZeroVec
=
493 { { 0.0, 0.0, 0.0, 0.0 } };
495 static const union tgsi_exec_channel OneVec
= {
496 {1.0f
, 1.0f
, 1.0f
, 1.0f
}
499 static const union tgsi_exec_channel P128Vec
= {
500 {128.0f
, 128.0f
, 128.0f
, 128.0f
}
503 static const union tgsi_exec_channel M128Vec
= {
504 {-128.0f
, -128.0f
, -128.0f
, -128.0f
}
509 * Assert that none of the float values in 'chan' are infinite or NaN.
510 * NaN and Inf may occur normally during program execution and should
 * not lead to crashes, etc.  But when debugging, it's helpful to catch them.
515 check_inf_or_nan(const union tgsi_exec_channel
*chan
)
517 assert(!util_is_inf_or_nan((chan
)->f
[0]));
518 assert(!util_is_inf_or_nan((chan
)->f
[1]));
519 assert(!util_is_inf_or_nan((chan
)->f
[2]));
520 assert(!util_is_inf_or_nan((chan
)->f
[3]));
526 print_chan(const char *msg
, const union tgsi_exec_channel
*chan
)
528 debug_printf("%s = {%f, %f, %f, %f}\n",
529 msg
, chan
->f
[0], chan
->f
[1], chan
->f
[2], chan
->f
[3]);
536 print_temp(const struct tgsi_exec_machine
*mach
, uint index
)
538 const struct tgsi_exec_vector
*tmp
= &mach
->Temps
[index
];
540 debug_printf("Temp[%u] =\n", index
);
541 for (i
= 0; i
< 4; i
++) {
542 debug_printf(" %c: { %f, %f, %f, %f }\n",
554 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine
*mach
,
557 const unsigned *buf_sizes
)
561 for (i
= 0; i
< num_bufs
; i
++) {
562 mach
->Consts
[i
] = bufs
[i
];
563 mach
->ConstsSize
[i
] = buf_sizes
[i
];
569 * Check if there's a potential src/dst register data dependency when
570 * using SOA execution.
573 * This would expand into:
578 * The second instruction will have the wrong value for t0 if executed as-is.
581 tgsi_check_soa_dependencies(const struct tgsi_full_instruction
*inst
)
585 uint writemask
= inst
->Dst
[0].Register
.WriteMask
;
586 if (writemask
== TGSI_WRITEMASK_X
||
587 writemask
== TGSI_WRITEMASK_Y
||
588 writemask
== TGSI_WRITEMASK_Z
||
589 writemask
== TGSI_WRITEMASK_W
||
590 writemask
== TGSI_WRITEMASK_NONE
) {
591 /* no chance of data dependency */
595 /* loop over src regs */
596 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
597 if ((inst
->Src
[i
].Register
.File
==
598 inst
->Dst
[0].Register
.File
) &&
599 ((inst
->Src
[i
].Register
.Index
==
600 inst
->Dst
[0].Register
.Index
) ||
601 inst
->Src
[i
].Register
.Indirect
||
602 inst
->Dst
[0].Register
.Indirect
)) {
603 /* loop over dest channels */
604 uint channelsWritten
= 0x0;
605 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
606 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
607 /* check if we're reading a channel that's been written */
608 uint swizzle
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[i
], chan
);
609 if (channelsWritten
& (1 << swizzle
)) {
613 channelsWritten
|= (1 << chan
);
623 * Initialize machine state by expanding tokens to full instructions,
624 * allocating temporary storage, setting up constants, etc.
625 * After this, we can call tgsi_exec_machine_run() many times.
628 tgsi_exec_machine_bind_shader(
629 struct tgsi_exec_machine
*mach
,
630 const struct tgsi_token
*tokens
,
632 struct tgsi_sampler
**samplers
)
635 struct tgsi_parse_context parse
;
636 struct tgsi_full_instruction
*instructions
;
637 struct tgsi_full_declaration
*declarations
;
638 uint maxInstructions
= 10, numInstructions
= 0;
639 uint maxDeclarations
= 10, numDeclarations
= 0;
642 tgsi_dump(tokens
, 0);
651 mach
->Tokens
= tokens
;
652 mach
->Samplers
= samplers
;
655 /* unbind and free all */
656 if (mach
->Declarations
) {
657 FREE( mach
->Declarations
);
659 mach
->Declarations
= NULL
;
660 mach
->NumDeclarations
= 0;
662 if (mach
->Instructions
) {
663 FREE( mach
->Instructions
);
665 mach
->Instructions
= NULL
;
666 mach
->NumInstructions
= 0;
671 k
= tgsi_parse_init (&parse
, mach
->Tokens
);
672 if (k
!= TGSI_PARSE_OK
) {
673 debug_printf( "Problem parsing!\n" );
677 mach
->Processor
= parse
.FullHeader
.Processor
.Processor
;
680 if (mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
&&
681 !mach
->UsedGeometryShader
) {
682 struct tgsi_exec_vector
*inputs
;
683 struct tgsi_exec_vector
*outputs
;
685 inputs
= align_malloc(sizeof(struct tgsi_exec_vector
) *
686 TGSI_MAX_PRIM_VERTICES
* PIPE_MAX_ATTRIBS
,
692 outputs
= align_malloc(sizeof(struct tgsi_exec_vector
) *
693 TGSI_MAX_TOTAL_VERTICES
, 16);
700 align_free(mach
->Inputs
);
701 align_free(mach
->Outputs
);
703 mach
->Inputs
= inputs
;
704 mach
->Outputs
= outputs
;
705 mach
->UsedGeometryShader
= TRUE
;
708 declarations
= (struct tgsi_full_declaration
*)
709 MALLOC( maxDeclarations
* sizeof(struct tgsi_full_declaration
) );
715 instructions
= (struct tgsi_full_instruction
*)
716 MALLOC( maxInstructions
* sizeof(struct tgsi_full_instruction
) );
719 FREE( declarations
);
723 while( !tgsi_parse_end_of_tokens( &parse
) ) {
726 tgsi_parse_token( &parse
);
727 switch( parse
.FullToken
.Token
.Type
) {
728 case TGSI_TOKEN_TYPE_DECLARATION
:
729 /* save expanded declaration */
730 if (numDeclarations
== maxDeclarations
) {
731 declarations
= REALLOC(declarations
,
733 * sizeof(struct tgsi_full_declaration
),
734 (maxDeclarations
+ 10)
735 * sizeof(struct tgsi_full_declaration
));
736 maxDeclarations
+= 10;
738 if (parse
.FullToken
.FullDeclaration
.Declaration
.File
== TGSI_FILE_OUTPUT
) {
740 for (reg
= parse
.FullToken
.FullDeclaration
.Range
.First
;
741 reg
<= parse
.FullToken
.FullDeclaration
.Range
.Last
;
746 if (parse
.FullToken
.FullDeclaration
.Declaration
.File
==
747 TGSI_FILE_IMMEDIATE_ARRAY
) {
749 struct tgsi_full_declaration
*decl
=
750 &parse
.FullToken
.FullDeclaration
;
751 debug_assert(decl
->Range
.Last
< TGSI_EXEC_NUM_IMMEDIATES
);
752 for (reg
= decl
->Range
.First
; reg
<= decl
->Range
.Last
; ++reg
) {
753 for( i
= 0; i
< 4; i
++ ) {
754 int idx
= reg
* 4 + i
;
755 mach
->ImmArray
[reg
][i
] = decl
->ImmediateData
.u
[idx
].Float
;
759 memcpy(declarations
+ numDeclarations
,
760 &parse
.FullToken
.FullDeclaration
,
761 sizeof(declarations
[0]));
765 case TGSI_TOKEN_TYPE_IMMEDIATE
:
767 uint size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
769 assert( mach
->ImmLimit
+ 1 <= TGSI_EXEC_NUM_IMMEDIATES
);
771 for( i
= 0; i
< size
; i
++ ) {
772 mach
->Imms
[mach
->ImmLimit
][i
] =
773 parse
.FullToken
.FullImmediate
.u
[i
].Float
;
779 case TGSI_TOKEN_TYPE_INSTRUCTION
:
781 /* save expanded instruction */
782 if (numInstructions
== maxInstructions
) {
783 instructions
= REALLOC(instructions
,
785 * sizeof(struct tgsi_full_instruction
),
786 (maxInstructions
+ 10)
787 * sizeof(struct tgsi_full_instruction
));
788 maxInstructions
+= 10;
791 memcpy(instructions
+ numInstructions
,
792 &parse
.FullToken
.FullInstruction
,
793 sizeof(instructions
[0]));
798 case TGSI_TOKEN_TYPE_PROPERTY
:
805 tgsi_parse_free (&parse
);
807 if (mach
->Declarations
) {
808 FREE( mach
->Declarations
);
810 mach
->Declarations
= declarations
;
811 mach
->NumDeclarations
= numDeclarations
;
813 if (mach
->Instructions
) {
814 FREE( mach
->Instructions
);
816 mach
->Instructions
= instructions
;
817 mach
->NumInstructions
= numInstructions
;
821 struct tgsi_exec_machine
*
822 tgsi_exec_machine_create( void )
824 struct tgsi_exec_machine
*mach
;
827 mach
= align_malloc( sizeof *mach
, 16 );
831 memset(mach
, 0, sizeof(*mach
));
833 mach
->Addrs
= &mach
->Temps
[TGSI_EXEC_TEMP_ADDR
];
834 mach
->MaxGeometryShaderOutputs
= TGSI_MAX_TOTAL_VERTICES
;
835 mach
->Predicates
= &mach
->Temps
[TGSI_EXEC_TEMP_P0
];
837 mach
->Inputs
= align_malloc(sizeof(struct tgsi_exec_vector
) * PIPE_MAX_ATTRIBS
, 16);
838 mach
->Outputs
= align_malloc(sizeof(struct tgsi_exec_vector
) * PIPE_MAX_ATTRIBS
, 16);
839 if (!mach
->Inputs
|| !mach
->Outputs
)
842 /* Setup constants needed by the SSE2 executor. */
843 for( i
= 0; i
< 4; i
++ ) {
844 mach
->Temps
[TGSI_EXEC_TEMP_00000000_I
].xyzw
[TGSI_EXEC_TEMP_00000000_C
].u
[i
] = 0x00000000;
845 mach
->Temps
[TGSI_EXEC_TEMP_7FFFFFFF_I
].xyzw
[TGSI_EXEC_TEMP_7FFFFFFF_C
].u
[i
] = 0x7FFFFFFF;
846 mach
->Temps
[TGSI_EXEC_TEMP_80000000_I
].xyzw
[TGSI_EXEC_TEMP_80000000_C
].u
[i
] = 0x80000000;
847 mach
->Temps
[TGSI_EXEC_TEMP_FFFFFFFF_I
].xyzw
[TGSI_EXEC_TEMP_FFFFFFFF_C
].u
[i
] = 0xFFFFFFFF; /* not used */
848 mach
->Temps
[TGSI_EXEC_TEMP_ONE_I
].xyzw
[TGSI_EXEC_TEMP_ONE_C
].f
[i
] = 1.0f
;
849 mach
->Temps
[TGSI_EXEC_TEMP_TWO_I
].xyzw
[TGSI_EXEC_TEMP_TWO_C
].f
[i
] = 2.0f
; /* not used */
850 mach
->Temps
[TGSI_EXEC_TEMP_128_I
].xyzw
[TGSI_EXEC_TEMP_128_C
].f
[i
] = 128.0f
;
851 mach
->Temps
[TGSI_EXEC_TEMP_MINUS_128_I
].xyzw
[TGSI_EXEC_TEMP_MINUS_128_C
].f
[i
] = -128.0f
;
852 mach
->Temps
[TGSI_EXEC_TEMP_THREE_I
].xyzw
[TGSI_EXEC_TEMP_THREE_C
].f
[i
] = 3.0f
;
853 mach
->Temps
[TGSI_EXEC_TEMP_HALF_I
].xyzw
[TGSI_EXEC_TEMP_HALF_C
].f
[i
] = 0.5f
;
857 /* silence warnings */
866 align_free(mach
->Inputs
);
867 align_free(mach
->Outputs
);
875 tgsi_exec_machine_destroy(struct tgsi_exec_machine
*mach
)
878 if (mach
->Instructions
)
879 FREE(mach
->Instructions
);
880 if (mach
->Declarations
)
881 FREE(mach
->Declarations
);
883 align_free(mach
->Inputs
);
884 align_free(mach
->Outputs
);
891 micro_add(union tgsi_exec_channel
*dst
,
892 const union tgsi_exec_channel
*src0
,
893 const union tgsi_exec_channel
*src1
)
895 dst
->f
[0] = src0
->f
[0] + src1
->f
[0];
896 dst
->f
[1] = src0
->f
[1] + src1
->f
[1];
897 dst
->f
[2] = src0
->f
[2] + src1
->f
[2];
898 dst
->f
[3] = src0
->f
[3] + src1
->f
[3];
903 union tgsi_exec_channel
*dst
,
904 const union tgsi_exec_channel
*src0
,
905 const union tgsi_exec_channel
*src1
)
907 if (src1
->f
[0] != 0) {
908 dst
->f
[0] = src0
->f
[0] / src1
->f
[0];
910 if (src1
->f
[1] != 0) {
911 dst
->f
[1] = src0
->f
[1] / src1
->f
[1];
913 if (src1
->f
[2] != 0) {
914 dst
->f
[2] = src0
->f
[2] / src1
->f
[2];
916 if (src1
->f
[3] != 0) {
917 dst
->f
[3] = src0
->f
[3] / src1
->f
[3];
922 micro_rcc(union tgsi_exec_channel
*dst
,
923 const union tgsi_exec_channel
*src
)
927 for (i
= 0; i
< 4; i
++) {
928 float recip
= 1.0f
/ src
->f
[i
];
931 if (recip
> 1.884467e+019f
) {
932 dst
->f
[i
] = 1.884467e+019f
;
934 else if (recip
< 5.42101e-020f
) {
935 dst
->f
[i
] = 5.42101e-020f
;
942 if (recip
< -1.884467e+019f
) {
943 dst
->f
[i
] = -1.884467e+019f
;
945 else if (recip
> -5.42101e-020f
) {
946 dst
->f
[i
] = -5.42101e-020f
;
957 union tgsi_exec_channel
*dst
,
958 const union tgsi_exec_channel
*src0
,
959 const union tgsi_exec_channel
*src1
,
960 const union tgsi_exec_channel
*src2
,
961 const union tgsi_exec_channel
*src3
)
963 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src2
->f
[0] : src3
->f
[0];
964 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src2
->f
[1] : src3
->f
[1];
965 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src2
->f
[2] : src3
->f
[2];
966 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src2
->f
[3] : src3
->f
[3];
970 micro_max(union tgsi_exec_channel
*dst
,
971 const union tgsi_exec_channel
*src0
,
972 const union tgsi_exec_channel
*src1
)
974 dst
->f
[0] = src0
->f
[0] > src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
975 dst
->f
[1] = src0
->f
[1] > src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
976 dst
->f
[2] = src0
->f
[2] > src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
977 dst
->f
[3] = src0
->f
[3] > src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
981 micro_min(union tgsi_exec_channel
*dst
,
982 const union tgsi_exec_channel
*src0
,
983 const union tgsi_exec_channel
*src1
)
985 dst
->f
[0] = src0
->f
[0] < src1
->f
[0] ? src0
->f
[0] : src1
->f
[0];
986 dst
->f
[1] = src0
->f
[1] < src1
->f
[1] ? src0
->f
[1] : src1
->f
[1];
987 dst
->f
[2] = src0
->f
[2] < src1
->f
[2] ? src0
->f
[2] : src1
->f
[2];
988 dst
->f
[3] = src0
->f
[3] < src1
->f
[3] ? src0
->f
[3] : src1
->f
[3];
992 micro_mul(union tgsi_exec_channel
*dst
,
993 const union tgsi_exec_channel
*src0
,
994 const union tgsi_exec_channel
*src1
)
996 dst
->f
[0] = src0
->f
[0] * src1
->f
[0];
997 dst
->f
[1] = src0
->f
[1] * src1
->f
[1];
998 dst
->f
[2] = src0
->f
[2] * src1
->f
[2];
999 dst
->f
[3] = src0
->f
[3] * src1
->f
[3];
1004 union tgsi_exec_channel
*dst
,
1005 const union tgsi_exec_channel
*src
)
1007 dst
->f
[0] = -src
->f
[0];
1008 dst
->f
[1] = -src
->f
[1];
1009 dst
->f
[2] = -src
->f
[2];
1010 dst
->f
[3] = -src
->f
[3];
1015 union tgsi_exec_channel
*dst
,
1016 const union tgsi_exec_channel
*src0
,
1017 const union tgsi_exec_channel
*src1
)
1020 dst
->f
[0] = util_fast_pow( src0
->f
[0], src1
->f
[0] );
1021 dst
->f
[1] = util_fast_pow( src0
->f
[1], src1
->f
[1] );
1022 dst
->f
[2] = util_fast_pow( src0
->f
[2], src1
->f
[2] );
1023 dst
->f
[3] = util_fast_pow( src0
->f
[3], src1
->f
[3] );
1025 dst
->f
[0] = powf( src0
->f
[0], src1
->f
[0] );
1026 dst
->f
[1] = powf( src0
->f
[1], src1
->f
[1] );
1027 dst
->f
[2] = powf( src0
->f
[2], src1
->f
[2] );
1028 dst
->f
[3] = powf( src0
->f
[3], src1
->f
[3] );
1033 micro_sub(union tgsi_exec_channel
*dst
,
1034 const union tgsi_exec_channel
*src0
,
1035 const union tgsi_exec_channel
*src1
)
1037 dst
->f
[0] = src0
->f
[0] - src1
->f
[0];
1038 dst
->f
[1] = src0
->f
[1] - src1
->f
[1];
1039 dst
->f
[2] = src0
->f
[2] - src1
->f
[2];
1040 dst
->f
[3] = src0
->f
[3] - src1
->f
[3];
1044 fetch_src_file_channel(const struct tgsi_exec_machine
*mach
,
1045 const uint chan_index
,
1048 const union tgsi_exec_channel
*index
,
1049 const union tgsi_exec_channel
*index2D
,
1050 union tgsi_exec_channel
*chan
)
1054 assert(swizzle
< 4);
1057 case TGSI_FILE_CONSTANT
:
1058 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1059 assert(index2D
->i
[i
] >= 0 && index2D
->i
[i
] < PIPE_MAX_CONSTANT_BUFFERS
);
1060 assert(mach
->Consts
[index2D
->i
[i
]]);
1062 if (index
->i
[i
] < 0) {
1065 /* NOTE: copying the const value as a uint instead of float */
1066 const uint constbuf
= index2D
->i
[i
];
1067 const uint
*buf
= (const uint
*)mach
->Consts
[constbuf
];
1068 const int pos
= index
->i
[i
] * 4 + swizzle
;
1069 /* const buffer bounds check */
1070 if (pos
< 0 || pos
>= mach
->ConstsSize
[constbuf
]) {
1072 /* Debug: print warning */
1073 static int count
= 0;
1075 debug_printf("TGSI Exec: const buffer index %d"
1076 " out of bounds\n", pos
);
1081 chan
->u
[i
] = buf
[pos
];
1086 case TGSI_FILE_INPUT
:
1087 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1089 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1090 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1091 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1092 index2D->i[i], index->i[i]);
1094 int pos
= index2D
->i
[i
] * TGSI_EXEC_MAX_INPUT_ATTRIBS
+ index
->i
[i
];
1096 assert(pos
< TGSI_MAX_PRIM_VERTICES
* PIPE_MAX_ATTRIBS
);
1097 chan
->u
[i
] = mach
->Inputs
[pos
].xyzw
[swizzle
].u
[i
];
1101 case TGSI_FILE_SYSTEM_VALUE
:
1102 /* XXX no swizzling at this point. Will be needed if we put
1103 * gl_FragCoord, for example, in a sys value register.
1105 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1106 chan
->u
[i
] = mach
->SystemValue
[index
->i
[i
]].u
[i
];
1110 case TGSI_FILE_TEMPORARY
:
1111 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1112 assert(index
->i
[i
] < TGSI_EXEC_NUM_TEMPS
);
1113 assert(index2D
->i
[i
] == 0);
1115 chan
->u
[i
] = mach
->Temps
[index
->i
[i
]].xyzw
[swizzle
].u
[i
];
1119 case TGSI_FILE_TEMPORARY_ARRAY
:
1120 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1121 assert(index
->i
[i
] < TGSI_EXEC_NUM_TEMPS
);
1122 assert(index2D
->i
[i
] < TGSI_EXEC_NUM_TEMP_ARRAYS
);
1125 mach
->TempArray
[index2D
->i
[i
]][index
->i
[i
]].xyzw
[swizzle
].u
[i
];
1129 case TGSI_FILE_IMMEDIATE
:
1130 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1131 assert(index
->i
[i
] >= 0 && index
->i
[i
] < (int)mach
->ImmLimit
);
1132 assert(index2D
->i
[i
] == 0);
1134 chan
->f
[i
] = mach
->Imms
[index
->i
[i
]][swizzle
];
1138 case TGSI_FILE_IMMEDIATE_ARRAY
:
1139 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1140 assert(index2D
->i
[i
] == 0);
1142 chan
->f
[i
] = mach
->ImmArray
[index
->i
[i
]][swizzle
];
1146 case TGSI_FILE_ADDRESS
:
1147 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1148 assert(index
->i
[i
] >= 0);
1149 assert(index2D
->i
[i
] == 0);
1151 chan
->u
[i
] = mach
->Addrs
[index
->i
[i
]].xyzw
[swizzle
].u
[i
];
1155 case TGSI_FILE_PREDICATE
:
1156 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1157 assert(index
->i
[i
] >= 0 && index
->i
[i
] < TGSI_EXEC_NUM_PREDS
);
1158 assert(index2D
->i
[i
] == 0);
1160 chan
->u
[i
] = mach
->Predicates
[0].xyzw
[swizzle
].u
[i
];
1164 case TGSI_FILE_OUTPUT
:
1165 /* vertex/fragment output vars can be read too */
1166 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1167 assert(index
->i
[i
] >= 0);
1168 assert(index2D
->i
[i
] == 0);
1170 chan
->u
[i
] = mach
->Outputs
[index
->i
[i
]].xyzw
[swizzle
].u
[i
];
1176 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1183 fetch_source(const struct tgsi_exec_machine
*mach
,
1184 union tgsi_exec_channel
*chan
,
1185 const struct tgsi_full_src_register
*reg
,
1186 const uint chan_index
,
1187 enum tgsi_exec_datatype src_datatype
)
1189 union tgsi_exec_channel index
;
1190 union tgsi_exec_channel index2D
;
1193 /* We start with a direct index into a register file.
1197 * file = Register.File
1198 * [1] = Register.Index
1203 index
.i
[3] = reg
->Register
.Index
;
1205 /* There is an extra source register that indirectly subscripts
1206 * a register file. The direct index now becomes an offset
1207 * that is being added to the indirect register.
1211 * ind = Indirect.File
1212 * [2] = Indirect.Index
1213 * .x = Indirect.SwizzleX
1215 if (reg
->Register
.Indirect
) {
1216 union tgsi_exec_channel index2
;
1217 union tgsi_exec_channel indir_index
;
1218 const uint execmask
= mach
->ExecMask
;
1221 /* which address register (always zero now) */
1225 index2
.i
[3] = reg
->Indirect
.Index
;
1226 assert(reg
->Indirect
.File
== TGSI_FILE_ADDRESS
);
1227 /* get current value of address register[swizzle] */
1228 swizzle
= tgsi_util_get_src_register_swizzle( ®
->Indirect
, TGSI_CHAN_X
);
1229 fetch_src_file_channel(mach
,
1237 /* add value of address register to the offset */
1238 index
.i
[0] += indir_index
.i
[0];
1239 index
.i
[1] += indir_index
.i
[1];
1240 index
.i
[2] += indir_index
.i
[2];
1241 index
.i
[3] += indir_index
.i
[3];
1243 /* for disabled execution channels, zero-out the index to
1244 * avoid using a potential garbage value.
1246 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1247 if ((execmask
& (1 << i
)) == 0)
1252 /* There is an extra source register that is a second
1253 * subscript to a register file. Effectively it means that
1254 * the register file is actually a 2D array of registers.
1258 * [3] = Dimension.Index
1260 if (reg
->Register
.Dimension
) {
1264 index2D
.i
[3] = reg
->Dimension
.Index
;
1266 /* Again, the second subscript index can be addressed indirectly
1267 * identically to the first one.
1268 * Nothing stops us from indirectly addressing the indirect register,
1269 * but there is no need for that, so we won't exercise it.
1271 * file[ind[4].y+3][1],
1273 * ind = DimIndirect.File
1274 * [4] = DimIndirect.Index
1275 * .y = DimIndirect.SwizzleX
1277 if (reg
->Dimension
.Indirect
) {
1278 union tgsi_exec_channel index2
;
1279 union tgsi_exec_channel indir_index
;
1280 const uint execmask
= mach
->ExecMask
;
1286 index2
.i
[3] = reg
->DimIndirect
.Index
;
1288 swizzle
= tgsi_util_get_src_register_swizzle( ®
->DimIndirect
, TGSI_CHAN_X
);
1289 fetch_src_file_channel(mach
,
1291 reg
->DimIndirect
.File
,
1297 index2D
.i
[0] += indir_index
.i
[0];
1298 index2D
.i
[1] += indir_index
.i
[1];
1299 index2D
.i
[2] += indir_index
.i
[2];
1300 index2D
.i
[3] += indir_index
.i
[3];
1302 /* for disabled execution channels, zero-out the index to
1303 * avoid using a potential garbage value.
1305 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1306 if ((execmask
& (1 << i
)) == 0) {
1312 /* If by any chance there was a need for a 3D array of register
1313 * files, we would have to check whether Dimension is followed
1314 * by a dimension register and continue the saga.
1323 swizzle
= tgsi_util_get_full_src_register_swizzle( reg
, chan_index
);
1324 fetch_src_file_channel(mach
,
1332 if (reg
->Register
.Absolute
) {
1333 if (src_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1334 micro_abs(chan
, chan
);
1336 micro_iabs(chan
, chan
);
1340 if (reg
->Register
.Negate
) {
1341 if (src_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1342 micro_neg(chan
, chan
);
1344 micro_ineg(chan
, chan
);
1350 store_dest(struct tgsi_exec_machine
*mach
,
1351 const union tgsi_exec_channel
*chan
,
1352 const struct tgsi_full_dst_register
*reg
,
1353 const struct tgsi_full_instruction
*inst
,
1355 enum tgsi_exec_datatype dst_datatype
)
1358 union tgsi_exec_channel null
;
1359 union tgsi_exec_channel
*dst
;
1360 union tgsi_exec_channel index2D
;
1361 uint execmask
= mach
->ExecMask
;
1362 int offset
= 0; /* indirection offset */
1366 if (0 && dst_datatype
== TGSI_EXEC_DATA_FLOAT
) {
1367 check_inf_or_nan(chan
);
1370 /* There is an extra source register that indirectly subscripts
1371 * a register file. The direct index now becomes an offset
1372 * that is being added to the indirect register.
1376 * ind = Indirect.File
1377 * [2] = Indirect.Index
1378 * .x = Indirect.SwizzleX
1380 if (reg
->Register
.Indirect
) {
1381 union tgsi_exec_channel index
;
1382 union tgsi_exec_channel indir_index
;
1385 /* which address register (always zero for now) */
1389 index
.i
[3] = reg
->Indirect
.Index
;
1391 /* get current value of address register[swizzle] */
1392 swizzle
= tgsi_util_get_src_register_swizzle( ®
->Indirect
, TGSI_CHAN_X
);
1394 /* fetch values from the address/indirection register */
1395 fetch_src_file_channel(mach
,
1403 /* save indirection offset */
1404 offset
= indir_index
.i
[0];
1407 /* There is an extra source register that is a second
1408 * subscript to a register file. Effectively it means that
1409 * the register file is actually a 2D array of registers.
1413 * [3] = Dimension.Index
1415 if (reg
->Register
.Dimension
) {
1419 index2D
.i
[3] = reg
->Dimension
.Index
;
1421 /* Again, the second subscript index can be addressed indirectly
1422 * identically to the first one.
1423 * Nothing stops us from indirectly addressing the indirect register,
1424 * but there is no need for that, so we won't exercise it.
1426 * file[ind[4].y+3][1],
1428 * ind = DimIndirect.File
1429 * [4] = DimIndirect.Index
1430 * .y = DimIndirect.SwizzleX
1432 if (reg
->Dimension
.Indirect
) {
1433 union tgsi_exec_channel index2
;
1434 union tgsi_exec_channel indir_index
;
1435 const uint execmask
= mach
->ExecMask
;
1442 index2
.i
[3] = reg
->DimIndirect
.Index
;
1444 swizzle
= tgsi_util_get_src_register_swizzle( ®
->DimIndirect
, TGSI_CHAN_X
);
1445 fetch_src_file_channel(mach
,
1447 reg
->DimIndirect
.File
,
1453 index2D
.i
[0] += indir_index
.i
[0];
1454 index2D
.i
[1] += indir_index
.i
[1];
1455 index2D
.i
[2] += indir_index
.i
[2];
1456 index2D
.i
[3] += indir_index
.i
[3];
1458 /* for disabled execution channels, zero-out the index to
1459 * avoid using a potential garbage value.
1461 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1462 if ((execmask
& (1 << i
)) == 0) {
1468 /* If by any chance there was a need for a 3D array of register
1469 * files, we would have to check whether Dimension is followed
1470 * by a dimension register and continue the saga.
1479 switch (reg
->Register
.File
) {
1480 case TGSI_FILE_NULL
:
1484 case TGSI_FILE_OUTPUT
:
1485 index
= mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0]
1486 + reg
->Register
.Index
;
1487 dst
= &mach
->Outputs
[offset
+ index
].xyzw
[chan_index
];
1489 if (TGSI_PROCESSOR_GEOMETRY
== mach
->Processor
) {
1490 fprintf(stderr
, "STORING OUT[%d] mask(%d), = (", offset
+ index
, execmask
);
1491 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++)
1492 if (execmask
& (1 << i
))
1493 fprintf(stderr
, "%f, ", chan
->f
[i
]);
1494 fprintf(stderr
, ")\n");
1499 case TGSI_FILE_TEMPORARY
:
1500 index
= reg
->Register
.Index
;
1501 assert( index
< TGSI_EXEC_NUM_TEMPS
);
1502 dst
= &mach
->Temps
[offset
+ index
].xyzw
[chan_index
];
1505 case TGSI_FILE_TEMPORARY_ARRAY
:
1506 index
= reg
->Register
.Index
;
1507 assert( index
< TGSI_EXEC_NUM_TEMPS
);
1508 assert( index2D
.i
[0] < TGSI_EXEC_NUM_TEMP_ARRAYS
);
1509 /* XXX we use index2D.i[0] here but somehow we might
1510 * end up with someone trying to store indirectly in
1511 * different buffers */
1512 dst
= &mach
->TempArray
[index2D
.i
[0]][offset
+ index
].xyzw
[chan_index
];
1515 case TGSI_FILE_ADDRESS
:
1516 index
= reg
->Register
.Index
;
1517 dst
= &mach
->Addrs
[index
].xyzw
[chan_index
];
1520 case TGSI_FILE_PREDICATE
:
1521 index
= reg
->Register
.Index
;
1522 assert(index
< TGSI_EXEC_NUM_PREDS
);
1523 dst
= &mach
->Predicates
[index
].xyzw
[chan_index
];
1531 if (inst
->Instruction
.Predicate
) {
1533 union tgsi_exec_channel
*pred
;
1535 switch (chan_index
) {
1537 swizzle
= inst
->Predicate
.SwizzleX
;
1540 swizzle
= inst
->Predicate
.SwizzleY
;
1543 swizzle
= inst
->Predicate
.SwizzleZ
;
1546 swizzle
= inst
->Predicate
.SwizzleW
;
1553 assert(inst
->Predicate
.Index
== 0);
1555 pred
= &mach
->Predicates
[inst
->Predicate
.Index
].xyzw
[swizzle
];
1557 if (inst
->Predicate
.Negate
) {
1558 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1560 execmask
&= ~(1 << i
);
1564 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
1566 execmask
&= ~(1 << i
);
1572 switch (inst
->Instruction
.Saturate
) {
1574 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++)
1575 if (execmask
& (1 << i
))
1576 dst
->i
[i
] = chan
->i
[i
];
1579 case TGSI_SAT_ZERO_ONE
:
1580 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++)
1581 if (execmask
& (1 << i
)) {
1582 if (chan
->f
[i
] < 0.0f
)
1584 else if (chan
->f
[i
] > 1.0f
)
1587 dst
->i
[i
] = chan
->i
[i
];
1591 case TGSI_SAT_MINUS_PLUS_ONE
:
1592 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++)
1593 if (execmask
& (1 << i
)) {
1594 if (chan
->f
[i
] < -1.0f
)
1596 else if (chan
->f
[i
] > 1.0f
)
1599 dst
->i
[i
] = chan
->i
[i
];
1608 #define FETCH(VAL,INDEX,CHAN)\
1609 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)
1611 #define IFETCH(VAL,INDEX,CHAN)\
1612 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)
1616 * Execute ARB-style KIL which is predicated by a src register.
1617 * Kill fragment if any of the four values is less than zero.
1620 exec_kil(struct tgsi_exec_machine
*mach
,
1621 const struct tgsi_full_instruction
*inst
)
1625 uint kilmask
= 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1626 union tgsi_exec_channel r
[1];
1628 /* This mask stores component bits that were already tested. */
1631 for (chan_index
= 0; chan_index
< 4; chan_index
++)
1636 /* unswizzle channel */
1637 swizzle
= tgsi_util_get_full_src_register_swizzle (
1641 /* check if the component has not been already tested */
1642 if (uniquemask
& (1 << swizzle
))
1644 uniquemask
|= 1 << swizzle
;
1646 FETCH(&r
[0], 0, chan_index
);
1647 for (i
= 0; i
< 4; i
++)
1648 if (r
[0].f
[i
] < 0.0f
)
1652 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1656 * Execute NVIDIA-style KIL which is predicated by a condition code.
1657 * Kill fragment if the condition code is TRUE.
1660 exec_kilp(struct tgsi_exec_machine
*mach
,
1661 const struct tgsi_full_instruction
*inst
)
1663 uint kilmask
; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1665 /* "unconditional" kil */
1666 kilmask
= mach
->ExecMask
;
1667 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] |= kilmask
;
1671 emit_vertex(struct tgsi_exec_machine
*mach
)
1673 /* FIXME: check for exec mask correctly
1675 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1676 if ((mach->ExecMask & (1 << i)))
1678 if (mach
->ExecMask
) {
1679 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] += mach
->NumOutputs
;
1680 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]]++;
1685 emit_primitive(struct tgsi_exec_machine
*mach
)
1687 unsigned *prim_count
= &mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0];
1688 /* FIXME: check for exec mask correctly
1690 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1691 if ((mach->ExecMask & (1 << i)))
1693 if (mach
->ExecMask
) {
1695 debug_assert((*prim_count
* mach
->NumOutputs
) < mach
->MaxGeometryShaderOutputs
);
1696 mach
->Primitives
[*prim_count
] = 0;
1701 conditional_emit_primitive(struct tgsi_exec_machine
*mach
)
1703 if (TGSI_PROCESSOR_GEOMETRY
== mach
->Processor
) {
1705 mach
->Primitives
[mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0]];
1706 if (emitted_verts
) {
1707 emit_primitive(mach
);
1714 * Fetch four texture samples using STR texture coordinates.
1717 fetch_texel( struct tgsi_sampler
*sampler
,
1718 const union tgsi_exec_channel
*s
,
1719 const union tgsi_exec_channel
*t
,
1720 const union tgsi_exec_channel
*p
,
1721 const union tgsi_exec_channel
*c0
,
1722 enum tgsi_sampler_control control
,
1723 union tgsi_exec_channel
*r
,
1724 union tgsi_exec_channel
*g
,
1725 union tgsi_exec_channel
*b
,
1726 union tgsi_exec_channel
*a
)
1729 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
];
1731 sampler
->get_samples(sampler
, s
->f
, t
->f
, p
->f
, c0
->f
, control
, rgba
);
1733 for (j
= 0; j
< 4; j
++) {
1734 r
->f
[j
] = rgba
[0][j
];
1735 g
->f
[j
] = rgba
[1][j
];
1736 b
->f
[j
] = rgba
[2][j
];
1737 a
->f
[j
] = rgba
[3][j
];
1742 #define TEX_MODIFIER_NONE 0
1743 #define TEX_MODIFIER_PROJECTED 1
1744 #define TEX_MODIFIER_LOD_BIAS 2
1745 #define TEX_MODIFIER_EXPLICIT_LOD 3
1749 exec_tex(struct tgsi_exec_machine
*mach
,
1750 const struct tgsi_full_instruction
*inst
,
1753 const uint unit
= inst
->Src
[1].Register
.Index
;
1754 union tgsi_exec_channel r
[4];
1755 const union tgsi_exec_channel
*lod
= &ZeroVec
;
1756 enum tgsi_sampler_control control
;
1759 if (modifier
!= TEX_MODIFIER_NONE
) {
1760 FETCH(&r
[3], 0, TGSI_CHAN_W
);
1761 if (modifier
!= TEX_MODIFIER_PROJECTED
) {
1766 if (modifier
== TEX_MODIFIER_EXPLICIT_LOD
) {
1767 control
= tgsi_sampler_lod_explicit
;
1769 control
= tgsi_sampler_lod_bias
;
1772 switch (inst
->Texture
.Texture
) {
1773 case TGSI_TEXTURE_1D
:
1774 FETCH(&r
[0], 0, TGSI_CHAN_X
);
1776 if (modifier
== TEX_MODIFIER_PROJECTED
) {
1777 micro_div(&r
[0], &r
[0], &r
[3]);
1780 fetch_texel(mach
->Samplers
[unit
],
1781 &r
[0], &ZeroVec
, &ZeroVec
, lod
, /* S, T, P, LOD */
1783 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1785 case TGSI_TEXTURE_SHADOW1D
:
1786 FETCH(&r
[0], 0, TGSI_CHAN_X
);
1787 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
1789 if (modifier
== TEX_MODIFIER_PROJECTED
) {
1790 micro_div(&r
[0], &r
[0], &r
[3]);
1793 fetch_texel(mach
->Samplers
[unit
],
1794 &r
[0], &ZeroVec
, &r
[2], lod
, /* S, T, P, LOD */
1796 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1799 case TGSI_TEXTURE_2D
:
1800 case TGSI_TEXTURE_RECT
:
1801 case TGSI_TEXTURE_SHADOW2D
:
1802 case TGSI_TEXTURE_SHADOWRECT
:
1803 FETCH(&r
[0], 0, TGSI_CHAN_X
);
1804 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
1805 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
1807 if (modifier
== TEX_MODIFIER_PROJECTED
) {
1808 micro_div(&r
[0], &r
[0], &r
[3]);
1809 micro_div(&r
[1], &r
[1], &r
[3]);
1810 micro_div(&r
[2], &r
[2], &r
[3]);
1813 fetch_texel(mach
->Samplers
[unit
],
1814 &r
[0], &r
[1], &r
[2], lod
, /* S, T, P, LOD */
1816 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1819 case TGSI_TEXTURE_1D_ARRAY
:
1820 FETCH(&r
[0], 0, TGSI_CHAN_X
);
1821 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
1823 if (modifier
== TEX_MODIFIER_PROJECTED
) {
1824 micro_div(&r
[0], &r
[0], &r
[3]);
1827 fetch_texel(mach
->Samplers
[unit
],
1828 &r
[0], &r
[1], &ZeroVec
, lod
, /* S, T, P, LOD */
1830 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1832 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
1833 FETCH(&r
[0], 0, TGSI_CHAN_X
);
1834 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
1835 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
1837 if (modifier
== TEX_MODIFIER_PROJECTED
) {
1838 micro_div(&r
[0], &r
[0], &r
[3]);
1841 fetch_texel(mach
->Samplers
[unit
],
1842 &r
[0], &r
[1], &r
[2], lod
, /* S, T, P, LOD */
1844 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1847 case TGSI_TEXTURE_2D_ARRAY
:
1848 FETCH(&r
[0], 0, TGSI_CHAN_X
);
1849 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
1850 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
1852 if (modifier
== TEX_MODIFIER_PROJECTED
) {
1853 micro_div(&r
[0], &r
[0], &r
[3]);
1854 micro_div(&r
[1], &r
[1], &r
[3]);
1857 fetch_texel(mach
->Samplers
[unit
],
1858 &r
[0], &r
[1], &r
[2], lod
, /* S, T, P, LOD */
1860 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1862 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
1863 case TGSI_TEXTURE_SHADOWCUBE
:
1864 FETCH(&r
[0], 0, TGSI_CHAN_X
);
1865 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
1866 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
1867 FETCH(&r
[3], 0, TGSI_CHAN_W
);
1869 fetch_texel(mach
->Samplers
[unit
],
1870 &r
[0], &r
[1], &r
[2], &r
[3], /* S, T, P, LOD */
1872 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1874 case TGSI_TEXTURE_3D
:
1875 case TGSI_TEXTURE_CUBE
:
1876 FETCH(&r
[0], 0, TGSI_CHAN_X
);
1877 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
1878 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
1880 if (modifier
== TEX_MODIFIER_PROJECTED
) {
1881 micro_div(&r
[0], &r
[0], &r
[3]);
1882 micro_div(&r
[1], &r
[1], &r
[3]);
1883 micro_div(&r
[2], &r
[2], &r
[3]);
1886 fetch_texel(mach
->Samplers
[unit
],
1887 &r
[0], &r
[1], &r
[2], lod
,
1889 &r
[0], &r
[1], &r
[2], &r
[3]);
1897 debug_printf("fetch r: %g %g %g %g\n",
1898 r
[0].f
[0], r
[0].f
[1], r
[0].f
[2], r
[0].f
[3]);
1899 debug_printf("fetch g: %g %g %g %g\n",
1900 r
[1].f
[0], r
[1].f
[1], r
[1].f
[2], r
[1].f
[3]);
1901 debug_printf("fetch b: %g %g %g %g\n",
1902 r
[2].f
[0], r
[2].f
[1], r
[2].f
[2], r
[2].f
[3]);
1903 debug_printf("fetch a: %g %g %g %g\n",
1904 r
[3].f
[0], r
[3].f
[1], r
[3].f
[2], r
[3].f
[3]);
1907 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
1908 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1909 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
1915 exec_txd(struct tgsi_exec_machine
*mach
,
1916 const struct tgsi_full_instruction
*inst
)
1918 const uint unit
= inst
->Src
[3].Register
.Index
;
1919 union tgsi_exec_channel r
[4];
1923 * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
1926 switch (inst
->Texture
.Texture
) {
1927 case TGSI_TEXTURE_1D
:
1928 case TGSI_TEXTURE_SHADOW1D
:
1930 FETCH(&r
[0], 0, TGSI_CHAN_X
);
1932 fetch_texel(mach
->Samplers
[unit
],
1933 &r
[0], &ZeroVec
, &ZeroVec
, &ZeroVec
, /* S, T, P, BIAS */
1934 tgsi_sampler_lod_bias
,
1935 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
1938 case TGSI_TEXTURE_1D_ARRAY
:
1939 case TGSI_TEXTURE_2D
:
1940 case TGSI_TEXTURE_RECT
:
1941 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
1942 case TGSI_TEXTURE_SHADOW2D
:
1943 case TGSI_TEXTURE_SHADOWRECT
:
1945 FETCH(&r
[0], 0, TGSI_CHAN_X
);
1946 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
1947 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
1949 fetch_texel(mach
->Samplers
[unit
],
1950 &r
[0], &r
[1], &r
[2], &ZeroVec
, /* inputs */
1951 tgsi_sampler_lod_bias
,
1952 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
1955 case TGSI_TEXTURE_2D_ARRAY
:
1956 case TGSI_TEXTURE_3D
:
1957 case TGSI_TEXTURE_CUBE
:
1959 FETCH(&r
[0], 0, TGSI_CHAN_X
);
1960 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
1961 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
1963 fetch_texel(mach
->Samplers
[unit
],
1964 &r
[0], &r
[1], &r
[2], &ZeroVec
,
1965 tgsi_sampler_lod_bias
,
1966 &r
[0], &r
[1], &r
[2], &r
[3]);
1969 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
1971 FETCH(&r
[0], 0, TGSI_CHAN_X
);
1972 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
1973 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
1974 FETCH(&r
[3], 0, TGSI_CHAN_W
);
1976 fetch_texel(mach
->Samplers
[unit
],
1977 &r
[0], &r
[1], &r
[2], &r
[3],
1978 tgsi_sampler_lod_bias
,
1979 &r
[0], &r
[1], &r
[2], &r
[3]);
1986 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
1987 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
1988 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
1995 exec_txf(struct tgsi_exec_machine
*mach
,
1996 const struct tgsi_full_instruction
*inst
)
1998 struct tgsi_sampler
*sampler
;
1999 const uint unit
= inst
->Src
[2].Register
.Index
;
2000 union tgsi_exec_channel r
[4];
2001 union tgsi_exec_channel offset
[3];
2003 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
];
2007 if (inst
->Texture
.NumOffsets
== 1) {
2008 union tgsi_exec_channel index
;
2009 index
.i
[0] = index
.i
[1] = index
.i
[2] = index
.i
[3] = inst
->TexOffsets
[0].Index
;
2010 fetch_src_file_channel(mach
, 0, inst
->TexOffsets
[0].File
,
2011 inst
->TexOffsets
[0].SwizzleX
, &index
, &ZeroVec
, &offset
[0]);
2012 fetch_src_file_channel(mach
, 0, inst
->TexOffsets
[0].File
,
2013 inst
->TexOffsets
[0].SwizzleY
, &index
, &ZeroVec
, &offset
[1]);
2014 fetch_src_file_channel(mach
, 0, inst
->TexOffsets
[0].File
,
2015 inst
->TexOffsets
[0].SwizzleZ
, &index
, &ZeroVec
, &offset
[2]);
2016 offsets
[0] = offset
[0].i
[0];
2017 offsets
[1] = offset
[1].i
[0];
2018 offsets
[2] = offset
[2].i
[0];
2020 offsets
[0] = offsets
[1] = offsets
[2] = 0;
2022 IFETCH(&r
[3], 0, TGSI_CHAN_W
);
2024 switch(inst
->Texture
.Texture
) {
2025 case TGSI_TEXTURE_3D
:
2026 case TGSI_TEXTURE_2D_ARRAY
:
2027 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
2028 IFETCH(&r
[2], 0, TGSI_CHAN_Z
);
2030 case TGSI_TEXTURE_2D
:
2031 case TGSI_TEXTURE_RECT
:
2032 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
2033 case TGSI_TEXTURE_SHADOW2D
:
2034 case TGSI_TEXTURE_SHADOWRECT
:
2035 case TGSI_TEXTURE_1D_ARRAY
:
2036 IFETCH(&r
[1], 0, TGSI_CHAN_Y
);
2038 case TGSI_TEXTURE_1D
:
2039 case TGSI_TEXTURE_SHADOW1D
:
2040 IFETCH(&r
[0], 0, TGSI_CHAN_X
);
2047 sampler
= mach
->Samplers
[unit
];
2048 sampler
->get_texel(sampler
, r
[0].i
, r
[1].i
, r
[2].i
, r
[3].i
,
2051 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
2052 r
[0].f
[j
] = rgba
[0][j
];
2053 r
[1].f
[j
] = rgba
[1][j
];
2054 r
[2].f
[j
] = rgba
[2][j
];
2055 r
[3].f
[j
] = rgba
[3][j
];
2058 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2059 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2060 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2066 exec_txq(struct tgsi_exec_machine
*mach
,
2067 const struct tgsi_full_instruction
*inst
)
2069 struct tgsi_sampler
*sampler
;
2070 const uint unit
= inst
->Src
[1].Register
.Index
;
2072 union tgsi_exec_channel r
[4], src
;
2076 fetch_source(mach
, &src
, &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_INT
);
2077 sampler
= mach
->Samplers
[unit
];
2079 sampler
->get_dims(sampler
, src
.i
[0], result
);
2081 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
2082 for (j
= 0; j
< 4; j
++) {
2083 r
[j
].i
[i
] = result
[j
];
2087 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2088 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2089 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
,
2090 TGSI_EXEC_DATA_INT
);
2096 exec_sample(struct tgsi_exec_machine
*mach
,
2097 const struct tgsi_full_instruction
*inst
,
2100 const uint resource_unit
= inst
->Src
[1].Register
.Index
;
2101 const uint sampler_unit
= inst
->Src
[2].Register
.Index
;
2102 union tgsi_exec_channel r
[4];
2103 const union tgsi_exec_channel
*lod
= &ZeroVec
;
2104 enum tgsi_sampler_control control
;
2107 if (modifier
!= TEX_MODIFIER_NONE
) {
2108 if (modifier
== TEX_MODIFIER_LOD_BIAS
)
2109 FETCH(&r
[3], 3, TGSI_CHAN_X
);
2110 else /*TEX_MODIFIER_LOD*/
2111 FETCH(&r
[3], 0, TGSI_CHAN_W
);
2113 if (modifier
!= TEX_MODIFIER_PROJECTED
) {
2118 if (modifier
== TEX_MODIFIER_EXPLICIT_LOD
) {
2119 control
= tgsi_sampler_lod_explicit
;
2121 control
= tgsi_sampler_lod_bias
;
2124 switch (mach
->SamplerViews
[resource_unit
].Resource
) {
2125 case TGSI_TEXTURE_1D
:
2126 case TGSI_TEXTURE_SHADOW1D
:
2127 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2129 if (modifier
== TEX_MODIFIER_PROJECTED
) {
2130 micro_div(&r
[0], &r
[0], &r
[3]);
2133 fetch_texel(mach
->Samplers
[sampler_unit
],
2134 &r
[0], &ZeroVec
, &ZeroVec
, lod
, /* S, T, P, LOD */
2136 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
2139 case TGSI_TEXTURE_1D_ARRAY
:
2140 case TGSI_TEXTURE_2D
:
2141 case TGSI_TEXTURE_RECT
:
2142 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
2143 case TGSI_TEXTURE_SHADOW2D
:
2144 case TGSI_TEXTURE_SHADOWRECT
:
2145 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2146 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2147 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
2149 if (modifier
== TEX_MODIFIER_PROJECTED
) {
2150 micro_div(&r
[0], &r
[0], &r
[3]);
2151 micro_div(&r
[1], &r
[1], &r
[3]);
2152 micro_div(&r
[2], &r
[2], &r
[3]);
2155 fetch_texel(mach
->Samplers
[sampler_unit
],
2156 &r
[0], &r
[1], &r
[2], lod
, /* S, T, P, LOD */
2158 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
2161 case TGSI_TEXTURE_2D_ARRAY
:
2162 case TGSI_TEXTURE_3D
:
2163 case TGSI_TEXTURE_CUBE
:
2164 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2165 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2166 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
2168 if (modifier
== TEX_MODIFIER_PROJECTED
) {
2169 micro_div(&r
[0], &r
[0], &r
[3]);
2170 micro_div(&r
[1], &r
[1], &r
[3]);
2171 micro_div(&r
[2], &r
[2], &r
[3]);
2174 fetch_texel(mach
->Samplers
[sampler_unit
],
2175 &r
[0], &r
[1], &r
[2], lod
,
2177 &r
[0], &r
[1], &r
[2], &r
[3]);
2180 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
2181 case TGSI_TEXTURE_SHADOWCUBE
:
2182 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2183 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2184 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
2185 FETCH(&r
[3], 0, TGSI_CHAN_W
);
2187 assert(modifier
!= TEX_MODIFIER_PROJECTED
);
2189 fetch_texel(mach
->Samplers
[sampler_unit
],
2190 &r
[0], &r
[1], &r
[2], &r
[3],
2192 &r
[0], &r
[1], &r
[2], &r
[3]);
2199 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2200 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2201 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2207 exec_sample_d(struct tgsi_exec_machine
*mach
,
2208 const struct tgsi_full_instruction
*inst
)
2210 const uint resource_unit
= inst
->Src
[1].Register
.Index
;
2211 const uint sampler_unit
= inst
->Src
[2].Register
.Index
;
2212 union tgsi_exec_channel r
[4];
2215 * XXX: This is fake SAMPLE_D -- the derivatives are not taken into account, yet.
2218 switch (mach
->SamplerViews
[resource_unit
].Resource
) {
2219 case TGSI_TEXTURE_1D
:
2220 case TGSI_TEXTURE_SHADOW1D
:
2222 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2224 fetch_texel(mach
->Samplers
[sampler_unit
],
2225 &r
[0], &ZeroVec
, &ZeroVec
, &ZeroVec
, /* S, T, P, BIAS */
2226 tgsi_sampler_lod_bias
,
2227 &r
[0], &r
[1], &r
[2], &r
[3]); /* R, G, B, A */
2230 case TGSI_TEXTURE_2D
:
2231 case TGSI_TEXTURE_RECT
:
2232 case TGSI_TEXTURE_SHADOW2D
:
2233 case TGSI_TEXTURE_SHADOWRECT
:
2235 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2236 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2237 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
2239 fetch_texel(mach
->Samplers
[sampler_unit
],
2240 &r
[0], &r
[1], &r
[2], &ZeroVec
, /* inputs */
2241 tgsi_sampler_lod_bias
,
2242 &r
[0], &r
[1], &r
[2], &r
[3]); /* outputs */
2245 case TGSI_TEXTURE_3D
:
2246 case TGSI_TEXTURE_CUBE
:
2248 FETCH(&r
[0], 0, TGSI_CHAN_X
);
2249 FETCH(&r
[1], 0, TGSI_CHAN_Y
);
2250 FETCH(&r
[2], 0, TGSI_CHAN_Z
);
2252 fetch_texel(mach
->Samplers
[sampler_unit
],
2253 &r
[0], &r
[1], &r
[2], &ZeroVec
,
2254 tgsi_sampler_lod_bias
,
2255 &r
[0], &r
[1], &r
[2], &r
[3]);
2262 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2263 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2264 store_dest(mach
, &r
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2271 * Evaluate a constant-valued coefficient at the position of the
2276 struct tgsi_exec_machine
*mach
,
2282 for( i
= 0; i
< TGSI_QUAD_SIZE
; i
++ ) {
2283 mach
->Inputs
[attrib
].xyzw
[chan
].f
[i
] = mach
->InterpCoefs
[attrib
].a0
[chan
];
2288 * Evaluate a linear-valued coefficient at the position of the
2293 struct tgsi_exec_machine
*mach
,
2297 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
2298 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
2299 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
2300 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
2301 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
2302 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
;
2303 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = a0
+ dadx
;
2304 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = a0
+ dady
;
2305 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = a0
+ dadx
+ dady
;
2309 * Evaluate a perspective-valued coefficient at the position of the
2313 eval_perspective_coef(
2314 struct tgsi_exec_machine
*mach
,
2318 const float x
= mach
->QuadPos
.xyzw
[0].f
[0];
2319 const float y
= mach
->QuadPos
.xyzw
[1].f
[0];
2320 const float dadx
= mach
->InterpCoefs
[attrib
].dadx
[chan
];
2321 const float dady
= mach
->InterpCoefs
[attrib
].dady
[chan
];
2322 const float a0
= mach
->InterpCoefs
[attrib
].a0
[chan
] + dadx
* x
+ dady
* y
;
2323 const float *w
= mach
->QuadPos
.xyzw
[3].f
;
2324 /* divide by W here */
2325 mach
->Inputs
[attrib
].xyzw
[chan
].f
[0] = a0
/ w
[0];
2326 mach
->Inputs
[attrib
].xyzw
[chan
].f
[1] = (a0
+ dadx
) / w
[1];
2327 mach
->Inputs
[attrib
].xyzw
[chan
].f
[2] = (a0
+ dady
) / w
[2];
2328 mach
->Inputs
[attrib
].xyzw
[chan
].f
[3] = (a0
+ dadx
+ dady
) / w
[3];
2332 typedef void (* eval_coef_func
)(
2333 struct tgsi_exec_machine
*mach
,
2338 exec_declaration(struct tgsi_exec_machine
*mach
,
2339 const struct tgsi_full_declaration
*decl
)
2341 if (decl
->Declaration
.File
== TGSI_FILE_SAMPLER_VIEW
) {
2342 mach
->SamplerViews
[decl
->Range
.First
] = decl
->SamplerView
;
2346 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
2347 if (decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
2348 uint first
, last
, mask
;
2350 first
= decl
->Range
.First
;
2351 last
= decl
->Range
.Last
;
2352 mask
= decl
->Declaration
.UsageMask
;
2354 /* XXX we could remove this special-case code since
2355 * mach->InterpCoefs[first].a0 should already have the
2356 * front/back-face value. But we should first update the
2357 * ureg code to emit the right UsageMask value (WRITEMASK_X).
2358 * Then, we could remove the tgsi_exec_machine::Face field.
2360 /* XXX make FACE a system value */
2361 if (decl
->Semantic
.Name
== TGSI_SEMANTIC_FACE
) {
2364 assert(decl
->Semantic
.Index
== 0);
2365 assert(first
== last
);
2367 for (i
= 0; i
< TGSI_QUAD_SIZE
; i
++) {
2368 mach
->Inputs
[first
].xyzw
[0].f
[i
] = mach
->Face
;
2371 eval_coef_func eval
;
2374 switch (decl
->Interp
.Interpolate
) {
2375 case TGSI_INTERPOLATE_CONSTANT
:
2376 eval
= eval_constant_coef
;
2379 case TGSI_INTERPOLATE_LINEAR
:
2380 eval
= eval_linear_coef
;
2383 case TGSI_INTERPOLATE_PERSPECTIVE
:
2384 eval
= eval_perspective_coef
;
2387 case TGSI_INTERPOLATE_COLOR
:
2388 eval
= mach
->flatshade_color
? eval_constant_coef
: eval_perspective_coef
;
2396 for (j
= 0; j
< TGSI_NUM_CHANNELS
; j
++) {
2397 if (mask
& (1 << j
)) {
2398 for (i
= first
; i
<= last
; i
++) {
2407 if (decl
->Declaration
.File
== TGSI_FILE_SYSTEM_VALUE
) {
2408 mach
->SysSemanticToIndex
[decl
->Declaration
.Semantic
] = decl
->Range
.First
;
2413 typedef void (* micro_op
)(union tgsi_exec_channel
*dst
);
2416 exec_vector(struct tgsi_exec_machine
*mach
,
2417 const struct tgsi_full_instruction
*inst
,
2419 enum tgsi_exec_datatype dst_datatype
)
2423 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2424 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2425 union tgsi_exec_channel dst
;
2428 store_dest(mach
, &dst
, &inst
->Dst
[0], inst
, chan
, dst_datatype
);
2433 typedef void (* micro_unary_op
)(union tgsi_exec_channel
*dst
,
2434 const union tgsi_exec_channel
*src
);
2437 exec_scalar_unary(struct tgsi_exec_machine
*mach
,
2438 const struct tgsi_full_instruction
*inst
,
2440 enum tgsi_exec_datatype dst_datatype
,
2441 enum tgsi_exec_datatype src_datatype
)
2444 union tgsi_exec_channel src
;
2445 union tgsi_exec_channel dst
;
2447 fetch_source(mach
, &src
, &inst
->Src
[0], TGSI_CHAN_X
, src_datatype
);
2449 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2450 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2451 store_dest(mach
, &dst
, &inst
->Dst
[0], inst
, chan
, dst_datatype
);
2457 exec_vector_unary(struct tgsi_exec_machine
*mach
,
2458 const struct tgsi_full_instruction
*inst
,
2460 enum tgsi_exec_datatype dst_datatype
,
2461 enum tgsi_exec_datatype src_datatype
)
2464 struct tgsi_exec_vector dst
;
2466 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2467 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2468 union tgsi_exec_channel src
;
2470 fetch_source(mach
, &src
, &inst
->Src
[0], chan
, src_datatype
);
2471 op(&dst
.xyzw
[chan
], &src
);
2474 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2475 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2476 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
2481 typedef void (* micro_binary_op
)(union tgsi_exec_channel
*dst
,
2482 const union tgsi_exec_channel
*src0
,
2483 const union tgsi_exec_channel
*src1
);
2486 exec_scalar_binary(struct tgsi_exec_machine
*mach
,
2487 const struct tgsi_full_instruction
*inst
,
2489 enum tgsi_exec_datatype dst_datatype
,
2490 enum tgsi_exec_datatype src_datatype
)
2493 union tgsi_exec_channel src
[2];
2494 union tgsi_exec_channel dst
;
2496 fetch_source(mach
, &src
[0], &inst
->Src
[0], TGSI_CHAN_X
, src_datatype
);
2497 fetch_source(mach
, &src
[1], &inst
->Src
[1], TGSI_CHAN_Y
, src_datatype
);
2498 op(&dst
, &src
[0], &src
[1]);
2499 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2500 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2501 store_dest(mach
, &dst
, &inst
->Dst
[0], inst
, chan
, dst_datatype
);
2507 exec_vector_binary(struct tgsi_exec_machine
*mach
,
2508 const struct tgsi_full_instruction
*inst
,
2510 enum tgsi_exec_datatype dst_datatype
,
2511 enum tgsi_exec_datatype src_datatype
)
2514 struct tgsi_exec_vector dst
;
2516 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2517 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2518 union tgsi_exec_channel src
[2];
2520 fetch_source(mach
, &src
[0], &inst
->Src
[0], chan
, src_datatype
);
2521 fetch_source(mach
, &src
[1], &inst
->Src
[1], chan
, src_datatype
);
2522 op(&dst
.xyzw
[chan
], &src
[0], &src
[1]);
2525 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2526 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2527 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
/**
 * Signature of a three-operand per-channel operation:
 * reads \p src0, \p src1 and \p src2, writes \p dst.
 */
typedef void (*micro_trinary_op)(union tgsi_exec_channel *dst,
                                 const union tgsi_exec_channel *src0,
                                 const union tgsi_exec_channel *src1,
                                 const union tgsi_exec_channel *src2);
2538 exec_vector_trinary(struct tgsi_exec_machine
*mach
,
2539 const struct tgsi_full_instruction
*inst
,
2540 micro_trinary_op op
,
2541 enum tgsi_exec_datatype dst_datatype
,
2542 enum tgsi_exec_datatype src_datatype
)
2545 struct tgsi_exec_vector dst
;
2547 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2548 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2549 union tgsi_exec_channel src
[3];
2551 fetch_source(mach
, &src
[0], &inst
->Src
[0], chan
, src_datatype
);
2552 fetch_source(mach
, &src
[1], &inst
->Src
[1], chan
, src_datatype
);
2553 fetch_source(mach
, &src
[2], &inst
->Src
[2], chan
, src_datatype
);
2554 op(&dst
.xyzw
[chan
], &src
[0], &src
[1], &src
[2]);
2557 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2558 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2559 store_dest(mach
, &dst
.xyzw
[chan
], &inst
->Dst
[0], inst
, chan
, dst_datatype
);
2565 exec_dp3(struct tgsi_exec_machine
*mach
,
2566 const struct tgsi_full_instruction
*inst
)
2569 union tgsi_exec_channel arg
[3];
2571 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2572 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2573 micro_mul(&arg
[2], &arg
[0], &arg
[1]);
2575 for (chan
= TGSI_CHAN_Y
; chan
<= TGSI_CHAN_Z
; chan
++) {
2576 fetch_source(mach
, &arg
[0], &inst
->Src
[0], chan
, TGSI_EXEC_DATA_FLOAT
);
2577 fetch_source(mach
, &arg
[1], &inst
->Src
[1], chan
, TGSI_EXEC_DATA_FLOAT
);
2578 micro_mad(&arg
[2], &arg
[0], &arg
[1], &arg
[2]);
2581 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2582 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2583 store_dest(mach
, &arg
[2], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2589 exec_dp4(struct tgsi_exec_machine
*mach
,
2590 const struct tgsi_full_instruction
*inst
)
2593 union tgsi_exec_channel arg
[3];
2595 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2596 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2597 micro_mul(&arg
[2], &arg
[0], &arg
[1]);
2599 for (chan
= TGSI_CHAN_Y
; chan
<= TGSI_CHAN_W
; chan
++) {
2600 fetch_source(mach
, &arg
[0], &inst
->Src
[0], chan
, TGSI_EXEC_DATA_FLOAT
);
2601 fetch_source(mach
, &arg
[1], &inst
->Src
[1], chan
, TGSI_EXEC_DATA_FLOAT
);
2602 micro_mad(&arg
[2], &arg
[0], &arg
[1], &arg
[2]);
2605 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2606 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2607 store_dest(mach
, &arg
[2], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2613 exec_dp2a(struct tgsi_exec_machine
*mach
,
2614 const struct tgsi_full_instruction
*inst
)
2617 union tgsi_exec_channel arg
[3];
2619 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2620 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2621 micro_mul(&arg
[2], &arg
[0], &arg
[1]);
2623 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2624 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2625 micro_mad(&arg
[0], &arg
[0], &arg
[1], &arg
[2]);
2627 fetch_source(mach
, &arg
[1], &inst
->Src
[2], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2628 micro_add(&arg
[0], &arg
[0], &arg
[1]);
2630 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2631 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2632 store_dest(mach
, &arg
[0], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2638 exec_dph(struct tgsi_exec_machine
*mach
,
2639 const struct tgsi_full_instruction
*inst
)
2642 union tgsi_exec_channel arg
[3];
2644 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2645 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2646 micro_mul(&arg
[2], &arg
[0], &arg
[1]);
2648 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2649 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2650 micro_mad(&arg
[2], &arg
[0], &arg
[1], &arg
[2]);
2652 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2653 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2654 micro_mad(&arg
[0], &arg
[0], &arg
[1], &arg
[2]);
2656 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
2657 micro_add(&arg
[0], &arg
[0], &arg
[1]);
2659 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2660 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2661 store_dest(mach
, &arg
[0], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2667 exec_dp2(struct tgsi_exec_machine
*mach
,
2668 const struct tgsi_full_instruction
*inst
)
2671 union tgsi_exec_channel arg
[3];
2673 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2674 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2675 micro_mul(&arg
[2], &arg
[0], &arg
[1]);
2677 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2678 fetch_source(mach
, &arg
[1], &inst
->Src
[1], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2679 micro_mad(&arg
[2], &arg
[0], &arg
[1], &arg
[2]);
2681 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
2682 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2683 store_dest(mach
, &arg
[2], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2689 exec_nrm4(struct tgsi_exec_machine
*mach
,
2690 const struct tgsi_full_instruction
*inst
)
2693 union tgsi_exec_channel arg
[4];
2694 union tgsi_exec_channel scale
;
2696 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2697 micro_mul(&scale
, &arg
[0], &arg
[0]);
2699 for (chan
= TGSI_CHAN_Y
; chan
<= TGSI_CHAN_W
; chan
++) {
2700 union tgsi_exec_channel product
;
2702 fetch_source(mach
, &arg
[chan
], &inst
->Src
[0], chan
, TGSI_EXEC_DATA_FLOAT
);
2703 micro_mul(&product
, &arg
[chan
], &arg
[chan
]);
2704 micro_add(&scale
, &scale
, &product
);
2707 micro_rsq(&scale
, &scale
);
2709 for (chan
= TGSI_CHAN_X
; chan
<= TGSI_CHAN_W
; chan
++) {
2710 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2711 micro_mul(&arg
[chan
], &arg
[chan
], &scale
);
2712 store_dest(mach
, &arg
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2718 exec_nrm3(struct tgsi_exec_machine
*mach
,
2719 const struct tgsi_full_instruction
*inst
)
2721 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
2723 union tgsi_exec_channel arg
[3];
2724 union tgsi_exec_channel scale
;
2726 fetch_source(mach
, &arg
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2727 micro_mul(&scale
, &arg
[0], &arg
[0]);
2729 for (chan
= TGSI_CHAN_Y
; chan
<= TGSI_CHAN_Z
; chan
++) {
2730 union tgsi_exec_channel product
;
2732 fetch_source(mach
, &arg
[chan
], &inst
->Src
[0], chan
, TGSI_EXEC_DATA_FLOAT
);
2733 micro_mul(&product
, &arg
[chan
], &arg
[chan
]);
2734 micro_add(&scale
, &scale
, &product
);
2737 micro_rsq(&scale
, &scale
);
2739 for (chan
= TGSI_CHAN_X
; chan
<= TGSI_CHAN_Z
; chan
++) {
2740 if (inst
->Dst
[0].Register
.WriteMask
& (1 << chan
)) {
2741 micro_mul(&arg
[chan
], &arg
[chan
], &scale
);
2742 store_dest(mach
, &arg
[chan
], &inst
->Dst
[0], inst
, chan
, TGSI_EXEC_DATA_FLOAT
);
2747 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
2748 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
2753 exec_scs(struct tgsi_exec_machine
*mach
,
2754 const struct tgsi_full_instruction
*inst
)
2756 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
2757 union tgsi_exec_channel arg
;
2758 union tgsi_exec_channel result
;
2760 fetch_source(mach
, &arg
, &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2762 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
2763 micro_cos(&result
, &arg
);
2764 store_dest(mach
, &result
, &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2766 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
2767 micro_sin(&result
, &arg
);
2768 store_dest(mach
, &result
, &inst
->Dst
[0], inst
, TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2771 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
2772 store_dest(mach
, &ZeroVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2774 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
2775 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
2780 exec_x2d(struct tgsi_exec_machine
*mach
,
2781 const struct tgsi_full_instruction
*inst
)
2783 union tgsi_exec_channel r
[4];
2784 union tgsi_exec_channel d
[2];
2786 fetch_source(mach
, &r
[0], &inst
->Src
[1], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2787 fetch_source(mach
, &r
[1], &inst
->Src
[1], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2788 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_XZ
) {
2789 fetch_source(mach
, &r
[2], &inst
->Src
[2], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2790 micro_mul(&r
[2], &r
[2], &r
[0]);
2791 fetch_source(mach
, &r
[3], &inst
->Src
[2], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2792 micro_mul(&r
[3], &r
[3], &r
[1]);
2793 micro_add(&r
[2], &r
[2], &r
[3]);
2794 fetch_source(mach
, &r
[3], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2795 micro_add(&d
[0], &r
[2], &r
[3]);
2797 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_YW
) {
2798 fetch_source(mach
, &r
[2], &inst
->Src
[2], TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2799 micro_mul(&r
[2], &r
[2], &r
[0]);
2800 fetch_source(mach
, &r
[3], &inst
->Src
[2], TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
2801 micro_mul(&r
[3], &r
[3], &r
[1]);
2802 micro_add(&r
[2], &r
[2], &r
[3]);
2803 fetch_source(mach
, &r
[3], &inst
->Src
[0], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2804 micro_add(&d
[1], &r
[2], &r
[3]);
2806 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
2807 store_dest(mach
, &d
[0], &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2809 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
2810 store_dest(mach
, &d
[1], &inst
->Dst
[0], inst
, TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2812 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
2813 store_dest(mach
, &d
[0], &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2815 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
2816 store_dest(mach
, &d
[1], &inst
->Dst
[0], inst
, TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
2821 exec_rfl(struct tgsi_exec_machine
*mach
,
2822 const struct tgsi_full_instruction
*inst
)
2824 union tgsi_exec_channel r
[9];
2826 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
2827 /* r0 = dp3(src0, src0) */
2828 fetch_source(mach
, &r
[2], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2829 micro_mul(&r
[0], &r
[2], &r
[2]);
2830 fetch_source(mach
, &r
[4], &inst
->Src
[0], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2831 micro_mul(&r
[8], &r
[4], &r
[4]);
2832 micro_add(&r
[0], &r
[0], &r
[8]);
2833 fetch_source(mach
, &r
[6], &inst
->Src
[0], TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2834 micro_mul(&r
[8], &r
[6], &r
[6]);
2835 micro_add(&r
[0], &r
[0], &r
[8]);
2837 /* r1 = dp3(src0, src1) */
2838 fetch_source(mach
, &r
[3], &inst
->Src
[1], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2839 micro_mul(&r
[1], &r
[2], &r
[3]);
2840 fetch_source(mach
, &r
[5], &inst
->Src
[1], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2841 micro_mul(&r
[8], &r
[4], &r
[5]);
2842 micro_add(&r
[1], &r
[1], &r
[8]);
2843 fetch_source(mach
, &r
[7], &inst
->Src
[1], TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2844 micro_mul(&r
[8], &r
[6], &r
[7]);
2845 micro_add(&r
[1], &r
[1], &r
[8]);
2847 /* r1 = 2 * r1 / r0 */
2848 micro_add(&r
[1], &r
[1], &r
[1]);
2849 micro_div(&r
[1], &r
[1], &r
[0]);
2851 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
2852 micro_mul(&r
[2], &r
[2], &r
[1]);
2853 micro_sub(&r
[2], &r
[2], &r
[3]);
2854 store_dest(mach
, &r
[2], &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2856 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
2857 micro_mul(&r
[4], &r
[4], &r
[1]);
2858 micro_sub(&r
[4], &r
[4], &r
[5]);
2859 store_dest(mach
, &r
[4], &inst
->Dst
[0], inst
, TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2861 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
2862 micro_mul(&r
[6], &r
[6], &r
[1]);
2863 micro_sub(&r
[6], &r
[6], &r
[7]);
2864 store_dest(mach
, &r
[6], &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2867 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
2868 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
2873 exec_xpd(struct tgsi_exec_machine
*mach
,
2874 const struct tgsi_full_instruction
*inst
)
2876 union tgsi_exec_channel r
[6];
2877 union tgsi_exec_channel d
[3];
2879 fetch_source(mach
, &r
[0], &inst
->Src
[0], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2880 fetch_source(mach
, &r
[1], &inst
->Src
[1], TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2882 micro_mul(&r
[2], &r
[0], &r
[1]);
2884 fetch_source(mach
, &r
[3], &inst
->Src
[0], TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2885 fetch_source(mach
, &r
[4], &inst
->Src
[1], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2887 micro_mul(&r
[5], &r
[3], &r
[4] );
2888 micro_sub(&d
[TGSI_CHAN_X
], &r
[2], &r
[5]);
2890 fetch_source(mach
, &r
[2], &inst
->Src
[1], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2892 micro_mul(&r
[3], &r
[3], &r
[2]);
2894 fetch_source(mach
, &r
[5], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2896 micro_mul(&r
[1], &r
[1], &r
[5]);
2897 micro_sub(&d
[TGSI_CHAN_Y
], &r
[3], &r
[1]);
2899 micro_mul(&r
[5], &r
[5], &r
[4]);
2900 micro_mul(&r
[0], &r
[0], &r
[2]);
2901 micro_sub(&d
[TGSI_CHAN_Z
], &r
[5], &r
[0]);
2903 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
2904 store_dest(mach
, &d
[TGSI_CHAN_X
], &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2906 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
2907 store_dest(mach
, &d
[TGSI_CHAN_Y
], &inst
->Dst
[0], inst
, TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2909 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
2910 store_dest(mach
, &d
[TGSI_CHAN_Z
], &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2912 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
2913 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
2918 exec_dst(struct tgsi_exec_machine
*mach
,
2919 const struct tgsi_full_instruction
*inst
)
2921 union tgsi_exec_channel r
[2];
2922 union tgsi_exec_channel d
[4];
2924 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
2925 fetch_source(mach
, &r
[0], &inst
->Src
[0], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2926 fetch_source(mach
, &r
[1], &inst
->Src
[1], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2927 micro_mul(&d
[TGSI_CHAN_Y
], &r
[0], &r
[1]);
2929 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
2930 fetch_source(mach
, &d
[TGSI_CHAN_Z
], &inst
->Src
[0], TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2932 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
2933 fetch_source(mach
, &d
[TGSI_CHAN_W
], &inst
->Src
[1], TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
2936 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
2937 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2939 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
2940 store_dest(mach
, &d
[TGSI_CHAN_Y
], &inst
->Dst
[0], inst
, TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2942 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
2943 store_dest(mach
, &d
[TGSI_CHAN_Z
], &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2945 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
2946 store_dest(mach
, &d
[TGSI_CHAN_W
], &inst
->Dst
[0], inst
, TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
2951 exec_log(struct tgsi_exec_machine
*mach
,
2952 const struct tgsi_full_instruction
*inst
)
2954 union tgsi_exec_channel r
[3];
2956 fetch_source(mach
, &r
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2957 micro_abs(&r
[2], &r
[0]); /* r2 = abs(r0) */
2958 micro_lg2(&r
[1], &r
[2]); /* r1 = lg2(r2) */
2959 micro_flr(&r
[0], &r
[1]); /* r0 = floor(r1) */
2960 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
2961 store_dest(mach
, &r
[0], &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2963 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
2964 micro_exp2(&r
[0], &r
[0]); /* r0 = 2 ^ r0 */
2965 micro_div(&r
[0], &r
[2], &r
[0]); /* r0 = r2 / r0 */
2966 store_dest(mach
, &r
[0], &inst
->Dst
[0], inst
, TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2968 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
2969 store_dest(mach
, &r
[1], &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2971 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
2972 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
2977 exec_exp(struct tgsi_exec_machine
*mach
,
2978 const struct tgsi_full_instruction
*inst
)
2980 union tgsi_exec_channel r
[3];
2982 fetch_source(mach
, &r
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2983 micro_flr(&r
[1], &r
[0]); /* r1 = floor(r0) */
2984 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
2985 micro_exp2(&r
[2], &r
[1]); /* r2 = 2 ^ r1 */
2986 store_dest(mach
, &r
[2], &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
2988 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
2989 micro_sub(&r
[2], &r
[0], &r
[1]); /* r2 = r0 - r1 */
2990 store_dest(mach
, &r
[2], &inst
->Dst
[0], inst
, TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
2992 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
2993 micro_exp2(&r
[2], &r
[0]); /* r2 = 2 ^ r0 */
2994 store_dest(mach
, &r
[2], &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
2996 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
2997 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
3002 exec_lit(struct tgsi_exec_machine
*mach
,
3003 const struct tgsi_full_instruction
*inst
)
3005 union tgsi_exec_channel r
[3];
3006 union tgsi_exec_channel d
[3];
3008 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_YZ
) {
3009 fetch_source(mach
, &r
[0], &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3010 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
3011 fetch_source(mach
, &r
[1], &inst
->Src
[0], TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
3012 micro_max(&r
[1], &r
[1], &ZeroVec
);
3014 fetch_source(mach
, &r
[2], &inst
->Src
[0], TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
3015 micro_min(&r
[2], &r
[2], &P128Vec
);
3016 micro_max(&r
[2], &r
[2], &M128Vec
);
3017 micro_pow(&r
[1], &r
[1], &r
[2]);
3018 micro_lt(&d
[TGSI_CHAN_Z
], &ZeroVec
, &r
[0], &r
[1], &ZeroVec
);
3019 store_dest(mach
, &d
[TGSI_CHAN_Z
], &inst
->Dst
[0], inst
, TGSI_CHAN_Z
, TGSI_EXEC_DATA_FLOAT
);
3021 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
3022 micro_max(&d
[TGSI_CHAN_Y
], &r
[0], &ZeroVec
);
3023 store_dest(mach
, &d
[TGSI_CHAN_Y
], &inst
->Dst
[0], inst
, TGSI_CHAN_Y
, TGSI_EXEC_DATA_FLOAT
);
3026 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
3027 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_X
, TGSI_EXEC_DATA_FLOAT
);
3030 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
3031 store_dest(mach
, &OneVec
, &inst
->Dst
[0], inst
, TGSI_CHAN_W
, TGSI_EXEC_DATA_FLOAT
);
3036 exec_break(struct tgsi_exec_machine
*mach
)
3038 if (mach
->BreakType
== TGSI_EXEC_BREAK_INSIDE_LOOP
) {
3039 /* turn off loop channels for each enabled exec channel */
3040 mach
->LoopMask
&= ~mach
->ExecMask
;
3041 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3042 UPDATE_EXEC_MASK(mach
);
3044 assert(mach
->BreakType
== TGSI_EXEC_BREAK_INSIDE_SWITCH
);
3046 mach
->Switch
.mask
= 0x0;
3048 UPDATE_EXEC_MASK(mach
);
3053 exec_switch(struct tgsi_exec_machine
*mach
,
3054 const struct tgsi_full_instruction
*inst
)
3056 assert(mach
->SwitchStackTop
< TGSI_EXEC_MAX_SWITCH_NESTING
);
3057 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
3059 mach
->SwitchStack
[mach
->SwitchStackTop
++] = mach
->Switch
;
3060 fetch_source(mach
, &mach
->Switch
.selector
, &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_UINT
);
3061 mach
->Switch
.mask
= 0x0;
3062 mach
->Switch
.defaultMask
= 0x0;
3064 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
3065 mach
->BreakType
= TGSI_EXEC_BREAK_INSIDE_SWITCH
;
3067 UPDATE_EXEC_MASK(mach
);
3071 exec_case(struct tgsi_exec_machine
*mach
,
3072 const struct tgsi_full_instruction
*inst
)
3074 uint prevMask
= mach
->SwitchStack
[mach
->SwitchStackTop
- 1].mask
;
3075 union tgsi_exec_channel src
;
3078 fetch_source(mach
, &src
, &inst
->Src
[0], TGSI_CHAN_X
, TGSI_EXEC_DATA_UINT
);
3080 if (mach
->Switch
.selector
.u
[0] == src
.u
[0]) {
3083 if (mach
->Switch
.selector
.u
[1] == src
.u
[1]) {
3086 if (mach
->Switch
.selector
.u
[2] == src
.u
[2]) {
3089 if (mach
->Switch
.selector
.u
[3] == src
.u
[3]) {
3093 mach
->Switch
.defaultMask
|= mask
;
3095 mach
->Switch
.mask
|= mask
& prevMask
;
3097 UPDATE_EXEC_MASK(mach
);
3101 exec_default(struct tgsi_exec_machine
*mach
)
3103 uint prevMask
= mach
->SwitchStack
[mach
->SwitchStackTop
- 1].mask
;
3105 mach
->Switch
.mask
|= ~mach
->Switch
.defaultMask
& prevMask
;
3107 UPDATE_EXEC_MASK(mach
);
3111 exec_endswitch(struct tgsi_exec_machine
*mach
)
3113 mach
->Switch
= mach
->SwitchStack
[--mach
->SwitchStackTop
];
3114 mach
->BreakType
= mach
->BreakStack
[--mach
->BreakStackTop
];
3116 UPDATE_EXEC_MASK(mach
);
3120 micro_i2f(union tgsi_exec_channel
*dst
,
3121 const union tgsi_exec_channel
*src
)
3123 dst
->f
[0] = (float)src
->i
[0];
3124 dst
->f
[1] = (float)src
->i
[1];
3125 dst
->f
[2] = (float)src
->i
[2];
3126 dst
->f
[3] = (float)src
->i
[3];
3130 micro_not(union tgsi_exec_channel
*dst
,
3131 const union tgsi_exec_channel
*src
)
3133 dst
->u
[0] = ~src
->u
[0];
3134 dst
->u
[1] = ~src
->u
[1];
3135 dst
->u
[2] = ~src
->u
[2];
3136 dst
->u
[3] = ~src
->u
[3];
3140 micro_shl(union tgsi_exec_channel
*dst
,
3141 const union tgsi_exec_channel
*src0
,
3142 const union tgsi_exec_channel
*src1
)
3144 dst
->u
[0] = src0
->u
[0] << src1
->u
[0];
3145 dst
->u
[1] = src0
->u
[1] << src1
->u
[1];
3146 dst
->u
[2] = src0
->u
[2] << src1
->u
[2];
3147 dst
->u
[3] = src0
->u
[3] << src1
->u
[3];
3151 micro_and(union tgsi_exec_channel
*dst
,
3152 const union tgsi_exec_channel
*src0
,
3153 const union tgsi_exec_channel
*src1
)
3155 dst
->u
[0] = src0
->u
[0] & src1
->u
[0];
3156 dst
->u
[1] = src0
->u
[1] & src1
->u
[1];
3157 dst
->u
[2] = src0
->u
[2] & src1
->u
[2];
3158 dst
->u
[3] = src0
->u
[3] & src1
->u
[3];
3162 micro_or(union tgsi_exec_channel
*dst
,
3163 const union tgsi_exec_channel
*src0
,
3164 const union tgsi_exec_channel
*src1
)
3166 dst
->u
[0] = src0
->u
[0] | src1
->u
[0];
3167 dst
->u
[1] = src0
->u
[1] | src1
->u
[1];
3168 dst
->u
[2] = src0
->u
[2] | src1
->u
[2];
3169 dst
->u
[3] = src0
->u
[3] | src1
->u
[3];
3173 micro_xor(union tgsi_exec_channel
*dst
,
3174 const union tgsi_exec_channel
*src0
,
3175 const union tgsi_exec_channel
*src1
)
3177 dst
->u
[0] = src0
->u
[0] ^ src1
->u
[0];
3178 dst
->u
[1] = src0
->u
[1] ^ src1
->u
[1];
3179 dst
->u
[2] = src0
->u
[2] ^ src1
->u
[2];
3180 dst
->u
[3] = src0
->u
[3] ^ src1
->u
[3];
3184 micro_mod(union tgsi_exec_channel
*dst
,
3185 const union tgsi_exec_channel
*src0
,
3186 const union tgsi_exec_channel
*src1
)
3188 dst
->i
[0] = src0
->i
[0] % src1
->i
[0];
3189 dst
->i
[1] = src0
->i
[1] % src1
->i
[1];
3190 dst
->i
[2] = src0
->i
[2] % src1
->i
[2];
3191 dst
->i
[3] = src0
->i
[3] % src1
->i
[3];
3195 micro_f2i(union tgsi_exec_channel
*dst
,
3196 const union tgsi_exec_channel
*src
)
3198 dst
->i
[0] = (int)src
->f
[0];
3199 dst
->i
[1] = (int)src
->f
[1];
3200 dst
->i
[2] = (int)src
->f
[2];
3201 dst
->i
[3] = (int)src
->f
[3];
3205 micro_idiv(union tgsi_exec_channel
*dst
,
3206 const union tgsi_exec_channel
*src0
,
3207 const union tgsi_exec_channel
*src1
)
3209 dst
->i
[0] = src0
->i
[0] / src1
->i
[0];
3210 dst
->i
[1] = src0
->i
[1] / src1
->i
[1];
3211 dst
->i
[2] = src0
->i
[2] / src1
->i
[2];
3212 dst
->i
[3] = src0
->i
[3] / src1
->i
[3];
3216 micro_imax(union tgsi_exec_channel
*dst
,
3217 const union tgsi_exec_channel
*src0
,
3218 const union tgsi_exec_channel
*src1
)
3220 dst
->i
[0] = src0
->i
[0] > src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
3221 dst
->i
[1] = src0
->i
[1] > src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
3222 dst
->i
[2] = src0
->i
[2] > src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
3223 dst
->i
[3] = src0
->i
[3] > src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
3227 micro_imin(union tgsi_exec_channel
*dst
,
3228 const union tgsi_exec_channel
*src0
,
3229 const union tgsi_exec_channel
*src1
)
3231 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? src0
->i
[0] : src1
->i
[0];
3232 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? src0
->i
[1] : src1
->i
[1];
3233 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? src0
->i
[2] : src1
->i
[2];
3234 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? src0
->i
[3] : src1
->i
[3];
3238 micro_isge(union tgsi_exec_channel
*dst
,
3239 const union tgsi_exec_channel
*src0
,
3240 const union tgsi_exec_channel
*src1
)
3242 dst
->i
[0] = src0
->i
[0] >= src1
->i
[0] ? -1 : 0;
3243 dst
->i
[1] = src0
->i
[1] >= src1
->i
[1] ? -1 : 0;
3244 dst
->i
[2] = src0
->i
[2] >= src1
->i
[2] ? -1 : 0;
3245 dst
->i
[3] = src0
->i
[3] >= src1
->i
[3] ? -1 : 0;
3249 micro_ishr(union tgsi_exec_channel
*dst
,
3250 const union tgsi_exec_channel
*src0
,
3251 const union tgsi_exec_channel
*src1
)
3253 dst
->i
[0] = src0
->i
[0] >> src1
->i
[0];
3254 dst
->i
[1] = src0
->i
[1] >> src1
->i
[1];
3255 dst
->i
[2] = src0
->i
[2] >> src1
->i
[2];
3256 dst
->i
[3] = src0
->i
[3] >> src1
->i
[3];
3260 micro_islt(union tgsi_exec_channel
*dst
,
3261 const union tgsi_exec_channel
*src0
,
3262 const union tgsi_exec_channel
*src1
)
3264 dst
->i
[0] = src0
->i
[0] < src1
->i
[0] ? -1 : 0;
3265 dst
->i
[1] = src0
->i
[1] < src1
->i
[1] ? -1 : 0;
3266 dst
->i
[2] = src0
->i
[2] < src1
->i
[2] ? -1 : 0;
3267 dst
->i
[3] = src0
->i
[3] < src1
->i
[3] ? -1 : 0;
3271 micro_f2u(union tgsi_exec_channel
*dst
,
3272 const union tgsi_exec_channel
*src
)
3274 dst
->u
[0] = (uint
)src
->f
[0];
3275 dst
->u
[1] = (uint
)src
->f
[1];
3276 dst
->u
[2] = (uint
)src
->f
[2];
3277 dst
->u
[3] = (uint
)src
->f
[3];
3281 micro_u2f(union tgsi_exec_channel
*dst
,
3282 const union tgsi_exec_channel
*src
)
3284 dst
->f
[0] = (float)src
->u
[0];
3285 dst
->f
[1] = (float)src
->u
[1];
3286 dst
->f
[2] = (float)src
->u
[2];
3287 dst
->f
[3] = (float)src
->u
[3];
3291 micro_uadd(union tgsi_exec_channel
*dst
,
3292 const union tgsi_exec_channel
*src0
,
3293 const union tgsi_exec_channel
*src1
)
3295 dst
->u
[0] = src0
->u
[0] + src1
->u
[0];
3296 dst
->u
[1] = src0
->u
[1] + src1
->u
[1];
3297 dst
->u
[2] = src0
->u
[2] + src1
->u
[2];
3298 dst
->u
[3] = src0
->u
[3] + src1
->u
[3];
3302 micro_udiv(union tgsi_exec_channel
*dst
,
3303 const union tgsi_exec_channel
*src0
,
3304 const union tgsi_exec_channel
*src1
)
3306 dst
->u
[0] = src0
->u
[0] / src1
->u
[0];
3307 dst
->u
[1] = src0
->u
[1] / src1
->u
[1];
3308 dst
->u
[2] = src0
->u
[2] / src1
->u
[2];
3309 dst
->u
[3] = src0
->u
[3] / src1
->u
[3];
3313 micro_umad(union tgsi_exec_channel
*dst
,
3314 const union tgsi_exec_channel
*src0
,
3315 const union tgsi_exec_channel
*src1
,
3316 const union tgsi_exec_channel
*src2
)
3318 dst
->u
[0] = src0
->u
[0] * src1
->u
[0] + src2
->u
[0];
3319 dst
->u
[1] = src0
->u
[1] * src1
->u
[1] + src2
->u
[1];
3320 dst
->u
[2] = src0
->u
[2] * src1
->u
[2] + src2
->u
[2];
3321 dst
->u
[3] = src0
->u
[3] * src1
->u
[3] + src2
->u
[3];
3325 micro_umax(union tgsi_exec_channel
*dst
,
3326 const union tgsi_exec_channel
*src0
,
3327 const union tgsi_exec_channel
*src1
)
3329 dst
->u
[0] = src0
->u
[0] > src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
3330 dst
->u
[1] = src0
->u
[1] > src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
3331 dst
->u
[2] = src0
->u
[2] > src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
3332 dst
->u
[3] = src0
->u
[3] > src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
3336 micro_umin(union tgsi_exec_channel
*dst
,
3337 const union tgsi_exec_channel
*src0
,
3338 const union tgsi_exec_channel
*src1
)
3340 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? src0
->u
[0] : src1
->u
[0];
3341 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? src0
->u
[1] : src1
->u
[1];
3342 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? src0
->u
[2] : src1
->u
[2];
3343 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? src0
->u
[3] : src1
->u
[3];
3347 micro_umod(union tgsi_exec_channel
*dst
,
3348 const union tgsi_exec_channel
*src0
,
3349 const union tgsi_exec_channel
*src1
)
3351 dst
->u
[0] = src0
->u
[0] % src1
->u
[0];
3352 dst
->u
[1] = src0
->u
[1] % src1
->u
[1];
3353 dst
->u
[2] = src0
->u
[2] % src1
->u
[2];
3354 dst
->u
[3] = src0
->u
[3] % src1
->u
[3];
3358 micro_umul(union tgsi_exec_channel
*dst
,
3359 const union tgsi_exec_channel
*src0
,
3360 const union tgsi_exec_channel
*src1
)
3362 dst
->u
[0] = src0
->u
[0] * src1
->u
[0];
3363 dst
->u
[1] = src0
->u
[1] * src1
->u
[1];
3364 dst
->u
[2] = src0
->u
[2] * src1
->u
[2];
3365 dst
->u
[3] = src0
->u
[3] * src1
->u
[3];
3369 micro_useq(union tgsi_exec_channel
*dst
,
3370 const union tgsi_exec_channel
*src0
,
3371 const union tgsi_exec_channel
*src1
)
3373 dst
->u
[0] = src0
->u
[0] == src1
->u
[0] ? ~0 : 0;
3374 dst
->u
[1] = src0
->u
[1] == src1
->u
[1] ? ~0 : 0;
3375 dst
->u
[2] = src0
->u
[2] == src1
->u
[2] ? ~0 : 0;
3376 dst
->u
[3] = src0
->u
[3] == src1
->u
[3] ? ~0 : 0;
3380 micro_usge(union tgsi_exec_channel
*dst
,
3381 const union tgsi_exec_channel
*src0
,
3382 const union tgsi_exec_channel
*src1
)
3384 dst
->u
[0] = src0
->u
[0] >= src1
->u
[0] ? ~0 : 0;
3385 dst
->u
[1] = src0
->u
[1] >= src1
->u
[1] ? ~0 : 0;
3386 dst
->u
[2] = src0
->u
[2] >= src1
->u
[2] ? ~0 : 0;
3387 dst
->u
[3] = src0
->u
[3] >= src1
->u
[3] ? ~0 : 0;
3391 micro_ushr(union tgsi_exec_channel
*dst
,
3392 const union tgsi_exec_channel
*src0
,
3393 const union tgsi_exec_channel
*src1
)
3395 dst
->u
[0] = src0
->u
[0] >> src1
->u
[0];
3396 dst
->u
[1] = src0
->u
[1] >> src1
->u
[1];
3397 dst
->u
[2] = src0
->u
[2] >> src1
->u
[2];
3398 dst
->u
[3] = src0
->u
[3] >> src1
->u
[3];
3402 micro_uslt(union tgsi_exec_channel
*dst
,
3403 const union tgsi_exec_channel
*src0
,
3404 const union tgsi_exec_channel
*src1
)
3406 dst
->u
[0] = src0
->u
[0] < src1
->u
[0] ? ~0 : 0;
3407 dst
->u
[1] = src0
->u
[1] < src1
->u
[1] ? ~0 : 0;
3408 dst
->u
[2] = src0
->u
[2] < src1
->u
[2] ? ~0 : 0;
3409 dst
->u
[3] = src0
->u
[3] < src1
->u
[3] ? ~0 : 0;
3413 micro_usne(union tgsi_exec_channel
*dst
,
3414 const union tgsi_exec_channel
*src0
,
3415 const union tgsi_exec_channel
*src1
)
3417 dst
->u
[0] = src0
->u
[0] != src1
->u
[0] ? ~0 : 0;
3418 dst
->u
[1] = src0
->u
[1] != src1
->u
[1] ? ~0 : 0;
3419 dst
->u
[2] = src0
->u
[2] != src1
->u
[2] ? ~0 : 0;
3420 dst
->u
[3] = src0
->u
[3] != src1
->u
[3] ? ~0 : 0;
3424 micro_uarl(union tgsi_exec_channel
*dst
,
3425 const union tgsi_exec_channel
*src
)
3427 dst
->i
[0] = src
->u
[0];
3428 dst
->i
[1] = src
->u
[1];
3429 dst
->i
[2] = src
->u
[2];
3430 dst
->i
[3] = src
->u
[3];
3434 micro_ucmp(union tgsi_exec_channel
*dst
,
3435 const union tgsi_exec_channel
*src0
,
3436 const union tgsi_exec_channel
*src1
,
3437 const union tgsi_exec_channel
*src2
)
3439 dst
->u
[0] = src0
->u
[0] ? src1
->u
[0] : src2
->u
[0];
3440 dst
->u
[1] = src0
->u
[1] ? src1
->u
[1] : src2
->u
[1];
3441 dst
->u
[2] = src0
->u
[2] ? src1
->u
[2] : src2
->u
[2];
3442 dst
->u
[3] = src0
->u
[3] ? src1
->u
[3] : src2
->u
[3];
/*
 * Parameter list and body of the per-instruction dispatcher.  The line
 * carrying the function name is not part of this listing; the only call
 * visible here, in tgsi_exec_machine_run(), is
 * exec_instruction(mach, mach->Instructions + pc, &pc).
 *
 * mach - interpreter state: the execution masks, their stacks and the
 *        call stack.
 * inst - instruction to execute; inst->Instruction.Opcode selects the case.
 * pc   - program counter, read and written through *pc by the flow
 *        control cases (CAL, RET, BGNLOOP, ENDLOOP, ENDSUB).
 */
3447 struct tgsi_exec_machine
*mach
,
3448 const struct tgsi_full_instruction
*inst
,
/* Scratch channels; the cases shown here only touch r[0] (IF, BREAKC). */
3451 union tgsi_exec_channel r
[10];
/*
 * One case per opcode.  Most cases forward to an exec_xxx helper, or
 * hand a micro_xxx per-channel operation to one of the exec_vector_xxx
 * or exec_scalar_xxx helpers together with two TGSI_EXEC_DATA_xxx tags.
 * For the conversions (I2F, F2I, U2F, F2U) the first tag matches the
 * result type and the second the operand type.
 */
3455 switch (inst
->Instruction
.Opcode
) {
3456 case TGSI_OPCODE_ARL
:
3457 exec_vector_unary(mach
, inst
, micro_arl
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_FLOAT
);
3460 case TGSI_OPCODE_MOV
:
3461 exec_vector_unary(mach
, inst
, micro_mov
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_FLOAT
);
3464 case TGSI_OPCODE_LIT
:
3465 exec_lit(mach
, inst
);
3468 case TGSI_OPCODE_RCP
:
3469 exec_scalar_unary(mach
, inst
, micro_rcp
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3472 case TGSI_OPCODE_RSQ
:
3473 exec_scalar_unary(mach
, inst
, micro_rsq
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3476 case TGSI_OPCODE_EXP
:
3477 exec_exp(mach
, inst
);
3480 case TGSI_OPCODE_LOG
:
3481 exec_log(mach
, inst
);
3484 case TGSI_OPCODE_MUL
:
3485 exec_vector_binary(mach
, inst
, micro_mul
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3488 case TGSI_OPCODE_ADD
:
3489 exec_vector_binary(mach
, inst
, micro_add
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3492 case TGSI_OPCODE_DP3
:
3493 exec_dp3(mach
, inst
);
3496 case TGSI_OPCODE_DP4
:
3497 exec_dp4(mach
, inst
);
3500 case TGSI_OPCODE_DST
:
3501 exec_dst(mach
, inst
);
3504 case TGSI_OPCODE_MIN
:
3505 exec_vector_binary(mach
, inst
, micro_min
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3508 case TGSI_OPCODE_MAX
:
3509 exec_vector_binary(mach
, inst
, micro_max
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3512 case TGSI_OPCODE_SLT
:
3513 exec_vector_binary(mach
, inst
, micro_slt
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3516 case TGSI_OPCODE_SGE
:
3517 exec_vector_binary(mach
, inst
, micro_sge
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3520 case TGSI_OPCODE_MAD
:
3521 exec_vector_trinary(mach
, inst
, micro_mad
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3524 case TGSI_OPCODE_SUB
:
3525 exec_vector_binary(mach
, inst
, micro_sub
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3528 case TGSI_OPCODE_LRP
:
3529 exec_vector_trinary(mach
, inst
, micro_lrp
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3532 case TGSI_OPCODE_CND
:
3533 exec_vector_trinary(mach
, inst
, micro_cnd
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3536 case TGSI_OPCODE_DP2A
:
3537 exec_dp2a(mach
, inst
);
3540 case TGSI_OPCODE_FRC
:
3541 exec_vector_unary(mach
, inst
, micro_frc
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3544 case TGSI_OPCODE_CLAMP
:
3545 exec_vector_trinary(mach
, inst
, micro_clamp
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3548 case TGSI_OPCODE_FLR
:
3549 exec_vector_unary(mach
, inst
, micro_flr
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3552 case TGSI_OPCODE_ROUND
:
3553 exec_vector_unary(mach
, inst
, micro_rnd
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3556 case TGSI_OPCODE_EX2
:
3557 exec_scalar_unary(mach
, inst
, micro_exp2
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3560 case TGSI_OPCODE_LG2
:
3561 exec_scalar_unary(mach
, inst
, micro_lg2
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3564 case TGSI_OPCODE_POW
:
3565 exec_scalar_binary(mach
, inst
, micro_pow
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3568 case TGSI_OPCODE_XPD
:
3569 exec_xpd(mach
, inst
);
3572 case TGSI_OPCODE_ABS
:
3573 exec_vector_unary(mach
, inst
, micro_abs
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3576 case TGSI_OPCODE_RCC
:
3577 exec_scalar_unary(mach
, inst
, micro_rcc
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3580 case TGSI_OPCODE_DPH
:
3581 exec_dph(mach
, inst
);
3584 case TGSI_OPCODE_COS
:
3585 exec_scalar_unary(mach
, inst
, micro_cos
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3588 case TGSI_OPCODE_DDX
:
3589 exec_vector_unary(mach
, inst
, micro_ddx
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3592 case TGSI_OPCODE_DDY
:
3593 exec_vector_unary(mach
, inst
, micro_ddy
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3596 case TGSI_OPCODE_KILP
:
3597 exec_kilp (mach
, inst
);
3600 case TGSI_OPCODE_KIL
:
3601 exec_kil (mach
, inst
);
/* PK2H, PK2US, PK4B, PK4UB: no handler call is visible in this listing. */
3604 case TGSI_OPCODE_PK2H
:
3608 case TGSI_OPCODE_PK2US
:
3612 case TGSI_OPCODE_PK4B
:
3616 case TGSI_OPCODE_PK4UB
:
3620 case TGSI_OPCODE_RFL
:
3621 exec_rfl(mach
, inst
);
3624 case TGSI_OPCODE_SEQ
:
3625 exec_vector_binary(mach
, inst
, micro_seq
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3628 case TGSI_OPCODE_SFL
:
3629 exec_vector(mach
, inst
, micro_sfl
, TGSI_EXEC_DATA_FLOAT
);
3632 case TGSI_OPCODE_SGT
:
3633 exec_vector_binary(mach
, inst
, micro_sgt
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3636 case TGSI_OPCODE_SIN
:
3637 exec_scalar_unary(mach
, inst
, micro_sin
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3640 case TGSI_OPCODE_SLE
:
3641 exec_vector_binary(mach
, inst
, micro_sle
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3644 case TGSI_OPCODE_SNE
:
3645 exec_vector_binary(mach
, inst
, micro_sne
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3648 case TGSI_OPCODE_STR
:
3649 exec_vector(mach
, inst
, micro_str
, TGSI_EXEC_DATA_FLOAT
);
3652 case TGSI_OPCODE_TEX
:
3653 /* simple texture lookup */
3654 /* src[0] = texcoord */
3655 /* src[1] = sampler unit */
3656 exec_tex(mach
, inst
, TEX_MODIFIER_NONE
);
3659 case TGSI_OPCODE_TXB
:
3660 /* Texture lookup with lod bias */
3661 /* src[0] = texcoord (src[0].w = LOD bias) */
3662 /* src[1] = sampler unit */
3663 exec_tex(mach
, inst
, TEX_MODIFIER_LOD_BIAS
);
3666 case TGSI_OPCODE_TXD
:
3667 /* Texture lookup with explicit partial derivatives */
3668 /* src[0] = texcoord */
3669 /* src[1] = d[strq]/dx */
3670 /* src[2] = d[strq]/dy */
3671 /* src[3] = sampler unit */
3672 exec_txd(mach
, inst
);
3675 case TGSI_OPCODE_TXL
:
3676 /* Texture lookup with explicit LOD */
3677 /* src[0] = texcoord (src[0].w = LOD) */
3678 /* src[1] = sampler unit */
3679 exec_tex(mach
, inst
, TEX_MODIFIER_EXPLICIT_LOD
);
3682 case TGSI_OPCODE_TXP
:
3683 /* Texture lookup with projection */
3684 /* src[0] = texcoord (src[0].w = projection) */
3685 /* src[1] = sampler unit */
3686 exec_tex(mach
, inst
, TEX_MODIFIER_PROJECTED
);
/* UP2H, UP2US, UP4B, UP4UB: no handler call is visible in this listing. */
3689 case TGSI_OPCODE_UP2H
:
3693 case TGSI_OPCODE_UP2US
:
3697 case TGSI_OPCODE_UP4B
:
3701 case TGSI_OPCODE_UP4UB
:
3705 case TGSI_OPCODE_X2D
:
3706 exec_x2d(mach
, inst
);
3709 case TGSI_OPCODE_ARA
:
3713 case TGSI_OPCODE_ARR
:
3714 exec_vector_unary(mach
, inst
, micro_arr
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_FLOAT
);
3717 case TGSI_OPCODE_BRA
:
/*
 * CAL: when at least one channel is enabled, save the current depth of
 * the Cond, Loop, Cont, Switch and Break stacks plus the return address
 * in CallStack[CallStackTop], bump CallStackTop, push the current masks
 * (including FuncMask) on their stacks and set *pc to inst->Label.Label.
 */
3721 case TGSI_OPCODE_CAL
:
3722 /* skip the call if no execution channels are enabled */
3723 if (mach
->ExecMask
) {
3726 /* First, record the depths of the execution stacks.
3727 * This is important for deeply nested/looped return statements.
3728 * We have to unwind the stacks by the correct amount. For a
3729 * real code generator, we could determine the number of entries
3730 * to pop off each stack with simple static analysis and avoid
3731 * implementing this data structure at run time.
3733 mach
->CallStack
[mach
->CallStackTop
].CondStackTop
= mach
->CondStackTop
;
3734 mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
= mach
->LoopStackTop
;
3735 mach
->CallStack
[mach
->CallStackTop
].ContStackTop
= mach
->ContStackTop
;
3736 mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
= mach
->SwitchStackTop
;
3737 mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
= mach
->BreakStackTop
;
3738 /* note that PC was already incremented above */
3739 mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
= *pc
;
3741 mach
->CallStackTop
++;
3743 /* Second, push the Cond, Loop, Cont, Func stacks */
3744 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
3745 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3746 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3747 assert(mach
->SwitchStackTop
< TGSI_EXEC_MAX_SWITCH_NESTING
);
3748 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
3749 assert(mach
->FuncStackTop
< TGSI_EXEC_MAX_CALL_NESTING
);
3751 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
3752 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
3753 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
3754 mach
->SwitchStack
[mach
->SwitchStackTop
++] = mach
->Switch
;
3755 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
3756 mach
->FuncStack
[mach
->FuncStackTop
++] = mach
->FuncMask
;
3758 /* Finally, jump to the subroutine */
3759 *pc
= inst
->Label
.Label
;
/*
 * RET: remove the currently executing channels from FuncMask.  Once no
 * channel is left the return is performed: with an empty call stack the
 * Cond and Loop stack tops are reset, otherwise the call frame is popped
 * and every mask is reloaded from the stack depth saved by CAL before
 * *pc is set to the saved return address.
 */
3763 case TGSI_OPCODE_RET
:
3764 mach
->FuncMask
&= ~mach
->ExecMask
;
3765 UPDATE_EXEC_MASK(mach
);
3767 if (mach
->FuncMask
== 0x0) {
3768 /* really return now (otherwise, keep executing */
3770 if (mach
->CallStackTop
== 0) {
3771 /* returning from main() */
3772 mach
->CondStackTop
= 0;
3773 mach
->LoopStackTop
= 0;
3778 assert(mach
->CallStackTop
> 0);
3779 mach
->CallStackTop
--;
3781 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
3782 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
3784 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
3785 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
3787 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
3788 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
3790 mach
->SwitchStackTop
= mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
;
3791 mach
->Switch
= mach
->SwitchStack
[mach
->SwitchStackTop
];
3793 mach
->BreakStackTop
= mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
;
3794 mach
->BreakType
= mach
->BreakStack
[mach
->BreakStackTop
];
3796 assert(mach
->FuncStackTop
> 0);
3797 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
3799 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
3801 UPDATE_EXEC_MASK(mach
);
3805 case TGSI_OPCODE_SSG
:
3806 exec_vector_unary(mach
, inst
, micro_sgn
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3809 case TGSI_OPCODE_CMP
:
3810 exec_vector_trinary(mach
, inst
, micro_cmp
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3813 case TGSI_OPCODE_SCS
:
3814 exec_scs(mach
, inst
);
3817 case TGSI_OPCODE_NRM
:
3818 exec_nrm3(mach
, inst
);
3821 case TGSI_OPCODE_NRM4
:
3822 exec_nrm4(mach
, inst
);
3825 case TGSI_OPCODE_DIV
:
3826 exec_vector_binary(mach
, inst
, micro_div
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3829 case TGSI_OPCODE_DP2
:
3830 exec_dp2(mach
, inst
);
/*
 * IF: push CondMask, fetch the X channel of source 0 into r[0] and clear
 * CondMask bits one channel at a time.  The per-channel tests guarding
 * each clear are not visible in this listing.
 */
3833 case TGSI_OPCODE_IF
:
3835 assert(mach
->CondStackTop
< TGSI_EXEC_MAX_COND_NESTING
);
3836 mach
->CondStack
[mach
->CondStackTop
++] = mach
->CondMask
;
3837 FETCH( &r
[0], 0, TGSI_CHAN_X
);
3838 /* update CondMask */
3840 mach
->CondMask
&= ~0x1;
3843 mach
->CondMask
&= ~0x2;
3846 mach
->CondMask
&= ~0x4;
3849 mach
->CondMask
&= ~0x8;
3851 UPDATE_EXEC_MASK(mach
);
3852 /* Todo: If CondMask==0, jump to ELSE */
/* ELSE: the mask pushed by IF stays on the stack; it is only peeked. */
3855 case TGSI_OPCODE_ELSE
:
3856 /* invert CondMask wrt previous mask */
3859 assert(mach
->CondStackTop
> 0);
3860 prevMask
= mach
->CondStack
[mach
->CondStackTop
- 1];
3861 mach
->CondMask
= ~mach
->CondMask
& prevMask
;
3862 UPDATE_EXEC_MASK(mach
);
3863 /* Todo: If CondMask==0, jump to ENDIF */
/* ENDIF: pop the mask that IF pushed. */
3867 case TGSI_OPCODE_ENDIF
:
3869 assert(mach
->CondStackTop
> 0);
3870 mach
->CondMask
= mach
->CondStack
[--mach
->CondStackTop
];
3871 UPDATE_EXEC_MASK(mach
);
3874 case TGSI_OPCODE_END
:
3875 /* make sure we end primitives which haven't
3876 * been explicitly emitted */
3877 conditional_emit_primitive(mach
);
3878 /* halt execution */
3882 case TGSI_OPCODE_PUSHA
:
3886 case TGSI_OPCODE_POPA
:
3890 case TGSI_OPCODE_CEIL
:
3891 exec_vector_unary(mach
, inst
, micro_ceil
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3894 case TGSI_OPCODE_I2F
:
3895 exec_vector_unary(mach
, inst
, micro_i2f
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_INT
);
3898 case TGSI_OPCODE_NOT
:
3899 exec_vector_unary(mach
, inst
, micro_not
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3902 case TGSI_OPCODE_TRUNC
:
3903 exec_vector_unary(mach
, inst
, micro_trunc
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_FLOAT
);
3906 case TGSI_OPCODE_SHL
:
3907 exec_vector_binary(mach
, inst
, micro_shl
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3910 case TGSI_OPCODE_AND
:
3911 exec_vector_binary(mach
, inst
, micro_and
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3914 case TGSI_OPCODE_OR
:
3915 exec_vector_binary(mach
, inst
, micro_or
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3918 case TGSI_OPCODE_MOD
:
3919 exec_vector_binary(mach
, inst
, micro_mod
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
3922 case TGSI_OPCODE_XOR
:
3923 exec_vector_binary(mach
, inst
, micro_xor
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
3926 case TGSI_OPCODE_SAD
:
3930 case TGSI_OPCODE_TXF
:
3931 exec_txf(mach
, inst
);
3934 case TGSI_OPCODE_TXQ
:
3935 exec_txq(mach
, inst
);
3938 case TGSI_OPCODE_EMIT
:
3942 case TGSI_OPCODE_ENDPRIM
:
3943 emit_primitive(mach
);
/*
 * BGNLOOP: besides the masks, remember where the loop starts (*pc - 1)
 * and the enclosing BreakType, then mark breaks as "inside loop".
 */
3946 case TGSI_OPCODE_BGNLOOP
:
3947 /* push LoopMask and ContMasks */
3948 assert(mach
->LoopStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3949 assert(mach
->ContStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3950 assert(mach
->LoopLabelStackTop
< TGSI_EXEC_MAX_LOOP_NESTING
);
3951 assert(mach
->BreakStackTop
< TGSI_EXEC_MAX_BREAK_STACK
);
3953 mach
->LoopStack
[mach
->LoopStackTop
++] = mach
->LoopMask
;
3954 mach
->ContStack
[mach
->ContStackTop
++] = mach
->ContMask
;
3955 mach
->LoopLabelStack
[mach
->LoopLabelStackTop
++] = *pc
- 1;
3956 mach
->BreakStack
[mach
->BreakStackTop
++] = mach
->BreakType
;
3957 mach
->BreakType
= TGSI_EXEC_BREAK_INSIDE_LOOP
;
/*
 * ENDLOOP: re-enable channels that executed CONT, then either jump back
 * to the instruction after BGNLOOP (some channel still active) or pop the
 * Loop, Cont, LoopLabel and Break stacks.  The branch separating the two
 * paths is only partly visible in this listing.
 */
3960 case TGSI_OPCODE_ENDLOOP
:
3961 /* Restore ContMask, but don't pop */
3962 assert(mach
->ContStackTop
> 0);
3963 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
- 1];
3964 UPDATE_EXEC_MASK(mach
);
3965 if (mach
->ExecMask
) {
3966 /* repeat loop: jump to instruction just past BGNLOOP */
3967 assert(mach
->LoopLabelStackTop
> 0);
3968 *pc
= mach
->LoopLabelStack
[mach
->LoopLabelStackTop
- 1] + 1;
3971 /* exit loop: pop LoopMask */
3972 assert(mach
->LoopStackTop
> 0);
3973 mach
->LoopMask
= mach
->LoopStack
[--mach
->LoopStackTop
];
3975 assert(mach
->ContStackTop
> 0);
3976 mach
->ContMask
= mach
->ContStack
[--mach
->ContStackTop
];
3977 assert(mach
->LoopLabelStackTop
> 0);
3978 --mach
->LoopLabelStackTop
;
3980 mach
->BreakType
= mach
->BreakStack
[--mach
->BreakStackTop
];
3982 UPDATE_EXEC_MASK(mach
);
3985 case TGSI_OPCODE_BRK
:
3989 case TGSI_OPCODE_CONT
:
3990 /* turn off cont channels for each enabled exec channel */
3991 mach
->ContMask
&= ~mach
->ExecMask
;
3992 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3993 UPDATE_EXEC_MASK(mach
);
3996 case TGSI_OPCODE_BGNSUB
:
/*
 * ENDSUB: performs the same frame unwinding as the non-main path of RET
 * (pop CallStack, reload every mask from the saved stack depths, pop
 * FuncMask, restore *pc).
 */
4000 case TGSI_OPCODE_ENDSUB
:
4002 * XXX: This really should be a no-op. We should never reach this opcode.
4005 assert(mach
->CallStackTop
> 0);
4006 mach
->CallStackTop
--;
4008 mach
->CondStackTop
= mach
->CallStack
[mach
->CallStackTop
].CondStackTop
;
4009 mach
->CondMask
= mach
->CondStack
[mach
->CondStackTop
];
4011 mach
->LoopStackTop
= mach
->CallStack
[mach
->CallStackTop
].LoopStackTop
;
4012 mach
->LoopMask
= mach
->LoopStack
[mach
->LoopStackTop
];
4014 mach
->ContStackTop
= mach
->CallStack
[mach
->CallStackTop
].ContStackTop
;
4015 mach
->ContMask
= mach
->ContStack
[mach
->ContStackTop
];
4017 mach
->SwitchStackTop
= mach
->CallStack
[mach
->CallStackTop
].SwitchStackTop
;
4018 mach
->Switch
= mach
->SwitchStack
[mach
->SwitchStackTop
];
4020 mach
->BreakStackTop
= mach
->CallStack
[mach
->CallStackTop
].BreakStackTop
;
4021 mach
->BreakType
= mach
->BreakStack
[mach
->BreakStackTop
];
4023 assert(mach
->FuncStackTop
> 0);
4024 mach
->FuncMask
= mach
->FuncStack
[--mach
->FuncStackTop
];
4026 *pc
= mach
->CallStack
[mach
->CallStackTop
].ReturnAddr
;
4028 UPDATE_EXEC_MASK(mach
);
4031 case TGSI_OPCODE_NOP
:
/*
 * BREAKC: conditional break.  A channel leaves the loop (its LoopMask
 * bit is cleared) when its fetched condition is non-zero and the channel
 * is currently enabled in ExecMask.
 */
4034 case TGSI_OPCODE_BREAKC
:
4035 FETCH(&r
[0], 0, TGSI_CHAN_X
);
4036 /* update LoopMask */
4037 if (r
[0].u
[0] && (mach
->ExecMask
& 0x1)) {
4038 mach
->LoopMask
&= ~0x1;
4040 if (r
[0].u
[1] && (mach
->ExecMask
& 0x2)) {
4041 mach
->LoopMask
&= ~0x2;
4043 if (r
[0].u
[2] && (mach
->ExecMask
& 0x4)) {
4044 mach
->LoopMask
&= ~0x4;
4046 if (r
[0].u
[3] && (mach
->ExecMask
& 0x8)) {
4047 mach
->LoopMask
&= ~0x8;
4049 /* Todo: if mach->LoopMask == 0, jump to end of loop */
4050 UPDATE_EXEC_MASK(mach
);
4053 case TGSI_OPCODE_F2I
:
4054 exec_vector_unary(mach
, inst
, micro_f2i
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_FLOAT
);
4057 case TGSI_OPCODE_IDIV
:
4058 exec_vector_binary(mach
, inst
, micro_idiv
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
4061 case TGSI_OPCODE_IMAX
:
4062 exec_vector_binary(mach
, inst
, micro_imax
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
4065 case TGSI_OPCODE_IMIN
:
4066 exec_vector_binary(mach
, inst
, micro_imin
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
4069 case TGSI_OPCODE_INEG
:
4070 exec_vector_unary(mach
, inst
, micro_ineg
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
4073 case TGSI_OPCODE_ISGE
:
4074 exec_vector_binary(mach
, inst
, micro_isge
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
4077 case TGSI_OPCODE_ISHR
:
4078 exec_vector_binary(mach
, inst
, micro_ishr
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
4081 case TGSI_OPCODE_ISLT
:
4082 exec_vector_binary(mach
, inst
, micro_islt
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
4085 case TGSI_OPCODE_F2U
:
4086 exec_vector_unary(mach
, inst
, micro_f2u
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_FLOAT
);
4089 case TGSI_OPCODE_U2F
:
4090 exec_vector_unary(mach
, inst
, micro_u2f
, TGSI_EXEC_DATA_FLOAT
, TGSI_EXEC_DATA_UINT
);
4093 case TGSI_OPCODE_UADD
:
4094 exec_vector_binary(mach
, inst
, micro_uadd
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
4097 case TGSI_OPCODE_UDIV
:
4098 exec_vector_binary(mach
, inst
, micro_udiv
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
4101 case TGSI_OPCODE_UMAD
:
4102 exec_vector_trinary(mach
, inst
, micro_umad
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
4105 case TGSI_OPCODE_UMAX
:
4106 exec_vector_binary(mach
, inst
, micro_umax
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
4109 case TGSI_OPCODE_UMIN
:
4110 exec_vector_binary(mach
, inst
, micro_umin
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
4113 case TGSI_OPCODE_UMOD
:
4114 exec_vector_binary(mach
, inst
, micro_umod
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
4117 case TGSI_OPCODE_UMUL
:
4118 exec_vector_binary(mach
, inst
, micro_umul
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
4121 case TGSI_OPCODE_USEQ
:
4122 exec_vector_binary(mach
, inst
, micro_useq
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
4125 case TGSI_OPCODE_USGE
:
4126 exec_vector_binary(mach
, inst
, micro_usge
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
4129 case TGSI_OPCODE_USHR
:
4130 exec_vector_binary(mach
, inst
, micro_ushr
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
4133 case TGSI_OPCODE_USLT
:
4134 exec_vector_binary(mach
, inst
, micro_uslt
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
4137 case TGSI_OPCODE_USNE
:
4138 exec_vector_binary(mach
, inst
, micro_usne
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
4141 case TGSI_OPCODE_SWITCH
:
4142 exec_switch(mach
, inst
);
4145 case TGSI_OPCODE_CASE
:
4146 exec_case(mach
, inst
);
4149 case TGSI_OPCODE_DEFAULT
:
4153 case TGSI_OPCODE_ENDSWITCH
:
4154 exec_endswitch(mach
);
4157 case TGSI_OPCODE_SAMPLE_I
:
4161 case TGSI_OPCODE_SAMPLE_I_MS
:
4165 case TGSI_OPCODE_SAMPLE
:
4166 exec_sample(mach
, inst
, TEX_MODIFIER_NONE
);
4169 case TGSI_OPCODE_SAMPLE_B
:
4170 exec_sample(mach
, inst
, TEX_MODIFIER_LOD_BIAS
);
/*
 * NOTE(review): SAMPLE_C is dispatched exactly like SAMPLE, and
 * SAMPLE_C_LZ exactly like SAMPLE_B (LOD bias modifier).  Nothing
 * specific to a comparison or to a fixed level is passed here --
 * confirm whether exec_sample() handles that elsewhere.
 */
4173 case TGSI_OPCODE_SAMPLE_C
:
4174 exec_sample(mach
, inst
, TEX_MODIFIER_NONE
);
4177 case TGSI_OPCODE_SAMPLE_C_LZ
:
4178 exec_sample(mach
, inst
, TEX_MODIFIER_LOD_BIAS
);
4181 case TGSI_OPCODE_SAMPLE_D
:
4182 exec_sample_d(mach
, inst
);
4185 case TGSI_OPCODE_SAMPLE_L
:
4186 exec_sample(mach
, inst
, TEX_MODIFIER_EXPLICIT_LOD
);
4189 case TGSI_OPCODE_GATHER4
:
4193 case TGSI_OPCODE_SVIEWINFO
:
4197 case TGSI_OPCODE_SAMPLE_POS
:
4201 case TGSI_OPCODE_SAMPLE_INFO
:
4205 case TGSI_OPCODE_UARL
:
4206 exec_vector_unary(mach
, inst
, micro_uarl
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_UINT
);
4209 case TGSI_OPCODE_UCMP
:
4210 exec_vector_trinary(mach
, inst
, micro_ucmp
, TGSI_EXEC_DATA_UINT
, TGSI_EXEC_DATA_UINT
);
4213 case TGSI_OPCODE_IABS
:
4214 exec_vector_unary(mach
, inst
, micro_iabs
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
4217 case TGSI_OPCODE_ISSG
:
4218 exec_vector_unary(mach
, inst
, micro_isgn
, TGSI_EXEC_DATA_INT
, TGSI_EXEC_DATA_INT
);
/*
 * Compile-time switch, defined as 0 here.
 * NOTE(review): presumably gates the TEMP/OUT dump code inside
 * tgsi_exec_machine_run() -- the preprocessor guards are not visible in
 * this listing.
 */
4227 #define DEBUG_EXECUTION 0
4231 * Run TGSI interpreter.
4232 * \return bitmask of "alive" quad components
/*
 * tgsi_exec_machine_run: execute the program loaded in mach.
 *
 * Sets CondMask, LoopMask, ContMask, FuncMask, ExecMask and Switch.mask
 * to 0xf, clears the kill-mask and output temporaries (plus the
 * primitive counters for geometry programs), runs exec_declaration() for
 * every declaration and then executes instructions through
 * exec_instruction(), which receives &pc and updates it.
 *
 * Returns the bitwise complement of
 * Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0].
 */
4235 tgsi_exec_machine_run( struct tgsi_exec_machine
*mach
)
/* Enable all four channels in every mask. */
4240 mach
->CondMask
= 0xf;
4241 mach
->LoopMask
= 0xf;
4242 mach
->ContMask
= 0xf;
4243 mach
->FuncMask
= 0xf;
4244 mach
->ExecMask
= 0xf;
4246 mach
->Switch
.mask
= 0xf;
/* Every control-flow stack is expected to be empty on entry. */
4248 assert(mach
->CondStackTop
== 0);
4249 assert(mach
->LoopStackTop
== 0);
4250 assert(mach
->ContStackTop
== 0);
4251 assert(mach
->SwitchStackTop
== 0);
4252 assert(mach
->BreakStackTop
== 0);
4253 assert(mach
->CallStackTop
== 0);
/* Start with no channel killed and a zeroed TEMP_OUTPUT slot. */
4255 mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0] = 0;
4256 mach
->Temps
[TEMP_OUTPUT_I
].xyzw
[TEMP_OUTPUT_C
].u
[0] = 0;
4258 if( mach
->Processor
== TGSI_PROCESSOR_GEOMETRY
) {
4259 mach
->Temps
[TEMP_PRIMITIVE_I
].xyzw
[TEMP_PRIMITIVE_C
].u
[0] = 0;
4260 mach
->Primitives
[0] = 0;
4263 /* execute declarations (interpolants) */
4264 for (i
= 0; i
< mach
->NumDeclarations
; i
++) {
4265 exec_declaration( mach
, mach
->Declarations
+i
);
/*
 * Local snapshots of the temporaries and outputs.  After each instruction
 * they are compared with the live registers and every entry that changed
 * is printed (as float and as unsigned) and re-copied.
 * NOTE(review): presumably only compiled when DEBUG_EXECUTION is
 * non-zero -- the guards are not visible in this listing.
 */
4270 struct tgsi_exec_vector temps
[TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
];
4271 struct tgsi_exec_vector outputs
[PIPE_MAX_ATTRIBS
];
4274 memcpy(temps
, mach
->Temps
, sizeof(temps
));
4275 memcpy(outputs
, mach
->Outputs
, sizeof(outputs
));
4278 /* execute instructions, until pc is set to -1 */
4284 tgsi_dump_instruction(&mach
->Instructions
[pc
], inst
++);
4287 assert(pc
< (int) mach
->NumInstructions
);
4288 exec_instruction(mach
, mach
->Instructions
+ pc
, &pc
);
4291 for (i
= 0; i
< TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS
; i
++) {
4292 if (memcmp(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]))) {
4295 memcpy(&temps
[i
], &mach
->Temps
[i
], sizeof(temps
[i
]));
4296 debug_printf("TEMP[%2u] = ", i
);
4297 for (j
= 0; j
< 4; j
++) {
4301 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
4302 temps
[i
].xyzw
[0].f
[j
], temps
[i
].xyzw
[0].u
[j
],
4303 temps
[i
].xyzw
[1].f
[j
], temps
[i
].xyzw
[1].u
[j
],
4304 temps
[i
].xyzw
[2].f
[j
], temps
[i
].xyzw
[2].u
[j
],
4305 temps
[i
].xyzw
[3].f
[j
], temps
[i
].xyzw
[3].u
[j
]);
4309 for (i
= 0; i
< PIPE_MAX_ATTRIBS
; i
++) {
4310 if (memcmp(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]))) {
4313 memcpy(&outputs
[i
], &mach
->Outputs
[i
], sizeof(outputs
[i
]));
4314 debug_printf("OUT[%2u] = ", i
);
4315 for (j
= 0; j
< 4; j
++) {
4319 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
4320 outputs
[i
].xyzw
[0].f
[j
], outputs
[i
].xyzw
[0].u
[j
],
4321 outputs
[i
].xyzw
[1].f
[j
], outputs
[i
].xyzw
[1].u
[j
],
4322 outputs
[i
].xyzw
[2].f
[j
], outputs
[i
].xyzw
[2].u
[j
],
4323 outputs
[i
].xyzw
[3].f
[j
], outputs
[i
].xyzw
[3].u
[j
]);
/*
 * NOTE(review): the depth scaling below reads ctx->DrawBuffer, but no
 * ctx is declared anywhere in the visible part of this function.
 * Presumably this section is disabled by a preprocessor guard that is
 * not part of this listing -- confirm before enabling it.
 */
4332 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
4333 if (mach
->Processor
== TGSI_PROCESSOR_FRAGMENT
) {
4335 * Scale back depth component.
4337 for (i
= 0; i
< 4; i
++)
4338 mach
->Outputs
[0].xyzw
[2].f
[i
] *= ctx
->DrawBuffer
->_DepthMaxF
;
4342 /* Strictly speaking, these assertions aren't really needed but they
4343 * can potentially catch some bugs in the control flow code.
4345 assert(mach
->CondStackTop
== 0);
4346 assert(mach
->LoopStackTop
== 0);
4347 assert(mach
->ContStackTop
== 0);
4348 assert(mach
->SwitchStackTop
== 0);
4349 assert(mach
->BreakStackTop
== 0);
4350 assert(mach
->CallStackTop
== 0);
4352 return ~mach
->Temps
[TEMP_KILMASK_I
].xyzw
[TEMP_KILMASK_C
].u
[0];