tgsi: fix wrong reg used for unit for TGSI_OPCODE_TXF
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
 * The ExecMask is computed from five other masks (CondMask, LoopMask,
 * ContMask, Switch.mask and FuncMask; see UPDATE_EXEC_MASK) which are
 * controlled by the flow control instructions (for example IF/ELSE/ENDIF,
 * LOOP/ENDLOOP and CONT).
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_memory.h"
62 #include "util/u_math.h"
63
64
/* NOTE(review): not referenced in the visible part of this file; presumably
 * enables extra tracing while executing when set to 1 - confirm.
 */
#define DEBUG_EXECUTION 0


/* When non-zero, micro_exp2(), micro_lg2() and micro_pow() call the
 * util_fast_*() approximations instead of the libm functions.
 */
#define FAST_MATH 0

/* Lane index of each pixel inside a four-lane channel, as used by
 * micro_ddx() and micro_ddy().
 */
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3
74
75 static void
76 micro_abs(union tgsi_exec_channel *dst,
77 const union tgsi_exec_channel *src)
78 {
79 dst->f[0] = fabsf(src->f[0]);
80 dst->f[1] = fabsf(src->f[1]);
81 dst->f[2] = fabsf(src->f[2]);
82 dst->f[3] = fabsf(src->f[3]);
83 }
84
85 static void
86 micro_arl(union tgsi_exec_channel *dst,
87 const union tgsi_exec_channel *src)
88 {
89 dst->i[0] = (int)floorf(src->f[0]);
90 dst->i[1] = (int)floorf(src->f[1]);
91 dst->i[2] = (int)floorf(src->f[2]);
92 dst->i[3] = (int)floorf(src->f[3]);
93 }
94
95 static void
96 micro_arr(union tgsi_exec_channel *dst,
97 const union tgsi_exec_channel *src)
98 {
99 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
100 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
101 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
102 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
103 }
104
105 static void
106 micro_ceil(union tgsi_exec_channel *dst,
107 const union tgsi_exec_channel *src)
108 {
109 dst->f[0] = ceilf(src->f[0]);
110 dst->f[1] = ceilf(src->f[1]);
111 dst->f[2] = ceilf(src->f[2]);
112 dst->f[3] = ceilf(src->f[3]);
113 }
114
115 static void
116 micro_clamp(union tgsi_exec_channel *dst,
117 const union tgsi_exec_channel *src0,
118 const union tgsi_exec_channel *src1,
119 const union tgsi_exec_channel *src2)
120 {
121 dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0];
122 dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1];
123 dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2];
124 dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3];
125 }
126
127 static void
128 micro_cmp(union tgsi_exec_channel *dst,
129 const union tgsi_exec_channel *src0,
130 const union tgsi_exec_channel *src1,
131 const union tgsi_exec_channel *src2)
132 {
133 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
134 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
135 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
136 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
137 }
138
139 static void
140 micro_cnd(union tgsi_exec_channel *dst,
141 const union tgsi_exec_channel *src0,
142 const union tgsi_exec_channel *src1,
143 const union tgsi_exec_channel *src2)
144 {
145 dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0];
146 dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1];
147 dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2];
148 dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3];
149 }
150
151 static void
152 micro_cos(union tgsi_exec_channel *dst,
153 const union tgsi_exec_channel *src)
154 {
155 dst->f[0] = cosf(src->f[0]);
156 dst->f[1] = cosf(src->f[1]);
157 dst->f[2] = cosf(src->f[2]);
158 dst->f[3] = cosf(src->f[3]);
159 }
160
161 static void
162 micro_ddx(union tgsi_exec_channel *dst,
163 const union tgsi_exec_channel *src)
164 {
165 dst->f[0] =
166 dst->f[1] =
167 dst->f[2] =
168 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
169 }
170
171 static void
172 micro_ddy(union tgsi_exec_channel *dst,
173 const union tgsi_exec_channel *src)
174 {
175 dst->f[0] =
176 dst->f[1] =
177 dst->f[2] =
178 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
179 }
180
181 static void
182 micro_exp2(union tgsi_exec_channel *dst,
183 const union tgsi_exec_channel *src)
184 {
185 #if FAST_MATH
186 dst->f[0] = util_fast_exp2(src->f[0]);
187 dst->f[1] = util_fast_exp2(src->f[1]);
188 dst->f[2] = util_fast_exp2(src->f[2]);
189 dst->f[3] = util_fast_exp2(src->f[3]);
190 #else
191 #if DEBUG
192 /* Inf is okay for this instruction, so clamp it to silence assertions. */
193 uint i;
194 union tgsi_exec_channel clamped;
195
196 for (i = 0; i < 4; i++) {
197 if (src->f[i] > 127.99999f) {
198 clamped.f[i] = 127.99999f;
199 } else if (src->f[i] < -126.99999f) {
200 clamped.f[i] = -126.99999f;
201 } else {
202 clamped.f[i] = src->f[i];
203 }
204 }
205 src = &clamped;
206 #endif /* DEBUG */
207
208 dst->f[0] = powf(2.0f, src->f[0]);
209 dst->f[1] = powf(2.0f, src->f[1]);
210 dst->f[2] = powf(2.0f, src->f[2]);
211 dst->f[3] = powf(2.0f, src->f[3]);
212 #endif /* FAST_MATH */
213 }
214
215 static void
216 micro_flr(union tgsi_exec_channel *dst,
217 const union tgsi_exec_channel *src)
218 {
219 dst->f[0] = floorf(src->f[0]);
220 dst->f[1] = floorf(src->f[1]);
221 dst->f[2] = floorf(src->f[2]);
222 dst->f[3] = floorf(src->f[3]);
223 }
224
225 static void
226 micro_frc(union tgsi_exec_channel *dst,
227 const union tgsi_exec_channel *src)
228 {
229 dst->f[0] = src->f[0] - floorf(src->f[0]);
230 dst->f[1] = src->f[1] - floorf(src->f[1]);
231 dst->f[2] = src->f[2] - floorf(src->f[2]);
232 dst->f[3] = src->f[3] - floorf(src->f[3]);
233 }
234
235 static void
236 micro_iabs(union tgsi_exec_channel *dst,
237 const union tgsi_exec_channel *src)
238 {
239 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
240 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
241 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
242 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
243 }
244
245 static void
246 micro_ineg(union tgsi_exec_channel *dst,
247 const union tgsi_exec_channel *src)
248 {
249 dst->i[0] = -src->i[0];
250 dst->i[1] = -src->i[1];
251 dst->i[2] = -src->i[2];
252 dst->i[3] = -src->i[3];
253 }
254
255 static void
256 micro_lg2(union tgsi_exec_channel *dst,
257 const union tgsi_exec_channel *src)
258 {
259 #if FAST_MATH
260 dst->f[0] = util_fast_log2(src->f[0]);
261 dst->f[1] = util_fast_log2(src->f[1]);
262 dst->f[2] = util_fast_log2(src->f[2]);
263 dst->f[3] = util_fast_log2(src->f[3]);
264 #else
265 dst->f[0] = logf(src->f[0]) * 1.442695f;
266 dst->f[1] = logf(src->f[1]) * 1.442695f;
267 dst->f[2] = logf(src->f[2]) * 1.442695f;
268 dst->f[3] = logf(src->f[3]) * 1.442695f;
269 #endif
270 }
271
272 static void
273 micro_lrp(union tgsi_exec_channel *dst,
274 const union tgsi_exec_channel *src0,
275 const union tgsi_exec_channel *src1,
276 const union tgsi_exec_channel *src2)
277 {
278 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
279 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
280 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
281 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
282 }
283
284 static void
285 micro_mad(union tgsi_exec_channel *dst,
286 const union tgsi_exec_channel *src0,
287 const union tgsi_exec_channel *src1,
288 const union tgsi_exec_channel *src2)
289 {
290 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
291 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
292 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
293 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
294 }
295
296 static void
297 micro_mov(union tgsi_exec_channel *dst,
298 const union tgsi_exec_channel *src)
299 {
300 dst->u[0] = src->u[0];
301 dst->u[1] = src->u[1];
302 dst->u[2] = src->u[2];
303 dst->u[3] = src->u[3];
304 }
305
306 static void
307 micro_rcp(union tgsi_exec_channel *dst,
308 const union tgsi_exec_channel *src)
309 {
310 #if 0 /* for debugging */
311 assert(src->f[0] != 0.0f);
312 assert(src->f[1] != 0.0f);
313 assert(src->f[2] != 0.0f);
314 assert(src->f[3] != 0.0f);
315 #endif
316 dst->f[0] = 1.0f / src->f[0];
317 dst->f[1] = 1.0f / src->f[1];
318 dst->f[2] = 1.0f / src->f[2];
319 dst->f[3] = 1.0f / src->f[3];
320 }
321
322 static void
323 micro_rnd(union tgsi_exec_channel *dst,
324 const union tgsi_exec_channel *src)
325 {
326 dst->f[0] = floorf(src->f[0] + 0.5f);
327 dst->f[1] = floorf(src->f[1] + 0.5f);
328 dst->f[2] = floorf(src->f[2] + 0.5f);
329 dst->f[3] = floorf(src->f[3] + 0.5f);
330 }
331
332 static void
333 micro_rsq(union tgsi_exec_channel *dst,
334 const union tgsi_exec_channel *src)
335 {
336 #if 0 /* for debugging */
337 assert(src->f[0] != 0.0f);
338 assert(src->f[1] != 0.0f);
339 assert(src->f[2] != 0.0f);
340 assert(src->f[3] != 0.0f);
341 #endif
342 dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
343 dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
344 dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
345 dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
346 }
347
348 static void
349 micro_sqrt(union tgsi_exec_channel *dst,
350 const union tgsi_exec_channel *src)
351 {
352 dst->f[0] = sqrtf(fabsf(src->f[0]));
353 dst->f[1] = sqrtf(fabsf(src->f[1]));
354 dst->f[2] = sqrtf(fabsf(src->f[2]));
355 dst->f[3] = sqrtf(fabsf(src->f[3]));
356 }
357
358 static void
359 micro_seq(union tgsi_exec_channel *dst,
360 const union tgsi_exec_channel *src0,
361 const union tgsi_exec_channel *src1)
362 {
363 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
364 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
365 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
366 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
367 }
368
369 static void
370 micro_sge(union tgsi_exec_channel *dst,
371 const union tgsi_exec_channel *src0,
372 const union tgsi_exec_channel *src1)
373 {
374 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
375 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
376 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
377 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
378 }
379
380 static void
381 micro_sgn(union tgsi_exec_channel *dst,
382 const union tgsi_exec_channel *src)
383 {
384 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
385 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
386 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
387 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
388 }
389
390 static void
391 micro_isgn(union tgsi_exec_channel *dst,
392 const union tgsi_exec_channel *src)
393 {
394 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
395 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
396 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
397 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
398 }
399
400 static void
401 micro_sgt(union tgsi_exec_channel *dst,
402 const union tgsi_exec_channel *src0,
403 const union tgsi_exec_channel *src1)
404 {
405 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
406 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
407 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
408 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
409 }
410
411 static void
412 micro_sin(union tgsi_exec_channel *dst,
413 const union tgsi_exec_channel *src)
414 {
415 dst->f[0] = sinf(src->f[0]);
416 dst->f[1] = sinf(src->f[1]);
417 dst->f[2] = sinf(src->f[2]);
418 dst->f[3] = sinf(src->f[3]);
419 }
420
421 static void
422 micro_sle(union tgsi_exec_channel *dst,
423 const union tgsi_exec_channel *src0,
424 const union tgsi_exec_channel *src1)
425 {
426 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
427 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
428 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
429 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
430 }
431
432 static void
433 micro_slt(union tgsi_exec_channel *dst,
434 const union tgsi_exec_channel *src0,
435 const union tgsi_exec_channel *src1)
436 {
437 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
438 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
439 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
440 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
441 }
442
443 static void
444 micro_sne(union tgsi_exec_channel *dst,
445 const union tgsi_exec_channel *src0,
446 const union tgsi_exec_channel *src1)
447 {
448 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
449 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
450 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
451 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
452 }
453
454 static void
455 micro_sfl(union tgsi_exec_channel *dst)
456 {
457 dst->f[0] = 0.0f;
458 dst->f[1] = 0.0f;
459 dst->f[2] = 0.0f;
460 dst->f[3] = 0.0f;
461 }
462
463 static void
464 micro_str(union tgsi_exec_channel *dst)
465 {
466 dst->f[0] = 1.0f;
467 dst->f[1] = 1.0f;
468 dst->f[2] = 1.0f;
469 dst->f[3] = 1.0f;
470 }
471
472 static void
473 micro_trunc(union tgsi_exec_channel *dst,
474 const union tgsi_exec_channel *src)
475 {
476 dst->f[0] = (float)(int)src->f[0];
477 dst->f[1] = (float)(int)src->f[1];
478 dst->f[2] = (float)(int)src->f[2];
479 dst->f[3] = (float)(int)src->f[3];
480 }
481
482
/**
 * Data type tags (float / signed int / unsigned int, going by the names).
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT = 0,
   TGSI_EXEC_DATA_INT   = 1,
   TGSI_EXEC_DATA_UINT  = 2
};
488
/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel).
 * Each name is a plain alias for the TGSI_EXEC_TEMP_* constant of the same
 * name.
 */
#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
498
499
/**
 * Recompute the execution mask as the bitwise AND of CondMask, LoopMask,
 * ContMask, Switch.mask and FuncMask.
 *
 * MACH is a pointer to the machine.  It is parenthesized so any pointer
 * expression may be passed, but it is evaluated several times and must
 * therefore be free of side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
503
504
505 static const union tgsi_exec_channel ZeroVec =
506 { { 0.0, 0.0, 0.0, 0.0 } };
507
508 static const union tgsi_exec_channel OneVec = {
509 {1.0f, 1.0f, 1.0f, 1.0f}
510 };
511
512 static const union tgsi_exec_channel P128Vec = {
513 {128.0f, 128.0f, 128.0f, 128.0f}
514 };
515
516 static const union tgsi_exec_channel M128Vec = {
517 {-128.0f, -128.0f, -128.0f, -128.0f}
518 };
519
520
521 /**
522 * Assert that none of the float values in 'chan' are infinite or NaN.
523 * NaN and Inf may occur normally during program execution and should
524 * not lead to crashes, etc. But when debugging, it's helpful to catch
525 * them.
526 */
527 static INLINE void
528 check_inf_or_nan(const union tgsi_exec_channel *chan)
529 {
530 assert(!util_is_inf_or_nan((chan)->f[0]));
531 assert(!util_is_inf_or_nan((chan)->f[1]));
532 assert(!util_is_inf_or_nan((chan)->f[2]));
533 assert(!util_is_inf_or_nan((chan)->f[3]));
534 }
535
536
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of 'chan', prefixed by 'msg'.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
545
546
#ifdef DEBUG
/**
 * Debug helper: print the X, Y, Z and W channels of temporary register
 * 'index', one channel per line, as floats.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   const struct tgsi_exec_vector *vec = mach->Temps + index;
   int c;

   debug_printf("Temp[%u] =\n", index);
   for (c = 0; c < 4; c++) {
      const union tgsi_exec_channel *chan = &vec->xyzw[c];

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   "XYZW"[c],
                   chan->f[0], chan->f[1], chan->f[2], chan->f[3]);
   }
}
#endif
564
565
566 void
567 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
568 unsigned num_bufs,
569 const void **bufs,
570 const unsigned *buf_sizes)
571 {
572 unsigned i;
573
574 for (i = 0; i < num_bufs; i++) {
575 mach->Consts[i] = bufs[i];
576 mach->ConstsSize[i] = buf_sizes[i];
577 }
578 }
579
580
581 /**
582 * Check if there's a potential src/dst register data dependency when
583 * using SOA execution.
584 * Example:
585 * MOV T, T.yxwz;
586 * This would expand into:
587 * MOV t0, t1;
588 * MOV t1, t0;
589 * MOV t2, t3;
590 * MOV t3, t2;
591 * The second instruction will have the wrong value for t0 if executed as-is.
592 */
593 boolean
594 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
595 {
596 uint i, chan;
597
598 uint writemask = inst->Dst[0].Register.WriteMask;
599 if (writemask == TGSI_WRITEMASK_X ||
600 writemask == TGSI_WRITEMASK_Y ||
601 writemask == TGSI_WRITEMASK_Z ||
602 writemask == TGSI_WRITEMASK_W ||
603 writemask == TGSI_WRITEMASK_NONE) {
604 /* no chance of data dependency */
605 return FALSE;
606 }
607
608 /* loop over src regs */
609 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
610 if ((inst->Src[i].Register.File ==
611 inst->Dst[0].Register.File) &&
612 ((inst->Src[i].Register.Index ==
613 inst->Dst[0].Register.Index) ||
614 inst->Src[i].Register.Indirect ||
615 inst->Dst[0].Register.Indirect)) {
616 /* loop over dest channels */
617 uint channelsWritten = 0x0;
618 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
619 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
620 /* check if we're reading a channel that's been written */
621 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
622 if (channelsWritten & (1 << swizzle)) {
623 return TRUE;
624 }
625
626 channelsWritten |= (1 << chan);
627 }
628 }
629 }
630 }
631 return FALSE;
632 }
633
634
635 /**
636 * Initialize machine state by expanding tokens to full instructions,
637 * allocating temporary storage, setting up constants, etc.
638 * After this, we can call tgsi_exec_machine_run() many times.
639 */
640 void
641 tgsi_exec_machine_bind_shader(
642 struct tgsi_exec_machine *mach,
643 const struct tgsi_token *tokens,
644 struct tgsi_sampler *sampler)
645 {
646 uint k;
647 struct tgsi_parse_context parse;
648 struct tgsi_full_instruction *instructions;
649 struct tgsi_full_declaration *declarations;
650 uint maxInstructions = 10, numInstructions = 0;
651 uint maxDeclarations = 10, numDeclarations = 0;
652
653 #if 0
654 tgsi_dump(tokens, 0);
655 #endif
656
657 util_init_math();
658
659
660 mach->Tokens = tokens;
661 mach->Sampler = sampler;
662
663 if (!tokens) {
664 /* unbind and free all */
665 FREE(mach->Declarations);
666 mach->Declarations = NULL;
667 mach->NumDeclarations = 0;
668
669 FREE(mach->Instructions);
670 mach->Instructions = NULL;
671 mach->NumInstructions = 0;
672
673 return;
674 }
675
676 k = tgsi_parse_init (&parse, mach->Tokens);
677 if (k != TGSI_PARSE_OK) {
678 debug_printf( "Problem parsing!\n" );
679 return;
680 }
681
682 mach->Processor = parse.FullHeader.Processor.Processor;
683 mach->ImmLimit = 0;
684
685 if (mach->Processor == TGSI_PROCESSOR_GEOMETRY &&
686 !mach->UsedGeometryShader) {
687 struct tgsi_exec_vector *inputs;
688 struct tgsi_exec_vector *outputs;
689
690 inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
691 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS,
692 16);
693
694 if (!inputs)
695 return;
696
697 outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
698 TGSI_MAX_TOTAL_VERTICES, 16);
699
700 if (!outputs) {
701 align_free(inputs);
702 return;
703 }
704
705 align_free(mach->Inputs);
706 align_free(mach->Outputs);
707
708 mach->Inputs = inputs;
709 mach->Outputs = outputs;
710 mach->UsedGeometryShader = TRUE;
711 }
712
713 declarations = (struct tgsi_full_declaration *)
714 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
715
716 if (!declarations) {
717 return;
718 }
719
720 instructions = (struct tgsi_full_instruction *)
721 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
722
723 if (!instructions) {
724 FREE( declarations );
725 return;
726 }
727
728 while( !tgsi_parse_end_of_tokens( &parse ) ) {
729 uint i;
730
731 tgsi_parse_token( &parse );
732 switch( parse.FullToken.Token.Type ) {
733 case TGSI_TOKEN_TYPE_DECLARATION:
734 /* save expanded declaration */
735 if (numDeclarations == maxDeclarations) {
736 declarations = REALLOC(declarations,
737 maxDeclarations
738 * sizeof(struct tgsi_full_declaration),
739 (maxDeclarations + 10)
740 * sizeof(struct tgsi_full_declaration));
741 maxDeclarations += 10;
742 }
743 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
744 unsigned reg;
745 for (reg = parse.FullToken.FullDeclaration.Range.First;
746 reg <= parse.FullToken.FullDeclaration.Range.Last;
747 ++reg) {
748 ++mach->NumOutputs;
749 }
750 }
751 if (parse.FullToken.FullDeclaration.Declaration.File ==
752 TGSI_FILE_IMMEDIATE_ARRAY) {
753 unsigned reg;
754 struct tgsi_full_declaration *decl =
755 &parse.FullToken.FullDeclaration;
756 debug_assert(decl->Range.Last < TGSI_EXEC_NUM_IMMEDIATES);
757 for (reg = decl->Range.First; reg <= decl->Range.Last; ++reg) {
758 for( i = 0; i < 4; i++ ) {
759 int idx = reg * 4 + i;
760 mach->ImmArray[reg][i] = decl->ImmediateData.u[idx].Float;
761 }
762 }
763 }
764 memcpy(declarations + numDeclarations,
765 &parse.FullToken.FullDeclaration,
766 sizeof(declarations[0]));
767 numDeclarations++;
768 break;
769
770 case TGSI_TOKEN_TYPE_IMMEDIATE:
771 {
772 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
773 assert( size <= 4 );
774 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
775
776 for( i = 0; i < size; i++ ) {
777 mach->Imms[mach->ImmLimit][i] =
778 parse.FullToken.FullImmediate.u[i].Float;
779 }
780 mach->ImmLimit += 1;
781 }
782 break;
783
784 case TGSI_TOKEN_TYPE_INSTRUCTION:
785
786 /* save expanded instruction */
787 if (numInstructions == maxInstructions) {
788 instructions = REALLOC(instructions,
789 maxInstructions
790 * sizeof(struct tgsi_full_instruction),
791 (maxInstructions + 10)
792 * sizeof(struct tgsi_full_instruction));
793 maxInstructions += 10;
794 }
795
796 memcpy(instructions + numInstructions,
797 &parse.FullToken.FullInstruction,
798 sizeof(instructions[0]));
799
800 numInstructions++;
801 break;
802
803 case TGSI_TOKEN_TYPE_PROPERTY:
804 break;
805
806 default:
807 assert( 0 );
808 }
809 }
810 tgsi_parse_free (&parse);
811
812 FREE(mach->Declarations);
813 mach->Declarations = declarations;
814 mach->NumDeclarations = numDeclarations;
815
816 FREE(mach->Instructions);
817 mach->Instructions = instructions;
818 mach->NumInstructions = numInstructions;
819 }
820
821
822 struct tgsi_exec_machine *
823 tgsi_exec_machine_create( void )
824 {
825 struct tgsi_exec_machine *mach;
826 uint i;
827
828 mach = align_malloc( sizeof *mach, 16 );
829 if (!mach)
830 goto fail;
831
832 memset(mach, 0, sizeof(*mach));
833
834 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
835 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
836 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
837
838 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
839 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
840 if (!mach->Inputs || !mach->Outputs)
841 goto fail;
842
843 /* Setup constants needed by the SSE2 executor. */
844 for( i = 0; i < 4; i++ ) {
845 mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000;
846 mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF;
847 mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000;
848 mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */
849 mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f;
850 mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */
851 mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f;
852 mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f;
853 mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f;
854 mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f;
855 }
856
857 #ifdef DEBUG
858 /* silence warnings */
859 (void) print_chan;
860 (void) print_temp;
861 #endif
862
863 return mach;
864
865 fail:
866 if (mach) {
867 align_free(mach->Inputs);
868 align_free(mach->Outputs);
869 align_free(mach);
870 }
871 return NULL;
872 }
873
874
875 void
876 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
877 {
878 if (mach) {
879 FREE(mach->Instructions);
880 FREE(mach->Declarations);
881
882 align_free(mach->Inputs);
883 align_free(mach->Outputs);
884
885 align_free(mach);
886 }
887 }
888
889 static void
890 micro_add(union tgsi_exec_channel *dst,
891 const union tgsi_exec_channel *src0,
892 const union tgsi_exec_channel *src1)
893 {
894 dst->f[0] = src0->f[0] + src1->f[0];
895 dst->f[1] = src0->f[1] + src1->f[1];
896 dst->f[2] = src0->f[2] + src1->f[2];
897 dst->f[3] = src0->f[3] + src1->f[3];
898 }
899
900 static void
901 micro_div(
902 union tgsi_exec_channel *dst,
903 const union tgsi_exec_channel *src0,
904 const union tgsi_exec_channel *src1 )
905 {
906 if (src1->f[0] != 0) {
907 dst->f[0] = src0->f[0] / src1->f[0];
908 }
909 if (src1->f[1] != 0) {
910 dst->f[1] = src0->f[1] / src1->f[1];
911 }
912 if (src1->f[2] != 0) {
913 dst->f[2] = src0->f[2] / src1->f[2];
914 }
915 if (src1->f[3] != 0) {
916 dst->f[3] = src0->f[3] / src1->f[3];
917 }
918 }
919
920 static void
921 micro_rcc(union tgsi_exec_channel *dst,
922 const union tgsi_exec_channel *src)
923 {
924 uint i;
925
926 for (i = 0; i < 4; i++) {
927 float recip = 1.0f / src->f[i];
928
929 if (recip > 0.0f) {
930 if (recip > 1.884467e+019f) {
931 dst->f[i] = 1.884467e+019f;
932 }
933 else if (recip < 5.42101e-020f) {
934 dst->f[i] = 5.42101e-020f;
935 }
936 else {
937 dst->f[i] = recip;
938 }
939 }
940 else {
941 if (recip < -1.884467e+019f) {
942 dst->f[i] = -1.884467e+019f;
943 }
944 else if (recip > -5.42101e-020f) {
945 dst->f[i] = -5.42101e-020f;
946 }
947 else {
948 dst->f[i] = recip;
949 }
950 }
951 }
952 }
953
954 static void
955 micro_lt(
956 union tgsi_exec_channel *dst,
957 const union tgsi_exec_channel *src0,
958 const union tgsi_exec_channel *src1,
959 const union tgsi_exec_channel *src2,
960 const union tgsi_exec_channel *src3 )
961 {
962 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
963 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
964 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
965 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
966 }
967
968 static void
969 micro_max(union tgsi_exec_channel *dst,
970 const union tgsi_exec_channel *src0,
971 const union tgsi_exec_channel *src1)
972 {
973 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
974 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
975 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
976 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
977 }
978
979 static void
980 micro_min(union tgsi_exec_channel *dst,
981 const union tgsi_exec_channel *src0,
982 const union tgsi_exec_channel *src1)
983 {
984 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
985 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
986 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
987 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
988 }
989
990 static void
991 micro_mul(union tgsi_exec_channel *dst,
992 const union tgsi_exec_channel *src0,
993 const union tgsi_exec_channel *src1)
994 {
995 dst->f[0] = src0->f[0] * src1->f[0];
996 dst->f[1] = src0->f[1] * src1->f[1];
997 dst->f[2] = src0->f[2] * src1->f[2];
998 dst->f[3] = src0->f[3] * src1->f[3];
999 }
1000
1001 static void
1002 micro_neg(
1003 union tgsi_exec_channel *dst,
1004 const union tgsi_exec_channel *src )
1005 {
1006 dst->f[0] = -src->f[0];
1007 dst->f[1] = -src->f[1];
1008 dst->f[2] = -src->f[2];
1009 dst->f[3] = -src->f[3];
1010 }
1011
1012 static void
1013 micro_pow(
1014 union tgsi_exec_channel *dst,
1015 const union tgsi_exec_channel *src0,
1016 const union tgsi_exec_channel *src1 )
1017 {
1018 #if FAST_MATH
1019 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
1020 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
1021 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
1022 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
1023 #else
1024 dst->f[0] = powf( src0->f[0], src1->f[0] );
1025 dst->f[1] = powf( src0->f[1], src1->f[1] );
1026 dst->f[2] = powf( src0->f[2], src1->f[2] );
1027 dst->f[3] = powf( src0->f[3], src1->f[3] );
1028 #endif
1029 }
1030
1031 static void
1032 micro_sub(union tgsi_exec_channel *dst,
1033 const union tgsi_exec_channel *src0,
1034 const union tgsi_exec_channel *src1)
1035 {
1036 dst->f[0] = src0->f[0] - src1->f[0];
1037 dst->f[1] = src0->f[1] - src1->f[1];
1038 dst->f[2] = src0->f[2] - src1->f[2];
1039 dst->f[3] = src0->f[3] - src1->f[3];
1040 }
1041
1042 static void
1043 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1044 const uint chan_index,
1045 const uint file,
1046 const uint swizzle,
1047 const union tgsi_exec_channel *index,
1048 const union tgsi_exec_channel *index2D,
1049 union tgsi_exec_channel *chan)
1050 {
1051 uint i;
1052
1053 assert(swizzle < 4);
1054
1055 switch (file) {
1056 case TGSI_FILE_CONSTANT:
1057 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1058 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
1059 assert(mach->Consts[index2D->i[i]]);
1060
1061 if (index->i[i] < 0) {
1062 chan->u[i] = 0;
1063 } else {
1064 /* NOTE: copying the const value as a uint instead of float */
1065 const uint constbuf = index2D->i[i];
1066 const uint *buf = (const uint *)mach->Consts[constbuf];
1067 const int pos = index->i[i] * 4 + swizzle;
1068 /* const buffer bounds check */
1069 if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) {
1070 if (0) {
1071 /* Debug: print warning */
1072 static int count = 0;
1073 if (count++ < 100)
1074 debug_printf("TGSI Exec: const buffer index %d"
1075 " out of bounds\n", pos);
1076 }
1077 chan->u[i] = 0;
1078 }
1079 else
1080 chan->u[i] = buf[pos];
1081 }
1082 }
1083 break;
1084
1085 case TGSI_FILE_INPUT:
1086 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1087 /*
1088 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1089 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1090 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1091 index2D->i[i], index->i[i]);
1092 }*/
1093 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1094 assert(pos >= 0);
1095 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1096 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1097 }
1098 break;
1099
1100 case TGSI_FILE_SYSTEM_VALUE:
1101 /* XXX no swizzling at this point. Will be needed if we put
1102 * gl_FragCoord, for example, in a sys value register.
1103 */
1104 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1105 chan->u[i] = mach->SystemValue[index->i[i]].u[i];
1106 }
1107 break;
1108
1109 case TGSI_FILE_TEMPORARY:
1110 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1111 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1112 assert(index2D->i[i] == 0);
1113
1114 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1115 }
1116 break;
1117
1118 case TGSI_FILE_TEMPORARY_ARRAY:
1119 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1120 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1121 assert(index2D->i[i] < TGSI_EXEC_NUM_TEMP_ARRAYS);
1122
1123 chan->u[i] =
1124 mach->TempArray[index2D->i[i]][index->i[i]].xyzw[swizzle].u[i];
1125 }
1126 break;
1127
1128 case TGSI_FILE_IMMEDIATE:
1129 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1130 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1131 assert(index2D->i[i] == 0);
1132
1133 chan->f[i] = mach->Imms[index->i[i]][swizzle];
1134 }
1135 break;
1136
1137 case TGSI_FILE_IMMEDIATE_ARRAY:
1138 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1139 assert(index2D->i[i] == 0);
1140
1141 chan->f[i] = mach->ImmArray[index->i[i]][swizzle];
1142 }
1143 break;
1144
1145 case TGSI_FILE_ADDRESS:
1146 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1147 assert(index->i[i] >= 0);
1148 assert(index2D->i[i] == 0);
1149
1150 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1151 }
1152 break;
1153
1154 case TGSI_FILE_PREDICATE:
1155 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1156 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS);
1157 assert(index2D->i[i] == 0);
1158
1159 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i];
1160 }
1161 break;
1162
1163 case TGSI_FILE_OUTPUT:
1164 /* vertex/fragment output vars can be read too */
1165 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1166 assert(index->i[i] >= 0);
1167 assert(index2D->i[i] == 0);
1168
1169 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1170 }
1171 break;
1172
1173 default:
1174 assert(0);
1175 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1176 chan->u[i] = 0;
1177 }
1178 }
1179 }
1180
1181 static void
1182 fetch_source(const struct tgsi_exec_machine *mach,
1183 union tgsi_exec_channel *chan,
1184 const struct tgsi_full_src_register *reg,
1185 const uint chan_index,
1186 enum tgsi_exec_datatype src_datatype)
1187 {
1188 union tgsi_exec_channel index;
1189 union tgsi_exec_channel index2D;
1190 uint swizzle;
1191
1192 /* We start with a direct index into a register file.
1193 *
1194 * file[1],
1195 * where:
1196 * file = Register.File
1197 * [1] = Register.Index
1198 */
1199 index.i[0] =
1200 index.i[1] =
1201 index.i[2] =
1202 index.i[3] = reg->Register.Index;
1203
1204 /* There is an extra source register that indirectly subscripts
1205 * a register file. The direct index now becomes an offset
1206 * that is being added to the indirect register.
1207 *
1208 * file[ind[2].x+1],
1209 * where:
1210 * ind = Indirect.File
1211 * [2] = Indirect.Index
1212 * .x = Indirect.SwizzleX
1213 */
1214 if (reg->Register.Indirect) {
1215 union tgsi_exec_channel index2;
1216 union tgsi_exec_channel indir_index;
1217 const uint execmask = mach->ExecMask;
1218 uint i;
1219
1220 /* which address register (always zero now) */
1221 index2.i[0] =
1222 index2.i[1] =
1223 index2.i[2] =
1224 index2.i[3] = reg->Indirect.Index;
1225 /* get current value of address register[swizzle] */
1226 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, TGSI_CHAN_X );
1227 fetch_src_file_channel(mach,
1228 chan_index,
1229 reg->Indirect.File,
1230 swizzle,
1231 &index2,
1232 &ZeroVec,
1233 &indir_index);
1234
1235 /* add value of address register to the offset */
1236 index.i[0] += indir_index.i[0];
1237 index.i[1] += indir_index.i[1];
1238 index.i[2] += indir_index.i[2];
1239 index.i[3] += indir_index.i[3];
1240
1241 /* for disabled execution channels, zero-out the index to
1242 * avoid using a potential garbage value.
1243 */
1244 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1245 if ((execmask & (1 << i)) == 0)
1246 index.i[i] = 0;
1247 }
1248 }
1249
1250 /* There is an extra source register that is a second
1251 * subscript to a register file. Effectively it means that
1252 * the register file is actually a 2D array of registers.
1253 *
1254 * file[3][1],
1255 * where:
1256 * [3] = Dimension.Index
1257 */
1258 if (reg->Register.Dimension) {
1259 index2D.i[0] =
1260 index2D.i[1] =
1261 index2D.i[2] =
1262 index2D.i[3] = reg->Dimension.Index;
1263
1264 /* Again, the second subscript index can be addressed indirectly
1265 * identically to the first one.
1266 * Nothing stops us from indirectly addressing the indirect register,
1267 * but there is no need for that, so we won't exercise it.
1268 *
1269 * file[ind[4].y+3][1],
1270 * where:
1271 * ind = DimIndirect.File
1272 * [4] = DimIndirect.Index
1273 * .y = DimIndirect.SwizzleX
1274 */
1275 if (reg->Dimension.Indirect) {
1276 union tgsi_exec_channel index2;
1277 union tgsi_exec_channel indir_index;
1278 const uint execmask = mach->ExecMask;
1279 uint i;
1280
1281 index2.i[0] =
1282 index2.i[1] =
1283 index2.i[2] =
1284 index2.i[3] = reg->DimIndirect.Index;
1285
1286 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, TGSI_CHAN_X );
1287 fetch_src_file_channel(mach,
1288 chan_index,
1289 reg->DimIndirect.File,
1290 swizzle,
1291 &index2,
1292 &ZeroVec,
1293 &indir_index);
1294
1295 index2D.i[0] += indir_index.i[0];
1296 index2D.i[1] += indir_index.i[1];
1297 index2D.i[2] += indir_index.i[2];
1298 index2D.i[3] += indir_index.i[3];
1299
1300 /* for disabled execution channels, zero-out the index to
1301 * avoid using a potential garbage value.
1302 */
1303 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1304 if ((execmask & (1 << i)) == 0) {
1305 index2D.i[i] = 0;
1306 }
1307 }
1308 }
1309
1310 /* If by any chance there was a need for a 3D array of register
1311 * files, we would have to check whether Dimension is followed
1312 * by a dimension register and continue the saga.
1313 */
1314 } else {
1315 index2D.i[0] =
1316 index2D.i[1] =
1317 index2D.i[2] =
1318 index2D.i[3] = 0;
1319 }
1320
1321 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1322 fetch_src_file_channel(mach,
1323 chan_index,
1324 reg->Register.File,
1325 swizzle,
1326 &index,
1327 &index2D,
1328 chan);
1329
1330 if (reg->Register.Absolute) {
1331 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1332 micro_abs(chan, chan);
1333 } else {
1334 micro_iabs(chan, chan);
1335 }
1336 }
1337
1338 if (reg->Register.Negate) {
1339 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1340 micro_neg(chan, chan);
1341 } else {
1342 micro_ineg(chan, chan);
1343 }
1344 }
1345 }
1346
1347 static void
1348 store_dest(struct tgsi_exec_machine *mach,
1349 const union tgsi_exec_channel *chan,
1350 const struct tgsi_full_dst_register *reg,
1351 const struct tgsi_full_instruction *inst,
1352 uint chan_index,
1353 enum tgsi_exec_datatype dst_datatype)
1354 {
1355 uint i;
1356 union tgsi_exec_channel null;
1357 union tgsi_exec_channel *dst;
1358 union tgsi_exec_channel index2D;
1359 uint execmask = mach->ExecMask;
1360 int offset = 0; /* indirection offset */
1361 int index;
1362
1363 /* for debugging */
1364 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1365 check_inf_or_nan(chan);
1366 }
1367
1368 /* There is an extra source register that indirectly subscripts
1369 * a register file. The direct index now becomes an offset
1370 * that is being added to the indirect register.
1371 *
1372 * file[ind[2].x+1],
1373 * where:
1374 * ind = Indirect.File
1375 * [2] = Indirect.Index
1376 * .x = Indirect.SwizzleX
1377 */
1378 if (reg->Register.Indirect) {
1379 union tgsi_exec_channel index;
1380 union tgsi_exec_channel indir_index;
1381 uint swizzle;
1382
1383 /* which address register (always zero for now) */
1384 index.i[0] =
1385 index.i[1] =
1386 index.i[2] =
1387 index.i[3] = reg->Indirect.Index;
1388
1389 /* get current value of address register[swizzle] */
1390 swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, TGSI_CHAN_X );
1391
1392 /* fetch values from the address/indirection register */
1393 fetch_src_file_channel(mach,
1394 chan_index,
1395 reg->Indirect.File,
1396 swizzle,
1397 &index,
1398 &ZeroVec,
1399 &indir_index);
1400
1401 /* save indirection offset */
1402 offset = indir_index.i[0];
1403 }
1404
1405 /* There is an extra source register that is a second
1406 * subscript to a register file. Effectively it means that
1407 * the register file is actually a 2D array of registers.
1408 *
1409 * file[3][1],
1410 * where:
1411 * [3] = Dimension.Index
1412 */
1413 if (reg->Register.Dimension) {
1414 index2D.i[0] =
1415 index2D.i[1] =
1416 index2D.i[2] =
1417 index2D.i[3] = reg->Dimension.Index;
1418
1419 /* Again, the second subscript index can be addressed indirectly
1420 * identically to the first one.
1421 * Nothing stops us from indirectly addressing the indirect register,
1422 * but there is no need for that, so we won't exercise it.
1423 *
1424 * file[ind[4].y+3][1],
1425 * where:
1426 * ind = DimIndirect.File
1427 * [4] = DimIndirect.Index
1428 * .y = DimIndirect.SwizzleX
1429 */
1430 if (reg->Dimension.Indirect) {
1431 union tgsi_exec_channel index2;
1432 union tgsi_exec_channel indir_index;
1433 const uint execmask = mach->ExecMask;
1434 unsigned swizzle;
1435 uint i;
1436
1437 index2.i[0] =
1438 index2.i[1] =
1439 index2.i[2] =
1440 index2.i[3] = reg->DimIndirect.Index;
1441
1442 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, TGSI_CHAN_X );
1443 fetch_src_file_channel(mach,
1444 chan_index,
1445 reg->DimIndirect.File,
1446 swizzle,
1447 &index2,
1448 &ZeroVec,
1449 &indir_index);
1450
1451 index2D.i[0] += indir_index.i[0];
1452 index2D.i[1] += indir_index.i[1];
1453 index2D.i[2] += indir_index.i[2];
1454 index2D.i[3] += indir_index.i[3];
1455
1456 /* for disabled execution channels, zero-out the index to
1457 * avoid using a potential garbage value.
1458 */
1459 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1460 if ((execmask & (1 << i)) == 0) {
1461 index2D.i[i] = 0;
1462 }
1463 }
1464 }
1465
1466 /* If by any chance there was a need for a 3D array of register
1467 * files, we would have to check whether Dimension is followed
1468 * by a dimension register and continue the saga.
1469 */
1470 } else {
1471 index2D.i[0] =
1472 index2D.i[1] =
1473 index2D.i[2] =
1474 index2D.i[3] = 0;
1475 }
1476
1477 switch (reg->Register.File) {
1478 case TGSI_FILE_NULL:
1479 dst = &null;
1480 break;
1481
1482 case TGSI_FILE_OUTPUT:
1483 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1484 + reg->Register.Index;
1485 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1486 #if 0
1487 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1488 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1489 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1490 if (execmask & (1 << i))
1491 fprintf(stderr, "%f, ", chan->f[i]);
1492 fprintf(stderr, ")\n");
1493 }
1494 #endif
1495 break;
1496
1497 case TGSI_FILE_TEMPORARY:
1498 index = reg->Register.Index;
1499 assert( index < TGSI_EXEC_NUM_TEMPS );
1500 dst = &mach->Temps[offset + index].xyzw[chan_index];
1501 break;
1502
1503 case TGSI_FILE_TEMPORARY_ARRAY:
1504 index = reg->Register.Index;
1505 assert( index < TGSI_EXEC_NUM_TEMPS );
1506 assert( index2D.i[0] < TGSI_EXEC_NUM_TEMP_ARRAYS );
1507 /* XXX we use index2D.i[0] here but somehow we might
1508 * end up with someone trying to store indirectly in
1509 * different buffers */
1510 dst = &mach->TempArray[index2D.i[0]][offset + index].xyzw[chan_index];
1511 break;
1512
1513 case TGSI_FILE_ADDRESS:
1514 index = reg->Register.Index;
1515 dst = &mach->Addrs[index].xyzw[chan_index];
1516 break;
1517
1518 case TGSI_FILE_PREDICATE:
1519 index = reg->Register.Index;
1520 assert(index < TGSI_EXEC_NUM_PREDS);
1521 dst = &mach->Predicates[index].xyzw[chan_index];
1522 break;
1523
1524 default:
1525 assert( 0 );
1526 return;
1527 }
1528
1529 if (inst->Instruction.Predicate) {
1530 uint swizzle;
1531 union tgsi_exec_channel *pred;
1532
1533 switch (chan_index) {
1534 case TGSI_CHAN_X:
1535 swizzle = inst->Predicate.SwizzleX;
1536 break;
1537 case TGSI_CHAN_Y:
1538 swizzle = inst->Predicate.SwizzleY;
1539 break;
1540 case TGSI_CHAN_Z:
1541 swizzle = inst->Predicate.SwizzleZ;
1542 break;
1543 case TGSI_CHAN_W:
1544 swizzle = inst->Predicate.SwizzleW;
1545 break;
1546 default:
1547 assert(0);
1548 return;
1549 }
1550
1551 assert(inst->Predicate.Index == 0);
1552
1553 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1554
1555 if (inst->Predicate.Negate) {
1556 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1557 if (pred->u[i]) {
1558 execmask &= ~(1 << i);
1559 }
1560 }
1561 } else {
1562 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1563 if (!pred->u[i]) {
1564 execmask &= ~(1 << i);
1565 }
1566 }
1567 }
1568 }
1569
1570 switch (inst->Instruction.Saturate) {
1571 case TGSI_SAT_NONE:
1572 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1573 if (execmask & (1 << i))
1574 dst->i[i] = chan->i[i];
1575 break;
1576
1577 case TGSI_SAT_ZERO_ONE:
1578 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1579 if (execmask & (1 << i)) {
1580 if (chan->f[i] < 0.0f)
1581 dst->f[i] = 0.0f;
1582 else if (chan->f[i] > 1.0f)
1583 dst->f[i] = 1.0f;
1584 else
1585 dst->i[i] = chan->i[i];
1586 }
1587 break;
1588
1589 case TGSI_SAT_MINUS_PLUS_ONE:
1590 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1591 if (execmask & (1 << i)) {
1592 if (chan->f[i] < -1.0f)
1593 dst->f[i] = -1.0f;
1594 else if (chan->f[i] > 1.0f)
1595 dst->f[i] = 1.0f;
1596 else
1597 dst->i[i] = chan->i[i];
1598 }
1599 break;
1600
1601 default:
1602 assert( 0 );
1603 }
1604 }
1605
/*
 * Shorthands for fetch_source() on source operand INDEX of the current
 * instruction.  Both rely on variables named 'mach' and 'inst' being in
 * scope where the macro is used.
 *
 * FETCH applies the operand's abs/negate modifiers with float semantics,
 * IFETCH with integer semantics.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_FLOAT)

#define IFETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_INT)
1611
1612
1613 /**
1614 * Execute ARB-style KIL which is predicated by a src register.
1615 * Kill fragment if any of the four values is less than zero.
1616 */
1617 static void
1618 exec_kil(struct tgsi_exec_machine *mach,
1619 const struct tgsi_full_instruction *inst)
1620 {
1621 uint uniquemask;
1622 uint chan_index;
1623 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1624 union tgsi_exec_channel r[1];
1625
1626 /* This mask stores component bits that were already tested. */
1627 uniquemask = 0;
1628
1629 for (chan_index = 0; chan_index < 4; chan_index++)
1630 {
1631 uint swizzle;
1632 uint i;
1633
1634 /* unswizzle channel */
1635 swizzle = tgsi_util_get_full_src_register_swizzle (
1636 &inst->Src[0],
1637 chan_index);
1638
1639 /* check if the component has not been already tested */
1640 if (uniquemask & (1 << swizzle))
1641 continue;
1642 uniquemask |= 1 << swizzle;
1643
1644 FETCH(&r[0], 0, chan_index);
1645 for (i = 0; i < 4; i++)
1646 if (r[0].f[i] < 0.0f)
1647 kilmask |= 1 << i;
1648 }
1649
1650 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1651 }
1652
1653 /**
1654 * Execute NVIDIA-style KIL which is predicated by a condition code.
1655 * Kill fragment if the condition code is TRUE.
1656 */
1657 static void
1658 exec_kilp(struct tgsi_exec_machine *mach,
1659 const struct tgsi_full_instruction *inst)
1660 {
1661 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1662
1663 /* "unconditional" kil */
1664 kilmask = mach->ExecMask;
1665 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1666 }
1667
1668 static void
1669 emit_vertex(struct tgsi_exec_machine *mach)
1670 {
1671 /* FIXME: check for exec mask correctly
1672 unsigned i;
1673 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1674 if ((mach->ExecMask & (1 << i)))
1675 */
1676 if (mach->ExecMask) {
1677 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
1678 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1679 }
1680 }
1681
1682 static void
1683 emit_primitive(struct tgsi_exec_machine *mach)
1684 {
1685 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
1686 /* FIXME: check for exec mask correctly
1687 unsigned i;
1688 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1689 if ((mach->ExecMask & (1 << i)))
1690 */
1691 if (mach->ExecMask) {
1692 ++(*prim_count);
1693 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
1694 mach->Primitives[*prim_count] = 0;
1695 }
1696 }
1697
1698 static void
1699 conditional_emit_primitive(struct tgsi_exec_machine *mach)
1700 {
1701 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1702 int emitted_verts =
1703 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
1704 if (emitted_verts) {
1705 emit_primitive(mach);
1706 }
1707 }
1708 }
1709
1710
1711 /*
1712 * Fetch four texture samples using STR texture coordinates.
1713 */
1714 static void
1715 fetch_texel( struct tgsi_sampler *sampler,
1716 const unsigned sview_idx,
1717 const unsigned sampler_idx,
1718 const union tgsi_exec_channel *s,
1719 const union tgsi_exec_channel *t,
1720 const union tgsi_exec_channel *p,
1721 const union tgsi_exec_channel *c0,
1722 const union tgsi_exec_channel *c1,
1723 float derivs[3][2][TGSI_QUAD_SIZE],
1724 const int8_t offset[3],
1725 enum tgsi_sampler_control control,
1726 union tgsi_exec_channel *r,
1727 union tgsi_exec_channel *g,
1728 union tgsi_exec_channel *b,
1729 union tgsi_exec_channel *a )
1730 {
1731 uint j;
1732 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1733
1734 /* FIXME: handle explicit derivs, offsets */
1735 sampler->get_samples(sampler, sview_idx, sampler_idx,
1736 s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
1737
1738 for (j = 0; j < 4; j++) {
1739 r->f[j] = rgba[0][j];
1740 g->f[j] = rgba[1][j];
1741 b->f[j] = rgba[2][j];
1742 a->f[j] = rgba[3][j];
1743 }
1744 }
1745
1746
/* Values for the 'modifier' argument of exec_tex(). */
#define TEX_MODIFIER_NONE          0  /* plain lookup */
#define TEX_MODIFIER_PROJECTED     1  /* coords are divided by src0.w */
#define TEX_MODIFIER_LOD_BIAS      2  /* sampled with tgsi_sampler_lod_bias */
#define TEX_MODIFIER_EXPLICIT_LOD  3  /* sampled with tgsi_sampler_lod_explicit */
#define TEX_MODIFIER_LEVEL_ZERO    4  /* must not be passed to exec_tex() */
1752
1753
1754 /*
1755 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
1756 */
1757 static void
1758 fetch_texel_offsets(struct tgsi_exec_machine *mach,
1759 const struct tgsi_full_instruction *inst,
1760 int8_t offsets[3])
1761 {
1762 if (inst->Texture.NumOffsets == 1) {
1763 union tgsi_exec_channel index;
1764 union tgsi_exec_channel offset[3];
1765 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
1766 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
1767 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
1768 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
1769 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
1770 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
1771 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
1772 offsets[0] = offset[0].i[0];
1773 offsets[1] = offset[1].i[0];
1774 offsets[2] = offset[2].i[0];
1775 } else {
1776 assert(inst->Texture.NumOffsets == 0);
1777 offsets[0] = offsets[1] = offsets[2] = 0;
1778 }
1779 }
1780
1781
1782 /*
1783 * Fetch dx and dy values for one channel (s, t or r).
1784 * Put dx values into one float array, dy values into another.
1785 */
1786 static void
1787 fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
1788 const struct tgsi_full_instruction *inst,
1789 unsigned regdsrcx,
1790 unsigned chan,
1791 float derivs[2][TGSI_QUAD_SIZE])
1792 {
1793 union tgsi_exec_channel d;
1794 FETCH(&d, regdsrcx, chan);
1795 derivs[0][0] = d.f[0];
1796 derivs[0][1] = d.f[1];
1797 derivs[0][2] = d.f[2];
1798 derivs[0][3] = d.f[3];
1799 FETCH(&d, regdsrcx + 1, chan);
1800 derivs[1][0] = d.f[0];
1801 derivs[1][1] = d.f[1];
1802 derivs[1][2] = d.f[2];
1803 derivs[1][3] = d.f[3];
1804 }
1805
1806
1807 /*
1808 * execute a texture instruction.
1809 *
1810 * modifier is used to control the channel routing for the\
1811 * instruction variants like proj, lod, and texture with lod bias.
1812 * sampler indicates which src register the sampler is contained in.
1813 */
1814 static void
1815 exec_tex(struct tgsi_exec_machine *mach,
1816 const struct tgsi_full_instruction *inst,
1817 uint modifier, uint sampler)
1818 {
1819 const uint unit = inst->Src[sampler].Register.Index;
1820 union tgsi_exec_channel r[4], cubearraycomp, cubelod;
1821 const union tgsi_exec_channel *lod = &ZeroVec;
1822 enum tgsi_sampler_control control = tgsi_sampler_lod_none;
1823 uint chan;
1824 int8_t offsets[3];
1825
1826 /* always fetch all 3 offsets, overkill but keeps code simple */
1827 fetch_texel_offsets(mach, inst, offsets);
1828
1829 assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
1830
1831 if (modifier != TEX_MODIFIER_NONE && (sampler == 1)) {
1832 FETCH(&r[3], 0, TGSI_CHAN_W);
1833 if (modifier != TEX_MODIFIER_PROJECTED) {
1834 lod = &r[3];
1835 }
1836 }
1837
1838 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
1839 control = tgsi_sampler_lod_explicit;
1840 } else if (modifier == TEX_MODIFIER_LOD_BIAS){
1841 control = tgsi_sampler_lod_bias;
1842 }
1843
1844 switch (inst->Texture.Texture) {
1845 case TGSI_TEXTURE_1D:
1846 FETCH(&r[0], 0, TGSI_CHAN_X);
1847
1848 if (modifier == TEX_MODIFIER_PROJECTED) {
1849 micro_div(&r[0], &r[0], &r[3]);
1850 }
1851
1852 fetch_texel(mach->Sampler, unit, unit,
1853 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
1854 NULL, offsets, control,
1855 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1856 break;
1857
1858 case TGSI_TEXTURE_SHADOW1D:
1859 FETCH(&r[0], 0, TGSI_CHAN_X);
1860 FETCH(&r[2], 0, TGSI_CHAN_Z);
1861
1862 if (modifier == TEX_MODIFIER_PROJECTED) {
1863 micro_div(&r[0], &r[0], &r[3]);
1864 micro_div(&r[2], &r[2], &r[3]);
1865 }
1866
1867 fetch_texel(mach->Sampler, unit, unit,
1868 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
1869 NULL, offsets, control,
1870 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1871 break;
1872
1873 case TGSI_TEXTURE_2D:
1874 case TGSI_TEXTURE_RECT:
1875 FETCH(&r[0], 0, TGSI_CHAN_X);
1876 FETCH(&r[1], 0, TGSI_CHAN_Y);
1877
1878 if (modifier == TEX_MODIFIER_PROJECTED) {
1879 micro_div(&r[0], &r[0], &r[3]);
1880 micro_div(&r[1], &r[1], &r[3]);
1881 }
1882
1883 fetch_texel(mach->Sampler, unit, unit,
1884 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
1885 NULL, offsets, control,
1886 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1887 break;
1888
1889 case TGSI_TEXTURE_SHADOW2D:
1890 case TGSI_TEXTURE_SHADOWRECT:
1891 FETCH(&r[0], 0, TGSI_CHAN_X);
1892 FETCH(&r[1], 0, TGSI_CHAN_Y);
1893 FETCH(&r[2], 0, TGSI_CHAN_Z);
1894
1895 if (modifier == TEX_MODIFIER_PROJECTED) {
1896 micro_div(&r[0], &r[0], &r[3]);
1897 micro_div(&r[1], &r[1], &r[3]);
1898 micro_div(&r[2], &r[2], &r[3]);
1899 }
1900
1901 fetch_texel(mach->Sampler, unit, unit,
1902 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
1903 NULL, offsets, control,
1904 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1905 break;
1906
1907 case TGSI_TEXTURE_1D_ARRAY:
1908 FETCH(&r[0], 0, TGSI_CHAN_X);
1909 FETCH(&r[1], 0, TGSI_CHAN_Y);
1910
1911 if (modifier == TEX_MODIFIER_PROJECTED) {
1912 micro_div(&r[0], &r[0], &r[3]);
1913 }
1914
1915 fetch_texel(mach->Sampler, unit, unit,
1916 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
1917 NULL, offsets, control,
1918 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1919 break;
1920 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1921 FETCH(&r[0], 0, TGSI_CHAN_X);
1922 FETCH(&r[1], 0, TGSI_CHAN_Y);
1923 FETCH(&r[2], 0, TGSI_CHAN_Z);
1924
1925 if (modifier == TEX_MODIFIER_PROJECTED) {
1926 micro_div(&r[0], &r[0], &r[3]);
1927 micro_div(&r[2], &r[2], &r[3]);
1928 }
1929
1930 fetch_texel(mach->Sampler, unit, unit,
1931 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
1932 NULL, offsets, control,
1933 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1934 break;
1935
1936 case TGSI_TEXTURE_2D_ARRAY:
1937 FETCH(&r[0], 0, TGSI_CHAN_X);
1938 FETCH(&r[1], 0, TGSI_CHAN_Y);
1939 FETCH(&r[2], 0, TGSI_CHAN_Z);
1940
1941 if (modifier == TEX_MODIFIER_PROJECTED) {
1942 micro_div(&r[0], &r[0], &r[3]);
1943 micro_div(&r[1], &r[1], &r[3]);
1944 }
1945
1946 fetch_texel(mach->Sampler, unit, unit,
1947 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
1948 NULL, offsets, control,
1949 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1950 break;
1951 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1952 case TGSI_TEXTURE_SHADOWCUBE:
1953 FETCH(&r[0], 0, TGSI_CHAN_X);
1954 FETCH(&r[1], 0, TGSI_CHAN_Y);
1955 FETCH(&r[2], 0, TGSI_CHAN_Z);
1956 FETCH(&r[3], 0, TGSI_CHAN_W);
1957
1958 fetch_texel(mach->Sampler, unit, unit,
1959 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* S, T, P, C, LOD */
1960 NULL, offsets, control,
1961 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1962 break;
1963 case TGSI_TEXTURE_CUBE_ARRAY:
1964 FETCH(&r[0], 0, TGSI_CHAN_X);
1965 FETCH(&r[1], 0, TGSI_CHAN_Y);
1966 FETCH(&r[2], 0, TGSI_CHAN_Z);
1967 FETCH(&r[3], 0, TGSI_CHAN_W);
1968
1969 if (modifier == TEX_MODIFIER_EXPLICIT_LOD ||
1970 modifier == TEX_MODIFIER_LOD_BIAS)
1971 FETCH(&cubelod, 1, TGSI_CHAN_X);
1972 else
1973 cubelod = ZeroVec;
1974
1975 fetch_texel(mach->Sampler, unit, unit,
1976 &r[0], &r[1], &r[2], &r[3], &cubelod, /* S, T, P, C, LOD */
1977 NULL, offsets, control,
1978 &r[0], &r[1], &r[2], &r[3]); /* outputs */
1979 break;
1980 case TGSI_TEXTURE_3D:
1981 case TGSI_TEXTURE_CUBE:
1982 FETCH(&r[0], 0, TGSI_CHAN_X);
1983 FETCH(&r[1], 0, TGSI_CHAN_Y);
1984 FETCH(&r[2], 0, TGSI_CHAN_Z);
1985
1986 if (modifier == TEX_MODIFIER_PROJECTED) {
1987 micro_div(&r[0], &r[0], &r[3]);
1988 micro_div(&r[1], &r[1], &r[3]);
1989 micro_div(&r[2], &r[2], &r[3]);
1990 }
1991
1992 fetch_texel(mach->Sampler, unit, unit,
1993 &r[0], &r[1], &r[2], &ZeroVec, lod,
1994 NULL, offsets, control,
1995 &r[0], &r[1], &r[2], &r[3]);
1996 break;
1997
1998 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1999 FETCH(&r[0], 0, TGSI_CHAN_X);
2000 FETCH(&r[1], 0, TGSI_CHAN_Y);
2001 FETCH(&r[2], 0, TGSI_CHAN_Z);
2002 FETCH(&r[3], 0, TGSI_CHAN_W);
2003
2004 FETCH(&cubearraycomp, 1, TGSI_CHAN_X);
2005
2006 fetch_texel(mach->Sampler, unit, unit,
2007 &r[0], &r[1], &r[2], &r[3], &cubearraycomp, /* S, T, P, C, LOD */
2008 NULL, offsets, control,
2009 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2010 break;
2011 default:
2012 assert(0);
2013 }
2014
2015 #if 0
2016 debug_printf("fetch r: %g %g %g %g\n",
2017 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
2018 debug_printf("fetch g: %g %g %g %g\n",
2019 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
2020 debug_printf("fetch b: %g %g %g %g\n",
2021 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
2022 debug_printf("fetch a: %g %g %g %g\n",
2023 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
2024 #endif
2025
2026 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2027 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2028 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2029 }
2030 }
2031 }
2032
2033
2034 static void
2035 exec_txd(struct tgsi_exec_machine *mach,
2036 const struct tgsi_full_instruction *inst)
2037 {
2038 const uint unit = inst->Src[3].Register.Index;
2039 union tgsi_exec_channel r[4];
2040 float derivs[3][2][TGSI_QUAD_SIZE];
2041 uint chan;
2042 int8_t offsets[3];
2043
2044 /* always fetch all 3 offsets, overkill but keeps code simple */
2045 fetch_texel_offsets(mach, inst, offsets);
2046
2047 switch (inst->Texture.Texture) {
2048 case TGSI_TEXTURE_1D:
2049 FETCH(&r[0], 0, TGSI_CHAN_X);
2050
2051 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2052
2053 fetch_texel(mach->Sampler, unit, unit,
2054 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2055 derivs, offsets, tgsi_sampler_derivs_explicit,
2056 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2057 break;
2058
2059 case TGSI_TEXTURE_SHADOW1D:
2060 case TGSI_TEXTURE_1D_ARRAY:
2061 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2062 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
2063 FETCH(&r[0], 0, TGSI_CHAN_X);
2064 FETCH(&r[1], 0, TGSI_CHAN_Y);
2065 FETCH(&r[2], 0, TGSI_CHAN_Z);
2066
2067 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2068
2069 fetch_texel(mach->Sampler, unit, unit,
2070 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2071 derivs, offsets, tgsi_sampler_derivs_explicit,
2072 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2073 break;
2074
2075 case TGSI_TEXTURE_2D:
2076 case TGSI_TEXTURE_RECT:
2077 FETCH(&r[0], 0, TGSI_CHAN_X);
2078 FETCH(&r[1], 0, TGSI_CHAN_Y);
2079
2080 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2081 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2082
2083 fetch_texel(mach->Sampler, unit, unit,
2084 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2085 derivs, offsets, tgsi_sampler_derivs_explicit,
2086 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2087 break;
2088
2089
2090 case TGSI_TEXTURE_SHADOW2D:
2091 case TGSI_TEXTURE_SHADOWRECT:
2092 case TGSI_TEXTURE_2D_ARRAY:
2093 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2094 /* only SHADOW2D_ARRAY actually needs W */
2095 FETCH(&r[0], 0, TGSI_CHAN_X);
2096 FETCH(&r[1], 0, TGSI_CHAN_Y);
2097 FETCH(&r[2], 0, TGSI_CHAN_Z);
2098 FETCH(&r[3], 0, TGSI_CHAN_W);
2099
2100 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2101 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2102
2103 fetch_texel(mach->Sampler, unit, unit,
2104 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2105 derivs, offsets, tgsi_sampler_derivs_explicit,
2106 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2107 break;
2108
2109 case TGSI_TEXTURE_3D:
2110 case TGSI_TEXTURE_CUBE:
2111 case TGSI_TEXTURE_CUBE_ARRAY:
2112 /* only TEXTURE_CUBE_ARRAY actually needs W */
2113 FETCH(&r[0], 0, TGSI_CHAN_X);
2114 FETCH(&r[1], 0, TGSI_CHAN_Y);
2115 FETCH(&r[2], 0, TGSI_CHAN_Z);
2116 FETCH(&r[3], 0, TGSI_CHAN_W);
2117
2118 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2119 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2120 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
2121
2122 fetch_texel(mach->Sampler, unit, unit,
2123 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2124 derivs, offsets, tgsi_sampler_derivs_explicit,
2125 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2126 break;
2127
2128 default:
2129 assert(0);
2130 }
2131
2132 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2133 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2134 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2135 }
2136 }
2137 }
2138
2139
2140 static void
2141 exec_txf(struct tgsi_exec_machine *mach,
2142 const struct tgsi_full_instruction *inst)
2143 {
2144 const uint unit = inst->Src[1].Register.Index;
2145 union tgsi_exec_channel r[4];
2146 uint chan;
2147 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2148 int j;
2149 int8_t offsets[3];
2150
2151 /* always fetch all 3 offsets, overkill but keeps code simple */
2152 fetch_texel_offsets(mach, inst, offsets);
2153
2154 IFETCH(&r[3], 0, TGSI_CHAN_W);
2155
2156 switch(inst->Texture.Texture) {
2157 case TGSI_TEXTURE_3D:
2158 case TGSI_TEXTURE_2D_ARRAY:
2159 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2160 IFETCH(&r[2], 0, TGSI_CHAN_Z);
2161 /* fallthrough */
2162 case TGSI_TEXTURE_2D:
2163 case TGSI_TEXTURE_RECT:
2164 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2165 case TGSI_TEXTURE_SHADOW2D:
2166 case TGSI_TEXTURE_SHADOWRECT:
2167 case TGSI_TEXTURE_1D_ARRAY:
2168 IFETCH(&r[1], 0, TGSI_CHAN_Y);
2169 /* fallthrough */
2170 case TGSI_TEXTURE_BUFFER:
2171 case TGSI_TEXTURE_1D:
2172 case TGSI_TEXTURE_SHADOW1D:
2173 IFETCH(&r[0], 0, TGSI_CHAN_X);
2174 break;
2175 default:
2176 assert(0);
2177 break;
2178 }
2179
2180 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2181 offsets, rgba);
2182
2183 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2184 r[0].f[j] = rgba[0][j];
2185 r[1].f[j] = rgba[1][j];
2186 r[2].f[j] = rgba[2][j];
2187 r[3].f[j] = rgba[3][j];
2188 }
2189
2190 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2191 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2192 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2193 }
2194 }
2195 }
2196
2197 static void
2198 exec_txq(struct tgsi_exec_machine *mach,
2199 const struct tgsi_full_instruction *inst)
2200 {
2201 const uint unit = inst->Src[1].Register.Index;
2202 int result[4];
2203 union tgsi_exec_channel r[4], src;
2204 uint chan;
2205 int i,j;
2206
2207 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2208
2209 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2210
2211 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2212 for (j = 0; j < 4; j++) {
2213 r[j].i[i] = result[j];
2214 }
2215 }
2216
2217 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2218 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2219 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
2220 TGSI_EXEC_DATA_INT);
2221 }
2222 }
2223 }
2224
2225 static void
2226 exec_sample(struct tgsi_exec_machine *mach,
2227 const struct tgsi_full_instruction *inst,
2228 uint modifier, boolean compare)
2229 {
2230 const uint resource_unit = inst->Src[1].Register.Index;
2231 const uint sampler_unit = inst->Src[2].Register.Index;
2232 union tgsi_exec_channel r[4], c1;
2233 const union tgsi_exec_channel *lod = &ZeroVec;
2234 enum tgsi_sampler_control control = tgsi_sampler_lod_none;
2235 uint chan;
2236 int8_t offsets[3];
2237
2238 /* always fetch all 3 offsets, overkill but keeps code simple */
2239 fetch_texel_offsets(mach, inst, offsets);
2240
2241 assert(modifier != TEX_MODIFIER_PROJECTED);
2242
2243 if (modifier != TEX_MODIFIER_NONE) {
2244 if (modifier == TEX_MODIFIER_LOD_BIAS) {
2245 FETCH(&c1, 3, TGSI_CHAN_X);
2246 lod = &c1;
2247 control = tgsi_sampler_lod_bias;
2248 }
2249 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2250 FETCH(&c1, 3, TGSI_CHAN_X);
2251 lod = &c1;
2252 control = tgsi_sampler_lod_explicit;
2253 }
2254 else {
2255 assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
2256 control = tgsi_sampler_lod_zero;
2257 }
2258 }
2259
2260 FETCH(&r[0], 0, TGSI_CHAN_X);
2261
2262 switch (mach->SamplerViews[resource_unit].Resource) {
2263 case TGSI_TEXTURE_1D:
2264 if (compare) {
2265 FETCH(&r[2], 3, TGSI_CHAN_X);
2266 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2267 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2268 NULL, offsets, control,
2269 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2270 }
2271 else {
2272 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2273 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2274 NULL, offsets, control,
2275 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2276 }
2277 break;
2278
2279 case TGSI_TEXTURE_1D_ARRAY:
2280 case TGSI_TEXTURE_2D:
2281 case TGSI_TEXTURE_RECT:
2282 FETCH(&r[1], 0, TGSI_CHAN_Y);
2283 if (compare) {
2284 FETCH(&r[2], 3, TGSI_CHAN_X);
2285 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2286 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2287 NULL, offsets, control,
2288 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2289 }
2290 else {
2291 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2292 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2293 NULL, offsets, control,
2294 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2295 }
2296 break;
2297
2298 case TGSI_TEXTURE_2D_ARRAY:
2299 case TGSI_TEXTURE_3D:
2300 case TGSI_TEXTURE_CUBE:
2301 FETCH(&r[1], 0, TGSI_CHAN_Y);
2302 FETCH(&r[2], 0, TGSI_CHAN_Z);
2303 if(compare) {
2304 FETCH(&r[3], 3, TGSI_CHAN_X);
2305 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2306 &r[0], &r[1], &r[2], &r[3], lod,
2307 NULL, offsets, control,
2308 &r[0], &r[1], &r[2], &r[3]);
2309 }
2310 else {
2311 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2312 &r[0], &r[1], &r[2], &ZeroVec, lod,
2313 NULL, offsets, control,
2314 &r[0], &r[1], &r[2], &r[3]);
2315 }
2316 break;
2317
2318 case TGSI_TEXTURE_CUBE_ARRAY:
2319 FETCH(&r[1], 0, TGSI_CHAN_Y);
2320 FETCH(&r[2], 0, TGSI_CHAN_Z);
2321 FETCH(&r[3], 0, TGSI_CHAN_W);
2322 if(compare) {
2323 FETCH(&r[4], 3, TGSI_CHAN_X);
2324 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2325 &r[0], &r[1], &r[2], &r[3], &r[4],
2326 NULL, offsets, control,
2327 &r[0], &r[1], &r[2], &r[3]);
2328 }
2329 else {
2330 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2331 &r[0], &r[1], &r[2], &r[3], lod,
2332 NULL, offsets, control,
2333 &r[0], &r[1], &r[2], &r[3]);
2334 }
2335 break;
2336
2337
2338 default:
2339 assert(0);
2340 }
2341
2342 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2343 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2344 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2345 }
2346 }
2347 }
2348
2349 static void
2350 exec_sample_d(struct tgsi_exec_machine *mach,
2351 const struct tgsi_full_instruction *inst)
2352 {
2353 const uint resource_unit = inst->Src[1].Register.Index;
2354 const uint sampler_unit = inst->Src[2].Register.Index;
2355 union tgsi_exec_channel r[4];
2356 float derivs[3][2][TGSI_QUAD_SIZE];
2357 uint chan;
2358 int8_t offsets[3];
2359
2360 /* always fetch all 3 offsets, overkill but keeps code simple */
2361 fetch_texel_offsets(mach, inst, offsets);
2362
2363 switch (mach->SamplerViews[resource_unit].Resource) {
2364 case TGSI_TEXTURE_1D:
2365 FETCH(&r[0], 0, TGSI_CHAN_X);
2366
2367 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2368
2369 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2370 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2371 derivs, offsets, tgsi_sampler_derivs_explicit,
2372 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2373 break;
2374
2375 case TGSI_TEXTURE_2D:
2376 case TGSI_TEXTURE_RECT:
2377 FETCH(&r[0], 0, TGSI_CHAN_X);
2378 FETCH(&r[1], 0, TGSI_CHAN_Y);
2379
2380 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2381 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2382
2383 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2384 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* inputs */
2385 derivs, offsets, tgsi_sampler_derivs_explicit,
2386 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2387 break;
2388
2389 case TGSI_TEXTURE_3D:
2390 case TGSI_TEXTURE_CUBE:
2391 FETCH(&r[0], 0, TGSI_CHAN_X);
2392 FETCH(&r[1], 0, TGSI_CHAN_Y);
2393 FETCH(&r[2], 0, TGSI_CHAN_Z);
2394
2395 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2396 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2397 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2398
2399 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2400 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,
2401 derivs, offsets, tgsi_sampler_derivs_explicit,
2402 &r[0], &r[1], &r[2], &r[3]);
2403 break;
2404
2405 case TGSI_TEXTURE_CUBE_ARRAY:
2406 FETCH(&r[0], 0, TGSI_CHAN_X);
2407 FETCH(&r[1], 0, TGSI_CHAN_Y);
2408 FETCH(&r[2], 0, TGSI_CHAN_Z);
2409 FETCH(&r[3], 0, TGSI_CHAN_W);
2410
2411 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2412 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2413 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2414
2415 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2416 &r[0], &r[1], &r[2], &r[3], &ZeroVec,
2417 derivs, offsets, tgsi_sampler_derivs_explicit,
2418 &r[0], &r[1], &r[2], &r[3]);
2419 break;
2420
2421 default:
2422 assert(0);
2423 }
2424
2425 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2426 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2427 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2428 }
2429 }
2430 }
2431
2432
2433 /**
2434 * Evaluate a constant-valued coefficient at the position of the
2435 * current quad.
2436 */
2437 static void
2438 eval_constant_coef(
2439 struct tgsi_exec_machine *mach,
2440 unsigned attrib,
2441 unsigned chan )
2442 {
2443 unsigned i;
2444
2445 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2446 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2447 }
2448 }
2449
2450 /**
2451 * Evaluate a linear-valued coefficient at the position of the
2452 * current quad.
2453 */
2454 static void
2455 eval_linear_coef(
2456 struct tgsi_exec_machine *mach,
2457 unsigned attrib,
2458 unsigned chan )
2459 {
2460 const float x = mach->QuadPos.xyzw[0].f[0];
2461 const float y = mach->QuadPos.xyzw[1].f[0];
2462 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2463 const float dady = mach->InterpCoefs[attrib].dady[chan];
2464 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2465 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2466 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2467 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2468 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2469 }
2470
2471 /**
2472 * Evaluate a perspective-valued coefficient at the position of the
2473 * current quad.
2474 */
2475 static void
2476 eval_perspective_coef(
2477 struct tgsi_exec_machine *mach,
2478 unsigned attrib,
2479 unsigned chan )
2480 {
2481 const float x = mach->QuadPos.xyzw[0].f[0];
2482 const float y = mach->QuadPos.xyzw[1].f[0];
2483 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2484 const float dady = mach->InterpCoefs[attrib].dady[chan];
2485 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2486 const float *w = mach->QuadPos.xyzw[3].f;
2487 /* divide by W here */
2488 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2489 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2490 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2491 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2492 }
2493
2494
/** Common signature of the eval_*_coef() input evaluators. */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
2499
2500 static void
2501 exec_declaration(struct tgsi_exec_machine *mach,
2502 const struct tgsi_full_declaration *decl)
2503 {
2504 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2505 mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2506 return;
2507 }
2508
2509 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2510 if (decl->Declaration.File == TGSI_FILE_INPUT) {
2511 uint first, last, mask;
2512
2513 first = decl->Range.First;
2514 last = decl->Range.Last;
2515 mask = decl->Declaration.UsageMask;
2516
2517 /* XXX we could remove this special-case code since
2518 * mach->InterpCoefs[first].a0 should already have the
2519 * front/back-face value. But we should first update the
2520 * ureg code to emit the right UsageMask value (WRITEMASK_X).
2521 * Then, we could remove the tgsi_exec_machine::Face field.
2522 */
2523 /* XXX make FACE a system value */
2524 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
2525 uint i;
2526
2527 assert(decl->Semantic.Index == 0);
2528 assert(first == last);
2529
2530 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2531 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
2532 }
2533 } else {
2534 eval_coef_func eval;
2535 uint i, j;
2536
2537 switch (decl->Interp.Interpolate) {
2538 case TGSI_INTERPOLATE_CONSTANT:
2539 eval = eval_constant_coef;
2540 break;
2541
2542 case TGSI_INTERPOLATE_LINEAR:
2543 eval = eval_linear_coef;
2544 break;
2545
2546 case TGSI_INTERPOLATE_PERSPECTIVE:
2547 eval = eval_perspective_coef;
2548 break;
2549
2550 case TGSI_INTERPOLATE_COLOR:
2551 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
2552 break;
2553
2554 default:
2555 assert(0);
2556 return;
2557 }
2558
2559 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2560 if (mask & (1 << j)) {
2561 for (i = first; i <= last; i++) {
2562 eval(mach, i, j);
2563 }
2564 }
2565 }
2566 }
2567
2568 if (DEBUG_EXECUTION) {
2569 uint i, j;
2570 for (i = first; i <= last; ++i) {
2571 debug_printf("IN[%2u] = ", i);
2572 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2573 if (j > 0) {
2574 debug_printf(" ");
2575 }
2576 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
2577 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
2578 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
2579 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
2580 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
2581 }
2582 }
2583 }
2584 }
2585 }
2586
2587 if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
2588 mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First;
2589 }
2590 }
2591
2592
/** Per-channel operation that takes no source operands. */
typedef void (*micro_op)(union tgsi_exec_channel *dst);
2594
2595 static void
2596 exec_vector(struct tgsi_exec_machine *mach,
2597 const struct tgsi_full_instruction *inst,
2598 micro_op op,
2599 enum tgsi_exec_datatype dst_datatype)
2600 {
2601 unsigned int chan;
2602
2603 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2604 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2605 union tgsi_exec_channel dst;
2606
2607 op(&dst);
2608 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2609 }
2610 }
2611 }
2612
/** Per-channel operation with a single source operand. */
typedef void (*micro_unary_op)(union tgsi_exec_channel *dst,
                               const union tgsi_exec_channel *src);
2615
2616 static void
2617 exec_scalar_unary(struct tgsi_exec_machine *mach,
2618 const struct tgsi_full_instruction *inst,
2619 micro_unary_op op,
2620 enum tgsi_exec_datatype dst_datatype,
2621 enum tgsi_exec_datatype src_datatype)
2622 {
2623 unsigned int chan;
2624 union tgsi_exec_channel src;
2625 union tgsi_exec_channel dst;
2626
2627 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
2628 op(&dst, &src);
2629 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2630 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2631 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2632 }
2633 }
2634 }
2635
2636 static void
2637 exec_vector_unary(struct tgsi_exec_machine *mach,
2638 const struct tgsi_full_instruction *inst,
2639 micro_unary_op op,
2640 enum tgsi_exec_datatype dst_datatype,
2641 enum tgsi_exec_datatype src_datatype)
2642 {
2643 unsigned int chan;
2644 struct tgsi_exec_vector dst;
2645
2646 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2647 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2648 union tgsi_exec_channel src;
2649
2650 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
2651 op(&dst.xyzw[chan], &src);
2652 }
2653 }
2654 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2655 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2656 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2657 }
2658 }
2659 }
2660
/** Per-channel operation with two source operands. */
typedef void (*micro_binary_op)(union tgsi_exec_channel *dst,
                                const union tgsi_exec_channel *src0,
                                const union tgsi_exec_channel *src1);
2664
2665 static void
2666 exec_scalar_binary(struct tgsi_exec_machine *mach,
2667 const struct tgsi_full_instruction *inst,
2668 micro_binary_op op,
2669 enum tgsi_exec_datatype dst_datatype,
2670 enum tgsi_exec_datatype src_datatype)
2671 {
2672 unsigned int chan;
2673 union tgsi_exec_channel src[2];
2674 union tgsi_exec_channel dst;
2675
2676 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
2677 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_Y, src_datatype);
2678 op(&dst, &src[0], &src[1]);
2679 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2680 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2681 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2682 }
2683 }
2684 }
2685
2686 static void
2687 exec_vector_binary(struct tgsi_exec_machine *mach,
2688 const struct tgsi_full_instruction *inst,
2689 micro_binary_op op,
2690 enum tgsi_exec_datatype dst_datatype,
2691 enum tgsi_exec_datatype src_datatype)
2692 {
2693 unsigned int chan;
2694 struct tgsi_exec_vector dst;
2695
2696 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2697 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2698 union tgsi_exec_channel src[2];
2699
2700 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2701 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2702 op(&dst.xyzw[chan], &src[0], &src[1]);
2703 }
2704 }
2705 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2706 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2707 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2708 }
2709 }
2710 }
2711
/** Per-channel operation with three source operands. */
typedef void (*micro_trinary_op)(union tgsi_exec_channel *dst,
                                 const union tgsi_exec_channel *src0,
                                 const union tgsi_exec_channel *src1,
                                 const union tgsi_exec_channel *src2);
2716
2717 static void
2718 exec_vector_trinary(struct tgsi_exec_machine *mach,
2719 const struct tgsi_full_instruction *inst,
2720 micro_trinary_op op,
2721 enum tgsi_exec_datatype dst_datatype,
2722 enum tgsi_exec_datatype src_datatype)
2723 {
2724 unsigned int chan;
2725 struct tgsi_exec_vector dst;
2726
2727 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2728 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2729 union tgsi_exec_channel src[3];
2730
2731 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2732 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2733 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
2734 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
2735 }
2736 }
2737 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2738 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2739 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2740 }
2741 }
2742 }
2743
2744 static void
2745 exec_dp3(struct tgsi_exec_machine *mach,
2746 const struct tgsi_full_instruction *inst)
2747 {
2748 unsigned int chan;
2749 union tgsi_exec_channel arg[3];
2750
2751 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2752 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2753 micro_mul(&arg[2], &arg[0], &arg[1]);
2754
2755 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
2756 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2757 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
2758 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2759 }
2760
2761 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2762 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2763 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2764 }
2765 }
2766 }
2767
2768 static void
2769 exec_dp4(struct tgsi_exec_machine *mach,
2770 const struct tgsi_full_instruction *inst)
2771 {
2772 unsigned int chan;
2773 union tgsi_exec_channel arg[3];
2774
2775 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2776 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2777 micro_mul(&arg[2], &arg[0], &arg[1]);
2778
2779 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
2780 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2781 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
2782 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2783 }
2784
2785 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2786 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2787 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2788 }
2789 }
2790 }
2791
2792 static void
2793 exec_dp2a(struct tgsi_exec_machine *mach,
2794 const struct tgsi_full_instruction *inst)
2795 {
2796 unsigned int chan;
2797 union tgsi_exec_channel arg[3];
2798
2799 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2800 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2801 micro_mul(&arg[2], &arg[0], &arg[1]);
2802
2803 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2804 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2805 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
2806
2807 fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2808 micro_add(&arg[0], &arg[0], &arg[1]);
2809
2810 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2811 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2812 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2813 }
2814 }
2815 }
2816
2817 static void
2818 exec_dph(struct tgsi_exec_machine *mach,
2819 const struct tgsi_full_instruction *inst)
2820 {
2821 unsigned int chan;
2822 union tgsi_exec_channel arg[3];
2823
2824 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2825 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2826 micro_mul(&arg[2], &arg[0], &arg[1]);
2827
2828 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2829 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2830 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2831
2832 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2833 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2834 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
2835
2836 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2837 micro_add(&arg[0], &arg[0], &arg[1]);
2838
2839 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2840 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2841 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2842 }
2843 }
2844 }
2845
2846 static void
2847 exec_dp2(struct tgsi_exec_machine *mach,
2848 const struct tgsi_full_instruction *inst)
2849 {
2850 unsigned int chan;
2851 union tgsi_exec_channel arg[3];
2852
2853 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2854 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2855 micro_mul(&arg[2], &arg[0], &arg[1]);
2856
2857 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2858 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2859 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2860
2861 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2862 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2863 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2864 }
2865 }
2866 }
2867
2868 static void
2869 exec_nrm4(struct tgsi_exec_machine *mach,
2870 const struct tgsi_full_instruction *inst)
2871 {
2872 unsigned int chan;
2873 union tgsi_exec_channel arg[4];
2874 union tgsi_exec_channel scale;
2875
2876 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2877 micro_mul(&scale, &arg[0], &arg[0]);
2878
2879 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
2880 union tgsi_exec_channel product;
2881
2882 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2883 micro_mul(&product, &arg[chan], &arg[chan]);
2884 micro_add(&scale, &scale, &product);
2885 }
2886
2887 micro_rsq(&scale, &scale);
2888
2889 for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_W; chan++) {
2890 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2891 micro_mul(&arg[chan], &arg[chan], &scale);
2892 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2893 }
2894 }
2895 }
2896
2897 static void
2898 exec_nrm3(struct tgsi_exec_machine *mach,
2899 const struct tgsi_full_instruction *inst)
2900 {
2901 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
2902 unsigned int chan;
2903 union tgsi_exec_channel arg[3];
2904 union tgsi_exec_channel scale;
2905
2906 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2907 micro_mul(&scale, &arg[0], &arg[0]);
2908
2909 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
2910 union tgsi_exec_channel product;
2911
2912 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2913 micro_mul(&product, &arg[chan], &arg[chan]);
2914 micro_add(&scale, &scale, &product);
2915 }
2916
2917 micro_rsq(&scale, &scale);
2918
2919 for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_Z; chan++) {
2920 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2921 micro_mul(&arg[chan], &arg[chan], &scale);
2922 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2923 }
2924 }
2925 }
2926
2927 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2928 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2929 }
2930 }
2931
2932 static void
2933 exec_scs(struct tgsi_exec_machine *mach,
2934 const struct tgsi_full_instruction *inst)
2935 {
2936 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
2937 union tgsi_exec_channel arg;
2938 union tgsi_exec_channel result;
2939
2940 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2941
2942 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2943 micro_cos(&result, &arg);
2944 store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2945 }
2946 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2947 micro_sin(&result, &arg);
2948 store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2949 }
2950 }
2951 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2952 store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2953 }
2954 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2955 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2956 }
2957 }
2958
2959 static void
2960 exec_x2d(struct tgsi_exec_machine *mach,
2961 const struct tgsi_full_instruction *inst)
2962 {
2963 union tgsi_exec_channel r[4];
2964 union tgsi_exec_channel d[2];
2965
2966 fetch_source(mach, &r[0], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2967 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2968 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) {
2969 fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2970 micro_mul(&r[2], &r[2], &r[0]);
2971 fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2972 micro_mul(&r[3], &r[3], &r[1]);
2973 micro_add(&r[2], &r[2], &r[3]);
2974 fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2975 micro_add(&d[0], &r[2], &r[3]);
2976 }
2977 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) {
2978 fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2979 micro_mul(&r[2], &r[2], &r[0]);
2980 fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2981 micro_mul(&r[3], &r[3], &r[1]);
2982 micro_add(&r[2], &r[2], &r[3]);
2983 fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2984 micro_add(&d[1], &r[2], &r[3]);
2985 }
2986 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2987 store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2988 }
2989 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2990 store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2991 }
2992 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2993 store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2994 }
2995 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2996 store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2997 }
2998 }
2999
3000 static void
3001 exec_rfl(struct tgsi_exec_machine *mach,
3002 const struct tgsi_full_instruction *inst)
3003 {
3004 union tgsi_exec_channel r[9];
3005
3006 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
3007 /* r0 = dp3(src0, src0) */
3008 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3009 micro_mul(&r[0], &r[2], &r[2]);
3010 fetch_source(mach, &r[4], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3011 micro_mul(&r[8], &r[4], &r[4]);
3012 micro_add(&r[0], &r[0], &r[8]);
3013 fetch_source(mach, &r[6], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3014 micro_mul(&r[8], &r[6], &r[6]);
3015 micro_add(&r[0], &r[0], &r[8]);
3016
3017 /* r1 = dp3(src0, src1) */
3018 fetch_source(mach, &r[3], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3019 micro_mul(&r[1], &r[2], &r[3]);
3020 fetch_source(mach, &r[5], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3021 micro_mul(&r[8], &r[4], &r[5]);
3022 micro_add(&r[1], &r[1], &r[8]);
3023 fetch_source(mach, &r[7], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3024 micro_mul(&r[8], &r[6], &r[7]);
3025 micro_add(&r[1], &r[1], &r[8]);
3026
3027 /* r1 = 2 * r1 / r0 */
3028 micro_add(&r[1], &r[1], &r[1]);
3029 micro_div(&r[1], &r[1], &r[0]);
3030
3031 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3032 micro_mul(&r[2], &r[2], &r[1]);
3033 micro_sub(&r[2], &r[2], &r[3]);
3034 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3035 }
3036 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3037 micro_mul(&r[4], &r[4], &r[1]);
3038 micro_sub(&r[4], &r[4], &r[5]);
3039 store_dest(mach, &r[4], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3040 }
3041 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3042 micro_mul(&r[6], &r[6], &r[1]);
3043 micro_sub(&r[6], &r[6], &r[7]);
3044 store_dest(mach, &r[6], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3045 }
3046 }
3047 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3048 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3049 }
3050 }
3051
3052 static void
3053 exec_xpd(struct tgsi_exec_machine *mach,
3054 const struct tgsi_full_instruction *inst)
3055 {
3056 union tgsi_exec_channel r[6];
3057 union tgsi_exec_channel d[3];
3058
3059 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3060 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3061
3062 micro_mul(&r[2], &r[0], &r[1]);
3063
3064 fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3065 fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3066
3067 micro_mul(&r[5], &r[3], &r[4] );
3068 micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]);
3069
3070 fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3071
3072 micro_mul(&r[3], &r[3], &r[2]);
3073
3074 fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3075
3076 micro_mul(&r[1], &r[1], &r[5]);
3077 micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]);
3078
3079 micro_mul(&r[5], &r[5], &r[4]);
3080 micro_mul(&r[0], &r[0], &r[2]);
3081 micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]);
3082
3083 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3084 store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3085 }
3086 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3087 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3088 }
3089 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3090 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3091 }
3092 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3093 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3094 }
3095 }
3096
3097 static void
3098 exec_dst(struct tgsi_exec_machine *mach,
3099 const struct tgsi_full_instruction *inst)
3100 {
3101 union tgsi_exec_channel r[2];
3102 union tgsi_exec_channel d[4];
3103
3104 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3105 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3106 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3107 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
3108 }
3109 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3110 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3111 }
3112 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3113 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3114 }
3115
3116 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3117 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3118 }
3119 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3120 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3121 }
3122 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3123 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3124 }
3125 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3126 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3127 }
3128 }
3129
3130 static void
3131 exec_log(struct tgsi_exec_machine *mach,
3132 const struct tgsi_full_instruction *inst)
3133 {
3134 union tgsi_exec_channel r[3];
3135
3136 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3137 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */
3138 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */
3139 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */
3140 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3141 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3142 }
3143 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3144 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */
3145 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
3146 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3147 }
3148 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3149 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3150 }
3151 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3152 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3153 }
3154 }
3155
3156 static void
3157 exec_exp(struct tgsi_exec_machine *mach,
3158 const struct tgsi_full_instruction *inst)
3159 {
3160 union tgsi_exec_channel r[3];
3161
3162 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3163 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */
3164 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3165 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */
3166 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3167 }
3168 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3169 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
3170 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3171 }
3172 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3173 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */
3174 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3175 }
3176 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3177 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3178 }
3179 }
3180
3181 static void
3182 exec_lit(struct tgsi_exec_machine *mach,
3183 const struct tgsi_full_instruction *inst)
3184 {
3185 union tgsi_exec_channel r[3];
3186 union tgsi_exec_channel d[3];
3187
3188 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3189 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3190 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3191 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3192 micro_max(&r[1], &r[1], &ZeroVec);
3193
3194 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3195 micro_min(&r[2], &r[2], &P128Vec);
3196 micro_max(&r[2], &r[2], &M128Vec);
3197 micro_pow(&r[1], &r[1], &r[2]);
3198 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3199 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3200 }
3201 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3202 micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
3203 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3204 }
3205 }
3206 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3207 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3208 }
3209
3210 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3211 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3212 }
3213 }
3214
3215 static void
3216 exec_break(struct tgsi_exec_machine *mach)
3217 {
3218 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3219 /* turn off loop channels for each enabled exec channel */
3220 mach->LoopMask &= ~mach->ExecMask;
3221 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3222 UPDATE_EXEC_MASK(mach);
3223 } else {
3224 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3225
3226 mach->Switch.mask = 0x0;
3227
3228 UPDATE_EXEC_MASK(mach);
3229 }
3230 }
3231
3232 static void
3233 exec_switch(struct tgsi_exec_machine *mach,
3234 const struct tgsi_full_instruction *inst)
3235 {
3236 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3237 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3238
3239 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3240 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3241 mach->Switch.mask = 0x0;
3242 mach->Switch.defaultMask = 0x0;
3243
3244 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3245 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3246
3247 UPDATE_EXEC_MASK(mach);
3248 }
3249
3250 static void
3251 exec_case(struct tgsi_exec_machine *mach,
3252 const struct tgsi_full_instruction *inst)
3253 {
3254 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3255 union tgsi_exec_channel src;
3256 uint mask = 0;
3257
3258 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3259
3260 if (mach->Switch.selector.u[0] == src.u[0]) {
3261 mask |= 0x1;
3262 }
3263 if (mach->Switch.selector.u[1] == src.u[1]) {
3264 mask |= 0x2;
3265 }
3266 if (mach->Switch.selector.u[2] == src.u[2]) {
3267 mask |= 0x4;
3268 }
3269 if (mach->Switch.selector.u[3] == src.u[3]) {
3270 mask |= 0x8;
3271 }
3272
3273 mach->Switch.defaultMask |= mask;
3274
3275 mach->Switch.mask |= mask & prevMask;
3276
3277 UPDATE_EXEC_MASK(mach);
3278 }
3279
3280 static void
3281 exec_default(struct tgsi_exec_machine *mach)
3282 {
3283 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3284
3285 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3286
3287 UPDATE_EXEC_MASK(mach);
3288 }
3289
3290 static void
3291 exec_endswitch(struct tgsi_exec_machine *mach)
3292 {
3293 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3294 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3295
3296 UPDATE_EXEC_MASK(mach);
3297 }
3298
3299 static void
3300 micro_i2f(union tgsi_exec_channel *dst,
3301 const union tgsi_exec_channel *src)
3302 {
3303 dst->f[0] = (float)src->i[0];
3304 dst->f[1] = (float)src->i[1];
3305 dst->f[2] = (float)src->i[2];
3306 dst->f[3] = (float)src->i[3];
3307 }
3308
3309 static void
3310 micro_not(union tgsi_exec_channel *dst,
3311 const union tgsi_exec_channel *src)
3312 {
3313 dst->u[0] = ~src->u[0];
3314 dst->u[1] = ~src->u[1];
3315 dst->u[2] = ~src->u[2];
3316 dst->u[3] = ~src->u[3];
3317 }
3318
3319 static void
3320 micro_shl(union tgsi_exec_channel *dst,
3321 const union tgsi_exec_channel *src0,
3322 const union tgsi_exec_channel *src1)
3323 {
3324 dst->u[0] = src0->u[0] << src1->u[0];
3325 dst->u[1] = src0->u[1] << src1->u[1];
3326 dst->u[2] = src0->u[2] << src1->u[2];
3327 dst->u[3] = src0->u[3] << src1->u[3];
3328 }
3329
3330 static void
3331 micro_and(union tgsi_exec_channel *dst,
3332 const union tgsi_exec_channel *src0,
3333 const union tgsi_exec_channel *src1)
3334 {
3335 dst->u[0] = src0->u[0] & src1->u[0];
3336 dst->u[1] = src0->u[1] & src1->u[1];
3337 dst->u[2] = src0->u[2] & src1->u[2];
3338 dst->u[3] = src0->u[3] & src1->u[3];
3339 }
3340
3341 static void
3342 micro_or(union tgsi_exec_channel *dst,
3343 const union tgsi_exec_channel *src0,
3344 const union tgsi_exec_channel *src1)
3345 {
3346 dst->u[0] = src0->u[0] | src1->u[0];
3347 dst->u[1] = src0->u[1] | src1->u[1];
3348 dst->u[2] = src0->u[2] | src1->u[2];
3349 dst->u[3] = src0->u[3] | src1->u[3];
3350 }
3351
3352 static void
3353 micro_xor(union tgsi_exec_channel *dst,
3354 const union tgsi_exec_channel *src0,
3355 const union tgsi_exec_channel *src1)
3356 {
3357 dst->u[0] = src0->u[0] ^ src1->u[0];
3358 dst->u[1] = src0->u[1] ^ src1->u[1];
3359 dst->u[2] = src0->u[2] ^ src1->u[2];
3360 dst->u[3] = src0->u[3] ^ src1->u[3];
3361 }
3362
3363 static void
3364 micro_mod(union tgsi_exec_channel *dst,
3365 const union tgsi_exec_channel *src0,
3366 const union tgsi_exec_channel *src1)
3367 {
3368 dst->i[0] = src0->i[0] % src1->i[0];
3369 dst->i[1] = src0->i[1] % src1->i[1];
3370 dst->i[2] = src0->i[2] % src1->i[2];
3371 dst->i[3] = src0->i[3] % src1->i[3];
3372 }
3373
3374 static void
3375 micro_f2i(union tgsi_exec_channel *dst,
3376 const union tgsi_exec_channel *src)
3377 {
3378 dst->i[0] = (int)src->f[0];
3379 dst->i[1] = (int)src->f[1];
3380 dst->i[2] = (int)src->f[2];
3381 dst->i[3] = (int)src->f[3];
3382 }
3383
3384 static void
3385 micro_idiv(union tgsi_exec_channel *dst,
3386 const union tgsi_exec_channel *src0,
3387 const union tgsi_exec_channel *src1)
3388 {
3389 dst->i[0] = src0->i[0] / src1->i[0];
3390 dst->i[1] = src0->i[1] / src1->i[1];
3391 dst->i[2] = src0->i[2] / src1->i[2];
3392 dst->i[3] = src0->i[3] / src1->i[3];
3393 }
3394
3395 static void
3396 micro_imax(union tgsi_exec_channel *dst,
3397 const union tgsi_exec_channel *src0,
3398 const union tgsi_exec_channel *src1)
3399 {
3400 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
3401 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
3402 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
3403 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
3404 }
3405
3406 static void
3407 micro_imin(union tgsi_exec_channel *dst,
3408 const union tgsi_exec_channel *src0,
3409 const union tgsi_exec_channel *src1)
3410 {
3411 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
3412 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
3413 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
3414 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
3415 }
3416
3417 static void
3418 micro_isge(union tgsi_exec_channel *dst,
3419 const union tgsi_exec_channel *src0,
3420 const union tgsi_exec_channel *src1)
3421 {
3422 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
3423 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
3424 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
3425 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
3426 }
3427
3428 static void
3429 micro_ishr(union tgsi_exec_channel *dst,
3430 const union tgsi_exec_channel *src0,
3431 const union tgsi_exec_channel *src1)
3432 {
3433 dst->i[0] = src0->i[0] >> src1->i[0];
3434 dst->i[1] = src0->i[1] >> src1->i[1];
3435 dst->i[2] = src0->i[2] >> src1->i[2];
3436 dst->i[3] = src0->i[3] >> src1->i[3];
3437 }
3438
3439 static void
3440 micro_islt(union tgsi_exec_channel *dst,
3441 const union tgsi_exec_channel *src0,
3442 const union tgsi_exec_channel *src1)
3443 {
3444 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
3445 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
3446 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
3447 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
3448 }
3449
3450 static void
3451 micro_f2u(union tgsi_exec_channel *dst,
3452 const union tgsi_exec_channel *src)
3453 {
3454 dst->u[0] = (uint)src->f[0];
3455 dst->u[1] = (uint)src->f[1];
3456 dst->u[2] = (uint)src->f[2];
3457 dst->u[3] = (uint)src->f[3];
3458 }
3459
3460 static void
3461 micro_u2f(union tgsi_exec_channel *dst,
3462 const union tgsi_exec_channel *src)
3463 {
3464 dst->f[0] = (float)src->u[0];
3465 dst->f[1] = (float)src->u[1];
3466 dst->f[2] = (float)src->u[2];
3467 dst->f[3] = (float)src->u[3];
3468 }
3469
3470 static void
3471 micro_uadd(union tgsi_exec_channel *dst,
3472 const union tgsi_exec_channel *src0,
3473 const union tgsi_exec_channel *src1)
3474 {
3475 dst->u[0] = src0->u[0] + src1->u[0];
3476 dst->u[1] = src0->u[1] + src1->u[1];
3477 dst->u[2] = src0->u[2] + src1->u[2];
3478 dst->u[3] = src0->u[3] + src1->u[3];
3479 }
3480
3481 static void
3482 micro_udiv(union tgsi_exec_channel *dst,
3483 const union tgsi_exec_channel *src0,
3484 const union tgsi_exec_channel *src1)
3485 {
3486 dst->u[0] = src0->u[0] / src1->u[0];
3487 dst->u[1] = src0->u[1] / src1->u[1];
3488 dst->u[2] = src0->u[2] / src1->u[2];
3489 dst->u[3] = src0->u[3] / src1->u[3];
3490 }
3491
3492 static void
3493 micro_umad(union tgsi_exec_channel *dst,
3494 const union tgsi_exec_channel *src0,
3495 const union tgsi_exec_channel *src1,
3496 const union tgsi_exec_channel *src2)
3497 {
3498 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
3499 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
3500 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
3501 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
3502 }
3503
3504 static void
3505 micro_umax(union tgsi_exec_channel *dst,
3506 const union tgsi_exec_channel *src0,
3507 const union tgsi_exec_channel *src1)
3508 {
3509 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
3510 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
3511 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
3512 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
3513 }
3514
3515 static void
3516 micro_umin(union tgsi_exec_channel *dst,
3517 const union tgsi_exec_channel *src0,
3518 const union tgsi_exec_channel *src1)
3519 {
3520 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
3521 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
3522 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
3523 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
3524 }
3525
3526 static void
3527 micro_umod(union tgsi_exec_channel *dst,
3528 const union tgsi_exec_channel *src0,
3529 const union tgsi_exec_channel *src1)
3530 {
3531 dst->u[0] = src0->u[0] % src1->u[0];
3532 dst->u[1] = src0->u[1] % src1->u[1];
3533 dst->u[2] = src0->u[2] % src1->u[2];
3534 dst->u[3] = src0->u[3] % src1->u[3];
3535 }
3536
3537 static void
3538 micro_umul(union tgsi_exec_channel *dst,
3539 const union tgsi_exec_channel *src0,
3540 const union tgsi_exec_channel *src1)
3541 {
3542 dst->u[0] = src0->u[0] * src1->u[0];
3543 dst->u[1] = src0->u[1] * src1->u[1];
3544 dst->u[2] = src0->u[2] * src1->u[2];
3545 dst->u[3] = src0->u[3] * src1->u[3];
3546 }
3547
3548 static void
3549 micro_useq(union tgsi_exec_channel *dst,
3550 const union tgsi_exec_channel *src0,
3551 const union tgsi_exec_channel *src1)
3552 {
3553 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
3554 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
3555 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
3556 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
3557 }
3558
3559 static void
3560 micro_usge(union tgsi_exec_channel *dst,
3561 const union tgsi_exec_channel *src0,
3562 const union tgsi_exec_channel *src1)
3563 {
3564 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
3565 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
3566 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
3567 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
3568 }
3569
3570 static void
3571 micro_ushr(union tgsi_exec_channel *dst,
3572 const union tgsi_exec_channel *src0,
3573 const union tgsi_exec_channel *src1)
3574 {
3575 dst->u[0] = src0->u[0] >> src1->u[0];
3576 dst->u[1] = src0->u[1] >> src1->u[1];
3577 dst->u[2] = src0->u[2] >> src1->u[2];
3578 dst->u[3] = src0->u[3] >> src1->u[3];
3579 }
3580
3581 static void
3582 micro_uslt(union tgsi_exec_channel *dst,
3583 const union tgsi_exec_channel *src0,
3584 const union tgsi_exec_channel *src1)
3585 {
3586 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
3587 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
3588 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
3589 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
3590 }
3591
3592 static void
3593 micro_usne(union tgsi_exec_channel *dst,
3594 const union tgsi_exec_channel *src0,
3595 const union tgsi_exec_channel *src1)
3596 {
3597 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
3598 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
3599 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
3600 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
3601 }
3602
3603 static void
3604 micro_uarl(union tgsi_exec_channel *dst,
3605 const union tgsi_exec_channel *src)
3606 {
3607 dst->i[0] = src->u[0];
3608 dst->i[1] = src->u[1];
3609 dst->i[2] = src->u[2];
3610 dst->i[3] = src->u[3];
3611 }
3612
3613 static void
3614 micro_ucmp(union tgsi_exec_channel *dst,
3615 const union tgsi_exec_channel *src0,
3616 const union tgsi_exec_channel *src1,
3617 const union tgsi_exec_channel *src2)
3618 {
3619 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
3620 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
3621 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
3622 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
3623 }
3624
3625 static void
3626 exec_instruction(
3627 struct tgsi_exec_machine *mach,
3628 const struct tgsi_full_instruction *inst,
3629 int *pc )
3630 {
3631 union tgsi_exec_channel r[10];
3632
3633 (*pc)++;
3634
3635 switch (inst->Instruction.Opcode) {
3636 case TGSI_OPCODE_ARL:
3637 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3638 break;
3639
3640 case TGSI_OPCODE_MOV:
3641 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
3642 break;
3643
3644 case TGSI_OPCODE_LIT:
3645 exec_lit(mach, inst);
3646 break;
3647
3648 case TGSI_OPCODE_RCP:
3649 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3650 break;
3651
3652 case TGSI_OPCODE_RSQ:
3653 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3654 break;
3655
3656 case TGSI_OPCODE_EXP:
3657 exec_exp(mach, inst);
3658 break;
3659
3660 case TGSI_OPCODE_LOG:
3661 exec_log(mach, inst);
3662 break;
3663
3664 case TGSI_OPCODE_MUL:
3665 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3666 break;
3667
3668 case TGSI_OPCODE_ADD:
3669 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3670 break;
3671
3672 case TGSI_OPCODE_DP3:
3673 exec_dp3(mach, inst);
3674 break;
3675
3676 case TGSI_OPCODE_DP4:
3677 exec_dp4(mach, inst);
3678 break;
3679
3680 case TGSI_OPCODE_DST:
3681 exec_dst(mach, inst);
3682 break;
3683
3684 case TGSI_OPCODE_MIN:
3685 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3686 break;
3687
3688 case TGSI_OPCODE_MAX:
3689 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3690 break;
3691
3692 case TGSI_OPCODE_SLT:
3693 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3694 break;
3695
3696 case TGSI_OPCODE_SGE:
3697 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3698 break;
3699
3700 case TGSI_OPCODE_MAD:
3701 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3702 break;
3703
3704 case TGSI_OPCODE_SUB:
3705 exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3706 break;
3707
3708 case TGSI_OPCODE_LRP:
3709 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3710 break;
3711
3712 case TGSI_OPCODE_CND:
3713 exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3714 break;
3715
3716 case TGSI_OPCODE_SQRT:
3717 exec_vector_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3718 break;
3719
3720 case TGSI_OPCODE_DP2A:
3721 exec_dp2a(mach, inst);
3722 break;
3723
3724 case TGSI_OPCODE_FRC:
3725 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3726 break;
3727
3728 case TGSI_OPCODE_CLAMP:
3729 exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3730 break;
3731
3732 case TGSI_OPCODE_FLR:
3733 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3734 break;
3735
3736 case TGSI_OPCODE_ROUND:
3737 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3738 break;
3739
3740 case TGSI_OPCODE_EX2:
3741 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3742 break;
3743
3744 case TGSI_OPCODE_LG2:
3745 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3746 break;
3747
3748 case TGSI_OPCODE_POW:
3749 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3750 break;
3751
3752 case TGSI_OPCODE_XPD:
3753 exec_xpd(mach, inst);
3754 break;
3755
3756 case TGSI_OPCODE_ABS:
3757 exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3758 break;
3759
3760 case TGSI_OPCODE_RCC:
3761 exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3762 break;
3763
3764 case TGSI_OPCODE_DPH:
3765 exec_dph(mach, inst);
3766 break;
3767
3768 case TGSI_OPCODE_COS:
3769 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3770 break;
3771
3772 case TGSI_OPCODE_DDX:
3773 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3774 break;
3775
3776 case TGSI_OPCODE_DDY:
3777 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3778 break;
3779
3780 case TGSI_OPCODE_KILP:
3781 exec_kilp (mach, inst);
3782 break;
3783
3784 case TGSI_OPCODE_KIL:
3785 exec_kil (mach, inst);
3786 break;
3787
3788 case TGSI_OPCODE_PK2H:
3789 assert (0);
3790 break;
3791
3792 case TGSI_OPCODE_PK2US:
3793 assert (0);
3794 break;
3795
3796 case TGSI_OPCODE_PK4B:
3797 assert (0);
3798 break;
3799
3800 case TGSI_OPCODE_PK4UB:
3801 assert (0);
3802 break;
3803
3804 case TGSI_OPCODE_RFL:
3805 exec_rfl(mach, inst);
3806 break;
3807
3808 case TGSI_OPCODE_SEQ:
3809 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3810 break;
3811
3812 case TGSI_OPCODE_SFL:
3813 exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT);
3814 break;
3815
3816 case TGSI_OPCODE_SGT:
3817 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3818 break;
3819
3820 case TGSI_OPCODE_SIN:
3821 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3822 break;
3823
3824 case TGSI_OPCODE_SLE:
3825 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3826 break;
3827
3828 case TGSI_OPCODE_SNE:
3829 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3830 break;
3831
3832 case TGSI_OPCODE_STR:
3833 exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT);
3834 break;
3835
3836 case TGSI_OPCODE_TEX:
3837 /* simple texture lookup */
3838 /* src[0] = texcoord */
3839 /* src[1] = sampler unit */
3840 exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);
3841 break;
3842
3843 case TGSI_OPCODE_TXB:
3844 /* Texture lookup with lod bias */
3845 /* src[0] = texcoord (src[0].w = LOD bias) */
3846 /* src[1] = sampler unit */
3847 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);
3848 break;
3849
3850 case TGSI_OPCODE_TXD:
3851 /* Texture lookup with explicit partial derivatives */
3852 /* src[0] = texcoord */
3853 /* src[1] = d[strq]/dx */
3854 /* src[2] = d[strq]/dy */
3855 /* src[3] = sampler unit */
3856 exec_txd(mach, inst);
3857 break;
3858
3859 case TGSI_OPCODE_TXL:
3860 /* Texture lookup with explicit LOD */
3861 /* src[0] = texcoord (src[0].w = LOD) */
3862 /* src[1] = sampler unit */
3863 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);
3864 break;
3865
3866 case TGSI_OPCODE_TXP:
3867 /* Texture lookup with projection */
3868 /* src[0] = texcoord (src[0].w = projection) */
3869 /* src[1] = sampler unit */
3870 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
3871 break;
3872
3873 case TGSI_OPCODE_UP2H:
3874 assert (0);
3875 break;
3876
3877 case TGSI_OPCODE_UP2US:
3878 assert (0);
3879 break;
3880
3881 case TGSI_OPCODE_UP4B:
3882 assert (0);
3883 break;
3884
3885 case TGSI_OPCODE_UP4UB:
3886 assert (0);
3887 break;
3888
3889 case TGSI_OPCODE_X2D:
3890 exec_x2d(mach, inst);
3891 break;
3892
3893 case TGSI_OPCODE_ARA:
3894 assert (0);
3895 break;
3896
3897 case TGSI_OPCODE_ARR:
3898 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3899 break;
3900
3901 case TGSI_OPCODE_BRA:
3902 assert (0);
3903 break;
3904
3905 case TGSI_OPCODE_CAL:
3906 /* skip the call if no execution channels are enabled */
3907 if (mach->ExecMask) {
3908 /* do the call */
3909
3910 /* First, record the depths of the execution stacks.
3911 * This is important for deeply nested/looped return statements.
3912 * We have to unwind the stacks by the correct amount. For a
3913 * real code generator, we could determine the number of entries
3914 * to pop off each stack with simple static analysis and avoid
3915 * implementing this data structure at run time.
3916 */
3917 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
3918 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
3919 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
3920 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
3921 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
3922 /* note that PC was already incremented above */
3923 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
3924
3925 mach->CallStackTop++;
3926
3927 /* Second, push the Cond, Loop, Cont, Func stacks */
3928 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3929 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3930 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3931 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3932 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3933 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
3934
3935 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3936 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3937 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3938 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3939 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3940 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
3941
3942 /* Finally, jump to the subroutine */
3943 *pc = inst->Label.Label;
3944 }
3945 break;
3946
3947 case TGSI_OPCODE_RET:
3948 mach->FuncMask &= ~mach->ExecMask;
3949 UPDATE_EXEC_MASK(mach);
3950
3951 if (mach->FuncMask == 0x0) {
3952 /* really return now (otherwise, keep executing) */
3953
3954 if (mach->CallStackTop == 0) {
3955 /* returning from main() */
3956 mach->CondStackTop = 0;
3957 mach->LoopStackTop = 0;
3958 *pc = -1;
3959 return;
3960 }
3961
3962 assert(mach->CallStackTop > 0);
3963 mach->CallStackTop--;
3964
3965 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3966 mach->CondMask = mach->CondStack[mach->CondStackTop];
3967
3968 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3969 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3970
3971 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3972 mach->ContMask = mach->ContStack[mach->ContStackTop];
3973
3974 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3975 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3976
3977 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3978 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3979
3980 assert(mach->FuncStackTop > 0);
3981 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3982
3983 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3984
3985 UPDATE_EXEC_MASK(mach);
3986 }
3987 break;
3988
3989 case TGSI_OPCODE_SSG:
3990 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3991 break;
3992
3993 case TGSI_OPCODE_CMP:
3994 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3995 break;
3996
3997 case TGSI_OPCODE_SCS:
3998 exec_scs(mach, inst);
3999 break;
4000
4001 case TGSI_OPCODE_NRM:
4002 exec_nrm3(mach, inst);
4003 break;
4004
4005 case TGSI_OPCODE_NRM4:
4006 exec_nrm4(mach, inst);
4007 break;
4008
4009 case TGSI_OPCODE_DIV:
4010 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
4011 break;
4012
4013 case TGSI_OPCODE_DP2:
4014 exec_dp2(mach, inst);
4015 break;
4016
4017 case TGSI_OPCODE_IF:
4018 /* push CondMask */
4019 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
4020 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
4021 FETCH( &r[0], 0, TGSI_CHAN_X );
4022 /* update CondMask */
4023 if( ! r[0].u[0] ) {
4024 mach->CondMask &= ~0x1;
4025 }
4026 if( ! r[0].u[1] ) {
4027 mach->CondMask &= ~0x2;
4028 }
4029 if( ! r[0].u[2] ) {
4030 mach->CondMask &= ~0x4;
4031 }
4032 if( ! r[0].u[3] ) {
4033 mach->CondMask &= ~0x8;
4034 }
4035 UPDATE_EXEC_MASK(mach);
4036 /* Todo: If CondMask==0, jump to ELSE */
4037 break;
4038
4039 case TGSI_OPCODE_ELSE:
4040 /* invert CondMask wrt previous mask */
4041 {
4042 uint prevMask;
4043 assert(mach->CondStackTop > 0);
4044 prevMask = mach->CondStack[mach->CondStackTop - 1];
4045 mach->CondMask = ~mach->CondMask & prevMask;
4046 UPDATE_EXEC_MASK(mach);
4047 /* Todo: If CondMask==0, jump to ENDIF */
4048 }
4049 break;
4050
4051 case TGSI_OPCODE_ENDIF:
4052 /* pop CondMask */
4053 assert(mach->CondStackTop > 0);
4054 mach->CondMask = mach->CondStack[--mach->CondStackTop];
4055 UPDATE_EXEC_MASK(mach);
4056 break;
4057
4058 case TGSI_OPCODE_END:
4059 /* make sure we end primitives which haven't
4060 * been explicitly emitted */
4061 conditional_emit_primitive(mach);
4062 /* halt execution */
4063 *pc = -1;
4064 break;
4065
4066 case TGSI_OPCODE_PUSHA:
4067 assert (0);
4068 break;
4069
4070 case TGSI_OPCODE_POPA:
4071 assert (0);
4072 break;
4073
4074 case TGSI_OPCODE_CEIL:
4075 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
4076 break;
4077
4078 case TGSI_OPCODE_I2F:
4079 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
4080 break;
4081
4082 case TGSI_OPCODE_NOT:
4083 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4084 break;
4085
4086 case TGSI_OPCODE_TRUNC:
4087 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
4088 break;
4089
4090 case TGSI_OPCODE_SHL:
4091 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4092 break;
4093
4094 case TGSI_OPCODE_AND:
4095 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4096 break;
4097
4098 case TGSI_OPCODE_OR:
4099 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4100 break;
4101
4102 case TGSI_OPCODE_MOD:
4103 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4104 break;
4105
4106 case TGSI_OPCODE_XOR:
4107 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4108 break;
4109
4110 case TGSI_OPCODE_SAD:
4111 assert (0);
4112 break;
4113
4114 case TGSI_OPCODE_TXF:
4115 exec_txf(mach, inst);
4116 break;
4117
4118 case TGSI_OPCODE_TXQ:
4119 exec_txq(mach, inst);
4120 break;
4121
4122 case TGSI_OPCODE_EMIT:
4123 emit_vertex(mach);
4124 break;
4125
4126 case TGSI_OPCODE_ENDPRIM:
4127 emit_primitive(mach);
4128 break;
4129
4130 case TGSI_OPCODE_BGNLOOP:
4131 /* push LoopMask and ContMasks */
4132 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
4133 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
4134 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
4135 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
4136
4137 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
4138 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
4139 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
4140 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
4141 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
4142 break;
4143
4144 case TGSI_OPCODE_ENDLOOP:
4145 /* Restore ContMask, but don't pop */
4146 assert(mach->ContStackTop > 0);
4147 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
4148 UPDATE_EXEC_MASK(mach);
4149 if (mach->ExecMask) {
4150 /* repeat loop: jump to instruction just past BGNLOOP */
4151 assert(mach->LoopLabelStackTop > 0);
4152 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
4153 }
4154 else {
4155 /* exit loop: pop LoopMask */
4156 assert(mach->LoopStackTop > 0);
4157 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
4158 /* pop ContMask */
4159 assert(mach->ContStackTop > 0);
4160 mach->ContMask = mach->ContStack[--mach->ContStackTop];
4161 assert(mach->LoopLabelStackTop > 0);
4162 --mach->LoopLabelStackTop;
4163
4164 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
4165 }
4166 UPDATE_EXEC_MASK(mach);
4167 break;
4168
4169 case TGSI_OPCODE_BRK:
4170 exec_break(mach);
4171 break;
4172
4173 case TGSI_OPCODE_CONT:
4174 /* turn off cont channels for each enabled exec channel */
4175 mach->ContMask &= ~mach->ExecMask;
4176 /* Todo: if mach->LoopMask == 0, jump to end of loop */
4177 UPDATE_EXEC_MASK(mach);
4178 break;
4179
4180 case TGSI_OPCODE_BGNSUB:
4181 /* no-op */
4182 break;
4183
4184 case TGSI_OPCODE_ENDSUB:
4185 /*
4186 * XXX: This really should be a no-op. We should never reach this opcode.
4187 */
4188
4189 assert(mach->CallStackTop > 0);
4190 mach->CallStackTop--;
4191
4192 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
4193 mach->CondMask = mach->CondStack[mach->CondStackTop];
4194
4195 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
4196 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
4197
4198 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
4199 mach->ContMask = mach->ContStack[mach->ContStackTop];
4200
4201 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
4202 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
4203
4204 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
4205 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
4206
4207 assert(mach->FuncStackTop > 0);
4208 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
4209
4210 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
4211
4212 UPDATE_EXEC_MASK(mach);
4213 break;
4214
4215 case TGSI_OPCODE_NOP:
4216 break;
4217
4218 case TGSI_OPCODE_BREAKC:
4219 FETCH(&r[0], 0, TGSI_CHAN_X);
4220 /* update CondMask */
4221 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
4222 mach->LoopMask &= ~0x1;
4223 }
4224 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
4225 mach->LoopMask &= ~0x2;
4226 }
4227 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
4228 mach->LoopMask &= ~0x4;
4229 }
4230 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
4231 mach->LoopMask &= ~0x8;
4232 }
4233 /* Todo: if mach->LoopMask == 0, jump to end of loop */
4234 UPDATE_EXEC_MASK(mach);
4235 break;
4236
4237 case TGSI_OPCODE_F2I:
4238 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
4239 break;
4240
4241 case TGSI_OPCODE_IDIV:
4242 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4243 break;
4244
4245 case TGSI_OPCODE_IMAX:
4246 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4247 break;
4248
4249 case TGSI_OPCODE_IMIN:
4250 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4251 break;
4252
4253 case TGSI_OPCODE_INEG:
4254 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4255 break;
4256
4257 case TGSI_OPCODE_ISGE:
4258 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4259 break;
4260
4261 case TGSI_OPCODE_ISHR:
4262 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4263 break;
4264
4265 case TGSI_OPCODE_ISLT:
4266 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4267 break;
4268
4269 case TGSI_OPCODE_F2U:
4270 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
4271 break;
4272
4273 case TGSI_OPCODE_U2F:
4274 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
4275 break;
4276
4277 case TGSI_OPCODE_UADD:
4278 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4279 break;
4280
4281 case TGSI_OPCODE_UDIV:
4282 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4283 break;
4284
4285 case TGSI_OPCODE_UMAD:
4286 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4287 break;
4288
4289 case TGSI_OPCODE_UMAX:
4290 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4291 break;
4292
4293 case TGSI_OPCODE_UMIN:
4294 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4295 break;
4296
4297 case TGSI_OPCODE_UMOD:
4298 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4299 break;
4300
4301 case TGSI_OPCODE_UMUL:
4302 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4303 break;
4304
4305 case TGSI_OPCODE_USEQ:
4306 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4307 break;
4308
4309 case TGSI_OPCODE_USGE:
4310 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4311 break;
4312
4313 case TGSI_OPCODE_USHR:
4314 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4315 break;
4316
4317 case TGSI_OPCODE_USLT:
4318 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4319 break;
4320
4321 case TGSI_OPCODE_USNE:
4322 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4323 break;
4324
4325 case TGSI_OPCODE_SWITCH:
4326 exec_switch(mach, inst);
4327 break;
4328
4329 case TGSI_OPCODE_CASE:
4330 exec_case(mach, inst);
4331 break;
4332
4333 case TGSI_OPCODE_DEFAULT:
4334 exec_default(mach);
4335 break;
4336
4337 case TGSI_OPCODE_ENDSWITCH:
4338 exec_endswitch(mach);
4339 break;
4340
4341 case TGSI_OPCODE_SAMPLE_I:
4342 assert(0);
4343 break;
4344
4345 case TGSI_OPCODE_SAMPLE_I_MS:
4346 assert(0);
4347 break;
4348
4349 case TGSI_OPCODE_SAMPLE:
4350 exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE);
4351 break;
4352
4353 case TGSI_OPCODE_SAMPLE_B:
4354 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE);
4355 break;
4356
4357 case TGSI_OPCODE_SAMPLE_C:
4358 exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE);
4359 break;
4360
4361 case TGSI_OPCODE_SAMPLE_C_LZ:
4362 exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE);
4363 break;
4364
4365 case TGSI_OPCODE_SAMPLE_D:
4366 exec_sample_d(mach, inst);
4367 break;
4368
4369 case TGSI_OPCODE_SAMPLE_L:
4370 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE);
4371 break;
4372
4373 case TGSI_OPCODE_GATHER4:
4374 assert(0);
4375 break;
4376
4377 case TGSI_OPCODE_SVIEWINFO:
4378 assert(0);
4379 break;
4380
4381 case TGSI_OPCODE_SAMPLE_POS:
4382 assert(0);
4383 break;
4384
4385 case TGSI_OPCODE_SAMPLE_INFO:
4386 assert(0);
4387 break;
4388
4389 case TGSI_OPCODE_UARL:
4390 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
4391 break;
4392
4393 case TGSI_OPCODE_UCMP:
4394 exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4395 break;
4396
4397 case TGSI_OPCODE_IABS:
4398 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4399 break;
4400
4401 case TGSI_OPCODE_ISSG:
4402 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4403 break;
4404
4405 case TGSI_OPCODE_TEX2:
4406 /* simple texture lookup */
4407 /* src[0] = texcoord */
4408 /* src[1] = compare */
4409 /* src[2] = sampler unit */
4410 exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);
4411 break;
4412 case TGSI_OPCODE_TXB2:
4413 /* simple texture lookup */
4414 /* src[0] = texcoord */
4415 /* src[1] = bias */
4416 /* src[2] = sampler unit */
4417 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);
4418 break;
4419 case TGSI_OPCODE_TXL2:
4420 /* simple texture lookup */
4421 /* src[0] = texcoord */
4422 /* src[1] = lod */
4423 /* src[2] = sampler unit */
4424 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
4425 break;
4426 default:
4427 assert( 0 );
4428 }
4429 }
4430
4431
4432 /**
4433 * Run TGSI interpreter.
4434 * \return bitmask of "alive" quad components
4435 */
4436 uint
4437 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
4438 {
4439 uint i;
4440 int pc = 0;
4441
4442 mach->CondMask = 0xf;
4443 mach->LoopMask = 0xf;
4444 mach->ContMask = 0xf;
4445 mach->FuncMask = 0xf;
4446 mach->ExecMask = 0xf;
4447
4448 mach->Switch.mask = 0xf;
4449
4450 assert(mach->CondStackTop == 0);
4451 assert(mach->LoopStackTop == 0);
4452 assert(mach->ContStackTop == 0);
4453 assert(mach->SwitchStackTop == 0);
4454 assert(mach->BreakStackTop == 0);
4455 assert(mach->CallStackTop == 0);
4456
4457 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
4458 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
4459
4460 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
4461 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
4462 mach->Primitives[0] = 0;
4463 }
4464
4465 /* execute declarations (interpolants) */
4466 for (i = 0; i < mach->NumDeclarations; i++) {
4467 exec_declaration( mach, mach->Declarations+i );
4468 }
4469
4470 {
4471 #if DEBUG_EXECUTION
4472 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
4473 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
4474 uint inst = 1;
4475
4476 memset(mach->Temps, 0, sizeof(temps));
4477 memset(mach->Outputs, 0, sizeof(outputs));
4478 memset(temps, 0, sizeof(temps));
4479 memset(outputs, 0, sizeof(outputs));
4480 #endif
4481
4482 /* execute instructions, until pc is set to -1 */
4483 while (pc != -1) {
4484
4485 #if DEBUG_EXECUTION
4486 uint i;
4487
4488 tgsi_dump_instruction(&mach->Instructions[pc], inst++);
4489 #endif
4490
4491 assert(pc < (int) mach->NumInstructions);
4492 exec_instruction(mach, mach->Instructions + pc, &pc);
4493
4494 #if DEBUG_EXECUTION
4495 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
4496 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
4497 uint j;
4498
4499 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
4500 debug_printf("TEMP[%2u] = ", i);
4501 for (j = 0; j < 4; j++) {
4502 if (j > 0) {
4503 debug_printf(" ");
4504 }
4505 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
4506 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
4507 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
4508 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
4509 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
4510 }
4511 }
4512 }
4513 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
4514 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
4515 uint j;
4516
4517 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
4518 debug_printf("OUT[%2u] = ", i);
4519 for (j = 0; j < 4; j++) {
4520 if (j > 0) {
4521 debug_printf(" ");
4522 }
4523 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
4524 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
4525 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
4526 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
4527 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
4528 }
4529 }
4530 }
4531 #endif
4532 }
4533 }
4534
4535 #if 0
4536 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
4537 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
4538 /*
4539 * Scale back depth component.
4540 */
4541 for (i = 0; i < 4; i++)
4542 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
4543 }
4544 #endif
4545
4546 /* Strictly speaking, these assertions aren't really needed but they
4547 * can potentially catch some bugs in the control flow code.
4548 */
4549 assert(mach->CondStackTop == 0);
4550 assert(mach->LoopStackTop == 0);
4551 assert(mach->ContStackTop == 0);
4552 assert(mach->SwitchStackTop == 0);
4553 assert(mach->BreakStackTop == 0);
4554 assert(mach->CallStackTop == 0);
4555
4556 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4557 }