1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29  * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43  * The ExecMask is computed from four other masks (CondMask, LoopMask, ContMask
44  * and FuncMask) which are controlled by the flow control instructions
45  * (IF/ELSE/ENDIF, LOOP/ENDLOOP, CONT and CAL/RET); a worked example follows.
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
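/*
 * Worked example (illustrative values only): all four quad components start
 * active, so CondMask = LoopMask = ContMask = FuncMask = 0xf and ExecMask = 0xf.
 * An IF whose condition holds only for components 0 and 2 leaves CondMask = 0x5,
 * so ExecMask becomes 0x5 and store_dest() writes just those two components.
 * The matching ELSE inverts CondMask against the value pushed at the IF (0xf),
 * giving 0xa, and ENDIF pops the stack so CondMask (and ExecMask) return to 0xf.
 */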
52
53 #include <transpose_matrix4x4.h>
54 #include <simdmath/ceilf4.h>
55 #include <simdmath/cosf4.h>
56 #include <simdmath/divf4.h>
57 #include <simdmath/floorf4.h>
58 #include <simdmath/log2f4.h>
59 #include <simdmath/powf4.h>
60 #include <simdmath/sinf4.h>
61 #include <simdmath/sqrtf4.h>
62 #include <simdmath/truncf4.h>
63
64 #include "pipe/p_compiler.h"
65 #include "pipe/p_state.h"
66 #include "pipe/p_shader_tokens.h"
67 #include "tgsi/tgsi_parse.h"
68 #include "tgsi/tgsi_util.h"
69 #include "spu_exec.h"
70 #include "spu_main.h"
71 #include "spu_vertex_shader.h"
72 #include "spu_dcache.h"
73 #include "cell/common.h"
74
75 #define TILE_TOP_LEFT 0
76 #define TILE_TOP_RIGHT 1
77 #define TILE_BOTTOM_LEFT 2
78 #define TILE_BOTTOM_RIGHT 3
79
80 /*
81 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
82 */
83 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
84 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
85 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
86 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
87 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
88 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
89 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
90 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
91 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
92 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
93 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
94 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
95 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
96 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
97 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
98 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
99 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
100 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
101 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
102 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
103 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
104 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
105 #define TEMP_R0 TGSI_EXEC_TEMP_R0
106
107 #define FOR_EACH_CHANNEL(CHAN)\
108 for (CHAN = 0; CHAN < 4; CHAN++)
109
110 #define IS_CHANNEL_ENABLED(INST, CHAN)\
111 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
112
113 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
114 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
115
116 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
117 FOR_EACH_CHANNEL( CHAN )\
118 if (IS_CHANNEL_ENABLED( INST, CHAN ))
119
120 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
121 FOR_EACH_CHANNEL( CHAN )\
122 if (IS_CHANNEL_ENABLED2( INST, CHAN ))
123
124
125 /** The execution mask is the bitwise AND of the cond, loop, cont and func masks */
126 #define UPDATE_EXEC_MASK(MACH) \
127 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
128
129
130 #define CHAN_X 0
131 #define CHAN_Y 1
132 #define CHAN_Z 2
133 #define CHAN_W 3
134
135
136
137 /**
138 * Initialize machine state by expanding tokens to full instructions,
139 * allocating temporary storage, setting up constants, etc.
140 * After this, we can call spu_exec_machine_run() many times.
141 */
142 void
143 spu_exec_machine_init(struct spu_exec_machine *mach,
144 uint numSamplers,
145 struct spu_sampler *samplers,
146 unsigned processor)
147 {
148 const qword zero = si_il(0);
149 const qword not_zero = si_il(~0);
150
151 (void) numSamplers;
152 mach->Samplers = samplers;
153 mach->Processor = processor;
154 mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
155
156 /* Setup constants. */
157 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero;
158 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero;
159    mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_rotmi(not_zero, -1);
160 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31);
161
162 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f);
163 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f);
164 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f);
165 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f);
166 }
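/*
 * Typical usage, as a rough sketch only (the real callers are the vertex path
 * in spu_vertex_shader.c and the SPU main loop; details there may differ):
 *
 *    struct spu_exec_machine machine ALIGN16_ATTRIB;
 *
 *    spu_exec_machine_init(&machine, numSamplers, samplers,
 *                          TGSI_PROCESSOR_FRAGMENT);
 *    // point machine.Instructions / machine.Declarations / machine.Consts
 *    // at the shader data, fill machine.Inputs for the current quad, then:
 *    uint alive = spu_exec_machine_run(&machine);
 */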
167
168
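/*
 * Absolute value via bit twiddling: shifting each word left by one discards
 * the IEEE sign bit, and si_rotmi(x, -1) is a logical right shift by one that
 * moves exponent and mantissa back into place with a zero sign bit.
 */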
169 static INLINE qword
170 micro_abs(qword src)
171 {
172 return si_rotmi(si_shli(src, 1), -1);
173 }
174
175 static INLINE qword
176 micro_ceil(qword src)
177 {
178 return (qword) _ceilf4((vec_float4) src);
179 }
180
181 static INLINE qword
182 micro_cos(qword src)
183 {
184 return (qword) _cosf4((vec_float4) src);
185 }
186
187 static const qword br_shuf = {
188    TILE_BOTTOM_RIGHT * 4 + 0, TILE_BOTTOM_RIGHT * 4 + 1,
189    TILE_BOTTOM_RIGHT * 4 + 2, TILE_BOTTOM_RIGHT * 4 + 3,
190    TILE_BOTTOM_RIGHT * 4 + 0, TILE_BOTTOM_RIGHT * 4 + 1,
191    TILE_BOTTOM_RIGHT * 4 + 2, TILE_BOTTOM_RIGHT * 4 + 3,
192    TILE_BOTTOM_RIGHT * 4 + 0, TILE_BOTTOM_RIGHT * 4 + 1,
193    TILE_BOTTOM_RIGHT * 4 + 2, TILE_BOTTOM_RIGHT * 4 + 3,
194    TILE_BOTTOM_RIGHT * 4 + 0, TILE_BOTTOM_RIGHT * 4 + 1,
195    TILE_BOTTOM_RIGHT * 4 + 2, TILE_BOTTOM_RIGHT * 4 + 3,
196 };
197
198 static const qword bl_shuf = {
199    TILE_BOTTOM_LEFT * 4 + 0, TILE_BOTTOM_LEFT * 4 + 1,
200    TILE_BOTTOM_LEFT * 4 + 2, TILE_BOTTOM_LEFT * 4 + 3,
201    TILE_BOTTOM_LEFT * 4 + 0, TILE_BOTTOM_LEFT * 4 + 1,
202    TILE_BOTTOM_LEFT * 4 + 2, TILE_BOTTOM_LEFT * 4 + 3,
203    TILE_BOTTOM_LEFT * 4 + 0, TILE_BOTTOM_LEFT * 4 + 1,
204    TILE_BOTTOM_LEFT * 4 + 2, TILE_BOTTOM_LEFT * 4 + 3,
205    TILE_BOTTOM_LEFT * 4 + 0, TILE_BOTTOM_LEFT * 4 + 1,
206    TILE_BOTTOM_LEFT * 4 + 2, TILE_BOTTOM_LEFT * 4 + 3,
207 };
208
209 static const qword tl_shuf = {
210    TILE_TOP_LEFT * 4 + 0, TILE_TOP_LEFT * 4 + 1,
211    TILE_TOP_LEFT * 4 + 2, TILE_TOP_LEFT * 4 + 3,
212    TILE_TOP_LEFT * 4 + 0, TILE_TOP_LEFT * 4 + 1,
213    TILE_TOP_LEFT * 4 + 2, TILE_TOP_LEFT * 4 + 3,
214    TILE_TOP_LEFT * 4 + 0, TILE_TOP_LEFT * 4 + 1,
215    TILE_TOP_LEFT * 4 + 2, TILE_TOP_LEFT * 4 + 3,
216    TILE_TOP_LEFT * 4 + 0, TILE_TOP_LEFT * 4 + 1,
217    TILE_TOP_LEFT * 4 + 2, TILE_TOP_LEFT * 4 + 3,
218 };
219
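/*
 * The four floats of a channel hold a 2x2 pixel quad, indexed by the TILE_*
 * constants above; each shuffle pattern splats one pixel's four bytes across
 * the whole vector.  That lets the derivatives be formed as plain vector
 * subtractions:
 *
 *    ddx = value(bottom_right) - value(bottom_left)    difference along x
 *    ddy = value(top_left)     - value(bottom_left)    difference along y
 *
 * so a single forward difference is broadcast to all four pixels of the quad.
 */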
220 static qword
221 micro_ddx(qword src)
222 {
223 qword bottom_right = si_shufb(src, src, br_shuf);
224 qword bottom_left = si_shufb(src, src, bl_shuf);
225
226 return si_fs(bottom_right, bottom_left);
227 }
228
229 static qword
230 micro_ddy(qword src)
231 {
232 qword top_left = si_shufb(src, src, tl_shuf);
233 qword bottom_left = si_shufb(src, src, bl_shuf);
234
235 return si_fs(top_left, bottom_left);
236 }
237
238 static INLINE qword
239 micro_div(qword src0, qword src1)
240 {
241 return (qword) _divf4((vec_float4) src0, (vec_float4) src1);
242 }
243
244 static qword
245 micro_flr(qword src)
246 {
247 return (qword) _floorf4((vec_float4) src);
248 }
249
250 static qword
251 micro_frc(qword src)
252 {
253 return si_fs(src, (qword) _floorf4((vec_float4) src));
254 }
255
256 static INLINE qword
257 micro_ge(qword src0, qword src1)
258 {
259 return si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
260 }
261
262 static qword
263 micro_lg2(qword src)
264 {
265 return (qword) _log2f4((vec_float4) src);
266 }
267
268 static INLINE qword
269 micro_lt(qword src0, qword src1)
270 {
271 const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
272
273 return si_xori(tmp, 0xff);
274 }
275
276 static INLINE qword
277 micro_max(qword src0, qword src1)
278 {
279 return si_selb(src1, src0, si_fcgt(src0, src1));
280 }
281
282 static INLINE qword
283 micro_min(qword src0, qword src1)
284 {
285 return si_selb(src0, src1, si_fcgt(src0, src1));
286 }
287
288 static qword
289 micro_neg(qword src)
290 {
291 return si_xor(src, (qword) spu_splats(0x80000000));
292 }
293
294 static qword
295 micro_set_sign(qword src)
296 {
297 return si_or(src, (qword) spu_splats(0x80000000));
298 }
299
300 static qword
301 micro_pow(qword src0, qword src1)
302 {
303 return (qword) _powf4((vec_float4) src0, (vec_float4) src1);
304 }
305
306 static qword
307 micro_rnd(qword src)
308 {
309 const qword half = (qword) spu_splats(0.5f);
310
311 /* May be able to use _roundf4. There may be some difference, though.
312 */
313 return (qword) _floorf4((vec_float4) si_fa(src, half));
314 }
315
316 static INLINE qword
317 micro_ishr(qword src0, qword src1)
318 {
319 return si_rotma(src0, si_sfi(src1, 0));
320 }
321
322 static qword
323 micro_trunc(qword src)
324 {
325 return (qword) _truncf4((vec_float4) src);
326 }
327
328 static qword
329 micro_sin(qword src)
330 {
331 return (qword) _sinf4((vec_float4) src);
332 }
333
334 static INLINE qword
335 micro_sqrt(qword src)
336 {
337 return (qword) _sqrtf4((vec_float4) src);
338 }
339
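/*
 * Fetch one channel of a source operand for all four quad components.
 * Because of indirect addressing, each component may name a different
 * register index, so values are gathered one slot at a time into the
 * SoA channel vector.
 */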
340 static void
341 fetch_src_file_channel(
342 const struct spu_exec_machine *mach,
343 const uint file,
344 const uint swizzle,
345 const union spu_exec_channel *index,
346 union spu_exec_channel *chan )
347 {
348 switch( swizzle ) {
349 case TGSI_EXTSWIZZLE_X:
350 case TGSI_EXTSWIZZLE_Y:
351 case TGSI_EXTSWIZZLE_Z:
352 case TGSI_EXTSWIZZLE_W:
353 switch( file ) {
354 case TGSI_FILE_CONSTANT: {
355 unsigned i;
356
357 for (i = 0; i < 4; i++) {
358 const float *ptr = mach->Consts[index->i[i]];
359 float tmp[4];
360
361 spu_dcache_fetch_unaligned((qword *) tmp,
362 (uintptr_t)(ptr + swizzle),
363 sizeof(float));
364
365 chan->f[i] = tmp[0];
366 }
367 break;
368 }
369
370 case TGSI_FILE_INPUT:
371 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
372 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
373 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
374 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
375 break;
376
377 case TGSI_FILE_TEMPORARY:
378 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
379 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
380 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
381 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
382 break;
383
384 case TGSI_FILE_IMMEDIATE:
385 ASSERT( index->i[0] < (int) mach->ImmLimit );
386 ASSERT( index->i[1] < (int) mach->ImmLimit );
387 ASSERT( index->i[2] < (int) mach->ImmLimit );
388 ASSERT( index->i[3] < (int) mach->ImmLimit );
389
390 chan->f[0] = mach->Imms[index->i[0]][swizzle];
391 chan->f[1] = mach->Imms[index->i[1]][swizzle];
392 chan->f[2] = mach->Imms[index->i[2]][swizzle];
393 chan->f[3] = mach->Imms[index->i[3]][swizzle];
394 break;
395
396 case TGSI_FILE_ADDRESS:
397 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
398 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
399 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
400 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
401 break;
402
403 case TGSI_FILE_OUTPUT:
404 /* vertex/fragment output vars can be read too */
405 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
406 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
407 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
408 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
409 break;
410
411 default:
412 ASSERT( 0 );
413 }
414 break;
415
416 case TGSI_EXTSWIZZLE_ZERO:
417 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
418 break;
419
420 case TGSI_EXTSWIZZLE_ONE:
421 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
422 break;
423
424 default:
425 ASSERT( 0 );
426 }
427 }
428
429 static void
430 fetch_source(
431 const struct spu_exec_machine *mach,
432 union spu_exec_channel *chan,
433 const struct tgsi_full_src_register *reg,
434 const uint chan_index )
435 {
436 union spu_exec_channel index;
437 uint swizzle;
438
439 index.i[0] =
440 index.i[1] =
441 index.i[2] =
442 index.i[3] = reg->SrcRegister.Index;
443
444 if (reg->SrcRegister.Indirect) {
445 union spu_exec_channel index2;
446 union spu_exec_channel indir_index;
447
448 index2.i[0] =
449 index2.i[1] =
450 index2.i[2] =
451 index2.i[3] = reg->SrcRegisterInd.Index;
452
453 swizzle = tgsi_util_get_src_register_swizzle(&reg->SrcRegisterInd,
454 CHAN_X);
455 fetch_src_file_channel(
456 mach,
457 reg->SrcRegisterInd.File,
458 swizzle,
459 &index2,
460 &indir_index );
461
462 index.q = si_a(index.q, indir_index.q);
463 }
464
465 if( reg->SrcRegister.Dimension ) {
466 switch( reg->SrcRegister.File ) {
467 case TGSI_FILE_INPUT:
468 index.q = si_mpyi(index.q, 17);
469 break;
470 case TGSI_FILE_CONSTANT:
471 index.q = si_shli(index.q, 12);
472 break;
473 default:
474 ASSERT( 0 );
475 }
476
477 index.i[0] += reg->SrcRegisterDim.Index;
478 index.i[1] += reg->SrcRegisterDim.Index;
479 index.i[2] += reg->SrcRegisterDim.Index;
480 index.i[3] += reg->SrcRegisterDim.Index;
481
482 if (reg->SrcRegisterDim.Indirect) {
483 union spu_exec_channel index2;
484 union spu_exec_channel indir_index;
485
486 index2.i[0] =
487 index2.i[1] =
488 index2.i[2] =
489 index2.i[3] = reg->SrcRegisterDimInd.Index;
490
491 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
492 fetch_src_file_channel(
493 mach,
494 reg->SrcRegisterDimInd.File,
495 swizzle,
496 &index2,
497 &indir_index );
498
499 index.q = si_a(index.q, indir_index.q);
500 }
501 }
502
503 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
504 fetch_src_file_channel(
505 mach,
506 reg->SrcRegister.File,
507 swizzle,
508 &index,
509 chan );
510
511 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
512 case TGSI_UTIL_SIGN_CLEAR:
513 chan->q = micro_abs(chan->q);
514 break;
515
516 case TGSI_UTIL_SIGN_SET:
517 chan->q = micro_set_sign(chan->q);
518 break;
519
520 case TGSI_UTIL_SIGN_TOGGLE:
521 chan->q = micro_neg(chan->q);
522 break;
523
524 case TGSI_UTIL_SIGN_KEEP:
525 break;
526 }
527
528 if (reg->SrcRegisterExtMod.Complement) {
529 chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q);
530 }
531 }
532
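/*
 * Write one channel of a destination register.  The write mask is handled by
 * the callers (they only invoke STORE for enabled channels); here the
 * per-component ExecMask is applied, e.g. ExecMask = 0x5 updates only quad
 * components 0 and 2.  Note the saturating paths below do not yet honor
 * ExecMask (see the XXX comment).
 */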
533 static void
534 store_dest(
535 struct spu_exec_machine *mach,
536 const union spu_exec_channel *chan,
537 const struct tgsi_full_dst_register *reg,
538 const struct tgsi_full_instruction *inst,
539 uint chan_index )
540 {
541 union spu_exec_channel *dst;
542
543 switch( reg->DstRegister.File ) {
544 case TGSI_FILE_NULL:
545 return;
546
547 case TGSI_FILE_OUTPUT:
548 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
549 + reg->DstRegister.Index].xyzw[chan_index];
550 break;
551
552 case TGSI_FILE_TEMPORARY:
553 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
554 break;
555
556 case TGSI_FILE_ADDRESS:
557 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
558 break;
559
560 default:
561 ASSERT( 0 );
562 return;
563 }
564
565 switch (inst->Instruction.Saturate)
566 {
567 case TGSI_SAT_NONE:
568 if (mach->ExecMask & 0x1)
569 dst->i[0] = chan->i[0];
570 if (mach->ExecMask & 0x2)
571 dst->i[1] = chan->i[1];
572 if (mach->ExecMask & 0x4)
573 dst->i[2] = chan->i[2];
574 if (mach->ExecMask & 0x8)
575 dst->i[3] = chan->i[3];
576 break;
577
578 case TGSI_SAT_ZERO_ONE:
579 /* XXX need to obey ExecMask here */
580 dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
581 dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q);
582 break;
583
584 case TGSI_SAT_MINUS_PLUS_ONE:
585 ASSERT( 0 );
586 break;
587
588 default:
589 ASSERT( 0 );
590 }
591 }
592
593 #define FETCH(VAL,INDEX,CHAN)\
594 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
595
596 #define STORE(VAL,INDEX,CHAN)\
597 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
598
599
600 /**
601 * Execute ARB-style KIL which is predicated by a src register.
602 * Kill fragment if any of the four values is less than zero.
603 */
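/*
 * Illustrative case: if the source's extended swizzle replicates X into all
 * four channels, only the first iteration below actually fetches; the other
 * three are skipped via uniquemask.  Components swizzled to the constants
 * ZERO or ONE are never tested, and any quad pixel whose fetched value is
 * negative gets its bit set in kilmask.
 */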
604 static void
605 exec_kil(struct spu_exec_machine *mach,
606 const struct tgsi_full_instruction *inst)
607 {
608 uint uniquemask;
609 uint chan_index;
610 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
611 union spu_exec_channel r[1];
612
613 /* This mask stores component bits that were already tested. Note that
614     * we test if the value is less than zero, so 1.0 and 0.0 need not be
615 * tested. */
616 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
617
618 for (chan_index = 0; chan_index < 4; chan_index++)
619 {
620 uint swizzle;
621 uint i;
622
623 /* unswizzle channel */
624 swizzle = tgsi_util_get_full_src_register_extswizzle (
625 &inst->FullSrcRegisters[0],
626 chan_index);
627
628 /* check if the component has not been already tested */
629 if (uniquemask & (1 << swizzle))
630 continue;
631 uniquemask |= 1 << swizzle;
632
633 FETCH(&r[0], 0, chan_index);
634 for (i = 0; i < 4; i++)
635 if (r[0].f[i] < 0.0f)
636 kilmask |= 1 << i;
637 }
638
639 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
640 }
641
642 /**
643 * Execute NVIDIA-style KIL which is predicated by a condition code.
644 * Kill fragment if the condition code is TRUE.
645 */
646 static void
647 exec_kilp(struct spu_exec_machine *mach,
648 const struct tgsi_full_instruction *inst)
649 {
650 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
651
652 /* TODO: build kilmask from CC mask */
653
654 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
655 }
656
657 /*
658 * Fetch a texel using STR texture coordinates.
659 */
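/*
 * The sampler fills rgba[] with one RGBA color per pixel (array-of-structs
 * order); the 4x4 transpose converts that into the interpreter's
 * structure-of-arrays layout: one vector each for R, G, B and A.
 */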
660 static void
661 fetch_texel( struct spu_sampler *sampler,
662 const union spu_exec_channel *s,
663 const union spu_exec_channel *t,
664 const union spu_exec_channel *p,
665 float lodbias, /* XXX should be float[4] */
666 union spu_exec_channel *r,
667 union spu_exec_channel *g,
668 union spu_exec_channel *b,
669 union spu_exec_channel *a )
670 {
671 qword rgba[4];
672 qword out[4];
673
674 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias,
675 (float (*)[4]) rgba);
676
677 _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba);
678 r->q = out[0];
679 g->q = out[1];
680 b->q = out[2];
681 a->q = out[3];
682 }
683
684
685 static void
686 exec_tex(struct spu_exec_machine *mach,
687 const struct tgsi_full_instruction *inst,
688 boolean biasLod, boolean projected)
689 {
690 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
691 union spu_exec_channel r[8];
692 uint chan_index;
693 float lodBias;
694
695 /* printf("Sampler %u unit %u\n", sampler, unit); */
696
697 switch (inst->InstructionExtTexture.Texture) {
698 case TGSI_TEXTURE_1D:
699
700 FETCH(&r[0], 0, CHAN_X);
701
702 if (projected) {
703 FETCH(&r[1], 0, CHAN_W);
704 r[0].q = micro_div(r[0].q, r[1].q);
705 }
706
707 if (biasLod) {
708 FETCH(&r[1], 0, CHAN_W);
709          lodBias = r[1].f[0];
710 }
711 else
712 lodBias = 0.0;
713
714 fetch_texel(&mach->Samplers[unit],
715 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
716 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
717 break;
718
719 case TGSI_TEXTURE_2D:
720 case TGSI_TEXTURE_RECT:
721
722 FETCH(&r[0], 0, CHAN_X);
723 FETCH(&r[1], 0, CHAN_Y);
724 FETCH(&r[2], 0, CHAN_Z);
725
726 if (projected) {
727 FETCH(&r[3], 0, CHAN_W);
728 r[0].q = micro_div(r[0].q, r[3].q);
729 r[1].q = micro_div(r[1].q, r[3].q);
730 r[2].q = micro_div(r[2].q, r[3].q);
731 }
732
733 if (biasLod) {
734 FETCH(&r[3], 0, CHAN_W);
735 lodBias = r[3].f[0];
736 }
737 else
738 lodBias = 0.0;
739
740 fetch_texel(&mach->Samplers[unit],
741 &r[0], &r[1], &r[2], lodBias, /* inputs */
742 &r[0], &r[1], &r[2], &r[3]); /* outputs */
743 break;
744
745 case TGSI_TEXTURE_3D:
746 case TGSI_TEXTURE_CUBE:
747
748 FETCH(&r[0], 0, CHAN_X);
749 FETCH(&r[1], 0, CHAN_Y);
750 FETCH(&r[2], 0, CHAN_Z);
751
752 if (projected) {
753 FETCH(&r[3], 0, CHAN_W);
754 r[0].q = micro_div(r[0].q, r[3].q);
755 r[1].q = micro_div(r[1].q, r[3].q);
756 r[2].q = micro_div(r[2].q, r[3].q);
757 }
758
759 if (biasLod) {
760 FETCH(&r[3], 0, CHAN_W);
761 lodBias = r[3].f[0];
762 }
763 else
764 lodBias = 0.0;
765
766 fetch_texel(&mach->Samplers[unit],
767 &r[0], &r[1], &r[2], lodBias,
768 &r[0], &r[1], &r[2], &r[3]);
769 break;
770
771 default:
772 ASSERT (0);
773 }
774
775 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
776 STORE( &r[chan_index], 0, chan_index );
777 }
778 }
779
780
781
782 static void
783 constant_interpolation(
784 struct spu_exec_machine *mach,
785 unsigned attrib,
786 unsigned chan )
787 {
788 unsigned i;
789
790 for( i = 0; i < QUAD_SIZE; i++ ) {
791 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
792 }
793 }
794
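/*
 * Attribute interpolation over the 2x2 quad uses the plane equation
 *
 *    a(x, y) = a0 + dadx * x + dady * y
 *
 * QuadPos holds the quad's upper-left pixel position; the other three pixels
 * are offset by +1 in x and/or y, which is why their values are just a0 plus
 * dadx and/or dady.  Perspective interpolation additionally divides each
 * pixel by its W, taken from QuadPos.xyzw[3].
 */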
795 static void
796 linear_interpolation(
797 struct spu_exec_machine *mach,
798 unsigned attrib,
799 unsigned chan )
800 {
801 const float x = mach->QuadPos.xyzw[0].f[0];
802 const float y = mach->QuadPos.xyzw[1].f[0];
803 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
804 const float dady = mach->InterpCoefs[attrib].dady[chan];
805 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
806 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
807 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
808 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
809 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
810 }
811
812 static void
813 perspective_interpolation(
814 struct spu_exec_machine *mach,
815 unsigned attrib,
816 unsigned chan )
817 {
818 const float x = mach->QuadPos.xyzw[0].f[0];
819 const float y = mach->QuadPos.xyzw[1].f[0];
820 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
821 const float dady = mach->InterpCoefs[attrib].dady[chan];
822 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
823 const float *w = mach->QuadPos.xyzw[3].f;
824 /* divide by W here */
825 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
826 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
827 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
828 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
829 }
830
831
832 typedef void (* interpolation_func)(
833 struct spu_exec_machine *mach,
834 unsigned attrib,
835 unsigned chan );
836
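/*
 * Process one TGSI declaration.  For fragment shaders this is where declared
 * inputs are interpolated: every enabled channel in the declared range is
 * evaluated for the current quad with the declared mode (constant, linear or
 * perspective) before any instructions execute.
 */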
837 static void
838 exec_declaration(struct spu_exec_machine *mach,
839 const struct tgsi_full_declaration *decl)
840 {
841 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
842 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
843 unsigned first, last, mask;
844 interpolation_func interp;
845
846 first = decl->DeclarationRange.First;
847 last = decl->DeclarationRange.Last;
848 mask = decl->Declaration.UsageMask;
849
850 switch( decl->Declaration.Interpolate ) {
851 case TGSI_INTERPOLATE_CONSTANT:
852 interp = constant_interpolation;
853 break;
854
855 case TGSI_INTERPOLATE_LINEAR:
856 interp = linear_interpolation;
857 break;
858
859 case TGSI_INTERPOLATE_PERSPECTIVE:
860 interp = perspective_interpolation;
861 break;
862
863 default:
864 ASSERT( 0 );
865 }
866
867 if( mask == TGSI_WRITEMASK_XYZW ) {
868 unsigned i, j;
869
870 for( i = first; i <= last; i++ ) {
871 for( j = 0; j < NUM_CHANNELS; j++ ) {
872 interp( mach, i, j );
873 }
874 }
875 }
876 else {
877 unsigned i, j;
878
879 for( j = 0; j < NUM_CHANNELS; j++ ) {
880 if( mask & (1 << j) ) {
881 for( i = first; i <= last; i++ ) {
882 interp( mach, i, j );
883 }
884 }
885 }
886 }
887 }
888 }
889 }
890
891 static void
892 exec_instruction(
893 struct spu_exec_machine *mach,
894 const struct tgsi_full_instruction *inst,
895 int *pc )
896 {
897 uint chan_index;
898 union spu_exec_channel r[8];
899
900 (*pc)++;
901
902 switch (inst->Instruction.Opcode) {
903 case TGSI_OPCODE_ARL:
904 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
905 FETCH( &r[0], 0, chan_index );
906 r[0].q = si_cflts(r[0].q, 0);
907 STORE( &r[0], 0, chan_index );
908 }
909 break;
910
911 case TGSI_OPCODE_MOV:
912 case TGSI_OPCODE_SWZ:
913 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
914 FETCH( &r[0], 0, chan_index );
915 STORE( &r[0], 0, chan_index );
916 }
917 break;
918
919 case TGSI_OPCODE_LIT:
920 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
921 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
922 }
923
924 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
925 FETCH( &r[0], 0, CHAN_X );
926 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
927 r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
928 STORE( &r[0], 0, CHAN_Y );
929 }
930
931 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
932 FETCH( &r[1], 0, CHAN_Y );
933 r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
934
935 FETCH( &r[2], 0, CHAN_W );
936 r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q);
937 r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q);
938 r[1].q = micro_pow(r[1].q, r[2].q);
939
940 /* r0 = (r0 > 0.0) ? r1 : 0.0
941 */
942 r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
943 r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q,
944 r[0].q);
945 STORE( &r[0], 0, CHAN_Z );
946 }
947 }
948
949 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
950 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
951 }
952 break;
953
954 case TGSI_OPCODE_RCP:
955 /* TGSI_OPCODE_RECIP */
956 FETCH( &r[0], 0, CHAN_X );
957 r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
958 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
959 STORE( &r[0], 0, chan_index );
960 }
961 break;
962
963 case TGSI_OPCODE_RSQ:
964 /* TGSI_OPCODE_RECIPSQRT */
965 FETCH( &r[0], 0, CHAN_X );
966 r[0].q = micro_sqrt(r[0].q);
967 r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
968 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
969 STORE( &r[0], 0, chan_index );
970 }
971 break;
972
973 case TGSI_OPCODE_EXP:
974 ASSERT (0);
975 break;
976
977 case TGSI_OPCODE_LOG:
978 ASSERT (0);
979 break;
980
981 case TGSI_OPCODE_MUL:
982 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
983 {
984 FETCH(&r[0], 0, chan_index);
985 FETCH(&r[1], 1, chan_index);
986
987 r[0].q = si_fm(r[0].q, r[1].q);
988
989 STORE(&r[0], 0, chan_index);
990 }
991 break;
992
993 case TGSI_OPCODE_ADD:
994 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
995 FETCH( &r[0], 0, chan_index );
996 FETCH( &r[1], 1, chan_index );
997 r[0].q = si_fa(r[0].q, r[1].q);
998 STORE( &r[0], 0, chan_index );
999 }
1000 break;
1001
1002 case TGSI_OPCODE_DP3:
1003 /* TGSI_OPCODE_DOT3 */
1004 FETCH( &r[0], 0, CHAN_X );
1005 FETCH( &r[1], 1, CHAN_X );
1006 r[0].q = si_fm(r[0].q, r[1].q);
1007
1008 FETCH( &r[1], 0, CHAN_Y );
1009 FETCH( &r[2], 1, CHAN_Y );
1010 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1011
1012
1013 FETCH( &r[1], 0, CHAN_Z );
1014 FETCH( &r[2], 1, CHAN_Z );
1015 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1016
1017 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1018 STORE( &r[0], 0, chan_index );
1019 }
1020 break;
1021
1022 case TGSI_OPCODE_DP4:
1023 /* TGSI_OPCODE_DOT4 */
1024 FETCH(&r[0], 0, CHAN_X);
1025 FETCH(&r[1], 1, CHAN_X);
1026
1027 r[0].q = si_fm(r[0].q, r[1].q);
1028
1029 FETCH(&r[1], 0, CHAN_Y);
1030 FETCH(&r[2], 1, CHAN_Y);
1031
1032 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1033
1034 FETCH(&r[1], 0, CHAN_Z);
1035 FETCH(&r[2], 1, CHAN_Z);
1036
1037 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1038
1039 FETCH(&r[1], 0, CHAN_W);
1040 FETCH(&r[2], 1, CHAN_W);
1041
1042 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1043
1044 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1045 STORE( &r[0], 0, chan_index );
1046 }
1047 break;
1048
1049 case TGSI_OPCODE_DST:
1050 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1051 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1052 }
1053
1054 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1055 FETCH( &r[0], 0, CHAN_Y );
1056 FETCH( &r[1], 1, CHAN_Y);
1057 r[0].q = si_fm(r[0].q, r[1].q);
1058 STORE( &r[0], 0, CHAN_Y );
1059 }
1060
1061 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1062 FETCH( &r[0], 0, CHAN_Z );
1063 STORE( &r[0], 0, CHAN_Z );
1064 }
1065
1066 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1067 FETCH( &r[0], 1, CHAN_W );
1068 STORE( &r[0], 0, CHAN_W );
1069 }
1070 break;
1071
1072 case TGSI_OPCODE_MIN:
1073 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1074 FETCH(&r[0], 0, chan_index);
1075 FETCH(&r[1], 1, chan_index);
1076
1077 r[0].q = micro_min(r[0].q, r[1].q);
1078
1079 STORE(&r[0], 0, chan_index);
1080 }
1081 break;
1082
1083 case TGSI_OPCODE_MAX:
1084 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1085 FETCH(&r[0], 0, chan_index);
1086 FETCH(&r[1], 1, chan_index);
1087
1088 r[0].q = micro_max(r[0].q, r[1].q);
1089
1090 STORE(&r[0], 0, chan_index );
1091 }
1092 break;
1093
1094 case TGSI_OPCODE_SLT:
1095 /* TGSI_OPCODE_SETLT */
1096 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1097 FETCH( &r[0], 0, chan_index );
1098 FETCH( &r[1], 1, chan_index );
1099
1100 r[0].q = micro_ge(r[0].q, r[1].q);
1101 r[0].q = si_xori(r[0].q, 0xff);
1102
1103 STORE( &r[0], 0, chan_index );
1104 }
1105 break;
1106
1107 case TGSI_OPCODE_SGE:
1108 /* TGSI_OPCODE_SETGE */
1109 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1110 FETCH( &r[0], 0, chan_index );
1111 FETCH( &r[1], 1, chan_index );
1112 r[0].q = micro_ge(r[0].q, r[1].q);
1113 STORE( &r[0], 0, chan_index );
1114 }
1115 break;
1116
1117 case TGSI_OPCODE_MAD:
1118 /* TGSI_OPCODE_MADD */
1119 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1120 FETCH( &r[0], 0, chan_index );
1121 FETCH( &r[1], 1, chan_index );
1122 FETCH( &r[2], 2, chan_index );
1123 r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
1124 STORE( &r[0], 0, chan_index );
1125 }
1126 break;
1127
1128 case TGSI_OPCODE_SUB:
1129 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1130 FETCH(&r[0], 0, chan_index);
1131 FETCH(&r[1], 1, chan_index);
1132
1133 r[0].q = si_fs(r[0].q, r[1].q);
1134
1135 STORE(&r[0], 0, chan_index);
1136 }
1137 break;
1138
1139 case TGSI_OPCODE_LERP:
1140 /* TGSI_OPCODE_LRP */
1141 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1142 FETCH(&r[0], 0, chan_index);
1143 FETCH(&r[1], 1, chan_index);
1144 FETCH(&r[2], 2, chan_index);
1145
1146 r[1].q = si_fs(r[1].q, r[2].q);
1147 r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
1148
1149 STORE(&r[0], 0, chan_index);
1150 }
1151 break;
1152
1153 case TGSI_OPCODE_CND:
1154 ASSERT (0);
1155 break;
1156
1157 case TGSI_OPCODE_CND0:
1158 ASSERT (0);
1159 break;
1160
1161 case TGSI_OPCODE_DOT2ADD:
1162 /* TGSI_OPCODE_DP2A */
1163 ASSERT (0);
1164 break;
1165
1166 case TGSI_OPCODE_INDEX:
1167 ASSERT (0);
1168 break;
1169
1170 case TGSI_OPCODE_NEGATE:
1171 ASSERT (0);
1172 break;
1173
1174 case TGSI_OPCODE_FRAC:
1175 /* TGSI_OPCODE_FRC */
1176 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1177 FETCH( &r[0], 0, chan_index );
1178 r[0].q = micro_frc(r[0].q);
1179 STORE( &r[0], 0, chan_index );
1180 }
1181 break;
1182
1183 case TGSI_OPCODE_CLAMP:
1184 ASSERT (0);
1185 break;
1186
1187 case TGSI_OPCODE_FLOOR:
1188 /* TGSI_OPCODE_FLR */
1189 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1190 FETCH( &r[0], 0, chan_index );
1191 r[0].q = micro_flr(r[0].q);
1192 STORE( &r[0], 0, chan_index );
1193 }
1194 break;
1195
1196 case TGSI_OPCODE_ROUND:
1197 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1198 FETCH( &r[0], 0, chan_index );
1199 r[0].q = micro_rnd(r[0].q);
1200 STORE( &r[0], 0, chan_index );
1201 }
1202 break;
1203
1204 case TGSI_OPCODE_EXPBASE2:
1205 /* TGSI_OPCODE_EX2 */
1206 FETCH(&r[0], 0, CHAN_X);
1207
1208 r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
1209
1210 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1211 STORE( &r[0], 0, chan_index );
1212 }
1213 break;
1214
1215 case TGSI_OPCODE_LOGBASE2:
1216 /* TGSI_OPCODE_LG2 */
1217 FETCH( &r[0], 0, CHAN_X );
1218 r[0].q = micro_lg2(r[0].q);
1219 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1220 STORE( &r[0], 0, chan_index );
1221 }
1222 break;
1223
1224 case TGSI_OPCODE_POWER:
1225 /* TGSI_OPCODE_POW */
1226 FETCH(&r[0], 0, CHAN_X);
1227 FETCH(&r[1], 1, CHAN_X);
1228
1229 r[0].q = micro_pow(r[0].q, r[1].q);
1230
1231 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1232 STORE( &r[0], 0, chan_index );
1233 }
1234 break;
1235
1236 case TGSI_OPCODE_CROSSPRODUCT:
1237 /* TGSI_OPCODE_XPD */
1238 FETCH(&r[0], 0, CHAN_Y);
1239 FETCH(&r[1], 1, CHAN_Z);
1240 FETCH(&r[3], 0, CHAN_Z);
1241 FETCH(&r[4], 1, CHAN_Y);
1242
1243       /* r2 = (r0 * r1) - (r3 * r4)
1244        */
1245       r[2].q = si_fm(r[3].q, r[4].q);
1246 r[2].q = si_fms(r[0].q, r[1].q, r[2].q);
1247
1248 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1249 STORE( &r[2], 0, CHAN_X );
1250 }
1251
1252 FETCH(&r[2], 1, CHAN_X);
1253 FETCH(&r[5], 0, CHAN_X);
1254
1255 /* r3 = (r3 * r2) - (r1 * r5)
1256 */
1257 r[1].q = si_fm(r[1].q, r[5].q);
1258 r[3].q = si_fms(r[3].q, r[2].q, r[1].q);
1259
1260 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1261 STORE( &r[3], 0, CHAN_Y );
1262 }
1263
1264 /* r5 = (r5 * r4) - (r0 * r2)
1265 */
1266 r[0].q = si_fm(r[0].q, r[2].q);
1267 r[5].q = si_fms(r[5].q, r[4].q, r[0].q);
1268
1269 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1270 STORE( &r[5], 0, CHAN_Z );
1271 }
1272
1273 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1274 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1275 }
1276 break;
1277
1278 case TGSI_OPCODE_MULTIPLYMATRIX:
1279 ASSERT (0);
1280 break;
1281
1282 case TGSI_OPCODE_ABS:
1283 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1284 FETCH(&r[0], 0, chan_index);
1285
1286 r[0].q = micro_abs(r[0].q);
1287
1288 STORE(&r[0], 0, chan_index);
1289 }
1290 break;
1291
1292 case TGSI_OPCODE_RCC:
1293 ASSERT (0);
1294 break;
1295
1296 case TGSI_OPCODE_DPH:
1297 FETCH(&r[0], 0, CHAN_X);
1298 FETCH(&r[1], 1, CHAN_X);
1299
1300 r[0].q = si_fm(r[0].q, r[1].q);
1301
1302 FETCH(&r[1], 0, CHAN_Y);
1303 FETCH(&r[2], 1, CHAN_Y);
1304
1305 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1306
1307 FETCH(&r[1], 0, CHAN_Z);
1308 FETCH(&r[2], 1, CHAN_Z);
1309
1310 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1311
1312 FETCH(&r[1], 1, CHAN_W);
1313
1314 r[0].q = si_fa(r[0].q, r[1].q);
1315
1316 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1317 STORE( &r[0], 0, chan_index );
1318 }
1319 break;
1320
1321 case TGSI_OPCODE_COS:
1322 FETCH(&r[0], 0, CHAN_X);
1323
1324 r[0].q = micro_cos(r[0].q);
1325
1326 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1327 STORE( &r[0], 0, chan_index );
1328 }
1329 break;
1330
1331 case TGSI_OPCODE_DDX:
1332 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1333 FETCH( &r[0], 0, chan_index );
1334 r[0].q = micro_ddx(r[0].q);
1335 STORE( &r[0], 0, chan_index );
1336 }
1337 break;
1338
1339 case TGSI_OPCODE_DDY:
1340 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1341 FETCH( &r[0], 0, chan_index );
1342 r[0].q = micro_ddy(r[0].q);
1343 STORE( &r[0], 0, chan_index );
1344 }
1345 break;
1346
1347 case TGSI_OPCODE_KILP:
1348 exec_kilp (mach, inst);
1349 break;
1350
1351 case TGSI_OPCODE_KIL:
1352 exec_kil (mach, inst);
1353 break;
1354
1355 case TGSI_OPCODE_PK2H:
1356 ASSERT (0);
1357 break;
1358
1359 case TGSI_OPCODE_PK2US:
1360 ASSERT (0);
1361 break;
1362
1363 case TGSI_OPCODE_PK4B:
1364 ASSERT (0);
1365 break;
1366
1367 case TGSI_OPCODE_PK4UB:
1368 ASSERT (0);
1369 break;
1370
1371 case TGSI_OPCODE_RFL:
1372 ASSERT (0);
1373 break;
1374
1375 case TGSI_OPCODE_SEQ:
1376 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1377 FETCH( &r[0], 0, chan_index );
1378 FETCH( &r[1], 1, chan_index );
1379
1380 r[0].q = si_fceq(r[0].q, r[1].q);
1381
1382 STORE( &r[0], 0, chan_index );
1383 }
1384 break;
1385
1386 case TGSI_OPCODE_SFL:
1387 ASSERT (0);
1388 break;
1389
1390 case TGSI_OPCODE_SGT:
1391 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1392 FETCH( &r[0], 0, chan_index );
1393 FETCH( &r[1], 1, chan_index );
1394 r[0].q = si_fcgt(r[0].q, r[1].q);
1395 STORE( &r[0], 0, chan_index );
1396 }
1397 break;
1398
1399 case TGSI_OPCODE_SIN:
1400 FETCH( &r[0], 0, CHAN_X );
1401 r[0].q = micro_sin(r[0].q);
1402 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1403 STORE( &r[0], 0, chan_index );
1404 }
1405 break;
1406
1407 case TGSI_OPCODE_SLE:
1408 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1409 FETCH( &r[0], 0, chan_index );
1410 FETCH( &r[1], 1, chan_index );
1411
1412 r[0].q = si_fcgt(r[0].q, r[1].q);
1413 r[0].q = si_xori(r[0].q, 0xff);
1414
1415 STORE( &r[0], 0, chan_index );
1416 }
1417 break;
1418
1419 case TGSI_OPCODE_SNE:
1420 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1421 FETCH( &r[0], 0, chan_index );
1422 FETCH( &r[1], 1, chan_index );
1423
1424 r[0].q = si_fceq(r[0].q, r[1].q);
1425 r[0].q = si_xori(r[0].q, 0xff);
1426
1427 STORE( &r[0], 0, chan_index );
1428 }
1429 break;
1430
1431 case TGSI_OPCODE_STR:
1432 ASSERT (0);
1433 break;
1434
1435 case TGSI_OPCODE_TEX:
1436 /* simple texture lookup */
1437 /* src[0] = texcoord */
1438 /* src[1] = sampler unit */
1439 exec_tex(mach, inst, FALSE, FALSE);
1440 break;
1441
1442 case TGSI_OPCODE_TXB:
1443 /* Texture lookup with lod bias */
1444       /* src[0] = texcoord (src[0].w = lod bias) */
1445 /* src[1] = sampler unit */
1446 exec_tex(mach, inst, TRUE, FALSE);
1447 break;
1448
1449 case TGSI_OPCODE_TXD:
1450       /* Texture lookup with explicit partial derivatives */
1451 /* src[0] = texcoord */
1452 /* src[1] = d[strq]/dx */
1453 /* src[2] = d[strq]/dy */
1454 /* src[3] = sampler unit */
1455 ASSERT (0);
1456 break;
1457
1458 case TGSI_OPCODE_TXL:
1459       /* Texture lookup with explicit LOD */
1460       /* src[0] = texcoord (src[0].w = LOD) */
1461 /* src[1] = sampler unit */
1462 exec_tex(mach, inst, TRUE, FALSE);
1463 break;
1464
1465 case TGSI_OPCODE_TXP:
1466 /* Texture lookup with projection */
1467 /* src[0] = texcoord (src[0].w = projection) */
1468 /* src[1] = sampler unit */
1469 exec_tex(mach, inst, TRUE, TRUE);
1470 break;
1471
1472 case TGSI_OPCODE_UP2H:
1473 ASSERT (0);
1474 break;
1475
1476 case TGSI_OPCODE_UP2US:
1477 ASSERT (0);
1478 break;
1479
1480 case TGSI_OPCODE_UP4B:
1481 ASSERT (0);
1482 break;
1483
1484 case TGSI_OPCODE_UP4UB:
1485 ASSERT (0);
1486 break;
1487
1488 case TGSI_OPCODE_X2D:
1489 ASSERT (0);
1490 break;
1491
1492 case TGSI_OPCODE_ARA:
1493 ASSERT (0);
1494 break;
1495
1496 case TGSI_OPCODE_ARR:
1497 ASSERT (0);
1498 break;
1499
1500 case TGSI_OPCODE_BRA:
1501 ASSERT (0);
1502 break;
1503
1504 case TGSI_OPCODE_CAL:
1505 /* skip the call if no execution channels are enabled */
1506 if (mach->ExecMask) {
1507 /* do the call */
1508
1509 /* push the Cond, Loop, Cont stacks */
1510 ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
1511 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
1512 ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1513 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
1514 ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1515 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
1516
1517 ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
1518 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
1519
1520 /* note that PC was already incremented above */
1521 mach->CallStack[mach->CallStackTop++] = *pc;
1522 *pc = inst->InstructionExtLabel.Label;
1523 }
1524 break;
1525
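   /* RET undoes what CAL set up: each active channel clears its FuncMask
    * bit, and once no channels remain active the saved PC and the
    * Cond/Loop/Cont/Func masks are popped.  Returning with an empty call
    * stack means returning from main(), which halts execution.
    */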
1526 case TGSI_OPCODE_RET:
1527 mach->FuncMask &= ~mach->ExecMask;
1528 UPDATE_EXEC_MASK(mach);
1529
1530 if (mach->ExecMask == 0x0) {
1531          /* really return now (otherwise, keep executing) */
1532
1533 if (mach->CallStackTop == 0) {
1534 /* returning from main() */
1535 *pc = -1;
1536 return;
1537 }
1538 *pc = mach->CallStack[--mach->CallStackTop];
1539
1540 /* pop the Cond, Loop, Cont stacks */
1541 ASSERT(mach->CondStackTop > 0);
1542 mach->CondMask = mach->CondStack[--mach->CondStackTop];
1543 ASSERT(mach->LoopStackTop > 0);
1544 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
1545 ASSERT(mach->ContStackTop > 0);
1546 mach->ContMask = mach->ContStack[--mach->ContStackTop];
1547 ASSERT(mach->FuncStackTop > 0);
1548 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
1549
1550 UPDATE_EXEC_MASK(mach);
1551 }
1552 break;
1553
1554 case TGSI_OPCODE_SSG:
1555 ASSERT (0);
1556 break;
1557
1558 case TGSI_OPCODE_CMP:
1559 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1560 FETCH(&r[0], 0, chan_index);
1561 FETCH(&r[1], 1, chan_index);
1562 FETCH(&r[2], 2, chan_index);
1563
1564 /* r0 = (r0 < 0.0) ? r1 : r2
1565 */
1566 r[3].q = si_xor(r[3].q, r[3].q);
1567 r[0].q = micro_lt(r[0].q, r[3].q);
1568          r[0].q = si_selb(r[2].q, r[1].q, r[0].q);
1569
1570 STORE(&r[0], 0, chan_index);
1571 }
1572 break;
1573
1574 case TGSI_OPCODE_SCS:
1575 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1576 FETCH( &r[0], 0, CHAN_X );
1577 }
1578 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
1579 r[1].q = micro_cos(r[0].q);
1580 STORE( &r[1], 0, CHAN_X );
1581 }
1582 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1583 r[1].q = micro_sin(r[0].q);
1584 STORE( &r[1], 0, CHAN_Y );
1585 }
1586 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
1587 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
1588 }
1589 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
1590 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1591 }
1592 break;
1593
1594 case TGSI_OPCODE_NRM:
1595 ASSERT (0);
1596 break;
1597
1598 case TGSI_OPCODE_DIV:
1599 ASSERT( 0 );
1600 break;
1601
1602 case TGSI_OPCODE_DP2:
1603 FETCH( &r[0], 0, CHAN_X );
1604 FETCH( &r[1], 1, CHAN_X );
1605 r[0].q = si_fm(r[0].q, r[1].q);
1606
1607 FETCH( &r[1], 0, CHAN_Y );
1608 FETCH( &r[2], 1, CHAN_Y );
1609 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1610
1611 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1612 STORE( &r[0], 0, chan_index );
1613 }
1614 break;
1615
1616 case TGSI_OPCODE_IF:
1617 /* push CondMask */
1618 ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
1619 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
1620 FETCH( &r[0], 0, CHAN_X );
1621 /* update CondMask */
1622 if( ! r[0].u[0] ) {
1623 mach->CondMask &= ~0x1;
1624 }
1625 if( ! r[0].u[1] ) {
1626 mach->CondMask &= ~0x2;
1627 }
1628 if( ! r[0].u[2] ) {
1629 mach->CondMask &= ~0x4;
1630 }
1631 if( ! r[0].u[3] ) {
1632 mach->CondMask &= ~0x8;
1633 }
1634 UPDATE_EXEC_MASK(mach);
1635 /* Todo: If CondMask==0, jump to ELSE */
1636 break;
1637
1638 case TGSI_OPCODE_ELSE:
1639 /* invert CondMask wrt previous mask */
1640 {
1641 uint prevMask;
1642 ASSERT(mach->CondStackTop > 0);
1643 prevMask = mach->CondStack[mach->CondStackTop - 1];
1644 mach->CondMask = ~mach->CondMask & prevMask;
1645 UPDATE_EXEC_MASK(mach);
1646 /* Todo: If CondMask==0, jump to ENDIF */
1647 }
1648 break;
1649
1650 case TGSI_OPCODE_ENDIF:
1651 /* pop CondMask */
1652 ASSERT(mach->CondStackTop > 0);
1653 mach->CondMask = mach->CondStack[--mach->CondStackTop];
1654 UPDATE_EXEC_MASK(mach);
1655 break;
1656
1657 case TGSI_OPCODE_END:
1658 /* halt execution */
1659 *pc = -1;
1660 break;
1661
1662 case TGSI_OPCODE_REP:
1663 ASSERT (0);
1664 break;
1665
1666 case TGSI_OPCODE_ENDREP:
1667 ASSERT (0);
1668 break;
1669
1670 case TGSI_OPCODE_PUSHA:
1671 ASSERT (0);
1672 break;
1673
1674 case TGSI_OPCODE_POPA:
1675 ASSERT (0);
1676 break;
1677
1678 case TGSI_OPCODE_CEIL:
1679 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1680 FETCH( &r[0], 0, chan_index );
1681 r[0].q = micro_ceil(r[0].q);
1682 STORE( &r[0], 0, chan_index );
1683 }
1684 break;
1685
1686 case TGSI_OPCODE_I2F:
1687 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1688 FETCH( &r[0], 0, chan_index );
1689 r[0].q = si_csflt(r[0].q, 0);
1690 STORE( &r[0], 0, chan_index );
1691 }
1692 break;
1693
1694 case TGSI_OPCODE_NOT:
1695 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1696 FETCH( &r[0], 0, chan_index );
1697 r[0].q = si_xorbi(r[0].q, 0xff);
1698 STORE( &r[0], 0, chan_index );
1699 }
1700 break;
1701
1702 case TGSI_OPCODE_TRUNC:
1703 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1704 FETCH( &r[0], 0, chan_index );
1705 r[0].q = micro_trunc(r[0].q);
1706 STORE( &r[0], 0, chan_index );
1707 }
1708 break;
1709
1710 case TGSI_OPCODE_SHL:
1711 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1712 FETCH( &r[0], 0, chan_index );
1713 FETCH( &r[1], 1, chan_index );
1714
1715 r[0].q = si_shl(r[0].q, r[1].q);
1716
1717 STORE( &r[0], 0, chan_index );
1718 }
1719 break;
1720
1721 case TGSI_OPCODE_SHR:
1722 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1723 FETCH( &r[0], 0, chan_index );
1724 FETCH( &r[1], 1, chan_index );
1725 r[0].q = micro_ishr(r[0].q, r[1].q);
1726 STORE( &r[0], 0, chan_index );
1727 }
1728 break;
1729
1730 case TGSI_OPCODE_AND:
1731 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1732 FETCH( &r[0], 0, chan_index );
1733 FETCH( &r[1], 1, chan_index );
1734 r[0].q = si_and(r[0].q, r[1].q);
1735 STORE( &r[0], 0, chan_index );
1736 }
1737 break;
1738
1739 case TGSI_OPCODE_OR:
1740 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1741 FETCH( &r[0], 0, chan_index );
1742 FETCH( &r[1], 1, chan_index );
1743 r[0].q = si_or(r[0].q, r[1].q);
1744 STORE( &r[0], 0, chan_index );
1745 }
1746 break;
1747
1748 case TGSI_OPCODE_MOD:
1749 ASSERT (0);
1750 break;
1751
1752 case TGSI_OPCODE_XOR:
1753 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1754 FETCH( &r[0], 0, chan_index );
1755 FETCH( &r[1], 1, chan_index );
1756 r[0].q = si_xor(r[0].q, r[1].q);
1757 STORE( &r[0], 0, chan_index );
1758 }
1759 break;
1760
1761 case TGSI_OPCODE_SAD:
1762 ASSERT (0);
1763 break;
1764
1765 case TGSI_OPCODE_TXF:
1766 ASSERT (0);
1767 break;
1768
1769 case TGSI_OPCODE_TXQ:
1770 ASSERT (0);
1771 break;
1772
1773 case TGSI_OPCODE_EMIT:
1774 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
1775 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1776 break;
1777
1778 case TGSI_OPCODE_ENDPRIM:
1779 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
1780 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
1781 break;
1782
1783 case TGSI_OPCODE_LOOP:
1784 /* fall-through (for now) */
1785 case TGSI_OPCODE_BGNLOOP2:
1786 /* push LoopMask and ContMasks */
1787 ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1788 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
1789 ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1790 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
1791 break;
1792
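   /* At the bottom of a loop the continue mask is restored (so channels
    * that executed CONT become active again next iteration) while LoopMask
    * is left alone: if any channel still has its loop bit set, jump back to
    * the top, otherwise pop the Loop/Cont masks saved at BGNLOOP.
    */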
1793 case TGSI_OPCODE_ENDLOOP:
1794 /* fall-through (for now at least) */
1795 case TGSI_OPCODE_ENDLOOP2:
1796 /* Restore ContMask, but don't pop */
1797 ASSERT(mach->ContStackTop > 0);
1798 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
1799 if (mach->LoopMask) {
1800 /* repeat loop: jump to instruction just past BGNLOOP */
1801 *pc = inst->InstructionExtLabel.Label + 1;
1802 }
1803 else {
1804 /* exit loop: pop LoopMask */
1805 ASSERT(mach->LoopStackTop > 0);
1806 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
1807 /* pop ContMask */
1808 ASSERT(mach->ContStackTop > 0);
1809 mach->ContMask = mach->ContStack[--mach->ContStackTop];
1810 }
1811 UPDATE_EXEC_MASK(mach);
1812 break;
1813
1814 case TGSI_OPCODE_BRK:
1815 /* turn off loop channels for each enabled exec channel */
1816 mach->LoopMask &= ~mach->ExecMask;
1817 /* Todo: if mach->LoopMask == 0, jump to end of loop */
1818 UPDATE_EXEC_MASK(mach);
1819 break;
1820
1821 case TGSI_OPCODE_CONT:
1822 /* turn off cont channels for each enabled exec channel */
1823 mach->ContMask &= ~mach->ExecMask;
1824 /* Todo: if mach->LoopMask == 0, jump to end of loop */
1825 UPDATE_EXEC_MASK(mach);
1826 break;
1827
1828 case TGSI_OPCODE_BGNSUB:
1829 /* no-op */
1830 break;
1831
1832 case TGSI_OPCODE_ENDSUB:
1833 /* no-op */
1834 break;
1835
1836 case TGSI_OPCODE_NOISE1:
1837 ASSERT( 0 );
1838 break;
1839
1840 case TGSI_OPCODE_NOISE2:
1841 ASSERT( 0 );
1842 break;
1843
1844 case TGSI_OPCODE_NOISE3:
1845 ASSERT( 0 );
1846 break;
1847
1848 case TGSI_OPCODE_NOISE4:
1849 ASSERT( 0 );
1850 break;
1851
1852 case TGSI_OPCODE_NOP:
1853 break;
1854
1855 default:
1856 ASSERT( 0 );
1857 }
1858 }
1859
1860
1861 /**
1862 * Run TGSI interpreter.
1863 * \return bitmask of "alive" quad components
1864 */
1865 uint
1866 spu_exec_machine_run( struct spu_exec_machine *mach )
1867 {
1868 uint i;
1869 int pc = 0;
1870
1871 mach->CondMask = 0xf;
1872 mach->LoopMask = 0xf;
1873 mach->ContMask = 0xf;
1874 mach->FuncMask = 0xf;
1875 mach->ExecMask = 0xf;
1876
1877 mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */
1878 ASSERT(mach->CondStackTop == 0);
1879 ASSERT(mach->LoopStackTop == 0);
1880 ASSERT(mach->ContStackTop == 0);
1881 ASSERT(mach->CallStackTop == 0);
1882
1883 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
1884 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
1885
1886 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
1887 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
1888 mach->Primitives[0] = 0;
1889 }
1890
1891
1892 /* execute declarations (interpolants) */
1893 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1894 for (i = 0; i < mach->NumDeclarations; i++) {
1895 union {
1896 struct tgsi_full_declaration decl;
1897 qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16];
1898 } d ALIGN16_ATTRIB;
1899          unsigned ea = (unsigned) (mach->Declarations + i);
1900
1901 spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl));
1902
1903 exec_declaration( mach, &d.decl );
1904 }
1905 }
1906
1907 /* execute instructions, until pc is set to -1 */
1908 while (pc != -1) {
1909 union {
1910 struct tgsi_full_instruction inst;
1911 qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16];
1912 } i ALIGN16_ATTRIB;
1913 unsigned ea = (unsigned) (mach->Instructions + pc);
1914
1915 spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst));
1916 exec_instruction( mach, & i.inst, &pc );
1917 }
1918
1919 #if 0
1920 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
1921 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
1922 /*
1923 * Scale back depth component.
1924 */
1925 for (i = 0; i < 4; i++)
1926 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
1927 }
1928 #endif
1929
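   /* Each bit in the kill mask marks a killed pixel, so its complement is
    * the set of quad components still alive after shading.
    */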
1930 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
1931 }
1932
1933