cell: fix unclosed comment
[mesa.git] src/gallium/drivers/cell/spu/spu_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel),
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care because a condition may be true for some quad components but false
36 * for others.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from three other masks (CondMask, LoopMask and
44 * ContMask) which are controlled by the flow control instructions (namely:
45 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
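/*
 * Worked example of the masking scheme above: suppose an IF condition is
 * true only for components 0 and 2 of a quad.  CondMask becomes 0x5, so
 * ExecMask becomes 0x5 (assuming LoopMask/ContMask/FuncMask are all 0xf)
 * and store_dest() writes only components 0 and 2 of the destination.
 * At ELSE the CondMask is inverted against the saved mask (giving 0xa),
 * and at ENDIF the previous CondMask is popped and restored.
 */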
52
53 #include <transpose_matrix4x4.h>
54 #include <simdmath/ceilf4.h>
55 #include <simdmath/cosf4.h>
56 #include <simdmath/divf4.h>
57 #include <simdmath/floorf4.h>
58 #include <simdmath/log2f4.h>
59 #include <simdmath/powf4.h>
60 #include <simdmath/sinf4.h>
61 #include <simdmath/sqrtf4.h>
62 #include <simdmath/truncf4.h>
63
64 #include "pipe/p_compiler.h"
65 #include "pipe/p_state.h"
66 #include "pipe/p_util.h"
67 #include "pipe/p_shader_tokens.h"
68 #include "tgsi/util/tgsi_parse.h"
69 #include "tgsi/util/tgsi_util.h"
70 #include "spu_exec.h"
71 #include "spu_main.h"
72 #include "spu_vertex_shader.h"
73 #include "spu_dcache.h"
74 #include "cell/common.h"
75
76 #define TILE_TOP_LEFT 0
77 #define TILE_TOP_RIGHT 1
78 #define TILE_BOTTOM_LEFT 2
79 #define TILE_BOTTOM_RIGHT 3
80
81 /*
82 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
83 */
84 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
85 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
86 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
87 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
88 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
89 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
90 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
91 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
92 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
93 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
94 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
95 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
96 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
97 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
98 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
99 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
100 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
101 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
102 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
103 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
104 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
105 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
106 #define TEMP_R0 TGSI_EXEC_TEMP_R0
107
108 #define FOR_EACH_CHANNEL(CHAN)\
109 for (CHAN = 0; CHAN < 4; CHAN++)
110
111 #define IS_CHANNEL_ENABLED(INST, CHAN)\
112 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
113
114 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
115 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
116
117 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
118 FOR_EACH_CHANNEL( CHAN )\
119 if (IS_CHANNEL_ENABLED( INST, CHAN ))
120
121 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
122 FOR_EACH_CHANNEL( CHAN )\
123 if (IS_CHANNEL_ENABLED2( INST, CHAN ))
124
125
126 /** The execution mask depends on the conditional mask and the loop mask */
127 #define UPDATE_EXEC_MASK(MACH) \
128 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
129
130
131 #define CHAN_X 0
132 #define CHAN_Y 1
133 #define CHAN_Z 2
134 #define CHAN_W 3
135
136
137
138 /**
139 * Initialize machine state by expanding tokens to full instructions,
140 * allocating temporary storage, setting up constants, etc.
141 * After this, we can call spu_exec_machine_run() many times.
142 */
143 void
144 spu_exec_machine_init(struct spu_exec_machine *mach,
145 uint numSamplers,
146 struct spu_sampler *samplers,
147 unsigned processor)
148 {
149 const qword zero = si_il(0);
150 const qword not_zero = si_il(~0);
151
152 (void) numSamplers;
153 mach->Samplers = samplers;
154 mach->Processor = processor;
155 mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
156
157 /* Setup constants. */
158 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero;
159 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero;
160 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_rotmi(not_zero, -1);
161 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31);
162
163 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f);
164 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f);
165 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f);
166 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f);
167 }
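/*
 * Illustrative usage sketch (kept under #if 0; it is not part of the
 * driver's compiled code): roughly how a caller would set up and run the
 * interpreter for a fragment shader.  example_run_fragment_shader() is a
 * hypothetical helper; the spu_exec_* calls and the machine fields it
 * mentions are the ones used elsewhere in this file.
 */
#if 0
static void
example_run_fragment_shader(struct spu_exec_machine *mach,
                            struct spu_sampler *samplers,
                            uint num_samplers)
{
   uint alive_mask;

   /* one-time setup: utility constants, Addrs pointer, sampler table */
   spu_exec_machine_init(mach, num_samplers, samplers,
                         TGSI_PROCESSOR_FRAGMENT);

   /* The caller is expected to point mach->Instructions and
    * mach->Declarations at the parsed TGSI program and to fill in
    * Inputs/Consts/InterpCoefs before each run.
    */
   alive_mask = spu_exec_machine_run(mach);
   (void) alive_mask;   /* bit i set => quad component i was not killed */
}
#endif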
168
169
170 static INLINE qword
171 micro_abs(qword src)
172 {
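   /* clear the sign bit: shift it out the top, then shift a zero back in */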
173 return si_rotmi(si_shli(src, 1), -1);
174 }
175
176 static INLINE qword
177 micro_ceil(qword src)
178 {
179 return (qword) _ceilf4((vec_float4) src);
180 }
181
182 static INLINE qword
183 micro_cos(qword src)
184 {
185 return (qword) _cosf4((vec_float4) src);
186 }
187
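/*
 * Byte shuffle patterns for si_shufb().  Each one is intended to replicate
 * the float belonging to one pixel of the 2x2 quad into all four word slots
 * of a qword, so that the DDX/DDY helpers below can form their differences
 * with a single vector subtract.
 */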
188 static const qword br_shuf = {
189 (TILE_BOTTOM_RIGHT * 4) + 0, (TILE_BOTTOM_RIGHT * 4) + 1,
190 (TILE_BOTTOM_RIGHT * 4) + 2, (TILE_BOTTOM_RIGHT * 4) + 3,
191 (TILE_BOTTOM_RIGHT * 4) + 0, (TILE_BOTTOM_RIGHT * 4) + 1,
192 (TILE_BOTTOM_RIGHT * 4) + 2, (TILE_BOTTOM_RIGHT * 4) + 3,
193 (TILE_BOTTOM_RIGHT * 4) + 0, (TILE_BOTTOM_RIGHT * 4) + 1,
194 (TILE_BOTTOM_RIGHT * 4) + 2, (TILE_BOTTOM_RIGHT * 4) + 3,
195 (TILE_BOTTOM_RIGHT * 4) + 0, (TILE_BOTTOM_RIGHT * 4) + 1,
196 (TILE_BOTTOM_RIGHT * 4) + 2, (TILE_BOTTOM_RIGHT * 4) + 3,
197 };
198
199 static const qword bl_shuf = {
200 (TILE_BOTTOM_LEFT * 4) + 0, (TILE_BOTTOM_LEFT * 4) + 1,
201 (TILE_BOTTOM_LEFT * 4) + 2, (TILE_BOTTOM_LEFT * 4) + 3,
202 (TILE_BOTTOM_LEFT * 4) + 0, (TILE_BOTTOM_LEFT * 4) + 1,
203 (TILE_BOTTOM_LEFT * 4) + 2, (TILE_BOTTOM_LEFT * 4) + 3,
204 (TILE_BOTTOM_LEFT * 4) + 0, (TILE_BOTTOM_LEFT * 4) + 1,
205 (TILE_BOTTOM_LEFT * 4) + 2, (TILE_BOTTOM_LEFT * 4) + 3,
206 (TILE_BOTTOM_LEFT * 4) + 0, (TILE_BOTTOM_LEFT * 4) + 1,
207 (TILE_BOTTOM_LEFT * 4) + 2, (TILE_BOTTOM_LEFT * 4) + 3,
208 };
209
210 static const qword tl_shuf = {
211 (TILE_TOP_LEFT * 4) + 0, (TILE_TOP_LEFT * 4) + 1,
212 (TILE_TOP_LEFT * 4) + 2, (TILE_TOP_LEFT * 4) + 3,
213 (TILE_TOP_LEFT * 4) + 0, (TILE_TOP_LEFT * 4) + 1,
214 (TILE_TOP_LEFT * 4) + 2, (TILE_TOP_LEFT * 4) + 3,
215 (TILE_TOP_LEFT * 4) + 0, (TILE_TOP_LEFT * 4) + 1,
216 (TILE_TOP_LEFT * 4) + 2, (TILE_TOP_LEFT * 4) + 3,
217 (TILE_TOP_LEFT * 4) + 0, (TILE_TOP_LEFT * 4) + 1,
218 (TILE_TOP_LEFT * 4) + 2, (TILE_TOP_LEFT * 4) + 3,
219 };
220
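/*
 * Screen-space partial derivatives, approximated by finite differences
 * within the quad: ddx = bottom_right - bottom_left and
 * ddy = top_left - bottom_left, with the result broadcast to all four
 * pixels.
 */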
221 static qword
222 micro_ddx(qword src)
223 {
224 qword bottom_right = si_shufb(src, src, br_shuf);
225 qword bottom_left = si_shufb(src, src, bl_shuf);
226
227 return si_fs(bottom_right, bottom_left);
228 }
229
230 static qword
231 micro_ddy(qword src)
232 {
233 qword top_left = si_shufb(src, src, tl_shuf);
234 qword bottom_left = si_shufb(src, src, bl_shuf);
235
236 return si_fs(top_left, bottom_left);
237 }
238
239 static INLINE qword
240 micro_div(qword src0, qword src1)
241 {
242 return (qword) _divf4((vec_float4) src0, (vec_float4) src1);
243 }
244
245 static qword
246 micro_flr(qword src)
247 {
248 return (qword) _floorf4((vec_float4) src);
249 }
250
251 static qword
252 micro_frc(qword src)
253 {
254 return si_fs(src, (qword) _floorf4((vec_float4) src));
255 }
256
257 static INLINE qword
258 micro_ge(qword src0, qword src1)
259 {
260 return si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
261 }
262
263 static qword
264 micro_lg2(qword src)
265 {
266 return (qword) _log2f4((vec_float4) src);
267 }
268
269 static INLINE qword
270 micro_lt(qword src0, qword src1)
271 {
272 const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
273
274 return si_xorbi(tmp, 0xff);
275 }
276
277 static INLINE qword
278 micro_max(qword src0, qword src1)
279 {
280 return si_selb(src1, src0, si_fcgt(src0, src1));
281 }
282
283 static INLINE qword
284 micro_min(qword src0, qword src1)
285 {
286 return si_selb(src0, src1, si_fcgt(src0, src1));
287 }
288
289 static qword
290 micro_neg(qword src)
291 {
292 return si_xor(src, (qword) spu_splats(0x80000000));
293 }
294
295 static qword
296 micro_set_sign(qword src)
297 {
298 return si_or(src, (qword) spu_splats(0x80000000));
299 }
300
301 static qword
302 micro_pow(qword src0, qword src1)
303 {
304 return (qword) _powf4((vec_float4) src0, (vec_float4) src1);
305 }
306
307 static qword
308 micro_rnd(qword src)
309 {
310 const qword half = (qword) spu_splats(0.5f);
311
312 /* May be able to use _roundf4. There may be some difference, though.
313 */
314 return (qword) _floorf4((vec_float4) si_fa(src, half));
315 }
316
317 static INLINE qword
318 micro_ishr(qword src0, qword src1)
319 {
320 return si_rotma(src0, si_sfi(src1, 0));
321 }
322
323 static qword
324 micro_trunc(qword src)
325 {
326 return (qword) _truncf4((vec_float4) src);
327 }
328
329 static qword
330 micro_sin(qword src)
331 {
332 return (qword) _sinf4((vec_float4) src);
333 }
334
335 static INLINE qword
336 micro_sqrt(qword src)
337 {
338 return (qword) _sqrtf4((vec_float4) src);
339 }
340
341 static void
342 fetch_src_file_channel(
343 const struct spu_exec_machine *mach,
344 const uint file,
345 const uint swizzle,
346 const union spu_exec_channel *index,
347 union spu_exec_channel *chan )
348 {
349 switch( swizzle ) {
350 case TGSI_EXTSWIZZLE_X:
351 case TGSI_EXTSWIZZLE_Y:
352 case TGSI_EXTSWIZZLE_Z:
353 case TGSI_EXTSWIZZLE_W:
354 switch( file ) {
355 case TGSI_FILE_CONSTANT: {
356 unsigned i;
357
358 for (i = 0; i < 4; i++) {
359 const float *ptr = mach->Consts[index->i[i]];
360 float tmp[4];
361
362 spu_dcache_fetch_unaligned((qword *) tmp,
363 (uintptr_t)(ptr + swizzle),
364 sizeof(float));
365
366 chan->f[i] = tmp[0];
367 }
368 break;
369 }
370
371 case TGSI_FILE_INPUT:
372 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
373 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
374 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
375 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
376 break;
377
378 case TGSI_FILE_TEMPORARY:
379 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
380 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
381 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
382 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
383 break;
384
385 case TGSI_FILE_IMMEDIATE:
386 assert( index->i[0] < (int) mach->ImmLimit );
387 assert( index->i[1] < (int) mach->ImmLimit );
388 assert( index->i[2] < (int) mach->ImmLimit );
389 assert( index->i[3] < (int) mach->ImmLimit );
390
391 chan->f[0] = mach->Imms[index->i[0]][swizzle];
392 chan->f[1] = mach->Imms[index->i[1]][swizzle];
393 chan->f[2] = mach->Imms[index->i[2]][swizzle];
394 chan->f[3] = mach->Imms[index->i[3]][swizzle];
395 break;
396
397 case TGSI_FILE_ADDRESS:
398 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
399 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
400 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
401 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
402 break;
403
404 case TGSI_FILE_OUTPUT:
405 /* vertex/fragment output vars can be read too */
406 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
407 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
408 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
409 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
410 break;
411
412 default:
413 assert( 0 );
414 }
415 break;
416
417 case TGSI_EXTSWIZZLE_ZERO:
418 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
419 break;
420
421 case TGSI_EXTSWIZZLE_ONE:
422 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
423 break;
424
425 default:
426 assert( 0 );
427 }
428 }
429
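/**
 * Fetch one channel of a source operand, resolving relative (indirect)
 * addressing first.  For example, a source written as
 * CONST[ADDR[0].x + 3] is handled by fetching ADDR[0].x, adding it to the
 * immediate index 3, and then reading the addressed constant for each of
 * the four quad components.
 */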
430 static void
431 fetch_source(
432 const struct spu_exec_machine *mach,
433 union spu_exec_channel *chan,
434 const struct tgsi_full_src_register *reg,
435 const uint chan_index )
436 {
437 union spu_exec_channel index;
438 uint swizzle;
439
440 index.i[0] =
441 index.i[1] =
442 index.i[2] =
443 index.i[3] = reg->SrcRegister.Index;
444
445 if (reg->SrcRegister.Indirect) {
446 union spu_exec_channel index2;
447 union spu_exec_channel indir_index;
448
449 index2.i[0] =
450 index2.i[1] =
451 index2.i[2] =
452 index2.i[3] = reg->SrcRegisterInd.Index;
453
454 swizzle = tgsi_util_get_src_register_swizzle(&reg->SrcRegisterInd,
455 CHAN_X);
456 fetch_src_file_channel(
457 mach,
458 reg->SrcRegisterInd.File,
459 swizzle,
460 &index2,
461 &indir_index );
462
463 index.q = si_a(index.q, indir_index.q);
464 }
465
466 if( reg->SrcRegister.Dimension ) {
467 switch( reg->SrcRegister.File ) {
468 case TGSI_FILE_INPUT:
469 index.q = si_mpyi(index.q, 17);
470 break;
471 case TGSI_FILE_CONSTANT:
472 index.q = si_shli(index.q, 12);
473 break;
474 default:
475 assert( 0 );
476 }
477
478 index.i[0] += reg->SrcRegisterDim.Index;
479 index.i[1] += reg->SrcRegisterDim.Index;
480 index.i[2] += reg->SrcRegisterDim.Index;
481 index.i[3] += reg->SrcRegisterDim.Index;
482
483 if (reg->SrcRegisterDim.Indirect) {
484 union spu_exec_channel index2;
485 union spu_exec_channel indir_index;
486
487 index2.i[0] =
488 index2.i[1] =
489 index2.i[2] =
490 index2.i[3] = reg->SrcRegisterDimInd.Index;
491
492 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
493 fetch_src_file_channel(
494 mach,
495 reg->SrcRegisterDimInd.File,
496 swizzle,
497 &index2,
498 &indir_index );
499
500 index.q = si_a(index.q, indir_index.q);
501 }
502 }
503
504 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
505 fetch_src_file_channel(
506 mach,
507 reg->SrcRegister.File,
508 swizzle,
509 &index,
510 chan );
511
512 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
513 case TGSI_UTIL_SIGN_CLEAR:
514 chan->q = micro_abs(chan->q);
515 break;
516
517 case TGSI_UTIL_SIGN_SET:
518 chan->q = micro_set_sign(chan->q);
519 break;
520
521 case TGSI_UTIL_SIGN_TOGGLE:
522 chan->q = micro_neg(chan->q);
523 break;
524
525 case TGSI_UTIL_SIGN_KEEP:
526 break;
527 }
528
529 if (reg->SrcRegisterExtMod.Complement) {
530 chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q);
531 }
532 }
533
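/**
 * Write one channel of a computed value to the destination register.
 * Writes are gated per component by the ExecMask (the saturating path does
 * not honor it yet; see the XXX note below), and the instruction's
 * saturate mode is applied.
 */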
534 static void
535 store_dest(
536 struct spu_exec_machine *mach,
537 const union spu_exec_channel *chan,
538 const struct tgsi_full_dst_register *reg,
539 const struct tgsi_full_instruction *inst,
540 uint chan_index )
541 {
542 union spu_exec_channel *dst;
543
544 switch( reg->DstRegister.File ) {
545 case TGSI_FILE_NULL:
546 return;
547
548 case TGSI_FILE_OUTPUT:
549 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
550 + reg->DstRegister.Index].xyzw[chan_index];
551 break;
552
553 case TGSI_FILE_TEMPORARY:
554 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
555 break;
556
557 case TGSI_FILE_ADDRESS:
558 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
559 break;
560
561 default:
562 assert( 0 );
563 return;
564 }
565
566 switch (inst->Instruction.Saturate)
567 {
568 case TGSI_SAT_NONE:
569 if (mach->ExecMask & 0x1)
570 dst->i[0] = chan->i[0];
571 if (mach->ExecMask & 0x2)
572 dst->i[1] = chan->i[1];
573 if (mach->ExecMask & 0x4)
574 dst->i[2] = chan->i[2];
575 if (mach->ExecMask & 0x8)
576 dst->i[3] = chan->i[3];
577 break;
578
579 case TGSI_SAT_ZERO_ONE:
580 /* XXX need to obey ExecMask here */
581 dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
582 dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q);
583 break;
584
585 case TGSI_SAT_MINUS_PLUS_ONE:
586 assert( 0 );
587 break;
588
589 default:
590 assert( 0 );
591 }
592 }
593
594 #define FETCH(VAL,INDEX,CHAN)\
595 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
596
597 #define STORE(VAL,INDEX,CHAN)\
598 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
599
600
601 /**
602 * Execute ARB-style KIL which is predicated by a src register.
603 * Kill fragment if any of the four values is less than zero.
604 */
605 static void
606 exec_kilp(struct spu_exec_machine *mach,
607 const struct tgsi_full_instruction *inst)
608 {
609 uint uniquemask;
610 uint chan_index;
611 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
612 union spu_exec_channel r[1];
613
614 /* This mask stores component bits that were already tested. Note that
615 * we test if the value is less than zero, so 1.0 and 0.0 need not be
616 * tested. */
617 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
618
619 for (chan_index = 0; chan_index < 4; chan_index++)
620 {
621 uint swizzle;
622 uint i;
623
624 /* unswizzle channel */
625 swizzle = tgsi_util_get_full_src_register_extswizzle (
626 &inst->FullSrcRegisters[0],
627 chan_index);
628
629 /* check if the component has not been already tested */
630 if (uniquemask & (1 << swizzle))
631 continue;
632 uniquemask |= 1 << swizzle;
633
634 FETCH(&r[0], 0, chan_index);
635 for (i = 0; i < 4; i++)
636 if (r[0].f[i] < 0.0f)
637 kilmask |= 1 << i;
638 }
639
640 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
641 }
642
643
644 /*
645 * Fetch a texel using STR texture coordinates.
646 */
647 static void
648 fetch_texel( struct spu_sampler *sampler,
649 const union spu_exec_channel *s,
650 const union spu_exec_channel *t,
651 const union spu_exec_channel *p,
652 float lodbias, /* XXX should be float[4] */
653 union spu_exec_channel *r,
654 union spu_exec_channel *g,
655 union spu_exec_channel *b,
656 union spu_exec_channel *a )
657 {
658 qword rgba[4];
659 qword out[4];
660
661 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias,
662 (float (*)[4]) rgba);
663
664 _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba);
665 r->q = out[0];
666 g->q = out[1];
667 b->q = out[2];
668 a->q = out[3];
669 }
670
671
672 static void
673 exec_tex(struct spu_exec_machine *mach,
674 const struct tgsi_full_instruction *inst,
675 boolean biasLod, boolean projected)
676 {
677 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
678 union spu_exec_channel r[8];
679 uint chan_index;
680 float lodBias;
681
682 /* printf("Sampler %u unit %u\n", sampler, unit); */
683
684 switch (inst->InstructionExtTexture.Texture) {
685 case TGSI_TEXTURE_1D:
686
687 FETCH(&r[0], 0, CHAN_X);
688
689 if (projected) {
690 FETCH(&r[1], 0, CHAN_W);
691 r[0].q = micro_div(r[0].q, r[1].q);
692 }
693
694 if (biasLod) {
695 FETCH(&r[1], 0, CHAN_W);
696 lodBias = r[1].f[0];
697 }
698 else
699 lodBias = 0.0;
700
701 fetch_texel(&mach->Samplers[unit],
702 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
703 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
704 break;
705
706 case TGSI_TEXTURE_2D:
707 case TGSI_TEXTURE_RECT:
708
709 FETCH(&r[0], 0, CHAN_X);
710 FETCH(&r[1], 0, CHAN_Y);
711 FETCH(&r[2], 0, CHAN_Z);
712
713 if (projected) {
714 FETCH(&r[3], 0, CHAN_W);
715 r[0].q = micro_div(r[0].q, r[3].q);
716 r[1].q = micro_div(r[1].q, r[3].q);
717 r[2].q = micro_div(r[2].q, r[3].q);
718 }
719
720 if (biasLod) {
721 FETCH(&r[3], 0, CHAN_W);
722 lodBias = r[3].f[0];
723 }
724 else
725 lodBias = 0.0;
726
727 fetch_texel(&mach->Samplers[unit],
728 &r[0], &r[1], &r[2], lodBias, /* inputs */
729 &r[0], &r[1], &r[2], &r[3]); /* outputs */
730 break;
731
732 case TGSI_TEXTURE_3D:
733 case TGSI_TEXTURE_CUBE:
734
735 FETCH(&r[0], 0, CHAN_X);
736 FETCH(&r[1], 0, CHAN_Y);
737 FETCH(&r[2], 0, CHAN_Z);
738
739 if (projected) {
740 FETCH(&r[3], 0, CHAN_W);
741 r[0].q = micro_div(r[0].q, r[3].q);
742 r[1].q = micro_div(r[1].q, r[3].q);
743 r[2].q = micro_div(r[2].q, r[3].q);
744 }
745
746 if (biasLod) {
747 FETCH(&r[3], 0, CHAN_W);
748 lodBias = r[3].f[0];
749 }
750 else
751 lodBias = 0.0;
752
753 fetch_texel(&mach->Samplers[unit],
754 &r[0], &r[1], &r[2], lodBias,
755 &r[0], &r[1], &r[2], &r[3]);
756 break;
757
758 default:
759 assert (0);
760 }
761
762 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
763 STORE( &r[chan_index], 0, chan_index );
764 }
765 }
766
767
768
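/*
 * Attribute interpolation across the 2x2 quad.  Component 0 is the value
 * at the quad's reference position (QuadPos), component 1 is one step in
 * x (+dadx), component 2 one step in y (+dady) and component 3 one step in
 * both, matching the TILE_TOP_LEFT..TILE_BOTTOM_RIGHT ordering used above.
 */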
769 static void
770 constant_interpolation(
771 struct spu_exec_machine *mach,
772 unsigned attrib,
773 unsigned chan )
774 {
775 unsigned i;
776
777 for( i = 0; i < QUAD_SIZE; i++ ) {
778 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
779 }
780 }
781
782 static void
783 linear_interpolation(
784 struct spu_exec_machine *mach,
785 unsigned attrib,
786 unsigned chan )
787 {
788 const float x = mach->QuadPos.xyzw[0].f[0];
789 const float y = mach->QuadPos.xyzw[1].f[0];
790 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
791 const float dady = mach->InterpCoefs[attrib].dady[chan];
792 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
793 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
794 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
795 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
796 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
797 }
798
799 static void
800 perspective_interpolation(
801 struct spu_exec_machine *mach,
802 unsigned attrib,
803 unsigned chan )
804 {
805 const float x = mach->QuadPos.xyzw[0].f[0];
806 const float y = mach->QuadPos.xyzw[1].f[0];
807 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
808 const float dady = mach->InterpCoefs[attrib].dady[chan];
809 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
810 const float *w = mach->QuadPos.xyzw[3].f;
811 /* divide by W here */
812 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
813 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
814 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
815 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
816 }
817
818
819 typedef void (* interpolation_func)(
820 struct spu_exec_machine *mach,
821 unsigned attrib,
822 unsigned chan );
823
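/**
 * Process one TGSI declaration.  For fragment shaders this interpolates
 * the declared input attributes across the quad, using the declared
 * interpolation mode (constant, linear or perspective) and the declared
 * usage mask.
 */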
824 static void
825 exec_declaration(struct spu_exec_machine *mach,
826 const struct tgsi_full_declaration *decl)
827 {
828 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
829 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
830 unsigned first, last, mask;
831 interpolation_func interp;
832
833 assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
834
835 first = decl->u.DeclarationRange.First;
836 last = decl->u.DeclarationRange.Last;
837 mask = decl->Declaration.UsageMask;
838
839 switch( decl->Interpolation.Interpolate ) {
840 case TGSI_INTERPOLATE_CONSTANT:
841 interp = constant_interpolation;
842 break;
843
844 case TGSI_INTERPOLATE_LINEAR:
845 interp = linear_interpolation;
846 break;
847
848 case TGSI_INTERPOLATE_PERSPECTIVE:
849 interp = perspective_interpolation;
850 break;
851
852 default:
853 assert( 0 );
854 }
855
856 if( mask == TGSI_WRITEMASK_XYZW ) {
857 unsigned i, j;
858
859 for( i = first; i <= last; i++ ) {
860 for( j = 0; j < NUM_CHANNELS; j++ ) {
861 interp( mach, i, j );
862 }
863 }
864 }
865 else {
866 unsigned i, j;
867
868 for( j = 0; j < NUM_CHANNELS; j++ ) {
869 if( mask & (1 << j) ) {
870 for( i = first; i <= last; i++ ) {
871 interp( mach, i, j );
872 }
873 }
874 }
875 }
876 }
877 }
878 }
879
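/**
 * Execute a single TGSI instruction.  *pc is incremented here before the
 * opcode is dispatched; flow control opcodes (CAL, RET, ENDLOOP, END, ...)
 * may overwrite it, and setting it to -1 tells the caller to stop.
 */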
880 static void
881 exec_instruction(
882 struct spu_exec_machine *mach,
883 const struct tgsi_full_instruction *inst,
884 int *pc )
885 {
886 uint chan_index;
887 union spu_exec_channel r[8];
888
889 (*pc)++;
890
891 switch (inst->Instruction.Opcode) {
892 case TGSI_OPCODE_ARL:
893 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
894 FETCH( &r[0], 0, chan_index );
895 r[0].q = si_cflts(r[0].q, 0);
896 STORE( &r[0], 0, chan_index );
897 }
898 break;
899
900 case TGSI_OPCODE_MOV:
901 /* TGSI_OPCODE_SWZ */
902 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
903 FETCH( &r[0], 0, chan_index );
904 STORE( &r[0], 0, chan_index );
905 }
906 break;
907
908 case TGSI_OPCODE_LIT:
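      /* LIT result: dst.x = 1, dst.y = max(src.x, 0),
       * dst.z = (src.x > 0) ? max(src.y, 0)^clamp(src.w, -128, 128) : 0,
       * dst.w = 1
       */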
909 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
910 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
911 }
912
913 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
914 FETCH( &r[0], 0, CHAN_X );
915 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
916 r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
917 STORE( &r[0], 0, CHAN_Y );
918 }
919
920 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
921 FETCH( &r[1], 0, CHAN_Y );
922 r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
923
924 FETCH( &r[2], 0, CHAN_W );
925 r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q);
926 r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q);
927 r[1].q = micro_pow(r[1].q, r[2].q);
928
929 /* r0 = (r0 > 0.0) ? r1 : 0.0
930 */
931 r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
932 r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q,
933 r[0].q);
934 STORE( &r[0], 0, CHAN_Z );
935 }
936 }
937
938 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
939 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
940 }
941 break;
942
943 case TGSI_OPCODE_RCP:
944 /* TGSI_OPCODE_RECIP */
945 FETCH( &r[0], 0, CHAN_X );
946 r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
947 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
948 STORE( &r[0], 0, chan_index );
949 }
950 break;
951
952 case TGSI_OPCODE_RSQ:
953 /* TGSI_OPCODE_RECIPSQRT */
954 FETCH( &r[0], 0, CHAN_X );
955 r[0].q = micro_sqrt(r[0].q);
956 r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
957 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
958 STORE( &r[0], 0, chan_index );
959 }
960 break;
961
962 case TGSI_OPCODE_EXP:
963 assert (0);
964 break;
965
966 case TGSI_OPCODE_LOG:
967 assert (0);
968 break;
969
970 case TGSI_OPCODE_MUL:
971 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
972 {
973 FETCH(&r[0], 0, chan_index);
974 FETCH(&r[1], 1, chan_index);
975
976 r[0].q = si_fm(r[0].q, r[1].q);
977
978 STORE(&r[0], 0, chan_index);
979 }
980 break;
981
982 case TGSI_OPCODE_ADD:
983 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
984 FETCH( &r[0], 0, chan_index );
985 FETCH( &r[1], 1, chan_index );
986 r[0].q = si_fa(r[0].q, r[1].q);
987 STORE( &r[0], 0, chan_index );
988 }
989 break;
990
991 case TGSI_OPCODE_DP3:
992 /* TGSI_OPCODE_DOT3 */
993 FETCH( &r[0], 0, CHAN_X );
994 FETCH( &r[1], 1, CHAN_X );
995 r[0].q = si_fm(r[0].q, r[1].q);
996
997 FETCH( &r[1], 0, CHAN_Y );
998 FETCH( &r[2], 1, CHAN_Y );
999 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1000
1001
1002 FETCH( &r[1], 0, CHAN_Z );
1003 FETCH( &r[2], 1, CHAN_Z );
1004 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1005
1006 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1007 STORE( &r[0], 0, chan_index );
1008 }
1009 break;
1010
1011 case TGSI_OPCODE_DP4:
1012 /* TGSI_OPCODE_DOT4 */
1013 FETCH(&r[0], 0, CHAN_X);
1014 FETCH(&r[1], 1, CHAN_X);
1015
1016 r[0].q = si_fm(r[0].q, r[1].q);
1017
1018 FETCH(&r[1], 0, CHAN_Y);
1019 FETCH(&r[2], 1, CHAN_Y);
1020
1021 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1022
1023 FETCH(&r[1], 0, CHAN_Z);
1024 FETCH(&r[2], 1, CHAN_Z);
1025
1026 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1027
1028 FETCH(&r[1], 0, CHAN_W);
1029 FETCH(&r[2], 1, CHAN_W);
1030
1031 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1032
1033 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1034 STORE( &r[0], 0, chan_index );
1035 }
1036 break;
1037
1038 case TGSI_OPCODE_DST:
1039 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1040 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1041 }
1042
1043 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1044 FETCH( &r[0], 0, CHAN_Y );
1045 FETCH( &r[1], 1, CHAN_Y);
1046 r[0].q = si_fm(r[0].q, r[1].q);
1047 STORE( &r[0], 0, CHAN_Y );
1048 }
1049
1050 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1051 FETCH( &r[0], 0, CHAN_Z );
1052 STORE( &r[0], 0, CHAN_Z );
1053 }
1054
1055 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1056 FETCH( &r[0], 1, CHAN_W );
1057 STORE( &r[0], 0, CHAN_W );
1058 }
1059 break;
1060
1061 case TGSI_OPCODE_MIN:
1062 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1063 FETCH(&r[0], 0, chan_index);
1064 FETCH(&r[1], 1, chan_index);
1065
1066 r[0].q = micro_min(r[0].q, r[1].q);
1067
1068 STORE(&r[0], 0, chan_index);
1069 }
1070 break;
1071
1072 case TGSI_OPCODE_MAX:
1073 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1074 FETCH(&r[0], 0, chan_index);
1075 FETCH(&r[1], 1, chan_index);
1076
1077 r[0].q = micro_max(r[0].q, r[1].q);
1078
1079 STORE(&r[0], 0, chan_index );
1080 }
1081 break;
1082
1083 case TGSI_OPCODE_SLT:
1084 /* TGSI_OPCODE_SETLT */
1085 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1086 FETCH( &r[0], 0, chan_index );
1087 FETCH( &r[1], 1, chan_index );
1088
1089 r[0].q = micro_ge(r[0].q, r[1].q);
1090 r[0].q = si_xorbi(r[0].q, 0xff);
1091
1092 STORE( &r[0], 0, chan_index );
1093 }
1094 break;
1095
1096 case TGSI_OPCODE_SGE:
1097 /* TGSI_OPCODE_SETGE */
1098 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1099 FETCH( &r[0], 0, chan_index );
1100 FETCH( &r[1], 1, chan_index );
1101 r[0].q = micro_ge(r[0].q, r[1].q);
1102 STORE( &r[0], 0, chan_index );
1103 }
1104 break;
1105
1106 case TGSI_OPCODE_MAD:
1107 /* TGSI_OPCODE_MADD */
1108 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1109 FETCH( &r[0], 0, chan_index );
1110 FETCH( &r[1], 1, chan_index );
1111 FETCH( &r[2], 2, chan_index );
1112 r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
1113 STORE( &r[0], 0, chan_index );
1114 }
1115 break;
1116
1117 case TGSI_OPCODE_SUB:
1118 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1119 FETCH(&r[0], 0, chan_index);
1120 FETCH(&r[1], 1, chan_index);
1121
1122 r[0].q = si_fs(r[0].q, r[1].q);
1123
1124 STORE(&r[0], 0, chan_index);
1125 }
1126 break;
1127
1128 case TGSI_OPCODE_LERP:
1129 /* TGSI_OPCODE_LRP */
1130 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1131 FETCH(&r[0], 0, chan_index);
1132 FETCH(&r[1], 1, chan_index);
1133 FETCH(&r[2], 2, chan_index);
1134
1135 r[1].q = si_fs(r[1].q, r[2].q);
1136 r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
1137
1138 STORE(&r[0], 0, chan_index);
1139 }
1140 break;
1141
1142 case TGSI_OPCODE_CND:
1143 assert (0);
1144 break;
1145
1146 case TGSI_OPCODE_CND0:
1147 assert (0);
1148 break;
1149
1150 case TGSI_OPCODE_DOT2ADD:
1151 /* TGSI_OPCODE_DP2A */
1152 assert (0);
1153 break;
1154
1155 case TGSI_OPCODE_INDEX:
1156 assert (0);
1157 break;
1158
1159 case TGSI_OPCODE_NEGATE:
1160 assert (0);
1161 break;
1162
1163 case TGSI_OPCODE_FRAC:
1164 /* TGSI_OPCODE_FRC */
1165 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1166 FETCH( &r[0], 0, chan_index );
1167 r[0].q = micro_frc(r[0].q);
1168 STORE( &r[0], 0, chan_index );
1169 }
1170 break;
1171
1172 case TGSI_OPCODE_CLAMP:
1173 assert (0);
1174 break;
1175
1176 case TGSI_OPCODE_FLOOR:
1177 /* TGSI_OPCODE_FLR */
1178 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1179 FETCH( &r[0], 0, chan_index );
1180 r[0].q = micro_flr(r[0].q);
1181 STORE( &r[0], 0, chan_index );
1182 }
1183 break;
1184
1185 case TGSI_OPCODE_ROUND:
1186 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1187 FETCH( &r[0], 0, chan_index );
1188 r[0].q = micro_rnd(r[0].q);
1189 STORE( &r[0], 0, chan_index );
1190 }
1191 break;
1192
1193 case TGSI_OPCODE_EXPBASE2:
1194 /* TGSI_OPCODE_EX2 */
1195 FETCH(&r[0], 0, CHAN_X);
1196
1197 r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
1198
1199 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1200 STORE( &r[0], 0, chan_index );
1201 }
1202 break;
1203
1204 case TGSI_OPCODE_LOGBASE2:
1205 /* TGSI_OPCODE_LG2 */
1206 FETCH( &r[0], 0, CHAN_X );
1207 r[0].q = micro_lg2(r[0].q);
1208 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1209 STORE( &r[0], 0, chan_index );
1210 }
1211 break;
1212
1213 case TGSI_OPCODE_POWER:
1214 /* TGSI_OPCODE_POW */
1215 FETCH(&r[0], 0, CHAN_X);
1216 FETCH(&r[1], 1, CHAN_X);
1217
1218 r[0].q = micro_pow(r[0].q, r[1].q);
1219
1220 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1221 STORE( &r[0], 0, chan_index );
1222 }
1223 break;
1224
1225 case TGSI_OPCODE_CROSSPRODUCT:
1226 /* TGSI_OPCODE_XPD */
1227 FETCH(&r[0], 0, CHAN_Y);
1228 FETCH(&r[1], 1, CHAN_Z);
1229 FETCH(&r[3], 0, CHAN_Z);
1230 FETCH(&r[4], 1, CHAN_Y);
1231
1232 /* r2 = (r0 * r1) - (r3 * r4)
1233 */
1234 r[2].q = si_fm(r[3].q, r[4].q);
1235 r[2].q = si_fms(r[0].q, r[1].q, r[2].q);
1236
1237 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1238 STORE( &r[2], 0, CHAN_X );
1239 }
1240
1241 FETCH(&r[2], 1, CHAN_X);
1242 FETCH(&r[5], 0, CHAN_X);
1243
1244 /* r3 = (r3 * r2) - (r1 * r5)
1245 */
1246 r[1].q = si_fm(r[1].q, r[5].q);
1247 r[3].q = si_fms(r[3].q, r[2].q, r[1].q);
1248
1249 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1250 STORE( &r[3], 0, CHAN_Y );
1251 }
1252
1253 /* r5 = (r5 * r4) - (r0 * r2)
1254 */
1255 r[0].q = si_fm(r[0].q, r[2].q);
1256 r[5].q = si_fms(r[5].q, r[4].q, r[0].q);
1257
1258 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1259 STORE( &r[5], 0, CHAN_Z );
1260 }
1261
1262 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1263 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1264 }
1265 break;
1266
1267 case TGSI_OPCODE_MULTIPLYMATRIX:
1268 assert (0);
1269 break;
1270
1271 case TGSI_OPCODE_ABS:
1272 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1273 FETCH(&r[0], 0, chan_index);
1274
1275 r[0].q = micro_abs(r[0].q);
1276
1277 STORE(&r[0], 0, chan_index);
1278 }
1279 break;
1280
1281 case TGSI_OPCODE_RCC:
1282 assert (0);
1283 break;
1284
1285 case TGSI_OPCODE_DPH:
1286 FETCH(&r[0], 0, CHAN_X);
1287 FETCH(&r[1], 1, CHAN_X);
1288
1289 r[0].q = si_fm(r[0].q, r[1].q);
1290
1291 FETCH(&r[1], 0, CHAN_Y);
1292 FETCH(&r[2], 1, CHAN_Y);
1293
1294 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1295
1296 FETCH(&r[1], 0, CHAN_Z);
1297 FETCH(&r[2], 1, CHAN_Z);
1298
1299 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1300
1301 FETCH(&r[1], 1, CHAN_W);
1302
1303 r[0].q = si_fa(r[0].q, r[1].q);
1304
1305 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1306 STORE( &r[0], 0, chan_index );
1307 }
1308 break;
1309
1310 case TGSI_OPCODE_COS:
1311 FETCH(&r[0], 0, CHAN_X);
1312
1313 r[0].q = micro_cos(r[0].q);
1314
1315 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1316 STORE( &r[0], 0, chan_index );
1317 }
1318 break;
1319
1320 case TGSI_OPCODE_DDX:
1321 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1322 FETCH( &r[0], 0, chan_index );
1323 r[0].q = micro_ddx(r[0].q);
1324 STORE( &r[0], 0, chan_index );
1325 }
1326 break;
1327
1328 case TGSI_OPCODE_DDY:
1329 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1330 FETCH( &r[0], 0, chan_index );
1331 r[0].q = micro_ddy(r[0].q);
1332 STORE( &r[0], 0, chan_index );
1333 }
1334 break;
1335
1336 case TGSI_OPCODE_KILP:
1337 exec_kilp (mach, inst);
1338 break;
1339
1340 case TGSI_OPCODE_KIL:
1341 /* for enabled ExecMask bits, set the killed bit */
1342 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
1343 break;
1344
1345 case TGSI_OPCODE_PK2H:
1346 assert (0);
1347 break;
1348
1349 case TGSI_OPCODE_PK2US:
1350 assert (0);
1351 break;
1352
1353 case TGSI_OPCODE_PK4B:
1354 assert (0);
1355 break;
1356
1357 case TGSI_OPCODE_PK4UB:
1358 assert (0);
1359 break;
1360
1361 case TGSI_OPCODE_RFL:
1362 assert (0);
1363 break;
1364
1365 case TGSI_OPCODE_SEQ:
1366 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1367 FETCH( &r[0], 0, chan_index );
1368 FETCH( &r[1], 1, chan_index );
1369
1370 r[0].q = si_fceq(r[0].q, r[1].q);
1371
1372 STORE( &r[0], 0, chan_index );
1373 }
1374 break;
1375
1376 case TGSI_OPCODE_SFL:
1377 assert (0);
1378 break;
1379
1380 case TGSI_OPCODE_SGT:
1381 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1382 FETCH( &r[0], 0, chan_index );
1383 FETCH( &r[1], 1, chan_index );
1384 r[0].q = si_fcgt(r[0].q, r[1].q);
1385 STORE( &r[0], 0, chan_index );
1386 }
1387 break;
1388
1389 case TGSI_OPCODE_SIN:
1390 FETCH( &r[0], 0, CHAN_X );
1391 r[0].q = micro_sin(r[0].q);
1392 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1393 STORE( &r[0], 0, chan_index );
1394 }
1395 break;
1396
1397 case TGSI_OPCODE_SLE:
1398 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1399 FETCH( &r[0], 0, chan_index );
1400 FETCH( &r[1], 1, chan_index );
1401
1402 r[0].q = si_fcgt(r[0].q, r[1].q);
1403 r[0].q = si_xorbi(r[0].q, 0xff);
1404
1405 STORE( &r[0], 0, chan_index );
1406 }
1407 break;
1408
1409 case TGSI_OPCODE_SNE:
1410 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1411 FETCH( &r[0], 0, chan_index );
1412 FETCH( &r[1], 1, chan_index );
1413
1414 r[0].q = si_fceq(r[0].q, r[1].q);
1415 r[0].q = si_xorbi(r[0].q, 0xff);
1416
1417 STORE( &r[0], 0, chan_index );
1418 }
1419 break;
1420
1421 case TGSI_OPCODE_STR:
1422 assert (0);
1423 break;
1424
1425 case TGSI_OPCODE_TEX:
1426 /* simple texture lookup */
1427 /* src[0] = texcoord */
1428 /* src[1] = sampler unit */
1429 exec_tex(mach, inst, FALSE, FALSE);
1430 break;
1431
1432 case TGSI_OPCODE_TXB:
1433 /* Texture lookup with lod bias */
1434 /* src[0] = texcoord (src[0].w = lod bias) */
1435 /* src[1] = sampler unit */
1436 exec_tex(mach, inst, TRUE, FALSE);
1437 break;
1438
1439 case TGSI_OPCODE_TXD:
1440 /* Texture lookup with explicit partial derivatives */
1441 /* src[0] = texcoord */
1442 /* src[1] = d[strq]/dx */
1443 /* src[2] = d[strq]/dy */
1444 /* src[3] = sampler unit */
1445 assert (0);
1446 break;
1447
1448 case TGSI_OPCODE_TXL:
1449 /* Texture lookup with explicit LOD */
1450 /* src[0] = texcoord (src[0].w = LOD) */
1451 /* src[1] = sampler unit */
1452 exec_tex(mach, inst, TRUE, FALSE);
1453 break;
1454
1455 case TGSI_OPCODE_TXP:
1456 /* Texture lookup with projection */
1457 /* src[0] = texcoord (src[0].w = projection) */
1458 /* src[1] = sampler unit */
1459 exec_tex(mach, inst, FALSE, TRUE);
1460 break;
1461
1462 case TGSI_OPCODE_UP2H:
1463 assert (0);
1464 break;
1465
1466 case TGSI_OPCODE_UP2US:
1467 assert (0);
1468 break;
1469
1470 case TGSI_OPCODE_UP4B:
1471 assert (0);
1472 break;
1473
1474 case TGSI_OPCODE_UP4UB:
1475 assert (0);
1476 break;
1477
1478 case TGSI_OPCODE_X2D:
1479 assert (0);
1480 break;
1481
1482 case TGSI_OPCODE_ARA:
1483 assert (0);
1484 break;
1485
1486 case TGSI_OPCODE_ARR:
1487 assert (0);
1488 break;
1489
1490 case TGSI_OPCODE_BRA:
1491 assert (0);
1492 break;
1493
1494 case TGSI_OPCODE_CAL:
1495 /* skip the call if no execution channels are enabled */
1496 if (mach->ExecMask) {
1497 /* do the call */
1498
1499 /* push the Cond, Loop, Cont stacks */
1500 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
1501 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
1502 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1503 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
1504 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1505 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
1506
1507 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
1508 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
1509
1510 /* note that PC was already incremented above */
1511 mach->CallStack[mach->CallStackTop++] = *pc;
1512 *pc = inst->InstructionExtLabel.Label;
1513 }
1514 break;
1515
1516 case TGSI_OPCODE_RET:
1517 mach->FuncMask &= ~mach->ExecMask;
1518 UPDATE_EXEC_MASK(mach);
1519
1520 if (mach->ExecMask == 0x0) {
1521 /* really return now (otherwise, keep executing) */
1522
1523 if (mach->CallStackTop == 0) {
1524 /* returning from main() */
1525 *pc = -1;
1526 return;
1527 }
1528 *pc = mach->CallStack[--mach->CallStackTop];
1529
1530 /* pop the Cond, Loop, Cont stacks */
1531 assert(mach->CondStackTop > 0);
1532 mach->CondMask = mach->CondStack[--mach->CondStackTop];
1533 assert(mach->LoopStackTop > 0);
1534 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
1535 assert(mach->ContStackTop > 0);
1536 mach->ContMask = mach->ContStack[--mach->ContStackTop];
1537 assert(mach->FuncStackTop > 0);
1538 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
1539
1540 UPDATE_EXEC_MASK(mach);
1541 }
1542 break;
1543
1544 case TGSI_OPCODE_SSG:
1545 assert (0);
1546 break;
1547
1548 case TGSI_OPCODE_CMP:
1549 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1550 FETCH(&r[0], 0, chan_index);
1551 FETCH(&r[1], 1, chan_index);
1552 FETCH(&r[2], 2, chan_index);
1553
1554 /* r0 = (r0 < 0.0) ? r1 : r2
1555 */
1556 r[3].q = si_xor(r[3].q, r[3].q);
1557 r[0].q = micro_lt(r[0].q, r[3].q);
1558 r[0].q = si_selb(r[2].q, r[1].q, r[0].q);
1559
1560 STORE(&r[0], 0, chan_index);
1561 }
1562 break;
1563
1564 case TGSI_OPCODE_SCS:
1565 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1566 FETCH( &r[0], 0, CHAN_X );
1567 }
1568 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
1569 r[1].q = micro_cos(r[0].q);
1570 STORE( &r[1], 0, CHAN_X );
1571 }
1572 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1573 r[1].q = micro_sin(r[0].q);
1574 STORE( &r[1], 0, CHAN_Y );
1575 }
1576 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
1577 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
1578 }
1579 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
1580 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1581 }
1582 break;
1583
1584 case TGSI_OPCODE_NRM:
1585 assert (0);
1586 break;
1587
1588 case TGSI_OPCODE_DIV:
1589 assert( 0 );
1590 break;
1591
1592 case TGSI_OPCODE_DP2:
1593 FETCH( &r[0], 0, CHAN_X );
1594 FETCH( &r[1], 1, CHAN_X );
1595 r[0].q = si_fm(r[0].q, r[1].q);
1596
1597 FETCH( &r[1], 0, CHAN_Y );
1598 FETCH( &r[2], 1, CHAN_Y );
1599 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1600
1601 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1602 STORE( &r[0], 0, chan_index );
1603 }
1604 break;
1605
1606 case TGSI_OPCODE_IF:
1607 /* push CondMask */
1608 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
1609 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
1610 FETCH( &r[0], 0, CHAN_X );
1611 /* update CondMask */
1612 if( ! r[0].u[0] ) {
1613 mach->CondMask &= ~0x1;
1614 }
1615 if( ! r[0].u[1] ) {
1616 mach->CondMask &= ~0x2;
1617 }
1618 if( ! r[0].u[2] ) {
1619 mach->CondMask &= ~0x4;
1620 }
1621 if( ! r[0].u[3] ) {
1622 mach->CondMask &= ~0x8;
1623 }
1624 UPDATE_EXEC_MASK(mach);
1625 /* Todo: If CondMask==0, jump to ELSE */
1626 break;
1627
1628 case TGSI_OPCODE_ELSE:
1629 /* invert CondMask wrt previous mask */
1630 {
1631 uint prevMask;
1632 assert(mach->CondStackTop > 0);
1633 prevMask = mach->CondStack[mach->CondStackTop - 1];
1634 mach->CondMask = ~mach->CondMask & prevMask;
1635 UPDATE_EXEC_MASK(mach);
1636 /* Todo: If CondMask==0, jump to ENDIF */
1637 }
1638 break;
1639
1640 case TGSI_OPCODE_ENDIF:
1641 /* pop CondMask */
1642 assert(mach->CondStackTop > 0);
1643 mach->CondMask = mach->CondStack[--mach->CondStackTop];
1644 UPDATE_EXEC_MASK(mach);
1645 break;
1646
1647 case TGSI_OPCODE_END:
1648 /* halt execution */
1649 *pc = -1;
1650 break;
1651
1652 case TGSI_OPCODE_REP:
1653 assert (0);
1654 break;
1655
1656 case TGSI_OPCODE_ENDREP:
1657 assert (0);
1658 break;
1659
1660 case TGSI_OPCODE_PUSHA:
1661 assert (0);
1662 break;
1663
1664 case TGSI_OPCODE_POPA:
1665 assert (0);
1666 break;
1667
1668 case TGSI_OPCODE_CEIL:
1669 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1670 FETCH( &r[0], 0, chan_index );
1671 r[0].q = micro_ceil(r[0].q);
1672 STORE( &r[0], 0, chan_index );
1673 }
1674 break;
1675
1676 case TGSI_OPCODE_I2F:
1677 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1678 FETCH( &r[0], 0, chan_index );
1679 r[0].q = si_csflt(r[0].q, 0);
1680 STORE( &r[0], 0, chan_index );
1681 }
1682 break;
1683
1684 case TGSI_OPCODE_NOT:
1685 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1686 FETCH( &r[0], 0, chan_index );
1687 r[0].q = si_xorbi(r[0].q, 0xff);
1688 STORE( &r[0], 0, chan_index );
1689 }
1690 break;
1691
1692 case TGSI_OPCODE_TRUNC:
1693 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1694 FETCH( &r[0], 0, chan_index );
1695 r[0].q = micro_trunc(r[0].q);
1696 STORE( &r[0], 0, chan_index );
1697 }
1698 break;
1699
1700 case TGSI_OPCODE_SHL:
1701 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1702 FETCH( &r[0], 0, chan_index );
1703 FETCH( &r[1], 1, chan_index );
1704
1705 r[0].q = si_shl(r[0].q, r[1].q);
1706
1707 STORE( &r[0], 0, chan_index );
1708 }
1709 break;
1710
1711 case TGSI_OPCODE_SHR:
1712 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1713 FETCH( &r[0], 0, chan_index );
1714 FETCH( &r[1], 1, chan_index );
1715 r[0].q = micro_ishr(r[0].q, r[1].q);
1716 STORE( &r[0], 0, chan_index );
1717 }
1718 break;
1719
1720 case TGSI_OPCODE_AND:
1721 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1722 FETCH( &r[0], 0, chan_index );
1723 FETCH( &r[1], 1, chan_index );
1724 r[0].q = si_and(r[0].q, r[1].q);
1725 STORE( &r[0], 0, chan_index );
1726 }
1727 break;
1728
1729 case TGSI_OPCODE_OR:
1730 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1731 FETCH( &r[0], 0, chan_index );
1732 FETCH( &r[1], 1, chan_index );
1733 r[0].q = si_or(r[0].q, r[1].q);
1734 STORE( &r[0], 0, chan_index );
1735 }
1736 break;
1737
1738 case TGSI_OPCODE_MOD:
1739 assert (0);
1740 break;
1741
1742 case TGSI_OPCODE_XOR:
1743 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1744 FETCH( &r[0], 0, chan_index );
1745 FETCH( &r[1], 1, chan_index );
1746 r[0].q = si_xor(r[0].q, r[1].q);
1747 STORE( &r[0], 0, chan_index );
1748 }
1749 break;
1750
1751 case TGSI_OPCODE_SAD:
1752 assert (0);
1753 break;
1754
1755 case TGSI_OPCODE_TXF:
1756 assert (0);
1757 break;
1758
1759 case TGSI_OPCODE_TXQ:
1760 assert (0);
1761 break;
1762
1763 case TGSI_OPCODE_EMIT:
1764 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
1765 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1766 break;
1767
1768 case TGSI_OPCODE_ENDPRIM:
1769 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
1770 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
1771 break;
1772
1773 case TGSI_OPCODE_LOOP:
1774 /* fall-through (for now) */
1775 case TGSI_OPCODE_BGNLOOP2:
1776 /* push LoopMask and ContMasks */
1777 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1778 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
1779 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1780 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
1781 break;
1782
1783 case TGSI_OPCODE_ENDLOOP:
1784 /* fall-through (for now at least) */
1785 case TGSI_OPCODE_ENDLOOP2:
1786 /* Restore ContMask, but don't pop */
1787 assert(mach->ContStackTop > 0);
1788 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
1789 if (mach->LoopMask) {
1790 /* repeat loop: jump to instruction just past BGNLOOP */
1791 *pc = inst->InstructionExtLabel.Label + 1;
1792 }
1793 else {
1794 /* exit loop: pop LoopMask */
1795 assert(mach->LoopStackTop > 0);
1796 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
1797 /* pop ContMask */
1798 assert(mach->ContStackTop > 0);
1799 mach->ContMask = mach->ContStack[--mach->ContStackTop];
1800 }
1801 UPDATE_EXEC_MASK(mach);
1802 break;
1803
1804 case TGSI_OPCODE_BRK:
1805 /* turn off loop channels for each enabled exec channel */
1806 mach->LoopMask &= ~mach->ExecMask;
1807 /* Todo: if mach->LoopMask == 0, jump to end of loop */
1808 UPDATE_EXEC_MASK(mach);
1809 break;
1810
1811 case TGSI_OPCODE_CONT:
1812 /* turn off cont channels for each enabled exec channel */
1813 mach->ContMask &= ~mach->ExecMask;
1814 /* Todo: if mach->ContMask == 0, jump to end of loop */
1815 UPDATE_EXEC_MASK(mach);
1816 break;
1817
1818 case TGSI_OPCODE_BGNSUB:
1819 /* no-op */
1820 break;
1821
1822 case TGSI_OPCODE_ENDSUB:
1823 /* no-op */
1824 break;
1825
1826 case TGSI_OPCODE_NOISE1:
1827 assert( 0 );
1828 break;
1829
1830 case TGSI_OPCODE_NOISE2:
1831 assert( 0 );
1832 break;
1833
1834 case TGSI_OPCODE_NOISE3:
1835 assert( 0 );
1836 break;
1837
1838 case TGSI_OPCODE_NOISE4:
1839 assert( 0 );
1840 break;
1841
1842 case TGSI_OPCODE_NOP:
1843 break;
1844
1845 default:
1846 assert( 0 );
1847 }
1848 }
1849
1850
1851 /**
1852 * Run TGSI interpreter.
1853 * \return bitmask of "alive" quad components
1854 */
1855 uint
1856 spu_exec_machine_run( struct spu_exec_machine *mach )
1857 {
1858 uint i;
1859 int pc = 0;
1860
1861 mach->CondMask = 0xf;
1862 mach->LoopMask = 0xf;
1863 mach->ContMask = 0xf;
1864 mach->FuncMask = 0xf;
1865 mach->ExecMask = 0xf;
1866
1867 mach->CondStackTop = 0; /* temporarily subvert this assertion */
1868 assert(mach->CondStackTop == 0);
1869 assert(mach->LoopStackTop == 0);
1870 assert(mach->ContStackTop == 0);
1871 assert(mach->CallStackTop == 0);
1872
1873 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
1874 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
1875
1876 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
1877 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
1878 mach->Primitives[0] = 0;
1879 }
1880
1881
1882 /* execute declarations (interpolants) */
1883 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1884 for (i = 0; i < mach->NumDeclarations; i++) {
1885 union {
1886 struct tgsi_full_declaration decl;
1887 qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16];
1888 } d ALIGN16_ATTRIB;
1889 unsigned ea = (unsigned) (mach->Declarations + i);
1890
1891 spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl));
1892
1893 exec_declaration( mach, &d.decl );
1894 }
1895 }
1896
1897 /* execute instructions, until pc is set to -1 */
1898 while (pc != -1) {
1899 union {
1900 struct tgsi_full_instruction inst;
1901 qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16];
1902 } i ALIGN16_ATTRIB;
1903 unsigned ea = (unsigned) (mach->Instructions + pc);
1904
1905 spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst));
1906 exec_instruction( mach, & i.inst, &pc );
1907 }
1908
1909 #if 0
1910 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
1911 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
1912 /*
1913 * Scale back depth component.
1914 */
1915 for (i = 0; i < 4; i++)
1916 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
1917 }
1918 #endif
1919
1920 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
1921 }
1922
1923