src/gallium/drivers/cell/spu/spu_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask) which are controlled by the flow control
45 * instructions (namely IF/ELSE/ENDIF, LOOP/ENDLOOP, CONT, CAL and RET).
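 *
 * For example, if an IF condition holds only for quad components 0 and 3,
 * CondMask becomes 0x9; with LoopMask, ContMask and FuncMask all 0xf, the
 * resulting ExecMask is 0x9, so store_dest() writes only those components.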
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53 #include <libmisc.h>
54 #include <spu_mfcio.h>
55 #include <transpose_matrix4x4.h>
56 #include <simdmath/ceilf4.h>
57 #include <simdmath/cosf4.h>
58 #include <simdmath/divf4.h>
59 #include <simdmath/floorf4.h>
60 #include <simdmath/log2f4.h>
61 #include <simdmath/powf4.h>
62 #include <simdmath/sinf4.h>
63 #include <simdmath/sqrtf4.h>
64 #include <simdmath/truncf4.h>
65
66 #include "pipe/p_compiler.h"
67 #include "pipe/p_state.h"
68 #include "pipe/p_util.h"
69 #include "pipe/p_shader_tokens.h"
70 #include "tgsi/util/tgsi_parse.h"
71 #include "tgsi/util/tgsi_util.h"
72 #include "spu_exec.h"
73 #include "spu_main.h"
74 #include "spu_vertex_shader.h"
75
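/* Positions of the four pixels within a 2x2 quad (used by the ddx/ddy
 * shuffle patterns below).
 */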
76 #define TILE_TOP_LEFT 0
77 #define TILE_TOP_RIGHT 1
78 #define TILE_BOTTOM_LEFT 2
79 #define TILE_BOTTOM_RIGHT 3
80
81 /*
82 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
83 */
84 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
85 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
86 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
87 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
88 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
89 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
90 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
91 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
92 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
93 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
94 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
95 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
96 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
97 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
98 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
99 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
100 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
101 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
102 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
103 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
104 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
105 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
106 #define TEMP_R0 TGSI_EXEC_TEMP_R0
107
108 #define FOR_EACH_CHANNEL(CHAN)\
109 for (CHAN = 0; CHAN < 4; CHAN++)
110
111 #define IS_CHANNEL_ENABLED(INST, CHAN)\
112 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
113
114 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
115 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
116
117 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
118 FOR_EACH_CHANNEL( CHAN )\
119 if (IS_CHANNEL_ENABLED( INST, CHAN ))
120
121 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
122 FOR_EACH_CHANNEL( CHAN )\
123 if (IS_CHANNEL_ENABLED2( INST, CHAN ))
124
125
126 /** The execution mask depends on the conditional mask and the loop mask */
127 #define UPDATE_EXEC_MASK(MACH) \
128 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
129
130
131 #define CHAN_X 0
132 #define CHAN_Y 1
133 #define CHAN_Z 2
134 #define CHAN_W 3
135
136
137
138 /**
139 * Initialize machine state by expanding tokens to full instructions,
140 * allocating temporary storage, setting up constants, etc.
141 * After this, we can call spu_exec_machine_run() many times.
142 */
143 void
144 spu_exec_machine_init(struct spu_exec_machine *mach,
145 uint numSamplers,
146 struct spu_sampler *samplers,
147 unsigned processor)
148 {
149 const qword zero = si_il(0);
150 const qword not_zero = si_il(~0);
151
152 mach->Samplers = samplers;
153 mach->Processor = processor;
154 mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
155
156 /* Setup constants. */
157 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero;
158 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero;
159 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_rotmi(not_zero, -1); /* 0x7fffffff */
160 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31);
161
162 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f);
163 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f);
164 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f);
165 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f);
166 }
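/* Rough usage sketch (caller-side names are illustrative only):
 *
 *    spu_exec_machine_init(&machine, num_samplers, samplers,
 *                          TGSI_PROCESSOR_FRAGMENT);
 *    ...point machine.Instructions / Declarations / Consts at the shader...
 *    mask = spu_exec_machine_run(&machine);
 */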
167
168
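/** Per-channel absolute value: shift the sign bit out, then logical-shift a zero back in. */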
169 static INLINE qword
170 micro_abs(qword src)
171 {
172 return si_rotmi(si_shli(src, 1), -1);
173 }
174
175 static INLINE qword
176 micro_ceil(qword src)
177 {
178 return (qword) _ceilf4((vec_float4) src);
179 }
180
181 static INLINE qword
182 micro_cos(qword src)
183 {
184 return (qword) _cosf4((vec_float4) src);
185 }
186
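/* Byte-shuffle patterns for si_shufb(), used by micro_ddx()/micro_ddy()
 * below to select individual corners of the 2x2 quad.
 */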
187 static const qword br_shuf = {
188 TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
189 TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
190 TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
191 TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
192 TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
193 TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
194 TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
195 TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
196 };
197
198 static const qword bl_shuf = {
199 TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
200 TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
201 TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
202 TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
203 TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
204 TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
205 TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
206 TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
207 };
208
209 static const qword tl_shuf = {
210 TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
211 TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
212 TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
213 TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
214 TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
215 TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
216 TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
217 TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
218 };
219
220 static qword
221 micro_ddx(qword src)
222 {
223 qword bottom_right = si_shufb(src, src, br_shuf);
224 qword bottom_left = si_shufb(src, src, bl_shuf);
225
226 return si_fs(bottom_right, bottom_left);
227 }
228
229 static qword
230 micro_ddy(qword src)
231 {
232 qword top_left = si_shufb(src, src, tl_shuf);
233 qword bottom_left = si_shufb(src, src, bl_shuf);
234
235 return si_fs(top_left, bottom_left);
236 }
237
238 static INLINE qword
239 micro_div(qword src0, qword src1)
240 {
241 return (qword) _divf4((vec_float4) src0, (vec_float4) src1);
242 }
243
244 static qword
245 micro_flr(qword src)
246 {
247 return (qword) _floorf4((vec_float4) src);
248 }
249
250 static qword
251 micro_frc(qword src)
252 {
253 return si_fs(src, (qword) _floorf4((vec_float4) src));
254 }
255
256 static INLINE qword
257 micro_ge(qword src0, qword src1)
258 {
259 return si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
260 }
261
262 static qword
263 micro_lg2(qword src)
264 {
265 return (qword) _log2f4((vec_float4) src);
266 }
267
268 static INLINE qword
269 micro_lt(qword src0, qword src1)
270 {
271 const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
272
273 return si_xori(tmp, 0xff);
274 }
275
276 static INLINE qword
277 micro_max(qword src0, qword src1)
278 {
279 return si_selb(src1, src0, si_fcgt(src0, src1));
280 }
281
282 static INLINE qword
283 micro_min(qword src0, qword src1)
284 {
285 return si_selb(src0, src1, si_fcgt(src0, src1));
286 }
287
288 static qword
289 micro_neg(qword src)
290 {
291 return si_xor(src, (qword) spu_splats(0x80000000));
292 }
293
294 static qword
295 micro_set_sign(qword src)
296 {
297 return si_or(src, (qword) spu_splats(0x80000000));
298 }
299
300 static qword
301 micro_pow(qword src0, qword src1)
302 {
303 return (qword) _powf4((vec_float4) src0, (vec_float4) src1);
304 }
305
306 static qword
307 micro_rnd(qword src)
308 {
309 const qword half = (qword) spu_splats(0.5f);
310
311 /* May be able to use _roundf4. There may be some difference, though.
312 */
313 return (qword) _floorf4((vec_float4) si_fa(src, half));
314 }
315
316 static INLINE qword
317 micro_ishr(qword src0, qword src1)
318 {
319 return si_rotma(src0, si_sfi(src1, 0));
320 }
321
322 static qword
323 micro_trunc(qword src)
324 {
325 return (qword) _truncf4((vec_float4) src);
326 }
327
328 static qword
329 micro_sin(qword src)
330 {
331 return (qword) _sinf4((vec_float4) src);
332 }
333
334 static INLINE qword
335 micro_sqrt(qword src)
336 {
337 return (qword) _sqrtf4((vec_float4) src);
338 }
339
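/**
 * Fetch one channel of a source register, for all four quad elements, from
 * the given register file according to the (extended) swizzle.  Constants
 * live in main memory and are DMAed into a local buffer first;
 * EXTSWIZZLE_ZERO/ONE return the 0.0/1.0 constant temporaries.
 */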
340 static void
341 fetch_src_file_channel(
342 const struct spu_exec_machine *mach,
343 const uint file,
344 const uint swizzle,
345 const union spu_exec_channel *index,
346 union spu_exec_channel *chan )
347 {
348 switch( swizzle ) {
349 case TGSI_EXTSWIZZLE_X:
350 case TGSI_EXTSWIZZLE_Y:
351 case TGSI_EXTSWIZZLE_Z:
352 case TGSI_EXTSWIZZLE_W:
353 switch( file ) {
354 case TGSI_FILE_CONSTANT: {
355 unsigned char buffer[32] ALIGN16_ATTRIB;
356 unsigned i;
357
358 for (i = 0; i < 4; i++) {
359 const float *ptr = mach->Consts[index->i[i]];
360 const uint64_t addr = (uint64_t)(uintptr_t) ptr;
361 const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32;
362
363 mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0);
364 wait_on_mask(1 << TAG_VERTEX_BUFFER);
365
366 (void) memcpy(& chan->f[i], &buffer[(addr & 0x0f)
367 + (sizeof(float) * swizzle)], sizeof(float));
368 }
369 break;
370 }
371
372 case TGSI_FILE_INPUT:
373 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
374 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
375 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
376 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
377 break;
378
379 case TGSI_FILE_TEMPORARY:
380 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
381 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
382 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
383 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
384 break;
385
386 case TGSI_FILE_IMMEDIATE:
387 assert( index->i[0] < (int) mach->ImmLimit );
388 assert( index->i[1] < (int) mach->ImmLimit );
389 assert( index->i[2] < (int) mach->ImmLimit );
390 assert( index->i[3] < (int) mach->ImmLimit );
391
392 chan->f[0] = mach->Imms[index->i[0]][swizzle];
393 chan->f[1] = mach->Imms[index->i[1]][swizzle];
394 chan->f[2] = mach->Imms[index->i[2]][swizzle];
395 chan->f[3] = mach->Imms[index->i[3]][swizzle];
396 break;
397
398 case TGSI_FILE_ADDRESS:
399 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
400 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
401 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
402 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
403 break;
404
405 case TGSI_FILE_OUTPUT:
406 /* vertex/fragment output vars can be read too */
407 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
408 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
409 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
410 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
411 break;
412
413 default:
414 assert( 0 );
415 }
416 break;
417
418 case TGSI_EXTSWIZZLE_ZERO:
419 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
420 break;
421
422 case TGSI_EXTSWIZZLE_ONE:
423 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
424 break;
425
426 default:
427 assert( 0 );
428 }
429 }
430
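/**
 * Fetch a source operand channel, resolving indirect (relative) addressing,
 * two-dimensional register indexing, the extended swizzle, the sign mode
 * and the complement modifier.
 */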
431 static void
432 fetch_source(
433 const struct spu_exec_machine *mach,
434 union spu_exec_channel *chan,
435 const struct tgsi_full_src_register *reg,
436 const uint chan_index )
437 {
438 union spu_exec_channel index;
439 uint swizzle;
440
441 index.i[0] =
442 index.i[1] =
443 index.i[2] =
444 index.i[3] = reg->SrcRegister.Index;
445
446 if (reg->SrcRegister.Indirect) {
447 union spu_exec_channel index2;
448 union spu_exec_channel indir_index;
449
450 index2.i[0] =
451 index2.i[1] =
452 index2.i[2] =
453 index2.i[3] = reg->SrcRegisterInd.Index;
454
455 swizzle = tgsi_util_get_src_register_swizzle(&reg->SrcRegisterInd,
456 CHAN_X);
457 fetch_src_file_channel(
458 mach,
459 reg->SrcRegisterInd.File,
460 swizzle,
461 &index2,
462 &indir_index );
463
464 index.q = si_a(index.q, indir_index.q);
465 }
466
467 if( reg->SrcRegister.Dimension ) {
468 switch( reg->SrcRegister.File ) {
469 case TGSI_FILE_INPUT:
470 index.q = si_mpyi(index.q, 17);
471 break;
472 case TGSI_FILE_CONSTANT:
473 index.q = si_shli(index.q, 12);
474 break;
475 default:
476 assert( 0 );
477 }
478
479 index.i[0] += reg->SrcRegisterDim.Index;
480 index.i[1] += reg->SrcRegisterDim.Index;
481 index.i[2] += reg->SrcRegisterDim.Index;
482 index.i[3] += reg->SrcRegisterDim.Index;
483
484 if (reg->SrcRegisterDim.Indirect) {
485 union spu_exec_channel index2;
486 union spu_exec_channel indir_index;
487
488 index2.i[0] =
489 index2.i[1] =
490 index2.i[2] =
491 index2.i[3] = reg->SrcRegisterDimInd.Index;
492
493 swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
494 fetch_src_file_channel(
495 mach,
496 reg->SrcRegisterDimInd.File,
497 swizzle,
498 &index2,
499 &indir_index );
500
501 index.q = si_a(index.q, indir_index.q);
502 }
503 }
504
505 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
506 fetch_src_file_channel(
507 mach,
508 reg->SrcRegister.File,
509 swizzle,
510 &index,
511 chan );
512
513 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
514 case TGSI_UTIL_SIGN_CLEAR:
515 chan->q = micro_abs(chan->q);
516 break;
517
518 case TGSI_UTIL_SIGN_SET:
519 chan->q = micro_set_sign(chan->q);
520 break;
521
522 case TGSI_UTIL_SIGN_TOGGLE:
523 chan->q = micro_neg(chan->q);
524 break;
525
526 case TGSI_UTIL_SIGN_KEEP:
527 break;
528 }
529
530 if (reg->SrcRegisterExtMod.Complement) {
531 chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q);
532 }
533 }
534
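/**
 * Store a result channel to the destination register.  Writes are limited
 * to the quad components enabled in ExecMask (the saturating path does not
 * yet honor the mask; see the XXX below) and clamped per the instruction's
 * saturate mode.
 */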
535 static void
536 store_dest(
537 struct spu_exec_machine *mach,
538 const union spu_exec_channel *chan,
539 const struct tgsi_full_dst_register *reg,
540 const struct tgsi_full_instruction *inst,
541 uint chan_index )
542 {
543 union spu_exec_channel *dst;
544
545 switch( reg->DstRegister.File ) {
546 case TGSI_FILE_NULL:
547 return;
548
549 case TGSI_FILE_OUTPUT:
550 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
551 + reg->DstRegister.Index].xyzw[chan_index];
552 break;
553
554 case TGSI_FILE_TEMPORARY:
555 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
556 break;
557
558 case TGSI_FILE_ADDRESS:
559 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
560 break;
561
562 default:
563 assert( 0 );
564 return;
565 }
566
567 switch (inst->Instruction.Saturate)
568 {
569 case TGSI_SAT_NONE:
570 if (mach->ExecMask & 0x1)
571 dst->i[0] = chan->i[0];
572 if (mach->ExecMask & 0x2)
573 dst->i[1] = chan->i[1];
574 if (mach->ExecMask & 0x4)
575 dst->i[2] = chan->i[2];
576 if (mach->ExecMask & 0x8)
577 dst->i[3] = chan->i[3];
578 break;
579
580 case TGSI_SAT_ZERO_ONE:
581 /* XXX need to obey ExecMask here */
582 dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
583 dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q);
584 break;
585
586 case TGSI_SAT_MINUS_PLUS_ONE:
587 assert( 0 );
588 break;
589
590 default:
591 assert( 0 );
592 }
593 }
594
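/* Operand shorthands for exec_instruction() and friends: FETCH reads
 * channel CHAN of source operand INDEX into VAL, STORE writes VAL to
 * channel CHAN of destination operand INDEX.
 */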
595 #define FETCH(VAL,INDEX,CHAN)\
596 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
597
598 #define STORE(VAL,INDEX,CHAN)\
599 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
600
601
602 /**
603 * Execute ARB-style KIL which is predicated by a src register.
604 * Kill fragment if any of the four values is less than zero.
605 */
606 static void
607 exec_kilp(struct spu_exec_machine *mach,
608 const struct tgsi_full_instruction *inst)
609 {
610 uint uniquemask;
611 uint chan_index;
612 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
613 union spu_exec_channel r[1];
614
615 /* This mask stores component bits that were already tested. Note that
616 * we test whether the value is less than zero, so 1.0 and 0.0 need not
617 * be tested. */
618 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
619
620 for (chan_index = 0; chan_index < 4; chan_index++)
621 {
622 uint swizzle;
623 uint i;
624
625 /* unswizzle channel */
626 swizzle = tgsi_util_get_full_src_register_extswizzle (
627 &inst->FullSrcRegisters[0],
628 chan_index);
629
630 /* check if the component has not been already tested */
631 if (uniquemask & (1 << swizzle))
632 continue;
633 uniquemask |= 1 << swizzle;
634
635 FETCH(&r[0], 0, chan_index);
636 for (i = 0; i < 4; i++)
637 if (r[0].f[i] < 0.0f)
638 kilmask |= 1 << i;
639 }
640
641 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
642 }
643
644
645 /*
646 * Fetch a texel using STR texture coordinates.
647 */
648 static void
649 fetch_texel( struct spu_sampler *sampler,
650 const union spu_exec_channel *s,
651 const union spu_exec_channel *t,
652 const union spu_exec_channel *p,
653 float lodbias, /* XXX should be float[4] */
654 union spu_exec_channel *r,
655 union spu_exec_channel *g,
656 union spu_exec_channel *b,
657 union spu_exec_channel *a )
658 {
659 qword rgba[4];
660 qword out[4];
661
662 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, (float *) rgba);
663
664 _transpose_matrix4x4(out, rgba);
665 r->q = out[0];
666 g->q = out[1];
667 b->q = out[2];
668 a->q = out[3];
669 }
670
671
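/**
 * Execute a texture instruction (TEX/TXB/TXL): fetch the texture
 * coordinates, optionally project by W (ExtDivide) and apply an LOD bias,
 * sample through the bound spu_sampler and store the enabled channels.
 */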
672 static void
673 exec_tex(struct spu_exec_machine *mach,
674 const struct tgsi_full_instruction *inst,
675 boolean biasLod)
676 {
677 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
678 union spu_exec_channel r[8];
679 uint chan_index;
680 float lodBias;
681
682 /* printf("Sampler %u unit %u\n", sampler, unit); */
683
684 switch (inst->InstructionExtTexture.Texture) {
685 case TGSI_TEXTURE_1D:
686
687 FETCH(&r[0], 0, CHAN_X);
688
689 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
690 case TGSI_EXTSWIZZLE_W:
691 FETCH(&r[1], 0, CHAN_W);
692 r[0].q = micro_div(r[0].q, r[1].q);
693 break;
694
695 case TGSI_EXTSWIZZLE_ONE:
696 break;
697
698 default:
699 assert (0);
700 }
701
702 if (biasLod) {
703 FETCH(&r[1], 0, CHAN_W);
704 lodBias = r[1].f[0];   /* W component fetched above */
705 }
706 else
707 lodBias = 0.0;
708
709 fetch_texel(&mach->Samplers[unit],
710 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
711 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
712 break;
713
714 case TGSI_TEXTURE_2D:
715 case TGSI_TEXTURE_RECT:
716
717 FETCH(&r[0], 0, CHAN_X);
718 FETCH(&r[1], 0, CHAN_Y);
719 FETCH(&r[2], 0, CHAN_Z);
720
721 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
722 case TGSI_EXTSWIZZLE_W:
723 FETCH(&r[3], 0, CHAN_W);
724 r[0].q = micro_div(r[0].q, r[3].q);
725 r[1].q = micro_div(r[1].q, r[3].q);
726 r[2].q = micro_div(r[2].q, r[3].q);
727 break;
728
729 case TGSI_EXTSWIZZLE_ONE:
730 break;
731
732 default:
733 assert (0);
734 }
735
736 if (biasLod) {
737 FETCH(&r[3], 0, CHAN_W);
738 lodBias = r[3].f[0];
739 }
740 else
741 lodBias = 0.0;
742
743 fetch_texel(&mach->Samplers[unit],
744 &r[0], &r[1], &r[2], lodBias, /* inputs */
745 &r[0], &r[1], &r[2], &r[3]); /* outputs */
746 break;
747
748 case TGSI_TEXTURE_3D:
749 case TGSI_TEXTURE_CUBE:
750
751 FETCH(&r[0], 0, CHAN_X);
752 FETCH(&r[1], 0, CHAN_Y);
753 FETCH(&r[2], 0, CHAN_Z);
754
755 switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
756 case TGSI_EXTSWIZZLE_W:
757 FETCH(&r[3], 0, CHAN_W);
758 r[0].q = micro_div(r[0].q, r[3].q);
759 r[1].q = micro_div(r[1].q, r[3].q);
760 r[2].q = micro_div(r[2].q, r[3].q);
761 break;
762
763 case TGSI_EXTSWIZZLE_ONE:
764 break;
765
766 default:
767 assert (0);
768 }
769
770 if (biasLod) {
771 FETCH(&r[3], 0, CHAN_W);
772 lodBias = r[3].f[0];
773 }
774 else
775 lodBias = 0.0;
776
777 fetch_texel(&mach->Samplers[unit],
778 &r[0], &r[1], &r[2], lodBias,
779 &r[0], &r[1], &r[2], &r[3]);
780 break;
781
782 default:
783 assert (0);
784 }
785
786 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
787 STORE( &r[chan_index], 0, chan_index );
788 }
789 }
790
791
792
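/* Constant, linear and perspective interpolation of fragment inputs across
 * the 2x2 quad, using the a0/dadx/dady plane coefficients in InterpCoefs.
 * The perspective variant additionally divides by W from QuadPos.
 */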
793 static void
794 constant_interpolation(
795 struct spu_exec_machine *mach,
796 unsigned attrib,
797 unsigned chan )
798 {
799 unsigned i;
800
801 for( i = 0; i < QUAD_SIZE; i++ ) {
802 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
803 }
804 }
805
806 static void
807 linear_interpolation(
808 struct spu_exec_machine *mach,
809 unsigned attrib,
810 unsigned chan )
811 {
812 const float x = mach->QuadPos.xyzw[0].f[0];
813 const float y = mach->QuadPos.xyzw[1].f[0];
814 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
815 const float dady = mach->InterpCoefs[attrib].dady[chan];
816 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
817 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
818 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
819 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
820 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
821 }
822
823 static void
824 perspective_interpolation(
825 struct spu_exec_machine *mach,
826 unsigned attrib,
827 unsigned chan )
828 {
829 const float x = mach->QuadPos.xyzw[0].f[0];
830 const float y = mach->QuadPos.xyzw[1].f[0];
831 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
832 const float dady = mach->InterpCoefs[attrib].dady[chan];
833 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
834 const float *w = mach->QuadPos.xyzw[3].f;
835 /* divide by W here */
836 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
837 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
838 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
839 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
840 }
841
842
843 typedef void (* interpolation_func)(
844 struct spu_exec_machine *mach,
845 unsigned attrib,
846 unsigned chan );
847
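/**
 * Process a TGSI declaration.  For fragment-shader inputs this runs the
 * selected interpolation function over the declared attribute range and
 * usage mask.
 */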
848 static void
849 exec_declaration(struct spu_exec_machine *mach,
850 const struct tgsi_full_declaration *decl)
851 {
852 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
853 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
854 unsigned first, last, mask;
855 interpolation_func interp;
856
857 assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
858
859 first = decl->u.DeclarationRange.First;
860 last = decl->u.DeclarationRange.Last;
861 mask = decl->Declaration.UsageMask;
862
863 switch( decl->Interpolation.Interpolate ) {
864 case TGSI_INTERPOLATE_CONSTANT:
865 interp = constant_interpolation;
866 break;
867
868 case TGSI_INTERPOLATE_LINEAR:
869 interp = linear_interpolation;
870 break;
871
872 case TGSI_INTERPOLATE_PERSPECTIVE:
873 interp = perspective_interpolation;
874 break;
875
876 default:
877 assert( 0 );
878 }
879
880 if( mask == TGSI_WRITEMASK_XYZW ) {
881 unsigned i, j;
882
883 for( i = first; i <= last; i++ ) {
884 for( j = 0; j < NUM_CHANNELS; j++ ) {
885 interp( mach, i, j );
886 }
887 }
888 }
889 else {
890 unsigned i, j;
891
892 for( j = 0; j < NUM_CHANNELS; j++ ) {
893 if( mask & (1 << j) ) {
894 for( i = first; i <= last; i++ ) {
895 interp( mach, i, j );
896 }
897 }
898 }
899 }
900 }
901 }
902 }
903
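/**
 * Execute a single TGSI instruction.  *pc is pre-incremented here and may
 * be overwritten by flow-control opcodes (CAL, RET, ENDLOOP, END).
 */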
904 static void
905 exec_instruction(
906 struct spu_exec_machine *mach,
907 const struct tgsi_full_instruction *inst,
908 int *pc )
909 {
910 uint chan_index;
911 union spu_exec_channel r[8];
912
913 (*pc)++;
914
915 switch (inst->Instruction.Opcode) {
916 case TGSI_OPCODE_ARL:
917 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
918 FETCH( &r[0], 0, chan_index );
919 r[0].q = si_cflts(r[0].q, 0);
920 STORE( &r[0], 0, chan_index );
921 }
922 break;
923
924 case TGSI_OPCODE_MOV:
925 /* TGSI_OPCODE_SWZ */
926 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
927 FETCH( &r[0], 0, chan_index );
928 STORE( &r[0], 0, chan_index );
929 }
930 break;
931
932 case TGSI_OPCODE_LIT:
933 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
934 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
935 }
936
937 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
938 FETCH( &r[0], 0, CHAN_X );
939 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
940 r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
941 STORE( &r[0], 0, CHAN_Y );
942 }
943
944 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
945 FETCH( &r[1], 0, CHAN_Y );
946 r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
947
948 FETCH( &r[2], 0, CHAN_W );
949 r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q);
950 r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q);
951 r[1].q = micro_pow(r[1].q, r[2].q);
952
953 /* r0 = (r0 > 0.0) ? r1 : 0.0
954 */
955 r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
956 r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q,
957 r[0].q);
958 STORE( &r[0], 0, CHAN_Z );
959 }
960 }
961
962 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
963 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
964 }
965 break;
966
967 case TGSI_OPCODE_RCP:
968 /* TGSI_OPCODE_RECIP */
969 FETCH( &r[0], 0, CHAN_X );
970 r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
971 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
972 STORE( &r[0], 0, chan_index );
973 }
974 break;
975
976 case TGSI_OPCODE_RSQ:
977 /* TGSI_OPCODE_RECIPSQRT */
978 FETCH( &r[0], 0, CHAN_X );
979 r[0].q = micro_sqrt(r[0].q);
980 r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
981 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
982 STORE( &r[0], 0, chan_index );
983 }
984 break;
985
986 case TGSI_OPCODE_EXP:
987 assert (0);
988 break;
989
990 case TGSI_OPCODE_LOG:
991 assert (0);
992 break;
993
994 case TGSI_OPCODE_MUL:
995 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
996 {
997 FETCH(&r[0], 0, chan_index);
998 FETCH(&r[1], 1, chan_index);
999
1000 r[0].q = si_fm(r[0].q, r[1].q);
1001
1002 STORE(&r[0], 0, chan_index);
1003 }
1004 break;
1005
1006 case TGSI_OPCODE_ADD:
1007 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1008 FETCH( &r[0], 0, chan_index );
1009 FETCH( &r[1], 1, chan_index );
1010 r[0].q = si_fa(r[0].q, r[1].q);
1011 STORE( &r[0], 0, chan_index );
1012 }
1013 break;
1014
1015 case TGSI_OPCODE_DP3:
1016 /* TGSI_OPCODE_DOT3 */
1017 FETCH( &r[0], 0, CHAN_X );
1018 FETCH( &r[1], 1, CHAN_X );
1019 r[0].q = si_fm(r[0].q, r[1].q);
1020
1021 FETCH( &r[1], 0, CHAN_Y );
1022 FETCH( &r[2], 1, CHAN_Y );
1023 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1024
1025
1026 FETCH( &r[1], 0, CHAN_Z );
1027 FETCH( &r[2], 1, CHAN_Z );
1028 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1029
1030 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1031 STORE( &r[0], 0, chan_index );
1032 }
1033 break;
1034
1035 case TGSI_OPCODE_DP4:
1036 /* TGSI_OPCODE_DOT4 */
1037 FETCH(&r[0], 0, CHAN_X);
1038 FETCH(&r[1], 1, CHAN_X);
1039
1040 r[0].q = si_fm(r[0].q, r[1].q);
1041
1042 FETCH(&r[1], 0, CHAN_Y);
1043 FETCH(&r[2], 1, CHAN_Y);
1044
1045 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1046
1047 FETCH(&r[1], 0, CHAN_Z);
1048 FETCH(&r[2], 1, CHAN_Z);
1049
1050 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1051
1052 FETCH(&r[1], 0, CHAN_W);
1053 FETCH(&r[2], 1, CHAN_W);
1054
1055 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1056
1057 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1058 STORE( &r[0], 0, chan_index );
1059 }
1060 break;
1061
1062 case TGSI_OPCODE_DST:
1063 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1064 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1065 }
1066
1067 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1068 FETCH( &r[0], 0, CHAN_Y );
1069 FETCH( &r[1], 1, CHAN_Y);
1070 r[0].q = si_fm(r[0].q, r[1].q);
1071 STORE( &r[0], 0, CHAN_Y );
1072 }
1073
1074 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1075 FETCH( &r[0], 0, CHAN_Z );
1076 STORE( &r[0], 0, CHAN_Z );
1077 }
1078
1079 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1080 FETCH( &r[0], 1, CHAN_W );
1081 STORE( &r[0], 0, CHAN_W );
1082 }
1083 break;
1084
1085 case TGSI_OPCODE_MIN:
1086 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1087 FETCH(&r[0], 0, chan_index);
1088 FETCH(&r[1], 1, chan_index);
1089
1090 r[0].q = micro_min(r[0].q, r[1].q);
1091
1092 STORE(&r[0], 0, chan_index);
1093 }
1094 break;
1095
1096 case TGSI_OPCODE_MAX:
1097 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1098 FETCH(&r[0], 0, chan_index);
1099 FETCH(&r[1], 1, chan_index);
1100
1101 r[0].q = micro_max(r[0].q, r[1].q);
1102
1103 STORE(&r[0], 0, chan_index );
1104 }
1105 break;
1106
1107 case TGSI_OPCODE_SLT:
1108 /* TGSI_OPCODE_SETLT */
1109 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1110 FETCH( &r[0], 0, chan_index );
1111 FETCH( &r[1], 1, chan_index );
1112
1113 r[0].q = micro_ge(r[0].q, r[1].q);
1114 r[0].q = si_xori(r[0].q, 0xff);
1115
1116 STORE( &r[0], 0, chan_index );
1117 }
1118 break;
1119
1120 case TGSI_OPCODE_SGE:
1121 /* TGSI_OPCODE_SETGE */
1122 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1123 FETCH( &r[0], 0, chan_index );
1124 FETCH( &r[1], 1, chan_index );
1125 r[0].q = micro_ge(r[0].q, r[1].q);
1126 STORE( &r[0], 0, chan_index );
1127 }
1128 break;
1129
1130 case TGSI_OPCODE_MAD:
1131 /* TGSI_OPCODE_MADD */
1132 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1133 FETCH( &r[0], 0, chan_index );
1134 FETCH( &r[1], 1, chan_index );
1135 FETCH( &r[2], 2, chan_index );
1136 r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
1137 STORE( &r[0], 0, chan_index );
1138 }
1139 break;
1140
1141 case TGSI_OPCODE_SUB:
1142 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1143 FETCH(&r[0], 0, chan_index);
1144 FETCH(&r[1], 1, chan_index);
1145
1146 r[0].q = si_fs(r[0].q, r[1].q);
1147
1148 STORE(&r[0], 0, chan_index);
1149 }
1150 break;
1151
1152 case TGSI_OPCODE_LERP:
1153 /* TGSI_OPCODE_LRP */
1154 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1155 FETCH(&r[0], 0, chan_index);
1156 FETCH(&r[1], 1, chan_index);
1157 FETCH(&r[2], 2, chan_index);
1158
1159 r[1].q = si_fs(r[1].q, r[2].q);
1160 r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
1161
1162 STORE(&r[0], 0, chan_index);
1163 }
1164 break;
1165
1166 case TGSI_OPCODE_CND:
1167 assert (0);
1168 break;
1169
1170 case TGSI_OPCODE_CND0:
1171 assert (0);
1172 break;
1173
1174 case TGSI_OPCODE_DOT2ADD:
1175 /* TGSI_OPCODE_DP2A */
1176 assert (0);
1177 break;
1178
1179 case TGSI_OPCODE_INDEX:
1180 assert (0);
1181 break;
1182
1183 case TGSI_OPCODE_NEGATE:
1184 assert (0);
1185 break;
1186
1187 case TGSI_OPCODE_FRAC:
1188 /* TGSI_OPCODE_FRC */
1189 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1190 FETCH( &r[0], 0, chan_index );
1191 r[0].q = micro_frc(r[0].q);
1192 STORE( &r[0], 0, chan_index );
1193 }
1194 break;
1195
1196 case TGSI_OPCODE_CLAMP:
1197 assert (0);
1198 break;
1199
1200 case TGSI_OPCODE_FLOOR:
1201 /* TGSI_OPCODE_FLR */
1202 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1203 FETCH( &r[0], 0, chan_index );
1204 r[0].q = micro_flr(r[0].q);
1205 STORE( &r[0], 0, chan_index );
1206 }
1207 break;
1208
1209 case TGSI_OPCODE_ROUND:
1210 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1211 FETCH( &r[0], 0, chan_index );
1212 r[0].q = micro_rnd(r[0].q);
1213 STORE( &r[0], 0, chan_index );
1214 }
1215 break;
1216
1217 case TGSI_OPCODE_EXPBASE2:
1218 /* TGSI_OPCODE_EX2 */
1219 FETCH(&r[0], 0, CHAN_X);
1220
1221 r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
1222
1223 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1224 STORE( &r[0], 0, chan_index );
1225 }
1226 break;
1227
1228 case TGSI_OPCODE_LOGBASE2:
1229 /* TGSI_OPCODE_LG2 */
1230 FETCH( &r[0], 0, CHAN_X );
1231 r[0].q = micro_lg2(r[0].q);
1232 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1233 STORE( &r[0], 0, chan_index );
1234 }
1235 break;
1236
1237 case TGSI_OPCODE_POWER:
1238 /* TGSI_OPCODE_POW */
1239 FETCH(&r[0], 0, CHAN_X);
1240 FETCH(&r[1], 1, CHAN_X);
1241
1242 r[0].q = micro_pow(r[0].q, r[1].q);
1243
1244 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1245 STORE( &r[0], 0, chan_index );
1246 }
1247 break;
1248
1249 case TGSI_OPCODE_CROSSPRODUCT:
1250 /* TGSI_OPCODE_XPD */
1251 FETCH(&r[0], 0, CHAN_Y);
1252 FETCH(&r[1], 1, CHAN_Z);
1253 FETCH(&r[3], 0, CHAN_Z);
1254 FETCH(&r[4], 1, CHAN_Y);
1255
1256 /* r2 = (r0 * r1) - (r3 * r4)
1257 */
1258 r[2].q = si_fm(r[3].q, r[4].q);
1259 r[2].q = si_fms(r[0].q, r[1].q, r[2].q);
1260
1261 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1262 STORE( &r[2], 0, CHAN_X );
1263 }
1264
1265 FETCH(&r[2], 1, CHAN_X);
1266 FETCH(&r[5], 0, CHAN_X);
1267
1268 /* r3 = (r3 * r2) - (r1 * r5)
1269 */
1270 r[1].q = si_fm(r[1].q, r[5].q);
1271 r[3].q = si_fms(r[3].q, r[2].q, r[1].q);
1272
1273 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1274 STORE( &r[3], 0, CHAN_Y );
1275 }
1276
1277 /* r5 = (r5 * r4) - (r0 * r2)
1278 */
1279 r[0].q = si_fm(r[0].q, r[2].q);
1280 r[5].q = si_fms(r[5].q, r[4].q, r[0].q);
1281
1282 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1283 STORE( &r[5], 0, CHAN_Z );
1284 }
1285
1286 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1287 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1288 }
1289 break;
1290
1291 case TGSI_OPCODE_MULTIPLYMATRIX:
1292 assert (0);
1293 break;
1294
1295 case TGSI_OPCODE_ABS:
1296 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1297 FETCH(&r[0], 0, chan_index);
1298
1299 r[0].q = micro_abs(r[0].q);
1300
1301 STORE(&r[0], 0, chan_index);
1302 }
1303 break;
1304
1305 case TGSI_OPCODE_RCC:
1306 assert (0);
1307 break;
1308
1309 case TGSI_OPCODE_DPH:
1310 FETCH(&r[0], 0, CHAN_X);
1311 FETCH(&r[1], 1, CHAN_X);
1312
1313 r[0].q = si_fm(r[0].q, r[1].q);
1314
1315 FETCH(&r[1], 0, CHAN_Y);
1316 FETCH(&r[2], 1, CHAN_Y);
1317
1318 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1319
1320 FETCH(&r[1], 0, CHAN_Z);
1321 FETCH(&r[2], 1, CHAN_Z);
1322
1323 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1324
1325 FETCH(&r[1], 1, CHAN_W);
1326
1327 r[0].q = si_fa(r[0].q, r[1].q);
1328
1329 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1330 STORE( &r[0], 0, chan_index );
1331 }
1332 break;
1333
1334 case TGSI_OPCODE_COS:
1335 FETCH(&r[0], 0, CHAN_X);
1336
1337 r[0].q = micro_cos(r[0].q);
1338
1339 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1340 STORE( &r[0], 0, chan_index );
1341 }
1342 break;
1343
1344 case TGSI_OPCODE_DDX:
1345 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1346 FETCH( &r[0], 0, chan_index );
1347 r[0].q = micro_ddx(r[0].q);
1348 STORE( &r[0], 0, chan_index );
1349 }
1350 break;
1351
1352 case TGSI_OPCODE_DDY:
1353 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1354 FETCH( &r[0], 0, chan_index );
1355 r[0].q = micro_ddy(r[0].q);
1356 STORE( &r[0], 0, chan_index );
1357 }
1358 break;
1359
1360 case TGSI_OPCODE_KILP:
1361 exec_kilp (mach, inst);
1362 break;
1363
1364 case TGSI_OPCODE_KIL:
1365 /* for enabled ExecMask bits, set the killed bit */
1366 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
1367 break;
1368
1369 case TGSI_OPCODE_PK2H:
1370 assert (0);
1371 break;
1372
1373 case TGSI_OPCODE_PK2US:
1374 assert (0);
1375 break;
1376
1377 case TGSI_OPCODE_PK4B:
1378 assert (0);
1379 break;
1380
1381 case TGSI_OPCODE_PK4UB:
1382 assert (0);
1383 break;
1384
1385 case TGSI_OPCODE_RFL:
1386 assert (0);
1387 break;
1388
1389 case TGSI_OPCODE_SEQ:
1390 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1391 FETCH( &r[0], 0, chan_index );
1392 FETCH( &r[1], 1, chan_index );
1393
1394 r[0].q = si_fceq(r[0].q, r[1].q);
1395
1396 STORE( &r[0], 0, chan_index );
1397 }
1398 break;
1399
1400 case TGSI_OPCODE_SFL:
1401 assert (0);
1402 break;
1403
1404 case TGSI_OPCODE_SGT:
1405 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1406 FETCH( &r[0], 0, chan_index );
1407 FETCH( &r[1], 1, chan_index );
1408 r[0].q = si_fcgt(r[0].q, r[1].q);
1409 STORE( &r[0], 0, chan_index );
1410 }
1411 break;
1412
1413 case TGSI_OPCODE_SIN:
1414 FETCH( &r[0], 0, CHAN_X );
1415 r[0].q = micro_sin(r[0].q);
1416 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1417 STORE( &r[0], 0, chan_index );
1418 }
1419 break;
1420
1421 case TGSI_OPCODE_SLE:
1422 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1423 FETCH( &r[0], 0, chan_index );
1424 FETCH( &r[1], 1, chan_index );
1425
1426 r[0].q = si_fcgt(r[0].q, r[1].q);
1427 r[0].q = si_xori(r[0].q, 0xff);
1428
1429 STORE( &r[0], 0, chan_index );
1430 }
1431 break;
1432
1433 case TGSI_OPCODE_SNE:
1434 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1435 FETCH( &r[0], 0, chan_index );
1436 FETCH( &r[1], 1, chan_index );
1437
1438 r[0].q = si_fceq(r[0].q, r[1].q);
1439 r[0].q = si_xori(r[0].q, 0xff);
1440
1441 STORE( &r[0], 0, chan_index );
1442 }
1443 break;
1444
1445 case TGSI_OPCODE_STR:
1446 assert (0);
1447 break;
1448
1449 case TGSI_OPCODE_TEX:
1450 /* simple texture lookup */
1451 /* src[0] = texcoord */
1452 /* src[1] = sampler unit */
1453 exec_tex(mach, inst, FALSE);
1454 break;
1455
1456 case TGSI_OPCODE_TXB:
1457 /* Texture lookup with lod bias */
1458 /* src[0] = texcoord (src[0].w = lod bias) */
1459 /* src[1] = sampler unit */
1460 exec_tex(mach, inst, TRUE);
1461 break;
1462
1463 case TGSI_OPCODE_TXD:
1464 /* Texture lookup with explicit partial derivatives */
1465 /* src[0] = texcoord */
1466 /* src[1] = d[strq]/dx */
1467 /* src[2] = d[strq]/dy */
1468 /* src[3] = sampler unit */
1469 assert (0);
1470 break;
1471
1472 case TGSI_OPCODE_TXL:
1473 /* Texture lookup with explicit LOD */
1474 /* src[0] = texcoord (src[0].w = lod) */
1475 /* src[1] = sampler unit */
1476 exec_tex(mach, inst, TRUE);
1477 break;
1478
1479 case TGSI_OPCODE_UP2H:
1480 assert (0);
1481 break;
1482
1483 case TGSI_OPCODE_UP2US:
1484 assert (0);
1485 break;
1486
1487 case TGSI_OPCODE_UP4B:
1488 assert (0);
1489 break;
1490
1491 case TGSI_OPCODE_UP4UB:
1492 assert (0);
1493 break;
1494
1495 case TGSI_OPCODE_X2D:
1496 assert (0);
1497 break;
1498
1499 case TGSI_OPCODE_ARA:
1500 assert (0);
1501 break;
1502
1503 case TGSI_OPCODE_ARR:
1504 assert (0);
1505 break;
1506
1507 case TGSI_OPCODE_BRA:
1508 assert (0);
1509 break;
1510
1511 case TGSI_OPCODE_CAL:
1512 /* skip the call if no execution channels are enabled */
1513 if (mach->ExecMask) {
1514 /* do the call */
1515
1516 /* push the Cond, Loop, Cont stacks */
1517 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
1518 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
1519 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1520 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
1521 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1522 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
1523
1524 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
1525 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
1526
1527 /* note that PC was already incremented above */
1528 mach->CallStack[mach->CallStackTop++] = *pc;
1529 *pc = inst->InstructionExtLabel.Label;
1530 }
1531 break;
1532
1533 case TGSI_OPCODE_RET:
1534 mach->FuncMask &= ~mach->ExecMask;
1535 UPDATE_EXEC_MASK(mach);
1536
1537 if (mach->ExecMask == 0x0) {
1538 /* really return now (otherwise, keep executing) */
1539
1540 if (mach->CallStackTop == 0) {
1541 /* returning from main() */
1542 *pc = -1;
1543 return;
1544 }
1545 *pc = mach->CallStack[--mach->CallStackTop];
1546
1547 /* pop the Cond, Loop, Cont stacks */
1548 assert(mach->CondStackTop > 0);
1549 mach->CondMask = mach->CondStack[--mach->CondStackTop];
1550 assert(mach->LoopStackTop > 0);
1551 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
1552 assert(mach->ContStackTop > 0);
1553 mach->ContMask = mach->ContStack[--mach->ContStackTop];
1554 assert(mach->FuncStackTop > 0);
1555 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
1556
1557 UPDATE_EXEC_MASK(mach);
1558 }
1559 break;
1560
1561 case TGSI_OPCODE_SSG:
1562 assert (0);
1563 break;
1564
1565 case TGSI_OPCODE_CMP:
1566 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1567 FETCH(&r[0], 0, chan_index);
1568 FETCH(&r[1], 1, chan_index);
1569 FETCH(&r[2], 2, chan_index);
1570
1571 /* r0 = (r0 < 0.0) ? r1 : r2
1572 */
1573 r[3].q = si_xor(r[3].q, r[3].q);
1574 r[0].q = micro_lt(r[0].q, r[3].q);
1575 r[0].q = si_selb(r[1].q, r[2].q, r[0].q);
1576
1577 STORE(&r[0], 0, chan_index);
1578 }
1579 break;
1580
1581 case TGSI_OPCODE_SCS:
1582 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1583 FETCH( &r[0], 0, CHAN_X );
1584 }
1585 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
1586 r[1].q = micro_cos(r[0].q);
1587 STORE( &r[1], 0, CHAN_X );
1588 }
1589 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1590 r[1].q = micro_sin(r[0].q);
1591 STORE( &r[1], 0, CHAN_Y );
1592 }
1593 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
1594 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
1595 }
1596 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
1597 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1598 }
1599 break;
1600
1601 case TGSI_OPCODE_NRM:
1602 assert (0);
1603 break;
1604
1605 case TGSI_OPCODE_DIV:
1606 assert( 0 );
1607 break;
1608
1609 case TGSI_OPCODE_DP2:
1610 FETCH( &r[0], 0, CHAN_X );
1611 FETCH( &r[1], 1, CHAN_X );
1612 r[0].q = si_fm(r[0].q, r[1].q);
1613
1614 FETCH( &r[1], 0, CHAN_Y );
1615 FETCH( &r[2], 1, CHAN_Y );
1616 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1617
1618 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1619 STORE( &r[0], 0, chan_index );
1620 }
1621 break;
1622
1623 case TGSI_OPCODE_IF:
1624 /* push CondMask */
1625 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
1626 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
1627 FETCH( &r[0], 0, CHAN_X );
1628 /* update CondMask */
1629 if( ! r[0].u[0] ) {
1630 mach->CondMask &= ~0x1;
1631 }
1632 if( ! r[0].u[1] ) {
1633 mach->CondMask &= ~0x2;
1634 }
1635 if( ! r[0].u[2] ) {
1636 mach->CondMask &= ~0x4;
1637 }
1638 if( ! r[0].u[3] ) {
1639 mach->CondMask &= ~0x8;
1640 }
1641 UPDATE_EXEC_MASK(mach);
1642 /* Todo: If CondMask==0, jump to ELSE */
1643 break;
1644
1645 case TGSI_OPCODE_ELSE:
1646 /* invert CondMask wrt previous mask */
1647 {
1648 uint prevMask;
1649 assert(mach->CondStackTop > 0);
1650 prevMask = mach->CondStack[mach->CondStackTop - 1];
1651 mach->CondMask = ~mach->CondMask & prevMask;
1652 UPDATE_EXEC_MASK(mach);
1653 /* Todo: If CondMask==0, jump to ENDIF */
1654 }
1655 break;
1656
1657 case TGSI_OPCODE_ENDIF:
1658 /* pop CondMask */
1659 assert(mach->CondStackTop > 0);
1660 mach->CondMask = mach->CondStack[--mach->CondStackTop];
1661 UPDATE_EXEC_MASK(mach);
1662 break;
1663
1664 case TGSI_OPCODE_END:
1665 /* halt execution */
1666 *pc = -1;
1667 break;
1668
1669 case TGSI_OPCODE_REP:
1670 assert (0);
1671 break;
1672
1673 case TGSI_OPCODE_ENDREP:
1674 assert (0);
1675 break;
1676
1677 case TGSI_OPCODE_PUSHA:
1678 assert (0);
1679 break;
1680
1681 case TGSI_OPCODE_POPA:
1682 assert (0);
1683 break;
1684
1685 case TGSI_OPCODE_CEIL:
1686 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1687 FETCH( &r[0], 0, chan_index );
1688 r[0].q = micro_ceil(r[0].q);
1689 STORE( &r[0], 0, chan_index );
1690 }
1691 break;
1692
1693 case TGSI_OPCODE_I2F:
1694 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1695 FETCH( &r[0], 0, chan_index );
1696 r[0].q = si_csflt(r[0].q, 0);
1697 STORE( &r[0], 0, chan_index );
1698 }
1699 break;
1700
1701 case TGSI_OPCODE_NOT:
1702 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1703 FETCH( &r[0], 0, chan_index );
1704 r[0].q = si_xorbi(r[0].q, 0xff);
1705 STORE( &r[0], 0, chan_index );
1706 }
1707 break;
1708
1709 case TGSI_OPCODE_TRUNC:
1710 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1711 FETCH( &r[0], 0, chan_index );
1712 r[0].q = micro_trunc(r[0].q);
1713 STORE( &r[0], 0, chan_index );
1714 }
1715 break;
1716
1717 case TGSI_OPCODE_SHL:
1718 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1719 FETCH( &r[0], 0, chan_index );
1720 FETCH( &r[1], 1, chan_index );
1721
1722 r[0].q = si_shl(r[0].q, r[1].q);
1723
1724 STORE( &r[0], 0, chan_index );
1725 }
1726 break;
1727
1728 case TGSI_OPCODE_SHR:
1729 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1730 FETCH( &r[0], 0, chan_index );
1731 FETCH( &r[1], 1, chan_index );
1732 r[0].q = micro_ishr(r[0].q, r[1].q);
1733 STORE( &r[0], 0, chan_index );
1734 }
1735 break;
1736
1737 case TGSI_OPCODE_AND:
1738 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1739 FETCH( &r[0], 0, chan_index );
1740 FETCH( &r[1], 1, chan_index );
1741 r[0].q = si_and(r[0].q, r[1].q);
1742 STORE( &r[0], 0, chan_index );
1743 }
1744 break;
1745
1746 case TGSI_OPCODE_OR:
1747 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1748 FETCH( &r[0], 0, chan_index );
1749 FETCH( &r[1], 1, chan_index );
1750 r[0].q = si_or(r[0].q, r[1].q);
1751 STORE( &r[0], 0, chan_index );
1752 }
1753 break;
1754
1755 case TGSI_OPCODE_MOD:
1756 assert (0);
1757 break;
1758
1759 case TGSI_OPCODE_XOR:
1760 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1761 FETCH( &r[0], 0, chan_index );
1762 FETCH( &r[1], 1, chan_index );
1763 r[0].q = si_xor(r[0].q, r[1].q);
1764 STORE( &r[0], 0, chan_index );
1765 }
1766 break;
1767
1768 case TGSI_OPCODE_SAD:
1769 assert (0);
1770 break;
1771
1772 case TGSI_OPCODE_TXF:
1773 assert (0);
1774 break;
1775
1776 case TGSI_OPCODE_TXQ:
1777 assert (0);
1778 break;
1779
1780 case TGSI_OPCODE_EMIT:
1781 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
1782 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1783 break;
1784
1785 case TGSI_OPCODE_ENDPRIM:
1786 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
1787 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
1788 break;
1789
1790 case TGSI_OPCODE_LOOP:
1791 /* fall-through (for now) */
1792 case TGSI_OPCODE_BGNLOOP2:
1793 /* push LoopMask and ContMasks */
1794 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1795 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
1796 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1797 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
1798 break;
1799
1800 case TGSI_OPCODE_ENDLOOP:
1801 /* fall-through (for now at least) */
1802 case TGSI_OPCODE_ENDLOOP2:
1803 /* Restore ContMask, but don't pop */
1804 assert(mach->ContStackTop > 0);
1805 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
1806 if (mach->LoopMask) {
1807 /* repeat loop: jump to instruction just past BGNLOOP */
1808 *pc = inst->InstructionExtLabel.Label + 1;
1809 }
1810 else {
1811 /* exit loop: pop LoopMask */
1812 assert(mach->LoopStackTop > 0);
1813 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
1814 /* pop ContMask */
1815 assert(mach->ContStackTop > 0);
1816 mach->ContMask = mach->ContStack[--mach->ContStackTop];
1817 }
1818 UPDATE_EXEC_MASK(mach);
1819 break;
1820
1821 case TGSI_OPCODE_BRK:
1822 /* turn off loop channels for each enabled exec channel */
1823 mach->LoopMask &= ~mach->ExecMask;
1824 /* Todo: if mach->LoopMask == 0, jump to end of loop */
1825 UPDATE_EXEC_MASK(mach);
1826 break;
1827
1828 case TGSI_OPCODE_CONT:
1829 /* turn off cont channels for each enabled exec channel */
1830 mach->ContMask &= ~mach->ExecMask;
1831 /* Todo: if mach->LoopMask == 0, jump to end of loop */
1832 UPDATE_EXEC_MASK(mach);
1833 break;
1834
1835 case TGSI_OPCODE_BGNSUB:
1836 /* no-op */
1837 break;
1838
1839 case TGSI_OPCODE_ENDSUB:
1840 /* no-op */
1841 break;
1842
1843 case TGSI_OPCODE_NOISE1:
1844 assert( 0 );
1845 break;
1846
1847 case TGSI_OPCODE_NOISE2:
1848 assert( 0 );
1849 break;
1850
1851 case TGSI_OPCODE_NOISE3:
1852 assert( 0 );
1853 break;
1854
1855 case TGSI_OPCODE_NOISE4:
1856 assert( 0 );
1857 break;
1858
1859 case TGSI_OPCODE_NOP:
1860 break;
1861
1862 default:
1863 assert( 0 );
1864 }
1865 }
1866
1867
1868 /**
1869 * Run TGSI interpreter.
1870 * \return bitmask of "alive" quad components
1871 */
1872 uint
1873 spu_exec_machine_run( struct spu_exec_machine *mach )
1874 {
1875 uint i;
1876 int pc = 0;
1877
1878 mach->CondMask = 0xf;
1879 mach->LoopMask = 0xf;
1880 mach->ContMask = 0xf;
1881 mach->FuncMask = 0xf;
1882 mach->ExecMask = 0xf;
1883
1884 mach->CondStackTop = 0; /* temporarily subvert this assertion */
1885 assert(mach->CondStackTop == 0);
1886 assert(mach->LoopStackTop == 0);
1887 assert(mach->ContStackTop == 0);
1888 assert(mach->CallStackTop == 0);
1889
1890 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
1891 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
1892
1893 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
1894 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
1895 mach->Primitives[0] = 0;
1896 }
1897
1898
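/* Declarations and instructions live in main memory.  Each one is DMAed
 * into an aligned local buffer (start address rounded down to 16 bytes,
 * size rounded up) and then copied out at its original offset.
 */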
1899 /* execute declarations (interpolants) */
1900 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1901 for (i = 0; i < mach->NumDeclarations; i++) {
1902 uint8_t buffer[sizeof(struct tgsi_full_declaration) + 32] ALIGN16_ATTRIB;
1903 struct tgsi_full_declaration decl;
1904 unsigned long decl_addr = (unsigned long) (mach->Declarations+i);
1905 unsigned size = ((sizeof(decl) + (decl_addr & 0x0f) + 0x0f) & ~0x0f);
1906
1907 mfc_get(buffer, decl_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0);
1908 wait_on_mask(1 << TAG_INSTRUCTION_FETCH);
1909
1910 memcpy(& decl, buffer + (decl_addr & 0x0f), sizeof(decl));
1911 exec_declaration( mach, &decl );
1912 }
1913 }
1914
1915 /* execute instructions, until pc is set to -1 */
1916 while (pc != -1) {
1917 uint8_t buffer[sizeof(struct tgsi_full_instruction) + 32] ALIGN16_ATTRIB;
1918 struct tgsi_full_instruction inst;
1919 unsigned long inst_addr = (unsigned long) (mach->Instructions + pc);
1920 unsigned size = ((sizeof(inst) + (inst_addr & 0x0f) + 0x0f) & ~0x0f);
1921
1922 assert(pc < mach->NumInstructions);
1923 mfc_get(buffer, inst_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0);
1924 wait_on_mask(1 << TAG_INSTRUCTION_FETCH);
1925
1926 memcpy(& inst, buffer + (inst_addr & 0x0f), sizeof(inst));
1927 exec_instruction( mach, & inst, &pc );
1928 }
1929
1930 #if 0
1931 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
1932 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
1933 /*
1934 * Scale back depth component.
1935 */
1936 for (i = 0; i < 4; i++)
1937 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
1938 }
1939 #endif
1940
1941 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
1942 }
1943
1944