1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009 VMware, Inc. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29
30
31 /**
32 * Generate SPU fragment program/shader code.
33 *
34 * Note that we generate SOA-style code here. So each TGSI instruction
35 * operates on four pixels (and is translated into four SPU instructions,
36 * generally speaking).
37 *
38 * \author Brian Paul
39 */
40
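/*
 * Illustrative sketch (not part of the original file): because the code is
 * SOA, each SPE register holds one channel of the same value for a 2x2 quad
 * of pixels.  A TGSI instruction such as
 *
 *     ADD TEMP[0].xy, TEMP[1], TEMP[2]
 *
 * is therefore emitted roughly as one spe_fa() per enabled channel, each
 * operating on all four pixels at once:
 *
 *     spe_fa(f, temp_regs[0][CHAN_X], temp_regs[1][CHAN_X], temp_regs[2][CHAN_X]);
 *     spe_fa(f, temp_regs[0][CHAN_Y], temp_regs[1][CHAN_Y], temp_regs[2][CHAN_Y]);
 *
 * See emit_binop() below for the real implementation.
 */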
41 #include <math.h>
42 #include "pipe/p_defines.h"
43 #include "pipe/p_state.h"
44 #include "pipe/p_shader_tokens.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "tgsi/tgsi_dump.h"
49 #include "rtasm/rtasm_ppc_spe.h"
50 #include "util/u_memory.h"
51 #include "cell_context.h"
52 #include "cell_gen_fp.h"
53
54
55 #define MAX_TEMPS 16
56 #define MAX_IMMED 8
57
58 #define CHAN_X 0
59 #define CHAN_Y 1
60 #define CHAN_Z 2
61 #define CHAN_W 3
62
63 /**
64 * Context needed during code generation.
65 */
66 struct codegen
67 {
68 struct cell_context *cell;
69 int inputs_reg; /**< 1st function parameter */
70 int outputs_reg; /**< 2nd function parameter */
71 int constants_reg; /**< 3rd function parameter */
72 int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
73 int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */
74
75 int num_imm; /**< number of immediates */
76
77 int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
78
79 int addr_reg; /**< address register, integer values */
80
81 /** Per-instruction temps / intermediate temps */
82 int num_itemps;
83 int itemps[12];
84
85 /** Current IF/ELSE/ENDIF nesting level */
86 int if_nesting;
87 /** Current BGNLOOP/ENDLOOP nesting level */
88 int loop_nesting;
89 /** Location of start of current loop */
90 int loop_start;
91
92 /** Index of if/conditional mask register */
93 int cond_mask_reg;
94 /** Index of loop mask register */
95 int loop_mask_reg;
96
97 /** Index of master execution mask register */
98 int exec_mask_reg;
99
100 /** KIL mask: indicates which fragments have been killed */
101 int kill_mask_reg;
102
103 int frame_size; /**< Stack frame size, in words */
104
105 struct spe_function *f;
106 boolean error;
107 };
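/*
 * Illustrative note (not in the original source): the register maps above are
 * indexed by TGSI register index and channel.  For example, a read of TGSI
 * TEMP[1].z uses SPE register gen->temp_regs[1][CHAN_Z], and a read of
 * IMM[0].x uses gen->imm_regs[0][CHAN_X].  The three function-parameter
 * registers follow the convention set up in cell_gen_fragment_program() at
 * the bottom of this file: $3 = inputs pointer, $4 = outputs pointer,
 * $5 = constants pointer.
 */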
108
109
110 /**
111 * Allocate an intermediate temporary register.
112 */
113 static int
114 get_itemp(struct codegen *gen)
115 {
116 int t = spe_allocate_available_register(gen->f);
117 assert(gen->num_itemps < Elements(gen->itemps));
118 gen->itemps[gen->num_itemps++] = t;
119 return t;
120 }
121
122 /**
123 * Free all intermediate temporary registers. To be called after each
124 * instruction has been emitted.
125 */
126 static void
127 free_itemps(struct codegen *gen)
128 {
129 int i;
130 for (i = 0; i < gen->num_itemps; i++) {
131 spe_release_register(gen->f, gen->itemps[i]);
132 }
133 gen->num_itemps = 0;
134 }
135
136
137 /**
138 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
139 * The register is allocated and initialized upon the first call.
140 */
141 static int
142 get_const_one_reg(struct codegen *gen)
143 {
144 if (gen->one_reg <= 0) {
145 gen->one_reg = spe_allocate_available_register(gen->f);
146
147 spe_indent(gen->f, 4);
148 spe_comment(gen->f, -4, "init constant reg = 1.0:");
149
150 /* one = {1.0, 1.0, 1.0, 1.0} */
151 spe_load_float(gen->f, gen->one_reg, 1.0f);
152
153 spe_indent(gen->f, -4);
154 }
155
156 return gen->one_reg;
157 }
158
159
160 /**
161 * Return index of the address register.
162 * Used for indirect register loads/stores.
163 */
164 static int
165 get_address_reg(struct codegen *gen)
166 {
167 if (gen->addr_reg <= 0) {
168 gen->addr_reg = spe_allocate_available_register(gen->f);
169
170 spe_indent(gen->f, 4);
171 spe_comment(gen->f, -4, "init address reg = 0:");
172
173 /* init addr = {0, 0, 0, 0} */
174 spe_zero(gen->f, gen->addr_reg);
175
176 spe_indent(gen->f, -4);
177 }
178
179 return gen->addr_reg;
180 }
181
182
183 /**
184 * Return index of the master execution mask.
185 * The register is allocated and initialized upon the first call.
186 *
187 * The master execution mask controls which pixels in a quad are
188 * modified, according to surrounding conditionals, loops, etc.
189 */
190 static int
191 get_exec_mask_reg(struct codegen *gen)
192 {
193 if (gen->exec_mask_reg <= 0) {
194 gen->exec_mask_reg = spe_allocate_available_register(gen->f);
195
196 /* XXX this may not be needed */
197 spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0");
198 spe_load_int(gen->f, gen->exec_mask_reg, ~0);
199 }
200
201 return gen->exec_mask_reg;
202 }
203
204
205 /** Return index of the conditional (if/else) execution mask register */
206 static int
207 get_cond_mask_reg(struct codegen *gen)
208 {
209 if (gen->cond_mask_reg <= 0) {
210 gen->cond_mask_reg = spe_allocate_available_register(gen->f);
211 }
212
213 return gen->cond_mask_reg;
214 }
215
216
217 /** Return index of the loop execution mask register */
218 static int
219 get_loop_mask_reg(struct codegen *gen)
220 {
221 if (gen->loop_mask_reg <= 0) {
222 gen->loop_mask_reg = spe_allocate_available_register(gen->f);
223 }
224
225 return gen->loop_mask_reg;
226 }
227
228
229
230 static boolean
231 is_register_src(struct codegen *gen, int channel,
232 const struct tgsi_full_src_register *src)
233 {
234 int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
235 int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
236
237 if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
238 return FALSE;
239 }
240 if (src->SrcRegister.File == TGSI_FILE_TEMPORARY ||
241 src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
242 return TRUE;
243 }
244 return FALSE;
245 }
246
247
248 static boolean
249 is_memory_dst(struct codegen *gen, int channel,
250 const struct tgsi_full_dst_register *dst)
251 {
252 if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
253 return TRUE;
254 }
255 else {
256 return FALSE;
257 }
258 }
259
260
261 /**
262 * Return the index of the SPU temporary containing the named TGSI
263 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
264 * just return the corresponding SPE register. If the TGSI register
265 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
266 * and emit an SPE load instruction.
267 */
268 static int
269 get_src_reg(struct codegen *gen,
270 int channel,
271 const struct tgsi_full_src_register *src)
272 {
273 int reg = -1;
274 int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
275 boolean reg_is_itemp = FALSE;
276 uint sign_op;
277
278 assert(swizzle >= TGSI_SWIZZLE_X);
279 assert(swizzle <= TGSI_EXTSWIZZLE_ONE);
280
281 if (swizzle == TGSI_EXTSWIZZLE_ONE) {
282 /* Load const one float and early out */
283 reg = get_const_one_reg(gen);
284 }
285 else if (swizzle == TGSI_EXTSWIZZLE_ZERO) {
286 /* Load const zero float and early out */
287 reg = get_itemp(gen);
288 spe_xor(gen->f, reg, reg, reg);
289 }
290 else {
291 int index = src->SrcRegister.Index;
292
293 assert(swizzle < 4);
294
295 if (src->SrcRegister.Indirect) {
296 /* XXX unfinished */
297 }
298
299 switch (src->SrcRegister.File) {
300 case TGSI_FILE_TEMPORARY:
301 reg = gen->temp_regs[index][swizzle];
302 break;
303 case TGSI_FILE_INPUT:
304 {
305 /* offset is measured in quadwords, not bytes */
306 int offset = index * 4 + swizzle;
307 reg = get_itemp(gen);
308 reg_is_itemp = TRUE;
309 /* Load: reg = memory[(machine_reg) + offset] */
310 spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16);
311 }
312 break;
313 case TGSI_FILE_IMMEDIATE:
314 reg = gen->imm_regs[index][swizzle];
315 break;
316 case TGSI_FILE_CONSTANT:
317 {
318 /* offset is measured in quadwords, not bytes */
319 int offset = index * 4 + swizzle;
320 reg = get_itemp(gen);
321 reg_is_itemp = TRUE;
322 /* Load: reg = memory[(machine_reg) + offset] */
323 spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
324 }
325 break;
326 default:
327 assert(0);
328 }
329 }
330
331 /*
332 * Handle absolute value, negate or set-negative of src register.
333 */
334 sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
335 if (sign_op != TGSI_UTIL_SIGN_KEEP) {
336 /*
337 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
338 */
339 const int bit31mask_reg = get_itemp(gen);
340 int result_reg;
341
342 if (reg_is_itemp) {
343 /* re-use 'reg' for the result */
344 result_reg = reg;
345 }
346 else {
347 /* alloc a new reg for the result */
348 result_reg = get_itemp(gen);
349 }
350
351 /* mask with bit 31 set, the rest cleared */
352 spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
353
354 if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
355 spe_andc(gen->f, result_reg, reg, bit31mask_reg);
356 }
357 else if (sign_op == TGSI_UTIL_SIGN_SET) {
358 spe_and(gen->f, result_reg, reg, bit31mask_reg);
359 }
360 else {
361 assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
362 spe_xor(gen->f, result_reg, reg, bit31mask_reg);
363 }
364
365 reg = result_reg;
366 }
367
368 return reg;
369 }
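/*
 * Worked example (illustrative only): reading INPUT[1].z in get_src_reg()
 * above, assuming a plain .z read with no swizzle or sign modifiers, computes
 * offset = 1 * 4 + 2 = 6 quadwords, so the value is fetched with
 *     spe_lqd(f, reg, gen->inputs_reg, 6 * 16);
 * i.e. from byte offset 96 past the inputs pointer.
 */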
370
371
372 /**
373 * Return the index of an SPE register to use for the given TGSI register.
374 * If the TGSI register is TGSI_FILE_TEMPORARY, the index of the
375 * corresponding SPE register is returned. If the TGSI register is
376 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
377 * See store_dest_reg() below...
378 */
379 static int
380 get_dst_reg(struct codegen *gen,
381 int channel,
382 const struct tgsi_full_dst_register *dest)
383 {
384 int reg = -1;
385
386 switch (dest->DstRegister.File) {
387 case TGSI_FILE_TEMPORARY:
388 if (gen->if_nesting > 0 || gen->loop_nesting > 0)
389 reg = get_itemp(gen);
390 else
391 reg = gen->temp_regs[dest->DstRegister.Index][channel];
392 break;
393 case TGSI_FILE_OUTPUT:
394 reg = get_itemp(gen);
395 break;
396 default:
397 assert(0);
398 }
399
400 return reg;
401 }
402
403
404 /**
405 * When a TGSI instruction is writing to an output register, this
406 * function emits the SPE store instruction to store the value_reg.
407 * \param value_reg the SPE register containing the value to store.
408 * This would have been returned by get_dst_reg().
409 */
410 static void
411 store_dest_reg(struct codegen *gen,
412 int value_reg, int channel,
413 const struct tgsi_full_dst_register *dest)
414 {
415 /*
416 * XXX need to implement dst reg clamping/saturation
417 */
418 #if 0
419 switch (inst->Instruction.Saturate) {
420 case TGSI_SAT_NONE:
421 break;
422 case TGSI_SAT_ZERO_ONE:
423 break;
424 case TGSI_SAT_MINUS_PLUS_ONE:
425 break;
426 default:
427 assert( 0 );
428 }
429 #endif
430
431 switch (dest->DstRegister.File) {
432 case TGSI_FILE_TEMPORARY:
433 if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
434 int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
435 int exec_reg = get_exec_mask_reg(gen);
436 /* Mix d with new value according to exec mask:
437 * d[i] = mask_reg[i] ? value_reg : d_reg
438 */
439 spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
440 }
441 else {
442 /* we're not inside a condition or loop: do nothing special */
443
444 }
445 break;
446 case TGSI_FILE_OUTPUT:
447 {
448 /* offset is measured in quadwords, not bytes */
449 int offset = dest->DstRegister.Index * 4 + channel;
450 if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
451 int exec_reg = get_exec_mask_reg(gen);
452 int curval_reg = get_itemp(gen);
453 /* First read the current value from memory:
454 * Load: curval = memory[(machine_reg) + offset]
455 */
456 spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
457 /* Mix curval with newvalue according to exec mask:
458 * d[i] = mask_reg[i] ? value_reg : d_reg
459 */
460 spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
461 /* Store: memory[(machine_reg) + offset] = curval */
462 spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
463 }
464 else {
465 /* Store: memory[(machine_reg) + offset] = reg */
466 spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16);
467 }
468 }
469 break;
470 default:
471 assert(0);
472 }
473 }
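/*
 * Worked example (illustrative only): storing to OUT[0].y while inside an IF
 * block uses offset = 0 * 4 + 1 = 1 quadword (byte offset 16) and emits
 * roughly:
 *
 *     spe_lqd (f, cur, outputs_reg, 16);        // read current value
 *     spe_selb(f, cur, cur, value, exec_mask);  // keep old value where mask is 0
 *     spe_stqd(f, cur, outputs_reg, 16);        // write merged result
 *
 * Outside of any IF/LOOP the read-modify-write collapses to a single stqd.
 */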
474
475
476
477 static void
478 emit_prologue(struct codegen *gen)
479 {
480 gen->frame_size = 1024; /* XXX temporary, should be dynamic */
481
482 spe_comment(gen->f, 0, "Function prologue:");
483
484 /* save $lr on stack # stqd $lr,16($sp) */
485 spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
486
487 if (gen->frame_size >= 512) {
488 /* offset is too large for ai instruction */
489 int offset_reg = spe_allocate_available_register(gen->f);
490 int sp_reg = spe_allocate_available_register(gen->f);
491 /* offset = -framesize */
492 spe_load_int(gen->f, offset_reg, -gen->frame_size);
493 /* sp = $sp */
494 spe_move(gen->f, sp_reg, SPE_REG_SP);
495 /* $sp = $sp + offset_reg */
496 spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
497 /* save $sp in stack frame */
498 spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0);
499 /* clean up */
500 spe_release_register(gen->f, offset_reg);
501 spe_release_register(gen->f, sp_reg);
502 }
503 else {
504 /* save stack pointer # stqd $sp,-frameSize($sp) */
505 spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
506
507 /* adjust stack pointer # ai $sp,$sp,-frameSize */
508 spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
509 }
510 }
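/*
 * Stack frame sketch (illustrative, derived from the prologue above and from
 * emit_function_call() below):
 *
 *     16(old $sp)    caller's $lr, saved by the prologue
 *      0(new $sp)    back-chain pointer to the old $sp
 *     32(new $sp)..  quadword save slots at 16*(2+i), used to spill live
 *                    registers around SPU function calls
 *
 * The frame size is currently a fixed 1024 bytes (see the XXX note above).
 */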
511
512
513 static void
514 emit_epilogue(struct codegen *gen)
515 {
516 const int return_reg = 3;
517
518 spe_comment(gen->f, 0, "Function epilogue:");
519
520 spe_comment(gen->f, 0, "return the killed mask");
521 if (gen->kill_mask_reg > 0) {
522 /* shader called KIL, return the "killed" mask */
523 spe_move(gen->f, return_reg, gen->kill_mask_reg);
524 }
525 else {
526 /* return {0,0,0,0} */
527 spe_load_uint(gen->f, return_reg, 0);
528 }
529
530 spe_comment(gen->f, 0, "restore stack and return");
531 if (gen->frame_size >= 512) {
532 /* offset is too large for ai instruction */
533 int offset_reg = spe_allocate_available_register(gen->f);
534 /* offset = framesize */
535 spe_load_int(gen->f, offset_reg, gen->frame_size);
536 /* $sp = $sp + offset */
537 spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
538 /* clean up */
539 spe_release_register(gen->f, offset_reg);
540 }
541 else {
542 /* restore stack pointer # ai $sp,$sp,frameSize */
543 spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size);
544 }
545
546 /* restore $lr # lqd $lr,16($sp) */
547 spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
548
549 /* return from function call */
550 spe_bi(gen->f, SPE_REG_RA, 0, 0);
551 }
552
553
554 #define FOR_EACH_ENABLED_CHANNEL(inst, ch) \
555 for (ch = 0; ch < 4; ch++) \
556 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch))
557
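/*
 * Illustrative expansion (not in the original source): for an instruction
 * whose destination writemask is .xz, the body of FOR_EACH_ENABLED_CHANNEL
 * runs only for ch == CHAN_X and ch == CHAN_Z; the .y and .w channels are
 * skipped entirely.
 */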
558
559 static boolean
560 emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst)
561 {
562 int ch = 0, src_reg, addr_reg;
563
564 src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
565 addr_reg = get_address_reg(gen);
566
567 /* convert float to int */
568 spe_cflts(gen->f, addr_reg, src_reg, 0);
569
570 free_itemps(gen);
571
572 return TRUE;
573 }
574
575
576 static boolean
577 emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
578 {
579 int ch, src_reg[4], dst_reg[4];
580
581 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
582 src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
583 dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
584 }
585
586 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
587 if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) &&
588 is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) {
589 /* special-case: register to memory store */
590 store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]);
591 }
592 else {
593 spe_move(gen->f, dst_reg[ch], src_reg[ch]);
594 store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]);
595 }
596 }
597
598 free_itemps(gen);
599
600 return TRUE;
601 }
602
603 /**
604 * Emit binary operation
605 */
606 static boolean
607 emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst)
608 {
609 int ch, s1_reg[4], s2_reg[4], d_reg[4];
610
611 /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
612 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
613 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
614 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
615 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
616 }
617
618 /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */
619 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
620 /* Emit actual SPE instruction: d = s1 + s2 */
621 switch (inst->Instruction.Opcode) {
622 case TGSI_OPCODE_ADD:
623 spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
624 break;
625 case TGSI_OPCODE_SUB:
626 spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
627 break;
628 case TGSI_OPCODE_MUL:
629 spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
630 break;
631 default:
632 ;
633 }
634 }
635
636 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
637 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
638 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
639 }
640
641 /* Free any intermediate temps we allocated */
642 free_itemps(gen);
643
644 return TRUE;
645 }
646
647
648 /**
649 * Emit multiply add. See emit_binop() for comments.
650 */
651 static boolean
652 emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
653 {
654 int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4];
655
656 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
657 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
658 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
659 s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
660 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
661 }
662 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
663 spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
664 }
665 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
666 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
667 }
668 free_itemps(gen);
669 return TRUE;
670 }
671
672
673 /**
674 * Emit linear interpolate. See emit_binop() for comments.
675 */
676 static boolean
677 emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
678 {
679 int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4];
680
681 /* setup/get src/dst/temp regs */
682 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
683 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
684 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
685 s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
686 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
687 tmp_reg[ch] = get_itemp(gen);
688 }
689
690 /* d = s3 + s1 * (s2 - s3), i.e. s1*s2 + (1 - s1)*s3 */
691 /* do all subtracts, then all fma, then all stores to better pipeline */
692 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
693 spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]);
694 }
695 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
696 spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
697 }
698 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
699 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
700 }
701 free_itemps(gen);
702 return TRUE;
703 }
704
705
706
707 /**
708 * Emit reciprocal or recip sqrt.
709 */
710 static boolean
711 emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
712 {
713 int ch, s1_reg[4], d_reg[4], tmp_reg[4];
714
715 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
716 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
717 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
718 tmp_reg[ch] = get_itemp(gen);
719 }
720
721 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
722 if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) {
723 /* tmp = 1/s1 */
724 spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]);
725 }
726 else {
727 /* tmp = 1/sqrt(s1) */
728 spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]);
729 }
730 }
731
732 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
733 /* d = float_interp(s1, tmp) */
734 spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
735 }
736
737 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
738 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
739 }
740
741 free_itemps(gen);
742 return TRUE;
743 }
744
745
746 /**
747 * Emit absolute value. See emit_binop() for comments.
748 */
749 static boolean
750 emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
751 {
752 int ch, s1_reg[4], d_reg[4];
753 const int bit31mask_reg = get_itemp(gen);
754
755 /* mask with bit 31 set, the rest cleared */
756 spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
757
758 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
759 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
760 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
761 }
762
763 /* d = sign bit cleared in s1 */
764 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
765 spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg);
766 }
767
768 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
769 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
770 }
771
772 free_itemps(gen);
773 return TRUE;
774 }
775
776 /**
777 * Emit 3 component dot product. See emit_binop() for comments.
778 */
779 static boolean
780 emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
781 {
782 int ch;
783 int s1x_reg, s1y_reg, s1z_reg;
784 int s2x_reg, s2y_reg, s2z_reg;
785 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
786
787 s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
788 s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
789 s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
790 s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
791 s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
792 s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
793
794 /* t0 = x0 * x1 */
795 spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg);
796
797 /* t1 = y0 * y1 */
798 spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg);
799
800 /* t0 = z0 * z1 + t0 */
801 spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg);
802
803 /* t0 = t0 + t1 */
804 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
805
806 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
807 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
808 spe_move(gen->f, d_reg, t0_reg);
809 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
810 }
811
812 free_itemps(gen);
813 return TRUE;
814 }
815
816 /**
817 * Emit 4 component dot product. See emit_binop() for comments.
818 */
819 static boolean
820 emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
821 {
822 int ch;
823 int s0x_reg, s0y_reg, s0z_reg, s0w_reg;
824 int s1x_reg, s1y_reg, s1z_reg, s1w_reg;
825 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
826
827 s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
828 s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
829 s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
830 s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
831 s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
832 s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
833 s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]);
834 s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
835
836 /* t0 = x0 * x1 */
837 spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg);
838
839 /* t1 = y0 * y1 */
840 spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg);
841
842 /* t0 = z0 * z1 + t0 */
843 spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg);
844
845 /* t1 = w0 * w1 + t1 */
846 spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg);
847
848 /* t0 = t0 + t1 */
849 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
850
851 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
852 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
853 spe_move(gen->f, d_reg, t0_reg);
854 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
855 }
856
857 free_itemps(gen);
858 return TRUE;
859 }
860
861 /**
862 * Emit homogeneous dot product. See emit_binop() for comments.
863 */
864 static boolean
865 emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
866 {
867 /* XXX rewrite this function to look more like DP3/DP4 */
868 int ch;
869 int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
870 int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
871 int tmp_reg = get_itemp(gen);
872
873 /* t = x0 * x1 */
874 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
875
876 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
877 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
878 /* t = y0 * y1 + t */
879 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
880
881 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
882 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
883 /* t = z0 * z1 + t */
884 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
885
886 s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
887 /* t = w1 + t */
888 spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
889
890 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
891 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
892 spe_move(gen->f, d_reg, tmp_reg);
893 store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
894 }
895
896 free_itemps(gen);
897 return TRUE;
898 }
899
900 /**
901 * Emit 3-component vector normalize.
902 */
903 static boolean
904 emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
905 {
906 int ch;
907 int src_reg[3];
908 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
909
910 src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
911 src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
912 src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
913
914 /* t0 = x * x */
915 spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]);
916
917 /* t1 = y * y */
918 spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]);
919
920 /* t0 = z * z + t0 */
921 spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg);
922
923 /* t0 = t0 + t1 */
924 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
925
926 /* t1 = 1.0 / sqrt(t0) */
927 spe_frsqest(gen->f, t1_reg, t0_reg);
928 spe_fi(gen->f, t1_reg, t0_reg, t1_reg);
929
930 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
931 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
932 /* dst = src[ch] * t1 */
933 spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
934 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
935 }
936
937 free_itemps(gen);
938 return TRUE;
939 }
940
941
942 /**
943 * Emit cross product. See emit_binop() for comments.
944 */
945 static boolean
946 emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
947 {
948 int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
949 int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
950 int tmp_reg = get_itemp(gen);
951
952 /* t = z0 * y1 */
953 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
954
955 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
956 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
957 /* t = y0 * z1 - t */
958 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
959
960 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) {
961 store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]);
962 }
963
964 s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
965 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
966 /* t = x0 * z1 */
967 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
968
969 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
970 s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
971 /* t = z0 * x1 - t */
972 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
973
974 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
975 store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]);
976 }
977
978 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
979 s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
980 /* t = y0 * x1 */
981 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
982
983 s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
984 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
985 /* t = x0 * y1 - t */
986 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
987
988 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
989 store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]);
990 }
991
992 free_itemps(gen);
993 return TRUE;
994 }
995
996
997 /**
998 * Emit inequality instruction.
999 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
1000 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
1001 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
1002 */
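/*
 * Worked example (illustrative only): the bit pattern of 1.0f is 0x3f800000,
 * so ANDing the fcgt/fceq result with the constant-one register gives
 *     0xffffffff & 0x3f800000 = 0x3f800000 = 1.0f   (comparison true)
 *     0x00000000 & 0x3f800000 = 0x00000000 = 0.0f   (comparison false)
 * which is exactly the 0.0/1.0 encoding TGSI expects.
 */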
1003 static boolean
1004 emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
1005 {
1006 int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg;
1007 boolean complement = FALSE;
1008
1009 one_reg = get_const_one_reg(gen);
1010
1011 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1012 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1013 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1014 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1015 }
1016
1017 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1018 switch (inst->Instruction.Opcode) {
1019 case TGSI_OPCODE_SGT:
1020 spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
1021 break;
1022 case TGSI_OPCODE_SLT:
1023 spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
1024 break;
1025 case TGSI_OPCODE_SGE:
1026 spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
1027 complement = TRUE;
1028 break;
1029 case TGSI_OPCODE_SLE:
1030 spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
1031 complement = TRUE;
1032 break;
1033 case TGSI_OPCODE_SEQ:
1034 spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
1035 break;
1036 case TGSI_OPCODE_SNE:
1037 spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
1038 complement = TRUE;
1039 break;
1040 default:
1041 assert(0);
1042 }
1043 }
1044
1045 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1046 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1047 /* d = d & one_reg */
1048 if (complement)
1049 spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]);
1050 else
1051 spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]);
1052 }
1053
1054 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1055 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
1056 }
1057
1058 free_itemps(gen);
1059 return TRUE;
1060 }
1061
1062
1063 /**
1064 * Emit compare.
1065 */
1066 static boolean
1067 emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
1068 {
1069 int ch;
1070
1071 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1072 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1073 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1074 int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
1075 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1076 int zero_reg = get_itemp(gen);
1077
1078 spe_zero(gen->f, zero_reg);
1079
1080 /* d = (s1 < 0) ? s2 : s3 */
1081 spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
1082 spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
1083
1084 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1085 free_itemps(gen);
1086 }
1087
1088 return TRUE;
1089 }
1090
1091 /**
1092 * Emit trunc.
1093 * Convert float to signed int,
1094 * then convert the signed int back to float.
1095 */
1096 static boolean
1097 emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1098 {
1099 int ch, s1_reg[4], d_reg[4];
1100
1101 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1102 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1103 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1104 }
1105
1106 /* Convert float to int */
1107 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1108 spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0);
1109 }
1110
1111 /* Convert int to float */
1112 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1113 spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0);
1114 }
1115
1116 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1117 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
1118 }
1119
1120 free_itemps(gen);
1121 return TRUE;
1122 }
1123
1124
1125 /**
1126 * Emit floor.
1127 * If the value is negative, subtract one,
1128 * then convert float to signed int,
1129 * and convert the signed int back to float.
1130 */
1131 static boolean
1132 emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
1133 {
1134 int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
1135
1136 zero_reg = get_itemp(gen);
1137 spe_zero(gen->f, zero_reg);
1138 one_reg = get_const_one_reg(gen);
1139
1140 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1141 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1142 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1143 tmp_reg[ch] = get_itemp(gen);
1144 }
1145
1146 /* If negative, subtract 1.0 */
1147 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1148 spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
1149 }
1150 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1151 spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
1152 }
1153 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1154 spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
1155 }
1156
1157 /* Convert float to int */
1158 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1159 spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
1160 }
1161
1162 /* Convert int to float */
1163 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1164 spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0);
1165 }
1166
1167 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1168 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
1169 }
1170
1171 free_itemps(gen);
1172 return TRUE;
1173 }
1174
1175
1176 /**
1177 * Compute frac = Input - FLR(Input)
1178 */
1179 static boolean
1180 emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1181 {
1182 int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
1183
1184 zero_reg = get_itemp(gen);
1185 spe_zero(gen->f, zero_reg);
1186 one_reg = get_const_one_reg(gen);
1187
1188 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1189 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1190 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1191 tmp_reg[ch] = get_itemp(gen);
1192 }
1193
1194 /* If negative, subtract 1.0 */
1195 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1196 spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
1197 }
1198 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1199 spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
1200 }
1201 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1202 spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
1203 }
1204
1205 /* Convert float to int */
1206 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1207 spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
1208 }
1209
1210 /* Convert int to float */
1211 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1212 spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
1213 }
1214
1215 /* d = s1 - FLR(s1) */
1216 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1217 spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
1218 }
1219
1220 /* store result */
1221 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1222 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
1223 }
1224
1225 free_itemps(gen);
1226 return TRUE;
1227 }
1228
1229
1230 #if 0
1231 static void
1232 print_functions(struct cell_context *cell)
1233 {
1234 struct cell_spu_function_info *funcs = &cell->spu_functions;
1235 uint i;
1236 for (i = 0; i < funcs->num; i++) {
1237 printf("SPU func %u: %s at %u\n",
1238 i, funcs->names[i], funcs->addrs[i]);
1239 }
1240 }
1241 #endif
1242
1243
1244 static uint
1245 lookup_function(struct cell_context *cell, const char *funcname)
1246 {
1247 const struct cell_spu_function_info *funcs = &cell->spu_functions;
1248 uint i, addr = 0;
1249 for (i = 0; i < funcs->num; i++) {
1250 if (strcmp(funcs->names[i], funcname) == 0) {
1251 addr = funcs->addrs[i];
1252 }
1253 }
1254 assert(addr && "spu function not found");
1255 return addr / 4; /* discard 2 least significant bits */
1256 }
1257
1258
1259 /**
1260 * Emit code to call a SPU function.
1261 * Used to implement instructions like SIN/COS/POW/TEX/etc.
1262 * If scalar, only the X components of the src regs are used, and the
1263 * result is replicated across the dest register's XYZW components.
1264 */
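/*
 * Rough call-sequence sketch (illustrative): for a scalar call such as COS,
 * the code below
 *   1. spills the currently-used registers to the stack at 16*(2+i)($sp),
 *   2. copies the source operand(s) into argument registers $3..$5,
 *   3. branches with spe_brasl() to the SPU function found by
 *      lookup_function(),
 *   4. copies the return value out of $3, and
 *   5. reloads the spilled registers,
 * then replicates the result into every write-enabled channel of the dest.
 */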
1265 static boolean
1266 emit_function_call(struct codegen *gen,
1267 const struct tgsi_full_instruction *inst,
1268 char *funcname, uint num_args, boolean scalar)
1269 {
1270 const uint addr = lookup_function(gen->cell, funcname);
1271 char comment[100];
1272 int s_regs[3];
1273 int func_called = FALSE;
1274 uint a, ch;
1275 int retval_reg = -1;
1276
1277 assert(num_args <= 3);
1278
1279 snprintf(comment, sizeof(comment), "CALL %s:", funcname);
1280 spe_comment(gen->f, -4, comment);
1281
1282 if (scalar) {
1283 for (a = 0; a < num_args; a++) {
1284 s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]);
1285 }
1286 /* we'll call the function, put the return value in this register,
1287 * then replicate it across all write-enabled components in d_reg.
1288 */
1289 retval_reg = spe_allocate_available_register(gen->f);
1290 }
1291
1292 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1293 int d_reg;
1294 ubyte usedRegs[SPE_NUM_REGS];
1295 uint i, numUsed;
1296
1297 if (!scalar) {
1298 for (a = 0; a < num_args; a++) {
1299 s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
1300 }
1301 }
1302
1303 d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1304
1305 if (!scalar || !func_called) {
1306 /* for a scalar function, we'll really only call the function once */
1307
1308 numUsed = spe_get_registers_used(gen->f, usedRegs);
1309 assert(numUsed < gen->frame_size / 16 - 2);
1310
1311 /* save registers to stack */
1312 for (i = 0; i < numUsed; i++) {
1313 uint reg = usedRegs[i];
1314 int offset = 2 + i;
1315 spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1316 }
1317
1318 /* setup function arguments */
1319 for (a = 0; a < num_args; a++) {
1320 spe_move(gen->f, 3 + a, s_regs[a]);
1321 }
1322
1323 /* branch to function, save return addr */
1324 spe_brasl(gen->f, SPE_REG_RA, addr);
1325
1326 /* save function's return value */
1327 if (scalar)
1328 spe_move(gen->f, retval_reg, 3);
1329 else
1330 spe_move(gen->f, d_reg, 3);
1331
1332 /* restore registers from stack */
1333 for (i = 0; i < numUsed; i++) {
1334 uint reg = usedRegs[i];
1335 if (reg != d_reg && reg != retval_reg) {
1336 int offset = 2 + i;
1337 spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1338 }
1339 }
1340
1341 func_called = TRUE;
1342 }
1343
1344 if (scalar) {
1345 spe_move(gen->f, d_reg, retval_reg);
1346 }
1347
1348 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1349 free_itemps(gen);
1350 }
1351
1352 if (scalar) {
1353 spe_release_register(gen->f, retval_reg);
1354 }
1355
1356 return TRUE;
1357 }
1358
1359
1360 static boolean
1361 emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
1362 {
1363 const uint target = inst->InstructionExtTexture.Texture;
1364 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1365 uint addr;
1366 int ch;
1367 int coord_regs[4], d_regs[4];
1368
1369 switch (target) {
1370 case TGSI_TEXTURE_1D:
1371 case TGSI_TEXTURE_2D:
1372 addr = lookup_function(gen->cell, "spu_tex_2d");
1373 break;
1374 case TGSI_TEXTURE_3D:
1375 addr = lookup_function(gen->cell, "spu_tex_3d");
1376 break;
1377 case TGSI_TEXTURE_CUBE:
1378 addr = lookup_function(gen->cell, "spu_tex_cube");
1379 break;
1380 default:
1381 ASSERT(0 && "unsupported texture target");
1382 return FALSE;
1383 }
1384
1385 assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER);
1386
1387 spe_comment(gen->f, -4, "CALL tex:");
1388
1389 /* get src/dst reg info */
1390 for (ch = 0; ch < 4; ch++) {
1391 coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1392 d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1393 }
1394
1395 {
1396 ubyte usedRegs[SPE_NUM_REGS];
1397 uint i, numUsed;
1398
1399 numUsed = spe_get_registers_used(gen->f, usedRegs);
1400 assert(numUsed < gen->frame_size / 16 - 2);
1401
1402 /* save registers to stack */
1403 for (i = 0; i < numUsed; i++) {
1404 uint reg = usedRegs[i];
1405 int offset = 2 + i;
1406 spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1407 }
1408
1409 /* setup function arguments (XXX depends on target) */
1410 for (i = 0; i < 4; i++) {
1411 spe_move(gen->f, 3 + i, coord_regs[i]);
1412 }
1413 spe_load_uint(gen->f, 7, unit); /* sampler unit */
1414
1415 /* branch to function, save return addr */
1416 spe_brasl(gen->f, SPE_REG_RA, addr);
1417
1418 /* save function's return values (four pixel's colors) */
1419 for (i = 0; i < 4; i++) {
1420 spe_move(gen->f, d_regs[i], 3 + i);
1421 }
1422
1423 /* restore registers from stack */
1424 for (i = 0; i < numUsed; i++) {
1425 uint reg = usedRegs[i];
1426 if (reg != d_regs[0] &&
1427 reg != d_regs[1] &&
1428 reg != d_regs[2] &&
1429 reg != d_regs[3]) {
1430 int offset = 2 + i;
1431 spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1432 }
1433 }
1434 }
1435
1436 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1437 store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]);
1438 free_itemps(gen);
1439 }
1440
1441 return TRUE;
1442 }
1443
1444
1445 /**
1446 * KILL if any of src reg values are less than zero.
1447 */
1448 static boolean
1449 emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
1450 {
1451 int ch;
1452 int s_regs[4], kil_reg = -1, cmp_reg, zero_reg;
1453
1454 spe_comment(gen->f, -4, "CALL kil:");
1455
1456 /* zero = {0,0,0,0} */
1457 zero_reg = get_itemp(gen);
1458 spe_zero(gen->f, zero_reg);
1459
1460 cmp_reg = get_itemp(gen);
1461
1462 /* get src regs */
1463 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1464 s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1465 }
1466
1467 /* test if any src regs are < 0 */
1468 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1469 if (kil_reg >= 0) {
1470 /* cmp = (0 > src) ? ~0 : 0 */
1471 spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
1472 /* kil = kil | cmp */
1473 spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
1474 }
1475 else {
1476 kil_reg = get_itemp(gen);
1477 /* kil = (0 > src) ? ~0 : 0 */
1478 spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
1479 }
1480 }
1481
1482 if (gen->if_nesting || gen->loop_nesting) {
1483 /* may have been a conditional kil */
1484 spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
1485 }
1486
1487 /* allocate the kill mask reg if needed */
1488 if (gen->kill_mask_reg <= 0) {
1489 gen->kill_mask_reg = spe_allocate_available_register(gen->f);
1490 spe_move(gen->f, gen->kill_mask_reg, kil_reg);
1491 }
1492 else {
1493 spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
1494 }
1495
1496 free_itemps(gen);
1497
1498 return TRUE;
1499 }
1500
1501
1502
1503 /**
1504 * Emit min or max.
1505 */
1506 static boolean
1507 emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
1508 {
1509 int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
1510
1511 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1512 s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1513 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1514 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1515 tmp_reg[ch] = get_itemp(gen);
1516 }
1517
1518 /* d = (s0 > s1) ? s0 : s1 */
1519 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1520 if (inst->Instruction.Opcode == TGSI_OPCODE_MAX)
1521 spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]);
1522 else
1523 spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]);
1524 }
1525 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1526 spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
1527 }
1528
1529 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1530 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
1531 }
1532
1533 free_itemps(gen);
1534 return TRUE;
1535 }
1536
1537
1538 /**
1539 * Emit code to update the execution mask.
1540 * This needs to be done whenever the execution status of a conditional
1541 * or loop is changed.
1542 */
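/*
 * Illustrative example (not in the original source): with one IF active and
 * no loop, the master mask is simply a copy of the conditional mask; with
 * both active it is their AND.  For a quad where the IF condition held for
 * pixels 0 and 2 only,
 *     cond_mask = { ~0, 0, ~0, 0 }
 * and store_dest_reg()'s spe_selb() then updates only pixels 0 and 2,
 * leaving the other two untouched.
 */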
1543 static void
1544 emit_update_exec_mask(struct codegen *gen)
1545 {
1546 const int exec_reg = get_exec_mask_reg(gen);
1547 const int cond_reg = gen->cond_mask_reg;
1548 const int loop_reg = gen->loop_mask_reg;
1549
1550 spe_comment(gen->f, 0, "Update master execution mask");
1551
1552 if (gen->if_nesting > 0 && gen->loop_nesting > 0) {
1553 /* exec_mask = cond_mask & loop_mask */
1554 assert(cond_reg > 0);
1555 assert(loop_reg > 0);
1556 spe_and(gen->f, exec_reg, cond_reg, loop_reg);
1557 }
1558 else if (gen->if_nesting > 0) {
1559 assert(cond_reg > 0);
1560 spe_move(gen->f, exec_reg, cond_reg);
1561 }
1562 else if (gen->loop_nesting > 0) {
1563 assert(loop_reg > 0);
1564 spe_move(gen->f, exec_reg, loop_reg);
1565 }
1566 else {
1567 spe_load_int(gen->f, exec_reg, ~0x0);
1568 }
1569 }
1570
1571
1572 static boolean
1573 emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1574 {
1575 const int channel = 0;
1576 int cond_reg;
1577
1578 cond_reg = get_cond_mask_reg(gen);
1579
1580 /* XXX push cond exec mask */
1581
1582 spe_comment(gen->f, 0, "init conditional exec mask = ~0:");
1583 spe_load_int(gen->f, cond_reg, ~0);
1584
1585 /* update conditional execution mask with the predicate register */
1586 int tmp_reg = get_itemp(gen);
1587 int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
1588
1589 /* tmp = (s1_reg == 0) */
1590 spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
1591 /* tmp = !tmp */
1592 spe_complement(gen->f, tmp_reg, tmp_reg);
1593 /* cond_mask = cond_mask & tmp */
1594 spe_and(gen->f, cond_reg, cond_reg, tmp_reg);
1595
1596 gen->if_nesting++;
1597
1598 /* update the master execution mask */
1599 emit_update_exec_mask(gen);
1600
1601 free_itemps(gen);
1602
1603 return TRUE;
1604 }
1605
1606
1607 static boolean
1608 emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1609 {
1610 const int cond_reg = get_cond_mask_reg(gen);
1611
1612 spe_comment(gen->f, 0, "cond exec mask = !cond exec mask");
1613 spe_complement(gen->f, cond_reg, cond_reg);
1614 emit_update_exec_mask(gen);
1615
1616 return TRUE;
1617 }
1618
1619
1620 static boolean
1621 emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1622 {
1623 /* XXX todo: pop cond exec mask */
1624
1625 gen->if_nesting--;
1626
1627 emit_update_exec_mask(gen);
1628
1629 return TRUE;
1630 }
1631
1632
1633 static boolean
1634 emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
1635 {
1636 int exec_reg, loop_reg;
1637
1638 exec_reg = get_exec_mask_reg(gen);
1639 loop_reg = get_loop_mask_reg(gen);
1640
1641 /* XXX push loop_exec mask */
1642
1643 spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0");
1644 spe_load_int(gen->f, loop_reg, ~0x0);
1645
1646 gen->loop_nesting++;
1647 gen->loop_start = spe_code_size(gen->f); /* in bytes */
1648
1649 return TRUE;
1650 }
1651
1652
1653 static boolean
1654 emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
1655 {
1656 const int loop_reg = get_loop_mask_reg(gen);
1657 const int tmp_reg = get_itemp(gen);
1658 int offset;
1659
1660 /* tmp_reg = loop_mask[0] | loop_mask[1] | loop_mask[2] | loop_mask[3] */
1661 spe_orx(gen->f, tmp_reg, loop_reg);
1662
1663 offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */
1664
1665 /* branch back to top of loop if tmp_reg != 0 */
1666 spe_brnz(gen->f, tmp_reg, offset / 4);
1667
1668 /* XXX pop loop_exec mask */
1669
1670 gen->loop_nesting--;
1671
1672 emit_update_exec_mask(gen);
1673
1674 return TRUE;
1675 }
1676
1677
1678 static boolean
1679 emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst)
1680 {
1681 const int exec_reg = get_exec_mask_reg(gen);
1682 const int loop_reg = get_loop_mask_reg(gen);
1683
1684 assert(gen->loop_nesting > 0);
1685
1686 spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask");
1687 spe_andc(gen->f, loop_reg, loop_reg, exec_reg);
1688
1689 emit_update_exec_mask(gen);
1690
1691 return TRUE;
1692 }
1693
1694
1695 static boolean
1696 emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst)
1697 {
1698 assert(gen->loop_nesting > 0);
1699
1700 return TRUE;
1701 }
1702
1703
1704 static boolean
1705 emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
1706 boolean ddx)
1707 {
1708 int ch;
1709
1710 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1711 int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1712 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1713
1714 int t1_reg = get_itemp(gen);
1715 int t2_reg = get_itemp(gen);
1716
1717 spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
1718 if (ddx) {
1719 spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
1720 }
1721 else {
1722 spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
1723 }
1724 spe_fs(gen->f, d_reg, t2_reg, t1_reg);
1725
1726 free_itemps(gen);
1727 }
1728
1729 return TRUE;
1730 }
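/*
 * Illustrative note (derived from the comments in emit_DDX_DDY above): within
 * one SOA register the four words hold the 2x2 quad as
 *     word 0 = upper-left, word 1 = upper-right, word 2 = lower-left,
 * so DDX is computed as (upper-right - upper-left) and DDY as
 * (lower-left - upper-left), with each pixel's value first replicated across
 * the whole register by spe_splat_word() before the subtract.
 */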
1731
1732
1733
1734
1735 /**
1736 * Emit END instruction.
1737 * We just return from the shader function at this point.
1738 *
1739 * Note that there may be more code after this that would be
1740 * called by TGSI_OPCODE_CALL.
1741 */
1742 static boolean
1743 emit_END(struct codegen *gen)
1744 {
1745 emit_epilogue(gen);
1746 return TRUE;
1747 }
1748
1749
1750 /**
1751 * Emit code for the given instruction. Just a big switch stmt.
1752 */
1753 static boolean
1754 emit_instruction(struct codegen *gen,
1755 const struct tgsi_full_instruction *inst)
1756 {
1757 switch (inst->Instruction.Opcode) {
1758 case TGSI_OPCODE_ARL:
1759 return emit_ARL(gen, inst);
1760 case TGSI_OPCODE_MOV:
1761 case TGSI_OPCODE_SWZ:
1762 return emit_MOV(gen, inst);
1763 case TGSI_OPCODE_ADD:
1764 case TGSI_OPCODE_SUB:
1765 case TGSI_OPCODE_MUL:
1766 return emit_binop(gen, inst);
1767 case TGSI_OPCODE_MAD:
1768 return emit_MAD(gen, inst);
1769 case TGSI_OPCODE_LERP:
1770 return emit_LERP(gen, inst);
1771 case TGSI_OPCODE_DP3:
1772 return emit_DP3(gen, inst);
1773 case TGSI_OPCODE_DP4:
1774 return emit_DP4(gen, inst);
1775 case TGSI_OPCODE_DPH:
1776 return emit_DPH(gen, inst);
1777 case TGSI_OPCODE_NRM:
1778 return emit_NRM3(gen, inst);
1779 case TGSI_OPCODE_XPD:
1780 return emit_XPD(gen, inst);
1781 case TGSI_OPCODE_RCP:
1782 case TGSI_OPCODE_RSQ:
1783 return emit_RCP_RSQ(gen, inst);
1784 case TGSI_OPCODE_ABS:
1785 return emit_ABS(gen, inst);
1786 case TGSI_OPCODE_SGT:
1787 case TGSI_OPCODE_SLT:
1788 case TGSI_OPCODE_SGE:
1789 case TGSI_OPCODE_SLE:
1790 case TGSI_OPCODE_SEQ:
1791 case TGSI_OPCODE_SNE:
1792 return emit_inequality(gen, inst);
1793 case TGSI_OPCODE_CMP:
1794 return emit_CMP(gen, inst);
1795 case TGSI_OPCODE_MIN:
1796 case TGSI_OPCODE_MAX:
1797 return emit_MIN_MAX(gen, inst);
1798 case TGSI_OPCODE_TRUNC:
1799 return emit_TRUNC(gen, inst);
1800 case TGSI_OPCODE_FLR:
1801 return emit_FLR(gen, inst);
1802 case TGSI_OPCODE_FRC:
1803 return emit_FRC(gen, inst);
1804 case TGSI_OPCODE_END:
1805 return emit_END(gen);
1806
1807 case TGSI_OPCODE_COS:
1808 return emit_function_call(gen, inst, "spu_cos", 1, TRUE);
1809 case TGSI_OPCODE_SIN:
1810 return emit_function_call(gen, inst, "spu_sin", 1, TRUE);
1811 case TGSI_OPCODE_POW:
1812 return emit_function_call(gen, inst, "spu_pow", 2, TRUE);
1813 case TGSI_OPCODE_EXPBASE2:
1814 return emit_function_call(gen, inst, "spu_exp2", 1, TRUE);
1815 case TGSI_OPCODE_LOGBASE2:
1816 return emit_function_call(gen, inst, "spu_log2", 1, TRUE);
1817 case TGSI_OPCODE_TEX:
1818 /* fall-through for now */
1819 case TGSI_OPCODE_TXD:
1820 /* fall-through for now */
1821 case TGSI_OPCODE_TXB:
1822 /* fall-through for now */
1823 case TGSI_OPCODE_TXL:
1824 /* fall-through for now */
1825 case TGSI_OPCODE_TXP:
1826 return emit_TEX(gen, inst);
1827 case TGSI_OPCODE_KIL:
1828 return emit_KIL(gen, inst);
1829
1830 case TGSI_OPCODE_IF:
1831 return emit_IF(gen, inst);
1832 case TGSI_OPCODE_ELSE:
1833 return emit_ELSE(gen, inst);
1834 case TGSI_OPCODE_ENDIF:
1835 return emit_ENDIF(gen, inst);
1836
1837 case TGSI_OPCODE_BGNLOOP2:
1838 return emit_BGNLOOP(gen, inst);
1839 case TGSI_OPCODE_ENDLOOP2:
1840 return emit_ENDLOOP(gen, inst);
1841 case TGSI_OPCODE_BRK:
1842 return emit_BRK(gen, inst);
1843 case TGSI_OPCODE_CONT:
1844 return emit_CONT(gen, inst);
1845
1846 case TGSI_OPCODE_DDX:
1847 return emit_DDX_DDY(gen, inst, TRUE);
1848 case TGSI_OPCODE_DDY:
1849 return emit_DDX_DDY(gen, inst, FALSE);
1850
1851 /* XXX lots more cases to do... */
1852
1853 default:
1854 fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
1855 inst->Instruction.Opcode);
1856 return FALSE;
1857 }
1858
1859 return TRUE;
1860 }
1861
1862
1863
1864 /**
1865 * Emit code for a TGSI immediate value (vector of four floats).
1866 * This involves register allocation and initialization.
1867 * XXX the initialization should be done by a "prepare" stage, not
1868 * per quad execution!
1869 */
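/*
 * Illustrative example (not in the original source): for the immediate vector
 * { 0.5, 0.5, 0.5, 1.0 } the loop below allocates one register loaded with
 * 0.5 for the X channel, reuses that same register for Y and Z (each value
 * matches the previous channel), and allocates a second register loaded with
 * 1.0 for W.
 */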
1870 static boolean
1871 emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
1872 {
1873 int ch;
1874
1875 assert(gen->num_imm < MAX_IMMED);
1876
1877 for (ch = 0; ch < 4; ch++) {
1878 float val = immed->u[ch].Float;
1879
1880 if (ch > 0 && val == immed->u[ch - 1].Float) {
1881 /* re-use previous register */
1882 gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
1883 }
1884 else {
1885 char str[100];
1886 int reg = spe_allocate_available_register(gen->f);
1887
1888 if (reg < 0)
1889 return FALSE;
1890
1891 sprintf(str, "init $%d = %f", reg, val);
1892 spe_comment(gen->f, 0, str);
1893
1894 /* update immediate map */
1895 gen->imm_regs[gen->num_imm][ch] = reg;
1896
1897 /* emit initializer instruction */
1898 spe_load_float(gen->f, reg, val);
1899 }
1900 }
1901
1902 gen->num_imm++;
1903
1904 return TRUE;
1905 }
1906
1907
1908
1909 /**
1910 * Emit "code" for a TGSI declaration.
1911 * We only care about TGSI TEMPORARY register declarations at this time.
1912 * For each TGSI TEMPORARY we allocate four SPE registers.
1913 */
1914 static boolean
1915 emit_declaration(struct cell_context *cell,
1916 struct codegen *gen, const struct tgsi_full_declaration *decl)
1917 {
1918 int i, ch;
1919
1920 switch (decl->Declaration.File) {
1921 case TGSI_FILE_TEMPORARY:
1922 for (i = decl->DeclarationRange.First;
1923 i <= decl->DeclarationRange.Last;
1924 i++) {
1925 assert(i < MAX_TEMPS);
1926 for (ch = 0; ch < 4; ch++) {
1927 gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
1928 if (gen->temp_regs[i][ch] < 0)
1929 return FALSE; /* out of regs */
1930 }
1931
1932 /* XXX if we run out of SPE registers, we need to spill
1933 * to SPU memory. someday...
1934 */
1935
1936 {
1937 char buf[100];
1938 sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i,
1939 gen->temp_regs[i][0], gen->temp_regs[i][1],
1940 gen->temp_regs[i][2], gen->temp_regs[i][3]);
1941 spe_comment(gen->f, 0, buf);
1942 }
1943 }
1944 break;
1945 default:
1946 ; /* ignore */
1947 }
1948
1949 return TRUE;
1950 }
1951
1952
1953
1954 /**
1955 * Translate TGSI shader code to SPE instructions. This is done when
1956 * the state tracker gives us a new shader (via pipe->create_fs_state()).
1957 *
1958 * \param cell the rendering context (in)
1959 * \param tokens the TGSI shader (in)
1960 * \param f the generated function (out)
1961 */
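/*
 * Hypothetical usage sketch (illustrative only; the real caller is in the
 * Cell driver's shader-state code and the names below are made up):
 *
 *     struct spe_function code;
 *     if (cell_gen_fragment_program(cell, fs_tokens, &code)) {
 *         // 'code' now holds SPE instructions for the fragment shader
 *     }
 *     else {
 *         // codegen failed (e.g. unimplemented opcode); handle fallback
 *     }
 */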
1962 boolean
1963 cell_gen_fragment_program(struct cell_context *cell,
1964 const struct tgsi_token *tokens,
1965 struct spe_function *f)
1966 {
1967 struct tgsi_parse_context parse;
1968 struct codegen gen;
1969 uint ic = 0;
1970
1971 memset(&gen, 0, sizeof(gen));
1972 gen.cell = cell;
1973 gen.f = f;
1974
1975 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
1976 gen.inputs_reg = 3; /* pointer to inputs array */
1977 gen.outputs_reg = 4; /* pointer to outputs array */
1978 gen.constants_reg = 5; /* pointer to constants array */
1979
1980 spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
1981 spe_allocate_register(f, gen.inputs_reg);
1982 spe_allocate_register(f, gen.outputs_reg);
1983 spe_allocate_register(f, gen.constants_reg);
1984
1985 if (cell->debug_flags & CELL_DEBUG_ASM) {
1986 spe_print_code(f, TRUE);
1987 spe_indent(f, 2*8);
1988 printf("Begin %s\n", __FUNCTION__);
1989 tgsi_dump(tokens, 0);
1990 }
1991
1992 tgsi_parse_init(&parse, tokens);
1993
1994 emit_prologue(&gen);
1995
1996 while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
1997 tgsi_parse_token(&parse);
1998
1999 switch (parse.FullToken.Token.Type) {
2000 case TGSI_TOKEN_TYPE_IMMEDIATE:
2001 if (f->print) {
2002 _debug_printf(" # ");
2003 tgsi_dump_immediate(&parse.FullToken.FullImmediate);
2004 }
2005 if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
2006 gen.error = TRUE;
2007 break;
2008
2009 case TGSI_TOKEN_TYPE_DECLARATION:
2010 if (f->print) {
2011 _debug_printf(" # ");
2012 tgsi_dump_declaration(&parse.FullToken.FullDeclaration);
2013 }
2014 if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
2015 gen.error = TRUE;
2016 break;
2017
2018 case TGSI_TOKEN_TYPE_INSTRUCTION:
2019 if (f->print) {
2020 _debug_printf(" # ");
2021 ic++;
2022 tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
2023 }
2024 if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
2025 gen.error = TRUE;
2026 break;
2027
2028 default:
2029 assert(0);
2030 }
2031 }
2032
2033 if (gen.error) {
2034 /* terminate the SPE code */
2035 return emit_END(&gen);
2036 }
2037
2038 if (cell->debug_flags & CELL_DEBUG_ASM) {
2039 printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
2040 printf("End %s\n", __FUNCTION__);
2041 }
2042
2043 tgsi_parse_free( &parse );
2044
2045 return !gen.error;
2046 }