tgsi: rename fields of tgsi_full_declaration to reduce verbosity
[mesa.git] / src / gallium / drivers / cell / ppu / cell_gen_fp.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009 VMware, Inc. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29
30
31 /**
32 * Generate SPU fragment program/shader code.
33 *
34 * Note that we generate SOA-style code here. So each TGSI instruction
35 * operates on four pixels (and is translated into four SPU instructions,
36 * generally speaking).
37 *
38 * \author Brian Paul
39 */
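/*
 * Illustration only (not code emitted by this file): a TGSI instruction
 * such as
 *    ADD TEMP[0], TEMP[1], TEMP[2]
 * expands into one SPE 'fa' per write-enabled channel, where each SPE
 * register holds that channel's value for the four pixels of the quad:
 *    fa $d_x, $s1_x, $s2_x    (X channel of all four pixels)
 *    fa $d_y, $s1_y, $s2_y    (Y channel)
 *    fa $d_z, $s1_z, $s2_z    (Z channel)
 *    fa $d_w, $s1_w, $s2_w    (W channel)
 * See emit_binop() below.
 */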
40
41 #include <math.h>
42 #include "pipe/p_defines.h"
43 #include "pipe/p_state.h"
44 #include "pipe/p_shader_tokens.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "tgsi/tgsi_dump.h"
49 #include "rtasm/rtasm_ppc_spe.h"
50 #include "util/u_memory.h"
51 #include "cell_context.h"
52 #include "cell_gen_fp.h"
53
54
55 #define MAX_TEMPS 16
56 #define MAX_IMMED 8
57
58 #define CHAN_X 0
59 #define CHAN_Y 1
60 #define CHAN_Z 2
61 #define CHAN_W 3
62
63 /**
64 * Context needed during code generation.
65 */
66 struct codegen
67 {
68 struct cell_context *cell;
69 int inputs_reg; /**< 1st function parameter */
70 int outputs_reg; /**< 2nd function parameter */
71 int constants_reg; /**< 3rd function parameter */
72 int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
73 int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */
74
75 int num_imm; /**< number of immediates */
76
77 int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
78
79 int addr_reg; /**< address register, integer values */
80
81 /** Per-instruction temps / intermediate temps */
82 int num_itemps;
83 int itemps[12];
84
85 /** Current IF/ELSE/ENDIF nesting level */
86 int if_nesting;
87 /** Current BGNLOOP/ENDLOOP nesting level */
88 int loop_nesting;
89 /** Location of start of current loop */
90 int loop_start;
91
92 /** Index of if/conditional mask register */
93 int cond_mask_reg;
94 /** Index of loop mask register */
95 int loop_mask_reg;
96
97 /** Index of master execution mask register */
98 int exec_mask_reg;
99
100 /** KIL mask: indicates which fragments have been killed */
101 int kill_mask_reg;
102
103 int frame_size; /**< Stack frame size, in words */
104
105 struct spe_function *f;
106 boolean error;
107 };
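/*
 * Note on memory layout: the inputs, outputs and constants arrays passed
 * in the first three function parameters are stored SOA-style.  Each TGSI
 * register occupies four consecutive quadwords (one per channel), so
 * channel 'c' of register 'i' lives at byte offset (i * 4 + c) * 16.
 * See get_src_reg() and store_dest_reg().
 */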
108
109
110 /**
111 * Allocate an intermediate temporary register.
112 */
113 static int
114 get_itemp(struct codegen *gen)
115 {
116 int t = spe_allocate_available_register(gen->f);
117 assert(gen->num_itemps < Elements(gen->itemps));
118 gen->itemps[gen->num_itemps++] = t;
119 return t;
120 }
121
122 /**
123 * Free all intermediate temporary registers. To be called after each
124 * instruction has been emitted.
125 */
126 static void
127 free_itemps(struct codegen *gen)
128 {
129 int i;
130 for (i = 0; i < gen->num_itemps; i++) {
131 spe_release_register(gen->f, gen->itemps[i]);
132 }
133 gen->num_itemps = 0;
134 }
135
136
137 /**
138 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
139 * The register is allocated and initialized upon the first call.
140 */
141 static int
142 get_const_one_reg(struct codegen *gen)
143 {
144 if (gen->one_reg <= 0) {
145 gen->one_reg = spe_allocate_available_register(gen->f);
146
147 spe_indent(gen->f, 4);
148 spe_comment(gen->f, -4, "init constant reg = 1.0:");
149
150 /* one = {1.0, 1.0, 1.0, 1.0} */
151 spe_load_float(gen->f, gen->one_reg, 1.0f);
152
153 spe_indent(gen->f, -4);
154 }
155
156 return gen->one_reg;
157 }
158
159
160 /**
161 * Return index of the address register.
162 * Used for indirect register loads/stores.
163 */
164 static int
165 get_address_reg(struct codegen *gen)
166 {
167 if (gen->addr_reg <= 0) {
168 gen->addr_reg = spe_allocate_available_register(gen->f);
169
170 spe_indent(gen->f, 4);
171 spe_comment(gen->f, -4, "init address reg = 0:");
172
173 /* init addr = {0, 0, 0, 0} */
174 spe_zero(gen->f, gen->addr_reg);
175
176 spe_indent(gen->f, -4);
177 }
178
179 return gen->addr_reg;
180 }
181
182
183 /**
184 * Return index of the master execution mask.
185 * The register is allocated and initialized upon the first call.
186 *
187 * The master execution mask controls which pixels in a quad are
188 * modified, according to surrounding conditionals, loops, etc.
189 */
190 static int
191 get_exec_mask_reg(struct codegen *gen)
192 {
193 if (gen->exec_mask_reg <= 0) {
194 gen->exec_mask_reg = spe_allocate_available_register(gen->f);
195
196 /* XXX this may not be needed */
197 spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0");
198 spe_load_int(gen->f, gen->exec_mask_reg, ~0);
199 }
200
201 return gen->exec_mask_reg;
202 }
203
204
205 /** Return index of the conditional (if/else) execution mask register */
206 static int
207 get_cond_mask_reg(struct codegen *gen)
208 {
209 if (gen->cond_mask_reg <= 0) {
210 gen->cond_mask_reg = spe_allocate_available_register(gen->f);
211 }
212
213 return gen->cond_mask_reg;
214 }
215
216
217 /** Return index of the loop execution mask register */
218 static int
219 get_loop_mask_reg(struct codegen *gen)
220 {
221 if (gen->loop_mask_reg <= 0) {
222 gen->loop_mask_reg = spe_allocate_available_register(gen->f);
223 }
224
225 return gen->loop_mask_reg;
226 }
227
228
229
230 static boolean
231 is_register_src(struct codegen *gen, int channel,
232 const struct tgsi_full_src_register *src)
233 {
234 int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel);
235 int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
236
237 if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
238 return FALSE;
239 }
240 if (src->SrcRegister.File == TGSI_FILE_TEMPORARY ||
241 src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
242 return TRUE;
243 }
244 return FALSE;
245 }
246
247
248 static boolean
249 is_memory_dst(struct codegen *gen, int channel,
250 const struct tgsi_full_dst_register *dst)
251 {
252 if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
253 return TRUE;
254 }
255 else {
256 return FALSE;
257 }
258 }
259
260
261 /**
262 * Return the index of the SPU temporary containing the named TGSI
263 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
264 * just return the corresponding SPE register. If the TGSI register
265 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
266 * and emit an SPE load instruction.
267 */
268 static int
269 get_src_reg(struct codegen *gen,
270 int channel,
271 const struct tgsi_full_src_register *src)
272 {
273 int reg = -1;
274 int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel);
275 boolean reg_is_itemp = FALSE;
276 uint sign_op;
277
278 assert(swizzle >= TGSI_SWIZZLE_X);
279 assert(swizzle <= TGSI_SWIZZLE_W);
280
281 {
282 int index = src->SrcRegister.Index;
283
284 assert(swizzle < 4);
285
286 if (src->SrcRegister.Indirect) {
287 /* XXX unfinished */
288 }
289
290 switch (src->SrcRegister.File) {
291 case TGSI_FILE_TEMPORARY:
292 reg = gen->temp_regs[index][swizzle];
293 break;
294 case TGSI_FILE_INPUT:
295 {
296 /* offset is measured in quadwords, not bytes */
297 int offset = index * 4 + swizzle;
298 reg = get_itemp(gen);
299 reg_is_itemp = TRUE;
300 /* Load: reg = memory[(machine_reg) + offset] */
301 spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16);
302 }
303 break;
304 case TGSI_FILE_IMMEDIATE:
305 reg = gen->imm_regs[index][swizzle];
306 break;
307 case TGSI_FILE_CONSTANT:
308 {
309 /* offset is measured in quadwords, not bytes */
310 int offset = index * 4 + swizzle;
311 reg = get_itemp(gen);
312 reg_is_itemp = TRUE;
313 /* Load: reg = memory[(machine_reg) + offset] */
314 spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
315 }
316 break;
317 default:
318 assert(0);
319 }
320 }
321
322 /*
323 * Handle absolute value, negate or set-negative of src register.
324 */
325 sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
326 if (sign_op != TGSI_UTIL_SIGN_KEEP) {
327 /*
328 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
329 */
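/* For example, with -2.5f (0xC0200000):
 *   SIGN_CLEAR:  0xC0200000 & ~0x80000000 = 0x40200000 =  2.5f
 *   SIGN_TOGGLE: 0x40200000 ^  0x80000000 = 0xC0200000 = -2.5f
 */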
330 const int bit31mask_reg = get_itemp(gen);
331 int result_reg;
332
333 if (reg_is_itemp) {
334 /* re-use 'reg' for the result */
335 result_reg = reg;
336 }
337 else {
338 /* alloc a new reg for the result */
339 result_reg = get_itemp(gen);
340 }
341
342 /* mask with bit 31 set, the rest cleared */
343 spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
344
345 if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
346 spe_andc(gen->f, result_reg, reg, bit31mask_reg);
347 }
348 else if (sign_op == TGSI_UTIL_SIGN_SET) {
349 spe_or(gen->f, result_reg, reg, bit31mask_reg); /* force the sign bit on */
350 }
351 else {
352 assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
353 spe_xor(gen->f, result_reg, reg, bit31mask_reg);
354 }
355
356 reg = result_reg;
357 }
358
359 return reg;
360 }
361
362
363 /**
364 * Return the index of an SPE register to use for the given TGSI register.
365 * If the TGSI register is TGSI_FILE_TEMPORARY, the index of the
366 * corresponding SPE register is returned. If the TGSI register is
367 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
368 * See store_dest_reg() below...
369 */
370 static int
371 get_dst_reg(struct codegen *gen,
372 int channel,
373 const struct tgsi_full_dst_register *dest)
374 {
375 int reg = -1;
376
377 switch (dest->DstRegister.File) {
378 case TGSI_FILE_TEMPORARY:
379 if (gen->if_nesting > 0 || gen->loop_nesting > 0)
380 reg = get_itemp(gen);
381 else
382 reg = gen->temp_regs[dest->DstRegister.Index][channel];
383 break;
384 case TGSI_FILE_OUTPUT:
385 reg = get_itemp(gen);
386 break;
387 default:
388 assert(0);
389 }
390
391 return reg;
392 }
393
394
395 /**
396 * When a TGSI instruction writes to an output register, this
397 * function emits the SPE store instruction that stores value_reg.
398 * \param value_reg the SPE register containing the value to store.
399 * This would have been returned by get_dst_reg().
400 */
401 static void
402 store_dest_reg(struct codegen *gen,
403 int value_reg, int channel,
404 const struct tgsi_full_dst_register *dest)
405 {
406 /*
407 * XXX need to implement dst reg clamping/saturation
408 */
409 #if 0
410 switch (inst->Instruction.Saturate) {
411 case TGSI_SAT_NONE:
412 break;
413 case TGSI_SAT_ZERO_ONE:
414 break;
415 case TGSI_SAT_MINUS_PLUS_ONE:
416 break;
417 default:
418 assert( 0 );
419 }
420 #endif
421
422 switch (dest->DstRegister.File) {
423 case TGSI_FILE_TEMPORARY:
424 if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
425 int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
426 int exec_reg = get_exec_mask_reg(gen);
427 /* Mix d with new value according to exec mask:
428 * d[i] = mask_reg[i] ? value_reg : d_reg
429 */
430 spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
431 }
432 else {
433 /* we're not inside a condition or loop: do nothing special */
434
435 }
436 break;
437 case TGSI_FILE_OUTPUT:
438 {
439 /* offset is measured in quadwords, not bytes */
440 int offset = dest->DstRegister.Index * 4 + channel;
441 if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
442 int exec_reg = get_exec_mask_reg(gen);
443 int curval_reg = get_itemp(gen);
444 /* First read the current value from memory:
445 * Load: curval = memory[(machine_reg) + offset]
446 */
447 spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
448 /* Mix curval with newvalue according to exec mask:
449 * d[i] = mask_reg[i] ? value_reg : d_reg
450 */
451 spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
452 /* Store: memory[(machine_reg) + offset] = curval */
453 spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
454 }
455 else {
456 /* Store: memory[(machine_reg) + offset] = reg */
457 spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16);
458 }
459 }
460 break;
461 default:
462 assert(0);
463 }
464 }
465
466
467
468 static void
469 emit_prologue(struct codegen *gen)
470 {
471 gen->frame_size = 1024; /* XXX temporary, should be dynamic */
472
473 spe_comment(gen->f, 0, "Function prologue:");
474
475 /* save $lr on stack # stqd $lr,16($sp) */
476 spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
477
478 if (gen->frame_size >= 512) {
479 /* offset is too large for ai instruction */
480 int offset_reg = spe_allocate_available_register(gen->f);
481 int sp_reg = spe_allocate_available_register(gen->f);
482 /* offset = -framesize */
483 spe_load_int(gen->f, offset_reg, -gen->frame_size);
484 /* sp = $sp */
485 spe_move(gen->f, sp_reg, SPE_REG_SP);
486 /* $sp = $sp + offset_reg */
487 spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
488 /* save $sp in stack frame */
489 spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0);
490 /* clean up */
491 spe_release_register(gen->f, offset_reg);
492 spe_release_register(gen->f, sp_reg);
493 }
494 else {
495 /* save stack pointer # stqd $sp,-frameSize($sp) */
496 spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
497
498 /* adjust stack pointer # ai $sp,$sp,-frameSize */
499 spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
500 }
501 }
502
503
504 static void
505 emit_epilogue(struct codegen *gen)
506 {
507 const int return_reg = 3;
508
509 spe_comment(gen->f, 0, "Function epilogue:");
510
511 spe_comment(gen->f, 0, "return the killed mask");
512 if (gen->kill_mask_reg > 0) {
513 /* shader called KIL, return the "alive" mask */
514 spe_move(gen->f, return_reg, gen->kill_mask_reg);
515 }
516 else {
517 /* return {0,0,0,0} */
518 spe_load_uint(gen->f, return_reg, 0);
519 }
520
521 spe_comment(gen->f, 0, "restore stack and return");
522 if (gen->frame_size >= 512) {
523 /* offset is too large for ai instruction */
524 int offset_reg = spe_allocate_available_register(gen->f);
525 /* offset = framesize */
526 spe_load_int(gen->f, offset_reg, gen->frame_size);
527 /* $sp = $sp + offset */
528 spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
529 /* clean up */
530 spe_release_register(gen->f, offset_reg);
531 }
532 else {
533 /* restore stack pointer # ai $sp,$sp,frameSize */
534 spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size);
535 }
536
537 /* restore $lr # lqd $lr,16($sp) */
538 spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
539
540 /* return from function call */
541 spe_bi(gen->f, SPE_REG_RA, 0, 0);
542 }
543
544
545 #define FOR_EACH_ENABLED_CHANNEL(inst, ch) \
546 for (ch = 0; ch < 4; ch++) \
547 if (inst->Dst[0].DstRegister.WriteMask & (1 << ch))
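/*
 * Example: for an instruction writing "TEMP[0].xz" the loop body runs
 * only for ch = 0 (X) and ch = 2 (Z), since only those bits are set in
 * the destination write mask.
 */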
548
549
550 static boolean
551 emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst)
552 {
553 int ch = 0, src_reg, addr_reg;
554
555 src_reg = get_src_reg(gen, ch, &inst->Src[0]);
556 addr_reg = get_address_reg(gen);
557
558 /* convert float to int */
559 spe_cflts(gen->f, addr_reg, src_reg, 0);
560
561 free_itemps(gen);
562
563 return TRUE;
564 }
565
566
567 static boolean
568 emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
569 {
570 int ch, src_reg[4], dst_reg[4];
571
572 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
573 src_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
574 dst_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
575 }
576
577 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
578 if (is_register_src(gen, ch, &inst->Src[0]) &&
579 is_memory_dst(gen, ch, &inst->Dst[0])) {
580 /* special-case: register to memory store */
581 store_dest_reg(gen, src_reg[ch], ch, &inst->Dst[0]);
582 }
583 else {
584 spe_move(gen->f, dst_reg[ch], src_reg[ch]);
585 store_dest_reg(gen, dst_reg[ch], ch, &inst->Dst[0]);
586 }
587 }
588
589 free_itemps(gen);
590
591 return TRUE;
592 }
593
594 /**
595 * Emit binary operation
596 */
597 static boolean
598 emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst)
599 {
600 int ch, s1_reg[4], s2_reg[4], d_reg[4];
601
602 /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
603 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
604 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
605 s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
606 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
607 }
608
609 /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */
610 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
611 /* Emit actual SPE instruction: d = s1 + s2 */
612 switch (inst->Instruction.Opcode) {
613 case TGSI_OPCODE_ADD:
614 spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
615 break;
616 case TGSI_OPCODE_SUB:
617 spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
618 break;
619 case TGSI_OPCODE_MUL:
620 spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
621 break;
622 default:
623 ;
624 }
625 }
626
627 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
628 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
629 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
630 }
631
632 /* Free any intermediate temps we allocated */
633 free_itemps(gen);
634
635 return TRUE;
636 }
637
638
639 /**
640 * Emit multiply-add. See emit_binop() for comments.
641 */
642 static boolean
643 emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
644 {
645 int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4];
646
647 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
648 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
649 s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
650 s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]);
651 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
652 }
653 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
654 spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
655 }
656 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
657 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
658 }
659 free_itemps(gen);
660 return TRUE;
661 }
662
663
664 /**
665 * Emit linear interpolation. See emit_binop() for comments.
666 */
667 static boolean
668 emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst)
669 {
670 int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4];
671
672 /* setup/get src/dst/temp regs */
673 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
674 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
675 s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
676 s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]);
677 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
678 tmp_reg[ch] = get_itemp(gen);
679 }
680
681 /* d = s3 + s1(s2 - s3) */
682 /* do all subtracts, then all fma, then all stores to better pipeline */
683 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
684 spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]);
685 }
686 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
687 spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
688 }
689 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
690 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
691 }
692 free_itemps(gen);
693 return TRUE;
694 }
695
696
697
698 /**
699 * Emit reciprocal or recip sqrt.
700 */
701 static boolean
702 emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
703 {
704 int ch, s1_reg[4], d_reg[4], tmp_reg[4];
705
706 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
707 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
708 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
709 tmp_reg[ch] = get_itemp(gen);
710 }
711
712 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
713 if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) {
714 /* tmp = 1/s1 */
715 spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]);
716 }
717 else {
718 /* tmp = 1/sqrt(s1) */
719 spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]);
720 }
721 }
722
723 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
724 /* d = fi(s1, tmp): refine the frest/frsqest estimate */
725 spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
726 }
727
728 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
729 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
730 }
731
732 free_itemps(gen);
733 return TRUE;
734 }
735
736
737 /**
738 * Emit absolute value. See emit_binop() for comments.
739 */
740 static boolean
741 emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
742 {
743 int ch, s1_reg[4], d_reg[4];
744 const int bit31mask_reg = get_itemp(gen);
745
746 /* mask with bit 31 set, the rest cleared */
747 spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
748
749 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
750 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
751 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
752 }
753
754 /* d = sign bit cleared in s1 */
755 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
756 spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg);
757 }
758
759 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
760 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
761 }
762
763 free_itemps(gen);
764 return TRUE;
765 }
766
767 /**
768 * Emit 3-component dot product. See emit_binop() for comments.
769 */
770 static boolean
771 emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
772 {
773 int ch;
774 int s1x_reg, s1y_reg, s1z_reg;
775 int s2x_reg, s2y_reg, s2z_reg;
776 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
777
778 s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
779 s2x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
780 s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
781 s2y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
782 s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
783 s2z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
784
785 /* t0 = x0 * x1 */
786 spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg);
787
788 /* t1 = y0 * y1 */
789 spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg);
790
791 /* t0 = z0 * z1 + t0 */
792 spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg);
793
794 /* t0 = t0 + t1 */
795 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
796
797 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
798 int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
799 spe_move(gen->f, d_reg, t0_reg);
800 store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
801 }
802
803 free_itemps(gen);
804 return TRUE;
805 }
806
807 /**
808 * Emit 4-component dot product. See emit_binop() for comments.
809 */
810 static boolean
811 emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
812 {
813 int ch;
814 int s0x_reg, s0y_reg, s0z_reg, s0w_reg;
815 int s1x_reg, s1y_reg, s1z_reg, s1w_reg;
816 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
817
818 s0x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
819 s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
820 s0y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
821 s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
822 s0z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
823 s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
824 s0w_reg = get_src_reg(gen, CHAN_W, &inst->Src[0]);
825 s1w_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]);
826
827 /* t0 = x0 * x1 */
828 spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg);
829
830 /* t1 = y0 * y1 */
831 spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg);
832
833 /* t0 = z0 * z1 + t0 */
834 spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg);
835
836 /* t1 = w0 * w1 + t1 */
837 spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg);
838
839 /* t0 = t0 + t1 */
840 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
841
842 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
843 int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
844 spe_move(gen->f, d_reg, t0_reg);
845 store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
846 }
847
848 free_itemps(gen);
849 return TRUE;
850 }
851
852 /**
853 * Emit homogeneous dot product. See emit_binop() for comments.
854 */
855 static boolean
856 emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
857 {
858 /* XXX rewrite this function to look more like DP3/DP4 */
859 int ch;
860 int s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
861 int s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
862 int tmp_reg = get_itemp(gen);
863
864 /* t = x0 * x1 */
865 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
866
867 s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
868 s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
869 /* t = y0 * y1 + t */
870 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
871
872 s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
873 s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
874 /* t = z0 * z1 + t */
875 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
876
877 s2_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]);
878 /* t = w1 + t */
879 spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
880
881 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
882 int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
883 spe_move(gen->f, d_reg, tmp_reg);
884 store_dest_reg(gen, tmp_reg, ch, &inst->Dst[0]);
885 }
886
887 free_itemps(gen);
888 return TRUE;
889 }
890
891 /**
892 * Emit 3-component vector normalize.
893 */
894 static boolean
895 emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
896 {
897 int ch;
898 int src_reg[3];
899 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
900
901 src_reg[0] = get_src_reg(gen, CHAN_X, &inst->Src[0]);
902 src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
903 src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
904
905 /* t0 = x * x */
906 spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]);
907
908 /* t1 = y * y */
909 spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]);
910
911 /* t0 = z * z + t0 */
912 spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg);
913
914 /* t0 = t0 + t1 */
915 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
916
917 /* t1 = 1.0 / sqrt(t0) */
918 spe_frsqest(gen->f, t1_reg, t0_reg);
919 spe_fi(gen->f, t1_reg, t0_reg, t1_reg);
920
921 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
922 int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
923 /* dst = src[ch] * t1 */
924 spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
925 store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
926 }
927
928 free_itemps(gen);
929 return TRUE;
930 }
931
932
933 /**
934 * Emit cross product. See emit_binop() for comments.
935 */
936 static boolean
937 emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
938 {
939 int s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
940 int s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
941 int tmp_reg = get_itemp(gen);
942
943 /* t = z0 * y1 */
944 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
945
946 s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
947 s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
948 /* t = y0 * z1 - t */
949 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
950
951 if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_X)) {
952 store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]);
953 }
954
955 s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
956 s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
957 /* t = x0 * z1 */
958 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
959
960 s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
961 s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
962 /* t = z0 * x1 - t */
963 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
964
965 if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
966 store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]);
967 }
968
969 s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
970 s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
971 /* t = y0 * x1 */
972 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
973
974 s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
975 s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
976 /* t = x0 * y1 - t */
977 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
978
979 if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
980 store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]);
981 }
982
983 free_itemps(gen);
984 return TRUE;
985 }
986
987
988 /**
989 * Emit inequality instruction.
990 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
991 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
992 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
993 */
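/*
 * For example, with one_reg = {1.0, 1.0, 1.0, 1.0} (bit pattern 0x3f800000):
 *    0xffffffff & 0x3f800000 = 0x3f800000 = 1.0f
 *    0x00000000 & 0x3f800000 = 0x00000000 = 0.0f
 */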
994 static boolean
995 emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
996 {
997 int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg;
998 boolean complement = FALSE;
999
1000 one_reg = get_const_one_reg(gen);
1001
1002 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1003 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
1004 s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
1005 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
1006 }
1007
1008 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1009 switch (inst->Instruction.Opcode) {
1010 case TGSI_OPCODE_SGT:
1011 spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
1012 break;
1013 case TGSI_OPCODE_SLT:
1014 spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
1015 break;
1016 case TGSI_OPCODE_SGE:
1017 spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
1018 complement = TRUE;
1019 break;
1020 case TGSI_OPCODE_SLE:
1021 spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
1022 complement = TRUE;
1023 break;
1024 case TGSI_OPCODE_SEQ:
1025 spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
1026 break;
1027 case TGSI_OPCODE_SNE:
1028 spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
1029 complement = TRUE;
1030 break;
1031 default:
1032 assert(0);
1033 }
1034 }
1035
1036 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1037 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1038 /* d = d & one_reg */
1039 if (complement)
1040 spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]);
1041 else
1042 spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]);
1043 }
1044
1045 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1046 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
1047 }
1048
1049 free_itemps(gen);
1050 return TRUE;
1051 }
1052
1053
1054 /**
1055 * Emit compare.
1056 */
1057 static boolean
1058 emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
1059 {
1060 int ch;
1061
1062 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1063 int s1_reg = get_src_reg(gen, ch, &inst->Src[0]);
1064 int s2_reg = get_src_reg(gen, ch, &inst->Src[1]);
1065 int s3_reg = get_src_reg(gen, ch, &inst->Src[2]);
1066 int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
1067 int zero_reg = get_itemp(gen);
1068
1069 spe_zero(gen->f, zero_reg);
1070
1071 /* d = (s1 < 0) ? s2 : s3 */
1072 spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
1073 spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
1074
1075 store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
1076 free_itemps(gen);
1077 }
1078
1079 return TRUE;
1080 }
1081
1082 /**
1083 * Emit trunc (truncate toward zero):
1084 * convert float to signed int,
1085 * then convert the signed int back to float.
1086 */
1087 static boolean
1088 emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1089 {
1090 int ch, s1_reg[4], d_reg[4];
1091
1092 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1093 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
1094 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
1095 }
1096
1097 /* Convert float to int */
1098 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1099 spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0);
1100 }
1101
1102 /* Convert int to float */
1103 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1104 spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0);
1105 }
1106
1107 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1108 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
1109 }
1110
1111 free_itemps(gen);
1112 return TRUE;
1113 }
1114
1115
1116 /**
1117 * Emit floor.
1118 * If the value is negative, subtract 1.0 first,
1119 * then convert float to signed int
1120 * and convert the signed int back to float.
1121 */
1122 static boolean
1123 emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
1124 {
1125 int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
1126
1127 zero_reg = get_itemp(gen);
1128 spe_zero(gen->f, zero_reg);
1129 one_reg = get_const_one_reg(gen);
1130
1131 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1132 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
1133 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
1134 tmp_reg[ch] = get_itemp(gen);
1135 }
1136
1137 /* If negative, subtract 1.0 */
1138 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1139 spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
1140 }
1141 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1142 spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
1143 }
1144 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1145 spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
1146 }
1147
1148 /* Convert float to int */
1149 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1150 spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
1151 }
1152
1153 /* Convert int to float */
1154 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1155 spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0);
1156 }
1157
1158 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1159 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
1160 }
1161
1162 free_itemps(gen);
1163 return TRUE;
1164 }
1165
1166
1167 /**
1168 * Compute frac = Input - FLR(Input)
1169 */
1170 static boolean
1171 emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1172 {
1173 int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
1174
1175 zero_reg = get_itemp(gen);
1176 spe_zero(gen->f, zero_reg);
1177 one_reg = get_const_one_reg(gen);
1178
1179 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1180 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
1181 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
1182 tmp_reg[ch] = get_itemp(gen);
1183 }
1184
1185 /* If negative, subtract 1.0 */
1186 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1187 spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
1188 }
1189 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1190 spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
1191 }
1192 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1193 spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
1194 }
1195
1196 /* Convert float to int */
1197 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1198 spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
1199 }
1200
1201 /* Convert int to float */
1202 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1203 spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
1204 }
1205
1206 /* d = s1 - FLR(s1) */
1207 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1208 spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
1209 }
1210
1211 /* store result */
1212 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1213 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
1214 }
1215
1216 free_itemps(gen);
1217 return TRUE;
1218 }
1219
1220
1221 #if 0
1222 static void
1223 print_functions(struct cell_context *cell)
1224 {
1225 struct cell_spu_function_info *funcs = &cell->spu_functions;
1226 uint i;
1227 for (i = 0; i < funcs->num; i++) {
1228 printf("SPU func %u: %s at %u\n",
1229 i, funcs->names[i], funcs->addrs[i]);
1230 }
1231 }
1232 #endif
1233
1234
1235 static uint
1236 lookup_function(struct cell_context *cell, const char *funcname)
1237 {
1238 const struct cell_spu_function_info *funcs = &cell->spu_functions;
1239 uint i, addr = 0;
1240 for (i = 0; i < funcs->num; i++) {
1241 if (strcmp(funcs->names[i], funcname) == 0) {
1242 addr = funcs->addrs[i];
1243 }
1244 }
1245 assert(addr && "spu function not found");
1246 return addr / 4; /* discard 2 least significant bits */
1247 }
1248
1249
1250 /**
1251 * Emit code to call a SPU function.
1252 * Used to implement instructions like SIN/COS/POW/TEX/etc.
1253 * If scalar, only the X components of the src regs are used, and the
1254 * result is replicated across the dest register's XYZW components.
1255 */
1256 static boolean
1257 emit_function_call(struct codegen *gen,
1258 const struct tgsi_full_instruction *inst,
1259 char *funcname, uint num_args, boolean scalar)
1260 {
1261 const uint addr = lookup_function(gen->cell, funcname);
1262 char comment[100];
1263 int s_regs[3];
1264 boolean func_called = FALSE;
1265 uint a, ch;
1266 int retval_reg = -1;
1267
1268 assert(num_args <= 3);
1269
1270 snprintf(comment, sizeof(comment), "CALL %s:", funcname);
1271 spe_comment(gen->f, -4, comment);
1272
1273 if (scalar) {
1274 for (a = 0; a < num_args; a++) {
1275 s_regs[a] = get_src_reg(gen, CHAN_X, &inst->Src[a]);
1276 }
1277 /* we'll call the function, put the return value in this register,
1278 * then replicate it across all write-enabled components in d_reg.
1279 */
1280 retval_reg = spe_allocate_available_register(gen->f);
1281 }
1282
1283 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1284 int d_reg;
1285 ubyte usedRegs[SPE_NUM_REGS];
1286 uint i, numUsed;
1287
1288 if (!scalar) {
1289 for (a = 0; a < num_args; a++) {
1290 s_regs[a] = get_src_reg(gen, ch, &inst->Src[a]);
1291 }
1292 }
1293
1294 d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
1295
1296 if (!scalar || !func_called) {
1297 /* for a scalar function, we'll really only call the function once */
1298
1299 numUsed = spe_get_registers_used(gen->f, usedRegs);
1300 assert(numUsed < gen->frame_size / 16 - 2);
1301
1302 /* save registers to stack */
1303 for (i = 0; i < numUsed; i++) {
1304 uint reg = usedRegs[i];
1305 int offset = 2 + i;
1306 spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1307 }
1308
1309 /* setup function arguments */
1310 for (a = 0; a < num_args; a++) {
1311 spe_move(gen->f, 3 + a, s_regs[a]);
1312 }
1313
1314 /* branch to function, save return addr */
1315 spe_brasl(gen->f, SPE_REG_RA, addr);
1316
1317 /* save function's return value */
1318 if (scalar)
1319 spe_move(gen->f, retval_reg, 3);
1320 else
1321 spe_move(gen->f, d_reg, 3);
1322
1323 /* restore registers from stack */
1324 for (i = 0; i < numUsed; i++) {
1325 uint reg = usedRegs[i];
1326 if (reg != d_reg && reg != retval_reg) {
1327 int offset = 2 + i;
1328 spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1329 }
1330 }
1331
1332 func_called = TRUE;
1333 }
1334
1335 if (scalar) {
1336 spe_move(gen->f, d_reg, retval_reg);
1337 }
1338
1339 store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
1340 free_itemps(gen);
1341 }
1342
1343 if (scalar) {
1344 spe_release_register(gen->f, retval_reg);
1345 }
1346
1347 return TRUE;
1348 }
1349
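/*
 * Note on the call sequence above (SPE ABI): arguments are passed in
 * registers $3..$5 and the return value comes back in $3.  Because the
 * called function may clobber registers this shader is using, the code
 * conservatively spills every register currently in use to stack
 * quadword slots 2..N around the call.
 */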
1350
1351 static boolean
1352 emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
1353 {
1354 const uint target = inst->InstructionExtTexture.Texture;
1355 const uint unit = inst->Src[1].SrcRegister.Index;
1356 uint addr;
1357 int ch;
1358 int coord_regs[4], d_regs[4];
1359
1360 switch (target) {
1361 case TGSI_TEXTURE_1D:
1362 case TGSI_TEXTURE_2D:
1363 addr = lookup_function(gen->cell, "spu_tex_2d");
1364 break;
1365 case TGSI_TEXTURE_3D:
1366 addr = lookup_function(gen->cell, "spu_tex_3d");
1367 break;
1368 case TGSI_TEXTURE_CUBE:
1369 addr = lookup_function(gen->cell, "spu_tex_cube");
1370 break;
1371 default:
1372 ASSERT(0 && "unsupported texture target");
1373 return FALSE;
1374 }
1375
1376 assert(inst->Src[1].SrcRegister.File == TGSI_FILE_SAMPLER);
1377
1378 spe_comment(gen->f, -4, "CALL tex:");
1379
1380 /* get src/dst reg info */
1381 for (ch = 0; ch < 4; ch++) {
1382 coord_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]);
1383 d_regs[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
1384 }
1385
1386 {
1387 ubyte usedRegs[SPE_NUM_REGS];
1388 uint i, numUsed;
1389
1390 numUsed = spe_get_registers_used(gen->f, usedRegs);
1391 assert(numUsed < gen->frame_size / 16 - 2);
1392
1393 /* save registers to stack */
1394 for (i = 0; i < numUsed; i++) {
1395 uint reg = usedRegs[i];
1396 int offset = 2 + i;
1397 spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1398 }
1399
1400 /* setup function arguments (XXX depends on target) */
1401 for (i = 0; i < 4; i++) {
1402 spe_move(gen->f, 3 + i, coord_regs[i]);
1403 }
1404 spe_load_uint(gen->f, 7, unit); /* sampler unit */
1405
1406 /* branch to function, save return addr */
1407 spe_brasl(gen->f, SPE_REG_RA, addr);
1408
1409 /* save function's return values (the four pixels' colors) */
1410 for (i = 0; i < 4; i++) {
1411 spe_move(gen->f, d_regs[i], 3 + i);
1412 }
1413
1414 /* restore registers from stack */
1415 for (i = 0; i < numUsed; i++) {
1416 uint reg = usedRegs[i];
1417 if (reg != d_regs[0] &&
1418 reg != d_regs[1] &&
1419 reg != d_regs[2] &&
1420 reg != d_regs[3]) {
1421 int offset = 2 + i;
1422 spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1423 }
1424 }
1425 }
1426
1427 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1428 store_dest_reg(gen, d_regs[ch], ch, &inst->Dst[0]);
1429 free_itemps(gen);
1430 }
1431
1432 return TRUE;
1433 }
1434
1435
1436 /**
1437 * KILL if any of src reg values are less than zero.
1438 */
1439 static boolean
1440 emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
1441 {
1442 int ch;
1443 int s_regs[4], kil_reg = -1, cmp_reg, zero_reg;
1444
1445 spe_comment(gen->f, -4, "CALL kil:");
1446
1447 /* zero = {0,0,0,0} */
1448 zero_reg = get_itemp(gen);
1449 spe_zero(gen->f, zero_reg);
1450
1451 cmp_reg = get_itemp(gen);
1452
1453 /* get src regs */
1454 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1455 s_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]);
1456 }
1457
1458 /* test if any src regs are < 0 */
1459 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1460 if (kil_reg >= 0) {
1461 /* cmp = (0 > src) ? ~0 : 0 */
1462 spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
1463 /* kil = kil | cmp */
1464 spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
1465 }
1466 else {
1467 kil_reg = get_itemp(gen);
1468 /* kil = (0 > src) ? ~0 : 0 */
1469 spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
1470 }
1471 }
1472
1473 if (gen->if_nesting || gen->loop_nesting) {
1474 /* may have been a conditional kil */
1475 spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
1476 }
1477
1478 /* allocate the kill mask reg if needed */
1479 if (gen->kill_mask_reg <= 0) {
1480 gen->kill_mask_reg = spe_allocate_available_register(gen->f);
1481 spe_move(gen->f, gen->kill_mask_reg, kil_reg);
1482 }
1483 else {
1484 spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
1485 }
1486
1487 free_itemps(gen);
1488
1489 return TRUE;
1490 }
1491
1492
1493
1494 /**
1495 * Emit min or max.
1496 */
1497 static boolean
1498 emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
1499 {
1500 int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
1501
1502 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1503 s0_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
1504 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
1505 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
1506 tmp_reg[ch] = get_itemp(gen);
1507 }
1508
1509 /* d = (s0 > s1) ? s0 : s1 */
1510 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1511 if (inst->Instruction.Opcode == TGSI_OPCODE_MAX)
1512 spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]);
1513 else
1514 spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]);
1515 }
1516 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1517 spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
1518 }
1519
1520 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1521 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
1522 }
1523
1524 free_itemps(gen);
1525 return TRUE;
1526 }
1527
1528
1529 /**
1530 * Emit code to update the execution mask.
1531 * This needs to be done whenever the execution status of a conditional
1532 * or loop is changed.
1533 */
1534 static void
1535 emit_update_exec_mask(struct codegen *gen)
1536 {
1537 const int exec_reg = get_exec_mask_reg(gen);
1538 const int cond_reg = gen->cond_mask_reg;
1539 const int loop_reg = gen->loop_mask_reg;
1540
1541 spe_comment(gen->f, 0, "Update master execution mask");
1542
1543 if (gen->if_nesting > 0 && gen->loop_nesting > 0) {
1544 /* exec_mask = cond_mask & loop_mask */
1545 assert(cond_reg > 0);
1546 assert(loop_reg > 0);
1547 spe_and(gen->f, exec_reg, cond_reg, loop_reg);
1548 }
1549 else if (gen->if_nesting > 0) {
1550 assert(cond_reg > 0);
1551 spe_move(gen->f, exec_reg, cond_reg);
1552 }
1553 else if (gen->loop_nesting > 0) {
1554 assert(loop_reg > 0);
1555 spe_move(gen->f, exec_reg, loop_reg);
1556 }
1557 else {
1558 spe_load_int(gen->f, exec_reg, ~0x0);
1559 }
1560 }
1561
1562
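/**
 * Emit code for IF.
 * There is no per-fragment branching here; conditionals are handled by
 * masking.  The IF condition is folded into cond_mask_reg, which is then
 * combined into the master execution mask, and store_dest_reg() uses
 * spe_selb() to blend results for the "live" pixels only.
 */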
1563 static boolean
1564 emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1565 {
1566 const int channel = 0;
1567 int cond_reg;
1568
1569 cond_reg = get_cond_mask_reg(gen);
1570
1571 /* XXX push cond exec mask */
1572
1573 spe_comment(gen->f, 0, "init conditional exec mask = ~0:");
1574 spe_load_int(gen->f, cond_reg, ~0);
1575
1576 /* update conditional execution mask with the predicate register */
1577 int tmp_reg = get_itemp(gen);
1578 int s1_reg = get_src_reg(gen, channel, &inst->Src[0]);
1579
1580 /* tmp = (s1_reg == 0) */
1581 spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
1582 /* tmp = !tmp */
1583 spe_complement(gen->f, tmp_reg, tmp_reg);
1584 /* cond_mask = cond_mask & tmp */
1585 spe_and(gen->f, cond_reg, cond_reg, tmp_reg);
1586
1587 gen->if_nesting++;
1588
1589 /* update the master execution mask */
1590 emit_update_exec_mask(gen);
1591
1592 free_itemps(gen);
1593
1594 return TRUE;
1595 }
1596
1597
1598 static boolean
1599 emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1600 {
1601 const int cond_reg = get_cond_mask_reg(gen);
1602
1603 spe_comment(gen->f, 0, "cond exec mask = !cond exec mask");
1604 spe_complement(gen->f, cond_reg, cond_reg);
1605 emit_update_exec_mask(gen);
1606
1607 return TRUE;
1608 }
1609
1610
1611 static boolean
1612 emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1613 {
1614 /* XXX todo: pop cond exec mask */
1615
1616 gen->if_nesting--;
1617
1618 emit_update_exec_mask(gen);
1619
1620 return TRUE;
1621 }
1622
1623
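/**
 * Emit code for BGNLOOP.
 * The loop body is executed for the whole quad; per-pixel liveness is
 * tracked in loop_mask_reg.  emit_ENDLOOP() branches back to the position
 * recorded in loop_start as long as any pixel's loop mask is still set.
 */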
1624 static boolean
1625 emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
1626 {
1627 int exec_reg, loop_reg;
1628
1629 exec_reg = get_exec_mask_reg(gen);
1630 loop_reg = get_loop_mask_reg(gen);
1631
1632 /* XXX push loop_exec mask */
1633
1634 spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0");
1635 spe_load_int(gen->f, loop_reg, ~0x0);
1636
1637 gen->loop_nesting++;
1638 gen->loop_start = spe_code_size(gen->f); /* in bytes */
1639
1640 return TRUE;
1641 }
1642
1643
1644 static boolean
1645 emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
1646 {
1647 const int loop_reg = get_loop_mask_reg(gen);
1648 const int tmp_reg = get_itemp(gen);
1649 int offset;
1650
1651 /* tmp_reg = loop_mask[0] | loop_mask[1] | loop_mask[2] | loop_mask[3] */
1652 spe_orx(gen->f, tmp_reg, loop_reg);
1653
1654 offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */
1655
1656 /* branch back to top of loop if tmp_reg != 0 */
1657 spe_brnz(gen->f, tmp_reg, offset / 4);
1658
1659 /* XXX pop loop_exec mask */
1660
1661 gen->loop_nesting--;
1662
1663 emit_update_exec_mask(gen);
1664
1665 return TRUE;
1666 }
1667
1668
1669 static boolean
1670 emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst)
1671 {
1672 const int exec_reg = get_exec_mask_reg(gen);
1673 const int loop_reg = get_loop_mask_reg(gen);
1674
1675 assert(gen->loop_nesting > 0);
1676
1677 spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask");
1678 spe_andc(gen->f, loop_reg, loop_reg, exec_reg);
1679
1680 emit_update_exec_mask(gen);
1681
1682 return TRUE;
1683 }
1684
1685
1686 static boolean
1687 emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst)
1688 {
1689 assert(gen->loop_nesting > 0);
1690
1691 return TRUE;
1692 }
1693
1694
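/**
 * Emit DDX or DDY.
 * The partial derivative is approximated as the difference between
 * neighboring pixels of the 2x2 quad: upper-right minus upper-left for
 * DDX, lower-left minus upper-left for DDY.  The same difference is
 * replicated across all four pixel slots of the result.
 */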
1695 static boolean
1696 emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
1697 boolean ddx)
1698 {
1699 int ch;
1700
1701 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1702 int s_reg = get_src_reg(gen, ch, &inst->Src[0]);
1703 int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
1704
1705 int t1_reg = get_itemp(gen);
1706 int t2_reg = get_itemp(gen);
1707
1708 spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
1709 if (ddx) {
1710 spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
1711 }
1712 else {
1713 spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
1714 }
1715 spe_fs(gen->f, d_reg, t2_reg, t1_reg);
1716
1717 free_itemps(gen);
1718 }
1719
1720 return TRUE;
1721 }
1722
1723
1724
1725
1726 /**
1727 * Emit END instruction.
1728 * We just return from the shader function at this point.
1729 *
1730 * Note that there may be more code after this that would be
1731 * called by TGSI_OPCODE_CALL.
1732 */
1733 static boolean
1734 emit_END(struct codegen *gen)
1735 {
1736 emit_epilogue(gen);
1737 return TRUE;
1738 }
1739
1740
1741 /**
1742 * Emit code for the given instruction. Just a big switch stmt.
1743 */
1744 static boolean
1745 emit_instruction(struct codegen *gen,
1746 const struct tgsi_full_instruction *inst)
1747 {
1748 switch (inst->Instruction.Opcode) {
1749 case TGSI_OPCODE_ARL:
1750 return emit_ARL(gen, inst);
1751 case TGSI_OPCODE_MOV:
1752 return emit_MOV(gen, inst);
1753 case TGSI_OPCODE_ADD:
1754 case TGSI_OPCODE_SUB:
1755 case TGSI_OPCODE_MUL:
1756 return emit_binop(gen, inst);
1757 case TGSI_OPCODE_MAD:
1758 return emit_MAD(gen, inst);
1759 case TGSI_OPCODE_LRP:
1760 return emit_LRP(gen, inst);
1761 case TGSI_OPCODE_DP3:
1762 return emit_DP3(gen, inst);
1763 case TGSI_OPCODE_DP4:
1764 return emit_DP4(gen, inst);
1765 case TGSI_OPCODE_DPH:
1766 return emit_DPH(gen, inst);
1767 case TGSI_OPCODE_NRM:
1768 return emit_NRM3(gen, inst);
1769 case TGSI_OPCODE_XPD:
1770 return emit_XPD(gen, inst);
1771 case TGSI_OPCODE_RCP:
1772 case TGSI_OPCODE_RSQ:
1773 return emit_RCP_RSQ(gen, inst);
1774 case TGSI_OPCODE_ABS:
1775 return emit_ABS(gen, inst);
1776 case TGSI_OPCODE_SGT:
1777 case TGSI_OPCODE_SLT:
1778 case TGSI_OPCODE_SGE:
1779 case TGSI_OPCODE_SLE:
1780 case TGSI_OPCODE_SEQ:
1781 case TGSI_OPCODE_SNE:
1782 return emit_inequality(gen, inst);
1783 case TGSI_OPCODE_CMP:
1784 return emit_CMP(gen, inst);
1785 case TGSI_OPCODE_MIN:
1786 case TGSI_OPCODE_MAX:
1787 return emit_MIN_MAX(gen, inst);
1788 case TGSI_OPCODE_TRUNC:
1789 return emit_TRUNC(gen, inst);
1790 case TGSI_OPCODE_FLR:
1791 return emit_FLR(gen, inst);
1792 case TGSI_OPCODE_FRC:
1793 return emit_FRC(gen, inst);
1794 case TGSI_OPCODE_END:
1795 return emit_END(gen);
1796
1797 case TGSI_OPCODE_COS:
1798 return emit_function_call(gen, inst, "spu_cos", 1, TRUE);
1799 case TGSI_OPCODE_SIN:
1800 return emit_function_call(gen, inst, "spu_sin", 1, TRUE);
1801 case TGSI_OPCODE_POW:
1802 return emit_function_call(gen, inst, "spu_pow", 2, TRUE);
1803 case TGSI_OPCODE_EX2:
1804 return emit_function_call(gen, inst, "spu_exp2", 1, TRUE);
1805 case TGSI_OPCODE_LG2:
1806 return emit_function_call(gen, inst, "spu_log2", 1, TRUE);
1807 case TGSI_OPCODE_TEX:
1808 /* fall-through for now */
1809 case TGSI_OPCODE_TXD:
1810 /* fall-through for now */
1811 case TGSI_OPCODE_TXB:
1812 /* fall-through for now */
1813 case TGSI_OPCODE_TXL:
1814 /* fall-through for now */
1815 case TGSI_OPCODE_TXP:
1816 return emit_TEX(gen, inst);
1817 case TGSI_OPCODE_KIL:
1818 return emit_KIL(gen, inst);
1819
1820 case TGSI_OPCODE_IF:
1821 return emit_IF(gen, inst);
1822 case TGSI_OPCODE_ELSE:
1823 return emit_ELSE(gen, inst);
1824 case TGSI_OPCODE_ENDIF:
1825 return emit_ENDIF(gen, inst);
1826
1827 case TGSI_OPCODE_BGNLOOP:
1828 return emit_BGNLOOP(gen, inst);
1829 case TGSI_OPCODE_ENDLOOP:
1830 return emit_ENDLOOP(gen, inst);
1831 case TGSI_OPCODE_BRK:
1832 return emit_BRK(gen, inst);
1833 case TGSI_OPCODE_CONT:
1834 return emit_CONT(gen, inst);
1835
1836 case TGSI_OPCODE_DDX:
1837 return emit_DDX_DDY(gen, inst, TRUE);
1838 case TGSI_OPCODE_DDY:
1839 return emit_DDX_DDY(gen, inst, FALSE);
1840
1841 /* XXX lots more cases to do... */
1842
1843 default:
1844 fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
1845 inst->Instruction.Opcode);
1846 return FALSE;
1847 }
1848
1849 return TRUE;
1850 }
1851
1852
1853
1854 /**
1855 * Emit code for a TGSI immediate value (vector of four floats).
1856 * This involves register allocation and initialization.
1857 * XXX the initialization should be done by a "prepare" stage, not
1858 * per quad execution!
1859 */
1860 static boolean
1861 emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
1862 {
1863 int ch;
1864
1865 assert(gen->num_imm < MAX_IMMED);
1866
1867 for (ch = 0; ch < 4; ch++) {
1868 float val = immed->u[ch].Float;
1869
1870 if (ch > 0 && val == immed->u[ch - 1].Float) {
1871 /* re-use previous register */
1872 gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
1873 }
1874 else {
1875 char str[100];
1876 int reg = spe_allocate_available_register(gen->f);
1877
1878 if (reg < 0)
1879 return FALSE;
1880
1881 sprintf(str, "init $%d = %f", reg, val);
1882 spe_comment(gen->f, 0, str);
1883
1884 /* update immediate map */
1885 gen->imm_regs[gen->num_imm][ch] = reg;
1886
1887 /* emit initializer instruction */
1888 spe_load_float(gen->f, reg, val);
1889 }
1890 }
1891
1892 gen->num_imm++;
1893
1894 return TRUE;
1895 }
1896
1897
1898
1899 /**
1900 * Emit "code" for a TGSI declaration.
1901 * We only care about TGSI TEMPORARY register declarations at this time.
1902 * For each TGSI TEMPORARY we allocate four SPE registers.
1903 */
1904 static boolean
1905 emit_declaration(struct cell_context *cell,
1906 struct codegen *gen, const struct tgsi_full_declaration *decl)
1907 {
1908 int i, ch;
1909
1910 switch (decl->Declaration.File) {
1911 case TGSI_FILE_TEMPORARY:
1912 for (i = decl->Range.First;
1913 i <= decl->Range.Last;
1914 i++) {
1915 assert(i < MAX_TEMPS);
1916 for (ch = 0; ch < 4; ch++) {
1917 gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
1918 if (gen->temp_regs[i][ch] < 0)
1919 return FALSE; /* out of regs */
1920 }
1921
1922 /* XXX if we run out of SPE registers, we need to spill
1923 * to SPU memory. someday...
1924 */
1925
1926 {
1927 char buf[100];
1928 sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i,
1929 gen->temp_regs[i][0], gen->temp_regs[i][1],
1930 gen->temp_regs[i][2], gen->temp_regs[i][3]);
1931 spe_comment(gen->f, 0, buf);
1932 }
1933 }
1934 break;
1935 default:
1936 ; /* ignore */
1937 }
1938
1939 return TRUE;
1940 }
1941
1942
1943
1944 /**
1945 * Translate TGSI shader code to SPE instructions. This is done when
1946 * the state tracker gives us a new shader (via pipe->create_fs_state()).
1947 *
1948 * \param cell the rendering context (in)
1949 * \param tokens the TGSI shader (in)
1950 * \param f the generated function (out)
1951 */
1952 boolean
1953 cell_gen_fragment_program(struct cell_context *cell,
1954 const struct tgsi_token *tokens,
1955 struct spe_function *f)
1956 {
1957 struct tgsi_parse_context parse;
1958 struct codegen gen;
1959 uint ic = 0;
1960
1961 memset(&gen, 0, sizeof(gen));
1962 gen.cell = cell;
1963 gen.f = f;
1964
1965 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
1966 gen.inputs_reg = 3; /* pointer to inputs array */
1967 gen.outputs_reg = 4; /* pointer to outputs array */
1968 gen.constants_reg = 5; /* pointer to constants array */
1969
1970 spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
1971 spe_allocate_register(f, gen.inputs_reg);
1972 spe_allocate_register(f, gen.outputs_reg);
1973 spe_allocate_register(f, gen.constants_reg);
1974
1975 if (cell->debug_flags & CELL_DEBUG_ASM) {
1976 spe_print_code(f, TRUE);
1977 spe_indent(f, 2*8);
1978 printf("Begin %s\n", __FUNCTION__);
1979 tgsi_dump(tokens, 0);
1980 }
1981
1982 tgsi_parse_init(&parse, tokens);
1983
1984 emit_prologue(&gen);
1985
1986 while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
1987 tgsi_parse_token(&parse);
1988
1989 switch (parse.FullToken.Token.Type) {
1990 case TGSI_TOKEN_TYPE_IMMEDIATE:
1991 if (f->print) {
1992 _debug_printf(" # ");
1993 tgsi_dump_immediate(&parse.FullToken.FullImmediate);
1994 }
1995 if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
1996 gen.error = TRUE;
1997 break;
1998
1999 case TGSI_TOKEN_TYPE_DECLARATION:
2000 if (f->print) {
2001 _debug_printf(" # ");
2002 tgsi_dump_declaration(&parse.FullToken.FullDeclaration);
2003 }
2004 if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
2005 gen.error = TRUE;
2006 break;
2007
2008 case TGSI_TOKEN_TYPE_INSTRUCTION:
2009 if (f->print) {
2010 _debug_printf(" # ");
2011 ic++;
2012 tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
2013 }
2014 if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
2015 gen.error = TRUE;
2016 break;
2017
2018 default:
2019 assert(0);
2020 }
2021 }
2022
2023 if (gen.error) {
2024 /* terminate the SPE code */
2025 return emit_END(&gen);
2026 }
2027
2028 if (cell->debug_flags & CELL_DEBUG_ASM) {
2029 printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
2030 printf("End %s\n", __FUNCTION__);
2031 }
2032
2033 tgsi_parse_free( &parse );
2034
2035 return !gen.error;
2036 }