1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * Generate SPU fragment program/shader code.
33 * Note that we generate SOA-style code here. So each TGSI instruction
34 * operates on four pixels (and is translated into four SPU instructions,
35 * generally speaking).
41 #include "pipe/p_defines.h"
42 #include "pipe/p_state.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
48 #include "rtasm/rtasm_ppc_spe.h"
49 #include "util/u_memory.h"
50 #include "cell_context.h"
51 #include "cell_gen_fp.h"
63 * Context needed during code generation.
67 struct cell_context
*cell
;
68 int inputs_reg
; /**< 1st function parameter */
69 int outputs_reg
; /**< 2nd function parameter */
70 int constants_reg
; /**< 3rd function parameter */
71 int temp_regs
[MAX_TEMPS
][4]; /**< maps TGSI temps to SPE registers */
72 int imm_regs
[MAX_IMMED
][4]; /**< maps TGSI immediates to SPE registers */
74 int num_imm
; /**< number of immediates */
76 int one_reg
; /**< register containing {1.0, 1.0, 1.0, 1.0} */
78 /** Per-instruction temps / intermediate temps */
82 /** Current IF/ELSE/ENDIF nesting level */
84 /** Index of execution mask register */
87 struct spe_function
*f
;
93 * Allocate an intermediate temporary register.
96 get_itemp(struct codegen
*gen
)
98 int t
= spe_allocate_available_register(gen
->f
);
99 assert(gen
->num_itemps
< Elements(gen
->itemps
));
100 gen
->itemps
[gen
->num_itemps
++] = t
;
105 * Free all intermediate temporary registers. To be called after each
106 * instruction has been emitted.
109 free_itemps(struct codegen
*gen
)
112 for (i
= 0; i
< gen
->num_itemps
; i
++) {
113 spe_release_register(gen
->f
, gen
->itemps
[i
]);
120 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
121 * The register is allocated and initialized upon the first call.
124 get_const_one_reg(struct codegen
*gen
)
126 if (gen
->one_reg
<= 0) {
127 gen
->one_reg
= spe_allocate_available_register(gen
->f
);
129 spe_indent(gen
->f
, 4);
130 spe_comment(gen
->f
, -4, "INIT CONSTANT 1.0:");
132 /* one = {1.0, 1.0, 1.0, 1.0} */
133 spe_load_float(gen
->f
, gen
->one_reg
, 1.0f
);
135 spe_indent(gen
->f
, -4);
143 * Return index of the pixel execution mask.
144 * The register is allocated an initialized upon the first call.
146 * The pixel execution mask controls which pixels in a quad are
147 * modified, according to surrounding conditionals, loops, etc.
150 get_exec_mask_reg(struct codegen
*gen
)
152 if (gen
->exec_mask_reg
<= 0) {
153 gen
->exec_mask_reg
= spe_allocate_available_register(gen
->f
);
155 spe_indent(gen
->f
, 4);
156 spe_comment(gen
->f
, -4, "INIT EXEC MASK = ~0:");
158 /* exec_mask = {~0, ~0, ~0, ~0} */
159 spe_load_int(gen
->f
, gen
->exec_mask_reg
, ~0);
161 spe_indent(gen
->f
, -4);
164 return gen
->exec_mask_reg
;
169 * Return the index of the SPU temporary containing the named TGSI
170 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
171 * just return the corresponding SPE register. If the TGIS register
172 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
173 * and emit an SPE load instruction.
176 get_src_reg(struct codegen
*gen
,
178 const struct tgsi_full_src_register
*src
)
181 int swizzle
= tgsi_util_get_full_src_register_extswizzle(src
, channel
);
182 boolean reg_is_itemp
= FALSE
;
185 assert(swizzle
>= TGSI_SWIZZLE_X
);
186 assert(swizzle
<= TGSI_EXTSWIZZLE_ONE
);
188 switch (src
->SrcRegister
.File
) {
189 case TGSI_FILE_TEMPORARY
:
190 reg
= gen
->temp_regs
[src
->SrcRegister
.Index
][swizzle
];
192 case TGSI_FILE_INPUT
:
194 if (swizzle
== TGSI_EXTSWIZZLE_ONE
) {
195 /* Load const one float and early out */
196 reg
= get_const_one_reg(gen
);
198 else if (swizzle
== TGSI_EXTSWIZZLE_ZERO
) {
199 /* Load const zero float and early out */
200 reg
= get_itemp(gen
);
201 spe_xor(gen
->f
, reg
, reg
, reg
);
204 /* offset is measured in quadwords, not bytes */
205 int offset
= src
->SrcRegister
.Index
* 4 + swizzle
;
206 reg
= get_itemp(gen
);
208 /* Load: reg = memory[(machine_reg) + offset] */
209 spe_lqd(gen
->f
, reg
, gen
->inputs_reg
, offset
);
213 case TGSI_FILE_IMMEDIATE
:
214 reg
= gen
->imm_regs
[src
->SrcRegister
.Index
][swizzle
];
216 case TGSI_FILE_CONSTANT
:
217 /* xxx fall-through for now / fix */
223 * Handle absolute value, negate or set-negative of src register.
225 sign_op
= tgsi_util_get_full_src_register_sign_mode(src
, channel
);
226 if (sign_op
!= TGSI_UTIL_SIGN_KEEP
) {
228 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
230 const int bit31mask_reg
= get_itemp(gen
);
234 /* re-use 'reg' for the result */
238 /* alloc a new reg for the result */
239 result_reg
= get_itemp(gen
);
242 /* mask with bit 31 set, the rest cleared */
243 spe_load_int(gen
->f
, bit31mask_reg
, (1 << 31));
245 if (sign_op
== TGSI_UTIL_SIGN_CLEAR
) {
246 spe_andc(gen
->f
, result_reg
, reg
, bit31mask_reg
);
248 else if (sign_op
== TGSI_UTIL_SIGN_SET
) {
249 spe_and(gen
->f
, result_reg
, reg
, bit31mask_reg
);
252 assert(sign_op
== TGSI_UTIL_SIGN_TOGGLE
);
253 spe_xor(gen
->f
, result_reg
, reg
, bit31mask_reg
);
264 * Return the index of an SPE register to use for the given TGSI register.
265 * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
266 * corresponding SPE register is returned. If the TGSI register is
267 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
268 * See store_dest_reg() below...
271 get_dst_reg(struct codegen
*gen
,
273 const struct tgsi_full_dst_register
*dest
)
277 switch (dest
->DstRegister
.File
) {
278 case TGSI_FILE_TEMPORARY
:
279 if (gen
->if_nesting
> 0)
280 reg
= get_itemp(gen
);
282 reg
= gen
->temp_regs
[dest
->DstRegister
.Index
][channel
];
284 case TGSI_FILE_OUTPUT
:
285 reg
= get_itemp(gen
);
296 * When a TGSI instruction is writing to an output register, this
297 * function emits the SPE store instruction to store the value_reg.
298 * \param value_reg the SPE register containing the value to store.
299 * This would have been returned by get_dst_reg().
302 store_dest_reg(struct codegen
*gen
,
303 int value_reg
, int channel
,
304 const struct tgsi_full_dst_register
*dest
)
306 switch (dest
->DstRegister
.File
) {
307 case TGSI_FILE_TEMPORARY
:
308 if (gen
->if_nesting
> 0) {
309 int d_reg
= gen
->temp_regs
[dest
->DstRegister
.Index
][channel
];
310 int exec_reg
= get_exec_mask_reg(gen
);
311 /* Mix d with new value according to exec mask:
312 * d[i] = mask_reg[i] ? value_reg : d_reg
314 spe_selb(gen
->f
, d_reg
, d_reg
, value_reg
, exec_reg
);
317 /* we're not inside a condition or loop: do nothing special */
320 case TGSI_FILE_OUTPUT
:
322 /* offset is measured in quadwords, not bytes */
323 int offset
= dest
->DstRegister
.Index
* 4 + channel
;
324 if (gen
->if_nesting
> 0) {
325 int exec_reg
= get_exec_mask_reg(gen
);
326 int curval_reg
= get_itemp(gen
);
327 /* First read the current value from memory:
328 * Load: curval = memory[(machine_reg) + offset]
330 spe_lqd(gen
->f
, curval_reg
, gen
->outputs_reg
, offset
);
331 /* Mix curval with newvalue according to exec mask:
332 * d[i] = mask_reg[i] ? value_reg : d_reg
334 spe_selb(gen
->f
, curval_reg
, curval_reg
, value_reg
, exec_reg
);
335 /* Store: memory[(machine_reg) + offset] = curval */
336 spe_stqd(gen
->f
, curval_reg
, gen
->outputs_reg
, offset
);
339 /* Store: memory[(machine_reg) + offset] = reg */
340 spe_stqd(gen
->f
, value_reg
, gen
->outputs_reg
, offset
);
351 emit_MOV(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
354 spe_comment(gen
->f
, -4, "MOV:");
355 for (ch
= 0; ch
< 4; ch
++) {
356 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
357 int src_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
358 int dst_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
359 /* XXX we don't always need to actually emit a mov instruction here */
360 spe_move(gen
->f
, dst_reg
, src_reg
);
361 store_dest_reg(gen
, dst_reg
, ch
, &inst
->FullDstRegisters
[0]);
369 * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
370 * becomes (up to) four SPU "fa" instructions because we're doing SOA
374 emit_ADD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
377 spe_comment(gen
->f
, -4, "ADD:");
378 /* Loop over Red/Green/Blue/Alpha channels */
379 for (ch
= 0; ch
< 4; ch
++) {
380 /* If the dest R, G, B or A writemask is enabled... */
381 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
382 /* get indexes of the two src, one dest SPE registers */
383 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
384 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
385 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
387 /* Emit actual SPE instruction: d = s1 + s2 */
388 spe_fa(gen
->f
, d_reg
, s1_reg
, s2_reg
);
390 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
391 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
392 /* Free any intermediate temps we allocated */
400 * Emit subtract. See emit_ADD for comments.
403 emit_SUB(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
406 spe_comment(gen
->f
, -4, "SUB:");
407 /* Loop over Red/Green/Blue/Alpha channels */
408 for (ch
= 0; ch
< 4; ch
++) {
409 /* If the dest R, G, B or A writemask is enabled... */
410 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
411 /* get indexes of the two src, one dest SPE registers */
412 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
413 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
414 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
416 /* Emit actual SPE instruction: d = s1 - s2 */
417 spe_fs(gen
->f
, d_reg
, s1_reg
, s2_reg
);
419 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
420 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
421 /* Free any intermediate temps we allocated */
429 * Emit multiply add. See emit_ADD for comments.
432 emit_MAD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
435 spe_comment(gen
->f
, -4, "MAD:");
436 for (ch
= 0; ch
< 4; ch
++) {
437 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
438 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
439 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
440 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
441 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
442 /* d = s1 * s2 + s3 */
443 spe_fma(gen
->f
, d_reg
, s1_reg
, s2_reg
, s3_reg
);
444 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
453 * Emit linear interpolate. See emit_ADD for comments.
456 emit_LERP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
459 spe_comment(gen
->f
, -4, "LERP:");
460 for (ch
= 0; ch
< 4; ch
++) {
461 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
462 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
463 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
464 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
465 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
466 /* d = s3 + s1(s2 - s3) */
467 spe_fs(gen
->f
, d_reg
, s2_reg
, s3_reg
);
468 spe_fma(gen
->f
, d_reg
, d_reg
, s1_reg
, s3_reg
);
469 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
477 * Emit multiply. See emit_ADD for comments.
480 emit_MUL(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
483 spe_comment(gen
->f
, -4, "MUL:");
484 for (ch
= 0; ch
< 4; ch
++) {
485 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
486 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
487 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
488 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
490 spe_fm(gen
->f
, d_reg
, s1_reg
, s2_reg
);
491 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
499 * Emit reciprocal. See emit_ADD for comments.
502 emit_RCP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
505 spe_comment(gen
->f
, -4, "RCP:");
506 for (ch
= 0; ch
< 4; ch
++) {
507 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
508 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
509 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
511 spe_frest(gen
->f
, d_reg
, s1_reg
);
512 spe_fi(gen
->f
, d_reg
, s1_reg
, d_reg
);
513 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
521 * Emit reciprocal sqrt. See emit_ADD for comments.
524 emit_RSQ(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
527 spe_comment(gen
->f
, -4, "RSQ:");
528 for (ch
= 0; ch
< 4; ch
++) {
529 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
530 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
531 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
533 spe_frsqest(gen
->f
, d_reg
, s1_reg
);
534 spe_fi(gen
->f
, d_reg
, s1_reg
, d_reg
);
535 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
543 * Emit absolute value. See emit_ADD for comments.
546 emit_ABS(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
549 spe_comment(gen
->f
, -4, "ABS:");
550 for (ch
= 0; ch
< 4; ch
++) {
551 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
552 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
553 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
554 const int bit31mask_reg
= get_itemp(gen
);
556 /* mask with bit 31 set, the rest cleared */
557 spe_load_int(gen
->f
, bit31mask_reg
, (1 << 31));
559 /* d = sign bit cleared in s1 */
560 spe_andc(gen
->f
, d_reg
, s1_reg
, bit31mask_reg
);
562 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
570 * Emit 3 component dot product. See emit_ADD for comments.
573 emit_DP3(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
576 spe_comment(gen
->f
, -4, "DP3:");
578 int s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
579 int s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
580 int tmp_reg
= get_itemp(gen
);
582 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
584 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
585 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
586 /* t = y0 * y1 + t */
587 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
589 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
590 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
591 /* t = z0 * z1 + t */
592 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
594 for (ch
= 0; ch
< 4; ch
++) {
595 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
596 store_dest_reg(gen
, tmp_reg
, ch
, &inst
->FullDstRegisters
[0]);
605 * Emit 4 component dot product. See emit_ADD for comments.
608 emit_DP4(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
611 spe_comment(gen
->f
, -4, "DP4:");
613 int s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
614 int s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
615 int tmp_reg
= get_itemp(gen
);
617 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
619 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
620 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
621 /* t = y0 * y1 + t */
622 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
624 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
625 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
626 /* t = z0 * z1 + t */
627 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
629 s1_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[0]);
630 s2_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[1]);
631 /* t = w0 * w1 + t */
632 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
634 for (ch
= 0; ch
< 4; ch
++) {
635 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
636 store_dest_reg(gen
, tmp_reg
, ch
, &inst
->FullDstRegisters
[0]);
645 * Emit homogeneous dot product. See emit_ADD for comments.
648 emit_DPH(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
651 spe_comment(gen
->f
, -4, "DPH:");
653 int s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
654 int s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
655 int tmp_reg
= get_itemp(gen
);
658 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
660 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
661 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
662 /* t = y0 * y1 + t */
663 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
665 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
666 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
667 /* t = z0 * z1 + t */
668 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
670 s2_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[1]);
672 spe_fa(gen
->f
, tmp_reg
, s2_reg
, tmp_reg
);
674 for (ch
= 0; ch
< 4; ch
++) {
675 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
676 store_dest_reg(gen
, tmp_reg
, ch
, &inst
->FullDstRegisters
[0]);
685 * Emit cross product. See emit_ADD for comments.
688 emit_XPD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
690 spe_comment(gen
->f
, -4, "XPD:");
692 int s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
693 int s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
694 int tmp_reg
= get_itemp(gen
);
697 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
699 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
700 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
701 /* t = y0 * z1 - t */
702 spe_fms(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
704 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << CHAN_X
)) {
705 store_dest_reg(gen
, tmp_reg
, CHAN_X
, &inst
->FullDstRegisters
[0]);
708 s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
709 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
711 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
713 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
714 s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
715 /* t = z0 * x1 - t */
716 spe_fms(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
718 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << CHAN_Y
)) {
719 store_dest_reg(gen
, tmp_reg
, CHAN_Y
, &inst
->FullDstRegisters
[0]);
722 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
723 s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
725 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
727 s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
728 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
729 /* t = x0 * y1 - t */
730 spe_fms(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
732 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << CHAN_Z
)) {
733 store_dest_reg(gen
, tmp_reg
, CHAN_Z
, &inst
->FullDstRegisters
[0]);
741 * Emit set-if-greater-than.
742 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
743 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
744 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
747 emit_SGT(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
751 spe_comment(gen
->f
, -4, "SGT:");
753 for (ch
= 0; ch
< 4; ch
++) {
754 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
755 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
756 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
757 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
760 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
762 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
763 /* d = d & one_reg */
764 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
766 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
775 * Emit set-if_less-then. See emit_SGT for comments.
778 emit_SLT(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
782 spe_comment(gen
->f
, -4, "SLT:");
784 for (ch
= 0; ch
< 4; ch
++) {
785 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
786 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
787 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
788 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
791 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
793 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
794 /* d = d & one_reg */
795 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
797 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
806 * Emit set-if_greater-then-or-equal. See emit_SGT for comments.
809 emit_SGE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
813 spe_comment(gen
->f
, -4, "SGE:");
815 for (ch
= 0; ch
< 4; ch
++) {
816 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
817 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
818 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
819 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
822 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
824 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
825 /* d = ~d & one_reg */
826 spe_andc(gen
->f
, d_reg
, get_const_one_reg(gen
), d_reg
);
828 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
837 * Emit set-if_less-then-or-equal. See emit_SGT for comments.
840 emit_SLE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
844 spe_comment(gen
->f
, -4, "SLE:");
846 for (ch
= 0; ch
< 4; ch
++) {
847 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
848 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
849 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
850 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
853 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
855 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
856 /* d = ~d & one_reg */
857 spe_andc(gen
->f
, d_reg
, get_const_one_reg(gen
), d_reg
);
859 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
868 * Emit set-if_equal. See emit_SGT for comments.
871 emit_SEQ(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
875 spe_comment(gen
->f
, -4, "SEQ:");
877 for (ch
= 0; ch
< 4; ch
++) {
878 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
879 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
880 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
881 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
884 spe_fceq(gen
->f
, d_reg
, s1_reg
, s2_reg
);
886 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
887 /* d = d & one_reg */
888 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
890 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
899 * Emit set-if_not_equal. See emit_SGT for comments.
902 emit_SNE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
906 spe_comment(gen
->f
, -4, "SNE:");
908 for (ch
= 0; ch
< 4; ch
++) {
909 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
910 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
911 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
912 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
915 spe_fceq(gen
->f
, d_reg
, s1_reg
, s2_reg
);
916 spe_nor(gen
->f
, d_reg
, d_reg
, d_reg
);
918 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
919 /* d = d & one_reg */
920 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
922 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
931 * Emit compare. See emit_SGT for comments.
934 emit_CMP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
938 spe_comment(gen
->f
, -4, "CMP:");
940 for (ch
= 0; ch
< 4; ch
++) {
941 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
942 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
943 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
944 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
945 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
946 int zero_reg
= get_itemp(gen
);
948 spe_xor(gen
->f
, zero_reg
, zero_reg
, zero_reg
);
950 /* d = (s1 < 0) ? s2 : s3 */
951 spe_fcgt(gen
->f
, d_reg
, zero_reg
, s1_reg
);
952 spe_selb(gen
->f
, d_reg
, s3_reg
, s2_reg
, d_reg
);
954 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
964 * Convert float to signed int
965 * Convert signed int to float
968 emit_TRUNC(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
972 spe_comment(gen
->f
, -4, "TRUNC:");
974 for (ch
= 0; ch
< 4; ch
++) {
975 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
976 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
977 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
979 /* Convert float to int */
980 spe_cflts(gen
->f
, d_reg
, s1_reg
, 0);
982 /* Convert int to float */
983 spe_csflt(gen
->f
, d_reg
, d_reg
, 0);
985 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
995 * If negative int subtract one
996 * Convert float to signed int
997 * Convert signed int to float
1000 emit_FLR(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1004 spe_comment(gen
->f
, -4, "FLR:");
1006 int zero_reg
= get_itemp(gen
);
1007 spe_xor(gen
->f
, zero_reg
, zero_reg
, zero_reg
);
1009 for (ch
= 0; ch
< 4; ch
++) {
1010 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1011 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1012 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1013 int tmp_reg
= get_itemp(gen
);
1015 /* If negative, subtract 1.0 */
1016 spe_fcgt(gen
->f
, d_reg
, zero_reg
, s1_reg
);
1017 spe_selb(gen
->f
, tmp_reg
, zero_reg
, get_const_one_reg(gen
), d_reg
);
1018 spe_fs(gen
->f
, d_reg
, s1_reg
, tmp_reg
);
1020 /* Convert float to int */
1021 spe_cflts(gen
->f
, d_reg
, d_reg
, 0);
1023 /* Convert int to float */
1024 spe_csflt(gen
->f
, d_reg
, d_reg
, 0);
1026 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1036 * Input - FLR(Input)
1039 emit_FRC(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1043 spe_comment(gen
->f
, -4, "FLR:");
1045 int zero_reg
= get_itemp(gen
);
1046 spe_xor(gen
->f
, zero_reg
, zero_reg
, zero_reg
);
1048 for (ch
= 0; ch
< 4; ch
++) {
1049 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1050 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1051 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1052 int tmp_reg
= get_itemp(gen
);
1054 /* If negative, subtract 1.0 */
1055 spe_fcgt(gen
->f
, d_reg
, zero_reg
, s1_reg
);
1056 spe_selb(gen
->f
, tmp_reg
, zero_reg
, get_const_one_reg(gen
), d_reg
);
1057 spe_fs(gen
->f
, d_reg
, s1_reg
, tmp_reg
);
1059 /* Convert float to int */
1060 spe_cflts(gen
->f
, d_reg
, d_reg
, 0);
1062 /* Convert int to float */
1063 spe_csflt(gen
->f
, d_reg
, d_reg
, 0);
1065 /* d = s1 - FLR(s1) */
1066 spe_fs(gen
->f
, d_reg
, s1_reg
, d_reg
);
1068 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1079 print_functions(struct cell_context
*cell
)
1081 struct cell_spu_function_info
*funcs
= &cell
->spu_functions
;
1083 for (i
= 0; i
< funcs
->num
; i
++) {
1084 printf("SPU func %u: %s at %u\n",
1085 i
, funcs
->names
[i
], funcs
->addrs
[i
]);
1092 * Emit code to call a SPU function.
1093 * Used to implement instructions like SIN/COS/POW/TEX/etc.
1096 emit_function_call(struct codegen
*gen
,
1097 const struct tgsi_full_instruction
*inst
,
1098 char *funcname
, uint num_args
)
1100 const struct cell_spu_function_info
*funcs
= &gen
->cell
->spu_functions
;
1105 /* XXX temporary value */
1106 const int frameSize
= 64; /* stack frame (activation record) size */
1108 assert(num_args
<= 3);
1110 /* lookup function address */
1114 for (i
= 0; i
< funcs
->num
; i
++) {
1115 if (strcmp(funcs
->names
[i
], funcname
) == 0) {
1116 addr
= funcs
->addrs
[i
];
1119 assert(addr
&& "spu function not found");
1122 addr
/= 4; /* discard 2 least significant bits */
1124 snprintf(comment
, sizeof(comment
), "CALL %s:", funcname
);
1125 spe_comment(gen
->f
, -4, comment
);
1127 for (ch
= 0; ch
< 4; ch
++) {
1128 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1129 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1132 for (a
= 0; a
< num_args
; a
++) {
1133 s_regs
[a
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[a
]);
1137 * save registers on stack
1138 * move parameters to registers 3, 4, 5...
1140 * save return value (reg 3)
1141 * restore registers from stack
1144 /* XXX hack: load first function param */
1145 spe_move(gen
->f
, 3, s_regs
[0]);
1147 /* save $lr on stack # stqd $lr,16($sp) */
1148 spe_stqd(gen
->f
, SPE_REG_RA
, SPE_REG_SP
, 16);
1149 /* save stack pointer # stqd $sp,-frameSize($sp) */
1150 spe_stqd(gen
->f
, SPE_REG_SP
, SPE_REG_SP
, -frameSize
);
1152 /* XXX save registers to stack here */
1154 /* adjust stack pointer # ai $sp,$sp,-frameSize */
1155 spe_ai(gen
->f
, SPE_REG_SP
, SPE_REG_SP
, -frameSize
);
1157 /* branch to function, save return addr */
1158 spe_brasl(gen
->f
, SPE_REG_RA
, addr
);
1160 /* restore stack pointer # ai $sp,$sp,frameSize */
1161 spe_ai(gen
->f
, SPE_REG_SP
, SPE_REG_SP
, frameSize
);
1163 /* XXX restore registers from stack here */
1165 /* restore $lr # lqd $lr,16($sp) */
1166 spe_lqd(gen
->f
, SPE_REG_RA
, SPE_REG_SP
, 16);
1168 /* XXX hack: save function's return value */
1169 spe_move(gen
->f
, d_reg
, 3);
1171 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1181 * Emit max. See emit_SGT for comments.
/*
 * Emit SPE code for a TGSI MAX instruction.
 *
 * gen:  code generation context; instructions are appended to gen->f.
 * inst: the TGSI instruction; sources 0 and 1 are read, destination 0
 *       is written.
 *
 * One compare/select pair is emitted per channel that is enabled in the
 * destination write mask.
 */
1184 emit_MAX(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1188 spe_comment(gen
->f
, -4, "MAX:");
/* Visit all four channels; channels not in the write mask emit nothing. */
1190 for (ch
= 0; ch
< 4; ch
++) {
1191 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1192 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1193 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1194 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1196 /* d = (s1 > s2) ? s1 : s2 */
/* d_reg first receives the compare result, then serves as the selector
 * operand of selb and is overwritten with the selected value.
 */
1197 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
1198 spe_selb(gen
->f
, d_reg
, s2_reg
, s1_reg
, d_reg
);
1200 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1209 * Emit min. See emit_SGT for comments.
/*
 * Emit SPE code for a TGSI MIN instruction.
 *
 * gen:  code generation context; instructions are appended to gen->f.
 * inst: the TGSI instruction; sources 0 and 1 are read, destination 0
 *       is written.
 *
 * Same shape as emit_MAX: the selb operands are identical, only the
 * operand order of the fcgt compare is swapped (s2, s1).
 */
1212 emit_MIN(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1216 spe_comment(gen
->f
, -4, "MIN:");
/* Visit all four channels; channels not in the write mask emit nothing. */
1218 for (ch
= 0; ch
< 4; ch
++) {
1219 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1220 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1221 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1222 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1224 /* d = (s2 > s1) ? s1 : s2 */
/* d_reg first receives the compare result, then serves as the selector
 * operand of selb and is overwritten with the selected value.
 */
1225 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
1226 spe_selb(gen
->f
, d_reg
, s2_reg
, s1_reg
, d_reg
);
1228 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
/*
 * Emit SPE code for a TGSI IF instruction.
 *
 * gen:  code generation context; instructions are appended to gen->f.
 * inst: the TGSI instruction; only channel 0 of source 0 is read as the
 *       predicate.
 *
 * The execution mask register is narrowed in place: it is ANDed with a
 * mask derived from the predicate.
 */
1237 emit_IF(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
/* The predicate is always taken from channel 0 of the source operand. */
1239 const int channel
= 0;
1240 const int exec_reg
= get_exec_mask_reg(gen
);
1242 spe_comment(gen
->f
, -4, "IF:");
1244 /* update execution mask with the predicate register */
1245 int tmp_reg
= get_itemp(gen
);
1246 int s1_reg
= get_src_reg(gen
, channel
, &inst
->FullSrcRegisters
[0]);
1248 /* tmp = (s1_reg == 0) */
1249 spe_ceqi(gen
->f
, tmp_reg
, s1_reg
, 0);
/* Invert tmp in place so that it marks the elements where s1_reg != 0
 * (presumably a bitwise complement -- confirm against spe_complement).
 */
1251 spe_complement(gen
->f
, tmp_reg
, tmp_reg
);
1252 /* exec_mask = exec_mask & tmp */
1253 spe_and(gen
->f
, exec_reg
, exec_reg
, tmp_reg
);
/*
 * Emit SPE code for a TGSI ELSE instruction.
 *
 * gen:  code generation context; instructions are appended to gen->f.
 * inst: the TGSI instruction (not read by the code shown here).
 *
 * The execution mask register is complemented in place.
 * NOTE(review): complementing the whole mask would also re-enable
 * elements that an enclosing IF had disabled; presumably nested
 * conditionals are not handled yet -- confirm.
 */
1264 emit_ELSE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1266 const int exec_reg
= get_exec_mask_reg(gen
);
1268 spe_comment(gen
->f
, -4, "ELSE:");
1270 /* exec_mask = !exec_mask */
1271 spe_complement(gen
->f
, exec_reg
, exec_reg
);
/*
 * Emit SPE code for a TGSI ENDIF instruction.
 *
 * gen:  code generation context; instructions are appended to gen->f.
 * inst: the TGSI instruction (not read by the code shown here).
 *
 * The execution mask register is reloaded with ~0x0 (all bits set).
 * As the XXX note below says, the mask that was active before the
 * matching IF is not restored.
 */
1278 emit_ENDIF(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1280 const int exec_reg
= get_exec_mask_reg(gen
);
1282 spe_comment(gen
->f
, -4, "ENDIF:");
1284 /* XXX todo: pop execution mask */
1286 spe_load_int(gen
->f
, exec_reg
, ~0x0);
/*
 * Emit SPE code for the TGSI DDX and DDY instructions.
 *
 * gen:  code generation context; instructions are appended to gen->f.
 * inst: the TGSI instruction; source 0 is read, destination 0 is
 *       written (the result is left in d_reg by spe_fs).
 * ddx:  true for DDX, false for DDY (see the callers in
 *       emit_instruction); it selects the emitted comment label.
 *
 * For each channel enabled in the write mask, two pixel values are
 * splatted out of the source register into temporaries and combined
 * with spe_fs (presumably d = t2 - t1 -- confirm).
 */
1294 emit_DDX_DDY(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
,
1299 spe_comment(gen
->f
, -4, ddx
? "DDX:" : "DDY:");
1301 for (ch
= 0; ch
< 4; ch
++) {
1302 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1303 int s_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1304 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1306 int t1_reg
= get_itemp(gen
);
1307 int t2_reg
= get_itemp(gen
);
/* t1 always holds the upper-left pixel (word 0), the common reference. */
1309 spe_splat_word(gen
->f
, t1_reg
, s_reg
, 0); /* upper-left pixel */
/* NOTE(review): as listed, the next two splats both write t2_reg back
 * to back, so the word-1 value would be overwritten by word 2.
 * Presumably exactly one of them is chosen based on ddx -- confirm.
 */
1311 spe_splat_word(gen
->f
, t2_reg
, s_reg
, 1); /* upper-right pixel */
1314 spe_splat_word(gen
->f
, t2_reg
, s_reg
, 2); /* lower-left pixel */
1316 spe_fs(gen
->f
, d_reg
, t2_reg
, t1_reg
);
1329 * Emit END instruction.
1330 * We just return from the shader function at this point.
1332 * Note that there may be more code after this that would be
1333 * called by TGSI_OPCODE_CALL.
/*
 * Emit the code that ends the shader: a comment marker followed by an
 * spe_bi through SPE_REG_RA, i.e. a return from the shader function.
 *
 * gen: code generation context; instructions are appended to gen->f.
 */
1336 emit_END(struct codegen
*gen
)
1338 spe_comment(gen
->f
, -4, "END:");
1339 /* return from function call */
1340 spe_bi(gen
->f
, SPE_REG_RA
, 0, 0);
1346 * Emit code for the given instruction. Just a big switch stmt.
/*
 * Dispatch one TGSI instruction to its emit_* routine, keyed on
 * inst->Instruction.Opcode, and return that routine's result.
 *
 * gen:  code generation context, passed through to the emitter.
 * inst: the TGSI instruction to translate.
 *
 * Opcodes without a case below end up at the fprintf at the bottom,
 * which reports the numeric opcode on stderr.
 */
1349 emit_instruction(struct codegen
*gen
,
1350 const struct tgsi_full_instruction
*inst
)
1352 switch (inst
->Instruction
.Opcode
) {
/* MOV and SWZ are both handled by emit_MOV. */
1353 case TGSI_OPCODE_MOV
:
1354 case TGSI_OPCODE_SWZ
:
1355 return emit_MOV(gen
, inst
);
/* Arithmetic. */
1356 case TGSI_OPCODE_MUL
:
1357 return emit_MUL(gen
, inst
);
1358 case TGSI_OPCODE_ADD
:
1359 return emit_ADD(gen
, inst
);
1360 case TGSI_OPCODE_SUB
:
1361 return emit_SUB(gen
, inst
);
1362 case TGSI_OPCODE_MAD
:
1363 return emit_MAD(gen
, inst
);
1364 case TGSI_OPCODE_LERP
:
1365 return emit_LERP(gen
, inst
);
1366 case TGSI_OPCODE_DP3
:
1367 return emit_DP3(gen
, inst
);
1368 case TGSI_OPCODE_DP4
:
1369 return emit_DP4(gen
, inst
);
1370 case TGSI_OPCODE_DPH
:
1371 return emit_DPH(gen
, inst
);
1372 case TGSI_OPCODE_XPD
:
1373 return emit_XPD(gen
, inst
);
1374 case TGSI_OPCODE_RCP
:
1375 return emit_RCP(gen
, inst
);
1376 case TGSI_OPCODE_RSQ
:
1377 return emit_RSQ(gen
, inst
);
1378 case TGSI_OPCODE_ABS
:
1379 return emit_ABS(gen
, inst
);
/* Comparisons and selects. */
1380 case TGSI_OPCODE_SGT
:
1381 return emit_SGT(gen
, inst
);
1382 case TGSI_OPCODE_SLT
:
1383 return emit_SLT(gen
, inst
);
1384 case TGSI_OPCODE_SGE
:
1385 return emit_SGE(gen
, inst
);
1386 case TGSI_OPCODE_SLE
:
1387 return emit_SLE(gen
, inst
);
1388 case TGSI_OPCODE_SEQ
:
1389 return emit_SEQ(gen
, inst
);
1390 case TGSI_OPCODE_SNE
:
1391 return emit_SNE(gen
, inst
);
1392 case TGSI_OPCODE_CMP
:
1393 return emit_CMP(gen
, inst
);
1394 case TGSI_OPCODE_MAX
:
1395 return emit_MAX(gen
, inst
);
1396 case TGSI_OPCODE_MIN
:
1397 return emit_MIN(gen
, inst
);
/* Rounding. */
1398 case TGSI_OPCODE_TRUNC
:
1399 return emit_TRUNC(gen
, inst
);
1400 case TGSI_OPCODE_FLR
:
1401 return emit_FLR(gen
, inst
);
1402 case TGSI_OPCODE_FRC
:
1403 return emit_FRC(gen
, inst
);
/* END takes only the context, not the instruction. */
1404 case TGSI_OPCODE_END
:
1405 return emit_END(gen
);
/* COS/SIN/POW are emitted as calls to named SPU functions; the final
 * argument is presumably the number of source arguments to pass
 * (1, 1 and 2 here) -- confirm against emit_function_call.
 */
1407 case TGSI_OPCODE_COS
:
1408 return emit_function_call(gen
, inst
, "spu_cos", 1);
1409 case TGSI_OPCODE_SIN
:
1410 return emit_function_call(gen
, inst
, "spu_sin", 1);
1411 case TGSI_OPCODE_POW
:
1412 return emit_function_call(gen
, inst
, "spu_pow", 2);
/* Conditional execution via the execution mask. */
1414 case TGSI_OPCODE_IF
:
1415 return emit_IF(gen
, inst
);
1416 case TGSI_OPCODE_ELSE
:
1417 return emit_ELSE(gen
, inst
);
1418 case TGSI_OPCODE_ENDIF
:
1419 return emit_ENDIF(gen
, inst
);
/* DDX and DDY share one emitter; the boolean selects DDX (true) or
 * DDY (false).
 */
1421 case TGSI_OPCODE_DDX
:
1422 return emit_DDX_DDY(gen
, inst
, true);
1423 case TGSI_OPCODE_DDY
:
1424 return emit_DDX_DDY(gen
, inst
, false);
1426 /* XXX lots more cases to do... */
/* Every case above returns, so this is reached only for opcodes that
 * have no case; the numeric opcode is printed to stderr.
 */
1429 fprintf(stderr
, "Cell: unimplemented TGSI instruction %d!\n",
1430 inst
->Instruction
.Opcode
);
1440 * Emit code for a TGSI immediate value (vector of four floats).
1441 * This involves register allocation and initialization.
1442 * XXX the initialization should be done by a "prepare" stage, not
1443 * per quad execution!
/*
 * Emit code for one TGSI immediate (four float components).
 *
 * gen:   code generation context; instructions are appended to gen->f
 *        and the allocated registers are recorded in
 *        gen->imm_regs[gen->num_imm][0..3].
 * immed: the TGSI immediate; immed->u.ImmediateFloat32[ch].Float is
 *        read for each of the four channels.
 *
 * For each channel an SPE register is allocated, recorded in the
 * immediate map, and initialized with an spe_load_float instruction.
 */
1446 emit_immediate(struct codegen
*gen
, const struct tgsi_full_immediate
*immed
)
/* NOTE(review): the bound checked here is MAX_TEMPS, but the index is
 * used on imm_regs -- confirm that imm_regs has at least MAX_TEMPS rows.
 */
1450 assert(gen
->num_imm
< MAX_TEMPS
);
1452 spe_comment(gen
->f
, -4, "IMMEDIATE:");
1454 for (ch
= 0; ch
< 4; ch
++) {
1455 float val
= immed
->u
.ImmediateFloat32
[ch
].Float
;
/* NOTE(review): no check for a negative (out of registers) result is
 * visible here, whereas emit_declaration tests for < 0 -- confirm.
 */
1456 int reg
= spe_allocate_available_register(gen
->f
);
1461 /* update immediate map */
1462 gen
->imm_regs
[gen
->num_imm
][ch
] = reg
;
1464 /* emit initializer instruction */
1465 spe_load_float(gen
->f
, reg
, val
);
1476 * Emit "code" for a TGSI declaration.
1477 * We only care about TGSI TEMPORARY register declarations at this time.
1478 * For each TGSI TEMPORARY we allocate four SPE registers.
/*
 * Handle one TGSI declaration.
 *
 * cell: rendering context; only cell->debug_flags is read, to decide
 *       whether to print debug output.
 * gen:  code generation context; gen->temp_regs[i][0..3] is filled in
 *       for every declared temporary i.
 * decl: the TGSI declaration; decl->Declaration.File selects the
 *       handling and decl->DeclarationRange gives the index range.
 *
 * Returns false when an SPE register cannot be allocated.
 */
1481 emit_declaration(struct cell_context
*cell
,
1482 struct codegen
*gen
, const struct tgsi_full_declaration
*decl
)
1486 switch (decl
->Declaration
.File
) {
1487 case TGSI_FILE_TEMPORARY
:
/* Debug output only when CELL_DEBUG_ASM is set in the context flags. */
1488 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1489 printf("Declare temp reg %d .. %d\n",
1490 decl
->DeclarationRange
.First
,
1491 decl
->DeclarationRange
.Last
);
/* Walk the declared range; Last is inclusive. */
1494 for (i
= decl
->DeclarationRange
.First
;
1495 i
<= decl
->DeclarationRange
.Last
;
1497 assert(i
< MAX_TEMPS
);
/* One SPE register per channel of the temporary; a negative result
 * from the allocator means no register was available.
 */
1498 for (ch
= 0; ch
< 4; ch
++) {
1499 gen
->temp_regs
[i
][ch
] = spe_allocate_available_register(gen
->f
);
1500 if (gen
->temp_regs
[i
][ch
] < 0)
1501 return false; /* out of regs */
1504 /* XXX if we run out of SPE registers, we need to spill
1505 * to SPU memory. someday...
/* Debug output: the four SPE registers assigned to temporary i. */
1508 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1509 printf(" SPE regs: %d %d %d %d\n",
1510 gen
->temp_regs
[i
][0],
1511 gen
->temp_regs
[i
][1],
1512 gen
->temp_regs
[i
][2],
1513 gen
->temp_regs
[i
][3]);
1526 * Translate TGSI shader code to SPE instructions. This is done when
1527 * the state tracker gives us a new shader (via pipe->create_fs_state()).
1529 * \param cell the rendering context (in)
1530 * \param tokens the TGSI shader (in)
1531 * \param f the generated function (out)
/*
 * Translate a TGSI token stream into SPE instructions.
 *
 * cell:   rendering context; cell->debug_flags controls debug output and
 *         the context is passed on to emit_declaration.
 * tokens: the TGSI shader to translate.
 * f:      the SPE function that receives the generated code; it is
 *         initialized here with spe_init_func.
 *
 * The token stream is parsed one token at a time and each immediate,
 * declaration and instruction token is handed to the matching emit_*
 * routine.  Parsing stops at the end of the tokens or as soon as
 * gen.error is set.
 */
1534 cell_gen_fragment_program(struct cell_context
*cell
,
1535 const struct tgsi_token
*tokens
,
1536 struct spe_function
*f
)
1538 struct tgsi_parse_context parse
;
/* Start from an all-zero code generation context. */
1541 memset(&gen
, 0, sizeof(gen
));
1545 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
1546 gen
.inputs_reg
= 3; /* pointer to inputs array */
1547 gen
.outputs_reg
= 4; /* pointer to outputs array */
1548 gen
.constants_reg
= 5; /* pointer to constants array */
/* Size the code buffer for the maximum program length, then reserve the
 * three parameter registers so the allocator will not hand them out.
 */
1550 spe_init_func(f
, SPU_MAX_FRAGMENT_PROGRAM_INSTS
* SPE_INST_SIZE
);
1551 spe_allocate_register(f
, gen
.inputs_reg
);
1552 spe_allocate_register(f
, gen
.outputs_reg
);
1553 spe_allocate_register(f
, gen
.constants_reg
);
/* With CELL_DEBUG_ASM set: enable code printing on f and dump the
 * incoming TGSI tokens.
 */
1555 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1556 spe_print_code(f
, true);
1558 printf("Begin %s\n", __FUNCTION__
);
1559 tgsi_dump(tokens
, 0);
1562 tgsi_parse_init(&parse
, tokens
);
/* Main translation loop: one token per iteration, until the stream
 * ends or an error has been recorded in gen.error.
 */
1564 while (!tgsi_parse_end_of_tokens(&parse
) && !gen
.error
) {
1565 tgsi_parse_token(&parse
);
/* Dispatch on the token type; each emitter reports failure by
 * returning false.
 */
1567 switch (parse
.FullToken
.Token
.Type
) {
1568 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1569 if (!emit_immediate(&gen
, &parse
.FullToken
.FullImmediate
))
1573 case TGSI_TOKEN_TYPE_DECLARATION
:
1574 if (!emit_declaration(cell
, &gen
, &parse
.FullToken
.FullDeclaration
))
1578 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1579 if (!emit_instruction(&gen
, &parse
.FullToken
.FullInstruction
))
1590 /* terminate the SPE code */
/* NOTE(review): as listed, this return is unconditional, which would
 * make the statements below unreachable; presumably it is guarded by a
 * condition that is not shown here -- confirm.
 */
1591 return emit_END(&gen
);
/* With CELL_DEBUG_ASM set: report the number of generated instructions. */
1594 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1595 printf("cell_gen_fragment_program nr instructions: %d\n", f
->num_inst
);
1596 printf("End %s\n", __FUNCTION__
);
/* Release the parser state set up by tgsi_parse_init. */
1599 tgsi_parse_free( &parse
);