1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * Generate SPU fragment program/shader code.
33 * Note that we generate SOA-style code here. So each TGSI instruction
34 * operates on four pixels (and is translated into four SPU instructions,
35 * generally speaking).
41 #include "pipe/p_defines.h"
42 #include "pipe/p_state.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
48 #include "rtasm/rtasm_ppc_spe.h"
49 #include "util/u_memory.h"
50 #include "cell_context.h"
51 #include "cell_gen_fp.h"
63 * Context needed during code generation.
67 struct cell_context
*cell
;
68 int inputs_reg
; /**< 1st function parameter */
69 int outputs_reg
; /**< 2nd function parameter */
70 int constants_reg
; /**< 3rd function parameter */
71 int temp_regs
[MAX_TEMPS
][4]; /**< maps TGSI temps to SPE registers */
72 int imm_regs
[MAX_IMMED
][4]; /**< maps TGSI immediates to SPE registers */
74 int num_imm
; /**< number of immediates */
76 int one_reg
; /**< register containing {1.0, 1.0, 1.0, 1.0} */
78 /** Per-instruction temps / intermediate temps */
82 /** Current IF/ELSE/ENDIF nesting level */
84 /** Index of execution mask register */
87 int frame_size
; /**< Stack frame size, in bytes */
89 struct spe_function
*f
;
95 * Allocate an intermediate temporary register.
98 get_itemp(struct codegen
*gen
)
100 int t
= spe_allocate_available_register(gen
->f
);
101 assert(gen
->num_itemps
< Elements(gen
->itemps
));
102 gen
->itemps
[gen
->num_itemps
++] = t
;
107 * Free all intermediate temporary registers. To be called after each
108 * instruction has been emitted.
111 free_itemps(struct codegen
*gen
)
114 for (i
= 0; i
< gen
->num_itemps
; i
++) {
115 spe_release_register(gen
->f
, gen
->itemps
[i
]);
122 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
123 * The register is allocated and initialized upon the first call.
126 get_const_one_reg(struct codegen
*gen
)
128 if (gen
->one_reg
<= 0) {
129 gen
->one_reg
= spe_allocate_available_register(gen
->f
);
131 spe_indent(gen
->f
, 4);
132 spe_comment(gen
->f
, -4, "INIT CONSTANT 1.0:");
134 /* one = {1.0, 1.0, 1.0, 1.0} */
135 spe_load_float(gen
->f
, gen
->one_reg
, 1.0f
);
137 spe_indent(gen
->f
, -4);
145 * Return index of the pixel execution mask.
146 * The register is allocated and initialized upon the first call.
148 * The pixel execution mask controls which pixels in a quad are
149 * modified, according to surrounding conditionals, loops, etc.
152 get_exec_mask_reg(struct codegen
*gen
)
154 if (gen
->exec_mask_reg
<= 0) {
155 gen
->exec_mask_reg
= spe_allocate_available_register(gen
->f
);
157 spe_indent(gen
->f
, 4);
158 spe_comment(gen
->f
, -4, "INIT EXEC MASK = ~0:");
160 /* exec_mask = {~0, ~0, ~0, ~0} */
161 spe_load_int(gen
->f
, gen
->exec_mask_reg
, ~0);
163 spe_indent(gen
->f
, -4);
166 return gen
->exec_mask_reg
;
171 * Return the index of the SPU temporary containing the named TGSI
172 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
173 * just return the corresponding SPE register. If the TGSI register
174 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
175 * and emit an SPE load instruction.
178 get_src_reg(struct codegen
*gen
,
180 const struct tgsi_full_src_register
*src
)
183 int swizzle
= tgsi_util_get_full_src_register_extswizzle(src
, channel
);
184 boolean reg_is_itemp
= FALSE
;
187 assert(swizzle
>= TGSI_SWIZZLE_X
);
188 assert(swizzle
<= TGSI_EXTSWIZZLE_ONE
);
190 if (swizzle
== TGSI_EXTSWIZZLE_ONE
) {
191 /* Load const one float and early out */
192 reg
= get_const_one_reg(gen
);
194 else if (swizzle
== TGSI_EXTSWIZZLE_ZERO
) {
195 /* Load const zero float and early out */
196 reg
= get_itemp(gen
);
197 spe_xor(gen
->f
, reg
, reg
, reg
);
202 switch (src
->SrcRegister
.File
) {
203 case TGSI_FILE_TEMPORARY
:
204 reg
= gen
->temp_regs
[src
->SrcRegister
.Index
][swizzle
];
206 case TGSI_FILE_INPUT
:
208 /* offset is measured in quadwords, not bytes */
209 int offset
= src
->SrcRegister
.Index
* 4 + swizzle
;
210 reg
= get_itemp(gen
);
212 /* Load: reg = memory[(machine_reg) + offset] */
213 spe_lqd(gen
->f
, reg
, gen
->inputs_reg
, offset
* 16);
216 case TGSI_FILE_IMMEDIATE
:
217 reg
= gen
->imm_regs
[src
->SrcRegister
.Index
][swizzle
];
219 case TGSI_FILE_CONSTANT
:
221 /* offset is measured in quadwords, not bytes */
222 int offset
= src
->SrcRegister
.Index
* 4 + swizzle
;
223 reg
= get_itemp(gen
);
225 /* Load: reg = memory[(machine_reg) + offset] */
226 spe_lqd(gen
->f
, reg
, gen
->constants_reg
, offset
* 16);
229 case TGSI_FILE_SAMPLER
:
231 reg
= 3; /* XXX total hack */
240 * Handle absolute value, negate or set-negative of src register.
242 sign_op
= tgsi_util_get_full_src_register_sign_mode(src
, channel
);
243 if (sign_op
!= TGSI_UTIL_SIGN_KEEP
) {
245 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
247 const int bit31mask_reg
= get_itemp(gen
);
251 /* re-use 'reg' for the result */
255 /* alloc a new reg for the result */
256 result_reg
= get_itemp(gen
);
259 /* mask with bit 31 set, the rest cleared */
260 spe_load_int(gen
->f
, bit31mask_reg
, (1 << 31));
262 if (sign_op
== TGSI_UTIL_SIGN_CLEAR
) {
263 spe_andc(gen
->f
, result_reg
, reg
, bit31mask_reg
);
265 else if (sign_op
== TGSI_UTIL_SIGN_SET
) {
266 spe_and(gen
->f
, result_reg
, reg
, bit31mask_reg
);
269 assert(sign_op
== TGSI_UTIL_SIGN_TOGGLE
);
270 spe_xor(gen
->f
, result_reg
, reg
, bit31mask_reg
);
281 * Return the index of an SPE register to use for the given TGSI register.
282 * If the TGSI register is TGSI_FILE_TEMPORARY, the index of the
283 * corresponding SPE register is returned. If the TGSI register is
284 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
285 * See store_dest_reg() below...
288 get_dst_reg(struct codegen
*gen
,
290 const struct tgsi_full_dst_register
*dest
)
294 switch (dest
->DstRegister
.File
) {
295 case TGSI_FILE_TEMPORARY
:
296 if (gen
->if_nesting
> 0)
297 reg
= get_itemp(gen
);
299 reg
= gen
->temp_regs
[dest
->DstRegister
.Index
][channel
];
301 case TGSI_FILE_OUTPUT
:
302 reg
= get_itemp(gen
);
313 * When a TGSI instruction is writing to an output register, this
314 * function emits the SPE store instruction to store the value_reg.
315 * \param value_reg the SPE register containing the value to store.
316 * This would have been returned by get_dst_reg().
319 store_dest_reg(struct codegen
*gen
,
320 int value_reg
, int channel
,
321 const struct tgsi_full_dst_register
*dest
)
323 switch (dest
->DstRegister
.File
) {
324 case TGSI_FILE_TEMPORARY
:
325 if (gen
->if_nesting
> 0) {
326 int d_reg
= gen
->temp_regs
[dest
->DstRegister
.Index
][channel
];
327 int exec_reg
= get_exec_mask_reg(gen
);
328 /* Mix d with new value according to exec mask:
329 * d[i] = mask_reg[i] ? value_reg : d_reg
331 spe_selb(gen
->f
, d_reg
, d_reg
, value_reg
, exec_reg
);
334 /* we're not inside a condition or loop: do nothing special */
338 case TGSI_FILE_OUTPUT
:
340 /* offset is measured in quadwords, not bytes */
341 int offset
= dest
->DstRegister
.Index
* 4 + channel
;
342 if (gen
->if_nesting
> 0) {
343 int exec_reg
= get_exec_mask_reg(gen
);
344 int curval_reg
= get_itemp(gen
);
345 /* First read the current value from memory:
346 * Load: curval = memory[(machine_reg) + offset]
348 spe_lqd(gen
->f
, curval_reg
, gen
->outputs_reg
, offset
* 16);
349 /* Mix curval with newvalue according to exec mask:
350 * d[i] = mask_reg[i] ? value_reg : d_reg
352 spe_selb(gen
->f
, curval_reg
, curval_reg
, value_reg
, exec_reg
);
353 /* Store: memory[(machine_reg) + offset] = curval */
354 spe_stqd(gen
->f
, curval_reg
, gen
->outputs_reg
, offset
* 16);
357 /* Store: memory[(machine_reg) + offset] = reg */
358 spe_stqd(gen
->f
, value_reg
, gen
->outputs_reg
, offset
* 16);
370 emit_prologue(struct codegen
*gen
)
372 gen
->frame_size
= 256+128; /* XXX temporary */
374 spe_comment(gen
->f
, -4, "Function prologue:");
376 /* save $lr on stack # stqd $lr,16($sp) */
377 spe_stqd(gen
->f
, SPE_REG_RA
, SPE_REG_SP
, 16);
379 /* save stack pointer # stqd $sp,-frameSize($sp) */
380 spe_stqd(gen
->f
, SPE_REG_SP
, SPE_REG_SP
, -gen
->frame_size
);
382 /* adjust stack pointer # ai $sp,$sp,-frameSize */
383 spe_ai(gen
->f
, SPE_REG_SP
, SPE_REG_SP
, -gen
->frame_size
);
388 emit_epilogue(struct codegen
*gen
)
390 spe_comment(gen
->f
, -4, "Function epilogue:");
392 /* restore stack pointer # ai $sp,$sp,frameSize */
393 spe_ai(gen
->f
, SPE_REG_SP
, SPE_REG_SP
, gen
->frame_size
);
395 /* restore $lr # lqd $lr,16($sp) */
396 spe_lqd(gen
->f
, SPE_REG_RA
, SPE_REG_SP
, 16);
398 /* return from function call */
399 spe_bi(gen
->f
, SPE_REG_RA
, 0, 0);
404 emit_MOV(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
406 int ch
, src_reg
[4], dst_reg
[4];
407 spe_comment(gen
->f
, -4, "MOV:");
408 for (ch
= 0; ch
< 4; ch
++) {
409 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
410 src_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
411 dst_reg
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
414 for (ch
= 0; ch
< 4; ch
++) {
415 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
416 /* XXX we don't always need to actually emit a mov instruction here */
417 spe_move(gen
->f
, dst_reg
[ch
], src_reg
[ch
]);
418 store_dest_reg(gen
, dst_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
426 * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
427 * becomes (up to) four SPU "fa" instructions because we're doing SOA
431 emit_ADD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
433 int ch
, s1_reg
[4], s2_reg
[4], d_reg
[4];
435 spe_comment(gen
->f
, -4, "ADD:");
436 /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
437 for (ch
= 0; ch
< 4; ch
++) {
438 /* If the dest R, G, B or A writemask is enabled... */
439 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
440 s1_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
441 s2_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
442 d_reg
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
445 /* Loop over Red/Green/Blue/Alpha channels, do the add, store results */
446 for (ch
= 0; ch
< 4; ch
++) {
447 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
448 /* Emit actual SPE instruction: d = s1 + s2 */
449 spe_fa(gen
->f
, d_reg
[ch
], s1_reg
[ch
], s2_reg
[ch
]);
450 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
451 store_dest_reg(gen
, d_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
452 /* Free any intermediate temps we allocated */
460 * Emit subtract. See emit_ADD for comments.
463 emit_SUB(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
465 int ch
, s1_reg
[4], s2_reg
[4], d_reg
[4];
466 spe_comment(gen
->f
, -4, "SUB:");
467 for (ch
= 0; ch
< 4; ch
++) {
468 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
469 s1_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
470 s2_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
471 d_reg
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
474 for (ch
= 0; ch
< 4; ch
++) {
475 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
477 spe_fm(gen
->f
, d_reg
[ch
], s1_reg
[ch
], s2_reg
[ch
]);
478 store_dest_reg(gen
, d_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
486 * Emit multiply add. See emit_ADD for comments.
489 emit_MAD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
491 int ch
, s1_reg
[4], s2_reg
[4], s3_reg
[4], d_reg
[4];
492 spe_comment(gen
->f
, -4, "MAD:");
493 for (ch
= 0; ch
< 4; ch
++) {
494 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
495 s1_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
496 s2_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
497 s3_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
498 d_reg
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
501 for (ch
= 0; ch
< 4; ch
++) {
502 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
503 /* d = s1 * s2 + s3 */
504 spe_fma(gen
->f
, d_reg
[ch
], s1_reg
[ch
], s2_reg
[ch
], s3_reg
[ch
]);
505 store_dest_reg(gen
, d_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
514 * Emit linear interpolate. See emit_ADD for comments.
517 emit_LERP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
520 spe_comment(gen
->f
, -4, "LERP:");
521 for (ch
= 0; ch
< 4; ch
++) {
522 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
523 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
524 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
525 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
526 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
527 /* d = s3 + s1(s2 - s3) */
528 spe_fs(gen
->f
, d_reg
, s2_reg
, s3_reg
);
529 spe_fma(gen
->f
, d_reg
, d_reg
, s1_reg
, s3_reg
);
530 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
538 * Emit multiply. See emit_ADD for comments.
541 emit_MUL(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
543 int ch
, s1_reg
[4], s2_reg
[4], d_reg
[4];
544 spe_comment(gen
->f
, -4, "MUL:");
545 for (ch
= 0; ch
< 4; ch
++) {
546 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
547 s1_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
548 s2_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
549 d_reg
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
552 for (ch
= 0; ch
< 4; ch
++) {
553 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
555 spe_fm(gen
->f
, d_reg
[ch
], s1_reg
[ch
], s2_reg
[ch
]);
556 store_dest_reg(gen
, d_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
564 * Emit reciprocal. See emit_ADD for comments.
567 emit_RCP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
570 spe_comment(gen
->f
, -4, "RCP:");
571 for (ch
= 0; ch
< 4; ch
++) {
572 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
573 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
574 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
576 spe_frest(gen
->f
, d_reg
, s1_reg
);
577 spe_fi(gen
->f
, d_reg
, s1_reg
, d_reg
);
578 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
586 * Emit reciprocal sqrt. See emit_ADD for comments.
589 emit_RSQ(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
592 spe_comment(gen
->f
, -4, "RSQ:");
593 for (ch
= 0; ch
< 4; ch
++) {
594 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
595 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
596 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
598 spe_frsqest(gen
->f
, d_reg
, s1_reg
);
599 spe_fi(gen
->f
, d_reg
, s1_reg
, d_reg
);
600 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
608 * Emit absolute value. See emit_ADD for comments.
611 emit_ABS(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
614 spe_comment(gen
->f
, -4, "ABS:");
615 for (ch
= 0; ch
< 4; ch
++) {
616 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
617 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
618 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
619 const int bit31mask_reg
= get_itemp(gen
);
621 /* mask with bit 31 set, the rest cleared */
622 spe_load_int(gen
->f
, bit31mask_reg
, (1 << 31));
624 /* d = sign bit cleared in s1 */
625 spe_andc(gen
->f
, d_reg
, s1_reg
, bit31mask_reg
);
627 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
635 * Emit 3 component dot product. See emit_ADD for comments.
638 emit_DP3(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
641 int s1x_reg
, s1y_reg
, s1z_reg
;
642 int s2x_reg
, s2y_reg
, s2z_reg
;
643 int t0_reg
= get_itemp(gen
), t1_reg
= get_itemp(gen
);
645 spe_comment(gen
->f
, -4, "DP3:");
647 s1x_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
648 s2x_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
649 s1y_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
650 s2y_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
651 s1z_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
652 s2z_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
655 spe_fm(gen
->f
, t0_reg
, s1x_reg
, s2x_reg
);
658 spe_fm(gen
->f
, t1_reg
, s1y_reg
, s2y_reg
);
660 /* t0 = z0 * z1 + t0 */
661 spe_fma(gen
->f
, t0_reg
, s1z_reg
, s2z_reg
, t0_reg
);
664 spe_fa(gen
->f
, t0_reg
, t0_reg
, t1_reg
);
666 for (ch
= 0; ch
< 4; ch
++) {
667 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
668 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
669 spe_move(gen
->f
, d_reg
, t0_reg
);
670 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
679 * Emit 4 component dot product. See emit_ADD for comments.
682 emit_DP4(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
685 spe_comment(gen
->f
, -4, "DP4:");
687 int s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
688 int s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
689 int tmp_reg
= get_itemp(gen
);
692 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
694 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
695 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
696 /* t = y0 * y1 + t */
697 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
699 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
700 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
701 /* t = z0 * z1 + t */
702 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
704 s1_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[0]);
705 s2_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[1]);
706 /* t = w0 * w1 + t */
707 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
709 for (ch
= 0; ch
< 4; ch
++) {
710 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
711 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
712 spe_move(gen
->f
, d_reg
, tmp_reg
);
713 store_dest_reg(gen
, tmp_reg
, ch
, &inst
->FullDstRegisters
[0]);
722 * Emit homogeneous dot product. See emit_ADD for comments.
725 emit_DPH(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
728 spe_comment(gen
->f
, -4, "DPH:");
730 int s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
731 int s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
732 int tmp_reg
= get_itemp(gen
);
735 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
737 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
738 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
739 /* t = y0 * y1 + t */
740 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
742 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
743 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
744 /* t = z0 * z1 + t */
745 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
747 s2_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[1]);
749 spe_fa(gen
->f
, tmp_reg
, s2_reg
, tmp_reg
);
751 for (ch
= 0; ch
< 4; ch
++) {
752 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
753 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
754 spe_move(gen
->f
, d_reg
, tmp_reg
);
755 store_dest_reg(gen
, tmp_reg
, ch
, &inst
->FullDstRegisters
[0]);
764 * Emit cross product. See emit_ADD for comments.
767 emit_XPD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
769 spe_comment(gen
->f
, -4, "XPD:");
771 int s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
772 int s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
773 int tmp_reg
= get_itemp(gen
);
776 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
778 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
779 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
780 /* t = y0 * z1 - t */
781 spe_fms(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
783 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << CHAN_X
)) {
784 store_dest_reg(gen
, tmp_reg
, CHAN_X
, &inst
->FullDstRegisters
[0]);
787 s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
788 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
790 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
792 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
793 s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
794 /* t = z0 * x1 - t */
795 spe_fms(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
797 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << CHAN_Y
)) {
798 store_dest_reg(gen
, tmp_reg
, CHAN_Y
, &inst
->FullDstRegisters
[0]);
801 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
802 s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
804 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
806 s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
807 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
808 /* t = x0 * y1 - t */
809 spe_fms(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
811 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << CHAN_Z
)) {
812 store_dest_reg(gen
, tmp_reg
, CHAN_Z
, &inst
->FullDstRegisters
[0]);
820 * Emit set-if-greater-than.
821 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
822 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
823 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
826 emit_SGT(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
830 spe_comment(gen
->f
, -4, "SGT:");
832 for (ch
= 0; ch
< 4; ch
++) {
833 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
834 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
835 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
836 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
839 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
841 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
842 /* d = d & one_reg */
843 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
845 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
854 * Emit set-if-less-than. See emit_SGT for comments.
857 emit_SLT(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
861 spe_comment(gen
->f
, -4, "SLT:");
863 for (ch
= 0; ch
< 4; ch
++) {
864 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
865 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
866 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
867 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
870 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
872 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
873 /* d = d & one_reg */
874 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
876 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
885 * Emit set-if-greater-than-or-equal. See emit_SGT for comments.
888 emit_SGE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
892 spe_comment(gen
->f
, -4, "SGE:");
894 for (ch
= 0; ch
< 4; ch
++) {
895 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
896 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
897 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
898 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
901 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
903 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
904 /* d = ~d & one_reg */
905 spe_andc(gen
->f
, d_reg
, get_const_one_reg(gen
), d_reg
);
907 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
916 * Emit set-if-less-than-or-equal. See emit_SGT for comments.
919 emit_SLE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
923 spe_comment(gen
->f
, -4, "SLE:");
925 for (ch
= 0; ch
< 4; ch
++) {
926 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
927 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
928 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
929 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
932 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
934 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
935 /* d = ~d & one_reg */
936 spe_andc(gen
->f
, d_reg
, get_const_one_reg(gen
), d_reg
);
938 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
947 * Emit set-if_equal. See emit_SGT for comments.
950 emit_SEQ(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
954 spe_comment(gen
->f
, -4, "SEQ:");
956 for (ch
= 0; ch
< 4; ch
++) {
957 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
958 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
959 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
960 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
963 spe_fceq(gen
->f
, d_reg
, s1_reg
, s2_reg
);
965 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
966 /* d = d & one_reg */
967 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
969 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
978 * Emit set-if_not_equal. See emit_SGT for comments.
981 emit_SNE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
985 spe_comment(gen
->f
, -4, "SNE:");
987 for (ch
= 0; ch
< 4; ch
++) {
988 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
989 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
990 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
991 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
994 spe_fceq(gen
->f
, d_reg
, s1_reg
, s2_reg
);
995 spe_nor(gen
->f
, d_reg
, d_reg
, d_reg
);
997 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
998 /* d = d & one_reg */
999 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
1001 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1010 * Emit compare. See emit_SGT for comments.
1013 emit_CMP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1017 spe_comment(gen
->f
, -4, "CMP:");
1019 for (ch
= 0; ch
< 4; ch
++) {
1020 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1021 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1022 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1023 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
1024 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1025 int zero_reg
= get_itemp(gen
);
1027 spe_xor(gen
->f
, zero_reg
, zero_reg
, zero_reg
);
1029 /* d = (s1 < 0) ? s2 : s3 */
1030 spe_fcgt(gen
->f
, d_reg
, zero_reg
, s1_reg
);
1031 spe_selb(gen
->f
, d_reg
, s3_reg
, s2_reg
, d_reg
);
1033 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1043 * Convert float to signed int
1044 * Convert signed int to float
1047 emit_TRUNC(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1051 spe_comment(gen
->f
, -4, "TRUNC:");
1053 for (ch
= 0; ch
< 4; ch
++) {
1054 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1055 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1056 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1058 /* Convert float to int */
1059 spe_cflts(gen
->f
, d_reg
, s1_reg
, 0);
1061 /* Convert int to float */
1062 spe_csflt(gen
->f
, d_reg
, d_reg
, 0);
1064 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1074 * If negative int subtract one
1075 * Convert float to signed int
1076 * Convert signed int to float
1079 emit_FLR(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1083 spe_comment(gen
->f
, -4, "FLR:");
1085 int zero_reg
= get_itemp(gen
);
1086 spe_xor(gen
->f
, zero_reg
, zero_reg
, zero_reg
);
1088 for (ch
= 0; ch
< 4; ch
++) {
1089 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1090 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1091 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1092 int tmp_reg
= get_itemp(gen
);
1094 /* If negative, subtract 1.0 */
1095 spe_fcgt(gen
->f
, d_reg
, zero_reg
, s1_reg
);
1096 spe_selb(gen
->f
, tmp_reg
, zero_reg
, get_const_one_reg(gen
), d_reg
);
1097 spe_fs(gen
->f
, d_reg
, s1_reg
, tmp_reg
);
1099 /* Convert float to int */
1100 spe_cflts(gen
->f
, d_reg
, d_reg
, 0);
1102 /* Convert int to float */
1103 spe_csflt(gen
->f
, d_reg
, d_reg
, 0);
1105 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1115 * Input - FLR(Input)
1118 emit_FRC(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1122 spe_comment(gen
->f
, -4, "FLR:");
1124 int zero_reg
= get_itemp(gen
);
1125 spe_xor(gen
->f
, zero_reg
, zero_reg
, zero_reg
);
1127 for (ch
= 0; ch
< 4; ch
++) {
1128 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1129 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1130 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1131 int tmp_reg
= get_itemp(gen
);
1133 /* If negative, subtract 1.0 */
1134 spe_fcgt(gen
->f
, d_reg
, zero_reg
, s1_reg
);
1135 spe_selb(gen
->f
, tmp_reg
, zero_reg
, get_const_one_reg(gen
), d_reg
);
1136 spe_fs(gen
->f
, d_reg
, s1_reg
, tmp_reg
);
1138 /* Convert float to int */
1139 spe_cflts(gen
->f
, d_reg
, d_reg
, 0);
1141 /* Convert int to float */
1142 spe_csflt(gen
->f
, d_reg
, d_reg
, 0);
1144 /* d = s1 - FLR(s1) */
1145 spe_fs(gen
->f
, d_reg
, s1_reg
, d_reg
);
1147 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1158 print_functions(struct cell_context
*cell
)
1160 struct cell_spu_function_info
*funcs
= &cell
->spu_functions
;
1162 for (i
= 0; i
< funcs
->num
; i
++) {
1163 printf("SPU func %u: %s at %u\n",
1164 i
, funcs
->names
[i
], funcs
->addrs
[i
]);
/*
 * Look up funcname in cell->spu_functions by exact string comparison
 * and return the recorded address divided by 4.
 * A zero address trips the assertion below ("spu function not found").
 * NOTE(review): with assertions compiled out, an unknown name would not
 * be reported here -- confirm how callers are expected to cope.
 */
1171 lookup_function(struct cell_context
*cell
, const char *funcname
)
1173 const struct cell_spu_function_info
*funcs
= &cell
->spu_functions
;
/* linear scan over the registered function names */
1175 for (i
= 0; i
< funcs
->num
; i
++) {
1176 if (strcmp(funcs
->names
[i
], funcname
) == 0) {
1177 addr
= funcs
->addrs
[i
];
1180 assert(addr
&& "spu function not found");
1181 return addr
/ 4; /* discard 2 least significant bits */
1186 * Emit code to call a SPU function.
1187 * Used to implement instructions like SIN/COS/POW/TEX/etc.
/*
 * gen:      code generation context
 * inst:     the TGSI instruction being translated
 * funcname: name of the SPU function, resolved through lookup_function()
 * num_args: number of source operands passed to the function (at most 3)
 *
 * The call sequence is emitted once per channel enabled in the
 * destination write mask: registers reported by spe_get_registers_used()
 * are stored to the stack, the source registers are copied into
 * registers 3, 4, ..., the function is called with spe_brasl, register 3
 * is copied into the destination register, registers are reloaded from
 * the stack and the destination is stored.
 * NOTE(review): funcname is only read here (snprintf, lookup_function);
 * it could presumably be declared const char * -- confirm there is no
 * separate prototype.
 */
1190 emit_function_call(struct codegen
*gen
,
1191 const struct tgsi_full_instruction
*inst
,
1192 char *funcname
, uint num_args
)
1194 const uint addr
= lookup_function(gen
->cell
, funcname
);
/* s_regs[] below has room for three source registers */
1198 assert(num_args
<= 3);
1200 snprintf(comment
, sizeof(comment
), "CALL %s:", funcname
);
1201 spe_comment(gen
->f
, -4, comment
);
1203 for (ch
= 0; ch
< 4; ch
++) {
1204 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1205 int s_regs
[3], d_reg
;
1206 ubyte usedRegs
[SPE_NUM_REGS
];
1209 for (a
= 0; a
< num_args
; a
++) {
1210 s_regs
[a
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[a
]);
1212 d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
/* usedRegs[0..numUsed-1] receives the registers to save around the call */
1214 numUsed
= spe_get_registers_used(gen
->f
, usedRegs
);
/* NOTE(review): presumably checks that the save area fits in the stack
 * frame (16 bytes per register); the meaning of the 32 is not visible
 * here -- confirm. */
1215 assert(numUsed
< gen
->frame_size
/ 16 - 32);
1217 /* save registers to stack */
1218 for (i
= 0; i
< numUsed
; i
++) {
1219 uint reg
= usedRegs
[i
];
1221 spe_stqd(gen
->f
, reg
, SPE_REG_SP
, 16 * offset
);
1224 /* setup function arguments */
1225 for (a
= 0; a
< num_args
; a
++) {
1226 spe_move(gen
->f
, 3 + a
, s_regs
[a
]);
1229 /* branch to function, save return addr */
1230 spe_brasl(gen
->f
, SPE_REG_RA
, addr
);
1232 /* save function's return value */
1233 spe_move(gen
->f
, d_reg
, 3);
1235 /* restore registers from stack */
1236 for (i
= 0; i
< numUsed
; i
++) {
1237 uint reg
= usedRegs
[i
];
1240 spe_lqd(gen
->f
, reg
, SPE_REG_SP
, 16 * offset
);
1244 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
/*
 * Emit a call to the SPU function "spu_txp".
 * The four channels of source operand 0 are passed in registers 3..6 and
 * the four results are read back from registers 3..6 into the
 * destination registers. Registers reported by spe_get_registers_used()
 * are saved to the stack before the call and reloaded afterwards.
 * Only channels enabled in the destination write mask are finally stored.
 */
1254 emit_TXP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1256 const uint addr
= lookup_function(gen
->cell
, "spu_txp");
1258 int coord_regs
[4], d_regs
[4];
1260 spe_comment(gen
->f
, -4, "CALL txp:");
1262 /* get src/dst reg info */
/* all four channels are fetched here, regardless of the write mask */
1263 for (ch
= 0; ch
< 4; ch
++) {
1264 coord_regs
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1265 d_regs
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1269 ubyte usedRegs
[SPE_NUM_REGS
];
1272 numUsed
= spe_get_registers_used(gen
->f
, usedRegs
);
/* NOTE(review): presumably checks that the save area fits in the stack
 * frame (16 bytes per register); the meaning of the 32 is not visible
 * here -- confirm. */
1273 assert(numUsed
< gen
->frame_size
/ 16 - 32);
1275 /* save registers to stack */
1276 for (i
= 0; i
< numUsed
; i
++) {
1277 uint reg
= usedRegs
[i
];
1279 spe_stqd(gen
->f
, reg
, SPE_REG_SP
, 16 * offset
);
1282 /* setup function arguments */
1283 for (i
= 0; i
< 4; i
++) {
1284 spe_move(gen
->f
, 3 + i
, coord_regs
[i
]);
1287 /* branch to function, save return addr */
1288 spe_brasl(gen
->f
, SPE_REG_RA
, addr
);
1290 /* save function's return values (four pixel's colors) */
1291 for (i
= 0; i
< 4; i
++) {
1292 spe_move(gen
->f
, d_regs
[i
], 3 + i
);
1295 /* restore registers from stack */
/* NOTE(review): the condition below is cut off in this excerpt; it
 * presumably skips every destination register so the results just
 * written are not overwritten by the reload -- confirm. */
1296 for (i
= 0; i
< numUsed
; i
++) {
1297 uint reg
= usedRegs
[i
];
1298 if (reg
!= d_regs
[0] &&
1303 spe_lqd(gen
->f
, reg
, SPE_REG_SP
, 16 * offset
);
/* write back only the channels enabled in the write mask */
1308 for (ch
= 0; ch
< 4; ch
++) {
1309 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1310 store_dest_reg(gen
, d_regs
[ch
], ch
, &inst
->FullDstRegisters
[0]);
1320 * Emit max. See emit_SGT for comments.
/*
 * MAX: for every channel enabled in the destination write mask, emit a
 * compare (s1 > s2) into a temporary and use it as the select mask that
 * picks s1 where the compare holds and s2 elsewhere; the result is then
 * stored to the destination.
 */
1323 emit_MAX(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1327 spe_comment(gen
->f
, -4, "MAX:");
1329 for (ch
= 0; ch
< 4; ch
++) {
1330 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1331 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1332 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1333 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1334 int tmp_reg
= get_itemp(gen
);
1336 /* d = (s1 > s2) ? s1 : s2 */
1337 spe_fcgt(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
1338 spe_selb(gen
->f
, d_reg
, s2_reg
, s1_reg
, tmp_reg
);
1340 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1349 * Emit min. See emit_SGT for comments.
/*
 * MIN: for every channel enabled in the destination write mask, emit a
 * compare (s2 > s1) into a temporary and use it as the select mask that
 * picks s1 where the compare holds and s2 elsewhere; the result is then
 * stored to the destination. Same shape as emit_MAX with the compare
 * operands swapped.
 */
1352 emit_MIN(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1356 spe_comment(gen
->f
, -4, "MIN:");
1358 for (ch
= 0; ch
< 4; ch
++) {
1359 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1360 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1361 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1362 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1363 int tmp_reg
= get_itemp(gen
);
1365 /* d = (s2 > s1) ? s1 : s2 */
1366 spe_fcgt(gen
->f
, tmp_reg
, s2_reg
, s1_reg
);
1367 spe_selb(gen
->f
, d_reg
, s2_reg
, s1_reg
, tmp_reg
);
1369 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
/*
 * IF: narrows the execution mask using channel 0 of source operand 0.
 * A temporary is set to (s1 == 0), complemented, and AND-ed into the
 * execution mask register, so the mask stays set only where s1 is
 * non-zero.
 */
1378 emit_IF(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
/* only the first channel of the condition operand is examined */
1380 const int channel
= 0;
1381 const int exec_reg
= get_exec_mask_reg(gen
);
1383 spe_comment(gen
->f
, -4, "IF:");
1385 /* update execution mask with the predicate register */
1386 int tmp_reg
= get_itemp(gen
);
1387 int s1_reg
= get_src_reg(gen
, channel
, &inst
->FullSrcRegisters
[0]);
1389 /* tmp = (s1_reg == 0) */
1390 spe_ceqi(gen
->f
, tmp_reg
, s1_reg
, 0);
/* invert the compare result, giving tmp = (s1_reg != 0) */
1392 spe_complement(gen
->f
, tmp_reg
, tmp_reg
);
1393 /* exec_mask = exec_mask & tmp */
1394 spe_and(gen
->f
, exec_reg
, exec_reg
, tmp_reg
);
/*
 * ELSE: complements the execution mask register in place.
 * inst is not referenced by the visible code.
 * NOTE(review): the mask is inverted without reference to any enclosing
 * mask, which presumably only suits un-nested IF/ELSE -- confirm.
 */
1405 emit_ELSE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1407 const int exec_reg
= get_exec_mask_reg(gen
);
1409 spe_comment(gen
->f
, -4, "ELSE:");
1411 /* exec_mask = !exec_mask */
1412 spe_complement(gen
->f
, exec_reg
, exec_reg
);
/*
 * ENDIF: loads ~0 (all bits set) into the execution mask register.
 * No previous mask value is restored; see the XXX note below.
 * inst is not referenced by the visible code.
 */
1419 emit_ENDIF(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1421 const int exec_reg
= get_exec_mask_reg(gen
);
1423 spe_comment(gen
->f
, -4, "ENDIF:");
1425 /* XXX todo: pop execution mask */
1427 spe_load_int(gen
->f
, exec_reg
, ~0x0);
/*
 * DDX / DDY: for every channel enabled in the destination write mask,
 * splat the upper-left pixel's value into one temporary and a
 * neighbouring pixel's value into a second temporary, then emit
 * d = neighbour - upper-left.
 * The debug label is chosen by ddx ("DDX:" when true, "DDY:" otherwise).
 * NOTE(review): the lines that choose between the upper-right (word 1)
 * and lower-left (word 2) splat are not visible in this excerpt;
 * presumably ddx selects between them -- confirm.
 * NOTE(review): no store_dest_reg call is visible here, unlike the
 * other emitters in this file -- confirm that is intended.
 */
1435 emit_DDX_DDY(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
,
1440 spe_comment(gen
->f
, -4, ddx
? "DDX:" : "DDY:");
1442 for (ch
= 0; ch
< 4; ch
++) {
1443 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1444 int s_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1445 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1447 int t1_reg
= get_itemp(gen
);
1448 int t2_reg
= get_itemp(gen
);
1450 spe_splat_word(gen
->f
, t1_reg
, s_reg
, 0); /* upper-left pixel */
1452 spe_splat_word(gen
->f
, t2_reg
, s_reg
, 1); /* upper-right pixel */
1455 spe_splat_word(gen
->f
, t2_reg
, s_reg
, 2); /* lower-left pixel */
/* d = t2 - t1 */
1457 spe_fs(gen
->f
, d_reg
, t2_reg
, t1_reg
);
1470 * Emit END instruction.
1471 * We just return from the shader function at this point.
1473 * Note that there may be more code after this that would be
1474 * called by TGSI_OPCODE_CALL.
/*
 * END: the visible code emits the "END:" debug label into the generated
 * function; any further statements of this emitter are not visible in
 * this excerpt.
 */
1477 emit_END(struct codegen
*gen
)
1479 spe_comment(gen
->f
, -4, "END:");
1486 * Emit code for the given instruction. Just a big switch stmt.
/*
 * Dispatch one TGSI instruction to its emitter, selected by
 * inst->Instruction.Opcode, and return that emitter's result.
 * An opcode with no case below is reported on stderr.
 */
1489 emit_instruction(struct codegen
*gen
,
1490 const struct tgsi_full_instruction
*inst
)
1492 switch (inst
->Instruction
.Opcode
) {
/* MOV and SWZ share one emitter */
1493 case TGSI_OPCODE_MOV
:
1494 case TGSI_OPCODE_SWZ
:
1495 return emit_MOV(gen
, inst
);
1496 case TGSI_OPCODE_MUL
:
1497 return emit_MUL(gen
, inst
);
1498 case TGSI_OPCODE_ADD
:
1499 return emit_ADD(gen
, inst
);
1500 case TGSI_OPCODE_SUB
:
1501 return emit_SUB(gen
, inst
);
1502 case TGSI_OPCODE_MAD
:
1503 return emit_MAD(gen
, inst
);
1504 case TGSI_OPCODE_LERP
:
1505 return emit_LERP(gen
, inst
);
1506 case TGSI_OPCODE_DP3
:
1507 return emit_DP3(gen
, inst
);
1508 case TGSI_OPCODE_DP4
:
1509 return emit_DP4(gen
, inst
);
1510 case TGSI_OPCODE_DPH
:
1511 return emit_DPH(gen
, inst
);
1512 case TGSI_OPCODE_XPD
:
1513 return emit_XPD(gen
, inst
);
1514 case TGSI_OPCODE_RCP
:
1515 return emit_RCP(gen
, inst
);
1516 case TGSI_OPCODE_RSQ
:
1517 return emit_RSQ(gen
, inst
);
1518 case TGSI_OPCODE_ABS
:
1519 return emit_ABS(gen
, inst
);
1520 case TGSI_OPCODE_SGT
:
1521 return emit_SGT(gen
, inst
);
1522 case TGSI_OPCODE_SLT
:
1523 return emit_SLT(gen
, inst
);
1524 case TGSI_OPCODE_SGE
:
1525 return emit_SGE(gen
, inst
);
1526 case TGSI_OPCODE_SLE
:
1527 return emit_SLE(gen
, inst
);
1528 case TGSI_OPCODE_SEQ
:
1529 return emit_SEQ(gen
, inst
);
1530 case TGSI_OPCODE_SNE
:
1531 return emit_SNE(gen
, inst
);
1532 case TGSI_OPCODE_CMP
:
1533 return emit_CMP(gen
, inst
);
1534 case TGSI_OPCODE_MAX
:
1535 return emit_MAX(gen
, inst
);
1536 case TGSI_OPCODE_MIN
:
1537 return emit_MIN(gen
, inst
);
1538 case TGSI_OPCODE_TRUNC
:
1539 return emit_TRUNC(gen
, inst
);
1540 case TGSI_OPCODE_FLR
:
1541 return emit_FLR(gen
, inst
);
1542 case TGSI_OPCODE_FRC
:
1543 return emit_FRC(gen
, inst
);
1544 case TGSI_OPCODE_END
:
1545 return emit_END(gen
);
/* opcodes implemented by calling a named SPU function; the last
 * argument is the number of source operands passed to it */
1547 case TGSI_OPCODE_COS
:
1548 return emit_function_call(gen
, inst
, "spu_cos", 1);
1549 case TGSI_OPCODE_SIN
:
1550 return emit_function_call(gen
, inst
, "spu_sin", 1);
1551 case TGSI_OPCODE_POW
:
1552 return emit_function_call(gen
, inst
, "spu_pow", 2);
1553 case TGSI_OPCODE_EXPBASE2
:
1554 return emit_function_call(gen
, inst
, "spu_exp2", 1);
1555 case TGSI_OPCODE_LOGBASE2
:
1556 return emit_function_call(gen
, inst
, "spu_log2", 1);
/* TEX and TXD are currently handled by the TXP emitter */
1557 case TGSI_OPCODE_TEX
:
1558 /* fall-through for now */
1559 case TGSI_OPCODE_TXD
:
1560 /* fall-through for now */
1561 case TGSI_OPCODE_TXP
:
1562 return emit_TXP(gen
, inst
);
1564 case TGSI_OPCODE_IF
:
1565 return emit_IF(gen
, inst
);
1566 case TGSI_OPCODE_ELSE
:
1567 return emit_ELSE(gen
, inst
);
1568 case TGSI_OPCODE_ENDIF
:
1569 return emit_ENDIF(gen
, inst
);
/* DDX and DDY share one emitter, told apart by the boolean argument */
1571 case TGSI_OPCODE_DDX
:
1572 return emit_DDX_DDY(gen
, inst
, true);
1573 case TGSI_OPCODE_DDY
:
1574 return emit_DDX_DDY(gen
, inst
, false);
1576 /* XXX lots more cases to do... */
/* report the numeric opcode that has no emitter */
1579 fprintf(stderr
, "Cell: unimplemented TGSI instruction %d!\n",
1580 inst
->Instruction
.Opcode
);
1590 * Emit code for a TGSI immediate value (vector of four floats).
1591 * This involves register allocation and initialization.
1592 * XXX the initialization should be done by a "prepare" stage, not
1593 * per quad execution!
/*
 * Record the SPE registers holding the four float channels of a TGSI
 * immediate in gen->imm_regs[gen->num_imm][] and emit a float load for
 * each newly allocated register.
 * A channel whose value equals the value of the channel just before it
 * reuses that channel's register instead of allocating a new one; only
 * the immediately preceding channel is compared.
 */
1596 emit_immediate(struct codegen
*gen
, const struct tgsi_full_immediate
*immed
)
/* guards the gen->imm_regs[gen->num_imm] index used below */
1600 assert(gen
->num_imm
< MAX_TEMPS
);
1602 spe_comment(gen
->f
, -4, "IMMEDIATE:");
1604 for (ch
= 0; ch
< 4; ch
++) {
1605 float val
= immed
->u
.ImmediateFloat32
[ch
].Float
;
1607 if (ch
> 0 && val
== immed
->u
.ImmediateFloat32
[ch
- 1].Float
) {
1608 /* re-use previous register */
1609 gen
->imm_regs
[gen
->num_imm
][ch
] = gen
->imm_regs
[gen
->num_imm
][ch
- 1];
1612 int reg
= spe_allocate_available_register(gen
->f
);
1617 /* update immediate map */
1618 gen
->imm_regs
[gen
->num_imm
][ch
] = reg
;
1620 /* emit initializer instruction */
1621 spe_load_float(gen
->f
, reg
, val
);
1633 * Emit "code" for a TGSI declaration.
1634 * We only care about TGSI TEMPORARY register declarations at this time.
1635 * For each TGSI TEMPORARY we allocate four SPE registers.
/*
 * Handle a TGSI declaration. For TGSI_FILE_TEMPORARY, every index in
 * decl->DeclarationRange.First..Last gets four SPE registers (one per
 * channel) recorded in gen->temp_regs[i][], and a debug comment
 * describing the mapping is emitted.
 * Returns false when a register allocation yields a negative value.
 * cell is not referenced by the visible code.
 */
1638 emit_declaration(struct cell_context
*cell
,
1639 struct codegen
*gen
, const struct tgsi_full_declaration
*decl
)
1643 switch (decl
->Declaration
.File
) {
1644 case TGSI_FILE_TEMPORARY
:
/* the declared range is inclusive at both ends */
1645 for (i
= decl
->DeclarationRange
.First
;
1646 i
<= decl
->DeclarationRange
.Last
;
1648 assert(i
< MAX_TEMPS
);
1649 for (ch
= 0; ch
< 4; ch
++) {
1650 gen
->temp_regs
[i
][ch
] = spe_allocate_available_register(gen
->f
);
1651 if (gen
->temp_regs
[i
][ch
] < 0)
1652 return false; /* out of regs */
1655 /* XXX if we run out of SPE registers, we need to spill
1656 * to SPU memory. someday...
/* NOTE(review): sprintf is unbounded; the declaration of buf is not
 * visible here, so confirm it is large enough for this message or
 * switch to a bounded formatter. */
1661 sprintf(buf
, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i
,
1662 gen
->temp_regs
[i
][0], gen
->temp_regs
[i
][1],
1663 gen
->temp_regs
[i
][2], gen
->temp_regs
[i
][3]);
1664 spe_comment(gen
->f
, -4, buf
);
1678 * Translate TGSI shader code to SPE instructions. This is done when
1679 * the state tracker gives us a new shader (via pipe->create_fs_state()).
1681 * \param cell the rendering context (in)
1682 * \param tokens the TGSI shader (in)
1683 * \param f the generated function (out)
/*
 * Generate SPE code for the TGSI token stream `tokens` into `f`.
 * Visible steps: zero the codegen state, reserve registers 3, 4 and 5
 * for the inputs / outputs / constants pointers, initialise f with a
 * code size of SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE, emit the
 * prologue, then parse tokens one at a time -- handing immediates,
 * declarations and instructions to their emitters -- until the stream
 * ends or gen.error is set.
 * When CELL_DEBUG_ASM is set in cell->debug_flags, the TGSI is dumped
 * before translation and the instruction count is printed afterwards.
 */
1686 cell_gen_fragment_program(struct cell_context
*cell
,
1687 const struct tgsi_token
*tokens
,
1688 struct spe_function
*f
)
1690 struct tgsi_parse_context parse
;
1693 memset(&gen
, 0, sizeof(gen
));
1697 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
1698 gen
.inputs_reg
= 3; /* pointer to inputs array */
1699 gen
.outputs_reg
= 4; /* pointer to outputs array */
1700 gen
.constants_reg
= 5; /* pointer to constants array */
1702 spe_init_func(f
, SPU_MAX_FRAGMENT_PROGRAM_INSTS
* SPE_INST_SIZE
);
/* mark the three argument registers as in use */
1703 spe_allocate_register(f
, gen
.inputs_reg
);
1704 spe_allocate_register(f
, gen
.outputs_reg
);
1705 spe_allocate_register(f
, gen
.constants_reg
);
1707 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1708 spe_print_code(f
, true);
1710 printf("Begin %s\n", __FUNCTION__
);
1711 tgsi_dump(tokens
, 0);
1714 tgsi_parse_init(&parse
, tokens
);
1716 emit_prologue(&gen
);
/* stop at the end of the token stream or as soon as gen.error is set */
1718 while (!tgsi_parse_end_of_tokens(&parse
) && !gen
.error
) {
1719 tgsi_parse_token(&parse
);
1721 switch (parse
.FullToken
.Token
.Type
) {
1722 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1723 if (!emit_immediate(&gen
, &parse
.FullToken
.FullImmediate
))
1727 case TGSI_TOKEN_TYPE_DECLARATION
:
1728 if (!emit_declaration(cell
, &gen
, &parse
.FullToken
.FullDeclaration
))
1732 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1733 if (!emit_instruction(&gen
, &parse
.FullToken
.FullInstruction
))
/* NOTE(review): the return below comes before tgsi_parse_free(); its
 * guarding condition is not visible in this excerpt. If this path is
 * taken the parse context is presumably never freed, and the result
 * reported is emit_END's -- confirm both are intended. */
1743 /* terminate the SPE code */
1744 return emit_END(&gen
);
1747 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1748 printf("cell_gen_fragment_program nr instructions: %d\n", f
->num_inst
);
1749 printf("End %s\n", __FUNCTION__
);
1752 tgsi_parse_free( &parse
);