1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * Generate SPU fragment program/shader code.
33 * Note that we generate SOA-style code here. So each TGSI instruction
34 * operates on four pixels (and is translated into four SPU instructions,
35 * generally speaking).
41 #include "pipe/p_defines.h"
42 #include "pipe/p_state.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
48 #include "rtasm/rtasm_ppc_spe.h"
49 #include "util/u_memory.h"
50 #include "cell_context.h"
51 #include "cell_gen_fp.h"
63 * Context needed during code generation.
67 struct cell_context
*cell
;
68 int inputs_reg
; /**< 1st function parameter */
69 int outputs_reg
; /**< 2nd function parameter */
70 int constants_reg
; /**< 3rd function parameter */
71 int temp_regs
[MAX_TEMPS
][4]; /**< maps TGSI temps to SPE registers */
72 int imm_regs
[MAX_IMMED
][4]; /**< maps TGSI immediates to SPE registers */
74 int num_imm
; /**< number of immediates */
76 int one_reg
; /**< register containing {1.0, 1.0, 1.0, 1.0} */
78 /** Per-instruction temps / intermediate temps */
82 /** Current IF/ELSE/ENDIF nesting level */
84 /** Index of execution mask register */
87 /** KIL mask: indicates which fragments have been killed */
90 int frame_size
; /**< Stack frame size, in words */
92 struct spe_function
*f
;
98 * Allocate an intermediate temporary register.
101 get_itemp(struct codegen
*gen
)
103 int t
= spe_allocate_available_register(gen
->f
);
104 assert(gen
->num_itemps
< Elements(gen
->itemps
));
105 gen
->itemps
[gen
->num_itemps
++] = t
;
110 * Free all intermediate temporary registers. To be called after each
111 * instruction has been emitted.
114 free_itemps(struct codegen
*gen
)
117 for (i
= 0; i
< gen
->num_itemps
; i
++) {
118 spe_release_register(gen
->f
, gen
->itemps
[i
]);
125 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
126 * The register is allocated and initialized upon the first call.
129 get_const_one_reg(struct codegen
*gen
)
131 if (gen
->one_reg
<= 0) {
132 gen
->one_reg
= spe_allocate_available_register(gen
->f
);
134 spe_indent(gen
->f
, 4);
135 spe_comment(gen
->f
, -4, "INIT CONSTANT 1.0:");
137 /* one = {1.0, 1.0, 1.0, 1.0} */
138 spe_load_float(gen
->f
, gen
->one_reg
, 1.0f
);
140 spe_indent(gen
->f
, -4);
148 * Return index of the pixel execution mask.
149 * The register is allocated an initialized upon the first call.
151 * The pixel execution mask controls which pixels in a quad are
152 * modified, according to surrounding conditionals, loops, etc.
155 get_exec_mask_reg(struct codegen
*gen
)
157 if (gen
->exec_mask_reg
<= 0) {
158 gen
->exec_mask_reg
= spe_allocate_available_register(gen
->f
);
160 spe_indent(gen
->f
, 4);
161 spe_comment(gen
->f
, -4, "INIT EXEC MASK = ~0:");
163 /* exec_mask = {~0, ~0, ~0, ~0} */
164 spe_load_int(gen
->f
, gen
->exec_mask_reg
, ~0);
166 spe_indent(gen
->f
, -4);
169 return gen
->exec_mask_reg
;
174 is_register_src(struct codegen
*gen
, int channel
,
175 const struct tgsi_full_src_register
*src
)
177 int swizzle
= tgsi_util_get_full_src_register_extswizzle(src
, channel
);
178 int sign_op
= tgsi_util_get_full_src_register_sign_mode(src
, channel
);
180 if (swizzle
> TGSI_SWIZZLE_W
|| sign_op
!= TGSI_UTIL_SIGN_KEEP
) {
183 if (src
->SrcRegister
.File
== TGSI_FILE_TEMPORARY
||
184 src
->SrcRegister
.File
== TGSI_FILE_IMMEDIATE
) {
192 is_memory_dst(struct codegen
*gen
, int channel
,
193 const struct tgsi_full_dst_register
*dst
)
195 if (dst
->DstRegister
.File
== TGSI_FILE_OUTPUT
) {
205 * Return the index of the SPU temporary containing the named TGSI
206 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
207 * just return the corresponding SPE register. If the TGIS register
208 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
209 * and emit an SPE load instruction.
212 get_src_reg(struct codegen
*gen
,
214 const struct tgsi_full_src_register
*src
)
217 int swizzle
= tgsi_util_get_full_src_register_extswizzle(src
, channel
);
218 boolean reg_is_itemp
= FALSE
;
221 assert(swizzle
>= TGSI_SWIZZLE_X
);
222 assert(swizzle
<= TGSI_EXTSWIZZLE_ONE
);
224 if (swizzle
== TGSI_EXTSWIZZLE_ONE
) {
225 /* Load const one float and early out */
226 reg
= get_const_one_reg(gen
);
228 else if (swizzle
== TGSI_EXTSWIZZLE_ZERO
) {
229 /* Load const zero float and early out */
230 reg
= get_itemp(gen
);
231 spe_xor(gen
->f
, reg
, reg
, reg
);
236 switch (src
->SrcRegister
.File
) {
237 case TGSI_FILE_TEMPORARY
:
238 reg
= gen
->temp_regs
[src
->SrcRegister
.Index
][swizzle
];
240 case TGSI_FILE_INPUT
:
242 /* offset is measured in quadwords, not bytes */
243 int offset
= src
->SrcRegister
.Index
* 4 + swizzle
;
244 reg
= get_itemp(gen
);
246 /* Load: reg = memory[(machine_reg) + offset] */
247 spe_lqd(gen
->f
, reg
, gen
->inputs_reg
, offset
* 16);
250 case TGSI_FILE_IMMEDIATE
:
251 reg
= gen
->imm_regs
[src
->SrcRegister
.Index
][swizzle
];
253 case TGSI_FILE_CONSTANT
:
255 /* offset is measured in quadwords, not bytes */
256 int offset
= src
->SrcRegister
.Index
* 4 + swizzle
;
257 reg
= get_itemp(gen
);
259 /* Load: reg = memory[(machine_reg) + offset] */
260 spe_lqd(gen
->f
, reg
, gen
->constants_reg
, offset
* 16);
269 * Handle absolute value, negate or set-negative of src register.
271 sign_op
= tgsi_util_get_full_src_register_sign_mode(src
, channel
);
272 if (sign_op
!= TGSI_UTIL_SIGN_KEEP
) {
274 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
276 const int bit31mask_reg
= get_itemp(gen
);
280 /* re-use 'reg' for the result */
284 /* alloc a new reg for the result */
285 result_reg
= get_itemp(gen
);
288 /* mask with bit 31 set, the rest cleared */
289 spe_load_uint(gen
->f
, bit31mask_reg
, (1 << 31));
291 if (sign_op
== TGSI_UTIL_SIGN_CLEAR
) {
292 spe_andc(gen
->f
, result_reg
, reg
, bit31mask_reg
);
294 else if (sign_op
== TGSI_UTIL_SIGN_SET
) {
295 spe_and(gen
->f
, result_reg
, reg
, bit31mask_reg
);
298 assert(sign_op
== TGSI_UTIL_SIGN_TOGGLE
);
299 spe_xor(gen
->f
, result_reg
, reg
, bit31mask_reg
);
310 * Return the index of an SPE register to use for the given TGSI register.
311 * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
312 * corresponding SPE register is returned. If the TGSI register is
313 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
314 * See store_dest_reg() below...
317 get_dst_reg(struct codegen
*gen
,
319 const struct tgsi_full_dst_register
*dest
)
323 switch (dest
->DstRegister
.File
) {
324 case TGSI_FILE_TEMPORARY
:
325 if (gen
->if_nesting
> 0)
326 reg
= get_itemp(gen
);
328 reg
= gen
->temp_regs
[dest
->DstRegister
.Index
][channel
];
330 case TGSI_FILE_OUTPUT
:
331 reg
= get_itemp(gen
);
342 * When a TGSI instruction is writing to an output register, this
343 * function emits the SPE store instruction to store the value_reg.
344 * \param value_reg the SPE register containing the value to store.
345 * This would have been returned by get_dst_reg().
348 store_dest_reg(struct codegen
*gen
,
349 int value_reg
, int channel
,
350 const struct tgsi_full_dst_register
*dest
)
353 * XXX need to implement dst reg clamping/saturation
356 switch (inst
->Instruction
.Saturate
) {
359 case TGSI_SAT_ZERO_ONE
:
361 case TGSI_SAT_MINUS_PLUS_ONE
:
368 switch (dest
->DstRegister
.File
) {
369 case TGSI_FILE_TEMPORARY
:
370 if (gen
->if_nesting
> 0) {
371 int d_reg
= gen
->temp_regs
[dest
->DstRegister
.Index
][channel
];
372 int exec_reg
= get_exec_mask_reg(gen
);
373 /* Mix d with new value according to exec mask:
374 * d[i] = mask_reg[i] ? value_reg : d_reg
376 spe_selb(gen
->f
, d_reg
, d_reg
, value_reg
, exec_reg
);
379 /* we're not inside a condition or loop: do nothing special */
383 case TGSI_FILE_OUTPUT
:
385 /* offset is measured in quadwords, not bytes */
386 int offset
= dest
->DstRegister
.Index
* 4 + channel
;
387 if (gen
->if_nesting
> 0) {
388 int exec_reg
= get_exec_mask_reg(gen
);
389 int curval_reg
= get_itemp(gen
);
390 /* First read the current value from memory:
391 * Load: curval = memory[(machine_reg) + offset]
393 spe_lqd(gen
->f
, curval_reg
, gen
->outputs_reg
, offset
* 16);
394 /* Mix curval with newvalue according to exec mask:
395 * d[i] = mask_reg[i] ? value_reg : d_reg
397 spe_selb(gen
->f
, curval_reg
, curval_reg
, value_reg
, exec_reg
);
398 /* Store: memory[(machine_reg) + offset] = curval */
399 spe_stqd(gen
->f
, curval_reg
, gen
->outputs_reg
, offset
* 16);
402 /* Store: memory[(machine_reg) + offset] = reg */
403 spe_stqd(gen
->f
, value_reg
, gen
->outputs_reg
, offset
* 16);
415 emit_prologue(struct codegen
*gen
)
417 gen
->frame_size
= 1024; /* XXX temporary, should be dynamic */
419 spe_comment(gen
->f
, -4, "Function prologue:");
421 /* save $lr on stack # stqd $lr,16($sp) */
422 spe_stqd(gen
->f
, SPE_REG_RA
, SPE_REG_SP
, 16);
424 if (gen
->frame_size
>= 512) {
425 /* offset is too large for ai instruction */
426 int offset_reg
= spe_allocate_available_register(gen
->f
);
427 int sp_reg
= spe_allocate_available_register(gen
->f
);
428 /* offset = -framesize */
429 spe_load_int(gen
->f
, offset_reg
, -gen
->frame_size
);
431 spe_move(gen
->f
, sp_reg
, SPE_REG_SP
);
432 /* $sp = $sp + offset_reg */
433 spe_a(gen
->f
, SPE_REG_SP
, SPE_REG_SP
, offset_reg
);
434 /* save $sp in stack frame */
435 spe_stqd(gen
->f
, sp_reg
, SPE_REG_SP
, 0);
437 spe_release_register(gen
->f
, offset_reg
);
438 spe_release_register(gen
->f
, sp_reg
);
441 /* save stack pointer # stqd $sp,-frameSize($sp) */
442 spe_stqd(gen
->f
, SPE_REG_SP
, SPE_REG_SP
, -gen
->frame_size
);
444 /* adjust stack pointer # ai $sp,$sp,-frameSize */
445 spe_ai(gen
->f
, SPE_REG_SP
, SPE_REG_SP
, -gen
->frame_size
);
451 emit_epilogue(struct codegen
*gen
)
453 const int return_reg
= 3;
455 spe_comment(gen
->f
, -4, "Function epilogue:");
457 spe_comment(gen
->f
, 0, "return the killed mask");
458 if (gen
->kill_mask_reg
> 0) {
459 /* shader called KIL, return the "alive" mask */
460 spe_move(gen
->f
, return_reg
, gen
->kill_mask_reg
);
463 /* return {0,0,0,0} */
464 spe_load_uint(gen
->f
, return_reg
, 0);
467 spe_comment(gen
->f
, 0, "restore stack and return");
468 if (gen
->frame_size
>= 512) {
469 /* offset is too large for ai instruction */
470 int offset_reg
= spe_allocate_available_register(gen
->f
);
471 /* offset = framesize */
472 spe_load_int(gen
->f
, offset_reg
, gen
->frame_size
);
473 /* $sp = $sp + offset */
474 spe_a(gen
->f
, SPE_REG_SP
, SPE_REG_SP
, offset_reg
);
476 spe_release_register(gen
->f
, offset_reg
);
479 /* restore stack pointer # ai $sp,$sp,frameSize */
480 spe_ai(gen
->f
, SPE_REG_SP
, SPE_REG_SP
, gen
->frame_size
);
483 /* restore $lr # lqd $lr,16($sp) */
484 spe_lqd(gen
->f
, SPE_REG_RA
, SPE_REG_SP
, 16);
486 /* return from function call */
487 spe_bi(gen
->f
, SPE_REG_RA
, 0, 0);
492 emit_MOV(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
494 int ch
, src_reg
[4], dst_reg
[4];
496 spe_comment(gen
->f
, -4, "MOV:");
497 for (ch
= 0; ch
< 4; ch
++) {
498 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
499 src_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
500 dst_reg
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
504 for (ch
= 0; ch
< 4; ch
++) {
505 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
506 if (is_register_src(gen
, ch
, &inst
->FullSrcRegisters
[0]) &&
507 is_memory_dst(gen
, ch
, &inst
->FullDstRegisters
[0])) {
508 /* special-case: register to memory store */
509 store_dest_reg(gen
, src_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
512 spe_move(gen
->f
, dst_reg
[ch
], src_reg
[ch
]);
513 store_dest_reg(gen
, dst_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
522 * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
523 * becomes (up to) four SPU "fa" instructions because we're doing SOA
527 emit_ADD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
529 int ch
, s1_reg
[4], s2_reg
[4], d_reg
[4];
531 spe_comment(gen
->f
, -4, "ADD:");
532 /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
533 for (ch
= 0; ch
< 4; ch
++) {
534 /* If the dest R, G, B or A writemask is enabled... */
535 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
536 s1_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
537 s2_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
538 d_reg
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
541 /* Loop over Red/Green/Blue/Alpha channels, do the add, store results */
542 for (ch
= 0; ch
< 4; ch
++) {
543 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
544 /* Emit actual SPE instruction: d = s1 + s2 */
545 spe_fa(gen
->f
, d_reg
[ch
], s1_reg
[ch
], s2_reg
[ch
]);
546 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
547 store_dest_reg(gen
, d_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
548 /* Free any intermediate temps we allocated */
556 * Emit subtract. See emit_ADD for comments.
559 emit_SUB(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
561 int ch
, s1_reg
[4], s2_reg
[4], d_reg
[4];
562 spe_comment(gen
->f
, -4, "SUB:");
563 for (ch
= 0; ch
< 4; ch
++) {
564 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
565 s1_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
566 s2_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
567 d_reg
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
570 for (ch
= 0; ch
< 4; ch
++) {
571 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
573 spe_fs(gen
->f
, d_reg
[ch
], s1_reg
[ch
], s2_reg
[ch
]);
574 store_dest_reg(gen
, d_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
582 * Emit multiply add. See emit_ADD for comments.
585 emit_MAD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
587 int ch
, s1_reg
[4], s2_reg
[4], s3_reg
[4], d_reg
[4];
588 spe_comment(gen
->f
, -4, "MAD:");
589 for (ch
= 0; ch
< 4; ch
++) {
590 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
591 s1_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
592 s2_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
593 s3_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
594 d_reg
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
597 for (ch
= 0; ch
< 4; ch
++) {
598 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
599 /* d = s1 * s2 + s3 */
600 spe_fma(gen
->f
, d_reg
[ch
], s1_reg
[ch
], s2_reg
[ch
], s3_reg
[ch
]);
601 store_dest_reg(gen
, d_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
610 * Emit linear interpolate. See emit_ADD for comments.
613 emit_LERP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
615 int ch
, s1_reg
[4], s2_reg
[4], s3_reg
[4], d_reg
[4], tmp_reg
[4];
616 spe_comment(gen
->f
, -4, "LERP:");
617 /* setup/get src/dst/temp regs */
618 for (ch
= 0; ch
< 4; ch
++) {
619 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
620 s1_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
621 s2_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
622 s3_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
623 d_reg
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
624 tmp_reg
[ch
] = get_itemp(gen
);
628 /* d = s3 + s1(s2 - s3) */
629 /* do all subtracts, then all fma, then all stores to better pipeline */
630 for (ch
= 0; ch
< 4; ch
++) {
631 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
632 spe_fs(gen
->f
, tmp_reg
[ch
], s2_reg
[ch
], s3_reg
[ch
]);
635 for (ch
= 0; ch
< 4; ch
++) {
636 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
637 spe_fma(gen
->f
, d_reg
[ch
], tmp_reg
[ch
], s1_reg
[ch
], s3_reg
[ch
]);
640 for (ch
= 0; ch
< 4; ch
++) {
641 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
642 store_dest_reg(gen
, d_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
650 * Emit multiply. See emit_ADD for comments.
653 emit_MUL(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
655 int ch
, s1_reg
[4], s2_reg
[4], d_reg
[4];
656 spe_comment(gen
->f
, -4, "MUL:");
657 for (ch
= 0; ch
< 4; ch
++) {
658 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
659 s1_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
660 s2_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
661 d_reg
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
664 for (ch
= 0; ch
< 4; ch
++) {
665 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
667 spe_fm(gen
->f
, d_reg
[ch
], s1_reg
[ch
], s2_reg
[ch
]);
668 store_dest_reg(gen
, d_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
676 * Emit reciprocal. See emit_ADD for comments.
679 emit_RCP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
682 spe_comment(gen
->f
, -4, "RCP:");
683 for (ch
= 0; ch
< 4; ch
++) {
684 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
685 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
686 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
688 spe_frest(gen
->f
, d_reg
, s1_reg
);
689 spe_fi(gen
->f
, d_reg
, s1_reg
, d_reg
);
690 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
698 * Emit reciprocal sqrt. See emit_ADD for comments.
701 emit_RSQ(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
704 spe_comment(gen
->f
, -4, "RSQ:");
705 for (ch
= 0; ch
< 4; ch
++) {
706 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
707 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
708 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
710 spe_frsqest(gen
->f
, d_reg
, s1_reg
);
711 spe_fi(gen
->f
, d_reg
, s1_reg
, d_reg
);
712 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
720 * Emit absolute value. See emit_ADD for comments.
723 emit_ABS(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
726 spe_comment(gen
->f
, -4, "ABS:");
727 for (ch
= 0; ch
< 4; ch
++) {
728 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
729 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
730 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
731 const int bit31mask_reg
= get_itemp(gen
);
733 /* mask with bit 31 set, the rest cleared */
734 spe_load_uint(gen
->f
, bit31mask_reg
, (1 << 31));
736 /* d = sign bit cleared in s1 */
737 spe_andc(gen
->f
, d_reg
, s1_reg
, bit31mask_reg
);
739 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
747 * Emit 3 component dot product. See emit_ADD for comments.
750 emit_DP3(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
753 int s1x_reg
, s1y_reg
, s1z_reg
;
754 int s2x_reg
, s2y_reg
, s2z_reg
;
755 int t0_reg
= get_itemp(gen
), t1_reg
= get_itemp(gen
);
757 spe_comment(gen
->f
, -4, "DP3:");
759 s1x_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
760 s2x_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
761 s1y_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
762 s2y_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
763 s1z_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
764 s2z_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
767 spe_fm(gen
->f
, t0_reg
, s1x_reg
, s2x_reg
);
770 spe_fm(gen
->f
, t1_reg
, s1y_reg
, s2y_reg
);
772 /* t0 = z0 * z1 + t0 */
773 spe_fma(gen
->f
, t0_reg
, s1z_reg
, s2z_reg
, t0_reg
);
776 spe_fa(gen
->f
, t0_reg
, t0_reg
, t1_reg
);
778 for (ch
= 0; ch
< 4; ch
++) {
779 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
780 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
781 spe_move(gen
->f
, d_reg
, t0_reg
);
782 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
791 * Emit 4 component dot product. See emit_ADD for comments.
794 emit_DP4(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
797 int s0x_reg
, s0y_reg
, s0z_reg
, s0w_reg
;
798 int s1x_reg
, s1y_reg
, s1z_reg
, s1w_reg
;
799 int t0_reg
= get_itemp(gen
), t1_reg
= get_itemp(gen
);
801 spe_comment(gen
->f
, -4, "DP4:");
803 s0x_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
804 s1x_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
805 s0y_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
806 s1y_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
807 s0z_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
808 s1z_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
809 s0w_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[0]);
810 s1w_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[1]);
813 spe_fm(gen
->f
, t0_reg
, s0x_reg
, s1x_reg
);
816 spe_fm(gen
->f
, t1_reg
, s0y_reg
, s1y_reg
);
818 /* t0 = z0 * z1 + t0 */
819 spe_fma(gen
->f
, t0_reg
, s0z_reg
, s1z_reg
, t0_reg
);
821 /* t1 = w0 * w1 + t1 */
822 spe_fma(gen
->f
, t1_reg
, s0w_reg
, s1w_reg
, t1_reg
);
825 spe_fa(gen
->f
, t0_reg
, t0_reg
, t1_reg
);
827 for (ch
= 0; ch
< 4; ch
++) {
828 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
829 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
830 spe_move(gen
->f
, d_reg
, t0_reg
);
831 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
840 * Emit homogeneous dot product. See emit_ADD for comments.
843 emit_DPH(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
845 /* XXX rewrite this function to look more like DP3/DP4 */
847 spe_comment(gen
->f
, -4, "DPH:");
849 int s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
850 int s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
851 int tmp_reg
= get_itemp(gen
);
854 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
856 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
857 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
858 /* t = y0 * y1 + t */
859 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
861 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
862 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
863 /* t = z0 * z1 + t */
864 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
866 s2_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[1]);
868 spe_fa(gen
->f
, tmp_reg
, s2_reg
, tmp_reg
);
870 for (ch
= 0; ch
< 4; ch
++) {
871 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
872 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
873 spe_move(gen
->f
, d_reg
, tmp_reg
);
874 store_dest_reg(gen
, tmp_reg
, ch
, &inst
->FullDstRegisters
[0]);
883 * Emit 3-component vector normalize.
886 emit_NRM3(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
890 int t0_reg
= get_itemp(gen
), t1_reg
= get_itemp(gen
);
892 spe_comment(gen
->f
, -4, "NRM3:");
894 src_reg
[0] = get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
895 src_reg
[1] = get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
896 src_reg
[2] = get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
899 spe_fm(gen
->f
, t0_reg
, src_reg
[0], src_reg
[0]);
902 spe_fm(gen
->f
, t1_reg
, src_reg
[1], src_reg
[1]);
904 /* t0 = z * z + t0 */
905 spe_fma(gen
->f
, t0_reg
, src_reg
[2], src_reg
[2], t0_reg
);
908 spe_fa(gen
->f
, t0_reg
, t0_reg
, t1_reg
);
910 /* t1 = 1.0 / sqrt(t0) */
911 spe_frsqest(gen
->f
, t1_reg
, t0_reg
);
912 spe_fi(gen
->f
, t1_reg
, t0_reg
, t1_reg
);
914 for (ch
= 0; ch
< 3; ch
++) { /* NOTE: omit W channel */
915 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
916 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
917 /* dst = src[ch] * t1 */
918 spe_fm(gen
->f
, d_reg
, src_reg
[ch
], t1_reg
);
919 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
929 * Emit cross product. See emit_ADD for comments.
932 emit_XPD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
934 spe_comment(gen
->f
, -4, "XPD:");
936 int s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
937 int s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
938 int tmp_reg
= get_itemp(gen
);
941 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
943 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
944 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
945 /* t = y0 * z1 - t */
946 spe_fms(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
948 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << CHAN_X
)) {
949 store_dest_reg(gen
, tmp_reg
, CHAN_X
, &inst
->FullDstRegisters
[0]);
952 s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
953 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
955 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
957 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
958 s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
959 /* t = z0 * x1 - t */
960 spe_fms(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
962 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << CHAN_Y
)) {
963 store_dest_reg(gen
, tmp_reg
, CHAN_Y
, &inst
->FullDstRegisters
[0]);
966 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
967 s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
969 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
971 s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
972 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
973 /* t = x0 * y1 - t */
974 spe_fms(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
976 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << CHAN_Z
)) {
977 store_dest_reg(gen
, tmp_reg
, CHAN_Z
, &inst
->FullDstRegisters
[0]);
985 * Emit set-if-greater-than.
986 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
987 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
988 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
991 emit_SGT(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
995 spe_comment(gen
->f
, -4, "SGT:");
997 for (ch
= 0; ch
< 4; ch
++) {
998 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
999 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1000 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1001 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1004 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
1006 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1007 /* d = d & one_reg */
1008 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
1010 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1019 * Emit set-if_less-then. See emit_SGT for comments.
1022 emit_SLT(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1026 spe_comment(gen
->f
, -4, "SLT:");
1028 for (ch
= 0; ch
< 4; ch
++) {
1029 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1030 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1031 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1032 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1035 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
1037 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1038 /* d = d & one_reg */
1039 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
1041 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1050 * Emit set-if_greater-then-or-equal. See emit_SGT for comments.
1053 emit_SGE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1057 spe_comment(gen
->f
, -4, "SGE:");
1059 for (ch
= 0; ch
< 4; ch
++) {
1060 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1061 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1062 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1063 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1065 /* d = (s1 >= s2) */
1066 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
1068 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1069 /* d = ~d & one_reg */
1070 spe_andc(gen
->f
, d_reg
, get_const_one_reg(gen
), d_reg
);
1072 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1081 * Emit set-if_less-then-or-equal. See emit_SGT for comments.
1084 emit_SLE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1088 spe_comment(gen
->f
, -4, "SLE:");
1090 for (ch
= 0; ch
< 4; ch
++) {
1091 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1092 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1093 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1094 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1096 /* d = (s1 <= s2) */
1097 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
1099 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1100 /* d = ~d & one_reg */
1101 spe_andc(gen
->f
, d_reg
, get_const_one_reg(gen
), d_reg
);
1103 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1112 * Emit set-if_equal. See emit_SGT for comments.
1115 emit_SEQ(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1119 spe_comment(gen
->f
, -4, "SEQ:");
1121 for (ch
= 0; ch
< 4; ch
++) {
1122 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1123 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1124 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1125 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1127 /* d = (s1 == s2) */
1128 spe_fceq(gen
->f
, d_reg
, s1_reg
, s2_reg
);
1130 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1131 /* d = d & one_reg */
1132 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
1134 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1143 * Emit set-if_not_equal. See emit_SGT for comments.
1146 emit_SNE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1150 spe_comment(gen
->f
, -4, "SNE:");
1152 for (ch
= 0; ch
< 4; ch
++) {
1153 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1154 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1155 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1156 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1158 /* d = (s1 != s2) */
1159 spe_fceq(gen
->f
, d_reg
, s1_reg
, s2_reg
);
1160 spe_nor(gen
->f
, d_reg
, d_reg
, d_reg
);
1162 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1163 /* d = d & one_reg */
1164 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
1166 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1175 * Emit compare. See emit_SGT for comments.
1178 emit_CMP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1182 spe_comment(gen
->f
, -4, "CMP:");
1184 for (ch
= 0; ch
< 4; ch
++) {
1185 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1186 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1187 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1188 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
1189 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1190 int zero_reg
= get_itemp(gen
);
1192 spe_xor(gen
->f
, zero_reg
, zero_reg
, zero_reg
);
1194 /* d = (s1 < 0) ? s2 : s3 */
1195 spe_fcgt(gen
->f
, d_reg
, zero_reg
, s1_reg
);
1196 spe_selb(gen
->f
, d_reg
, s3_reg
, s2_reg
, d_reg
);
1198 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1208 * Convert float to signed int
1209 * Convert signed int to float
1212 emit_TRUNC(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1216 spe_comment(gen
->f
, -4, "TRUNC:");
1218 for (ch
= 0; ch
< 4; ch
++) {
1219 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1220 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1221 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1223 /* Convert float to int */
1224 spe_cflts(gen
->f
, d_reg
, s1_reg
, 0);
1226 /* Convert int to float */
1227 spe_csflt(gen
->f
, d_reg
, d_reg
, 0);
1229 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1239 * If negative int subtract one
1240 * Convert float to signed int
1241 * Convert signed int to float
1244 emit_FLR(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1248 spe_comment(gen
->f
, -4, "FLR:");
1250 int zero_reg
= get_itemp(gen
);
1251 spe_xor(gen
->f
, zero_reg
, zero_reg
, zero_reg
);
1253 for (ch
= 0; ch
< 4; ch
++) {
1254 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1255 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1256 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1257 int tmp_reg
= get_itemp(gen
);
1259 /* If negative, subtract 1.0 */
1260 spe_fcgt(gen
->f
, tmp_reg
, zero_reg
, s1_reg
);
1261 spe_selb(gen
->f
, tmp_reg
, zero_reg
, get_const_one_reg(gen
), tmp_reg
);
1262 spe_fs(gen
->f
, tmp_reg
, s1_reg
, tmp_reg
);
1264 /* Convert float to int */
1265 spe_cflts(gen
->f
, tmp_reg
, tmp_reg
, 0);
1267 /* Convert int to float */
1268 spe_csflt(gen
->f
, d_reg
, tmp_reg
, 0);
1270 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1279 * Compute frac = Input - FLR(Input)
1282 emit_FRC(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1286 spe_comment(gen
->f
, -4, "FRC:");
1288 int zero_reg
= get_itemp(gen
);
1289 spe_xor(gen
->f
, zero_reg
, zero_reg
, zero_reg
);
1291 for (ch
= 0; ch
< 4; ch
++) {
1292 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1293 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1294 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1295 int tmp_reg
= get_itemp(gen
);
1297 /* If negative, subtract 1.0 */
1298 spe_fcgt(gen
->f
, tmp_reg
, zero_reg
, s1_reg
);
1299 spe_selb(gen
->f
, tmp_reg
, zero_reg
, get_const_one_reg(gen
), tmp_reg
);
1300 spe_fs(gen
->f
, tmp_reg
, s1_reg
, tmp_reg
);
1302 /* Convert float to int */
1303 spe_cflts(gen
->f
, tmp_reg
, tmp_reg
, 0);
1305 /* Convert int to float */
1306 spe_csflt(gen
->f
, tmp_reg
, tmp_reg
, 0);
1308 /* d = s1 - FLR(s1) */
1309 spe_fs(gen
->f
, d_reg
, s1_reg
, tmp_reg
);
1311 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1322 print_functions(struct cell_context
*cell
)
1324 struct cell_spu_function_info
*funcs
= &cell
->spu_functions
;
1326 for (i
= 0; i
< funcs
->num
; i
++) {
1327 printf("SPU func %u: %s at %u\n",
1328 i
, funcs
->names
[i
], funcs
->addrs
[i
]);
1335 lookup_function(struct cell_context
*cell
, const char *funcname
)
1337 const struct cell_spu_function_info
*funcs
= &cell
->spu_functions
;
1339 for (i
= 0; i
< funcs
->num
; i
++) {
1340 if (strcmp(funcs
->names
[i
], funcname
) == 0) {
1341 addr
= funcs
->addrs
[i
];
1344 assert(addr
&& "spu function not found");
1345 return addr
/ 4; /* discard 2 least significant bits */
1350 * Emit code to call a SPU function.
1351 * Used to implement instructions like SIN/COS/POW/TEX/etc.
1352 * If scalar, only the X components of the src regs are used, and the
1353 * result is replicated across the dest register's XYZW components.
1356 emit_function_call(struct codegen
*gen
,
1357 const struct tgsi_full_instruction
*inst
,
1358 char *funcname
, uint num_args
, boolean scalar
)
1360 const uint addr
= lookup_function(gen
->cell
, funcname
);
1363 int func_called
= FALSE
;
1365 int retval_reg
= -1;
1367 assert(num_args
<= 3);
1369 snprintf(comment
, sizeof(comment
), "CALL %s:", funcname
);
1370 spe_comment(gen
->f
, -4, comment
);
1373 for (a
= 0; a
< num_args
; a
++) {
1374 s_regs
[a
] = get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[a
]);
1376 /* we'll call the function, put the return value in this register,
1377 * then replicate it across all write-enabled components in d_reg.
1379 retval_reg
= spe_allocate_available_register(gen
->f
);
1382 for (ch
= 0; ch
< 4; ch
++) {
1383 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1385 ubyte usedRegs
[SPE_NUM_REGS
];
1389 for (a
= 0; a
< num_args
; a
++) {
1390 s_regs
[a
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[a
]);
1394 d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1396 if (!scalar
|| !func_called
) {
1397 /* for a scalar function, we'll really only call the function once */
1399 numUsed
= spe_get_registers_used(gen
->f
, usedRegs
);
1400 assert(numUsed
< gen
->frame_size
/ 16 - 2);
1402 /* save registers to stack */
1403 for (i
= 0; i
< numUsed
; i
++) {
1404 uint reg
= usedRegs
[i
];
1406 spe_stqd(gen
->f
, reg
, SPE_REG_SP
, 16 * offset
);
1409 /* setup function arguments */
1410 for (a
= 0; a
< num_args
; a
++) {
1411 spe_move(gen
->f
, 3 + a
, s_regs
[a
]);
1414 /* branch to function, save return addr */
1415 spe_brasl(gen
->f
, SPE_REG_RA
, addr
);
1417 /* save function's return value */
1419 spe_move(gen
->f
, retval_reg
, 3);
1421 spe_move(gen
->f
, d_reg
, 3);
1423 /* restore registers from stack */
1424 for (i
= 0; i
< numUsed
; i
++) {
1425 uint reg
= usedRegs
[i
];
1426 if (reg
!= d_reg
&& reg
!= retval_reg
) {
1428 spe_lqd(gen
->f
, reg
, SPE_REG_SP
, 16 * offset
);
1436 spe_move(gen
->f
, d_reg
, retval_reg
);
1439 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1445 spe_release_register(gen
->f
, retval_reg
);
1453 emit_TEX(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1455 const uint target
= inst
->InstructionExtTexture
.Texture
;
1456 const uint unit
= inst
->FullSrcRegisters
[1].SrcRegister
.Index
;
1459 int coord_regs
[4], d_regs
[4];
1462 case TGSI_TEXTURE_1D
:
1463 case TGSI_TEXTURE_2D
:
1464 addr
= lookup_function(gen
->cell
, "spu_tex_2d");
1466 case TGSI_TEXTURE_3D
:
1467 addr
= lookup_function(gen
->cell
, "spu_tex_3d");
1469 case TGSI_TEXTURE_CUBE
:
1470 addr
= lookup_function(gen
->cell
, "spu_tex_cube");
1473 ASSERT(0 && "unsupported texture target");
1477 assert(inst
->FullSrcRegisters
[1].SrcRegister
.File
== TGSI_FILE_SAMPLER
);
1479 spe_comment(gen
->f
, -4, "CALL tex:");
1481 /* get src/dst reg info */
1482 for (ch
= 0; ch
< 4; ch
++) {
1483 coord_regs
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1484 d_regs
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1488 ubyte usedRegs
[SPE_NUM_REGS
];
1491 numUsed
= spe_get_registers_used(gen
->f
, usedRegs
);
1492 assert(numUsed
< gen
->frame_size
/ 16 - 2);
1494 /* save registers to stack */
1495 for (i
= 0; i
< numUsed
; i
++) {
1496 uint reg
= usedRegs
[i
];
1498 spe_stqd(gen
->f
, reg
, SPE_REG_SP
, 16 * offset
);
1501 /* setup function arguments (XXX depends on target) */
1502 for (i
= 0; i
< 4; i
++) {
1503 spe_move(gen
->f
, 3 + i
, coord_regs
[i
]);
1505 spe_load_uint(gen
->f
, 7, unit
); /* sampler unit */
1507 /* branch to function, save return addr */
1508 spe_brasl(gen
->f
, SPE_REG_RA
, addr
);
1510 /* save function's return values (four pixel's colors) */
1511 for (i
= 0; i
< 4; i
++) {
1512 spe_move(gen
->f
, d_regs
[i
], 3 + i
);
1515 /* restore registers from stack */
1516 for (i
= 0; i
< numUsed
; i
++) {
1517 uint reg
= usedRegs
[i
];
1518 if (reg
!= d_regs
[0] &&
1523 spe_lqd(gen
->f
, reg
, SPE_REG_SP
, 16 * offset
);
1528 for (ch
= 0; ch
< 4; ch
++) {
1529 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1530 store_dest_reg(gen
, d_regs
[ch
], ch
, &inst
->FullDstRegisters
[0]);
1540 * KILL if any of src reg values are less than zero.
1543 emit_KIL(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1546 int s_regs
[4], kil_reg
= -1, cmp_reg
, zero_reg
;
1548 spe_comment(gen
->f
, -4, "CALL kil:");
1550 /* zero = {0,0,0,0} */
1551 zero_reg
= get_itemp(gen
);
1552 spe_load_uint(gen
->f
, zero_reg
, 0);
1554 cmp_reg
= get_itemp(gen
);
1557 for (ch
= 0; ch
< 4; ch
++) {
1558 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1559 s_regs
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1563 /* test if any src regs are < 0 */
1564 for (ch
= 0; ch
< 4; ch
++) {
1565 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1567 /* cmp = 0 > src ? : ~0 : 0 */
1568 spe_fcgt(gen
->f
, cmp_reg
, zero_reg
, s_regs
[ch
]);
1569 /* kil = kil | cmp */
1570 spe_or(gen
->f
, kil_reg
, kil_reg
, cmp_reg
);
1573 kil_reg
= get_itemp(gen
);
1574 /* kil = 0 > src ? : ~0 : 0 */
1575 spe_fcgt(gen
->f
, kil_reg
, zero_reg
, s_regs
[ch
]);
1580 if (gen
->if_nesting
) {
1581 /* may have been a conditional kil */
1582 spe_and(gen
->f
, kil_reg
, kil_reg
, gen
->exec_mask_reg
);
1585 /* allocate the kill mask reg if needed */
1586 if (gen
->kill_mask_reg
<= 0) {
1587 gen
->kill_mask_reg
= spe_allocate_available_register(gen
->f
);
1588 spe_move(gen
->f
, gen
->kill_mask_reg
, kil_reg
);
1591 spe_or(gen
->f
, gen
->kill_mask_reg
, gen
->kill_mask_reg
, kil_reg
);
1602 * Emit max. See emit_SGT for comments.
1605 emit_MAX(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1607 int ch
, s0_reg
[4], s1_reg
[4], d_reg
[4], tmp_reg
[4];
1609 spe_comment(gen
->f
, -4, "MAX:");
1611 for (ch
= 0; ch
< 4; ch
++) {
1612 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1613 s0_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1614 s1_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1615 d_reg
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1616 tmp_reg
[ch
] = get_itemp(gen
);
1620 /* d = (s0 > s1) ? s0 : s1 */
1621 for (ch
= 0; ch
< 4; ch
++) {
1622 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1623 spe_fcgt(gen
->f
, tmp_reg
[ch
], s0_reg
[ch
], s1_reg
[ch
]);
1626 for (ch
= 0; ch
< 4; ch
++) {
1627 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1628 spe_selb(gen
->f
, d_reg
[ch
], s1_reg
[ch
], s0_reg
[ch
], tmp_reg
[ch
]);
1632 for (ch
= 0; ch
< 4; ch
++) {
1633 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1634 store_dest_reg(gen
, d_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
1643 * Emit max. See emit_SGT for comments.
1646 emit_MIN(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1648 int ch
, s0_reg
[4], s1_reg
[4], d_reg
[4], tmp_reg
[4];
1650 spe_comment(gen
->f
, -4, "MIN:");
1652 for (ch
= 0; ch
< 4; ch
++) {
1653 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1654 s0_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1655 s1_reg
[ch
] = get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1656 d_reg
[ch
] = get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1657 tmp_reg
[ch
] = get_itemp(gen
);
1661 /* d = (s1 > s0) ? s0 : s1 */
1662 for (ch
= 0; ch
< 4; ch
++) {
1663 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1664 spe_fcgt(gen
->f
, tmp_reg
[ch
], s1_reg
[ch
], s0_reg
[ch
]);
1667 for (ch
= 0; ch
< 4; ch
++) {
1668 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1669 spe_selb(gen
->f
, d_reg
[ch
], s1_reg
[ch
], s0_reg
[ch
], tmp_reg
[ch
]);
1673 for (ch
= 0; ch
< 4; ch
++) {
1674 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1675 store_dest_reg(gen
, d_reg
[ch
], ch
, &inst
->FullDstRegisters
[0]);
1684 emit_IF(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1686 const int channel
= 0;
1687 const int exec_reg
= get_exec_mask_reg(gen
);
1689 spe_comment(gen
->f
, -4, "IF:");
1691 /* update execution mask with the predicate register */
1692 int tmp_reg
= get_itemp(gen
);
1693 int s1_reg
= get_src_reg(gen
, channel
, &inst
->FullSrcRegisters
[0]);
1695 /* tmp = (s1_reg == 0) */
1696 spe_ceqi(gen
->f
, tmp_reg
, s1_reg
, 0);
1698 spe_complement(gen
->f
, tmp_reg
, tmp_reg
);
1699 /* exec_mask = exec_mask & tmp */
1700 spe_and(gen
->f
, exec_reg
, exec_reg
, tmp_reg
);
1711 emit_ELSE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1713 const int exec_reg
= get_exec_mask_reg(gen
);
1715 spe_comment(gen
->f
, -4, "ELSE:");
1717 /* exec_mask = !exec_mask */
1718 spe_complement(gen
->f
, exec_reg
, exec_reg
);
1725 emit_ENDIF(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1727 const int exec_reg
= get_exec_mask_reg(gen
);
1729 spe_comment(gen
->f
, -4, "ENDIF:");
1731 /* XXX todo: pop execution mask */
1733 spe_load_int(gen
->f
, exec_reg
, ~0x0);
1741 emit_DDX_DDY(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
,
1746 spe_comment(gen
->f
, -4, ddx
? "DDX:" : "DDY:");
1748 for (ch
= 0; ch
< 4; ch
++) {
1749 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1750 int s_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1751 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1753 int t1_reg
= get_itemp(gen
);
1754 int t2_reg
= get_itemp(gen
);
1756 spe_splat_word(gen
->f
, t1_reg
, s_reg
, 0); /* upper-left pixel */
1758 spe_splat_word(gen
->f
, t2_reg
, s_reg
, 1); /* upper-right pixel */
1761 spe_splat_word(gen
->f
, t2_reg
, s_reg
, 2); /* lower-left pixel */
1763 spe_fs(gen
->f
, d_reg
, t2_reg
, t1_reg
);
1776 * Emit END instruction.
1777 * We just return from the shader function at this point.
1779 * Note that there may be more code after this that would be
1780 * called by TGSI_OPCODE_CALL.
1783 emit_END(struct codegen
*gen
)
1785 spe_comment(gen
->f
, -4, "END:");
1792 * Emit code for the given instruction. Just a big switch stmt.
1795 emit_instruction(struct codegen
*gen
,
1796 const struct tgsi_full_instruction
*inst
)
1798 switch (inst
->Instruction
.Opcode
) {
1799 case TGSI_OPCODE_MOV
:
1800 case TGSI_OPCODE_SWZ
:
1801 return emit_MOV(gen
, inst
);
1802 case TGSI_OPCODE_MUL
:
1803 return emit_MUL(gen
, inst
);
1804 case TGSI_OPCODE_ADD
:
1805 return emit_ADD(gen
, inst
);
1806 case TGSI_OPCODE_SUB
:
1807 return emit_SUB(gen
, inst
);
1808 case TGSI_OPCODE_MAD
:
1809 return emit_MAD(gen
, inst
);
1810 case TGSI_OPCODE_LERP
:
1811 return emit_LERP(gen
, inst
);
1812 case TGSI_OPCODE_DP3
:
1813 return emit_DP3(gen
, inst
);
1814 case TGSI_OPCODE_DP4
:
1815 return emit_DP4(gen
, inst
);
1816 case TGSI_OPCODE_DPH
:
1817 return emit_DPH(gen
, inst
);
1818 case TGSI_OPCODE_NRM
:
1819 return emit_NRM3(gen
, inst
);
1820 case TGSI_OPCODE_XPD
:
1821 return emit_XPD(gen
, inst
);
1822 case TGSI_OPCODE_RCP
:
1823 return emit_RCP(gen
, inst
);
1824 case TGSI_OPCODE_RSQ
:
1825 return emit_RSQ(gen
, inst
);
1826 case TGSI_OPCODE_ABS
:
1827 return emit_ABS(gen
, inst
);
1828 case TGSI_OPCODE_SGT
:
1829 return emit_SGT(gen
, inst
);
1830 case TGSI_OPCODE_SLT
:
1831 return emit_SLT(gen
, inst
);
1832 case TGSI_OPCODE_SGE
:
1833 return emit_SGE(gen
, inst
);
1834 case TGSI_OPCODE_SLE
:
1835 return emit_SLE(gen
, inst
);
1836 case TGSI_OPCODE_SEQ
:
1837 return emit_SEQ(gen
, inst
);
1838 case TGSI_OPCODE_SNE
:
1839 return emit_SNE(gen
, inst
);
1840 case TGSI_OPCODE_CMP
:
1841 return emit_CMP(gen
, inst
);
1842 case TGSI_OPCODE_MAX
:
1843 return emit_MAX(gen
, inst
);
1844 case TGSI_OPCODE_MIN
:
1845 return emit_MIN(gen
, inst
);
1846 case TGSI_OPCODE_TRUNC
:
1847 return emit_TRUNC(gen
, inst
);
1848 case TGSI_OPCODE_FLR
:
1849 return emit_FLR(gen
, inst
);
1850 case TGSI_OPCODE_FRC
:
1851 return emit_FRC(gen
, inst
);
1852 case TGSI_OPCODE_END
:
1853 return emit_END(gen
);
1855 case TGSI_OPCODE_COS
:
1856 return emit_function_call(gen
, inst
, "spu_cos", 1, TRUE
);
1857 case TGSI_OPCODE_SIN
:
1858 return emit_function_call(gen
, inst
, "spu_sin", 1, TRUE
);
1859 case TGSI_OPCODE_POW
:
1860 return emit_function_call(gen
, inst
, "spu_pow", 2, TRUE
);
1861 case TGSI_OPCODE_EXPBASE2
:
1862 return emit_function_call(gen
, inst
, "spu_exp2", 1, TRUE
);
1863 case TGSI_OPCODE_LOGBASE2
:
1864 return emit_function_call(gen
, inst
, "spu_log2", 1, TRUE
);
1865 case TGSI_OPCODE_TEX
:
1866 /* fall-through for now */
1867 case TGSI_OPCODE_TXD
:
1868 /* fall-through for now */
1869 case TGSI_OPCODE_TXB
:
1870 /* fall-through for now */
1871 case TGSI_OPCODE_TXL
:
1872 /* fall-through for now */
1873 case TGSI_OPCODE_TXP
:
1874 return emit_TEX(gen
, inst
);
1875 case TGSI_OPCODE_KIL
:
1876 return emit_KIL(gen
, inst
);
1878 case TGSI_OPCODE_IF
:
1879 return emit_IF(gen
, inst
);
1880 case TGSI_OPCODE_ELSE
:
1881 return emit_ELSE(gen
, inst
);
1882 case TGSI_OPCODE_ENDIF
:
1883 return emit_ENDIF(gen
, inst
);
1885 case TGSI_OPCODE_DDX
:
1886 return emit_DDX_DDY(gen
, inst
, true);
1887 case TGSI_OPCODE_DDY
:
1888 return emit_DDX_DDY(gen
, inst
, false);
1890 /* XXX lots more cases to do... */
1893 fprintf(stderr
, "Cell: unimplemented TGSI instruction %d!\n",
1894 inst
->Instruction
.Opcode
);
1904 * Emit code for a TGSI immediate value (vector of four floats).
1905 * This involves register allocation and initialization.
1906 * XXX the initialization should be done by a "prepare" stage, not
1907 * per quad execution!
1910 emit_immediate(struct codegen
*gen
, const struct tgsi_full_immediate
*immed
)
1914 assert(gen
->num_imm
< MAX_TEMPS
);
1916 spe_comment(gen
->f
, -4, "IMMEDIATE:");
1918 for (ch
= 0; ch
< 4; ch
++) {
1919 float val
= immed
->u
.ImmediateFloat32
[ch
].Float
;
1921 if (ch
> 0 && val
== immed
->u
.ImmediateFloat32
[ch
- 1].Float
) {
1922 /* re-use previous register */
1923 gen
->imm_regs
[gen
->num_imm
][ch
] = gen
->imm_regs
[gen
->num_imm
][ch
- 1];
1926 int reg
= spe_allocate_available_register(gen
->f
);
1931 /* update immediate map */
1932 gen
->imm_regs
[gen
->num_imm
][ch
] = reg
;
1934 /* emit initializer instruction */
1935 spe_load_float(gen
->f
, reg
, val
);
1947 * Emit "code" for a TGSI declaration.
1948 * We only care about TGSI TEMPORARY register declarations at this time.
1949 * For each TGSI TEMPORARY we allocate four SPE registers.
1952 emit_declaration(struct cell_context
*cell
,
1953 struct codegen
*gen
, const struct tgsi_full_declaration
*decl
)
1957 switch (decl
->Declaration
.File
) {
1958 case TGSI_FILE_TEMPORARY
:
1959 for (i
= decl
->DeclarationRange
.First
;
1960 i
<= decl
->DeclarationRange
.Last
;
1962 assert(i
< MAX_TEMPS
);
1963 for (ch
= 0; ch
< 4; ch
++) {
1964 gen
->temp_regs
[i
][ch
] = spe_allocate_available_register(gen
->f
);
1965 if (gen
->temp_regs
[i
][ch
] < 0)
1966 return false; /* out of regs */
1969 /* XXX if we run out of SPE registers, we need to spill
1970 * to SPU memory. someday...
1975 sprintf(buf
, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i
,
1976 gen
->temp_regs
[i
][0], gen
->temp_regs
[i
][1],
1977 gen
->temp_regs
[i
][2], gen
->temp_regs
[i
][3]);
1978 spe_comment(gen
->f
, -4, buf
);
1992 * Translate TGSI shader code to SPE instructions. This is done when
1993 * the state tracker gives us a new shader (via pipe->create_fs_state()).
1995 * \param cell the rendering context (in)
1996 * \param tokens the TGSI shader (in)
1997 * \param f the generated function (out)
2000 cell_gen_fragment_program(struct cell_context
*cell
,
2001 const struct tgsi_token
*tokens
,
2002 struct spe_function
*f
)
2004 struct tgsi_parse_context parse
;
2007 memset(&gen
, 0, sizeof(gen
));
2011 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
2012 gen
.inputs_reg
= 3; /* pointer to inputs array */
2013 gen
.outputs_reg
= 4; /* pointer to outputs array */
2014 gen
.constants_reg
= 5; /* pointer to constants array */
2016 spe_init_func(f
, SPU_MAX_FRAGMENT_PROGRAM_INSTS
* SPE_INST_SIZE
);
2017 spe_allocate_register(f
, gen
.inputs_reg
);
2018 spe_allocate_register(f
, gen
.outputs_reg
);
2019 spe_allocate_register(f
, gen
.constants_reg
);
2021 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
2022 spe_print_code(f
, true);
2024 printf("Begin %s\n", __FUNCTION__
);
2025 tgsi_dump(tokens
, 0);
2028 tgsi_parse_init(&parse
, tokens
);
2030 emit_prologue(&gen
);
2032 while (!tgsi_parse_end_of_tokens(&parse
) && !gen
.error
) {
2033 tgsi_parse_token(&parse
);
2035 switch (parse
.FullToken
.Token
.Type
) {
2036 case TGSI_TOKEN_TYPE_IMMEDIATE
:
2037 if (!emit_immediate(&gen
, &parse
.FullToken
.FullImmediate
))
2041 case TGSI_TOKEN_TYPE_DECLARATION
:
2042 if (!emit_declaration(cell
, &gen
, &parse
.FullToken
.FullDeclaration
))
2046 case TGSI_TOKEN_TYPE_INSTRUCTION
:
2047 if (!emit_instruction(&gen
, &parse
.FullToken
.FullInstruction
))
2057 /* terminate the SPE code */
2058 return emit_END(&gen
);
2061 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
2062 printf("cell_gen_fragment_program nr instructions: %d\n", f
->num_inst
);
2063 printf("End %s\n", __FUNCTION__
);
2066 tgsi_parse_free( &parse
);