1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * Generate SPU fragment program/shader code.
33 * Note that we generate SOA-style code here. So each TGSI instruction
34 * operates on four pixels (and is translated into four SPU instructions,
35 * generally speaking).
41 #include "pipe/p_defines.h"
42 #include "pipe/p_state.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
48 #include "rtasm/rtasm_ppc_spe.h"
49 #include "util/u_memory.h"
50 #include "cell_context.h"
51 #include "cell_gen_fp.h"
59 * Context needed during code generation.
63 int inputs_reg
; /**< 1st function parameter */
64 int outputs_reg
; /**< 2nd function parameter */
65 int constants_reg
; /**< 3rd function parameter */
66 int temp_regs
[MAX_TEMPS
][4]; /**< maps TGSI temps to SPE registers */
67 int imm_regs
[MAX_IMMED
][4]; /**< maps TGSI immediates to SPE registers */
69 int num_imm
; /**< number of immediates */
71 int one_reg
; /**< register containing {1.0, 1.0, 1.0, 1.0} */
73 /** Per-instruction temps / intermediate temps */
77 /** Current IF/ELSE/ENDIF nesting level */
79 /** Index of execution mask register */
82 struct spe_function
*f
;
88 * Allocate an intermediate temporary register.
91 get_itemp(struct codegen
*gen
)
93 int t
= spe_allocate_available_register(gen
->f
);
94 assert(gen
->num_itemps
< Elements(gen
->itemps
));
95 gen
->itemps
[gen
->num_itemps
++] = t
;
100 * Free all intermediate temporary registers. To be called after each
101 * instruction has been emitted.
104 free_itemps(struct codegen
*gen
)
107 for (i
= 0; i
< gen
->num_itemps
; i
++) {
108 spe_release_register(gen
->f
, gen
->itemps
[i
]);
115 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
116 * The register is allocated and initialized upon the first call.
119 get_const_one_reg(struct codegen
*gen
)
121 if (gen
->one_reg
<= 0) {
122 gen
->one_reg
= spe_allocate_available_register(gen
->f
);
124 spe_indent(gen
->f
, 4);
125 spe_comment(gen
->f
, -4, "INIT CONSTANT 1.0:");
127 /* one = {1.0, 1.0, 1.0, 1.0} */
128 spe_load_float(gen
->f
, gen
->one_reg
, 1.0f
);
130 spe_indent(gen
->f
, -4);
138 * Return index of the pixel execution mask.
139 * The register is allocated an initialized upon the first call.
141 * The pixel execution mask controls which pixels in a quad are
142 * modified, according to surrounding conditionals, loops, etc.
145 get_exec_mask_reg(struct codegen
*gen
)
147 if (gen
->exec_mask_reg
<= 0) {
148 gen
->exec_mask_reg
= spe_allocate_available_register(gen
->f
);
150 spe_indent(gen
->f
, 4);
151 spe_comment(gen
->f
, -4, "INIT EXEC MASK = ~0:");
153 /* exec_mask = {~0, ~0, ~0, ~0} */
154 spe_load_int(gen
->f
, gen
->exec_mask_reg
, ~0);
156 spe_indent(gen
->f
, -4);
159 return gen
->exec_mask_reg
;
164 * Return the index of the SPU temporary containing the named TGSI
165 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
166 * just return the corresponding SPE register. If the TGIS register
167 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
168 * and emit an SPE load instruction.
171 get_src_reg(struct codegen
*gen
,
173 const struct tgsi_full_src_register
*src
)
176 int swizzle
= tgsi_util_get_full_src_register_extswizzle(src
, channel
);
177 boolean reg_is_itemp
= FALSE
;
180 assert(swizzle
>= 0);
181 assert(swizzle
<= 3);
185 switch (src
->SrcRegister
.File
) {
186 case TGSI_FILE_TEMPORARY
:
187 reg
= gen
->temp_regs
[src
->SrcRegister
.Index
][channel
];
189 case TGSI_FILE_INPUT
:
191 /* offset is measured in quadwords, not bytes */
192 int offset
= src
->SrcRegister
.Index
* 4 + channel
;
193 reg
= get_itemp(gen
);
195 /* Load: reg = memory[(machine_reg) + offset] */
196 spe_lqd(gen
->f
, reg
, gen
->inputs_reg
, offset
);
199 case TGSI_FILE_IMMEDIATE
:
200 reg
= gen
->imm_regs
[src
->SrcRegister
.Index
][channel
];
202 case TGSI_FILE_CONSTANT
:
203 /* xxx fall-through for now / fix */
209 * Handle absolute value, negate or set-negative of src register.
211 sign_op
= tgsi_util_get_full_src_register_sign_mode(src
, channel
);
212 if (sign_op
!= TGSI_UTIL_SIGN_KEEP
) {
214 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
216 const int bit31mask_reg
= get_itemp(gen
);
220 /* re-use 'reg' for the result */
224 /* alloc a new reg for the result */
225 result_reg
= get_itemp(gen
);
228 /* mask with bit 31 set, the rest cleared */
229 spe_load_int(gen
->f
, bit31mask_reg
, (1 << 31));
231 if (sign_op
== TGSI_UTIL_SIGN_CLEAR
) {
232 spe_andc(gen
->f
, result_reg
, reg
, bit31mask_reg
);
234 else if (sign_op
== TGSI_UTIL_SIGN_SET
) {
235 spe_and(gen
->f
, result_reg
, reg
, bit31mask_reg
);
238 assert(sign_op
== TGSI_UTIL_SIGN_TOGGLE
);
239 spe_xor(gen
->f
, result_reg
, reg
, bit31mask_reg
);
250 * Return the index of an SPE register to use for the given TGSI register.
251 * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
252 * corresponding SPE register is returned. If the TGSI register is
253 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
254 * See store_dest_reg() below...
257 get_dst_reg(struct codegen
*gen
,
259 const struct tgsi_full_dst_register
*dest
)
263 switch (dest
->DstRegister
.File
) {
264 case TGSI_FILE_TEMPORARY
:
265 if (gen
->if_nesting
> 0)
266 reg
= get_itemp(gen
);
268 reg
= gen
->temp_regs
[dest
->DstRegister
.Index
][channel
];
270 case TGSI_FILE_OUTPUT
:
271 reg
= get_itemp(gen
);
282 * When a TGSI instruction is writing to an output register, this
283 * function emits the SPE store instruction to store the value_reg.
284 * \param value_reg the SPE register containing the value to store.
285 * This would have been returned by get_dst_reg().
288 store_dest_reg(struct codegen
*gen
,
289 int value_reg
, int channel
,
290 const struct tgsi_full_dst_register
*dest
)
292 switch (dest
->DstRegister
.File
) {
293 case TGSI_FILE_TEMPORARY
:
294 if (gen
->if_nesting
> 0) {
295 int d_reg
= gen
->temp_regs
[dest
->DstRegister
.Index
][channel
];
296 int exec_reg
= get_exec_mask_reg(gen
);
297 /* Mix d with new value according to exec mask:
298 * d[i] = mask_reg[i] ? value_reg : d_reg
300 spe_selb(gen
->f
, d_reg
, d_reg
, value_reg
, exec_reg
);
303 /* we're not inside a condition or loop: do nothing special */
306 case TGSI_FILE_OUTPUT
:
308 /* offset is measured in quadwords, not bytes */
309 int offset
= dest
->DstRegister
.Index
* 4 + channel
;
310 if (gen
->if_nesting
> 0) {
311 int exec_reg
= get_exec_mask_reg(gen
);
312 int curval_reg
= get_itemp(gen
);
313 /* First read the current value from memory:
314 * Load: curval = memory[(machine_reg) + offset]
316 spe_lqd(gen
->f
, curval_reg
, gen
->outputs_reg
, offset
);
317 /* Mix curval with newvalue according to exec mask:
318 * d[i] = mask_reg[i] ? value_reg : d_reg
320 spe_selb(gen
->f
, curval_reg
, curval_reg
, value_reg
, exec_reg
);
321 /* Store: memory[(machine_reg) + offset] = curval */
322 spe_stqd(gen
->f
, curval_reg
, gen
->outputs_reg
, offset
);
325 /* Store: memory[(machine_reg) + offset] = reg */
326 spe_stqd(gen
->f
, value_reg
, gen
->outputs_reg
, offset
);
337 emit_MOV(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
340 spe_comment(gen
->f
, -4, "MOV:");
341 for (ch
= 0; ch
< 4; ch
++) {
342 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
343 int src_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
344 int dst_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
345 /* XXX we don't always need to actually emit a mov instruction here */
346 spe_move(gen
->f
, dst_reg
, src_reg
);
347 store_dest_reg(gen
, dst_reg
, ch
, &inst
->FullDstRegisters
[0]);
357 * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
358 * becomes (up to) four SPU "fa" instructions because we're doing SOA
362 emit_ADD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
365 spe_comment(gen
->f
, -4, "ADD:");
366 /* Loop over Red/Green/Blue/Alpha channels */
367 for (ch
= 0; ch
< 4; ch
++) {
368 /* If the dest R, G, B or A writemask is enabled... */
369 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
370 /* get indexes of the two src, one dest SPE registers */
371 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
372 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
373 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
375 /* Emit actual SPE instruction: d = s1 + s2 */
376 spe_fa(gen
->f
, d_reg
, s1_reg
, s2_reg
);
378 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
379 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
380 /* Free any intermediate temps we allocated */
388 * Emit subtract. See emit_ADD for comments.
391 emit_SUB(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
394 spe_comment(gen
->f
, -4, "SUB:");
395 /* Loop over Red/Green/Blue/Alpha channels */
396 for (ch
= 0; ch
< 4; ch
++) {
397 /* If the dest R, G, B or A writemask is enabled... */
398 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
399 /* get indexes of the two src, one dest SPE registers */
400 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
401 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
402 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
404 /* Emit actual SPE instruction: d = s1 - s2 */
405 spe_fs(gen
->f
, d_reg
, s1_reg
, s2_reg
);
407 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
408 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
409 /* Free any intermediate temps we allocated */
417 * Emit multiply add. See emit_ADD for comments.
420 emit_MAD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
423 spe_comment(gen
->f
, -4, "MAD:");
424 for (ch
= 0; ch
< 4; ch
++) {
425 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
426 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
427 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
428 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
429 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
430 /* d = s1 * s2 + s3 */
431 spe_fma(gen
->f
, d_reg
, s1_reg
, s2_reg
, s3_reg
);
432 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
441 * Emit linear interpolate. See emit_ADD for comments.
444 emit_LERP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
447 spe_comment(gen
->f
, -4, "LERP:");
448 for (ch
= 0; ch
< 4; ch
++) {
449 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
450 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
451 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
452 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
453 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
454 /* d = s3 + s1(s2 - s3) */
455 spe_fs(gen
->f
, d_reg
, s2_reg
, s3_reg
);
456 spe_fm(gen
->f
, d_reg
, d_reg
, s1_reg
);
457 spe_fa(gen
->f
, d_reg
, d_reg
, s3_reg
);
458 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
466 * Emit multiply. See emit_ADD for comments.
469 emit_MUL(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
472 spe_comment(gen
->f
, -4, "MUL:");
473 for (ch
= 0; ch
< 4; ch
++) {
474 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
475 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
476 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
477 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
479 spe_fm(gen
->f
, d_reg
, s1_reg
, s2_reg
);
480 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
488 * Emit absolute value. See emit_ADD for comments.
491 emit_ABS(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
494 spe_comment(gen
->f
, -4, "ABS:");
495 for (ch
= 0; ch
< 4; ch
++) {
496 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
497 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
498 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
499 const int bit31mask_reg
= get_itemp(gen
);
501 /* mask with bit 31 set, the rest cleared */
502 spe_load_int(gen
->f
, bit31mask_reg
, (1 << 31));
504 /* d = sign bit cleared in s1 */
505 spe_andc(gen
->f
, d_reg
, s1_reg
, bit31mask_reg
);
507 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
515 * Emit set-if-greater-than.
516 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
517 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
518 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
521 emit_SGT(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
525 spe_comment(gen
->f
, -4, "SGT:");
527 for (ch
= 0; ch
< 4; ch
++) {
528 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
529 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
530 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
531 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
534 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
536 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
537 /* d = d & one_reg */
538 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
540 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
549 * Emit set-if_less-then. See emit_SGT for comments.
552 emit_SLT(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
556 spe_comment(gen
->f
, -4, "SLT:");
558 for (ch
= 0; ch
< 4; ch
++) {
559 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
560 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
561 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
562 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
565 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
567 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
568 /* d = d & one_reg */
569 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
571 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
580 * Emit set-if_equal. See emit_SGT for comments.
583 emit_SEQ(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
587 spe_comment(gen
->f
, -4, "SEQ:");
589 for (ch
= 0; ch
< 4; ch
++) {
590 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
591 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
592 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
593 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
596 spe_fceq(gen
->f
, d_reg
, s1_reg
, s2_reg
);
598 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
599 /* d = d & one_reg */
600 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
602 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
611 * Emit set-if_not_equal. See emit_SGT for comments.
614 emit_SNE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
618 spe_comment(gen
->f
, -4, "SNE:");
620 for (ch
= 0; ch
< 4; ch
++) {
621 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
622 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
623 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
624 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
627 spe_fceq(gen
->f
, d_reg
, s1_reg
, s2_reg
);
628 spe_nor(gen
->f
, d_reg
, d_reg
, d_reg
);
630 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
631 /* d = d & one_reg */
632 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
634 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
643 * Emit max. See emit_SGT for comments.
646 emit_MAX(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
650 spe_comment(gen
->f
, -4, "MAX:");
652 for (ch
= 0; ch
< 4; ch
++) {
653 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
654 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
655 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
656 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
658 /* d = (s1 > s2) ? s1 : s2 */
659 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
660 spe_and(gen
->f
, d_reg
, d_reg
, s1_reg
);
661 spe_nor(gen
->f
, d_reg
, d_reg
, d_reg
);
662 spe_and(gen
->f
, d_reg
, d_reg
, s2_reg
);
664 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
673 * Emit max. See emit_SGT for comments.
676 emit_MIN(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
680 spe_comment(gen
->f
, -4, "MIN:");
682 for (ch
= 0; ch
< 4; ch
++) {
683 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
684 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
685 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
686 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
688 /* d = (s1 < s2) ? s1 : s2 */
689 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
690 spe_and(gen
->f
, d_reg
, d_reg
, s1_reg
);
691 spe_nor(gen
->f
, d_reg
, d_reg
, d_reg
);
692 spe_and(gen
->f
, d_reg
, d_reg
, s2_reg
);
694 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
703 emit_IF(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
705 const int channel
= 0;
706 const int exec_reg
= get_exec_mask_reg(gen
);
708 spe_comment(gen
->f
, -4, "IF:");
710 /* update execution mask with the predicate register */
711 int tmp_reg
= get_itemp(gen
);
712 int s1_reg
= get_src_reg(gen
, channel
, &inst
->FullSrcRegisters
[0]);
714 /* tmp = (s1_reg == 0) */
715 spe_ceqi(gen
->f
, tmp_reg
, s1_reg
, 0);
717 spe_complement(gen
->f
, tmp_reg
);
718 /* exec_mask = exec_mask & tmp */
719 spe_and(gen
->f
, exec_reg
, exec_reg
, tmp_reg
);
730 emit_ELSE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
732 const int exec_reg
= get_exec_mask_reg(gen
);
734 spe_comment(gen
->f
, -4, "ELSE:");
736 /* exec_mask = !exec_mask */
737 spe_complement(gen
->f
, exec_reg
);
744 emit_ENDIF(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
746 const int exec_reg
= get_exec_mask_reg(gen
);
748 spe_comment(gen
->f
, -4, "ENDIF:");
750 /* XXX todo: pop execution mask */
752 spe_load_int(gen
->f
, exec_reg
, ~0x0);
760 emit_DDX_DDY(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
,
765 spe_comment(gen
->f
, -4, ddx
? "DDX:" : "DDY:");
767 for (ch
= 0; ch
< 4; ch
++) {
768 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
769 int s_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
770 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
772 int t1_reg
= get_itemp(gen
);
773 int t2_reg
= get_itemp(gen
);
775 spe_splat_word(gen
->f
, t1_reg
, s_reg
, 0); /* upper-left pixel */
777 spe_splat_word(gen
->f
, t2_reg
, s_reg
, 1); /* upper-right pixel */
780 spe_splat_word(gen
->f
, t2_reg
, s_reg
, 2); /* lower-left pixel */
782 spe_fs(gen
->f
, d_reg
, t2_reg
, t1_reg
);
795 * Emit END instruction.
796 * We just return from the shader function at this point.
798 * Note that there may be more code after this that would be
799 * called by TGSI_OPCODE_CALL.
802 emit_END(struct codegen
*gen
)
804 spe_comment(gen
->f
, -4, "END:");
805 /* return from function call */
806 spe_bi(gen
->f
, SPE_REG_RA
, 0, 0);
812 * Emit code for the given instruction. Just a big switch stmt.
815 emit_instruction(struct codegen
*gen
,
816 const struct tgsi_full_instruction
*inst
)
818 switch (inst
->Instruction
.Opcode
) {
819 case TGSI_OPCODE_MOV
:
820 return emit_MOV(gen
, inst
);
821 case TGSI_OPCODE_MUL
:
822 return emit_MUL(gen
, inst
);
823 case TGSI_OPCODE_ADD
:
824 return emit_ADD(gen
, inst
);
825 case TGSI_OPCODE_SUB
:
826 return emit_SUB(gen
, inst
);
827 case TGSI_OPCODE_MAD
:
828 return emit_MAD(gen
, inst
);
829 case TGSI_OPCODE_LERP
:
830 return emit_LERP(gen
, inst
);
831 case TGSI_OPCODE_ABS
:
832 return emit_ABS(gen
, inst
);
833 case TGSI_OPCODE_SGT
:
834 return emit_SGT(gen
, inst
);
835 case TGSI_OPCODE_SLT
:
836 return emit_SLT(gen
, inst
);
837 case TGSI_OPCODE_SEQ
:
838 return emit_SEQ(gen
, inst
);
839 case TGSI_OPCODE_SNE
:
840 return emit_SNE(gen
, inst
);
841 case TGSI_OPCODE_MAX
:
842 return emit_MAX(gen
, inst
);
843 case TGSI_OPCODE_MIN
:
844 return emit_MIN(gen
, inst
);
845 case TGSI_OPCODE_END
:
846 return emit_END(gen
);
849 return emit_IF(gen
, inst
);
850 case TGSI_OPCODE_ELSE
:
851 return emit_ELSE(gen
, inst
);
852 case TGSI_OPCODE_ENDIF
:
853 return emit_ENDIF(gen
, inst
);
855 case TGSI_OPCODE_DDX
:
856 return emit_DDX_DDY(gen
, inst
, true);
857 case TGSI_OPCODE_DDY
:
858 return emit_DDX_DDY(gen
, inst
, false);
860 /* XXX lots more cases to do... */
863 fprintf(stderr
, "Cell: unimplemented TGSI instruction %d!\n",
864 inst
->Instruction
.Opcode
);
874 * Emit code for a TGSI immediate value (vector of four floats).
875 * This involves register allocation and initialization.
876 * XXX the initialization should be done by a "prepare" stage, not
877 * per quad execution!
880 emit_immediate(struct codegen
*gen
, const struct tgsi_full_immediate
*immed
)
884 assert(gen
->num_imm
< MAX_TEMPS
);
886 spe_comment(gen
->f
, -4, "IMMEDIATE:");
888 for (ch
= 0; ch
< 4; ch
++) {
889 float val
= immed
->u
.ImmediateFloat32
[ch
].Float
;
890 int reg
= spe_allocate_available_register(gen
->f
);
895 /* update immediate map */
896 gen
->imm_regs
[gen
->num_imm
][ch
] = reg
;
898 /* emit initializer instruction */
899 spe_load_float(gen
->f
, reg
, val
);
910 * Emit "code" for a TGSI declaration.
911 * We only care about TGSI TEMPORARY register declarations at this time.
912 * For each TGSI TEMPORARY we allocate four SPE registers.
915 emit_declaration(struct cell_context
*cell
,
916 struct codegen
*gen
, const struct tgsi_full_declaration
*decl
)
920 switch (decl
->Declaration
.File
) {
921 case TGSI_FILE_TEMPORARY
:
922 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
923 printf("Declare temp reg %d .. %d\n",
924 decl
->DeclarationRange
.First
,
925 decl
->DeclarationRange
.Last
);
928 for (i
= decl
->DeclarationRange
.First
;
929 i
<= decl
->DeclarationRange
.Last
;
931 assert(i
< MAX_TEMPS
);
932 for (ch
= 0; ch
< 4; ch
++) {
933 gen
->temp_regs
[i
][ch
] = spe_allocate_available_register(gen
->f
);
934 if (gen
->temp_regs
[i
][ch
] < 0)
935 return false; /* out of regs */
938 /* XXX if we run out of SPE registers, we need to spill
939 * to SPU memory. someday...
942 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
943 printf(" SPE regs: %d %d %d %d\n",
944 gen
->temp_regs
[i
][0],
945 gen
->temp_regs
[i
][1],
946 gen
->temp_regs
[i
][2],
947 gen
->temp_regs
[i
][3]);
960 * Translate TGSI shader code to SPE instructions. This is done when
961 * the state tracker gives us a new shader (via pipe->create_fs_state()).
963 * \param cell the rendering context (in)
964 * \param tokens the TGSI shader (in)
965 * \param f the generated function (out)
968 cell_gen_fragment_program(struct cell_context
*cell
,
969 const struct tgsi_token
*tokens
,
970 struct spe_function
*f
)
972 struct tgsi_parse_context parse
;
975 memset(&gen
, 0, sizeof(gen
));
978 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
979 gen
.inputs_reg
= 3; /* pointer to inputs array */
980 gen
.outputs_reg
= 4; /* pointer to outputs array */
981 gen
.constants_reg
= 5; /* pointer to constants array */
983 spe_init_func(f
, SPU_MAX_FRAGMENT_PROGRAM_INSTS
* SPE_INST_SIZE
);
984 spe_allocate_register(f
, gen
.inputs_reg
);
985 spe_allocate_register(f
, gen
.outputs_reg
);
986 spe_allocate_register(f
, gen
.constants_reg
);
988 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
989 spe_print_code(f
, true);
991 printf("Begin %s\n", __FUNCTION__
);
992 tgsi_dump(tokens
, 0);
995 tgsi_parse_init(&parse
, tokens
);
997 while (!tgsi_parse_end_of_tokens(&parse
) && !gen
.error
) {
998 tgsi_parse_token(&parse
);
1000 switch (parse
.FullToken
.Token
.Type
) {
1001 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1002 if (!emit_immediate(&gen
, &parse
.FullToken
.FullImmediate
))
1006 case TGSI_TOKEN_TYPE_DECLARATION
:
1007 if (!emit_declaration(cell
, &gen
, &parse
.FullToken
.FullDeclaration
))
1011 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1012 if (!emit_instruction(&gen
, &parse
.FullToken
.FullInstruction
))
1023 /* terminate the SPE code */
1024 return emit_END(&gen
);
1027 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1028 printf("cell_gen_fragment_program nr instructions: %d\n", f
->num_inst
);
1029 printf("End %s\n", __FUNCTION__
);
1032 tgsi_parse_free( &parse
);