1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * Generate SPU fragment program/shader code.
33 * Note that we generate SOA-style code here. So each TGSI instruction
34 * operates on four pixels (and is translated into four SPU instructions,
35 * generally speaking).
41 #include "pipe/p_defines.h"
42 #include "pipe/p_state.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
48 #include "rtasm/rtasm_ppc_spe.h"
49 #include "util/u_memory.h"
50 #include "cell_context.h"
51 #include "cell_gen_fp.h"
63 * Context needed during code generation.
67 int inputs_reg
; /**< 1st function parameter */
68 int outputs_reg
; /**< 2nd function parameter */
69 int constants_reg
; /**< 3rd function parameter */
70 int temp_regs
[MAX_TEMPS
][4]; /**< maps TGSI temps to SPE registers */
71 int imm_regs
[MAX_IMMED
][4]; /**< maps TGSI immediates to SPE registers */
73 int num_imm
; /**< number of immediates */
75 int one_reg
; /**< register containing {1.0, 1.0, 1.0, 1.0} */
77 /** Per-instruction temps / intermediate temps */
81 /** Current IF/ELSE/ENDIF nesting level */
83 /** Index of execution mask register */
86 struct spe_function
*f
;
92 * Allocate an intermediate temporary register.
95 get_itemp(struct codegen
*gen
)
97 int t
= spe_allocate_available_register(gen
->f
);
98 assert(gen
->num_itemps
< Elements(gen
->itemps
));
99 gen
->itemps
[gen
->num_itemps
++] = t
;
104 * Free all intermediate temporary registers. To be called after each
105 * instruction has been emitted.
108 free_itemps(struct codegen
*gen
)
111 for (i
= 0; i
< gen
->num_itemps
; i
++) {
112 spe_release_register(gen
->f
, gen
->itemps
[i
]);
119 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
120 * The register is allocated and initialized upon the first call.
123 get_const_one_reg(struct codegen
*gen
)
125 if (gen
->one_reg
<= 0) {
126 gen
->one_reg
= spe_allocate_available_register(gen
->f
);
128 spe_indent(gen
->f
, 4);
129 spe_comment(gen
->f
, -4, "INIT CONSTANT 1.0:");
131 /* one = {1.0, 1.0, 1.0, 1.0} */
132 spe_load_float(gen
->f
, gen
->one_reg
, 1.0f
);
134 spe_indent(gen
->f
, -4);
142 * Return index of the pixel execution mask.
143 * The register is allocated an initialized upon the first call.
145 * The pixel execution mask controls which pixels in a quad are
146 * modified, according to surrounding conditionals, loops, etc.
149 get_exec_mask_reg(struct codegen
*gen
)
151 if (gen
->exec_mask_reg
<= 0) {
152 gen
->exec_mask_reg
= spe_allocate_available_register(gen
->f
);
154 spe_indent(gen
->f
, 4);
155 spe_comment(gen
->f
, -4, "INIT EXEC MASK = ~0:");
157 /* exec_mask = {~0, ~0, ~0, ~0} */
158 spe_load_int(gen
->f
, gen
->exec_mask_reg
, ~0);
160 spe_indent(gen
->f
, -4);
163 return gen
->exec_mask_reg
;
168 * Return the index of the SPU temporary containing the named TGSI
169 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
170 * just return the corresponding SPE register. If the TGIS register
171 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
172 * and emit an SPE load instruction.
175 get_src_reg(struct codegen
*gen
,
177 const struct tgsi_full_src_register
*src
)
180 int swizzle
= tgsi_util_get_full_src_register_extswizzle(src
, channel
);
181 boolean reg_is_itemp
= FALSE
;
184 assert(swizzle
>= TGSI_SWIZZLE_X
);
185 assert(swizzle
<= TGSI_EXTSWIZZLE_ONE
);
187 switch (src
->SrcRegister
.File
) {
188 case TGSI_FILE_TEMPORARY
:
189 reg
= gen
->temp_regs
[src
->SrcRegister
.Index
][swizzle
];
191 case TGSI_FILE_INPUT
:
193 if(swizzle
== TGSI_EXTSWIZZLE_ONE
)
195 /* Load const one float and early out */
196 reg
= get_const_one_reg(gen
);
198 else if(swizzle
== TGSI_EXTSWIZZLE_ZERO
)
200 /* Load const zero float and early out */
201 reg
= get_itemp(gen
);
202 spe_xor(gen
->f
, reg
, reg
, reg
);
206 /* offset is measured in quadwords, not bytes */
207 int offset
= src
->SrcRegister
.Index
* 4 + swizzle
;
208 reg
= get_itemp(gen
);
210 /* Load: reg = memory[(machine_reg) + offset] */
211 spe_lqd(gen
->f
, reg
, gen
->inputs_reg
, offset
);
215 case TGSI_FILE_IMMEDIATE
:
216 reg
= gen
->imm_regs
[src
->SrcRegister
.Index
][swizzle
];
218 case TGSI_FILE_CONSTANT
:
219 /* xxx fall-through for now / fix */
225 * Handle absolute value, negate or set-negative of src register.
227 sign_op
= tgsi_util_get_full_src_register_sign_mode(src
, channel
);
228 if (sign_op
!= TGSI_UTIL_SIGN_KEEP
) {
230 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
232 const int bit31mask_reg
= get_itemp(gen
);
236 /* re-use 'reg' for the result */
240 /* alloc a new reg for the result */
241 result_reg
= get_itemp(gen
);
244 /* mask with bit 31 set, the rest cleared */
245 spe_load_int(gen
->f
, bit31mask_reg
, (1 << 31));
247 if (sign_op
== TGSI_UTIL_SIGN_CLEAR
) {
248 spe_andc(gen
->f
, result_reg
, reg
, bit31mask_reg
);
250 else if (sign_op
== TGSI_UTIL_SIGN_SET
) {
251 spe_and(gen
->f
, result_reg
, reg
, bit31mask_reg
);
254 assert(sign_op
== TGSI_UTIL_SIGN_TOGGLE
);
255 spe_xor(gen
->f
, result_reg
, reg
, bit31mask_reg
);
266 * Return the index of an SPE register to use for the given TGSI register.
267 * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
268 * corresponding SPE register is returned. If the TGSI register is
269 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
270 * See store_dest_reg() below...
273 get_dst_reg(struct codegen
*gen
,
275 const struct tgsi_full_dst_register
*dest
)
279 switch (dest
->DstRegister
.File
) {
280 case TGSI_FILE_TEMPORARY
:
281 if (gen
->if_nesting
> 0)
282 reg
= get_itemp(gen
);
284 reg
= gen
->temp_regs
[dest
->DstRegister
.Index
][channel
];
286 case TGSI_FILE_OUTPUT
:
287 reg
= get_itemp(gen
);
298 * When a TGSI instruction is writing to an output register, this
299 * function emits the SPE store instruction to store the value_reg.
300 * \param value_reg the SPE register containing the value to store.
301 * This would have been returned by get_dst_reg().
304 store_dest_reg(struct codegen
*gen
,
305 int value_reg
, int channel
,
306 const struct tgsi_full_dst_register
*dest
)
308 switch (dest
->DstRegister
.File
) {
309 case TGSI_FILE_TEMPORARY
:
310 if (gen
->if_nesting
> 0) {
311 int d_reg
= gen
->temp_regs
[dest
->DstRegister
.Index
][channel
];
312 int exec_reg
= get_exec_mask_reg(gen
);
313 /* Mix d with new value according to exec mask:
314 * d[i] = mask_reg[i] ? value_reg : d_reg
316 spe_selb(gen
->f
, d_reg
, d_reg
, value_reg
, exec_reg
);
319 /* we're not inside a condition or loop: do nothing special */
322 case TGSI_FILE_OUTPUT
:
324 /* offset is measured in quadwords, not bytes */
325 int offset
= dest
->DstRegister
.Index
* 4 + channel
;
326 if (gen
->if_nesting
> 0) {
327 int exec_reg
= get_exec_mask_reg(gen
);
328 int curval_reg
= get_itemp(gen
);
329 /* First read the current value from memory:
330 * Load: curval = memory[(machine_reg) + offset]
332 spe_lqd(gen
->f
, curval_reg
, gen
->outputs_reg
, offset
);
333 /* Mix curval with newvalue according to exec mask:
334 * d[i] = mask_reg[i] ? value_reg : d_reg
336 spe_selb(gen
->f
, curval_reg
, curval_reg
, value_reg
, exec_reg
);
337 /* Store: memory[(machine_reg) + offset] = curval */
338 spe_stqd(gen
->f
, curval_reg
, gen
->outputs_reg
, offset
);
341 /* Store: memory[(machine_reg) + offset] = reg */
342 spe_stqd(gen
->f
, value_reg
, gen
->outputs_reg
, offset
);
353 emit_MOV(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
356 spe_comment(gen
->f
, -4, "MOV:");
357 for (ch
= 0; ch
< 4; ch
++) {
358 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
359 int src_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
360 int dst_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
361 /* XXX we don't always need to actually emit a mov instruction here */
362 spe_move(gen
->f
, dst_reg
, src_reg
);
363 store_dest_reg(gen
, dst_reg
, ch
, &inst
->FullDstRegisters
[0]);
371 * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
372 * becomes (up to) four SPU "fa" instructions because we're doing SOA
376 emit_ADD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
379 spe_comment(gen
->f
, -4, "ADD:");
380 /* Loop over Red/Green/Blue/Alpha channels */
381 for (ch
= 0; ch
< 4; ch
++) {
382 /* If the dest R, G, B or A writemask is enabled... */
383 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
384 /* get indexes of the two src, one dest SPE registers */
385 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
386 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
387 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
389 /* Emit actual SPE instruction: d = s1 + s2 */
390 spe_fa(gen
->f
, d_reg
, s1_reg
, s2_reg
);
392 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
393 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
394 /* Free any intermediate temps we allocated */
402 * Emit subtract. See emit_ADD for comments.
405 emit_SUB(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
408 spe_comment(gen
->f
, -4, "SUB:");
409 /* Loop over Red/Green/Blue/Alpha channels */
410 for (ch
= 0; ch
< 4; ch
++) {
411 /* If the dest R, G, B or A writemask is enabled... */
412 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
413 /* get indexes of the two src, one dest SPE registers */
414 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
415 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
416 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
418 /* Emit actual SPE instruction: d = s1 - s2 */
419 spe_fs(gen
->f
, d_reg
, s1_reg
, s2_reg
);
421 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
422 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
423 /* Free any intermediate temps we allocated */
431 * Emit multiply add. See emit_ADD for comments.
434 emit_MAD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
437 spe_comment(gen
->f
, -4, "MAD:");
438 for (ch
= 0; ch
< 4; ch
++) {
439 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
440 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
441 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
442 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
443 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
444 /* d = s1 * s2 + s3 */
445 spe_fma(gen
->f
, d_reg
, s1_reg
, s2_reg
, s3_reg
);
446 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
455 * Emit linear interpolate. See emit_ADD for comments.
458 emit_LERP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
461 spe_comment(gen
->f
, -4, "LERP:");
462 for (ch
= 0; ch
< 4; ch
++) {
463 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
464 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
465 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
466 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
467 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
468 /* d = s3 + s1(s2 - s3) */
469 spe_fs(gen
->f
, d_reg
, s2_reg
, s3_reg
);
470 spe_fma(gen
->f
, d_reg
, d_reg
, s1_reg
, s3_reg
);
471 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
479 * Emit multiply. See emit_ADD for comments.
482 emit_MUL(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
485 spe_comment(gen
->f
, -4, "MUL:");
486 for (ch
= 0; ch
< 4; ch
++) {
487 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
488 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
489 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
490 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
492 spe_fm(gen
->f
, d_reg
, s1_reg
, s2_reg
);
493 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
501 * Emit reciprocal. See emit_ADD for comments.
504 emit_RCP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
507 spe_comment(gen
->f
, -4, "RCP:");
508 for (ch
= 0; ch
< 4; ch
++) {
509 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
510 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
511 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
513 spe_frest(gen
->f
, d_reg
, s1_reg
);
514 spe_fi(gen
->f
, d_reg
, s1_reg
, d_reg
);
515 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
523 * Emit reciprocal sqrt. See emit_ADD for comments.
526 emit_RSQ(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
529 spe_comment(gen
->f
, -4, "RSQ:");
530 for (ch
= 0; ch
< 4; ch
++) {
531 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
532 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
533 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
535 spe_frsqest(gen
->f
, d_reg
, s1_reg
);
536 spe_fi(gen
->f
, d_reg
, s1_reg
, d_reg
);
537 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
545 * Emit absolute value. See emit_ADD for comments.
548 emit_ABS(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
551 spe_comment(gen
->f
, -4, "ABS:");
552 for (ch
= 0; ch
< 4; ch
++) {
553 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
554 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
555 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
556 const int bit31mask_reg
= get_itemp(gen
);
558 /* mask with bit 31 set, the rest cleared */
559 spe_load_int(gen
->f
, bit31mask_reg
, (1 << 31));
561 /* d = sign bit cleared in s1 */
562 spe_andc(gen
->f
, d_reg
, s1_reg
, bit31mask_reg
);
564 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
572 * Emit 3 component dot product. See emit_ADD for comments.
575 emit_DP3(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
578 spe_comment(gen
->f
, -4, "DP3:");
580 int s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
581 int s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
582 int tmp_reg
= get_itemp(gen
);
584 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
586 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
587 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
588 /* t = y0 * y1 + t */
589 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
591 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
592 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
593 /* t = z0 * z1 + t */
594 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
596 for (ch
= 0; ch
< 4; ch
++) {
597 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
598 store_dest_reg(gen
, tmp_reg
, ch
, &inst
->FullDstRegisters
[0]);
607 * Emit 4 component dot product. See emit_ADD for comments.
610 emit_DP4(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
613 spe_comment(gen
->f
, -4, "DP4:");
615 int s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
616 int s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
617 int tmp_reg
= get_itemp(gen
);
619 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
621 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
622 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
623 /* t = y0 * y1 + t */
624 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
626 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
627 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
628 /* t = z0 * z1 + t */
629 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
631 s1_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[0]);
632 s2_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[1]);
633 /* t = w0 * w1 + t */
634 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
636 for (ch
= 0; ch
< 4; ch
++) {
637 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
638 store_dest_reg(gen
, tmp_reg
, ch
, &inst
->FullDstRegisters
[0]);
647 * Emit homogeneous dot product. See emit_ADD for comments.
650 emit_DPH(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
653 spe_comment(gen
->f
, -4, "DPH:");
655 int s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
656 int s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
657 int tmp_reg
= get_itemp(gen
);
660 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
662 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
663 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
664 /* t = y0 * y1 + t */
665 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
667 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
668 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
669 /* t = z0 * z1 + t */
670 spe_fma(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
672 s2_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[1]);
674 spe_fa(gen
->f
, tmp_reg
, s2_reg
, tmp_reg
);
676 for (ch
= 0; ch
< 4; ch
++) {
677 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
678 store_dest_reg(gen
, tmp_reg
, ch
, &inst
->FullDstRegisters
[0]);
687 * Emit cross product. See emit_ADD for comments.
690 emit_XPD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
692 spe_comment(gen
->f
, -4, "XPD:");
694 int s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
695 int s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
696 int tmp_reg
= get_itemp(gen
);
699 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
701 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
702 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
703 /* t = y0 * z1 - t */
704 spe_fms(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
706 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << CHAN_X
)) {
707 store_dest_reg(gen
, tmp_reg
, CHAN_X
, &inst
->FullDstRegisters
[0]);
710 s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
711 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
713 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
715 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
716 s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
717 /* t = z0 * x1 - t */
718 spe_fms(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
720 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << CHAN_Y
)) {
721 store_dest_reg(gen
, tmp_reg
, CHAN_Y
, &inst
->FullDstRegisters
[0]);
724 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
725 s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
727 spe_fm(gen
->f
, tmp_reg
, s1_reg
, s2_reg
);
729 s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
730 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
731 /* t = x0 * y1 - t */
732 spe_fms(gen
->f
, tmp_reg
, s1_reg
, s2_reg
, tmp_reg
);
734 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << CHAN_Z
)) {
735 store_dest_reg(gen
, tmp_reg
, CHAN_Z
, &inst
->FullDstRegisters
[0]);
743 * Emit set-if-greater-than.
744 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
745 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
746 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
749 emit_SGT(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
753 spe_comment(gen
->f
, -4, "SGT:");
755 for (ch
= 0; ch
< 4; ch
++) {
756 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
757 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
758 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
759 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
762 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
764 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
765 /* d = d & one_reg */
766 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
768 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
777 * Emit set-if_less-then. See emit_SGT for comments.
780 emit_SLT(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
784 spe_comment(gen
->f
, -4, "SLT:");
786 for (ch
= 0; ch
< 4; ch
++) {
787 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
788 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
789 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
790 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
793 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
795 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
796 /* d = d & one_reg */
797 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
799 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
808 * Emit set-if_greater-then-or-equal. See emit_SGT for comments.
811 emit_SGE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
815 spe_comment(gen
->f
, -4, "SGE:");
817 for (ch
= 0; ch
< 4; ch
++) {
818 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
819 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
820 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
821 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
824 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
826 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
827 /* d = ~d & one_reg */
828 spe_andc(gen
->f
, d_reg
, get_const_one_reg(gen
), d_reg
);
830 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
839 * Emit set-if_less-then-or-equal. See emit_SGT for comments.
842 emit_SLE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
846 spe_comment(gen
->f
, -4, "SLE:");
848 for (ch
= 0; ch
< 4; ch
++) {
849 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
850 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
851 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
852 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
855 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
857 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
858 /* d = ~d & one_reg */
859 spe_andc(gen
->f
, d_reg
, get_const_one_reg(gen
), d_reg
);
861 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
870 * Emit set-if_equal. See emit_SGT for comments.
873 emit_SEQ(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
877 spe_comment(gen
->f
, -4, "SEQ:");
879 for (ch
= 0; ch
< 4; ch
++) {
880 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
881 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
882 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
883 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
886 spe_fceq(gen
->f
, d_reg
, s1_reg
, s2_reg
);
888 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
889 /* d = d & one_reg */
890 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
892 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
901 * Emit set-if_not_equal. See emit_SGT for comments.
904 emit_SNE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
908 spe_comment(gen
->f
, -4, "SNE:");
910 for (ch
= 0; ch
< 4; ch
++) {
911 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
912 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
913 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
914 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
917 spe_fceq(gen
->f
, d_reg
, s1_reg
, s2_reg
);
918 spe_nor(gen
->f
, d_reg
, d_reg
, d_reg
);
920 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
921 /* d = d & one_reg */
922 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
924 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
933 * Emit compare. See emit_SGT for comments.
936 emit_CMP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
940 spe_comment(gen
->f
, -4, "CMP:");
942 for (ch
= 0; ch
< 4; ch
++) {
943 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
944 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
945 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
946 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
947 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
948 int zero_reg
= get_itemp(gen
);
950 spe_xor(gen
->f
, zero_reg
, zero_reg
, zero_reg
);
952 /* d = (s1 < 0) ? s2 : s3 */
953 spe_fcgt(gen
->f
, d_reg
, zero_reg
, s1_reg
);
954 spe_selb(gen
->f
, d_reg
, s3_reg
, s2_reg
, d_reg
);
956 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
966 * Convert float to signed int
967 * Convert signed int to float
970 emit_TRUNC(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
974 spe_comment(gen
->f
, -4, "TRUNC:");
976 for (ch
= 0; ch
< 4; ch
++) {
977 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
978 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
979 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
981 /* Convert float to int */
982 spe_cflts(gen
->f
, d_reg
, s1_reg
, 0);
984 /* Convert int to float */
985 spe_csflt(gen
->f
, d_reg
, d_reg
, 0);
987 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
997 * If negative int subtract one
998 * Convert float to signed int
999 * Convert signed int to float
1002 emit_FLR(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1006 spe_comment(gen
->f
, -4, "FLR:");
1008 int zero_reg
= get_itemp(gen
);
1009 spe_xor(gen
->f
, zero_reg
, zero_reg
, zero_reg
);
1011 for (ch
= 0; ch
< 4; ch
++) {
1012 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1013 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1014 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1015 int tmp_reg
= get_itemp(gen
);
1017 /* If negative, subtract 1.0 */
1018 spe_fcgt(gen
->f
, d_reg
, zero_reg
, s1_reg
);
1019 spe_selb(gen
->f
, tmp_reg
, zero_reg
, get_const_one_reg(gen
), d_reg
);
1020 spe_fs(gen
->f
, d_reg
, s1_reg
, tmp_reg
);
1022 /* Convert float to int */
1023 spe_cflts(gen
->f
, d_reg
, d_reg
, 0);
1025 /* Convert int to float */
1026 spe_csflt(gen
->f
, d_reg
, d_reg
, 0);
1028 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1038 * Input - FLR(Input)
1041 emit_FRC(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1045 spe_comment(gen
->f
, -4, "FLR:");
1047 int zero_reg
= get_itemp(gen
);
1048 spe_xor(gen
->f
, zero_reg
, zero_reg
, zero_reg
);
1050 for (ch
= 0; ch
< 4; ch
++) {
1051 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1052 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1053 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1054 int tmp_reg
= get_itemp(gen
);
1056 /* If negative, subtract 1.0 */
1057 spe_fcgt(gen
->f
, d_reg
, zero_reg
, s1_reg
);
1058 spe_selb(gen
->f
, tmp_reg
, zero_reg
, get_const_one_reg(gen
), d_reg
);
1059 spe_fs(gen
->f
, d_reg
, s1_reg
, tmp_reg
);
1061 /* Convert float to int */
1062 spe_cflts(gen
->f
, d_reg
, d_reg
, 0);
1064 /* Convert int to float */
1065 spe_csflt(gen
->f
, d_reg
, d_reg
, 0);
1067 /* d = s1 - FLR(s1) */
1068 spe_fs(gen
->f
, d_reg
, s1_reg
, d_reg
);
1070 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1080 * Emit max. See emit_SGT for comments.
1083 emit_MAX(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1087 spe_comment(gen
->f
, -4, "MAX:");
1089 for (ch
= 0; ch
< 4; ch
++) {
1090 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1091 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1092 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1093 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1095 /* d = (s1 > s2) ? s1 : s2 */
1096 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
1097 spe_selb(gen
->f
, d_reg
, s2_reg
, s1_reg
, d_reg
);
1099 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1108 * Emit max. See emit_SGT for comments.
1111 emit_MIN(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1115 spe_comment(gen
->f
, -4, "MIN:");
1117 for (ch
= 0; ch
< 4; ch
++) {
1118 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1119 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1120 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1121 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1123 /* d = (s2 > s1) ? s1 : s2 */
1124 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
1125 spe_selb(gen
->f
, d_reg
, s2_reg
, s1_reg
, d_reg
);
1127 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1136 emit_IF(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1138 const int channel
= 0;
1139 const int exec_reg
= get_exec_mask_reg(gen
);
1141 spe_comment(gen
->f
, -4, "IF:");
1143 /* update execution mask with the predicate register */
1144 int tmp_reg
= get_itemp(gen
);
1145 int s1_reg
= get_src_reg(gen
, channel
, &inst
->FullSrcRegisters
[0]);
1147 /* tmp = (s1_reg == 0) */
1148 spe_ceqi(gen
->f
, tmp_reg
, s1_reg
, 0);
1150 spe_complement(gen
->f
, tmp_reg
, tmp_reg
);
1151 /* exec_mask = exec_mask & tmp */
1152 spe_and(gen
->f
, exec_reg
, exec_reg
, tmp_reg
);
1163 emit_ELSE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1165 const int exec_reg
= get_exec_mask_reg(gen
);
1167 spe_comment(gen
->f
, -4, "ELSE:");
1169 /* exec_mask = !exec_mask */
1170 spe_complement(gen
->f
, exec_reg
, exec_reg
);
1177 emit_ENDIF(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1179 const int exec_reg
= get_exec_mask_reg(gen
);
1181 spe_comment(gen
->f
, -4, "ENDIF:");
1183 /* XXX todo: pop execution mask */
1185 spe_load_int(gen
->f
, exec_reg
, ~0x0);
/**
 * Emit code for the TGSI DDX / DDY instructions; the spe_comment() call
 * below labels the output "DDX:" or "DDY:" depending on ddx.
 *
 * For every destination channel enabled in the write mask, word 0 of the
 * source register ("upper-left pixel") is splatted into t1, another word
 * of the same register is splatted into t2, and spe_fs() combines t2 and
 * t1 into the destination register.
 *
 * NOTE(review): this listing shows two consecutive splats into t2 (word 1,
 * "upper-right pixel", then word 2, "lower-left pixel") with no branch
 * between them, and the parameter list breaks off after inst.  Presumably
 * ddx selects one of the two splats -- confirm against the complete source.
 */
1193 emit_DDX_DDY(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
,
1198 spe_comment(gen
->f
, -4, ddx
? "DDX:" : "DDY:");
/* visit each of the four channels, skipping those masked out of the dest */
1200 for (ch
= 0; ch
< 4; ch
++) {
1201 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1202 int s_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1203 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
/* two scratch registers that receive the splatted pixel values */
1205 int t1_reg
= get_itemp(gen
);
1206 int t2_reg
= get_itemp(gen
);
1208 spe_splat_word(gen
->f
, t1_reg
, s_reg
, 0); /* upper-left pixel */
1210 spe_splat_word(gen
->f
, t2_reg
, s_reg
, 1); /* upper-right pixel */
1213 spe_splat_word(gen
->f
, t2_reg
, s_reg
, 2); /* lower-left pixel */
/* presumably d = t2 - t1 (float subtract) -- confirm spe_fs operand order */
1215 spe_fs(gen
->f
, d_reg
, t2_reg
, t1_reg
);
1228 * Emit END instruction.
1229 * We just return from the shader function at this point.
1231 * Note that there may be more code after this that would be
1232 * called by TGSI_OPCODE_CALL.
1235 emit_END(struct codegen
*gen
)
1237 spe_comment(gen
->f
, -4, "END:");
1238 /* return from function call */
1239 spe_bi(gen
->f
, SPE_REG_RA
, 0, 0);
1245 * Emit code for the given instruction. Just a big switch stmt.
1248 emit_instruction(struct codegen
*gen
,
1249 const struct tgsi_full_instruction
*inst
)
1251 switch (inst
->Instruction
.Opcode
) {
1252 case TGSI_OPCODE_MOV
:
1253 case TGSI_OPCODE_SWZ
:
1254 return emit_MOV(gen
, inst
);
1255 case TGSI_OPCODE_MUL
:
1256 return emit_MUL(gen
, inst
);
1257 case TGSI_OPCODE_ADD
:
1258 return emit_ADD(gen
, inst
);
1259 case TGSI_OPCODE_SUB
:
1260 return emit_SUB(gen
, inst
);
1261 case TGSI_OPCODE_MAD
:
1262 return emit_MAD(gen
, inst
);
1263 case TGSI_OPCODE_LERP
:
1264 return emit_LERP(gen
, inst
);
1265 case TGSI_OPCODE_DP3
:
1266 return emit_DP3(gen
, inst
);
1267 case TGSI_OPCODE_DP4
:
1268 return emit_DP4(gen
, inst
);
1269 case TGSI_OPCODE_DPH
:
1270 return emit_DPH(gen
, inst
);
1271 case TGSI_OPCODE_XPD
:
1272 return emit_XPD(gen
, inst
);
1273 case TGSI_OPCODE_RCP
:
1274 return emit_RCP(gen
, inst
);
1275 case TGSI_OPCODE_RSQ
:
1276 return emit_RSQ(gen
, inst
);
1277 case TGSI_OPCODE_ABS
:
1278 return emit_ABS(gen
, inst
);
1279 case TGSI_OPCODE_SGT
:
1280 return emit_SGT(gen
, inst
);
1281 case TGSI_OPCODE_SLT
:
1282 return emit_SLT(gen
, inst
);
1283 case TGSI_OPCODE_SGE
:
1284 return emit_SGE(gen
, inst
);
1285 case TGSI_OPCODE_SLE
:
1286 return emit_SLE(gen
, inst
);
1287 case TGSI_OPCODE_SEQ
:
1288 return emit_SEQ(gen
, inst
);
1289 case TGSI_OPCODE_SNE
:
1290 return emit_SNE(gen
, inst
);
1291 case TGSI_OPCODE_CMP
:
1292 return emit_CMP(gen
, inst
);
1293 case TGSI_OPCODE_MAX
:
1294 return emit_MAX(gen
, inst
);
1295 case TGSI_OPCODE_MIN
:
1296 return emit_MIN(gen
, inst
);
1297 case TGSI_OPCODE_TRUNC
:
1298 return emit_TRUNC(gen
, inst
);
1299 case TGSI_OPCODE_FLR
:
1300 return emit_FLR(gen
, inst
);
1301 case TGSI_OPCODE_FRC
:
1302 return emit_FRC(gen
, inst
);
1303 case TGSI_OPCODE_END
:
1304 return emit_END(gen
);
1306 case TGSI_OPCODE_IF
:
1307 return emit_IF(gen
, inst
);
1308 case TGSI_OPCODE_ELSE
:
1309 return emit_ELSE(gen
, inst
);
1310 case TGSI_OPCODE_ENDIF
:
1311 return emit_ENDIF(gen
, inst
);
1313 case TGSI_OPCODE_DDX
:
1314 return emit_DDX_DDY(gen
, inst
, true);
1315 case TGSI_OPCODE_DDY
:
1316 return emit_DDX_DDY(gen
, inst
, false);
1318 /* XXX lots more cases to do... */
1321 fprintf(stderr
, "Cell: unimplemented TGSI instruction %d!\n",
1322 inst
->Instruction
.Opcode
);
1332 * Emit code for a TGSI immediate value (vector of four floats).
1333 * This involves register allocation and initialization.
1334 * XXX the initialization should be done by a "prepare" stage, not
1335 * per quad execution!
1338 emit_immediate(struct codegen
*gen
, const struct tgsi_full_immediate
*immed
)
1342 assert(gen
->num_imm
< MAX_TEMPS
);
1344 spe_comment(gen
->f
, -4, "IMMEDIATE:");
1346 for (ch
= 0; ch
< 4; ch
++) {
1347 float val
= immed
->u
.ImmediateFloat32
[ch
].Float
;
1348 int reg
= spe_allocate_available_register(gen
->f
);
1353 /* update immediate map */
1354 gen
->imm_regs
[gen
->num_imm
][ch
] = reg
;
1356 /* emit initializer instruction */
1357 spe_load_float(gen
->f
, reg
, val
);
1368 * Emit "code" for a TGSI declaration.
1369 * We only care about TGSI TEMPORARY register declarations at this time.
1370 * For each TGSI TEMPORARY we allocate four SPE registers.
1373 emit_declaration(struct cell_context
*cell
,
1374 struct codegen
*gen
, const struct tgsi_full_declaration
*decl
)
1378 switch (decl
->Declaration
.File
) {
1379 case TGSI_FILE_TEMPORARY
:
1380 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1381 printf("Declare temp reg %d .. %d\n",
1382 decl
->DeclarationRange
.First
,
1383 decl
->DeclarationRange
.Last
);
1386 for (i
= decl
->DeclarationRange
.First
;
1387 i
<= decl
->DeclarationRange
.Last
;
1389 assert(i
< MAX_TEMPS
);
1390 for (ch
= 0; ch
< 4; ch
++) {
1391 gen
->temp_regs
[i
][ch
] = spe_allocate_available_register(gen
->f
);
1392 if (gen
->temp_regs
[i
][ch
] < 0)
1393 return false; /* out of regs */
1396 /* XXX if we run out of SPE registers, we need to spill
1397 * to SPU memory. someday...
1400 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1401 printf(" SPE regs: %d %d %d %d\n",
1402 gen
->temp_regs
[i
][0],
1403 gen
->temp_regs
[i
][1],
1404 gen
->temp_regs
[i
][2],
1405 gen
->temp_regs
[i
][3]);
/**
 * Translate TGSI shader code to SPE instructions.  This is done when
 * the state tracker gives us a new shader (via pipe->create_fs_state()).
 *
 * \param cell    the rendering context (in)
 * \param tokens  the TGSI shader (in)
 * \param f       the generated function (out)
 */
/*
 * Processing steps visible below: the codegen state is zeroed, the three
 * pointer parameters are bound to SPE registers 3, 4 and 5 and those
 * registers are allocated in f, then the TGSI token stream is walked until
 * it ends or gen.error is set, handing each immediate, declaration and
 * instruction token to its emit_*() helper.  When CELL_DEBUG_ASM is set in
 * cell->debug_flags, the TGSI input and the number of generated
 * instructions are printed.
 *
 * NOTE(review): several lines of this function are absent from this
 * listing (the declaration of gen, the statements guarded by the three
 * "if (!emit_...())" tests, the condition around "return emit_END" and the
 * end of the function) -- consult the complete source before relying on
 * the control flow shown here.
 */
1426 cell_gen_fragment_program(struct cell_context
*cell
,
1427 const struct tgsi_token
*tokens
,
1428 struct spe_function
*f
)
1430 struct tgsi_parse_context parse
;
/* start from an all-zero codegen state */
1433 memset(&gen
, 0, sizeof(gen
));
1436 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
1437 gen
.inputs_reg
= 3; /* pointer to inputs array */
1438 gen
.outputs_reg
= 4; /* pointer to outputs array */
1439 gen
.constants_reg
= 5; /* pointer to constants array */
/* presumably sizes the code buffer in bytes for the largest program -- confirm */
1441 spe_init_func(f
, SPU_MAX_FRAGMENT_PROGRAM_INSTS
* SPE_INST_SIZE
);
/* allocate the three parameter registers (presumably so the allocator
 * does not hand them out again -- confirm) */
1442 spe_allocate_register(f
, gen
.inputs_reg
);
1443 spe_allocate_register(f
, gen
.outputs_reg
);
1444 spe_allocate_register(f
, gen
.constants_reg
);
/* debug output: dump the incoming TGSI tokens */
1446 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1447 spe_print_code(f
, true);
1449 printf("Begin %s\n", __FUNCTION__
);
1450 tgsi_dump(tokens
, 0);
1453 tgsi_parse_init(&parse
, tokens
);
/* translate one token per iteration; stop early once gen.error is set */
1455 while (!tgsi_parse_end_of_tokens(&parse
) && !gen
.error
) {
1456 tgsi_parse_token(&parse
);
/* dispatch on the kind of token that was just parsed */
1458 switch (parse
.FullToken
.Token
.Type
) {
1459 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1460 if (!emit_immediate(&gen
, &parse
.FullToken
.FullImmediate
))
1464 case TGSI_TOKEN_TYPE_DECLARATION
:
1465 if (!emit_declaration(cell
, &gen
, &parse
.FullToken
.FullDeclaration
))
1469 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1470 if (!emit_instruction(&gen
, &parse
.FullToken
.FullInstruction
))
1481 /* terminate the SPE code */
1482 return emit_END(&gen
);
/* debug output: report how many SPE instructions were generated */
1485 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1486 printf("cell_gen_fragment_program nr instructions: %d\n", f
->num_inst
);
1487 printf("End %s\n", __FUNCTION__
);
/* release the TGSI parser state */
1490 tgsi_parse_free( &parse
);