1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * Generate SPU fragment program/shader code.
33 * Note that we generate SOA-style code here. So each TGSI instruction
34 * operates on four pixels (and is translated into four SPU instructions,
35 * generally speaking).
41 #include "pipe/p_defines.h"
42 #include "pipe/p_state.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
48 #include "rtasm/rtasm_ppc_spe.h"
49 #include "util/u_memory.h"
50 #include "cell_context.h"
51 #include "cell_gen_fp.h"
63 * Context needed during code generation.
67 int inputs_reg
; /**< 1st function parameter */
68 int outputs_reg
; /**< 2nd function parameter */
69 int constants_reg
; /**< 3rd function parameter */
70 int temp_regs
[MAX_TEMPS
][4]; /**< maps TGSI temps to SPE registers */
71 int imm_regs
[MAX_IMMED
][4]; /**< maps TGSI immediates to SPE registers */
73 int num_imm
; /**< number of immediates */
75 int one_reg
; /**< register containing {1.0, 1.0, 1.0, 1.0} */
77 /** Per-instruction temps / intermediate temps */
81 /** Current IF/ELSE/ENDIF nesting level */
83 /** Index of execution mask register */
86 struct spe_function
*f
;
92 * Allocate an intermediate temporary register.
95 get_itemp(struct codegen
*gen
)
97 int t
= spe_allocate_available_register(gen
->f
);
98 assert(gen
->num_itemps
< Elements(gen
->itemps
));
99 gen
->itemps
[gen
->num_itemps
++] = t
;
104 * Free all intermediate temporary registers. To be called after each
105 * instruction has been emitted.
108 free_itemps(struct codegen
*gen
)
111 for (i
= 0; i
< gen
->num_itemps
; i
++) {
112 spe_release_register(gen
->f
, gen
->itemps
[i
]);
119 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
120 * The register is allocated and initialized upon the first call.
123 get_const_one_reg(struct codegen
*gen
)
125 if (gen
->one_reg
<= 0) {
126 gen
->one_reg
= spe_allocate_available_register(gen
->f
);
128 spe_indent(gen
->f
, 4);
129 spe_comment(gen
->f
, -4, "INIT CONSTANT 1.0:");
131 /* one = {1.0, 1.0, 1.0, 1.0} */
132 spe_load_float(gen
->f
, gen
->one_reg
, 1.0f
);
134 spe_indent(gen
->f
, -4);
142 * Return index of the pixel execution mask.
143 * The register is allocated an initialized upon the first call.
145 * The pixel execution mask controls which pixels in a quad are
146 * modified, according to surrounding conditionals, loops, etc.
149 get_exec_mask_reg(struct codegen
*gen
)
151 if (gen
->exec_mask_reg
<= 0) {
152 gen
->exec_mask_reg
= spe_allocate_available_register(gen
->f
);
154 spe_indent(gen
->f
, 4);
155 spe_comment(gen
->f
, -4, "INIT EXEC MASK = ~0:");
157 /* exec_mask = {~0, ~0, ~0, ~0} */
158 spe_load_int(gen
->f
, gen
->exec_mask_reg
, ~0);
160 spe_indent(gen
->f
, -4);
163 return gen
->exec_mask_reg
;
168 * Return the index of the SPU temporary containing the named TGSI
169 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
170 * just return the corresponding SPE register. If the TGIS register
171 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
172 * and emit an SPE load instruction.
175 get_src_reg(struct codegen
*gen
,
177 const struct tgsi_full_src_register
*src
)
180 int swizzle
= tgsi_util_get_full_src_register_extswizzle(src
, channel
);
181 boolean reg_is_itemp
= FALSE
;
184 assert(swizzle
>= 0);
185 assert(swizzle
<= 3);
189 switch (src
->SrcRegister
.File
) {
190 case TGSI_FILE_TEMPORARY
:
191 reg
= gen
->temp_regs
[src
->SrcRegister
.Index
][channel
];
193 case TGSI_FILE_INPUT
:
195 /* offset is measured in quadwords, not bytes */
196 int offset
= src
->SrcRegister
.Index
* 4 + channel
;
197 reg
= get_itemp(gen
);
199 /* Load: reg = memory[(machine_reg) + offset] */
200 spe_lqd(gen
->f
, reg
, gen
->inputs_reg
, offset
);
203 case TGSI_FILE_IMMEDIATE
:
204 reg
= gen
->imm_regs
[src
->SrcRegister
.Index
][channel
];
206 case TGSI_FILE_CONSTANT
:
207 /* xxx fall-through for now / fix */
213 * Handle absolute value, negate or set-negative of src register.
215 sign_op
= tgsi_util_get_full_src_register_sign_mode(src
, channel
);
216 if (sign_op
!= TGSI_UTIL_SIGN_KEEP
) {
218 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
220 const int bit31mask_reg
= get_itemp(gen
);
224 /* re-use 'reg' for the result */
228 /* alloc a new reg for the result */
229 result_reg
= get_itemp(gen
);
232 /* mask with bit 31 set, the rest cleared */
233 spe_load_int(gen
->f
, bit31mask_reg
, (1 << 31));
235 if (sign_op
== TGSI_UTIL_SIGN_CLEAR
) {
236 spe_andc(gen
->f
, result_reg
, reg
, bit31mask_reg
);
238 else if (sign_op
== TGSI_UTIL_SIGN_SET
) {
239 spe_and(gen
->f
, result_reg
, reg
, bit31mask_reg
);
242 assert(sign_op
== TGSI_UTIL_SIGN_TOGGLE
);
243 spe_xor(gen
->f
, result_reg
, reg
, bit31mask_reg
);
254 * Return the index of an SPE register to use for the given TGSI register.
255 * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
256 * corresponding SPE register is returned. If the TGSI register is
257 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
258 * See store_dest_reg() below...
261 get_dst_reg(struct codegen
*gen
,
263 const struct tgsi_full_dst_register
*dest
)
267 switch (dest
->DstRegister
.File
) {
268 case TGSI_FILE_TEMPORARY
:
269 if (gen
->if_nesting
> 0)
270 reg
= get_itemp(gen
);
272 reg
= gen
->temp_regs
[dest
->DstRegister
.Index
][channel
];
274 case TGSI_FILE_OUTPUT
:
275 reg
= get_itemp(gen
);
286 * When a TGSI instruction is writing to an output register, this
287 * function emits the SPE store instruction to store the value_reg.
288 * \param value_reg the SPE register containing the value to store.
289 * This would have been returned by get_dst_reg().
292 store_dest_reg(struct codegen
*gen
,
293 int value_reg
, int channel
,
294 const struct tgsi_full_dst_register
*dest
)
296 switch (dest
->DstRegister
.File
) {
297 case TGSI_FILE_TEMPORARY
:
298 if (gen
->if_nesting
> 0) {
299 int d_reg
= gen
->temp_regs
[dest
->DstRegister
.Index
][channel
];
300 int exec_reg
= get_exec_mask_reg(gen
);
301 /* Mix d with new value according to exec mask:
302 * d[i] = mask_reg[i] ? value_reg : d_reg
304 spe_selb(gen
->f
, d_reg
, d_reg
, value_reg
, exec_reg
);
307 /* we're not inside a condition or loop: do nothing special */
310 case TGSI_FILE_OUTPUT
:
312 /* offset is measured in quadwords, not bytes */
313 int offset
= dest
->DstRegister
.Index
* 4 + channel
;
314 if (gen
->if_nesting
> 0) {
315 int exec_reg
= get_exec_mask_reg(gen
);
316 int curval_reg
= get_itemp(gen
);
317 /* First read the current value from memory:
318 * Load: curval = memory[(machine_reg) + offset]
320 spe_lqd(gen
->f
, curval_reg
, gen
->outputs_reg
, offset
);
321 /* Mix curval with newvalue according to exec mask:
322 * d[i] = mask_reg[i] ? value_reg : d_reg
324 spe_selb(gen
->f
, curval_reg
, curval_reg
, value_reg
, exec_reg
);
325 /* Store: memory[(machine_reg) + offset] = curval */
326 spe_stqd(gen
->f
, curval_reg
, gen
->outputs_reg
, offset
);
329 /* Store: memory[(machine_reg) + offset] = reg */
330 spe_stqd(gen
->f
, value_reg
, gen
->outputs_reg
, offset
);
341 emit_MOV(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
344 spe_comment(gen
->f
, -4, "MOV:");
345 for (ch
= 0; ch
< 4; ch
++) {
346 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
347 int src_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
348 int dst_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
349 /* XXX we don't always need to actually emit a mov instruction here */
350 spe_move(gen
->f
, dst_reg
, src_reg
);
351 store_dest_reg(gen
, dst_reg
, ch
, &inst
->FullDstRegisters
[0]);
361 * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
362 * becomes (up to) four SPU "fa" instructions because we're doing SOA
366 emit_ADD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
369 spe_comment(gen
->f
, -4, "ADD:");
370 /* Loop over Red/Green/Blue/Alpha channels */
371 for (ch
= 0; ch
< 4; ch
++) {
372 /* If the dest R, G, B or A writemask is enabled... */
373 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
374 /* get indexes of the two src, one dest SPE registers */
375 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
376 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
377 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
379 /* Emit actual SPE instruction: d = s1 + s2 */
380 spe_fa(gen
->f
, d_reg
, s1_reg
, s2_reg
);
382 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
383 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
384 /* Free any intermediate temps we allocated */
392 * Emit subtract. See emit_ADD for comments.
395 emit_SUB(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
398 spe_comment(gen
->f
, -4, "SUB:");
399 /* Loop over Red/Green/Blue/Alpha channels */
400 for (ch
= 0; ch
< 4; ch
++) {
401 /* If the dest R, G, B or A writemask is enabled... */
402 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
403 /* get indexes of the two src, one dest SPE registers */
404 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
405 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
406 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
408 /* Emit actual SPE instruction: d = s1 - s2 */
409 spe_fs(gen
->f
, d_reg
, s1_reg
, s2_reg
);
411 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
412 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
413 /* Free any intermediate temps we allocated */
421 * Emit multiply add. See emit_ADD for comments.
424 emit_MAD(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
427 spe_comment(gen
->f
, -4, "MAD:");
428 for (ch
= 0; ch
< 4; ch
++) {
429 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
430 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
431 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
432 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
433 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
434 /* d = s1 * s2 + s3 */
435 spe_fma(gen
->f
, d_reg
, s1_reg
, s2_reg
, s3_reg
);
436 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
445 * Emit linear interpolate. See emit_ADD for comments.
448 emit_LERP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
451 spe_comment(gen
->f
, -4, "LERP:");
452 for (ch
= 0; ch
< 4; ch
++) {
453 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
454 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
455 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
456 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
457 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
458 /* d = s3 + s1(s2 - s3) */
459 spe_fs(gen
->f
, d_reg
, s2_reg
, s3_reg
);
460 spe_fma(gen
->f
, d_reg
, d_reg
, s1_reg
, s3_reg
);
461 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
469 * Emit multiply. See emit_ADD for comments.
472 emit_MUL(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
475 spe_comment(gen
->f
, -4, "MUL:");
476 for (ch
= 0; ch
< 4; ch
++) {
477 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
478 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
479 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
480 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
482 spe_fm(gen
->f
, d_reg
, s1_reg
, s2_reg
);
483 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
491 * Emit reciprocal. See emit_ADD for comments.
494 emit_RCP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
497 spe_comment(gen
->f
, -4, "RCP:");
498 for (ch
= 0; ch
< 4; ch
++) {
499 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
500 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
501 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
503 spe_frest(gen
->f
, d_reg
, s1_reg
);
504 spe_fi(gen
->f
, d_reg
, s1_reg
, d_reg
);
505 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
513 * Emit reciprocal sqrt. See emit_ADD for comments.
516 emit_RSQ(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
519 spe_comment(gen
->f
, -4, "RSQ:");
520 for (ch
= 0; ch
< 4; ch
++) {
521 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
522 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
523 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
525 spe_frsqest(gen
->f
, d_reg
, s1_reg
);
526 spe_fi(gen
->f
, d_reg
, s1_reg
, d_reg
);
527 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
535 * Emit absolute value. See emit_ADD for comments.
538 emit_ABS(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
541 spe_comment(gen
->f
, -4, "ABS:");
542 for (ch
= 0; ch
< 4; ch
++) {
543 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
544 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
545 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
546 const int bit31mask_reg
= get_itemp(gen
);
548 /* mask with bit 31 set, the rest cleared */
549 spe_load_int(gen
->f
, bit31mask_reg
, (1 << 31));
551 /* d = sign bit cleared in s1 */
552 spe_andc(gen
->f
, d_reg
, s1_reg
, bit31mask_reg
);
554 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
562 * Emit 3 component dot product. See emit_ADD for comments.
565 emit_DP3(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
568 spe_comment(gen
->f
, -4, "DP3:");
570 int s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
571 int s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
572 int d_reg
= get_dst_reg(gen
, CHAN_X
, &inst
->FullDstRegisters
[0]);
574 spe_fm(gen
->f
, d_reg
, s1_reg
, s2_reg
);
576 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
577 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
579 spe_fma(gen
->f
, d_reg
, s1_reg
, s2_reg
, d_reg
);
581 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
582 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
584 spe_fma(gen
->f
, d_reg
, s1_reg
, s2_reg
, d_reg
);
586 for (ch
= 0; ch
< 4; ch
++) {
587 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
588 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
597 * Emit 4 component dot product. See emit_ADD for comments.
600 emit_DP4(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
603 spe_comment(gen
->f
, -4, "DP3:");
605 int s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
606 int s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
607 int d_reg
= get_dst_reg(gen
, CHAN_X
, &inst
->FullDstRegisters
[0]);
609 spe_fm(gen
->f
, d_reg
, s1_reg
, s2_reg
);
611 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
612 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
614 spe_fma(gen
->f
, d_reg
, s1_reg
, s2_reg
, d_reg
);
616 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
617 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
619 spe_fma(gen
->f
, d_reg
, s1_reg
, s2_reg
, d_reg
);
621 s1_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[0]);
622 s2_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[1]);
624 spe_fma(gen
->f
, d_reg
, s1_reg
, s2_reg
, d_reg
);
626 for (ch
= 0; ch
< 4; ch
++) {
627 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
628 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
637 * Emit homogeneous dot product. See emit_ADD for comments.
640 emit_DPH(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
643 spe_comment(gen
->f
, -4, "DPH:");
645 int s1_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[0]);
646 int s2_reg
= get_src_reg(gen
, CHAN_X
, &inst
->FullSrcRegisters
[1]);
647 int d_reg
= get_dst_reg(gen
, CHAN_X
, &inst
->FullDstRegisters
[0]);
649 spe_fm(gen
->f
, d_reg
, s1_reg
, s2_reg
);
651 s1_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[0]);
652 s2_reg
= get_src_reg(gen
, CHAN_Y
, &inst
->FullSrcRegisters
[1]);
654 spe_fma(gen
->f
, d_reg
, s1_reg
, s2_reg
, d_reg
);
656 s1_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[0]);
657 s2_reg
= get_src_reg(gen
, CHAN_Z
, &inst
->FullSrcRegisters
[1]);
659 spe_fma(gen
->f
, d_reg
, s1_reg
, s2_reg
, d_reg
);
661 s2_reg
= get_src_reg(gen
, CHAN_W
, &inst
->FullSrcRegisters
[1]);
663 spe_fa(gen
->f
, d_reg
, s2_reg
, d_reg
);
665 for (ch
= 0; ch
< 4; ch
++) {
666 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
667 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
676 * Emit set-if-greater-than.
677 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
678 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
679 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
682 emit_SGT(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
686 spe_comment(gen
->f
, -4, "SGT:");
688 for (ch
= 0; ch
< 4; ch
++) {
689 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
690 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
691 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
692 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
695 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
697 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
698 /* d = d & one_reg */
699 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
701 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
710 * Emit set-if_less-then. See emit_SGT for comments.
713 emit_SLT(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
717 spe_comment(gen
->f
, -4, "SLT:");
719 for (ch
= 0; ch
< 4; ch
++) {
720 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
721 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
722 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
723 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
726 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
728 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
729 /* d = d & one_reg */
730 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
732 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
741 * Emit set-if_greater-then-or-equal. See emit_SGT for comments.
744 emit_SGE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
748 spe_comment(gen
->f
, -4, "SGE:");
750 for (ch
= 0; ch
< 4; ch
++) {
751 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
752 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
753 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
754 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
757 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
759 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
760 /* d = ~d & one_reg */
761 spe_andc(gen
->f
, d_reg
, get_const_one_reg(gen
), d_reg
);
763 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
772 * Emit set-if_less-then-or-equal. See emit_SGT for comments.
775 emit_SLE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
779 spe_comment(gen
->f
, -4, "SLE:");
781 for (ch
= 0; ch
< 4; ch
++) {
782 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
783 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
784 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
785 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
788 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
790 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
791 /* d = ~d & one_reg */
792 spe_andc(gen
->f
, d_reg
, get_const_one_reg(gen
), d_reg
);
794 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
803 * Emit set-if_equal. See emit_SGT for comments.
806 emit_SEQ(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
810 spe_comment(gen
->f
, -4, "SEQ:");
812 for (ch
= 0; ch
< 4; ch
++) {
813 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
814 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
815 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
816 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
819 spe_fceq(gen
->f
, d_reg
, s1_reg
, s2_reg
);
821 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
822 /* d = d & one_reg */
823 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
825 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
834 * Emit set-if_not_equal. See emit_SGT for comments.
837 emit_SNE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
841 spe_comment(gen
->f
, -4, "SNE:");
843 for (ch
= 0; ch
< 4; ch
++) {
844 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
845 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
846 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
847 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
850 spe_fceq(gen
->f
, d_reg
, s1_reg
, s2_reg
);
851 spe_nor(gen
->f
, d_reg
, d_reg
, d_reg
);
853 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
854 /* d = d & one_reg */
855 spe_and(gen
->f
, d_reg
, d_reg
, get_const_one_reg(gen
));
857 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
866 * Emit compare. See emit_SGT for comments.
869 emit_CMP(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
873 spe_comment(gen
->f
, -4, "CMP:");
875 for (ch
= 0; ch
< 4; ch
++) {
876 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
877 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
878 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
879 int s3_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[2]);
880 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
881 int zero_reg
= get_itemp(gen
);
883 spe_xor(gen
->f
, zero_reg
, zero_reg
, zero_reg
);
885 /* d = (s1 < 0) ? s2 : s3 */
886 spe_fcgt(gen
->f
, d_reg
, zero_reg
, s1_reg
);
887 spe_selb(gen
->f
, d_reg
, s3_reg
, s2_reg
, d_reg
);
889 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
899 * If negative int subtract one
900 * Convert float to signed int
901 * Convert signed int to float
904 emit_FLR(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
908 spe_comment(gen
->f
, -4, "FLR:");
910 for (ch
= 0; ch
< 4; ch
++) {
911 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
912 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
913 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
914 int tmp_reg
= get_itemp(gen
);
916 /* If negative, subtract 1.0 */
917 spe_xor(gen
->f
, tmp_reg
, tmp_reg
, tmp_reg
);
918 spe_fcgt(gen
->f
, d_reg
, tmp_reg
, s1_reg
);
919 spe_selb(gen
->f
, tmp_reg
, tmp_reg
, get_const_one_reg(gen
), d_reg
);
920 spe_fs(gen
->f
, d_reg
, s1_reg
, tmp_reg
);
922 /* Convert float to int */
923 spe_cflts(gen
->f
, d_reg
, d_reg
, 0);
925 /* Convert int to float */
926 spe_csflt(gen
->f
, d_reg
, d_reg
, 0);
928 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
941 emit_FRC(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
945 spe_comment(gen
->f
, -4, "FLR:");
947 for (ch
= 0; ch
< 4; ch
++) {
948 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
949 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
950 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
951 int tmp_reg
= get_itemp(gen
);
953 /* If negative, subtract 1.0 */
954 spe_xor(gen
->f
, tmp_reg
, tmp_reg
, tmp_reg
);
955 spe_fcgt(gen
->f
, d_reg
, tmp_reg
, s1_reg
);
956 spe_selb(gen
->f
, tmp_reg
, tmp_reg
, get_const_one_reg(gen
), d_reg
);
957 spe_fs(gen
->f
, d_reg
, s1_reg
, tmp_reg
);
959 /* Convert float to int */
960 spe_cflts(gen
->f
, d_reg
, d_reg
, 0);
962 /* Convert int to float */
963 spe_csflt(gen
->f
, d_reg
, d_reg
, 0);
965 /* d = s1 - FLR(s1) */
966 spe_fs(gen
->f
, d_reg
, s1_reg
, d_reg
);
968 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
978 * Emit max. See emit_SGT for comments.
981 emit_MAX(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
985 spe_comment(gen
->f
, -4, "MAX:");
987 for (ch
= 0; ch
< 4; ch
++) {
988 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
989 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
990 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
991 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
993 /* d = (s1 > s2) ? s1 : s2 */
994 spe_fcgt(gen
->f
, d_reg
, s1_reg
, s2_reg
);
995 spe_selb(gen
->f
, d_reg
, s2_reg
, s1_reg
, d_reg
);
997 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1006 * Emit max. See emit_SGT for comments.
1009 emit_MIN(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1013 spe_comment(gen
->f
, -4, "MIN:");
1015 for (ch
= 0; ch
< 4; ch
++) {
1016 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1017 int s1_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1018 int s2_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[1]);
1019 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1021 /* d = (s2 > s1) ? s1 : s2 */
1022 spe_fcgt(gen
->f
, d_reg
, s2_reg
, s1_reg
);
1023 spe_selb(gen
->f
, d_reg
, s2_reg
, s1_reg
, d_reg
);
1025 store_dest_reg(gen
, d_reg
, ch
, &inst
->FullDstRegisters
[0]);
1034 emit_IF(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1036 const int channel
= 0;
1037 const int exec_reg
= get_exec_mask_reg(gen
);
1039 spe_comment(gen
->f
, -4, "IF:");
1041 /* update execution mask with the predicate register */
1042 int tmp_reg
= get_itemp(gen
);
1043 int s1_reg
= get_src_reg(gen
, channel
, &inst
->FullSrcRegisters
[0]);
1045 /* tmp = (s1_reg == 0) */
1046 spe_ceqi(gen
->f
, tmp_reg
, s1_reg
, 0);
1048 spe_complement(gen
->f
, tmp_reg
, tmp_reg
);
1049 /* exec_mask = exec_mask & tmp */
1050 spe_and(gen
->f
, exec_reg
, exec_reg
, tmp_reg
);
1061 emit_ELSE(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1063 const int exec_reg
= get_exec_mask_reg(gen
);
1065 spe_comment(gen
->f
, -4, "ELSE:");
1067 /* exec_mask = !exec_mask */
1068 spe_complement(gen
->f
, exec_reg
, exec_reg
);
1075 emit_ENDIF(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
)
1077 const int exec_reg
= get_exec_mask_reg(gen
);
1079 spe_comment(gen
->f
, -4, "ENDIF:");
1081 /* XXX todo: pop execution mask */
1083 spe_load_int(gen
->f
, exec_reg
, ~0x0);
1091 emit_DDX_DDY(struct codegen
*gen
, const struct tgsi_full_instruction
*inst
,
1096 spe_comment(gen
->f
, -4, ddx
? "DDX:" : "DDY:");
1098 for (ch
= 0; ch
< 4; ch
++) {
1099 if (inst
->FullDstRegisters
[0].DstRegister
.WriteMask
& (1 << ch
)) {
1100 int s_reg
= get_src_reg(gen
, ch
, &inst
->FullSrcRegisters
[0]);
1101 int d_reg
= get_dst_reg(gen
, ch
, &inst
->FullDstRegisters
[0]);
1103 int t1_reg
= get_itemp(gen
);
1104 int t2_reg
= get_itemp(gen
);
1106 spe_splat_word(gen
->f
, t1_reg
, s_reg
, 0); /* upper-left pixel */
1108 spe_splat_word(gen
->f
, t2_reg
, s_reg
, 1); /* upper-right pixel */
1111 spe_splat_word(gen
->f
, t2_reg
, s_reg
, 2); /* lower-left pixel */
1113 spe_fs(gen
->f
, d_reg
, t2_reg
, t1_reg
);
1126 * Emit END instruction.
1127 * We just return from the shader function at this point.
1129 * Note that there may be more code after this that would be
1130 * called by TGSI_OPCODE_CALL.
1133 emit_END(struct codegen
*gen
)
1135 spe_comment(gen
->f
, -4, "END:");
1136 /* return from function call */
1137 spe_bi(gen
->f
, SPE_REG_RA
, 0, 0);
1143 * Emit code for the given instruction. Just a big switch stmt.
1146 emit_instruction(struct codegen
*gen
,
1147 const struct tgsi_full_instruction
*inst
)
1149 switch (inst
->Instruction
.Opcode
) {
1150 case TGSI_OPCODE_MOV
:
1151 return emit_MOV(gen
, inst
);
1152 case TGSI_OPCODE_MUL
:
1153 return emit_MUL(gen
, inst
);
1154 case TGSI_OPCODE_ADD
:
1155 return emit_ADD(gen
, inst
);
1156 case TGSI_OPCODE_SUB
:
1157 return emit_SUB(gen
, inst
);
1158 case TGSI_OPCODE_MAD
:
1159 return emit_MAD(gen
, inst
);
1160 case TGSI_OPCODE_LERP
:
1161 return emit_LERP(gen
, inst
);
1162 case TGSI_OPCODE_DP3
:
1163 return emit_DP3(gen
, inst
);
1164 case TGSI_OPCODE_DP4
:
1165 return emit_DP4(gen
, inst
);
1166 case TGSI_OPCODE_DPH
:
1167 return emit_DPH(gen
, inst
);
1168 case TGSI_OPCODE_RCP
:
1169 return emit_RCP(gen
, inst
);
1170 case TGSI_OPCODE_RSQ
:
1171 return emit_RSQ(gen
, inst
);
1172 case TGSI_OPCODE_ABS
:
1173 return emit_ABS(gen
, inst
);
1174 case TGSI_OPCODE_SGT
:
1175 return emit_SGT(gen
, inst
);
1176 case TGSI_OPCODE_SLT
:
1177 return emit_SLT(gen
, inst
);
1178 case TGSI_OPCODE_SGE
:
1179 return emit_SGE(gen
, inst
);
1180 case TGSI_OPCODE_SLE
:
1181 return emit_SLE(gen
, inst
);
1182 case TGSI_OPCODE_SEQ
:
1183 return emit_SEQ(gen
, inst
);
1184 case TGSI_OPCODE_SNE
:
1185 return emit_SNE(gen
, inst
);
1186 case TGSI_OPCODE_CMP
:
1187 return emit_CMP(gen
, inst
);
1188 case TGSI_OPCODE_MAX
:
1189 return emit_MAX(gen
, inst
);
1190 case TGSI_OPCODE_MIN
:
1191 return emit_MIN(gen
, inst
);
1192 case TGSI_OPCODE_FLR
:
1193 return emit_FLR(gen
, inst
);
1194 case TGSI_OPCODE_FRC
:
1195 return emit_FRC(gen
, inst
);
1196 case TGSI_OPCODE_END
:
1197 return emit_END(gen
);
1199 case TGSI_OPCODE_IF
:
1200 return emit_IF(gen
, inst
);
1201 case TGSI_OPCODE_ELSE
:
1202 return emit_ELSE(gen
, inst
);
1203 case TGSI_OPCODE_ENDIF
:
1204 return emit_ENDIF(gen
, inst
);
1206 case TGSI_OPCODE_DDX
:
1207 return emit_DDX_DDY(gen
, inst
, true);
1208 case TGSI_OPCODE_DDY
:
1209 return emit_DDX_DDY(gen
, inst
, false);
1211 /* XXX lots more cases to do... */
1214 fprintf(stderr
, "Cell: unimplemented TGSI instruction %d!\n",
1215 inst
->Instruction
.Opcode
);
1225 * Emit code for a TGSI immediate value (vector of four floats).
1226 * This involves register allocation and initialization.
1227 * XXX the initialization should be done by a "prepare" stage, not
1228 * per quad execution!
1231 emit_immediate(struct codegen
*gen
, const struct tgsi_full_immediate
*immed
)
1235 assert(gen
->num_imm
< MAX_TEMPS
);
1237 spe_comment(gen
->f
, -4, "IMMEDIATE:");
1239 for (ch
= 0; ch
< 4; ch
++) {
1240 float val
= immed
->u
.ImmediateFloat32
[ch
].Float
;
1241 int reg
= spe_allocate_available_register(gen
->f
);
1246 /* update immediate map */
1247 gen
->imm_regs
[gen
->num_imm
][ch
] = reg
;
1249 /* emit initializer instruction */
1250 spe_load_float(gen
->f
, reg
, val
);
1261 * Emit "code" for a TGSI declaration.
1262 * We only care about TGSI TEMPORARY register declarations at this time.
1263 * For each TGSI TEMPORARY we allocate four SPE registers.
1266 emit_declaration(struct cell_context
*cell
,
1267 struct codegen
*gen
, const struct tgsi_full_declaration
*decl
)
1271 switch (decl
->Declaration
.File
) {
1272 case TGSI_FILE_TEMPORARY
:
1273 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1274 printf("Declare temp reg %d .. %d\n",
1275 decl
->DeclarationRange
.First
,
1276 decl
->DeclarationRange
.Last
);
1279 for (i
= decl
->DeclarationRange
.First
;
1280 i
<= decl
->DeclarationRange
.Last
;
1282 assert(i
< MAX_TEMPS
);
1283 for (ch
= 0; ch
< 4; ch
++) {
1284 gen
->temp_regs
[i
][ch
] = spe_allocate_available_register(gen
->f
);
1285 if (gen
->temp_regs
[i
][ch
] < 0)
1286 return false; /* out of regs */
1289 /* XXX if we run out of SPE registers, we need to spill
1290 * to SPU memory. someday...
1293 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1294 printf(" SPE regs: %d %d %d %d\n",
1295 gen
->temp_regs
[i
][0],
1296 gen
->temp_regs
[i
][1],
1297 gen
->temp_regs
[i
][2],
1298 gen
->temp_regs
[i
][3]);
1311 * Translate TGSI shader code to SPE instructions. This is done when
1312 * the state tracker gives us a new shader (via pipe->create_fs_state()).
1314 * \param cell the rendering context (in)
1315 * \param tokens the TGSI shader (in)
1316 * \param f the generated function (out)
1319 cell_gen_fragment_program(struct cell_context
*cell
,
1320 const struct tgsi_token
*tokens
,
1321 struct spe_function
*f
)
1323 struct tgsi_parse_context parse
;
1326 memset(&gen
, 0, sizeof(gen
));
1329 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
1330 gen
.inputs_reg
= 3; /* pointer to inputs array */
1331 gen
.outputs_reg
= 4; /* pointer to outputs array */
1332 gen
.constants_reg
= 5; /* pointer to constants array */
1334 spe_init_func(f
, SPU_MAX_FRAGMENT_PROGRAM_INSTS
* SPE_INST_SIZE
);
1335 spe_allocate_register(f
, gen
.inputs_reg
);
1336 spe_allocate_register(f
, gen
.outputs_reg
);
1337 spe_allocate_register(f
, gen
.constants_reg
);
1339 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1340 spe_print_code(f
, true);
1342 printf("Begin %s\n", __FUNCTION__
);
1343 tgsi_dump(tokens
, 0);
1346 tgsi_parse_init(&parse
, tokens
);
1348 while (!tgsi_parse_end_of_tokens(&parse
) && !gen
.error
) {
1349 tgsi_parse_token(&parse
);
1351 switch (parse
.FullToken
.Token
.Type
) {
1352 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1353 if (!emit_immediate(&gen
, &parse
.FullToken
.FullImmediate
))
1357 case TGSI_TOKEN_TYPE_DECLARATION
:
1358 if (!emit_declaration(cell
, &gen
, &parse
.FullToken
.FullDeclaration
))
1362 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1363 if (!emit_instruction(&gen
, &parse
.FullToken
.FullInstruction
))
1374 /* terminate the SPE code */
1375 return emit_END(&gen
);
1378 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1379 printf("cell_gen_fragment_program nr instructions: %d\n", f
->num_inst
);
1380 printf("End %s\n", __FUNCTION__
);
1383 tgsi_parse_free( &parse
);