2 * Mesa 3-D graphics library
5 * Copyright (C) 2005-2008 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Emit program instructions (PI code) from IR trees.
34 *** To emit GPU instructions, we basically just do an in-order traversal
39 #include "main/imports.h"
40 #include "main/context.h"
41 #include "main/macros.h"
42 #include "shader/program.h"
43 #include "shader/prog_instruction.h"
44 #include "shader/prog_parameter.h"
45 #include "shader/prog_print.h"
46 #include "slang_builtin.h"
47 #include "slang_emit.h"
48 #include "slang_mem.h"
51 #define PEEPHOLE_OPTIMIZATIONS 1
/*
 * Emitter state fields visible in this span: the program currently being
 * emitted into (prog), the array of subroutine programs with its length,
 * and the code-generation option flags.
 * NOTE(review): the enclosing type declaration is not visible here;
 * presumably these are members of slang_emit_info -- confirm.
 */
59 struct gl_program
*prog
;
60 struct gl_program
**Subroutines
;
61 GLuint NumSubroutines
;
63 /* code-gen options */
64 GLboolean EmitHighLevelInstructions
;
65 GLboolean EmitCondCodes
;
66 GLboolean EmitComments
;
67 GLboolean EmitBeginEndSub
; /* XXX TEMPORARY */
72 static struct gl_program
*
73 new_subroutine(slang_emit_info
*emitInfo
, GLuint
*id
)
75 GET_CURRENT_CONTEXT(ctx
);
76 const GLuint n
= emitInfo
->NumSubroutines
;
78 emitInfo
->Subroutines
= (struct gl_program
**)
79 _mesa_realloc(emitInfo
->Subroutines
,
80 n
* sizeof(struct gl_program
),
81 (n
+ 1) * sizeof(struct gl_program
));
82 emitInfo
->Subroutines
[n
] = ctx
->Driver
.NewProgram(ctx
, emitInfo
->prog
->Target
, 0);
83 emitInfo
->Subroutines
[n
]->Parameters
= emitInfo
->prog
->Parameters
;
84 emitInfo
->NumSubroutines
++;
86 return emitInfo
->Subroutines
[n
];
91 * Convert a writemask to a swizzle. Used for testing cond codes because
92 * we only want to test the cond code component(s) that was set by the
93 * previous instruction.
/*
 * Map a single-component writemask to a swizzle value.
 * The mask is compared against WRITEMASK_X, _Y, _Z and _W in turn; any
 * other mask reaches the final SWIZZLE_XYZW return, which the code marks
 * as not expected to be hit.
 * NOTE(review): the value returned for each matching case is not visible
 * in this span.
 */
96 writemask_to_swizzle(GLuint writemask
)
98 if (writemask
== WRITEMASK_X
)
100 if (writemask
== WRITEMASK_Y
)
102 if (writemask
== WRITEMASK_Z
)
104 if (writemask
== WRITEMASK_W
)
106 return SWIZZLE_XYZW
; /* shouldn't be hit */
111 * Convert a swizzle mask to a writemask.
112 * Note that the slang_ir_storage->Swizzle field can represent either a
113 * swizzle mask or a writemask, depending on how it's used. For example,
114 * when we parse "direction.yz" alone, we don't know whether .yz is a
115 * writemask or a swizzle. In this case, we encode ".yz" in store->Swizzle
116 * as a swizzle mask (.yz?? actually). Later, if direction.yz is used as
117 * an R-value, we use store->Swizzle as-is. Otherwise, if direction.yz is
118 * used as an L-value, we convert it to a writemask.
/*
 * Build a writemask from a swizzle: for each of the four swizzle
 * positions, fetch the component with GET_SWZ() and, when it is
 * <= SWIZZLE_W, set bit (1 << component) in the mask being accumulated.
 */
121 swizzle_to_writemask(GLuint swizzle
)
123 GLuint i
, writemask
= 0x0;
124 for (i
= 0; i
< 4; i
++) {
125 GLuint swz
= GET_SWZ(swizzle
, i
);
126 if (swz
<= SWIZZLE_W
) {
127 writemask
|= (1 << swz
);
135 * Swizzle a swizzle (function composition).
136 * That is, return swz2(swz1), or said another way: swz1.swz2
137 * Example: swizzle_swizzle(".zwxx", ".xxyw") yields ".zzwx"
/*
 * Compose two swizzles.  For each output position i, the component c
 * selected by swz2 at i is used to index swz1 (s[i] = GET_SWZ(swz1, c)),
 * and the four results are packed with MAKE_SWIZZLE4().
 * NOTE(review): some lines of this function are not visible in this span
 * (e.g. any special handling of c values that are not X/Y/Z/W).
 */
140 _slang_swizzle_swizzle(GLuint swz1
, GLuint swz2
)
143 for (i
= 0; i
< 4; i
++) {
144 GLuint c
= GET_SWZ(swz2
, i
);
146 s
[i
] = GET_SWZ(swz1
, c
);
150 swz
= MAKE_SWIZZLE4(s
[0], s
[1], s
[2], s
[3]);
156 * Allocate storage for the given node (if it hasn't already been allocated).
158 * Typically this is temporary storage for an intermediate result (such as
159 * for a multiply or add, etc).
161 * If n->Store does not exist it will be created and will be of the size
162 * specified by defaultSize.
/*
 * Ensure node n has storage with an assigned register.
 * Storage created here comes from
 * _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, defaultSize), and
 * defaultSize is asserted to be > 0 on that path.
 * If n->Store->Index is negative, a temporary is requested from
 * emitInfo->vt via _slang_alloc_temp(); on failure the error
 * "Ran out of registers, too many temporaries" is logged and n->Store
 * is freed.
 * NOTE(review): the return statements are not visible in this span;
 * callers test the result with '!'.
 */
165 alloc_node_storage(slang_emit_info
*emitInfo
, slang_ir_node
*n
,
170 assert(defaultSize
> 0);
171 n
->Store
= _slang_new_ir_storage(PROGRAM_TEMPORARY
, -1, defaultSize
);
174 /* now allocate actual register(s). I.e. set n->Store->Index >= 0 */
175 if (n
->Store
->Index
< 0) {
176 if (!_slang_alloc_temp(emitInfo
->vt
, n
->Store
)) {
177 slang_info_log_error(emitInfo
->log
,
178 "Ran out of registers, too many temporaries");
179 _slang_free(n
->Store
);
189 * Free temporary storage, if n->Store is, in fact, temp storage.
/*
 * Release the temporary register held by node n, if any.
 * Acts only when n->Store is PROGRAM_TEMPORARY storage with Index >= 0,
 * n is not an IR_SWIZZLE node, and _slang_is_temp() reports the storage
 * as a temporary in vt.  The register is returned with
 * _slang_free_temp(), the storage's Index is reset to -1, and n->Store
 * is set to NULL.
 */
193 free_node_storage(slang_var_table
*vt
, slang_ir_node
*n
)
195 if (n
->Store
->File
== PROGRAM_TEMPORARY
&&
196 n
->Store
->Index
>= 0 &&
197 n
->Opcode
!= IR_SWIZZLE
) {
198 if (_slang_is_temp(vt
, n
->Store
)) {
199 _slang_free_temp(vt
, n
->Store
);
200 n
->Store
->Index
= -1;
201 n
->Store
= NULL
; /* XXX this may not be needed */
208 * Helper function to allocate a short-term temporary.
209 * Free it with _slang_free_temp().
/*
 * Set up caller-provided storage 'temp' as a short-lived temporary:
 * zero the struct, mark it PROGRAM_TEMPORARY and allocate a register
 * for it from emitInfo->vt.
 * Returns the result of _slang_alloc_temp().
 * NOTE(review): how 'size' is applied is not visible in this span.
 */
212 alloc_local_temp(slang_emit_info
*emitInfo
, slang_ir_storage
*temp
, GLint size
)
216 _mesa_bzero(temp
, sizeof(*temp
));
218 temp
->File
= PROGRAM_TEMPORARY
;
220 return _slang_alloc_temp(emitInfo
->vt
, temp
);
225 * Remove any SWIZZLE_NIL terms from given swizzle mask.
226 * For a swizzle like .z??? generate .zzzz (replicate single component).
227 * Else, for .wx?? generate .wxzw (insert default component for the position).
/*
 * Replace SWIZZLE_NIL terms in 'swizzle' and return the repacked result.
 * The four components are extracted into c0..c3.  Two cases are
 * distinguished: c1, c2 and c3 all NIL (the first component is smeared
 * across all positions), otherwise each NIL component gets a default.
 * NOTE(review): the assignments performed in each case are not visible
 * in this span; this description follows the in-line comments.
 */
230 fix_swizzle(GLuint swizzle
)
232 GLuint c0
= GET_SWZ(swizzle
, 0),
233 c1
= GET_SWZ(swizzle
, 1),
234 c2
= GET_SWZ(swizzle
, 2),
235 c3
= GET_SWZ(swizzle
, 3);
236 if (c1
== SWIZZLE_NIL
&& c2
== SWIZZLE_NIL
&& c3
== SWIZZLE_NIL
) {
237 /* smear first component across all positions */
241 /* insert default swizzle components */
242 if (c0
== SWIZZLE_NIL
)
244 if (c1
== SWIZZLE_NIL
)
246 if (c2
== SWIZZLE_NIL
)
248 if (c3
== SWIZZLE_NIL
)
251 return MAKE_SWIZZLE4(c0
, c1
, c2
, c3
);
257 * Convert IR storage to an instruction dst register.
/*
 * Fill in destination register 'dst' from IR storage 'st'.
 * Starts from st's Size, Index and Swizzle; while walking up to parent
 * storage the swizzles are composed with _slang_swizzle_swizzle().
 * dst->File is taken from the storage, which must not be
 * PROGRAM_UNDEFINED (asserted).  dst->WriteMask is set either to a
 * single component (WRITEMASK_X << first swizzle component), to a mask
 * derived from the swizzle via swizzle_to_writemask() when the swizzle
 * is not SWIZZLE_XYZW, or to the 'writemask' value.
 * NOTE(review): the conditions selecting between these cases are only
 * partly visible in this span.
 */
260 storage_to_dst_reg(struct prog_dst_register
*dst
, const slang_ir_storage
*st
,
263 const GLint size
= st
->Size
;
264 GLint index
= st
->Index
;
265 GLuint swizzle
= st
->Swizzle
;
267 /* if this is storage relative to some parent storage, walk up the tree */
271 swizzle
= _slang_swizzle_swizzle(st
->Swizzle
, swizzle
);
274 assert(st
->File
!= PROGRAM_UNDEFINED
);
275 dst
->File
= st
->File
;
285 GLuint comp
= GET_SWZ(swizzle
, 0);
287 dst
->WriteMask
= WRITEMASK_X
<< comp
;
290 dst
->WriteMask
= writemask
;
293 if (swizzle
!= SWIZZLE_XYZW
) {
294 dst
->WriteMask
= swizzle_to_writemask(swizzle
);
297 dst
->WriteMask
= writemask
;
304 * Convert IR storage to an instruction src register.
/*
 * Fill in source register 'src' from IR storage 'st'.
 * Captures st's RelAddr, Index and Swizzle; while walking up to parent
 * storage the swizzle is composed with the parent's swizzle after that
 * one has been passed through fix_swizzle().
 * If the File is PROGRAM_UNDEFINED it is forced to PROGRAM_TEMPORARY
 * (marked "XXX temporary" in the code), casting away const to do so.
 * The final swizzle goes through fix_swizzle(), each component is
 * asserted to be <= SWIZZLE_W, and it is stored in src->Swizzle;
 * src->RelAddr is set from the value captured on entry.
 */
307 storage_to_src_reg(struct prog_src_register
*src
, const slang_ir_storage
*st
)
309 const GLboolean relAddr
= st
->RelAddr
;
310 GLint index
= st
->Index
;
311 GLuint swizzle
= st
->Swizzle
;
313 /* if this is storage relative to some parent storage, walk up the tree */
317 swizzle
= _slang_swizzle_swizzle(fix_swizzle(st
->Swizzle
), swizzle
);
320 assert(st
->File
>= 0);
321 #if 1 /* XXX temporary */
322 if (st
->File
== PROGRAM_UNDEFINED
) {
323 slang_ir_storage
*st0
= (slang_ir_storage
*) st
;
324 st0
->File
= PROGRAM_TEMPORARY
;
327 assert(st
->File
< PROGRAM_UNDEFINED
);
328 src
->File
= st
->File
;
333 swizzle
= fix_swizzle(swizzle
);
334 assert(GET_SWZ(swizzle
, 0) <= SWIZZLE_W
);
335 assert(GET_SWZ(swizzle
, 1) <= SWIZZLE_W
);
336 assert(GET_SWZ(swizzle
, 2) <= SWIZZLE_W
);
337 assert(GET_SWZ(swizzle
, 3) <= SWIZZLE_W
);
338 src
->Swizzle
= swizzle
;
340 src
->RelAddr
= relAddr
;
345 * Setup storage pointing to a scalar constant/literal.
/*
 * Initialize 'store' to refer to a constant.
 * A constant is added to emitInfo->prog->Parameters with
 * _mesa_add_unnamed_constant(); 'store' is then zeroed with memset,
 * marked PROGRAM_CONSTANT and given a swizzle.
 * NOTE(review): the remaining arguments and field assignments are not
 * visible in this span.
 */
348 constant_to_storage(slang_emit_info
*emitInfo
,
350 slang_ir_storage
*store
)
357 reg
= _mesa_add_unnamed_constant(emitInfo
->prog
->Parameters
,
360 memset(store
, 0, sizeof(*store
));
361 store
->File
= PROGRAM_CONSTANT
;
363 store
->Swizzle
= swizzle
;
368 * Add new instruction at end of given program.
369 * \param prog the program to append instruction onto
370 * \param opcode opcode for the new instruction
371 * \return pointer to the new instruction
/*
 * Append one instruction with the given opcode to emitInfo->prog.
 * The instruction array is grown by one with
 * _mesa_realloc_instructions(), the new slot is initialized with
 * _mesa_init_instructions(), its Opcode is set and BranchTarget is set
 * to -1 (invalid).
 * NOTE(review): the code that prints the previous and the new
 * instruction looks like debug output; any preprocessor guards around it
 * are not visible in this span.
 */
373 static struct prog_instruction
*
374 new_instruction(slang_emit_info
*emitInfo
, gl_inst_opcode opcode
)
376 struct gl_program
*prog
= emitInfo
->prog
;
377 struct prog_instruction
*inst
;
380 /* print prev inst */
381 if (prog
->NumInstructions
> 0) {
382 _mesa_print_instruction(prog
->Instructions
+ prog
->NumInstructions
- 1);
385 prog
->Instructions
= _mesa_realloc_instructions(prog
->Instructions
,
386 prog
->NumInstructions
,
387 prog
->NumInstructions
+ 1);
388 inst
= prog
->Instructions
+ prog
->NumInstructions
;
389 prog
->NumInstructions
++;
390 _mesa_init_instructions(inst
, 1);
391 inst
->Opcode
= opcode
;
392 inst
->BranchTarget
= -1; /* invalid */
394 printf("New inst %d: %p %s\n", prog->NumInstructions-1,(void*)inst,
395 _mesa_opcode_string(inst->Opcode));
402 * Emit a new instruction with given opcode, operands.
/*
 * Append an instruction and fill in its destination and source
 * registers from IR storage.
 * The instruction array is grown by one, the new slot is initialized,
 * and its Opcode and BranchTarget (-1) are set.  A writemask is chosen
 * from WRITEMASK_XYZW / _XYZ / _XY or WRITEMASK_X shifted by the first
 * component of dst->Swizzle and passed with dst to storage_to_dst_reg();
 * src1..src3 are converted with storage_to_src_reg() into SrcReg[0..2].
 * NOTE(review): the conditions guarding each step (e.g. NULL checks,
 * size tests) are not visible in this span.
 */
404 static struct prog_instruction
*
405 emit_instruction(slang_emit_info
*emitInfo
,
406 gl_inst_opcode opcode
,
407 const slang_ir_storage
*dst
,
408 const slang_ir_storage
*src1
,
409 const slang_ir_storage
*src2
,
410 const slang_ir_storage
*src3
)
412 struct gl_program
*prog
= emitInfo
->prog
;
413 struct prog_instruction
*inst
;
415 prog
->Instructions
= _mesa_realloc_instructions(prog
->Instructions
,
416 prog
->NumInstructions
,
417 prog
->NumInstructions
+ 1);
418 inst
= prog
->Instructions
+ prog
->NumInstructions
;
419 prog
->NumInstructions
++;
421 _mesa_init_instructions(inst
, 1);
422 inst
->Opcode
= opcode
;
423 inst
->BranchTarget
= -1; /* invalid */
429 writemask
= WRITEMASK_XYZW
;
432 writemask
= WRITEMASK_XYZ
;
435 writemask
= WRITEMASK_XY
;
438 writemask
= WRITEMASK_X
<< GET_SWZ(dst
->Swizzle
, 0);
441 writemask
= WRITEMASK_XYZW
;
444 storage_to_dst_reg(&inst
->DstReg
, dst
, writemask
);
448 storage_to_src_reg(&inst
->SrcReg
[0], src1
);
450 storage_to_src_reg(&inst
->SrcReg
[1], src2
);
452 storage_to_src_reg(&inst
->SrcReg
[2], src3
);
459 * Emit an ARL instruction.
/*
 * Emit an OPCODE_ARL instruction that writes address register 'addrReg'.
 * Only address register 0 is supported (asserted).  The source operand
 * is taken from 'src'; the destination is file PROGRAM_ADDRESS, index
 * addrReg, with writemask X.
 */
461 static struct prog_instruction
*
462 emit_arl_instruction(slang_emit_info
*emitInfo
,
464 const slang_ir_storage
*src
)
466 struct prog_instruction
*inst
;
468 assert(addrReg
== 0); /* only one addr reg at this time */
469 inst
= new_instruction(emitInfo
, OPCODE_ARL
);
470 storage_to_src_reg(&inst
->SrcReg
[0], src
);
471 inst
->DstReg
.File
= PROGRAM_ADDRESS
;
472 inst
->DstReg
.Index
= addrReg
;
473 inst
->DstReg
.WriteMask
= WRITEMASK_X
;
480 * Put a comment on the given instruction.
/*
 * Attach a copy of 'comment' (made with _mesa_strdup) to inst->Comment.
 * NOTE(review): any guard around the assignment is not visible here.
 */
483 inst_comment(struct prog_instruction
*inst
, const char *comment
)
486 inst
->Comment
= _mesa_strdup(comment
);
492 * Return pointer to last instruction in program.
/*
 * Return a pointer to the last instruction of emitInfo->prog
 * (Instructions + NumInstructions - 1).  The empty-program case
 * (NumInstructions == 0) is tested first and handled separately.
 */
494 static struct prog_instruction
*
495 prev_instruction(slang_emit_info
*emitInfo
)
497 struct gl_program
*prog
= emitInfo
->prog
;
498 if (prog
->NumInstructions
== 0)
501 return prog
->Instructions
+ prog
->NumInstructions
- 1;
/* Forward declaration of emit(), which the emit_*() helpers below call on child nodes. */
505 static struct prog_instruction
*
506 emit(slang_emit_info
*emitInfo
, slang_ir_node
*n
);
510 * Return an annotation string for given node's storage.
/*
 * Build a short human-readable description of node n's storage.
 * The text is formatted with sprintf() into a 100-byte static buffer and
 * a _mesa_strdup() copy of it is returned; an early path returns a copy
 * of the empty string instead.  Visible cases: constants print all four
 * values (SWIZZLE_NOOP) or the single value selected by the first
 * swizzle component, temporaries print the variable name or "t[index]",
 * state vars/uniforms and varyings print the parameter name, and the
 * remaining visible cases print "input[index]" / "output[index]".
 */
513 storage_annotation(const slang_ir_node
*n
, const struct gl_program
*prog
)
516 const slang_ir_storage
*st
= n
->Store
;
517 static char s
[100] = "";
520 return _mesa_strdup("");
523 case PROGRAM_CONSTANT
:
524 if (st
->Index
>= 0) {
525 const GLfloat
*val
= prog
->Parameters
->ParameterValues
[st
->Index
];
526 if (st
->Swizzle
== SWIZZLE_NOOP
)
527 sprintf(s
, "{%g, %g, %g, %g}", val
[0], val
[1], val
[2], val
[3]);
529 sprintf(s
, "%g", val
[GET_SWZ(st
->Swizzle
, 0)]);
533 case PROGRAM_TEMPORARY
:
535 sprintf(s
, "%s", (char *) n
->Var
->a_name
);
537 sprintf(s
, "t[%d]", st
->Index
);
539 case PROGRAM_STATE_VAR
:
540 case PROGRAM_UNIFORM
:
541 sprintf(s
, "%s", prog
->Parameters
->Parameters
[st
->Index
].Name
);
543 case PROGRAM_VARYING
:
544 sprintf(s
, "%s", prog
->Varying
->Parameters
[st
->Index
].Name
);
547 sprintf(s
, "input[%d]", st
->Index
);
550 sprintf(s
, "output[%d]", st
->Index
);
555 return _mesa_strdup(s
);
563 * Return an annotation string for an instruction.
/*
 * Build an annotation string of the form "dst = src0 op src1 src2".
 * The lengths of the supplied annotation strings are added to 'len';
 * in the alternative paths an annotation is replaced by a
 * _mesa_strdup("") copy.  The result buffer is malloc'ed with 'len'
 * bytes and filled with sprintf(); an assert checks that the text is
 * shorter than 'len'.
 * NOTE(review): the initial value of 'len' and the selection of the
 * operator string are not visible in this span.
 */
566 instruction_annotation(gl_inst_opcode opcode
, char *dstAnnot
,
567 char *srcAnnot0
, char *srcAnnot1
, char *srcAnnot2
)
570 const char *operator;
575 len
+= strlen(dstAnnot
);
577 dstAnnot
= _mesa_strdup("");
580 len
+= strlen(srcAnnot0
);
582 srcAnnot0
= _mesa_strdup("");
585 len
+= strlen(srcAnnot1
);
587 srcAnnot1
= _mesa_strdup("");
590 len
+= strlen(srcAnnot2
);
592 srcAnnot2
= _mesa_strdup("");
623 s
= (char *) malloc(len
);
624 sprintf(s
, "%s = %s %s %s %s", dstAnnot
,
625 srcAnnot0
, operator, srcAnnot1
, srcAnnot2
);
626 assert(_mesa_strlen(s
) < len
);
641 * Emit an instruction that's just a comment.
/* Emit an OPCODE_NOP instruction and attach 'comment' to it via inst_comment(). */
643 static struct prog_instruction
*
644 emit_comment(slang_emit_info
*emitInfo
, const char *comment
)
646 struct prog_instruction
*inst
= new_instruction(emitInfo
, OPCODE_NOP
);
647 inst_comment(inst
, comment
);
653 * Generate code for a simple arithmetic instruction.
654 * Either 1, 2 or 3 operands.
/*
 * Emit code for an arithmetic IR node with up to three operands.
 * Under PEEPHOLE_OPTIMIZATIONS, the patterns IR_ADD(IR_MUL(A, B), C) and
 * IR_ADD(A, IR_MUL(B, C)) are checked first: the three leaf operands are
 * emitted, storage for n is allocated and one instruction is emitted
 * whose sources are the two multiply operands followed by the addend;
 * the operands' temporaries are then freed.
 * In the general path each child is emitted, storage for n is allocated,
 * one instruction is emitted with the children's storage as sources
 * (NULL for unused slots), and the children's temporaries are freed.
 * NOTE(review): the opcode and destination arguments of the
 * emit_instruction() calls are not visible in this span (the comments
 * call the special case a MAD opportunity).
 */
656 static struct prog_instruction
*
657 emit_arith(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
659 const slang_ir_info
*info
= _slang_ir_info(n
->Opcode
);
660 struct prog_instruction
*inst
;
664 assert(info
->InstOpcode
!= OPCODE_NOP
);
666 #if PEEPHOLE_OPTIMIZATIONS
667 /* Look for MAD opportunity */
668 if (info
->NumParams
== 2 &&
669 n
->Opcode
== IR_ADD
&& n
->Children
[0]->Opcode
== IR_MUL
) {
670 /* found pattern IR_ADD(IR_MUL(A, B), C) */
671 emit(emitInfo
, n
->Children
[0]->Children
[0]); /* A */
672 emit(emitInfo
, n
->Children
[0]->Children
[1]); /* B */
673 emit(emitInfo
, n
->Children
[1]); /* C */
674 alloc_node_storage(emitInfo
, n
, -1); /* dest */
676 inst
= emit_instruction(emitInfo
,
679 n
->Children
[0]->Children
[0]->Store
,
680 n
->Children
[0]->Children
[1]->Store
,
681 n
->Children
[1]->Store
);
683 free_node_storage(emitInfo
->vt
, n
->Children
[0]->Children
[0]);
684 free_node_storage(emitInfo
->vt
, n
->Children
[0]->Children
[1]);
685 free_node_storage(emitInfo
->vt
, n
->Children
[1]);
689 if (info
->NumParams
== 2 &&
690 n
->Opcode
== IR_ADD
&& n
->Children
[1]->Opcode
== IR_MUL
) {
691 /* found pattern IR_ADD(A, IR_MUL(B, C)) */
692 emit(emitInfo
, n
->Children
[0]); /* A */
693 emit(emitInfo
, n
->Children
[1]->Children
[0]); /* B */
694 emit(emitInfo
, n
->Children
[1]->Children
[1]); /* C */
695 alloc_node_storage(emitInfo
, n
, -1); /* dest */
697 inst
= emit_instruction(emitInfo
,
700 n
->Children
[1]->Children
[0]->Store
,
701 n
->Children
[1]->Children
[1]->Store
,
702 n
->Children
[0]->Store
);
704 free_node_storage(emitInfo
->vt
, n
->Children
[1]->Children
[0]);
705 free_node_storage(emitInfo
->vt
, n
->Children
[1]->Children
[1]);
706 free_node_storage(emitInfo
->vt
, n
->Children
[0]);
711 /* gen code for children, may involve temp allocation */
712 for (i
= 0; i
< info
->NumParams
; i
++) {
713 emit(emitInfo
, n
->Children
[i
]);
714 if (!n
->Children
[i
] || !n
->Children
[i
]->Store
) {
721 alloc_node_storage(emitInfo
, n
, -1);
723 inst
= emit_instruction(emitInfo
,
726 (info
->NumParams
> 0 ? n
->Children
[0]->Store
: NULL
),
727 (info
->NumParams
> 1 ? n
->Children
[1]->Store
: NULL
),
728 (info
->NumParams
> 2 ? n
->Children
[2]->Store
: NULL
)
732 for (i
= 0; i
< info
->NumParams
; i
++)
733 free_node_storage(emitInfo
->vt
, n
->Children
[i
]);
740 * Emit code for == and != operators. These could normally be handled
741 * by emit_arith() except we need to be able to handle structure comparisons.
/*
 * Emit code for IR_EQUAL / IR_NOTEQUAL.
 * Both operands are emitted and must have the same storage Size,
 * otherwise "invalid operands to == or !=" is logged.  The result is a
 * single bool.  Three cases follow from the operand size:
 *  - a direct SEQ/SNE instruction (first branch),
 *  - size <= 4: compare into a 4-component temp, reduce with a dot
 *    product of the temp with itself and, for IR_EQUAL, invert the
 *    result by comparing against constant 0.0,
 *  - size > 4 (struct/array): compare register by register with SNE,
 *    accumulate with ADD, reduce with DP4 and, for IR_EQUAL, invert
 *    with SEQ against constant 0.0.
 * Local temporaries and the children's storage are freed afterwards.
 * NOTE(review): the condition of the first branch and most instruction
 * operands are not visible in this span.
 */
743 static struct prog_instruction
*
744 emit_compare(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
746 struct prog_instruction
*inst
= NULL
;
749 assert(n
->Opcode
== IR_EQUAL
|| n
->Opcode
== IR_NOTEQUAL
);
751 /* gen code for children */
752 emit(emitInfo
, n
->Children
[0]);
753 emit(emitInfo
, n
->Children
[1]);
755 if (n
->Children
[0]->Store
->Size
!= n
->Children
[1]->Store
->Size
) {
756 slang_info_log_error(emitInfo
->log
, "invalid operands to == or !=");
760 /* final result is 1 bool */
761 if (!alloc_node_storage(emitInfo
, n
, 1))
764 size
= n
->Children
[0]->Store
->Size
;
767 gl_inst_opcode opcode
= n
->Opcode
== IR_EQUAL
? OPCODE_SEQ
: OPCODE_SNE
;
768 inst
= emit_instruction(emitInfo
,
771 n
->Children
[0]->Store
,
772 n
->Children
[1]->Store
,
775 else if (size
<= 4) {
776 /* compare two vectors.
777 * Unfortunately, there's no instruction to compare vectors and
778 * return a scalar result. Do it with some compare and dot product
782 gl_inst_opcode dotOp
;
783 slang_ir_storage tempStore
;
785 if (!alloc_local_temp(emitInfo
, &tempStore
, 4)) {
792 swizzle
= SWIZZLE_XYZW
;
794 else if (size
== 3) {
796 swizzle
= SWIZZLE_XYZW
;
801 swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
);
804 /* Compute inequality (temp = (A != B)) */
805 inst
= emit_instruction(emitInfo
,
808 n
->Children
[0]->Store
,
809 n
->Children
[1]->Store
,
811 inst_comment(inst
, "Compare values");
813 /* Compute val = DOT(temp, temp) (reduction) */
814 inst
= emit_instruction(emitInfo
,
820 inst
->SrcReg
[0].Swizzle
= inst
->SrcReg
[1].Swizzle
= swizzle
; /*override*/
821 inst_comment(inst
, "Reduce vec to bool");
823 _slang_free_temp(emitInfo
->vt
, &tempStore
); /* free temp */
825 if (n
->Opcode
== IR_EQUAL
) {
826 /* compute val = !val.x with SEQ val, val, 0; */
827 slang_ir_storage zero
;
828 constant_to_storage(emitInfo
, 0.0, &zero
);
829 inst
= emit_instruction(emitInfo
,
835 inst_comment(inst
, "Invert true/false");
839 /* size > 4, struct or array compare.
840 * XXX this won't work reliably for structs with padding!!
842 GLint i
, num
= (n
->Children
[0]->Store
->Size
+ 3) / 4;
843 slang_ir_storage accTemp
, sneTemp
;
845 if (!alloc_local_temp(emitInfo
, &accTemp
, 4))
848 if (!alloc_local_temp(emitInfo
, &sneTemp
, 4))
851 for (i
= 0; i
< num
; i
++) {
852 slang_ir_storage srcStore0
= *n
->Children
[0]->Store
;
853 slang_ir_storage srcStore1
= *n
->Children
[1]->Store
;
854 srcStore0
.Index
+= i
;
855 srcStore1
.Index
+= i
;
858 /* SNE accTemp, left[i], right[i] */
859 inst
= emit_instruction(emitInfo
, OPCODE_SNE
,
864 inst_comment(inst
, "Begin struct/array comparison");
867 /* SNE sneTemp, left[i], right[i] */
868 inst
= emit_instruction(emitInfo
, OPCODE_SNE
,
873 /* ADD accTemp, accTemp, sneTemp; # like logical-OR */
874 inst
= emit_instruction(emitInfo
, OPCODE_ADD
,
882 /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */
883 inst
= emit_instruction(emitInfo
, OPCODE_DP4
,
888 inst_comment(inst
, "End struct/array comparison");
890 if (n
->Opcode
== IR_EQUAL
) {
891 /* compute tmp.x = !tmp.x via tmp.x = (tmp.x == 0) */
892 slang_ir_storage zero
;
893 constant_to_storage(emitInfo
, 0.0, &zero
);
894 inst
= emit_instruction(emitInfo
, OPCODE_SEQ
,
899 inst_comment(inst
, "Invert true/false");
902 _slang_free_temp(emitInfo
->vt
, &accTemp
);
903 _slang_free_temp(emitInfo
->vt
, &sneTemp
);
907 free_node_storage(emitInfo
->vt
, n
->Children
[0]);
908 free_node_storage(emitInfo
->vt
, n
->Children
[1]);
916 * Generate code for an IR_CLAMP instruction.
/*
 * Emit code for an IR_CLAMP node: Children[0] is the value, Children[1]
 * the lower limit and Children[2] the upper limit.
 * The value child is emitted first.  A special case (marked "this isn't
 * quite finished yet" in the code) looks for IR_FLOAT limits of all 0.0
 * and all 1.0 and marks the preceding instruction SATURATE_ZERO_ONE,
 * letting n reuse the child's storage.
 * The general path allocates storage for n, emits both limits, and
 * performs the clamp as MAX into a scratch temporary followed by MIN;
 * the scratch temporary is used because some GPUs cannot read output
 * registers.  The scratch storage is freed at the end.
 * NOTE(review): preprocessor guards around the special case, if any,
 * are not visible in this span.
 */
918 static struct prog_instruction
*
919 emit_clamp(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
921 struct prog_instruction
*inst
;
922 slang_ir_node tmpNode
;
924 assert(n
->Opcode
== IR_CLAMP
);
930 inst
= emit(emitInfo
, n
->Children
[0]);
932 /* If lower limit == 0.0 and upper limit == 1.0,
933 * set prev instruction's SaturateMode field to SATURATE_ZERO_ONE.
935 * emit OPCODE_MIN, OPCODE_MAX sequence.
938 /* XXX this isn't quite finished yet */
939 if (n
->Children
[1]->Opcode
== IR_FLOAT
&&
940 n
->Children
[1]->Value
[0] == 0.0 &&
941 n
->Children
[1]->Value
[1] == 0.0 &&
942 n
->Children
[1]->Value
[2] == 0.0 &&
943 n
->Children
[1]->Value
[3] == 0.0 &&
944 n
->Children
[2]->Opcode
== IR_FLOAT
&&
945 n
->Children
[2]->Value
[0] == 1.0 &&
946 n
->Children
[2]->Value
[1] == 1.0 &&
947 n
->Children
[2]->Value
[2] == 1.0 &&
948 n
->Children
[2]->Value
[3] == 1.0) {
950 inst
= prev_instruction(prog
);
952 if (inst
&& inst
->Opcode
!= OPCODE_NOP
) {
953 /* and prev instruction's DstReg matches n->Children[0]->Store */
954 inst
->SaturateMode
= SATURATE_ZERO_ONE
;
955 n
->Store
= n
->Children
[0]->Store
;
961 if (!alloc_node_storage(emitInfo
, n
, n
->Children
[0]->Store
->Size
))
964 emit(emitInfo
, n
->Children
[1]);
965 emit(emitInfo
, n
->Children
[2]);
967 /* Some GPUs don't allow reading from output registers. So if the
968 * dest for this clamp() is an output reg, we can't use that reg for
969 * the intermediate result. Use a temp register instead.
971 _mesa_bzero(&tmpNode
, sizeof(tmpNode
));
972 alloc_node_storage(emitInfo
, &tmpNode
, n
->Store
->Size
);
974 /* tmp = max(ch[0], ch[1]) */
975 inst
= emit_instruction(emitInfo
, OPCODE_MAX
,
976 tmpNode
.Store
, /* dest */
977 n
->Children
[0]->Store
,
978 n
->Children
[1]->Store
,
981 /* n->dest = min(tmp, ch[2]) */
982 inst
= emit_instruction(emitInfo
, OPCODE_MIN
,
985 n
->Children
[2]->Store
,
988 free_node_storage(emitInfo
->vt
, &tmpNode
);
/*
 * Emit code for arithmetic negation.
 * The operand (Children[0]) is emitted, storage of the same size is
 * allocated for n, and an instruction (described in the code as
 * "MOV dst, -src") is emitted from the operand to n's storage with
 * SrcReg[0].NegateBase set to NEGATE_XYZW.
 */
994 static struct prog_instruction
*
995 emit_negation(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
997 /* Implement as MOV dst, -src; */
998 /* XXX we could look at the previous instruction and in some circumstances
999 * modify it to accomplish the negation.
1001 struct prog_instruction
*inst
;
1003 emit(emitInfo
, n
->Children
[0]);
1005 if (!alloc_node_storage(emitInfo
, n
, n
->Children
[0]->Store
->Size
))
1008 inst
= emit_instruction(emitInfo
,
1010 n
->Store
, /* dest */
1011 n
->Children
[0]->Store
,
1014 inst
->SrcReg
[0].NegateBase
= NEGATE_XYZW
;
/*
 * Record the current instruction count (emitInfo->prog->NumInstructions)
 * as the location of n->Label.
 * Two variants are visible: one asserts that the label has no location
 * yet before setting it, the other sets the location only when it is
 * still negative.
 * NOTE(review): which variant is compiled is not visible in this span.
 */
1019 static struct prog_instruction
*
1020 emit_label(slang_emit_info
*emitInfo
, const slang_ir_node
*n
)
1024 /* XXX this fails in loop tail code - investigate someday */
1025 assert(_slang_label_get_location(n
->Label
) < 0);
1026 _slang_label_set_location(n
->Label
, emitInfo
->prog
->NumInstructions
,
1029 if (_slang_label_get_location(n
->Label
) < 0)
1030 _slang_label_set_location(n
->Label
, emitInfo
->prog
->NumInstructions
,
1038 * Emit code for a function call.
1039 * Note that for each time a function is called, we emit the function's
1040 * body code again because the set of available registers may be different.
/*
 * Emit code for an IR_CALL node.
 * The current program is saved and replaced by a fresh subroutine
 * program from new_subroutine(); the label location is set to that
 * program's instruction count and the function body (Children[0]) is
 * emitted into it, bracketed by BGNSUB/ENDSUB when EmitBeginEndSub is
 * set.  n takes the body's storage.  If the last emitted instruction
 * exists and is not a RET, a RET is appended.  The saved program is then
 * restored and an OPCODE_CAL is emitted whose BranchTarget holds the
 * subroutine number for now.
 */
1042 static struct prog_instruction
*
1043 emit_fcall(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1045 struct gl_program
*progSave
;
1046 struct prog_instruction
*inst
;
1047 GLuint subroutineId
;
1049 assert(n
->Opcode
== IR_CALL
);
1052 /* save/push cur program */
1053 progSave
= emitInfo
->prog
;
1054 emitInfo
->prog
= new_subroutine(emitInfo
, &subroutineId
);
1056 _slang_label_set_location(n
->Label
, emitInfo
->prog
->NumInstructions
,
1059 if (emitInfo
->EmitBeginEndSub
) {
1060 /* BGNSUB isn't a real instruction.
1061 * We require a label (i.e. "foobar:") though, if we're going to
1062 * print the program in the NV format. The BNGSUB instruction is
1063 * really just a NOP to attach the label to.
1065 inst
= new_instruction(emitInfo
, OPCODE_BGNSUB
);
1066 inst_comment(inst
, n
->Label
->Name
);
1069 /* body of function: */
1070 emit(emitInfo
, n
->Children
[0]);
1071 n
->Store
= n
->Children
[0]->Store
;
1073 /* add RET instruction now, if needed */
1074 inst
= prev_instruction(emitInfo
);
1075 if (inst
&& inst
->Opcode
!= OPCODE_RET
) {
1076 inst
= new_instruction(emitInfo
, OPCODE_RET
);
1079 if (emitInfo
->EmitBeginEndSub
) {
1080 inst
= new_instruction(emitInfo
, OPCODE_ENDSUB
);
1081 inst_comment(inst
, n
->Label
->Name
);
1084 /* pop/restore cur program */
1085 emitInfo
->prog
= progSave
;
1087 /* emit the function call */
1088 inst
= new_instruction(emitInfo
, OPCODE_CAL
);
1089 /* The branch target is just the subroutine number (changed later) */
1090 inst
->BranchTarget
= subroutineId
;
1091 inst_comment(inst
, n
->Label
->Name
);
1092 assert(inst
->BranchTarget
>= 0);
1099 * Emit code for a 'return' statement.
/*
 * Emit an OPCODE_RET for an IR_RETURN node.  The condition mask is
 * COND_TR, i.e. the return is always taken.
 */
1101 static struct prog_instruction
*
1102 emit_return(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1104 struct prog_instruction
*inst
;
1106 assert(n
->Opcode
== IR_RETURN
);
1108 inst
= new_instruction(emitInfo
, OPCODE_RET
);
1109 inst
->DstReg
.CondMask
= COND_TR
; /* always return */
/*
 * Emit an OPCODE_KIL_NV with CondMask = COND_TR (always kill) and set
 * UsesKill on the fragment program.  The program target is asserted to
 * be GL_FRAGMENT_PROGRAM_ARB before emitInfo->prog is cast to
 * struct gl_fragment_program.
 */
1114 static struct prog_instruction
*
1115 emit_kill(slang_emit_info
*emitInfo
)
1117 struct gl_fragment_program
*fp
;
1118 struct prog_instruction
*inst
;
1119 /* NV-KILL - discard fragment depending on condition code.
1120 * Note that ARB-KILL depends on sign of vector operand.
1122 inst
= new_instruction(emitInfo
, OPCODE_KIL_NV
);
1123 inst
->DstReg
.CondMask
= COND_TR
; /* always kill */
1125 assert(emitInfo
->prog
->Target
== GL_FRAGMENT_PROGRAM_ARB
);
1126 fp
= (struct gl_fragment_program
*) emitInfo
->prog
;
1127 fp
->UsesKill
= GL_TRUE
;
/*
 * Emit a texture-sampling instruction.
 * IR_TEX, IR_TEXB and IR_TEXP map to OPCODE_TEX, OPCODE_TXB and
 * OPCODE_TXP.  Children[1] (the texcoord) is emitted, 4-component
 * storage is allocated for the result and the instruction is emitted
 * with the texcoord as a source.  Children[0] is the sampler: its
 * Store->Size supplies TexSrcTarget (asserted to lie between
 * TEXTURE_1D_INDEX and TEXTURE_RECT_INDEX) and its Store->Index
 * supplies TexSrcUnit.
 */
1133 static struct prog_instruction
*
1134 emit_tex(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1136 struct prog_instruction
*inst
;
1137 gl_inst_opcode opcode
;
1139 if (n
->Opcode
== IR_TEX
) {
1140 opcode
= OPCODE_TEX
;
1142 else if (n
->Opcode
== IR_TEXB
) {
1143 opcode
= OPCODE_TXB
;
1146 assert(n
->Opcode
== IR_TEXP
);
1147 opcode
= OPCODE_TXP
;
1150 /* emit code for the texcoord operand */
1151 (void) emit(emitInfo
, n
->Children
[1]);
1153 /* alloc storage for result of texture fetch */
1154 if (!alloc_node_storage(emitInfo
, n
, 4))
1157 /* emit TEX instruction; Child[1] is the texcoord */
1158 inst
= emit_instruction(emitInfo
,
1161 n
->Children
[1]->Store
,
1165 /* Child[0] is the sampler (a uniform which'll indicate the texture unit) */
1166 assert(n
->Children
[0]->Store
);
1167 /* Store->Index is the sampler index */
1168 assert(n
->Children
[0]->Store
->Index
>= 0);
1169 /* Store->Size is the texture target */
1170 assert(n
->Children
[0]->Store
->Size
>= TEXTURE_1D_INDEX
);
1171 assert(n
->Children
[0]->Store
->Size
<= TEXTURE_RECT_INDEX
);
1173 inst
->TexSrcTarget
= n
->Children
[0]->Store
->Size
;
1174 inst
->TexSrcUnit
= n
->Children
[0]->Store
->Index
; /* i.e. uniform's index */
/*
 * Emit code for an IR_COPY (assignment) node.
 * The destination (Children[0]) and source (Children[1]) are emitted;
 * missing or unallocated storage on either side is treated as an error
 * ("invalid assignment" is logged for the source when the log is still
 * empty).  n takes the destination's storage.
 *  - Sampler destinations generate no code: the source's Index is
 *    copied into the destination storage at compile time.
 *  - Under PEEPHOLE_OPTIMIZATIONS, when the instruction that produced
 *    the source wrote a temporary, that instruction is redirected to
 *    write the destination directly, so no MOV is needed.
 *  - Otherwise a MOV is emitted: a block move for sizes > 4, or a
 *    single-register move annotated with source/destination names.
 * The source's temporary storage is freed at the end.
 * NOTE(review): several conditions and guards (including any around the
 * printf) are not visible in this span.
 */
1183 static struct prog_instruction
*
1184 emit_copy(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1186 struct prog_instruction
*inst
;
1188 assert(n
->Opcode
== IR_COPY
);
1191 emit(emitInfo
, n
->Children
[0]);
1192 if (!n
->Children
[0]->Store
|| n
->Children
[0]->Store
->Index
< 0) {
1193 /* an error should have been already recorded */
1198 assert(n
->Children
[1]);
1199 inst
= emit(emitInfo
, n
->Children
[1]);
1201 if (!n
->Children
[1]->Store
|| n
->Children
[1]->Store
->Index
< 0) {
1202 if (!emitInfo
->log
->text
) {
1203 slang_info_log_error(emitInfo
->log
, "invalid assignment");
1208 assert(n
->Children
[1]->Store
->Index
>= 0);
1210 /*assert(n->Children[0]->Store->Size == n->Children[1]->Store->Size);*/
1212 n
->Store
= n
->Children
[0]->Store
;
1214 if (n
->Store
->File
== PROGRAM_SAMPLER
) {
1215 /* no code generated for sampler assignments,
1216 * just copy the sampler index at compile time.
1218 n
->Store
->Index
= n
->Children
[1]->Store
->Index
;
1222 #if PEEPHOLE_OPTIMIZATIONS
1224 _slang_is_temp(emitInfo
->vt
, n
->Children
[1]->Store
) &&
1225 (inst
->DstReg
.File
== n
->Children
[1]->Store
->File
) &&
1226 (inst
->DstReg
.Index
== n
->Children
[1]->Store
->Index
)) {
1227 /* Peephole optimization:
1228 * The Right-Hand-Side has its results in a temporary place.
1229 * Modify the RHS (and the prev instruction) to store its results
1230 * in the destination specified by n->Children[0].
1231 * Then, this MOVE is a no-op.
1238 if (n
->Children
[1]->Opcode
!= IR_SWIZZLE
)
1239 _slang_free_temp(emitInfo
->vt
, n
->Children
[1]->Store
);
1240 *n
->Children
[1]->Store
= *n
->Children
[0]->Store
;
1242 /* fixup the previous instruction (which stored the RHS result) */
1243 assert(n
->Children
[0]->Store
->Index
>= 0);
1245 /* use tighter writemask when possible */
1247 if (n
->Writemask
== WRITEMASK_XYZW
) {
1248 n
->Writemask
= inst
->DstReg
.WriteMask
;
1249 printf("Narrow writemask to 0x%x\n", n
->Writemask
);
1252 storage_to_dst_reg(&inst
->DstReg
, n
->Children
[0]->Store
, n
->Writemask
);
1258 if (n
->Children
[0]->Store
->Size
> 4) {
1259 /* move matrix/struct etc (block of registers) */
1260 slang_ir_storage dstStore
= *n
->Children
[0]->Store
;
1261 slang_ir_storage srcStore
= *n
->Children
[1]->Store
;
1262 GLint size
= srcStore
.Size
;
1263 ASSERT(n
->Children
[0]->Writemask
== WRITEMASK_XYZW
);
1264 ASSERT(n
->Children
[1]->Store
->Swizzle
== SWIZZLE_NOOP
);
1268 inst
= emit_instruction(emitInfo
, OPCODE_MOV
,
1273 inst_comment(inst
, "IR_COPY block");
1280 /* single register move */
1281 char *srcAnnot
, *dstAnnot
;
1282 assert(n
->Children
[0]->Store
->Index
>= 0);
1283 inst
= emit_instruction(emitInfo
, OPCODE_MOV
,
1284 n
->Children
[0]->Store
, /* dest */
1285 n
->Children
[1]->Store
,
1288 dstAnnot
= storage_annotation(n
->Children
[0], emitInfo
->prog
);
1289 srcAnnot
= storage_annotation(n
->Children
[1], emitInfo
->prog
);
1290 inst
->Comment
= instruction_annotation(inst
->Opcode
, dstAnnot
,
1291 srcAnnot
, NULL
, NULL
);
1293 free_node_storage(emitInfo
->vt
, n
->Children
[1]);
1300 * An IR_COND node wraps a boolean expression which is used by an
1301 * IF or WHILE test. This is where we'll set condition codes, if needed.
/*
 * Emit code for an IR_COND node (the test of an IF or WHILE).
 * The boolean expression (Children[0]) is emitted.  When EmitCondCodes
 * is set: if the returned instruction wrote the register being tested,
 * that instruction is marked CondUpdate and n shares the child's
 * storage; otherwise a MOV into a one-component temporary is emitted
 * with CondUpdate set, and that temporary is released right away.
 * When condition codes are not used, n simply shares the child's
 * storage.
 * NOTE(review): the early-exit paths for a missing child or missing
 * storage are only partly visible in this span.
 */
1303 static struct prog_instruction
*
1304 emit_cond(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1306 struct prog_instruction
*inst
;
1308 assert(n
->Opcode
== IR_COND
);
1310 if (!n
->Children
[0])
1313 /* emit code for the expression */
1314 inst
= emit(emitInfo
, n
->Children
[0]);
1316 if (!n
->Children
[0]->Store
) {
1317 /* error recovery */
1321 assert(n
->Children
[0]->Store
);
1322 /*assert(n->Children[0]->Store->Size == 1);*/
1324 if (emitInfo
->EmitCondCodes
) {
1326 n
->Children
[0]->Store
&&
1327 inst
->DstReg
.File
== n
->Children
[0]->Store
->File
&&
1328 inst
->DstReg
.Index
== n
->Children
[0]->Store
->Index
) {
1329 /* The previous instruction wrote to the register who's value
1330 * we're testing. Just fix that instruction so that the
1331 * condition codes are computed.
1333 inst
->CondUpdate
= GL_TRUE
;
1334 n
->Store
= n
->Children
[0]->Store
;
1338 /* This'll happen for things like "if (i) ..." where no code
1339 * is normally generated for the expression "i".
1340 * Generate a move instruction just to set condition codes.
1342 if (!alloc_node_storage(emitInfo
, n
, 1))
1344 inst
= emit_instruction(emitInfo
, OPCODE_MOV
,
1345 n
->Store
, /* dest */
1346 n
->Children
[0]->Store
,
1349 inst
->CondUpdate
= GL_TRUE
;
1350 inst_comment(inst
, "COND expr");
1351 _slang_free_temp(emitInfo
->vt
, n
->Store
);
1356 /* No-op: the boolean result of the expression is in a regular reg */
1357 n
->Store
= n
->Children
[0]->Store
;
/*
 * Emit code for logical NOT.
 * The operand (Children[0]) is emitted.  Under PEEPHOLE_OPTIMIZATIONS,
 * if the instruction returned for the operand uses one of the comparison
 * opcodes in the local table (SLT/SLE/SGT/SGE/SEQ/SNE), its opcode is
 * replaced by the paired opposite comparison and n reuses the operand's
 * storage.  Otherwise the value is inverted by comparing it with
 * constant 0.0 (v = v == 0) into newly allocated storage, and the
 * operand's temporary is freed.
 */
1366 static struct prog_instruction
*
1367 emit_not(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1369 static const struct {
1370 gl_inst_opcode op
, opNot
;
1372 { OPCODE_SLT
, OPCODE_SGE
},
1373 { OPCODE_SLE
, OPCODE_SGT
},
1374 { OPCODE_SGT
, OPCODE_SLE
},
1375 { OPCODE_SGE
, OPCODE_SLT
},
1376 { OPCODE_SEQ
, OPCODE_SNE
},
1377 { OPCODE_SNE
, OPCODE_SEQ
},
1380 struct prog_instruction
*inst
;
1381 slang_ir_storage zero
;
1385 inst
= emit(emitInfo
, n
->Children
[0]);
1387 #if PEEPHOLE_OPTIMIZATIONS
1389 /* if the prev instruction was a comparison instruction, invert it */
1390 for (i
= 0; operators
[i
].op
; i
++) {
1391 if (inst
->Opcode
== operators
[i
].op
) {
1392 inst
->Opcode
= operators
[i
].opNot
;
1393 n
->Store
= n
->Children
[0]->Store
;
1400 /* else, invert using SEQ (v = v == 0) */
1401 if (!alloc_node_storage(emitInfo
, n
, n
->Children
[0]->Store
->Size
))
1404 constant_to_storage(emitInfo
, 0.0, &zero
);
1405 inst
= emit_instruction(emitInfo
,
1408 n
->Children
[0]->Store
,
1411 inst_comment(inst
, "NOT");
1413 free_node_storage(emitInfo
->vt
, n
->Children
[0]);
1419 static struct prog_instruction
*
1420 emit_if(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1422 struct gl_program
*prog
= emitInfo
->prog
;
1423 GLuint ifInstLoc
, elseInstLoc
= 0;
1424 GLuint condWritemask
= 0;
1426 /* emit condition expression code */
1428 struct prog_instruction
*inst
;
1429 inst
= emit(emitInfo
, n
->Children
[0]);
1430 if (emitInfo
->EmitCondCodes
) {
1432 /* error recovery */
1435 condWritemask
= inst
->DstReg
.WriteMask
;
1439 if (!n
->Children
[0]->Store
)
1443 assert(n
->Children
[0]->Store
->Size
== 1); /* a bool! */
1446 ifInstLoc
= prog
->NumInstructions
;
1447 if (emitInfo
->EmitHighLevelInstructions
) {
1448 if (emitInfo
->EmitCondCodes
) {
1449 /* IF condcode THEN ... */
1450 struct prog_instruction
*ifInst
;
1451 ifInst
= new_instruction(emitInfo
, OPCODE_IF
);
1452 ifInst
->DstReg
.CondMask
= COND_NE
; /* if cond is non-zero */
1453 /* only test the cond code (1 of 4) that was updated by the
1454 * previous instruction.
1456 ifInst
->DstReg
.CondSwizzle
= writemask_to_swizzle(condWritemask
);
1459 /* IF src[0] THEN ... */
1460 emit_instruction(emitInfo
, OPCODE_IF
,
1462 n
->Children
[0]->Store
, /* op0 */
1468 /* conditional jump to else, or endif */
1469 struct prog_instruction
*ifInst
= new_instruction(emitInfo
, OPCODE_BRA
);
1470 ifInst
->DstReg
.CondMask
= COND_EQ
; /* BRA if cond is zero */
1471 inst_comment(ifInst
, "if zero");
1472 ifInst
->DstReg
.CondSwizzle
= writemask_to_swizzle(condWritemask
);
1476 emit(emitInfo
, n
->Children
[1]);
1478 if (n
->Children
[2]) {
1479 /* have else body */
1480 elseInstLoc
= prog
->NumInstructions
;
1481 if (emitInfo
->EmitHighLevelInstructions
) {
1482 (void) new_instruction(emitInfo
, OPCODE_ELSE
);
1485 /* jump to endif instruction */
1486 struct prog_instruction
*inst
;
1487 inst
= new_instruction(emitInfo
, OPCODE_BRA
);
1488 inst_comment(inst
, "else");
1489 inst
->DstReg
.CondMask
= COND_TR
; /* always branch */
1491 prog
->Instructions
[ifInstLoc
].BranchTarget
= prog
->NumInstructions
;
1492 emit(emitInfo
, n
->Children
[2]);
1496 prog
->Instructions
[ifInstLoc
].BranchTarget
= prog
->NumInstructions
;
1499 if (emitInfo
->EmitHighLevelInstructions
) {
1500 (void) new_instruction(emitInfo
, OPCODE_ENDIF
);
1503 if (n
->Children
[2]) {
1504 prog
->Instructions
[elseInstLoc
].BranchTarget
= prog
->NumInstructions
;
1510 static struct prog_instruction
*
1511 emit_loop(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1513 struct gl_program
*prog
= emitInfo
->prog
;
1514 struct prog_instruction
*endInst
;
1515 GLuint beginInstLoc
, tailInstLoc
, endInstLoc
;
1518 /* emit OPCODE_BGNLOOP */
1519 beginInstLoc
= prog
->NumInstructions
;
1520 if (emitInfo
->EmitHighLevelInstructions
) {
1521 (void) new_instruction(emitInfo
, OPCODE_BGNLOOP
);
1525 emit(emitInfo
, n
->Children
[0]);
1528 tailInstLoc
= prog
->NumInstructions
;
1529 if (n
->Children
[1]) {
1530 if (emitInfo
->EmitComments
)
1531 emit_comment(emitInfo
, "Loop tail code:");
1532 emit(emitInfo
, n
->Children
[1]);
1535 endInstLoc
= prog
->NumInstructions
;
1536 if (emitInfo
->EmitHighLevelInstructions
) {
1537 /* emit OPCODE_ENDLOOP */
1538 endInst
= new_instruction(emitInfo
, OPCODE_ENDLOOP
);
1541 /* emit unconditional BRA-nch */
1542 endInst
= new_instruction(emitInfo
, OPCODE_BRA
);
1543 endInst
->DstReg
.CondMask
= COND_TR
; /* always true */
1545 /* ENDLOOP's BranchTarget points to the BGNLOOP inst */
1546 endInst
->BranchTarget
= beginInstLoc
;
1548 if (emitInfo
->EmitHighLevelInstructions
) {
1549 /* BGNLOOP's BranchTarget points to the ENDLOOP inst */
1550 prog
->Instructions
[beginInstLoc
].BranchTarget
= prog
->NumInstructions
-1;
1553 /* Done emitting loop code. Now walk over the loop's linked list of
1554 * BREAK and CONT nodes, filling in their BranchTarget fields (which
1555 * will point to the ENDLOOP+1 or BGNLOOP instructions, respectively).
1557 for (ir
= n
->List
; ir
; ir
= ir
->List
) {
1558 struct prog_instruction
*inst
= prog
->Instructions
+ ir
->InstLocation
;
1559 assert(inst
->BranchTarget
< 0);
1560 if (ir
->Opcode
== IR_BREAK
||
1561 ir
->Opcode
== IR_BREAK_IF_TRUE
) {
1562 assert(inst
->Opcode
== OPCODE_BRK
||
1563 inst
->Opcode
== OPCODE_BRA
);
1564 /* go to instruction after end of loop */
1565 inst
->BranchTarget
= endInstLoc
+ 1;
1568 assert(ir
->Opcode
== IR_CONT
||
1569 ir
->Opcode
== IR_CONT_IF_TRUE
);
1570 assert(inst
->Opcode
== OPCODE_CONT
||
1571 inst
->Opcode
== OPCODE_BRA
);
1572 /* go to instruction at tail of loop */
1573 inst
->BranchTarget
= endInstLoc
;
1581 * Unconditional "continue" or "break" statement.
1582 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1584 static struct prog_instruction
*
1585 emit_cont_break(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1587 gl_inst_opcode opcode
;
1588 struct prog_instruction
*inst
;
1590 if (n
->Opcode
== IR_CONT
) {
1591 /* we need to execute the loop's tail code before doing CONT */
1593 assert(n
->Parent
->Opcode
== IR_LOOP
);
1594 if (n
->Parent
->Children
[1]) {
1595 /* emit tail code */
1596 if (emitInfo
->EmitComments
) {
1597 emit_comment(emitInfo
, "continue - tail code:");
1599 emit(emitInfo
, n
->Parent
->Children
[1]);
1603 /* opcode selection */
1604 if (emitInfo
->EmitHighLevelInstructions
) {
1605 opcode
= (n
->Opcode
== IR_CONT
) ? OPCODE_CONT
: OPCODE_BRK
;
1608 opcode
= OPCODE_BRA
;
1610 n
->InstLocation
= emitInfo
->prog
->NumInstructions
;
1611 inst
= new_instruction(emitInfo
, opcode
);
1612 inst
->DstReg
.CondMask
= COND_TR
; /* always true */
1618 * Conditional "continue" or "break" statement.
1619 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1621 static struct prog_instruction
*
1622 emit_cont_break_if_true(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1624 struct prog_instruction
*inst
;
1626 assert(n
->Opcode
== IR_CONT_IF_TRUE
||
1627 n
->Opcode
== IR_BREAK_IF_TRUE
);
1629 /* evaluate condition expr, setting cond codes */
1630 inst
= emit(emitInfo
, n
->Children
[0]);
1631 if (emitInfo
->EmitCondCodes
) {
1633 inst
->CondUpdate
= GL_TRUE
;
1636 n
->InstLocation
= emitInfo
->prog
->NumInstructions
;
1638 /* opcode selection */
1639 if (emitInfo
->EmitHighLevelInstructions
) {
1640 const gl_inst_opcode opcode
1641 = (n
->Opcode
== IR_CONT_IF_TRUE
) ? OPCODE_CONT
: OPCODE_BRK
;
1642 if (emitInfo
->EmitCondCodes
) {
1643 /* Get the writemask from the previous instruction which set
1644 * the condcodes. Use that writemask as the CondSwizzle.
1646 const GLuint condWritemask
= inst
->DstReg
.WriteMask
;
1647 inst
= new_instruction(emitInfo
, opcode
);
1648 inst
->DstReg
.CondMask
= COND_NE
;
1649 inst
->DstReg
.CondSwizzle
= writemask_to_swizzle(condWritemask
);
1658 ifInstLoc
= emitInfo
->prog
->NumInstructions
;
1659 inst
= emit_instruction(emitInfo
, OPCODE_IF
,
1661 n
->Children
[0]->Store
,
1664 n
->InstLocation
= emitInfo
->prog
->NumInstructions
;
1666 inst
= new_instruction(emitInfo
, opcode
);
1667 inst
= new_instruction(emitInfo
, OPCODE_ENDIF
);
1669 emitInfo
->prog
->Instructions
[ifInstLoc
].BranchTarget
1670 = emitInfo
->prog
->NumInstructions
;
1675 const GLuint condWritemask
= inst
->DstReg
.WriteMask
;
1676 assert(emitInfo
->EmitCondCodes
);
1677 inst
= new_instruction(emitInfo
, OPCODE_BRA
);
1678 inst
->DstReg
.CondMask
= COND_NE
;
1679 inst
->DstReg
.CondSwizzle
= writemask_to_swizzle(condWritemask
);
1685 static struct prog_instruction
*
1686 emit_swizzle(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1688 struct prog_instruction
*inst
;
1690 inst
= emit(emitInfo
, n
->Children
[0]);
1692 /* setup storage info, if needed */
1693 if (!n
->Store
->Parent
)
1694 n
->Store
->Parent
= n
->Children
[0]->Store
;
1696 assert(n
->Store
->Parent
);
1703 * Move a block registers from src to dst (or move a single register).
1704 * \param size size of block, in floats (<=4 means one register)
1706 static struct prog_instruction
*
1707 move_block(slang_emit_info
*emitInfo
,
1708 GLuint size
, GLboolean relAddr
,
1709 const slang_ir_storage
*dst
,
1710 const slang_ir_storage
*src
)
1712 struct prog_instruction
*inst
;
1715 /* move matrix/struct etc (block of registers) */
1716 slang_ir_storage dstStore
= *dst
;
1717 slang_ir_storage srcStore
= *src
;
1718 //GLint size = srcStore.Size;
1719 /*ASSERT(n->Children[0]->Writemask == WRITEMASK_XYZW);
1720 ASSERT(n->Children[1]->Store->Swizzle == SWIZZLE_NOOP);
1725 inst
= emit_instruction(emitInfo
, OPCODE_MOV
,
1730 inst
->SrcReg
[0].RelAddr
= relAddr
;
1731 inst_comment(inst
, "IR_COPY block");
1738 /* single register move */
1739 inst
= emit_instruction(emitInfo
,
1745 inst
->SrcReg
[0].RelAddr
= relAddr
;
1753 * Dereference array element. Just resolve storage for the array
1754 * element represented by this node.
1755 * This is typically where Indirect addressing comes into play.
1756 * See comments on struct slang_ir_storage.
1758 static struct prog_instruction
*
1759 emit_array_element(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1761 assert(n
->Opcode
== IR_ELEMENT
);
1763 assert(n
->Store
->File
== PROGRAM_UNDEFINED
);
1764 assert(n
->Store
->Parent
);
1765 assert(n
->Store
->Size
> 0);
1768 slang_ir_storage
*root
= n
->Store
;
1769 while (root
->Parent
)
1770 root
= root
->Parent
;
1772 if (root
->File
== PROGRAM_STATE_VAR
) {
1773 GLint index
= _slang_alloc_statevar(n
, emitInfo
->prog
->Parameters
);
1774 assert(n
->Store
->Index
== index
);
1779 /* do codegen for array */
1780 emit(emitInfo
, n
->Children
[0]);
1782 if (n
->Children
[1]->Opcode
== IR_FLOAT
) {
1783 /* Constant array index.
1784 * Set Store's index to be the offset of the array element in
1785 * the register file.
1787 const GLint element
= (GLint
) n
->Children
[1]->Value
[0];
1788 const GLint sz
= (n
->Store
->Size
+ 3) / 4; /* size in slots/registers */
1790 n
->Store
->Index
= sz
* element
;
1791 assert(n
->Store
->Parent
);
1794 /* Variable array index */
1795 struct prog_instruction
*inst
;
1797 /* do codegen for array index expression */
1798 emit(emitInfo
, n
->Children
[1]);
1800 /* allocate temp storage for the array element */
1801 assert(n
->Store
->Index
< 0);
1802 n
->Store
->File
= PROGRAM_TEMPORARY
;
1803 n
->Store
->Parent
= NULL
;
1804 alloc_node_storage(emitInfo
, n
, -1);
1806 if (n
->Store
->Size
> 4) {
1807 /* need to multiply the index by the element size */
1808 const GLint elemSize
= (n
->Store
->Size
+ 3) / 4;
1809 slang_ir_storage indexTemp
, elemSizeStore
;
1811 /* constant containing the element size */
1812 constant_to_storage(emitInfo
, (float) elemSize
, &elemSizeStore
);
1814 /* allocate 1 float indexTemp */
1815 alloc_local_temp(emitInfo
, &indexTemp
, 1);
1817 /* MUL temp, index, elemSize */
1818 inst
= emit_instruction(emitInfo
, OPCODE_MUL
,
1819 &indexTemp
, /* dest */
1820 n
->Children
[1]->Store
, /* the index */
1824 /* load ADDR[0].X = temp */
1825 inst
= emit_arl_instruction(emitInfo
, 0, &indexTemp
);
1827 _slang_free_temp(emitInfo
->vt
, &indexTemp
);
1830 /* simply load address reg w/ array index */
1831 inst
= emit_arl_instruction(emitInfo
, 0, n
->Children
[1]->Store
);
1834 /* copy from array element to temp storage */
1835 move_block(emitInfo
, n
->Store
->Size
, GL_TRUE
,
1836 n
->Store
, n
->Children
[0]->Store
);
1839 /* if array element size is one, make sure we only access X */
1840 if (n
->Store
->Size
== 1)
1841 n
->Store
->Swizzle
= SWIZZLE_XXXX
;
1843 return NULL
; /* no instruction */
1848 * Resolve storage for accessing a structure field.
1850 static struct prog_instruction
*
1851 emit_struct_field(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1853 slang_ir_storage
*root
= n
->Store
;
1855 assert(n
->Opcode
== IR_FIELD
);
1857 while (root
->Parent
)
1858 root
= root
->Parent
;
1860 /* If this is the field of a state var, allocate constant/uniform
1861 * storage for it now if we haven't already.
1862 * Note that we allocate storage (uniform/constant slots) for state
1863 * variables here rather than at declaration time so we only allocate
1864 * space for the ones that we actually use!
1866 if (root
->File
== PROGRAM_STATE_VAR
) {
1867 root
->Index
= _slang_alloc_statevar(n
, emitInfo
->prog
->Parameters
);
1868 if (root
->Index
< 0) {
1869 slang_info_log_error(emitInfo
->log
, "Error parsing state variable");
1874 /* do codegen for struct */
1875 emit(emitInfo
, n
->Children
[0]);
1878 return NULL
; /* no instruction */
1883 * Emit code for a variable declaration.
1884 * This usually doesn't result in any code generation, but just
1885 * memory allocation.
1887 static struct prog_instruction
*
1888 emit_var_decl(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1891 assert(n
->Store
->File
!= PROGRAM_UNDEFINED
);
1892 assert(n
->Store
->Size
> 0);
1893 /*assert(n->Store->Index < 0);*/
1895 if (!n
->Var
|| n
->Var
->isTemp
) {
1896 /* a nameless/temporary variable, will be freed after first use */
1898 if (n
->Store
->Index
< 0 && !_slang_alloc_temp(emitInfo
->vt
, n
->Store
)) {
1899 slang_info_log_error(emitInfo
->log
,
1900 "Ran out of registers, too many temporaries");
1905 /* a regular variable */
1906 _slang_add_variable(emitInfo
->vt
, n
->Var
);
1907 if (!_slang_alloc_var(emitInfo
->vt
, n
->Store
)) {
1908 slang_info_log_error(emitInfo
->log
,
1909 "Ran out of registers, too many variables");
1913 printf("IR_VAR_DECL %s %d store %p\n",
1914 (char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
1916 assert(n
->Var
->aux
== n
->Store
);
1918 if (emitInfo
->EmitComments
) {
1919 /* emit NOP with comment describing the variable's storage location */
1921 sprintf(s
, "TEMP[%d]%s = variable %s (size %d)",
1923 _mesa_swizzle_string(n
->Store
->Swizzle
, 0, GL_FALSE
),
1924 (n
->Var
? (char *) n
->Var
->a_name
: "anonymous"),
1926 emit_comment(emitInfo
, s
);
1933 * Emit code for a reference to a variable.
1934 * Actually, no code is generated but we may do some memory alloation.
1935 * In particular, state vars (uniforms) are allocated on an as-needed basis.
1937 static struct prog_instruction
*
1938 emit_var_ref(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1941 assert(n
->Store
->File
!= PROGRAM_UNDEFINED
);
1943 if (n
->Store
->File
== PROGRAM_STATE_VAR
&& n
->Store
->Index
< 0) {
1944 n
->Store
->Index
= _slang_alloc_statevar(n
, emitInfo
->prog
->Parameters
);
1946 else if (n
->Store
->File
== PROGRAM_UNIFORM
) {
1947 /* mark var as used */
1948 _mesa_use_uniform(emitInfo
->prog
->Parameters
, (char *) n
->Var
->a_name
);
1951 if (n
->Store
->Index
< 0) {
1952 /* probably ran out of registers */
1955 assert(n
->Store
->Size
> 0);
/**
 * Recursive code-generation entry point for one IR node.
 *
 * Switches on n->Opcode and hands the node to the matching emit_*()
 * helper (emit_var_decl, emit_var_ref, emit_array_element,
 * emit_struct_field, emit_swizzle, emit_arith, emit_compare, emit_clamp,
 * emit_tex, emit_negation, emit_copy, emit_cond, emit_not, emit_label,
 * emit_kill, emit_fcall, emit_if, emit_loop, emit_cont_break_if_true,
 * emit_cont_break, emit_return) or emits a BGNSUB/ENDSUB instruction
 * directly.  Sequence nodes emit both children and take the second
 * child's storage; scope and function-call nodes push a variable table
 * before and pop it after emitting; float constants get an index from
 * _mesa_add_unnamed_constant() and an error is logged if that index is
 * negative.  An opcode with no case is reported through _mesa_problem().
 *
 * emitInfo->log->error_flag is tested on entry and again between the two
 * children of a sequence node.
 * NOTE(review): most case labels and several return statements are not
 * visible in this copy of the function; presumably it returns NULL once
 * the error flag is set -- confirm against the complete source.
 */
1961 static struct prog_instruction
*
1962 emit(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1964 struct prog_instruction
*inst
;
1968 if (emitInfo
->log
->error_flag
) {
1972 switch (n
->Opcode
) {
1974 /* sequence of two sub-trees */
1975 assert(n
->Children
[0]);
1976 assert(n
->Children
[1]);
1977 emit(emitInfo
, n
->Children
[0]);
1978 if (emitInfo
->log
->error_flag
)
1980 inst
= emit(emitInfo
, n
->Children
[1]);
1984 n
->Store
= n
->Children
[1]->Store
;
1988 /* new variable scope */
1989 _slang_push_var_table(emitInfo
->vt
);
1990 inst
= emit(emitInfo
, n
->Children
[0]);
1991 _slang_pop_var_table(emitInfo
->vt
);
1995 /* Variable declaration - allocate a register for it */
1996 inst
= emit_var_decl(emitInfo
, n
);
2000 /* Reference to a variable
2001 * Storage should have already been resolved/allocated.
2003 return emit_var_ref(emitInfo
, n
);
2006 return emit_array_element(emitInfo
, n
);
2008 return emit_struct_field(emitInfo
, n
);
2010 return emit_swizzle(emitInfo
, n
);
2012 /* Simple arithmetic */
2049 /* trinary operators */
2051 return emit_arith(emitInfo
, n
);
2055 return emit_compare(emitInfo
, n
);
2058 return emit_clamp(emitInfo
, n
);
2062 return emit_tex(emitInfo
, n
);
2064 return emit_negation(emitInfo
, n
);
2066 /* find storage location for this float constant */
2067 n
->Store
->Index
= _mesa_add_unnamed_constant(emitInfo
->prog
->Parameters
,
2070 &n
->Store
->Swizzle
);
2071 if (n
->Store
->Index
< 0) {
2072 slang_info_log_error(emitInfo
->log
, "Ran out of space for constants");
2078 return emit_copy(emitInfo
, n
);
2081 return emit_cond(emitInfo
, n
);
2084 return emit_not(emitInfo
, n
);
2087 return emit_label(emitInfo
, n
);
2090 return emit_kill(emitInfo
);
2093 /* new variable scope for subroutines/function calls */
2094 _slang_push_var_table(emitInfo
->vt
);
2095 inst
= emit_fcall(emitInfo
, n
);
2096 _slang_pop_var_table(emitInfo
->vt
);
2100 return emit_if(emitInfo
, n
);
2103 return emit_loop(emitInfo
, n
);
2104 case IR_BREAK_IF_TRUE
:
2105 case IR_CONT_IF_TRUE
:
2106 return emit_cont_break_if_true(emitInfo
, n
);
2110 return emit_cont_break(emitInfo
, n
);
2113 return new_instruction(emitInfo
, OPCODE_BGNSUB
);
2115 return new_instruction(emitInfo
, OPCODE_ENDSUB
);
2117 return emit_return(emitInfo
, n
);
2123 _mesa_problem(NULL
, "Unexpected IR opcode in emit()\n");
2130 * After code generation, any subroutines will be in separate program
2131 * objects. This function appends all the subroutines onto the main
2132 * program and resolves the linking of all the branch/call instructions.
2133 * XXX this logic should really be part of the linking process...
2136 _slang_resolve_subroutines(slang_emit_info
*emitInfo
)
2138 GET_CURRENT_CONTEXT(ctx
);
2139 struct gl_program
*mainP
= emitInfo
->prog
;
2140 GLuint
*subroutineLoc
, i
, total
;
2143 = (GLuint
*) _mesa_malloc(emitInfo
->NumSubroutines
* sizeof(GLuint
));
2145 /* total number of instructions */
2146 total
= mainP
->NumInstructions
;
2147 for (i
= 0; i
< emitInfo
->NumSubroutines
; i
++) {
2148 subroutineLoc
[i
] = total
;
2149 total
+= emitInfo
->Subroutines
[i
]->NumInstructions
;
2152 /* adjust BrancTargets within the functions */
2153 for (i
= 0; i
< emitInfo
->NumSubroutines
; i
++) {
2154 struct gl_program
*sub
= emitInfo
->Subroutines
[i
];
2156 for (j
= 0; j
< sub
->NumInstructions
; j
++) {
2157 struct prog_instruction
*inst
= sub
->Instructions
+ j
;
2158 if (inst
->Opcode
!= OPCODE_CAL
&& inst
->BranchTarget
>= 0) {
2159 inst
->BranchTarget
+= subroutineLoc
[i
];
2164 /* append subroutines' instructions after main's instructions */
2165 mainP
->Instructions
= _mesa_realloc_instructions(mainP
->Instructions
,
2166 mainP
->NumInstructions
,
2168 mainP
->NumInstructions
= total
;
2169 for (i
= 0; i
< emitInfo
->NumSubroutines
; i
++) {
2170 struct gl_program
*sub
= emitInfo
->Subroutines
[i
];
2171 _mesa_copy_instructions(mainP
->Instructions
+ subroutineLoc
[i
],
2173 sub
->NumInstructions
);
2174 /* delete subroutine code */
2175 sub
->Parameters
= NULL
; /* prevent double-free */
2176 _mesa_reference_program(ctx
, &emitInfo
->Subroutines
[i
], NULL
);
2179 /* free subroutine list */
2180 if (emitInfo
->Subroutines
) {
2181 _mesa_free(emitInfo
->Subroutines
);
2182 emitInfo
->Subroutines
= NULL
;
2184 emitInfo
->NumSubroutines
= 0;
2186 /* Examine CAL instructions.
2187 * At this point, the BranchTarget field of the CAL instruction is
2188 * the number/id of the subroutine to call (an index into the
2189 * emitInfo->Subroutines list).
2190 * Translate that into an actual instruction location now.
2192 for (i
= 0; i
< mainP
->NumInstructions
; i
++) {
2193 struct prog_instruction
*inst
= mainP
->Instructions
+ i
;
2194 if (inst
->Opcode
== OPCODE_CAL
) {
2195 const GLuint f
= inst
->BranchTarget
;
2196 inst
->BranchTarget
= subroutineLoc
[f
];
2200 _mesa_free(subroutineLoc
);
2207 _slang_emit_code(slang_ir_node
*n
, slang_var_table
*vt
,
2208 struct gl_program
*prog
, GLboolean withEnd
,
2209 slang_info_log
*log
)
2211 GET_CURRENT_CONTEXT(ctx
);
2213 slang_emit_info emitInfo
;
2218 emitInfo
.prog
= prog
;
2219 emitInfo
.Subroutines
= NULL
;
2220 emitInfo
.NumSubroutines
= 0;
2222 emitInfo
.EmitHighLevelInstructions
= ctx
->Shader
.EmitHighLevelInstructions
;
2223 emitInfo
.EmitCondCodes
= ctx
->Shader
.EmitCondCodes
;
2224 emitInfo
.EmitComments
= ctx
->Shader
.EmitComments
;
2225 emitInfo
.EmitBeginEndSub
= GL_TRUE
;
2227 if (!emitInfo
.EmitCondCodes
) {
2228 emitInfo
.EmitHighLevelInstructions
= GL_TRUE
;
2231 /* Check uniform/constant limits */
2232 if (prog
->Target
== GL_FRAGMENT_PROGRAM_ARB
) {
2233 maxUniforms
= ctx
->Const
.FragmentProgram
.MaxUniformComponents
/ 4;
2236 assert(prog
->Target
== GL_VERTEX_PROGRAM_ARB
);
2237 maxUniforms
= ctx
->Const
.VertexProgram
.MaxUniformComponents
/ 4;
2239 if (prog
->Parameters
->NumParameters
> maxUniforms
) {
2240 slang_info_log_error(log
, "Constant/uniform register limit exceeded");
2244 (void) emit(&emitInfo
, n
);
2246 /* finish up by adding the END opcode to program */
2248 struct prog_instruction
*inst
;
2249 inst
= new_instruction(&emitInfo
, OPCODE_END
);
2252 _slang_resolve_subroutines(&emitInfo
);
2257 printf("*********** End emit code (%u inst):\n", prog
->NumInstructions
);
2258 _mesa_print_program(prog
);
2259 _mesa_print_program_parameters(ctx
,prog
);