2 * Mesa 3-D graphics library
5 * Copyright (C) 2005-2008 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Emit program instructions (PI code) from IR trees.
34 *** To emit GPU instructions, we basically just do an in-order traversal
39 #include "main/imports.h"
40 #include "main/context.h"
41 #include "main/macros.h"
42 #include "shader/program.h"
43 #include "shader/prog_instruction.h"
44 #include "shader/prog_parameter.h"
45 #include "shader/prog_print.h"
46 #include "slang_builtin.h"
47 #include "slang_emit.h"
48 #include "slang_mem.h"
51 #define PEEPHOLE_OPTIMIZATIONS 1
/*
 * Emitter state fields. Presumably these are members of slang_emit_info
 * (the enclosing declaration line is not visible in this listing - confirm).
 * Code below also reads emitInfo->vt and emitInfo->log, which are not among
 * the fields visible here.
 */
/* program that new_instruction() appends to */
59 struct gl_program
*prog
;
/* array of subroutine programs, grown by new_subroutine() */
60 struct gl_program
**Subroutines
;
/* number of entries in Subroutines */
61 GLuint NumSubroutines
;
63 /* code-gen options */
/* selects OPCODE_IF versus OPCODE_BRA emission in emit_if() */
64 GLboolean EmitHighLevelInstructions
;
/* when set, emit_cond() marks instructions with CondUpdate */
65 GLboolean EmitCondCodes
;
66 GLboolean EmitComments
;
/* when set, emit_fcall() brackets the body with BGNSUB and ENDSUB */
67 GLboolean EmitBeginEndSub
; /* XXX TEMPORARY */
/*
 * new_subroutine: append a fresh subroutine program to emitInfo->Subroutines.
 *
 * The array is grown by one slot with _mesa_realloc(), the new slot is filled
 * with a program created by ctx->Driver.NewProgram() for the same Target as
 * the current program, and the new program shares the current program's
 * Parameters pointer. NumSubroutines is incremented and the new program is
 * returned.
 *
 * NOTE(review): the realloc sizes are computed with sizeof(struct gl_program)
 * although the array elements are struct gl_program pointers; this looks like
 * over-allocation rather than the intended element size - confirm.
 * NOTE(review): the result of _mesa_realloc() is stored without a NULL check.
 * NOTE(review): the id parameter is not written in the visible lines; the
 * listing appears to have dropped some lines (emit_fcall reads the value it
 * passes here) - confirm against the complete file.
 */
72 static struct gl_program
*
73 new_subroutine(slang_emit_info
*emitInfo
, GLuint
*id
)
75 GET_CURRENT_CONTEXT(ctx
);
76 const GLuint n
= emitInfo
->NumSubroutines
;
/* grow the subroutine array from n to n + 1 entries */
78 emitInfo
->Subroutines
= (struct gl_program
**)
79 _mesa_realloc(emitInfo
->Subroutines
,
80 n
* sizeof(struct gl_program
),
81 (n
+ 1) * sizeof(struct gl_program
));
/* create the program for the new slot */
82 emitInfo
->Subroutines
[n
] = ctx
->Driver
.NewProgram(ctx
, emitInfo
->prog
->Target
, 0);
/* the subroutine shares the parameter list pointer of the current program */
83 emitInfo
->Subroutines
[n
]->Parameters
= emitInfo
->prog
->Parameters
;
84 emitInfo
->NumSubroutines
++;
86 return emitInfo
->Subroutines
[n
];
91 * Convert a writemask to a swizzle. Used for testing cond codes because
92 * we only want to test the cond code component(s) that was set by the
93 * previous instruction.
/*
 * writemask_to_swizzle: map a single-component writemask to a swizzle.
 *
 * The visible lines compare writemask against WRITEMASK_X, _Y, _Z and _W in
 * turn and fall back to SWIZZLE_XYZW when none matches.
 * NOTE(review): the statement executed for each match is not present in this
 * listing; presumably each returns the swizzle for that one component -
 * confirm against the complete file.
 */
96 writemask_to_swizzle(GLuint writemask
)
98 if (writemask
== WRITEMASK_X
)
100 if (writemask
== WRITEMASK_Y
)
102 if (writemask
== WRITEMASK_Z
)
104 if (writemask
== WRITEMASK_W
)
106 return SWIZZLE_XYZW
; /* shouldn't be hit */
111 * Swizzle a swizzle (function composition).
112 * That is, return swz2(swz1), or said another way: swz1.swz2
113 * Example: swizzle_swizzle(".zwxx", ".xxyw") yields ".zzwx"
/*
 * _slang_swizzle_swizzle: compose two swizzles.
 *
 * For each of the four positions i, component c is read from swz2 at
 * position i and the result component is read from swz1 at position c.
 * The four results are packed with MAKE_SWIZZLE4 into swz.
 * NOTE(review): the local declarations and the return statement are not
 * present in this listing; presumably swz is returned - confirm.
 */
116 _slang_swizzle_swizzle(GLuint swz1
, GLuint swz2
)
119 for (i
= 0; i
< 4; i
++) {
/* c selects which component of swz1 lands in position i */
120 GLuint c
= GET_SWZ(swz2
, i
);
122 s
[i
] = GET_SWZ(swz1
, c
);
126 swz
= MAKE_SWIZZLE4(s
[0], s
[1], s
[2], s
[3]);
132 * Allocate storage for the given node (if it hasn't already been allocated).
134 * Typically this is temporary storage for an intermediate result (such as
135 * for a multiply or add, etc).
137 * If n->Store does not exist it will be created and will be of the size
138 * specified by defaultSize.
/*
 * alloc_node_storage: make sure node n has storage with a register index.
 *
 * Visible behavior: a new PROGRAM_TEMPORARY storage record of defaultSize is
 * created with index -1 and assigned to n->Store; then, if the index is
 * still negative, _slang_alloc_temp() is asked for registers. On failure an
 * error is logged and n->Store is released with _slang_free().
 *
 * Callers in this file test the result as a boolean (zero means failure).
 * NOTE(review): the last parameter, the condition guarding the storage
 * creation, and the return statements are not present in this listing.
 * emit_arith() passes -1 as the size while the visible assert requires a
 * positive size, so the creation is presumably conditional on n->Store being
 * absent - confirm against the complete file.
 */
141 alloc_node_storage(slang_emit_info
*emitInfo
, slang_ir_node
*n
,
146 assert(defaultSize
> 0);
147 n
->Store
= _slang_new_ir_storage(PROGRAM_TEMPORARY
, -1, defaultSize
);
150 /* now allocate actual register(s). I.e. set n->Store->Index >= 0 */
151 if (n
->Store
->Index
< 0) {
152 if (!_slang_alloc_temp(emitInfo
->vt
, n
->Store
)) {
153 slang_info_log_error(emitInfo
->log
,
154 "Ran out of registers, too many temporaries");
/* release the storage record that could not get a register */
155 _slang_free(n
->Store
);
165 * Free temporary storage, if n->Store is, in fact, temp storage.
/*
 * free_node_storage: release the temporary register held by node n, if any.
 *
 * Storage is released only when all of the following hold: its File is
 * PROGRAM_TEMPORARY, its Index is non-negative, the node is not an
 * IR_SWIZZLE, and _slang_is_temp() reports it as a temp in vt. After
 * _slang_free_temp() the Index is reset to -1 and n->Store is set to NULL.
 * NOTE(review): n->Store is dereferenced without a NULL check in the visible
 * lines - confirm callers always pass nodes that have storage.
 */
169 free_node_storage(slang_var_table
*vt
, slang_ir_node
*n
)
171 if (n
->Store
->File
== PROGRAM_TEMPORARY
&&
172 n
->Store
->Index
>= 0 &&
173 n
->Opcode
!= IR_SWIZZLE
) {
174 if (_slang_is_temp(vt
, n
->Store
)) {
175 _slang_free_temp(vt
, n
->Store
);
/* mark the storage as no longer holding a register */
176 n
->Store
->Index
= -1;
177 n
->Store
= NULL
; /* XXX this may not be needed */
184 * Helper function to allocate a short-term temporary.
185 * Free it with _slang_free_temp().
/*
 * alloc_local_temp: set up caller-provided storage as a short-term temp.
 *
 * The storage record is zeroed, its File is set to PROGRAM_TEMPORARY and the
 * result of _slang_alloc_temp() is returned; callers in this file treat a
 * zero result as failure.
 * NOTE(review): the use of the size parameter is not present in this
 * listing; presumably it is stored into temp before allocation - confirm.
 */
188 alloc_local_temp(slang_emit_info
*emitInfo
, slang_ir_storage
*temp
, GLint size
)
/* start from a fully cleared storage record */
192 _mesa_bzero(temp
, sizeof(*temp
));
194 temp
->File
= PROGRAM_TEMPORARY
;
196 return _slang_alloc_temp(emitInfo
->vt
, temp
);
201 * Remove any SWIZZLE_NIL terms from given swizzle mask.
202 * For a swizzle like .z??? generate .zzzz (replicate single component).
203 * Else, for .wx?? generate .wxzw (insert default component for the position).
/*
 * fix_swizzle: return swizzle with SWIZZLE_NIL terms replaced.
 *
 * The four components c0..c3 are extracted with GET_SWZ. When c1, c2 and c3
 * are all SWIZZLE_NIL the first component is smeared across all positions;
 * otherwise each NIL component is replaced by a default. The result is
 * repacked with MAKE_SWIZZLE4.
 * NOTE(review): the assignments performed in each branch are not present in
 * this listing; the description above follows the existing comments -
 * confirm against the complete file.
 */
206 fix_swizzle(GLuint swizzle
)
208 GLuint c0
= GET_SWZ(swizzle
, 0),
209 c1
= GET_SWZ(swizzle
, 1),
210 c2
= GET_SWZ(swizzle
, 2),
211 c3
= GET_SWZ(swizzle
, 3);
212 if (c1
== SWIZZLE_NIL
&& c2
== SWIZZLE_NIL
&& c3
== SWIZZLE_NIL
) {
213 /* smear first component across all positions */
217 /* insert default swizzle components */
218 if (c0
== SWIZZLE_NIL
)
220 if (c1
== SWIZZLE_NIL
)
222 if (c2
== SWIZZLE_NIL
)
224 if (c3
== SWIZZLE_NIL
)
227 return MAKE_SWIZZLE4(c0
, c1
, c2
, c3
);
233 * Convert IR storage to an instruction dst register.
/*
 * storage_to_dst_reg: fill instruction destination register dst from IR
 * storage st.
 *
 * Size, Index and Swizzle are read from st; the swizzle is composed with
 * _slang_swizzle_swizzle() while walking up to parent storage, the File is
 * copied to dst (it must not be PROGRAM_UNDEFINED), and the WriteMask is
 * either WRITEMASK_X shifted by the first swizzle component or the writemask
 * argument passed by the caller.
 * NOTE(review): the last parameter, the parent-walk loop, the index handling
 * and the condition choosing between the two WriteMask forms are not present
 * in this listing; presumably the shifted form is used for size 1 storage -
 * confirm against the complete file.
 */
236 storage_to_dst_reg(struct prog_dst_register
*dst
, const slang_ir_storage
*st
,
239 const GLint size
= st
->Size
;
240 GLint index
= st
->Index
;
241 GLuint swizzle
= st
->Swizzle
;
243 /* if this is storage relative to some parent storage, walk up the tree */
247 swizzle
= _slang_swizzle_swizzle(st
->Swizzle
, swizzle
);
250 assert(st
->File
!= PROGRAM_UNDEFINED
);
251 dst
->File
= st
->File
;
/* single-component form: write only the component named by the swizzle */
260 GLuint comp
= GET_SWZ(swizzle
, 0);
262 dst
->WriteMask
= WRITEMASK_X
<< comp
;
265 dst
->WriteMask
= writemask
;
271 * Convert IR storage to an instruction src register.
/*
 * storage_to_src_reg: fill instruction source register src from IR storage
 * st.
 *
 * RelAddr, Index and Swizzle are read from st. While walking up to parent
 * storage the swizzle is composed through fix_swizzle() and
 * _slang_swizzle_swizzle(). The final swizzle is passed through
 * fix_swizzle() again and every component is asserted to be at most
 * SWIZZLE_W before being stored. File and RelAddr are copied to src.
 *
 * The block marked XXX temporary casts away the const on st and rewrites a
 * PROGRAM_UNDEFINED File to PROGRAM_TEMPORARY, so this function can modify
 * its input.
 * NOTE(review): the parent-walk loop and the index assignment are not
 * present in this listing - confirm against the complete file.
 */
274 storage_to_src_reg(struct prog_src_register
*src
, const slang_ir_storage
*st
)
276 const GLboolean relAddr
= st
->RelAddr
;
277 GLint index
= st
->Index
;
278 GLuint swizzle
= st
->Swizzle
;
280 /* if this is storage relative to some parent storage, walk up the tree */
284 swizzle
= _slang_swizzle_swizzle(fix_swizzle(st
->Swizzle
), swizzle
);
287 assert(st
->File
>= 0);
288 #if 1 /* XXX temporary */
289 if (st
->File
== PROGRAM_UNDEFINED
) {
/* deliberate const cast so the undefined File can be patched in place */
290 slang_ir_storage
*st0
= (slang_ir_storage
*) st
;
291 st0
->File
= PROGRAM_TEMPORARY
;
294 assert(st
->File
< PROGRAM_UNDEFINED
);
295 src
->File
= st
->File
;
/* remove any remaining NIL components before storing the swizzle */
300 swizzle
= fix_swizzle(swizzle
);
301 assert(GET_SWZ(swizzle
, 0) <= SWIZZLE_W
);
302 assert(GET_SWZ(swizzle
, 1) <= SWIZZLE_W
);
303 assert(GET_SWZ(swizzle
, 2) <= SWIZZLE_W
);
304 assert(GET_SWZ(swizzle
, 3) <= SWIZZLE_W
);
305 src
->Swizzle
= swizzle
;
307 src
->RelAddr
= relAddr
;
312 * Setup an instruction src register to point to a scalar constant.
/*
 * constant_to_src_reg: point src at an unnamed constant in the parameter
 * list of the current program.
 *
 * _mesa_add_unnamed_constant() is called on emitInfo->prog->Parameters with
 * a count of 1; it yields the register index (asserted non-negative) and a
 * swizzle through zeroSwizzle. src is then set to PROGRAM_CONSTANT with that
 * index and swizzle.
 * NOTE(review): the declarations of zeroReg, zeroSwizzle and value are not
 * present in this listing; presumably value holds val - confirm. Despite the
 * zero in their names, the locals are used for whatever constant is passed.
 */
315 constant_to_src_reg(struct prog_src_register
*src
, GLfloat val
,
316 slang_emit_info
*emitInfo
)
323 zeroReg
= _mesa_add_unnamed_constant(emitInfo
->prog
->Parameters
,
324 value
, 1, &zeroSwizzle
);
325 assert(zeroReg
>= 0);
327 src
->File
= PROGRAM_CONSTANT
;
328 src
->Index
= zeroReg
;
329 src
->Swizzle
= zeroSwizzle
;
334 * Add new instruction at end of given program.
335 * \param prog the program to append instruction onto
336 * \param opcode opcode for the new instruction
337 * \return pointer to the new instruction
/*
 * new_instruction: append one instruction with the given opcode to
 * emitInfo->prog.
 *
 * The instruction array is grown by one with _mesa_realloc_instructions(),
 * the new slot is initialized with _mesa_init_instructions(), its Opcode is
 * set and its BranchTarget is set to -1 (invalid). NumInstructions is
 * incremented. Because the array is reallocated, instruction pointers
 * obtained before this call may no longer be valid afterwards.
 * NOTE(review): the print calls are presumably compiled only under a debug
 * conditional, and the return statement is not present in this listing;
 * callers use the result as the new instruction - confirm.
 */
339 static struct prog_instruction
*
340 new_instruction(slang_emit_info
*emitInfo
, gl_inst_opcode opcode
)
342 struct gl_program
*prog
= emitInfo
->prog
;
343 struct prog_instruction
*inst
;
346 /* print prev inst */
347 if (prog
->NumInstructions
> 0) {
348 _mesa_print_instruction(prog
->Instructions
+ prog
->NumInstructions
- 1);
/* grow the instruction array by one entry */
351 prog
->Instructions
= _mesa_realloc_instructions(prog
->Instructions
,
352 prog
->NumInstructions
,
353 prog
->NumInstructions
+ 1);
/* the new instruction is the last slot of the grown array */
354 inst
= prog
->Instructions
+ prog
->NumInstructions
;
355 prog
->NumInstructions
++;
356 _mesa_init_instructions(inst
, 1);
357 inst
->Opcode
= opcode
;
358 inst
->BranchTarget
= -1; /* invalid */
360 printf("New inst %d: %p %s\n", prog->NumInstructions-1,(void*)inst,
361 _mesa_opcode_string(inst->Opcode));
368 * Return pointer to last instruction in program.
/*
 * prev_instruction: return a pointer to the last instruction of
 * emitInfo->prog.
 *
 * NOTE(review): the statement for the empty-program case is not present in
 * this listing; callers test the result against NULL, so presumably NULL is
 * returned when NumInstructions is 0 - confirm.
 */
370 static struct prog_instruction
*
371 prev_instruction(slang_emit_info
*emitInfo
)
373 struct gl_program
*prog
= emitInfo
->prog
;
374 if (prog
->NumInstructions
== 0)
377 return prog
->Instructions
+ prog
->NumInstructions
- 1;
/*
 * Forward declaration of emit(): the emit_*() helpers that follow call it on
 * child nodes before its definition appears.
 */
381 static struct prog_instruction
*
382 emit(slang_emit_info
*emitInfo
, slang_ir_node
*n
);
386 * Return an annotation string for given node's storage.
/*
 * storage_annotation: build a short text describing the storage of node n.
 *
 * The text is formatted into a 100-byte static buffer depending on the
 * storage File: constant values for PROGRAM_CONSTANT, the variable name or
 * t[index] for PROGRAM_TEMPORARY, parameter names for state vars, uniforms
 * and varyings, and input[index] or output[index] forms. A copy made with
 * _mesa_strdup() is returned, so the caller presumably owns the result.
 *
 * NOTE(review): sprintf() writes names of unknown length into the 100-byte
 * buffer without a bound - a long name could overflow it; confirm.
 * NOTE(review): the static buffer makes this function non-reentrant.
 * NOTE(review): the switch statement, several case labels and the
 * conditions selecting between alternatives are not present in this
 * listing.
 */
389 storage_annotation(const slang_ir_node
*n
, const struct gl_program
*prog
)
392 const slang_ir_storage
*st
= n
->Store
;
393 static char s
[100] = "";
396 return _mesa_strdup("");
399 case PROGRAM_CONSTANT
:
400 if (st
->Index
>= 0) {
401 const GLfloat
*val
= prog
->Parameters
->ParameterValues
[st
->Index
];
/* an unswizzled constant is shown as a full four-component vector */
402 if (st
->Swizzle
== SWIZZLE_NOOP
)
403 sprintf(s
, "{%g, %g, %g, %g}", val
[0], val
[1], val
[2], val
[3]);
405 sprintf(s
, "%g", val
[GET_SWZ(st
->Swizzle
, 0)]);
409 case PROGRAM_TEMPORARY
:
411 sprintf(s
, "%s", (char *) n
->Var
->a_name
);
413 sprintf(s
, "t[%d]", st
->Index
);
415 case PROGRAM_STATE_VAR
:
416 case PROGRAM_UNIFORM
:
417 sprintf(s
, "%s", prog
->Parameters
->Parameters
[st
->Index
].Name
);
419 case PROGRAM_VARYING
:
420 sprintf(s
, "%s", prog
->Varying
->Parameters
[st
->Index
].Name
);
423 sprintf(s
, "input[%d]", st
->Index
);
426 sprintf(s
, "output[%d]", st
->Index
);
431 return _mesa_strdup(s
);
439 * Return an annotation string for an instruction.
/*
 * instruction_annotation: build a comment text for an instruction from the
 * annotations of its destination and up to three sources.
 *
 * The lengths of the annotation strings are added to len; an annotation may
 * instead be replaced by an empty string from _mesa_strdup(). A buffer of
 * len bytes is obtained with malloc() and filled with sprintf() using the
 * operator text between the first and second source; an assert checks that
 * the result is shorter than len.
 *
 * NOTE(review): the initial value of len, the conditions around each
 * strlen/strdup pair (presumably NULL checks), the selection of operator,
 * the release of the input strings and the return statement are not present
 * in this listing - confirm against the complete file.
 * NOTE(review): the malloc() result is used without a NULL check, and the
 * length check runs only after sprintf() has already written.
 */
442 instruction_annotation(gl_inst_opcode opcode
, char *dstAnnot
,
443 char *srcAnnot0
, char *srcAnnot1
, char *srcAnnot2
)
446 const char *operator;
451 len
+= strlen(dstAnnot
);
453 dstAnnot
= _mesa_strdup("");
456 len
+= strlen(srcAnnot0
);
458 srcAnnot0
= _mesa_strdup("");
461 len
+= strlen(srcAnnot1
);
463 srcAnnot1
= _mesa_strdup("");
466 len
+= strlen(srcAnnot2
);
468 srcAnnot2
= _mesa_strdup("");
499 s
= (char *) malloc(len
);
500 sprintf(s
, "%s = %s %s %s %s", dstAnnot
,
501 srcAnnot0
, operator, srcAnnot1
, srcAnnot2
);
502 assert(_mesa_strlen(s
) < len
);
517 * Emit an instruction that's just a comment.
/*
 * emit_comment: append an OPCODE_NOP instruction whose Comment is a copy of
 * s made with _mesa_strdup().
 * NOTE(review): the return statement is not present in this listing;
 * presumably the new instruction is returned - confirm.
 */
519 static struct prog_instruction
*
520 emit_comment(slang_emit_info
*emitInfo
, const char *s
)
522 struct prog_instruction
*inst
= new_instruction(emitInfo
, OPCODE_NOP
);
524 inst
->Comment
= _mesa_strdup(s
);
531 * Generate code for a simple arithmetic instruction.
532 * Either 1, 2 or 3 operands.
/*
 * emit_arith: emit one arithmetic instruction for node n.
 *
 * With PEEPHOLE_OPTIMIZATIONS enabled, an IR_ADD whose first or second child
 * is an IR_MUL is emitted as a single OPCODE_MAD: the three leaf operands
 * are emitted first and then used as the MAD sources (product operands in
 * SrcReg[0] and SrcReg[1], addend in SrcReg[2]). Otherwise each child is
 * emitted, an instruction with info->InstOpcode is created and the storage
 * of each child becomes a source register.
 *
 * Result storage is then obtained for n, the writemask is narrowed for
 * result sizes 1, 2 and 3, the destination register is filled in and an
 * annotation comment is attached. The operand nodes recorded in temps[] have
 * their temporary storage released at the end.
 *
 * NOTE(review): several lines are not present in this listing, among them
 * the else branch header of the generic path, the early-out taken when a
 * child has no storage, the conditions around the annotation calls, the
 * NULL test inside the final free loop and the return statement - confirm
 * against the complete file.
 */
534 static struct prog_instruction
*
535 emit_arith(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
537 struct prog_instruction
*inst
;
538 const slang_ir_info
*info
= _slang_ir_info(n
->Opcode
);
539 char *srcAnnot
[3], *dstAnnot
;
541 slang_ir_node
*temps
[3];
543 /* we'll save pointers to nodes/storage to free in temps[] until
546 temps
[0] = temps
[1] = temps
[2] = NULL
;
549 assert(info
->InstOpcode
!= OPCODE_NOP
);
551 srcAnnot
[0] = srcAnnot
[1] = srcAnnot
[2] = dstAnnot
= NULL
;
553 #if PEEPHOLE_OPTIMIZATIONS
554 /* Look for MAD opportunity */
555 if (info
->NumParams
== 2 &&
556 n
->Opcode
== IR_ADD
&& n
->Children
[0]->Opcode
== IR_MUL
) {
557 /* found pattern IR_ADD(IR_MUL(A, B), C) */
558 emit(emitInfo
, n
->Children
[0]->Children
[0]); /* A */
559 emit(emitInfo
, n
->Children
[0]->Children
[1]); /* B */
560 emit(emitInfo
, n
->Children
[1]); /* C */
561 /* generate MAD instruction */
562 inst
= new_instruction(emitInfo
, OPCODE_MAD
);
563 /* operands: A, B, C: */
564 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Children
[0]->Store
);
565 storage_to_src_reg(&inst
->SrcReg
[1], n
->Children
[0]->Children
[1]->Store
);
566 storage_to_src_reg(&inst
->SrcReg
[2], n
->Children
[1]->Store
);
567 temps
[0] = n
->Children
[0]->Children
[0];
568 temps
[1] = n
->Children
[0]->Children
[1];
569 temps
[2] = n
->Children
[1];
571 else if (info
->NumParams
== 2 &&
572 n
->Opcode
== IR_ADD
&& n
->Children
[1]->Opcode
== IR_MUL
) {
573 /* found pattern IR_ADD(A, IR_MUL(B, C)) */
574 emit(emitInfo
, n
->Children
[0]); /* A */
575 emit(emitInfo
, n
->Children
[1]->Children
[0]); /* B */
576 emit(emitInfo
, n
->Children
[1]->Children
[1]); /* C */
577 /* generate MAD instruction */
578 inst
= new_instruction(emitInfo
, OPCODE_MAD
);
579 /* operands: B, C, A */
580 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[1]->Children
[0]->Store
);
581 storage_to_src_reg(&inst
->SrcReg
[1], n
->Children
[1]->Children
[1]->Store
);
582 storage_to_src_reg(&inst
->SrcReg
[2], n
->Children
[0]->Store
);
583 temps
[0] = n
->Children
[1]->Children
[0];
584 temps
[1] = n
->Children
[1]->Children
[1];
585 temps
[2] = n
->Children
[0];
592 /* gen code for children */
593 for (i
= 0; i
< info
->NumParams
; i
++) {
594 emit(emitInfo
, n
->Children
[i
]);
595 if (!n
->Children
[i
] || !n
->Children
[i
]->Store
) {
601 /* gen this instruction and src registers */
602 inst
= new_instruction(emitInfo
, info
->InstOpcode
);
603 for (i
= 0; i
< info
->NumParams
; i
++)
604 storage_to_src_reg(&inst
->SrcReg
[i
], n
->Children
[i
]->Store
);
607 for (i
= 0; i
< info
->NumParams
; i
++)
608 srcAnnot
[i
] = storage_annotation(n
->Children
[i
], emitInfo
->prog
);
610 /* record (potential) temps to free */
611 for (i
= 0; i
< info
->NumParams
; i
++)
612 temps
[i
] = n
->Children
[i
];
/* result storage; the return value is not checked in the visible lines */
616 alloc_node_storage(emitInfo
, n
, -1);
617 assert(n
->Store
->Index
>= 0);
/* narrow the writemask to the number of result components */
618 if (n
->Store
->Size
== 2)
619 n
->Writemask
= WRITEMASK_XY
;
620 else if (n
->Store
->Size
== 3)
621 n
->Writemask
= WRITEMASK_XYZ
;
622 else if (n
->Store
->Size
== 1)
623 n
->Writemask
= WRITEMASK_X
<< GET_SWZ(n
->Store
->Swizzle
, 0);
626 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
628 dstAnnot
= storage_annotation(n
, emitInfo
->prog
);
630 inst
->Comment
= instruction_annotation(inst
->Opcode
, dstAnnot
, srcAnnot
[0],
631 srcAnnot
[1], srcAnnot
[2]);
633 /* really free temps now */
634 for (i
= 0; i
< 3; i
++)
636 free_node_storage(emitInfo
->vt
, temps
[i
]);
638 /*_mesa_print_instruction(inst);*/
644 * Emit code for == and != operators. These could normally be handled
645 * by emit_arith() except we need to be able to handle structure comparisons.
/*
 * emit_compare: emit code for IR_EQUAL and IR_NOTEQUAL, including vector,
 * struct and array operands.
 *
 * Both children are emitted and must have the same storage size, otherwise
 * an error is logged. The result is one bool. Three shapes are visible:
 *  - a direct OPCODE_SEQ or OPCODE_SNE on the two operands;
 *  - for sizes up to 4: SNE into a local temp, a dot product of the temp
 *    with itself to reduce it to one value, and for IR_EQUAL a final SEQ
 *    against constant 0 to invert the result;
 *  - for larger sizes: one SNE per group of four components (the source
 *    register indexes are advanced by i), accumulated with ADD into accTemp,
 *    reduced with DP4, and inverted with SEQ against 0 for IR_EQUAL.
 * The local temps and the storage of both children are released at the end.
 *
 * NOTE(review): the conditions selecting the first shape, the dotOp and
 * swizzle choices per size, the first-iteration test in the loop, the
 * failure returns and the final return are not present in this listing -
 * confirm against the complete file.
 * NOTE(review): in the large-size path, if the second alloc_local_temp()
 * fails, whether accTemp is released cannot be seen from the visible lines.
 */
647 static struct prog_instruction
*
648 emit_compare(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
650 struct prog_instruction
*inst
;
653 assert(n
->Opcode
== IR_EQUAL
|| n
->Opcode
== IR_NOTEQUAL
);
655 /* gen code for children */
656 emit(emitInfo
, n
->Children
[0]);
657 emit(emitInfo
, n
->Children
[1]);
659 if (n
->Children
[0]->Store
->Size
!= n
->Children
[1]->Store
->Size
) {
660 slang_info_log_error(emitInfo
->log
, "invalid operands to == or !=");
664 /* final result is 1 bool */
665 if (!alloc_node_storage(emitInfo
, n
, 1))
668 size
= n
->Children
[0]->Store
->Size
;
671 gl_inst_opcode opcode
;
673 opcode
= n
->Opcode
== IR_EQUAL
? OPCODE_SEQ
: OPCODE_SNE
;
674 inst
= new_instruction(emitInfo
, opcode
);
675 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Store
);
676 storage_to_src_reg(&inst
->SrcReg
[1], n
->Children
[1]->Store
);
677 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
679 else if (size
<= 4) {
681 gl_inst_opcode dotOp
;
682 slang_ir_storage tempStore
;
684 if (!alloc_local_temp(emitInfo
, &tempStore
, 4)) {
691 swizzle
= SWIZZLE_XYZW
;
693 else if (size
== 3) {
695 swizzle
= SWIZZLE_XYZW
;
700 swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
);
703 /* Compute inequality (temp = (A != B)) */
704 inst
= new_instruction(emitInfo
, OPCODE_SNE
);
705 storage_to_dst_reg(&inst
->DstReg
, &tempStore
, n
->Writemask
);
706 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Store
);
707 storage_to_src_reg(&inst
->SrcReg
[1], n
->Children
[1]->Store
);
708 inst
->Comment
= _mesa_strdup("Compare values");
710 /* Compute val = DOT(temp, temp) (reduction) */
711 inst
= new_instruction(emitInfo
, dotOp
);
712 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
713 storage_to_src_reg(&inst
->SrcReg
[0], &tempStore
);
714 storage_to_src_reg(&inst
->SrcReg
[1], &tempStore
);
715 inst
->SrcReg
[0].Swizzle
= inst
->SrcReg
[1].Swizzle
= swizzle
; /*override*/
716 inst
->Comment
= _mesa_strdup("Reduce vec to bool");
718 _slang_free_temp(emitInfo
->vt
, &tempStore
); /* free temp */
720 if (n
->Opcode
== IR_EQUAL
) {
721 /* compute val = !val.x with SEQ val, val, 0; */
722 inst
= new_instruction(emitInfo
, OPCODE_SEQ
);
723 storage_to_src_reg(&inst
->SrcReg
[0], n
->Store
);
724 constant_to_src_reg(&inst
->SrcReg
[1], 0.0, emitInfo
);
725 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
726 inst
->Comment
= _mesa_strdup("Invert true/false");
730 /* size > 4, struct or array compare.
731 * XXX this won't work reliably for structs with padding!!
733 GLint i
, num
= (n
->Children
[0]->Store
->Size
+ 3) / 4;
734 slang_ir_storage accTemp
, sneTemp
;
736 if (!alloc_local_temp(emitInfo
, &accTemp
, 4))
739 if (!alloc_local_temp(emitInfo
, &sneTemp
, 4))
742 for (i
= 0; i
< num
; i
++) {
743 /* SNE sneTemp, left[i], right[i] */
744 inst
= new_instruction(emitInfo
, OPCODE_SNE
);
745 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Store
);
746 storage_to_src_reg(&inst
->SrcReg
[1], n
->Children
[1]->Store
);
747 inst
->SrcReg
[0].Index
+= i
;
748 inst
->SrcReg
[1].Index
+= i
;
750 storage_to_dst_reg(&inst
->DstReg
, &accTemp
, WRITEMASK_XYZW
);
751 inst
->Comment
= _mesa_strdup("Begin struct/array comparison");
754 storage_to_dst_reg(&inst
->DstReg
, &sneTemp
, WRITEMASK_XYZW
);
756 /* ADD accTemp, accTemp, sneTemp; # like logical-OR */
757 inst
= new_instruction(emitInfo
, OPCODE_ADD
);
758 storage_to_dst_reg(&inst
->DstReg
, &accTemp
, WRITEMASK_XYZW
);
759 storage_to_src_reg(&inst
->SrcReg
[0], &accTemp
);
760 storage_to_src_reg(&inst
->SrcReg
[1], &sneTemp
);
764 /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */
765 inst
= new_instruction(emitInfo
, OPCODE_DP4
);
766 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
767 storage_to_src_reg(&inst
->SrcReg
[0], &accTemp
);
768 storage_to_src_reg(&inst
->SrcReg
[1], &accTemp
);
769 inst
->Comment
= _mesa_strdup("End struct/array comparison");
771 if (n
->Opcode
== IR_EQUAL
) {
772 /* compute tmp.x = !tmp.x via tmp.x = (tmp.x == 0) */
773 inst
= new_instruction(emitInfo
, OPCODE_SEQ
);
774 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
775 storage_to_src_reg(&inst
->SrcReg
[0], n
->Store
);
776 constant_to_src_reg(&inst
->SrcReg
[1], 0.0, emitInfo
);
777 inst
->Comment
= _mesa_strdup("Invert true/false");
780 _slang_free_temp(emitInfo
->vt
, &accTemp
);
781 _slang_free_temp(emitInfo
->vt
, &sneTemp
);
785 free_node_storage(emitInfo
->vt
, n
->Children
[0]);
786 free_node_storage(emitInfo
->vt
, n
->Children
[1]);
794 * Generate code for an IR_CLAMP instruction.
/*
 * emit_clamp: emit code for IR_CLAMP, clamping child 0 between child 1
 * (lower limit) and child 2 (upper limit).
 *
 * Child 0 is emitted first. One visible region recognizes constant limits
 * of all 0.0 and all 1.0 and sets SaturateMode to SATURATE_ZERO_ONE on the
 * previous instruction instead of emitting new code. The general path
 * obtains result storage sized like child 0, emits both limits, and then
 * emits MAX into a scratch temp followed by MIN into the result. The scratch
 * temp is used so the intermediate value is never read back from the
 * destination register; it is released at the end.
 *
 * NOTE(review): in the saturate region prev_instruction() is called with
 * prog, but prev_instruction() above takes a slang_emit_info pointer and no
 * prog is declared in the visible lines; presumably this region is disabled
 * by a preprocessor conditional that the listing dropped - confirm.
 * NOTE(review): the result of the alloc_node_storage() call for tmpNode is
 * not checked in the visible lines.
 * NOTE(review): the failure returns and the final return are not present in
 * this listing.
 */
796 static struct prog_instruction
*
797 emit_clamp(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
799 struct prog_instruction
*inst
;
800 slang_ir_node tmpNode
;
802 assert(n
->Opcode
== IR_CLAMP
);
808 inst
= emit(emitInfo
, n
->Children
[0]);
810 /* If lower limit == 0.0 and upper limit == 1.0,
811 * set prev instruction's SaturateMode field to SATURATE_ZERO_ONE.
813 * emit OPCODE_MIN, OPCODE_MAX sequence.
816 /* XXX this isn't quite finished yet */
817 if (n
->Children
[1]->Opcode
== IR_FLOAT
&&
818 n
->Children
[1]->Value
[0] == 0.0 &&
819 n
->Children
[1]->Value
[1] == 0.0 &&
820 n
->Children
[1]->Value
[2] == 0.0 &&
821 n
->Children
[1]->Value
[3] == 0.0 &&
822 n
->Children
[2]->Opcode
== IR_FLOAT
&&
823 n
->Children
[2]->Value
[0] == 1.0 &&
824 n
->Children
[2]->Value
[1] == 1.0 &&
825 n
->Children
[2]->Value
[2] == 1.0 &&
826 n
->Children
[2]->Value
[3] == 1.0) {
828 inst
= prev_instruction(prog
);
830 if (inst
&& inst
->Opcode
!= OPCODE_NOP
) {
831 /* and prev instruction's DstReg matches n->Children[0]->Store */
832 inst
->SaturateMode
= SATURATE_ZERO_ONE
;
833 n
->Store
= n
->Children
[0]->Store
;
839 if (!alloc_node_storage(emitInfo
, n
, n
->Children
[0]->Store
->Size
))
842 emit(emitInfo
, n
->Children
[1]);
843 emit(emitInfo
, n
->Children
[2]);
845 /* Some GPUs don't allow reading from output registers. So if the
846 * dest for this clamp() is an output reg, we can't use that reg for
847 * the intermediate result. Use a temp register instead.
849 _mesa_bzero(&tmpNode
, sizeof(tmpNode
));
850 alloc_node_storage(emitInfo
, &tmpNode
, n
->Store
->Size
);
852 /* tmp = max(ch[0], ch[1]) */
853 inst
= new_instruction(emitInfo
, OPCODE_MAX
);
854 storage_to_dst_reg(&inst
->DstReg
, tmpNode
.Store
, n
->Writemask
);
855 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Store
);
856 storage_to_src_reg(&inst
->SrcReg
[1], n
->Children
[1]->Store
);
858 /* n->dest = min(tmp, ch[2]) */
859 inst
= new_instruction(emitInfo
, OPCODE_MIN
);
860 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
861 storage_to_src_reg(&inst
->SrcReg
[0], tmpNode
.Store
);
862 storage_to_src_reg(&inst
->SrcReg
[1], n
->Children
[2]->Store
);
/* the scratch temp is no longer needed once MIN has been emitted */
864 free_node_storage(emitInfo
->vt
, &tmpNode
);
/*
 * emit_negation: emit a negation of child 0 as an OPCODE_MOV whose source
 * has NegateBase set to NEGATE_XYZW.
 *
 * Child 0 is emitted, result storage sized like child 0 is obtained for n,
 * and the MOV copies the negated child storage into it.
 * NOTE(review): the failure return after alloc_node_storage() and the final
 * return are not present in this listing - confirm against the complete
 * file.
 */
870 static struct prog_instruction
*
871 emit_negation(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
873 /* Implement as MOV dst, -src; */
874 /* XXX we could look at the previous instruction and in some circumstances
875 * modify it to accomplish the negation.
877 struct prog_instruction
*inst
;
879 emit(emitInfo
, n
->Children
[0]);
881 if (!alloc_node_storage(emitInfo
, n
, n
->Children
[0]->Store
->Size
))
884 inst
= new_instruction(emitInfo
, OPCODE_MOV
);
885 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
886 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Store
);
887 inst
->SrcReg
[0].NegateBase
= NEGATE_XYZW
;
892 static struct prog_instruction
*
893 emit_label(slang_emit_info
*emitInfo
, const slang_ir_node
*n
)
897 /* XXX this fails in loop tail code - investigate someday */
/*
 * emit_label: record the current instruction count of the program as the
 * location of label n->Label.
 *
 * Two variants are visible: one asserts that the label has no location yet
 * and then sets it, the other sets the location only when it is still
 * negative.
 * NOTE(review): presumably the two variants are alternatives of a
 * preprocessor conditional that the listing dropped; the trailing arguments
 * of both _slang_label_set_location() calls and the return statement are
 * also not present - confirm against the complete file.
 */
898 assert(_slang_label_get_location(n
->Label
) < 0);
899 _slang_label_set_location(n
->Label
, emitInfo
->prog
->NumInstructions
,
902 if (_slang_label_get_location(n
->Label
) < 0)
903 _slang_label_set_location(n
->Label
, emitInfo
->prog
->NumInstructions
,
911 * Emit code for a function call.
912 * Note that for each time a function is called, we emit the function's
913 * body code again because the set of available registers may be different.
/*
 * emit_fcall: emit code for an IR_CALL node.
 *
 * The current program is saved and replaced by a new subroutine program from
 * new_subroutine(); the label of the function is placed at the start of that
 * program. The function body (child 0) is emitted into it, optionally
 * bracketed by OPCODE_BGNSUB and OPCODE_ENDSUB when EmitBeginEndSub is set,
 * and an OPCODE_RET is appended when the last emitted instruction is not
 * already a RET. The node takes over the storage of its body. The saved
 * program is then restored and an OPCODE_CAL is emitted into it whose
 * BranchTarget temporarily holds the subroutine id.
 *
 * NOTE(review): the declaration of subroutineId, the trailing argument of
 * _slang_label_set_location() and the return statement are not present in
 * this listing - confirm against the complete file.
 * NOTE(review): when the subroutine program is empty, prev_instruction()
 * presumably yields NULL and no RET is added - confirm this is intended.
 */
915 static struct prog_instruction
*
916 emit_fcall(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
918 struct gl_program
*progSave
;
919 struct prog_instruction
*inst
;
922 assert(n
->Opcode
== IR_CALL
);
925 /* save/push cur program */
926 progSave
= emitInfo
->prog
;
927 emitInfo
->prog
= new_subroutine(emitInfo
, &subroutineId
);
929 _slang_label_set_location(n
->Label
, emitInfo
->prog
->NumInstructions
,
932 if (emitInfo
->EmitBeginEndSub
) {
933 /* BGNSUB isn't a real instruction.
934 * We require a label (i.e. "foobar:") though, if we're going to
935 * print the program in the NV format. The BNGSUB instruction is
936 * really just a NOP to attach the label to.
938 inst
= new_instruction(emitInfo
, OPCODE_BGNSUB
);
939 inst
->Comment
= _mesa_strdup(n
->Label
->Name
);
942 /* body of function: */
943 emit(emitInfo
, n
->Children
[0]);
944 n
->Store
= n
->Children
[0]->Store
;
946 /* add RET instruction now, if needed */
947 inst
= prev_instruction(emitInfo
);
948 if (inst
&& inst
->Opcode
!= OPCODE_RET
) {
949 inst
= new_instruction(emitInfo
, OPCODE_RET
);
952 if (emitInfo
->EmitBeginEndSub
) {
953 inst
= new_instruction(emitInfo
, OPCODE_ENDSUB
);
954 inst
->Comment
= _mesa_strdup(n
->Label
->Name
);
957 /* pop/restore cur program */
958 emitInfo
->prog
= progSave
;
960 /* emit the function call */
961 inst
= new_instruction(emitInfo
, OPCODE_CAL
);
962 /* The branch target is just the subroutine number (changed later) */
963 inst
->BranchTarget
= subroutineId
;
964 inst
->Comment
= _mesa_strdup(n
->Label
->Name
);
965 assert(inst
->BranchTarget
>= 0);
972 * Emit code for a 'return' statement.
/*
 * emit_return: emit an unconditional OPCODE_RET for an IR_RETURN node
 * (CondMask is set to COND_TR).
 * NOTE(review): the return statement is not present in this listing;
 * presumably the new instruction is returned - confirm.
 */
974 static struct prog_instruction
*
975 emit_return(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
977 struct prog_instruction
*inst
;
979 assert(n
->Opcode
== IR_RETURN
);
981 inst
= new_instruction(emitInfo
, OPCODE_RET
);
982 inst
->DstReg
.CondMask
= COND_TR
; /* always return */
/*
 * emit_kill: emit an unconditional OPCODE_KIL_NV and flag the fragment
 * program as using kill.
 *
 * The current program must have target GL_FRAGMENT_PROGRAM_ARB (asserted);
 * it is then cast to struct gl_fragment_program and UsesKill is set to
 * GL_TRUE.
 * NOTE(review): the return statement is not present in this listing;
 * presumably the new instruction is returned - confirm.
 */
987 static struct prog_instruction
*
988 emit_kill(slang_emit_info
*emitInfo
)
990 struct gl_fragment_program
*fp
;
991 struct prog_instruction
*inst
;
992 /* NV-KILL - discard fragment depending on condition code.
993 * Note that ARB-KILL depends on sign of vector operand.
995 inst
= new_instruction(emitInfo
, OPCODE_KIL_NV
);
996 inst
->DstReg
.CondMask
= COND_TR
; /* always kill */
998 assert(emitInfo
->prog
->Target
== GL_FRAGMENT_PROGRAM_ARB
);
999 fp
= (struct gl_fragment_program
*) emitInfo
->prog
;
1000 fp
->UsesKill
= GL_TRUE
;
/*
 * emit_tex: emit a texture sampling instruction for IR_TEX, IR_TEXB or
 * IR_TEXP (OPCODE_TEX, OPCODE_TXB or OPCODE_TXP respectively).
 *
 * Child 1 (the coordinate) is emitted and becomes SrcReg[0]. Child 0 is the
 * sampler: the Size of its storage is used as the texture target, which is
 * asserted to lie between TEXTURE_1D_INDEX and TEXTURE_RECT_INDEX, and the
 * Index of its storage is used as the sampler index. Result storage of size
 * 4 is obtained for n.
 *
 * NOTE(review): TexSrcUnit is assigned twice in the visible lines (27 and
 * the sampler index); presumably the assignments are alternatives of a
 * preprocessor conditional that the listing dropped - confirm.
 * NOTE(review): the failure return after alloc_node_storage() and the final
 * return are not present in this listing.
 */
1006 static struct prog_instruction
*
1007 emit_tex(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1009 struct prog_instruction
*inst
;
1011 (void) emit(emitInfo
, n
->Children
[1]);
1013 if (n
->Opcode
== IR_TEX
) {
1014 inst
= new_instruction(emitInfo
, OPCODE_TEX
);
1016 else if (n
->Opcode
== IR_TEXB
) {
1017 inst
= new_instruction(emitInfo
, OPCODE_TXB
);
1020 assert(n
->Opcode
== IR_TEXP
);
1021 inst
= new_instruction(emitInfo
, OPCODE_TXP
);
1024 if (!alloc_node_storage(emitInfo
, n
, 4))
1027 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
1029 /* Child[1] is the coord */
1030 assert(n
->Children
[1]->Store
->Index
>= 0);
1031 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[1]->Store
);
1033 /* Child[0] is the sampler (a uniform which'll indicate the texture unit) */
1034 assert(n
->Children
[0]->Store
);
1035 /* Store->Index is the sampler index */
1036 assert(n
->Children
[0]->Store
->Index
>= 0);
1037 /* Store->Size is the texture target */
1038 assert(n
->Children
[0]->Store
->Size
>= TEXTURE_1D_INDEX
);
1039 assert(n
->Children
[0]->Store
->Size
<= TEXTURE_RECT_INDEX
);
1041 inst
->TexSrcTarget
= n
->Children
[0]->Store
->Size
;
1043 inst
->TexSrcUnit
= 27; /* Dummy value; the TexSrcUnit will be computed at
1044 * link time, using the sampler uniform's value.
1046 inst
->Sampler
= n
->Children
[0]->Store
->Index
; /* i.e. uniform's index */
1048 inst
->TexSrcUnit
= n
->Children
[0]->Store
->Index
; /* i.e. uniform's index */
/*
 * emit_copy: emit code for an IR_COPY (assignment) node; child 0 is the
 * destination and child 1 the source.
 *
 * Both children are emitted and each must end up with storage that has a
 * non-negative Index; for the source an "invalid assignment" error is logged
 * when no error text exists yet. The node takes over the destination
 * storage. A PROGRAM_SAMPLER destination generates no code: the sampler
 * index is copied at compile time.
 *
 * With PEEPHOLE_OPTIMIZATIONS enabled, when the instruction that produced
 * the source wrote to a temp that matches the source storage, that
 * instruction is redirected to write the destination directly and no MOV is
 * emitted. Otherwise a MOV is emitted: a block form for storage larger than
 * 4 components, or a single annotated register move. The source storage is
 * released at the end.
 *
 * NOTE(review): the first part of the peephole condition, the loop of the
 * block move, the early returns and the final return are not present in
 * this listing - confirm against the complete file.
 */
1057 static struct prog_instruction
*
1058 emit_copy(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1060 struct prog_instruction
*inst
;
1062 assert(n
->Opcode
== IR_COPY
);
1065 emit(emitInfo
, n
->Children
[0]);
1066 if (!n
->Children
[0]->Store
|| n
->Children
[0]->Store
->Index
< 0) {
1067 /* an error should have been already recorded */
1072 assert(n
->Children
[1]);
1073 inst
= emit(emitInfo
, n
->Children
[1]);
1075 if (!n
->Children
[1]->Store
|| n
->Children
[1]->Store
->Index
< 0) {
1076 if (!emitInfo
->log
->text
) {
1077 slang_info_log_error(emitInfo
->log
, "invalid assignment");
1082 assert(n
->Children
[1]->Store
->Index
>= 0);
1084 /*assert(n->Children[0]->Store->Size == n->Children[1]->Store->Size);*/
1086 n
->Store
= n
->Children
[0]->Store
;
1088 if (n
->Store
->File
== PROGRAM_SAMPLER
) {
1089 /* no code generated for sampler assignments,
1090 * just copy the sampler index at compile time.
1092 n
->Store
->Index
= n
->Children
[1]->Store
->Index
;
1096 #if PEEPHOLE_OPTIMIZATIONS
1098 _slang_is_temp(emitInfo
->vt
, n
->Children
[1]->Store
) &&
1099 (inst
->DstReg
.File
== n
->Children
[1]->Store
->File
) &&
1100 (inst
->DstReg
.Index
== n
->Children
[1]->Store
->Index
)) {
1101 /* Peephole optimization:
1102 * The Right-Hand-Side has its results in a temporary place.
1103 * Modify the RHS (and the prev instruction) to store its results
1104 * in the destination specified by n->Children[0].
1105 * Then, this MOVE is a no-op.
1107 if (n
->Children
[1]->Opcode
!= IR_SWIZZLE
)
1108 _slang_free_temp(emitInfo
->vt
, n
->Children
[1]->Store
);
1109 *n
->Children
[1]->Store
= *n
->Children
[0]->Store
;
1111 /* fixup the previous instruction (which stored the RHS result) */
1112 assert(n
->Children
[0]->Store
->Index
>= 0);
1114 /* use tighter writemask when possible */
1115 if (n
->Writemask
== WRITEMASK_XYZW
)
1116 n
->Writemask
= inst
->DstReg
.WriteMask
;
1118 storage_to_dst_reg(&inst
->DstReg
, n
->Children
[0]->Store
, n
->Writemask
);
1124 if (n
->Children
[0]->Store
->Size
> 4) {
1125 /* move matrix/struct etc (block of registers) */
1126 slang_ir_storage dstStore
= *n
->Children
[0]->Store
;
1127 slang_ir_storage srcStore
= *n
->Children
[1]->Store
;
1128 GLint size
= srcStore
.Size
;
1129 ASSERT(n
->Children
[0]->Writemask
== WRITEMASK_XYZW
);
1130 ASSERT(n
->Children
[1]->Store
->Swizzle
== SWIZZLE_NOOP
);
1134 inst
= new_instruction(emitInfo
, OPCODE_MOV
);
1135 inst
->Comment
= _mesa_strdup("IR_COPY block");
1136 storage_to_dst_reg(&inst
->DstReg
, &dstStore
, n
->Writemask
);
1137 storage_to_src_reg(&inst
->SrcReg
[0], &srcStore
);
1144 /* single register move */
1145 char *srcAnnot
, *dstAnnot
;
1146 inst
= new_instruction(emitInfo
, OPCODE_MOV
);
1147 assert(n
->Children
[0]->Store
->Index
>= 0);
1148 storage_to_dst_reg(&inst
->DstReg
, n
->Children
[0]->Store
, n
->Writemask
);
1149 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[1]->Store
);
1150 dstAnnot
= storage_annotation(n
->Children
[0], emitInfo
->prog
);
1151 srcAnnot
= storage_annotation(n
->Children
[1], emitInfo
->prog
);
1152 inst
->Comment
= instruction_annotation(inst
->Opcode
, dstAnnot
,
1153 srcAnnot
, NULL
, NULL
);
/* the source value has been copied, so its temp can be released */
1155 free_node_storage(emitInfo
->vt
, n
->Children
[1]);
1162 * An IR_COND node wraps a boolean expression which is used by an
1163 * IF or WHILE test. This is where we'll set condition codes, if needed.
/*
 * emit_cond: emit code for an IR_COND node, which wraps the boolean
 * expression of an IF or WHILE test.
 *
 * The expression (child 0) is emitted. When EmitCondCodes is set there are
 * two cases: if the instruction returned for the expression wrote the
 * register being tested, that instruction just gets CondUpdate set and the
 * node shares the storage of the child; otherwise a MOV with CondUpdate set
 * is emitted into freshly obtained storage, which is released again right
 * away. When EmitCondCodes is not set no code is added and the node shares
 * the storage of the child.
 *
 * NOTE(review): the first part of the condition testing inst, the early
 * returns for a missing child or missing storage, and the final return are
 * not present in this listing - confirm against the complete file.
 */
1165 static struct prog_instruction
*
1166 emit_cond(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1168 struct prog_instruction
*inst
;
1170 assert(n
->Opcode
== IR_COND
);
1172 if (!n
->Children
[0])
1175 /* emit code for the expression */
1176 inst
= emit(emitInfo
, n
->Children
[0]);
1178 if (!n
->Children
[0]->Store
) {
1179 /* error recovery */
1183 assert(n
->Children
[0]->Store
);
1184 /*assert(n->Children[0]->Store->Size == 1);*/
1186 if (emitInfo
->EmitCondCodes
) {
1188 n
->Children
[0]->Store
&&
1189 inst
->DstReg
.File
== n
->Children
[0]->Store
->File
&&
1190 inst
->DstReg
.Index
== n
->Children
[0]->Store
->Index
) {
1191 /* The previous instruction wrote to the register who's value
1192 * we're testing. Just fix that instruction so that the
1193 * condition codes are computed.
1195 inst
->CondUpdate
= GL_TRUE
;
1196 n
->Store
= n
->Children
[0]->Store
;
1200 /* This'll happen for things like "if (i) ..." where no code
1201 * is normally generated for the expression "i".
1202 * Generate a move instruction just to set condition codes.
1204 if (!alloc_node_storage(emitInfo
, n
, 1))
1206 inst
= new_instruction(emitInfo
, OPCODE_MOV
);
1207 inst
->CondUpdate
= GL_TRUE
;
1208 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
1209 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Store
);
1210 _slang_free_temp(emitInfo
->vt
, n
->Store
);
1211 inst
->Comment
= _mesa_strdup("COND expr");
1216 /* No-op: the boolean result of the expression is in a regular reg */
1217 n
->Store
= n
->Children
[0]->Store
;
/*
 * emit_not: emit code for a logical NOT of child 0.
 *
 * A static table pairs each set-on-compare opcode with its logical inverse.
 * With PEEPHOLE_OPTIMIZATIONS enabled, if the instruction returned for the
 * child is one of these comparisons its opcode is replaced by the inverse
 * and the node shares the storage of the child. Otherwise the value is
 * inverted with an OPCODE_SEQ against constant 0 into new storage sized
 * like the child, and the storage of the child is released.
 *
 * NOTE(review): the table scan stops at an entry whose op is zero, so the
 * table presumably ends with a zero terminator entry; that entry, the table
 * name line, the NULL test on inst, the early returns and the final return
 * are not present in this listing - confirm against the complete file.
 */
1226 static struct prog_instruction
*
1227 emit_not(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1229 static const struct {
1230 gl_inst_opcode op
, opNot
;
1232 { OPCODE_SLT
, OPCODE_SGE
},
1233 { OPCODE_SLE
, OPCODE_SGT
},
1234 { OPCODE_SGT
, OPCODE_SLE
},
1235 { OPCODE_SGE
, OPCODE_SLT
},
1236 { OPCODE_SEQ
, OPCODE_SNE
},
1237 { OPCODE_SNE
, OPCODE_SEQ
},
1240 struct prog_instruction
*inst
;
1244 inst
= emit(emitInfo
, n
->Children
[0]);
1246 #if PEEPHOLE_OPTIMIZATIONS
1248 /* if the prev instruction was a comparison instruction, invert it */
1249 for (i
= 0; operators
[i
].op
; i
++) {
1250 if (inst
->Opcode
== operators
[i
].op
) {
1251 inst
->Opcode
= operators
[i
].opNot
;
1252 n
->Store
= n
->Children
[0]->Store
;
1259 /* else, invert using SEQ (v = v == 0) */
1260 if (!alloc_node_storage(emitInfo
, n
, n
->Children
[0]->Store
->Size
))
1263 inst
= new_instruction(emitInfo
, OPCODE_SEQ
);
1264 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
1265 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Store
);
1266 constant_to_src_reg(&inst
->SrcReg
[1], 0.0, emitInfo
);
1267 free_node_storage(emitInfo
->vt
, n
->Children
[0]);
1269 inst
->Comment
= _mesa_strdup("NOT");
1274 static struct prog_instruction
*
1275 emit_if(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1277 struct gl_program
*prog
= emitInfo
->prog
;
1278 GLuint ifInstLoc
, elseInstLoc
= 0;
1279 GLuint condWritemask
= 0;
1281 /* emit condition expression code */
1283 struct prog_instruction
*inst
;
1284 inst
= emit(emitInfo
, n
->Children
[0]);
1285 if (emitInfo
->EmitCondCodes
) {
1287 /* error recovery */
1290 condWritemask
= inst
->DstReg
.WriteMask
;
1294 if (!n
->Children
[0]->Store
)
1298 assert(n
->Children
[0]->Store
->Size
== 1); /* a bool! */
1301 ifInstLoc
= prog
->NumInstructions
;
1302 if (emitInfo
->EmitHighLevelInstructions
) {
1303 struct prog_instruction
*ifInst
= new_instruction(emitInfo
, OPCODE_IF
);
1304 if (emitInfo
->EmitCondCodes
) {
1305 ifInst
->DstReg
.CondMask
= COND_NE
; /* if cond is non-zero */
1306 /* only test the cond code (1 of 4) that was updated by the
1307 * previous instruction.
1309 ifInst
->DstReg
.CondSwizzle
= writemask_to_swizzle(condWritemask
);
1313 storage_to_src_reg(&ifInst
->SrcReg
[0], n
->Children
[0]->Store
);
1317 /* conditional jump to else, or endif */
1318 struct prog_instruction
*ifInst
= new_instruction(emitInfo
, OPCODE_BRA
);
1319 ifInst
->DstReg
.CondMask
= COND_EQ
; /* BRA if cond is zero */
1320 ifInst
->Comment
= _mesa_strdup("if zero");
1321 ifInst
->DstReg
.CondSwizzle
= writemask_to_swizzle(condWritemask
);
1325 emit(emitInfo
, n
->Children
[1]);
1327 if (n
->Children
[2]) {
1328 /* have else body */
1329 elseInstLoc
= prog
->NumInstructions
;
1330 if (emitInfo
->EmitHighLevelInstructions
) {
1331 (void) new_instruction(emitInfo
, OPCODE_ELSE
);
1334 /* jump to endif instruction */
1335 struct prog_instruction
*inst
;
1336 inst
= new_instruction(emitInfo
, OPCODE_BRA
);
1337 inst
->Comment
= _mesa_strdup("else");
1338 inst
->DstReg
.CondMask
= COND_TR
; /* always branch */
1340 prog
->Instructions
[ifInstLoc
].BranchTarget
= prog
->NumInstructions
;
1341 emit(emitInfo
, n
->Children
[2]);
1345 prog
->Instructions
[ifInstLoc
].BranchTarget
= prog
->NumInstructions
;
1348 if (emitInfo
->EmitHighLevelInstructions
) {
1349 (void) new_instruction(emitInfo
, OPCODE_ENDIF
);
1352 if (n
->Children
[2]) {
1353 prog
->Instructions
[elseInstLoc
].BranchTarget
= prog
->NumInstructions
;
1359 static struct prog_instruction
*
1360 emit_loop(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1362 struct gl_program
*prog
= emitInfo
->prog
;
1363 struct prog_instruction
*endInst
;
1364 GLuint beginInstLoc
, tailInstLoc
, endInstLoc
;
1367 /* emit OPCODE_BGNLOOP */
1368 beginInstLoc
= prog
->NumInstructions
;
1369 if (emitInfo
->EmitHighLevelInstructions
) {
1370 (void) new_instruction(emitInfo
, OPCODE_BGNLOOP
);
1374 emit(emitInfo
, n
->Children
[0]);
1377 tailInstLoc
= prog
->NumInstructions
;
1378 if (n
->Children
[1]) {
1379 if (emitInfo
->EmitComments
)
1380 emit_comment(emitInfo
, "Loop tail code:");
1381 emit(emitInfo
, n
->Children
[1]);
1384 endInstLoc
= prog
->NumInstructions
;
1385 if (emitInfo
->EmitHighLevelInstructions
) {
1386 /* emit OPCODE_ENDLOOP */
1387 endInst
= new_instruction(emitInfo
, OPCODE_ENDLOOP
);
1390 /* emit unconditional BRA-nch */
1391 endInst
= new_instruction(emitInfo
, OPCODE_BRA
);
1392 endInst
->DstReg
.CondMask
= COND_TR
; /* always true */
1394 /* ENDLOOP's BranchTarget points to the BGNLOOP inst */
1395 endInst
->BranchTarget
= beginInstLoc
;
1397 if (emitInfo
->EmitHighLevelInstructions
) {
1398 /* BGNLOOP's BranchTarget points to the ENDLOOP inst */
1399 prog
->Instructions
[beginInstLoc
].BranchTarget
= prog
->NumInstructions
-1;
1402 /* Done emitting loop code. Now walk over the loop's linked list of
1403 * BREAK and CONT nodes, filling in their BranchTarget fields (which
1404 * will point to the ENDLOOP+1 or BGNLOOP instructions, respectively).
1406 for (ir
= n
->List
; ir
; ir
= ir
->List
) {
1407 struct prog_instruction
*inst
= prog
->Instructions
+ ir
->InstLocation
;
1408 assert(inst
->BranchTarget
< 0);
1409 if (ir
->Opcode
== IR_BREAK
||
1410 ir
->Opcode
== IR_BREAK_IF_TRUE
) {
1411 assert(inst
->Opcode
== OPCODE_BRK
||
1412 inst
->Opcode
== OPCODE_BRA
);
1413 /* go to instruction after end of loop */
1414 inst
->BranchTarget
= endInstLoc
+ 1;
1417 assert(ir
->Opcode
== IR_CONT
||
1418 ir
->Opcode
== IR_CONT_IF_TRUE
);
1419 assert(inst
->Opcode
== OPCODE_CONT
||
1420 inst
->Opcode
== OPCODE_BRA
);
1421 /* go to instruction at tail of loop */
1422 inst
->BranchTarget
= endInstLoc
;
1430 * Unconditional "continue" or "break" statement.
1431 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1433 static struct prog_instruction
*
1434 emit_cont_break(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1436 gl_inst_opcode opcode
;
1437 struct prog_instruction
*inst
;
1439 if (n
->Opcode
== IR_CONT
) {
1440 /* we need to execute the loop's tail code before doing CONT */
1442 assert(n
->Parent
->Opcode
== IR_LOOP
);
1443 if (n
->Parent
->Children
[1]) {
1444 /* emit tail code */
1445 if (emitInfo
->EmitComments
) {
1446 emit_comment(emitInfo
, "continue - tail code:");
1448 emit(emitInfo
, n
->Parent
->Children
[1]);
1452 /* opcode selection */
1453 if (emitInfo
->EmitHighLevelInstructions
) {
1454 opcode
= (n
->Opcode
== IR_CONT
) ? OPCODE_CONT
: OPCODE_BRK
;
1457 opcode
= OPCODE_BRA
;
1459 n
->InstLocation
= emitInfo
->prog
->NumInstructions
;
1460 inst
= new_instruction(emitInfo
, opcode
);
1461 inst
->DstReg
.CondMask
= COND_TR
; /* always true */
1467 * Conditional "continue" or "break" statement.
1468 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1470 static struct prog_instruction
*
1471 emit_cont_break_if_true(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1473 struct prog_instruction
*inst
;
1475 assert(n
->Opcode
== IR_CONT_IF_TRUE
||
1476 n
->Opcode
== IR_BREAK_IF_TRUE
);
1478 /* evaluate condition expr, setting cond codes */
1479 inst
= emit(emitInfo
, n
->Children
[0]);
1480 if (emitInfo
->EmitCondCodes
) {
1482 inst
->CondUpdate
= GL_TRUE
;
1485 n
->InstLocation
= emitInfo
->prog
->NumInstructions
;
1487 /* opcode selection */
1488 if (emitInfo
->EmitHighLevelInstructions
) {
1489 const gl_inst_opcode opcode
1490 = (n
->Opcode
== IR_CONT_IF_TRUE
) ? OPCODE_CONT
: OPCODE_BRK
;
1491 if (emitInfo
->EmitCondCodes
) {
1492 /* Get the writemask from the previous instruction which set
1493 * the condcodes. Use that writemask as the CondSwizzle.
1495 const GLuint condWritemask
= inst
->DstReg
.WriteMask
;
1496 inst
= new_instruction(emitInfo
, opcode
);
1497 inst
->DstReg
.CondMask
= COND_NE
;
1498 inst
->DstReg
.CondSwizzle
= writemask_to_swizzle(condWritemask
);
1507 ifInstLoc
= emitInfo
->prog
->NumInstructions
;
1508 inst
= new_instruction(emitInfo
, OPCODE_IF
);
1509 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Store
);
1510 n
->InstLocation
= emitInfo
->prog
->NumInstructions
;
1512 inst
= new_instruction(emitInfo
, opcode
);
1513 inst
= new_instruction(emitInfo
, OPCODE_ENDIF
);
1515 emitInfo
->prog
->Instructions
[ifInstLoc
].BranchTarget
1516 = emitInfo
->prog
->NumInstructions
;
1521 const GLuint condWritemask
= inst
->DstReg
.WriteMask
;
1522 assert(emitInfo
->EmitCondCodes
);
1523 inst
= new_instruction(emitInfo
, OPCODE_BRA
);
1524 inst
->DstReg
.CondMask
= COND_NE
;
1525 inst
->DstReg
.CondSwizzle
= writemask_to_swizzle(condWritemask
);
1531 static struct prog_instruction
*
1532 emit_swizzle(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1534 struct prog_instruction
*inst
;
1536 inst
= emit(emitInfo
, n
->Children
[0]);
1538 /* setup storage info, if needed */
1539 if (!n
->Store
->Parent
)
1540 n
->Store
->Parent
= n
->Children
[0]->Store
;
1542 assert(n
->Store
->Parent
);
1549 * Dereference array element. Just resolve storage for the array
1550 * element represented by this node.
1552 static struct prog_instruction
*
1553 emit_array_element(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1555 slang_ir_storage
*root
;
1557 assert(n
->Opcode
== IR_ELEMENT
);
1559 assert(n
->Store
->File
== PROGRAM_UNDEFINED
);
1560 assert(n
->Store
->Parent
);
1561 assert(n
->Store
->Size
> 0);
1564 while (root
->Parent
)
1565 root
= root
->Parent
;
1567 if (root
->File
== PROGRAM_STATE_VAR
) {
1568 GLint index
= _slang_alloc_statevar(n
, emitInfo
->prog
->Parameters
);
1569 assert(n
->Store
->Index
== index
);
1573 /* do codegen for array */
1574 emit(emitInfo
, n
->Children
[0]);
1576 if (n
->Children
[1]->Opcode
== IR_FLOAT
) {
1577 /* Constant array index.
1578 * Set Store's index to be the offset of the array element in
1579 * the register file.
1581 const GLint element
= (GLint
) n
->Children
[1]->Value
[0];
1582 const GLint sz
= (n
->Store
->Size
+ 3) / 4; /* size in slots/registers */
1584 n
->Store
->Index
= sz
* element
;
1585 assert(n
->Store
->Parent
);
1588 /* Variable array index */
1589 struct prog_instruction
*inst
;
1590 slang_ir_storage dstStore
= *n
->Store
;
1592 /* do codegen for array index expression */
1593 emit(emitInfo
, n
->Children
[1]);
1595 inst
= new_instruction(emitInfo
, OPCODE_ARL
);
1597 if (dstStore
.Size
> 4)
1598 dstStore
.Size
= 4; /* only emit one instruction */
1600 storage_to_dst_reg(&inst
->DstReg
, &dstStore
, n
->Writemask
);
1601 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[1]->Store
);
1603 inst
->DstReg
.File
= PROGRAM_ADDRESS
;
1604 inst
->DstReg
.Index
= 0; /* always address register [0] */
1605 inst
->Comment
= _mesa_strdup("ARL ADDR");
1607 n
->Store
->RelAddr
= GL_TRUE
;
1610 /* if array element size is one, make sure we only access X */
1611 if (n
->Store
->Size
== 1)
1612 n
->Store
->Swizzle
= SWIZZLE_XXXX
;
1614 return NULL
; /* no instruction */
1619 * Resolve storage for accessing a structure field.
1621 static struct prog_instruction
*
1622 emit_struct_field(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1624 slang_ir_storage
*root
= n
->Store
;
1626 assert(n
->Opcode
== IR_FIELD
);
1628 while (root
->Parent
)
1629 root
= root
->Parent
;
1631 /* If this is the field of a state var, allocate constant/uniform
1632 * storage for it now if we haven't already.
1633 * Note that we allocate storage (uniform/constant slots) for state
1634 * variables here rather than at declaration time so we only allocate
1635 * space for the ones that we actually use!
1637 if (root
->File
== PROGRAM_STATE_VAR
) {
1638 root
->Index
= _slang_alloc_statevar(n
, emitInfo
->prog
->Parameters
);
1639 if (root
->Index
< 0) {
1640 slang_info_log_error(emitInfo
->log
, "Error parsing state variable");
1645 /* do codegen for struct */
1646 emit(emitInfo
, n
->Children
[0]);
1649 return NULL
; /* no instruction */
1654 * Emit code for a variable declaration.
1655 * This usually doesn't result in any code generation, but just
1656 * memory allocation.
1658 static struct prog_instruction
*
1659 emit_var_decl(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1661 struct prog_instruction
*inst
;
1664 assert(n
->Store
->File
!= PROGRAM_UNDEFINED
);
1665 assert(n
->Store
->Size
> 0);
1666 /*assert(n->Store->Index < 0);*/
1668 if (!n
->Var
|| n
->Var
->isTemp
) {
1669 /* a nameless/temporary variable, will be freed after first use */
1671 if (n
->Store
->Index
< 0 && !_slang_alloc_temp(emitInfo
->vt
, n
->Store
)) {
1672 slang_info_log_error(emitInfo
->log
,
1673 "Ran out of registers, too many temporaries");
1678 /* a regular variable */
1679 _slang_add_variable(emitInfo
->vt
, n
->Var
);
1680 if (!_slang_alloc_var(emitInfo
->vt
, n
->Store
)) {
1681 slang_info_log_error(emitInfo
->log
,
1682 "Ran out of registers, too many variables");
1686 printf("IR_VAR_DECL %s %d store %p\n",
1687 (char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
1689 assert(n
->Var
->aux
== n
->Store
);
1691 if (emitInfo
->EmitComments
) {
1692 /* emit NOP with comment describing the variable's storage location */
1694 sprintf(s
, "TEMP[%d]%s = variable %s (size %d)",
1696 _mesa_swizzle_string(n
->Store
->Swizzle
, 0, GL_FALSE
),
1697 (n
->Var
? (char *) n
->Var
->a_name
: "anonymous"),
1699 inst
= emit_comment(emitInfo
, s
);
1707 * Emit code for a reference to a variable.
1708 * Actually, no code is generated but we may do some memory alloation.
1709 * In particular, state vars (uniforms) are allocated on an as-needed basis.
1711 static struct prog_instruction
*
1712 emit_var_ref(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1715 assert(n
->Store
->File
!= PROGRAM_UNDEFINED
);
1717 if (n
->Store
->File
== PROGRAM_STATE_VAR
&& n
->Store
->Index
< 0) {
1718 n
->Store
->Index
= _slang_alloc_statevar(n
, emitInfo
->prog
->Parameters
);
1720 else if (n
->Store
->File
== PROGRAM_UNIFORM
) {
1721 /* mark var as used */
1722 _mesa_use_uniform(emitInfo
->prog
->Parameters
, (char *) n
->Var
->a_name
);
1725 if (n
->Store
->Index
< 0) {
1726 /* probably ran out of registers */
1729 assert(n
->Store
->Size
> 0);
/*
 * Top-level recursive code emitter for one IR node.
 *
 * Tests emitInfo->log->error_flag first, then switches on n->Opcode and
 * hands the node to the matching emit_*() helper (emit_var_decl,
 * emit_var_ref, emit_array_element, emit_struct_field, emit_swizzle,
 * emit_arith, emit_compare, emit_clamp, emit_tex, emit_negation, emit_copy,
 * emit_cond, emit_not, emit_label, emit_kill, emit_fcall, emit_if,
 * emit_loop, emit_cont_break_if_true, emit_cont_break, emit_return).
 *
 * Handled inline rather than by a helper:
 *  - a sequence node emits Children[0] then Children[1] and takes its Store
 *    from Children[1];
 *  - a scope node, and a function call, are wrapped in
 *    _slang_push_var_table() / _slang_pop_var_table();
 *  - a float constant gets its Store->Index and Store->Swizzle from
 *    _mesa_add_unnamed_constant(); "Ran out of space for constants" is
 *    logged when the index comes back negative;
 *  - begin/end-subroutine markers become OPCODE_BGNSUB / OPCODE_ENDSUB;
 *  - an opcode with no case is reported through _mesa_problem().
 *
 * NOTE(review): most case labels, and the statements taken when the error
 * flag is set, are not present in this copy of the text - confirm them
 * against the complete file before relying on this summary.
 */
1735 static struct prog_instruction
*
1736 emit(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1738 struct prog_instruction
*inst
;
1742 if (emitInfo
->log
->error_flag
) {
1746 switch (n
->Opcode
) {
1748 /* sequence of two sub-trees */
1749 assert(n
->Children
[0]);
1750 assert(n
->Children
[1]);
1751 emit(emitInfo
, n
->Children
[0]);
1752 if (emitInfo
->log
->error_flag
)
1754 inst
= emit(emitInfo
, n
->Children
[1]);
1758 n
->Store
= n
->Children
[1]->Store
;
1762 /* new variable scope */
1763 _slang_push_var_table(emitInfo
->vt
);
1764 inst
= emit(emitInfo
, n
->Children
[0]);
1765 _slang_pop_var_table(emitInfo
->vt
);
1769 /* Variable declaration - allocate a register for it */
1770 inst
= emit_var_decl(emitInfo
, n
);
1774 /* Reference to a variable
1775 * Storage should have already been resolved/allocated.
1777 return emit_var_ref(emitInfo
, n
);
1780 return emit_array_element(emitInfo
, n
);
1782 return emit_struct_field(emitInfo
, n
);
1784 return emit_swizzle(emitInfo
, n
);
1786 /* Simple arithmetic */
1823 /* trinary operators */
1825 return emit_arith(emitInfo
, n
);
1829 return emit_compare(emitInfo
, n
);
1832 return emit_clamp(emitInfo
, n
);
1836 return emit_tex(emitInfo
, n
);
1838 return emit_negation(emitInfo
, n
);
1840 /* find storage location for this float constant */
1841 n
->Store
->Index
= _mesa_add_unnamed_constant(emitInfo
->prog
->Parameters
,
1844 &n
->Store
->Swizzle
);
1845 if (n
->Store
->Index
< 0) {
1846 slang_info_log_error(emitInfo
->log
, "Ran out of space for constants");
1852 return emit_copy(emitInfo
, n
);
1855 return emit_cond(emitInfo
, n
);
1858 return emit_not(emitInfo
, n
);
1861 return emit_label(emitInfo
, n
);
1864 return emit_kill(emitInfo
);
1867 /* new variable scope for subroutines/function calls */
1868 _slang_push_var_table(emitInfo
->vt
);
1869 inst
= emit_fcall(emitInfo
, n
);
1870 _slang_pop_var_table(emitInfo
->vt
);
1874 return emit_if(emitInfo
, n
);
1877 return emit_loop(emitInfo
, n
);
1878 case IR_BREAK_IF_TRUE
:
1879 case IR_CONT_IF_TRUE
:
1880 return emit_cont_break_if_true(emitInfo
, n
);
1884 return emit_cont_break(emitInfo
, n
);
1887 return new_instruction(emitInfo
, OPCODE_BGNSUB
);
1889 return new_instruction(emitInfo
, OPCODE_ENDSUB
);
1891 return emit_return(emitInfo
, n
);
1897 _mesa_problem(NULL
, "Unexpected IR opcode in emit()\n");
1904 * After code generation, any subroutines will be in separate program
1905 * objects. This function appends all the subroutines onto the main
1906 * program and resolves the linking of all the branch/call instructions.
1907 * XXX this logic should really be part of the linking process...
1910 _slang_resolve_subroutines(slang_emit_info
*emitInfo
)
1912 GET_CURRENT_CONTEXT(ctx
);
1913 struct gl_program
*mainP
= emitInfo
->prog
;
1914 GLuint
*subroutineLoc
, i
, total
;
1917 = (GLuint
*) _mesa_malloc(emitInfo
->NumSubroutines
* sizeof(GLuint
));
1919 /* total number of instructions */
1920 total
= mainP
->NumInstructions
;
1921 for (i
= 0; i
< emitInfo
->NumSubroutines
; i
++) {
1922 subroutineLoc
[i
] = total
;
1923 total
+= emitInfo
->Subroutines
[i
]->NumInstructions
;
1926 /* adjust BrancTargets within the functions */
1927 for (i
= 0; i
< emitInfo
->NumSubroutines
; i
++) {
1928 struct gl_program
*sub
= emitInfo
->Subroutines
[i
];
1930 for (j
= 0; j
< sub
->NumInstructions
; j
++) {
1931 struct prog_instruction
*inst
= sub
->Instructions
+ j
;
1932 if (inst
->Opcode
!= OPCODE_CAL
&& inst
->BranchTarget
>= 0) {
1933 inst
->BranchTarget
+= subroutineLoc
[i
];
1938 /* append subroutines' instructions after main's instructions */
1939 mainP
->Instructions
= _mesa_realloc_instructions(mainP
->Instructions
,
1940 mainP
->NumInstructions
,
1942 mainP
->NumInstructions
= total
;
1943 for (i
= 0; i
< emitInfo
->NumSubroutines
; i
++) {
1944 struct gl_program
*sub
= emitInfo
->Subroutines
[i
];
1945 _mesa_copy_instructions(mainP
->Instructions
+ subroutineLoc
[i
],
1947 sub
->NumInstructions
);
1948 /* delete subroutine code */
1949 sub
->Parameters
= NULL
; /* prevent double-free */
1950 _mesa_reference_program(ctx
, &emitInfo
->Subroutines
[i
], NULL
);
1953 /* free subroutine list */
1954 if (emitInfo
->Subroutines
) {
1955 _mesa_free(emitInfo
->Subroutines
);
1956 emitInfo
->Subroutines
= NULL
;
1958 emitInfo
->NumSubroutines
= 0;
1960 /* Examine CAL instructions.
1961 * At this point, the BranchTarget field of the CAL instruction is
1962 * the number/id of the subroutine to call (an index into the
1963 * emitInfo->Subroutines list).
1964 * Translate that into an actual instruction location now.
1966 for (i
= 0; i
< mainP
->NumInstructions
; i
++) {
1967 struct prog_instruction
*inst
= mainP
->Instructions
+ i
;
1968 if (inst
->Opcode
== OPCODE_CAL
) {
1969 const GLuint f
= inst
->BranchTarget
;
1970 inst
->BranchTarget
= subroutineLoc
[f
];
1974 _mesa_free(subroutineLoc
);
1981 _slang_emit_code(slang_ir_node
*n
, slang_var_table
*vt
,
1982 struct gl_program
*prog
, GLboolean withEnd
,
1983 slang_info_log
*log
)
1985 GET_CURRENT_CONTEXT(ctx
);
1987 slang_emit_info emitInfo
;
1992 emitInfo
.prog
= prog
;
1993 emitInfo
.Subroutines
= NULL
;
1994 emitInfo
.NumSubroutines
= 0;
1996 emitInfo
.EmitHighLevelInstructions
= ctx
->Shader
.EmitHighLevelInstructions
;
1997 emitInfo
.EmitCondCodes
= ctx
->Shader
.EmitCondCodes
;
1998 emitInfo
.EmitComments
= ctx
->Shader
.EmitComments
;
1999 emitInfo
.EmitBeginEndSub
= GL_TRUE
;
2001 if (!emitInfo
.EmitCondCodes
) {
2002 emitInfo
.EmitHighLevelInstructions
= GL_TRUE
;
2005 /* Check uniform/constant limits */
2006 if (prog
->Target
== GL_FRAGMENT_PROGRAM_ARB
) {
2007 maxUniforms
= ctx
->Const
.FragmentProgram
.MaxUniformComponents
/ 4;
2010 assert(prog
->Target
== GL_VERTEX_PROGRAM_ARB
);
2011 maxUniforms
= ctx
->Const
.VertexProgram
.MaxUniformComponents
/ 4;
2013 if (prog
->Parameters
->NumParameters
> maxUniforms
) {
2014 slang_info_log_error(log
, "Constant/uniform register limit exceeded");
2018 (void) emit(&emitInfo
, n
);
2020 /* finish up by adding the END opcode to program */
2022 struct prog_instruction
*inst
;
2023 inst
= new_instruction(&emitInfo
, OPCODE_END
);
2026 _slang_resolve_subroutines(&emitInfo
);
2031 printf("*********** End emit code (%u inst):\n", prog
->NumInstructions
);
2032 _mesa_print_program(prog
);
2033 _mesa_print_program_parameters(ctx
,prog
);