2 * Mesa 3-D graphics library
4 * Copyright (C) 2005-2008 Brian Paul All Rights Reserved.
5 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Emit program instructions (PI code) from IR trees.
34 *** To emit GPU instructions, we basically just do an in-order traversal
39 #include "main/imports.h"
40 #include "main/context.h"
41 #include "main/macros.h"
42 #include "shader/program.h"
43 #include "shader/prog_instruction.h"
44 #include "shader/prog_parameter.h"
45 #include "shader/prog_print.h"
46 #include "slang_builtin.h"
47 #include "slang_emit.h"
48 #include "slang_mem.h"
51 #define PEEPHOLE_OPTIMIZATIONS 1
59 struct gl_program
*prog
;
60 struct gl_program
**Subroutines
;
61 GLuint NumSubroutines
;
63 GLuint MaxInstructions
; /**< size of prog->Instructions[] buffer */
65 /* code-gen options */
66 GLboolean EmitHighLevelInstructions
;
67 GLboolean EmitCondCodes
;
68 GLboolean EmitComments
;
69 GLboolean EmitBeginEndSub
; /* XXX TEMPORARY */
74 static struct gl_program
*
75 new_subroutine(slang_emit_info
*emitInfo
, GLuint
*id
)
77 GET_CURRENT_CONTEXT(ctx
);
78 const GLuint n
= emitInfo
->NumSubroutines
;
80 emitInfo
->Subroutines
= (struct gl_program
**)
81 _mesa_realloc(emitInfo
->Subroutines
,
82 n
* sizeof(struct gl_program
),
83 (n
+ 1) * sizeof(struct gl_program
));
84 emitInfo
->Subroutines
[n
] = ctx
->Driver
.NewProgram(ctx
, emitInfo
->prog
->Target
, 0);
85 emitInfo
->Subroutines
[n
]->Parameters
= emitInfo
->prog
->Parameters
;
86 emitInfo
->NumSubroutines
++;
88 return emitInfo
->Subroutines
[n
];
93 * Convert a writemask to a swizzle. Used for testing cond codes because
94 * we only want to test the cond code component(s) that was set by the
95 * previous instruction.
98 writemask_to_swizzle(GLuint writemask
)
100 if (writemask
== WRITEMASK_X
)
102 if (writemask
== WRITEMASK_Y
)
104 if (writemask
== WRITEMASK_Z
)
106 if (writemask
== WRITEMASK_W
)
108 return SWIZZLE_XYZW
; /* shouldn't be hit */
113 * Convert a swizzle mask to a writemask.
114 * Note that the slang_ir_storage->Swizzle field can represent either a
115 * swizzle mask or a writemask, depending on how it's used. For example,
116 * when we parse "direction.yz" alone, we don't know whether .yz is a
117 * writemask or a swizzle. In this case, we encode ".yz" in store->Swizzle
118 * as a swizzle mask (.yz?? actually). Later, if direction.yz is used as
119 * an R-value, we use store->Swizzle as-is. Otherwise, if direction.yz is
120 * used as an L-value, we convert it to a writemask.
123 swizzle_to_writemask(GLuint swizzle
)
125 GLuint i
, writemask
= 0x0;
126 for (i
= 0; i
< 4; i
++) {
127 GLuint swz
= GET_SWZ(swizzle
, i
);
128 if (swz
<= SWIZZLE_W
) {
129 writemask
|= (1 << swz
);
137 * Swizzle a swizzle (function composition).
138 * That is, return swz2(swz1), or said another way: swz1.swz2
139 * Example: swizzle_swizzle(".zwxx", ".xxyw") yields ".zzwx"
142 _slang_swizzle_swizzle(GLuint swz1
, GLuint swz2
)
145 for (i
= 0; i
< 4; i
++) {
146 GLuint c
= GET_SWZ(swz2
, i
);
148 s
[i
] = GET_SWZ(swz1
, c
);
152 swz
= MAKE_SWIZZLE4(s
[0], s
[1], s
[2], s
[3]);
158 * Return the default swizzle mask for accessing a variable of the
159 * given size (in floats). If size = 1, comp is used to identify
160 * which component [0..3] of the register holds the variable.
163 _slang_var_swizzle(GLint size
, GLint comp
)
167 return MAKE_SWIZZLE4(comp
, comp
, comp
, comp
);
169 return MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_NIL
, SWIZZLE_NIL
);
171 return MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_NIL
);
180 * Allocate storage for the given node (if it hasn't already been allocated).
182 * Typically this is temporary storage for an intermediate result (such as
183 * for a multiply or add, etc).
185 * If n->Store does not exist it will be created and will be of the size
186 * specified by defaultSize.
189 alloc_node_storage(slang_emit_info
*emitInfo
, slang_ir_node
*n
,
194 assert(defaultSize
> 0);
195 n
->Store
= _slang_new_ir_storage(PROGRAM_TEMPORARY
, -1, defaultSize
);
198 /* now allocate actual register(s). I.e. set n->Store->Index >= 0 */
199 if (n
->Store
->Index
< 0) {
200 if (!_slang_alloc_temp(emitInfo
->vt
, n
->Store
)) {
201 slang_info_log_error(emitInfo
->log
,
202 "Ran out of registers, too many temporaries");
203 _slang_free(n
->Store
);
213 * Free temporary storage, if n->Store is, in fact, temp storage.
217 free_node_storage(slang_var_table
*vt
, slang_ir_node
*n
)
219 if (n
->Store
->File
== PROGRAM_TEMPORARY
&&
220 n
->Store
->Index
>= 0 &&
221 n
->Opcode
!= IR_SWIZZLE
) {
222 if (_slang_is_temp(vt
, n
->Store
)) {
223 _slang_free_temp(vt
, n
->Store
);
224 n
->Store
->Index
= -1;
225 n
->Store
= NULL
; /* XXX this may not be needed */
232 * Helper function to allocate a short-term temporary.
233 * Free it with _slang_free_temp().
236 alloc_local_temp(slang_emit_info
*emitInfo
, slang_ir_storage
*temp
, GLint size
)
240 _mesa_bzero(temp
, sizeof(*temp
));
242 temp
->File
= PROGRAM_TEMPORARY
;
244 return _slang_alloc_temp(emitInfo
->vt
, temp
);
249 * Remove any SWIZZLE_NIL terms from given swizzle mask.
250 * For a swizzle like .z??? generate .zzzz (replicate single component).
251 * Else, for .wx?? generate .wxzw (insert default component for the position).
254 fix_swizzle(GLuint swizzle
)
256 GLuint c0
= GET_SWZ(swizzle
, 0),
257 c1
= GET_SWZ(swizzle
, 1),
258 c2
= GET_SWZ(swizzle
, 2),
259 c3
= GET_SWZ(swizzle
, 3);
260 if (c1
== SWIZZLE_NIL
&& c2
== SWIZZLE_NIL
&& c3
== SWIZZLE_NIL
) {
261 /* smear first component across all positions */
265 /* insert default swizzle components */
266 if (c0
== SWIZZLE_NIL
)
268 if (c1
== SWIZZLE_NIL
)
270 if (c2
== SWIZZLE_NIL
)
272 if (c3
== SWIZZLE_NIL
)
275 return MAKE_SWIZZLE4(c0
, c1
, c2
, c3
);
281 * Convert IR storage to an instruction dst register.
284 storage_to_dst_reg(struct prog_dst_register
*dst
, const slang_ir_storage
*st
)
286 const GLboolean relAddr
= st
->RelAddr
;
287 const GLint size
= st
->Size
;
288 GLint index
= st
->Index
;
289 GLuint swizzle
= st
->Swizzle
;
292 /* if this is storage relative to some parent storage, walk up the tree */
295 assert(st
->Index
>= 0);
297 swizzle
= _slang_swizzle_swizzle(st
->Swizzle
, swizzle
);
300 assert(st
->File
!= PROGRAM_UNDEFINED
);
301 dst
->File
= st
->File
;
309 if (swizzle
!= SWIZZLE_XYZW
) {
310 dst
->WriteMask
= swizzle_to_writemask(swizzle
);
315 dst
->WriteMask
= WRITEMASK_X
<< GET_SWZ(st
->Swizzle
, 0);
318 dst
->WriteMask
= WRITEMASK_XY
;
321 dst
->WriteMask
= WRITEMASK_XYZ
;
324 dst
->WriteMask
= WRITEMASK_XYZW
;
327 ; /* error would have been caught above */
331 dst
->RelAddr
= relAddr
;
336 * Convert IR storage to an instruction src register.
339 storage_to_src_reg(struct prog_src_register
*src
, const slang_ir_storage
*st
)
341 const GLboolean relAddr
= st
->RelAddr
;
342 GLint index
= st
->Index
;
343 GLuint swizzle
= st
->Swizzle
;
345 /* if this is storage relative to some parent storage, walk up the tree */
350 /* an error should have been reported already */
353 assert(st
->Index
>= 0);
355 swizzle
= _slang_swizzle_swizzle(fix_swizzle(st
->Swizzle
), swizzle
);
358 assert(st
->File
>= 0);
359 #if 1 /* XXX temporary */
360 if (st
->File
== PROGRAM_UNDEFINED
) {
361 slang_ir_storage
*st0
= (slang_ir_storage
*) st
;
362 st0
->File
= PROGRAM_TEMPORARY
;
365 assert(st
->File
< PROGRAM_UNDEFINED
);
366 src
->File
= st
->File
;
371 swizzle
= fix_swizzle(swizzle
);
372 assert(GET_SWZ(swizzle
, 0) <= SWIZZLE_W
);
373 assert(GET_SWZ(swizzle
, 1) <= SWIZZLE_W
);
374 assert(GET_SWZ(swizzle
, 2) <= SWIZZLE_W
);
375 assert(GET_SWZ(swizzle
, 3) <= SWIZZLE_W
);
376 src
->Swizzle
= swizzle
;
378 src
->RelAddr
= relAddr
;
383 * Setup storage pointing to a scalar constant/literal.
386 constant_to_storage(slang_emit_info
*emitInfo
,
388 slang_ir_storage
*store
)
395 reg
= _mesa_add_unnamed_constant(emitInfo
->prog
->Parameters
,
398 memset(store
, 0, sizeof(*store
));
399 store
->File
= PROGRAM_CONSTANT
;
401 store
->Swizzle
= swizzle
;
406 * Add new instruction at end of given program.
407 * \param emitInfo emit state; the instruction is appended to emitInfo->prog
408 * \param opcode opcode for the new instruction
409 * \return pointer to the new instruction
411 static struct prog_instruction
*
412 new_instruction(slang_emit_info
*emitInfo
, gl_inst_opcode opcode
)
414 struct gl_program
*prog
= emitInfo
->prog
;
415 struct prog_instruction
*inst
;
418 /* print prev inst */
419 if (prog
->NumInstructions
> 0) {
420 _mesa_print_instruction(prog
->Instructions
+ prog
->NumInstructions
- 1);
423 assert(prog
->NumInstructions
<= emitInfo
->MaxInstructions
);
425 if (prog
->NumInstructions
== emitInfo
->MaxInstructions
) {
426 /* grow the instruction buffer */
427 emitInfo
->MaxInstructions
+= 20;
429 _mesa_realloc_instructions(prog
->Instructions
,
430 prog
->NumInstructions
,
431 emitInfo
->MaxInstructions
);
434 inst
= prog
->Instructions
+ prog
->NumInstructions
;
435 prog
->NumInstructions
++;
436 _mesa_init_instructions(inst
, 1);
437 inst
->Opcode
= opcode
;
438 inst
->BranchTarget
= -1; /* invalid */
440 printf("New inst %d: %p %s\n", prog->NumInstructions-1,(void*)inst,
441 _mesa_opcode_string(inst->Opcode));
447 static struct prog_instruction
*
448 emit_arl_load(slang_emit_info
*emitInfo
,
449 enum register_file file
, GLint index
, GLuint swizzle
)
451 struct prog_instruction
*inst
= new_instruction(emitInfo
, OPCODE_ARL
);
452 inst
->SrcReg
[0].File
= file
;
453 inst
->SrcReg
[0].Index
= index
;
454 inst
->SrcReg
[0].Swizzle
= swizzle
;
455 inst
->DstReg
.File
= PROGRAM_ADDRESS
;
456 inst
->DstReg
.Index
= 0;
457 inst
->DstReg
.WriteMask
= WRITEMASK_X
;
463 * Emit a new instruction with given opcode, operands.
464 * At this point the instruction may have multiple indirect register
465 * loads/stores. We convert those into ARL loads and address-relative
466 * operands. See comments inside.
467 * At some point in the future we could directly emit indirectly addressed
468 * registers in Mesa GPU instructions.
470 static struct prog_instruction
*
471 emit_instruction(slang_emit_info
*emitInfo
,
472 gl_inst_opcode opcode
,
473 const slang_ir_storage
*dst
,
474 const slang_ir_storage
*src0
,
475 const slang_ir_storage
*src1
,
476 const slang_ir_storage
*src2
)
478 struct prog_instruction
*inst
;
479 GLuint numIndirect
= 0;
480 const slang_ir_storage
*src
[3];
481 slang_ir_storage newSrc
[3], newDst
;
485 isTemp
[0] = isTemp
[1] = isTemp
[2] = GL_FALSE
;
491 /* count up how many operands are indirect loads */
492 for (i
= 0; i
< 3; i
++) {
493 if (src
[i
] && src
[i
]->IsIndirect
)
496 if (dst
&& dst
->IsIndirect
)
499 /* Take special steps for indirect register loads.
500 * If we had multiple address registers this would be simpler.
501 * For example, this GLSL code:
502 * x[i] = y[j] + z[k];
503 * would translate into something like:
507 * ADD TEMP[ADDR.x+5], TEMP[ADDR.y+9], TEMP[ADDR.z+4];
508 * But since we currently only have one address register we have to do this:
510 * MOV t1, TEMP[ADDR.x+9];
512 * MOV t2, TEMP[ADDR.x+4];
514 * ADD TEMP[ADDR.x+5], t1, t2;
515 * The code here figures this out...
517 if (numIndirect
> 0) {
518 for (i
= 0; i
< 3; i
++) {
519 if (src
[i
] && src
[i
]->IsIndirect
) {
520 /* load the ARL register with the indirect register */
521 emit_arl_load(emitInfo
,
522 src
[i
]->IndirectFile
,
523 src
[i
]->IndirectIndex
,
524 src
[i
]->IndirectSwizzle
);
526 if (numIndirect
> 1) {
527 /* Need to load src[i] into a temporary register */
528 slang_ir_storage srcRelAddr
;
529 alloc_local_temp(emitInfo
, &newSrc
[i
], src
[i
]->Size
);
532 /* set RelAddr flag on src register */
533 srcRelAddr
= *src
[i
];
534 srcRelAddr
.RelAddr
= GL_TRUE
;
535 srcRelAddr
.IsIndirect
= GL_FALSE
; /* not really needed */
537 /* MOV newSrc, srcRelAddr; */
538 inst
= emit_instruction(emitInfo
,
548 /* just rewrite the src[i] storage to be ARL-relative */
550 newSrc
[i
].RelAddr
= GL_TRUE
;
551 newSrc
[i
].IsIndirect
= GL_FALSE
; /* not really needed */
558 /* Take special steps for indirect dest register write */
559 if (dst
&& dst
->IsIndirect
) {
560 /* load the ARL register with the indirect register */
561 emit_arl_load(emitInfo
,
564 dst
->IndirectSwizzle
);
566 newDst
.RelAddr
= GL_TRUE
;
567 newDst
.IsIndirect
= GL_FALSE
;
571 /* OK, emit the instruction and its dst, src regs */
572 inst
= new_instruction(emitInfo
, opcode
);
577 storage_to_dst_reg(&inst
->DstReg
, dst
);
579 for (i
= 0; i
< 3; i
++) {
581 storage_to_src_reg(&inst
->SrcReg
[i
], src
[i
]);
584 /* Free any temp registers that we allocated above */
585 for (i
= 0; i
< 3; i
++) {
587 _slang_free_temp(emitInfo
->vt
, &newSrc
[i
]);
596 * Put a comment on the given instruction.
599 inst_comment(struct prog_instruction
*inst
, const char *comment
)
602 inst
->Comment
= _mesa_strdup(comment
);
608 * Return pointer to last instruction in program.
610 static struct prog_instruction
*
611 prev_instruction(slang_emit_info
*emitInfo
)
613 struct gl_program
*prog
= emitInfo
->prog
;
614 if (prog
->NumInstructions
== 0)
617 return prog
->Instructions
+ prog
->NumInstructions
- 1;
621 static struct prog_instruction
*
622 emit(slang_emit_info
*emitInfo
, slang_ir_node
*n
);
626 * Return an annotation string for given node's storage.
629 storage_annotation(const slang_ir_node
*n
, const struct gl_program
*prog
)
632 const slang_ir_storage
*st
= n
->Store
;
633 static char s
[100] = "";
636 return _mesa_strdup("");
639 case PROGRAM_CONSTANT
:
640 if (st
->Index
>= 0) {
641 const GLfloat
*val
= prog
->Parameters
->ParameterValues
[st
->Index
];
642 if (st
->Swizzle
== SWIZZLE_NOOP
)
643 sprintf(s
, "{%g, %g, %g, %g}", val
[0], val
[1], val
[2], val
[3]);
645 sprintf(s
, "%g", val
[GET_SWZ(st
->Swizzle
, 0)]);
649 case PROGRAM_TEMPORARY
:
651 sprintf(s
, "%s", (char *) n
->Var
->a_name
);
653 sprintf(s
, "t[%d]", st
->Index
);
655 case PROGRAM_STATE_VAR
:
656 case PROGRAM_UNIFORM
:
657 sprintf(s
, "%s", prog
->Parameters
->Parameters
[st
->Index
].Name
);
659 case PROGRAM_VARYING
:
660 sprintf(s
, "%s", prog
->Varying
->Parameters
[st
->Index
].Name
);
663 sprintf(s
, "input[%d]", st
->Index
);
666 sprintf(s
, "output[%d]", st
->Index
);
671 return _mesa_strdup(s
);
679 * Return an annotation string for an instruction.
682 instruction_annotation(gl_inst_opcode opcode
, char *dstAnnot
,
683 char *srcAnnot0
, char *srcAnnot1
, char *srcAnnot2
)
686 const char *operator;
691 len
+= strlen(dstAnnot
);
693 dstAnnot
= _mesa_strdup("");
696 len
+= strlen(srcAnnot0
);
698 srcAnnot0
= _mesa_strdup("");
701 len
+= strlen(srcAnnot1
);
703 srcAnnot1
= _mesa_strdup("");
706 len
+= strlen(srcAnnot2
);
708 srcAnnot2
= _mesa_strdup("");
742 s
= (char *) malloc(len
);
743 sprintf(s
, "%s = %s %s %s %s", dstAnnot
,
744 srcAnnot0
, operator, srcAnnot1
, srcAnnot2
);
745 assert(_mesa_strlen(s
) < len
);
760 * Emit an instruction that's just a comment.
762 static struct prog_instruction
*
763 emit_comment(slang_emit_info
*emitInfo
, const char *comment
)
765 struct prog_instruction
*inst
= new_instruction(emitInfo
, OPCODE_NOP
);
766 inst_comment(inst
, comment
);
772 * Generate code for a simple arithmetic instruction.
773 * Either 1, 2 or 3 operands.
775 static struct prog_instruction
*
776 emit_arith(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
778 const slang_ir_info
*info
= _slang_ir_info(n
->Opcode
);
779 struct prog_instruction
*inst
;
783 assert(info
->InstOpcode
!= OPCODE_NOP
);
785 #if PEEPHOLE_OPTIMIZATIONS
786 /* Look for MAD opportunity */
787 if (info
->NumParams
== 2 &&
788 n
->Opcode
== IR_ADD
&& n
->Children
[0]->Opcode
== IR_MUL
) {
789 /* found pattern IR_ADD(IR_MUL(A, B), C) */
790 emit(emitInfo
, n
->Children
[0]->Children
[0]); /* A */
791 emit(emitInfo
, n
->Children
[0]->Children
[1]); /* B */
792 emit(emitInfo
, n
->Children
[1]); /* C */
793 alloc_node_storage(emitInfo
, n
, -1); /* dest */
795 inst
= emit_instruction(emitInfo
,
798 n
->Children
[0]->Children
[0]->Store
,
799 n
->Children
[0]->Children
[1]->Store
,
800 n
->Children
[1]->Store
);
802 free_node_storage(emitInfo
->vt
, n
->Children
[0]->Children
[0]);
803 free_node_storage(emitInfo
->vt
, n
->Children
[0]->Children
[1]);
804 free_node_storage(emitInfo
->vt
, n
->Children
[1]);
808 if (info
->NumParams
== 2 &&
809 n
->Opcode
== IR_ADD
&& n
->Children
[1]->Opcode
== IR_MUL
) {
810 /* found pattern IR_ADD(A, IR_MUL(B, C)) */
811 emit(emitInfo
, n
->Children
[0]); /* A */
812 emit(emitInfo
, n
->Children
[1]->Children
[0]); /* B */
813 emit(emitInfo
, n
->Children
[1]->Children
[1]); /* C */
814 alloc_node_storage(emitInfo
, n
, -1); /* dest */
816 inst
= emit_instruction(emitInfo
,
819 n
->Children
[1]->Children
[0]->Store
,
820 n
->Children
[1]->Children
[1]->Store
,
821 n
->Children
[0]->Store
);
823 free_node_storage(emitInfo
->vt
, n
->Children
[1]->Children
[0]);
824 free_node_storage(emitInfo
->vt
, n
->Children
[1]->Children
[1]);
825 free_node_storage(emitInfo
->vt
, n
->Children
[0]);
830 /* gen code for children, may involve temp allocation */
831 for (i
= 0; i
< info
->NumParams
; i
++) {
832 emit(emitInfo
, n
->Children
[i
]);
833 if (!n
->Children
[i
] || !n
->Children
[i
]->Store
) {
840 alloc_node_storage(emitInfo
, n
, -1);
842 inst
= emit_instruction(emitInfo
,
845 (info
->NumParams
> 0 ? n
->Children
[0]->Store
: NULL
),
846 (info
->NumParams
> 1 ? n
->Children
[1]->Store
: NULL
),
847 (info
->NumParams
> 2 ? n
->Children
[2]->Store
: NULL
)
851 for (i
= 0; i
< info
->NumParams
; i
++)
852 free_node_storage(emitInfo
->vt
, n
->Children
[i
]);
859 * Emit code for == and != operators. These could normally be handled
860 * by emit_arith() except we need to be able to handle structure comparisons.
862 static struct prog_instruction
*
863 emit_compare(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
865 struct prog_instruction
*inst
= NULL
;
868 assert(n
->Opcode
== IR_EQUAL
|| n
->Opcode
== IR_NOTEQUAL
);
870 /* gen code for children */
871 emit(emitInfo
, n
->Children
[0]);
872 emit(emitInfo
, n
->Children
[1]);
874 if (n
->Children
[0]->Store
->Size
!= n
->Children
[1]->Store
->Size
) {
875 slang_info_log_error(emitInfo
->log
, "invalid operands to == or !=");
879 /* final result is 1 bool */
880 if (!alloc_node_storage(emitInfo
, n
, 1))
883 size
= n
->Children
[0]->Store
->Size
;
886 gl_inst_opcode opcode
= n
->Opcode
== IR_EQUAL
? OPCODE_SEQ
: OPCODE_SNE
;
887 inst
= emit_instruction(emitInfo
,
890 n
->Children
[0]->Store
,
891 n
->Children
[1]->Store
,
894 else if (size
<= 4) {
895 /* compare two vectors.
896 * Unfortunately, there's no instruction to compare vectors and
897 * return a scalar result. Do it with some compare and dot product
901 gl_inst_opcode dotOp
;
902 slang_ir_storage tempStore
;
904 if (!alloc_local_temp(emitInfo
, &tempStore
, 4)) {
911 swizzle
= SWIZZLE_XYZW
;
913 else if (size
== 3) {
915 swizzle
= SWIZZLE_XYZW
;
919 dotOp
= OPCODE_DP3
; /* XXX use OPCODE_DP2 eventually */
920 swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
);
923 /* Compute inequality (temp = (A != B)) */
924 inst
= emit_instruction(emitInfo
,
927 n
->Children
[0]->Store
,
928 n
->Children
[1]->Store
,
930 inst_comment(inst
, "Compare values");
932 /* Compute val = DOT(temp, temp) (reduction) */
933 inst
= emit_instruction(emitInfo
,
939 inst
->SrcReg
[0].Swizzle
= inst
->SrcReg
[1].Swizzle
= swizzle
; /*override*/
940 inst_comment(inst
, "Reduce vec to bool");
942 _slang_free_temp(emitInfo
->vt
, &tempStore
); /* free temp */
944 if (n
->Opcode
== IR_EQUAL
) {
945 /* compute val = !val.x with SEQ val, val, 0; */
946 slang_ir_storage zero
;
947 constant_to_storage(emitInfo
, 0.0, &zero
);
948 inst
= emit_instruction(emitInfo
,
954 inst_comment(inst
, "Invert true/false");
958 /* size > 4, struct or array compare.
959 * XXX this won't work reliably for structs with padding!!
961 GLint i
, num
= (n
->Children
[0]->Store
->Size
+ 3) / 4;
962 slang_ir_storage accTemp
, sneTemp
;
964 if (!alloc_local_temp(emitInfo
, &accTemp
, 4))
967 if (!alloc_local_temp(emitInfo
, &sneTemp
, 4))
970 for (i
= 0; i
< num
; i
++) {
971 slang_ir_storage srcStore0
= *n
->Children
[0]->Store
;
972 slang_ir_storage srcStore1
= *n
->Children
[1]->Store
;
973 srcStore0
.Index
+= i
;
974 srcStore1
.Index
+= i
;
977 /* SNE accTemp, left[i], right[i] */
978 inst
= emit_instruction(emitInfo
, OPCODE_SNE
,
983 inst_comment(inst
, "Begin struct/array comparison");
986 /* SNE sneTemp, left[i], right[i] */
987 inst
= emit_instruction(emitInfo
, OPCODE_SNE
,
992 /* ADD accTemp, accTemp, sneTemp; # like logical-OR */
993 inst
= emit_instruction(emitInfo
, OPCODE_ADD
,
1001 /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */
1002 inst
= emit_instruction(emitInfo
, OPCODE_DP4
,
1007 inst_comment(inst
, "End struct/array comparison");
1009 if (n
->Opcode
== IR_EQUAL
) {
1010 /* compute tmp.x = !tmp.x via tmp.x = (tmp.x == 0) */
1011 slang_ir_storage zero
;
1012 constant_to_storage(emitInfo
, 0.0, &zero
);
1013 inst
= emit_instruction(emitInfo
, OPCODE_SEQ
,
1014 n
->Store
, /* dest */
1018 inst_comment(inst
, "Invert true/false");
1021 _slang_free_temp(emitInfo
->vt
, &accTemp
);
1022 _slang_free_temp(emitInfo
->vt
, &sneTemp
);
1026 free_node_storage(emitInfo
->vt
, n
->Children
[0]);
1027 free_node_storage(emitInfo
->vt
, n
->Children
[1]);
1035 * Generate code for an IR_CLAMP instruction.
1037 static struct prog_instruction
*
1038 emit_clamp(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1040 struct prog_instruction
*inst
;
1041 slang_ir_node tmpNode
;
1043 assert(n
->Opcode
== IR_CLAMP
);
1049 inst
= emit(emitInfo
, n
->Children
[0]);
1051 /* If lower limit == 0.0 and upper limit == 1.0,
1052 * set prev instruction's SaturateMode field to SATURATE_ZERO_ONE.
1054 * emit OPCODE_MIN, OPCODE_MAX sequence.
1057 /* XXX this isn't quite finished yet */
1058 if (n
->Children
[1]->Opcode
== IR_FLOAT
&&
1059 n
->Children
[1]->Value
[0] == 0.0 &&
1060 n
->Children
[1]->Value
[1] == 0.0 &&
1061 n
->Children
[1]->Value
[2] == 0.0 &&
1062 n
->Children
[1]->Value
[3] == 0.0 &&
1063 n
->Children
[2]->Opcode
== IR_FLOAT
&&
1064 n
->Children
[2]->Value
[0] == 1.0 &&
1065 n
->Children
[2]->Value
[1] == 1.0 &&
1066 n
->Children
[2]->Value
[2] == 1.0 &&
1067 n
->Children
[2]->Value
[3] == 1.0) {
1069 inst
= prev_instruction(prog
);
1071 if (inst
&& inst
->Opcode
!= OPCODE_NOP
) {
1072 /* and prev instruction's DstReg matches n->Children[0]->Store */
1073 inst
->SaturateMode
= SATURATE_ZERO_ONE
;
1074 n
->Store
= n
->Children
[0]->Store
;
1080 if (!alloc_node_storage(emitInfo
, n
, n
->Children
[0]->Store
->Size
))
1083 emit(emitInfo
, n
->Children
[1]);
1084 emit(emitInfo
, n
->Children
[2]);
1086 /* Some GPUs don't allow reading from output registers. So if the
1087 * dest for this clamp() is an output reg, we can't use that reg for
1088 * the intermediate result. Use a temp register instead.
1090 _mesa_bzero(&tmpNode
, sizeof(tmpNode
));
1091 alloc_node_storage(emitInfo
, &tmpNode
, n
->Store
->Size
);
1093 /* tmp = max(ch[0], ch[1]) */
1094 inst
= emit_instruction(emitInfo
, OPCODE_MAX
,
1095 tmpNode
.Store
, /* dest */
1096 n
->Children
[0]->Store
,
1097 n
->Children
[1]->Store
,
1100 /* n->dest = min(tmp, ch[2]) */
1101 inst
= emit_instruction(emitInfo
, OPCODE_MIN
,
1102 n
->Store
, /* dest */
1104 n
->Children
[2]->Store
,
1107 free_node_storage(emitInfo
->vt
, &tmpNode
);
1113 static struct prog_instruction
*
1114 emit_negation(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1116 /* Implement as MOV dst, -src; */
1117 /* XXX we could look at the previous instruction and in some circumstances
1118 * modify it to accomplish the negation.
1120 struct prog_instruction
*inst
;
1122 emit(emitInfo
, n
->Children
[0]);
1124 if (!alloc_node_storage(emitInfo
, n
, n
->Children
[0]->Store
->Size
))
1127 inst
= emit_instruction(emitInfo
,
1129 n
->Store
, /* dest */
1130 n
->Children
[0]->Store
,
1133 inst
->SrcReg
[0].NegateBase
= NEGATE_XYZW
;
1138 static struct prog_instruction
*
1139 emit_label(slang_emit_info
*emitInfo
, const slang_ir_node
*n
)
1143 /* XXX this fails in loop tail code - investigate someday */
1144 assert(_slang_label_get_location(n
->Label
) < 0);
1145 _slang_label_set_location(n
->Label
, emitInfo
->prog
->NumInstructions
,
1148 if (_slang_label_get_location(n
->Label
) < 0)
1149 _slang_label_set_location(n
->Label
, emitInfo
->prog
->NumInstructions
,
1157 * Emit code for a function call.
1158 * Note that for each time a function is called, we emit the function's
1159 * body code again because the set of available registers may be different.
1161 static struct prog_instruction
*
1162 emit_fcall(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1164 struct gl_program
*progSave
;
1165 struct prog_instruction
*inst
;
1166 GLuint subroutineId
;
1169 assert(n
->Opcode
== IR_CALL
);
1172 /* save/push cur program */
1173 maxInstSave
= emitInfo
->MaxInstructions
;
1174 progSave
= emitInfo
->prog
;
1176 emitInfo
->prog
= new_subroutine(emitInfo
, &subroutineId
);
1177 emitInfo
->MaxInstructions
= emitInfo
->prog
->NumInstructions
;
1179 _slang_label_set_location(n
->Label
, emitInfo
->prog
->NumInstructions
,
1182 if (emitInfo
->EmitBeginEndSub
) {
1183 /* BGNSUB isn't a real instruction.
1184 * We require a label (i.e. "foobar:") though, if we're going to
1185 * print the program in the NV format. The BGNSUB instruction is
1186 * really just a NOP to attach the label to.
1188 inst
= new_instruction(emitInfo
, OPCODE_BGNSUB
);
1189 inst_comment(inst
, n
->Label
->Name
);
1192 /* body of function: */
1193 emit(emitInfo
, n
->Children
[0]);
1194 n
->Store
= n
->Children
[0]->Store
;
1196 /* add RET instruction now, if needed */
1197 inst
= prev_instruction(emitInfo
);
1198 if (inst
&& inst
->Opcode
!= OPCODE_RET
) {
1199 inst
= new_instruction(emitInfo
, OPCODE_RET
);
1202 if (emitInfo
->EmitBeginEndSub
) {
1203 inst
= new_instruction(emitInfo
, OPCODE_ENDSUB
);
1204 inst_comment(inst
, n
->Label
->Name
);
1207 /* pop/restore cur program */
1208 emitInfo
->prog
= progSave
;
1209 emitInfo
->MaxInstructions
= maxInstSave
;
1211 /* emit the function call */
1212 inst
= new_instruction(emitInfo
, OPCODE_CAL
);
1213 /* The branch target is just the subroutine number (changed later) */
1214 inst
->BranchTarget
= subroutineId
;
1215 inst_comment(inst
, n
->Label
->Name
);
1216 assert(inst
->BranchTarget
>= 0);
1223 * Emit code for a 'return' statement.
1225 static struct prog_instruction
*
1226 emit_return(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1228 struct prog_instruction
*inst
;
1230 assert(n
->Opcode
== IR_RETURN
);
1232 inst
= new_instruction(emitInfo
, OPCODE_RET
);
1233 inst
->DstReg
.CondMask
= COND_TR
; /* always return */
1238 static struct prog_instruction
*
1239 emit_kill(slang_emit_info
*emitInfo
)
1241 struct gl_fragment_program
*fp
;
1242 struct prog_instruction
*inst
;
1243 /* NV-KILL - discard fragment depending on condition code.
1244 * Note that ARB-KILL depends on sign of vector operand.
1246 inst
= new_instruction(emitInfo
, OPCODE_KIL_NV
);
1247 inst
->DstReg
.CondMask
= COND_TR
; /* always kill */
1249 assert(emitInfo
->prog
->Target
== GL_FRAGMENT_PROGRAM_ARB
);
1250 fp
= (struct gl_fragment_program
*) emitInfo
->prog
;
1251 fp
->UsesKill
= GL_TRUE
;
1257 static struct prog_instruction
*
1258 emit_tex(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1260 struct prog_instruction
*inst
;
1261 gl_inst_opcode opcode
;
1263 if (n
->Opcode
== IR_TEX
) {
1264 opcode
= OPCODE_TEX
;
1266 else if (n
->Opcode
== IR_TEXB
) {
1267 opcode
= OPCODE_TXB
;
1270 assert(n
->Opcode
== IR_TEXP
);
1271 opcode
= OPCODE_TXP
;
1274 /* emit code for the texcoord operand */
1275 (void) emit(emitInfo
, n
->Children
[1]);
1277 /* alloc storage for result of texture fetch */
1278 if (!alloc_node_storage(emitInfo
, n
, 4))
1281 /* emit TEX instruction; Child[1] is the texcoord */
1282 inst
= emit_instruction(emitInfo
,
1285 n
->Children
[1]->Store
,
1289 /* Child[0] is the sampler (a uniform which'll indicate the texture unit) */
1290 assert(n
->Children
[0]->Store
);
1291 assert(n
->Children
[0]->Store
->File
== PROGRAM_SAMPLER
);
1292 /* Store->Index is the sampler index */
1293 assert(n
->Children
[0]->Store
->Index
>= 0);
1294 /* Store->Size is the texture target */
1295 assert(n
->Children
[0]->Store
->Size
>= TEXTURE_1D_INDEX
);
1296 assert(n
->Children
[0]->Store
->Size
<= TEXTURE_RECT_INDEX
);
1298 inst
->TexSrcTarget
= n
->Children
[0]->Store
->Size
;
1299 inst
->TexSrcUnit
= n
->Children
[0]->Store
->Index
; /* i.e. uniform's index */
1301 /* mark the sampler as being used */
1302 _mesa_use_uniform(emitInfo
->prog
->Parameters
,
1303 (char *) n
->Children
[0]->Var
->a_name
);
1312 static struct prog_instruction
*
1313 emit_copy(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1315 struct prog_instruction
*inst
;
1317 assert(n
->Opcode
== IR_COPY
);
1320 emit(emitInfo
, n
->Children
[0]);
1321 if (!n
->Children
[0]->Store
|| n
->Children
[0]->Store
->Index
< 0) {
1322 /* an error should have been already recorded */
1327 assert(n
->Children
[1]);
1328 inst
= emit(emitInfo
, n
->Children
[1]);
1330 if (!n
->Children
[1]->Store
|| n
->Children
[1]->Store
->Index
< 0) {
1331 if (!emitInfo
->log
->text
) {
1332 slang_info_log_error(emitInfo
->log
, "invalid assignment");
1337 assert(n
->Children
[1]->Store
->Index
>= 0);
1339 /*assert(n->Children[0]->Store->Size == n->Children[1]->Store->Size);*/
1341 n
->Store
= n
->Children
[0]->Store
;
1343 if (n
->Store
->File
== PROGRAM_SAMPLER
) {
1344 /* no code generated for sampler assignments,
1345 * just copy the sampler index at compile time.
1347 n
->Store
->Index
= n
->Children
[1]->Store
->Index
;
1351 #if PEEPHOLE_OPTIMIZATIONS
1353 _slang_is_temp(emitInfo
->vt
, n
->Children
[1]->Store
) &&
1354 (inst
->DstReg
.File
== n
->Children
[1]->Store
->File
) &&
1355 (inst
->DstReg
.Index
== n
->Children
[1]->Store
->Index
) &&
1356 !n
->Children
[0]->Store
->IsIndirect
&&
1357 n
->Children
[0]->Store
->Size
<= 4) {
1358 /* Peephole optimization:
1359 * The Right-Hand-Side has its results in a temporary place.
1360 * Modify the RHS (and the prev instruction) to store its results
1361 * in the destination specified by n->Children[0].
1362 * Then, this MOVE is a no-op.
1369 if (n
->Children
[1]->Opcode
!= IR_SWIZZLE
)
1370 _slang_free_temp(emitInfo
->vt
, n
->Children
[1]->Store
);
1371 *n
->Children
[1]->Store
= *n
->Children
[0]->Store
;
1373 /* fixup the previous instruction (which stored the RHS result) */
1374 assert(n
->Children
[0]->Store
->Index
>= 0);
1376 storage_to_dst_reg(&inst
->DstReg
, n
->Children
[0]->Store
);
1382 if (n
->Children
[0]->Store
->Size
> 4) {
1383 /* move matrix/struct etc (block of registers) */
1384 slang_ir_storage dstStore
= *n
->Children
[0]->Store
;
1385 slang_ir_storage srcStore
= *n
->Children
[1]->Store
;
1386 GLint size
= srcStore
.Size
;
1387 ASSERT(n
->Children
[1]->Store
->Swizzle
== SWIZZLE_NOOP
);
1391 inst
= emit_instruction(emitInfo
, OPCODE_MOV
,
1396 inst_comment(inst
, "IR_COPY block");
1403 /* single register move */
1404 char *srcAnnot
, *dstAnnot
;
1405 assert(n
->Children
[0]->Store
->Index
>= 0);
1406 inst
= emit_instruction(emitInfo
, OPCODE_MOV
,
1407 n
->Children
[0]->Store
, /* dest */
1408 n
->Children
[1]->Store
,
1411 dstAnnot
= storage_annotation(n
->Children
[0], emitInfo
->prog
);
1412 srcAnnot
= storage_annotation(n
->Children
[1], emitInfo
->prog
);
1413 inst
->Comment
= instruction_annotation(inst
->Opcode
, dstAnnot
,
1414 srcAnnot
, NULL
, NULL
);
1416 free_node_storage(emitInfo
->vt
, n
->Children
[1]);
1423 * An IR_COND node wraps a boolean expression which is used by an
1424 * IF or WHILE test. This is where we'll set condition codes, if needed.
1426 static struct prog_instruction
*
1427 emit_cond(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1429 struct prog_instruction
*inst
;
1431 assert(n
->Opcode
== IR_COND
);
1433 if (!n
->Children
[0])
1436 /* emit code for the expression */
1437 inst
= emit(emitInfo
, n
->Children
[0]);
1439 if (!n
->Children
[0]->Store
) {
1440 /* error recovery */
1444 assert(n
->Children
[0]->Store
);
1445 /*assert(n->Children[0]->Store->Size == 1);*/
1447 if (emitInfo
->EmitCondCodes
) {
1449 n
->Children
[0]->Store
&&
1450 inst
->DstReg
.File
== n
->Children
[0]->Store
->File
&&
1451 inst
->DstReg
.Index
== n
->Children
[0]->Store
->Index
) {
1452 /* The previous instruction wrote to the register who's value
1453 * we're testing. Just fix that instruction so that the
1454 * condition codes are computed.
1456 inst
->CondUpdate
= GL_TRUE
;
1457 n
->Store
= n
->Children
[0]->Store
;
1461 /* This'll happen for things like "if (i) ..." where no code
1462 * is normally generated for the expression "i".
1463 * Generate a move instruction just to set condition codes.
1465 if (!alloc_node_storage(emitInfo
, n
, 1))
1467 inst
= emit_instruction(emitInfo
, OPCODE_MOV
,
1468 n
->Store
, /* dest */
1469 n
->Children
[0]->Store
,
1472 inst
->CondUpdate
= GL_TRUE
;
1473 inst_comment(inst
, "COND expr");
1474 _slang_free_temp(emitInfo
->vt
, n
->Store
);
1479 /* No-op: the boolean result of the expression is in a regular reg */
1480 n
->Store
= n
->Children
[0]->Store
;
1489 static struct prog_instruction
*
1490 emit_not(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1492 static const struct {
1493 gl_inst_opcode op
, opNot
;
1495 { OPCODE_SLT
, OPCODE_SGE
},
1496 { OPCODE_SLE
, OPCODE_SGT
},
1497 { OPCODE_SGT
, OPCODE_SLE
},
1498 { OPCODE_SGE
, OPCODE_SLT
},
1499 { OPCODE_SEQ
, OPCODE_SNE
},
1500 { OPCODE_SNE
, OPCODE_SEQ
},
1503 struct prog_instruction
*inst
;
1504 slang_ir_storage zero
;
1508 inst
= emit(emitInfo
, n
->Children
[0]);
1510 #if PEEPHOLE_OPTIMIZATIONS
1512 /* if the prev instruction was a comparison instruction, invert it */
1513 for (i
= 0; operators
[i
].op
; i
++) {
1514 if (inst
->Opcode
== operators
[i
].op
) {
1515 inst
->Opcode
= operators
[i
].opNot
;
1516 n
->Store
= n
->Children
[0]->Store
;
1523 /* else, invert using SEQ (v = v == 0) */
1524 if (!alloc_node_storage(emitInfo
, n
, n
->Children
[0]->Store
->Size
))
1527 constant_to_storage(emitInfo
, 0.0, &zero
);
1528 inst
= emit_instruction(emitInfo
,
1531 n
->Children
[0]->Store
,
1534 inst_comment(inst
, "NOT");
1536 free_node_storage(emitInfo
->vt
, n
->Children
[0]);
1542 static struct prog_instruction
*
1543 emit_if(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1545 struct gl_program
*prog
= emitInfo
->prog
;
1546 GLuint ifInstLoc
, elseInstLoc
= 0;
1547 GLuint condWritemask
= 0;
1549 /* emit condition expression code */
1551 struct prog_instruction
*inst
;
1552 inst
= emit(emitInfo
, n
->Children
[0]);
1553 if (emitInfo
->EmitCondCodes
) {
1555 /* error recovery */
1558 condWritemask
= inst
->DstReg
.WriteMask
;
1562 if (!n
->Children
[0]->Store
)
1566 assert(n
->Children
[0]->Store
->Size
== 1); /* a bool! */
1569 ifInstLoc
= prog
->NumInstructions
;
1570 if (emitInfo
->EmitHighLevelInstructions
) {
1571 if (emitInfo
->EmitCondCodes
) {
1572 /* IF condcode THEN ... */
1573 struct prog_instruction
*ifInst
;
1574 ifInst
= new_instruction(emitInfo
, OPCODE_IF
);
1575 ifInst
->DstReg
.CondMask
= COND_NE
; /* if cond is non-zero */
1576 /* only test the cond code (1 of 4) that was updated by the
1577 * previous instruction.
1579 ifInst
->DstReg
.CondSwizzle
= writemask_to_swizzle(condWritemask
);
1582 /* IF src[0] THEN ... */
1583 emit_instruction(emitInfo
, OPCODE_IF
,
1585 n
->Children
[0]->Store
, /* op0 */
1591 /* conditional jump to else, or endif */
1592 struct prog_instruction
*ifInst
= new_instruction(emitInfo
, OPCODE_BRA
);
1593 ifInst
->DstReg
.CondMask
= COND_EQ
; /* BRA if cond is zero */
1594 inst_comment(ifInst
, "if zero");
1595 ifInst
->DstReg
.CondSwizzle
= writemask_to_swizzle(condWritemask
);
1599 emit(emitInfo
, n
->Children
[1]);
1601 if (n
->Children
[2]) {
1602 /* have else body */
1603 elseInstLoc
= prog
->NumInstructions
;
1604 if (emitInfo
->EmitHighLevelInstructions
) {
1605 (void) new_instruction(emitInfo
, OPCODE_ELSE
);
1608 /* jump to endif instruction */
1609 struct prog_instruction
*inst
;
1610 inst
= new_instruction(emitInfo
, OPCODE_BRA
);
1611 inst_comment(inst
, "else");
1612 inst
->DstReg
.CondMask
= COND_TR
; /* always branch */
1614 prog
->Instructions
[ifInstLoc
].BranchTarget
= prog
->NumInstructions
;
1615 emit(emitInfo
, n
->Children
[2]);
1619 prog
->Instructions
[ifInstLoc
].BranchTarget
= prog
->NumInstructions
;
1622 if (emitInfo
->EmitHighLevelInstructions
) {
1623 (void) new_instruction(emitInfo
, OPCODE_ENDIF
);
1626 if (n
->Children
[2]) {
1627 prog
->Instructions
[elseInstLoc
].BranchTarget
= prog
->NumInstructions
;
1633 static struct prog_instruction
*
1634 emit_loop(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1636 struct gl_program
*prog
= emitInfo
->prog
;
1637 struct prog_instruction
*endInst
;
1638 GLuint beginInstLoc
, tailInstLoc
, endInstLoc
;
1641 /* emit OPCODE_BGNLOOP */
1642 beginInstLoc
= prog
->NumInstructions
;
1643 if (emitInfo
->EmitHighLevelInstructions
) {
1644 (void) new_instruction(emitInfo
, OPCODE_BGNLOOP
);
1648 emit(emitInfo
, n
->Children
[0]);
1651 tailInstLoc
= prog
->NumInstructions
;
1652 if (n
->Children
[1]) {
1653 if (emitInfo
->EmitComments
)
1654 emit_comment(emitInfo
, "Loop tail code:");
1655 emit(emitInfo
, n
->Children
[1]);
1658 endInstLoc
= prog
->NumInstructions
;
1659 if (emitInfo
->EmitHighLevelInstructions
) {
1660 /* emit OPCODE_ENDLOOP */
1661 endInst
= new_instruction(emitInfo
, OPCODE_ENDLOOP
);
1664 /* emit unconditional BRA-nch */
1665 endInst
= new_instruction(emitInfo
, OPCODE_BRA
);
1666 endInst
->DstReg
.CondMask
= COND_TR
; /* always true */
1668 /* ENDLOOP's BranchTarget points to the BGNLOOP inst */
1669 endInst
->BranchTarget
= beginInstLoc
;
1671 if (emitInfo
->EmitHighLevelInstructions
) {
1672 /* BGNLOOP's BranchTarget points to the ENDLOOP inst */
1673 prog
->Instructions
[beginInstLoc
].BranchTarget
= prog
->NumInstructions
-1;
1676 /* Done emitting loop code. Now walk over the loop's linked list of
1677 * BREAK and CONT nodes, filling in their BranchTarget fields (which
1678 * will point to the ENDLOOP+1 or BGNLOOP instructions, respectively).
1680 for (ir
= n
->List
; ir
; ir
= ir
->List
) {
1681 struct prog_instruction
*inst
= prog
->Instructions
+ ir
->InstLocation
;
1682 assert(inst
->BranchTarget
< 0);
1683 if (ir
->Opcode
== IR_BREAK
||
1684 ir
->Opcode
== IR_BREAK_IF_TRUE
) {
1685 assert(inst
->Opcode
== OPCODE_BRK
||
1686 inst
->Opcode
== OPCODE_BRA
);
1687 /* go to instruction after end of loop */
1688 inst
->BranchTarget
= endInstLoc
+ 1;
1691 assert(ir
->Opcode
== IR_CONT
||
1692 ir
->Opcode
== IR_CONT_IF_TRUE
);
1693 assert(inst
->Opcode
== OPCODE_CONT
||
1694 inst
->Opcode
== OPCODE_BRA
);
1695 /* go to instruction at tail of loop */
1696 inst
->BranchTarget
= endInstLoc
;
1704 * Unconditional "continue" or "break" statement.
1705 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1707 static struct prog_instruction
*
1708 emit_cont_break(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1710 gl_inst_opcode opcode
;
1711 struct prog_instruction
*inst
;
1713 if (n
->Opcode
== IR_CONT
) {
1714 /* we need to execute the loop's tail code before doing CONT */
1716 assert(n
->Parent
->Opcode
== IR_LOOP
);
1717 if (n
->Parent
->Children
[1]) {
1718 /* emit tail code */
1719 if (emitInfo
->EmitComments
) {
1720 emit_comment(emitInfo
, "continue - tail code:");
1722 emit(emitInfo
, n
->Parent
->Children
[1]);
1726 /* opcode selection */
1727 if (emitInfo
->EmitHighLevelInstructions
) {
1728 opcode
= (n
->Opcode
== IR_CONT
) ? OPCODE_CONT
: OPCODE_BRK
;
1731 opcode
= OPCODE_BRA
;
1733 n
->InstLocation
= emitInfo
->prog
->NumInstructions
;
1734 inst
= new_instruction(emitInfo
, opcode
);
1735 inst
->DstReg
.CondMask
= COND_TR
; /* always true */
1741 * Conditional "continue" or "break" statement.
1742 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1744 static struct prog_instruction
*
1745 emit_cont_break_if_true(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1747 struct prog_instruction
*inst
;
1749 assert(n
->Opcode
== IR_CONT_IF_TRUE
||
1750 n
->Opcode
== IR_BREAK_IF_TRUE
);
1752 /* evaluate condition expr, setting cond codes */
1753 inst
= emit(emitInfo
, n
->Children
[0]);
1754 if (emitInfo
->EmitCondCodes
) {
1756 inst
->CondUpdate
= GL_TRUE
;
1759 n
->InstLocation
= emitInfo
->prog
->NumInstructions
;
1761 /* opcode selection */
1762 if (emitInfo
->EmitHighLevelInstructions
) {
1763 const gl_inst_opcode opcode
1764 = (n
->Opcode
== IR_CONT_IF_TRUE
) ? OPCODE_CONT
: OPCODE_BRK
;
1765 if (emitInfo
->EmitCondCodes
) {
1766 /* Get the writemask from the previous instruction which set
1767 * the condcodes. Use that writemask as the CondSwizzle.
1769 const GLuint condWritemask
= inst
->DstReg
.WriteMask
;
1770 inst
= new_instruction(emitInfo
, opcode
);
1771 inst
->DstReg
.CondMask
= COND_NE
;
1772 inst
->DstReg
.CondSwizzle
= writemask_to_swizzle(condWritemask
);
1781 ifInstLoc
= emitInfo
->prog
->NumInstructions
;
1782 inst
= emit_instruction(emitInfo
, OPCODE_IF
,
1784 n
->Children
[0]->Store
,
1787 n
->InstLocation
= emitInfo
->prog
->NumInstructions
;
1789 inst
= new_instruction(emitInfo
, opcode
);
1790 inst
= new_instruction(emitInfo
, OPCODE_ENDIF
);
1792 emitInfo
->prog
->Instructions
[ifInstLoc
].BranchTarget
1793 = emitInfo
->prog
->NumInstructions
;
1798 const GLuint condWritemask
= inst
->DstReg
.WriteMask
;
1799 assert(emitInfo
->EmitCondCodes
);
1800 inst
= new_instruction(emitInfo
, OPCODE_BRA
);
1801 inst
->DstReg
.CondMask
= COND_NE
;
1802 inst
->DstReg
.CondSwizzle
= writemask_to_swizzle(condWritemask
);
/*
 * Emit code for a swizzle node.
 * The child expression n->Children[0] is emitted first.  The statements
 * that follow rebuild n->Store from n->Store->Parent and then combine the
 * node's own swizzle with the copied one via _slang_swizzle_swizzle().
 * NOTE(review): short lines around these statements are absent from this
 * listing, so it cannot be confirmed from here whether they are compiled
 * in or disabled - verify against the complete file.
 */
1808 static struct prog_instruction
*
1809 emit_swizzle(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1811 struct prog_instruction
*inst
;
1813 inst
= emit(emitInfo
, n
->Children
[0]);
1816 assert(n
->Store
->Parent
);
1817 /* Apply this node's swizzle to parent's storage */
1818 GLuint swizzle
= n
->Store
->Swizzle
;
1819 _slang_copy_ir_storage(n
->Store
, n
->Store
->Parent
);
1820 n
->Store
->Swizzle
= _slang_swizzle_swizzle(n
->Store
->Swizzle
, swizzle
);
1821 assert(!n
->Store
->Parent
);
1828 * Dereference array element: element == array[index]
1829 * This basically involves emitting code for computing the array index
1830 * and updating the node/element's storage info.
1832 static struct prog_instruction
*
1833 emit_array_element(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1835 slang_ir_storage
*arrayStore
, *indexStore
;
1836 const int elemSize
= n
->Store
->Size
; /* number of floats */
1837 const GLint elemSizeVec
= (elemSize
+ 3) / 4; /* number of vec4 */
1838 struct prog_instruction
*inst
;
1840 assert(n
->Opcode
== IR_ELEMENT
);
1841 assert(elemSize
> 0);
1843 /* special case for built-in state variables, like light state */
1845 slang_ir_storage
*root
= n
->Store
;
1846 assert(!root
->Parent
);
1847 while (root
->Parent
)
1848 root
= root
->Parent
;
1850 if (root
->File
== PROGRAM_STATE_VAR
) {
1853 _slang_alloc_statevar(n
, emitInfo
->prog
->Parameters
, &direct
);
1859 n
->Store
->Index
= index
;
1860 return NULL
; /* all done */
1865 /* do codegen for array itself */
1866 emit(emitInfo
, n
->Children
[0]);
1867 arrayStore
= n
->Children
[0]->Store
;
1869 /* The initial array element storage is the array's storage,
1870 * then modified below.
1872 _slang_copy_ir_storage(n
->Store
, arrayStore
);
1875 if (n
->Children
[1]->Opcode
== IR_FLOAT
) {
1876 /* Constant array index */
1877 const GLint element
= (GLint
) n
->Children
[1]->Value
[0];
1879 /* this element's storage is the array's storage, plus constant offset */
1880 n
->Store
->Index
+= elemSizeVec
* element
;
1883 /* Variable array index */
1885 /* do codegen for array index expression */
1886 emit(emitInfo
, n
->Children
[1]);
1887 indexStore
= n
->Children
[1]->Store
;
1889 if (indexStore
->IsIndirect
) {
1890 /* need to put the array index into a temporary since we can't
1891 * directly support a[b[i]] constructs.
1895 /*indexStore = tempstore();*/
1900 /* need to multiply array index by array element size */
1901 struct prog_instruction
*inst
;
1902 slang_ir_storage
*indexTemp
;
1903 slang_ir_storage elemSizeStore
;
1905 /* allocate 1 float indexTemp */
1906 indexTemp
= _slang_new_ir_storage(PROGRAM_TEMPORARY
, -1, 1);
1907 _slang_alloc_temp(emitInfo
->vt
, indexTemp
);
1909 /* allocate a constant containing the element size */
1910 constant_to_storage(emitInfo
, (float) elemSizeVec
, &elemSizeStore
);
1912 /* multiply array index by element size */
1913 inst
= emit_instruction(emitInfo
,
1915 indexTemp
, /* dest */
1916 indexStore
, /* the index */
1920 indexStore
= indexTemp
;
1923 if (arrayStore
->IsIndirect
) {
1924 /* ex: in a[i][j], a[i] (the arrayStore) is indirect */
1925 /* Need to add indexStore to arrayStore->Indirect store */
1926 slang_ir_storage indirectArray
;
1927 slang_ir_storage
*indexTemp
;
1929 _slang_init_ir_storage(&indirectArray
,
1930 arrayStore
->IndirectFile
,
1931 arrayStore
->IndirectIndex
,
1933 arrayStore
->IndirectSwizzle
);
1935 /* allocate 1 float indexTemp */
1936 indexTemp
= _slang_new_ir_storage(PROGRAM_TEMPORARY
, -1, 1);
1937 _slang_alloc_temp(emitInfo
->vt
, indexTemp
);
1939 inst
= emit_instruction(emitInfo
,
1941 indexTemp
, /* dest */
1942 indexStore
, /* the index */
1943 &indirectArray
, /* indirect array base */
1946 indexStore
= indexTemp
;
1949 /* update the array element storage info */
1950 n
->Store
->IsIndirect
= GL_TRUE
;
1951 n
->Store
->IndirectFile
= indexStore
->File
;
1952 n
->Store
->IndirectIndex
= indexStore
->Index
;
1953 n
->Store
->IndirectSwizzle
= indexStore
->Swizzle
;
1956 n
->Store
->Size
= elemSize
;
1957 n
->Store
->Swizzle
= _slang_var_swizzle(elemSize
, 0);
1959 return NULL
; /* no instruction */
1964 * Resolve storage for accessing a structure field.
1966 static struct prog_instruction
*
1967 emit_struct_field(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
1969 slang_ir_storage
*root
= n
->Store
;
1970 GLint fieldOffset
, fieldSize
;
1972 assert(n
->Opcode
== IR_FIELD
);
1974 assert(!root
->Parent
);
1975 while (root
->Parent
)
1976 root
= root
->Parent
;
1978 /* If this is the field of a state var, allocate constant/uniform
1979 * storage for it now if we haven't already.
1980 * Note that we allocate storage (uniform/constant slots) for state
1981 * variables here rather than at declaration time so we only allocate
1982 * space for the ones that we actually use!
1984 if (root
->File
== PROGRAM_STATE_VAR
) {
1986 GLint index
= _slang_alloc_statevar(n
, emitInfo
->prog
->Parameters
, &direct
);
1988 slang_info_log_error(emitInfo
->log
, "Error parsing state variable");
1992 root
->Index
= index
;
1993 return NULL
; /* all done */
1997 /* do codegen for struct */
1998 emit(emitInfo
, n
->Children
[0]);
1999 assert(n
->Children
[0]->Store
->Index
>= 0);
2002 fieldOffset
= n
->Store
->Index
;
2003 fieldSize
= n
->Store
->Size
;
2005 _slang_copy_ir_storage(n
->Store
, n
->Children
[0]->Store
);
2007 n
->Store
->Index
= n
->Children
[0]->Store
->Index
+ fieldOffset
/ 4;
2008 n
->Store
->Size
= fieldSize
;
2010 switch (fieldSize
) {
2013 GLint swz
= fieldOffset
% 4;
2014 n
->Store
->Swizzle
= MAKE_SWIZZLE4(swz
, swz
, swz
, swz
);
2018 n
->Store
->Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
,
2019 SWIZZLE_NIL
, SWIZZLE_NIL
);
2022 n
->Store
->Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
,
2023 SWIZZLE_Z
, SWIZZLE_NIL
);
2026 n
->Store
->Swizzle
= SWIZZLE_XYZW
;
2029 assert(n
->Store
->Index
>= 0);
2031 return NULL
; /* no instruction */
2036 * Emit code for a variable declaration.
2037 * This usually doesn't result in any code generation, but just
2038 * memory allocation.
2040 static struct prog_instruction
*
2041 emit_var_decl(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
2044 assert(n
->Store
->File
!= PROGRAM_UNDEFINED
);
2045 assert(n
->Store
->Size
> 0);
2046 /*assert(n->Store->Index < 0);*/
2048 if (!n
->Var
|| n
->Var
->isTemp
) {
2049 /* a nameless/temporary variable, will be freed after first use */
2051 if (n
->Store
->Index
< 0 && !_slang_alloc_temp(emitInfo
->vt
, n
->Store
)) {
2052 slang_info_log_error(emitInfo
->log
,
2053 "Ran out of registers, too many temporaries");
2058 /* a regular variable */
2059 _slang_add_variable(emitInfo
->vt
, n
->Var
);
2060 if (!_slang_alloc_var(emitInfo
->vt
, n
->Store
)) {
2061 slang_info_log_error(emitInfo
->log
,
2062 "Ran out of registers, too many variables");
2066 printf("IR_VAR_DECL %s %d store %p\n",
2067 (char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
2069 assert(n
->Var
->store
== n
->Store
);
2071 if (emitInfo
->EmitComments
) {
2072 /* emit NOP with comment describing the variable's storage location */
2074 sprintf(s
, "TEMP[%d]%s = variable %s (size %d)",
2076 _mesa_swizzle_string(n
->Store
->Swizzle
, 0, GL_FALSE
),
2077 (n
->Var
? (char *) n
->Var
->a_name
: "anonymous"),
2079 emit_comment(emitInfo
, s
);
2086 * Emit code for a reference to a variable.
2087 * Actually, no code is generated but we may do some memory allocation.
2088 * In particular, state vars (uniforms) are allocated on an as-needed basis.
2090 static struct prog_instruction
*
2091 emit_var_ref(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
2094 assert(n
->Store
->File
!= PROGRAM_UNDEFINED
);
2096 if (n
->Store
->File
== PROGRAM_STATE_VAR
&& n
->Store
->Index
< 0) {
2098 GLint index
= _slang_alloc_statevar(n
, emitInfo
->prog
->Parameters
, &direct
);
2102 _mesa_snprintf(s
, sizeof(s
), "Undefined variable '%s'",
2103 (char *) n
->Var
->a_name
);
2104 slang_info_log_error(emitInfo
->log
, s
);
2108 n
->Store
->Index
= index
;
2110 else if (n
->Store
->File
== PROGRAM_UNIFORM
||
2111 n
->Store
->File
== PROGRAM_SAMPLER
) {
2112 /* mark var as used */
2113 _mesa_use_uniform(emitInfo
->prog
->Parameters
, (char *) n
->Var
->a_name
);
2116 if (n
->Store
->Index
< 0) {
2117 /* probably ran out of registers */
2120 assert(n
->Store
->Size
> 0);
/*
 * Central code-generation dispatcher: switches on n->Opcode and hands the
 * node to the matching emit_*() helper (emit_var_decl, emit_var_ref,
 * emit_array_element, emit_struct_field, emit_swizzle, emit_arith,
 * emit_compare, emit_clamp, emit_tex, emit_negation, emit_copy, emit_cond,
 * emit_not, emit_label, emit_kill, emit_fcall, emit_if, emit_loop,
 * emit_cont_break_if_true, emit_cont_break, emit_return).
 * Sequence nodes emit Children[0] then Children[1] and take the second
 * child's storage; scope and function-call nodes push a variable table
 * around the nested emit and pop it afterwards.  Float constants get a
 * slot from _mesa_add_unnamed_constant(), and a negative result is logged
 * as "Ran out of space for constants".  Unrecognized opcodes are reported
 * through _mesa_problem().
 * emitInfo->log->error_flag is tested on entry and between the two halves
 * of a sequence.
 * NOTE(review): the case labels and several short statements (returns,
 * braces) are absent from this listing, so which opcode maps to which
 * helper, and what is done when error_flag is set, must be confirmed
 * against the complete file.
 */
2126 static struct prog_instruction
*
2127 emit(slang_emit_info
*emitInfo
, slang_ir_node
*n
)
2129 struct prog_instruction
*inst
;
2133 if (emitInfo
->log
->error_flag
) {
2137 switch (n
->Opcode
) {
2139 /* sequence of two sub-trees */
2140 assert(n
->Children
[0]);
2141 assert(n
->Children
[1]);
2142 emit(emitInfo
, n
->Children
[0]);
2143 if (emitInfo
->log
->error_flag
)
2145 inst
= emit(emitInfo
, n
->Children
[1]);
2149 n
->Store
= n
->Children
[1]->Store
;
2153 /* new variable scope */
2154 _slang_push_var_table(emitInfo
->vt
);
2155 inst
= emit(emitInfo
, n
->Children
[0]);
2156 _slang_pop_var_table(emitInfo
->vt
);
2160 /* Variable declaration - allocate a register for it */
2161 inst
= emit_var_decl(emitInfo
, n
);
2165 /* Reference to a variable
2166 * Storage should have already been resolved/allocated.
2168 return emit_var_ref(emitInfo
, n
);
2171 return emit_array_element(emitInfo
, n
);
2173 return emit_struct_field(emitInfo
, n
);
2175 return emit_swizzle(emitInfo
, n
);
2177 /* Simple arithmetic */
2217 /* trinary operators */
2219 return emit_arith(emitInfo
, n
);
2223 return emit_compare(emitInfo
, n
);
2226 return emit_clamp(emitInfo
, n
);
2230 return emit_tex(emitInfo
, n
);
2232 return emit_negation(emitInfo
, n
);
2234 /* find storage location for this float constant */
2235 n
->Store
->Index
= _mesa_add_unnamed_constant(emitInfo
->prog
->Parameters
,
2238 &n
->Store
->Swizzle
);
2239 if (n
->Store
->Index
< 0) {
2240 slang_info_log_error(emitInfo
->log
, "Ran out of space for constants");
2246 return emit_copy(emitInfo
, n
);
2249 return emit_cond(emitInfo
, n
);
2252 return emit_not(emitInfo
, n
);
2255 return emit_label(emitInfo
, n
);
2258 return emit_kill(emitInfo
);
2261 /* new variable scope for subroutines/function calls */
2262 _slang_push_var_table(emitInfo
->vt
);
2263 inst
= emit_fcall(emitInfo
, n
);
2264 _slang_pop_var_table(emitInfo
->vt
);
2268 return emit_if(emitInfo
, n
);
2271 return emit_loop(emitInfo
, n
);
2272 case IR_BREAK_IF_TRUE
:
2273 case IR_CONT_IF_TRUE
:
2274 return emit_cont_break_if_true(emitInfo
, n
);
2278 return emit_cont_break(emitInfo
, n
);
2281 return new_instruction(emitInfo
, OPCODE_BGNSUB
);
2283 return new_instruction(emitInfo
, OPCODE_ENDSUB
);
2285 return emit_return(emitInfo
, n
);
2291 _mesa_problem(NULL
, "Unexpected IR opcode in emit()\n");
2298 * After code generation, any subroutines will be in separate program
2299 * objects. This function appends all the subroutines onto the main
2300 * program and resolves the linking of all the branch/call instructions.
2301 * XXX this logic should really be part of the linking process...
2304 _slang_resolve_subroutines(slang_emit_info
*emitInfo
)
2306 GET_CURRENT_CONTEXT(ctx
);
2307 struct gl_program
*mainP
= emitInfo
->prog
;
2308 GLuint
*subroutineLoc
, i
, total
;
2311 = (GLuint
*) _mesa_malloc(emitInfo
->NumSubroutines
* sizeof(GLuint
));
2313 /* total number of instructions */
2314 total
= mainP
->NumInstructions
;
2315 for (i
= 0; i
< emitInfo
->NumSubroutines
; i
++) {
2316 subroutineLoc
[i
] = total
;
2317 total
+= emitInfo
->Subroutines
[i
]->NumInstructions
;
2320 /* adjust BranchTargets within the functions */
2321 for (i
= 0; i
< emitInfo
->NumSubroutines
; i
++) {
2322 struct gl_program
*sub
= emitInfo
->Subroutines
[i
];
2324 for (j
= 0; j
< sub
->NumInstructions
; j
++) {
2325 struct prog_instruction
*inst
= sub
->Instructions
+ j
;
2326 if (inst
->Opcode
!= OPCODE_CAL
&& inst
->BranchTarget
>= 0) {
2327 inst
->BranchTarget
+= subroutineLoc
[i
];
2332 /* append subroutines' instructions after main's instructions */
2333 mainP
->Instructions
= _mesa_realloc_instructions(mainP
->Instructions
,
2334 mainP
->NumInstructions
,
2336 mainP
->NumInstructions
= total
;
2337 for (i
= 0; i
< emitInfo
->NumSubroutines
; i
++) {
2338 struct gl_program
*sub
= emitInfo
->Subroutines
[i
];
2339 _mesa_copy_instructions(mainP
->Instructions
+ subroutineLoc
[i
],
2341 sub
->NumInstructions
);
2342 /* delete subroutine code */
2343 sub
->Parameters
= NULL
; /* prevent double-free */
2344 _mesa_reference_program(ctx
, &emitInfo
->Subroutines
[i
], NULL
);
2347 /* free subroutine list */
2348 if (emitInfo
->Subroutines
) {
2349 _mesa_free(emitInfo
->Subroutines
);
2350 emitInfo
->Subroutines
= NULL
;
2352 emitInfo
->NumSubroutines
= 0;
2354 /* Examine CAL instructions.
2355 * At this point, the BranchTarget field of the CAL instruction is
2356 * the number/id of the subroutine to call (an index into the
2357 * emitInfo->Subroutines list).
2358 * Translate that into an actual instruction location now.
2360 for (i
= 0; i
< mainP
->NumInstructions
; i
++) {
2361 struct prog_instruction
*inst
= mainP
->Instructions
+ i
;
2362 if (inst
->Opcode
== OPCODE_CAL
) {
2363 const GLuint f
= inst
->BranchTarget
;
2364 inst
->BranchTarget
= subroutineLoc
[f
];
2368 _mesa_free(subroutineLoc
);
2375 _slang_emit_code(slang_ir_node
*n
, slang_var_table
*vt
,
2376 struct gl_program
*prog
, GLboolean withEnd
,
2377 slang_info_log
*log
)
2379 GET_CURRENT_CONTEXT(ctx
);
2381 slang_emit_info emitInfo
;
2386 emitInfo
.prog
= prog
;
2387 emitInfo
.Subroutines
= NULL
;
2388 emitInfo
.NumSubroutines
= 0;
2389 emitInfo
.MaxInstructions
= prog
->NumInstructions
;
2391 emitInfo
.EmitHighLevelInstructions
= ctx
->Shader
.EmitHighLevelInstructions
;
2392 emitInfo
.EmitCondCodes
= ctx
->Shader
.EmitCondCodes
;
2393 emitInfo
.EmitComments
= ctx
->Shader
.EmitComments
;
2394 emitInfo
.EmitBeginEndSub
= GL_TRUE
;
2396 if (!emitInfo
.EmitCondCodes
) {
2397 emitInfo
.EmitHighLevelInstructions
= GL_TRUE
;
2400 /* Check uniform/constant limits */
2401 if (prog
->Target
== GL_FRAGMENT_PROGRAM_ARB
) {
2402 maxUniforms
= ctx
->Const
.FragmentProgram
.MaxUniformComponents
/ 4;
2405 assert(prog
->Target
== GL_VERTEX_PROGRAM_ARB
);
2406 maxUniforms
= ctx
->Const
.VertexProgram
.MaxUniformComponents
/ 4;
2408 if (prog
->Parameters
->NumParameters
> maxUniforms
) {
2409 slang_info_log_error(log
, "Constant/uniform register limit exceeded");
2413 (void) emit(&emitInfo
, n
);
2415 /* finish up by adding the END opcode to program */
2417 struct prog_instruction
*inst
;
2418 inst
= new_instruction(&emitInfo
, OPCODE_END
);
2421 _slang_resolve_subroutines(&emitInfo
);
2426 printf("*********** End emit code (%u inst):\n", prog
->NumInstructions
);
2427 _mesa_print_program(prog
);
2428 _mesa_print_program_parameters(ctx
,prog
);