2 * Mesa 3-D graphics library
5 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Emit program instructions (PI code) from IR trees.
35 #include "prog_instruction.h"
36 #include "prog_parameter.h"
37 #include "prog_print.h"
38 #include "slang_emit.h"
39 #include "slang_error.h"
42 #define PEEPHOLE_OPTIMIZATIONS 1
46 static GLboolean EmitHighLevelInstructions
= GL_FALSE
;
50 * Assembly and IR info
54 slang_ir_opcode IrOpcode
;
56 gl_inst_opcode InstOpcode
;
57 GLuint ResultSize
, NumParams
;
62 static slang_ir_info IrInfo
[] = {
64 { IR_ADD
, "IR_ADD", OPCODE_ADD
, 4, 2 },
65 { IR_SUB
, "IR_SUB", OPCODE_SUB
, 4, 2 },
66 { IR_MUL
, "IR_MUL", OPCODE_MUL
, 4, 2 },
67 { IR_DIV
, "IR_DIV", OPCODE_NOP
, 0, 2 }, /* XXX broke */
68 { IR_DOT4
, "IR_DOT_4", OPCODE_DP4
, 1, 2 },
69 { IR_DOT3
, "IR_DOT_3", OPCODE_DP3
, 1, 2 },
70 { IR_CROSS
, "IR_CROSS", OPCODE_XPD
, 3, 2 },
71 { IR_LRP
, "IR_LRP", OPCODE_LRP
, 4, 3 },
72 { IR_MIN
, "IR_MIN", OPCODE_MIN
, 4, 2 },
73 { IR_MAX
, "IR_MAX", OPCODE_MAX
, 4, 2 },
74 { IR_CLAMP
, "IR_CLAMP", OPCODE_NOP
, 4, 3 }, /* special case: emit_clamp() */
75 { IR_SEQUAL
, "IR_SEQUAL", OPCODE_SEQ
, 4, 2 },
76 { IR_SNEQUAL
, "IR_SNEQUAL", OPCODE_SNE
, 4, 2 },
77 { IR_SGE
, "IR_SGE", OPCODE_SGE
, 4, 2 },
78 { IR_SGT
, "IR_SGT", OPCODE_SGT
, 4, 2 },
79 { IR_POW
, "IR_POW", OPCODE_POW
, 1, 2 },
81 { IR_I_TO_F
, "IR_I_TO_F", OPCODE_NOP
, 1, 1 },
82 { IR_F_TO_I
, "IR_F_TO_I", OPCODE_INT
, 4, 1 }, /* 4 floats to 4 ints */
83 { IR_EXP
, "IR_EXP", OPCODE_EXP
, 1, 1 },
84 { IR_EXP2
, "IR_EXP2", OPCODE_EX2
, 1, 1 },
85 { IR_LOG2
, "IR_LOG2", OPCODE_LG2
, 1, 1 },
86 { IR_RSQ
, "IR_RSQ", OPCODE_RSQ
, 1, 1 },
87 { IR_RCP
, "IR_RCP", OPCODE_RCP
, 1, 1 },
88 { IR_FLOOR
, "IR_FLOOR", OPCODE_FLR
, 4, 1 },
89 { IR_FRAC
, "IR_FRAC", OPCODE_FRC
, 4, 1 },
90 { IR_ABS
, "IR_ABS", OPCODE_ABS
, 4, 1 },
91 { IR_NEG
, "IR_NEG", OPCODE_NOP
, 4, 1 }, /* special case: emit_negation() */
92 { IR_DDX
, "IR_DDX", OPCODE_DDX
, 4, 1 },
93 { IR_DDX
, "IR_DDY", OPCODE_DDX
, 4, 1 },
94 { IR_SIN
, "IR_SIN", OPCODE_SIN
, 1, 1 },
95 { IR_COS
, "IR_COS", OPCODE_COS
, 1, 1 },
96 { IR_NOISE1
, "IR_NOISE1", OPCODE_NOISE1
, 1, 1 },
97 { IR_NOISE2
, "IR_NOISE2", OPCODE_NOISE2
, 1, 1 },
98 { IR_NOISE3
, "IR_NOISE3", OPCODE_NOISE3
, 1, 1 },
99 { IR_NOISE4
, "IR_NOISE4", OPCODE_NOISE4
, 1, 1 },
102 { IR_SEQ
, "IR_SEQ", OPCODE_NOP
, 0, 0 },
103 { IR_SCOPE
, "IR_SCOPE", OPCODE_NOP
, 0, 0 },
104 { IR_LABEL
, "IR_LABEL", OPCODE_NOP
, 0, 0 },
105 { IR_JUMP
, "IR_JUMP", OPCODE_NOP
, 0, 0 },
106 { IR_CJUMP0
, "IR_CJUMP0", OPCODE_NOP
, 0, 0 },
107 { IR_CJUMP1
, "IR_CJUMP1", OPCODE_NOP
, 0, 0 },
108 { IR_IF
, "IR_IF", OPCODE_NOP
, 0, 0 },
109 { IR_KILL
, "IR_KILL", OPCODE_NOP
, 0, 0 },
110 { IR_COND
, "IR_COND", OPCODE_NOP
, 0, 0 },
111 { IR_CALL
, "IR_CALL", OPCODE_NOP
, 0, 0 },
112 { IR_MOVE
, "IR_MOVE", OPCODE_NOP
, 0, 1 },
113 { IR_NOT
, "IR_NOT", OPCODE_NOP
, 1, 1 },
114 { IR_VAR
, "IR_VAR", OPCODE_NOP
, 0, 0 },
115 { IR_VAR_DECL
, "IR_VAR_DECL", OPCODE_NOP
, 0, 0 },
116 { IR_TEX
, "IR_TEX", OPCODE_TEX
, 4, 1 },
117 { IR_TEXB
, "IR_TEXB", OPCODE_TXB
, 4, 1 },
118 { IR_TEXP
, "IR_TEXP", OPCODE_TXP
, 4, 1 },
119 { IR_FLOAT
, "IR_FLOAT", OPCODE_NOP
, 0, 0 },
120 { IR_FIELD
, "IR_FIELD", OPCODE_NOP
, 0, 0 },
121 { IR_ELEMENT
, "IR_ELEMENT", OPCODE_NOP
, 0, 0 },
122 { IR_SWIZZLE
, "IR_SWIZZLE", OPCODE_NOP
, 0, 0 },
123 { IR_NOP
, NULL
, OPCODE_NOP
, 0, 0 }
127 static slang_ir_info
*
128 slang_find_ir_info(slang_ir_opcode opcode
)
131 for (i
= 0; IrInfo
[i
].IrName
; i
++) {
132 if (IrInfo
[i
].IrOpcode
== opcode
) {
140 slang_ir_name(slang_ir_opcode opcode
)
142 return slang_find_ir_info(opcode
)->IrName
;
147 * Swizzle a swizzle. That is, return swz2(swz1)
150 swizzle_swizzle(GLuint swz1
, GLuint swz2
)
153 for (i
= 0; i
< 4; i
++) {
154 GLuint c
= GET_SWZ(swz2
, i
);
155 s
[i
] = GET_SWZ(swz1
, c
);
157 swz
= MAKE_SWIZZLE4(s
[0], s
[1], s
[2], s
[3]);
163 _slang_new_ir_storage(enum register_file file
, GLint index
, GLint size
)
165 slang_ir_storage
*st
;
166 st
= (slang_ir_storage
*) _mesa_calloc(sizeof(slang_ir_storage
));
171 st
->Swizzle
= SWIZZLE_NOOP
;
178 swizzle_string(GLuint swizzle
)
183 for (i
= 1; i
< 5; i
++) {
184 s
[i
] = "xyzw"[GET_SWZ(swizzle
, i
-1)];
191 writemask_string(GLuint writemask
)
196 for (i
= 0; i
< 4; i
++) {
197 if (writemask
& (1 << i
))
205 storage_string(const slang_ir_storage
*st
)
207 static const char *files
[] = {
225 sprintf(s
, "%s[%d]", files
[st
->File
], st
->Index
);
227 sprintf(s
, "%s[%d..%d]", files
[st
->File
], st
->Index
,
228 st
->Index
+ st
->Size
- 1);
230 assert(st
->File
< (GLint
) (sizeof(files
) / sizeof(files
[0])));
231 sprintf(s
, "%s[%d]", files
[st
->File
], st
->Index
);
246 slang_print_ir(const slang_ir_node
*n
, int indent
)
251 if (n
->Opcode
!= IR_SEQ
)
253 printf("%3d:", indent
);
260 printf("SEQ at %p\n", (void*) n
);
262 assert(n
->Children
[0]);
263 assert(n
->Children
[1]);
264 slang_print_ir(n
->Children
[0], indent
+ IND
);
265 slang_print_ir(n
->Children
[1], indent
+ IND
);
268 printf("NEW SCOPE\n");
269 assert(!n
->Children
[1]);
270 slang_print_ir(n
->Children
[0], indent
+ 3);
273 printf("MOVE (writemask = %s)\n", writemask_string(n
->Writemask
));
274 slang_print_ir(n
->Children
[0], indent
+3);
275 slang_print_ir(n
->Children
[1], indent
+3);
278 printf("LABEL: %s\n", n
->Target
);
282 slang_print_ir(n
->Children
[0], indent
+ 3);
285 printf("JUMP %s\n", n
->Target
);
288 printf("CJUMP0 %s\n", n
->Target
);
289 slang_print_ir(n
->Children
[0], indent
+3);
292 printf("CJUMP1 %s\n", n
->Target
);
293 slang_print_ir(n
->Children
[0], indent
+3);
298 slang_print_ir(n
->Children
[0], indent
+3);
301 slang_print_ir(n
->Children
[1], indent
+3);
302 if (n
->Children
[2]) {
305 slang_print_ir(n
->Children
[2], indent
+3);
311 printf("BEGIN_SUB\n");
325 slang_print_ir(n
->Children
[0], indent
+3);
337 printf("VAR %s%s at %s store %p\n",
338 (n
->Var
? (char *) n
->Var
->a_name
: "TEMP"),
339 swizzle_string(n
->Store
->Swizzle
),
340 storage_string(n
->Store
), (void*) n
->Store
);
343 printf("VAR_DECL %s (%p) at %s store %p\n",
344 (n
->Var
? (char *) n
->Var
->a_name
: "TEMP"),
345 (void*) n
->Var
, storage_string(n
->Store
),
349 printf("FIELD %s of\n", n
->Target
);
350 slang_print_ir(n
->Children
[0], indent
+3);
353 printf("FLOAT %f %f %f %f\n",
354 n
->Value
[0], n
->Value
[1], n
->Value
[2], n
->Value
[3]);
357 printf("INT_TO_FLOAT %d\n", (int) n
->Value
[0]);
360 printf("SWIZZLE %s of (store %p) \n",
361 swizzle_string(n
->Store
->Swizzle
), (void*) n
->Store
);
362 slang_print_ir(n
->Children
[0], indent
+ 3);
365 printf("%s (%p, %p) (store %p)\n", slang_ir_name(n
->Opcode
),
366 (void*) n
->Children
[0], (void*) n
->Children
[1], (void*) n
->Store
);
367 slang_print_ir(n
->Children
[0], indent
+3);
368 slang_print_ir(n
->Children
[1], indent
+3);
374 * Allocate temporary storage for an intermediate result (such as for
375 * a multiply or add, etc.
378 alloc_temp_storage(slang_var_table
*vt
, slang_ir_node
*n
, GLint size
)
383 n
->Store
= _slang_new_ir_storage(PROGRAM_TEMPORARY
, -1, size
);
384 if (!_slang_alloc_temp(vt
, n
->Store
)) {
385 RETURN_ERROR("Ran out of registers, too many temporaries", 0);
392 * Free temporary storage, if n->Store is, in fact, temp storage.
396 free_temp_storage(slang_var_table
*vt
, slang_ir_node
*n
)
398 if (n
->Store
->File
== PROGRAM_TEMPORARY
&& n
->Store
->Index
>= 0) {
399 if (_slang_is_temp(vt
, n
->Store
)) {
400 _slang_free_temp(vt
, n
->Store
);
401 n
->Store
->Index
= -1;
409 * Convert IR storage to an instruction dst register.
412 storage_to_dst_reg(struct prog_dst_register
*dst
, const slang_ir_storage
*st
,
415 static const GLuint defaultWritemask
[4] = {
417 WRITEMASK_X
| WRITEMASK_Y
,
418 WRITEMASK_X
| WRITEMASK_Y
| WRITEMASK_Z
,
419 WRITEMASK_X
| WRITEMASK_Y
| WRITEMASK_Z
| WRITEMASK_W
421 assert(st
->Index
>= 0 && st
->Index
<= 16);
422 dst
->File
= st
->File
;
423 dst
->Index
= st
->Index
;
424 assert(st
->File
!= PROGRAM_UNDEFINED
);
425 assert(st
->Size
>= 1);
426 assert(st
->Size
<= 4);
428 GLuint comp
= GET_SWZ(st
->Swizzle
, 0);
430 assert(writemask
& WRITEMASK_X
);
431 dst
->WriteMask
= WRITEMASK_X
<< comp
;
434 dst
->WriteMask
= defaultWritemask
[st
->Size
- 1] & writemask
;
440 * Convert IR storage to an instruction src register.
443 storage_to_src_reg(struct prog_src_register
*src
, const slang_ir_storage
*st
)
445 static const GLuint defaultSwizzle
[4] = {
446 MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
447 MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
),
448 MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
),
449 MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
)
451 assert(st
->File
>= 0 && st
->File
<= 16);
452 src
->File
= st
->File
;
453 src
->Index
= st
->Index
;
454 assert(st
->File
!= PROGRAM_UNDEFINED
);
455 assert(st
->Size
>= 1);
456 assert(st
->Size
<= 4);
457 if (st
->Swizzle
!= SWIZZLE_NOOP
)
458 src
->Swizzle
= st
->Swizzle
;
460 src
->Swizzle
= defaultSwizzle
[st
->Size
- 1]; /*XXX really need this?*/
462 assert(GET_SWZ(src
->Swizzle
, 0) != SWIZZLE_NIL
);
463 assert(GET_SWZ(src
->Swizzle
, 1) != SWIZZLE_NIL
);
464 assert(GET_SWZ(src
->Swizzle
, 2) != SWIZZLE_NIL
);
465 assert(GET_SWZ(src
->Swizzle
, 3) != SWIZZLE_NIL
);
471 * Add new instruction at end of given program.
472 * \param prog the program to append instruction onto
473 * \param opcode opcode for the new instruction
474 * \return pointer to the new instruction
476 static struct prog_instruction
*
477 new_instruction(struct gl_program
*prog
, gl_inst_opcode opcode
)
479 struct prog_instruction
*inst
;
480 prog
->Instructions
= _mesa_realloc_instructions(prog
->Instructions
,
481 prog
->NumInstructions
,
482 prog
->NumInstructions
+ 1);
483 inst
= prog
->Instructions
+ prog
->NumInstructions
;
484 prog
->NumInstructions
++;
485 _mesa_init_instructions(inst
, 1);
486 inst
->Opcode
= opcode
;
492 * Return pointer to last instruction in program.
494 static struct prog_instruction
*
495 prev_instruction(struct gl_program
*prog
)
497 if (prog
->NumInstructions
== 0)
500 return prog
->Instructions
+ prog
->NumInstructions
- 1;
504 static struct prog_instruction
*
505 emit(slang_var_table
*vt
, slang_ir_node
*n
, struct gl_program
*prog
);
509 * Return an annotation string for given node's storage.
512 storage_annotation(const slang_ir_node
*n
, const struct gl_program
*prog
)
515 const slang_ir_storage
*st
= n
->Store
;
516 static char s
[100] = "";
519 return _mesa_strdup("");
522 case PROGRAM_CONSTANT
:
523 if (st
->Index
>= 0) {
524 const GLfloat
*val
= prog
->Parameters
->ParameterValues
[st
->Index
];
525 if (st
->Swizzle
== SWIZZLE_NOOP
)
526 sprintf(s
, "{%f, %f, %f, %f}", val
[0], val
[1], val
[2], val
[3]);
528 sprintf(s
, "%f", val
[GET_SWZ(st
->Swizzle
, 0)]);
532 case PROGRAM_TEMPORARY
:
534 sprintf(s
, "%s", (char *) n
->Var
->a_name
);
536 sprintf(s
, "t[%d]", st
->Index
);
538 case PROGRAM_STATE_VAR
:
539 case PROGRAM_UNIFORM
:
540 sprintf(s
, "%s", prog
->Parameters
->Parameters
[st
->Index
].Name
);
542 case PROGRAM_VARYING
:
543 sprintf(s
, "%s", prog
->Varying
->Parameters
[st
->Index
].Name
);
546 sprintf(s
, "input[%d]", st
->Index
);
549 sprintf(s
, "output[%d]", st
->Index
);
554 return _mesa_strdup(s
);
562 * Return an annotation string for an instruction.
565 instruction_annotation(gl_inst_opcode opcode
, char *dstAnnot
,
566 char *srcAnnot0
, char *srcAnnot1
, char *srcAnnot2
)
569 const char *operator;
574 len
+= strlen(dstAnnot
);
576 dstAnnot
= _mesa_strdup("");
579 len
+= strlen(srcAnnot0
);
581 srcAnnot0
= _mesa_strdup("");
584 len
+= strlen(srcAnnot1
);
586 srcAnnot1
= _mesa_strdup("");
589 len
+= strlen(srcAnnot2
);
591 srcAnnot2
= _mesa_strdup("");
622 s
= (char *) malloc(len
);
623 sprintf(s
, "%s = %s %s %s %s", dstAnnot
,
624 srcAnnot0
, operator, srcAnnot1
, srcAnnot2
);
625 assert(_mesa_strlen(s
) < len
);
641 * Generate code for a simple arithmetic instruction.
642 * Either 1, 2 or 3 operands.
644 static struct prog_instruction
*
645 emit_arith(slang_var_table
*vt
, slang_ir_node
*n
, struct gl_program
*prog
)
647 struct prog_instruction
*inst
;
648 const slang_ir_info
*info
= slang_find_ir_info(n
->Opcode
);
649 char *srcAnnot
[3], *dstAnnot
;
653 assert(info
->InstOpcode
!= OPCODE_NOP
);
655 srcAnnot
[0] = srcAnnot
[1] = srcAnnot
[2] = dstAnnot
= NULL
;
657 #if PEEPHOLE_OPTIMIZATIONS
658 /* Look for MAD opportunity */
659 if (info
->NumParams
== 2 &&
660 n
->Opcode
== IR_ADD
&& n
->Children
[0]->Opcode
== IR_MUL
) {
661 /* found pattern IR_ADD(IR_MUL(A, B), C) */
662 emit(vt
, n
->Children
[0]->Children
[0], prog
); /* A */
663 emit(vt
, n
->Children
[0]->Children
[1], prog
); /* B */
664 emit(vt
, n
->Children
[1], prog
); /* C */
665 /* generate MAD instruction */
666 inst
= new_instruction(prog
, OPCODE_MAD
);
667 /* operands: A, B, C: */
668 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Children
[0]->Store
);
669 storage_to_src_reg(&inst
->SrcReg
[1], n
->Children
[0]->Children
[1]->Store
);
670 storage_to_src_reg(&inst
->SrcReg
[2], n
->Children
[1]->Store
);
671 free_temp_storage(vt
, n
->Children
[0]->Children
[0]);
672 free_temp_storage(vt
, n
->Children
[0]->Children
[1]);
673 free_temp_storage(vt
, n
->Children
[1]);
675 else if (info
->NumParams
== 2 &&
676 n
->Opcode
== IR_ADD
&& n
->Children
[1]->Opcode
== IR_MUL
) {
677 /* found pattern IR_ADD(A, IR_MUL(B, C)) */
678 emit(vt
, n
->Children
[0], prog
); /* A */
679 emit(vt
, n
->Children
[1]->Children
[0], prog
); /* B */
680 emit(vt
, n
->Children
[1]->Children
[1], prog
); /* C */
681 /* generate MAD instruction */
682 inst
= new_instruction(prog
, OPCODE_MAD
);
683 /* operands: B, C, A */
684 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[1]->Children
[0]->Store
);
685 storage_to_src_reg(&inst
->SrcReg
[1], n
->Children
[1]->Children
[1]->Store
);
686 storage_to_src_reg(&inst
->SrcReg
[2], n
->Children
[0]->Store
);
687 free_temp_storage(vt
, n
->Children
[1]->Children
[0]);
688 free_temp_storage(vt
, n
->Children
[1]->Children
[1]);
689 free_temp_storage(vt
, n
->Children
[0]);
696 /* gen code for children */
697 for (i
= 0; i
< info
->NumParams
; i
++)
698 emit(vt
, n
->Children
[i
], prog
);
700 /* gen this instruction and src registers */
701 inst
= new_instruction(prog
, info
->InstOpcode
);
702 for (i
= 0; i
< info
->NumParams
; i
++)
703 storage_to_src_reg(&inst
->SrcReg
[i
], n
->Children
[i
]->Store
);
706 for (i
= 0; i
< info
->NumParams
; i
++)
707 srcAnnot
[i
] = storage_annotation(n
->Children
[i
], prog
);
710 for (i
= 0; i
< info
->NumParams
; i
++)
711 free_temp_storage(vt
, n
->Children
[i
]);
716 if (!alloc_temp_storage(vt
, n
, info
->ResultSize
))
719 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
721 dstAnnot
= storage_annotation(n
, prog
);
723 inst
->Comment
= instruction_annotation(inst
->Opcode
, dstAnnot
, srcAnnot
[0],
724 srcAnnot
[1], srcAnnot
[2]);
726 /*_mesa_print_instruction(inst);*/
732 * Generate code for an IR_CLAMP instruction.
734 static struct prog_instruction
*
735 emit_clamp(slang_var_table
*vt
, slang_ir_node
*n
, struct gl_program
*prog
)
737 struct prog_instruction
*inst
;
739 assert(n
->Opcode
== IR_CLAMP
);
745 inst
= emit(vt
, n
->Children
[0], prog
);
747 /* If lower limit == 0.0 and upper limit == 1.0,
748 * set prev instruction's SaturateMode field to SATURATE_ZERO_ONE.
750 * emit OPCODE_MIN, OPCODE_MAX sequence.
753 /* XXX this isn't quite finished yet */
754 if (n
->Children
[1]->Opcode
== IR_FLOAT
&&
755 n
->Children
[1]->Value
[0] == 0.0 &&
756 n
->Children
[1]->Value
[1] == 0.0 &&
757 n
->Children
[1]->Value
[2] == 0.0 &&
758 n
->Children
[1]->Value
[3] == 0.0 &&
759 n
->Children
[2]->Opcode
== IR_FLOAT
&&
760 n
->Children
[2]->Value
[0] == 1.0 &&
761 n
->Children
[2]->Value
[1] == 1.0 &&
762 n
->Children
[2]->Value
[2] == 1.0 &&
763 n
->Children
[2]->Value
[3] == 1.0) {
765 inst
= prev_instruction(prog
);
767 if (inst
&& inst
->Opcode
!= OPCODE_NOP
) {
768 /* and prev instruction's DstReg matches n->Children[0]->Store */
769 inst
->SaturateMode
= SATURATE_ZERO_ONE
;
770 n
->Store
= n
->Children
[0]->Store
;
777 if (!alloc_temp_storage(vt
, n
, n
->Children
[0]->Store
->Size
))
780 emit(vt
, n
->Children
[1], prog
);
781 emit(vt
, n
->Children
[2], prog
);
783 /* tmp = max(ch[0], ch[1]) */
784 inst
= new_instruction(prog
, OPCODE_MAX
);
785 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
786 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Store
);
787 storage_to_src_reg(&inst
->SrcReg
[1], n
->Children
[1]->Store
);
789 /* tmp = min(tmp, ch[2]) */
790 inst
= new_instruction(prog
, OPCODE_MIN
);
791 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
792 storage_to_src_reg(&inst
->SrcReg
[0], n
->Store
);
793 storage_to_src_reg(&inst
->SrcReg
[1], n
->Children
[2]->Store
);
799 static struct prog_instruction
*
800 emit_negation(slang_var_table
*vt
, slang_ir_node
*n
, struct gl_program
*prog
)
802 /* Implement as MOV dst, -src; */
803 /* XXX we could look at the previous instruction and in some circumstances
804 * modify it to accomplish the negation.
806 struct prog_instruction
*inst
;
808 emit(vt
, n
->Children
[0], prog
);
811 if (!alloc_temp_storage(vt
, n
, n
->Children
[0]->Store
->Size
))
814 inst
= new_instruction(prog
, OPCODE_MOV
);
815 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
816 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Store
);
817 inst
->SrcReg
[0].NegateBase
= NEGATE_XYZW
;
818 inst
->Comment
= n
->Comment
;
823 static struct prog_instruction
*
824 emit_label(const char *target
, struct gl_program
*prog
)
826 struct prog_instruction
*inst
;
827 inst
= new_instruction(prog
, OPCODE_NOP
);
828 inst
->Comment
= _mesa_strdup(target
);
833 static struct prog_instruction
*
834 emit_cjump(const char *target
, struct gl_program
*prog
, GLuint zeroOrOne
)
836 struct prog_instruction
*inst
;
837 inst
= new_instruction(prog
, OPCODE_BRA
);
839 inst
->DstReg
.CondMask
= COND_NE
; /* branch if non-zero */
841 inst
->DstReg
.CondMask
= COND_EQ
; /* branch if equal to zero */
842 inst
->DstReg
.CondSwizzle
= SWIZZLE_X
;
843 inst
->Comment
= _mesa_strdup(target
);
848 static struct prog_instruction
*
849 emit_jump(const char *target
, struct gl_program
*prog
)
851 struct prog_instruction
*inst
;
852 inst
= new_instruction(prog
, OPCODE_BRA
);
853 inst
->DstReg
.CondMask
= COND_TR
; /* always branch */
854 /*inst->DstReg.CondSwizzle = SWIZZLE_X;*/
855 inst
->Comment
= _mesa_strdup(target
);
860 static struct prog_instruction
*
861 emit_kill(struct gl_program
*prog
)
863 struct prog_instruction
*inst
;
864 /* NV-KILL - discard fragment depending on condition code.
865 * Note that ARB-KILL depends on sign of vector operand.
867 inst
= new_instruction(prog
, OPCODE_KIL_NV
);
868 inst
->DstReg
.CondMask
= COND_TR
; /* always branch */
873 static struct prog_instruction
*
874 emit_tex(slang_var_table
*vt
, slang_ir_node
*n
, struct gl_program
*prog
)
876 struct prog_instruction
*inst
;
877 if (n
->Opcode
== IR_TEX
) {
878 inst
= new_instruction(prog
, OPCODE_TEX
);
880 else if (n
->Opcode
== IR_TEXB
) {
881 inst
= new_instruction(prog
, OPCODE_TXB
);
884 assert(n
->Opcode
== IR_TEXP
);
885 inst
= new_instruction(prog
, OPCODE_TXP
);
889 if (!alloc_temp_storage(vt
, n
, 4))
892 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
894 (void) emit(vt
, n
->Children
[1], prog
);
896 /* Child[1] is the coord */
897 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[1]->Store
);
899 /* Child[0] is the sampler (a uniform which'll indicate the texture unit) */
900 assert(n
->Children
[0]->Store
);
901 assert(n
->Children
[0]->Store
->Size
>= TEXTURE_1D_INDEX
);
903 inst
->Sampler
= n
->Children
[0]->Store
->Index
; /* i.e. uniform's index */
904 inst
->TexSrcTarget
= n
->Children
[0]->Store
->Size
;
905 inst
->TexSrcUnit
= 27; /* Dummy value; the TexSrcUnit will be computed at
906 * link time, using the sampler uniform's value.
912 static struct prog_instruction
*
913 emit_move(slang_var_table
*vt
, slang_ir_node
*n
, struct gl_program
*prog
)
915 struct prog_instruction
*inst
;
918 assert(n
->Children
[1]);
919 inst
= emit(vt
, n
->Children
[1], prog
);
921 assert(n
->Children
[1]->Store
->Index
>= 0);
924 emit(vt
, n
->Children
[0], prog
);
927 n
->Store
= n
->Children
[0]->Store
;
929 #if PEEPHOLE_OPTIMIZATIONS
930 if (inst
&& _slang_is_temp(vt
, n
->Children
[1]->Store
)) {
931 /* Peephole optimization:
932 * Just modify the RHS to put its result into the dest of this
933 * MOVE operation. Then, this MOVE is a no-op.
935 _slang_free_temp(vt
, n
->Children
[1]->Store
);
936 *n
->Children
[1]->Store
= *n
->Children
[0]->Store
;
937 /* fixup the prev (RHS) instruction */
938 assert(n
->Children
[0]->Store
->Index
>= 0);
939 assert(n
->Children
[0]->Store
->Index
< 16);
940 storage_to_dst_reg(&inst
->DstReg
, n
->Children
[0]->Store
, n
->Writemask
);
946 if (n
->Children
[0]->Store
->Size
> 4) {
947 /* move matrix/struct etc (block of registers) */
948 slang_ir_storage dstStore
= *n
->Children
[0]->Store
;
949 slang_ir_storage srcStore
= *n
->Children
[1]->Store
;
950 GLint size
= srcStore
.Size
;
951 ASSERT(n
->Children
[0]->Writemask
== WRITEMASK_XYZW
);
952 ASSERT(n
->Children
[1]->Store
->Swizzle
== SWIZZLE_NOOP
);
956 inst
= new_instruction(prog
, OPCODE_MOV
);
957 inst
->Comment
= _mesa_strdup("IR_MOVE block");
958 storage_to_dst_reg(&inst
->DstReg
, &dstStore
, n
->Writemask
);
959 storage_to_src_reg(&inst
->SrcReg
[0], &srcStore
);
966 /* single register move */
967 char *srcAnnot
, *dstAnnot
;
968 inst
= new_instruction(prog
, OPCODE_MOV
);
969 assert(n
->Children
[0]->Store
->Index
>= 0);
970 assert(n
->Children
[0]->Store
->Index
< 16);
971 storage_to_dst_reg(&inst
->DstReg
, n
->Children
[0]->Store
, n
->Writemask
);
972 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[1]->Store
);
973 dstAnnot
= storage_annotation(n
->Children
[0], prog
);
974 srcAnnot
= storage_annotation(n
->Children
[1], prog
);
975 inst
->Comment
= instruction_annotation(inst
->Opcode
, dstAnnot
,
976 srcAnnot
, NULL
, NULL
);
978 free_temp_storage(vt
, n
->Children
[1]);
984 static struct prog_instruction
*
985 emit_cond(slang_var_table
*vt
, slang_ir_node
*n
, struct gl_program
*prog
)
987 /* Conditional expression (in if/while/for stmts).
988 * Need to update condition code register.
989 * Next instruction is typically an IR_CJUMP0/1.
991 /* last child expr instruction: */
992 struct prog_instruction
*inst
= emit(vt
, n
->Children
[0], prog
);
994 /* set inst's CondUpdate flag */
995 inst
->CondUpdate
= GL_TRUE
;
996 return inst
; /* XXX or null? */
999 /* This'll happen for things like "if (i) ..." where no code
1000 * is normally generated for the expression "i".
1001 * Generate a move instruction just to set condition codes.
1002 * Note: must use full 4-component vector since all four
1003 * condition codes must be set identically.
1005 if (!alloc_temp_storage(vt
, n
, 4))
1007 inst
= new_instruction(prog
, OPCODE_MOV
);
1008 inst
->CondUpdate
= GL_TRUE
;
1009 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
1010 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Store
);
1011 _slang_free_temp(vt
, n
->Store
);
1012 inst
->Comment
= _mesa_strdup("COND expr");
1013 return inst
; /* XXX or null? */
1021 static struct prog_instruction
*
1022 emit_not(slang_var_table
*vt
, slang_ir_node
*n
, struct gl_program
*prog
)
1025 slang_ir_storage st
;
1026 struct prog_instruction
*inst
;
1028 /* need zero constant */
1029 st
.File
= PROGRAM_CONSTANT
;
1031 st
.Index
= _mesa_add_unnamed_constant(prog
->Parameters
, &zero
,
1035 (void) emit(vt
, n
->Children
[0], prog
);
1036 /* XXXX if child instr is SGT convert to SLE, if SEQ, SNE, etc */
1039 if (!alloc_temp_storage(vt
, n
, n
->Children
[0]->Store
->Size
))
1042 inst
= new_instruction(prog
, OPCODE_SEQ
);
1043 storage_to_dst_reg(&inst
->DstReg
, n
->Store
, n
->Writemask
);
1044 storage_to_src_reg(&inst
->SrcReg
[0], n
->Children
[0]->Store
);
1045 storage_to_src_reg(&inst
->SrcReg
[1], &st
);
1047 free_temp_storage(vt
, n
->Children
[0]);
1049 inst
->Comment
= _mesa_strdup("NOT");
1054 static struct prog_instruction
*
1055 emit_if(slang_var_table
*vt
, slang_ir_node
*n
, struct gl_program
*prog
)
1057 struct prog_instruction
*ifInst
;
1058 GLuint ifInstLoc
, elseInstLoc
;
1060 emit(vt
, n
->Children
[0], prog
); /* the condition */
1061 ifInstLoc
= prog
->NumInstructions
;
1062 ifInst
= new_instruction(prog
, OPCODE_IF
);
1063 ifInst
->DstReg
.CondMask
= COND_NE
; /* if cond is non-zero */
1064 ifInst
->DstReg
.CondSwizzle
= SWIZZLE_X
;
1067 emit(vt
, n
->Children
[1], prog
);
1069 if (n
->Children
[2]) {
1071 elseInstLoc
= prog
->NumInstructions
;
1072 (void) new_instruction(prog
, OPCODE_ELSE
);
1073 ifInst
= prog
->Instructions
+ ifInstLoc
;
1074 ifInst
->BranchTarget
= prog
->NumInstructions
;
1076 emit(vt
, n
->Children
[2], prog
);
1079 ifInst
= prog
->Instructions
+ ifInstLoc
;
1080 ifInst
->BranchTarget
= prog
->NumInstructions
+ 1;
1083 (void) new_instruction(prog
, OPCODE_ENDIF
);
1084 if (n
->Children
[2]) {
1085 struct prog_instruction
*elseInst
;
1086 elseInst
= prog
->Instructions
+ elseInstLoc
;
1087 elseInst
->BranchTarget
= prog
->NumInstructions
;
1094 * Remove any SWIZZLE_NIL terms from given swizzle mask (smear prev term).
1095 * Ex: fix_swizzle("zyNN") -> "zyyy"
1098 fix_swizzle(GLuint swizzle
)
1101 for (i
= 0; i
< 4; i
++) {
1102 swz
[i
] = GET_SWZ(swizzle
, i
);
1103 if (swz
[i
] == SWIZZLE_NIL
) {
1104 swz
[i
] = swz
[i
- 1];
1107 return MAKE_SWIZZLE4(swz
[0], swz
[1], swz
[2], swz
[3]);
1111 static struct prog_instruction
*
1112 emit_swizzle(slang_var_table
*vt
, slang_ir_node
*n
, struct gl_program
*prog
)
1116 /* swizzled storage access */
1117 (void) emit(vt
, n
->Children
[0], prog
);
1119 /* "pull-up" the child's storage info, applying our swizzle info */
1120 n
->Store
->File
= n
->Children
[0]->Store
->File
;
1121 n
->Store
->Index
= n
->Children
[0]->Store
->Index
;
1122 n
->Store
->Size
= n
->Children
[0]->Store
->Size
;
1123 /*n->Var = n->Children[0]->Var; XXX for debug */
1124 assert(n
->Store
->Index
>= 0);
1126 swizzle
= fix_swizzle(n
->Store
->Swizzle
);
1129 GLuint s
= n
->Children
[0]->Store
->Swizzle
;
1130 assert(GET_SWZ(s
, 0) != SWIZZLE_NIL
);
1131 assert(GET_SWZ(s
, 1) != SWIZZLE_NIL
);
1132 assert(GET_SWZ(s
, 2) != SWIZZLE_NIL
);
1133 assert(GET_SWZ(s
, 3) != SWIZZLE_NIL
);
1137 /* apply this swizzle to child's swizzle to get composed swizzle */
1138 n
->Store
->Swizzle
= swizzle_swizzle(n
->Children
[0]->Store
->Swizzle
,
1144 static struct prog_instruction
*
1145 emit(slang_var_table
*vt
, slang_ir_node
*n
, struct gl_program
*prog
)
1147 struct prog_instruction
*inst
;
1151 switch (n
->Opcode
) {
1153 /* sequence of two sub-trees */
1154 assert(n
->Children
[0]);
1155 assert(n
->Children
[1]);
1156 emit(vt
, n
->Children
[0], prog
);
1157 inst
= emit(vt
, n
->Children
[1], prog
);
1159 n
->Store
= n
->Children
[1]->Store
;
1163 /* new variable scope */
1164 _slang_push_var_table(vt
);
1165 inst
= emit(vt
, n
->Children
[0], prog
);
1166 _slang_pop_var_table(vt
);
1170 /* Variable declaration - allocate a register for it */
1172 assert(n
->Store
->File
!= PROGRAM_UNDEFINED
);
1173 assert(n
->Store
->Size
> 0);
1174 assert(n
->Store
->Index
< 0);
1175 if (!n
->Var
|| n
->Var
->isTemp
) {
1176 /* a nameless/temporary variable, will be freed after first use */
1177 if (!_slang_alloc_temp(vt
, n
->Store
))
1178 RETURN_ERROR("Ran out of registers, too many temporaries", 0);
1181 /* a regular variable */
1182 _slang_add_variable(vt
, n
->Var
);
1183 if (!_slang_alloc_var(vt
, n
->Store
))
1184 RETURN_ERROR("Ran out of registers, too many variables", 0);
1186 printf("IR_VAR_DECL %s %d store %p\n",
1187 (char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
1189 assert(n
->Var
->aux
== n
->Store
);
1194 /* Reference to a variable
1195 * Storage should have already been resolved/allocated.
1198 assert(n
->Store
->File
!= PROGRAM_UNDEFINED
);
1199 if (n
->Store
->Index
< 0) {
1200 printf("#### VAR %s not allocated!\n", (char*)n
->Var
->a_name
);
1202 assert(n
->Store
->Index
>= 0);
1203 assert(n
->Store
->Size
> 0);
1207 /* Dereference array element. Just resolve storage for the array
1208 * element represented by this node.
1211 assert(n
->Store
->File
!= PROGRAM_UNDEFINED
);
1212 assert(n
->Store
->Size
> 0);
1213 if (n
->Children
[1]->Opcode
== IR_FLOAT
) {
1214 /* OK, constant index */
1215 const GLint arrayAddr
= n
->Children
[0]->Store
->Index
;
1216 const GLint index
= (GLint
) n
->Children
[1]->Value
[0];
1217 n
->Store
->Index
= arrayAddr
+ index
;
1220 /* Problem: variable index */
1221 const GLint arrayAddr
= n
->Children
[0]->Store
->Index
;
1222 const GLint index
= 0;
1223 _mesa_problem(NULL
, "variable array indexes not supported yet!");
1224 n
->Store
->Index
= arrayAddr
+ index
;
1226 return NULL
; /* no instruction */
1229 return emit_swizzle(vt
, n
, prog
);
1231 /* Simple arithmetic */
1263 /* trinary operators */
1265 return emit_arith(vt
, n
, prog
);
1267 return emit_clamp(vt
, n
, prog
);
1271 return emit_tex(vt
, n
, prog
);
1273 return emit_negation(vt
, n
, prog
);
1275 /* find storage location for this float constant */
1276 n
->Store
->Index
= _mesa_add_unnamed_constant(prog
->Parameters
, n
->Value
,
1278 &n
->Store
->Swizzle
);
1279 if (n
->Store
->Index
< 0) {
1280 RETURN_ERROR("Ran out of space for constants.", 0);
1285 return emit_move(vt
, n
, prog
);
1288 return emit_cond(vt
, n
, prog
);
1291 return emit_not(vt
, n
, prog
);
1294 return emit_label(n
->Target
, prog
);
1296 return emit_jump(n
->Target
, prog
);
1298 return emit_cjump(n
->Target
, prog
, 0);
1300 return emit_cjump(n
->Target
, prog
, 1);
1302 return emit_kill(prog
);
1305 return emit_if(vt
, n
, prog
);
1309 struct prog_instruction
*beginInst
, *endInst
;
1310 GLuint beginInstLoc
, endInstLoc
;
1313 /* emit OPCODE_BGNLOOP */
1314 beginInstLoc
= prog
->NumInstructions
;
1315 if (EmitHighLevelInstructions
) {
1316 (void) new_instruction(prog
, OPCODE_BGNLOOP
);
1320 emit(vt
, n
->Children
[0], prog
);
1322 endInstLoc
= prog
->NumInstructions
;
1323 if (EmitHighLevelInstructions
) {
1324 /* emit OPCODE_ENDLOOP */
1325 endInst
= new_instruction(prog
, OPCODE_ENDLOOP
);
1328 /* emit unconditional BRA-nch */
1329 endInst
= new_instruction(prog
, OPCODE_BRA
);
1330 endInst
->DstReg
.CondMask
= COND_TR
; /* always true */
1332 /* end instruction's BranchTarget points to top of loop */
1333 endInst
->BranchTarget
= beginInstLoc
;
1335 if (EmitHighLevelInstructions
) {
1336 /* BGNLOOP's BranchTarget points to the ENDLOOP inst */
1337 beginInst
= prog
->Instructions
+ beginInstLoc
;
1338 beginInst
->BranchTarget
= prog
->NumInstructions
- 1;
1341 /* Done emitting loop code. Now walk over the loop's linked list
1342 * of BREAK and CONT nodes, filling in their BranchTarget fields
1343 * (which will point to the ENDLOOP or ENDLOOP+1 instructions).
1345 for (ir
= n
->BranchNode
; ir
; ir
= ir
->BranchNode
) {
1346 struct prog_instruction
*inst
1347 = prog
->Instructions
+ ir
->InstLocation
;
1348 if (ir
->Opcode
== IR_BREAK
) {
1349 assert(inst
->Opcode
== OPCODE_BRK
||
1350 inst
->Opcode
== OPCODE_BRA
);
1351 inst
->BranchTarget
= endInstLoc
+ 1;
1354 assert(ir
->Opcode
== IR_CONT
);
1355 assert(inst
->Opcode
== OPCODE_CONT
||
1356 inst
->Opcode
== OPCODE_BRA
);
1357 inst
->BranchTarget
= endInstLoc
;
1366 gl_inst_opcode opcode
;
1367 struct prog_instruction
*inst
;
1368 n
->InstLocation
= prog
->NumInstructions
;
1369 if (EmitHighLevelInstructions
) {
1370 opcode
= (n
->Opcode
== IR_CONT
) ? OPCODE_CONT
: OPCODE_BRK
;
1373 opcode
= OPCODE_BRA
;
1375 inst
= new_instruction(prog
, opcode
);
1376 inst
->DstReg
.CondMask
= COND_TR
; /* always true */
1381 return new_instruction(prog
, OPCODE_BGNSUB
);
1383 return new_instruction(prog
, OPCODE_ENDSUB
);
1385 return new_instruction(prog
, OPCODE_RET
);
1391 _mesa_problem(NULL
, "Unexpected IR opcode in emit()\n");
1399 _slang_emit_code(slang_ir_node
*n
, slang_var_table
*vt
,
1400 struct gl_program
*prog
, GLboolean withEnd
)
1404 if (emit(vt
, n
, prog
)) {
1405 /* finish up by adding the END opcode to program */
1407 struct prog_instruction
*inst
;
1408 inst
= new_instruction(prog
, OPCODE_END
);
1413 /* record an error? */
1417 printf("*********** End generate code (%u inst):\n", prog
->NumInstructions
);
1419 _mesa_print_program(prog
);
1420 _mesa_print_program_parameters(ctx
,prog
);