2 * Mesa 3-D graphics library
5 * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "main/glheader.h"
28 #include "main/context.h"
29 #include "main/macros.h"
31 #include "prog_instruction.h"
32 #include "prog_optimize.h"
33 #include "prog_print.h"
36 #define MAX_LOOP_NESTING 50
39 static GLboolean dbg
= GL_FALSE
;
43 * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
44 * \return number of instructions removed
47 remove_instructions(struct gl_program
*prog
, const GLboolean
*removeFlags
)
49 GLint i
, removeEnd
= 0, removeCount
= 0;
50 GLuint totalRemoved
= 0;
53 for (i
= prog
->NumInstructions
- 1; i
>= 0; i
--) {
56 if (removeCount
== 0) {
57 /* begin a run of instructions to remove */
62 /* extend the run of instructions to remove */
67 /* don't remove this instruction, but check if the preceeding
68 * instructions are to be removed.
70 if (removeCount
> 0) {
71 GLint removeStart
= removeEnd
- removeCount
+ 1;
72 _mesa_delete_instructions(prog
, removeStart
, removeCount
);
73 removeStart
= removeCount
= 0; /* reset removal info */
82 * Remap register indexes according to map.
83 * \param prog the program to search/replace
84 * \param file the type of register file to search/replace
85 * \param map maps old register indexes to new indexes
88 replace_regs(struct gl_program
*prog
, gl_register_file file
, const GLint map
[])
92 for (i
= 0; i
< prog
->NumInstructions
; i
++) {
93 struct prog_instruction
*inst
= prog
->Instructions
+ i
;
94 const GLuint numSrc
= _mesa_num_inst_src_regs(inst
->Opcode
);
96 for (j
= 0; j
< numSrc
; j
++) {
97 if (inst
->SrcReg
[j
].File
== file
) {
98 GLuint index
= inst
->SrcReg
[j
].Index
;
99 ASSERT(map
[index
] >= 0);
100 inst
->SrcReg
[j
].Index
= map
[index
];
103 if (inst
->DstReg
.File
== file
) {
104 const GLuint index
= inst
->DstReg
.Index
;
105 ASSERT(map
[index
] >= 0);
106 inst
->DstReg
.Index
= map
[index
];
113 * Consolidate temporary registers to use low numbers. For example, if the
114 * shader only uses temps 4, 5, 8, replace them with 0, 1, 2.
117 _mesa_consolidate_registers(struct gl_program
*prog
)
119 GLboolean tempUsed
[MAX_PROGRAM_TEMPS
];
120 GLint tempMap
[MAX_PROGRAM_TEMPS
];
121 GLuint tempMax
= 0, i
;
124 _mesa_printf("Optimize: Begin register consolidation\n");
127 memset(tempUsed
, 0, sizeof(tempUsed
));
129 for (i
= 0; i
< MAX_PROGRAM_TEMPS
; i
++) {
133 /* set tempUsed[i] if temporary [i] is referenced */
134 for (i
= 0; i
< prog
->NumInstructions
; i
++) {
135 const struct prog_instruction
*inst
= prog
->Instructions
+ i
;
136 const GLuint numSrc
= _mesa_num_inst_src_regs(inst
->Opcode
);
138 for (j
= 0; j
< numSrc
; j
++) {
139 if (inst
->SrcReg
[j
].File
== PROGRAM_TEMPORARY
) {
140 const GLuint index
= inst
->SrcReg
[j
].Index
;
141 ASSERT(index
< MAX_PROGRAM_TEMPS
);
142 tempUsed
[index
] = GL_TRUE
;
143 tempMax
= MAX2(tempMax
, index
);
147 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
148 const GLuint index
= inst
->DstReg
.Index
;
149 ASSERT(index
< MAX_PROGRAM_TEMPS
);
150 tempUsed
[index
] = GL_TRUE
;
151 tempMax
= MAX2(tempMax
, index
);
155 /* allocate a new index for each temp that's used */
158 for (i
= 0; i
<= tempMax
; i
++) {
160 tempMap
[i
] = freeTemp
++;
161 /*_mesa_printf("replace %u with %u\n", i, tempMap[i]);*/
164 if (freeTemp
== tempMax
+ 1) {
165 /* no consolidation possible */
169 _mesa_printf("Replace regs 0..%u with 0..%u\n", tempMax
, freeTemp
-1);
173 replace_regs(prog
, PROGRAM_TEMPORARY
, tempMap
);
176 _mesa_printf("Optimize: End register consolidation\n");
182 * Remove dead instructions from the given program.
183 * This is very primitive for now. Basically look for temp registers
184 * that are written to but never read. Remove any instructions that
185 * write to such registers. Be careful with condition code setters.
188 _mesa_remove_dead_code(struct gl_program
*prog
)
190 GLboolean tempWritten
[MAX_PROGRAM_TEMPS
], tempRead
[MAX_PROGRAM_TEMPS
];
191 GLboolean
*removeInst
; /* per-instruction removal flag */
194 memset(tempWritten
, 0, sizeof(tempWritten
));
195 memset(tempRead
, 0, sizeof(tempRead
));
198 _mesa_printf("Optimize: Begin dead code removal\n");
199 /*_mesa_print_program(prog);*/
202 removeInst
= (GLboolean
*)
203 _mesa_calloc(prog
->NumInstructions
* sizeof(GLboolean
));
205 /* Determine which temps are read and written */
206 for (i
= 0; i
< prog
->NumInstructions
; i
++) {
207 const struct prog_instruction
*inst
= prog
->Instructions
+ i
;
208 const GLuint numSrc
= _mesa_num_inst_src_regs(inst
->Opcode
);
212 for (j
= 0; j
< numSrc
; j
++) {
213 if (inst
->SrcReg
[j
].File
== PROGRAM_TEMPORARY
) {
214 const GLuint index
= inst
->SrcReg
[j
].Index
;
215 ASSERT(index
< MAX_PROGRAM_TEMPS
);
217 if (inst
->SrcReg
[j
].RelAddr
) {
219 _mesa_printf("abort remove dead code (indirect temp)\n");
223 tempRead
[index
] = GL_TRUE
;
228 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
229 const GLuint index
= inst
->DstReg
.Index
;
230 ASSERT(index
< MAX_PROGRAM_TEMPS
);
232 if (inst
->DstReg
.RelAddr
) {
234 _mesa_printf("abort remove dead code (indirect temp)\n");
238 tempWritten
[index
] = GL_TRUE
;
239 if (inst
->CondUpdate
) {
240 /* If we're writing to this register and setting condition
241 * codes we cannot remove the instruction. Prevent removal
242 * by setting the 'read' flag.
244 tempRead
[index
] = GL_TRUE
;
250 for (i
= 0; i
< MAX_PROGRAM_TEMPS
; i
++) {
251 if (tempWritten
[i
] && !tempRead
[i
])
252 _mesa_printf("Remove writes to tmp %u\n", i
);
256 /* find instructions that write to dead registers, flag for removal */
257 for (i
= 0; i
< prog
->NumInstructions
; i
++) {
258 const struct prog_instruction
*inst
= prog
->Instructions
+ i
;
259 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
260 GLint index
= inst
->DstReg
.Index
;
261 removeInst
[i
] = (tempWritten
[index
] && !tempRead
[index
]);
262 if (dbg
&& removeInst
[i
]) {
263 _mesa_printf("Remove inst %u: ", i
);
264 _mesa_print_instruction(inst
);
269 /* now remove the instructions which aren't needed */
270 rem
= remove_instructions(prog
, removeInst
);
272 _mesa_free(removeInst
);
275 _mesa_printf("Optimize: End dead code removal. %u instructions removed\n", rem
);
276 /*_mesa_print_program(prog);*/
290 * Scan forward in program from 'start' for the next occurance of TEMP[index].
291 * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
292 * that we can't look further.
295 find_next_temp_use(const struct gl_program
*prog
, GLuint start
, GLuint index
)
299 for (i
= start
; i
< prog
->NumInstructions
; i
++) {
300 const struct prog_instruction
*inst
= prog
->Instructions
+ i
;
301 switch (inst
->Opcode
) {
309 const GLuint numSrc
= _mesa_num_inst_src_regs(inst
->Opcode
);
311 for (j
= 0; j
< numSrc
; j
++) {
312 if (inst
->SrcReg
[j
].File
== PROGRAM_TEMPORARY
&&
313 inst
->SrcReg
[j
].Index
== index
)
316 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
&&
317 inst
->DstReg
.Index
== index
)
328 * Try to remove extraneous MOV instructions from the given program.
331 _mesa_remove_extra_moves(struct gl_program
*prog
)
333 GLboolean
*removeInst
; /* per-instruction removal flag */
334 GLuint i
, rem
, loopNesting
= 0, subroutineNesting
= 0;
337 _mesa_printf("Optimize: Begin remove extra moves\n");
338 _mesa_print_program(prog
);
341 removeInst
= (GLboolean
*)
342 _mesa_calloc(prog
->NumInstructions
* sizeof(GLboolean
));
345 * Look for sequences such as this:
346 * FOO tmpX, arg0, arg1;
349 * FOO tmpY, arg0, arg1;
352 for (i
= 0; i
< prog
->NumInstructions
; i
++) {
353 const struct prog_instruction
*inst
= prog
->Instructions
+ i
;
355 switch (inst
->Opcode
) {
371 subroutineNesting
== 0 &&
372 inst
->SrcReg
[0].File
== PROGRAM_TEMPORARY
&&
373 inst
->SrcReg
[0].Swizzle
== SWIZZLE_XYZW
) {
374 /* see if this MOV can be removed */
375 const GLuint tempIndex
= inst
->SrcReg
[0].Index
;
376 struct prog_instruction
*prevInst
;
379 /* get pointer to previous instruction */
381 while (removeInst
[prevI
] && prevI
> 0)
383 prevInst
= prog
->Instructions
+ prevI
;
385 if (prevInst
->DstReg
.File
== PROGRAM_TEMPORARY
&&
386 prevInst
->DstReg
.Index
== tempIndex
&&
387 prevInst
->DstReg
.WriteMask
== WRITEMASK_XYZW
) {
389 enum temp_use next_use
=
390 find_next_temp_use(prog
, i
+ 1, tempIndex
);
392 if (next_use
== WRITE
|| next_use
== END
) {
393 /* OK, we can safely remove this MOV instruction.
395 * prevI: FOO tempIndex, x, y;
396 * i: MOV z, tempIndex;
398 * prevI: FOO z, x, y;
401 /* patch up prev inst */
402 prevInst
->DstReg
.File
= inst
->DstReg
.File
;
403 prevInst
->DstReg
.Index
= inst
->DstReg
.Index
;
405 /* flag this instruction for removal */
406 removeInst
[i
] = GL_TRUE
;
409 _mesa_printf("Remove MOV at %u\n", i
);
410 _mesa_printf("new prev inst %u: ", prevI
);
411 _mesa_print_instruction(prevInst
);
422 /* now remove the instructions which aren't needed */
423 rem
= remove_instructions(prog
, removeInst
);
426 _mesa_printf("Optimize: End remove extra moves. %u instructions removed\n", rem
);
427 /*_mesa_print_program(prog);*/
432 /** A live register interval */
435 GLuint Reg
; /** The temporary register index */
436 GLuint Start
, End
; /** Start/end instruction numbers */
440 /** A list of register intervals */
444 struct interval Intervals
[MAX_PROGRAM_TEMPS
];
449 append_interval(struct interval_list
*list
, const struct interval
*inv
)
451 list
->Intervals
[list
->Num
++] = *inv
;
455 /** Insert interval inv into list, sorted by interval end */
457 insert_interval_by_end(struct interval_list
*list
, const struct interval
*inv
)
459 /* XXX we could do a binary search insertion here since list is sorted */
460 GLint i
= list
->Num
- 1;
461 while (i
>= 0 && list
->Intervals
[i
].End
> inv
->End
) {
462 list
->Intervals
[i
+ 1] = list
->Intervals
[i
];
465 list
->Intervals
[i
+ 1] = *inv
;
471 for (i
= 0; i
+ 1 < list
->Num
; i
++) {
472 ASSERT(list
->Intervals
[i
].End
<= list
->Intervals
[i
+ 1].End
);
479 /** Remove the given interval from the interval list */
481 remove_interval(struct interval_list
*list
, const struct interval
*inv
)
483 /* XXX we could binary search since list is sorted */
485 for (k
= 0; k
< list
->Num
; k
++) {
486 if (list
->Intervals
[k
].Reg
== inv
->Reg
) {
487 /* found, remove it */
488 ASSERT(list
->Intervals
[k
].Start
== inv
->Start
);
489 ASSERT(list
->Intervals
[k
].End
== inv
->End
);
490 while (k
< list
->Num
- 1) {
491 list
->Intervals
[k
] = list
->Intervals
[k
+ 1];
501 /** called by qsort() */
503 compare_start(const void *a
, const void *b
)
505 const struct interval
*ia
= (const struct interval
*) a
;
506 const struct interval
*ib
= (const struct interval
*) b
;
507 if (ia
->Start
< ib
->Start
)
509 else if (ia
->Start
> ib
->Start
)
515 /** sort the interval list according to interval starts */
517 sort_interval_list_by_start(struct interval_list
*list
)
519 qsort(list
->Intervals
, list
->Num
, sizeof(struct interval
), compare_start
);
523 for (i
= 0; i
+ 1 < list
->Num
; i
++) {
524 ASSERT(list
->Intervals
[i
].Start
<= list
->Intervals
[i
+ 1].Start
);
532 * Update the intermediate interval info for register 'index' and
536 update_interval(GLint intBegin
[], GLint intEnd
[], GLuint index
, GLuint ic
)
538 ASSERT(index
< MAX_PROGRAM_TEMPS
);
539 if (intBegin
[index
] == -1) {
540 ASSERT(intEnd
[index
] == -1);
541 intBegin
[index
] = intEnd
[index
] = ic
;
550 * Find first/last instruction that references each temporary register.
553 _mesa_find_temp_intervals(const struct prog_instruction
*instructions
,
554 GLuint numInstructions
,
555 GLint intBegin
[MAX_PROGRAM_TEMPS
],
556 GLint intEnd
[MAX_PROGRAM_TEMPS
])
560 GLuint Start
, End
; /**< Start, end instructions of loop */
562 struct loop_info loopStack
[MAX_LOOP_NESTING
];
563 GLuint loopStackDepth
= 0;
566 for (i
= 0; i
< MAX_PROGRAM_TEMPS
; i
++){
567 intBegin
[i
] = intEnd
[i
] = -1;
570 /* Scan instructions looking for temporary registers */
571 for (i
= 0; i
< numInstructions
; i
++) {
572 const struct prog_instruction
*inst
= instructions
+ i
;
573 if (inst
->Opcode
== OPCODE_BGNLOOP
) {
574 loopStack
[loopStackDepth
].Start
= i
;
575 loopStack
[loopStackDepth
].End
= inst
->BranchTarget
;
578 else if (inst
->Opcode
== OPCODE_ENDLOOP
) {
581 else if (inst
->Opcode
== OPCODE_CAL
) {
585 const GLuint numSrc
= 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
587 for (j
= 0; j
< numSrc
; j
++) {
588 if (inst
->SrcReg
[j
].File
== PROGRAM_TEMPORARY
) {
589 const GLuint index
= inst
->SrcReg
[j
].Index
;
590 if (inst
->SrcReg
[j
].RelAddr
)
592 update_interval(intBegin
, intEnd
, index
, i
);
593 if (loopStackDepth
> 0) {
594 /* extend temp register's interval to end of loop */
595 GLuint loopEnd
= loopStack
[loopStackDepth
- 1].End
;
596 update_interval(intBegin
, intEnd
, index
, loopEnd
);
600 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
601 const GLuint index
= inst
->DstReg
.Index
;
602 if (inst
->DstReg
.RelAddr
)
604 update_interval(intBegin
, intEnd
, index
, i
);
605 if (loopStackDepth
> 0) {
606 /* extend temp register's interval to end of loop */
607 GLuint loopEnd
= loopStack
[loopStackDepth
- 1].End
;
608 update_interval(intBegin
, intEnd
, index
, loopEnd
);
619 * Find the live intervals for each temporary register in the program.
620 * For register R, the interval [A,B] indicates that R is referenced
621 * from instruction A through instruction B.
622 * Special consideration is needed for loops and subroutines.
623 * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
626 find_live_intervals(struct gl_program
*prog
,
627 struct interval_list
*liveIntervals
)
629 GLint intBegin
[MAX_PROGRAM_TEMPS
], intEnd
[MAX_PROGRAM_TEMPS
];
633 * Note: we'll return GL_FALSE below if we find relative indexing
634 * into the TEMP register file. We can't handle that yet.
635 * We also give up on subroutines for now.
639 _mesa_printf("Optimize: Begin find intervals\n");
642 /* build intermediate arrays */
643 if (!_mesa_find_temp_intervals(prog
->Instructions
, prog
->NumInstructions
,
647 /* Build live intervals list from intermediate arrays */
648 liveIntervals
->Num
= 0;
649 for (i
= 0; i
< MAX_PROGRAM_TEMPS
; i
++) {
650 if (intBegin
[i
] >= 0) {
653 inv
.Start
= intBegin
[i
];
655 append_interval(liveIntervals
, &inv
);
659 /* Sort the list according to interval starts */
660 sort_interval_list_by_start(liveIntervals
);
663 /* print interval info */
664 for (i
= 0; i
< liveIntervals
->Num
; i
++) {
665 const struct interval
*inv
= liveIntervals
->Intervals
+ i
;
666 _mesa_printf("Reg[%d] live [%d, %d]:",
667 inv
->Reg
, inv
->Start
, inv
->End
);
670 for (j
= 0; j
< inv
->Start
; j
++)
672 for (j
= inv
->Start
; j
<= inv
->End
; j
++)
683 /** Scan the array of used register flags to find free entry */
685 alloc_register(GLboolean usedRegs
[MAX_PROGRAM_TEMPS
])
688 for (k
= 0; k
< MAX_PROGRAM_TEMPS
; k
++) {
690 usedRegs
[k
] = GL_TRUE
;
699 * This function implements "Linear Scan Register Allocation" to reduce
700 * the number of temporary registers used by the program.
702 * We compute the "live interval" for all temporary registers then
703 * examine the overlap of the intervals to allocate new registers.
704 * Basically, if two intervals do not overlap, they can use the same register.
707 _mesa_reallocate_registers(struct gl_program
*prog
)
709 struct interval_list liveIntervals
;
710 GLint registerMap
[MAX_PROGRAM_TEMPS
];
711 GLboolean usedRegs
[MAX_PROGRAM_TEMPS
];
716 _mesa_printf("Optimize: Begin live-interval register reallocation\n");
717 _mesa_print_program(prog
);
720 for (i
= 0; i
< MAX_PROGRAM_TEMPS
; i
++){
722 usedRegs
[i
] = GL_FALSE
;
725 if (!find_live_intervals(prog
, &liveIntervals
)) {
727 _mesa_printf("Aborting register reallocation\n");
732 struct interval_list activeIntervals
;
733 activeIntervals
.Num
= 0;
735 /* loop over live intervals, allocating a new register for each */
736 for (i
= 0; i
< liveIntervals
.Num
; i
++) {
737 const struct interval
*live
= liveIntervals
.Intervals
+ i
;
740 _mesa_printf("Consider register %u\n", live
->Reg
);
742 /* Expire old intervals. Intervals which have ended with respect
743 * to the live interval can have their remapped registers freed.
747 for (j
= 0; j
< activeIntervals
.Num
; j
++) {
748 const struct interval
*inv
= activeIntervals
.Intervals
+ j
;
749 if (inv
->End
>= live
->Start
) {
750 /* Stop now. Since the activeInterval list is sorted
751 * we know we don't have to go further.
756 /* Interval 'inv' has expired */
757 const GLint regNew
= registerMap
[inv
->Reg
];
761 _mesa_printf(" expire interval for reg %u\n", inv
->Reg
);
763 /* remove interval j from active list */
764 remove_interval(&activeIntervals
, inv
);
765 j
--; /* counter-act j++ in for-loop above */
767 /* return register regNew to the free pool */
769 _mesa_printf(" free reg %d\n", regNew
);
770 ASSERT(usedRegs
[regNew
] == GL_TRUE
);
771 usedRegs
[regNew
] = GL_FALSE
;
776 /* find a free register for this live interval */
778 const GLint k
= alloc_register(usedRegs
);
780 /* out of registers, give up */
783 registerMap
[live
->Reg
] = k
;
784 maxTemp
= MAX2(maxTemp
, k
);
786 _mesa_printf(" remap register %u -> %d\n", live
->Reg
, k
);
789 /* Insert this live interval into the active list which is sorted
790 * by increasing end points.
792 insert_interval_by_end(&activeIntervals
, live
);
796 if (maxTemp
+ 1 < liveIntervals
.Num
) {
797 /* OK, we've reduced the number of registers needed.
798 * Scan the program and replace all the old temporary register
799 * indexes with the new indexes.
801 replace_regs(prog
, PROGRAM_TEMPORARY
, registerMap
);
803 prog
->NumTemporaries
= maxTemp
+ 1;
807 _mesa_printf("Optimize: End live-interval register reallocation\n");
808 _mesa_printf("Num temp regs before: %u after: %u\n",
809 liveIntervals
.Num
, maxTemp
+ 1);
810 _mesa_print_program(prog
);
816 * Apply optimizations to the given program to eliminate unnecessary
817 * instructions, temp regs, etc.
820 _mesa_optimize_program(GLcontext
*ctx
, struct gl_program
*program
)
823 _mesa_remove_dead_code(program
);
825 if (0) /* not tested much yet */
826 _mesa_remove_extra_moves(program
);
829 _mesa_consolidate_registers(program
);
831 _mesa_reallocate_registers(program
);