2 * Mesa 3-D graphics library
5 * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "main/glheader.h"
28 #include "main/context.h"
29 #include "main/macros.h"
31 #include "prog_instruction.h"
32 #include "prog_optimize.h"
33 #include "prog_print.h"
36 #define MAX_LOOP_NESTING 50
37 /* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
38 * register allocate many temporary values into that small number of
39 * temps. So allow large temporary indices coming into the register
42 #define REG_ALLOCATE_MAX_PROGRAM_TEMPS ((1 << INST_INDEX_BITS) - 1)
44 static GLboolean dbg
= GL_FALSE
;
46 /* Returns the mask of channels read from the given srcreg in this instruction.
49 get_src_arg_mask(const struct prog_instruction
*inst
, int arg
)
51 int writemask
= inst
->DstReg
.WriteMask
;
54 writemask
= WRITEMASK_XYZW
;
56 switch (inst
->Opcode
) {
76 return WRITEMASK_XYZW
;
81 * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
82 * \return number of instructions removed
85 remove_instructions(struct gl_program
*prog
, const GLboolean
*removeFlags
)
87 GLint i
, removeEnd
= 0, removeCount
= 0;
88 GLuint totalRemoved
= 0;
91 for (i
= prog
->NumInstructions
- 1; i
>= 0; i
--) {
94 if (removeCount
== 0) {
95 /* begin a run of instructions to remove */
100 /* extend the run of instructions to remove */
105 /* don't remove this instruction, but check if the preceeding
106 * instructions are to be removed.
108 if (removeCount
> 0) {
109 GLint removeStart
= removeEnd
- removeCount
+ 1;
110 _mesa_delete_instructions(prog
, removeStart
, removeCount
);
111 removeStart
= removeCount
= 0; /* reset removal info */
115 /* Finish removing if the first instruction was to be removed. */
116 if (removeCount
> 0) {
117 GLint removeStart
= removeEnd
- removeCount
+ 1;
118 _mesa_delete_instructions(prog
, removeStart
, removeCount
);
125 * Remap register indexes according to map.
126 * \param prog the program to search/replace
127 * \param file the type of register file to search/replace
128 * \param map maps old register indexes to new indexes
131 replace_regs(struct gl_program
*prog
, gl_register_file file
, const GLint map
[])
135 for (i
= 0; i
< prog
->NumInstructions
; i
++) {
136 struct prog_instruction
*inst
= prog
->Instructions
+ i
;
137 const GLuint numSrc
= _mesa_num_inst_src_regs(inst
->Opcode
);
139 for (j
= 0; j
< numSrc
; j
++) {
140 if (inst
->SrcReg
[j
].File
== file
) {
141 GLuint index
= inst
->SrcReg
[j
].Index
;
142 ASSERT(map
[index
] >= 0);
143 inst
->SrcReg
[j
].Index
= map
[index
];
146 if (inst
->DstReg
.File
== file
) {
147 const GLuint index
= inst
->DstReg
.Index
;
148 ASSERT(map
[index
] >= 0);
149 inst
->DstReg
.Index
= map
[index
];
156 * Consolidate temporary registers to use low numbers. For example, if the
157 * shader only uses temps 4, 5, 8, replace them with 0, 1, 2.
160 _mesa_consolidate_registers(struct gl_program
*prog
)
162 GLboolean tempUsed
[REG_ALLOCATE_MAX_PROGRAM_TEMPS
];
163 GLint tempMap
[REG_ALLOCATE_MAX_PROGRAM_TEMPS
];
164 GLuint tempMax
= 0, i
;
167 printf("Optimize: Begin register consolidation\n");
170 memset(tempUsed
, 0, sizeof(tempUsed
));
172 for (i
= 0; i
< REG_ALLOCATE_MAX_PROGRAM_TEMPS
; i
++) {
176 /* set tempUsed[i] if temporary [i] is referenced */
177 for (i
= 0; i
< prog
->NumInstructions
; i
++) {
178 const struct prog_instruction
*inst
= prog
->Instructions
+ i
;
179 const GLuint numSrc
= _mesa_num_inst_src_regs(inst
->Opcode
);
181 for (j
= 0; j
< numSrc
; j
++) {
182 if (inst
->SrcReg
[j
].File
== PROGRAM_TEMPORARY
) {
183 const GLuint index
= inst
->SrcReg
[j
].Index
;
184 ASSERT(index
< REG_ALLOCATE_MAX_PROGRAM_TEMPS
);
185 tempUsed
[index
] = GL_TRUE
;
186 tempMax
= MAX2(tempMax
, index
);
190 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
191 const GLuint index
= inst
->DstReg
.Index
;
192 ASSERT(index
< REG_ALLOCATE_MAX_PROGRAM_TEMPS
);
193 tempUsed
[index
] = GL_TRUE
;
194 tempMax
= MAX2(tempMax
, index
);
198 /* allocate a new index for each temp that's used */
201 for (i
= 0; i
<= tempMax
; i
++) {
203 tempMap
[i
] = freeTemp
++;
204 /*printf("replace %u with %u\n", i, tempMap[i]);*/
207 if (freeTemp
== tempMax
+ 1) {
208 /* no consolidation possible */
212 printf("Replace regs 0..%u with 0..%u\n", tempMax
, freeTemp
-1);
216 replace_regs(prog
, PROGRAM_TEMPORARY
, tempMap
);
219 printf("Optimize: End register consolidation\n");
225 * Remove dead instructions from the given program.
226 * This is very primitive for now. Basically look for temp registers
227 * that are written to but never read. Remove any instructions that
228 * write to such registers. Be careful with condition code setters.
231 _mesa_remove_dead_code(struct gl_program
*prog
)
233 GLboolean tempRead
[REG_ALLOCATE_MAX_PROGRAM_TEMPS
][4];
234 GLboolean
*removeInst
; /* per-instruction removal flag */
235 GLuint i
, rem
= 0, comp
;
237 memset(tempRead
, 0, sizeof(tempRead
));
240 printf("Optimize: Begin dead code removal\n");
241 /*_mesa_print_program(prog);*/
244 removeInst
= (GLboolean
*)
245 calloc(1, prog
->NumInstructions
* sizeof(GLboolean
));
247 /* Determine which temps are read and written */
248 for (i
= 0; i
< prog
->NumInstructions
; i
++) {
249 const struct prog_instruction
*inst
= prog
->Instructions
+ i
;
250 const GLuint numSrc
= _mesa_num_inst_src_regs(inst
->Opcode
);
254 for (j
= 0; j
< numSrc
; j
++) {
255 if (inst
->SrcReg
[j
].File
== PROGRAM_TEMPORARY
) {
256 const GLuint index
= inst
->SrcReg
[j
].Index
;
258 ASSERT(index
< REG_ALLOCATE_MAX_PROGRAM_TEMPS
);
259 read_mask
= get_src_arg_mask(inst
, j
);
261 if (inst
->SrcReg
[j
].RelAddr
) {
263 printf("abort remove dead code (indirect temp)\n");
267 for (comp
= 0; comp
< 4; comp
++) {
268 GLuint swz
= (inst
->SrcReg
[j
].Swizzle
>> (3 * comp
)) & 0x7;
270 if ((read_mask
& (1 << comp
)) == 0)
275 tempRead
[index
][0] = GL_TRUE
;
278 tempRead
[index
][1] = GL_TRUE
;
281 tempRead
[index
][2] = GL_TRUE
;
284 tempRead
[index
][3] = GL_TRUE
;
292 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
293 const GLuint index
= inst
->DstReg
.Index
;
294 ASSERT(index
< REG_ALLOCATE_MAX_PROGRAM_TEMPS
);
296 if (inst
->DstReg
.RelAddr
) {
298 printf("abort remove dead code (indirect temp)\n");
302 if (inst
->CondUpdate
) {
303 /* If we're writing to this register and setting condition
304 * codes we cannot remove the instruction. Prevent removal
305 * by setting the 'read' flag.
307 tempRead
[index
][0] = GL_TRUE
;
308 tempRead
[index
][1] = GL_TRUE
;
309 tempRead
[index
][2] = GL_TRUE
;
310 tempRead
[index
][3] = GL_TRUE
;
315 /* find instructions that write to dead registers, flag for removal */
316 for (i
= 0; i
< prog
->NumInstructions
; i
++) {
317 struct prog_instruction
*inst
= prog
->Instructions
+ i
;
318 const GLuint numDst
= _mesa_num_inst_dst_regs(inst
->Opcode
);
320 if (numDst
!= 0 && inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
321 GLint chan
, index
= inst
->DstReg
.Index
;
323 for (chan
= 0; chan
< 4; chan
++) {
324 if (!tempRead
[index
][chan
] &&
325 inst
->DstReg
.WriteMask
& (1 << chan
)) {
327 printf("Remove writemask on %u.%c\n", i
,
328 chan
== 3 ? 'w' : 'x' + chan
);
330 inst
->DstReg
.WriteMask
&= ~(1 << chan
);
335 if (inst
->DstReg
.WriteMask
== 0) {
336 /* If we cleared all writes, the instruction can be removed. */
338 printf("Remove instruction %u: \n", i
);
339 removeInst
[i
] = GL_TRUE
;
344 /* now remove the instructions which aren't needed */
345 rem
= remove_instructions(prog
, removeInst
);
348 printf("Optimize: End dead code removal.\n");
349 printf(" %u channel writes removed\n", rem
);
350 printf(" %u instructions removed\n", rem
);
351 /*_mesa_print_program(prog);*/
368 * Scan forward in program from 'start' for the next occurance of TEMP[index].
369 * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
370 * that we can't look further.
373 find_next_temp_use(const struct gl_program
*prog
, GLuint start
, GLuint index
)
377 for (i
= start
; i
< prog
->NumInstructions
; i
++) {
378 const struct prog_instruction
*inst
= prog
->Instructions
+ i
;
379 switch (inst
->Opcode
) {
387 const GLuint numSrc
= _mesa_num_inst_src_regs(inst
->Opcode
);
389 for (j
= 0; j
< numSrc
; j
++) {
390 if (inst
->SrcReg
[j
].File
== PROGRAM_TEMPORARY
&&
391 inst
->SrcReg
[j
].Index
== index
)
394 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
&&
395 inst
->DstReg
.Index
== index
)
404 static GLboolean
_mesa_is_flow_control_opcode(enum prog_opcode opcode
)
426 * Try to remove use of extraneous MOV instructions, to free them up for dead
430 _mesa_remove_extra_move_use(struct gl_program
*prog
)
435 printf("Optimize: Begin remove extra move use\n");
436 _mesa_print_program(prog
);
440 * Look for sequences such as this:
443 * FOO tmpY, tmpX, arg1;
447 * FOO tmpY, arg0, arg1;
450 for (i
= 0; i
+ 1 < prog
->NumInstructions
; i
++) {
451 const struct prog_instruction
*mov
= prog
->Instructions
+ i
;
453 if (mov
->Opcode
!= OPCODE_MOV
||
454 mov
->DstReg
.File
!= PROGRAM_TEMPORARY
||
455 mov
->DstReg
.RelAddr
||
456 mov
->DstReg
.CondMask
!= COND_TR
||
457 mov
->SaturateMode
!= SATURATE_OFF
||
458 mov
->SrcReg
[0].RelAddr
)
461 /* Walk through remaining instructions until the or src reg gets
462 * rewritten or we get into some flow-control, eliminating the use of
465 for (j
= i
+ 1; j
< prog
->NumInstructions
; j
++) {
466 struct prog_instruction
*inst2
= prog
->Instructions
+ j
;
469 if (_mesa_is_flow_control_opcode(inst2
->Opcode
))
472 /* First rewrite this instruction's args if appropriate. */
473 for (arg
= 0; arg
< _mesa_num_inst_src_regs(inst2
->Opcode
); arg
++) {
475 int read_mask
= get_src_arg_mask(inst2
, arg
);
477 if (inst2
->SrcReg
[arg
].File
!= mov
->DstReg
.File
||
478 inst2
->SrcReg
[arg
].Index
!= mov
->DstReg
.Index
||
479 inst2
->SrcReg
[arg
].RelAddr
||
480 inst2
->SrcReg
[arg
].Abs
)
483 /* Check that all the sources for this arg of inst2 come from inst1
486 for (comp
= 0; comp
< 4; comp
++) {
487 int src_swz
= GET_SWZ(inst2
->SrcReg
[arg
].Swizzle
, comp
);
489 /* If the MOV didn't write that channel, can't use it. */
490 if ((read_mask
& (1 << comp
)) &&
491 src_swz
<= SWIZZLE_W
&&
492 (mov
->DstReg
.WriteMask
& (1 << src_swz
)) == 0)
498 /* Adjust the swizzles of inst2 to point at MOV's source */
499 for (comp
= 0; comp
< 4; comp
++) {
500 int inst2_swz
= GET_SWZ(inst2
->SrcReg
[arg
].Swizzle
, comp
);
502 if (inst2_swz
<= SWIZZLE_W
) {
503 GLuint s
= GET_SWZ(mov
->SrcReg
[0].Swizzle
, inst2_swz
);
504 inst2
->SrcReg
[arg
].Swizzle
&= ~(7 << (3 * comp
));
505 inst2
->SrcReg
[arg
].Swizzle
|= s
<< (3 * comp
);
506 inst2
->SrcReg
[arg
].Negate
^= (((mov
->SrcReg
[0].Negate
>>
507 inst2_swz
) & 0x1) << comp
);
510 inst2
->SrcReg
[arg
].File
= mov
->SrcReg
[0].File
;
511 inst2
->SrcReg
[arg
].Index
= mov
->SrcReg
[0].Index
;
514 /* If this instruction overwrote part of the move, our time is up. */
515 if ((inst2
->DstReg
.File
== mov
->DstReg
.File
&&
516 (inst2
->DstReg
.RelAddr
||
517 inst2
->DstReg
.Index
== mov
->DstReg
.Index
)) ||
518 (inst2
->DstReg
.File
== mov
->SrcReg
[0].File
&&
519 (inst2
->DstReg
.RelAddr
||
520 inst2
->DstReg
.Index
== mov
->SrcReg
[0].Index
)))
526 printf("Optimize: End remove extra move use.\n");
527 /*_mesa_print_program(prog);*/
532 * Try to remove extraneous MOV instructions from the given program.
535 _mesa_remove_extra_moves(struct gl_program
*prog
)
537 GLboolean
*removeInst
; /* per-instruction removal flag */
538 GLuint i
, rem
, loopNesting
= 0, subroutineNesting
= 0;
541 printf("Optimize: Begin remove extra moves\n");
542 _mesa_print_program(prog
);
545 removeInst
= (GLboolean
*)
546 calloc(1, prog
->NumInstructions
* sizeof(GLboolean
));
549 * Look for sequences such as this:
550 * FOO tmpX, arg0, arg1;
553 * FOO tmpY, arg0, arg1;
556 for (i
= 0; i
< prog
->NumInstructions
; i
++) {
557 const struct prog_instruction
*inst
= prog
->Instructions
+ i
;
559 switch (inst
->Opcode
) {
575 subroutineNesting
== 0 &&
576 inst
->SrcReg
[0].File
== PROGRAM_TEMPORARY
&&
577 inst
->SrcReg
[0].Swizzle
== SWIZZLE_XYZW
) {
578 /* see if this MOV can be removed */
579 const GLuint tempIndex
= inst
->SrcReg
[0].Index
;
580 struct prog_instruction
*prevInst
;
583 /* get pointer to previous instruction */
585 while (prevI
> 0 && removeInst
[prevI
])
587 prevInst
= prog
->Instructions
+ prevI
;
589 if (prevInst
->DstReg
.File
== PROGRAM_TEMPORARY
&&
590 prevInst
->DstReg
.Index
== tempIndex
&&
591 prevInst
->DstReg
.WriteMask
== WRITEMASK_XYZW
) {
593 enum temp_use next_use
=
594 find_next_temp_use(prog
, i
+ 1, tempIndex
);
596 if (next_use
== WRITE
|| next_use
== END
) {
597 /* OK, we can safely remove this MOV instruction.
599 * prevI: FOO tempIndex, x, y;
600 * i: MOV z, tempIndex;
602 * prevI: FOO z, x, y;
605 /* patch up prev inst */
606 prevInst
->DstReg
.File
= inst
->DstReg
.File
;
607 prevInst
->DstReg
.Index
= inst
->DstReg
.Index
;
609 /* flag this instruction for removal */
610 removeInst
[i
] = GL_TRUE
;
613 printf("Remove MOV at %u\n", i
);
614 printf("new prev inst %u: ", prevI
);
615 _mesa_print_instruction(prevInst
);
626 /* now remove the instructions which aren't needed */
627 rem
= remove_instructions(prog
, removeInst
);
632 printf("Optimize: End remove extra moves. %u instructions removed\n", rem
);
633 /*_mesa_print_program(prog);*/
638 /** A live register interval */
641 GLuint Reg
; /** The temporary register index */
642 GLuint Start
, End
; /** Start/end instruction numbers */
646 /** A list of register intervals */
650 struct interval Intervals
[REG_ALLOCATE_MAX_PROGRAM_TEMPS
];
655 append_interval(struct interval_list
*list
, const struct interval
*inv
)
657 list
->Intervals
[list
->Num
++] = *inv
;
661 /** Insert interval inv into list, sorted by interval end */
663 insert_interval_by_end(struct interval_list
*list
, const struct interval
*inv
)
665 /* XXX we could do a binary search insertion here since list is sorted */
666 GLint i
= list
->Num
- 1;
667 while (i
>= 0 && list
->Intervals
[i
].End
> inv
->End
) {
668 list
->Intervals
[i
+ 1] = list
->Intervals
[i
];
671 list
->Intervals
[i
+ 1] = *inv
;
677 for (i
= 0; i
+ 1 < list
->Num
; i
++) {
678 ASSERT(list
->Intervals
[i
].End
<= list
->Intervals
[i
+ 1].End
);
685 /** Remove the given interval from the interval list */
687 remove_interval(struct interval_list
*list
, const struct interval
*inv
)
689 /* XXX we could binary search since list is sorted */
691 for (k
= 0; k
< list
->Num
; k
++) {
692 if (list
->Intervals
[k
].Reg
== inv
->Reg
) {
693 /* found, remove it */
694 ASSERT(list
->Intervals
[k
].Start
== inv
->Start
);
695 ASSERT(list
->Intervals
[k
].End
== inv
->End
);
696 while (k
< list
->Num
- 1) {
697 list
->Intervals
[k
] = list
->Intervals
[k
+ 1];
707 /** called by qsort() */
709 compare_start(const void *a
, const void *b
)
711 const struct interval
*ia
= (const struct interval
*) a
;
712 const struct interval
*ib
= (const struct interval
*) b
;
713 if (ia
->Start
< ib
->Start
)
715 else if (ia
->Start
> ib
->Start
)
721 /** sort the interval list according to interval starts */
723 sort_interval_list_by_start(struct interval_list
*list
)
725 qsort(list
->Intervals
, list
->Num
, sizeof(struct interval
), compare_start
);
729 for (i
= 0; i
+ 1 < list
->Num
; i
++) {
730 ASSERT(list
->Intervals
[i
].Start
<= list
->Intervals
[i
+ 1].Start
);
738 GLuint Start
, End
; /**< Start, end instructions of loop */
742 * Update the intermediate interval info for register 'index' and
746 update_interval(GLint intBegin
[], GLint intEnd
[],
747 struct loop_info
*loopStack
, GLuint loopStackDepth
,
748 GLuint index
, GLuint ic
)
752 /* If the register is used in a loop, extend its lifetime through the end
753 * of the outermost loop that doesn't contain its definition.
755 for (i
= 0; i
< loopStackDepth
; i
++) {
756 if (intBegin
[index
] < loopStack
[i
].Start
) {
757 ic
= loopStack
[i
].End
;
762 ASSERT(index
< REG_ALLOCATE_MAX_PROGRAM_TEMPS
);
763 if (intBegin
[index
] == -1) {
764 ASSERT(intEnd
[index
] == -1);
765 intBegin
[index
] = intEnd
[index
] = ic
;
774 * Find first/last instruction that references each temporary register.
777 _mesa_find_temp_intervals(const struct prog_instruction
*instructions
,
778 GLuint numInstructions
,
779 GLint intBegin
[REG_ALLOCATE_MAX_PROGRAM_TEMPS
],
780 GLint intEnd
[REG_ALLOCATE_MAX_PROGRAM_TEMPS
])
782 struct loop_info loopStack
[MAX_LOOP_NESTING
];
783 GLuint loopStackDepth
= 0;
786 for (i
= 0; i
< REG_ALLOCATE_MAX_PROGRAM_TEMPS
; i
++){
787 intBegin
[i
] = intEnd
[i
] = -1;
790 /* Scan instructions looking for temporary registers */
791 for (i
= 0; i
< numInstructions
; i
++) {
792 const struct prog_instruction
*inst
= instructions
+ i
;
793 if (inst
->Opcode
== OPCODE_BGNLOOP
) {
794 loopStack
[loopStackDepth
].Start
= i
;
795 loopStack
[loopStackDepth
].End
= inst
->BranchTarget
;
798 else if (inst
->Opcode
== OPCODE_ENDLOOP
) {
801 else if (inst
->Opcode
== OPCODE_CAL
) {
805 const GLuint numSrc
= 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
807 for (j
= 0; j
< numSrc
; j
++) {
808 if (inst
->SrcReg
[j
].File
== PROGRAM_TEMPORARY
) {
809 const GLuint index
= inst
->SrcReg
[j
].Index
;
810 if (inst
->SrcReg
[j
].RelAddr
)
812 update_interval(intBegin
, intEnd
, loopStack
, loopStackDepth
,
816 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
817 const GLuint index
= inst
->DstReg
.Index
;
818 if (inst
->DstReg
.RelAddr
)
820 update_interval(intBegin
, intEnd
, loopStack
, loopStackDepth
,
831 * Find the live intervals for each temporary register in the program.
832 * For register R, the interval [A,B] indicates that R is referenced
833 * from instruction A through instruction B.
834 * Special consideration is needed for loops and subroutines.
835 * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
838 find_live_intervals(struct gl_program
*prog
,
839 struct interval_list
*liveIntervals
)
841 GLint intBegin
[REG_ALLOCATE_MAX_PROGRAM_TEMPS
];
842 GLint intEnd
[REG_ALLOCATE_MAX_PROGRAM_TEMPS
];
846 * Note: we'll return GL_FALSE below if we find relative indexing
847 * into the TEMP register file. We can't handle that yet.
848 * We also give up on subroutines for now.
852 printf("Optimize: Begin find intervals\n");
855 /* build intermediate arrays */
856 if (!_mesa_find_temp_intervals(prog
->Instructions
, prog
->NumInstructions
,
860 /* Build live intervals list from intermediate arrays */
861 liveIntervals
->Num
= 0;
862 for (i
= 0; i
< REG_ALLOCATE_MAX_PROGRAM_TEMPS
; i
++) {
863 if (intBegin
[i
] >= 0) {
866 inv
.Start
= intBegin
[i
];
868 append_interval(liveIntervals
, &inv
);
872 /* Sort the list according to interval starts */
873 sort_interval_list_by_start(liveIntervals
);
876 /* print interval info */
877 for (i
= 0; i
< liveIntervals
->Num
; i
++) {
878 const struct interval
*inv
= liveIntervals
->Intervals
+ i
;
879 printf("Reg[%d] live [%d, %d]:",
880 inv
->Reg
, inv
->Start
, inv
->End
);
883 for (j
= 0; j
< inv
->Start
; j
++)
885 for (j
= inv
->Start
; j
<= inv
->End
; j
++)
896 /** Scan the array of used register flags to find free entry */
898 alloc_register(GLboolean usedRegs
[REG_ALLOCATE_MAX_PROGRAM_TEMPS
])
901 for (k
= 0; k
< REG_ALLOCATE_MAX_PROGRAM_TEMPS
; k
++) {
903 usedRegs
[k
] = GL_TRUE
;
912 * This function implements "Linear Scan Register Allocation" to reduce
913 * the number of temporary registers used by the program.
915 * We compute the "live interval" for all temporary registers then
916 * examine the overlap of the intervals to allocate new registers.
917 * Basically, if two intervals do not overlap, they can use the same register.
920 _mesa_reallocate_registers(struct gl_program
*prog
)
922 struct interval_list liveIntervals
;
923 GLint registerMap
[REG_ALLOCATE_MAX_PROGRAM_TEMPS
];
924 GLboolean usedRegs
[REG_ALLOCATE_MAX_PROGRAM_TEMPS
];
929 printf("Optimize: Begin live-interval register reallocation\n");
930 _mesa_print_program(prog
);
933 for (i
= 0; i
< REG_ALLOCATE_MAX_PROGRAM_TEMPS
; i
++){
935 usedRegs
[i
] = GL_FALSE
;
938 if (!find_live_intervals(prog
, &liveIntervals
)) {
940 printf("Aborting register reallocation\n");
945 struct interval_list activeIntervals
;
946 activeIntervals
.Num
= 0;
948 /* loop over live intervals, allocating a new register for each */
949 for (i
= 0; i
< liveIntervals
.Num
; i
++) {
950 const struct interval
*live
= liveIntervals
.Intervals
+ i
;
953 printf("Consider register %u\n", live
->Reg
);
955 /* Expire old intervals. Intervals which have ended with respect
956 * to the live interval can have their remapped registers freed.
960 for (j
= 0; j
< (GLint
) activeIntervals
.Num
; j
++) {
961 const struct interval
*inv
= activeIntervals
.Intervals
+ j
;
962 if (inv
->End
>= live
->Start
) {
963 /* Stop now. Since the activeInterval list is sorted
964 * we know we don't have to go further.
969 /* Interval 'inv' has expired */
970 const GLint regNew
= registerMap
[inv
->Reg
];
974 printf(" expire interval for reg %u\n", inv
->Reg
);
976 /* remove interval j from active list */
977 remove_interval(&activeIntervals
, inv
);
978 j
--; /* counter-act j++ in for-loop above */
980 /* return register regNew to the free pool */
982 printf(" free reg %d\n", regNew
);
983 ASSERT(usedRegs
[regNew
] == GL_TRUE
);
984 usedRegs
[regNew
] = GL_FALSE
;
989 /* find a free register for this live interval */
991 const GLint k
= alloc_register(usedRegs
);
993 /* out of registers, give up */
996 registerMap
[live
->Reg
] = k
;
997 maxTemp
= MAX2(maxTemp
, k
);
999 printf(" remap register %u -> %d\n", live
->Reg
, k
);
1002 /* Insert this live interval into the active list which is sorted
1003 * by increasing end points.
1005 insert_interval_by_end(&activeIntervals
, live
);
1009 if (maxTemp
+ 1 < (GLint
) liveIntervals
.Num
) {
1010 /* OK, we've reduced the number of registers needed.
1011 * Scan the program and replace all the old temporary register
1012 * indexes with the new indexes.
1014 replace_regs(prog
, PROGRAM_TEMPORARY
, registerMap
);
1016 prog
->NumTemporaries
= maxTemp
+ 1;
1020 printf("Optimize: End live-interval register reallocation\n");
1021 printf("Num temp regs before: %u after: %u\n",
1022 liveIntervals
.Num
, maxTemp
+ 1);
1023 _mesa_print_program(prog
);
1029 * Apply optimizations to the given program to eliminate unnecessary
1030 * instructions, temp regs, etc.
1033 _mesa_optimize_program(GLcontext
*ctx
, struct gl_program
*program
)
1035 _mesa_remove_extra_move_use(program
);
1038 _mesa_remove_dead_code(program
);
1040 if (0) /* not tested much yet */
1041 _mesa_remove_extra_moves(program
);
1044 _mesa_consolidate_registers(program
);
1046 _mesa_reallocate_registers(program
);