mesa: Allow large temporary indices coming into the temporary reg allocator.
[mesa.git] / src / mesa / program / prog_optimize.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.5
4 *
5 * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26
27 #include "main/glheader.h"
28 #include "main/context.h"
29 #include "main/macros.h"
30 #include "program.h"
31 #include "prog_instruction.h"
32 #include "prog_optimize.h"
33 #include "prog_print.h"
34
35
36 #define MAX_LOOP_NESTING 50
37 /* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
38 * register allocate many temporary values into that small number of
39 * temps. So allow large temporary indices coming into the register
40 * allocator.
41 */
42 #define REG_ALLOCATE_MAX_PROGRAM_TEMPS ((1 << INST_INDEX_BITS) - 1)
43
44 static GLboolean dbg = GL_FALSE;
45
46 /* Returns the mask of channels read from the given srcreg in this instruction.
47 */
48 static GLuint
49 get_src_arg_mask(const struct prog_instruction *inst, int arg)
50 {
51 int writemask = inst->DstReg.WriteMask;
52
53 if (inst->CondUpdate)
54 writemask = WRITEMASK_XYZW;
55
56 switch (inst->Opcode) {
57 case OPCODE_MOV:
58 case OPCODE_ABS:
59 case OPCODE_ADD:
60 case OPCODE_MUL:
61 case OPCODE_SUB:
62 return writemask;
63 case OPCODE_RCP:
64 case OPCODE_SIN:
65 case OPCODE_COS:
66 case OPCODE_RSQ:
67 case OPCODE_POW:
68 case OPCODE_EX2:
69 return WRITEMASK_X;
70 case OPCODE_DP2:
71 return WRITEMASK_XY;
72 case OPCODE_DP3:
73 case OPCODE_XPD:
74 return WRITEMASK_XYZ;
75 default:
76 return WRITEMASK_XYZW;
77 }
78 }
79
80 /**
81 * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
82 * \return number of instructions removed
83 */
84 static GLuint
85 remove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
86 {
87 GLint i, removeEnd = 0, removeCount = 0;
88 GLuint totalRemoved = 0;
89
90 /* go backward */
91 for (i = prog->NumInstructions - 1; i >= 0; i--) {
92 if (removeFlags[i]) {
93 totalRemoved++;
94 if (removeCount == 0) {
95 /* begin a run of instructions to remove */
96 removeEnd = i;
97 removeCount = 1;
98 }
99 else {
100 /* extend the run of instructions to remove */
101 removeCount++;
102 }
103 }
104 else {
105 /* don't remove this instruction, but check if the preceeding
106 * instructions are to be removed.
107 */
108 if (removeCount > 0) {
109 GLint removeStart = removeEnd - removeCount + 1;
110 _mesa_delete_instructions(prog, removeStart, removeCount);
111 removeStart = removeCount = 0; /* reset removal info */
112 }
113 }
114 }
115 /* Finish removing if the first instruction was to be removed. */
116 if (removeCount > 0) {
117 GLint removeStart = removeEnd - removeCount + 1;
118 _mesa_delete_instructions(prog, removeStart, removeCount);
119 }
120 return totalRemoved;
121 }
122
123
124 /**
125 * Remap register indexes according to map.
126 * \param prog the program to search/replace
127 * \param file the type of register file to search/replace
128 * \param map maps old register indexes to new indexes
129 */
130 static void
131 replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
132 {
133 GLuint i;
134
135 for (i = 0; i < prog->NumInstructions; i++) {
136 struct prog_instruction *inst = prog->Instructions + i;
137 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
138 GLuint j;
139 for (j = 0; j < numSrc; j++) {
140 if (inst->SrcReg[j].File == file) {
141 GLuint index = inst->SrcReg[j].Index;
142 ASSERT(map[index] >= 0);
143 inst->SrcReg[j].Index = map[index];
144 }
145 }
146 if (inst->DstReg.File == file) {
147 const GLuint index = inst->DstReg.Index;
148 ASSERT(map[index] >= 0);
149 inst->DstReg.Index = map[index];
150 }
151 }
152 }
153
154
155 /**
156 * Consolidate temporary registers to use low numbers. For example, if the
157 * shader only uses temps 4, 5, 8, replace them with 0, 1, 2.
158 */
159 static void
160 _mesa_consolidate_registers(struct gl_program *prog)
161 {
162 GLboolean tempUsed[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
163 GLint tempMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
164 GLuint tempMax = 0, i;
165
166 if (dbg) {
167 printf("Optimize: Begin register consolidation\n");
168 }
169
170 memset(tempUsed, 0, sizeof(tempUsed));
171
172 for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
173 tempMap[i] = -1;
174 }
175
176 /* set tempUsed[i] if temporary [i] is referenced */
177 for (i = 0; i < prog->NumInstructions; i++) {
178 const struct prog_instruction *inst = prog->Instructions + i;
179 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
180 GLuint j;
181 for (j = 0; j < numSrc; j++) {
182 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
183 const GLuint index = inst->SrcReg[j].Index;
184 ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
185 tempUsed[index] = GL_TRUE;
186 tempMax = MAX2(tempMax, index);
187 break;
188 }
189 }
190 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
191 const GLuint index = inst->DstReg.Index;
192 ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
193 tempUsed[index] = GL_TRUE;
194 tempMax = MAX2(tempMax, index);
195 }
196 }
197
198 /* allocate a new index for each temp that's used */
199 {
200 GLuint freeTemp = 0;
201 for (i = 0; i <= tempMax; i++) {
202 if (tempUsed[i]) {
203 tempMap[i] = freeTemp++;
204 /*printf("replace %u with %u\n", i, tempMap[i]);*/
205 }
206 }
207 if (freeTemp == tempMax + 1) {
208 /* no consolidation possible */
209 return;
210 }
211 if (dbg) {
212 printf("Replace regs 0..%u with 0..%u\n", tempMax, freeTemp-1);
213 }
214 }
215
216 replace_regs(prog, PROGRAM_TEMPORARY, tempMap);
217
218 if (dbg) {
219 printf("Optimize: End register consolidation\n");
220 }
221 }
222
223
224 /**
225 * Remove dead instructions from the given program.
226 * This is very primitive for now. Basically look for temp registers
227 * that are written to but never read. Remove any instructions that
228 * write to such registers. Be careful with condition code setters.
229 */
230 static void
231 _mesa_remove_dead_code(struct gl_program *prog)
232 {
233 GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
234 GLboolean *removeInst; /* per-instruction removal flag */
235 GLuint i, rem = 0, comp;
236
237 memset(tempRead, 0, sizeof(tempRead));
238
239 if (dbg) {
240 printf("Optimize: Begin dead code removal\n");
241 /*_mesa_print_program(prog);*/
242 }
243
244 removeInst = (GLboolean *)
245 calloc(1, prog->NumInstructions * sizeof(GLboolean));
246
247 /* Determine which temps are read and written */
248 for (i = 0; i < prog->NumInstructions; i++) {
249 const struct prog_instruction *inst = prog->Instructions + i;
250 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
251 GLuint j;
252
253 /* check src regs */
254 for (j = 0; j < numSrc; j++) {
255 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
256 const GLuint index = inst->SrcReg[j].Index;
257 GLuint read_mask;
258 ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
259 read_mask = get_src_arg_mask(inst, j);
260
261 if (inst->SrcReg[j].RelAddr) {
262 if (dbg)
263 printf("abort remove dead code (indirect temp)\n");
264 goto done;
265 }
266
267 for (comp = 0; comp < 4; comp++) {
268 GLuint swz = (inst->SrcReg[j].Swizzle >> (3 * comp)) & 0x7;
269
270 if ((read_mask & (1 << comp)) == 0)
271 continue;
272
273 switch (swz) {
274 case SWIZZLE_X:
275 tempRead[index][0] = GL_TRUE;
276 break;
277 case SWIZZLE_Y:
278 tempRead[index][1] = GL_TRUE;
279 break;
280 case SWIZZLE_Z:
281 tempRead[index][2] = GL_TRUE;
282 break;
283 case SWIZZLE_W:
284 tempRead[index][3] = GL_TRUE;
285 break;
286 }
287 }
288 }
289 }
290
291 /* check dst reg */
292 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
293 const GLuint index = inst->DstReg.Index;
294 ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
295
296 if (inst->DstReg.RelAddr) {
297 if (dbg)
298 printf("abort remove dead code (indirect temp)\n");
299 goto done;
300 }
301
302 if (inst->CondUpdate) {
303 /* If we're writing to this register and setting condition
304 * codes we cannot remove the instruction. Prevent removal
305 * by setting the 'read' flag.
306 */
307 tempRead[index][0] = GL_TRUE;
308 tempRead[index][1] = GL_TRUE;
309 tempRead[index][2] = GL_TRUE;
310 tempRead[index][3] = GL_TRUE;
311 }
312 }
313 }
314
315 /* find instructions that write to dead registers, flag for removal */
316 for (i = 0; i < prog->NumInstructions; i++) {
317 struct prog_instruction *inst = prog->Instructions + i;
318 const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
319
320 if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
321 GLint chan, index = inst->DstReg.Index;
322
323 for (chan = 0; chan < 4; chan++) {
324 if (!tempRead[index][chan] &&
325 inst->DstReg.WriteMask & (1 << chan)) {
326 if (dbg) {
327 printf("Remove writemask on %u.%c\n", i,
328 chan == 3 ? 'w' : 'x' + chan);
329 }
330 inst->DstReg.WriteMask &= ~(1 << chan);
331 rem++;
332 }
333 }
334
335 if (inst->DstReg.WriteMask == 0) {
336 /* If we cleared all writes, the instruction can be removed. */
337 if (dbg)
338 printf("Remove instruction %u: \n", i);
339 removeInst[i] = GL_TRUE;
340 }
341 }
342 }
343
344 /* now remove the instructions which aren't needed */
345 rem = remove_instructions(prog, removeInst);
346
347 if (dbg) {
348 printf("Optimize: End dead code removal.\n");
349 printf(" %u channel writes removed\n", rem);
350 printf(" %u instructions removed\n", rem);
351 /*_mesa_print_program(prog);*/
352 }
353
354 done:
355 free(removeInst);
356 }
357
358
359 enum temp_use
360 {
361 READ,
362 WRITE,
363 FLOW,
364 END
365 };
366
367 /**
368 * Scan forward in program from 'start' for the next occurance of TEMP[index].
369 * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
370 * that we can't look further.
371 */
372 static enum temp_use
373 find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index)
374 {
375 GLuint i;
376
377 for (i = start; i < prog->NumInstructions; i++) {
378 const struct prog_instruction *inst = prog->Instructions + i;
379 switch (inst->Opcode) {
380 case OPCODE_BGNLOOP:
381 case OPCODE_ENDLOOP:
382 case OPCODE_BGNSUB:
383 case OPCODE_ENDSUB:
384 return FLOW;
385 default:
386 {
387 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
388 GLuint j;
389 for (j = 0; j < numSrc; j++) {
390 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
391 inst->SrcReg[j].Index == index)
392 return READ;
393 }
394 if (inst->DstReg.File == PROGRAM_TEMPORARY &&
395 inst->DstReg.Index == index)
396 return WRITE;
397 }
398 }
399 }
400
401 return END;
402 }
403
404 static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode)
405 {
406 switch (opcode) {
407 case OPCODE_BGNLOOP:
408 case OPCODE_BGNSUB:
409 case OPCODE_BRA:
410 case OPCODE_CAL:
411 case OPCODE_CONT:
412 case OPCODE_IF:
413 case OPCODE_ELSE:
414 case OPCODE_END:
415 case OPCODE_ENDIF:
416 case OPCODE_ENDLOOP:
417 case OPCODE_ENDSUB:
418 case OPCODE_RET:
419 return GL_TRUE;
420 default:
421 return GL_FALSE;
422 }
423 }
424
425 /**
426 * Try to remove use of extraneous MOV instructions, to free them up for dead
427 * code removal.
428 */
429 static void
430 _mesa_remove_extra_move_use(struct gl_program *prog)
431 {
432 GLuint i, j;
433
434 if (dbg) {
435 printf("Optimize: Begin remove extra move use\n");
436 _mesa_print_program(prog);
437 }
438
439 /*
440 * Look for sequences such as this:
441 * MOV tmpX, arg0;
442 * ...
443 * FOO tmpY, tmpX, arg1;
444 * and convert into:
445 * MOV tmpX, arg0;
446 * ...
447 * FOO tmpY, arg0, arg1;
448 */
449
450 for (i = 0; i + 1 < prog->NumInstructions; i++) {
451 const struct prog_instruction *mov = prog->Instructions + i;
452
453 if (mov->Opcode != OPCODE_MOV ||
454 mov->DstReg.File != PROGRAM_TEMPORARY ||
455 mov->DstReg.RelAddr ||
456 mov->DstReg.CondMask != COND_TR ||
457 mov->SaturateMode != SATURATE_OFF ||
458 mov->SrcReg[0].RelAddr)
459 continue;
460
461 /* Walk through remaining instructions until the or src reg gets
462 * rewritten or we get into some flow-control, eliminating the use of
463 * this MOV.
464 */
465 for (j = i + 1; j < prog->NumInstructions; j++) {
466 struct prog_instruction *inst2 = prog->Instructions + j;
467 GLuint arg;
468
469 if (_mesa_is_flow_control_opcode(inst2->Opcode))
470 break;
471
472 /* First rewrite this instruction's args if appropriate. */
473 for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
474 int comp;
475 int read_mask = get_src_arg_mask(inst2, arg);
476
477 if (inst2->SrcReg[arg].File != mov->DstReg.File ||
478 inst2->SrcReg[arg].Index != mov->DstReg.Index ||
479 inst2->SrcReg[arg].RelAddr ||
480 inst2->SrcReg[arg].Abs)
481 continue;
482
483 /* Check that all the sources for this arg of inst2 come from inst1
484 * or constants.
485 */
486 for (comp = 0; comp < 4; comp++) {
487 int src_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
488
489 /* If the MOV didn't write that channel, can't use it. */
490 if ((read_mask & (1 << comp)) &&
491 src_swz <= SWIZZLE_W &&
492 (mov->DstReg.WriteMask & (1 << src_swz)) == 0)
493 break;
494 }
495 if (comp != 4)
496 continue;
497
498 /* Adjust the swizzles of inst2 to point at MOV's source */
499 for (comp = 0; comp < 4; comp++) {
500 int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
501
502 if (inst2_swz <= SWIZZLE_W) {
503 GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
504 inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
505 inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
506 inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
507 inst2_swz) & 0x1) << comp);
508 }
509 }
510 inst2->SrcReg[arg].File = mov->SrcReg[0].File;
511 inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
512 }
513
514 /* If this instruction overwrote part of the move, our time is up. */
515 if ((inst2->DstReg.File == mov->DstReg.File &&
516 (inst2->DstReg.RelAddr ||
517 inst2->DstReg.Index == mov->DstReg.Index)) ||
518 (inst2->DstReg.File == mov->SrcReg[0].File &&
519 (inst2->DstReg.RelAddr ||
520 inst2->DstReg.Index == mov->SrcReg[0].Index)))
521 break;
522 }
523 }
524
525 if (dbg) {
526 printf("Optimize: End remove extra move use.\n");
527 /*_mesa_print_program(prog);*/
528 }
529 }
530
531 /**
532 * Try to remove extraneous MOV instructions from the given program.
533 */
534 static void
535 _mesa_remove_extra_moves(struct gl_program *prog)
536 {
537 GLboolean *removeInst; /* per-instruction removal flag */
538 GLuint i, rem, loopNesting = 0, subroutineNesting = 0;
539
540 if (dbg) {
541 printf("Optimize: Begin remove extra moves\n");
542 _mesa_print_program(prog);
543 }
544
545 removeInst = (GLboolean *)
546 calloc(1, prog->NumInstructions * sizeof(GLboolean));
547
548 /*
549 * Look for sequences such as this:
550 * FOO tmpX, arg0, arg1;
551 * MOV tmpY, tmpX;
552 * and convert into:
553 * FOO tmpY, arg0, arg1;
554 */
555
556 for (i = 0; i < prog->NumInstructions; i++) {
557 const struct prog_instruction *inst = prog->Instructions + i;
558
559 switch (inst->Opcode) {
560 case OPCODE_BGNLOOP:
561 loopNesting++;
562 break;
563 case OPCODE_ENDLOOP:
564 loopNesting--;
565 break;
566 case OPCODE_BGNSUB:
567 subroutineNesting++;
568 break;
569 case OPCODE_ENDSUB:
570 subroutineNesting--;
571 break;
572 case OPCODE_MOV:
573 if (i > 0 &&
574 loopNesting == 0 &&
575 subroutineNesting == 0 &&
576 inst->SrcReg[0].File == PROGRAM_TEMPORARY &&
577 inst->SrcReg[0].Swizzle == SWIZZLE_XYZW) {
578 /* see if this MOV can be removed */
579 const GLuint tempIndex = inst->SrcReg[0].Index;
580 struct prog_instruction *prevInst;
581 GLuint prevI;
582
583 /* get pointer to previous instruction */
584 prevI = i - 1;
585 while (prevI > 0 && removeInst[prevI])
586 prevI--;
587 prevInst = prog->Instructions + prevI;
588
589 if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
590 prevInst->DstReg.Index == tempIndex &&
591 prevInst->DstReg.WriteMask == WRITEMASK_XYZW) {
592
593 enum temp_use next_use =
594 find_next_temp_use(prog, i + 1, tempIndex);
595
596 if (next_use == WRITE || next_use == END) {
597 /* OK, we can safely remove this MOV instruction.
598 * Transform:
599 * prevI: FOO tempIndex, x, y;
600 * i: MOV z, tempIndex;
601 * Into:
602 * prevI: FOO z, x, y;
603 */
604
605 /* patch up prev inst */
606 prevInst->DstReg.File = inst->DstReg.File;
607 prevInst->DstReg.Index = inst->DstReg.Index;
608
609 /* flag this instruction for removal */
610 removeInst[i] = GL_TRUE;
611
612 if (dbg) {
613 printf("Remove MOV at %u\n", i);
614 printf("new prev inst %u: ", prevI);
615 _mesa_print_instruction(prevInst);
616 }
617 }
618 }
619 }
620 break;
621 default:
622 ; /* nothing */
623 }
624 }
625
626 /* now remove the instructions which aren't needed */
627 rem = remove_instructions(prog, removeInst);
628
629 free(removeInst);
630
631 if (dbg) {
632 printf("Optimize: End remove extra moves. %u instructions removed\n", rem);
633 /*_mesa_print_program(prog);*/
634 }
635 }
636
637
638 /** A live register interval */
639 struct interval
640 {
641 GLuint Reg; /** The temporary register index */
642 GLuint Start, End; /** Start/end instruction numbers */
643 };
644
645
646 /** A list of register intervals */
647 struct interval_list
648 {
649 GLuint Num;
650 struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
651 };
652
653
654 static void
655 append_interval(struct interval_list *list, const struct interval *inv)
656 {
657 list->Intervals[list->Num++] = *inv;
658 }
659
660
661 /** Insert interval inv into list, sorted by interval end */
662 static void
663 insert_interval_by_end(struct interval_list *list, const struct interval *inv)
664 {
665 /* XXX we could do a binary search insertion here since list is sorted */
666 GLint i = list->Num - 1;
667 while (i >= 0 && list->Intervals[i].End > inv->End) {
668 list->Intervals[i + 1] = list->Intervals[i];
669 i--;
670 }
671 list->Intervals[i + 1] = *inv;
672 list->Num++;
673
674 #ifdef DEBUG
675 {
676 GLuint i;
677 for (i = 0; i + 1 < list->Num; i++) {
678 ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End);
679 }
680 }
681 #endif
682 }
683
684
685 /** Remove the given interval from the interval list */
686 static void
687 remove_interval(struct interval_list *list, const struct interval *inv)
688 {
689 /* XXX we could binary search since list is sorted */
690 GLuint k;
691 for (k = 0; k < list->Num; k++) {
692 if (list->Intervals[k].Reg == inv->Reg) {
693 /* found, remove it */
694 ASSERT(list->Intervals[k].Start == inv->Start);
695 ASSERT(list->Intervals[k].End == inv->End);
696 while (k < list->Num - 1) {
697 list->Intervals[k] = list->Intervals[k + 1];
698 k++;
699 }
700 list->Num--;
701 return;
702 }
703 }
704 }
705
706
707 /** called by qsort() */
708 static int
709 compare_start(const void *a, const void *b)
710 {
711 const struct interval *ia = (const struct interval *) a;
712 const struct interval *ib = (const struct interval *) b;
713 if (ia->Start < ib->Start)
714 return -1;
715 else if (ia->Start > ib->Start)
716 return +1;
717 else
718 return 0;
719 }
720
721 /** sort the interval list according to interval starts */
722 static void
723 sort_interval_list_by_start(struct interval_list *list)
724 {
725 qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
726 #ifdef DEBUG
727 {
728 GLuint i;
729 for (i = 0; i + 1 < list->Num; i++) {
730 ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
731 }
732 }
733 #endif
734 }
735
736 struct loop_info
737 {
738 GLuint Start, End; /**< Start, end instructions of loop */
739 };
740
741 /**
742 * Update the intermediate interval info for register 'index' and
743 * instruction 'ic'.
744 */
745 static void
746 update_interval(GLint intBegin[], GLint intEnd[],
747 struct loop_info *loopStack, GLuint loopStackDepth,
748 GLuint index, GLuint ic)
749 {
750 int i;
751
752 /* If the register is used in a loop, extend its lifetime through the end
753 * of the outermost loop that doesn't contain its definition.
754 */
755 for (i = 0; i < loopStackDepth; i++) {
756 if (intBegin[index] < loopStack[i].Start) {
757 ic = loopStack[i].End;
758 break;
759 }
760 }
761
762 ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
763 if (intBegin[index] == -1) {
764 ASSERT(intEnd[index] == -1);
765 intBegin[index] = intEnd[index] = ic;
766 }
767 else {
768 intEnd[index] = ic;
769 }
770 }
771
772
773 /**
774 * Find first/last instruction that references each temporary register.
775 */
776 GLboolean
777 _mesa_find_temp_intervals(const struct prog_instruction *instructions,
778 GLuint numInstructions,
779 GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
780 GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
781 {
782 struct loop_info loopStack[MAX_LOOP_NESTING];
783 GLuint loopStackDepth = 0;
784 GLuint i;
785
786 for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
787 intBegin[i] = intEnd[i] = -1;
788 }
789
790 /* Scan instructions looking for temporary registers */
791 for (i = 0; i < numInstructions; i++) {
792 const struct prog_instruction *inst = instructions + i;
793 if (inst->Opcode == OPCODE_BGNLOOP) {
794 loopStack[loopStackDepth].Start = i;
795 loopStack[loopStackDepth].End = inst->BranchTarget;
796 loopStackDepth++;
797 }
798 else if (inst->Opcode == OPCODE_ENDLOOP) {
799 loopStackDepth--;
800 }
801 else if (inst->Opcode == OPCODE_CAL) {
802 return GL_FALSE;
803 }
804 else {
805 const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
806 GLuint j;
807 for (j = 0; j < numSrc; j++) {
808 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
809 const GLuint index = inst->SrcReg[j].Index;
810 if (inst->SrcReg[j].RelAddr)
811 return GL_FALSE;
812 update_interval(intBegin, intEnd, loopStack, loopStackDepth,
813 index, i);
814 }
815 }
816 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
817 const GLuint index = inst->DstReg.Index;
818 if (inst->DstReg.RelAddr)
819 return GL_FALSE;
820 update_interval(intBegin, intEnd, loopStack, loopStackDepth,
821 index, i);
822 }
823 }
824 }
825
826 return GL_TRUE;
827 }
828
829
830 /**
831 * Find the live intervals for each temporary register in the program.
832 * For register R, the interval [A,B] indicates that R is referenced
833 * from instruction A through instruction B.
834 * Special consideration is needed for loops and subroutines.
835 * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
836 */
837 static GLboolean
838 find_live_intervals(struct gl_program *prog,
839 struct interval_list *liveIntervals)
840 {
841 GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
842 GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
843 GLuint i;
844
845 /*
846 * Note: we'll return GL_FALSE below if we find relative indexing
847 * into the TEMP register file. We can't handle that yet.
848 * We also give up on subroutines for now.
849 */
850
851 if (dbg) {
852 printf("Optimize: Begin find intervals\n");
853 }
854
855 /* build intermediate arrays */
856 if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
857 intBegin, intEnd))
858 return GL_FALSE;
859
860 /* Build live intervals list from intermediate arrays */
861 liveIntervals->Num = 0;
862 for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
863 if (intBegin[i] >= 0) {
864 struct interval inv;
865 inv.Reg = i;
866 inv.Start = intBegin[i];
867 inv.End = intEnd[i];
868 append_interval(liveIntervals, &inv);
869 }
870 }
871
872 /* Sort the list according to interval starts */
873 sort_interval_list_by_start(liveIntervals);
874
875 if (dbg) {
876 /* print interval info */
877 for (i = 0; i < liveIntervals->Num; i++) {
878 const struct interval *inv = liveIntervals->Intervals + i;
879 printf("Reg[%d] live [%d, %d]:",
880 inv->Reg, inv->Start, inv->End);
881 if (1) {
882 GLuint j;
883 for (j = 0; j < inv->Start; j++)
884 printf(" ");
885 for (j = inv->Start; j <= inv->End; j++)
886 printf("x");
887 }
888 printf("\n");
889 }
890 }
891
892 return GL_TRUE;
893 }
894
895
896 /** Scan the array of used register flags to find free entry */
897 static GLint
898 alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
899 {
900 GLuint k;
901 for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
902 if (!usedRegs[k]) {
903 usedRegs[k] = GL_TRUE;
904 return k;
905 }
906 }
907 return -1;
908 }
909
910
911 /**
912 * This function implements "Linear Scan Register Allocation" to reduce
913 * the number of temporary registers used by the program.
914 *
915 * We compute the "live interval" for all temporary registers then
916 * examine the overlap of the intervals to allocate new registers.
917 * Basically, if two intervals do not overlap, they can use the same register.
918 */
919 static void
920 _mesa_reallocate_registers(struct gl_program *prog)
921 {
922 struct interval_list liveIntervals;
923 GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
924 GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
925 GLuint i;
926 GLint maxTemp = -1;
927
928 if (dbg) {
929 printf("Optimize: Begin live-interval register reallocation\n");
930 _mesa_print_program(prog);
931 }
932
933 for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
934 registerMap[i] = -1;
935 usedRegs[i] = GL_FALSE;
936 }
937
938 if (!find_live_intervals(prog, &liveIntervals)) {
939 if (dbg)
940 printf("Aborting register reallocation\n");
941 return;
942 }
943
944 {
945 struct interval_list activeIntervals;
946 activeIntervals.Num = 0;
947
948 /* loop over live intervals, allocating a new register for each */
949 for (i = 0; i < liveIntervals.Num; i++) {
950 const struct interval *live = liveIntervals.Intervals + i;
951
952 if (dbg)
953 printf("Consider register %u\n", live->Reg);
954
955 /* Expire old intervals. Intervals which have ended with respect
956 * to the live interval can have their remapped registers freed.
957 */
958 {
959 GLint j;
960 for (j = 0; j < (GLint) activeIntervals.Num; j++) {
961 const struct interval *inv = activeIntervals.Intervals + j;
962 if (inv->End >= live->Start) {
963 /* Stop now. Since the activeInterval list is sorted
964 * we know we don't have to go further.
965 */
966 break;
967 }
968 else {
969 /* Interval 'inv' has expired */
970 const GLint regNew = registerMap[inv->Reg];
971 ASSERT(regNew >= 0);
972
973 if (dbg)
974 printf(" expire interval for reg %u\n", inv->Reg);
975
976 /* remove interval j from active list */
977 remove_interval(&activeIntervals, inv);
978 j--; /* counter-act j++ in for-loop above */
979
980 /* return register regNew to the free pool */
981 if (dbg)
982 printf(" free reg %d\n", regNew);
983 ASSERT(usedRegs[regNew] == GL_TRUE);
984 usedRegs[regNew] = GL_FALSE;
985 }
986 }
987 }
988
989 /* find a free register for this live interval */
990 {
991 const GLint k = alloc_register(usedRegs);
992 if (k < 0) {
993 /* out of registers, give up */
994 return;
995 }
996 registerMap[live->Reg] = k;
997 maxTemp = MAX2(maxTemp, k);
998 if (dbg)
999 printf(" remap register %u -> %d\n", live->Reg, k);
1000 }
1001
1002 /* Insert this live interval into the active list which is sorted
1003 * by increasing end points.
1004 */
1005 insert_interval_by_end(&activeIntervals, live);
1006 }
1007 }
1008
1009 if (maxTemp + 1 < (GLint) liveIntervals.Num) {
1010 /* OK, we've reduced the number of registers needed.
1011 * Scan the program and replace all the old temporary register
1012 * indexes with the new indexes.
1013 */
1014 replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
1015
1016 prog->NumTemporaries = maxTemp + 1;
1017 }
1018
1019 if (dbg) {
1020 printf("Optimize: End live-interval register reallocation\n");
1021 printf("Num temp regs before: %u after: %u\n",
1022 liveIntervals.Num, maxTemp + 1);
1023 _mesa_print_program(prog);
1024 }
1025 }
1026
1027
1028 /**
1029 * Apply optimizations to the given program to eliminate unnecessary
1030 * instructions, temp regs, etc.
1031 */
1032 void
1033 _mesa_optimize_program(GLcontext *ctx, struct gl_program *program)
1034 {
1035 _mesa_remove_extra_move_use(program);
1036
1037 if (1)
1038 _mesa_remove_dead_code(program);
1039
1040 if (0) /* not tested much yet */
1041 _mesa_remove_extra_moves(program);
1042
1043 if (0)
1044 _mesa_consolidate_registers(program);
1045 else
1046 _mesa_reallocate_registers(program);
1047 }