mesa: more/better program optimizations
[mesa.git] / src / mesa / program / prog_optimize.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.5
4 *
5 * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26
27 #include "main/glheader.h"
28 #include "main/context.h"
29 #include "main/macros.h"
30 #include "program.h"
31 #include "prog_instruction.h"
32 #include "prog_optimize.h"
33 #include "prog_print.h"
34
35
36 #define MAX_LOOP_NESTING 50
37
38
39 static GLboolean dbg = GL_FALSE;
40
41 #define NO_MASK 0xf
42
43 /** Returns the mask of channels read from the given src in this instruction, We
44 * also provide one optional masks which may mask other components in the dst
45 * register
46 */
47 static GLuint
48 get_src_arg_mask(const struct prog_instruction *inst, int arg, int dst_mask)
49 {
50 int read_mask, channel_mask;
51 int comp;
52 ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode));
53
54 /* Form the dst register, find the written channels */
55 if (inst->CondUpdate) {
56 channel_mask = WRITEMASK_XYZW;
57 }
58 else {
59 switch (inst->Opcode) {
60 case OPCODE_MOV:
61 case OPCODE_MIN:
62 case OPCODE_MAX:
63 case OPCODE_ABS:
64 case OPCODE_ADD:
65 case OPCODE_MAD:
66 case OPCODE_MUL:
67 case OPCODE_SUB:
68 channel_mask = inst->DstReg.WriteMask & dst_mask;
69 break;
70 case OPCODE_RCP:
71 case OPCODE_SIN:
72 case OPCODE_COS:
73 case OPCODE_RSQ:
74 case OPCODE_POW:
75 case OPCODE_EX2:
76 case OPCODE_LOG:
77 channel_mask = WRITEMASK_X;
78 break;
79 case OPCODE_DP2:
80 channel_mask = WRITEMASK_XY;
81 break;
82 case OPCODE_DP3:
83 case OPCODE_XPD:
84 channel_mask = WRITEMASK_XYZ;
85 break;
86 default:
87 channel_mask = WRITEMASK_XYZW;
88 break;
89 }
90 }
91
92 /* Now, given the src swizzle and the written channels, find which
93 * components are actually read
94 */
95 read_mask = 0;
96 for (comp = 0; comp < 4; ++comp) {
97 const int coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
98 ASSERT(coord < 4);
99 if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
100 read_mask |= 1 << coord;
101 }
102
103 return read_mask;
104 }
105
106 /** For a MOV instruction, compute a write mask when src register also has
107 * a mask
108 */
109 static GLuint
110 get_dst_mask_for_mov(const struct prog_instruction *mov, int src_mask)
111 {
112 const int mask = mov->DstReg.WriteMask;
113 int comp;
114 int updated_mask = 0;
115 ASSERT(mov->Opcode == OPCODE_MOV);
116
117 for (comp = 0; comp < 4; ++comp) {
118 int src_comp;
119 if ((mask & (1 << comp)) == 0)
120 continue;
121 src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
122 if ((src_mask & (1 << src_comp)) == 0)
123 continue;
124 updated_mask |= 1 << comp;
125 }
126
127 return updated_mask;
128 }
129
130 /** Ensure that the swizzle is regular */
131 static int
132 is_swizzle_regular(int swz)
133 {
134 return GET_SWZ(swz,0) <= SWIZZLE_W &&
135 GET_SWZ(swz,1) <= SWIZZLE_W &&
136 GET_SWZ(swz,2) <= SWIZZLE_W &&
137 GET_SWZ(swz,3) <= SWIZZLE_W;
138 }
139
140 /**
141 * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
142 * \return number of instructions removed
143 */
144 static GLuint
145 remove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
146 {
147 GLint i, removeEnd = 0, removeCount = 0;
148 GLuint totalRemoved = 0;
149
150 /* go backward */
151 for (i = prog->NumInstructions - 1; i >= 0; i--) {
152 if (removeFlags[i]) {
153 totalRemoved++;
154 if (removeCount == 0) {
155 /* begin a run of instructions to remove */
156 removeEnd = i;
157 removeCount = 1;
158 }
159 else {
160 /* extend the run of instructions to remove */
161 removeCount++;
162 }
163 }
164 else {
165 /* don't remove this instruction, but check if the preceeding
166 * instructions are to be removed.
167 */
168 if (removeCount > 0) {
169 GLint removeStart = removeEnd - removeCount + 1;
170 _mesa_delete_instructions(prog, removeStart, removeCount);
171 removeStart = removeCount = 0; /* reset removal info */
172 }
173 }
174 }
175 /* Finish removing if the first instruction was to be removed. */
176 if (removeCount > 0) {
177 GLint removeStart = removeEnd - removeCount + 1;
178 _mesa_delete_instructions(prog, removeStart, removeCount);
179 }
180 return totalRemoved;
181 }
182
183
184 /**
185 * Remap register indexes according to map.
186 * \param prog the program to search/replace
187 * \param file the type of register file to search/replace
188 * \param map maps old register indexes to new indexes
189 */
190 static void
191 replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
192 {
193 GLuint i;
194
195 for (i = 0; i < prog->NumInstructions; i++) {
196 struct prog_instruction *inst = prog->Instructions + i;
197 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
198 GLuint j;
199 for (j = 0; j < numSrc; j++) {
200 if (inst->SrcReg[j].File == file) {
201 GLuint index = inst->SrcReg[j].Index;
202 ASSERT(map[index] >= 0);
203 inst->SrcReg[j].Index = map[index];
204 }
205 }
206 if (inst->DstReg.File == file) {
207 const GLuint index = inst->DstReg.Index;
208 ASSERT(map[index] >= 0);
209 inst->DstReg.Index = map[index];
210 }
211 }
212 }
213
214 /**
215 * Remove dead instructions from the given program.
216 * This is very primitive for now. Basically look for temp registers
217 * that are written to but never read. Remove any instructions that
218 * write to such registers. Be careful with condition code setters.
219 */
220 static GLboolean
221 _mesa_remove_dead_code_global(struct gl_program *prog)
222 {
223 GLboolean tempRead[MAX_PROGRAM_TEMPS][4];
224 GLboolean *removeInst; /* per-instruction removal flag */
225 GLuint i, rem = 0, comp;
226
227 memset(tempRead, 0, sizeof(tempRead));
228
229 if (dbg) {
230 printf("Optimize: Begin dead code removal\n");
231 /*_mesa_print_program(prog);*/
232 }
233
234 removeInst = (GLboolean *)
235 calloc(1, prog->NumInstructions * sizeof(GLboolean));
236
237 /* Determine which temps are read and written */
238 for (i = 0; i < prog->NumInstructions; i++) {
239 const struct prog_instruction *inst = prog->Instructions + i;
240 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
241 GLuint j;
242
243 /* check src regs */
244 for (j = 0; j < numSrc; j++) {
245 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
246 const GLuint index = inst->SrcReg[j].Index;
247 GLuint read_mask;
248 ASSERT(index < MAX_PROGRAM_TEMPS);
249 read_mask = get_src_arg_mask(inst, j, NO_MASK);
250
251 if (inst->SrcReg[j].RelAddr) {
252 if (dbg)
253 printf("abort remove dead code (indirect temp)\n");
254 goto done;
255 }
256
257 for (comp = 0; comp < 4; comp++) {
258 const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
259 ASSERT(swz < 4);
260 if ((read_mask & (1 << swz)) == 0)
261 continue;
262 if (swz <= SWIZZLE_W)
263 tempRead[index][swz] = GL_TRUE;
264 }
265 }
266 }
267
268 /* check dst reg */
269 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
270 const GLuint index = inst->DstReg.Index;
271 ASSERT(index < MAX_PROGRAM_TEMPS);
272
273 if (inst->DstReg.RelAddr) {
274 if (dbg)
275 printf("abort remove dead code (indirect temp)\n");
276 goto done;
277 }
278
279 if (inst->CondUpdate) {
280 /* If we're writing to this register and setting condition
281 * codes we cannot remove the instruction. Prevent removal
282 * by setting the 'read' flag.
283 */
284 tempRead[index][0] = GL_TRUE;
285 tempRead[index][1] = GL_TRUE;
286 tempRead[index][2] = GL_TRUE;
287 tempRead[index][3] = GL_TRUE;
288 }
289 }
290 }
291
292 /* find instructions that write to dead registers, flag for removal */
293 for (i = 0; i < prog->NumInstructions; i++) {
294 struct prog_instruction *inst = prog->Instructions + i;
295 const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
296
297 if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
298 GLint chan, index = inst->DstReg.Index;
299
300 for (chan = 0; chan < 4; chan++) {
301 if (!tempRead[index][chan] &&
302 inst->DstReg.WriteMask & (1 << chan)) {
303 if (dbg) {
304 printf("Remove writemask on %u.%c\n", i,
305 chan == 3 ? 'w' : 'x' + chan);
306 }
307 inst->DstReg.WriteMask &= ~(1 << chan);
308 rem++;
309 }
310 }
311
312 if (inst->DstReg.WriteMask == 0) {
313 /* If we cleared all writes, the instruction can be removed. */
314 if (dbg)
315 printf("Remove instruction %u: \n", i);
316 removeInst[i] = GL_TRUE;
317 }
318 }
319 }
320
321 /* now remove the instructions which aren't needed */
322 rem = remove_instructions(prog, removeInst);
323
324 if (dbg) {
325 printf("Optimize: End dead code removal.\n");
326 printf(" %u channel writes removed\n", rem);
327 printf(" %u instructions removed\n", rem);
328 /*_mesa_print_program(prog);*/
329 }
330
331 done:
332 free(removeInst);
333 return rem != 0;
334 }
335
336
337 enum inst_use
338 {
339 READ,
340 WRITE,
341 FLOW,
342 END
343 };
344
345 /**
346 * Scan forward in program from 'start' for the next occurances of TEMP[index].
347 * We look if an instruction reads the component given by the masks and if they
348 * are overwritten.
349 * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
350 * that we can't look further.
351 */
352 static enum inst_use
353 find_next_use(const struct gl_program *prog,
354 GLuint start,
355 GLuint index,
356 GLuint mask)
357 {
358 GLuint i;
359
360 for (i = start; i < prog->NumInstructions; i++) {
361 const struct prog_instruction *inst = prog->Instructions + i;
362 switch (inst->Opcode) {
363 case OPCODE_BGNLOOP:
364 case OPCODE_BGNSUB:
365 case OPCODE_BRA:
366 case OPCODE_CAL:
367 case OPCODE_CONT:
368 case OPCODE_IF:
369 case OPCODE_ELSE:
370 case OPCODE_ENDIF:
371 case OPCODE_ENDLOOP:
372 case OPCODE_ENDSUB:
373 case OPCODE_RET:
374 return FLOW;
375 case OPCODE_END:
376 return END;
377 default:
378 {
379 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
380 GLuint j;
381 for (j = 0; j < numSrc; j++) {
382 if (inst->SrcReg[j].RelAddr ||
383 (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
384 inst->SrcReg[j].Index == index &&
385 (get_src_arg_mask(inst,j,NO_MASK) & mask)))
386 return READ;
387 }
388 if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
389 inst->DstReg.File == PROGRAM_TEMPORARY &&
390 inst->DstReg.Index == index) {
391 mask &= ~inst->DstReg.WriteMask;
392 if (mask == 0)
393 return WRITE;
394 }
395 }
396 }
397 }
398 return END;
399 }
400
401 static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode)
402 {
403 switch (opcode) {
404 case OPCODE_BGNLOOP:
405 case OPCODE_BGNSUB:
406 case OPCODE_BRA:
407 case OPCODE_CAL:
408 case OPCODE_CONT:
409 case OPCODE_IF:
410 case OPCODE_ELSE:
411 case OPCODE_END:
412 case OPCODE_ENDIF:
413 case OPCODE_ENDLOOP:
414 case OPCODE_ENDSUB:
415 case OPCODE_RET:
416 return GL_TRUE;
417 default:
418 return GL_FALSE;
419 }
420 }
421
422 static GLboolean
423 can_downward_mov_be_modifed(const struct prog_instruction *mov)
424 {
425 return
426 mov->Opcode == OPCODE_MOV &&
427 mov->CondUpdate == GL_FALSE &&
428 mov->SrcReg[0].RelAddr == 0 &&
429 mov->SrcReg[0].Negate == 0 &&
430 mov->SrcReg[0].Abs == 0 &&
431 mov->SrcReg[0].HasIndex2 == 0 &&
432 mov->SrcReg[0].RelAddr2 == 0 &&
433 mov->DstReg.RelAddr == 0 &&
434 mov->DstReg.CondMask == COND_TR &&
435 mov->SaturateMode == SATURATE_OFF;
436 }
437
438 static GLboolean
439 can_upward_mov_be_modifed(const struct prog_instruction *mov)
440 {
441 return
442 can_downward_mov_be_modifed(mov) &&
443 mov->DstReg.File == PROGRAM_TEMPORARY;
444 }
445
446 /**
447 * Try to remove use of extraneous MOV instructions, to free them up for dead
448 * code removal.
449 */
450 static void
451 _mesa_remove_extra_move_use(struct gl_program *prog)
452 {
453 GLuint i, j;
454
455 if (dbg) {
456 printf("Optimize: Begin remove extra move use\n");
457 _mesa_print_program(prog);
458 }
459
460 /*
461 * Look for sequences such as this:
462 * MOV tmpX, arg0;
463 * ...
464 * FOO tmpY, tmpX, arg1;
465 * and convert into:
466 * MOV tmpX, arg0;
467 * ...
468 * FOO tmpY, arg0, arg1;
469 */
470
471 for (i = 0; i + 1 < prog->NumInstructions; i++) {
472 const struct prog_instruction *mov = prog->Instructions + i;
473 int dst_mask, src_mask;
474 if (can_upward_mov_be_modifed(mov) == GL_FALSE)
475 continue;
476
477 /* Scanning the code, we maintain the components which are still active in
478 * these two masks
479 */
480 dst_mask = mov->DstReg.WriteMask;
481 src_mask = get_src_arg_mask(mov, 0, NO_MASK);
482
483 /* Walk through remaining instructions until the or src reg gets
484 * rewritten or we get into some flow-control, eliminating the use of
485 * this MOV.
486 */
487 for (j = i + 1; j < prog->NumInstructions; j++) {
488 struct prog_instruction *inst2 = prog->Instructions + j;
489 GLuint arg;
490
491 if (_mesa_is_flow_control_opcode(inst2->Opcode))
492 break;
493
494 /* First rewrite this instruction's args if appropriate. */
495 for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
496 int comp, read_mask;
497
498 if (inst2->SrcReg[arg].File != mov->DstReg.File ||
499 inst2->SrcReg[arg].Index != mov->DstReg.Index ||
500 inst2->SrcReg[arg].RelAddr ||
501 inst2->SrcReg[arg].Abs)
502 continue;
503 read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
504
505 /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
506 * components read still come from the mov instructions
507 */
508 if(is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
509 (read_mask & dst_mask) == read_mask) {
510 for (comp = 0; comp < 4; comp++) {
511 int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
512
513 GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
514 inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
515 inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
516 inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
517 inst2_swz) & 0x1) << comp);
518 }
519 inst2->SrcReg[arg].File = mov->SrcReg[0].File;
520 inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
521 }
522 }
523
524 /* The source of MOV is written. This potentially deactivates some
525 * components from the src and dst of the MOV instruction
526 */
527 if (inst2->DstReg.File == mov->DstReg.File &&
528 (inst2->DstReg.RelAddr ||
529 inst2->DstReg.Index == mov->DstReg.Index)) {
530 dst_mask &= ~inst2->DstReg.WriteMask;
531 src_mask = get_src_arg_mask(mov, 0, dst_mask);
532 }
533
534 /* Idem when the destination of mov is written */
535 if (inst2->DstReg.File == mov->SrcReg[0].File &&
536 (inst2->DstReg.RelAddr ||
537 inst2->DstReg.Index == mov->SrcReg[0].Index)) {
538 src_mask &= ~inst2->DstReg.WriteMask;
539 dst_mask &= get_dst_mask_for_mov(mov, src_mask);
540 }
541 if (dst_mask == 0)
542 break;
543 }
544 }
545
546 if (dbg) {
547 printf("Optimize: End remove extra move use.\n");
548 /*_mesa_print_program(prog);*/
549 }
550 }
551
552 /**
553 * Complements dead_code_global. Try to remove code in block of code by
554 * carefully monitoring the swizzles. Both functions should be merged into one
555 * with a proper control flow graph
556 */
557 static GLboolean
558 _mesa_remove_dead_code_local(struct gl_program *prog)
559 {
560 GLboolean *removeInst;
561 GLuint i, arg, rem = 0;
562
563 removeInst = (GLboolean *)
564 calloc(1, prog->NumInstructions * sizeof(GLboolean));
565
566 for (i = 0; i < prog->NumInstructions; i++) {
567 const struct prog_instruction *inst = prog->Instructions + i;
568 const GLuint index = inst->DstReg.Index;
569 const GLuint mask = inst->DstReg.WriteMask;
570 enum inst_use use;
571
572 /* We must deactivate the pass as soon as some indirection is used */
573 if (inst->DstReg.RelAddr)
574 goto done;
575 for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
576 if (inst->SrcReg[arg].RelAddr)
577 goto done;
578
579 if (_mesa_is_flow_control_opcode(inst->Opcode) ||
580 _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
581 inst->DstReg.File != PROGRAM_TEMPORARY ||
582 inst->DstReg.RelAddr)
583 continue;
584
585 use = find_next_use(prog, i+1, index, mask);
586 if (use == WRITE || use == END)
587 removeInst[i] = GL_TRUE;
588 }
589
590 rem = remove_instructions(prog, removeInst);
591
592 done:
593 free(removeInst);
594 return rem != 0;
595 }
596
597 /**
598 * Try to inject the destination of mov as the destination of inst and recompute
599 * the swizzles operators for the sources of inst if required. Return GL_TRUE
600 * of the substitution was possible, GL_FALSE otherwise
601 */
602 static GLboolean
603 _mesa_merge_mov_into_inst(struct prog_instruction *inst,
604 const struct prog_instruction *mov)
605 {
606 /* Indirection table which associates destination and source components for
607 * the mov instruction
608 */
609 const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
610
611 /* Some components are not written by inst. We cannot remove the mov */
612 if (mask != (inst->DstReg.WriteMask & mask))
613 return GL_FALSE;
614
615 /* Depending on the instruction, we may need to recompute the swizzles.
616 * Also, some other instructions (like TEX) are not linear. We will only
617 * consider completely active sources and destinations
618 */
619 switch (inst->Opcode) {
620
621 /* Carstesian instructions: we compute the swizzle */
622 case OPCODE_MOV:
623 case OPCODE_MIN:
624 case OPCODE_MAX:
625 case OPCODE_ABS:
626 case OPCODE_ADD:
627 case OPCODE_MAD:
628 case OPCODE_MUL:
629 case OPCODE_SUB:
630 {
631 int dst_to_src_comp[4] = {0,0,0,0};
632 int dst_comp, arg;
633 for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
634 if (mov->DstReg.WriteMask & (1 << dst_comp)) {
635 const int src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
636 ASSERT(src_comp < 4);
637 dst_to_src_comp[dst_comp] = src_comp;
638 }
639 }
640
641 /* Patch each source of the instruction */
642 for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
643 const int arg_swz = inst->SrcReg[arg].Swizzle;
644 inst->SrcReg[arg].Swizzle = 0;
645
646 /* Reset each active component of the swizzle */
647 for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
648 int src_comp, arg_comp;
649 if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
650 continue;
651 src_comp = dst_to_src_comp[dst_comp];
652 ASSERT(src_comp < 4);
653 arg_comp = GET_SWZ(arg_swz, src_comp);
654 ASSERT(arg_comp < 4);
655 inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
656 }
657 }
658 inst->DstReg = mov->DstReg;
659 return GL_TRUE;
660 }
661
662 /* Dot products and scalar instructions: we only change the destination */
663 case OPCODE_RCP:
664 case OPCODE_SIN:
665 case OPCODE_COS:
666 case OPCODE_RSQ:
667 case OPCODE_POW:
668 case OPCODE_EX2:
669 case OPCODE_LOG:
670 case OPCODE_DP2:
671 case OPCODE_DP3:
672 case OPCODE_DP4:
673 inst->DstReg = mov->DstReg;
674 return GL_TRUE;
675
676 /* All other instructions require fully active components with no swizzle */
677 default:
678 if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
679 inst->DstReg.WriteMask != WRITEMASK_XYZW)
680 return GL_FALSE;
681 inst->DstReg = mov->DstReg;
682 return GL_TRUE;
683 }
684 }
685
686 /**
687 * Try to remove extraneous MOV instructions from the given program.
688 */
689 static GLboolean
690 _mesa_remove_extra_moves(struct gl_program *prog)
691 {
692 GLboolean *removeInst; /* per-instruction removal flag */
693 GLuint i, rem = 0, nesting = 0;
694
695 if (dbg) {
696 printf("Optimize: Begin remove extra moves\n");
697 _mesa_print_program(prog);
698 }
699
700 removeInst = (GLboolean *)
701 calloc(1, prog->NumInstructions * sizeof(GLboolean));
702
703 /*
704 * Look for sequences such as this:
705 * FOO tmpX, arg0, arg1;
706 * MOV tmpY, tmpX;
707 * and convert into:
708 * FOO tmpY, arg0, arg1;
709 */
710
711 for (i = 0; i < prog->NumInstructions; i++) {
712 const struct prog_instruction *mov = prog->Instructions + i;
713
714 switch (mov->Opcode) {
715 case OPCODE_BGNLOOP:
716 case OPCODE_BGNSUB:
717 case OPCODE_IF:
718 nesting++;
719 break;
720 case OPCODE_ENDLOOP:
721 case OPCODE_ENDSUB:
722 case OPCODE_ENDIF:
723 nesting--;
724 break;
725 case OPCODE_MOV:
726 if (i > 0 && can_downward_mov_be_modifed(mov) && nesting == 0) {
727
728 /* see if this MOV can be removed */
729 const GLuint id = mov->SrcReg[0].Index;
730 struct prog_instruction *prevInst;
731 GLuint prevI;
732
733 /* get pointer to previous instruction */
734 prevI = i - 1;
735 while (prevI > 0 && removeInst[prevI])
736 prevI--;
737 prevInst = prog->Instructions + prevI;
738
739 if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
740 prevInst->DstReg.Index == id &&
741 prevInst->DstReg.RelAddr == 0 &&
742 prevInst->DstReg.CondSrc == 0 &&
743 prevInst->DstReg.CondMask == COND_TR) {
744
745 const GLuint dst_mask = prevInst->DstReg.WriteMask;
746 enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
747
748 if (next_use == WRITE || next_use == END) {
749 /* OK, we can safely remove this MOV instruction.
750 * Transform:
751 * prevI: FOO tempIndex, x, y;
752 * i: MOV z, tempIndex;
753 * Into:
754 * prevI: FOO z, x, y;
755 */
756 if (_mesa_merge_mov_into_inst(prevInst, mov)) {
757 removeInst[i] = GL_TRUE;
758 if (dbg) {
759 printf("Remove MOV at %u\n", i);
760 printf("new prev inst %u: ", prevI);
761 _mesa_print_instruction(prevInst);
762 }
763 }
764 }
765 }
766 }
767 break;
768 default:
769 ; /* nothing */
770 }
771 }
772
773 /* now remove the instructions which aren't needed */
774 rem = remove_instructions(prog, removeInst);
775
776 free(removeInst);
777
778 if (dbg) {
779 printf("Optimize: End remove extra moves. %u instructions removed\n", rem);
780 /*_mesa_print_program(prog);*/
781 }
782
783 return rem != 0;
784 }
785
786
787 /** A live register interval */
788 struct interval
789 {
790 GLuint Reg; /** The temporary register index */
791 GLuint Start, End; /** Start/end instruction numbers */
792 };
793
794
795 /** A list of register intervals */
796 struct interval_list
797 {
798 GLuint Num;
799 struct interval Intervals[MAX_PROGRAM_TEMPS];
800 };
801
802
803 static void
804 append_interval(struct interval_list *list, const struct interval *inv)
805 {
806 list->Intervals[list->Num++] = *inv;
807 }
808
809
810 /** Insert interval inv into list, sorted by interval end */
811 static void
812 insert_interval_by_end(struct interval_list *list, const struct interval *inv)
813 {
814 /* XXX we could do a binary search insertion here since list is sorted */
815 GLint i = list->Num - 1;
816 while (i >= 0 && list->Intervals[i].End > inv->End) {
817 list->Intervals[i + 1] = list->Intervals[i];
818 i--;
819 }
820 list->Intervals[i + 1] = *inv;
821 list->Num++;
822
823 #ifdef DEBUG
824 {
825 GLuint i;
826 for (i = 0; i + 1 < list->Num; i++) {
827 ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End);
828 }
829 }
830 #endif
831 }
832
833
834 /** Remove the given interval from the interval list */
835 static void
836 remove_interval(struct interval_list *list, const struct interval *inv)
837 {
838 /* XXX we could binary search since list is sorted */
839 GLuint k;
840 for (k = 0; k < list->Num; k++) {
841 if (list->Intervals[k].Reg == inv->Reg) {
842 /* found, remove it */
843 ASSERT(list->Intervals[k].Start == inv->Start);
844 ASSERT(list->Intervals[k].End == inv->End);
845 while (k < list->Num - 1) {
846 list->Intervals[k] = list->Intervals[k + 1];
847 k++;
848 }
849 list->Num--;
850 return;
851 }
852 }
853 }
854
855
856 /** called by qsort() */
857 static int
858 compare_start(const void *a, const void *b)
859 {
860 const struct interval *ia = (const struct interval *) a;
861 const struct interval *ib = (const struct interval *) b;
862 if (ia->Start < ib->Start)
863 return -1;
864 else if (ia->Start > ib->Start)
865 return +1;
866 else
867 return 0;
868 }
869
870 /** sort the interval list according to interval starts */
871 static void
872 sort_interval_list_by_start(struct interval_list *list)
873 {
874 qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
875 #ifdef DEBUG
876 {
877 GLuint i;
878 for (i = 0; i + 1 < list->Num; i++) {
879 ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
880 }
881 }
882 #endif
883 }
884
885
886 /**
887 * Update the intermediate interval info for register 'index' and
888 * instruction 'ic'.
889 */
890 static void
891 update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic)
892 {
893 ASSERT(index < MAX_PROGRAM_TEMPS);
894 if (intBegin[index] == -1) {
895 ASSERT(intEnd[index] == -1);
896 intBegin[index] = intEnd[index] = ic;
897 }
898 else {
899 intEnd[index] = ic;
900 }
901 }
902
903
904 /**
905 * Find first/last instruction that references each temporary register.
906 */
907 GLboolean
908 _mesa_find_temp_intervals(const struct prog_instruction *instructions,
909 GLuint numInstructions,
910 GLint intBegin[MAX_PROGRAM_TEMPS],
911 GLint intEnd[MAX_PROGRAM_TEMPS])
912 {
913 struct loop_info
914 {
915 GLuint Start, End; /**< Start, end instructions of loop */
916 };
917 struct loop_info loopStack[MAX_LOOP_NESTING];
918 GLuint loopStackDepth = 0;
919 GLuint i;
920
921 for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
922 intBegin[i] = intEnd[i] = -1;
923 }
924
925 /* Scan instructions looking for temporary registers */
926 for (i = 0; i < numInstructions; i++) {
927 const struct prog_instruction *inst = instructions + i;
928 if (inst->Opcode == OPCODE_BGNLOOP) {
929 loopStack[loopStackDepth].Start = i;
930 loopStack[loopStackDepth].End = inst->BranchTarget;
931 loopStackDepth++;
932 }
933 else if (inst->Opcode == OPCODE_ENDLOOP) {
934 loopStackDepth--;
935 }
936 else if (inst->Opcode == OPCODE_CAL) {
937 return GL_FALSE;
938 }
939 else {
940 const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
941 GLuint j;
942 for (j = 0; j < numSrc; j++) {
943 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
944 const GLuint index = inst->SrcReg[j].Index;
945 if (inst->SrcReg[j].RelAddr)
946 return GL_FALSE;
947 update_interval(intBegin, intEnd, index, i);
948 if (loopStackDepth > 0) {
949 /* extend temp register's interval to end of loop */
950 GLuint loopEnd = loopStack[loopStackDepth - 1].End;
951 update_interval(intBegin, intEnd, index, loopEnd);
952 }
953 }
954 }
955 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
956 const GLuint index = inst->DstReg.Index;
957 if (inst->DstReg.RelAddr)
958 return GL_FALSE;
959 update_interval(intBegin, intEnd, index, i);
960 if (loopStackDepth > 0) {
961 /* extend temp register's interval to end of loop */
962 GLuint loopEnd = loopStack[loopStackDepth - 1].End;
963 update_interval(intBegin, intEnd, index, loopEnd);
964 }
965 }
966 }
967 }
968
969 return GL_TRUE;
970 }
971
972
973 /**
974 * Find the live intervals for each temporary register in the program.
975 * For register R, the interval [A,B] indicates that R is referenced
976 * from instruction A through instruction B.
977 * Special consideration is needed for loops and subroutines.
978 * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
979 */
980 static GLboolean
981 find_live_intervals(struct gl_program *prog,
982 struct interval_list *liveIntervals)
983 {
984 GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS];
985 GLuint i;
986
987 /*
988 * Note: we'll return GL_FALSE below if we find relative indexing
989 * into the TEMP register file. We can't handle that yet.
990 * We also give up on subroutines for now.
991 */
992
993 if (dbg) {
994 printf("Optimize: Begin find intervals\n");
995 }
996
997 /* build intermediate arrays */
998 if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
999 intBegin, intEnd))
1000 return GL_FALSE;
1001
1002 /* Build live intervals list from intermediate arrays */
1003 liveIntervals->Num = 0;
1004 for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
1005 if (intBegin[i] >= 0) {
1006 struct interval inv;
1007 inv.Reg = i;
1008 inv.Start = intBegin[i];
1009 inv.End = intEnd[i];
1010 append_interval(liveIntervals, &inv);
1011 }
1012 }
1013
1014 /* Sort the list according to interval starts */
1015 sort_interval_list_by_start(liveIntervals);
1016
1017 if (dbg) {
1018 /* print interval info */
1019 for (i = 0; i < liveIntervals->Num; i++) {
1020 const struct interval *inv = liveIntervals->Intervals + i;
1021 printf("Reg[%d] live [%d, %d]:",
1022 inv->Reg, inv->Start, inv->End);
1023 if (1) {
1024 GLuint j;
1025 for (j = 0; j < inv->Start; j++)
1026 printf(" ");
1027 for (j = inv->Start; j <= inv->End; j++)
1028 printf("x");
1029 }
1030 printf("\n");
1031 }
1032 }
1033
1034 return GL_TRUE;
1035 }
1036
1037
1038 /** Scan the array of used register flags to find free entry */
1039 static GLint
1040 alloc_register(GLboolean usedRegs[MAX_PROGRAM_TEMPS])
1041 {
1042 GLuint k;
1043 for (k = 0; k < MAX_PROGRAM_TEMPS; k++) {
1044 if (!usedRegs[k]) {
1045 usedRegs[k] = GL_TRUE;
1046 return k;
1047 }
1048 }
1049 return -1;
1050 }
1051
1052
1053 /**
1054 * This function implements "Linear Scan Register Allocation" to reduce
1055 * the number of temporary registers used by the program.
1056 *
1057 * We compute the "live interval" for all temporary registers then
1058 * examine the overlap of the intervals to allocate new registers.
1059 * Basically, if two intervals do not overlap, they can use the same register.
1060 */
1061 static void
1062 _mesa_reallocate_registers(struct gl_program *prog)
1063 {
1064 struct interval_list liveIntervals;
1065 GLint registerMap[MAX_PROGRAM_TEMPS];
1066 GLboolean usedRegs[MAX_PROGRAM_TEMPS];
1067 GLuint i;
1068 GLint maxTemp = -1;
1069
1070 if (dbg) {
1071 printf("Optimize: Begin live-interval register reallocation\n");
1072 _mesa_print_program(prog);
1073 }
1074
1075 for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
1076 registerMap[i] = -1;
1077 usedRegs[i] = GL_FALSE;
1078 }
1079
1080 if (!find_live_intervals(prog, &liveIntervals)) {
1081 if (dbg)
1082 printf("Aborting register reallocation\n");
1083 return;
1084 }
1085
1086 {
1087 struct interval_list activeIntervals;
1088 activeIntervals.Num = 0;
1089
1090 /* loop over live intervals, allocating a new register for each */
1091 for (i = 0; i < liveIntervals.Num; i++) {
1092 const struct interval *live = liveIntervals.Intervals + i;
1093
1094 if (dbg)
1095 printf("Consider register %u\n", live->Reg);
1096
1097 /* Expire old intervals. Intervals which have ended with respect
1098 * to the live interval can have their remapped registers freed.
1099 */
1100 {
1101 GLint j;
1102 for (j = 0; j < (GLint) activeIntervals.Num; j++) {
1103 const struct interval *inv = activeIntervals.Intervals + j;
1104 if (inv->End >= live->Start) {
1105 /* Stop now. Since the activeInterval list is sorted
1106 * we know we don't have to go further.
1107 */
1108 break;
1109 }
1110 else {
1111 /* Interval 'inv' has expired */
1112 const GLint regNew = registerMap[inv->Reg];
1113 ASSERT(regNew >= 0);
1114
1115 if (dbg)
1116 printf(" expire interval for reg %u\n", inv->Reg);
1117
1118 /* remove interval j from active list */
1119 remove_interval(&activeIntervals, inv);
1120 j--; /* counter-act j++ in for-loop above */
1121
1122 /* return register regNew to the free pool */
1123 if (dbg)
1124 printf(" free reg %d\n", regNew);
1125 ASSERT(usedRegs[regNew] == GL_TRUE);
1126 usedRegs[regNew] = GL_FALSE;
1127 }
1128 }
1129 }
1130
1131 /* find a free register for this live interval */
1132 {
1133 const GLint k = alloc_register(usedRegs);
1134 if (k < 0) {
1135 /* out of registers, give up */
1136 return;
1137 }
1138 registerMap[live->Reg] = k;
1139 maxTemp = MAX2(maxTemp, k);
1140 if (dbg)
1141 printf(" remap register %u -> %d\n", live->Reg, k);
1142 }
1143
1144 /* Insert this live interval into the active list which is sorted
1145 * by increasing end points.
1146 */
1147 insert_interval_by_end(&activeIntervals, live);
1148 }
1149 }
1150
1151 if (maxTemp + 1 < (GLint) liveIntervals.Num) {
1152 /* OK, we've reduced the number of registers needed.
1153 * Scan the program and replace all the old temporary register
1154 * indexes with the new indexes.
1155 */
1156 replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
1157
1158 prog->NumTemporaries = maxTemp + 1;
1159 }
1160
1161 if (dbg) {
1162 printf("Optimize: End live-interval register reallocation\n");
1163 printf("Num temp regs before: %u after: %u\n",
1164 liveIntervals.Num, maxTemp + 1);
1165 _mesa_print_program(prog);
1166 }
1167 }
1168
1169 #if 0
1170 static void
1171 print_it(GLcontext *ctx, struct gl_program *program, const char *txt) {
1172 fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
1173 _mesa_print_program(program);
1174 _mesa_print_program_parameters(ctx, program);
1175 fprintf(stderr, "\n\n");
1176 }
1177 #endif
1178
1179 /**
1180 * Apply optimizations to the given program to eliminate unnecessary
1181 * instructions, temp regs, etc.
1182 */
1183 void
1184 _mesa_optimize_program(GLcontext *ctx, struct gl_program *program)
1185 {
1186 GLboolean any_change;
1187
1188 /* Stop when no modifications were output */
1189 do {
1190 any_change = GL_FALSE;
1191 _mesa_remove_extra_move_use(program);
1192 if (_mesa_remove_dead_code_global(program))
1193 any_change = GL_TRUE;
1194 if (_mesa_remove_extra_moves(program))
1195 any_change = GL_TRUE;
1196 if (_mesa_remove_dead_code_local(program))
1197 any_change = GL_TRUE;
1198 _mesa_reallocate_registers(program);
1199 } while (any_change);
1200 }
1201