util: Remove check_os_katmai_support.
[mesa.git] / src / mesa / program / prog_optimize.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.5
4 *
5 * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26
27 #include "main/glheader.h"
28 #include "main/context.h"
29 #include "main/macros.h"
30 #include "program.h"
31 #include "prog_instruction.h"
32 #include "prog_optimize.h"
33 #include "prog_print.h"
34
35
36 #define MAX_LOOP_NESTING 50
37
38
39 static GLboolean dbg = GL_FALSE;
40
41 #define NO_MASK 0xf
42
43 /**
44 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
45 * are read from the given src in this instruction, We also provide
46 * one optional masks which may mask other components in the dst
47 * register
48 */
49 static GLuint
50 get_src_arg_mask(const struct prog_instruction *inst,
51 GLuint arg, GLuint dst_mask)
52 {
53 GLuint read_mask, channel_mask;
54 GLuint comp;
55
56 ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode));
57
58 /* Form the dst register, find the written channels */
59 if (inst->CondUpdate) {
60 channel_mask = WRITEMASK_XYZW;
61 }
62 else {
63 switch (inst->Opcode) {
64 case OPCODE_MOV:
65 case OPCODE_MIN:
66 case OPCODE_MAX:
67 case OPCODE_ABS:
68 case OPCODE_ADD:
69 case OPCODE_MAD:
70 case OPCODE_MUL:
71 case OPCODE_SUB:
72 channel_mask = inst->DstReg.WriteMask & dst_mask;
73 break;
74 case OPCODE_RCP:
75 case OPCODE_SIN:
76 case OPCODE_COS:
77 case OPCODE_RSQ:
78 case OPCODE_POW:
79 case OPCODE_EX2:
80 case OPCODE_LOG:
81 channel_mask = WRITEMASK_X;
82 break;
83 case OPCODE_DP2:
84 channel_mask = WRITEMASK_XY;
85 break;
86 case OPCODE_DP3:
87 case OPCODE_XPD:
88 channel_mask = WRITEMASK_XYZ;
89 break;
90 default:
91 channel_mask = WRITEMASK_XYZW;
92 break;
93 }
94 }
95
96 /* Now, given the src swizzle and the written channels, find which
97 * components are actually read
98 */
99 read_mask = 0x0;
100 for (comp = 0; comp < 4; ++comp) {
101 const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
102 ASSERT(coord < 4);
103 if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
104 read_mask |= 1 << coord;
105 }
106
107 return read_mask;
108 }
109
110
111 /**
112 * For a MOV instruction, compute a write mask when src register also has
113 * a mask
114 */
115 static GLuint
116 get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
117 {
118 const GLuint mask = mov->DstReg.WriteMask;
119 GLuint comp;
120 GLuint updated_mask = 0x0;
121
122 ASSERT(mov->Opcode == OPCODE_MOV);
123
124 for (comp = 0; comp < 4; ++comp) {
125 GLuint src_comp;
126 if ((mask & (1 << comp)) == 0)
127 continue;
128 src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
129 if ((src_mask & (1 << src_comp)) == 0)
130 continue;
131 updated_mask |= 1 << comp;
132 }
133
134 return updated_mask;
135 }
136
137
138 /**
139 * Ensure that the swizzle is regular. That is, all of the swizzle
140 * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
141 */
142 static GLboolean
143 is_swizzle_regular(GLuint swz)
144 {
145 return GET_SWZ(swz,0) <= SWIZZLE_W &&
146 GET_SWZ(swz,1) <= SWIZZLE_W &&
147 GET_SWZ(swz,2) <= SWIZZLE_W &&
148 GET_SWZ(swz,3) <= SWIZZLE_W;
149 }
150
151
152 /**
153 * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
154 * \return number of instructions removed
155 */
156 static GLuint
157 remove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
158 {
159 GLint i, removeEnd = 0, removeCount = 0;
160 GLuint totalRemoved = 0;
161
162 /* go backward */
163 for (i = prog->NumInstructions - 1; i >= 0; i--) {
164 if (removeFlags[i]) {
165 totalRemoved++;
166 if (removeCount == 0) {
167 /* begin a run of instructions to remove */
168 removeEnd = i;
169 removeCount = 1;
170 }
171 else {
172 /* extend the run of instructions to remove */
173 removeCount++;
174 }
175 }
176 else {
177 /* don't remove this instruction, but check if the preceeding
178 * instructions are to be removed.
179 */
180 if (removeCount > 0) {
181 GLint removeStart = removeEnd - removeCount + 1;
182 _mesa_delete_instructions(prog, removeStart, removeCount);
183 removeStart = removeCount = 0; /* reset removal info */
184 }
185 }
186 }
187 /* Finish removing if the first instruction was to be removed. */
188 if (removeCount > 0) {
189 GLint removeStart = removeEnd - removeCount + 1;
190 _mesa_delete_instructions(prog, removeStart, removeCount);
191 }
192 return totalRemoved;
193 }
194
195
196 /**
197 * Remap register indexes according to map.
198 * \param prog the program to search/replace
199 * \param file the type of register file to search/replace
200 * \param map maps old register indexes to new indexes
201 */
202 static void
203 replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
204 {
205 GLuint i;
206
207 for (i = 0; i < prog->NumInstructions; i++) {
208 struct prog_instruction *inst = prog->Instructions + i;
209 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
210 GLuint j;
211 for (j = 0; j < numSrc; j++) {
212 if (inst->SrcReg[j].File == file) {
213 GLuint index = inst->SrcReg[j].Index;
214 ASSERT(map[index] >= 0);
215 inst->SrcReg[j].Index = map[index];
216 }
217 }
218 if (inst->DstReg.File == file) {
219 const GLuint index = inst->DstReg.Index;
220 ASSERT(map[index] >= 0);
221 inst->DstReg.Index = map[index];
222 }
223 }
224 }
225
226
227 /**
228 * Remove dead instructions from the given program.
229 * This is very primitive for now. Basically look for temp registers
230 * that are written to but never read. Remove any instructions that
231 * write to such registers. Be careful with condition code setters.
232 */
233 static GLboolean
234 _mesa_remove_dead_code_global(struct gl_program *prog)
235 {
236 GLboolean tempRead[MAX_PROGRAM_TEMPS][4];
237 GLboolean *removeInst; /* per-instruction removal flag */
238 GLuint i, rem = 0, comp;
239
240 memset(tempRead, 0, sizeof(tempRead));
241
242 if (dbg) {
243 printf("Optimize: Begin dead code removal\n");
244 /*_mesa_print_program(prog);*/
245 }
246
247 removeInst = (GLboolean *)
248 calloc(1, prog->NumInstructions * sizeof(GLboolean));
249
250 /* Determine which temps are read and written */
251 for (i = 0; i < prog->NumInstructions; i++) {
252 const struct prog_instruction *inst = prog->Instructions + i;
253 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
254 GLuint j;
255
256 /* check src regs */
257 for (j = 0; j < numSrc; j++) {
258 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
259 const GLuint index = inst->SrcReg[j].Index;
260 GLuint read_mask;
261 ASSERT(index < MAX_PROGRAM_TEMPS);
262 read_mask = get_src_arg_mask(inst, j, NO_MASK);
263
264 if (inst->SrcReg[j].RelAddr) {
265 if (dbg)
266 printf("abort remove dead code (indirect temp)\n");
267 goto done;
268 }
269
270 for (comp = 0; comp < 4; comp++) {
271 const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
272 ASSERT(swz < 4);
273 if ((read_mask & (1 << swz)) == 0)
274 continue;
275 if (swz <= SWIZZLE_W)
276 tempRead[index][swz] = GL_TRUE;
277 }
278 }
279 }
280
281 /* check dst reg */
282 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
283 const GLuint index = inst->DstReg.Index;
284 ASSERT(index < MAX_PROGRAM_TEMPS);
285
286 if (inst->DstReg.RelAddr) {
287 if (dbg)
288 printf("abort remove dead code (indirect temp)\n");
289 goto done;
290 }
291
292 if (inst->CondUpdate) {
293 /* If we're writing to this register and setting condition
294 * codes we cannot remove the instruction. Prevent removal
295 * by setting the 'read' flag.
296 */
297 tempRead[index][0] = GL_TRUE;
298 tempRead[index][1] = GL_TRUE;
299 tempRead[index][2] = GL_TRUE;
300 tempRead[index][3] = GL_TRUE;
301 }
302 }
303 }
304
305 /* find instructions that write to dead registers, flag for removal */
306 for (i = 0; i < prog->NumInstructions; i++) {
307 struct prog_instruction *inst = prog->Instructions + i;
308 const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
309
310 if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
311 GLint chan, index = inst->DstReg.Index;
312
313 for (chan = 0; chan < 4; chan++) {
314 if (!tempRead[index][chan] &&
315 inst->DstReg.WriteMask & (1 << chan)) {
316 if (dbg) {
317 printf("Remove writemask on %u.%c\n", i,
318 chan == 3 ? 'w' : 'x' + chan);
319 }
320 inst->DstReg.WriteMask &= ~(1 << chan);
321 rem++;
322 }
323 }
324
325 if (inst->DstReg.WriteMask == 0) {
326 /* If we cleared all writes, the instruction can be removed. */
327 if (dbg)
328 printf("Remove instruction %u: \n", i);
329 removeInst[i] = GL_TRUE;
330 }
331 }
332 }
333
334 /* now remove the instructions which aren't needed */
335 rem = remove_instructions(prog, removeInst);
336
337 if (dbg) {
338 printf("Optimize: End dead code removal.\n");
339 printf(" %u channel writes removed\n", rem);
340 printf(" %u instructions removed\n", rem);
341 /*_mesa_print_program(prog);*/
342 }
343
344 done:
345 free(removeInst);
346 return rem != 0;
347 }
348
349
350 enum inst_use
351 {
352 READ,
353 WRITE,
354 FLOW,
355 END
356 };
357
358
359 /**
360 * Scan forward in program from 'start' for the next occurances of TEMP[index].
361 * We look if an instruction reads the component given by the masks and if they
362 * are overwritten.
363 * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
364 * that we can't look further.
365 */
366 static enum inst_use
367 find_next_use(const struct gl_program *prog,
368 GLuint start,
369 GLuint index,
370 GLuint mask)
371 {
372 GLuint i;
373
374 for (i = start; i < prog->NumInstructions; i++) {
375 const struct prog_instruction *inst = prog->Instructions + i;
376 switch (inst->Opcode) {
377 case OPCODE_BGNLOOP:
378 case OPCODE_BGNSUB:
379 case OPCODE_BRA:
380 case OPCODE_CAL:
381 case OPCODE_CONT:
382 case OPCODE_IF:
383 case OPCODE_ELSE:
384 case OPCODE_ENDIF:
385 case OPCODE_ENDLOOP:
386 case OPCODE_ENDSUB:
387 case OPCODE_RET:
388 return FLOW;
389 case OPCODE_END:
390 return END;
391 default:
392 {
393 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
394 GLuint j;
395 for (j = 0; j < numSrc; j++) {
396 if (inst->SrcReg[j].RelAddr ||
397 (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
398 inst->SrcReg[j].Index == index &&
399 (get_src_arg_mask(inst,j,NO_MASK) & mask)))
400 return READ;
401 }
402 if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
403 inst->DstReg.File == PROGRAM_TEMPORARY &&
404 inst->DstReg.Index == index) {
405 mask &= ~inst->DstReg.WriteMask;
406 if (mask == 0)
407 return WRITE;
408 }
409 }
410 }
411 }
412 return END;
413 }
414
415
416 /**
417 * Is the given instruction opcode a flow-control opcode?
418 * XXX maybe move this into prog_instruction.[ch]
419 */
420 static GLboolean
421 _mesa_is_flow_control_opcode(enum prog_opcode opcode)
422 {
423 switch (opcode) {
424 case OPCODE_BGNLOOP:
425 case OPCODE_BGNSUB:
426 case OPCODE_BRA:
427 case OPCODE_CAL:
428 case OPCODE_CONT:
429 case OPCODE_IF:
430 case OPCODE_ELSE:
431 case OPCODE_END:
432 case OPCODE_ENDIF:
433 case OPCODE_ENDLOOP:
434 case OPCODE_ENDSUB:
435 case OPCODE_RET:
436 return GL_TRUE;
437 default:
438 return GL_FALSE;
439 }
440 }
441
442
443 /**
444 * Test if the given instruction is a simple MOV (no conditional updating,
445 * not relative addressing, no negation/abs, etc).
446 */
447 static GLboolean
448 can_downward_mov_be_modifed(const struct prog_instruction *mov)
449 {
450 return
451 mov->Opcode == OPCODE_MOV &&
452 mov->CondUpdate == GL_FALSE &&
453 mov->SrcReg[0].RelAddr == 0 &&
454 mov->SrcReg[0].Negate == 0 &&
455 mov->SrcReg[0].Abs == 0 &&
456 mov->SrcReg[0].HasIndex2 == 0 &&
457 mov->SrcReg[0].RelAddr2 == 0 &&
458 mov->DstReg.RelAddr == 0 &&
459 mov->DstReg.CondMask == COND_TR &&
460 mov->SaturateMode == SATURATE_OFF;
461 }
462
463
464 static GLboolean
465 can_upward_mov_be_modifed(const struct prog_instruction *mov)
466 {
467 return
468 can_downward_mov_be_modifed(mov) &&
469 mov->DstReg.File == PROGRAM_TEMPORARY;
470 }
471
472
473 /**
474 * Try to remove use of extraneous MOV instructions, to free them up for dead
475 * code removal.
476 */
477 static void
478 _mesa_remove_extra_move_use(struct gl_program *prog)
479 {
480 GLuint i, j;
481
482 if (dbg) {
483 printf("Optimize: Begin remove extra move use\n");
484 _mesa_print_program(prog);
485 }
486
487 /*
488 * Look for sequences such as this:
489 * MOV tmpX, arg0;
490 * ...
491 * FOO tmpY, tmpX, arg1;
492 * and convert into:
493 * MOV tmpX, arg0;
494 * ...
495 * FOO tmpY, arg0, arg1;
496 */
497
498 for (i = 0; i + 1 < prog->NumInstructions; i++) {
499 const struct prog_instruction *mov = prog->Instructions + i;
500 GLuint dst_mask, src_mask;
501 if (can_upward_mov_be_modifed(mov) == GL_FALSE)
502 continue;
503
504 /* Scanning the code, we maintain the components which are still active in
505 * these two masks
506 */
507 dst_mask = mov->DstReg.WriteMask;
508 src_mask = get_src_arg_mask(mov, 0, NO_MASK);
509
510 /* Walk through remaining instructions until the or src reg gets
511 * rewritten or we get into some flow-control, eliminating the use of
512 * this MOV.
513 */
514 for (j = i + 1; j < prog->NumInstructions; j++) {
515 struct prog_instruction *inst2 = prog->Instructions + j;
516 GLuint arg;
517
518 if (_mesa_is_flow_control_opcode(inst2->Opcode))
519 break;
520
521 /* First rewrite this instruction's args if appropriate. */
522 for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
523 GLuint comp, read_mask;
524
525 if (inst2->SrcReg[arg].File != mov->DstReg.File ||
526 inst2->SrcReg[arg].Index != mov->DstReg.Index ||
527 inst2->SrcReg[arg].RelAddr ||
528 inst2->SrcReg[arg].Abs)
529 continue;
530 read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
531
532 /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
533 * components read still come from the mov instructions
534 */
535 if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
536 (read_mask & dst_mask) == read_mask) {
537 for (comp = 0; comp < 4; comp++) {
538 const GLuint inst2_swz =
539 GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
540 const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
541 inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
542 inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
543 inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
544 inst2_swz) & 0x1) << comp);
545 }
546 inst2->SrcReg[arg].File = mov->SrcReg[0].File;
547 inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
548 }
549 }
550
551 /* The source of MOV is written. This potentially deactivates some
552 * components from the src and dst of the MOV instruction
553 */
554 if (inst2->DstReg.File == mov->DstReg.File &&
555 (inst2->DstReg.RelAddr ||
556 inst2->DstReg.Index == mov->DstReg.Index)) {
557 dst_mask &= ~inst2->DstReg.WriteMask;
558 src_mask = get_src_arg_mask(mov, 0, dst_mask);
559 }
560
561 /* Idem when the destination of mov is written */
562 if (inst2->DstReg.File == mov->SrcReg[0].File &&
563 (inst2->DstReg.RelAddr ||
564 inst2->DstReg.Index == mov->SrcReg[0].Index)) {
565 src_mask &= ~inst2->DstReg.WriteMask;
566 dst_mask &= get_dst_mask_for_mov(mov, src_mask);
567 }
568 if (dst_mask == 0)
569 break;
570 }
571 }
572
573 if (dbg) {
574 printf("Optimize: End remove extra move use.\n");
575 /*_mesa_print_program(prog);*/
576 }
577 }
578
579
580 /**
581 * Complements dead_code_global. Try to remove code in block of code by
582 * carefully monitoring the swizzles. Both functions should be merged into one
583 * with a proper control flow graph
584 */
585 static GLboolean
586 _mesa_remove_dead_code_local(struct gl_program *prog)
587 {
588 GLboolean *removeInst;
589 GLuint i, arg, rem = 0;
590
591 removeInst = (GLboolean *)
592 calloc(1, prog->NumInstructions * sizeof(GLboolean));
593
594 for (i = 0; i < prog->NumInstructions; i++) {
595 const struct prog_instruction *inst = prog->Instructions + i;
596 const GLuint index = inst->DstReg.Index;
597 const GLuint mask = inst->DstReg.WriteMask;
598 enum inst_use use;
599
600 /* We must deactivate the pass as soon as some indirection is used */
601 if (inst->DstReg.RelAddr)
602 goto done;
603 for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
604 if (inst->SrcReg[arg].RelAddr)
605 goto done;
606
607 if (_mesa_is_flow_control_opcode(inst->Opcode) ||
608 _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
609 inst->DstReg.File != PROGRAM_TEMPORARY ||
610 inst->DstReg.RelAddr)
611 continue;
612
613 use = find_next_use(prog, i+1, index, mask);
614 if (use == WRITE || use == END)
615 removeInst[i] = GL_TRUE;
616 }
617
618 rem = remove_instructions(prog, removeInst);
619
620 done:
621 free(removeInst);
622 return rem != 0;
623 }
624
625
626 /**
627 * Try to inject the destination of mov as the destination of inst and recompute
628 * the swizzles operators for the sources of inst if required. Return GL_TRUE
629 * of the substitution was possible, GL_FALSE otherwise
630 */
631 static GLboolean
632 _mesa_merge_mov_into_inst(struct prog_instruction *inst,
633 const struct prog_instruction *mov)
634 {
635 /* Indirection table which associates destination and source components for
636 * the mov instruction
637 */
638 const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
639
640 /* Some components are not written by inst. We cannot remove the mov */
641 if (mask != (inst->DstReg.WriteMask & mask))
642 return GL_FALSE;
643
644 /* Depending on the instruction, we may need to recompute the swizzles.
645 * Also, some other instructions (like TEX) are not linear. We will only
646 * consider completely active sources and destinations
647 */
648 switch (inst->Opcode) {
649
650 /* Carstesian instructions: we compute the swizzle */
651 case OPCODE_MOV:
652 case OPCODE_MIN:
653 case OPCODE_MAX:
654 case OPCODE_ABS:
655 case OPCODE_ADD:
656 case OPCODE_MAD:
657 case OPCODE_MUL:
658 case OPCODE_SUB:
659 {
660 GLuint dst_to_src_comp[4] = {0,0,0,0};
661 GLuint dst_comp, arg;
662 for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
663 if (mov->DstReg.WriteMask & (1 << dst_comp)) {
664 const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
665 ASSERT(src_comp < 4);
666 dst_to_src_comp[dst_comp] = src_comp;
667 }
668 }
669
670 /* Patch each source of the instruction */
671 for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
672 const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
673 inst->SrcReg[arg].Swizzle = 0;
674
675 /* Reset each active component of the swizzle */
676 for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
677 GLuint src_comp, arg_comp;
678 if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
679 continue;
680 src_comp = dst_to_src_comp[dst_comp];
681 ASSERT(src_comp < 4);
682 arg_comp = GET_SWZ(arg_swz, src_comp);
683 ASSERT(arg_comp < 4);
684 inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
685 }
686 }
687 inst->DstReg = mov->DstReg;
688 return GL_TRUE;
689 }
690
691 /* Dot products and scalar instructions: we only change the destination */
692 case OPCODE_RCP:
693 case OPCODE_SIN:
694 case OPCODE_COS:
695 case OPCODE_RSQ:
696 case OPCODE_POW:
697 case OPCODE_EX2:
698 case OPCODE_LOG:
699 case OPCODE_DP2:
700 case OPCODE_DP3:
701 case OPCODE_DP4:
702 inst->DstReg = mov->DstReg;
703 return GL_TRUE;
704
705 /* All other instructions require fully active components with no swizzle */
706 default:
707 if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
708 inst->DstReg.WriteMask != WRITEMASK_XYZW)
709 return GL_FALSE;
710 inst->DstReg = mov->DstReg;
711 return GL_TRUE;
712 }
713 }
714
715
716 /**
717 * Try to remove extraneous MOV instructions from the given program.
718 */
719 static GLboolean
720 _mesa_remove_extra_moves(struct gl_program *prog)
721 {
722 GLboolean *removeInst; /* per-instruction removal flag */
723 GLuint i, rem = 0, nesting = 0;
724
725 if (dbg) {
726 printf("Optimize: Begin remove extra moves\n");
727 _mesa_print_program(prog);
728 }
729
730 removeInst = (GLboolean *)
731 calloc(1, prog->NumInstructions * sizeof(GLboolean));
732
733 /*
734 * Look for sequences such as this:
735 * FOO tmpX, arg0, arg1;
736 * MOV tmpY, tmpX;
737 * and convert into:
738 * FOO tmpY, arg0, arg1;
739 */
740
741 for (i = 0; i < prog->NumInstructions; i++) {
742 const struct prog_instruction *mov = prog->Instructions + i;
743
744 switch (mov->Opcode) {
745 case OPCODE_BGNLOOP:
746 case OPCODE_BGNSUB:
747 case OPCODE_IF:
748 nesting++;
749 break;
750 case OPCODE_ENDLOOP:
751 case OPCODE_ENDSUB:
752 case OPCODE_ENDIF:
753 nesting--;
754 break;
755 case OPCODE_MOV:
756 if (i > 0 && can_downward_mov_be_modifed(mov) && nesting == 0) {
757
758 /* see if this MOV can be removed */
759 const GLuint id = mov->SrcReg[0].Index;
760 struct prog_instruction *prevInst;
761 GLuint prevI;
762
763 /* get pointer to previous instruction */
764 prevI = i - 1;
765 while (prevI > 0 && removeInst[prevI])
766 prevI--;
767 prevInst = prog->Instructions + prevI;
768
769 if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
770 prevInst->DstReg.Index == id &&
771 prevInst->DstReg.RelAddr == 0 &&
772 prevInst->DstReg.CondSrc == 0 &&
773 prevInst->DstReg.CondMask == COND_TR) {
774
775 const GLuint dst_mask = prevInst->DstReg.WriteMask;
776 enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
777
778 if (next_use == WRITE || next_use == END) {
779 /* OK, we can safely remove this MOV instruction.
780 * Transform:
781 * prevI: FOO tempIndex, x, y;
782 * i: MOV z, tempIndex;
783 * Into:
784 * prevI: FOO z, x, y;
785 */
786 if (_mesa_merge_mov_into_inst(prevInst, mov)) {
787 removeInst[i] = GL_TRUE;
788 if (dbg) {
789 printf("Remove MOV at %u\n", i);
790 printf("new prev inst %u: ", prevI);
791 _mesa_print_instruction(prevInst);
792 }
793 }
794 }
795 }
796 }
797 break;
798 default:
799 ; /* nothing */
800 }
801 }
802
803 /* now remove the instructions which aren't needed */
804 rem = remove_instructions(prog, removeInst);
805
806 free(removeInst);
807
808 if (dbg) {
809 printf("Optimize: End remove extra moves. %u instructions removed\n", rem);
810 /*_mesa_print_program(prog);*/
811 }
812
813 return rem != 0;
814 }
815
816
817 /** A live register interval */
818 struct interval
819 {
820 GLuint Reg; /** The temporary register index */
821 GLuint Start, End; /** Start/end instruction numbers */
822 };
823
824
825 /** A list of register intervals */
826 struct interval_list
827 {
828 GLuint Num;
829 struct interval Intervals[MAX_PROGRAM_TEMPS];
830 };
831
832
833 static void
834 append_interval(struct interval_list *list, const struct interval *inv)
835 {
836 list->Intervals[list->Num++] = *inv;
837 }
838
839
840 /** Insert interval inv into list, sorted by interval end */
841 static void
842 insert_interval_by_end(struct interval_list *list, const struct interval *inv)
843 {
844 /* XXX we could do a binary search insertion here since list is sorted */
845 GLint i = list->Num - 1;
846 while (i >= 0 && list->Intervals[i].End > inv->End) {
847 list->Intervals[i + 1] = list->Intervals[i];
848 i--;
849 }
850 list->Intervals[i + 1] = *inv;
851 list->Num++;
852
853 #ifdef DEBUG
854 {
855 GLuint i;
856 for (i = 0; i + 1 < list->Num; i++) {
857 ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End);
858 }
859 }
860 #endif
861 }
862
863
864 /** Remove the given interval from the interval list */
865 static void
866 remove_interval(struct interval_list *list, const struct interval *inv)
867 {
868 /* XXX we could binary search since list is sorted */
869 GLuint k;
870 for (k = 0; k < list->Num; k++) {
871 if (list->Intervals[k].Reg == inv->Reg) {
872 /* found, remove it */
873 ASSERT(list->Intervals[k].Start == inv->Start);
874 ASSERT(list->Intervals[k].End == inv->End);
875 while (k < list->Num - 1) {
876 list->Intervals[k] = list->Intervals[k + 1];
877 k++;
878 }
879 list->Num--;
880 return;
881 }
882 }
883 }
884
885
886 /** called by qsort() */
887 static int
888 compare_start(const void *a, const void *b)
889 {
890 const struct interval *ia = (const struct interval *) a;
891 const struct interval *ib = (const struct interval *) b;
892 if (ia->Start < ib->Start)
893 return -1;
894 else if (ia->Start > ib->Start)
895 return +1;
896 else
897 return 0;
898 }
899
900
901 /** sort the interval list according to interval starts */
902 static void
903 sort_interval_list_by_start(struct interval_list *list)
904 {
905 qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
906 #ifdef DEBUG
907 {
908 GLuint i;
909 for (i = 0; i + 1 < list->Num; i++) {
910 ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
911 }
912 }
913 #endif
914 }
915
916
917 /**
918 * Update the intermediate interval info for register 'index' and
919 * instruction 'ic'.
920 */
921 static void
922 update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic)
923 {
924 ASSERT(index < MAX_PROGRAM_TEMPS);
925 if (intBegin[index] == -1) {
926 ASSERT(intEnd[index] == -1);
927 intBegin[index] = intEnd[index] = ic;
928 }
929 else {
930 intEnd[index] = ic;
931 }
932 }
933
934
935 /**
936 * Find first/last instruction that references each temporary register.
937 */
938 GLboolean
939 _mesa_find_temp_intervals(const struct prog_instruction *instructions,
940 GLuint numInstructions,
941 GLint intBegin[MAX_PROGRAM_TEMPS],
942 GLint intEnd[MAX_PROGRAM_TEMPS])
943 {
944 struct loop_info
945 {
946 GLuint Start, End; /**< Start, end instructions of loop */
947 };
948 struct loop_info loopStack[MAX_LOOP_NESTING];
949 GLuint loopStackDepth = 0;
950 GLuint i;
951
952 for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
953 intBegin[i] = intEnd[i] = -1;
954 }
955
956 /* Scan instructions looking for temporary registers */
957 for (i = 0; i < numInstructions; i++) {
958 const struct prog_instruction *inst = instructions + i;
959 if (inst->Opcode == OPCODE_BGNLOOP) {
960 loopStack[loopStackDepth].Start = i;
961 loopStack[loopStackDepth].End = inst->BranchTarget;
962 loopStackDepth++;
963 }
964 else if (inst->Opcode == OPCODE_ENDLOOP) {
965 loopStackDepth--;
966 }
967 else if (inst->Opcode == OPCODE_CAL) {
968 return GL_FALSE;
969 }
970 else {
971 const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
972 GLuint j;
973 for (j = 0; j < numSrc; j++) {
974 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
975 const GLuint index = inst->SrcReg[j].Index;
976 if (inst->SrcReg[j].RelAddr)
977 return GL_FALSE;
978 update_interval(intBegin, intEnd, index, i);
979 if (loopStackDepth > 0) {
980 /* extend temp register's interval to end of loop */
981 GLuint loopEnd = loopStack[loopStackDepth - 1].End;
982 update_interval(intBegin, intEnd, index, loopEnd);
983 }
984 }
985 }
986 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
987 const GLuint index = inst->DstReg.Index;
988 if (inst->DstReg.RelAddr)
989 return GL_FALSE;
990 update_interval(intBegin, intEnd, index, i);
991 if (loopStackDepth > 0) {
992 /* extend temp register's interval to end of loop */
993 GLuint loopEnd = loopStack[loopStackDepth - 1].End;
994 update_interval(intBegin, intEnd, index, loopEnd);
995 }
996 }
997 }
998 }
999
1000 return GL_TRUE;
1001 }
1002
1003
1004 /**
1005 * Find the live intervals for each temporary register in the program.
1006 * For register R, the interval [A,B] indicates that R is referenced
1007 * from instruction A through instruction B.
1008 * Special consideration is needed for loops and subroutines.
1009 * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
1010 */
1011 static GLboolean
1012 find_live_intervals(struct gl_program *prog,
1013 struct interval_list *liveIntervals)
1014 {
1015 GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS];
1016 GLuint i;
1017
1018 /*
1019 * Note: we'll return GL_FALSE below if we find relative indexing
1020 * into the TEMP register file. We can't handle that yet.
1021 * We also give up on subroutines for now.
1022 */
1023
1024 if (dbg) {
1025 printf("Optimize: Begin find intervals\n");
1026 }
1027
1028 /* build intermediate arrays */
1029 if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
1030 intBegin, intEnd))
1031 return GL_FALSE;
1032
1033 /* Build live intervals list from intermediate arrays */
1034 liveIntervals->Num = 0;
1035 for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
1036 if (intBegin[i] >= 0) {
1037 struct interval inv;
1038 inv.Reg = i;
1039 inv.Start = intBegin[i];
1040 inv.End = intEnd[i];
1041 append_interval(liveIntervals, &inv);
1042 }
1043 }
1044
1045 /* Sort the list according to interval starts */
1046 sort_interval_list_by_start(liveIntervals);
1047
1048 if (dbg) {
1049 /* print interval info */
1050 for (i = 0; i < liveIntervals->Num; i++) {
1051 const struct interval *inv = liveIntervals->Intervals + i;
1052 printf("Reg[%d] live [%d, %d]:",
1053 inv->Reg, inv->Start, inv->End);
1054 if (1) {
1055 GLuint j;
1056 for (j = 0; j < inv->Start; j++)
1057 printf(" ");
1058 for (j = inv->Start; j <= inv->End; j++)
1059 printf("x");
1060 }
1061 printf("\n");
1062 }
1063 }
1064
1065 return GL_TRUE;
1066 }
1067
1068
1069 /** Scan the array of used register flags to find free entry */
1070 static GLint
1071 alloc_register(GLboolean usedRegs[MAX_PROGRAM_TEMPS])
1072 {
1073 GLuint k;
1074 for (k = 0; k < MAX_PROGRAM_TEMPS; k++) {
1075 if (!usedRegs[k]) {
1076 usedRegs[k] = GL_TRUE;
1077 return k;
1078 }
1079 }
1080 return -1;
1081 }
1082
1083
1084 /**
1085 * This function implements "Linear Scan Register Allocation" to reduce
1086 * the number of temporary registers used by the program.
1087 *
1088 * We compute the "live interval" for all temporary registers then
1089 * examine the overlap of the intervals to allocate new registers.
1090 * Basically, if two intervals do not overlap, they can use the same register.
1091 */
1092 static void
1093 _mesa_reallocate_registers(struct gl_program *prog)
1094 {
1095 struct interval_list liveIntervals;
1096 GLint registerMap[MAX_PROGRAM_TEMPS];
1097 GLboolean usedRegs[MAX_PROGRAM_TEMPS];
1098 GLuint i;
1099 GLint maxTemp = -1;
1100
1101 if (dbg) {
1102 printf("Optimize: Begin live-interval register reallocation\n");
1103 _mesa_print_program(prog);
1104 }
1105
1106 for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
1107 registerMap[i] = -1;
1108 usedRegs[i] = GL_FALSE;
1109 }
1110
1111 if (!find_live_intervals(prog, &liveIntervals)) {
1112 if (dbg)
1113 printf("Aborting register reallocation\n");
1114 return;
1115 }
1116
1117 {
1118 struct interval_list activeIntervals;
1119 activeIntervals.Num = 0;
1120
1121 /* loop over live intervals, allocating a new register for each */
1122 for (i = 0; i < liveIntervals.Num; i++) {
1123 const struct interval *live = liveIntervals.Intervals + i;
1124
1125 if (dbg)
1126 printf("Consider register %u\n", live->Reg);
1127
1128 /* Expire old intervals. Intervals which have ended with respect
1129 * to the live interval can have their remapped registers freed.
1130 */
1131 {
1132 GLint j;
1133 for (j = 0; j < (GLint) activeIntervals.Num; j++) {
1134 const struct interval *inv = activeIntervals.Intervals + j;
1135 if (inv->End >= live->Start) {
1136 /* Stop now. Since the activeInterval list is sorted
1137 * we know we don't have to go further.
1138 */
1139 break;
1140 }
1141 else {
1142 /* Interval 'inv' has expired */
1143 const GLint regNew = registerMap[inv->Reg];
1144 ASSERT(regNew >= 0);
1145
1146 if (dbg)
1147 printf(" expire interval for reg %u\n", inv->Reg);
1148
1149 /* remove interval j from active list */
1150 remove_interval(&activeIntervals, inv);
1151 j--; /* counter-act j++ in for-loop above */
1152
1153 /* return register regNew to the free pool */
1154 if (dbg)
1155 printf(" free reg %d\n", regNew);
1156 ASSERT(usedRegs[regNew] == GL_TRUE);
1157 usedRegs[regNew] = GL_FALSE;
1158 }
1159 }
1160 }
1161
1162 /* find a free register for this live interval */
1163 {
1164 const GLint k = alloc_register(usedRegs);
1165 if (k < 0) {
1166 /* out of registers, give up */
1167 return;
1168 }
1169 registerMap[live->Reg] = k;
1170 maxTemp = MAX2(maxTemp, k);
1171 if (dbg)
1172 printf(" remap register %u -> %d\n", live->Reg, k);
1173 }
1174
1175 /* Insert this live interval into the active list which is sorted
1176 * by increasing end points.
1177 */
1178 insert_interval_by_end(&activeIntervals, live);
1179 }
1180 }
1181
1182 if (maxTemp + 1 < (GLint) liveIntervals.Num) {
1183 /* OK, we've reduced the number of registers needed.
1184 * Scan the program and replace all the old temporary register
1185 * indexes with the new indexes.
1186 */
1187 replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
1188
1189 prog->NumTemporaries = maxTemp + 1;
1190 }
1191
1192 if (dbg) {
1193 printf("Optimize: End live-interval register reallocation\n");
1194 printf("Num temp regs before: %u after: %u\n",
1195 liveIntervals.Num, maxTemp + 1);
1196 _mesa_print_program(prog);
1197 }
1198 }
1199
1200
1201 #if 0
1202 static void
1203 print_it(GLcontext *ctx, struct gl_program *program, const char *txt) {
1204 fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
1205 _mesa_print_program(program);
1206 _mesa_print_program_parameters(ctx, program);
1207 fprintf(stderr, "\n\n");
1208 }
1209 #endif
1210
1211
1212 /**
1213 * Apply optimizations to the given program to eliminate unnecessary
1214 * instructions, temp regs, etc.
1215 */
1216 void
1217 _mesa_optimize_program(GLcontext *ctx, struct gl_program *program)
1218 {
1219 GLboolean any_change;
1220
1221 /* Stop when no modifications were output */
1222 do {
1223 any_change = GL_FALSE;
1224 _mesa_remove_extra_move_use(program);
1225 if (_mesa_remove_dead_code_global(program))
1226 any_change = GL_TRUE;
1227 if (_mesa_remove_extra_moves(program))
1228 any_change = GL_TRUE;
1229 if (_mesa_remove_dead_code_local(program))
1230 any_change = GL_TRUE;
1231 _mesa_reallocate_registers(program);
1232 } while (any_change);
1233 }
1234