i965: Support new local ID push constant & cross-thread constants
[mesa.git] / src / mesa / program / prog_optimize.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
20 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24
25
26 #include "main/glheader.h"
27 #include "main/context.h"
28 #include "main/macros.h"
29 #include "program.h"
30 #include "prog_instruction.h"
31 #include "prog_optimize.h"
32 #include "prog_print.h"
33
34
35 #define MAX_LOOP_NESTING 50
36 /* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
37 * register allocate many temporary values into that small number of
38 * temps. So allow large temporary indices coming into the register
39 * allocator.
40 */
41 #define REG_ALLOCATE_MAX_PROGRAM_TEMPS ((1 << INST_INDEX_BITS) - 1)
42
43 static GLboolean dbg = GL_FALSE;
44
45 #define NO_MASK 0xf
46
47 /**
48 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
49 * are read from the given src in this instruction, We also provide
50 * one optional masks which may mask other components in the dst
51 * register
52 */
53 static GLuint
54 get_src_arg_mask(const struct prog_instruction *inst,
55 GLuint arg, GLuint dst_mask)
56 {
57 GLuint read_mask, channel_mask;
58 GLuint comp;
59
60 assert(arg < _mesa_num_inst_src_regs(inst->Opcode));
61
62 /* Form the dst register, find the written channels */
63 switch (inst->Opcode) {
64 case OPCODE_MOV:
65 case OPCODE_MIN:
66 case OPCODE_MAX:
67 case OPCODE_ABS:
68 case OPCODE_ADD:
69 case OPCODE_MAD:
70 case OPCODE_MUL:
71 case OPCODE_SUB:
72 case OPCODE_CMP:
73 case OPCODE_FLR:
74 case OPCODE_FRC:
75 case OPCODE_LRP:
76 case OPCODE_SGE:
77 case OPCODE_SLT:
78 case OPCODE_SSG:
79 channel_mask = inst->DstReg.WriteMask & dst_mask;
80 break;
81 case OPCODE_RCP:
82 case OPCODE_SIN:
83 case OPCODE_COS:
84 case OPCODE_RSQ:
85 case OPCODE_POW:
86 case OPCODE_EX2:
87 case OPCODE_LOG:
88 channel_mask = WRITEMASK_X;
89 break;
90 case OPCODE_DP2:
91 channel_mask = WRITEMASK_XY;
92 break;
93 case OPCODE_DP3:
94 case OPCODE_XPD:
95 channel_mask = WRITEMASK_XYZ;
96 break;
97 default:
98 channel_mask = WRITEMASK_XYZW;
99 break;
100 }
101
102 /* Now, given the src swizzle and the written channels, find which
103 * components are actually read
104 */
105 read_mask = 0x0;
106 for (comp = 0; comp < 4; ++comp) {
107 const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
108 if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
109 read_mask |= 1 << coord;
110 }
111
112 return read_mask;
113 }
114
115
116 /**
117 * For a MOV instruction, compute a write mask when src register also has
118 * a mask
119 */
120 static GLuint
121 get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
122 {
123 const GLuint mask = mov->DstReg.WriteMask;
124 GLuint comp;
125 GLuint updated_mask = 0x0;
126
127 assert(mov->Opcode == OPCODE_MOV);
128
129 for (comp = 0; comp < 4; ++comp) {
130 GLuint src_comp;
131 if ((mask & (1 << comp)) == 0)
132 continue;
133 src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
134 if ((src_mask & (1 << src_comp)) == 0)
135 continue;
136 updated_mask |= 1 << comp;
137 }
138
139 return updated_mask;
140 }
141
142
143 /**
144 * Ensure that the swizzle is regular. That is, all of the swizzle
145 * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
146 */
147 static GLboolean
148 is_swizzle_regular(GLuint swz)
149 {
150 return GET_SWZ(swz,0) <= SWIZZLE_W &&
151 GET_SWZ(swz,1) <= SWIZZLE_W &&
152 GET_SWZ(swz,2) <= SWIZZLE_W &&
153 GET_SWZ(swz,3) <= SWIZZLE_W;
154 }
155
156
157 /**
158 * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
159 * \return number of instructions removed
160 */
161 static GLuint
162 remove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
163 {
164 GLint i, removeEnd = 0, removeCount = 0;
165 GLuint totalRemoved = 0;
166
167 /* go backward */
168 for (i = prog->NumInstructions - 1; i >= 0; i--) {
169 if (removeFlags[i]) {
170 totalRemoved++;
171 if (removeCount == 0) {
172 /* begin a run of instructions to remove */
173 removeEnd = i;
174 removeCount = 1;
175 }
176 else {
177 /* extend the run of instructions to remove */
178 removeCount++;
179 }
180 }
181 else {
182 /* don't remove this instruction, but check if the preceeding
183 * instructions are to be removed.
184 */
185 if (removeCount > 0) {
186 GLint removeStart = removeEnd - removeCount + 1;
187 _mesa_delete_instructions(prog, removeStart, removeCount);
188 removeStart = removeCount = 0; /* reset removal info */
189 }
190 }
191 }
192 /* Finish removing if the first instruction was to be removed. */
193 if (removeCount > 0) {
194 GLint removeStart = removeEnd - removeCount + 1;
195 _mesa_delete_instructions(prog, removeStart, removeCount);
196 }
197 return totalRemoved;
198 }
199
200
201 /**
202 * Remap register indexes according to map.
203 * \param prog the program to search/replace
204 * \param file the type of register file to search/replace
205 * \param map maps old register indexes to new indexes
206 */
207 static void
208 replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
209 {
210 GLuint i;
211
212 for (i = 0; i < prog->NumInstructions; i++) {
213 struct prog_instruction *inst = prog->Instructions + i;
214 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
215 GLuint j;
216 for (j = 0; j < numSrc; j++) {
217 if (inst->SrcReg[j].File == file) {
218 GLuint index = inst->SrcReg[j].Index;
219 assert(map[index] >= 0);
220 inst->SrcReg[j].Index = map[index];
221 }
222 }
223 if (inst->DstReg.File == file) {
224 const GLuint index = inst->DstReg.Index;
225 assert(map[index] >= 0);
226 inst->DstReg.Index = map[index];
227 }
228 }
229 }
230
231
232 /**
233 * Remove dead instructions from the given program.
234 * This is very primitive for now. Basically look for temp registers
235 * that are written to but never read. Remove any instructions that
236 * write to such registers. Be careful with condition code setters.
237 */
238 static GLboolean
239 _mesa_remove_dead_code_global(struct gl_program *prog)
240 {
241 GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
242 GLboolean *removeInst; /* per-instruction removal flag */
243 GLuint i, rem = 0, comp;
244
245 memset(tempRead, 0, sizeof(tempRead));
246
247 if (dbg) {
248 printf("Optimize: Begin dead code removal\n");
249 /*_mesa_print_program(prog);*/
250 }
251
252 removeInst =
253 calloc(prog->NumInstructions, sizeof(GLboolean));
254
255 /* Determine which temps are read and written */
256 for (i = 0; i < prog->NumInstructions; i++) {
257 const struct prog_instruction *inst = prog->Instructions + i;
258 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
259 GLuint j;
260
261 /* check src regs */
262 for (j = 0; j < numSrc; j++) {
263 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
264 const GLuint index = inst->SrcReg[j].Index;
265 GLuint read_mask;
266 assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
267 read_mask = get_src_arg_mask(inst, j, NO_MASK);
268
269 if (inst->SrcReg[j].RelAddr) {
270 if (dbg)
271 printf("abort remove dead code (indirect temp)\n");
272 goto done;
273 }
274
275 for (comp = 0; comp < 4; comp++) {
276 const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
277 if (swz <= SWIZZLE_W) {
278 if ((read_mask & (1 << swz)) == 0)
279 continue;
280 tempRead[index][swz] = GL_TRUE;
281 }
282 }
283 }
284 }
285
286 /* check dst reg */
287 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
288 assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
289
290 if (inst->DstReg.RelAddr) {
291 if (dbg)
292 printf("abort remove dead code (indirect temp)\n");
293 goto done;
294 }
295 }
296 }
297
298 /* find instructions that write to dead registers, flag for removal */
299 for (i = 0; i < prog->NumInstructions; i++) {
300 struct prog_instruction *inst = prog->Instructions + i;
301 const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
302
303 if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
304 GLint chan, index = inst->DstReg.Index;
305
306 for (chan = 0; chan < 4; chan++) {
307 if (!tempRead[index][chan] &&
308 inst->DstReg.WriteMask & (1 << chan)) {
309 if (dbg) {
310 printf("Remove writemask on %u.%c\n", i,
311 chan == 3 ? 'w' : 'x' + chan);
312 }
313 inst->DstReg.WriteMask &= ~(1 << chan);
314 rem++;
315 }
316 }
317
318 if (inst->DstReg.WriteMask == 0) {
319 /* If we cleared all writes, the instruction can be removed. */
320 if (dbg)
321 printf("Remove instruction %u: \n", i);
322 removeInst[i] = GL_TRUE;
323 }
324 }
325 }
326
327 /* now remove the instructions which aren't needed */
328 rem = remove_instructions(prog, removeInst);
329
330 if (dbg) {
331 printf("Optimize: End dead code removal.\n");
332 printf(" %u channel writes removed\n", rem);
333 printf(" %u instructions removed\n", rem);
334 /*_mesa_print_program(prog);*/
335 }
336
337 done:
338 free(removeInst);
339 return rem != 0;
340 }
341
342
343 enum inst_use
344 {
345 READ,
346 WRITE,
347 FLOW,
348 END
349 };
350
351
352 /**
353 * Scan forward in program from 'start' for the next occurances of TEMP[index].
354 * We look if an instruction reads the component given by the masks and if they
355 * are overwritten.
356 * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
357 * that we can't look further.
358 */
359 static enum inst_use
360 find_next_use(const struct gl_program *prog,
361 GLuint start,
362 GLuint index,
363 GLuint mask)
364 {
365 GLuint i;
366
367 for (i = start; i < prog->NumInstructions; i++) {
368 const struct prog_instruction *inst = prog->Instructions + i;
369 switch (inst->Opcode) {
370 case OPCODE_BGNLOOP:
371 case OPCODE_BGNSUB:
372 case OPCODE_CAL:
373 case OPCODE_CONT:
374 case OPCODE_IF:
375 case OPCODE_ELSE:
376 case OPCODE_ENDIF:
377 case OPCODE_ENDLOOP:
378 case OPCODE_ENDSUB:
379 case OPCODE_RET:
380 return FLOW;
381 case OPCODE_END:
382 return END;
383 default:
384 {
385 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
386 GLuint j;
387 for (j = 0; j < numSrc; j++) {
388 if (inst->SrcReg[j].RelAddr ||
389 (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
390 inst->SrcReg[j].Index == (GLint)index &&
391 (get_src_arg_mask(inst,j,NO_MASK) & mask)))
392 return READ;
393 }
394 if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
395 inst->DstReg.File == PROGRAM_TEMPORARY &&
396 inst->DstReg.Index == index) {
397 mask &= ~inst->DstReg.WriteMask;
398 if (mask == 0)
399 return WRITE;
400 }
401 }
402 }
403 }
404 return END;
405 }
406
407
408 /**
409 * Is the given instruction opcode a flow-control opcode?
410 * XXX maybe move this into prog_instruction.[ch]
411 */
412 static GLboolean
413 _mesa_is_flow_control_opcode(enum prog_opcode opcode)
414 {
415 switch (opcode) {
416 case OPCODE_BGNLOOP:
417 case OPCODE_BGNSUB:
418 case OPCODE_CAL:
419 case OPCODE_CONT:
420 case OPCODE_IF:
421 case OPCODE_ELSE:
422 case OPCODE_END:
423 case OPCODE_ENDIF:
424 case OPCODE_ENDLOOP:
425 case OPCODE_ENDSUB:
426 case OPCODE_RET:
427 return GL_TRUE;
428 default:
429 return GL_FALSE;
430 }
431 }
432
433
434 /**
435 * Test if the given instruction is a simple MOV (no conditional updating,
436 * not relative addressing, no negation/abs, etc).
437 */
438 static GLboolean
439 can_downward_mov_be_modifed(const struct prog_instruction *mov)
440 {
441 return
442 mov->Opcode == OPCODE_MOV &&
443 mov->SrcReg[0].RelAddr == 0 &&
444 mov->SrcReg[0].Negate == 0 &&
445 mov->DstReg.RelAddr == 0;
446 }
447
448
449 static GLboolean
450 can_upward_mov_be_modifed(const struct prog_instruction *mov)
451 {
452 return
453 can_downward_mov_be_modifed(mov) &&
454 mov->DstReg.File == PROGRAM_TEMPORARY &&
455 !mov->Saturate;
456 }
457
458
459 /**
460 * Try to remove use of extraneous MOV instructions, to free them up for dead
461 * code removal.
462 */
463 static void
464 _mesa_remove_extra_move_use(struct gl_program *prog)
465 {
466 GLuint i, j;
467
468 if (dbg) {
469 printf("Optimize: Begin remove extra move use\n");
470 _mesa_print_program(prog);
471 }
472
473 /*
474 * Look for sequences such as this:
475 * MOV tmpX, arg0;
476 * ...
477 * FOO tmpY, tmpX, arg1;
478 * and convert into:
479 * MOV tmpX, arg0;
480 * ...
481 * FOO tmpY, arg0, arg1;
482 */
483
484 for (i = 0; i + 1 < prog->NumInstructions; i++) {
485 const struct prog_instruction *mov = prog->Instructions + i;
486 GLuint dst_mask, src_mask;
487 if (can_upward_mov_be_modifed(mov) == GL_FALSE)
488 continue;
489
490 /* Scanning the code, we maintain the components which are still active in
491 * these two masks
492 */
493 dst_mask = mov->DstReg.WriteMask;
494 src_mask = get_src_arg_mask(mov, 0, NO_MASK);
495
496 /* Walk through remaining instructions until the or src reg gets
497 * rewritten or we get into some flow-control, eliminating the use of
498 * this MOV.
499 */
500 for (j = i + 1; j < prog->NumInstructions; j++) {
501 struct prog_instruction *inst2 = prog->Instructions + j;
502 GLuint arg;
503
504 if (_mesa_is_flow_control_opcode(inst2->Opcode))
505 break;
506
507 /* First rewrite this instruction's args if appropriate. */
508 for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
509 GLuint comp, read_mask;
510
511 if (inst2->SrcReg[arg].File != mov->DstReg.File ||
512 inst2->SrcReg[arg].Index != mov->DstReg.Index ||
513 inst2->SrcReg[arg].RelAddr)
514 continue;
515 read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
516
517 /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
518 * components read still come from the mov instructions
519 */
520 if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
521 (read_mask & dst_mask) == read_mask) {
522 for (comp = 0; comp < 4; comp++) {
523 const GLuint inst2_swz =
524 GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
525 const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
526 inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
527 inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
528 inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
529 inst2_swz) & 0x1) << comp);
530 }
531 inst2->SrcReg[arg].File = mov->SrcReg[0].File;
532 inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
533 }
534 }
535
536 /* The source of MOV is written. This potentially deactivates some
537 * components from the src and dst of the MOV instruction
538 */
539 if (inst2->DstReg.File == mov->DstReg.File &&
540 (inst2->DstReg.RelAddr ||
541 inst2->DstReg.Index == mov->DstReg.Index)) {
542 dst_mask &= ~inst2->DstReg.WriteMask;
543 src_mask = get_src_arg_mask(mov, 0, dst_mask);
544 }
545
546 /* Idem when the destination of mov is written */
547 if (inst2->DstReg.File == mov->SrcReg[0].File &&
548 (inst2->DstReg.RelAddr ||
549 inst2->DstReg.Index == mov->SrcReg[0].Index)) {
550 src_mask &= ~inst2->DstReg.WriteMask;
551 dst_mask &= get_dst_mask_for_mov(mov, src_mask);
552 }
553 if (dst_mask == 0)
554 break;
555 }
556 }
557
558 if (dbg) {
559 printf("Optimize: End remove extra move use.\n");
560 /*_mesa_print_program(prog);*/
561 }
562 }
563
564
565 /**
566 * Complements dead_code_global. Try to remove code in block of code by
567 * carefully monitoring the swizzles. Both functions should be merged into one
568 * with a proper control flow graph
569 */
570 static GLboolean
571 _mesa_remove_dead_code_local(struct gl_program *prog)
572 {
573 GLboolean *removeInst;
574 GLuint i, arg, rem = 0;
575
576 removeInst =
577 calloc(prog->NumInstructions, sizeof(GLboolean));
578
579 for (i = 0; i < prog->NumInstructions; i++) {
580 const struct prog_instruction *inst = prog->Instructions + i;
581 const GLuint index = inst->DstReg.Index;
582 const GLuint mask = inst->DstReg.WriteMask;
583 enum inst_use use;
584
585 /* We must deactivate the pass as soon as some indirection is used */
586 if (inst->DstReg.RelAddr)
587 goto done;
588 for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
589 if (inst->SrcReg[arg].RelAddr)
590 goto done;
591
592 if (_mesa_is_flow_control_opcode(inst->Opcode) ||
593 _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
594 inst->DstReg.File != PROGRAM_TEMPORARY ||
595 inst->DstReg.RelAddr)
596 continue;
597
598 use = find_next_use(prog, i+1, index, mask);
599 if (use == WRITE || use == END)
600 removeInst[i] = GL_TRUE;
601 }
602
603 rem = remove_instructions(prog, removeInst);
604
605 done:
606 free(removeInst);
607 return rem != 0;
608 }
609
610
611 /**
612 * Try to inject the destination of mov as the destination of inst and recompute
613 * the swizzles operators for the sources of inst if required. Return GL_TRUE
614 * of the substitution was possible, GL_FALSE otherwise
615 */
616 static GLboolean
617 _mesa_merge_mov_into_inst(struct prog_instruction *inst,
618 const struct prog_instruction *mov)
619 {
620 /* Indirection table which associates destination and source components for
621 * the mov instruction
622 */
623 const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
624
625 /* Some components are not written by inst. We cannot remove the mov */
626 if (mask != (inst->DstReg.WriteMask & mask))
627 return GL_FALSE;
628
629 inst->Saturate |= mov->Saturate;
630
631 /* Depending on the instruction, we may need to recompute the swizzles.
632 * Also, some other instructions (like TEX) are not linear. We will only
633 * consider completely active sources and destinations
634 */
635 switch (inst->Opcode) {
636
637 /* Carstesian instructions: we compute the swizzle */
638 case OPCODE_MOV:
639 case OPCODE_MIN:
640 case OPCODE_MAX:
641 case OPCODE_ABS:
642 case OPCODE_ADD:
643 case OPCODE_MAD:
644 case OPCODE_MUL:
645 case OPCODE_SUB:
646 {
647 GLuint dst_to_src_comp[4] = {0,0,0,0};
648 GLuint dst_comp, arg;
649 for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
650 if (mov->DstReg.WriteMask & (1 << dst_comp)) {
651 const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
652 assert(src_comp < 4);
653 dst_to_src_comp[dst_comp] = src_comp;
654 }
655 }
656
657 /* Patch each source of the instruction */
658 for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
659 const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
660 inst->SrcReg[arg].Swizzle = 0;
661
662 /* Reset each active component of the swizzle */
663 for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
664 GLuint src_comp, arg_comp;
665 if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
666 continue;
667 src_comp = dst_to_src_comp[dst_comp];
668 assert(src_comp < 4);
669 arg_comp = GET_SWZ(arg_swz, src_comp);
670 assert(arg_comp < 4);
671 inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
672 }
673 }
674 inst->DstReg = mov->DstReg;
675 return GL_TRUE;
676 }
677
678 /* Dot products and scalar instructions: we only change the destination */
679 case OPCODE_RCP:
680 case OPCODE_SIN:
681 case OPCODE_COS:
682 case OPCODE_RSQ:
683 case OPCODE_POW:
684 case OPCODE_EX2:
685 case OPCODE_LOG:
686 case OPCODE_DP2:
687 case OPCODE_DP3:
688 case OPCODE_DP4:
689 inst->DstReg = mov->DstReg;
690 return GL_TRUE;
691
692 /* All other instructions require fully active components with no swizzle */
693 default:
694 if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
695 inst->DstReg.WriteMask != WRITEMASK_XYZW)
696 return GL_FALSE;
697 inst->DstReg = mov->DstReg;
698 return GL_TRUE;
699 }
700 }
701
702
703 /**
704 * Try to remove extraneous MOV instructions from the given program.
705 */
706 static GLboolean
707 _mesa_remove_extra_moves(struct gl_program *prog)
708 {
709 GLboolean *removeInst; /* per-instruction removal flag */
710 GLuint i, rem = 0, nesting = 0;
711
712 if (dbg) {
713 printf("Optimize: Begin remove extra moves\n");
714 _mesa_print_program(prog);
715 }
716
717 removeInst =
718 calloc(prog->NumInstructions, sizeof(GLboolean));
719
720 /*
721 * Look for sequences such as this:
722 * FOO tmpX, arg0, arg1;
723 * MOV tmpY, tmpX;
724 * and convert into:
725 * FOO tmpY, arg0, arg1;
726 */
727
728 for (i = 0; i < prog->NumInstructions; i++) {
729 const struct prog_instruction *mov = prog->Instructions + i;
730
731 switch (mov->Opcode) {
732 case OPCODE_BGNLOOP:
733 case OPCODE_BGNSUB:
734 case OPCODE_IF:
735 nesting++;
736 break;
737 case OPCODE_ENDLOOP:
738 case OPCODE_ENDSUB:
739 case OPCODE_ENDIF:
740 nesting--;
741 break;
742 case OPCODE_MOV:
743 if (i > 0 &&
744 can_downward_mov_be_modifed(mov) &&
745 mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
746 nesting == 0)
747 {
748
749 /* see if this MOV can be removed */
750 const GLuint id = mov->SrcReg[0].Index;
751 struct prog_instruction *prevInst;
752 GLuint prevI;
753
754 /* get pointer to previous instruction */
755 prevI = i - 1;
756 while (prevI > 0 && removeInst[prevI])
757 prevI--;
758 prevInst = prog->Instructions + prevI;
759
760 if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
761 prevInst->DstReg.Index == id &&
762 prevInst->DstReg.RelAddr == 0) {
763
764 const GLuint dst_mask = prevInst->DstReg.WriteMask;
765 enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
766
767 if (next_use == WRITE || next_use == END) {
768 /* OK, we can safely remove this MOV instruction.
769 * Transform:
770 * prevI: FOO tempIndex, x, y;
771 * i: MOV z, tempIndex;
772 * Into:
773 * prevI: FOO z, x, y;
774 */
775 if (_mesa_merge_mov_into_inst(prevInst, mov)) {
776 removeInst[i] = GL_TRUE;
777 if (dbg) {
778 printf("Remove MOV at %u\n", i);
779 printf("new prev inst %u: ", prevI);
780 _mesa_print_instruction(prevInst);
781 }
782 }
783 }
784 }
785 }
786 break;
787 default:
788 ; /* nothing */
789 }
790 }
791
792 /* now remove the instructions which aren't needed */
793 rem = remove_instructions(prog, removeInst);
794
795 free(removeInst);
796
797 if (dbg) {
798 printf("Optimize: End remove extra moves. %u instructions removed\n", rem);
799 /*_mesa_print_program(prog);*/
800 }
801
802 return rem != 0;
803 }
804
805
806 /** A live register interval */
807 struct interval
808 {
809 GLuint Reg; /** The temporary register index */
810 GLuint Start, End; /** Start/end instruction numbers */
811 };
812
813
814 /** A list of register intervals */
815 struct interval_list
816 {
817 GLuint Num;
818 struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
819 };
820
821
822 static void
823 append_interval(struct interval_list *list, const struct interval *inv)
824 {
825 list->Intervals[list->Num++] = *inv;
826 }
827
828
829 /** Insert interval inv into list, sorted by interval end */
830 static void
831 insert_interval_by_end(struct interval_list *list, const struct interval *inv)
832 {
833 /* XXX we could do a binary search insertion here since list is sorted */
834 GLint i = list->Num - 1;
835 while (i >= 0 && list->Intervals[i].End > inv->End) {
836 list->Intervals[i + 1] = list->Intervals[i];
837 i--;
838 }
839 list->Intervals[i + 1] = *inv;
840 list->Num++;
841
842 #ifdef DEBUG
843 {
844 GLuint i;
845 for (i = 0; i + 1 < list->Num; i++) {
846 assert(list->Intervals[i].End <= list->Intervals[i + 1].End);
847 }
848 }
849 #endif
850 }
851
852
853 /** Remove the given interval from the interval list */
854 static void
855 remove_interval(struct interval_list *list, const struct interval *inv)
856 {
857 /* XXX we could binary search since list is sorted */
858 GLuint k;
859 for (k = 0; k < list->Num; k++) {
860 if (list->Intervals[k].Reg == inv->Reg) {
861 /* found, remove it */
862 assert(list->Intervals[k].Start == inv->Start);
863 assert(list->Intervals[k].End == inv->End);
864 while (k < list->Num - 1) {
865 list->Intervals[k] = list->Intervals[k + 1];
866 k++;
867 }
868 list->Num--;
869 return;
870 }
871 }
872 }
873
874
875 /** called by qsort() */
876 static int
877 compare_start(const void *a, const void *b)
878 {
879 const struct interval *ia = (const struct interval *) a;
880 const struct interval *ib = (const struct interval *) b;
881 if (ia->Start < ib->Start)
882 return -1;
883 else if (ia->Start > ib->Start)
884 return +1;
885 else
886 return 0;
887 }
888
889
890 /** sort the interval list according to interval starts */
891 static void
892 sort_interval_list_by_start(struct interval_list *list)
893 {
894 qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
895 #ifdef DEBUG
896 {
897 GLuint i;
898 for (i = 0; i + 1 < list->Num; i++) {
899 assert(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
900 }
901 }
902 #endif
903 }
904
905 struct loop_info
906 {
907 GLuint Start, End; /**< Start, end instructions of loop */
908 };
909
910 /**
911 * Update the intermediate interval info for register 'index' and
912 * instruction 'ic'.
913 */
914 static void
915 update_interval(GLint intBegin[], GLint intEnd[],
916 struct loop_info *loopStack, GLuint loopStackDepth,
917 GLuint index, GLuint ic)
918 {
919 unsigned i;
920 GLuint begin = ic;
921 GLuint end = ic;
922
923 /* If the register is used in a loop, extend its lifetime through the end
924 * of the outermost loop that doesn't contain its definition.
925 */
926 for (i = 0; i < loopStackDepth; i++) {
927 if (intBegin[index] < loopStack[i].Start) {
928 end = loopStack[i].End;
929 break;
930 }
931 }
932
933 /* Variables that are live at the end of a loop will also be live at the
934 * beginning, so an instruction inside of a loop should have its live
935 * interval begin at the start of the outermost loop.
936 */
937 if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) {
938 begin = loopStack[0].Start;
939 }
940
941 assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
942 if (intBegin[index] == -1) {
943 assert(intEnd[index] == -1);
944 intBegin[index] = begin;
945 intEnd[index] = end;
946 }
947 else {
948 intEnd[index] = end;
949 }
950 }
951
952
953 /**
954 * Find first/last instruction that references each temporary register.
955 */
956 GLboolean
957 _mesa_find_temp_intervals(const struct prog_instruction *instructions,
958 GLuint numInstructions,
959 GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
960 GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
961 {
962 struct loop_info loopStack[MAX_LOOP_NESTING];
963 GLuint loopStackDepth = 0;
964 GLuint i;
965
966 for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
967 intBegin[i] = intEnd[i] = -1;
968 }
969
970 /* Scan instructions looking for temporary registers */
971 for (i = 0; i < numInstructions; i++) {
972 const struct prog_instruction *inst = instructions + i;
973 if (inst->Opcode == OPCODE_BGNLOOP) {
974 loopStack[loopStackDepth].Start = i;
975 loopStack[loopStackDepth].End = inst->BranchTarget;
976 loopStackDepth++;
977 }
978 else if (inst->Opcode == OPCODE_ENDLOOP) {
979 loopStackDepth--;
980 }
981 else if (inst->Opcode == OPCODE_CAL) {
982 return GL_FALSE;
983 }
984 else {
985 const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
986 GLuint j;
987 for (j = 0; j < numSrc; j++) {
988 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
989 const GLuint index = inst->SrcReg[j].Index;
990 if (inst->SrcReg[j].RelAddr)
991 return GL_FALSE;
992 update_interval(intBegin, intEnd, loopStack, loopStackDepth,
993 index, i);
994 }
995 }
996 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
997 const GLuint index = inst->DstReg.Index;
998 if (inst->DstReg.RelAddr)
999 return GL_FALSE;
1000 update_interval(intBegin, intEnd, loopStack, loopStackDepth,
1001 index, i);
1002 }
1003 }
1004 }
1005
1006 return GL_TRUE;
1007 }
1008
1009
1010 /**
1011 * Find the live intervals for each temporary register in the program.
1012 * For register R, the interval [A,B] indicates that R is referenced
1013 * from instruction A through instruction B.
1014 * Special consideration is needed for loops and subroutines.
1015 * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
1016 */
1017 static GLboolean
1018 find_live_intervals(struct gl_program *prog,
1019 struct interval_list *liveIntervals)
1020 {
1021 GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1022 GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1023 GLuint i;
1024
1025 /*
1026 * Note: we'll return GL_FALSE below if we find relative indexing
1027 * into the TEMP register file. We can't handle that yet.
1028 * We also give up on subroutines for now.
1029 */
1030
1031 if (dbg) {
1032 printf("Optimize: Begin find intervals\n");
1033 }
1034
1035 /* build intermediate arrays */
1036 if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
1037 intBegin, intEnd))
1038 return GL_FALSE;
1039
1040 /* Build live intervals list from intermediate arrays */
1041 liveIntervals->Num = 0;
1042 for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
1043 if (intBegin[i] >= 0) {
1044 struct interval inv;
1045 inv.Reg = i;
1046 inv.Start = intBegin[i];
1047 inv.End = intEnd[i];
1048 append_interval(liveIntervals, &inv);
1049 }
1050 }
1051
1052 /* Sort the list according to interval starts */
1053 sort_interval_list_by_start(liveIntervals);
1054
1055 if (dbg) {
1056 /* print interval info */
1057 for (i = 0; i < liveIntervals->Num; i++) {
1058 const struct interval *inv = liveIntervals->Intervals + i;
1059 printf("Reg[%d] live [%d, %d]:",
1060 inv->Reg, inv->Start, inv->End);
1061 if (1) {
1062 GLuint j;
1063 for (j = 0; j < inv->Start; j++)
1064 printf(" ");
1065 for (j = inv->Start; j <= inv->End; j++)
1066 printf("x");
1067 }
1068 printf("\n");
1069 }
1070 }
1071
1072 return GL_TRUE;
1073 }
1074
1075
1076 /** Scan the array of used register flags to find free entry */
1077 static GLint
1078 alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
1079 {
1080 GLuint k;
1081 for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
1082 if (!usedRegs[k]) {
1083 usedRegs[k] = GL_TRUE;
1084 return k;
1085 }
1086 }
1087 return -1;
1088 }
1089
1090
1091 /**
1092 * This function implements "Linear Scan Register Allocation" to reduce
1093 * the number of temporary registers used by the program.
1094 *
1095 * We compute the "live interval" for all temporary registers then
1096 * examine the overlap of the intervals to allocate new registers.
1097 * Basically, if two intervals do not overlap, they can use the same register.
1098 */
1099 static void
1100 _mesa_reallocate_registers(struct gl_program *prog)
1101 {
1102 struct interval_list liveIntervals;
1103 GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1104 GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1105 GLuint i;
1106 GLint maxTemp = -1;
1107
1108 if (dbg) {
1109 printf("Optimize: Begin live-interval register reallocation\n");
1110 _mesa_print_program(prog);
1111 }
1112
1113 for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
1114 registerMap[i] = -1;
1115 usedRegs[i] = GL_FALSE;
1116 }
1117
1118 if (!find_live_intervals(prog, &liveIntervals)) {
1119 if (dbg)
1120 printf("Aborting register reallocation\n");
1121 return;
1122 }
1123
1124 {
1125 struct interval_list activeIntervals;
1126 activeIntervals.Num = 0;
1127
1128 /* loop over live intervals, allocating a new register for each */
1129 for (i = 0; i < liveIntervals.Num; i++) {
1130 const struct interval *live = liveIntervals.Intervals + i;
1131
1132 if (dbg)
1133 printf("Consider register %u\n", live->Reg);
1134
1135 /* Expire old intervals. Intervals which have ended with respect
1136 * to the live interval can have their remapped registers freed.
1137 */
1138 {
1139 GLint j;
1140 for (j = 0; j < (GLint) activeIntervals.Num; j++) {
1141 const struct interval *inv = activeIntervals.Intervals + j;
1142 if (inv->End >= live->Start) {
1143 /* Stop now. Since the activeInterval list is sorted
1144 * we know we don't have to go further.
1145 */
1146 break;
1147 }
1148 else {
1149 /* Interval 'inv' has expired */
1150 const GLint regNew = registerMap[inv->Reg];
1151 assert(regNew >= 0);
1152
1153 if (dbg)
1154 printf(" expire interval for reg %u\n", inv->Reg);
1155
1156 /* remove interval j from active list */
1157 remove_interval(&activeIntervals, inv);
1158 j--; /* counter-act j++ in for-loop above */
1159
1160 /* return register regNew to the free pool */
1161 if (dbg)
1162 printf(" free reg %d\n", regNew);
1163 assert(usedRegs[regNew] == GL_TRUE);
1164 usedRegs[regNew] = GL_FALSE;
1165 }
1166 }
1167 }
1168
1169 /* find a free register for this live interval */
1170 {
1171 const GLint k = alloc_register(usedRegs);
1172 if (k < 0) {
1173 /* out of registers, give up */
1174 return;
1175 }
1176 registerMap[live->Reg] = k;
1177 maxTemp = MAX2(maxTemp, k);
1178 if (dbg)
1179 printf(" remap register %u -> %d\n", live->Reg, k);
1180 }
1181
1182 /* Insert this live interval into the active list which is sorted
1183 * by increasing end points.
1184 */
1185 insert_interval_by_end(&activeIntervals, live);
1186 }
1187 }
1188
1189 if (maxTemp + 1 < (GLint) liveIntervals.Num) {
1190 /* OK, we've reduced the number of registers needed.
1191 * Scan the program and replace all the old temporary register
1192 * indexes with the new indexes.
1193 */
1194 replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
1195
1196 prog->NumTemporaries = maxTemp + 1;
1197 }
1198
1199 if (dbg) {
1200 printf("Optimize: End live-interval register reallocation\n");
1201 printf("Num temp regs before: %u after: %u\n",
1202 liveIntervals.Num, maxTemp + 1);
1203 _mesa_print_program(prog);
1204 }
1205 }
1206
1207
1208 #if 0
1209 static void
1210 print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
1211 fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
1212 _mesa_print_program(program);
1213 _mesa_print_program_parameters(ctx, program);
1214 fprintf(stderr, "\n\n");
1215 }
1216 #endif
1217
1218 /**
1219 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
1220 * instruction is the first instruction to write to register T0. The are
1221 * several lowering passes done in GLSL IR (e.g. branches and
1222 * relative addressing) that create a large number of conditional assignments
1223 * that ir_to_mesa converts to CMP instructions like the one mentioned above.
1224 *
1225 * Here is why this conversion is safe:
1226 * CMP T0, T1 T2 T0 can be expanded to:
1227 * if (T1 < 0.0)
1228 * MOV T0, T2;
1229 * else
1230 * MOV T0, T0;
1231 *
1232 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
1233 * as the original program. If (T1 < 0.0) evaluates to false, executing
1234 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
1235 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
1236 * because any instruction that was going to read from T0 after this was going
1237 * to read a garbage value anyway.
1238 */
1239 static void
1240 _mesa_simplify_cmp(struct gl_program * program)
1241 {
1242 GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1243 GLuint outputWrites[MAX_PROGRAM_OUTPUTS];
1244 GLuint i;
1245
1246 if (dbg) {
1247 printf("Optimize: Begin reads without writes\n");
1248 _mesa_print_program(program);
1249 }
1250
1251 for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
1252 tempWrites[i] = 0;
1253 }
1254
1255 for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) {
1256 outputWrites[i] = 0;
1257 }
1258
1259 for (i = 0; i < program->NumInstructions; i++) {
1260 struct prog_instruction *inst = program->Instructions + i;
1261 GLuint prevWriteMask;
1262
1263 /* Give up if we encounter relative addressing or flow control. */
1264 if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) {
1265 return;
1266 }
1267
1268 if (inst->DstReg.File == PROGRAM_OUTPUT) {
1269 assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS);
1270 prevWriteMask = outputWrites[inst->DstReg.Index];
1271 outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
1272 } else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
1273 assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
1274 prevWriteMask = tempWrites[inst->DstReg.Index];
1275 tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
1276 } else {
1277 /* No other register type can be a destination register. */
1278 continue;
1279 }
1280
1281 /* For a CMP to be considered a conditional write, the destination
1282 * register and source register two must be the same. */
1283 if (inst->Opcode == OPCODE_CMP
1284 && !(inst->DstReg.WriteMask & prevWriteMask)
1285 && inst->SrcReg[2].File == inst->DstReg.File
1286 && inst->SrcReg[2].Index == inst->DstReg.Index
1287 && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) {
1288
1289 inst->Opcode = OPCODE_MOV;
1290 inst->SrcReg[0] = inst->SrcReg[1];
1291
1292 /* Unused operands are expected to have the file set to
1293 * PROGRAM_UNDEFINED. This is how _mesa_init_instructions initializes
1294 * all of the sources.
1295 */
1296 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
1297 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
1298 inst->SrcReg[2].File = PROGRAM_UNDEFINED;
1299 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
1300 }
1301 }
1302 if (dbg) {
1303 printf("Optimize: End reads without writes\n");
1304 _mesa_print_program(program);
1305 }
1306 }
1307
1308 /**
1309 * Apply optimizations to the given program to eliminate unnecessary
1310 * instructions, temp regs, etc.
1311 */
1312 void
1313 _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
1314 {
1315 GLboolean any_change;
1316
1317 _mesa_simplify_cmp(program);
1318 /* Stop when no modifications were output */
1319 do {
1320 any_change = GL_FALSE;
1321 _mesa_remove_extra_move_use(program);
1322 if (_mesa_remove_dead_code_global(program))
1323 any_change = GL_TRUE;
1324 if (_mesa_remove_extra_moves(program))
1325 any_change = GL_TRUE;
1326 if (_mesa_remove_dead_code_local(program))
1327 any_change = GL_TRUE;
1328
1329 any_change = _mesa_constant_fold(program) || any_change;
1330 _mesa_reallocate_registers(program);
1331 } while (any_change);
1332 }
1333