s390.c (s390_function_value): Rename to ...
[gcc.git] / gcc / implicit-zee.c
1 /* Redundant Zero-extension elimination for targets that implicitly
2 zero-extend writes to the lower 32-bit portion of 64-bit registers.
3 Copyright (C) 2010 Free Software Foundation, Inc.
4 Contributed by Sriraman Tallam (tmsriram@google.com) and
5 Silvius Rus (rus@google.com)
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23
24 /* Problem Description :
25 --------------------
26 This pass is intended to be applicable only to targets that implicitly
27 zero-extend 64-bit registers after writing to their lower 32-bit half.
28 For instance, x86_64 zero-extends the upper bits of a register
29 implicitly whenever an instruction writes to its lower 32-bit half.
30 For example, the instruction *add edi,eax* also zero-extends the upper
31 32-bits of rax after doing the addition. These zero extensions come
32 for free and GCC does not always exploit this well. That is, it has
33 been observed that there are plenty of cases where GCC explicitly
34 zero-extends registers for x86_64 that are actually useless because
35 these registers were already implicitly zero-extended in a prior
36 instruction. This pass tries to eliminate such useless zero extension
37 instructions.
38
39 How does this pass work ?
40 --------------------------
41
42 This pass is run after register allocation. Hence, all registers that
43 this pass deals with are hard registers. This pass first looks for a
44 zero-extension instruction that could possibly be redundant. Such zero
45 extension instructions show up in RTL with the pattern :
46 (set (reg:DI x) (zero_extend:DI (reg:SI x))).
47 where x can be any one of the 64-bit hard registers.
48 Now, this pass tries to eliminate this instruction by merging the
49 zero-extension with the definitions of register x. For instance, if
50 one of the definitions of register x was :
51 (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
52 then the combination converts this into :
53 (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
54 If all the merged definitions are recognizable assembly instructions,
55 the zero-extension is effectively eliminated. For example, in x86_64,
56 implicit zero-extensions are captured with appropriate patterns in the
57 i386.md file. Hence, these merged definition can be matched to a single
58 assembly instruction. The original zero-extension instruction is then
59 deleted if all the definitions can be merged.
60
61 However, there are cases where the definition instruction cannot be
62 merged with a zero-extend. Examples are CALL instructions. In such
63 cases, the original zero extension is not redundant and this pass does
64 not delete it.
65
66 Handling conditional moves :
67 ----------------------------
68
69 Architectures like x86_64 support conditional moves whose semantics for
70 zero-extension differ from the other instructions. For instance, the
71 instruction *cmov ebx, eax*
72 zero-extends eax onto rax only when the move from ebx to eax happens.
73 Otherwise, eax may not be zero-extended. Conditional moves appear as
74 RTL instructions of the form
75 (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
76 This pass tries to merge a zero-extension with a conditional move by
77 actually merging the defintions of y and z with a zero-extend and then
78 converting the conditional move into :
79 (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
80 Since registers y and z are zero-extended, register x will also be
81 zero-extended after the conditional move. Note that this step has to
82 be done transitively since the definition of a conditional copy can be
83 another conditional copy.
84
85 Motivating Example I :
86 ---------------------
87 For this program :
88 **********************************************
89 bad_code.c
90
91 int mask[1000];
92
93 int foo(unsigned x)
94 {
95 if (x < 10)
96 x = x * 45;
97 else
98 x = x * 78;
99 return mask[x];
100 }
101 **********************************************
102
103 $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination)
104 ........
105 400315: b8 4e 00 00 00 mov $0x4e,%eax
106 40031a: 0f af f8 imul %eax,%edi
107 40031d: 89 ff mov %edi,%edi --> Useless extend
108 40031f: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
109 400326: c3 retq
110 ......
111 400330: ba 2d 00 00 00 mov $0x2d,%edx
112 400335: 0f af fa imul %edx,%edi
113 400338: 89 ff mov %edi,%edi --> Useless extend
114 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
115 400341: c3 retq
116
117 $ gcc -O2 -fzee bad_code.c
118 ......
119 400315: 6b ff 4e imul $0x4e,%edi,%edi
120 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
121 40031f: c3 retq
122 400320: 6b ff 2d imul $0x2d,%edi,%edi
123 400323: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
124 40032a: c3 retq
125
126 Motivating Example II :
127 ---------------------
128
129 Here is an example with a conditional move.
130
131 For this program :
132 **********************************************
133
134 unsigned long long foo(unsigned x , unsigned y)
135 {
136 unsigned z;
137 if (x > 100)
138 z = x + y;
139 else
140 z = x - y;
141 return (unsigned long long)(z);
142 }
143
144 $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination)
145 ............
146 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx
147 400363: 89 f8 mov %edi,%eax
148 400365: 29 f0 sub %esi,%eax
149 400367: 83 ff 65 cmp $0x65,%edi
150 40036a: 0f 43 c2 cmovae %edx,%eax
151 40036d: 89 c0 mov %eax,%eax --> Useless extend
152 40036f: c3 retq
153
154 $ gcc -O2 -fzee bad_code.c
155 .............
156 400360: 89 fa mov %edi,%edx
157 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax
158 400365: 29 f2 sub %esi,%edx
159 400367: 83 ff 65 cmp $0x65,%edi
160 40036a: 89 d6 mov %edx,%esi
161 40036c: 48 0f 42 c6 cmovb %rsi,%rax
162 400370: c3 retq
163
164
165 Usefulness :
166 ----------
167
168 This pass reduces the dynamic instruction count of a compression benchmark
169 by 2.8% and improves its run time by about 1%. The compression benchmark
170 had the following code sequence in a very hot region of code before ZEE
171 optimized it :
172
173 shr $0x5, %edx
174 mov %edx, %edx --> Useless zero-extend */
175
176
177 #include "config.h"
178 #include "system.h"
179 #include "coretypes.h"
180 #include "tm.h"
181 #include "rtl.h"
182 #include "tree.h"
183 #include "tm_p.h"
184 #include "flags.h"
185 #include "regs.h"
186 #include "hard-reg-set.h"
187 #include "basic-block.h"
188 #include "insn-config.h"
189 #include "function.h"
190 #include "expr.h"
191 #include "insn-attr.h"
192 #include "recog.h"
193 #include "diagnostic-core.h"
194 #include "target.h"
195 #include "timevar.h"
196 #include "optabs.h"
197 #include "insn-codes.h"
198 #include "rtlhooks-def.h"
199 /* Include output.h for dump_file. */
200 #include "output.h"
201 #include "params.h"
202 #include "timevar.h"
203 #include "tree-pass.h"
204 #include "df.h"
205 #include "cgraph.h"
206
207 /* This says if a register is newly created for the purpose of
208 zero-extension. */
209
210 enum insn_merge_code
211 {
212 MERGE_NOT_ATTEMPTED = 0,
213 MERGE_SUCCESS
214 };
215
216 /* This says if a INSN UID or its definition has already been merged
217 with a zero-extend or not. */
218
219 static enum insn_merge_code *is_insn_merge_attempted;
220 static int max_insn_uid;
221
222 /* Returns the merge code status for INSN. */
223
224 static enum insn_merge_code
225 get_insn_status (rtx insn)
226 {
227 gcc_assert (INSN_UID (insn) < max_insn_uid);
228 return is_insn_merge_attempted[INSN_UID (insn)];
229 }
230
231 /* Sets the merge code status of INSN to CODE. */
232
233 static void
234 set_insn_status (rtx insn, enum insn_merge_code code)
235 {
236 gcc_assert (INSN_UID (insn) < max_insn_uid);
237 is_insn_merge_attempted[INSN_UID (insn)] = code;
238 }
239
240 /* Given a insn (CURR_INSN) and a pointer to the SET rtx (ORIG_SET)
241 that needs to be modified, this code modifies the SET rtx to a
242 new SET rtx that zero_extends the right hand expression into a DImode
243 register (NEWREG) on the left hand side. Note that multiple
244 assumptions are made about the nature of the set that needs
245 to be true for this to work and is called from merge_def_and_ze.
246
247 Original :
248 (set (reg:SI a) (expression))
249
250 Transform :
251 (set (reg:DI a) (zero_extend (expression)))
252
253 Special Cases :
254 If the expression is a constant or another zero_extend directly
255 assign it to the DI mode register. */
256
257 static bool
258 combine_set_zero_extend (rtx curr_insn, rtx *orig_set, rtx newreg)
259 {
260 rtx temp_extension, simplified_temp_extension, new_set, new_const_int;
261 rtx orig_src;
262 HOST_WIDE_INT val;
263 unsigned int mask, delta_width;
264
265 /* Change the SET rtx and validate it. */
266 orig_src = SET_SRC (*orig_set);
267 new_set = NULL_RTX;
268
269 /* The right hand side can also be VOIDmode. These cases have to be
270 handled differently. */
271
272 if (GET_MODE (orig_src) != SImode)
273 {
274 /* Merge constants by directly moving the constant into the
275 DImode register under some conditions. */
276
277 if (GET_CODE (orig_src) == CONST_INT
278 && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (SImode))
279 {
280 if (INTVAL (orig_src) >= 0)
281 new_set = gen_rtx_SET (VOIDmode, newreg, orig_src);
282 else if (INTVAL (orig_src) < 0)
283 {
284 /* Zero-extending a negative SImode integer into DImode
285 makes it a positive integer. Convert the given negative
286 integer into the appropriate integer when zero-extended. */
287
288 delta_width = HOST_BITS_PER_WIDE_INT - GET_MODE_BITSIZE (SImode);
289 mask = (~(unsigned HOST_WIDE_INT) 0) >> delta_width;
290 val = INTVAL (orig_src);
291 val = val & mask;
292 new_const_int = gen_rtx_CONST_INT (VOIDmode, val);
293 new_set = gen_rtx_SET (VOIDmode, newreg, new_const_int);
294 }
295 else
296 return false;
297 }
298 else
299 {
300 /* This is mostly due to a call insn that should not be
301 optimized. */
302
303 return false;
304 }
305 }
306 else if (GET_CODE (orig_src) == ZERO_EXTEND)
307 {
308 /* Here a zero-extend is used to get to SI. Why not make it
309 all the way till DI. */
310
311 temp_extension = gen_rtx_ZERO_EXTEND (DImode, XEXP (orig_src, 0));
312 simplified_temp_extension = simplify_rtx (temp_extension);
313 if (simplified_temp_extension)
314 temp_extension = simplified_temp_extension;
315 new_set = gen_rtx_SET (VOIDmode, newreg, temp_extension);
316 }
317 else if (GET_CODE (orig_src) == IF_THEN_ELSE)
318 {
319 /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise,
320 in general, IF_THEN_ELSE should not be combined. */
321
322 return false;
323 }
324 else
325 {
326 /* This is the normal case we expect. */
327
328 temp_extension = gen_rtx_ZERO_EXTEND (DImode, orig_src);
329 simplified_temp_extension = simplify_rtx (temp_extension);
330 if (simplified_temp_extension)
331 temp_extension = simplified_temp_extension;
332 new_set = gen_rtx_SET (VOIDmode, newreg, temp_extension);
333 }
334
335 gcc_assert (new_set != NULL_RTX);
336
337 /* This change is a part of a group of changes. Hence,
338 validate_change will not try to commit the change. */
339
340 if (validate_change (curr_insn, orig_set, new_set, true))
341 {
342 if (dump_file)
343 {
344 fprintf (dump_file, "Merged Instruction with ZERO_EXTEND:\n");
345 print_rtl_single (dump_file, curr_insn);
346 }
347 return true;
348 }
349 return false;
350 }
351
352 /* This returns the DI mode for the SI register REG_SI. */
353
354 static rtx
355 get_reg_di (rtx reg_si)
356 {
357 rtx newreg;
358
359 newreg = gen_rtx_REG (DImode, REGNO (reg_si));
360 gcc_assert (newreg);
361 return newreg;
362 }
363
364 /* Treat if_then_else insns, where the operands of both branches
365 are registers, as copies. For instance,
366 Original :
367 (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
368 Transformed :
369 (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
370 DEF_INSN is the if_then_else insn. */
371
372 static bool
373 transform_ifelse (rtx def_insn)
374 {
375 rtx set_insn = PATTERN (def_insn);
376 rtx srcreg, dstreg, srcreg2;
377 rtx map_srcreg, map_dstreg, map_srcreg2;
378 rtx ifexpr;
379 rtx cond;
380 rtx new_set;
381
382 gcc_assert (GET_CODE (set_insn) == SET);
383 cond = XEXP (SET_SRC (set_insn), 0);
384 dstreg = SET_DEST (set_insn);
385 srcreg = XEXP (SET_SRC (set_insn), 1);
386 srcreg2 = XEXP (SET_SRC (set_insn), 2);
387 map_srcreg = get_reg_di (srcreg);
388 map_srcreg2 = get_reg_di (srcreg2);
389 map_dstreg = get_reg_di (dstreg);
390 ifexpr = gen_rtx_IF_THEN_ELSE (DImode, cond, map_srcreg, map_srcreg2);
391 new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr);
392
393 if (validate_change (def_insn, &PATTERN (def_insn), new_set, true))
394 {
395 if (dump_file)
396 {
397 fprintf (dump_file, "Cond_Move Instruction's mode extended :\n");
398 print_rtl_single (dump_file, def_insn);
399 }
400 return true;
401 }
402 else
403 return false;
404 }
405
406 /* Function to get all the immediate definitions of an instruction.
407 The reaching definitions are desired for WHICH_REG used in
408 CURR_INSN. This function returns 0 if there was an error getting
409 a definition. Upon success, this function returns the number of
410 definitions and stores the definitions in DEST. */
411
412 static int
413 get_defs (rtx curr_insn, rtx which_reg, VEC (rtx,heap) **dest)
414 {
415 df_ref reg_info, *defs;
416 struct df_link *def_chain;
417 int n_refs = 0;
418
419 defs = DF_INSN_USES (curr_insn);
420 reg_info = NULL;
421
422 while (*defs)
423 {
424 reg_info = *defs;
425 if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG)
426 return 0;
427 if (REGNO (DF_REF_REG (reg_info)) == REGNO (which_reg))
428 break;
429 defs++;
430 }
431
432 gcc_assert (reg_info != NULL && defs != NULL);
433 def_chain = DF_REF_CHAIN (reg_info);
434
435 while (def_chain)
436 {
437 /* Problem getting some definition for this instruction. */
438
439 if (def_chain->ref == NULL)
440 return 0;
441 if (DF_REF_INSN_INFO (def_chain->ref) == NULL)
442 return 0;
443 def_chain = def_chain->next;
444 }
445
446 def_chain = DF_REF_CHAIN (reg_info);
447
448 if (dest == NULL)
449 return 1;
450
451 while (def_chain)
452 {
453 VEC_safe_push (rtx, heap, *dest, DF_REF_INSN (def_chain->ref));
454 def_chain = def_chain->next;
455 n_refs++;
456 }
457 return n_refs;
458 }
459
460 /* rtx function to check if this SET insn, EXPR, is a conditional copy insn :
461 (set (reg:SI a ) (IF_THEN_ELSE (cond) (reg:SI b) (reg:SI c)))
462 Called from is_insn_cond_copy. DATA stores the two registers on each
463 side of the condition. */
464
465 static int
466 is_this_a_cmove (rtx expr, void *data)
467 {
468 /* Check for conditional (if-then-else) copy. */
469
470 if (GET_CODE (expr) == SET
471 && GET_CODE (SET_DEST (expr)) == REG
472 && GET_MODE (SET_DEST (expr)) == SImode
473 && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE
474 && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
475 && GET_MODE (XEXP (SET_SRC (expr), 1)) == SImode
476 && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG
477 && GET_MODE (XEXP (SET_SRC (expr), 2)) == SImode)
478 {
479 ((rtx *)data)[0] = XEXP (SET_SRC (expr), 1);
480 ((rtx *)data)[1] = XEXP (SET_SRC (expr), 2);
481 return 1;
482 }
483 return 0;
484 }
485
486 /* This returns 1 if it found
487 (SET (reg:SI REGNO (def_reg)) (if_then_else (cond) (REG:SI x1) (REG:SI x2)))
488 in the DEF_INSN pattern. It stores the x1 and x2 in COPY_REG_1
489 and COPY_REG_2. */
490
491 static int
492 is_insn_cond_copy (rtx def_insn, rtx *copy_reg_1, rtx *copy_reg_2)
493 {
494 int type;
495 rtx set_expr;
496 rtx srcreg[2];
497
498 srcreg[0] = NULL_RTX;
499 srcreg[1] = NULL_RTX;
500
501 set_expr = single_set (def_insn);
502
503 if (set_expr == NULL_RTX)
504 return 0;
505
506 type = is_this_a_cmove (set_expr, (void *) srcreg);
507
508 if (type)
509 {
510 *copy_reg_1 = srcreg[0];
511 *copy_reg_2 = srcreg[1];
512 return type;
513 }
514
515 return 0;
516 }
517
518 /* Reaching Definitions of the zero-extended register could be conditional
519 copies or regular definitions. This function separates the two types into
520 two lists, DEFS_LIST and COPIES_LIST. This is necessary because, if a
521 reaching definition is a conditional copy, combining the zero_extend with
522 this definition is wrong. Conditional copies are merged by transitively
523 merging its definitions. The defs_list is populated with all the reaching
524 definitions of the zero-extension instruction (ZERO_EXTEND_INSN) which must
525 be merged with a zero_extend. The copies_list contains all the conditional
526 moves that will later be extended into a DI mode conditonal move if all the
527 merges are successful. The function returns false when there is a failure
528 in getting some definitions, like that of parameters. It returns 1 upon
529 success, 0 upon failure and 2 when all definitions of the ZERO_EXTEND_INSN
530 were merged previously. */
531
532 static int
533 make_defs_and_copies_lists (rtx zero_extend_insn, rtx set_pat,
534 VEC (rtx,heap) **defs_list,
535 VEC (rtx,heap) **copies_list)
536 {
537 bool *is_insn_visited;
538 VEC (rtx,heap) *work_list;
539 rtx srcreg, copy_reg_1, copy_reg_2;
540 rtx def_insn;
541 int n_defs = 0;
542 int vec_index = 0;
543 int n_worklist = 0;
544 int i, is_copy;
545
546 srcreg = XEXP (SET_SRC (set_pat), 0);
547 work_list = VEC_alloc (rtx, heap, 8);
548
549 /* Initialize the Work List */
550 n_worklist = get_defs (zero_extend_insn, srcreg, &work_list);
551
552 if (n_worklist == 0)
553 {
554 VEC_free (rtx, heap, work_list);
555 /* The number of defs being equal to zero can only imply that all of its
556 definitions have been previously merged. */
557 return 2;
558 }
559
560 is_insn_visited = XNEWVEC (bool, max_insn_uid);
561
562 for (i = 0; i < max_insn_uid; i++)
563 is_insn_visited[i] = false;
564
565
566 /* Perform transitive closure for conditional copies. */
567 while (n_worklist > vec_index)
568 {
569 def_insn = VEC_index (rtx, work_list, vec_index);
570 gcc_assert (INSN_UID (def_insn) < max_insn_uid);
571
572 if (is_insn_visited[INSN_UID (def_insn)])
573 {
574 vec_index++;
575 continue;
576 }
577
578 is_insn_visited[INSN_UID (def_insn)] = true;
579 copy_reg_1 = copy_reg_2 = NULL_RTX;
580 is_copy = is_insn_cond_copy (def_insn, &copy_reg_1, &copy_reg_2);
581 if (is_copy)
582 {
583 gcc_assert (copy_reg_1 && copy_reg_2);
584
585 /* Push it into the copy list first. */
586
587 VEC_safe_push (rtx, heap, *copies_list, def_insn);
588
589 /* Perform transitive closure here */
590
591 n_defs = get_defs (def_insn, copy_reg_1, &work_list);
592
593 if (n_defs == 0)
594 {
595 VEC_free (rtx, heap, work_list);
596 XDELETEVEC (is_insn_visited);
597 return 0;
598 }
599 n_worklist += n_defs;
600
601 n_defs = get_defs (def_insn, copy_reg_2, &work_list);
602 if (n_defs == 0)
603 {
604 VEC_free (rtx, heap, work_list);
605 XDELETEVEC (is_insn_visited);
606 return 0;
607 }
608 n_worklist += n_defs;
609 }
610 else
611 {
612 VEC_safe_push (rtx, heap, *defs_list, def_insn);
613 }
614 vec_index++;
615 }
616
617 VEC_free (rtx, heap, work_list);
618 XDELETEVEC (is_insn_visited);
619 return 1;
620 }
621
622 /* Merge the DEF_INSN with a zero-extend. Calls combine_set_zero_extend
623 on the SET pattern. */
624
625 static bool
626 merge_def_and_ze (rtx def_insn)
627 {
628 enum rtx_code code;
629 rtx setreg;
630 rtx *sub_rtx;
631 rtx s_expr;
632 int i;
633
634 code = GET_CODE (PATTERN (def_insn));
635 sub_rtx = NULL;
636
637 if (code == PARALLEL)
638 {
639 for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
640 {
641 s_expr = XVECEXP (PATTERN (def_insn), 0, i);
642 if (GET_CODE (s_expr) != SET)
643 continue;
644
645 if (sub_rtx == NULL)
646 sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
647 else
648 {
649 /* PARALLEL with multiple SETs. */
650 return false;
651 }
652 }
653 }
654 else if (code == SET)
655 sub_rtx = &PATTERN (def_insn);
656 else
657 {
658 /* It is not a PARALLEL or a SET, what could it be ? */
659 return false;
660 }
661
662 gcc_assert (sub_rtx != NULL);
663
664 if (GET_CODE (SET_DEST (*sub_rtx)) == REG
665 && GET_MODE (SET_DEST (*sub_rtx)) == SImode)
666 {
667 setreg = get_reg_di (SET_DEST (*sub_rtx));
668 return combine_set_zero_extend (def_insn, sub_rtx, setreg);
669 }
670 else
671 return false;
672 return true;
673 }
674
675 /* This function goes through all reaching defs of the source
676 of the zero extension instruction (ZERO_EXTEND_INSN) and
677 tries to combine the zero extension with the definition
678 instruction. The changes are made as a group so that even
679 if one definition cannot be merged, all reaching definitions
680 end up not being merged. When a conditional copy is encountered,
681 merging is attempted transitively on its definitions. It returns
682 true upon success and false upon failure. */
683
684 static bool
685 combine_reaching_defs (rtx zero_extend_insn, rtx set_pat)
686 {
687 rtx def_insn;
688 bool merge_successful = true;
689 int i;
690 int defs_ix;
691 int outcome;
692
693 /* To store the definitions that have been merged. */
694
695 VEC (rtx, heap) *defs_list, *copies_list, *vec;
696 enum insn_merge_code merge_code;
697
698 defs_list = VEC_alloc (rtx, heap, 8);
699 copies_list = VEC_alloc (rtx, heap, 8);
700
701 outcome = make_defs_and_copies_lists (zero_extend_insn,
702 set_pat, &defs_list, &copies_list);
703
704 /* outcome == 2 implies that all the definitions for this zero_extend were
705 merged while previously when handling other zero_extends. */
706
707 if (outcome == 2)
708 {
709 VEC_free (rtx, heap, defs_list);
710 VEC_free (rtx, heap, copies_list);
711 if (dump_file)
712 fprintf (dump_file, "All definitions have been merged previously.\n");
713 return true;
714 }
715
716 if (outcome == 0)
717 {
718 VEC_free (rtx, heap, defs_list);
719 VEC_free (rtx, heap, copies_list);
720 return false;
721 }
722
723 merge_successful = true;
724
725 /* Go through the defs vector and try to merge all the definitions
726 in this vector. */
727
728 vec = VEC_alloc (rtx, heap, 8);
729 FOR_EACH_VEC_ELT (rtx, defs_list, defs_ix, def_insn)
730 {
731 merge_code = get_insn_status (def_insn);
732 gcc_assert (merge_code == MERGE_NOT_ATTEMPTED);
733
734 if (merge_def_and_ze (def_insn))
735 VEC_safe_push (rtx, heap, vec, def_insn);
736 else
737 {
738 merge_successful = false;
739 break;
740 }
741 }
742
743 /* Now go through the conditional copies vector and try to merge all
744 the copies in this vector. */
745
746 if (merge_successful)
747 {
748 FOR_EACH_VEC_ELT (rtx, copies_list, i, def_insn)
749 {
750 if (transform_ifelse (def_insn))
751 {
752 VEC_safe_push (rtx, heap, vec, def_insn);
753 }
754 else
755 {
756 merge_successful = false;
757 break;
758 }
759 }
760 }
761
762 if (merge_successful)
763 {
764 /* Commit the changes here if possible */
765 /* XXX : Now, it is an all or nothing scenario. Even if one definition
766 cannot be merged we totally bail. In future, allow zero-extensions to
767 be partially eliminated along those paths where the definitions could
768 be merged. */
769
770 if (apply_change_group ())
771 {
772 if (dump_file)
773 fprintf (dump_file, "All merges were successful ....\n");
774
775 FOR_EACH_VEC_ELT (rtx, vec, i, def_insn)
776 {
777 set_insn_status (def_insn, MERGE_SUCCESS);
778 }
779
780 VEC_free (rtx, heap, vec);
781 VEC_free (rtx, heap, defs_list);
782 VEC_free (rtx, heap, copies_list);
783 return true;
784 }
785 else
786 {
787 /* Changes need not be cancelled explicitly as apply_change_group
788 does it. Print list of definitions in the dump_file for debug
789 purposes. This zero-extension cannot be deleted. */
790
791 if (dump_file)
792 {
793 FOR_EACH_VEC_ELT (rtx, vec, i, def_insn)
794 {
795 fprintf (dump_file, " Ummergable definitions : \n");
796 print_rtl_single (dump_file, def_insn);
797 }
798 }
799 }
800 }
801 else
802 {
803 /* Cancel any changes that have been made so far. */
804 cancel_changes (0);
805 }
806
807 VEC_free (rtx, heap, vec);
808 VEC_free (rtx, heap, defs_list);
809 VEC_free (rtx, heap, copies_list);
810 return false;
811 }
812
813 /* Carry information about zero-extensions while walking the RTL. */
814
815 struct zero_extend_info
816 {
817 /* The insn where the zero-extension is. */
818 rtx insn;
819
820 /* The list of candidates. */
821 VEC (rtx, heap) *insn_list;
822 };
823
824 /* Add a zero-extend pattern that could be eliminated. This is called via
825 note_stores from find_removable_zero_extends. */
826
827 static void
828 add_removable_zero_extend (rtx x ATTRIBUTE_UNUSED, const_rtx expr, void *data)
829 {
830 struct zero_extend_info *zei = (struct zero_extend_info *)data;
831 rtx src, dest;
832
833 /* We are looking for SET (REG:DI N) (ZERO_EXTEND (REG:SI N)). */
834 if (GET_CODE (expr) != SET)
835 return;
836
837 src = SET_SRC (expr);
838 dest = SET_DEST (expr);
839
840 if (REG_P (dest)
841 && GET_MODE (dest) == DImode
842 && GET_CODE (src) == ZERO_EXTEND
843 && REG_P (XEXP (src, 0))
844 && GET_MODE (XEXP (src, 0)) == SImode
845 && REGNO (dest) == REGNO (XEXP (src, 0)))
846 {
847 if (get_defs (zei->insn, XEXP (src, 0), NULL))
848 VEC_safe_push (rtx, heap, zei->insn_list, zei->insn);
849 else if (dump_file)
850 {
851 fprintf (dump_file, "Cannot eliminate zero-extension: \n");
852 print_rtl_single (dump_file, zei->insn);
853 fprintf (dump_file, "No defs. Could be extending parameters.\n");
854 }
855 }
856 }
857
858 /* Traverse the instruction stream looking for zero-extends and return the
859 list of candidates. */
860
861 static VEC (rtx,heap)*
862 find_removable_zero_extends (void)
863 {
864 struct zero_extend_info zei;
865 basic_block bb;
866 rtx insn;
867
868 zei.insn_list = VEC_alloc (rtx, heap, 8);
869
870 FOR_EACH_BB (bb)
871 FOR_BB_INSNS (bb, insn)
872 {
873 if (!NONDEBUG_INSN_P (insn))
874 continue;
875
876 zei.insn = insn;
877 note_stores (PATTERN (insn), add_removable_zero_extend, &zei);
878 }
879
880 return zei.insn_list;
881 }
882
883 /* This is the main function that checks the insn stream for redundant
884 zero extensions and tries to remove them if possible. */
885
886 static unsigned int
887 find_and_remove_ze (void)
888 {
889 rtx curr_insn = NULL_RTX;
890 int i;
891 int ix;
892 long long num_realized = 0;
893 long long num_ze_opportunities = 0;
894 VEC (rtx, heap) *zeinsn_list;
895 VEC (rtx, heap) *zeinsn_del_list;
896
897 /* Construct DU chain to get all reaching definitions of each
898 zero-extension instruction. */
899
900 df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
901 df_analyze ();
902
903 max_insn_uid = get_max_uid ();
904
905 is_insn_merge_attempted
906 = XNEWVEC (enum insn_merge_code,
907 sizeof (enum insn_merge_code) * max_insn_uid);
908
909 for (i = 0; i < max_insn_uid; i++)
910 is_insn_merge_attempted[i] = MERGE_NOT_ATTEMPTED;
911
912 num_ze_opportunities = num_realized = 0;
913
914 zeinsn_del_list = VEC_alloc (rtx, heap, 4);
915
916 zeinsn_list = find_removable_zero_extends ();
917
918 FOR_EACH_VEC_ELT (rtx, zeinsn_list, ix, curr_insn)
919 {
920 num_ze_opportunities++;
921 /* Try to combine the zero-extends with the definition here. */
922
923 if (dump_file)
924 {
925 fprintf (dump_file, "Trying to eliminate zero extension : \n");
926 print_rtl_single (dump_file, curr_insn);
927 }
928
929 if (combine_reaching_defs (curr_insn, PATTERN (curr_insn)))
930 {
931 if (dump_file)
932 fprintf (dump_file, "Eliminated the zero extension...\n");
933 num_realized++;
934 VEC_safe_push (rtx, heap, zeinsn_del_list, curr_insn);
935 }
936 }
937
938 /* Delete all useless zero extensions here in one sweep. */
939 FOR_EACH_VEC_ELT (rtx, zeinsn_del_list, ix, curr_insn)
940 delete_insn (curr_insn);
941
942 free (is_insn_merge_attempted);
943 VEC_free (rtx, heap, zeinsn_list);
944 VEC_free (rtx, heap, zeinsn_del_list);
945
946 if (dump_file && num_ze_opportunities > 0)
947 fprintf (dump_file, "\n %s : num_zee_opportunities = %lld "
948 "num_realized = %lld \n",
949 current_function_name (),
950 num_ze_opportunities, num_realized);
951
952 df_finish_pass (false);
953 return 0;
954 }
955
956 /* Find and remove redundant zero extensions. */
957
958 static unsigned int
959 rest_of_handle_zee (void)
960 {
961 timevar_push (TV_ZEE);
962 find_and_remove_ze ();
963 timevar_pop (TV_ZEE);
964 return 0;
965 }
966
967 /* Run zee pass when flag_zee is set at optimization level > 0. */
968
969 static bool
970 gate_handle_zee (void)
971 {
972 return (optimize > 0 && flag_zee);
973 }
974
975 struct rtl_opt_pass pass_implicit_zee =
976 {
977 {
978 RTL_PASS,
979 "zee", /* name */
980 gate_handle_zee, /* gate */
981 rest_of_handle_zee, /* execute */
982 NULL, /* sub */
983 NULL, /* next */
984 0, /* static_pass_number */
985 TV_ZEE, /* tv_id */
986 0, /* properties_required */
987 0, /* properties_provided */
988 0, /* properties_destroyed */
989 0, /* todo_flags_start */
990 TODO_ggc_collect |
991 TODO_dump_func |
992 TODO_verify_rtl_sharing, /* todo_flags_finish */
993 }
994 };